add models
|
@ -0,0 +1,28 @@
|
||||||
|
*.7z filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.arrow filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.bin filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.bin.* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.ftz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.gz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.h5 filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.joblib filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.model filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.onnx filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.ot filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.parquet filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pb filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pt filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pth filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.rar filter=lfs diff=lfs merge=lfs -text
|
||||||
|
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tflite filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tgz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.xz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.zip filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.zstandard filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
model.safetensors filter=lfs diff=lfs merge=lfs -text
|
|
@ -0,0 +1,195 @@
|
||||||
|
# Cat-Dog Classification Model
|
||||||
|
|
||||||
|
## Introduction
|
||||||
|
|
||||||
|
This repository contains a Cat-Dog classification model based on the ResNet-50 architecture. The model is trained to distinguish between images of cats and dogs.
|
||||||
|
|
||||||
|
## ResNet Model
|
||||||
|
|
||||||
|
ResNet-50 is a deep convolutional neural network with 50 layers. It is designed to overcome the vanishing gradient problem, which is common in very deep networks, by using skip connections or residuals. This allows the network to be significantly deeper while still being easy to optimize.
|
||||||
|
|
||||||
|
## Training
|
||||||
|
|
||||||
|
The model is trained on a dataset of cat and dog images. The training process involves the following steps:
|
||||||
|
|
||||||
|
1. **Data Preprocessing**: Images are resized, cropped, and normalized.
|
||||||
|
2. **Model Initialization**: A pre-trained ResNet-50 model is loaded and the final fully connected layer is adjusted to output two classes (cat and dog).
|
||||||
|
3. **Training Loop**: The model is trained using a standard training loop with stochastic gradient descent (SGD) and a learning rate scheduler.
|
||||||
|
4. **Model Evaluation**: The best model is selected based on validation accuracy and saved for inference.
|
||||||
|
|
||||||
|
### Training Code
|
||||||
|
|
||||||
|
Here is a simplified version of the training code:
|
||||||
|
|
||||||
|
```python
|
||||||
|
import os
|
||||||
|
import torch
|
||||||
|
import torch.nn as nn
|
||||||
|
import torch.optim as optim
|
||||||
|
from torch.utils.data import DataLoader, Dataset
|
||||||
|
from torchvision import models, transforms
|
||||||
|
from PIL import Image
|
||||||
|
from safetensors.torch import save_file
|
||||||
|
|
||||||
|
class CatDogDataset(Dataset):
    """Image dataset that derives each label from the file name.

    Every entry of ``root_dir`` whose name contains ``'cat'`` gets label 0;
    every other entry whose name contains ``'dog'`` gets label 1. Entries
    matching neither substring are ignored.

    Args:
        root_dir: Directory that directly contains the image files.
        transform: Optional callable applied to each loaded RGB image.
    """

    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.image_paths = []
        self.labels = []

        # os.listdir() returns entries in arbitrary order; sorting keeps the
        # index -> sample mapping reproducible across runs and machines.
        for filename in sorted(os.listdir(root_dir)):
            if 'cat' in filename:
                label = 0  # cat
            elif 'dog' in filename:
                label = 1  # dog
            else:
                continue
            self.image_paths.append(os.path.join(root_dir, filename))
            self.labels.append(label)

    def __len__(self):
        """Return the number of labelled images found in ``root_dir``."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for sample ``idx``, with ``transform`` applied if set."""
        # convert() returns a new image, so the underlying file can be closed
        # right away instead of staying open until garbage collection.
        with Image.open(self.image_paths[idx]) as img:
            image = img.convert('RGB')
        label = self.labels[idx]

        if self.transform:
            image = self.transform(image)

        return image, label
|
||||||
|
|
||||||
|
# Data preprocessing: random crop + flip for training, fixed resize + centre
# crop for validation; both end with the same per-channel normalisation.
data_transforms = {}
data_transforms['train'] = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])
data_transforms['val'] = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# One dataset / loader / size entry per split, read from <data_dir>/<split>.
data_dir = 'dog-cat'
image_datasets = {
    'train': CatDogDataset(os.path.join(data_dir, 'train'), data_transforms['train']),
    'val': CatDogDataset(os.path.join(data_dir, 'val'), data_transforms['val']),
}
dataloaders = {
    split: DataLoader(dataset, batch_size=32, shuffle=True, num_workers=4)
    for split, dataset in image_datasets.items()
}
dataset_sizes = {split: len(dataset) for split, dataset in image_datasets.items()}
class_names = ['cat', 'dog']

# Use the first GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# Load the pre-trained ResNet-50 and replace its final fully connected layer
# with one that outputs a score per entry of class_names.
model_ft = models.resnet50(weights=models.ResNet50_Weights.DEFAULT)
num_ftrs = model_ft.fc.in_features
model_ft.fc = nn.Linear(in_features=num_ftrs, out_features=len(class_names))
model_ft = model_ft.to(device)

criterion = nn.CrossEntropyLoss()

# Optimizer: SGD with momentum over all model parameters
optimizer_ft = optim.SGD(params=model_ft.parameters(), lr=0.001, momentum=0.9)

# Learning rate scheduler: multiply the learning rate by 0.1 every 7 scheduler steps
exp_lr_scheduler = optim.lr_scheduler.StepLR(optimizer=optimizer_ft, step_size=7, gamma=0.1)
|
||||||
|
|
||||||
|
# Training function
|
||||||
|
def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
    """Fine-tune ``model`` and return it loaded with its best validation weights.

    Each epoch runs a 'train' pass followed by a 'val' pass over the
    module-level ``dataloaders``. Batches are moved to the module-level
    ``device`` and per-epoch averages are computed with ``dataset_sizes``.

    Args:
        model: Network to optimise (updated in place).
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: Optimizer bound to the model's parameters.
        scheduler: Learning-rate scheduler, stepped once per training pass.
        num_epochs: Number of epochs to run.

    Returns:
        The same ``model`` object, holding the weights from the epoch with the
        highest validation accuracy.
    """
    import copy  # function-scope import: needed for the snapshot below

    # state_dict() returns references to the live parameter tensors, so the
    # snapshot must be deep-copied; otherwise it silently follows every later
    # optimizer step and the "best" weights are just the final ones.
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print(f'Epoch {epoch}/{num_epochs - 1}')
        print('-' * 10)

        # Every epoch has a training pass followed by a validation pass.
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()
            else:
                model.eval()

            running_loss = 0.0
            running_corrects = 0

            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                optimizer.zero_grad()

                # Gradients are only tracked during the training pass.
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # Weight the batch loss by the batch size so the epoch total
                # can be divided by the dataset size (assumes criterion
                # returns a per-batch mean).
                running_loss += loss.item() * inputs.size(0)
                # Accumulate a plain int so an empty loader cannot break the
                # accuracy computation below.
                running_corrects += torch.sum(preds == labels).item()

            if phase == 'train':
                scheduler.step()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects / dataset_sizes[phase]

            print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

            # Keep an independent copy of the best-performing weights.
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    print(f'Best val Acc: {best_acc:.4f}')
    model.load_state_dict(best_model_wts)
    return model
|
||||||
|
|
||||||
|
# Train and evaluate the model
model_ft = train_model(
    model=model_ft,
    criterion=criterion,
    optimizer=optimizer_ft,
    scheduler=exp_lr_scheduler,
    num_epochs=25,
)

# Save the trained weights twice: as a PyTorch checkpoint and as safetensors
torch.save(obj=model_ft.state_dict(), f='model_cat_dog_classifier.pt')
save_file(model_ft.state_dict(), 'model_cat_dog_classifier.safetensors')
|
||||||
|
```
|
||||||
|
|
||||||
|
## Inference
|
||||||
|
|
||||||
|
To perform inference, you can use the following code. Inference uses the Hugging Face Transformers model with model ID `ailab/resnet-dogcat`.
|
||||||
|
|
||||||
|
### Inference Code
|
||||||
|
|
||||||
|
**You need to set the environment variable `HF_ENDPOINT=http://10.0.101.71` before running the code.**
|
||||||
|
|
||||||
|
```python
|
||||||
|
from transformers import AutoImageProcessor, ResNetForImageClassification
|
||||||
|
from PIL import Image
|
||||||
|
import requests
|
||||||
|
import torch
|
||||||
|
|
||||||
|
# Download a sample image
url = "http://images.cocodataset.org/val2017/000000039769.jpg"
# Without a timeout requests may wait forever; raise_for_status() fails loudly
# on an HTTP error instead of handing an error page to PIL.
response = requests.get(url, stream=True, timeout=30)
response.raise_for_status()
# Decode to RGB, matching how the training code loads images.
image = Image.open(response.raw).convert("RGB")

# Load the image processor and the classification model
processor = AutoImageProcessor.from_pretrained("ailab/resnet-dogcat")
model = ResNetForImageClassification.from_pretrained("ailab/resnet-dogcat")

inputs = processor(image, return_tensors="pt")

# Inference only: no gradients are needed
with torch.no_grad():
    logits = model(**inputs).logits

# Pick the highest-scoring class index and map it to its label name through
# the model config's id2label table
predicted_label = logits.argmax(-1).item()
print(model.config.id2label[predicted_label])
|
||||||
|
```
|
||||||
|
|
||||||
|
## Conclusion
|
||||||
|
|
||||||
|
This repository provides a comprehensive solution for training and performing inference on a Cat-Dog classification task using a ResNet-50 model. The training script demonstrates how to preprocess data, train the model, and save the trained model. The inference script shows how to use the trained model to classify new images.
|
|
@ -0,0 +1,7 @@
|
||||||
|
{
|
||||||
|
"model_type": "resnet",
|
||||||
|
"id2label": {
|
||||||
|
"0": "cat",
|
||||||
|
"1": "dog"
|
||||||
|
}
|
||||||
|
}
|
After Width: | Height: | Size: 12 KiB |
After Width: | Height: | Size: 14 KiB |
After Width: | Height: | Size: 38 KiB |
After Width: | Height: | Size: 26 KiB |
After Width: | Height: | Size: 34 KiB |
After Width: | Height: | Size: 9.0 KiB |
After Width: | Height: | Size: 34 KiB |
After Width: | Height: | Size: 23 KiB |
After Width: | Height: | Size: 23 KiB |
After Width: | Height: | Size: 27 KiB |
After Width: | Height: | Size: 28 KiB |
After Width: | Height: | Size: 12 KiB |
After Width: | Height: | Size: 8.2 KiB |
After Width: | Height: | Size: 32 KiB |
After Width: | Height: | Size: 18 KiB |
After Width: | Height: | Size: 7.6 KiB |
After Width: | Height: | Size: 31 KiB |
After Width: | Height: | Size: 6.0 KiB |
After Width: | Height: | Size: 7.5 KiB |
After Width: | Height: | Size: 30 KiB |
After Width: | Height: | Size: 18 KiB |
After Width: | Height: | Size: 5.7 KiB |
After Width: | Height: | Size: 40 KiB |
After Width: | Height: | Size: 21 KiB |
After Width: | Height: | Size: 15 KiB |
After Width: | Height: | Size: 39 KiB |
After Width: | Height: | Size: 35 KiB |
After Width: | Height: | Size: 27 KiB |
After Width: | Height: | Size: 15 KiB |
After Width: | Height: | Size: 23 KiB |
After Width: | Height: | Size: 20 KiB |
After Width: | Height: | Size: 12 KiB |
After Width: | Height: | Size: 13 KiB |
After Width: | Height: | Size: 33 KiB |
After Width: | Height: | Size: 16 KiB |
After Width: | Height: | Size: 20 KiB |
After Width: | Height: | Size: 25 KiB |
After Width: | Height: | Size: 38 KiB |
After Width: | Height: | Size: 18 KiB |
After Width: | Height: | Size: 21 KiB |
After Width: | Height: | Size: 10 KiB |
After Width: | Height: | Size: 32 KiB |
After Width: | Height: | Size: 7.5 KiB |
After Width: | Height: | Size: 32 KiB |
After Width: | Height: | Size: 31 KiB |
After Width: | Height: | Size: 22 KiB |
After Width: | Height: | Size: 19 KiB |
After Width: | Height: | Size: 4.5 KiB |
After Width: | Height: | Size: 19 KiB |
After Width: | Height: | Size: 8.2 KiB |
After Width: | Height: | Size: 25 KiB |
After Width: | Height: | Size: 3.2 KiB |
After Width: | Height: | Size: 9.7 KiB |
After Width: | Height: | Size: 14 KiB |
After Width: | Height: | Size: 9.6 KiB |
After Width: | Height: | Size: 26 KiB |
After Width: | Height: | Size: 44 KiB |
After Width: | Height: | Size: 12 KiB |
After Width: | Height: | Size: 32 KiB |
After Width: | Height: | Size: 48 KiB |
After Width: | Height: | Size: 20 KiB |
After Width: | Height: | Size: 28 KiB |
After Width: | Height: | Size: 32 KiB |
After Width: | Height: | Size: 20 KiB |
After Width: | Height: | Size: 9.8 KiB |
After Width: | Height: | Size: 37 KiB |
After Width: | Height: | Size: 32 KiB |
After Width: | Height: | Size: 59 KiB |
After Width: | Height: | Size: 44 KiB |
After Width: | Height: | Size: 27 KiB |
After Width: | Height: | Size: 34 KiB |
After Width: | Height: | Size: 28 KiB |
After Width: | Height: | Size: 15 KiB |
After Width: | Height: | Size: 22 KiB |
After Width: | Height: | Size: 5.8 KiB |
|
@ -0,0 +1,18 @@
|
||||||
|
{
|
||||||
|
"crop_pct": 0.875,
|
||||||
|
"do_normalize": true,
|
||||||
|
"do_resize": true,
|
||||||
|
"feature_extractor_type": "ConvNextFeatureExtractor",
|
||||||
|
"image_mean": [
|
||||||
|
0.485,
|
||||||
|
0.456,
|
||||||
|
0.406
|
||||||
|
],
|
||||||
|
"image_std": [
|
||||||
|
0.229,
|
||||||
|
0.224,
|
||||||
|
0.225
|
||||||
|
],
|
||||||
|
"resample": 3,
|
||||||
|
"size": 224
|
||||||
|
}
|
|
@ -0,0 +1,150 @@
|
||||||
|
import os
|
||||||
|
import torch
|
||||||
|
import torch.nn as nn
|
||||||
|
import torch.optim as optim
|
||||||
|
from torch.utils.data import DataLoader, Dataset
|
||||||
|
from torchvision import models, transforms
|
||||||
|
from PIL import Image
|
||||||
|
from safetensors.torch import save_file
|
||||||
|
|
||||||
|
class CatDogDataset(Dataset):
    """Image dataset that derives each label from the file name.

    Every entry of ``root_dir`` whose name contains ``'cat'`` gets label 0;
    every other entry whose name contains ``'dog'`` gets label 1. Entries
    matching neither substring are ignored.

    Args:
        root_dir: Directory that directly contains the image files.
        transform: Optional callable applied to each loaded RGB image.
    """

    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.image_paths = []
        self.labels = []

        # os.listdir() returns entries in arbitrary order; sorting keeps the
        # index -> sample mapping reproducible across runs and machines.
        for filename in sorted(os.listdir(root_dir)):
            if 'cat' in filename:
                label = 0  # cat
            elif 'dog' in filename:
                label = 1  # dog
            else:
                continue
            self.image_paths.append(os.path.join(root_dir, filename))
            self.labels.append(label)

    def __len__(self):
        """Return the number of labelled images found in ``root_dir``."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for sample ``idx``, with ``transform`` applied if set."""
        # convert() returns a new image, so the underlying file can be closed
        # right away instead of staying open until garbage collection.
        with Image.open(self.image_paths[idx]) as img:
            image = img.convert('RGB')
        label = self.labels[idx]

        if self.transform:
            image = self.transform(image)

        return image, label
|
||||||
|
|
||||||
|
# Data preprocessing: random crop + flip for training, fixed resize + centre
# crop for validation; both end with the same per-channel normalisation.
data_transforms = {}
data_transforms['train'] = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])
data_transforms['val'] = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# One dataset / loader / size entry per split, read from <data_dir>/<split>.
data_dir = 'dog-cat'
image_datasets = {
    'train': CatDogDataset(os.path.join(data_dir, 'train'), data_transforms['train']),
    'val': CatDogDataset(os.path.join(data_dir, 'val'), data_transforms['val']),
}
dataloaders = {
    split: DataLoader(dataset, batch_size=32, shuffle=True, num_workers=4)
    for split, dataset in image_datasets.items()
}
dataset_sizes = {split: len(dataset) for split, dataset in image_datasets.items()}
class_names = ['cat', 'dog']

# Use the first GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# Load the pre-trained ResNet-50 and fine-tune it: the final fully connected
# layer is replaced with one that outputs a score per entry of class_names.
model_ft = models.resnet50(weights=models.ResNet50_Weights.DEFAULT)
num_ftrs = model_ft.fc.in_features
model_ft.fc = nn.Linear(in_features=num_ftrs, out_features=len(class_names))
model_ft = model_ft.to(device)

criterion = nn.CrossEntropyLoss()

# Optimizer: SGD with momentum over all model parameters
optimizer_ft = optim.SGD(params=model_ft.parameters(), lr=0.001, momentum=0.9)

# Learning rate scheduler: multiply the learning rate by 0.1 every 7 scheduler steps
exp_lr_scheduler = optim.lr_scheduler.StepLR(optimizer=optimizer_ft, step_size=7, gamma=0.1)
|
||||||
|
|
||||||
|
# Train the model
def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
    """Fine-tune ``model`` and return it loaded with its best validation weights.

    Each epoch runs a 'train' pass followed by a 'val' pass over the
    module-level ``dataloaders``. Batches are moved to the module-level
    ``device`` and per-epoch averages are computed with ``dataset_sizes``.

    Args:
        model: Network to optimise (updated in place).
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: Optimizer bound to the model's parameters.
        scheduler: Learning-rate scheduler, stepped once per training pass.
        num_epochs: Number of epochs to run.

    Returns:
        The same ``model`` object, holding the weights from the epoch with the
        highest validation accuracy.
    """
    import copy  # function-scope import: needed for the snapshot below

    # state_dict() returns references to the live parameter tensors, so the
    # snapshot must be deep-copied; otherwise it silently follows every later
    # optimizer step and the "best" weights are just the final ones.
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print(f'Epoch {epoch}/{num_epochs - 1}')
        print('-' * 10)

        # Every epoch has a training phase and a validation phase.
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # put the model in training mode
            else:
                model.eval()  # put the model in evaluation mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over the data.
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # Zero the parameter gradients.
                optimizer.zero_grad()

                # Forward pass; gradients are only tracked while training.
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # Backward pass + optimisation only in the training phase.
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # Statistics. Weight the batch loss by the batch size so the
                # epoch total can be divided by the dataset size (assumes
                # criterion returns a per-batch mean).
                running_loss += loss.item() * inputs.size(0)
                # Accumulate a plain int so an empty loader cannot break the
                # accuracy computation below.
                running_corrects += torch.sum(preds == labels).item()

            if phase == 'train':
                scheduler.step()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects / dataset_sizes[phase]

            print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

            # Deep-copy the weights of the best-performing model.
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    print(f'Best val Acc: {best_acc:.4f}')

    # Load the best model weights.
    model.load_state_dict(best_model_wts)
    return model
|
||||||
|
|
||||||
|
# Train and evaluate the model
model_ft = train_model(
    model=model_ft,
    criterion=criterion,
    optimizer=optimizer_ft,
    scheduler=exp_lr_scheduler,
    num_epochs=25,
)

# Metadata stored in the safetensors file
metadata = dict(format="pt")

# Save the weights as a PyTorch checkpoint (pytorch_model.bin)
torch.save(obj=model_ft.state_dict(), f='pytorch_model.bin')

# Save the same weights as a .safetensors file
save_file(model_ft.state_dict(), 'model.safetensors', metadata=metadata)
|
||||||
|
|