# Cat-Dog Classification Model ## Introduction This repository contains a Cat-Dog classification model based on the ResNet-50 architecture. The model is trained to distinguish between images of cats and dogs. ## ResNet Model ResNet-50 is a deep convolutional neural network with 50 layers. It is designed to overcome the vanishing gradient problem, which is common in very deep networks, by using skip connections or residuals. This allows the network to be significantly deeper while still being easy to optimize. ## Training The model is trained on a dataset of cat and dog images. The training process involves the following steps: 1. **Data Preprocessing**: Images are resized, cropped, and normalized. 2. **Model Initialization**: A pre-trained ResNet-50 model is loaded and the final fully connected layer is adjusted to output two classes (cat and dog). 3. **Training Loop**: The model is trained using a standard training loop with stochastic gradient descent (SGD) and a learning rate scheduler. 4. **Model Evaluation**: The best model is selected based on validation accuracy and saved for inference. ### Training Code Here is a simplified version of the training code: ```python import os import torch import torch.nn as nn import torch.optim as optim from torch.utils.data import DataLoader, Dataset from torchvision import models, transforms from PIL import Image from safetensors.torch import save_file class CatDogDataset(Dataset): def __init__(self, root_dir, transform=None): self.root_dir = root_dir self.transform = transform self.image_paths = [] self.labels = [] for filename in os.listdir(root_dir): if 'cat' in filename: self.image_paths.append(os.path.join(root_dir, filename)) self.labels.append(0) # cat elif 'dog' in filename: self.image_paths.append(os.path.join(root_dir, filename)) self.labels.append(1) # dog def __len__(self): return len(self.image_paths) def __getitem__(self, idx): img_path = self.image_paths[idx] image = Image.open(img_path).convert('RGB') label = self.labels[idx] if self.transform: image = self.transform(image) return image, label # Data preprocessing data_transforms = { 'train': transforms.Compose([ transforms.RandomResizedCrop(224), transforms.RandomHorizontalFlip(), transforms.ToTensor(), transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) ]), 'val': transforms.Compose([ transforms.Resize(256), transforms.CenterCrop(224), transforms.ToTensor(), transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) ]), } data_dir = 'dog-cat' image_datasets = {x: CatDogDataset(os.path.join(data_dir, x), data_transforms[x]) for x in ['train', 'val']} dataloaders = {x: DataLoader(image_datasets[x], batch_size=32, shuffle=True, num_workers=4) for x in ['train', 'val']} dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']} class_names = ['cat', 'dog'] device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") # Load and modify ResNet-50 model_ft = models.resnet50(weights=models.ResNet50_Weights.DEFAULT) num_ftrs = model_ft.fc.in_features model_ft.fc = nn.Linear(num_ftrs, len(class_names)) model_ft = model_ft.to(device) criterion = nn.CrossEntropyLoss() # Optimizer optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.001, momentum=0.9) # Learning rate scheduler exp_lr_scheduler = optim.lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1) # Training function def train_model(model, criterion, optimizer, scheduler, num_epochs=25): best_model_wts = model.state_dict() best_acc = 0.0 for epoch in range(num_epochs): print(f'Epoch {epoch}/{num_epochs - 1}') print('-' * 10) for phase in ['train', 'val']: if phase == 'train': model.train() else: model.eval() running_loss = 0.0 running_corrects = 0 for inputs, labels in dataloaders[phase]: inputs = inputs.to(device) labels = labels.to(device) optimizer.zero_grad() with torch.set_grad_enabled(phase == 'train'): outputs = model(inputs) _, preds = torch.max(outputs, 1) loss = criterion(outputs, labels) if phase == 'train': loss.backward() optimizer.step() running_loss += loss.item() * inputs.size(0) running_corrects += torch.sum(preds == labels.data) if phase == 'train': scheduler.step() epoch_loss = running_loss / dataset_sizes[phase] epoch_acc = running_corrects.double() / dataset_sizes[phase] print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}') if phase == 'val' and epoch_acc > best_acc: best_acc = epoch_acc best_model_wts = model.state_dict() print() print(f'Best val Acc: {best_acc:4f}') model.load_state_dict(best_model_wts) return model # Train and evaluate the model model_ft = train_model(model_ft, criterion, optimizer_ft, exp_lr_scheduler, num_epochs=25) # Save the model torch.save(model_ft.state_dict(), 'model_cat_dog_classifier.pt') save_file(model_ft.state_dict(), 'model_cat_dog_classifier.safetensors') ``` ## Inference To perform inference, you can use the following code. The inference is based on the transformer model with model ID `ailb/resnet-dogcat`. ### Inference Code **You need set env HF_ENDPOINT=http://10.0.101.71** ```python from transformers import AutoImageProcessor, ResNetForImageClassification from PIL import Image import requests import torch # Load model url = "http://images.cocodataset.org/val2017/000000039769.jpg" image = Image.open(requests.get(url, stream=True).raw) processor = AutoImageProcessor.from_pretrained("ailab/resnet-dogcat") model = ResNetForImageClassification.from_pretrained("ailab/resnet-dogcat") inputs = processor(image, return_tensors="pt") with torch.no_grad(): logits = model(**inputs).logits # model predicts one of the 1000 ImageNet classes predicted_label = logits.argmax(-1).item() print(model.config.id2label[predicted_label]) ``` ## Conclusion This repository provides a comprehensive solution for training and performing inference on a Cat-Dog classification task using a ResNet-50 model. The training script demonstrates how to preprocess data, train the model, and save the trained model. The inference script shows how to use the trained model to classify new images.