{
  "framework": "pytorch",
  "task": "image-classification",
  "pipeline": {
    "type": "nextvit-small_image-classification_Dailylife-labels"
  },
  "model": {
    "type": "ClassificationModel",
    "mm_model": {
      "type": "ImageClassifier",
      "pretrained": null,
      "backbone": {
        "type": "NextViT",
        "arch": "small",
        "path_dropout": 0.2
      },
      "neck": {
        "type": "GlobalAveragePooling"
      },
      "head": {
        "type": "LinearClsHead",
        "num_classes": 1296,
        "in_channels": 1024,
        "loss": {
          "type": "CrossEntropyLoss",
          "loss_weight": 1.0
        },
        "init_cfg": [
          {
            "type": "TruncNormal",
            "layer": "Linear",
            "std": 0.02,
            "bias": 0.0
          }
        ]
      }
    }
  },
  "dataset": {
    "classes": null
  },
  "preprocessor": {
    "type": "image-classification-bypass-preprocessor",
    "train": [
      {
        "type": "LoadImageFromFile"
      },
      {
        "type": "RandomResizedCrop",
        "size": 224,
        "backend": "pillow",
        "interpolation": "bicubic"
      },
      {
        "type": "RandomFlip",
        "flip_prob": 0.5,
        "direction": "horizontal"
      },
      {
        "type": "RandAugment",
        "policies": [
          {
            "type": "AutoContrast"
          },
          {
            "type": "Equalize"
          },
          {
            "type": "Invert"
          },
          {
            "type": "Rotate",
            "magnitude_key": "angle",
            "magnitude_range": [0, 30]
          },
          {
            "type": "Posterize",
            "magnitude_key": "bits",
            "magnitude_range": [4, 0]
          },
          {
            "type": "Solarize",
            "magnitude_key": "thr",
            "magnitude_range": [256, 0]
          },
          {
            "type": "SolarizeAdd",
            "magnitude_key": "magnitude",
            "magnitude_range": [0, 110]
          },
          {
            "type": "ColorTransform",
            "magnitude_key": "magnitude",
            "magnitude_range": [0, 0.9]
          },
          {
            "type": "Contrast",
            "magnitude_key": "magnitude",
            "magnitude_range": [0, 0.9]
          },
          {
            "type": "Brightness",
            "magnitude_key": "magnitude",
            "magnitude_range": [0, 0.9]
          },
          {
            "type": "Sharpness",
            "magnitude_key": "magnitude",
            "magnitude_range": [0, 0.9]
          },
          {
            "type": "Shear",
            "magnitude_key": "magnitude",
            "magnitude_range": [0, 0.3],
            "direction": "horizontal"
          },
          {
            "type": "Shear",
            "magnitude_key": "magnitude",
            "magnitude_range": [0, 0.3],
            "direction": "vertical"
          },
          {
            "type": "Translate",
            "magnitude_key": "magnitude",
            "magnitude_range": [0, 0.45],
            "direction": "horizontal"
          },
          {
            "type": "Translate",
            "magnitude_key": "magnitude",
            "magnitude_range": [0, 0.45],
            "direction": "vertical"
          }
        ],
        "num_policies": 2,
        "total_level": 10,
        "magnitude_level": 9,
        "magnitude_std": 0.5,
        "hparams": {
          "pad_val": [104, 116, 124],
          "interpolation": "bicubic"
        }
      },
      {
        "type": "RandomErasing",
        "erase_prob": 0.25,
        "mode": "rand",
        "min_area_ratio": 0.02,
        "max_area_ratio": 0.3333333333333333,
        "fill_color": [103.53, 116.28, 123.675],
        "fill_std": [57.375, 57.12, 58.395]
      },
      {
        "type": "Normalize",
        "mean": [123.675, 116.28, 103.53],
        "std": [58.395, 57.12, 57.375],
        "to_rgb": true
      },
      {
        "type": "ImageToTensor",
        "keys": ["img"]
      },
      {
        "type": "ToTensor",
        "keys": ["gt_label"]
      },
      {
        "type": "Collect",
        "keys": ["img", "gt_label"]
      }
    ],
    "val": [
      {
        "type": "LoadImageFromFile"
      },
      {
        "type": "Resize",
        "size": [256, -1],
        "backend": "pillow",
        "interpolation": "bicubic"
      },
      {
        "type": "CenterCrop",
        "crop_size": 224
      },
      {
        "type": "Normalize",
        "mean": [123.675, 116.28, 103.53],
        "std": [58.395, 57.12, 57.375],
        "to_rgb": true
      },
      {
        "type": "ImageToTensor",
        "keys": ["img"]
      },
      {
        "type": "Collect",
        "keys": ["img"]
      }
    ]
  },
  "train": {
    "dataloader": {
      "batch_size_per_gpu": 32,
      "workers_per_gpu": 4
    },
    "max_epochs": 1,
    "runner": {
      "type": "EpochBasedRunner",
      "max_epochs": 300
    },
    "evaluation": {
      "interval": 1,
      "metric": "accuracy",
      "save_best": "auto"
    },
    "checkpoint_config": {
      "interval": 1,
      "max_keep_ckpts": 20,
      "create_symlink": true
    },
    "log_config": {
      "interval": 100,
      "hooks": [
        {
          "type": "TextLoggerHook"
        }
      ]
    },
    "custom_hooks": [
      {
        "type": "EMAHook",
        "momentum": 4e-05,
        "priority": "ABOVE_NORMAL"
      }
    ],
    "workflow": [
      ["train", 1]
    ],
    "work_dir": "./work_dir",
    "optimizer": {
      "type": "AdamW",
      "lr": 0.001,
      "weight_decay": 0.1,
      "eps": 1e-08,
      "betas": [0.9, 0.999],
      "paramwise_cfg": {
        "norm_decay_mult": 0.0,
        "bias_decay_mult": 0.0,
        "custom_keys": {
          ".cls_token": {
            "decay_mult": 0.0
          },
          ".pos_embed": {
            "decay_mult": 0.0
          }
        }
      }
    },
    "optimizer_config": {
      "grad_clip": {
        "max_norm": 5.0
      }
    },
    "lr_config": {
      "policy": "CosineAnnealing",
      "by_epoch": false,
      "min_lr_ratio": 0.01,
      "warmup": "linear",
      "warmup_ratio": 0.001,
      "warmup_iters": 20,
      "warmup_by_epoch": true
    }
  },
  "evaluation": {
    "dataloader": {
      "batch_size_per_gpu": 32,
      "workers_per_gpu": 4
    },
    "metrics": ["accuracy"]
  }
}