NextViT_image_classification/configuration.json

{
    "framework": "pytorch",
    "task": "image-classification",

    "pipeline": {
        "type": "nextvit-small_image-classification_Dailylife-labels"
    },

    "model": {
        "type": "ClassificationModel",
        "mm_model": {
            "type": "ImageClassifier",
            "pretrained": null,
            "backbone": {
                "type": "NextViT",
                "arch": "small",
                "path_dropout": 0.2
            },
            "neck": {
                "type": "GlobalAveragePooling"
            },
            "head": {
                "type": "LinearClsHead",
                "num_classes": 1296,
                "in_channels": 1024,
                "loss": {
                    "type": "CrossEntropyLoss",
                    "loss_weight": 1.0
                },
                "init_cfg": [{
                    "type": "TruncNormal",
                    "layer": "Linear",
                    "std": 0.02,
                    "bias": 0.0
                }]
            }
        }
    },

    "dataset": {
        "classes": null
    },

    "preprocessor": {
        "type": "image-classification-bypass-preprocessor",
        "train": [
            {
                "type": "LoadImageFromFile"
            },
            {
                "type": "RandomResizedCrop",
                "size": 224,
                "backend": "pillow",
                "interpolation": "bicubic"
            },
            {
                "type": "RandomFlip",
                "flip_prob": 0.5,
                "direction": "horizontal"
            },
            {
                "type": "RandAugment",
                "policies": [
                    {
                        "type": "AutoContrast"
                    },
                    {
                        "type": "Equalize"
                    },
                    {
                        "type": "Invert"
                    },
                    {
                        "type": "Rotate",
                        "magnitude_key": "angle",
                        "magnitude_range": [0, 30]
                    },
                    {
                        "type": "Posterize",
                        "magnitude_key": "bits",
                        "magnitude_range": [4, 0]
                    },
                    {
                        "type": "Solarize",
                        "magnitude_key": "thr",
                        "magnitude_range": [256, 0]
                    },
                    {
                        "type": "SolarizeAdd",
                        "magnitude_key": "magnitude",
                        "magnitude_range": [0, 110]
                    },
                    {
                        "type": "ColorTransform",
                        "magnitude_key": "magnitude",
                        "magnitude_range": [0, 0.9]
                    },
                    {
                        "type": "Contrast",
                        "magnitude_key": "magnitude",
                        "magnitude_range": [0, 0.9]
                    },
                    {
                        "type": "Brightness",
                        "magnitude_key": "magnitude",
                        "magnitude_range": [0, 0.9]
                    },
                    {
                        "type": "Sharpness",
                        "magnitude_key": "magnitude",
                        "magnitude_range": [0, 0.9]
                    },
                    {
                        "type": "Shear",
                        "magnitude_key": "magnitude",
                        "magnitude_range": [0, 0.3],
                        "direction": "horizontal"
                    },
                    {
                        "type": "Shear",
                        "magnitude_key": "magnitude",
                        "magnitude_range": [0, 0.3],
                        "direction": "vertical"
                    },
                    {
                        "type": "Translate",
                        "magnitude_key": "magnitude",
                        "magnitude_range": [0, 0.45],
                        "direction": "horizontal"
                    },
                    {
                        "type": "Translate",
                        "magnitude_key": "magnitude",
                        "magnitude_range": [0, 0.45],
                        "direction": "vertical"
                    }
                ],
                "num_policies": 2,
                "total_level": 10,
                "magnitude_level": 9,
                "magnitude_std": 0.5,
                "hparams": {
                    "pad_val": [104, 116, 124],
                    "interpolation": "bicubic"
                }
            },
            {
                "type": "RandomErasing",
                "erase_prob": 0.25,
                "mode": "rand",
                "min_area_ratio": 0.02,
                "max_area_ratio": 0.3333333333333333,
                "fill_color": [103.53, 116.28, 123.675],
                "fill_std": [57.375, 57.12, 58.395]
            },
            {
                "type": "Normalize",
                "mean": [123.675, 116.28, 103.53],
                "std": [58.395, 57.12, 57.375],
                "to_rgb": true
            },
            {
                "type": "ImageToTensor",
                "keys": ["img"]
            },
            {
                "type": "ToTensor",
                "keys": ["gt_label"]
            },
            {
                "type": "Collect",
                "keys": ["img", "gt_label"]
            }
        ],

        "val": [
            {
                "type": "LoadImageFromFile"
            },
            {
                "type": "Resize",
                "size": [256, -1],
                "backend": "pillow",
                "interpolation": "bicubic"
            },
            {
                "type": "CenterCrop",
                "crop_size": 224
            },
            {
                "type": "Normalize",
                "mean": [123.675, 116.28, 103.53],
                "std": [58.395, 57.12, 57.375],
                "to_rgb": true
            },
            {
                "type": "ImageToTensor",
                "keys": ["img"]
            },
            {
                "type": "Collect",
                "keys": ["img"]
            }
        ]
    },

    "train": {
        "dataloader": {
            "batch_size_per_gpu": 32,
            "workers_per_gpu": 4
        },
        "max_epochs": 1,
        "runner": {
            "type": "EpochBasedRunner",
            "max_epochs": 300
        },
        "evaluation": {
            "interval": 1,
            "metric": "accuracy",
            "save_best": "auto"
        },
        "checkpoint_config": {
            "interval": 1,
            "max_keep_ckpts": 20,
            "create_symlink": true
        },
        "log_config": {
            "interval": 100,
            "hooks": [
                {
                    "type": "TextLoggerHook"
                }
            ]
        },
        "custom_hooks": [
            {
                "type": "EMAHook",
                "momentum": 4e-05,
                "priority": "ABOVE_NORMAL"
            }
        ],
        "workflow": [
            ["train", 1]
        ],
        "work_dir": "./work_dir",
        "optimizer": {
            "type": "AdamW",
            "lr": 0.001,
            "weight_decay": 0.1,
            "eps": 1e-08,
            "betas": [0.9, 0.999],
            "paramwise_cfg": {
                "norm_decay_mult": 0.0,
                "bias_decay_mult": 0.0,
                "custom_keys": {
                    ".cls_token": {
                        "decay_mult": 0.0
                    },
                    ".pos_embed": {
                        "decay_mult": 0.0
                    }
                }
            }
        },
        "optimizer_config": {
            "grad_clip": {
                "max_norm": 5.0
            }
        },
        "lr_config": {
            "policy": "CosineAnnealing",
            "by_epoch": false,
            "min_lr_ratio": 0.01,
            "warmup": "linear",
            "warmup_ratio": 0.001,
            "warmup_iters": 20,
            "warmup_by_epoch": true
        }
    },

    "evaluation": {
        "dataloader": {
            "batch_size_per_gpu": 32,
            "workers_per_gpu": 4
        },
        "metrics": ["accuracy"]
    }
}