From c26fa0e99d15401fed1419dec0f19693d1f72ffc Mon Sep 17 00:00:00 2001 From: YYJ-aaaa <1058387004@qq.com> Date: Fri, 1 Nov 2024 14:56:53 +0800 Subject: [PATCH] first commit --- config.json | 258 +++++++++++++++++++++++++++++++++++++++ model.safetensors | 3 + preprocessor_config.json | 47 +++++++ 3 files changed, 308 insertions(+) create mode 100644 config.json create mode 100644 model.safetensors create mode 100644 preprocessor_config.json diff --git a/config.json b/config.json new file mode 100644 index 0000000..fb8b7fc --- /dev/null +++ b/config.json @@ -0,0 +1,258 @@ +{ + "activation_dropout": 0.0, + "activation_function": "silu", + "anchor_image_size": null, + "architectures": [ + "RTDetrForObjectDetection" + ], + "attention_dropout": 0.0, + "auxiliary_loss": true, + "backbone": null, + "backbone_config": { + "depths": [ + 3, + 4, + 23, + 3 + ], + "model_type": "rt_detr_resnet", + "out_features": [ + "stage2", + "stage3", + "stage4" + ], + "out_indices": [ + 2, + 3, + 4 + ] + }, + "backbone_kwargs": null, + "batch_norm_eps": 1e-05, + "box_noise_scale": 1.0, + "d_model": 256, + "decoder_activation_function": "relu", + "decoder_attention_heads": 8, + "decoder_ffn_dim": 1024, + "decoder_in_channels": [ + 384, + 384, + 384 + ], + "decoder_layers": 6, + "decoder_n_points": 4, + "disable_custom_kernels": true, + "dropout": 0.0, + "encode_proj_layers": [ + 2 + ], + "encoder_activation_function": "gelu", + "encoder_attention_heads": 8, + "encoder_ffn_dim": 2048, + "encoder_hidden_dim": 384, + "encoder_in_channels": [ + 512, + 1024, + 2048 + ], + "encoder_layers": 1, + "eos_coefficient": 0.0001, + "eval_size": null, + "feat_strides": [ + 8, + 16, + 32 + ], + "focal_loss_alpha": 0.75, + "focal_loss_gamma": 2.0, + "hidden_expansion": 1.0, + "id2label": { + "0": "person", + "1": "bicycle", + "2": "car", + "3": "motorbike", + "4": "aeroplane", + "5": "bus", + "6": "train", + "7": "truck", + "8": "boat", + "9": "traffic light", + "10": "fire hydrant", + "11": "stop sign", + "12": "parking meter", + "13": "bench", + "14": "bird", + "15": "cat", + "16": "dog", + "17": "horse", + "18": "sheep", + "19": "cow", + "20": "elephant", + "21": "bear", + "22": "zebra", + "23": "giraffe", + "24": "backpack", + "25": "umbrella", + "26": "handbag", + "27": "tie", + "28": "suitcase", + "29": "frisbee", + "30": "skis", + "31": "snowboard", + "32": "sports ball", + "33": "kite", + "34": "baseball bat", + "35": "baseball glove", + "36": "skateboard", + "37": "surfboard", + "38": "tennis racket", + "39": "bottle", + "40": "wine glass", + "41": "cup", + "42": "fork", + "43": "knife", + "44": "spoon", + "45": "bowl", + "46": "banana", + "47": "apple", + "48": "sandwich", + "49": "orange", + "50": "broccoli", + "51": "carrot", + "52": "hot dog", + "53": "pizza", + "54": "donut", + "55": "cake", + "56": "chair", + "57": "sofa", + "58": "pottedplant", + "59": "bed", + "60": "diningtable", + "61": "toilet", + "62": "tvmonitor", + "63": "laptop", + "64": "mouse", + "65": "remote", + "66": "keyboard", + "67": "cell phone", + "68": "microwave", + "69": "oven", + "70": "toaster", + "71": "sink", + "72": "refrigerator", + "73": "book", + "74": "clock", + "75": "vase", + "76": "scissors", + "77": "teddy bear", + "78": "hair drier", + "79": "toothbrush" + }, + "initializer_range": 0.01, + "is_encoder_decoder": true, + "label2id": { + "aeroplane": 4, + "apple": 47, + "backpack": 24, + "banana": 46, + "baseball bat": 34, + "baseball glove": 35, + "bear": 21, + "bed": 59, + "bench": 13, + "bicycle": 1, + "bird": 14, + "boat": 8, + "book": 73, + "bottle": 39, + "bowl": 45, + "broccoli": 50, + "bus": 5, + "cake": 55, + "car": 2, + "carrot": 51, + "cat": 15, + "cell phone": 67, + "chair": 56, + "clock": 74, + "cow": 19, + "cup": 41, + "diningtable": 60, + "dog": 16, + "donut": 54, + "elephant": 20, + "fire hydrant": 10, + "fork": 42, + "frisbee": 29, + "giraffe": 23, + "hair drier": 78, + "handbag": 26, + "horse": 17, + "hot dog": 52, + "keyboard": 66, + "kite": 33, + "knife": 43, + "laptop": 63, + "microwave": 68, + "motorbike": 3, + "mouse": 64, + "orange": 49, + "oven": 69, + "parking meter": 12, + "person": 0, + "pizza": 53, + "pottedplant": 58, + "refrigerator": 72, + "remote": 65, + "sandwich": 48, + "scissors": 76, + "sheep": 18, + "sink": 71, + "skateboard": 36, + "skis": 30, + "snowboard": 31, + "sofa": 57, + "spoon": 44, + "sports ball": 32, + "stop sign": 11, + "suitcase": 28, + "surfboard": 37, + "teddy bear": 77, + "tennis racket": 38, + "tie": 27, + "toaster": 70, + "toilet": 61, + "toothbrush": 79, + "traffic light": 9, + "train": 6, + "truck": 7, + "tvmonitor": 62, + "umbrella": 25, + "vase": 75, + "wine glass": 40, + "zebra": 22 + }, + "label_noise_ratio": 0.5, + "layer_norm_eps": 1e-05, + "learn_initial_query": false, + "matcher_alpha": 0.25, + "matcher_bbox_cost": 5.0, + "matcher_class_cost": 2.0, + "matcher_gamma": 2.0, + "matcher_giou_cost": 2.0, + "model_type": "rt_detr", + "normalize_before": false, + "num_denoising": 100, + "num_feature_levels": 3, + "num_queries": 300, + "positional_encoding_temperature": 10000, + "torch_dtype": "float32", + "transformers_version": "4.42.0.dev0", + "use_focal_loss": true, + "use_pretrained_backbone": false, + "use_timm_backbone": false, + "weight_loss_bbox": 5.0, + "weight_loss_giou": 2.0, + "weight_loss_vfl": 1.0, + "with_box_refine": true +} diff --git a/model.safetensors b/model.safetensors new file mode 100644 index 0000000..f9cc0cf --- /dev/null +++ b/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48681f4087d220273d86b052001e19b738d1d1e54f02dfe98d71f4e7957458c5 +size 307331000 diff --git a/preprocessor_config.json b/preprocessor_config.json new file mode 100644 index 0000000..0eaa5c0 --- /dev/null +++ b/preprocessor_config.json @@ -0,0 +1,47 @@ +{ + "_valid_processor_keys": [ + "images", + "annotations", + "return_segmentation_masks", + "masks_path", + "do_resize", + "size", + "resample", + "do_rescale", + "rescale_factor", + "do_normalize", + "do_convert_annotations", + "image_mean", + "image_std", + "do_pad", + "pad_size", + "format", + "return_tensors", + "data_format", + "input_data_format" + ], + "do_convert_annotations": true, + "do_normalize": false, + "do_pad": false, + "do_rescale": true, + "do_resize": true, + "format": "coco_detection", + "image_mean": [ + 0.485, + 0.456, + 0.406 + ], + "image_processor_type": "RTDetrImageProcessor", + "image_std": [ + 0.229, + 0.224, + 0.225 + ], + "pad_size": null, + "resample": 2, + "rescale_factor": 0.00392156862745098, + "size": { + "height": 640, + "width": 640 + } +}