diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..0348ea9 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +checkpoint-*/ \ No newline at end of file diff --git a/all_results.json b/all_results.json new file mode 100644 index 0000000..426b4ad --- /dev/null +++ b/all_results.json @@ -0,0 +1,13 @@ +{ + "epoch": 4.0, + "eval_accuracy": 0.9572649572649573, + "eval_loss": 0.3159617781639099, + "eval_runtime": 1.9143, + "eval_samples_per_second": 122.237, + "eval_steps_per_second": 7.836, + "total_flos": 7.25481043402752e+17, + "train_loss": 0.6950366432602341, + "train_runtime": 183.7888, + "train_samples_per_second": 50.928, + "train_steps_per_second": 1.611 +} \ No newline at end of file diff --git a/config.json b/config.json new file mode 100644 index 0000000..2e26f06 --- /dev/null +++ b/config.json @@ -0,0 +1,80 @@ +{ + "_name_or_path": "google/vit-base-patch16-224-in21k", + "architectures": [ + "ViTForImageClassification" + ], + "attention_probs_dropout_prob": 0.0, + "encoder_stride": 16, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.0, + "hidden_size": 768, + "id2label": { + "0": "A", + "1": "B", + "10": "K", + "11": "L", + "12": "M", + "13": "N", + "14": "O", + "15": "P", + "16": "Q", + "17": "R", + "18": "S", + "19": "T", + "2": "C", + "20": "U", + "21": "V", + "22": "W", + "23": "X", + "24": "Y", + "25": "Z", + "3": "D", + "4": "E", + "5": "F", + "6": "G", + "7": "H", + "8": "I", + "9": "J" + }, + "image_size": 224, + "initializer_range": 0.02, + "intermediate_size": 3072, + "label2id": { + "A": "0", + "B": "1", + "C": "2", + "D": "3", + "E": "4", + "F": "5", + "G": "6", + "H": "7", + "I": "8", + "J": "9", + "K": "10", + "L": "11", + "M": "12", + "N": "13", + "O": "14", + "P": "15", + "Q": "16", + "R": "17", + "S": "18", + "T": "19", + "U": "20", + "V": "21", + "W": "22", + "X": "23", + "Y": "24", + "Z": "25" + }, + "layer_norm_eps": 1e-12, + "model_type": "vit", + "num_attention_heads": 12, + "num_channels": 3, + "num_hidden_layers": 12, + "patch_size": 16, + "problem_type": "single_label_classification", + "qkv_bias": true, + "torch_dtype": "float32", + "transformers_version": "4.26.1" +} diff --git a/preprocessor_config.json b/preprocessor_config.json new file mode 100644 index 0000000..02018de --- /dev/null +++ b/preprocessor_config.json @@ -0,0 +1,22 @@ +{ + "do_normalize": true, + "do_rescale": true, + "do_resize": true, + "image_mean": [ + 0.5, + 0.5, + 0.5 + ], + "image_processor_type": "ViTFeatureExtractor", + "image_std": [ + 0.5, + 0.5, + 0.5 + ], + "resample": 2, + "rescale_factor": 0.00392156862745098, + "size": { + "height": 224, + "width": 224 + } +} diff --git a/pytorch_model.bin b/pytorch_model.bin new file mode 100644 index 0000000..f3a3855 --- /dev/null +++ b/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9fae575c53d11abf9dd427c22b78f34a1e14791e41c6ce1f3bc54f5b8a29408 +size 343342509 diff --git a/test_results.json b/test_results.json new file mode 100644 index 0000000..7cc059d --- /dev/null +++ b/test_results.json @@ -0,0 +1,8 @@ +{ + "epoch": 4.0, + "eval_accuracy": 0.9572649572649573, + "eval_loss": 0.3159617781639099, + "eval_runtime": 1.9143, + "eval_samples_per_second": 122.237, + "eval_steps_per_second": 7.836 +} \ No newline at end of file diff --git a/train_results.json b/train_results.json new file mode 100644 index 0000000..39392cd --- /dev/null +++ b/train_results.json @@ -0,0 +1,8 @@ +{ + "epoch": 4.0, + "total_flos": 7.25481043402752e+17, + "train_loss": 0.6950366432602341, + "train_runtime": 183.7888, + "train_samples_per_second": 50.928, + "train_steps_per_second": 1.611 +} \ No newline at end of file diff --git a/trainer_state.json b/trainer_state.json new file mode 100644 index 0000000..17ad5b6 --- /dev/null +++ b/trainer_state.json @@ -0,0 +1,217 @@ +{ + "best_metric": 0.3159617781639099, + "best_model_checkpoint": "./vit-base-uppercase-english-characters/checkpoint-200", + "epoch": 4.0, + "global_step": 296, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.14, + "learning_rate": 0.00019324324324324326, + "loss": 3.1876, + "step": 10 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001864864864864865, + "loss": 2.7336, + "step": 20 + }, + { + "epoch": 0.41, + "learning_rate": 0.00017972972972972974, + "loss": 2.2045, + "step": 30 + }, + { + "epoch": 0.54, + "learning_rate": 0.000172972972972973, + "loss": 1.7453, + "step": 40 + }, + { + "epoch": 0.68, + "learning_rate": 0.00016621621621621622, + "loss": 1.4371, + "step": 50 + }, + { + "epoch": 0.81, + "learning_rate": 0.00015945945945945947, + "loss": 1.1114, + "step": 60 + }, + { + "epoch": 0.95, + "learning_rate": 0.0001527027027027027, + "loss": 0.9965, + "step": 70 + }, + { + "epoch": 1.08, + "learning_rate": 0.00014594594594594595, + "loss": 0.7593, + "step": 80 + }, + { + "epoch": 1.22, + "learning_rate": 0.0001391891891891892, + "loss": 0.6622, + "step": 90 + }, + { + "epoch": 1.35, + "learning_rate": 0.00013243243243243243, + "loss": 0.5944, + "step": 100 + }, + { + "epoch": 1.35, + "eval_accuracy": 0.9487179487179487, + "eval_loss": 0.5538277626037598, + "eval_runtime": 1.9221, + "eval_samples_per_second": 121.742, + "eval_steps_per_second": 7.804, + "step": 100 + }, + { + "epoch": 1.49, + "learning_rate": 0.00012567567567567568, + "loss": 0.4882, + "step": 110 + }, + { + "epoch": 1.62, + "learning_rate": 0.00011891891891891893, + "loss": 0.46, + "step": 120 + }, + { + "epoch": 1.76, + "learning_rate": 0.00011216216216216217, + "loss": 0.4453, + "step": 130 + }, + { + "epoch": 1.89, + "learning_rate": 0.0001054054054054054, + "loss": 0.4454, + "step": 140 + }, + { + "epoch": 2.03, + "learning_rate": 9.864864864864865e-05, + "loss": 0.3618, + "step": 150 + }, + { + "epoch": 2.16, + "learning_rate": 9.18918918918919e-05, + "loss": 0.3138, + "step": 160 + }, + { + "epoch": 2.3, + "learning_rate": 8.513513513513514e-05, + "loss": 0.2605, + "step": 170 + }, + { + "epoch": 2.43, + "learning_rate": 7.837837837837838e-05, + "loss": 0.292, + "step": 180 + }, + { + "epoch": 2.57, + "learning_rate": 7.162162162162162e-05, + "loss": 0.2502, + "step": 190 + }, + { + "epoch": 2.7, + "learning_rate": 6.486486486486487e-05, + "loss": 0.2241, + "step": 200 + }, + { + "epoch": 2.7, + "eval_accuracy": 0.9572649572649573, + "eval_loss": 0.3159617781639099, + "eval_runtime": 1.8767, + "eval_samples_per_second": 124.688, + "eval_steps_per_second": 7.993, + "step": 200 + }, + { + "epoch": 2.84, + "learning_rate": 5.8108108108108105e-05, + "loss": 0.2273, + "step": 210 + }, + { + "epoch": 2.97, + "learning_rate": 5.135135135135135e-05, + "loss": 0.2002, + "step": 220 + }, + { + "epoch": 3.11, + "learning_rate": 4.4594594594594596e-05, + "loss": 0.163, + "step": 230 + }, + { + "epoch": 3.24, + "learning_rate": 3.783783783783784e-05, + "loss": 0.1654, + "step": 240 + }, + { + "epoch": 3.38, + "learning_rate": 3.108108108108108e-05, + "loss": 0.1569, + "step": 250 + }, + { + "epoch": 3.51, + "learning_rate": 2.4324324324324327e-05, + "loss": 0.1507, + "step": 260 + }, + { + "epoch": 3.65, + "learning_rate": 1.756756756756757e-05, + "loss": 0.1543, + "step": 270 + }, + { + "epoch": 3.78, + "learning_rate": 1.0810810810810812e-05, + "loss": 0.158, + "step": 280 + }, + { + "epoch": 3.92, + "learning_rate": 4.0540540540540545e-06, + "loss": 0.14, + "step": 290 + }, + { + "epoch": 4.0, + "step": 296, + "total_flos": 7.25481043402752e+17, + "train_loss": 0.6950366432602341, + "train_runtime": 183.7888, + "train_samples_per_second": 50.928, + "train_steps_per_second": 1.611 + } + ], + "max_steps": 296, + "num_train_epochs": 4, + "total_flos": 7.25481043402752e+17, + "trial_name": null, + "trial_params": null +} diff --git a/training_args.bin b/training_args.bin new file mode 100644 index 0000000..c773d65 --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48c5d19c8a5fa8d5238b9556ad6bcb94e473dd14dcdb31d0b3eb225eca60817a +size 3515