diff --git a/all_results.json b/all_results.json new file mode 100644 index 0000000..acddab3 --- /dev/null +++ b/all_results.json @@ -0,0 +1,12 @@ +{ + "epoch": 5.0, + "eval_accuracy": 0.9922928709055877, + "eval_loss": 0.03933868557214737, + "eval_runtime": 6.4711, + "eval_samples_per_second": 80.203, + "eval_steps_per_second": 10.045, + "train_loss": 0.10440107471431079, + "train_runtime": 430.0921, + "train_samples_per_second": 34.167, + "train_steps_per_second": 4.278 +} \ No newline at end of file diff --git a/config.json b/config.json new file mode 100644 index 0000000..04ccf89 --- /dev/null +++ b/config.json @@ -0,0 +1,33 @@ +{ + "_name_or_path": "google/vit-base-patch16-224-in21k", + "architectures": [ + "ViTForImageClassification" + ], + "attention_probs_dropout_prob": 0.0, + "encoder_stride": 16, + "finetuning_task": "image-classification", + "hidden_act": "gelu", + "hidden_dropout_prob": 0.0, + "hidden_size": 768, + "id2label": { + "0": "no", + "1": "yes" + }, + "image_size": 224, + "initializer_range": 0.02, + "intermediate_size": 3072, + "label2id": { + "no": "0", + "yes": "1" + }, + "layer_norm_eps": 1e-12, + "model_type": "vit", + "num_attention_heads": 12, + "num_channels": 3, + "num_hidden_layers": 12, + "patch_size": 16, + "problem_type": "single_label_classification", + "qkv_bias": true, + "torch_dtype": "float32", + "transformers_version": "4.36.0.dev0" +} diff --git a/eval_results.json b/eval_results.json new file mode 100644 index 0000000..7536ae6 --- /dev/null +++ b/eval_results.json @@ -0,0 +1,8 @@ +{ + "epoch": 5.0, + "eval_accuracy": 0.9922928709055877, + "eval_loss": 0.03933868557214737, + "eval_runtime": 6.4711, + "eval_samples_per_second": 80.203, + "eval_steps_per_second": 10.045 +} \ No newline at end of file diff --git a/model.safetensors b/model.safetensors new file mode 100644 index 0000000..a803f35 --- /dev/null +++ b/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3eec7fd63f2d68e64acfd7da1e395d440b46e493d3ca9e9d563225cfaffd81db +size 343223968 diff --git a/preprocessor_config.json b/preprocessor_config.json new file mode 100644 index 0000000..273d249 --- /dev/null +++ b/preprocessor_config.json @@ -0,0 +1,22 @@ +{ + "do_normalize": true, + "do_rescale": true, + "do_resize": true, + "image_mean": [ + 0.5, + 0.5, + 0.5 + ], + "image_processor_type": "ViTImageProcessor", + "image_std": [ + 0.5, + 0.5, + 0.5 + ], + "resample": 2, + "rescale_factor": 0.00392156862745098, + "size": { + "height": 224, + "width": 224 + } +} diff --git a/train_results.json b/train_results.json new file mode 100644 index 0000000..64edcae --- /dev/null +++ b/train_results.json @@ -0,0 +1,7 @@ +{ + "epoch": 5.0, + "train_loss": 0.10440107471431079, + "train_runtime": 430.0921, + "train_samples_per_second": 34.167, + "train_steps_per_second": 4.278 +} \ No newline at end of file diff --git a/trainer_state.json b/trainer_state.json new file mode 100644 index 0000000..1d7c24b --- /dev/null +++ b/trainer_state.json @@ -0,0 +1,1178 @@ +{ + "best_metric": 0.03933868557214737, + "best_model_checkpoint": "./rorshark_outputs/checkpoint-1840", + "epoch": 5.0, + "eval_steps": 500, + "global_step": 1840, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.03, + "learning_rate": 1.989130434782609e-05, + "loss": 0.5675, + "step": 10 + }, + { + "epoch": 0.05, + "learning_rate": 1.9782608695652176e-05, + "loss": 0.4112, + "step": 20 + }, + { + "epoch": 0.08, + "learning_rate": 1.9673913043478263e-05, + "loss": 0.3002, + "step": 30 + }, + { + "epoch": 0.11, + "learning_rate": 1.956521739130435e-05, + "loss": 0.3774, + "step": 40 + }, + { + "epoch": 0.14, + "learning_rate": 1.9456521739130436e-05, + "loss": 0.3295, + "step": 50 + }, + { + "epoch": 0.16, + "learning_rate": 1.9347826086956523e-05, + "loss": 0.3067, + "step": 60 + }, + { + "epoch": 0.19, + "learning_rate": 1.923913043478261e-05, + "loss": 0.2457, + "step": 70 + }, + { + "epoch": 0.22, + "learning_rate": 1.9130434782608697e-05, + "loss": 0.3318, + "step": 80 + }, + { + "epoch": 0.24, + "learning_rate": 1.9021739130434784e-05, + "loss": 0.1932, + "step": 90 + }, + { + "epoch": 0.27, + "learning_rate": 1.891304347826087e-05, + "loss": 0.1948, + "step": 100 + }, + { + "epoch": 0.3, + "learning_rate": 1.8804347826086958e-05, + "loss": 0.2475, + "step": 110 + }, + { + "epoch": 0.33, + "learning_rate": 1.8695652173913045e-05, + "loss": 0.1432, + "step": 120 + }, + { + "epoch": 0.35, + "learning_rate": 1.8586956521739132e-05, + "loss": 0.2069, + "step": 130 + }, + { + "epoch": 0.38, + "learning_rate": 1.847826086956522e-05, + "loss": 0.1986, + "step": 140 + }, + { + "epoch": 0.41, + "learning_rate": 1.8369565217391306e-05, + "loss": 0.2156, + "step": 150 + }, + { + "epoch": 0.43, + "learning_rate": 1.8260869565217393e-05, + "loss": 0.1187, + "step": 160 + }, + { + "epoch": 0.46, + "learning_rate": 1.815217391304348e-05, + "loss": 0.1192, + "step": 170 + }, + { + "epoch": 0.49, + "learning_rate": 1.8043478260869567e-05, + "loss": 0.1748, + "step": 180 + }, + { + "epoch": 0.52, + "learning_rate": 1.7934782608695654e-05, + "loss": 0.0779, + "step": 190 + }, + { + "epoch": 0.54, + "learning_rate": 1.782608695652174e-05, + "loss": 0.1075, + "step": 200 + }, + { + "epoch": 0.57, + "learning_rate": 1.7717391304347828e-05, + "loss": 0.1298, + "step": 210 + }, + { + "epoch": 0.6, + "learning_rate": 1.7608695652173915e-05, + "loss": 0.0728, + "step": 220 + }, + { + "epoch": 0.62, + "learning_rate": 1.7500000000000002e-05, + "loss": 0.1189, + "step": 230 + }, + { + "epoch": 0.65, + "learning_rate": 1.739130434782609e-05, + "loss": 0.1102, + "step": 240 + }, + { + "epoch": 0.68, + "learning_rate": 1.7282608695652176e-05, + "loss": 0.1183, + "step": 250 + }, + { + "epoch": 0.71, + "learning_rate": 1.7173913043478263e-05, + "loss": 0.3006, + "step": 260 + }, + { + "epoch": 0.73, + "learning_rate": 1.706521739130435e-05, + "loss": 0.1408, + "step": 270 + }, + { + "epoch": 0.76, + "learning_rate": 1.6956521739130437e-05, + "loss": 0.141, + "step": 280 + }, + { + "epoch": 0.79, + "learning_rate": 1.6847826086956524e-05, + "loss": 0.1208, + "step": 290 + }, + { + "epoch": 0.82, + "learning_rate": 1.673913043478261e-05, + "loss": 0.1004, + "step": 300 + }, + { + "epoch": 0.84, + "learning_rate": 1.6630434782608698e-05, + "loss": 0.206, + "step": 310 + }, + { + "epoch": 0.87, + "learning_rate": 1.6521739130434785e-05, + "loss": 0.12, + "step": 320 + }, + { + "epoch": 0.9, + "learning_rate": 1.641304347826087e-05, + "loss": 0.0705, + "step": 330 + }, + { + "epoch": 0.92, + "learning_rate": 1.630434782608696e-05, + "loss": 0.1018, + "step": 340 + }, + { + "epoch": 0.95, + "learning_rate": 1.6195652173913045e-05, + "loss": 0.1501, + "step": 350 + }, + { + "epoch": 0.98, + "learning_rate": 1.6086956521739132e-05, + "loss": 0.0597, + "step": 360 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.9865125240847784, + "eval_loss": 0.05456383526325226, + "eval_runtime": 6.5116, + "eval_samples_per_second": 79.704, + "eval_steps_per_second": 9.982, + "step": 368 + }, + { + "epoch": 1.01, + "learning_rate": 1.597826086956522e-05, + "loss": 0.0878, + "step": 370 + }, + { + "epoch": 1.03, + "learning_rate": 1.5869565217391306e-05, + "loss": 0.1651, + "step": 380 + }, + { + "epoch": 1.06, + "learning_rate": 1.576086956521739e-05, + "loss": 0.0645, + "step": 390 + }, + { + "epoch": 1.09, + "learning_rate": 1.565217391304348e-05, + "loss": 0.1085, + "step": 400 + }, + { + "epoch": 1.11, + "learning_rate": 1.5543478260869567e-05, + "loss": 0.0967, + "step": 410 + }, + { + "epoch": 1.14, + "learning_rate": 1.5434782608695654e-05, + "loss": 0.1178, + "step": 420 + }, + { + "epoch": 1.17, + "learning_rate": 1.532608695652174e-05, + "loss": 0.0605, + "step": 430 + }, + { + "epoch": 1.2, + "learning_rate": 1.5217391304347828e-05, + "loss": 0.1394, + "step": 440 + }, + { + "epoch": 1.22, + "learning_rate": 1.5108695652173915e-05, + "loss": 0.1113, + "step": 450 + }, + { + "epoch": 1.25, + "learning_rate": 1.5000000000000002e-05, + "loss": 0.0225, + "step": 460 + }, + { + "epoch": 1.28, + "learning_rate": 1.4891304347826087e-05, + "loss": 0.1861, + "step": 470 + }, + { + "epoch": 1.3, + "learning_rate": 1.4782608695652174e-05, + "loss": 0.0879, + "step": 480 + }, + { + "epoch": 1.33, + "learning_rate": 1.4673913043478263e-05, + "loss": 0.094, + "step": 490 + }, + { + "epoch": 1.36, + "learning_rate": 1.456521739130435e-05, + "loss": 0.1837, + "step": 500 + }, + { + "epoch": 1.39, + "learning_rate": 1.4456521739130435e-05, + "loss": 0.057, + "step": 510 + }, + { + "epoch": 1.41, + "learning_rate": 1.4347826086956522e-05, + "loss": 0.0504, + "step": 520 + }, + { + "epoch": 1.44, + "learning_rate": 1.423913043478261e-05, + "loss": 0.03, + "step": 530 + }, + { + "epoch": 1.47, + "learning_rate": 1.4130434782608698e-05, + "loss": 0.0637, + "step": 540 + }, + { + "epoch": 1.49, + "learning_rate": 1.4021739130434783e-05, + "loss": 0.1572, + "step": 550 + }, + { + "epoch": 1.52, + "learning_rate": 1.391304347826087e-05, + "loss": 0.2074, + "step": 560 + }, + { + "epoch": 1.55, + "learning_rate": 1.3804347826086957e-05, + "loss": 0.1031, + "step": 570 + }, + { + "epoch": 1.58, + "learning_rate": 1.3695652173913046e-05, + "loss": 0.075, + "step": 580 + }, + { + "epoch": 1.6, + "learning_rate": 1.3586956521739133e-05, + "loss": 0.0854, + "step": 590 + }, + { + "epoch": 1.63, + "learning_rate": 1.3478260869565218e-05, + "loss": 0.0897, + "step": 600 + }, + { + "epoch": 1.66, + "learning_rate": 1.3369565217391305e-05, + "loss": 0.1017, + "step": 610 + }, + { + "epoch": 1.68, + "learning_rate": 1.3260869565217392e-05, + "loss": 0.132, + "step": 620 + }, + { + "epoch": 1.71, + "learning_rate": 1.315217391304348e-05, + "loss": 0.0471, + "step": 630 + }, + { + "epoch": 1.74, + "learning_rate": 1.3043478260869566e-05, + "loss": 0.0707, + "step": 640 + }, + { + "epoch": 1.77, + "learning_rate": 1.2934782608695653e-05, + "loss": 0.0506, + "step": 650 + }, + { + "epoch": 1.79, + "learning_rate": 1.282608695652174e-05, + "loss": 0.1308, + "step": 660 + }, + { + "epoch": 1.82, + "learning_rate": 1.2717391304347828e-05, + "loss": 0.1188, + "step": 670 + }, + { + "epoch": 1.85, + "learning_rate": 1.2608695652173915e-05, + "loss": 0.1021, + "step": 680 + }, + { + "epoch": 1.88, + "learning_rate": 1.25e-05, + "loss": 0.1199, + "step": 690 + }, + { + "epoch": 1.9, + "learning_rate": 1.2391304347826088e-05, + "loss": 0.1068, + "step": 700 + }, + { + "epoch": 1.93, + "learning_rate": 1.2282608695652175e-05, + "loss": 0.0535, + "step": 710 + }, + { + "epoch": 1.96, + "learning_rate": 1.2173913043478263e-05, + "loss": 0.0723, + "step": 720 + }, + { + "epoch": 1.98, + "learning_rate": 1.2065217391304348e-05, + "loss": 0.2009, + "step": 730 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.9865125240847784, + "eval_loss": 0.05307452380657196, + "eval_runtime": 6.4841, + "eval_samples_per_second": 80.043, + "eval_steps_per_second": 10.025, + "step": 736 + }, + { + "epoch": 2.01, + "learning_rate": 1.1956521739130435e-05, + "loss": 0.0156, + "step": 740 + }, + { + "epoch": 2.04, + "learning_rate": 1.1847826086956522e-05, + "loss": 0.169, + "step": 750 + }, + { + "epoch": 2.07, + "learning_rate": 1.1739130434782611e-05, + "loss": 0.0866, + "step": 760 + }, + { + "epoch": 2.09, + "learning_rate": 1.1630434782608698e-05, + "loss": 0.0973, + "step": 770 + }, + { + "epoch": 2.12, + "learning_rate": 1.1521739130434783e-05, + "loss": 0.0427, + "step": 780 + }, + { + "epoch": 2.15, + "learning_rate": 1.141304347826087e-05, + "loss": 0.1296, + "step": 790 + }, + { + "epoch": 2.17, + "learning_rate": 1.1304347826086957e-05, + "loss": 0.0265, + "step": 800 + }, + { + "epoch": 2.2, + "learning_rate": 1.1195652173913046e-05, + "loss": 0.1574, + "step": 810 + }, + { + "epoch": 2.23, + "learning_rate": 1.1086956521739131e-05, + "loss": 0.0655, + "step": 820 + }, + { + "epoch": 2.26, + "learning_rate": 1.0978260869565218e-05, + "loss": 0.0785, + "step": 830 + }, + { + "epoch": 2.28, + "learning_rate": 1.0869565217391305e-05, + "loss": 0.1273, + "step": 840 + }, + { + "epoch": 2.31, + "learning_rate": 1.076086956521739e-05, + "loss": 0.0374, + "step": 850 + }, + { + "epoch": 2.34, + "learning_rate": 1.0652173913043479e-05, + "loss": 0.2576, + "step": 860 + }, + { + "epoch": 2.36, + "learning_rate": 1.0543478260869566e-05, + "loss": 0.0417, + "step": 870 + }, + { + "epoch": 2.39, + "learning_rate": 1.0434782608695653e-05, + "loss": 0.115, + "step": 880 + }, + { + "epoch": 2.42, + "learning_rate": 1.032608695652174e-05, + "loss": 0.105, + "step": 890 + }, + { + "epoch": 2.45, + "learning_rate": 1.0217391304347829e-05, + "loss": 0.1704, + "step": 900 + }, + { + "epoch": 2.47, + "learning_rate": 1.0108695652173914e-05, + "loss": 0.0442, + "step": 910 + }, + { + "epoch": 2.5, + "learning_rate": 1e-05, + "loss": 0.079, + "step": 920 + }, + { + "epoch": 2.53, + "learning_rate": 9.891304347826088e-06, + "loss": 0.0214, + "step": 930 + }, + { + "epoch": 2.55, + "learning_rate": 9.782608695652175e-06, + "loss": 0.112, + "step": 940 + }, + { + "epoch": 2.58, + "learning_rate": 9.673913043478262e-06, + "loss": 0.0467, + "step": 950 + }, + { + "epoch": 2.61, + "learning_rate": 9.565217391304349e-06, + "loss": 0.0944, + "step": 960 + }, + { + "epoch": 2.64, + "learning_rate": 9.456521739130436e-06, + "loss": 0.0195, + "step": 970 + }, + { + "epoch": 2.66, + "learning_rate": 9.347826086956523e-06, + "loss": 0.1084, + "step": 980 + }, + { + "epoch": 2.69, + "learning_rate": 9.23913043478261e-06, + "loss": 0.0598, + "step": 990 + }, + { + "epoch": 2.72, + "learning_rate": 9.130434782608697e-06, + "loss": 0.0563, + "step": 1000 + }, + { + "epoch": 2.74, + "learning_rate": 9.021739130434784e-06, + "loss": 0.1212, + "step": 1010 + }, + { + "epoch": 2.77, + "learning_rate": 8.91304347826087e-06, + "loss": 0.103, + "step": 1020 + }, + { + "epoch": 2.8, + "learning_rate": 8.804347826086957e-06, + "loss": 0.0708, + "step": 1030 + }, + { + "epoch": 2.83, + "learning_rate": 8.695652173913044e-06, + "loss": 0.0639, + "step": 1040 + }, + { + "epoch": 2.85, + "learning_rate": 8.586956521739131e-06, + "loss": 0.0153, + "step": 1050 + }, + { + "epoch": 2.88, + "learning_rate": 8.478260869565218e-06, + "loss": 0.028, + "step": 1060 + }, + { + "epoch": 2.91, + "learning_rate": 8.369565217391305e-06, + "loss": 0.029, + "step": 1070 + }, + { + "epoch": 2.93, + "learning_rate": 8.260869565217392e-06, + "loss": 0.0915, + "step": 1080 + }, + { + "epoch": 2.96, + "learning_rate": 8.15217391304348e-06, + "loss": 0.0186, + "step": 1090 + }, + { + "epoch": 2.99, + "learning_rate": 8.043478260869566e-06, + "loss": 0.0114, + "step": 1100 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.9903660886319846, + "eval_loss": 0.04182479530572891, + "eval_runtime": 6.3668, + "eval_samples_per_second": 81.517, + "eval_steps_per_second": 10.209, + "step": 1104 + }, + { + "epoch": 3.02, + "learning_rate": 7.934782608695653e-06, + "loss": 0.2106, + "step": 1110 + }, + { + "epoch": 3.04, + "learning_rate": 7.82608695652174e-06, + "loss": 0.0515, + "step": 1120 + }, + { + "epoch": 3.07, + "learning_rate": 7.717391304347827e-06, + "loss": 0.0406, + "step": 1130 + }, + { + "epoch": 3.1, + "learning_rate": 7.608695652173914e-06, + "loss": 0.0355, + "step": 1140 + }, + { + "epoch": 3.12, + "learning_rate": 7.500000000000001e-06, + "loss": 0.1842, + "step": 1150 + }, + { + "epoch": 3.15, + "learning_rate": 7.391304347826087e-06, + "loss": 0.0545, + "step": 1160 + }, + { + "epoch": 3.18, + "learning_rate": 7.282608695652175e-06, + "loss": 0.1349, + "step": 1170 + }, + { + "epoch": 3.21, + "learning_rate": 7.173913043478261e-06, + "loss": 0.0104, + "step": 1180 + }, + { + "epoch": 3.23, + "learning_rate": 7.065217391304349e-06, + "loss": 0.1324, + "step": 1190 + }, + { + "epoch": 3.26, + "learning_rate": 6.956521739130435e-06, + "loss": 0.0934, + "step": 1200 + }, + { + "epoch": 3.29, + "learning_rate": 6.847826086956523e-06, + "loss": 0.0966, + "step": 1210 + }, + { + "epoch": 3.32, + "learning_rate": 6.739130434782609e-06, + "loss": 0.0588, + "step": 1220 + }, + { + "epoch": 3.34, + "learning_rate": 6.630434782608696e-06, + "loss": 0.0802, + "step": 1230 + }, + { + "epoch": 3.37, + "learning_rate": 6.521739130434783e-06, + "loss": 0.0576, + "step": 1240 + }, + { + "epoch": 3.4, + "learning_rate": 6.41304347826087e-06, + "loss": 0.0419, + "step": 1250 + }, + { + "epoch": 3.42, + "learning_rate": 6.304347826086958e-06, + "loss": 0.0481, + "step": 1260 + }, + { + "epoch": 3.45, + "learning_rate": 6.195652173913044e-06, + "loss": 0.0861, + "step": 1270 + }, + { + "epoch": 3.48, + "learning_rate": 6.086956521739132e-06, + "loss": 0.1023, + "step": 1280 + }, + { + "epoch": 3.51, + "learning_rate": 5.978260869565218e-06, + "loss": 0.0584, + "step": 1290 + }, + { + "epoch": 3.53, + "learning_rate": 5.8695652173913055e-06, + "loss": 0.1282, + "step": 1300 + }, + { + "epoch": 3.56, + "learning_rate": 5.760869565217392e-06, + "loss": 0.0277, + "step": 1310 + }, + { + "epoch": 3.59, + "learning_rate": 5.652173913043479e-06, + "loss": 0.1837, + "step": 1320 + }, + { + "epoch": 3.61, + "learning_rate": 5.543478260869566e-06, + "loss": 0.0264, + "step": 1330 + }, + { + "epoch": 3.64, + "learning_rate": 5.4347826086956525e-06, + "loss": 0.1224, + "step": 1340 + }, + { + "epoch": 3.67, + "learning_rate": 5.3260869565217395e-06, + "loss": 0.0434, + "step": 1350 + }, + { + "epoch": 3.7, + "learning_rate": 5.2173913043478265e-06, + "loss": 0.1337, + "step": 1360 + }, + { + "epoch": 3.72, + "learning_rate": 5.108695652173914e-06, + "loss": 0.0071, + "step": 1370 + }, + { + "epoch": 3.75, + "learning_rate": 5e-06, + "loss": 0.0568, + "step": 1380 + }, + { + "epoch": 3.78, + "learning_rate": 4.891304347826087e-06, + "loss": 0.043, + "step": 1390 + }, + { + "epoch": 3.8, + "learning_rate": 4.782608695652174e-06, + "loss": 0.0719, + "step": 1400 + }, + { + "epoch": 3.83, + "learning_rate": 4.673913043478261e-06, + "loss": 0.1128, + "step": 1410 + }, + { + "epoch": 3.86, + "learning_rate": 4.565217391304348e-06, + "loss": 0.0477, + "step": 1420 + }, + { + "epoch": 3.89, + "learning_rate": 4.456521739130435e-06, + "loss": 0.0791, + "step": 1430 + }, + { + "epoch": 3.91, + "learning_rate": 4.347826086956522e-06, + "loss": 0.037, + "step": 1440 + }, + { + "epoch": 3.94, + "learning_rate": 4.239130434782609e-06, + "loss": 0.1466, + "step": 1450 + }, + { + "epoch": 3.97, + "learning_rate": 4.130434782608696e-06, + "loss": 0.0467, + "step": 1460 + }, + { + "epoch": 3.99, + "learning_rate": 4.021739130434783e-06, + "loss": 0.0998, + "step": 1470 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9903660886319846, + "eval_loss": 0.04251210391521454, + "eval_runtime": 6.5268, + "eval_samples_per_second": 79.518, + "eval_steps_per_second": 9.959, + "step": 1472 + }, + { + "epoch": 4.02, + "learning_rate": 3.91304347826087e-06, + "loss": 0.1285, + "step": 1480 + }, + { + "epoch": 4.05, + "learning_rate": 3.804347826086957e-06, + "loss": 0.1634, + "step": 1490 + }, + { + "epoch": 4.08, + "learning_rate": 3.6956521739130436e-06, + "loss": 0.0462, + "step": 1500 + }, + { + "epoch": 4.1, + "learning_rate": 3.5869565217391305e-06, + "loss": 0.0846, + "step": 1510 + }, + { + "epoch": 4.13, + "learning_rate": 3.4782608695652175e-06, + "loss": 0.1239, + "step": 1520 + }, + { + "epoch": 4.16, + "learning_rate": 3.3695652173913045e-06, + "loss": 0.1818, + "step": 1530 + }, + { + "epoch": 4.18, + "learning_rate": 3.2608695652173914e-06, + "loss": 0.021, + "step": 1540 + }, + { + "epoch": 4.21, + "learning_rate": 3.152173913043479e-06, + "loss": 0.0741, + "step": 1550 + }, + { + "epoch": 4.24, + "learning_rate": 3.043478260869566e-06, + "loss": 0.182, + "step": 1560 + }, + { + "epoch": 4.27, + "learning_rate": 2.9347826086956528e-06, + "loss": 0.0433, + "step": 1570 + }, + { + "epoch": 4.29, + "learning_rate": 2.8260869565217393e-06, + "loss": 0.0437, + "step": 1580 + }, + { + "epoch": 4.32, + "learning_rate": 2.7173913043478263e-06, + "loss": 0.0382, + "step": 1590 + }, + { + "epoch": 4.35, + "learning_rate": 2.6086956521739132e-06, + "loss": 0.046, + "step": 1600 + }, + { + "epoch": 4.38, + "learning_rate": 2.5e-06, + "loss": 0.0213, + "step": 1610 + }, + { + "epoch": 4.4, + "learning_rate": 2.391304347826087e-06, + "loss": 0.0186, + "step": 1620 + }, + { + "epoch": 4.43, + "learning_rate": 2.282608695652174e-06, + "loss": 0.0671, + "step": 1630 + }, + { + "epoch": 4.46, + "learning_rate": 2.173913043478261e-06, + "loss": 0.0908, + "step": 1640 + }, + { + "epoch": 4.48, + "learning_rate": 2.065217391304348e-06, + "loss": 0.0697, + "step": 1650 + }, + { + "epoch": 4.51, + "learning_rate": 1.956521739130435e-06, + "loss": 0.0637, + "step": 1660 + }, + { + "epoch": 4.54, + "learning_rate": 1.8478260869565218e-06, + "loss": 0.0819, + "step": 1670 + }, + { + "epoch": 4.57, + "learning_rate": 1.7391304347826088e-06, + "loss": 0.0623, + "step": 1680 + }, + { + "epoch": 4.59, + "learning_rate": 1.6304347826086957e-06, + "loss": 0.0114, + "step": 1690 + }, + { + "epoch": 4.62, + "learning_rate": 1.521739130434783e-06, + "loss": 0.0342, + "step": 1700 + }, + { + "epoch": 4.65, + "learning_rate": 1.4130434782608697e-06, + "loss": 0.0859, + "step": 1710 + }, + { + "epoch": 4.67, + "learning_rate": 1.3043478260869566e-06, + "loss": 0.0462, + "step": 1720 + }, + { + "epoch": 4.7, + "learning_rate": 1.1956521739130436e-06, + "loss": 0.1022, + "step": 1730 + }, + { + "epoch": 4.73, + "learning_rate": 1.0869565217391306e-06, + "loss": 0.0571, + "step": 1740 + }, + { + "epoch": 4.76, + "learning_rate": 9.782608695652175e-07, + "loss": 0.0108, + "step": 1750 + }, + { + "epoch": 4.78, + "learning_rate": 8.695652173913044e-07, + "loss": 0.0893, + "step": 1760 + }, + { + "epoch": 4.81, + "learning_rate": 7.608695652173914e-07, + "loss": 0.0214, + "step": 1770 + }, + { + "epoch": 4.84, + "learning_rate": 6.521739130434783e-07, + "loss": 0.0416, + "step": 1780 + }, + { + "epoch": 4.86, + "learning_rate": 5.434782608695653e-07, + "loss": 0.1022, + "step": 1790 + }, + { + "epoch": 4.89, + "learning_rate": 4.347826086956522e-07, + "loss": 0.0628, + "step": 1800 + }, + { + "epoch": 4.92, + "learning_rate": 3.2608695652173915e-07, + "loss": 0.0691, + "step": 1810 + }, + { + "epoch": 4.95, + "learning_rate": 2.173913043478261e-07, + "loss": 0.0371, + "step": 1820 + }, + { + "epoch": 4.97, + "learning_rate": 1.0869565217391305e-07, + "loss": 0.0714, + "step": 1830 + }, + { + "epoch": 5.0, + "learning_rate": 0.0, + "loss": 0.1244, + "step": 1840 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.9922928709055877, + "eval_loss": 0.03933868557214737, + "eval_runtime": 6.3674, + "eval_samples_per_second": 81.509, + "eval_steps_per_second": 10.208, + "step": 1840 + }, + { + "epoch": 5.0, + "step": 1840, + "total_flos": 1.1387447873864294e+18, + "train_loss": 0.10440107471431079, + "train_runtime": 430.0921, + "train_samples_per_second": 34.167, + "train_steps_per_second": 4.278 + } + ], + "logging_steps": 10, + "max_steps": 1840, + "num_input_tokens_seen": 0, + "num_train_epochs": 5, + "save_steps": 500, + "total_flos": 1.1387447873864294e+18, + "trial_name": null, + "trial_params": null +} diff --git a/training_args.bin b/training_args.bin new file mode 100644 index 0000000..6d717d2 --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ffcc3f59e70812095756ed77abfa7f9b0700337a7c9d25983e9245c2dc331049 +size 4728