From 36fbfa99d81a729e2c4b208b0200a8b40c03cc2a Mon Sep 17 00:00:00 2001 From: YYJ-aaaa <1058387004@qq.com> Date: Tue, 29 Oct 2024 16:33:43 +0800 Subject: [PATCH] first commit --- all_results.json | 12 + config.json | 228 ++++++++++ eval_results.json | 8 + preprocessor_config.json | 17 + pytorch_model.bin | 3 + train_results.json | 7 + trainer_state.json | 958 +++++++++++++++++++++++++++++++++++++++ training_args.bin | 3 + 8 files changed, 1236 insertions(+) create mode 100644 all_results.json create mode 100644 config.json create mode 100644 eval_results.json create mode 100644 preprocessor_config.json create mode 100644 pytorch_model.bin create mode 100644 train_results.json create mode 100644 trainer_state.json create mode 100644 training_args.bin diff --git a/all_results.json b/all_results.json new file mode 100644 index 0000000..121ed7c --- /dev/null +++ b/all_results.json @@ -0,0 +1,12 @@ +{ + "epoch": 5.0, + "eval_accuracy": 0.8912871287128713, + "eval_loss": 0.45006364583969116, + "eval_runtime": 107.735, + "eval_samples_per_second": 234.371, + "eval_steps_per_second": 1.838, + "train_loss": 0.48942885282071863, + "train_runtime": 2221.2523, + "train_samples_per_second": 170.512, + "train_steps_per_second": 1.333 +} \ No newline at end of file diff --git a/config.json b/config.json new file mode 100644 index 0000000..a3ecb2d --- /dev/null +++ b/config.json @@ -0,0 +1,228 @@ +{ + "_name_or_path": "google/vit-base-patch16-224-in21k", + "architectures": [ + "ViTForImageClassification" + ], + "attention_probs_dropout_prob": 0.0, + "finetuning_task": "image-classification", + "hidden_act": "gelu", + "hidden_dropout_prob": 0.0, + "hidden_size": 768, + "id2label": { + "0": "apple_pie", + "1": "baby_back_ribs", + "10": "bruschetta", + "100": "waffles", + "11": "caesar_salad", + "12": "cannoli", + "13": "caprese_salad", + "14": "carrot_cake", + "15": "ceviche", + "16": "cheese_plate", + "17": "cheesecake", + "18": "chicken_curry", + "19": "chicken_quesadilla", + "2": "baklava", + "20": "chicken_wings", + "21": "chocolate_cake", + "22": "chocolate_mousse", + "23": "churros", + "24": "clam_chowder", + "25": "club_sandwich", + "26": "crab_cakes", + "27": "creme_brulee", + "28": "croque_madame", + "29": "cup_cakes", + "3": "beef_carpaccio", + "30": "deviled_eggs", + "31": "donuts", + "32": "dumplings", + "33": "edamame", + "34": "eggs_benedict", + "35": "escargots", + "36": "falafel", + "37": "filet_mignon", + "38": "fish_and_chips", + "39": "foie_gras", + "4": "beef_tartare", + "40": "french_fries", + "41": "french_onion_soup", + "42": "french_toast", + "43": "fried_calamari", + "44": "fried_rice", + "45": "frozen_yogurt", + "46": "garlic_bread", + "47": "gnocchi", + "48": "greek_salad", + "49": "grilled_cheese_sandwich", + "5": "beet_salad", + "50": "grilled_salmon", + "51": "guacamole", + "52": "gyoza", + "53": "hamburger", + "54": "hot_and_sour_soup", + "55": "hot_dog", + "56": "huevos_rancheros", + "57": "hummus", + "58": "ice_cream", + "59": "lasagna", + "6": "beignets", + "60": "lobster_bisque", + "61": "lobster_roll_sandwich", + "62": "macaroni_and_cheese", + "63": "macarons", + "64": "miso_soup", + "65": "mussels", + "66": "nachos", + "67": "omelette", + "68": "onion_rings", + "69": "oysters", + "7": "bibimbap", + "70": "pad_thai", + "71": "paella", + "72": "pancakes", + "73": "panna_cotta", + "74": "peking_duck", + "75": "pho", + "76": "pizza", + "77": "pork_chop", + "78": "poutine", + "79": "prime_rib", + "8": "bread_pudding", + "80": "pulled_pork_sandwich", + "81": "ramen", + "82": "ravioli", + "83": "red_velvet_cake", + "84": "risotto", + "85": "samosa", + "86": "sashimi", + "87": "scallops", + "88": "seaweed_salad", + "89": "shrimp_and_grits", + "9": "breakfast_burrito", + "90": "spaghetti_bolognese", + "91": "spaghetti_carbonara", + "92": "spring_rolls", + "93": "steak", + "94": "strawberry_shortcake", + "95": "sushi", + "96": "tacos", + "97": "takoyaki", + "98": "tiramisu", + "99": "tuna_tartare" + }, + "image_size": 224, + "initializer_range": 0.02, + "intermediate_size": 3072, + "label2id": { + "apple_pie": "0", + "baby_back_ribs": "1", + "baklava": "2", + "beef_carpaccio": "3", + "beef_tartare": "4", + "beet_salad": "5", + "beignets": "6", + "bibimbap": "7", + "bread_pudding": "8", + "breakfast_burrito": "9", + "bruschetta": "10", + "caesar_salad": "11", + "cannoli": "12", + "caprese_salad": "13", + "carrot_cake": "14", + "ceviche": "15", + "cheese_plate": "16", + "cheesecake": "17", + "chicken_curry": "18", + "chicken_quesadilla": "19", + "chicken_wings": "20", + "chocolate_cake": "21", + "chocolate_mousse": "22", + "churros": "23", + "clam_chowder": "24", + "club_sandwich": "25", + "crab_cakes": "26", + "creme_brulee": "27", + "croque_madame": "28", + "cup_cakes": "29", + "deviled_eggs": "30", + "donuts": "31", + "dumplings": "32", + "edamame": "33", + "eggs_benedict": "34", + "escargots": "35", + "falafel": "36", + "filet_mignon": "37", + "fish_and_chips": "38", + "foie_gras": "39", + "french_fries": "40", + "french_onion_soup": "41", + "french_toast": "42", + "fried_calamari": "43", + "fried_rice": "44", + "frozen_yogurt": "45", + "garlic_bread": "46", + "gnocchi": "47", + "greek_salad": "48", + "grilled_cheese_sandwich": "49", + "grilled_salmon": "50", + "guacamole": "51", + "gyoza": "52", + "hamburger": "53", + "hot_and_sour_soup": "54", + "hot_dog": "55", + "huevos_rancheros": "56", + "hummus": "57", + "ice_cream": "58", + "lasagna": "59", + "lobster_bisque": "60", + "lobster_roll_sandwich": "61", + "macaroni_and_cheese": "62", + "macarons": "63", + "miso_soup": "64", + "mussels": "65", + "nachos": "66", + "omelette": "67", + "onion_rings": "68", + "oysters": "69", + "pad_thai": "70", + "paella": "71", + "pancakes": "72", + "panna_cotta": "73", + "peking_duck": "74", + "pho": "75", + "pizza": "76", + "pork_chop": "77", + "poutine": "78", + "prime_rib": "79", + "pulled_pork_sandwich": "80", + "ramen": "81", + "ravioli": "82", + "red_velvet_cake": "83", + "risotto": "84", + "samosa": "85", + "sashimi": "86", + "scallops": "87", + "seaweed_salad": "88", + "shrimp_and_grits": "89", + "spaghetti_bolognese": "90", + "spaghetti_carbonara": "91", + "spring_rolls": "92", + "steak": "93", + "strawberry_shortcake": "94", + "sushi": "95", + "tacos": "96", + "takoyaki": "97", + "tiramisu": "98", + "tuna_tartare": "99", + "waffles": "100" + }, + "layer_norm_eps": 1e-12, + "model_type": "vit", + "num_attention_heads": 12, + "num_channels": 3, + "num_hidden_layers": 12, + "patch_size": 16, + "torch_dtype": "float32", + "transformers_version": "4.8.1" +} diff --git a/eval_results.json b/eval_results.json new file mode 100644 index 0000000..b12731b --- /dev/null +++ b/eval_results.json @@ -0,0 +1,8 @@ +{ + "epoch": 5.0, + "eval_accuracy": 0.8912871287128713, + "eval_loss": 0.45006364583969116, + "eval_runtime": 107.735, + "eval_samples_per_second": 234.371, + "eval_steps_per_second": 1.838 +} \ No newline at end of file diff --git a/preprocessor_config.json b/preprocessor_config.json new file mode 100644 index 0000000..b7414e7 --- /dev/null +++ b/preprocessor_config.json @@ -0,0 +1,17 @@ +{ + "do_normalize": true, + "do_resize": true, + "feature_extractor_type": "ViTFeatureExtractor", + "image_mean": [ + 0.5, + 0.5, + 0.5 + ], + "image_std": [ + 0.5, + 0.5, + 0.5 + ], + "resample": 2, + "size": 224 +} diff --git a/pytorch_model.bin b/pytorch_model.bin new file mode 100644 index 0000000..a0d30ee --- /dev/null +++ b/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33d6b7d44c0bb524fbb6f6d3b7d1f7c8d7c703b58882bf2a845355d9411f566d +size 343584369 diff --git a/train_results.json b/train_results.json new file mode 100644 index 0000000..c5c3a9f --- /dev/null +++ b/train_results.json @@ -0,0 +1,7 @@ +{ + "epoch": 5.0, + "train_loss": 0.48942885282071863, + "train_runtime": 2221.2523, + "train_samples_per_second": 170.512, + "train_steps_per_second": 1.333 +} \ No newline at end of file diff --git a/trainer_state.json b/trainer_state.json new file mode 100644 index 0000000..77b8fda --- /dev/null +++ b/trainer_state.json @@ -0,0 +1,958 @@ +{ + "best_metric": 0.8912871287128713, + "best_model_checkpoint": "food101_outputs/checkpoint-2960", + "epoch": 5.0, + "global_step": 2960, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.03, + "learning_rate": 0.00019864864864864865, + "loss": 4.4083, + "step": 20 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001972972972972973, + "loss": 3.884, + "step": 40 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019594594594594594, + "loss": 3.4068, + "step": 60 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019459459459459462, + "loss": 2.9784, + "step": 80 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019324324324324326, + "loss": 2.6549, + "step": 100 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001918918918918919, + "loss": 2.3437, + "step": 120 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019054054054054055, + "loss": 2.1107, + "step": 140 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001891891891891892, + "loss": 1.8803, + "step": 160 + }, + { + "epoch": 0.3, + "learning_rate": 0.00018783783783783784, + "loss": 1.7025, + "step": 180 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001864864864864865, + "loss": 1.5403, + "step": 200 + }, + { + "epoch": 0.37, + "learning_rate": 0.00018513513513513513, + "loss": 1.48, + "step": 220 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001837837837837838, + "loss": 1.358, + "step": 240 + }, + { + "epoch": 0.44, + "learning_rate": 0.00018243243243243245, + "loss": 1.2943, + "step": 260 + }, + { + "epoch": 0.47, + "learning_rate": 0.0001810810810810811, + "loss": 1.2301, + "step": 280 + }, + { + "epoch": 0.51, + "learning_rate": 0.00017972972972972974, + "loss": 1.1578, + "step": 300 + }, + { + "epoch": 0.54, + "learning_rate": 0.00017837837837837839, + "loss": 1.0811, + "step": 320 + }, + { + "epoch": 0.57, + "learning_rate": 0.00017702702702702703, + "loss": 1.0662, + "step": 340 + }, + { + "epoch": 0.61, + "learning_rate": 0.00017567567567567568, + "loss": 1.0146, + "step": 360 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017432432432432432, + "loss": 0.9584, + "step": 380 + }, + { + "epoch": 0.68, + "learning_rate": 0.000172972972972973, + "loss": 0.973, + "step": 400 + }, + { + "epoch": 0.71, + "learning_rate": 0.00017162162162162164, + "loss": 0.9817, + "step": 420 + }, + { + "epoch": 0.74, + "learning_rate": 0.00017027027027027028, + "loss": 0.9552, + "step": 440 + }, + { + "epoch": 0.78, + "learning_rate": 0.00016891891891891893, + "loss": 0.916, + "step": 460 + }, + { + "epoch": 0.81, + "learning_rate": 0.00016756756756756757, + "loss": 0.8896, + "step": 480 + }, + { + "epoch": 0.84, + "learning_rate": 0.00016621621621621622, + "loss": 0.8855, + "step": 500 + }, + { + "epoch": 0.88, + "learning_rate": 0.00016486486486486486, + "loss": 0.8823, + "step": 520 + }, + { + "epoch": 0.91, + "learning_rate": 0.0001635135135135135, + "loss": 0.8059, + "step": 540 + }, + { + "epoch": 0.95, + "learning_rate": 0.00016216216216216218, + "loss": 0.8323, + "step": 560 + }, + { + "epoch": 0.98, + "learning_rate": 0.00016081081081081083, + "loss": 0.8271, + "step": 580 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.8561584158415841, + "eval_loss": 0.6070069074630737, + "eval_runtime": 142.5311, + "eval_samples_per_second": 177.154, + "eval_steps_per_second": 1.389, + "step": 592 + }, + { + "epoch": 1.01, + "learning_rate": 0.00015945945945945947, + "loss": 0.6876, + "step": 600 + }, + { + "epoch": 1.05, + "learning_rate": 0.00015810810810810812, + "loss": 0.4771, + "step": 620 + }, + { + "epoch": 1.08, + "learning_rate": 0.00015675675675675676, + "loss": 0.4998, + "step": 640 + }, + { + "epoch": 1.11, + "learning_rate": 0.0001554054054054054, + "loss": 0.4753, + "step": 660 + }, + { + "epoch": 1.15, + "learning_rate": 0.00015405405405405405, + "loss": 0.5197, + "step": 680 + }, + { + "epoch": 1.18, + "learning_rate": 0.0001527027027027027, + "loss": 0.527, + "step": 700 + }, + { + "epoch": 1.22, + "learning_rate": 0.00015135135135135137, + "loss": 0.5371, + "step": 720 + }, + { + "epoch": 1.25, + "learning_rate": 0.00015000000000000001, + "loss": 0.4992, + "step": 740 + }, + { + "epoch": 1.28, + "learning_rate": 0.00014864864864864866, + "loss": 0.4728, + "step": 760 + }, + { + "epoch": 1.32, + "learning_rate": 0.0001472972972972973, + "loss": 0.5185, + "step": 780 + }, + { + "epoch": 1.35, + "learning_rate": 0.00014594594594594595, + "loss": 0.5071, + "step": 800 + }, + { + "epoch": 1.39, + "learning_rate": 0.00014459459459459462, + "loss": 0.4728, + "step": 820 + }, + { + "epoch": 1.42, + "learning_rate": 0.00014324324324324324, + "loss": 0.4731, + "step": 840 + }, + { + "epoch": 1.45, + "learning_rate": 0.00014189189189189188, + "loss": 0.5211, + "step": 860 + }, + { + "epoch": 1.49, + "learning_rate": 0.00014054054054054056, + "loss": 0.4949, + "step": 880 + }, + { + "epoch": 1.52, + "learning_rate": 0.0001391891891891892, + "loss": 0.4847, + "step": 900 + }, + { + "epoch": 1.55, + "learning_rate": 0.00013783783783783785, + "loss": 0.4626, + "step": 920 + }, + { + "epoch": 1.59, + "learning_rate": 0.0001364864864864865, + "loss": 0.456, + "step": 940 + }, + { + "epoch": 1.62, + "learning_rate": 0.00013513513513513514, + "loss": 0.4938, + "step": 960 + }, + { + "epoch": 1.66, + "learning_rate": 0.0001337837837837838, + "loss": 0.4846, + "step": 980 + }, + { + "epoch": 1.69, + "learning_rate": 0.00013243243243243243, + "loss": 0.4576, + "step": 1000 + }, + { + "epoch": 1.72, + "learning_rate": 0.00013108108108108107, + "loss": 0.4766, + "step": 1020 + }, + { + "epoch": 1.76, + "learning_rate": 0.00012972972972972974, + "loss": 0.4741, + "step": 1040 + }, + { + "epoch": 1.79, + "learning_rate": 0.0001283783783783784, + "loss": 0.4225, + "step": 1060 + }, + { + "epoch": 1.82, + "learning_rate": 0.00012702702702702703, + "loss": 0.4201, + "step": 1080 + }, + { + "epoch": 1.86, + "learning_rate": 0.00012567567567567568, + "loss": 0.4327, + "step": 1100 + }, + { + "epoch": 1.89, + "learning_rate": 0.00012432432432432433, + "loss": 0.4771, + "step": 1120 + }, + { + "epoch": 1.93, + "learning_rate": 0.000122972972972973, + "loss": 0.4473, + "step": 1140 + }, + { + "epoch": 1.96, + "learning_rate": 0.00012162162162162163, + "loss": 0.4569, + "step": 1160 + }, + { + "epoch": 1.99, + "learning_rate": 0.00012027027027027027, + "loss": 0.4376, + "step": 1180 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.8691485148514851, + "eval_loss": 0.4947212338447571, + "eval_runtime": 107.3521, + "eval_samples_per_second": 235.207, + "eval_steps_per_second": 1.844, + "step": 1184 + }, + { + "epoch": 2.03, + "learning_rate": 0.00011891891891891893, + "loss": 0.2526, + "step": 1200 + }, + { + "epoch": 2.06, + "learning_rate": 0.00011756756756756758, + "loss": 0.2468, + "step": 1220 + }, + { + "epoch": 2.09, + "learning_rate": 0.00011621621621621621, + "loss": 0.2298, + "step": 1240 + }, + { + "epoch": 2.13, + "learning_rate": 0.00011486486486486487, + "loss": 0.2294, + "step": 1260 + }, + { + "epoch": 2.16, + "learning_rate": 0.00011351351351351351, + "loss": 0.2408, + "step": 1280 + }, + { + "epoch": 2.2, + "learning_rate": 0.00011216216216216217, + "loss": 0.2245, + "step": 1300 + }, + { + "epoch": 2.23, + "learning_rate": 0.00011081081081081082, + "loss": 0.2391, + "step": 1320 + }, + { + "epoch": 2.26, + "learning_rate": 0.00010945945945945946, + "loss": 0.241, + "step": 1340 + }, + { + "epoch": 2.3, + "learning_rate": 0.00010810810810810812, + "loss": 0.2197, + "step": 1360 + }, + { + "epoch": 2.33, + "learning_rate": 0.00010675675675675677, + "loss": 0.2467, + "step": 1380 + }, + { + "epoch": 2.36, + "learning_rate": 0.0001054054054054054, + "loss": 0.2397, + "step": 1400 + }, + { + "epoch": 2.4, + "learning_rate": 0.00010405405405405406, + "loss": 0.2308, + "step": 1420 + }, + { + "epoch": 2.43, + "learning_rate": 0.0001027027027027027, + "loss": 0.25, + "step": 1440 + }, + { + "epoch": 2.47, + "learning_rate": 0.00010135135135135136, + "loss": 0.2291, + "step": 1460 + }, + { + "epoch": 2.5, + "learning_rate": 0.0001, + "loss": 0.2496, + "step": 1480 + }, + { + "epoch": 2.53, + "learning_rate": 9.864864864864865e-05, + "loss": 0.2322, + "step": 1500 + }, + { + "epoch": 2.57, + "learning_rate": 9.729729729729731e-05, + "loss": 0.2266, + "step": 1520 + }, + { + "epoch": 2.6, + "learning_rate": 9.594594594594595e-05, + "loss": 0.2201, + "step": 1540 + }, + { + "epoch": 2.64, + "learning_rate": 9.45945945945946e-05, + "loss": 0.2497, + "step": 1560 + }, + { + "epoch": 2.67, + "learning_rate": 9.324324324324324e-05, + "loss": 0.2276, + "step": 1580 + }, + { + "epoch": 2.7, + "learning_rate": 9.18918918918919e-05, + "loss": 0.1945, + "step": 1600 + }, + { + "epoch": 2.74, + "learning_rate": 9.054054054054055e-05, + "loss": 0.2174, + "step": 1620 + }, + { + "epoch": 2.77, + "learning_rate": 8.918918918918919e-05, + "loss": 0.2423, + "step": 1640 + }, + { + "epoch": 2.8, + "learning_rate": 8.783783783783784e-05, + "loss": 0.2242, + "step": 1660 + }, + { + "epoch": 2.84, + "learning_rate": 8.64864864864865e-05, + "loss": 0.2383, + "step": 1680 + }, + { + "epoch": 2.87, + "learning_rate": 8.513513513513514e-05, + "loss": 0.2582, + "step": 1700 + }, + { + "epoch": 2.91, + "learning_rate": 8.378378378378379e-05, + "loss": 0.2125, + "step": 1720 + }, + { + "epoch": 2.94, + "learning_rate": 8.243243243243243e-05, + "loss": 0.2307, + "step": 1740 + }, + { + "epoch": 2.97, + "learning_rate": 8.108108108108109e-05, + "loss": 0.2089, + "step": 1760 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.8746930693069307, + "eval_loss": 0.48760801553726196, + "eval_runtime": 106.972, + "eval_samples_per_second": 236.043, + "eval_steps_per_second": 1.851, + "step": 1776 + }, + { + "epoch": 3.01, + "learning_rate": 7.972972972972974e-05, + "loss": 0.1821, + "step": 1780 + }, + { + "epoch": 3.04, + "learning_rate": 7.837837837837838e-05, + "loss": 0.1332, + "step": 1800 + }, + { + "epoch": 3.07, + "learning_rate": 7.702702702702703e-05, + "loss": 0.116, + "step": 1820 + }, + { + "epoch": 3.11, + "learning_rate": 7.567567567567568e-05, + "loss": 0.119, + "step": 1840 + }, + { + "epoch": 3.14, + "learning_rate": 7.432432432432433e-05, + "loss": 0.1222, + "step": 1860 + }, + { + "epoch": 3.18, + "learning_rate": 7.297297297297297e-05, + "loss": 0.118, + "step": 1880 + }, + { + "epoch": 3.21, + "learning_rate": 7.162162162162162e-05, + "loss": 0.1078, + "step": 1900 + }, + { + "epoch": 3.24, + "learning_rate": 7.027027027027028e-05, + "loss": 0.0982, + "step": 1920 + }, + { + "epoch": 3.28, + "learning_rate": 6.891891891891892e-05, + "loss": 0.1056, + "step": 1940 + }, + { + "epoch": 3.31, + "learning_rate": 6.756756756756757e-05, + "loss": 0.099, + "step": 1960 + }, + { + "epoch": 3.34, + "learning_rate": 6.621621621621621e-05, + "loss": 0.0961, + "step": 1980 + }, + { + "epoch": 3.38, + "learning_rate": 6.486486486486487e-05, + "loss": 0.1051, + "step": 2000 + }, + { + "epoch": 3.41, + "learning_rate": 6.358108108108109e-05, + "loss": 0.1161, + "step": 2020 + }, + { + "epoch": 3.45, + "learning_rate": 6.222972972972973e-05, + "loss": 0.0919, + "step": 2040 + }, + { + "epoch": 3.48, + "learning_rate": 6.087837837837839e-05, + "loss": 0.1181, + "step": 2060 + }, + { + "epoch": 3.51, + "learning_rate": 5.952702702702703e-05, + "loss": 0.1215, + "step": 2080 + }, + { + "epoch": 3.55, + "learning_rate": 5.817567567567568e-05, + "loss": 0.0959, + "step": 2100 + }, + { + "epoch": 3.58, + "learning_rate": 5.682432432432433e-05, + "loss": 0.0866, + "step": 2120 + }, + { + "epoch": 3.61, + "learning_rate": 5.547297297297298e-05, + "loss": 0.117, + "step": 2140 + }, + { + "epoch": 3.65, + "learning_rate": 5.412162162162162e-05, + "loss": 0.1063, + "step": 2160 + }, + { + "epoch": 3.68, + "learning_rate": 5.277027027027027e-05, + "loss": 0.0993, + "step": 2180 + }, + { + "epoch": 3.72, + "learning_rate": 5.1418918918918925e-05, + "loss": 0.1057, + "step": 2200 + }, + { + "epoch": 3.75, + "learning_rate": 5.006756756756758e-05, + "loss": 0.1194, + "step": 2220 + }, + { + "epoch": 3.78, + "learning_rate": 4.871621621621622e-05, + "loss": 0.0929, + "step": 2240 + }, + { + "epoch": 3.82, + "learning_rate": 4.736486486486487e-05, + "loss": 0.08, + "step": 2260 + }, + { + "epoch": 3.85, + "learning_rate": 4.601351351351352e-05, + "loss": 0.1133, + "step": 2280 + }, + { + "epoch": 3.89, + "learning_rate": 4.4662162162162164e-05, + "loss": 0.0996, + "step": 2300 + }, + { + "epoch": 3.92, + "learning_rate": 4.3310810810810816e-05, + "loss": 0.0992, + "step": 2320 + }, + { + "epoch": 3.95, + "learning_rate": 4.195945945945946e-05, + "loss": 0.0941, + "step": 2340 + }, + { + "epoch": 3.99, + "learning_rate": 4.060810810810811e-05, + "loss": 0.0882, + "step": 2360 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.8856633663366337, + "eval_loss": 0.463856965303421, + "eval_runtime": 107.2753, + "eval_samples_per_second": 235.376, + "eval_steps_per_second": 1.846, + "step": 2368 + }, + { + "epoch": 4.02, + "learning_rate": 3.925675675675676e-05, + "loss": 0.0796, + "step": 2380 + }, + { + "epoch": 4.05, + "learning_rate": 3.790540540540541e-05, + "loss": 0.0353, + "step": 2400 + }, + { + "epoch": 4.09, + "learning_rate": 3.6554054054054055e-05, + "loss": 0.0536, + "step": 2420 + }, + { + "epoch": 4.12, + "learning_rate": 3.520270270270271e-05, + "loss": 0.0564, + "step": 2440 + }, + { + "epoch": 4.16, + "learning_rate": 3.385135135135135e-05, + "loss": 0.0506, + "step": 2460 + }, + { + "epoch": 4.19, + "learning_rate": 3.2500000000000004e-05, + "loss": 0.0547, + "step": 2480 + }, + { + "epoch": 4.22, + "learning_rate": 3.114864864864865e-05, + "loss": 0.0462, + "step": 2500 + }, + { + "epoch": 4.26, + "learning_rate": 2.97972972972973e-05, + "loss": 0.0501, + "step": 2520 + }, + { + "epoch": 4.29, + "learning_rate": 2.8445945945945946e-05, + "loss": 0.0588, + "step": 2540 + }, + { + "epoch": 4.32, + "learning_rate": 2.7094594594594598e-05, + "loss": 0.0303, + "step": 2560 + }, + { + "epoch": 4.36, + "learning_rate": 2.5743243243243243e-05, + "loss": 0.0411, + "step": 2580 + }, + { + "epoch": 4.39, + "learning_rate": 2.4391891891891895e-05, + "loss": 0.0406, + "step": 2600 + }, + { + "epoch": 4.43, + "learning_rate": 2.3040540540540543e-05, + "loss": 0.0378, + "step": 2620 + }, + { + "epoch": 4.46, + "learning_rate": 2.1689189189189192e-05, + "loss": 0.0391, + "step": 2640 + }, + { + "epoch": 4.49, + "learning_rate": 2.033783783783784e-05, + "loss": 0.038, + "step": 2660 + }, + { + "epoch": 4.53, + "learning_rate": 1.898648648648649e-05, + "loss": 0.0443, + "step": 2680 + }, + { + "epoch": 4.56, + "learning_rate": 1.7635135135135137e-05, + "loss": 0.0391, + "step": 2700 + }, + { + "epoch": 4.59, + "learning_rate": 1.6283783783783786e-05, + "loss": 0.0439, + "step": 2720 + }, + { + "epoch": 4.63, + "learning_rate": 1.4932432432432433e-05, + "loss": 0.0532, + "step": 2740 + }, + { + "epoch": 4.66, + "learning_rate": 1.3581081081081081e-05, + "loss": 0.0395, + "step": 2760 + }, + { + "epoch": 4.7, + "learning_rate": 1.222972972972973e-05, + "loss": 0.0458, + "step": 2780 + }, + { + "epoch": 4.73, + "learning_rate": 1.0878378378378378e-05, + "loss": 0.0588, + "step": 2800 + }, + { + "epoch": 4.76, + "learning_rate": 9.527027027027027e-06, + "loss": 0.0465, + "step": 2820 + }, + { + "epoch": 4.8, + "learning_rate": 8.175675675675675e-06, + "loss": 0.036, + "step": 2840 + }, + { + "epoch": 4.83, + "learning_rate": 6.8243243243243244e-06, + "loss": 0.0437, + "step": 2860 + }, + { + "epoch": 4.86, + "learning_rate": 5.472972972972974e-06, + "loss": 0.0487, + "step": 2880 + }, + { + "epoch": 4.9, + "learning_rate": 4.121621621621622e-06, + "loss": 0.0273, + "step": 2900 + }, + { + "epoch": 4.93, + "learning_rate": 2.7702702702702708e-06, + "loss": 0.0392, + "step": 2920 + }, + { + "epoch": 4.97, + "learning_rate": 1.418918918918919e-06, + "loss": 0.0458, + "step": 2940 + }, + { + "epoch": 5.0, + "learning_rate": 6.756756756756757e-08, + "loss": 0.0452, + "step": 2960 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.8912871287128713, + "eval_loss": 0.45006364583969116, + "eval_runtime": 106.893, + "eval_samples_per_second": 236.218, + "eval_steps_per_second": 1.852, + "step": 2960 + }, + { + "epoch": 5.0, + "step": 2960, + "total_flos": 0.0, + "train_loss": 0.48942885282071863, + "train_runtime": 2221.2523, + "train_samples_per_second": 170.512, + "train_steps_per_second": 1.333 + } + ], + "max_steps": 2960, + "num_train_epochs": 5, + "total_flos": 0.0, + "trial_name": null, + "trial_params": null +} diff --git a/training_args.bin b/training_args.bin new file mode 100644 index 0000000..c3e5fc5 --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:854337aa979b4d66fa7b7c37eeeed817ea454fd96fce3eeddd27799e56c2a3d1 +size 2671