From ff50af5b492be13c976467e8df8a4c909a94965e Mon Sep 17 00:00:00 2001
From: YYJ-aaaa <1058387004@qq.com>
Date: Mon, 6 Jan 2025 14:33:14 +0800
Subject: [PATCH] first commit

---
 all_results.json         |  12 ++
 config.json              |  46 ++++++
 eval_results.json        |   8 +
 preprocessor_config.json |  14 ++
 pytorch_model.bin        |   3 +
 train.py                 | 211 ++++++++++++++++++++++++++
 train_results.json       |   7 +
 trainer_state.json       | 310 +++++++++++++++++++++++++++++++++++++++
 training_args.bin        |   3 +
 9 files changed, 614 insertions(+)
 create mode 100644 all_results.json
 create mode 100644 config.json
 create mode 100644 eval_results.json
 create mode 100644 preprocessor_config.json
 create mode 100644 pytorch_model.bin
 create mode 100644 train.py
 create mode 100644 train_results.json
 create mode 100644 trainer_state.json
 create mode 100644 training_args.bin

diff --git a/all_results.json b/all_results.json
new file mode 100644
index 0000000..8ace439
--- /dev/null
+++ b/all_results.json
@@ -0,0 +1,12 @@
+{
+    "epoch": 6.0,
+    "eval_accuracy": 0.9852222222222222,
+    "eval_loss": 0.05230661854147911,
+    "eval_runtime": 2.6574,
+    "eval_samples_per_second": 3386.794,
+    "eval_steps_per_second": 423.349,
+    "train_loss": 0.1922683648263396,
+    "train_runtime": 134.4457,
+    "train_samples_per_second": 2276.012,
+    "train_steps_per_second": 71.137
+}
\ No newline at end of file
diff --git a/config.json b/config.json
new file mode 100644
index 0000000..8a9bf35
--- /dev/null
+++ b/config.json
@@ -0,0 +1,46 @@
+{
+  "architectures": [
+    "ResNetForImageClassification"
+  ],
+  "depths": [
+    2,
+    2
+  ],
+  "downsample_in_first_stage": false,
+  "embedding_size": 64,
+  "hidden_act": "relu",
+  "hidden_sizes": [
+    32,
+    64
+  ],
+  "id2label": {
+    "0": "LABEL_0",
+    "1": "LABEL_1",
+    "2": "LABEL_2",
+    "3": "LABEL_3",
+    "4": "LABEL_4",
+    "5": "LABEL_5",
+    "6": "LABEL_6",
+    "7": "LABEL_7",
+    "8": "LABEL_8",
+    "9": "LABEL_9"
+  },
+  "label2id": {
+    "LABEL_0": 0,
+    "LABEL_1": 1,
+    "LABEL_2": 2,
+    "LABEL_3": 3,
+    "LABEL_4": 4,
+    "LABEL_5": 5,
+    "LABEL_6": 6,
+    "LABEL_7": 7,
+    "LABEL_8": 8,
+    "LABEL_9": 9
+  },
+  "layer_type": "basic",
+  "model_type": "resnet",
+  "num_channels": 1,
+  "problem_type": "single_label_classification",
+  "torch_dtype": "float32",
+  "transformers_version": "4.19.0.dev0"
+}
diff --git a/eval_results.json b/eval_results.json
new file mode 100644
index 0000000..6feac54
--- /dev/null
+++ b/eval_results.json
@@ -0,0 +1,8 @@
+{
+    "epoch": 6.0,
+    "eval_accuracy": 0.9852222222222222,
+    "eval_loss": 0.05230661854147911,
+    "eval_runtime": 2.6574,
+    "eval_samples_per_second": 3386.794,
+    "eval_steps_per_second": 423.349
+}
\ No newline at end of file
diff --git a/preprocessor_config.json b/preprocessor_config.json
new file mode 100644
index 0000000..0178bbd
--- /dev/null
+++ b/preprocessor_config.json
@@ -0,0 +1,14 @@
+{
+  "crop_pct": null,
+  "do_normalize": false,
+  "do_resize": false,
+  "feature_extractor_type": "ConvNextFeatureExtractor",
+  "image_mean": [
+    0.45
+  ],
+  "image_std": [
+    0.22
+  ],
+  "resample": 3,
+  "size": 224
+}
diff --git a/pytorch_model.bin b/pytorch_model.bin
new file mode 100644
index 0000000..4dc12c3
--- /dev/null
+++ b/pytorch_model.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:72b3ed2e1f131afbe98687a782109fa539b77a1b60713d8be2cb09dab092db7f
+size 763481
diff --git a/train.py b/train.py
new file mode 100644
index 0000000..b986df3
--- /dev/null
+++ b/train.py
@@ -0,0 +1,211 @@
+import logging
+import os
+import sys
+from dataclasses import dataclass, field
+from typing import Optional
+
+import datasets
+import torch
+import transformers
+from torchvision.transforms import Compose, Normalize, ToTensor
+from transformers import (
+    ConvNextFeatureExtractor,
+    HfArgumentParser,
+    ResNetConfig,
+    ResNetForImageClassification,
+    Trainer,
+    TrainingArguments,
+)
+from transformers.utils import check_min_version
+from transformers.utils.versions import require_version
+
+import numpy as np
+
+
+@dataclass
+class DataTrainingArguments:
+    """
+    Arguments pertaining to what data we are going to input our model for training and eval.
+    Using `HfArgumentParser` we can turn this class into argparse arguments to be able to specify
+    them on the command line.
+    """
+
+    train_val_split: Optional[float] = field(
+        default=0.15, metadata={"help": "Percent to split off of train for validation."}
+    )
+    max_train_samples: Optional[int] = field(
+        default=None,
+        metadata={
+            "help": "For debugging purposes or quicker training, truncate the number of training examples to this "
+            "value if set."
+        },
+    )
+    max_eval_samples: Optional[int] = field(
+        default=None,
+        metadata={
+            "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this "
+            "value if set."
+        },
+    )
+
+
+def collate_fn(examples):
+    # Stack the already-transformed tensors into a single batch for the Trainer.
+    pixel_values = torch.stack([example["pixel_values"] for example in examples])
+    labels = torch.tensor([example["label"] for example in examples])
+    return {"pixel_values": pixel_values, "labels": labels}
+
+
+# Will error if the minimal version of Transformers is not installed. Remove at your own risk.
+check_min_version("4.19.0.dev0")
+
+require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/image-classification/requirements.txt")
+
+logger = logging.getLogger(__name__)
+
+
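+# Usage sketch (paths are illustrative): because the script is driven by
+# `HfArgumentParser`, `TrainingArguments` can be supplied either as
+# command-line flags or as a single JSON file, e.g.
+#   python train.py --output_dir ./mnist_resnet --do_train --do_eval
+#   python train.py path/to/args.json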
+def main():
+    parser = HfArgumentParser((DataTrainingArguments, TrainingArguments))
+    if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
+        # If we pass only one argument to the script and it's the path to a json file,
+        # let's parse it to get our arguments.
+        data_args, training_args = parser.parse_json_file(
+            json_file=os.path.abspath(sys.argv[1])
+        )
+    else:
+        data_args, training_args = parser.parse_args_into_dataclasses()
+
+    # Setup logging
+    logging.basicConfig(
+        format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
+        datefmt="%m/%d/%Y %H:%M:%S",
+        handlers=[logging.StreamHandler(sys.stdout)],
+    )
+
+    log_level = training_args.get_process_log_level()
+    logger.setLevel(log_level)
+    transformers.utils.logging.set_verbosity(log_level)
+    transformers.utils.logging.enable_default_handler()
+    transformers.utils.logging.enable_explicit_format()
+
+    # Log a small summary on each process:
+    logger.warning(
+        f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}, "
+        f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}"
+    )
+
+    dataset = datasets.load_dataset("mnist")
+
+    # If the dataset has no validation split, carve one out of the training split.
+    data_args.train_val_split = (
+        None if "validation" in dataset.keys() else data_args.train_val_split
+    )
+    if isinstance(data_args.train_val_split, float) and data_args.train_val_split > 0.0:
+        split = dataset["train"].train_test_split(data_args.train_val_split)
+        dataset["train"] = split["train"]
+        dataset["validation"] = split["test"]
+
+    feature_extractor = ConvNextFeatureExtractor(
+        do_resize=False, do_normalize=False, image_mean=[0.45], image_std=[0.22]
+    )
+
+    # A small two-stage ResNet for single-channel (grayscale) MNIST digits, 10 classes.
+    config = ResNetConfig(
+        num_channels=1,
+        layer_type="basic",
+        depths=[2, 2],
+        hidden_sizes=[32, 64],
+        num_labels=10,
+    )
+
+    model = ResNetForImageClassification(config)
+
+    # Define torchvision transforms to be applied to each image.
+    normalize = Normalize(mean=feature_extractor.image_mean, std=feature_extractor.image_std)
+    _transforms = Compose([ToTensor(), normalize])
+
+    def transforms(example_batch):
+        """Apply `_transforms` across a batch."""
+        # Convert to single-channel grayscale ("L") to match num_channels=1.
+        example_batch["pixel_values"] = [_transforms(pil_img.convert("L")) for pil_img in example_batch["image"]]
+        return example_batch
+
+    # Load the accuracy metric from the datasets package
+    metric = datasets.load_metric("accuracy")
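+    # Note: `datasets.load_metric` works on the `datasets` versions this script
+    # targets, but newer releases deprecate it in favour of the separate
+    # `evaluate` package; `metric = evaluate.load("accuracy")` is the equivalent.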
+
+    # Define our compute_metrics function. It takes an `EvalPrediction` object (a namedtuple with
+    # `predictions` and `label_ids` fields) and has to return a dictionary mapping metric names to floats.
+    def compute_metrics(p):
+        """Compute accuracy on a batch of predictions."""
+        accuracy = metric.compute(predictions=np.argmax(p.predictions, axis=1), references=p.label_ids)
+        return accuracy
+
+    if training_args.do_train:
+        if data_args.max_train_samples is not None:
+            dataset["train"] = (
+                dataset["train"]
+                .shuffle(seed=training_args.seed)
+                .select(range(data_args.max_train_samples))
+            )
+
+        logger.info("Setting train transform")
+        # Set the training transforms
+        dataset["train"].set_transform(transforms)
+
+    if training_args.do_eval:
+        if "validation" not in dataset:
+            raise ValueError("--do_eval requires a validation dataset")
+        if data_args.max_eval_samples is not None:
+            dataset["validation"] = (
+                dataset["validation"]
+                .shuffle(seed=training_args.seed)
+                .select(range(data_args.max_eval_samples))
+            )
+
+        logger.info("Setting validation transform")
+        # Set the validation transforms
+        dataset["validation"].set_transform(transforms)
+
+    logger.info(dataset)
+
+    # Override the parsed TrainingArguments with the fixed settings used for this run.
+    training_args = transformers.TrainingArguments(
+        output_dir=training_args.output_dir,
+        do_eval=training_args.do_eval,
+        do_train=training_args.do_train,
+        logging_steps=500,
+        eval_steps=500,
+        save_steps=500,
+        remove_unused_columns=False,  # we need to pass the `label` and `image` columns
+        per_device_train_batch_size=32,
+        save_total_limit=2,
+        evaluation_strategy="steps",
+        num_train_epochs=6,
+    )
+
+    logger.info(f"Training/evaluation parameters {training_args}")
+
+    trainer = Trainer(
+        model=model,
+        args=training_args,
+        train_dataset=dataset["train"] if training_args.do_train else None,
+        eval_dataset=dataset["validation"] if training_args.do_eval else None,
+        compute_metrics=compute_metrics,
+        tokenizer=feature_extractor,
+        data_collator=collate_fn,
+    )
+
+    # Training
+    if training_args.do_train:
+        train_result = trainer.train()
+        trainer.save_model()
+        trainer.log_metrics("train", train_result.metrics)
+        trainer.save_metrics("train", train_result.metrics)
+        trainer.save_state()
+
+    # Evaluation
+    if training_args.do_eval:
+        metrics = trainer.evaluate()
+        trainer.log_metrics("eval", metrics)
+        trainer.save_metrics("eval", metrics)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/train_results.json b/train_results.json
new file mode 100644
index 0000000..ff93c0d
--- /dev/null
+++ b/train_results.json
@@ -0,0 +1,7 @@
+{
+    "epoch": 6.0,
+    "train_loss": 0.1922683648263396,
+    "train_runtime": 134.4457,
+    "train_samples_per_second": 2276.012,
+    "train_steps_per_second": 71.137
+}
\ No newline at end of file
diff --git a/trainer_state.json b/trainer_state.json
new file mode 100644
index 0000000..e67da33
--- /dev/null
+++ b/trainer_state.json
@@ -0,0 +1,310 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 6.0,
+  "global_step": 9564,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.31,
+      "learning_rate": 4.7386030949393564e-05,
+      "loss": 1.4207,
+      "step": 500
+    },
+    {
+      "epoch": 0.31,
+      "eval_accuracy": 0.9008888888888889,
+      "eval_loss": 0.7066789269447327,
+      "eval_runtime": 2.6965,
+      "eval_samples_per_second": 3337.621,
+      "eval_steps_per_second": 417.203,
+      "step": 500
+    },
+    {
+      "epoch": 0.63,
+      "learning_rate": 4.477206189878712e-05,
+      "loss": 0.5086,
+      "step": 1000
+    },
+    {
+      "epoch": 0.63,
+      "eval_accuracy": 0.9516666666666667,
+      "eval_loss": 0.3055577874183655,
+      "eval_runtime": 2.6576,
+      "eval_samples_per_second": 3386.509,
+      "eval_steps_per_second": 423.314,
+      "step": 1000
+    },
+    {
+      "epoch": 0.94,
+      "learning_rate": 4.215809284818068e-05,
+      "loss": 0.2731,
+      "step": 1500
+    },
+    {
+      "epoch": 0.94,
+      "eval_accuracy": 0.9648888888888889,
+      "eval_loss": 0.18555375933647156,
+      "eval_runtime": 2.6597,
+      "eval_samples_per_second": 3383.793,
+      "eval_steps_per_second": 422.974,
+      "step": 1500
+    },
+    {
+      "epoch": 1.25,
+      "learning_rate": 3.954412379757424e-05,
+      "loss": 0.1976,
+      "step": 2000
+    },
+    {
+      "epoch": 1.25,
+      "eval_accuracy": 0.9701111111111111,
+      "eval_loss": 0.14159560203552246,
+      "eval_runtime": 2.715,
+      "eval_samples_per_second": 3314.86,
+      "eval_steps_per_second": 414.357,
+      "step": 2000
+    },
+    {
+      "epoch": 1.57,
+      "learning_rate": 3.69301547469678e-05,
+      "loss": 0.1565,
+      "step": 2500
+    },
+    {
+      "epoch": 1.57,
+      "eval_accuracy": 0.9738888888888889,
+      "eval_loss": 0.11081045866012573,
+      "eval_runtime": 2.6963,
+      "eval_samples_per_second": 3337.905,
+      "eval_steps_per_second": 417.238,
+      "step": 2500
+    },
+    {
+      "epoch": 1.88,
+      "learning_rate": 3.431618569636136e-05,
+      "loss": 0.128,
+      "step": 3000
+    },
+    {
+      "epoch": 1.88,
+      "eval_accuracy": 0.976,
+      "eval_loss": 0.09747562557458878,
+      "eval_runtime": 2.6961,
+      "eval_samples_per_second": 3338.209,
+      "eval_steps_per_second": 417.276,
+      "step": 3000
+    },
+    {
+      "epoch": 2.2,
+      "learning_rate": 3.170221664575492e-05,
+      "loss": 0.1133,
+      "step": 3500
+    },
+    {
+      "epoch": 2.2,
+      "eval_accuracy": 0.9788888888888889,
+      "eval_loss": 0.08474569022655487,
+      "eval_runtime": 2.7245,
+      "eval_samples_per_second": 3303.375,
+      "eval_steps_per_second": 412.922,
+      "step": 3500
+    },
+    {
+      "epoch": 2.51,
+      "learning_rate": 2.9088247595148475e-05,
+      "loss": 0.1031,
+      "step": 4000
+    },
+    {
+      "epoch": 2.51,
+      "eval_accuracy": 0.9804444444444445,
+      "eval_loss": 0.07724875211715698,
+      "eval_runtime": 2.6363,
+      "eval_samples_per_second": 3413.847,
+      "eval_steps_per_second": 426.731,
+      "step": 4000
+    },
+    {
+      "epoch": 2.82,
+      "learning_rate": 2.6474278544542037e-05,
+      "loss": 0.09,
+      "step": 4500
+    },
+    {
+      "epoch": 2.82,
+      "eval_accuracy": 0.9818888888888889,
+      "eval_loss": 0.0697416290640831,
+      "eval_runtime": 2.6295,
+      "eval_samples_per_second": 3422.689,
+      "eval_steps_per_second": 427.836,
+      "step": 4500
+    },
+    {
+      "epoch": 3.14,
+      "learning_rate": 2.386030949393559e-05,
+      "loss": 0.0871,
+      "step": 5000
+    },
+    {
+      "epoch": 3.14,
+      "eval_accuracy": 0.9815555555555555,
+      "eval_loss": 0.066066212952137,
+      "eval_runtime": 2.6946,
+      "eval_samples_per_second": 3340.06,
+      "eval_steps_per_second": 417.507,
+      "step": 5000
+    },
+    {
+      "epoch": 3.45,
+      "learning_rate": 2.1246340443329153e-05,
+      "loss": 0.0733,
+      "step": 5500
+    },
+    {
+      "epoch": 3.45,
+      "eval_accuracy": 0.9822222222222222,
+      "eval_loss": 0.06342040002346039,
+      "eval_runtime": 2.6897,
+      "eval_samples_per_second": 3346.09,
+      "eval_steps_per_second": 418.261,
+      "step": 5500
+    },
+    {
+      "epoch": 3.76,
+      "learning_rate": 1.863237139272271e-05,
+      "loss": 0.0761,
+      "step": 6000
+    },
+    {
+      "epoch": 3.76,
+      "eval_accuracy": 0.983,
+      "eval_loss": 0.06072380393743515,
+      "eval_runtime": 2.6938,
+      "eval_samples_per_second": 3340.98,
+      "eval_steps_per_second": 417.623,
+      "step": 6000
+    },
+    {
+      "epoch": 4.08,
+      "learning_rate": 1.601840234211627e-05,
+      "loss": 0.0739,
+      "step": 6500
+    },
+    {
+      "epoch": 4.08,
+      "eval_accuracy": 0.9832222222222222,
+      "eval_loss": 0.05795769765973091,
+      "eval_runtime": 2.6767,
+      "eval_samples_per_second": 3362.391,
+      "eval_steps_per_second": 420.299,
+      "step": 6500
+    },
+    {
+      "epoch": 4.39,
+      "learning_rate": 1.340443329150983e-05,
+      "loss": 0.0643,
+      "step": 7000
+    },
+    {
+      "epoch": 4.39,
+      "eval_accuracy": 0.9844444444444445,
+      "eval_loss": 0.05685265362262726,
+      "eval_runtime": 2.6876,
+      "eval_samples_per_second": 3348.672,
+      "eval_steps_per_second": 418.584,
+      "step": 7000
+    },
+    {
+      "epoch": 4.71,
+      "learning_rate": 1.0790464240903388e-05,
+      "loss": 0.0678,
+      "step": 7500
+    },
+    {
+      "epoch": 4.71,
+      "eval_accuracy": 0.984,
+      "eval_loss": 0.05617769435048103,
+      "eval_runtime": 2.6484,
+      "eval_samples_per_second": 3398.278,
+      "eval_steps_per_second": 424.785,
+      "step": 7500
+    },
+    {
+      "epoch": 5.02,
+      "learning_rate": 8.176495190296946e-06,
+      "loss": 0.0617,
+      "step": 8000
+    },
+    {
+      "epoch": 5.02,
+      "eval_accuracy": 0.9853333333333333,
+      "eval_loss": 0.053985536098480225,
+      "eval_runtime": 2.672,
+      "eval_samples_per_second": 3368.244,
+      "eval_steps_per_second": 421.03,
+      "step": 8000
+    },
+    {
+      "epoch": 5.33,
+      "learning_rate": 5.562526139690506e-06,
+      "loss": 0.0571,
+      "step": 8500
+    },
+    {
+      "epoch": 5.33,
+      "eval_accuracy": 0.9847777777777778,
+      "eval_loss": 0.05352585390210152,
+      "eval_runtime": 2.7082,
+      "eval_samples_per_second": 3323.274,
+      "eval_steps_per_second": 415.409,
+      "step": 8500
+    },
+    {
+      "epoch": 5.65,
+      "learning_rate": 2.9485570890840656e-06,
+      "loss": 0.0608,
+      "step": 9000
+    },
+    {
+      "epoch": 5.65,
+      "eval_accuracy": 0.9851111111111112,
+      "eval_loss": 0.053133774548769,
+      "eval_runtime": 2.6753,
+      "eval_samples_per_second": 3364.134,
+      "eval_steps_per_second": 420.517,
+      "step": 9000
+    },
+    {
+      "epoch": 5.96,
+      "learning_rate": 3.345880384776244e-07,
+      "loss": 0.0571,
+      "step": 9500
+    },
+    {
+      "epoch": 5.96,
+      "eval_accuracy": 0.9847777777777778,
+      "eval_loss": 0.05344167724251747,
+      "eval_runtime": 2.6425,
+      "eval_samples_per_second": 3405.863,
+      "eval_steps_per_second": 425.733,
+      "step": 9500
+    },
+    {
+      "epoch": 6.0,
+      "step": 9564,
+      "total_flos": 264960533376000.0,
+      "train_loss": 0.1922683648263396,
+      "train_runtime": 134.4457,
+      "train_samples_per_second": 2276.012,
+      "train_steps_per_second": 71.137
+    }
+  ],
+  "max_steps": 9564,
+  "num_train_epochs": 6,
+  "total_flos": 264960533376000.0,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/training_args.bin b/training_args.bin
new file mode 100644
index 0000000..b44ccb1
--- /dev/null
+++ b/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:aa4e95a4ea032aa40c0216647955b0d7d2e98a98aba8f2db221e4606d6d0d474
+size 3055