first commit

2025-05-30 14:08:12 +08:00 · 2025-05-30 14:08:12 +08:00 · 86a8679b6c
parent 6d674315bd
commit 86a8679b6c
28 changed files with 573 additions and 0 deletions
--- a/checkpoint-1656/config.json
+++ b/checkpoint-1656/config.json
@ -0,0 +1,52 @@
+{
+  "architectures": [
+    "SiglipForImageClassification"
+  ],
+  "id2label": {
+    "0": "Anime Picture",
+    "1": "Hentai",
+    "2": "Normal",
+    "3": "Pornography",
+    "4": "Enticing or Sensual"
+  },
+  "initializer_factor": 1.0,
+  "label2id": {
+    "Anime Picture": 0,
+    "Enticing or Sensual": 4,
+    "Hentai": 1,
+    "Normal": 2,
+    "Pornography": 3
+  },
+  "model_type": "siglip",
+  "problem_type": "single_label_classification",
+  "text_config": {
+    "attention_dropout": 0.0,
+    "hidden_act": "gelu_pytorch_tanh",
+    "hidden_size": 768,
+    "intermediate_size": 3072,
+    "layer_norm_eps": 1e-06,
+    "max_position_embeddings": 64,
+    "model_type": "siglip_text_model",
+    "num_attention_heads": 12,
+    "num_hidden_layers": 12,
+    "projection_size": 768,
+    "torch_dtype": "float32",
+    "vocab_size": 256000
+  },
+  "torch_dtype": "float32",
+  "transformers_version": "4.50.0",
+  "vision_config": {
+    "attention_dropout": 0.0,
+    "hidden_act": "gelu_pytorch_tanh",
+    "hidden_size": 768,
+    "image_size": 256,
+    "intermediate_size": 3072,
+    "layer_norm_eps": 1e-06,
+    "model_type": "siglip_vision_model",
+    "num_attention_heads": 12,
+    "num_channels": 3,
+    "num_hidden_layers": 12,
+    "patch_size": 16,
+    "torch_dtype": "float32"
+  }
+}
--- a/checkpoint-1656/model.safetensors
+++ b/checkpoint-1656/model.safetensors
--- a/checkpoint-1656/optimizer.pt
+++ b/checkpoint-1656/optimizer.pt
--- a/checkpoint-1656/preprocessor_config.json
+++ b/checkpoint-1656/preprocessor_config.json
@ -0,0 +1,24 @@
+{
+  "do_convert_rgb": null,
+  "do_normalize": true,
+  "do_rescale": true,
+  "do_resize": true,
+  "image_mean": [
+    0.5,
+    0.5,
+    0.5
+  ],
+  "image_processor_type": "SiglipImageProcessor",
+  "image_std": [
+    0.5,
+    0.5,
+    0.5
+  ],
+  "processor_class": "SiglipProcessor",
+  "resample": 2,
+  "rescale_factor": 0.00392156862745098,
+  "size": {
+    "height": 256,
+    "width": 256
+  }
+}
--- a/checkpoint-1656/rng_state.pth
+++ b/checkpoint-1656/rng_state.pth
--- a/checkpoint-1656/scheduler.pt
+++ b/checkpoint-1656/scheduler.pt
--- a/checkpoint-1656/trainer_state.json
+++ b/checkpoint-1656/trainer_state.json
@ -0,0 +1,75 @@
+{
+  "best_global_step": 1656,
+  "best_metric": 0.33448827266693115,
+  "best_model_checkpoint": "siglip2-finetune-full/checkpoint-1656",
+  "epoch": 2.0,
+  "eval_steps": 500,
+  "global_step": 1656,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.6038647342995169,
+      "grad_norm": 5.860389709472656,
+      "learning_rate": 0.00016302382908792113,
+      "loss": 0.823,
+      "step": 500
+    },
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.7698523581165276,
+      "eval_loss": 0.577109158039093,
+      "eval_model_preparation_time": 0.0023,
+      "eval_runtime": 605.3087,
+      "eval_samples_per_second": 43.751,
+      "eval_steps_per_second": 5.47,
+      "step": 828
+    },
+    {
+      "epoch": 1.2077294685990339,
+      "grad_norm": 5.253721237182617,
+      "learning_rate": 0.00012193919474116682,
+      "loss": 0.6183,
+      "step": 1000
+    },
+    {
+      "epoch": 1.8115942028985508,
+      "grad_norm": 4.610513210296631,
+      "learning_rate": 8.08545603944125e-05,
+      "loss": 0.5136,
+      "step": 1500
+    },
+    {
+      "epoch": 2.0,
+      "eval_accuracy": 0.8734282369822151,
+      "eval_loss": 0.33448827266693115,
+      "eval_model_preparation_time": 0.0023,
+      "eval_runtime": 598.3057,
+      "eval_samples_per_second": 44.263,
+      "eval_steps_per_second": 5.534,
+      "step": 1656
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 2484,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 5.794356182313075e+18,
+  "train_batch_size": 32,
+  "trial_name": null,
+  "trial_params": null
+}
--- a/checkpoint-1656/training_args.bin
+++ b/checkpoint-1656/training_args.bin
--- a/checkpoint-2484/config.json
+++ b/checkpoint-2484/config.json
@ -0,0 +1,52 @@
+{
+  "architectures": [
+    "SiglipForImageClassification"
+  ],
+  "id2label": {
+    "0": "Anime Picture",
+    "1": "Hentai",
+    "2": "Normal",
+    "3": "Pornography",
+    "4": "Enticing or Sensual"
+  },
+  "initializer_factor": 1.0,
+  "label2id": {
+    "Anime Picture": 0,
+    "Enticing or Sensual": 4,
+    "Hentai": 1,
+    "Normal": 2,
+    "Pornography": 3
+  },
+  "model_type": "siglip",
+  "problem_type": "single_label_classification",
+  "text_config": {
+    "attention_dropout": 0.0,
+    "hidden_act": "gelu_pytorch_tanh",
+    "hidden_size": 768,
+    "intermediate_size": 3072,
+    "layer_norm_eps": 1e-06,
+    "max_position_embeddings": 64,
+    "model_type": "siglip_text_model",
+    "num_attention_heads": 12,
+    "num_hidden_layers": 12,
+    "projection_size": 768,
+    "torch_dtype": "float32",
+    "vocab_size": 256000
+  },
+  "torch_dtype": "float32",
+  "transformers_version": "4.50.0",
+  "vision_config": {
+    "attention_dropout": 0.0,
+    "hidden_act": "gelu_pytorch_tanh",
+    "hidden_size": 768,
+    "image_size": 256,
+    "intermediate_size": 3072,
+    "layer_norm_eps": 1e-06,
+    "model_type": "siglip_vision_model",
+    "num_attention_heads": 12,
+    "num_channels": 3,
+    "num_hidden_layers": 12,
+    "patch_size": 16,
+    "torch_dtype": "float32"
+  }
+}
--- a/checkpoint-2484/model.safetensors
+++ b/checkpoint-2484/model.safetensors
--- a/checkpoint-2484/optimizer.pt
+++ b/checkpoint-2484/optimizer.pt
--- a/checkpoint-2484/preprocessor_config.json
+++ b/checkpoint-2484/preprocessor_config.json
@ -0,0 +1,24 @@
+{
+  "do_convert_rgb": null,
+  "do_normalize": true,
+  "do_rescale": true,
+  "do_resize": true,
+  "image_mean": [
+    0.5,
+    0.5,
+    0.5
+  ],
+  "image_processor_type": "SiglipImageProcessor",
+  "image_std": [
+    0.5,
+    0.5,
+    0.5
+  ],
+  "processor_class": "SiglipProcessor",
+  "resample": 2,
+  "rescale_factor": 0.00392156862745098,
+  "size": {
+    "height": 256,
+    "width": 256
+  }
+}
--- a/checkpoint-2484/rng_state.pth
+++ b/checkpoint-2484/rng_state.pth
--- a/checkpoint-2484/scheduler.pt
+++ b/checkpoint-2484/scheduler.pt
--- a/checkpoint-2484/trainer_state.json
+++ b/checkpoint-2484/trainer_state.json
@ -0,0 +1,92 @@
+{
+  "best_global_step": 2484,
+  "best_metric": 0.23217608034610748,
+  "best_model_checkpoint": "siglip2-finetune-full/checkpoint-2484",
+  "epoch": 3.0,
+  "eval_steps": 500,
+  "global_step": 2484,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.6038647342995169,
+      "grad_norm": 5.860389709472656,
+      "learning_rate": 0.00016302382908792113,
+      "loss": 0.823,
+      "step": 500
+    },
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.7698523581165276,
+      "eval_loss": 0.577109158039093,
+      "eval_model_preparation_time": 0.0023,
+      "eval_runtime": 605.3087,
+      "eval_samples_per_second": 43.751,
+      "eval_steps_per_second": 5.47,
+      "step": 828
+    },
+    {
+      "epoch": 1.2077294685990339,
+      "grad_norm": 5.253721237182617,
+      "learning_rate": 0.00012193919474116682,
+      "loss": 0.6183,
+      "step": 1000
+    },
+    {
+      "epoch": 1.8115942028985508,
+      "grad_norm": 4.610513210296631,
+      "learning_rate": 8.08545603944125e-05,
+      "loss": 0.5136,
+      "step": 1500
+    },
+    {
+      "epoch": 2.0,
+      "eval_accuracy": 0.8734282369822151,
+      "eval_loss": 0.33448827266693115,
+      "eval_model_preparation_time": 0.0023,
+      "eval_runtime": 598.3057,
+      "eval_samples_per_second": 44.263,
+      "eval_steps_per_second": 5.534,
+      "step": 1656
+    },
+    {
+      "epoch": 2.4154589371980677,
+      "grad_norm": 5.658606052398682,
+      "learning_rate": 3.976992604765818e-05,
+      "loss": 0.3894,
+      "step": 2000
+    },
+    {
+      "epoch": 3.0,
+      "eval_accuracy": 0.9137182343390099,
+      "eval_loss": 0.23217608034610748,
+      "eval_model_preparation_time": 0.0023,
+      "eval_runtime": 604.5474,
+      "eval_samples_per_second": 43.806,
+      "eval_steps_per_second": 5.477,
+      "step": 2484
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 2484,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 8.691534273469612e+18,
+  "train_batch_size": 32,
+  "trial_name": null,
+  "trial_params": null
+}
--- a/checkpoint-2484/training_args.bin
+++ b/checkpoint-2484/training_args.bin
--- a/checkpoint-828/config.json
+++ b/checkpoint-828/config.json
@ -0,0 +1,52 @@
+{
+  "architectures": [
+    "SiglipForImageClassification"
+  ],
+  "id2label": {
+    "0": "Anime Picture",
+    "1": "Hentai",
+    "2": "Normal",
+    "3": "Pornography",
+    "4": "Enticing or Sensual"
+  },
+  "initializer_factor": 1.0,
+  "label2id": {
+    "Anime Picture": 0,
+    "Enticing or Sensual": 4,
+    "Hentai": 1,
+    "Normal": 2,
+    "Pornography": 3
+  },
+  "model_type": "siglip",
+  "problem_type": "single_label_classification",
+  "text_config": {
+    "attention_dropout": 0.0,
+    "hidden_act": "gelu_pytorch_tanh",
+    "hidden_size": 768,
+    "intermediate_size": 3072,
+    "layer_norm_eps": 1e-06,
+    "max_position_embeddings": 64,
+    "model_type": "siglip_text_model",
+    "num_attention_heads": 12,
+    "num_hidden_layers": 12,
+    "projection_size": 768,
+    "torch_dtype": "float32",
+    "vocab_size": 256000
+  },
+  "torch_dtype": "float32",
+  "transformers_version": "4.50.0",
+  "vision_config": {
+    "attention_dropout": 0.0,
+    "hidden_act": "gelu_pytorch_tanh",
+    "hidden_size": 768,
+    "image_size": 256,
+    "intermediate_size": 3072,
+    "layer_norm_eps": 1e-06,
+    "model_type": "siglip_vision_model",
+    "num_attention_heads": 12,
+    "num_channels": 3,
+    "num_hidden_layers": 12,
+    "patch_size": 16,
+    "torch_dtype": "float32"
+  }
+}
--- a/checkpoint-828/model.safetensors
+++ b/checkpoint-828/model.safetensors
--- a/checkpoint-828/optimizer.pt
+++ b/checkpoint-828/optimizer.pt
--- a/checkpoint-828/preprocessor_config.json
+++ b/checkpoint-828/preprocessor_config.json
@ -0,0 +1,24 @@
+{
+  "do_convert_rgb": null,
+  "do_normalize": true,
+  "do_rescale": true,
+  "do_resize": true,
+  "image_mean": [
+    0.5,
+    0.5,
+    0.5
+  ],
+  "image_processor_type": "SiglipImageProcessor",
+  "image_std": [
+    0.5,
+    0.5,
+    0.5
+  ],
+  "processor_class": "SiglipProcessor",
+  "resample": 2,
+  "rescale_factor": 0.00392156862745098,
+  "size": {
+    "height": 256,
+    "width": 256
+  }
+}
--- a/checkpoint-828/rng_state.pth
+++ b/checkpoint-828/rng_state.pth
--- a/checkpoint-828/scheduler.pt
+++ b/checkpoint-828/scheduler.pt
--- a/checkpoint-828/trainer_state.json
+++ b/checkpoint-828/trainer_state.json
@ -0,0 +1,51 @@
+{
+  "best_global_step": 828,
+  "best_metric": 0.577109158039093,
+  "best_model_checkpoint": "siglip2-finetune-full/checkpoint-828",
+  "epoch": 1.0,
+  "eval_steps": 500,
+  "global_step": 828,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.6038647342995169,
+      "grad_norm": 5.860389709472656,
+      "learning_rate": 0.00016302382908792113,
+      "loss": 0.823,
+      "step": 500
+    },
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.7698523581165276,
+      "eval_loss": 0.577109158039093,
+      "eval_model_preparation_time": 0.0023,
+      "eval_runtime": 605.3087,
+      "eval_samples_per_second": 43.751,
+      "eval_steps_per_second": 5.47,
+      "step": 828
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 2484,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 2.8971780911565373e+18,
+  "train_batch_size": 32,
+  "trial_name": null,
+  "trial_params": null
+}
--- a/checkpoint-828/training_args.bin
+++ b/checkpoint-828/training_args.bin
--- a/config.json
+++ b/config.json
@ -0,0 +1,52 @@
+{
+  "architectures": [
+    "SiglipForImageClassification"
+  ],
+  "id2label": {
+    "0": "Anime Picture",
+    "1": "Hentai",
+    "2": "Normal",
+    "3": "Pornography",
+    "4": "Enticing or Sensual"
+  },
+  "initializer_factor": 1.0,
+  "label2id": {
+    "Anime Picture": 0,
+    "Enticing or Sensual": 4,
+    "Hentai": 1,
+    "Normal": 2,
+    "Pornography": 3
+  },
+  "model_type": "siglip",
+  "problem_type": "single_label_classification",
+  "text_config": {
+    "attention_dropout": 0.0,
+    "hidden_act": "gelu_pytorch_tanh",
+    "hidden_size": 768,
+    "intermediate_size": 3072,
+    "layer_norm_eps": 1e-06,
+    "max_position_embeddings": 64,
+    "model_type": "siglip_text_model",
+    "num_attention_heads": 12,
+    "num_hidden_layers": 12,
+    "projection_size": 768,
+    "torch_dtype": "float32",
+    "vocab_size": 256000
+  },
+  "torch_dtype": "float32",
+  "transformers_version": "4.50.0",
+  "vision_config": {
+    "attention_dropout": 0.0,
+    "hidden_act": "gelu_pytorch_tanh",
+    "hidden_size": 768,
+    "image_size": 256,
+    "intermediate_size": 3072,
+    "layer_norm_eps": 1e-06,
+    "model_type": "siglip_vision_model",
+    "num_attention_heads": 12,
+    "num_channels": 3,
+    "num_hidden_layers": 12,
+    "patch_size": 16,
+    "torch_dtype": "float32"
+  }
+}
--- a/model.safetensors
+++ b/model.safetensors
--- a/preprocessor_config.json
+++ b/preprocessor_config.json
@ -0,0 +1,24 @@
+{
+  "do_convert_rgb": null,
+  "do_normalize": true,
+  "do_rescale": true,
+  "do_resize": true,
+  "image_mean": [
+    0.5,
+    0.5,
+    0.5
+  ],
+  "image_processor_type": "SiglipImageProcessor",
+  "image_std": [
+    0.5,
+    0.5,
+    0.5
+  ],
+  "processor_class": "SiglipProcessor",
+  "resample": 2,
+  "rescale_factor": 0.00392156862745098,
+  "size": {
+    "height": 256,
+    "width": 256
+  }
+}
--- a/training_args.bin
+++ b/training_args.bin