From 586431db13af396e27dbe826365d85cdaa5f68b1 Mon Sep 17 00:00:00 2001 From: xxl <505279206@qq.com> Date: Mon, 17 Mar 2025 10:04:14 +0800 Subject: [PATCH] first commit --- README.md | 46 ++++++++++++++++++++++++++++++++++++++++++++++ model.safetensors | 3 +++ pytorch_model.bin | 3 +++ 3 files changed, 52 insertions(+) create mode 100644 model.safetensors create mode 100644 pytorch_model.bin diff --git a/README.md b/README.md index 8884bab..a735015 100644 --- a/README.md +++ b/README.md @@ -105,6 +105,52 @@ The EuroBERT family exhibits strong multilingual performance across domains and EuroBERT +### Suggested Fine-Tuning Hyperparameters + +If you plan to fine-tune this model on downstream tasks, you can start from the hyperparameters reported in our paper. + +#### Base Hyperparameters (unchanged across tasks) + +- Warmup Ratio: 0.1 +- Learning Rate Scheduler: Linear +- Adam Beta 1: 0.9 +- Adam Beta 2: 0.95 +- Adam Epsilon: 1e-5 +- Weight Decay: 0.1 + +#### Task-Specific Learning Rates + +##### Sequence Classification: + +| Dataset | EuroBERT-210m | EuroBERT-610m | EuroBERT-2.1B | +|--------------------------------------|----------------|----------------|----------------| +| XNLI | 3.6e-05 | 3.6e-05 | 2.8e-05 | +| PAWS-X | 3.6e-05 | 4.6e-05 | 3.6e-05 | +| QAM | 3.6e-05 | 2.8e-05 | 2.2e-05 | +| AmazonReviews | 3.6e-05 | 2.8e-05 | 3.6e-05 | +| MassiveIntent | 6.0e-05 | 4.6e-05 | 2.8e-05 | +| CodeDefect | 3.6e-05 | 2.8e-05 | 1.3e-05 | +| CodeComplexity | 3.6e-05 | 3.6e-05 | 1.0e-05 | +| MathShepherd | 7.7e-05 | 2.8e-05 | 1.7e-05 | + +##### Sequence Regression: + +| Dataset | EuroBERT-210m | EuroBERT-610m | EuroBERT-2.1B | +|--------------------------|----------------|----------------|----------------| +| SeaHorse | 3.6e-05 | 3.6e-05 | 2.8e-05 | +| SummevalMultilingual | 3.6e-05 | 2.8e-05 | 3.6e-05 | +| WMT | 2.8e-05 | 2.8e-05 | 1.3e-05 | + +##### Retrieval: +| Dataset | EuroBERT-210m | EuroBERT-610m | EuroBERT-2.1B | 
+|-----------------------------------------|----------------|----------------|----------------| +| MIRACL | 4.6e-05 | 3.6e-05 | 2.8e-05 | +| MLDR | 2.8e-05 | 2.2e-05 | 4.6e-05 | +| CC-News | 4.6e-05 | 4.6e-05 | 3.6e-05 | +| Wikipedia | 2.8e-05 | 3.6e-05 | 2.8e-05 | +| CodeSearchNet | 4.6e-05 | 2.8e-05 | 3.6e-05 | +| CqaDupStackMath | 4.6e-05 | 2.8e-05 | 3.6e-05 | +| MathFormula | 1.7e-05 | 3.6e-05 | 3.6e-05 | ## License diff --git a/model.safetensors b/model.safetensors new file mode 100644 index 0000000..09f97ef --- /dev/null +++ b/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50e41b32655cbe62c63dac675b1a2a6625632ed1991243eed5d641d2f6952791 +size 3022529592 diff --git a/pytorch_model.bin b/pytorch_model.bin new file mode 100644 index 0000000..90f7aac --- /dev/null +++ b/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9ec9ab592ee33fee99ffc6d4ca90800866630f1023cbda2f56cec88625601dc +size 3022568750