Go to file
Charles95 755b710565 first commit 2024-09-12 07:30:00 +00:00
1_Pooling first commit 2024-09-12 07:30:00 +00:00
2_Dense first commit 2024-09-12 07:30:00 +00:00
.gitattributes first commit 2024-09-12 07:30:00 +00:00
README.md first commit 2024-09-12 07:30:00 +00:00
config.json first commit 2024-09-12 07:30:00 +00:00
config_sentence_transformers.json first commit 2024-09-12 07:30:00 +00:00
modules.json first commit 2024-09-12 07:30:00 +00:00
pytorch_model.bin first commit 2024-09-12 07:30:00 +00:00
sentence_bert_config.json first commit 2024-09-12 07:30:00 +00:00
special_tokens_map.json first commit 2024-09-12 07:30:00 +00:00
tokenizer.json first commit 2024-09-12 07:30:00 +00:00
tokenizer_config.json first commit 2024-09-12 07:30:00 +00:00
vocab.txt first commit 2024-09-12 07:30:00 +00:00

README.md

tags model-index license library_name
mteb
name results
conan-embedding
task dataset metrics
type
STS
type name config split revision
C-MTEB/AFQMC MTEB AFQMC default validation None
type value
cos_sim_pearson 56.613572467148856
type value
cos_sim_spearman 60.66446211824284
type value
euclidean_pearson 58.42080485872613
type value
euclidean_spearman 59.82750030458164
type value
manhattan_pearson 58.39885271199772
type value
manhattan_spearman 59.817749720366734
task dataset metrics
type
STS
type name config split revision
C-MTEB/ATEC MTEB ATEC default test None
type value
cos_sim_pearson 56.60530380552331
type value
cos_sim_spearman 58.63822441736707
type value
euclidean_pearson 62.18551665180664
type value
euclidean_spearman 58.23168804495912
type value
manhattan_pearson 62.17191480770053
type value
manhattan_spearman 58.22556219601401
task dataset metrics
type
Classification
type name config split revision
mteb/amazon_reviews_multi MTEB AmazonReviewsClassification (zh) zh test 1399c76144fd37290681b995c656ef9b2e06e26d
type value
accuracy 50.308
type value
f1 46.927458607895126
task dataset metrics
type
STS
type name config split revision
C-MTEB/BQ MTEB BQ default test None
type value
cos_sim_pearson 72.6472074172711
type value
cos_sim_spearman 74.50748447236577
type value
euclidean_pearson 72.51833296451854
type value
euclidean_spearman 73.9898922606105
type value
manhattan_pearson 72.50184948939338
type value
manhattan_spearman 73.97797921509638
task dataset metrics
type
Clustering
type name config split revision
C-MTEB/CLSClusteringP2P MTEB CLSClusteringP2P default test None
type value
v_measure 60.63545326048343
task dataset metrics
type
Clustering
type name config split revision
C-MTEB/CLSClusteringS2S MTEB CLSClusteringS2S default test None
type value
v_measure 52.64834762325994
task dataset metrics
type
Reranking
type name config split revision
C-MTEB/CMedQAv1-reranking MTEB CMedQAv1 default test None
type value
map 91.38528814655234
type value
mrr 93.35857142857144
task dataset metrics
type
Reranking
type name config split revision
C-MTEB/CMedQAv2-reranking MTEB CMedQAv2 default test None
type value
map 89.72084678877096
type value
mrr 91.74380952380953
task dataset metrics
type
Retrieval
type name config split revision
C-MTEB/CmedqaRetrieval MTEB CmedqaRetrieval default dev None
type value
map_at_1 26.987
type value
map_at_10 40.675
type value
map_at_100 42.495
type value
map_at_1000 42.596000000000004
type value
map_at_3 36.195
type value
map_at_5 38.704
type value
mrr_at_1 41.21
type value
mrr_at_10 49.816
type value
mrr_at_100 50.743
type value
mrr_at_1000 50.77700000000001
type value
mrr_at_3 47.312
type value
mrr_at_5 48.699999999999996
type value
ndcg_at_1 41.21
type value
ndcg_at_10 47.606
type value
ndcg_at_100 54.457
type value
ndcg_at_1000 56.16100000000001
type value
ndcg_at_3 42.108000000000004
type value
ndcg_at_5 44.393
type value
precision_at_1 41.21
type value
precision_at_10 10.593
type value
precision_at_100 1.609
type value
precision_at_1000 0.183
type value
precision_at_3 23.881
type value
precision_at_5 17.339
type value
recall_at_1 26.987
type value
recall_at_10 58.875
type value
recall_at_100 87.023
type value
recall_at_1000 98.328
type value
recall_at_3 42.265
type value
recall_at_5 49.334
task dataset metrics
type
PairClassification
type name config split revision
C-MTEB/CMNLI MTEB Cmnli default validation None
type value
cos_sim_accuracy 85.91701743836441
type value
cos_sim_ap 92.53650618807644
type value
cos_sim_f1 86.80265975431082
type value
cos_sim_precision 83.79025239338556
type value
cos_sim_recall 90.039747486556
type value
dot_accuracy 77.17378232110643
type value
dot_ap 85.40244368166546
type value
dot_f1 79.03038001481951
type value
dot_precision 72.20502901353966
type value
dot_recall 87.2808043020809
type value
euclidean_accuracy 84.65423932651834
type value
euclidean_ap 91.47775530034588
type value
euclidean_f1 85.64471499723298
type value
euclidean_precision 81.31567885666246
type value
euclidean_recall 90.46060322656068
type value
manhattan_accuracy 84.58208057726999
type value
manhattan_ap 91.46228709402014
type value
manhattan_f1 85.6631626034444
type value
manhattan_precision 82.10075026795283
type value
manhattan_recall 89.5487491232172
type value
max_accuracy 85.91701743836441
type value
max_ap 92.53650618807644
type value
max_f1 86.80265975431082
task dataset metrics
type
Retrieval
type name config split revision
C-MTEB/CovidRetrieval MTEB CovidRetrieval default dev None
type value
map_at_1 83.693
type value
map_at_10 90.098
type value
map_at_100 90.145
type value
map_at_1000 90.146
type value
map_at_3 89.445
type value
map_at_5 89.935
type value
mrr_at_1 83.878
type value
mrr_at_10 90.007
type value
mrr_at_100 90.045
type value
mrr_at_1000 90.046
type value
mrr_at_3 89.34
type value
mrr_at_5 89.835
type value
ndcg_at_1 84.089
type value
ndcg_at_10 92.351
type value
ndcg_at_100 92.54599999999999
type value
ndcg_at_1000 92.561
type value
ndcg_at_3 91.15299999999999
type value
ndcg_at_5 91.968
type value
precision_at_1 84.089
type value
precision_at_10 10.011000000000001
type value
precision_at_100 1.009
type value
precision_at_1000 0.101
type value
precision_at_3 32.28
type value
precision_at_5 19.789
type value
recall_at_1 83.693
type value
recall_at_10 99.05199999999999
type value
recall_at_100 99.895
type value
recall_at_1000 100
type value
recall_at_3 95.917
type value
recall_at_5 97.893
task dataset metrics
type
Retrieval
type name config split revision
C-MTEB/DuRetrieval MTEB DuRetrieval default dev None
type value
map_at_1 26.924
type value
map_at_10 81.392
type value
map_at_100 84.209
type value
map_at_1000 84.237
type value
map_at_3 56.998000000000005
type value
map_at_5 71.40100000000001
type value
mrr_at_1 91.75
type value
mrr_at_10 94.45
type value
mrr_at_100 94.503
type value
mrr_at_1000 94.505
type value
mrr_at_3 94.258
type value
mrr_at_5 94.381
type value
ndcg_at_1 91.75
type value
ndcg_at_10 88.53
type value
ndcg_at_100 91.13900000000001
type value
ndcg_at_1000 91.387
type value
ndcg_at_3 87.925
type value
ndcg_at_5 86.461
type value
precision_at_1 91.75
type value
precision_at_10 42.05
type value
precision_at_100 4.827
type value
precision_at_1000 0.48900000000000005
type value
precision_at_3 78.55
type value
precision_at_5 65.82000000000001
type value
recall_at_1 26.924
type value
recall_at_10 89.338
type value
recall_at_100 97.856
type value
recall_at_1000 99.11
type value
recall_at_3 59.202999999999996
type value
recall_at_5 75.642
task dataset metrics
type
Retrieval
type name config split revision
C-MTEB/EcomRetrieval MTEB EcomRetrieval default dev None
type value
map_at_1 54.800000000000004
type value
map_at_10 65.613
type value
map_at_100 66.185
type value
map_at_1000 66.191
type value
map_at_3 62.8
type value
map_at_5 64.535
type value
mrr_at_1 54.800000000000004
type value
mrr_at_10 65.613
type value
mrr_at_100 66.185
type value
mrr_at_1000 66.191
type value
mrr_at_3 62.8
type value
mrr_at_5 64.535
type value
ndcg_at_1 54.800000000000004
type value
ndcg_at_10 70.991
type value
ndcg_at_100 73.434
type value
ndcg_at_1000 73.587
type value
ndcg_at_3 65.324
type value
ndcg_at_5 68.431
type value
precision_at_1 54.800000000000004
type value
precision_at_10 8.790000000000001
type value
precision_at_100 0.9860000000000001
type value
precision_at_1000 0.1
type value
precision_at_3 24.2
type value
precision_at_5 16.02
type value
recall_at_1 54.800000000000004
type value
recall_at_10 87.9
type value
recall_at_100 98.6
type value
recall_at_1000 99.8
type value
recall_at_3 72.6
type value
recall_at_5 80.10000000000001
task dataset metrics
type
Classification
type name config split revision
C-MTEB/IFlyTek-classification MTEB IFlyTek default validation None
type value
accuracy 51.94305502116199
type value
f1 39.82197338426721
task dataset metrics
type
Classification
type name config split revision
C-MTEB/JDReview-classification MTEB JDReview default test None
type value
accuracy 90.31894934333957
type value
ap 63.89821836499594
type value
f1 85.93687177603624
task dataset metrics
type
STS
type name config split revision
C-MTEB/LCQMC MTEB LCQMC default test None
type value
cos_sim_pearson 73.18906216730208
type value
cos_sim_spearman 79.44570226735877
type value
euclidean_pearson 78.8105072242798
type value
euclidean_spearman 79.15605680863212
type value
manhattan_pearson 78.80576507484064
type value
manhattan_spearman 79.14625534068364
task dataset metrics
type
Reranking
type name config split revision
C-MTEB/Mmarco-reranking MTEB MMarcoReranking default dev None
type value
map 41.58107192600853
type value
mrr 41.37063492063492
task dataset metrics
type
Retrieval
type name config split revision
C-MTEB/MMarcoRetrieval MTEB MMarcoRetrieval default dev None
type value
map_at_1 68.33
type value
map_at_10 78.261
type value
map_at_100 78.522
type value
map_at_1000 78.527
type value
map_at_3 76.236
type value
map_at_5 77.557
type value
mrr_at_1 70.602
type value
mrr_at_10 78.779
type value
mrr_at_100 79.00500000000001
type value
mrr_at_1000 79.01
type value
mrr_at_3 77.037
type value
mrr_at_5 78.157
type value
ndcg_at_1 70.602
type value
ndcg_at_10 82.254
type value
ndcg_at_100 83.319
type value
ndcg_at_1000 83.449
type value
ndcg_at_3 78.46
type value
ndcg_at_5 80.679
type value
precision_at_1 70.602
type value
precision_at_10 9.989
type value
precision_at_100 1.05
type value
precision_at_1000 0.106
type value
precision_at_3 29.598999999999997
type value
precision_at_5 18.948
type value
recall_at_1 68.33
type value
recall_at_10 94.00800000000001
type value
recall_at_100 98.589
type value
recall_at_1000 99.60799999999999
type value
recall_at_3 84.057
type value
recall_at_5 89.32900000000001
task dataset metrics
type
Classification
type name config split revision
mteb/amazon_massive_intent MTEB MassiveIntentClassification (zh-CN) zh-CN test 31efe3c427b0bae9c22cbb560b8f15491cc6bed7
type value
accuracy 78.13718897108272
type value
f1 74.07613180855328
task dataset metrics
type
Classification
type name config split revision
mteb/amazon_massive_scenario MTEB MassiveScenarioClassification (zh-CN) zh-CN test 7d571f92784cd94a019292a1f45445077d0ef634
type value
accuracy 86.20040349697376
type value
f1 85.05282136519973
task dataset metrics
type
Retrieval
type name config split revision
C-MTEB/MedicalRetrieval MTEB MedicalRetrieval default dev None
type value
map_at_1 56.8
type value
map_at_10 64.199
type value
map_at_100 64.89
type value
map_at_1000 64.917
type value
map_at_3 62.383
type value
map_at_5 63.378
type value
mrr_at_1 56.8
type value
mrr_at_10 64.199
type value
mrr_at_100 64.89
type value
mrr_at_1000 64.917
type value
mrr_at_3 62.383
type value
mrr_at_5 63.378
type value
ndcg_at_1 56.8
type value
ndcg_at_10 67.944
type value
ndcg_at_100 71.286
type value
ndcg_at_1000 71.879
type value
ndcg_at_3 64.163
type value
ndcg_at_5 65.96600000000001
type value
precision_at_1 56.8
type value
precision_at_10 7.9799999999999995
type value
precision_at_100 0.954
type value
precision_at_1000 0.1
type value
precision_at_3 23.1
type value
precision_at_5 14.74
type value
recall_at_1 56.8
type value
recall_at_10 79.80000000000001
type value
recall_at_100 95.39999999999999
type value
recall_at_1000 99.8
type value
recall_at_3 69.3
type value
recall_at_5 73.7
task dataset metrics
type
Classification
type name config split revision
C-MTEB/MultilingualSentiment-classification MTEB MultilingualSentiment default validation None
type value
accuracy 78.57666666666667
type value
f1 78.23373528202681
task dataset metrics
type
PairClassification
type name config split revision
C-MTEB/OCNLI MTEB Ocnli default validation None
type value
cos_sim_accuracy 85.43584190579317
type value
cos_sim_ap 90.76665640338129
type value
cos_sim_f1 86.5021770682148
type value
cos_sim_precision 79.82142857142858
type value
cos_sim_recall 94.40337909186906
type value
dot_accuracy 78.66811044937737
type value
dot_ap 85.84084363880804
type value
dot_f1 80.10075566750629
type value
dot_precision 76.58959537572254
type value
dot_recall 83.9493136219641
type value
euclidean_accuracy 84.46128857606931
type value
euclidean_ap 88.62351100230491
type value
euclidean_f1 85.7709469509172
type value
euclidean_precision 80.8411214953271
type value
euclidean_recall 91.34107708553326
type value
manhattan_accuracy 84.51543042772063
type value
manhattan_ap 88.53975607870393
type value
manhattan_f1 85.75697211155378
type value
manhattan_precision 81.14985862393968
type value
manhattan_recall 90.91869060190075
type value
max_accuracy 85.43584190579317
type value
max_ap 90.76665640338129
type value
max_f1 86.5021770682148
task dataset metrics
type
Classification
type name config split revision
C-MTEB/OnlineShopping-classification MTEB OnlineShopping default test None
type value
accuracy 95.06999999999998
type value
ap 93.45104559324996
type value
f1 95.06036329426092
task dataset metrics
type
STS
type name config split revision
C-MTEB/PAWSX MTEB PAWSX default test None
type value
cos_sim_pearson 40.01998290519605
type value
cos_sim_spearman 46.5989769986853
type value
euclidean_pearson 45.37905883182924
type value
euclidean_spearman 46.22213849806378
type value
manhattan_pearson 45.40925124776211
type value
manhattan_spearman 46.250705124226386
task dataset metrics
type
STS
type name config split revision
C-MTEB/QBQTC MTEB QBQTC default test None
type value
cos_sim_pearson 42.719516197112526
type value
cos_sim_spearman 44.57507789581106
type value
euclidean_pearson 35.73062264160721
type value
euclidean_spearman 40.473523909913695
type value
manhattan_pearson 35.69868964086357
type value
manhattan_spearman 40.46349925372903
task dataset metrics
type
STS
type name config split revision
mteb/sts22-crosslingual-sts MTEB STS22 (zh) zh test 6d1ba47164174a496b7fa5d3569dae26a6813b80
type value
cos_sim_pearson 62.340118285801104
type value
cos_sim_spearman 67.72781908620632
type value
euclidean_pearson 63.161965746091596
type value
euclidean_spearman 67.36825684340769
type value
manhattan_pearson 63.089863788261425
type value
manhattan_spearman 67.40868898995384
task dataset metrics
type
STS
type name config split revision
C-MTEB/STSB MTEB STSB default test None
type value
cos_sim_pearson 79.1646360962365
type value
cos_sim_spearman 81.24426700767087
type value
euclidean_pearson 79.43826409936123
type value
euclidean_spearman 79.71787965300125
type value
manhattan_pearson 79.43377784961737
type value
manhattan_spearman 79.69348376886967
task dataset metrics
type
Reranking
type name config split revision
C-MTEB/T2Reranking MTEB T2Reranking default dev None
type value
map 68.35595092507496
type value
mrr 79.00244892585788
task dataset metrics
type
Retrieval
type name config split revision
C-MTEB/T2Retrieval MTEB T2Retrieval default dev None
type value
map_at_1 26.588
type value
map_at_10 75.327
type value
map_at_100 79.095
type value
map_at_1000 79.163
type value
map_at_3 52.637
type value
map_at_5 64.802
type value
mrr_at_1 88.103
type value
mrr_at_10 91.29899999999999
type value
mrr_at_100 91.408
type value
mrr_at_1000 91.411
type value
mrr_at_3 90.801
type value
mrr_at_5 91.12700000000001
type value
ndcg_at_1 88.103
type value
ndcg_at_10 83.314
type value
ndcg_at_100 87.201
type value
ndcg_at_1000 87.83999999999999
type value
ndcg_at_3 84.408
type value
ndcg_at_5 83.078
type value
precision_at_1 88.103
type value
precision_at_10 41.638999999999996
type value
precision_at_100 5.006
type value
precision_at_1000 0.516
type value
precision_at_3 73.942
type value
precision_at_5 62.056
type value
recall_at_1 26.588
type value
recall_at_10 82.819
type value
recall_at_100 95.334
type value
recall_at_1000 98.51299999999999
type value
recall_at_3 54.74
type value
recall_at_5 68.864
task dataset metrics
type
Classification
type name config split revision
C-MTEB/TNews-classification MTEB TNews default validation None
type value
accuracy 55.029
type value
f1 53.043617905026764
task dataset metrics
type
Clustering
type name config split revision
C-MTEB/ThuNewsClusteringP2P MTEB ThuNewsClusteringP2P default test None
type value
v_measure 77.83675116835911
task dataset metrics
type
Clustering
type name config split revision
C-MTEB/ThuNewsClusteringS2S MTEB ThuNewsClusteringS2S default test None
type value
v_measure 74.19701455865277
task dataset metrics
type
Retrieval
type name config split revision
C-MTEB/VideoRetrieval MTEB VideoRetrieval default dev None
type value
map_at_1 64.7
type value
map_at_10 75.593
type value
map_at_100 75.863
type value
map_at_1000 75.863
type value
map_at_3 73.63300000000001
type value
map_at_5 74.923
type value
mrr_at_1 64.7
type value
mrr_at_10 75.593
type value
mrr_at_100 75.863
type value
mrr_at_1000 75.863
type value
mrr_at_3 73.63300000000001
type value
mrr_at_5 74.923
type value
ndcg_at_1 64.7
type value
ndcg_at_10 80.399
type value
ndcg_at_100 81.517
type value
ndcg_at_1000 81.517
type value
ndcg_at_3 76.504
type value
ndcg_at_5 78.79899999999999
type value
precision_at_1 64.7
type value
precision_at_10 9.520000000000001
type value
precision_at_100 1
type value
precision_at_1000 0.1
type value
precision_at_3 28.266999999999996
type value
precision_at_5 18.060000000000002
type value
recall_at_1 64.7
type value
recall_at_10 95.19999999999999
type value
recall_at_100 100
type value
recall_at_1000 100
type value
recall_at_3 84.8
type value
recall_at_5 90.3
task dataset metrics
type
Classification
type name config split revision
C-MTEB/waimai-classification MTEB Waimai default test None
type value
accuracy 89.69999999999999
type value
ap 75.91371640164184
type value
f1 88.34067777698694
cc-by-nc-4.0 sentence-transformers

Conan-embedding-v1

Performance

| Model | Average | CLS | Clustering | Reranking | Retrieval | STS | Pair_CLS |
|---|---|---|---|---|---|---|---|
| gte-Qwen2-7B-instruct | 72.05 | 75.09 | 66.06 | 68.92 | 76.03 | 65.33 | 87.48 |
| xiaobu-embedding-v2 | 72.43 | 74.67 | 65.17 | 72.58 | 76.5 | 64.53 | 91.87 |
| Conan-embedding-v1 | 72.62 | 75.03 | 66.33 | 72.76 | 76.67 | 64.18 | 91.66 |

Methods and Training Details

Please refer to our technical report (https://arxiv.org/abs/2408.15710).

Citation

If you find our models or paper useful in your research, please consider giving the model a ❤️ and citing our work. Thanks!

@misc{li2024conanembeddinggeneraltextembedding,
  title={Conan-embedding: General Text Embedding with More and Better Negative Samples}, 
  author={Shiyu Li and Yang Tang and Shizhe Chen and Xi Chen},
  year={2024},
  eprint={2408.15710},
  archivePrefix={arXiv},
  primaryClass={cs.CL},
  url={https://arxiv.org/abs/2408.15710}, 
}

About

Created by the Tencent BAC Group. All rights reserved.

License

This work is licensed under a Creative Commons Attribution-NonCommercial 4.0 International License.