62 lines
48 KiB
Plaintext
62 lines
48 KiB
Plaintext
|
/mnt/petrelfs/wangweiyun/miniconda3/envs/internvl_eval2/lib/python3.10/site-packages/bitsandbytes/cextension.py:34: UserWarning: The installed version of bitsandbytes was compiled without GPU support. 8-bit optimizers, 8-bit multiplication, and GPU quantization are unavailable.
|
||
|
warn("The installed version of bitsandbytes was compiled without GPU support. "
|
||
|
/mnt/petrelfs/wangweiyun/miniconda3/envs/internvl_eval2/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cpu.so: undefined symbol: cadam32bit_grad_fp32
|
||
|
model path is /mnt/petrelfs/wangweiyun/workspace_cz/InternVL/internvl_chat_dev/work_dirs/internvl_chat_v2_5/internvl_chat_v2_5_internlm2_5_7b_dynamic_res_finetune_datav162
|
||
|
11/19 10:57:39 - OpenCompass - WARNING - No previous results to reuse!
|
||
|
11/19 10:57:39 - OpenCompass - INFO - Reusing experiements from 20241119_105739
|
||
|
11/19 10:57:39 - OpenCompass - INFO - Current exp folder: /mnt/petrelfs/wangweiyun/workspace_cz/InternVL/internvl_chat_dev/work_dirs/internvl_chat_v2_5/internvl_chat_v2_5_internlm2_5_7b_dynamic_res_finetune_datav162/20241119_105739
|
||
|
11/19 10:57:42 - OpenCompass - INFO - Partitioned into 256 tasks.
|
||
|
[ ] 0/256, elapsed: 0s, ETA:
[ ] 1/256, 0.0 task/s, elapsed: 356s, ETA: 90694s
[ ] 2/256, 0.0 task/s, elapsed: 375s, ETA: 47629s
[ ] 3/256, 0.0 task/s, elapsed: 375s, ETA: 31636s
[ ] 4/256, 0.0 task/s, elapsed: 381s, ETA: 24028s
[ ] 5/256, 0.0 task/s, elapsed: 391s, ETA: 19650s
[ ] 6/256, 0.0 task/s, elapsed: 394s, ETA: 16397s
[ ] 7/256, 0.0 task/s, elapsed: 395s, ETA: 14057s
[ ] 8/256, 0.0 task/s, elapsed: 395s, ETA: 12252s
[> ] 9/256, 0.0 task/s, elapsed: 396s, ETA: 10860s
[> ] 10/256, 0.0 task/s, elapsed: 397s, ETA: 9765s
[> ] 11/256, 0.0 task/s, elapsed: 397s, ETA: 8844s
[> ] 12/256, 0.0 task/s, elapsed: 397s, ETA: 8075s
[> ] 13/256, 0.0 task/s, elapsed: 400s, ETA: 7480s
[> ] 14/256, 0.0 task/s, elapsed: 403s, ETA: 6967s
[> ] 15/256, 0.0 task/s, elapsed: 405s, ETA: 6509s
[> ] 16/256, 0.0 task/s, elapsed: 406s, ETA: 6090s
[> ] 17/256, 0.0 task/s, elapsed: 407s, ETA: 5723s
[>> ] 18/256, 0.0 task/s, elapsed: 408s, ETA: 5390s
[>> ] 19/256, 0.0 task/s, elapsed: 408s, ETA: 5085s
[>> ] 20/256, 0.0 task/s, elapsed: 408s, ETA: 4811s
[>> ] 21/256, 0.1 task/s, elapsed: 410s, ETA: 4584s
[>> ] 22/256, 0.1 task/s, elapsed: 411s, ETA: 4371s
[>> ] 23/256, 0.1 task/s, elapsed: 417s, ETA: 4229s
[>> ] 24/256, 0.1 task/s, elapsed: 418s, ETA: 4037s
[>> ] 25/256, 0.1 task/s, elapsed: 419s, ETA: 3868s
[>>> ] 26/256, 0.1 task/s, elapsed: 420s, ETA: 3712s
[>>> ] 27/256, 0.1 task/s, elapsed: 420s, ETA: 3561s
[>>> ] 28/256, 0.1 task/s, elapsed: 426s, ETA: 3473s
[>>> ] 29/256, 0.1 task/s, elapsed: 427s, ETA: 3343s
[>>> ] 30/256, 0.1 task/s, elapsed: 431s, ETA: 3250s
[>>> ] 31/256, 0.1 task/s, elapsed: 433s, ETA: 3143s
[>>> ] 32/256, 0.1 task/s, elapsed: 434s, ETA: 3035s
[>>> ] 33/256, 0.1 task/s, elapsed: 435s, ETA: 2941s
[>>> ] 34/256, 0.1 task/s, elapsed: 438s, ETA: 2858s
[>>>> ] 35/256, 0.1 task/s, elapsed: 438s, ETA: 2764s
[>>>> ] 36/256, 0.1 task/s, elapsed: 438s, ETA: 2675s
[>>>> ] 37/256, 0.1 task/s, elapsed: 441s, ETA: 2609s
[>>>> ] 38/256, 0.1 task/s, elapsed: 442s, ETA: 2536s
[>>>> ] 39/256, 0.1 task/s, elapsed: 442s, ETA: 2460s
[>>>> ] 40/256, 0.1 task/s, elapsed: 443s, ETA: 2394s
[>>>> ] 41/256, 0.1 task/s, elapsed: 443s, ETA: 2325s
[>>>> ] 42/256, 0.1 task/s, elapsed: 446s, ETA: 2275s
[>>>>> ] 43/256, 0.1 task/s, elapsed: 447s, ETA: 2212s
[>>>>> ] 44/256, 0.1 task/s, elapsed: 448s, ETA: 2159s
[>>>>> ] 45/256, 0.1 task/s, elapsed: 450s, ETA: 2108s
[>>>>> ] 46/256, 0.1 task/s, elapsed: 450s, ETA: 2053s
[>>>>> ] 47/256, 0.1 task/s, elapsed: 450s, ETA: 2002s
[>>>>> ] 48/256, 0.1 task/s, elapsed: 451s, ETA: 1954s
[>>>>> ] 49/256, 0.1 task/s, elapsed: 453s, ETA: 1915s
[>>>>> ] 50/256, 0.1 task/s, elapsed: 457s, ETA: 1882s
[>>>>>
|
||
|
11/19 11:10:45 - OpenCompass - INFO - Partitioned into 287 tasks.
|
||
|
[ ] 0/287, elapsed: 0s, ETA:
[ ] 1/287, 0.0 task/s, elapsed: 55s, ETA: 15790s
[ ] 2/287, 0.0 task/s, elapsed: 55s, ETA: 7869s
[ ] 3/287, 0.1 task/s, elapsed: 55s, ETA: 5229s
[ ] 4/287, 0.1 task/s, elapsed: 55s, ETA: 3909s
[ ] 5/287, 0.1 task/s, elapsed: 55s, ETA: 3124s
[ ] 6/287, 0.1 task/s, elapsed: 55s, ETA: 2594s
[ ] 7/287, 0.1 task/s, elapsed: 55s, ETA: 2216s
[ ] 8/287, 0.1 task/s, elapsed: 55s, ETA: 1933s
[> ] 9/287, 0.2 task/s, elapsed: 55s, ETA: 1712s
[> ] 10/287, 0.2 task/s, elapsed: 55s, ETA: 1536s
[> ] 11/287, 0.2 task/s, elapsed: 55s, ETA: 1392s
[> ] 12/287, 0.2 task/s, elapsed: 55s, ETA: 1271s
[> ] 13/287, 0.2 task/s, elapsed: 55s, ETA: 1169s
[> ] 14/287, 0.3 task/s, elapsed: 56s, ETA: 1082s
[> ] 15/287, 0.3 task/s, elapsed: 56s, ETA: 1007s
[> ] 16/287, 0.3 task/s, elapsed: 56s, ETA: 941s
[> ] 17/287, 0.3 task/s, elapsed: 56s, ETA: 882s
[> ] 18/287, 0.3 task/s, elapsed: 56s, ETA: 830s
[>> ] 19/287, 0.3 task/s, elapsed: 56s, ETA: 784s
[>> ] 20/287, 0.4 task/s, elapsed: 56s, ETA: 742s
[>> ] 21/287, 0.4 task/s, elapsed: 56s, ETA: 704s
[>> ] 22/287, 0.4 task/s, elapsed: 56s, ETA: 670s
[>> ] 23/287, 0.4 task/s, elapsed: 56s, ETA: 638s
[>> ] 24/287, 0.4 task/s, elapsed: 56s, ETA: 610s
[>> ] 25/287, 0.4 task/s, elapsed: 56s, ETA: 583s
[>> ] 26/287, 0.5 task/s, elapsed: 56s, ETA: 559s
[>> ] 27/287, 0.5 task/s, elapsed: 56s, ETA: 536s
[>>> ] 28/287, 0.5 task/s, elapsed: 56s, ETA: 515s
[>>> ] 29/287, 0.5 task/s, elapsed: 56s, ETA: 495s
[>>> ] 30/287, 0.5 task/s, elapsed: 56s, ETA: 477s
[>>> ] 31/287, 0.6 task/s, elapsed: 56s, ETA: 460s
[>>> ] 32/287, 0.6 task/s, elapsed: 56s, ETA: 444s
[>>> ] 33/287, 0.6 task/s, elapsed: 56s, ETA: 429s
[>>> ] 34/287, 0.6 task/s, elapsed: 56s, ETA: 415s
[>>> ] 35/287, 0.6 task/s, elapsed: 56s, ETA: 402s
[>>> ] 36/287, 0.6 task/s, elapsed: 56s, ETA: 389s
[>>> ] 37/287, 0.7 task/s, elapsed: 56s, ETA: 378s
[>>>> ] 38/287, 0.7 task/s, elapsed: 56s, ETA: 366s
[>>>> ] 39/287, 0.7 task/s, elapsed: 56s, ETA: 357s
[>>>> ] 40/287, 0.7 task/s, elapsed: 56s, ETA: 347s
[>>>> ] 41/287, 0.7 task/s, elapsed: 58s, ETA: 350s
[>>>> ] 42/287, 0.7 task/s, elapsed: 58s, ETA: 341s
[>>>> ] 43/287, 0.7 task/s, elapsed: 58s, ETA: 331s
[>>>> ] 44/287, 0.8 task/s, elapsed: 58s, ETA: 323s
[>>>> ] 45/287, 0.8 task/s, elapsed: 59s, ETA: 315s
[>>>> ] 46/287, 0.8 task/s, elapsed: 59s, ETA: 307s
[>>>>> ] 47/287, 0.8 task/s, elapsed: 59s, ETA: 299s
[>>>>> ] 48/287, 0.8 task/s, elapsed: 59s, ETA: 293s
[>>>>> ] 49/287, 0.8 task/s, elapsed: 59s, ETA: 286s
[>>>>> ] 50/287, 0.9 task/s, elapsed: 59s, ETA: 279s
[>>>>>
|
||
|
dataset version metric mode internvl-chat-20b
|
||
|
---------------------------- --------- ---------------------------- ------ -------------------
|
||
|
mmlu - naive_average gen 74.61
|
||
|
mmlu_pro - - - -
|
||
|
cmmlu - naive_average gen 78.70
|
||
|
ceval - naive_average gen 79.74
|
||
|
agieval - - - -
|
||
|
GaokaoBench - weighted_average gen 77.29
|
||
|
GPQA_extended - - - -
|
||
|
GPQA_main - - - -
|
||
|
GPQA_diamond - - - -
|
||
|
ARC-c - - - -
|
||
|
truthfulqa - - - -
|
||
|
triviaqa 2121ce score gen 63.36
|
||
|
triviaqa_wiki_1shot - - - -
|
||
|
nq 3dcea1 score gen 29.36
|
||
|
C3 8c358f accuracy gen 94.68
|
||
|
race-high 9a54b6 accuracy gen 90.79
|
||
|
flores_100 - - - -
|
||
|
winogrande b36770 accuracy gen 83.50
|
||
|
hellaswag e42710 accuracy gen 94.13
|
||
|
bbh - naive_average gen 73.43
|
||
|
gsm8k 1d7fe4 accuracy gen 77.79
|
||
|
math 393424 accuracy gen 49.88
|
||
|
TheoremQA 6f0af8 score gen 23.75
|
||
|
MathBench - - - -
|
||
|
openai_humaneval 8e312c humaneval_pass@1 gen 75.00
|
||
|
humaneval_plus - - - -
|
||
|
humanevalx - - - -
|
||
|
sanitized_mbpp a447ff score gen 68.48
|
||
|
mbpp_plus - - - -
|
||
|
mbpp_cn 6fb572 score gen 55.20
|
||
|
leval - - - -
|
||
|
leval_closed - - - -
|
||
|
leval_open - - - -
|
||
|
longbench - - - -
|
||
|
longbench_single-document-qa - - - -
|
||
|
longbench_multi-document-qa - - - -
|
||
|
longbench_summarization - - - -
|
||
|
longbench_few-shot-learning - - - -
|
||
|
longbench_synthetic-tasks - - - -
|
||
|
longbench_code-completion - - - -
|
||
|
teval - - - -
|
||
|
teval_zh - - - -
|
||
|
IFEval 3321a3 Prompt-level-strict-accuracy gen 50.46
|
||
|
IFEval 3321a3 Inst-level-strict-accuracy gen 60.79
|
||
|
IFEval 3321a3 Prompt-level-loose-accuracy gen 53.42
|
||
|
IFEval 3321a3 Inst-level-loose-accuracy gen 63.67
|
||
|
11/19 11:15:09 - OpenCompass - INFO - write summary to /mnt/petrelfs/wangweiyun/workspace_cz/InternVL/internvl_chat_dev/work_dirs/internvl_chat_v2_5/internvl_chat_v2_5_internlm2_5_7b_dynamic_res_finetune_datav162/20241119_105739/summary/summary_20241119_105739.txt
|
||
|
11/19 11:15:09 - OpenCompass - INFO - write csv to /mnt/petrelfs/wangweiyun/workspace_cz/InternVL/internvl_chat_dev/work_dirs/internvl_chat_v2_5/internvl_chat_v2_5_internlm2_5_7b_dynamic_res_finetune_datav162/20241119_105739/summary/summary_20241119_105739.csv
|