first commit
This commit is contained in:
parent
f970a9f5d7
commit
7c5a442811
71
README.md
71
README.md
|
@ -1,3 +1,70 @@
|
|||
# PDF-Extract-Kit_a13672718289465344129244
|
||||
### Install Git LFS
|
||||
Before you begin, make sure Git Large File Storage (Git LFS) is installed on your system. Install it using the following command:
|
||||
|
||||
PDF-Extract-Kit
|
||||
```bash
|
||||
git lfs install
|
||||
```
|
||||
|
||||
### Download the Model from Hugging Face
|
||||
To download the `PDF-Extract-Kit` model from Hugging Face, use the following command:
|
||||
|
||||
```bash
|
||||
git clone https://huggingface.co/opendatalab/PDF-Extract-Kit
|
||||
```
|
||||
|
||||
Ensure that Git LFS is enabled during the clone to properly download all large files.
|
||||
|
||||
|
||||
|
||||
### Download the Model from ModelScope
|
||||
|
||||
#### SDK Download
|
||||
|
||||
```bash
|
||||
# First, install the ModelScope library using pip:
|
||||
pip install modelscope
|
||||
```
|
||||
|
||||
```python
|
||||
# Use the following Python code to download the model using the ModelScope SDK:
|
||||
from modelscope import snapshot_download
|
||||
model_dir = snapshot_download('opendatalab/PDF-Extract-Kit')
|
||||
```
|
||||
|
||||
#### Git Download
|
||||
Alternatively, you can use Git to clone the model repository from ModelScope:
|
||||
|
||||
```bash
|
||||
git clone https://www.modelscope.cn/opendatalab/PDF-Extract-Kit.git
|
||||
```
|
||||
|
||||
|
||||
Put the model files here, using the following directory layout:
|
||||
|
||||
```
|
||||
./
|
||||
├── Layout
|
||||
│ ├── config.json
|
||||
│ └── model_final.pth
|
||||
├── MFD
|
||||
│ └── weights.pt
|
||||
├── MFR
|
||||
│ └── UniMERNet
|
||||
│ ├── config.json
|
||||
│ ├── preprocessor_config.json
|
||||
│ ├── pytorch_model.bin
|
||||
│ ├── README.md
|
||||
│ ├── tokenizer_config.json
|
||||
│ └── tokenizer.json
|
||||
├── TabRec
|
||||
│ └── StructEqTable
|
||||
│ ├── config.json
|
||||
│ ├── generation_config.json
|
||||
│ ├── model.safetensors
|
||||
│ ├── preprocessor_config.json
|
||||
│ ├── special_tokens_map.json
|
||||
│ ├── spiece.model
|
||||
│ ├── tokenizer_config.json
|
||||
│ └── tokenizer.json
|
||||
└── README.md
|
||||
```
|
||||
|
|
|
@ -0,0 +1,33 @@
|
|||
{
|
||||
"attention_probs_dropout_prob": 0.1,
|
||||
"bos_token_id": 0,
|
||||
"classifier_dropout": null,
|
||||
"coordinate_size": 128,
|
||||
"eos_token_id": 2,
|
||||
"has_relative_attention_bias": true,
|
||||
"has_spatial_attention_bias": true,
|
||||
"hidden_act": "gelu",
|
||||
"hidden_dropout_prob": 0.1,
|
||||
"hidden_size": 768,
|
||||
"initializer_range": 0.02,
|
||||
"input_size": 224,
|
||||
"intermediate_size": 3072,
|
||||
"layer_norm_eps": 1e-05,
|
||||
"max_2d_position_embeddings": 1024,
|
||||
"max_position_embeddings": 514,
|
||||
"max_rel_2d_pos": 256,
|
||||
"max_rel_pos": 128,
|
||||
"model_type": "layoutlmv3",
|
||||
"num_attention_heads": 12,
|
||||
"num_hidden_layers": 12,
|
||||
"pad_token_id": 1,
|
||||
"rel_2d_pos_bins": 64,
|
||||
"rel_pos_bins": 32,
|
||||
"second_input_size": 112,
|
||||
"shape_size": 128,
|
||||
"torch_dtype": "float32",
|
||||
"transformers_version": "4.12.5",
|
||||
"type_vocab_size": 1,
|
||||
"visual_embed": true,
|
||||
"vocab_size": 250002
|
||||
}
|
Binary file not shown.
Binary file not shown.
|
@ -0,0 +1,6 @@
|
|||
---
|
||||
license: apache-2.0
|
||||
---
|
||||
UniMERNet: A Universal Network for Mathematical Expression Recognition in Real-World Scenarios.
|
||||
|
||||
Visit our GitHub repository at [unimernet](https://github.com/opendatalab/unimernet) for more information.
|
|
@ -0,0 +1,193 @@
|
|||
{
|
||||
"_name_or_path": "unimernet/checkpoint-180000",
|
||||
"architectures": [
|
||||
"VisionEncoderDecoderModel"
|
||||
],
|
||||
"decoder": {
|
||||
"_name_or_path": "",
|
||||
"activation_dropout": 0.0,
|
||||
"activation_function": "gelu",
|
||||
"add_cross_attention": true,
|
||||
"add_final_layer_norm": true,
|
||||
"architectures": null,
|
||||
"attention_dropout": 0.0,
|
||||
"bad_words_ids": null,
|
||||
"begin_suppress_tokens": null,
|
||||
"bos_token_id": 0,
|
||||
"chunk_size_feed_forward": 0,
|
||||
"classifier_dropout": 0.0,
|
||||
"cross_attention_hidden_size": null,
|
||||
"d_model": 1024,
|
||||
"decoder_attention_heads": 16,
|
||||
"decoder_ffn_dim": 4096,
|
||||
"decoder_layerdrop": 0.0,
|
||||
"decoder_layers": 8,
|
||||
"decoder_start_token_id": null,
|
||||
"diversity_penalty": 0.0,
|
||||
"do_sample": false,
|
||||
"dropout": 0.1,
|
||||
"early_stopping": false,
|
||||
"encoder_attention_heads": 16,
|
||||
"encoder_ffn_dim": 4096,
|
||||
"encoder_layerdrop": 0.0,
|
||||
"encoder_layers": 12,
|
||||
"encoder_no_repeat_ngram_size": 0,
|
||||
"eos_token_id": 2,
|
||||
"exponential_decay_length_penalty": null,
|
||||
"finetuning_task": null,
|
||||
"forced_bos_token_id": null,
|
||||
"forced_eos_token_id": 2,
|
||||
"id2label": {
|
||||
"0": "LABEL_0",
|
||||
"1": "LABEL_1"
|
||||
},
|
||||
"init_std": 0.02,
|
||||
"is_decoder": true,
|
||||
"is_encoder_decoder": false,
|
||||
"label2id": {
|
||||
"LABEL_0": 0,
|
||||
"LABEL_1": 1
|
||||
},
|
||||
"length_penalty": 1.0,
|
||||
"max_length": 20,
|
||||
"max_position_embeddings": 1536,
|
||||
"min_length": 0,
|
||||
"model_type": "mbart",
|
||||
"no_repeat_ngram_size": 0,
|
||||
"num_beam_groups": 1,
|
||||
"num_beams": 1,
|
||||
"num_hidden_layers": 12,
|
||||
"num_return_sequences": 1,
|
||||
"output_attentions": false,
|
||||
"output_hidden_states": false,
|
||||
"output_scores": false,
|
||||
"pad_token_id": 1,
|
||||
"prefix": null,
|
||||
"problem_type": null,
|
||||
"pruned_heads": {},
|
||||
"remove_invalid_values": false,
|
||||
"repetition_penalty": 1.0,
|
||||
"return_dict": true,
|
||||
"return_dict_in_generate": false,
|
||||
"scale_embedding": true,
|
||||
"sep_token_id": null,
|
||||
"suppress_tokens": null,
|
||||
"task_specific_params": null,
|
||||
"temperature": 1.0,
|
||||
"tf_legacy_loss": false,
|
||||
"tie_encoder_decoder": false,
|
||||
"tie_word_embeddings": false,
|
||||
"tokenizer_class": null,
|
||||
"top_k": 50,
|
||||
"top_p": 1.0,
|
||||
"torch_dtype": null,
|
||||
"torchscript": false,
|
||||
"typical_p": 1.0,
|
||||
"use_bfloat16": false,
|
||||
"use_cache": true,
|
||||
"vocab_size": 50000
|
||||
},
|
||||
"decoder_start_token_id": 0,
|
||||
"encoder": {
|
||||
"_name_or_path": "",
|
||||
"add_cross_attention": false,
|
||||
"architectures": null,
|
||||
"attention_probs_dropout_prob": 0.0,
|
||||
"bad_words_ids": null,
|
||||
"begin_suppress_tokens": null,
|
||||
"bos_token_id": null,
|
||||
"chunk_size_feed_forward": 0,
|
||||
"cross_attention_hidden_size": null,
|
||||
"decoder_start_token_id": null,
|
||||
"depths": [
|
||||
2,
|
||||
2,
|
||||
14,
|
||||
2
|
||||
],
|
||||
"diversity_penalty": 0.0,
|
||||
"do_sample": false,
|
||||
"drop_path_rate": 0.1,
|
||||
"early_stopping": false,
|
||||
"embed_dim": 128,
|
||||
"encoder_no_repeat_ngram_size": 0,
|
||||
"eos_token_id": null,
|
||||
"exponential_decay_length_penalty": null,
|
||||
"finetuning_task": null,
|
||||
"forced_bos_token_id": null,
|
||||
"forced_eos_token_id": null,
|
||||
"hidden_act": "gelu",
|
||||
"hidden_dropout_prob": 0.0,
|
||||
"hidden_size": 1024,
|
||||
"id2label": {
|
||||
"0": "LABEL_0",
|
||||
"1": "LABEL_1"
|
||||
},
|
||||
"image_size": [
|
||||
420,
|
||||
420
|
||||
],
|
||||
"initializer_range": 0.02,
|
||||
"is_decoder": false,
|
||||
"is_encoder_decoder": false,
|
||||
"label2id": {
|
||||
"LABEL_0": 0,
|
||||
"LABEL_1": 1
|
||||
},
|
||||
"layer_norm_eps": 1e-05,
|
||||
"length_penalty": 1.0,
|
||||
"max_length": 20,
|
||||
"min_length": 0,
|
||||
"mlp_ratio": 4.0,
|
||||
"model_type": "donut-swin",
|
||||
"no_repeat_ngram_size": 0,
|
||||
"num_beam_groups": 1,
|
||||
"num_beams": 1,
|
||||
"num_channels": 3,
|
||||
"num_heads": [
|
||||
4,
|
||||
8,
|
||||
16,
|
||||
32
|
||||
],
|
||||
"num_layers": 4,
|
||||
"num_return_sequences": 1,
|
||||
"output_attentions": false,
|
||||
"output_hidden_states": false,
|
||||
"output_scores": false,
|
||||
"pad_token_id": null,
|
||||
"patch_size": 4,
|
||||
"path_norm": true,
|
||||
"prefix": null,
|
||||
"problem_type": null,
|
||||
"pruned_heads": {},
|
||||
"qkv_bias": true,
|
||||
"remove_invalid_values": false,
|
||||
"repetition_penalty": 1.0,
|
||||
"return_dict": true,
|
||||
"return_dict_in_generate": false,
|
||||
"sep_token_id": null,
|
||||
"suppress_tokens": null,
|
||||
"task_specific_params": null,
|
||||
"temperature": 1.0,
|
||||
"tf_legacy_loss": false,
|
||||
"tie_encoder_decoder": false,
|
||||
"tie_word_embeddings": true,
|
||||
"tokenizer_class": null,
|
||||
"top_k": 50,
|
||||
"top_p": 1.0,
|
||||
"torch_dtype": null,
|
||||
"torchscript": false,
|
||||
"typical_p": 1.0,
|
||||
"use_2d_embeddings": false,
|
||||
"use_absolute_embeddings": false,
|
||||
"use_bfloat16": false,
|
||||
"window_size": 5
|
||||
},
|
||||
"is_encoder_decoder": true,
|
||||
"model_type": "vision-encoder-decoder",
|
||||
"pad_token_id": 1,
|
||||
"tie_word_embeddings": false,
|
||||
"torch_dtype": "float32",
|
||||
"transformers_version": "4.40.0"
|
||||
}
|
|
@ -0,0 +1,36 @@
|
|||
{
|
||||
"do_align_long_axis": false,
|
||||
"do_normalize": false,
|
||||
"do_pad": false,
|
||||
"do_rescale": false,
|
||||
"do_resize": false,
|
||||
"do_thumbnail": false,
|
||||
"feature_extractor_type": "DonutFeatureExtractor",
|
||||
"image_mean": [
|
||||
0.485,
|
||||
0.456,
|
||||
0.406
|
||||
],
|
||||
"image_processor_type": "VariableDonutImageProcessor",
|
||||
"image_std": [
|
||||
0.229,
|
||||
0.224,
|
||||
0.225
|
||||
],
|
||||
"max_size": {
|
||||
"height": 192,
|
||||
"width": 672
|
||||
},
|
||||
"patch_size": [
|
||||
4,
|
||||
4
|
||||
],
|
||||
"processor_class": "VariableDonutProcessor",
|
||||
"resample": 2,
|
||||
"rescale_factor": 0.00392156862745098,
|
||||
"size": [
|
||||
192,
|
||||
672
|
||||
],
|
||||
"train": false
|
||||
}
|
Binary file not shown.
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,205 @@
|
|||
{
|
||||
"added_tokens_decoder": {
|
||||
"0": {
|
||||
"content": "<s>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"1": {
|
||||
"content": "<pad>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"2": {
|
||||
"content": "</s>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"3": {
|
||||
"content": "<unk>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"4": {
|
||||
"content": "[START_REF]",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"5": {
|
||||
"content": "[END_REF]",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"6": {
|
||||
"content": "[IMAGE]",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"7": {
|
||||
"content": "<fragments>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"8": {
|
||||
"content": "</fragments>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"9": {
|
||||
"content": "<work>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"10": {
|
||||
"content": "</work>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"11": {
|
||||
"content": "[START_SUP]",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"12": {
|
||||
"content": "[END_SUP]",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"13": {
|
||||
"content": "[START_SUB]",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"14": {
|
||||
"content": "[END_SUB]",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"15": {
|
||||
"content": "[START_DNA]",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"16": {
|
||||
"content": "[END_DNA]",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"17": {
|
||||
"content": "[START_AMINO]",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"18": {
|
||||
"content": "[END_AMINO]",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"19": {
|
||||
"content": "[START_SMILES]",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"20": {
|
||||
"content": "[END_SMILES]",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"21": {
|
||||
"content": "[START_I_SMILES]",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"22": {
|
||||
"content": "[END_I_SMILES]",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
}
|
||||
},
|
||||
"additional_special_tokens": [],
|
||||
"bos_token": "<s>",
|
||||
"clean_up_tokenization_spaces": false,
|
||||
"eos_token": "</s>",
|
||||
"max_length": 4096,
|
||||
"model_max_length": 768,
|
||||
"pad_to_multiple_of": null,
|
||||
"pad_token": "<pad>",
|
||||
"pad_token_type_id": 0,
|
||||
"padding_side": "right",
|
||||
"processor_class": "VariableDonutProcessor",
|
||||
"stride": 0,
|
||||
"tokenizer_class": "NougatTokenizer",
|
||||
"truncation_side": "right",
|
||||
"truncation_strategy": "longest_first",
|
||||
"unk_token": "<unk>",
|
||||
"vocab_file": null
|
||||
}
|
|
@ -0,0 +1,32 @@
|
|||
---
|
||||
license: apache-2.0
|
||||
---
|
||||
## UniMERNet: A Universal Network for Mathematical Expression Recognition in Real-World Scenarios.
|
||||
|
||||
Visit our GitHub repository at [UniMERNet](https://github.com/opendatalab/unimernet) for more information.
|
||||
|
||||
## Citations
|
||||
```
|
||||
@misc{wang2024unimernet,
|
||||
title={UniMERNet: A Universal Network for Real-World Mathematical Expression Recognition},
|
||||
author={Bin Wang and Zhuangcheng Gu and Chao Xu and Bo Zhang and Botian Shi and Conghui He},
|
||||
year={2024},
|
||||
eprint={2404.15254},
|
||||
archivePrefix={arXiv},
|
||||
primaryClass={cs.CV}
|
||||
}
|
||||
|
||||
@misc{conghui2022opendatalab,
|
||||
author={He, Conghui and Li, Wei and Jin, Zhenjiang and Wang, Bin and Xu, Chao and Lin, Dahua},
|
||||
title={OpenDataLab: Empowering General Artificial Intelligence with Open Datasets},
|
||||
howpublished = {\url{https://opendatalab.com}},
|
||||
year={2022}
|
||||
}
|
||||
```
|
||||
|
||||
## MD5 checksums
|
||||
```
|
||||
97f4867b4ff4e9a96c8daba8aaa793b4 tokenizer_config.json
|
||||
351652071425d3d36a634ccc8efb22e8 tokenizer.json
|
||||
ff4391872dad6688f21ed140009d817b pytorch_model.pth
|
||||
```
|
|
@ -0,0 +1,193 @@
|
|||
{
|
||||
"_name_or_path": "unimernet/checkpoint-300000",
|
||||
"architectures": [
|
||||
"VisionEncoderDecoderModel"
|
||||
],
|
||||
"decoder": {
|
||||
"_name_or_path": "",
|
||||
"activation_dropout": 0.0,
|
||||
"activation_function": "gelu",
|
||||
"add_cross_attention": true,
|
||||
"add_final_layer_norm": true,
|
||||
"architectures": null,
|
||||
"attention_dropout": 0.0,
|
||||
"bad_words_ids": null,
|
||||
"begin_suppress_tokens": null,
|
||||
"bos_token_id": 0,
|
||||
"chunk_size_feed_forward": 0,
|
||||
"classifier_dropout": 0.0,
|
||||
"cross_attention_hidden_size": null,
|
||||
"d_model": 1024,
|
||||
"decoder_attention_heads": 16,
|
||||
"decoder_ffn_dim": 4096,
|
||||
"decoder_layerdrop": 0.0,
|
||||
"decoder_layers": 8,
|
||||
"decoder_start_token_id": null,
|
||||
"diversity_penalty": 0.0,
|
||||
"do_sample": false,
|
||||
"dropout": 0.1,
|
||||
"early_stopping": false,
|
||||
"encoder_attention_heads": 16,
|
||||
"encoder_ffn_dim": 4096,
|
||||
"encoder_layerdrop": 0.0,
|
||||
"encoder_layers": 12,
|
||||
"encoder_no_repeat_ngram_size": 0,
|
||||
"eos_token_id": 2,
|
||||
"exponential_decay_length_penalty": null,
|
||||
"finetuning_task": null,
|
||||
"forced_bos_token_id": null,
|
||||
"forced_eos_token_id": 2,
|
||||
"id2label": {
|
||||
"0": "LABEL_0",
|
||||
"1": "LABEL_1"
|
||||
},
|
||||
"init_std": 0.02,
|
||||
"is_decoder": true,
|
||||
"is_encoder_decoder": false,
|
||||
"label2id": {
|
||||
"LABEL_0": 0,
|
||||
"LABEL_1": 1
|
||||
},
|
||||
"length_penalty": 1.0,
|
||||
"max_length": 20,
|
||||
"max_position_embeddings": 1536,
|
||||
"min_length": 0,
|
||||
"model_type": "mbart",
|
||||
"no_repeat_ngram_size": 0,
|
||||
"num_beam_groups": 1,
|
||||
"num_beams": 1,
|
||||
"num_hidden_layers": 12,
|
||||
"num_return_sequences": 1,
|
||||
"output_attentions": false,
|
||||
"output_hidden_states": false,
|
||||
"output_scores": false,
|
||||
"pad_token_id": 1,
|
||||
"prefix": null,
|
||||
"problem_type": null,
|
||||
"pruned_heads": {},
|
||||
"remove_invalid_values": false,
|
||||
"repetition_penalty": 1.0,
|
||||
"return_dict": true,
|
||||
"return_dict_in_generate": false,
|
||||
"scale_embedding": true,
|
||||
"sep_token_id": null,
|
||||
"suppress_tokens": null,
|
||||
"task_specific_params": null,
|
||||
"temperature": 1.0,
|
||||
"tf_legacy_loss": false,
|
||||
"tie_encoder_decoder": false,
|
||||
"tie_word_embeddings": false,
|
||||
"tokenizer_class": null,
|
||||
"top_k": 50,
|
||||
"top_p": 1.0,
|
||||
"torch_dtype": null,
|
||||
"torchscript": false,
|
||||
"typical_p": 1.0,
|
||||
"use_bfloat16": false,
|
||||
"use_cache": true,
|
||||
"vocab_size": 50000
|
||||
},
|
||||
"decoder_start_token_id": 0,
|
||||
"encoder": {
|
||||
"_name_or_path": "",
|
||||
"add_cross_attention": false,
|
||||
"architectures": null,
|
||||
"attention_probs_dropout_prob": 0.0,
|
||||
"bad_words_ids": null,
|
||||
"begin_suppress_tokens": null,
|
||||
"bos_token_id": null,
|
||||
"chunk_size_feed_forward": 0,
|
||||
"cross_attention_hidden_size": null,
|
||||
"decoder_start_token_id": null,
|
||||
"depths": [
|
||||
6,
|
||||
6,
|
||||
6,
|
||||
6
|
||||
],
|
||||
"diversity_penalty": 0.0,
|
||||
"do_sample": false,
|
||||
"drop_path_rate": 0.1,
|
||||
"early_stopping": false,
|
||||
"embed_dim": 128,
|
||||
"encoder_no_repeat_ngram_size": 0,
|
||||
"eos_token_id": null,
|
||||
"exponential_decay_length_penalty": null,
|
||||
"finetuning_task": null,
|
||||
"forced_bos_token_id": null,
|
||||
"forced_eos_token_id": null,
|
||||
"hidden_act": "gelu",
|
||||
"hidden_dropout_prob": 0.0,
|
||||
"hidden_size": 1024,
|
||||
"id2label": {
|
||||
"0": "LABEL_0",
|
||||
"1": "LABEL_1"
|
||||
},
|
||||
"image_size": [
|
||||
420,
|
||||
420
|
||||
],
|
||||
"initializer_range": 0.02,
|
||||
"is_decoder": false,
|
||||
"is_encoder_decoder": false,
|
||||
"label2id": {
|
||||
"LABEL_0": 0,
|
||||
"LABEL_1": 1
|
||||
},
|
||||
"layer_norm_eps": 1e-05,
|
||||
"length_penalty": 1.0,
|
||||
"max_length": 20,
|
||||
"min_length": 0,
|
||||
"mlp_ratio": 4.0,
|
||||
"model_type": "donut-swin",
|
||||
"no_repeat_ngram_size": 0,
|
||||
"num_beam_groups": 1,
|
||||
"num_beams": 1,
|
||||
"num_channels": 3,
|
||||
"num_heads": [
|
||||
4,
|
||||
8,
|
||||
16,
|
||||
32
|
||||
],
|
||||
"num_layers": 4,
|
||||
"num_return_sequences": 1,
|
||||
"output_attentions": false,
|
||||
"output_hidden_states": false,
|
||||
"output_scores": false,
|
||||
"pad_token_id": null,
|
||||
"patch_size": 4,
|
||||
"path_norm": true,
|
||||
"prefix": null,
|
||||
"problem_type": null,
|
||||
"pruned_heads": {},
|
||||
"qkv_bias": true,
|
||||
"remove_invalid_values": false,
|
||||
"repetition_penalty": 1.0,
|
||||
"return_dict": true,
|
||||
"return_dict_in_generate": false,
|
||||
"sep_token_id": null,
|
||||
"suppress_tokens": null,
|
||||
"task_specific_params": null,
|
||||
"temperature": 1.0,
|
||||
"tf_legacy_loss": false,
|
||||
"tie_encoder_decoder": false,
|
||||
"tie_word_embeddings": true,
|
||||
"tokenizer_class": null,
|
||||
"top_k": 50,
|
||||
"top_p": 1.0,
|
||||
"torch_dtype": null,
|
||||
"torchscript": false,
|
||||
"typical_p": 1.0,
|
||||
"use_2d_embeddings": false,
|
||||
"use_absolute_embeddings": false,
|
||||
"use_bfloat16": false,
|
||||
"window_size": 5
|
||||
},
|
||||
"is_encoder_decoder": true,
|
||||
"model_type": "vision-encoder-decoder",
|
||||
"pad_token_id": 1,
|
||||
"tie_word_embeddings": false,
|
||||
"torch_dtype": "float16",
|
||||
"transformers_version": "4.36.0"
|
||||
}
|
|
@ -0,0 +1,36 @@
|
|||
{
|
||||
"do_align_long_axis": false,
|
||||
"do_normalize": false,
|
||||
"do_pad": false,
|
||||
"do_rescale": false,
|
||||
"do_resize": false,
|
||||
"do_thumbnail": false,
|
||||
"feature_extractor_type": "DonutFeatureExtractor",
|
||||
"image_mean": [
|
||||
0.485,
|
||||
0.456,
|
||||
0.406
|
||||
],
|
||||
"image_processor_type": "VariableDonutImageProcessor",
|
||||
"image_std": [
|
||||
0.229,
|
||||
0.224,
|
||||
0.225
|
||||
],
|
||||
"max_size": {
|
||||
"height": 192,
|
||||
"width": 672
|
||||
},
|
||||
"patch_size": [
|
||||
4,
|
||||
4
|
||||
],
|
||||
"processor_class": "VariableDonutProcessor",
|
||||
"resample": 2,
|
||||
"rescale_factor": 0.00392156862745098,
|
||||
"size": [
|
||||
192,
|
||||
672
|
||||
],
|
||||
"train": false
|
||||
}
|
Binary file not shown.
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,205 @@
|
|||
{
|
||||
"added_tokens_decoder": {
|
||||
"0": {
|
||||
"content": "<s>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"1": {
|
||||
"content": "<pad>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"2": {
|
||||
"content": "</s>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"3": {
|
||||
"content": "<unk>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"4": {
|
||||
"content": "[START_REF]",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"5": {
|
||||
"content": "[END_REF]",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"6": {
|
||||
"content": "[IMAGE]",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"7": {
|
||||
"content": "<fragments>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"8": {
|
||||
"content": "</fragments>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"9": {
|
||||
"content": "<work>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"10": {
|
||||
"content": "</work>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"11": {
|
||||
"content": "[START_SUP]",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"12": {
|
||||
"content": "[END_SUP]",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"13": {
|
||||
"content": "[START_SUB]",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"14": {
|
||||
"content": "[END_SUB]",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"15": {
|
||||
"content": "[START_DNA]",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"16": {
|
||||
"content": "[END_DNA]",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"17": {
|
||||
"content": "[START_AMINO]",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"18": {
|
||||
"content": "[END_AMINO]",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"19": {
|
||||
"content": "[START_SMILES]",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"20": {
|
||||
"content": "[END_SMILES]",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"21": {
|
||||
"content": "[START_I_SMILES]",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"22": {
|
||||
"content": "[END_I_SMILES]",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
}
|
||||
},
|
||||
"additional_special_tokens": [],
|
||||
"bos_token": "<s>",
|
||||
"clean_up_tokenization_spaces": false,
|
||||
"eos_token": "</s>",
|
||||
"max_length": 4096,
|
||||
"model_max_length": 768,
|
||||
"pad_to_multiple_of": null,
|
||||
"pad_token": "<pad>",
|
||||
"pad_token_type_id": 0,
|
||||
"padding_side": "right",
|
||||
"processor_class": "VariableDonutProcessor",
|
||||
"stride": 0,
|
||||
"tokenizer_class": "NougatTokenizer",
|
||||
"truncation_side": "right",
|
||||
"truncation_strategy": "longest_first",
|
||||
"unk_token": "<unk>",
|
||||
"vocab_file": null
|
||||
}
|
|
@ -0,0 +1,46 @@
|
|||
model:
|
||||
arch: unimernet
|
||||
model_type: unimernet
|
||||
model_config:
|
||||
model_name: ./models/unimernet_base
|
||||
max_seq_len: 1536
|
||||
|
||||
load_pretrained: True
|
||||
pretrained: './models/unimernet_base/pytorch_model.pth'
|
||||
tokenizer_config:
|
||||
path: ./models/unimernet_base
|
||||
|
||||
datasets:
|
||||
formula_rec_eval:
|
||||
vis_processor:
|
||||
eval:
|
||||
name: "formula_image_eval"
|
||||
image_size:
|
||||
- 192
|
||||
- 672
|
||||
|
||||
run:
|
||||
runner: runner_iter
|
||||
task: unimernet_train
|
||||
|
||||
batch_size_train: 64
|
||||
batch_size_eval: 64
|
||||
num_workers: 1
|
||||
|
||||
iters_per_inner_epoch: 2000
|
||||
max_iters: 60000
|
||||
|
||||
seed: 42
|
||||
output_dir: "../output/demo"
|
||||
|
||||
evaluate: True
|
||||
test_splits: [ "eval" ]
|
||||
|
||||
device: "cuda"
|
||||
world_size: 1
|
||||
dist_url: "env://"
|
||||
distributed: True
|
||||
distributed_type: ddp # or fsdp when training an LLM
|
||||
|
||||
generate_cfg:
|
||||
temperature: 0.0
|
|
@ -0,0 +1,32 @@
|
|||
---
|
||||
license: apache-2.0
|
||||
---
|
||||
## UniMERNet: A Universal Network for Mathematical Expression Recognition in Real-World Scenarios.
|
||||
|
||||
Visit our GitHub repository at [UniMERNet](https://github.com/opendatalab/unimernet) for more information.
|
||||
|
||||
## Citations
|
||||
```
|
||||
@misc{wang2024unimernet,
|
||||
title={UniMERNet: A Universal Network for Real-World Mathematical Expression Recognition},
|
||||
author={Bin Wang and Zhuangcheng Gu and Chao Xu and Bo Zhang and Botian Shi and Conghui He},
|
||||
year={2024},
|
||||
eprint={2404.15254},
|
||||
archivePrefix={arXiv},
|
||||
primaryClass={cs.CV}
|
||||
}
|
||||
|
||||
@misc{conghui2022opendatalab,
|
||||
author={He, Conghui and Li, Wei and Jin, Zhenjiang and Wang, Bin and Xu, Chao and Lin, Dahua},
|
||||
title={OpenDataLab: Empowering General Artificial Intelligence with Open Datasets},
|
||||
howpublished = {\url{https://opendatalab.com}},
|
||||
year={2022}
|
||||
}
|
||||
```
|
||||
|
||||
## MD5 checksums
|
||||
```
|
||||
97f4867b4ff4e9a96c8daba8aaa793b4 tokenizer_config.json
|
||||
351652071425d3d36a634ccc8efb22e8 tokenizer.json
|
||||
430e426354e71624fb096c5c7ad90a78 pytorch_model.pth
|
||||
```
|
|
@ -0,0 +1,193 @@
|
|||
{
|
||||
"_name_or_path": "unimernet/checkpoint-300000",
|
||||
"architectures": [
|
||||
"VisionEncoderDecoderModel"
|
||||
],
|
||||
"decoder": {
|
||||
"_name_or_path": "",
|
||||
"activation_dropout": 0.0,
|
||||
"activation_function": "gelu",
|
||||
"add_cross_attention": true,
|
||||
"add_final_layer_norm": true,
|
||||
"architectures": null,
|
||||
"attention_dropout": 0.0,
|
||||
"bad_words_ids": null,
|
||||
"begin_suppress_tokens": null,
|
||||
"bos_token_id": 0,
|
||||
"chunk_size_feed_forward": 0,
|
||||
"classifier_dropout": 0.0,
|
||||
"cross_attention_hidden_size": null,
|
||||
"d_model": 768,
|
||||
"decoder_attention_heads": 16,
|
||||
"decoder_ffn_dim": 3072,
|
||||
"decoder_layerdrop": 0.0,
|
||||
"decoder_layers": 8,
|
||||
"decoder_start_token_id": null,
|
||||
"diversity_penalty": 0.0,
|
||||
"do_sample": false,
|
||||
"dropout": 0.1,
|
||||
"early_stopping": false,
|
||||
"encoder_attention_heads": 16,
|
||||
"encoder_ffn_dim": 3072,
|
||||
"encoder_layerdrop": 0.0,
|
||||
"encoder_layers": 12,
|
||||
"encoder_no_repeat_ngram_size": 0,
|
||||
"eos_token_id": 2,
|
||||
"exponential_decay_length_penalty": null,
|
||||
"finetuning_task": null,
|
||||
"forced_bos_token_id": null,
|
||||
"forced_eos_token_id": 2,
|
||||
"id2label": {
|
||||
"0": "LABEL_0",
|
||||
"1": "LABEL_1"
|
||||
},
|
||||
"init_std": 0.02,
|
||||
"is_decoder": true,
|
||||
"is_encoder_decoder": false,
|
||||
"label2id": {
|
||||
"LABEL_0": 0,
|
||||
"LABEL_1": 1
|
||||
},
|
||||
"length_penalty": 1.0,
|
||||
"max_length": 20,
|
||||
"max_position_embeddings": 1536,
|
||||
"min_length": 0,
|
||||
"model_type": "mbart",
|
||||
"no_repeat_ngram_size": 0,
|
||||
"num_beam_groups": 1,
|
||||
"num_beams": 1,
|
||||
"num_hidden_layers": 12,
|
||||
"num_return_sequences": 1,
|
||||
"output_attentions": false,
|
||||
"output_hidden_states": false,
|
||||
"output_scores": false,
|
||||
"pad_token_id": 1,
|
||||
"prefix": null,
|
||||
"problem_type": null,
|
||||
"pruned_heads": {},
|
||||
"remove_invalid_values": false,
|
||||
"repetition_penalty": 1.0,
|
||||
"return_dict": true,
|
||||
"return_dict_in_generate": false,
|
||||
"scale_embedding": true,
|
||||
"sep_token_id": null,
|
||||
"suppress_tokens": null,
|
||||
"task_specific_params": null,
|
||||
"temperature": 1.0,
|
||||
"tf_legacy_loss": false,
|
||||
"tie_encoder_decoder": false,
|
||||
"tie_word_embeddings": false,
|
||||
"tokenizer_class": null,
|
||||
"top_k": 50,
|
||||
"top_p": 1.0,
|
||||
"torch_dtype": null,
|
||||
"torchscript": false,
|
||||
"typical_p": 1.0,
|
||||
"use_bfloat16": false,
|
||||
"use_cache": true,
|
||||
"vocab_size": 50000
|
||||
},
|
||||
"decoder_start_token_id": 0,
|
||||
"encoder": {
|
||||
"_name_or_path": "",
|
||||
"add_cross_attention": false,
|
||||
"architectures": null,
|
||||
"attention_probs_dropout_prob": 0.0,
|
||||
"bad_words_ids": null,
|
||||
"begin_suppress_tokens": null,
|
||||
"bos_token_id": null,
|
||||
"chunk_size_feed_forward": 0,
|
||||
"cross_attention_hidden_size": null,
|
||||
"decoder_start_token_id": null,
|
||||
"depths": [
|
||||
6,
|
||||
6,
|
||||
6,
|
||||
6
|
||||
],
|
||||
"diversity_penalty": 0.0,
|
||||
"do_sample": false,
|
||||
"drop_path_rate": 0.1,
|
||||
"early_stopping": false,
|
||||
"embed_dim": 96,
|
||||
"encoder_no_repeat_ngram_size": 0,
|
||||
"eos_token_id": null,
|
||||
"exponential_decay_length_penalty": null,
|
||||
"finetuning_task": null,
|
||||
"forced_bos_token_id": null,
|
||||
"forced_eos_token_id": null,
|
||||
"hidden_act": "gelu",
|
||||
"hidden_dropout_prob": 0.0,
|
||||
"hidden_size": 768,
|
||||
"id2label": {
|
||||
"0": "LABEL_0",
|
||||
"1": "LABEL_1"
|
||||
},
|
||||
"image_size": [
|
||||
420,
|
||||
420
|
||||
],
|
||||
"initializer_range": 0.02,
|
||||
"is_decoder": false,
|
||||
"is_encoder_decoder": false,
|
||||
"label2id": {
|
||||
"LABEL_0": 0,
|
||||
"LABEL_1": 1
|
||||
},
|
||||
"layer_norm_eps": 1e-05,
|
||||
"length_penalty": 1.0,
|
||||
"max_length": 20,
|
||||
"min_length": 0,
|
||||
"mlp_ratio": 4.0,
|
||||
"model_type": "donut-swin",
|
||||
"no_repeat_ngram_size": 0,
|
||||
"num_beam_groups": 1,
|
||||
"num_beams": 1,
|
||||
"num_channels": 3,
|
||||
"num_heads": [
|
||||
3,
|
||||
6,
|
||||
12,
|
||||
24
|
||||
],
|
||||
"num_layers": 4,
|
||||
"num_return_sequences": 1,
|
||||
"output_attentions": false,
|
||||
"output_hidden_states": false,
|
||||
"output_scores": false,
|
||||
"pad_token_id": null,
|
||||
"patch_size": 4,
|
||||
"path_norm": true,
|
||||
"prefix": null,
|
||||
"problem_type": null,
|
||||
"pruned_heads": {},
|
||||
"qkv_bias": true,
|
||||
"remove_invalid_values": false,
|
||||
"repetition_penalty": 1.0,
|
||||
"return_dict": true,
|
||||
"return_dict_in_generate": false,
|
||||
"sep_token_id": null,
|
||||
"suppress_tokens": null,
|
||||
"task_specific_params": null,
|
||||
"temperature": 1.0,
|
||||
"tf_legacy_loss": false,
|
||||
"tie_encoder_decoder": false,
|
||||
"tie_word_embeddings": true,
|
||||
"tokenizer_class": null,
|
||||
"top_k": 50,
|
||||
"top_p": 1.0,
|
||||
"torch_dtype": null,
|
||||
"torchscript": false,
|
||||
"typical_p": 1.0,
|
||||
"use_2d_embeddings": false,
|
||||
"use_absolute_embeddings": false,
|
||||
"use_bfloat16": false,
|
||||
"window_size": 5
|
||||
},
|
||||
"is_encoder_decoder": true,
|
||||
"model_type": "vision-encoder-decoder",
|
||||
"pad_token_id": 1,
|
||||
"tie_word_embeddings": false,
|
||||
"torch_dtype": "float16",
|
||||
"transformers_version": "4.36.0"
|
||||
}
|
|
@ -0,0 +1,36 @@
|
|||
{
|
||||
"do_align_long_axis": false,
|
||||
"do_normalize": false,
|
||||
"do_pad": false,
|
||||
"do_rescale": false,
|
||||
"do_resize": false,
|
||||
"do_thumbnail": false,
|
||||
"feature_extractor_type": "DonutFeatureExtractor",
|
||||
"image_mean": [
|
||||
0.485,
|
||||
0.456,
|
||||
0.406
|
||||
],
|
||||
"image_processor_type": "VariableDonutImageProcessor",
|
||||
"image_std": [
|
||||
0.229,
|
||||
0.224,
|
||||
0.225
|
||||
],
|
||||
"max_size": {
|
||||
"height": 192,
|
||||
"width": 672
|
||||
},
|
||||
"patch_size": [
|
||||
4,
|
||||
4
|
||||
],
|
||||
"processor_class": "VariableDonutProcessor",
|
||||
"resample": 2,
|
||||
"rescale_factor": 0.00392156862745098,
|
||||
"size": [
|
||||
192,
|
||||
672
|
||||
],
|
||||
"train": false
|
||||
}
|
Binary file not shown.
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,205 @@
|
|||
{
|
||||
"added_tokens_decoder": {
|
||||
"0": {
|
||||
"content": "<s>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"1": {
|
||||
"content": "<pad>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"2": {
|
||||
"content": "</s>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"3": {
|
||||
"content": "<unk>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"4": {
|
||||
"content": "[START_REF]",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"5": {
|
||||
"content": "[END_REF]",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"6": {
|
||||
"content": "[IMAGE]",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"7": {
|
||||
"content": "<fragments>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"8": {
|
||||
"content": "</fragments>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"9": {
|
||||
"content": "<work>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"10": {
|
||||
"content": "</work>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"11": {
|
||||
"content": "[START_SUP]",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"12": {
|
||||
"content": "[END_SUP]",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"13": {
|
||||
"content": "[START_SUB]",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"14": {
|
||||
"content": "[END_SUB]",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"15": {
|
||||
"content": "[START_DNA]",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"16": {
|
||||
"content": "[END_DNA]",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"17": {
|
||||
"content": "[START_AMINO]",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"18": {
|
||||
"content": "[END_AMINO]",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"19": {
|
||||
"content": "[START_SMILES]",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"20": {
|
||||
"content": "[END_SMILES]",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"21": {
|
||||
"content": "[START_I_SMILES]",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"22": {
|
||||
"content": "[END_I_SMILES]",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
}
|
||||
},
|
||||
"additional_special_tokens": [],
|
||||
"bos_token": "<s>",
|
||||
"clean_up_tokenization_spaces": false,
|
||||
"eos_token": "</s>",
|
||||
"max_length": 4096,
|
||||
"model_max_length": 768,
|
||||
"pad_to_multiple_of": null,
|
||||
"pad_token": "<pad>",
|
||||
"pad_token_type_id": 0,
|
||||
"padding_side": "right",
|
||||
"processor_class": "VariableDonutProcessor",
|
||||
"stride": 0,
|
||||
"tokenizer_class": "NougatTokenizer",
|
||||
"truncation_side": "right",
|
||||
"truncation_strategy": "longest_first",
|
||||
"unk_token": "<unk>",
|
||||
"vocab_file": null
|
||||
}
|
|
@ -0,0 +1,46 @@
|
|||
model:
|
||||
arch: unimernet
|
||||
model_type: unimernet
|
||||
model_config:
|
||||
model_name: ./models/unimernet_small
|
||||
max_seq_len: 1536
|
||||
|
||||
load_pretrained: True
|
||||
pretrained: './models/unimernet_small/pytorch_model.pth'
|
||||
tokenizer_config:
|
||||
path: ./models/unimernet_small
|
||||
|
||||
datasets:
|
||||
formula_rec_eval:
|
||||
vis_processor:
|
||||
eval:
|
||||
name: "formula_image_eval"
|
||||
image_size:
|
||||
- 192
|
||||
- 672
|
||||
|
||||
run:
|
||||
runner: runner_iter
|
||||
task: unimernet_train
|
||||
|
||||
batch_size_train: 64
|
||||
batch_size_eval: 64
|
||||
num_workers: 1
|
||||
|
||||
iters_per_inner_epoch: 2000
|
||||
max_iters: 60000
|
||||
|
||||
seed: 42
|
||||
output_dir: "../output/demo"
|
||||
|
||||
evaluate: True
|
||||
test_splits: [ "eval" ]
|
||||
|
||||
device: "cuda"
|
||||
world_size: 1
|
||||
dist_url: "env://"
|
||||
distributed: True
|
||||
distributed_type: ddp # or fsdp when training an LLM
|
||||
|
||||
generate_cfg:
|
||||
temperature: 0.0
|
|
@ -0,0 +1,32 @@
|
|||
---
|
||||
license: apache-2.0
|
||||
---
|
||||
## UniMERNet: A Universal Network for Mathematical Expression Recognition in Real-World Scenarios.
|
||||
|
||||
Visit our GitHub repository at [UniMERNet](https://github.com/opendatalab/unimernet) for more information.
|
||||
|
||||
## Citations
|
||||
```
|
||||
@misc{wang2024unimernet,
|
||||
title={UniMERNet: A Universal Network for Real-World Mathematical Expression Recognition},
|
||||
author={Bin Wang and Zhuangcheng Gu and Chao Xu and Bo Zhang and Botian Shi and Conghui He},
|
||||
year={2024},
|
||||
eprint={2404.15254},
|
||||
archivePrefix={arXiv},
|
||||
primaryClass={cs.CV}
|
||||
}
|
||||
|
||||
@misc{conghui2022opendatalab,
|
||||
author={He, Conghui and Li, Wei and Jin, Zhenjiang and Wang, Bin and Xu, Chao and Lin, Dahua},
|
||||
title={OpenDataLab: Empowering General Artificial Intelligence with Open Datasets},
|
||||
howpublished = {\url{https://opendatalab.com}},
|
||||
year={2022}
|
||||
}
|
||||
```
|
||||
|
||||
## MD5 checksums
|
||||
```
|
||||
97f4867b4ff4e9a96c8daba8aaa793b4 tokenizer_config.json
|
||||
351652071425d3d36a634ccc8efb22e8 tokenizer.json
|
||||
72b53a2152af43a57f8d5eebf8e31562 pytorch_model.pth
|
||||
```
|
|
@ -0,0 +1,193 @@
|
|||
{
|
||||
"_name_or_path": "unimernet/checkpoint-300000",
|
||||
"architectures": [
|
||||
"VisionEncoderDecoderModel"
|
||||
],
|
||||
"decoder": {
|
||||
"_name_or_path": "",
|
||||
"activation_dropout": 0.0,
|
||||
"activation_function": "gelu",
|
||||
"add_cross_attention": true,
|
||||
"add_final_layer_norm": true,
|
||||
"architectures": null,
|
||||
"attention_dropout": 0.0,
|
||||
"bad_words_ids": null,
|
||||
"begin_suppress_tokens": null,
|
||||
"bos_token_id": 0,
|
||||
"chunk_size_feed_forward": 0,
|
||||
"classifier_dropout": 0.0,
|
||||
"cross_attention_hidden_size": null,
|
||||
"d_model": 512,
|
||||
"decoder_attention_heads": 16,
|
||||
"decoder_ffn_dim": 2048,
|
||||
"decoder_layerdrop": 0.0,
|
||||
"decoder_layers": 8,
|
||||
"decoder_start_token_id": null,
|
||||
"diversity_penalty": 0.0,
|
||||
"do_sample": false,
|
||||
"dropout": 0.1,
|
||||
"early_stopping": false,
|
||||
"encoder_attention_heads": 16,
|
||||
"encoder_ffn_dim": 2048,
|
||||
"encoder_layerdrop": 0.0,
|
||||
"encoder_layers": 12,
|
||||
"encoder_no_repeat_ngram_size": 0,
|
||||
"eos_token_id": 2,
|
||||
"exponential_decay_length_penalty": null,
|
||||
"finetuning_task": null,
|
||||
"forced_bos_token_id": null,
|
||||
"forced_eos_token_id": 2,
|
||||
"id2label": {
|
||||
"0": "LABEL_0",
|
||||
"1": "LABEL_1"
|
||||
},
|
||||
"init_std": 0.02,
|
||||
"is_decoder": true,
|
||||
"is_encoder_decoder": false,
|
||||
"label2id": {
|
||||
"LABEL_0": 0,
|
||||
"LABEL_1": 1
|
||||
},
|
||||
"length_penalty": 1.0,
|
||||
"max_length": 20,
|
||||
"max_position_embeddings": 1536,
|
||||
"min_length": 0,
|
||||
"model_type": "mbart",
|
||||
"no_repeat_ngram_size": 0,
|
||||
"num_beam_groups": 1,
|
||||
"num_beams": 1,
|
||||
"num_hidden_layers": 12,
|
||||
"num_return_sequences": 1,
|
||||
"output_attentions": false,
|
||||
"output_hidden_states": false,
|
||||
"output_scores": false,
|
||||
"pad_token_id": 1,
|
||||
"prefix": null,
|
||||
"problem_type": null,
|
||||
"pruned_heads": {},
|
||||
"remove_invalid_values": false,
|
||||
"repetition_penalty": 1.0,
|
||||
"return_dict": true,
|
||||
"return_dict_in_generate": false,
|
||||
"scale_embedding": true,
|
||||
"sep_token_id": null,
|
||||
"suppress_tokens": null,
|
||||
"task_specific_params": null,
|
||||
"temperature": 1.0,
|
||||
"tf_legacy_loss": false,
|
||||
"tie_encoder_decoder": false,
|
||||
"tie_word_embeddings": false,
|
||||
"tokenizer_class": null,
|
||||
"top_k": 50,
|
||||
"top_p": 1.0,
|
||||
"torch_dtype": null,
|
||||
"torchscript": false,
|
||||
"typical_p": 1.0,
|
||||
"use_bfloat16": false,
|
||||
"use_cache": true,
|
||||
"vocab_size": 50000
|
||||
},
|
||||
"decoder_start_token_id": 0,
|
||||
"encoder": {
|
||||
"_name_or_path": "",
|
||||
"add_cross_attention": false,
|
||||
"architectures": null,
|
||||
"attention_probs_dropout_prob": 0.0,
|
||||
"bad_words_ids": null,
|
||||
"begin_suppress_tokens": null,
|
||||
"bos_token_id": null,
|
||||
"chunk_size_feed_forward": 0,
|
||||
"cross_attention_hidden_size": null,
|
||||
"decoder_start_token_id": null,
|
||||
"depths": [
|
||||
6,
|
||||
6,
|
||||
6,
|
||||
6
|
||||
],
|
||||
"diversity_penalty": 0.0,
|
||||
"do_sample": false,
|
||||
"drop_path_rate": 0.1,
|
||||
"early_stopping": false,
|
||||
"embed_dim": 64,
|
||||
"encoder_no_repeat_ngram_size": 0,
|
||||
"eos_token_id": null,
|
||||
"exponential_decay_length_penalty": null,
|
||||
"finetuning_task": null,
|
||||
"forced_bos_token_id": null,
|
||||
"forced_eos_token_id": null,
|
||||
"hidden_act": "gelu",
|
||||
"hidden_dropout_prob": 0.0,
|
||||
"hidden_size": 512,
|
||||
"id2label": {
|
||||
"0": "LABEL_0",
|
||||
"1": "LABEL_1"
|
||||
},
|
||||
"image_size": [
|
||||
420,
|
||||
420
|
||||
],
|
||||
"initializer_range": 0.02,
|
||||
"is_decoder": false,
|
||||
"is_encoder_decoder": false,
|
||||
"label2id": {
|
||||
"LABEL_0": 0,
|
||||
"LABEL_1": 1
|
||||
},
|
||||
"layer_norm_eps": 1e-05,
|
||||
"length_penalty": 1.0,
|
||||
"max_length": 20,
|
||||
"min_length": 0,
|
||||
"mlp_ratio": 4.0,
|
||||
"model_type": "donut-swin",
|
||||
"no_repeat_ngram_size": 0,
|
||||
"num_beam_groups": 1,
|
||||
"num_beams": 1,
|
||||
"num_channels": 3,
|
||||
"num_heads": [
|
||||
2,
|
||||
4,
|
||||
8,
|
||||
16
|
||||
],
|
||||
"num_layers": 4,
|
||||
"num_return_sequences": 1,
|
||||
"output_attentions": false,
|
||||
"output_hidden_states": false,
|
||||
"output_scores": false,
|
||||
"pad_token_id": null,
|
||||
"patch_size": 4,
|
||||
"path_norm": true,
|
||||
"prefix": null,
|
||||
"problem_type": null,
|
||||
"pruned_heads": {},
|
||||
"qkv_bias": true,
|
||||
"remove_invalid_values": false,
|
||||
"repetition_penalty": 1.0,
|
||||
"return_dict": true,
|
||||
"return_dict_in_generate": false,
|
||||
"sep_token_id": null,
|
||||
"suppress_tokens": null,
|
||||
"task_specific_params": null,
|
||||
"temperature": 1.0,
|
||||
"tf_legacy_loss": false,
|
||||
"tie_encoder_decoder": false,
|
||||
"tie_word_embeddings": true,
|
||||
"tokenizer_class": null,
|
||||
"top_k": 50,
|
||||
"top_p": 1.0,
|
||||
"torch_dtype": null,
|
||||
"torchscript": false,
|
||||
"typical_p": 1.0,
|
||||
"use_2d_embeddings": false,
|
||||
"use_absolute_embeddings": false,
|
||||
"use_bfloat16": false,
|
||||
"window_size": 5
|
||||
},
|
||||
"is_encoder_decoder": true,
|
||||
"model_type": "vision-encoder-decoder",
|
||||
"pad_token_id": 1,
|
||||
"tie_word_embeddings": false,
|
||||
"torch_dtype": "float16",
|
||||
"transformers_version": "4.36.0"
|
||||
}
|
|
@ -0,0 +1,36 @@
|
|||
{
|
||||
"do_align_long_axis": false,
|
||||
"do_normalize": false,
|
||||
"do_pad": false,
|
||||
"do_rescale": false,
|
||||
"do_resize": false,
|
||||
"do_thumbnail": false,
|
||||
"feature_extractor_type": "DonutFeatureExtractor",
|
||||
"image_mean": [
|
||||
0.485,
|
||||
0.456,
|
||||
0.406
|
||||
],
|
||||
"image_processor_type": "VariableDonutImageProcessor",
|
||||
"image_std": [
|
||||
0.229,
|
||||
0.224,
|
||||
0.225
|
||||
],
|
||||
"max_size": {
|
||||
"height": 192,
|
||||
"width": 672
|
||||
},
|
||||
"patch_size": [
|
||||
4,
|
||||
4
|
||||
],
|
||||
"processor_class": "VariableDonutProcessor",
|
||||
"resample": 2,
|
||||
"rescale_factor": 0.00392156862745098,
|
||||
"size": [
|
||||
192,
|
||||
672
|
||||
],
|
||||
"train": false
|
||||
}
|
Binary file not shown.
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,205 @@
|
|||
{
|
||||
"added_tokens_decoder": {
|
||||
"0": {
|
||||
"content": "<s>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"1": {
|
||||
"content": "<pad>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"2": {
|
||||
"content": "</s>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"3": {
|
||||
"content": "<unk>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"4": {
|
||||
"content": "[START_REF]",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"5": {
|
||||
"content": "[END_REF]",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"6": {
|
||||
"content": "[IMAGE]",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"7": {
|
||||
"content": "<fragments>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"8": {
|
||||
"content": "</fragments>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"9": {
|
||||
"content": "<work>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"10": {
|
||||
"content": "</work>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"11": {
|
||||
"content": "[START_SUP]",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"12": {
|
||||
"content": "[END_SUP]",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"13": {
|
||||
"content": "[START_SUB]",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"14": {
|
||||
"content": "[END_SUB]",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"15": {
|
||||
"content": "[START_DNA]",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"16": {
|
||||
"content": "[END_DNA]",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"17": {
|
||||
"content": "[START_AMINO]",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"18": {
|
||||
"content": "[END_AMINO]",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"19": {
|
||||
"content": "[START_SMILES]",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"20": {
|
||||
"content": "[END_SMILES]",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"21": {
|
||||
"content": "[START_I_SMILES]",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"22": {
|
||||
"content": "[END_I_SMILES]",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
}
|
||||
},
|
||||
"additional_special_tokens": [],
|
||||
"bos_token": "<s>",
|
||||
"clean_up_tokenization_spaces": false,
|
||||
"eos_token": "</s>",
|
||||
"max_length": 4096,
|
||||
"model_max_length": 768,
|
||||
"pad_to_multiple_of": null,
|
||||
"pad_token": "<pad>",
|
||||
"pad_token_type_id": 0,
|
||||
"padding_side": "right",
|
||||
"processor_class": "VariableDonutProcessor",
|
||||
"stride": 0,
|
||||
"tokenizer_class": "NougatTokenizer",
|
||||
"truncation_side": "right",
|
||||
"truncation_strategy": "longest_first",
|
||||
"unk_token": "<unk>",
|
||||
"vocab_file": null
|
||||
}
|
|
@ -0,0 +1,46 @@
|
|||
model:
|
||||
arch: unimernet
|
||||
model_type: unimernet
|
||||
model_config:
|
||||
model_name: ./models/unimernet_tiny
|
||||
max_seq_len: 1536
|
||||
|
||||
load_pretrained: True
|
||||
pretrained: './models/unimernet_tiny/pytorch_model.pth'
|
||||
tokenizer_config:
|
||||
path: ./models/unimernet_tiny
|
||||
|
||||
datasets:
|
||||
formula_rec_eval:
|
||||
vis_processor:
|
||||
eval:
|
||||
name: "formula_image_eval"
|
||||
image_size:
|
||||
- 192
|
||||
- 672
|
||||
|
||||
run:
|
||||
runner: runner_iter
|
||||
task: unimernet_train
|
||||
|
||||
batch_size_train: 64
|
||||
batch_size_eval: 64
|
||||
num_workers: 1
|
||||
|
||||
iters_per_inner_epoch: 2000
|
||||
max_iters: 60000
|
||||
|
||||
seed: 42
|
||||
output_dir: "../output/demo"
|
||||
|
||||
evaluate: True
|
||||
test_splits: [ "eval" ]
|
||||
|
||||
device: "cuda"
|
||||
world_size: 1
|
||||
dist_url: "env://"
|
||||
distributed: True
|
||||
distributed_type: ddp # or fsdp when training an LLM
|
||||
|
||||
generate_cfg:
|
||||
temperature: 0.0
|
|
@ -0,0 +1,70 @@
|
|||
### Install Git LFS
|
||||
Before you begin, make sure Git Large File Storage (Git LFS) is installed on your system. Once it is installed, initialize it for your user account with the following command:
|
||||
|
||||
```bash
|
||||
git lfs install
|
||||
```
|
||||
|
||||
### Download the Model from Hugging Face
|
||||
To download the `PDF-Extract-Kit` model from Hugging Face, use the following command:
|
||||
|
||||
```bash
|
||||
git lfs clone https://huggingface.co/wanderkid/PDF-Extract-Kit
|
||||
```
|
||||
|
||||
Ensure that Git LFS is enabled during the clone to properly download all large files.
|
||||
|
||||
|
||||
|
||||
### Download the Model from ModelScope
|
||||
|
||||
#### SDK Download
|
||||
|
||||
```bash
|
||||
# First, install the ModelScope library using pip:
|
||||
pip install modelscope
|
||||
```
|
||||
|
||||
```python
|
||||
# Use the following Python code to download the model using the ModelScope SDK:
|
||||
from modelscope import snapshot_download
|
||||
model_dir = snapshot_download('wanderkid/PDF-Extract-Kit')
|
||||
```
|
||||
|
||||
#### Git Download
|
||||
Alternatively, you can use Git to clone the model repository from ModelScope:
|
||||
|
||||
```bash
|
||||
git clone https://www.modelscope.cn/wanderkid/PDF-Extract-Kit.git
|
||||
```
|
||||
|
||||
|
||||
Place the downloaded model files in the following directory structure:
|
||||
|
||||
```
|
||||
./
|
||||
├── Layout
|
||||
│ ├── config.json
|
||||
│ └── model_final.pth
|
||||
├── MFD
|
||||
│ └── weights.pt
|
||||
├── MFR
|
||||
│ └── UniMERNet
|
||||
│ ├── config.json
|
||||
│ ├── preprocessor_config.json
|
||||
│ ├── pytorch_model.bin
|
||||
│ ├── README.md
|
||||
│ ├── tokenizer_config.json
|
||||
│ └── tokenizer.json
|
||||
├── TabRec
|
||||
│ └── StructEqTable
|
||||
│ ├── config.json
|
||||
│ ├── generation_config.json
|
||||
│ ├── model.safetensors
|
||||
│ ├── preprocessor_config.json
|
||||
│ ├── special_tokens_map.json
|
||||
│ ├── spiece.model
|
||||
│ ├── tokenizer_config.json
|
||||
│ └── tokenizer.json
|
||||
└── README.md
|
||||
```
|
|
@ -0,0 +1,36 @@
|
|||
{
|
||||
"_name_or_path": "/cpfs01/user/zhouhongbin/code/StructEqTable-deepspeed/ckpt/pretrained/pix2struct-base-zh",
|
||||
"architectures": [
|
||||
"Pix2StructForConditionalGeneration"
|
||||
],
|
||||
"decoder_start_token_id": 0,
|
||||
"eos_token_id": 1,
|
||||
"initializer_factor": 1.0,
|
||||
"initializer_range": 0.02,
|
||||
"is_encoder_decoder": true,
|
||||
"is_vqa": false,
|
||||
"model_type": "pix2struct",
|
||||
"pad_token_id": 0,
|
||||
"text_config": {
|
||||
"dropout_rate": 0.2,
|
||||
"encoder_hidden_size": 768,
|
||||
"initializer_range": 0.02,
|
||||
"model_type": "pix2struct_text_model",
|
||||
"vocab_size": 77078
|
||||
},
|
||||
"tie_word_embeddings": false,
|
||||
"torch_dtype": "float32",
|
||||
"transformers_version": "4.37.2",
|
||||
"use_cache": false,
|
||||
"vision_config": {
|
||||
"attention_dropout": 0.2,
|
||||
"dropout_rate": 0.2,
|
||||
"hidden_dropout_prob": 0.2,
|
||||
"initializer_range": 0.02,
|
||||
"layer_norm_bias": false,
|
||||
"model_type": "pix2struct_vision_model",
|
||||
"num_channels": 3,
|
||||
"patch_size": 16,
|
||||
"projection_dim": 768
|
||||
}
|
||||
}
|
|
@ -0,0 +1,8 @@
|
|||
{
|
||||
"_from_model_config": true,
|
||||
"decoder_start_token_id": 0,
|
||||
"eos_token_id": 1,
|
||||
"pad_token_id": 0,
|
||||
"transformers_version": "4.37.2",
|
||||
"use_cache": false
|
||||
}
|
Binary file not shown.
|
@ -0,0 +1,12 @@
|
|||
{
|
||||
"do_convert_rgb": true,
|
||||
"do_normalize": true,
|
||||
"image_processor_type": "Pix2StructImageProcessor",
|
||||
"is_vqa": false,
|
||||
"max_patches": 4096,
|
||||
"patch_size": {
|
||||
"height": 16,
|
||||
"width": 16
|
||||
},
|
||||
"processor_class": "Pix2StructProcessor"
|
||||
}
|
|
@ -0,0 +1,125 @@
|
|||
{
|
||||
"additional_special_tokens": [
|
||||
"<extra_id_0>",
|
||||
"<extra_id_1>",
|
||||
"<extra_id_2>",
|
||||
"<extra_id_3>",
|
||||
"<extra_id_4>",
|
||||
"<extra_id_5>",
|
||||
"<extra_id_6>",
|
||||
"<extra_id_7>",
|
||||
"<extra_id_8>",
|
||||
"<extra_id_9>",
|
||||
"<extra_id_10>",
|
||||
"<extra_id_11>",
|
||||
"<extra_id_12>",
|
||||
"<extra_id_13>",
|
||||
"<extra_id_14>",
|
||||
"<extra_id_15>",
|
||||
"<extra_id_16>",
|
||||
"<extra_id_17>",
|
||||
"<extra_id_18>",
|
||||
"<extra_id_19>",
|
||||
"<extra_id_20>",
|
||||
"<extra_id_21>",
|
||||
"<extra_id_22>",
|
||||
"<extra_id_23>",
|
||||
"<extra_id_24>",
|
||||
"<extra_id_25>",
|
||||
"<extra_id_26>",
|
||||
"<extra_id_27>",
|
||||
"<extra_id_28>",
|
||||
"<extra_id_29>",
|
||||
"<extra_id_30>",
|
||||
"<extra_id_31>",
|
||||
"<extra_id_32>",
|
||||
"<extra_id_33>",
|
||||
"<extra_id_34>",
|
||||
"<extra_id_35>",
|
||||
"<extra_id_36>",
|
||||
"<extra_id_37>",
|
||||
"<extra_id_38>",
|
||||
"<extra_id_39>",
|
||||
"<extra_id_40>",
|
||||
"<extra_id_41>",
|
||||
"<extra_id_42>",
|
||||
"<extra_id_43>",
|
||||
"<extra_id_44>",
|
||||
"<extra_id_45>",
|
||||
"<extra_id_46>",
|
||||
"<extra_id_47>",
|
||||
"<extra_id_48>",
|
||||
"<extra_id_49>",
|
||||
"<extra_id_50>",
|
||||
"<extra_id_51>",
|
||||
"<extra_id_52>",
|
||||
"<extra_id_53>",
|
||||
"<extra_id_54>",
|
||||
"<extra_id_55>",
|
||||
"<extra_id_56>",
|
||||
"<extra_id_57>",
|
||||
"<extra_id_58>",
|
||||
"<extra_id_59>",
|
||||
"<extra_id_60>",
|
||||
"<extra_id_61>",
|
||||
"<extra_id_62>",
|
||||
"<extra_id_63>",
|
||||
"<extra_id_64>",
|
||||
"<extra_id_65>",
|
||||
"<extra_id_66>",
|
||||
"<extra_id_67>",
|
||||
"<extra_id_68>",
|
||||
"<extra_id_69>",
|
||||
"<extra_id_70>",
|
||||
"<extra_id_71>",
|
||||
"<extra_id_72>",
|
||||
"<extra_id_73>",
|
||||
"<extra_id_74>",
|
||||
"<extra_id_75>",
|
||||
"<extra_id_76>",
|
||||
"<extra_id_77>",
|
||||
"<extra_id_78>",
|
||||
"<extra_id_79>",
|
||||
"<extra_id_80>",
|
||||
"<extra_id_81>",
|
||||
"<extra_id_82>",
|
||||
"<extra_id_83>",
|
||||
"<extra_id_84>",
|
||||
"<extra_id_85>",
|
||||
"<extra_id_86>",
|
||||
"<extra_id_87>",
|
||||
"<extra_id_88>",
|
||||
"<extra_id_89>",
|
||||
"<extra_id_90>",
|
||||
"<extra_id_91>",
|
||||
"<extra_id_92>",
|
||||
"<extra_id_93>",
|
||||
"<extra_id_94>",
|
||||
"<extra_id_95>",
|
||||
"<extra_id_96>",
|
||||
"<extra_id_97>",
|
||||
"<extra_id_98>",
|
||||
"<extra_id_99>"
|
||||
],
|
||||
"eos_token": {
|
||||
"content": "</s>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
},
|
||||
"pad_token": {
|
||||
"content": "<pad>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
},
|
||||
"unk_token": {
|
||||
"content": "<unk>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
}
|
||||
}
|
Binary file not shown.
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,940 @@
|
|||
{
|
||||
"added_tokens_decoder": {
|
||||
"0": {
|
||||
"content": "<pad>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"1": {
|
||||
"content": "</s>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"3": {
|
||||
"content": "<unk>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"76978": {
|
||||
"content": "<extra_id_99>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"76979": {
|
||||
"content": "<extra_id_98>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"76980": {
|
||||
"content": "<extra_id_97>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"76981": {
|
||||
"content": "<extra_id_96>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"76982": {
|
||||
"content": "<extra_id_95>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"76983": {
|
||||
"content": "<extra_id_94>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"76984": {
|
||||
"content": "<extra_id_93>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"76985": {
|
||||
"content": "<extra_id_92>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"76986": {
|
||||
"content": "<extra_id_91>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"76987": {
|
||||
"content": "<extra_id_90>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"76988": {
|
||||
"content": "<extra_id_89>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"76989": {
|
||||
"content": "<extra_id_88>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"76990": {
|
||||
"content": "<extra_id_87>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"76991": {
|
||||
"content": "<extra_id_86>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"76992": {
|
||||
"content": "<extra_id_85>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"76993": {
|
||||
"content": "<extra_id_84>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"76994": {
|
||||
"content": "<extra_id_83>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"76995": {
|
||||
"content": "<extra_id_82>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"76996": {
|
||||
"content": "<extra_id_81>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"76997": {
|
||||
"content": "<extra_id_80>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"76998": {
|
||||
"content": "<extra_id_79>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"76999": {
|
||||
"content": "<extra_id_78>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"77000": {
|
||||
"content": "<extra_id_77>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"77001": {
|
||||
"content": "<extra_id_76>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"77002": {
|
||||
"content": "<extra_id_75>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"77003": {
|
||||
"content": "<extra_id_74>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"77004": {
|
||||
"content": "<extra_id_73>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"77005": {
|
||||
"content": "<extra_id_72>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"77006": {
|
||||
"content": "<extra_id_71>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"77007": {
|
||||
"content": "<extra_id_70>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"77008": {
|
||||
"content": "<extra_id_69>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"77009": {
|
||||
"content": "<extra_id_68>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"77010": {
|
||||
"content": "<extra_id_67>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"77011": {
|
||||
"content": "<extra_id_66>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"77012": {
|
||||
"content": "<extra_id_65>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"77013": {
|
||||
"content": "<extra_id_64>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"77014": {
|
||||
"content": "<extra_id_63>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"77015": {
|
||||
"content": "<extra_id_62>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"77016": {
|
||||
"content": "<extra_id_61>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"77017": {
|
||||
"content": "<extra_id_60>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"77018": {
|
||||
"content": "<extra_id_59>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"77019": {
|
||||
"content": "<extra_id_58>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"77020": {
|
||||
"content": "<extra_id_57>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"77021": {
|
||||
"content": "<extra_id_56>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"77022": {
|
||||
"content": "<extra_id_55>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"77023": {
|
||||
"content": "<extra_id_54>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"77024": {
|
||||
"content": "<extra_id_53>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"77025": {
|
||||
"content": "<extra_id_52>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"77026": {
|
||||
"content": "<extra_id_51>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"77027": {
|
||||
"content": "<extra_id_50>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"77028": {
|
||||
"content": "<extra_id_49>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"77029": {
|
||||
"content": "<extra_id_48>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"77030": {
|
||||
"content": "<extra_id_47>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"77031": {
|
||||
"content": "<extra_id_46>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"77032": {
|
||||
"content": "<extra_id_45>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"77033": {
|
||||
"content": "<extra_id_44>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"77034": {
|
||||
"content": "<extra_id_43>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"77035": {
|
||||
"content": "<extra_id_42>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"77036": {
|
||||
"content": "<extra_id_41>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"77037": {
|
||||
"content": "<extra_id_40>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"77038": {
|
||||
"content": "<extra_id_39>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"77039": {
|
||||
"content": "<extra_id_38>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"77040": {
|
||||
"content": "<extra_id_37>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"77041": {
|
||||
"content": "<extra_id_36>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"77042": {
|
||||
"content": "<extra_id_35>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"77043": {
|
||||
"content": "<extra_id_34>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"77044": {
|
||||
"content": "<extra_id_33>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"77045": {
|
||||
"content": "<extra_id_32>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"77046": {
|
||||
"content": "<extra_id_31>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"77047": {
|
||||
"content": "<extra_id_30>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"77048": {
|
||||
"content": "<extra_id_29>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"77049": {
|
||||
"content": "<extra_id_28>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"77050": {
|
||||
"content": "<extra_id_27>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"77051": {
|
||||
"content": "<extra_id_26>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"77052": {
|
||||
"content": "<extra_id_25>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"77053": {
|
||||
"content": "<extra_id_24>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"77054": {
|
||||
"content": "<extra_id_23>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"77055": {
|
||||
"content": "<extra_id_22>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"77056": {
|
||||
"content": "<extra_id_21>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"77057": {
|
||||
"content": "<extra_id_20>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"77058": {
|
||||
"content": "<extra_id_19>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"77059": {
|
||||
"content": "<extra_id_18>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"77060": {
|
||||
"content": "<extra_id_17>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"77061": {
|
||||
"content": "<extra_id_16>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"77062": {
|
||||
"content": "<extra_id_15>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"77063": {
|
||||
"content": "<extra_id_14>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"77064": {
|
||||
"content": "<extra_id_13>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"77065": {
|
||||
"content": "<extra_id_12>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"77066": {
|
||||
"content": "<extra_id_11>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"77067": {
|
||||
"content": "<extra_id_10>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"77068": {
|
||||
"content": "<extra_id_9>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"77069": {
|
||||
"content": "<extra_id_8>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"77070": {
|
||||
"content": "<extra_id_7>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"77071": {
|
||||
"content": "<extra_id_6>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"77072": {
|
||||
"content": "<extra_id_5>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"77073": {
|
||||
"content": "<extra_id_4>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"77074": {
|
||||
"content": "<extra_id_3>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"77075": {
|
||||
"content": "<extra_id_2>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"77076": {
|
||||
"content": "<extra_id_1>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"77077": {
|
||||
"content": "<extra_id_0>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
}
|
||||
},
|
||||
"additional_special_tokens": [
|
||||
"<extra_id_0>",
|
||||
"<extra_id_1>",
|
||||
"<extra_id_2>",
|
||||
"<extra_id_3>",
|
||||
"<extra_id_4>",
|
||||
"<extra_id_5>",
|
||||
"<extra_id_6>",
|
||||
"<extra_id_7>",
|
||||
"<extra_id_8>",
|
||||
"<extra_id_9>",
|
||||
"<extra_id_10>",
|
||||
"<extra_id_11>",
|
||||
"<extra_id_12>",
|
||||
"<extra_id_13>",
|
||||
"<extra_id_14>",
|
||||
"<extra_id_15>",
|
||||
"<extra_id_16>",
|
||||
"<extra_id_17>",
|
||||
"<extra_id_18>",
|
||||
"<extra_id_19>",
|
||||
"<extra_id_20>",
|
||||
"<extra_id_21>",
|
||||
"<extra_id_22>",
|
||||
"<extra_id_23>",
|
||||
"<extra_id_24>",
|
||||
"<extra_id_25>",
|
||||
"<extra_id_26>",
|
||||
"<extra_id_27>",
|
||||
"<extra_id_28>",
|
||||
"<extra_id_29>",
|
||||
"<extra_id_30>",
|
||||
"<extra_id_31>",
|
||||
"<extra_id_32>",
|
||||
"<extra_id_33>",
|
||||
"<extra_id_34>",
|
||||
"<extra_id_35>",
|
||||
"<extra_id_36>",
|
||||
"<extra_id_37>",
|
||||
"<extra_id_38>",
|
||||
"<extra_id_39>",
|
||||
"<extra_id_40>",
|
||||
"<extra_id_41>",
|
||||
"<extra_id_42>",
|
||||
"<extra_id_43>",
|
||||
"<extra_id_44>",
|
||||
"<extra_id_45>",
|
||||
"<extra_id_46>",
|
||||
"<extra_id_47>",
|
||||
"<extra_id_48>",
|
||||
"<extra_id_49>",
|
||||
"<extra_id_50>",
|
||||
"<extra_id_51>",
|
||||
"<extra_id_52>",
|
||||
"<extra_id_53>",
|
||||
"<extra_id_54>",
|
||||
"<extra_id_55>",
|
||||
"<extra_id_56>",
|
||||
"<extra_id_57>",
|
||||
"<extra_id_58>",
|
||||
"<extra_id_59>",
|
||||
"<extra_id_60>",
|
||||
"<extra_id_61>",
|
||||
"<extra_id_62>",
|
||||
"<extra_id_63>",
|
||||
"<extra_id_64>",
|
||||
"<extra_id_65>",
|
||||
"<extra_id_66>",
|
||||
"<extra_id_67>",
|
||||
"<extra_id_68>",
|
||||
"<extra_id_69>",
|
||||
"<extra_id_70>",
|
||||
"<extra_id_71>",
|
||||
"<extra_id_72>",
|
||||
"<extra_id_73>",
|
||||
"<extra_id_74>",
|
||||
"<extra_id_75>",
|
||||
"<extra_id_76>",
|
||||
"<extra_id_77>",
|
||||
"<extra_id_78>",
|
||||
"<extra_id_79>",
|
||||
"<extra_id_80>",
|
||||
"<extra_id_81>",
|
||||
"<extra_id_82>",
|
||||
"<extra_id_83>",
|
||||
"<extra_id_84>",
|
||||
"<extra_id_85>",
|
||||
"<extra_id_86>",
|
||||
"<extra_id_87>",
|
||||
"<extra_id_88>",
|
||||
"<extra_id_89>",
|
||||
"<extra_id_90>",
|
||||
"<extra_id_91>",
|
||||
"<extra_id_92>",
|
||||
"<extra_id_93>",
|
||||
"<extra_id_94>",
|
||||
"<extra_id_95>",
|
||||
"<extra_id_96>",
|
||||
"<extra_id_97>",
|
||||
"<extra_id_98>",
|
||||
"<extra_id_99>"
|
||||
],
|
||||
"clean_up_tokenization_spaces": true,
|
||||
"eos_token": "</s>",
|
||||
"extra_ids": 100,
|
||||
"legacy": true,
|
||||
"model_max_length": 1000000000000000019884624838656,
|
||||
"pad_token": "<pad>",
|
||||
"processor_class": "Pix2StructProcessor",
|
||||
"sp_model_kwargs": {},
|
||||
"tokenizer_class": "T5Tokenizer",
|
||||
"unk_token": "<unk>"
|
||||
}
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,39 @@
|
|||
<thead>
|
||||
<tr>
|
||||
<td></td>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<eb></eb>
|
||||
</tbody>
|
||||
<td
|
||||
colspan="5"
|
||||
>
|
||||
</td>
|
||||
colspan="2"
|
||||
colspan="3"
|
||||
<eb2></eb2>
|
||||
<eb1></eb1>
|
||||
rowspan="2"
|
||||
colspan="4"
|
||||
colspan="6"
|
||||
rowspan="3"
|
||||
colspan="9"
|
||||
colspan="10"
|
||||
colspan="7"
|
||||
rowspan="4"
|
||||
rowspan="5"
|
||||
rowspan="9"
|
||||
colspan="8"
|
||||
rowspan="8"
|
||||
rowspan="6"
|
||||
rowspan="7"
|
||||
rowspan="10"
|
||||
<eb3></eb3>
|
||||
<eb4></eb4>
|
||||
<eb5></eb5>
|
||||
<eb6></eb6>
|
||||
<eb7></eb7>
|
||||
<eb8></eb8>
|
||||
<eb9></eb9>
|
||||
<eb10></eb10>
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading…
Reference in New Issue