first commit

This commit is contained in:
xxl 2024-11-21 16:21:33 +08:00
parent f970a9f5d7
commit 7c5a442811
51 changed files with 719559 additions and 2 deletions

View File

@ -1,3 +1,70 @@
# PDF-Extract-Kit_a13672718289465344129244
### Install Git LFS
Before you begin, make sure Git Large File Storage (Git LFS) is installed on your system. Once it is installed, enable it for your user account with the following command:
PDF-Extract-Kit
```bash
git lfs install
```
### Download the Model from Hugging Face
To download the `PDF-Extract-Kit` model from Hugging Face, use the following command:
```bash
git lfs clone https://huggingface.co/opendatalab/PDF-Extract-Kit
```
Ensure that Git LFS is enabled during the clone to properly download all large files.
### Download the Model from ModelScope
#### SDK Download
```bash
# First, install the ModelScope library using pip:
pip install modelscope
```
```python
# Use the following Python code to download the model using the ModelScope SDK:
from modelscope import snapshot_download
model_dir = snapshot_download('opendatalab/PDF-Extract-Kit')
```
#### Git Download
Alternatively, you can use Git to clone the model repository from ModelScope:
```bash
git clone https://www.modelscope.cn/opendatalab/PDF-Extract-Kit.git
```
Put the downloaded model files here, using the following directory layout:
```
./
├── Layout
│ ├── config.json
│ └── model_final.pth
├── MFD
│ └── weights.pt
├── MFR
│ └── UniMERNet
│ ├── config.json
│ ├── preprocessor_config.json
│ ├── pytorch_model.bin
│ ├── README.md
│ ├── tokenizer_config.json
│ └── tokenizer.json
├── TabRec
│ └── StructEqTable
│ ├── config.json
│ ├── generation_config.json
│ ├── model.safetensors
│ ├── preprocessor_config.json
│ ├── special_tokens_map.json
│ ├── spiece.model
│ ├── tokenizer_config.json
│ └── tokenizer.json
└── README.md
```

33
models/Layout/config.json Normal file
View File

@ -0,0 +1,33 @@
{
"attention_probs_dropout_prob": 0.1,
"bos_token_id": 0,
"classifier_dropout": null,
"coordinate_size": 128,
"eos_token_id": 2,
"has_relative_attention_bias": true,
"has_spatial_attention_bias": true,
"hidden_act": "gelu",
"hidden_dropout_prob": 0.1,
"hidden_size": 768,
"initializer_range": 0.02,
"input_size": 224,
"intermediate_size": 3072,
"layer_norm_eps": 1e-05,
"max_2d_position_embeddings": 1024,
"max_position_embeddings": 514,
"max_rel_2d_pos": 256,
"max_rel_pos": 128,
"model_type": "layoutlmv3",
"num_attention_heads": 12,
"num_hidden_layers": 12,
"pad_token_id": 1,
"rel_2d_pos_bins": 64,
"rel_pos_bins": 32,
"second_input_size": 112,
"shape_size": 128,
"torch_dtype": "float32",
"transformers_version": "4.12.5",
"type_vocab_size": 1,
"visual_embed": true,
"vocab_size": 250002
}

BIN
models/Layout/model_final.pth (Stored with Git LFS) Normal file

Binary file not shown.

BIN
models/MFD/weights.pt (Stored with Git LFS) Normal file

Binary file not shown.

View File

@ -0,0 +1,6 @@
---
license: apache-2.0
---
UniMERNet: A Universal Network for Mathematical Expression Recognition in Real-World Scenarios.
Visit our GitHub repository at [unimernet](https://github.com/opendatalab/unimernet) for more information.

View File

@ -0,0 +1,193 @@
{
"_name_or_path": "unimernet/checkpoint-180000",
"architectures": [
"VisionEncoderDecoderModel"
],
"decoder": {
"_name_or_path": "",
"activation_dropout": 0.0,
"activation_function": "gelu",
"add_cross_attention": true,
"add_final_layer_norm": true,
"architectures": null,
"attention_dropout": 0.0,
"bad_words_ids": null,
"begin_suppress_tokens": null,
"bos_token_id": 0,
"chunk_size_feed_forward": 0,
"classifier_dropout": 0.0,
"cross_attention_hidden_size": null,
"d_model": 1024,
"decoder_attention_heads": 16,
"decoder_ffn_dim": 4096,
"decoder_layerdrop": 0.0,
"decoder_layers": 8,
"decoder_start_token_id": null,
"diversity_penalty": 0.0,
"do_sample": false,
"dropout": 0.1,
"early_stopping": false,
"encoder_attention_heads": 16,
"encoder_ffn_dim": 4096,
"encoder_layerdrop": 0.0,
"encoder_layers": 12,
"encoder_no_repeat_ngram_size": 0,
"eos_token_id": 2,
"exponential_decay_length_penalty": null,
"finetuning_task": null,
"forced_bos_token_id": null,
"forced_eos_token_id": 2,
"id2label": {
"0": "LABEL_0",
"1": "LABEL_1"
},
"init_std": 0.02,
"is_decoder": true,
"is_encoder_decoder": false,
"label2id": {
"LABEL_0": 0,
"LABEL_1": 1
},
"length_penalty": 1.0,
"max_length": 20,
"max_position_embeddings": 1536,
"min_length": 0,
"model_type": "mbart",
"no_repeat_ngram_size": 0,
"num_beam_groups": 1,
"num_beams": 1,
"num_hidden_layers": 12,
"num_return_sequences": 1,
"output_attentions": false,
"output_hidden_states": false,
"output_scores": false,
"pad_token_id": 1,
"prefix": null,
"problem_type": null,
"pruned_heads": {},
"remove_invalid_values": false,
"repetition_penalty": 1.0,
"return_dict": true,
"return_dict_in_generate": false,
"scale_embedding": true,
"sep_token_id": null,
"suppress_tokens": null,
"task_specific_params": null,
"temperature": 1.0,
"tf_legacy_loss": false,
"tie_encoder_decoder": false,
"tie_word_embeddings": false,
"tokenizer_class": null,
"top_k": 50,
"top_p": 1.0,
"torch_dtype": null,
"torchscript": false,
"typical_p": 1.0,
"use_bfloat16": false,
"use_cache": true,
"vocab_size": 50000
},
"decoder_start_token_id": 0,
"encoder": {
"_name_or_path": "",
"add_cross_attention": false,
"architectures": null,
"attention_probs_dropout_prob": 0.0,
"bad_words_ids": null,
"begin_suppress_tokens": null,
"bos_token_id": null,
"chunk_size_feed_forward": 0,
"cross_attention_hidden_size": null,
"decoder_start_token_id": null,
"depths": [
2,
2,
14,
2
],
"diversity_penalty": 0.0,
"do_sample": false,
"drop_path_rate": 0.1,
"early_stopping": false,
"embed_dim": 128,
"encoder_no_repeat_ngram_size": 0,
"eos_token_id": null,
"exponential_decay_length_penalty": null,
"finetuning_task": null,
"forced_bos_token_id": null,
"forced_eos_token_id": null,
"hidden_act": "gelu",
"hidden_dropout_prob": 0.0,
"hidden_size": 1024,
"id2label": {
"0": "LABEL_0",
"1": "LABEL_1"
},
"image_size": [
420,
420
],
"initializer_range": 0.02,
"is_decoder": false,
"is_encoder_decoder": false,
"label2id": {
"LABEL_0": 0,
"LABEL_1": 1
},
"layer_norm_eps": 1e-05,
"length_penalty": 1.0,
"max_length": 20,
"min_length": 0,
"mlp_ratio": 4.0,
"model_type": "donut-swin",
"no_repeat_ngram_size": 0,
"num_beam_groups": 1,
"num_beams": 1,
"num_channels": 3,
"num_heads": [
4,
8,
16,
32
],
"num_layers": 4,
"num_return_sequences": 1,
"output_attentions": false,
"output_hidden_states": false,
"output_scores": false,
"pad_token_id": null,
"patch_size": 4,
"path_norm": true,
"prefix": null,
"problem_type": null,
"pruned_heads": {},
"qkv_bias": true,
"remove_invalid_values": false,
"repetition_penalty": 1.0,
"return_dict": true,
"return_dict_in_generate": false,
"sep_token_id": null,
"suppress_tokens": null,
"task_specific_params": null,
"temperature": 1.0,
"tf_legacy_loss": false,
"tie_encoder_decoder": false,
"tie_word_embeddings": true,
"tokenizer_class": null,
"top_k": 50,
"top_p": 1.0,
"torch_dtype": null,
"torchscript": false,
"typical_p": 1.0,
"use_2d_embeddings": false,
"use_absolute_embeddings": false,
"use_bfloat16": false,
"window_size": 5
},
"is_encoder_decoder": true,
"model_type": "vision-encoder-decoder",
"pad_token_id": 1,
"tie_word_embeddings": false,
"torch_dtype": "float32",
"transformers_version": "4.40.0"
}

View File

@ -0,0 +1,36 @@
{
"do_align_long_axis": false,
"do_normalize": false,
"do_pad": false,
"do_rescale": false,
"do_resize": false,
"do_thumbnail": false,
"feature_extractor_type": "DonutFeatureExtractor",
"image_mean": [
0.485,
0.456,
0.406
],
"image_processor_type": "VariableDonutImageProcessor",
"image_std": [
0.229,
0.224,
0.225
],
"max_size": {
"height": 192,
"width": 672
},
"patch_size": [
4,
4
],
"processor_class": "VariableDonutProcessor",
"resample": 2,
"rescale_factor": 0.00392156862745098,
"size": [
192,
672
],
"train": false
}

BIN
models/MFR/UniMERNet/pytorch_model.bin (Stored with Git LFS) Normal file

Binary file not shown.

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,205 @@
{
"added_tokens_decoder": {
"0": {
"content": "<s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"1": {
"content": "<pad>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"2": {
"content": "</s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"3": {
"content": "<unk>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"4": {
"content": "[START_REF]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"5": {
"content": "[END_REF]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"6": {
"content": "[IMAGE]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"7": {
"content": "<fragments>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"8": {
"content": "</fragments>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"9": {
"content": "<work>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"10": {
"content": "</work>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"11": {
"content": "[START_SUP]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"12": {
"content": "[END_SUP]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"13": {
"content": "[START_SUB]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14": {
"content": "[END_SUB]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"15": {
"content": "[START_DNA]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"16": {
"content": "[END_DNA]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"17": {
"content": "[START_AMINO]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"18": {
"content": "[END_AMINO]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"19": {
"content": "[START_SMILES]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"20": {
"content": "[END_SMILES]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"21": {
"content": "[START_I_SMILES]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"22": {
"content": "[END_I_SMILES]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
}
},
"additional_special_tokens": [],
"bos_token": "<s>",
"clean_up_tokenization_spaces": false,
"eos_token": "</s>",
"max_length": 4096,
"model_max_length": 768,
"pad_to_multiple_of": null,
"pad_token": "<pad>",
"pad_token_type_id": 0,
"padding_side": "right",
"processor_class": "VariableDonutProcessor",
"stride": 0,
"tokenizer_class": "NougatTokenizer",
"truncation_side": "right",
"truncation_strategy": "longest_first",
"unk_token": "<unk>",
"vocab_file": null
}

View File

@ -0,0 +1,32 @@
---
license: apache-2.0
---
## UniMERNet: A Universal Network for Mathematical Expression Recognition in Real-World Scenarios.
Visit our GitHub repository at [UniMERNet](https://github.com/opendatalab/unimernet) for more information.
## Citations
```
@misc{wang2024unimernet,
title={UniMERNet: A Universal Network for Real-World Mathematical Expression Recognition},
author={Bin Wang and Zhuangcheng Gu and Chao Xu and Bo Zhang and Botian Shi and Conghui He},
year={2024},
eprint={2404.15254},
archivePrefix={arXiv},
primaryClass={cs.CV}
}
@misc{conghui2022opendatalab,
author={He, Conghui and Li, Wei and Jin, Zhenjiang and Wang, Bin and Xu, Chao and Lin, Dahua},
title={OpenDataLab: Empowering General Artificial Intelligence with Open Datasets},
howpublished = {\url{https://opendatalab.com}},
year={2022}
}
```
## MD5 checksums
```
97f4867b4ff4e9a96c8daba8aaa793b4 tokenizer_config.json
351652071425d3d36a634ccc8efb22e8 tokenizer.json
ff4391872dad6688f21ed140009d817b pytorch_model.pth
```

View File

@ -0,0 +1,193 @@
{
"_name_or_path": "unimernet/checkpoint-300000",
"architectures": [
"VisionEncoderDecoderModel"
],
"decoder": {
"_name_or_path": "",
"activation_dropout": 0.0,
"activation_function": "gelu",
"add_cross_attention": true,
"add_final_layer_norm": true,
"architectures": null,
"attention_dropout": 0.0,
"bad_words_ids": null,
"begin_suppress_tokens": null,
"bos_token_id": 0,
"chunk_size_feed_forward": 0,
"classifier_dropout": 0.0,
"cross_attention_hidden_size": null,
"d_model": 1024,
"decoder_attention_heads": 16,
"decoder_ffn_dim": 4096,
"decoder_layerdrop": 0.0,
"decoder_layers": 8,
"decoder_start_token_id": null,
"diversity_penalty": 0.0,
"do_sample": false,
"dropout": 0.1,
"early_stopping": false,
"encoder_attention_heads": 16,
"encoder_ffn_dim": 4096,
"encoder_layerdrop": 0.0,
"encoder_layers": 12,
"encoder_no_repeat_ngram_size": 0,
"eos_token_id": 2,
"exponential_decay_length_penalty": null,
"finetuning_task": null,
"forced_bos_token_id": null,
"forced_eos_token_id": 2,
"id2label": {
"0": "LABEL_0",
"1": "LABEL_1"
},
"init_std": 0.02,
"is_decoder": true,
"is_encoder_decoder": false,
"label2id": {
"LABEL_0": 0,
"LABEL_1": 1
},
"length_penalty": 1.0,
"max_length": 20,
"max_position_embeddings": 1536,
"min_length": 0,
"model_type": "mbart",
"no_repeat_ngram_size": 0,
"num_beam_groups": 1,
"num_beams": 1,
"num_hidden_layers": 12,
"num_return_sequences": 1,
"output_attentions": false,
"output_hidden_states": false,
"output_scores": false,
"pad_token_id": 1,
"prefix": null,
"problem_type": null,
"pruned_heads": {},
"remove_invalid_values": false,
"repetition_penalty": 1.0,
"return_dict": true,
"return_dict_in_generate": false,
"scale_embedding": true,
"sep_token_id": null,
"suppress_tokens": null,
"task_specific_params": null,
"temperature": 1.0,
"tf_legacy_loss": false,
"tie_encoder_decoder": false,
"tie_word_embeddings": false,
"tokenizer_class": null,
"top_k": 50,
"top_p": 1.0,
"torch_dtype": null,
"torchscript": false,
"typical_p": 1.0,
"use_bfloat16": false,
"use_cache": true,
"vocab_size": 50000
},
"decoder_start_token_id": 0,
"encoder": {
"_name_or_path": "",
"add_cross_attention": false,
"architectures": null,
"attention_probs_dropout_prob": 0.0,
"bad_words_ids": null,
"begin_suppress_tokens": null,
"bos_token_id": null,
"chunk_size_feed_forward": 0,
"cross_attention_hidden_size": null,
"decoder_start_token_id": null,
"depths": [
6,
6,
6,
6
],
"diversity_penalty": 0.0,
"do_sample": false,
"drop_path_rate": 0.1,
"early_stopping": false,
"embed_dim": 128,
"encoder_no_repeat_ngram_size": 0,
"eos_token_id": null,
"exponential_decay_length_penalty": null,
"finetuning_task": null,
"forced_bos_token_id": null,
"forced_eos_token_id": null,
"hidden_act": "gelu",
"hidden_dropout_prob": 0.0,
"hidden_size": 1024,
"id2label": {
"0": "LABEL_0",
"1": "LABEL_1"
},
"image_size": [
420,
420
],
"initializer_range": 0.02,
"is_decoder": false,
"is_encoder_decoder": false,
"label2id": {
"LABEL_0": 0,
"LABEL_1": 1
},
"layer_norm_eps": 1e-05,
"length_penalty": 1.0,
"max_length": 20,
"min_length": 0,
"mlp_ratio": 4.0,
"model_type": "donut-swin",
"no_repeat_ngram_size": 0,
"num_beam_groups": 1,
"num_beams": 1,
"num_channels": 3,
"num_heads": [
4,
8,
16,
32
],
"num_layers": 4,
"num_return_sequences": 1,
"output_attentions": false,
"output_hidden_states": false,
"output_scores": false,
"pad_token_id": null,
"patch_size": 4,
"path_norm": true,
"prefix": null,
"problem_type": null,
"pruned_heads": {},
"qkv_bias": true,
"remove_invalid_values": false,
"repetition_penalty": 1.0,
"return_dict": true,
"return_dict_in_generate": false,
"sep_token_id": null,
"suppress_tokens": null,
"task_specific_params": null,
"temperature": 1.0,
"tf_legacy_loss": false,
"tie_encoder_decoder": false,
"tie_word_embeddings": true,
"tokenizer_class": null,
"top_k": 50,
"top_p": 1.0,
"torch_dtype": null,
"torchscript": false,
"typical_p": 1.0,
"use_2d_embeddings": false,
"use_absolute_embeddings": false,
"use_bfloat16": false,
"window_size": 5
},
"is_encoder_decoder": true,
"model_type": "vision-encoder-decoder",
"pad_token_id": 1,
"tie_word_embeddings": false,
"torch_dtype": "float16",
"transformers_version": "4.36.0"
}

View File

@ -0,0 +1,36 @@
{
"do_align_long_axis": false,
"do_normalize": false,
"do_pad": false,
"do_rescale": false,
"do_resize": false,
"do_thumbnail": false,
"feature_extractor_type": "DonutFeatureExtractor",
"image_mean": [
0.485,
0.456,
0.406
],
"image_processor_type": "VariableDonutImageProcessor",
"image_std": [
0.229,
0.224,
0.225
],
"max_size": {
"height": 192,
"width": 672
},
"patch_size": [
4,
4
],
"processor_class": "VariableDonutProcessor",
"resample": 2,
"rescale_factor": 0.00392156862745098,
"size": [
192,
672
],
"train": false
}

BIN
models/MFR/unimernet_base/pytorch_model.pth (Stored with Git LFS) Normal file

Binary file not shown.

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,205 @@
{
"added_tokens_decoder": {
"0": {
"content": "<s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"1": {
"content": "<pad>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"2": {
"content": "</s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"3": {
"content": "<unk>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"4": {
"content": "[START_REF]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"5": {
"content": "[END_REF]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"6": {
"content": "[IMAGE]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"7": {
"content": "<fragments>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"8": {
"content": "</fragments>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"9": {
"content": "<work>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"10": {
"content": "</work>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"11": {
"content": "[START_SUP]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"12": {
"content": "[END_SUP]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"13": {
"content": "[START_SUB]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14": {
"content": "[END_SUB]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"15": {
"content": "[START_DNA]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"16": {
"content": "[END_DNA]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"17": {
"content": "[START_AMINO]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"18": {
"content": "[END_AMINO]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"19": {
"content": "[START_SMILES]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"20": {
"content": "[END_SMILES]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"21": {
"content": "[START_I_SMILES]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"22": {
"content": "[END_I_SMILES]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
}
},
"additional_special_tokens": [],
"bos_token": "<s>",
"clean_up_tokenization_spaces": false,
"eos_token": "</s>",
"max_length": 4096,
"model_max_length": 768,
"pad_to_multiple_of": null,
"pad_token": "<pad>",
"pad_token_type_id": 0,
"padding_side": "right",
"processor_class": "VariableDonutProcessor",
"stride": 0,
"tokenizer_class": "NougatTokenizer",
"truncation_side": "right",
"truncation_strategy": "longest_first",
"unk_token": "<unk>",
"vocab_file": null
}

View File

@ -0,0 +1,46 @@
model:
arch: unimernet
model_type: unimernet
model_config:
model_name: ./models/unimernet_base
max_seq_len: 1536
load_pretrained: True
pretrained: './models/unimernet_base/pytorch_model.pth'
tokenizer_config:
path: ./models/unimernet_base
datasets:
formula_rec_eval:
vis_processor:
eval:
name: "formula_image_eval"
image_size:
- 192
- 672
run:
runner: runner_iter
task: unimernet_train
batch_size_train: 64
batch_size_eval: 64
num_workers: 1
iters_per_inner_epoch: 2000
max_iters: 60000
seed: 42
output_dir: "../output/demo"
evaluate: True
test_splits: [ "eval" ]
device: "cuda"
world_size: 1
dist_url: "env://"
distributed: True
distributed_type: ddp # or fsdp when training an LLM
generate_cfg:
temperature: 0.0

View File

@ -0,0 +1,32 @@
---
license: apache-2.0
---
## UniMERNet: A Universal Network for Mathematical Expression Recognition in Real-World Scenarios.
Visit our GitHub repository at [UniMERNet](https://github.com/opendatalab/unimernet) for more information.
## Citations
```
@misc{wang2024unimernet,
title={UniMERNet: A Universal Network for Real-World Mathematical Expression Recognition},
author={Bin Wang and Zhuangcheng Gu and Chao Xu and Bo Zhang and Botian Shi and Conghui He},
year={2024},
eprint={2404.15254},
archivePrefix={arXiv},
primaryClass={cs.CV}
}
@misc{conghui2022opendatalab,
author={He, Conghui and Li, Wei and Jin, Zhenjiang and Wang, Bin and Xu, Chao and Lin, Dahua},
title={OpenDataLab: Empowering General Artificial Intelligence with Open Datasets},
howpublished = {\url{https://opendatalab.com}},
year={2022}
}
```
## MD5 checksums
```
97f4867b4ff4e9a96c8daba8aaa793b4 tokenizer_config.json
351652071425d3d36a634ccc8efb22e8 tokenizer.json
430e426354e71624fb096c5c7ad90a78 pytorch_model.pth
```

View File

@ -0,0 +1,193 @@
{
"_name_or_path": "unimernet/checkpoint-300000",
"architectures": [
"VisionEncoderDecoderModel"
],
"decoder": {
"_name_or_path": "",
"activation_dropout": 0.0,
"activation_function": "gelu",
"add_cross_attention": true,
"add_final_layer_norm": true,
"architectures": null,
"attention_dropout": 0.0,
"bad_words_ids": null,
"begin_suppress_tokens": null,
"bos_token_id": 0,
"chunk_size_feed_forward": 0,
"classifier_dropout": 0.0,
"cross_attention_hidden_size": null,
"d_model": 768,
"decoder_attention_heads": 16,
"decoder_ffn_dim": 3072,
"decoder_layerdrop": 0.0,
"decoder_layers": 8,
"decoder_start_token_id": null,
"diversity_penalty": 0.0,
"do_sample": false,
"dropout": 0.1,
"early_stopping": false,
"encoder_attention_heads": 16,
"encoder_ffn_dim": 3072,
"encoder_layerdrop": 0.0,
"encoder_layers": 12,
"encoder_no_repeat_ngram_size": 0,
"eos_token_id": 2,
"exponential_decay_length_penalty": null,
"finetuning_task": null,
"forced_bos_token_id": null,
"forced_eos_token_id": 2,
"id2label": {
"0": "LABEL_0",
"1": "LABEL_1"
},
"init_std": 0.02,
"is_decoder": true,
"is_encoder_decoder": false,
"label2id": {
"LABEL_0": 0,
"LABEL_1": 1
},
"length_penalty": 1.0,
"max_length": 20,
"max_position_embeddings": 1536,
"min_length": 0,
"model_type": "mbart",
"no_repeat_ngram_size": 0,
"num_beam_groups": 1,
"num_beams": 1,
"num_hidden_layers": 12,
"num_return_sequences": 1,
"output_attentions": false,
"output_hidden_states": false,
"output_scores": false,
"pad_token_id": 1,
"prefix": null,
"problem_type": null,
"pruned_heads": {},
"remove_invalid_values": false,
"repetition_penalty": 1.0,
"return_dict": true,
"return_dict_in_generate": false,
"scale_embedding": true,
"sep_token_id": null,
"suppress_tokens": null,
"task_specific_params": null,
"temperature": 1.0,
"tf_legacy_loss": false,
"tie_encoder_decoder": false,
"tie_word_embeddings": false,
"tokenizer_class": null,
"top_k": 50,
"top_p": 1.0,
"torch_dtype": null,
"torchscript": false,
"typical_p": 1.0,
"use_bfloat16": false,
"use_cache": true,
"vocab_size": 50000
},
"decoder_start_token_id": 0,
"encoder": {
"_name_or_path": "",
"add_cross_attention": false,
"architectures": null,
"attention_probs_dropout_prob": 0.0,
"bad_words_ids": null,
"begin_suppress_tokens": null,
"bos_token_id": null,
"chunk_size_feed_forward": 0,
"cross_attention_hidden_size": null,
"decoder_start_token_id": null,
"depths": [
6,
6,
6,
6
],
"diversity_penalty": 0.0,
"do_sample": false,
"drop_path_rate": 0.1,
"early_stopping": false,
"embed_dim": 96,
"encoder_no_repeat_ngram_size": 0,
"eos_token_id": null,
"exponential_decay_length_penalty": null,
"finetuning_task": null,
"forced_bos_token_id": null,
"forced_eos_token_id": null,
"hidden_act": "gelu",
"hidden_dropout_prob": 0.0,
"hidden_size": 768,
"id2label": {
"0": "LABEL_0",
"1": "LABEL_1"
},
"image_size": [
420,
420
],
"initializer_range": 0.02,
"is_decoder": false,
"is_encoder_decoder": false,
"label2id": {
"LABEL_0": 0,
"LABEL_1": 1
},
"layer_norm_eps": 1e-05,
"length_penalty": 1.0,
"max_length": 20,
"min_length": 0,
"mlp_ratio": 4.0,
"model_type": "donut-swin",
"no_repeat_ngram_size": 0,
"num_beam_groups": 1,
"num_beams": 1,
"num_channels": 3,
"num_heads": [
3,
6,
12,
24
],
"num_layers": 4,
"num_return_sequences": 1,
"output_attentions": false,
"output_hidden_states": false,
"output_scores": false,
"pad_token_id": null,
"patch_size": 4,
"path_norm": true,
"prefix": null,
"problem_type": null,
"pruned_heads": {},
"qkv_bias": true,
"remove_invalid_values": false,
"repetition_penalty": 1.0,
"return_dict": true,
"return_dict_in_generate": false,
"sep_token_id": null,
"suppress_tokens": null,
"task_specific_params": null,
"temperature": 1.0,
"tf_legacy_loss": false,
"tie_encoder_decoder": false,
"tie_word_embeddings": true,
"tokenizer_class": null,
"top_k": 50,
"top_p": 1.0,
"torch_dtype": null,
"torchscript": false,
"typical_p": 1.0,
"use_2d_embeddings": false,
"use_absolute_embeddings": false,
"use_bfloat16": false,
"window_size": 5
},
"is_encoder_decoder": true,
"model_type": "vision-encoder-decoder",
"pad_token_id": 1,
"tie_word_embeddings": false,
"torch_dtype": "float16",
"transformers_version": "4.36.0"
}

View File

@ -0,0 +1,36 @@
{
"do_align_long_axis": false,
"do_normalize": false,
"do_pad": false,
"do_rescale": false,
"do_resize": false,
"do_thumbnail": false,
"feature_extractor_type": "DonutFeatureExtractor",
"image_mean": [
0.485,
0.456,
0.406
],
"image_processor_type": "VariableDonutImageProcessor",
"image_std": [
0.229,
0.224,
0.225
],
"max_size": {
"height": 192,
"width": 672
},
"patch_size": [
4,
4
],
"processor_class": "VariableDonutProcessor",
"resample": 2,
"rescale_factor": 0.00392156862745098,
"size": [
192,
672
],
"train": false
}

BIN
models/MFR/unimernet_small/pytorch_model.pth (Stored with Git LFS) Normal file

Binary file not shown.

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,205 @@
{
"added_tokens_decoder": {
"0": {
"content": "<s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"1": {
"content": "<pad>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"2": {
"content": "</s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"3": {
"content": "<unk>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"4": {
"content": "[START_REF]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"5": {
"content": "[END_REF]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"6": {
"content": "[IMAGE]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"7": {
"content": "<fragments>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"8": {
"content": "</fragments>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"9": {
"content": "<work>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"10": {
"content": "</work>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"11": {
"content": "[START_SUP]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"12": {
"content": "[END_SUP]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"13": {
"content": "[START_SUB]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14": {
"content": "[END_SUB]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"15": {
"content": "[START_DNA]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"16": {
"content": "[END_DNA]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"17": {
"content": "[START_AMINO]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"18": {
"content": "[END_AMINO]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"19": {
"content": "[START_SMILES]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"20": {
"content": "[END_SMILES]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"21": {
"content": "[START_I_SMILES]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"22": {
"content": "[END_I_SMILES]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
}
},
"additional_special_tokens": [],
"bos_token": "<s>",
"clean_up_tokenization_spaces": false,
"eos_token": "</s>",
"max_length": 4096,
"model_max_length": 768,
"pad_to_multiple_of": null,
"pad_token": "<pad>",
"pad_token_type_id": 0,
"padding_side": "right",
"processor_class": "VariableDonutProcessor",
"stride": 0,
"tokenizer_class": "NougatTokenizer",
"truncation_side": "right",
"truncation_strategy": "longest_first",
"unk_token": "<unk>",
"vocab_file": null
}

View File

@ -0,0 +1,46 @@
model:
arch: unimernet
model_type: unimernet
model_config:
model_name: ./models/unimernet_small
max_seq_len: 1536
load_pretrained: True
pretrained: './models/unimernet_small/pytorch_model.pth'
tokenizer_config:
path: ./models/unimernet_small
datasets:
formula_rec_eval:
vis_processor:
eval:
name: "formula_image_eval"
image_size:
- 192
- 672
run:
runner: runner_iter
task: unimernet_train
batch_size_train: 64
batch_size_eval: 64
num_workers: 1
iters_per_inner_epoch: 2000
max_iters: 60000
seed: 42
output_dir: "../output/demo"
evaluate: True
test_splits: [ "eval" ]
device: "cuda"
world_size: 1
dist_url: "env://"
distributed: True
distributed_type: ddp # or fsdp when training an LLM
generate_cfg:
temperature: 0.0

View File

@ -0,0 +1,32 @@
---
license: apache-2.0
---
## UniMERNet: A Universal Network for Mathematical Expression Recognition in Real-World Scenarios.
Visit our GitHub repository at [UniMERNet](https://github.com/opendatalab/unimernet) for more information.
## Citations
```
@misc{wang2024unimernet,
title={UniMERNet: A Universal Network for Real-World Mathematical Expression Recognition},
author={Bin Wang and Zhuangcheng Gu and Chao Xu and Bo Zhang and Botian Shi and Conghui He},
year={2024},
eprint={2404.15254},
archivePrefix={arXiv},
primaryClass={cs.CV}
}
@misc{conghui2022opendatalab,
author={He, Conghui and Li, Wei and Jin, Zhenjiang and Wang, Bin and Xu, Chao and Lin, Dahua},
title={OpenDataLab: Empowering General Artificial Intelligence with Open Datasets},
howpublished = {\url{https://opendatalab.com}},
year={2022}
}
```
## MD5 checksums
```
97f4867b4ff4e9a96c8daba8aaa793b4 tokenizer_config.json
351652071425d3d36a634ccc8efb22e8 tokenizer.json
72b53a2152af43a57f8d5eebf8e31562 pytorch_model.pth
```

View File

@ -0,0 +1,193 @@
{
"_name_or_path": "unimernet/checkpoint-300000",
"architectures": [
"VisionEncoderDecoderModel"
],
"decoder": {
"_name_or_path": "",
"activation_dropout": 0.0,
"activation_function": "gelu",
"add_cross_attention": true,
"add_final_layer_norm": true,
"architectures": null,
"attention_dropout": 0.0,
"bad_words_ids": null,
"begin_suppress_tokens": null,
"bos_token_id": 0,
"chunk_size_feed_forward": 0,
"classifier_dropout": 0.0,
"cross_attention_hidden_size": null,
"d_model": 512,
"decoder_attention_heads": 16,
"decoder_ffn_dim": 2048,
"decoder_layerdrop": 0.0,
"decoder_layers": 8,
"decoder_start_token_id": null,
"diversity_penalty": 0.0,
"do_sample": false,
"dropout": 0.1,
"early_stopping": false,
"encoder_attention_heads": 16,
"encoder_ffn_dim": 2048,
"encoder_layerdrop": 0.0,
"encoder_layers": 12,
"encoder_no_repeat_ngram_size": 0,
"eos_token_id": 2,
"exponential_decay_length_penalty": null,
"finetuning_task": null,
"forced_bos_token_id": null,
"forced_eos_token_id": 2,
"id2label": {
"0": "LABEL_0",
"1": "LABEL_1"
},
"init_std": 0.02,
"is_decoder": true,
"is_encoder_decoder": false,
"label2id": {
"LABEL_0": 0,
"LABEL_1": 1
},
"length_penalty": 1.0,
"max_length": 20,
"max_position_embeddings": 1536,
"min_length": 0,
"model_type": "mbart",
"no_repeat_ngram_size": 0,
"num_beam_groups": 1,
"num_beams": 1,
"num_hidden_layers": 12,
"num_return_sequences": 1,
"output_attentions": false,
"output_hidden_states": false,
"output_scores": false,
"pad_token_id": 1,
"prefix": null,
"problem_type": null,
"pruned_heads": {},
"remove_invalid_values": false,
"repetition_penalty": 1.0,
"return_dict": true,
"return_dict_in_generate": false,
"scale_embedding": true,
"sep_token_id": null,
"suppress_tokens": null,
"task_specific_params": null,
"temperature": 1.0,
"tf_legacy_loss": false,
"tie_encoder_decoder": false,
"tie_word_embeddings": false,
"tokenizer_class": null,
"top_k": 50,
"top_p": 1.0,
"torch_dtype": null,
"torchscript": false,
"typical_p": 1.0,
"use_bfloat16": false,
"use_cache": true,
"vocab_size": 50000
},
"decoder_start_token_id": 0,
"encoder": {
"_name_or_path": "",
"add_cross_attention": false,
"architectures": null,
"attention_probs_dropout_prob": 0.0,
"bad_words_ids": null,
"begin_suppress_tokens": null,
"bos_token_id": null,
"chunk_size_feed_forward": 0,
"cross_attention_hidden_size": null,
"decoder_start_token_id": null,
"depths": [
6,
6,
6,
6
],
"diversity_penalty": 0.0,
"do_sample": false,
"drop_path_rate": 0.1,
"early_stopping": false,
"embed_dim": 64,
"encoder_no_repeat_ngram_size": 0,
"eos_token_id": null,
"exponential_decay_length_penalty": null,
"finetuning_task": null,
"forced_bos_token_id": null,
"forced_eos_token_id": null,
"hidden_act": "gelu",
"hidden_dropout_prob": 0.0,
"hidden_size": 512,
"id2label": {
"0": "LABEL_0",
"1": "LABEL_1"
},
"image_size": [
420,
420
],
"initializer_range": 0.02,
"is_decoder": false,
"is_encoder_decoder": false,
"label2id": {
"LABEL_0": 0,
"LABEL_1": 1
},
"layer_norm_eps": 1e-05,
"length_penalty": 1.0,
"max_length": 20,
"min_length": 0,
"mlp_ratio": 4.0,
"model_type": "donut-swin",
"no_repeat_ngram_size": 0,
"num_beam_groups": 1,
"num_beams": 1,
"num_channels": 3,
"num_heads": [
2,
4,
8,
16
],
"num_layers": 4,
"num_return_sequences": 1,
"output_attentions": false,
"output_hidden_states": false,
"output_scores": false,
"pad_token_id": null,
"patch_size": 4,
"path_norm": true,
"prefix": null,
"problem_type": null,
"pruned_heads": {},
"qkv_bias": true,
"remove_invalid_values": false,
"repetition_penalty": 1.0,
"return_dict": true,
"return_dict_in_generate": false,
"sep_token_id": null,
"suppress_tokens": null,
"task_specific_params": null,
"temperature": 1.0,
"tf_legacy_loss": false,
"tie_encoder_decoder": false,
"tie_word_embeddings": true,
"tokenizer_class": null,
"top_k": 50,
"top_p": 1.0,
"torch_dtype": null,
"torchscript": false,
"typical_p": 1.0,
"use_2d_embeddings": false,
"use_absolute_embeddings": false,
"use_bfloat16": false,
"window_size": 5
},
"is_encoder_decoder": true,
"model_type": "vision-encoder-decoder",
"pad_token_id": 1,
"tie_word_embeddings": false,
"torch_dtype": "float16",
"transformers_version": "4.36.0"
}

View File

@ -0,0 +1,36 @@
{
"do_align_long_axis": false,
"do_normalize": false,
"do_pad": false,
"do_rescale": false,
"do_resize": false,
"do_thumbnail": false,
"feature_extractor_type": "DonutFeatureExtractor",
"image_mean": [
0.485,
0.456,
0.406
],
"image_processor_type": "VariableDonutImageProcessor",
"image_std": [
0.229,
0.224,
0.225
],
"max_size": {
"height": 192,
"width": 672
},
"patch_size": [
4,
4
],
"processor_class": "VariableDonutProcessor",
"resample": 2,
"rescale_factor": 0.00392156862745098,
"size": [
192,
672
],
"train": false
}

BIN
models/MFR/unimernet_tiny/pytorch_model.pth (Stored with Git LFS) Normal file

Binary file not shown.

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,205 @@
{
"added_tokens_decoder": {
"0": {
"content": "<s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"1": {
"content": "<pad>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"2": {
"content": "</s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"3": {
"content": "<unk>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"4": {
"content": "[START_REF]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"5": {
"content": "[END_REF]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"6": {
"content": "[IMAGE]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"7": {
"content": "<fragments>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"8": {
"content": "</fragments>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"9": {
"content": "<work>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"10": {
"content": "</work>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"11": {
"content": "[START_SUP]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"12": {
"content": "[END_SUP]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"13": {
"content": "[START_SUB]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14": {
"content": "[END_SUB]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"15": {
"content": "[START_DNA]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"16": {
"content": "[END_DNA]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"17": {
"content": "[START_AMINO]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"18": {
"content": "[END_AMINO]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"19": {
"content": "[START_SMILES]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"20": {
"content": "[END_SMILES]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"21": {
"content": "[START_I_SMILES]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"22": {
"content": "[END_I_SMILES]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
}
},
"additional_special_tokens": [],
"bos_token": "<s>",
"clean_up_tokenization_spaces": false,
"eos_token": "</s>",
"max_length": 4096,
"model_max_length": 768,
"pad_to_multiple_of": null,
"pad_token": "<pad>",
"pad_token_type_id": 0,
"padding_side": "right",
"processor_class": "VariableDonutProcessor",
"stride": 0,
"tokenizer_class": "NougatTokenizer",
"truncation_side": "right",
"truncation_strategy": "longest_first",
"unk_token": "<unk>",
"vocab_file": null
}

View File

@ -0,0 +1,46 @@
model:
arch: unimernet
model_type: unimernet
model_config:
model_name: ./models/unimernet_tiny
max_seq_len: 1536
load_pretrained: True
pretrained: './models/unimernet_tiny/pytorch_model.pth'
tokenizer_config:
path: ./models/unimernet_tiny
datasets:
formula_rec_eval:
vis_processor:
eval:
name: "formula_image_eval"
image_size:
- 192
- 672
run:
runner: runner_iter
task: unimernet_train
batch_size_train: 64
batch_size_eval: 64
num_workers: 1
iters_per_inner_epoch: 2000
max_iters: 60000
seed: 42
output_dir: "../output/demo"
evaluate: True
test_splits: [ "eval" ]
device: "cuda"
world_size: 1
dist_url: "env://"
distributed: True
distributed_type: ddp # or fsdp when train llm
generate_cfg:
temperature: 0.0

70
models/README.md Normal file
View File

@ -0,0 +1,70 @@
### Install Git LFS
Before you begin, make sure Git Large File Storage (Git LFS) is installed on your system, then enable it for your user account with the following command:
```bash
git lfs install
```
### Download the Model from Hugging Face
To download the `PDF-Extract-Kit` model from Hugging Face, use the following command:
```bash
git clone https://huggingface.co/wanderkid/PDF-Extract-Kit
```
Ensure that Git LFS is enabled during the clone to properly download all large files.
### Download the Model from ModelScope
#### SDK Download
```bash
# First, install the ModelScope library using pip:
pip install modelscope
```
```python
# Use the following Python code to download the model using the ModelScope SDK:
from modelscope import snapshot_download
model_dir = snapshot_download('wanderkid/PDF-Extract-Kit')
```
#### Git Download
Alternatively, you can use Git to clone the model repository from ModelScope:
```bash
git clone https://www.modelscope.cn/wanderkid/PDF-Extract-Kit.git
```
Put the downloaded model files here:
```
./
├── Layout
│ ├── config.json
│ └── model_final.pth
├── MFD
│ └── weights.pt
├── MFR
│ └── UniMERNet
│ ├── config.json
│ ├── preprocessor_config.json
│ ├── pytorch_model.bin
│ ├── README.md
│ ├── tokenizer_config.json
│ └── tokenizer.json
├── TabRec
│ └── StructEqTable
│ ├── config.json
│ ├── generation_config.json
│ ├── model.safetensors
│ ├── preprocessor_config.json
│ ├── special_tokens_map.json
│ ├── spiece.model
│ ├── tokenizer_config.json
│ └── tokenizer.json
└── README.md
```

View File

@ -0,0 +1,36 @@
{
"_name_or_path": "/cpfs01/user/zhouhongbin/code/StructEqTable-deepspeed/ckpt/pretrained/pix2struct-base-zh",
"architectures": [
"Pix2StructForConditionalGeneration"
],
"decoder_start_token_id": 0,
"eos_token_id": 1,
"initializer_factor": 1.0,
"initializer_range": 0.02,
"is_encoder_decoder": true,
"is_vqa": false,
"model_type": "pix2struct",
"pad_token_id": 0,
"text_config": {
"dropout_rate": 0.2,
"encoder_hidden_size": 768,
"initializer_range": 0.02,
"model_type": "pix2struct_text_model",
"vocab_size": 77078
},
"tie_word_embeddings": false,
"torch_dtype": "float32",
"transformers_version": "4.37.2",
"use_cache": false,
"vision_config": {
"attention_dropout": 0.2,
"dropout_rate": 0.2,
"hidden_dropout_prob": 0.2,
"initializer_range": 0.02,
"layer_norm_bias": false,
"model_type": "pix2struct_vision_model",
"num_channels": 3,
"patch_size": 16,
"projection_dim": 768
}
}

View File

@ -0,0 +1,8 @@
{
"_from_model_config": true,
"decoder_start_token_id": 0,
"eos_token_id": 1,
"pad_token_id": 0,
"transformers_version": "4.37.2",
"use_cache": false
}

BIN
models/TabRec/StructEqTable/model.safetensors (Stored with Git LFS) Normal file

Binary file not shown.

View File

@ -0,0 +1,12 @@
{
"do_convert_rgb": true,
"do_normalize": true,
"image_processor_type": "Pix2StructImageProcessor",
"is_vqa": false,
"max_patches": 4096,
"patch_size": {
"height": 16,
"width": 16
},
"processor_class": "Pix2StructProcessor"
}

View File

@ -0,0 +1,125 @@
{
"additional_special_tokens": [
"<extra_id_0>",
"<extra_id_1>",
"<extra_id_2>",
"<extra_id_3>",
"<extra_id_4>",
"<extra_id_5>",
"<extra_id_6>",
"<extra_id_7>",
"<extra_id_8>",
"<extra_id_9>",
"<extra_id_10>",
"<extra_id_11>",
"<extra_id_12>",
"<extra_id_13>",
"<extra_id_14>",
"<extra_id_15>",
"<extra_id_16>",
"<extra_id_17>",
"<extra_id_18>",
"<extra_id_19>",
"<extra_id_20>",
"<extra_id_21>",
"<extra_id_22>",
"<extra_id_23>",
"<extra_id_24>",
"<extra_id_25>",
"<extra_id_26>",
"<extra_id_27>",
"<extra_id_28>",
"<extra_id_29>",
"<extra_id_30>",
"<extra_id_31>",
"<extra_id_32>",
"<extra_id_33>",
"<extra_id_34>",
"<extra_id_35>",
"<extra_id_36>",
"<extra_id_37>",
"<extra_id_38>",
"<extra_id_39>",
"<extra_id_40>",
"<extra_id_41>",
"<extra_id_42>",
"<extra_id_43>",
"<extra_id_44>",
"<extra_id_45>",
"<extra_id_46>",
"<extra_id_47>",
"<extra_id_48>",
"<extra_id_49>",
"<extra_id_50>",
"<extra_id_51>",
"<extra_id_52>",
"<extra_id_53>",
"<extra_id_54>",
"<extra_id_55>",
"<extra_id_56>",
"<extra_id_57>",
"<extra_id_58>",
"<extra_id_59>",
"<extra_id_60>",
"<extra_id_61>",
"<extra_id_62>",
"<extra_id_63>",
"<extra_id_64>",
"<extra_id_65>",
"<extra_id_66>",
"<extra_id_67>",
"<extra_id_68>",
"<extra_id_69>",
"<extra_id_70>",
"<extra_id_71>",
"<extra_id_72>",
"<extra_id_73>",
"<extra_id_74>",
"<extra_id_75>",
"<extra_id_76>",
"<extra_id_77>",
"<extra_id_78>",
"<extra_id_79>",
"<extra_id_80>",
"<extra_id_81>",
"<extra_id_82>",
"<extra_id_83>",
"<extra_id_84>",
"<extra_id_85>",
"<extra_id_86>",
"<extra_id_87>",
"<extra_id_88>",
"<extra_id_89>",
"<extra_id_90>",
"<extra_id_91>",
"<extra_id_92>",
"<extra_id_93>",
"<extra_id_94>",
"<extra_id_95>",
"<extra_id_96>",
"<extra_id_97>",
"<extra_id_98>",
"<extra_id_99>"
],
"eos_token": {
"content": "</s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
"pad_token": {
"content": "<pad>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
"unk_token": {
"content": "<unk>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
}
}

BIN
models/TabRec/StructEqTable/spiece.model (Stored with Git LFS) Normal file

Binary file not shown.

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,940 @@
{
"added_tokens_decoder": {
"0": {
"content": "<pad>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"1": {
"content": "</s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"3": {
"content": "<unk>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"76978": {
"content": "<extra_id_99>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"76979": {
"content": "<extra_id_98>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"76980": {
"content": "<extra_id_97>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"76981": {
"content": "<extra_id_96>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"76982": {
"content": "<extra_id_95>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"76983": {
"content": "<extra_id_94>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"76984": {
"content": "<extra_id_93>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"76985": {
"content": "<extra_id_92>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"76986": {
"content": "<extra_id_91>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"76987": {
"content": "<extra_id_90>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"76988": {
"content": "<extra_id_89>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"76989": {
"content": "<extra_id_88>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"76990": {
"content": "<extra_id_87>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"76991": {
"content": "<extra_id_86>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"76992": {
"content": "<extra_id_85>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"76993": {
"content": "<extra_id_84>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"76994": {
"content": "<extra_id_83>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"76995": {
"content": "<extra_id_82>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"76996": {
"content": "<extra_id_81>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"76997": {
"content": "<extra_id_80>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"76998": {
"content": "<extra_id_79>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"76999": {
"content": "<extra_id_78>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"77000": {
"content": "<extra_id_77>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"77001": {
"content": "<extra_id_76>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"77002": {
"content": "<extra_id_75>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"77003": {
"content": "<extra_id_74>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"77004": {
"content": "<extra_id_73>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"77005": {
"content": "<extra_id_72>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"77006": {
"content": "<extra_id_71>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"77007": {
"content": "<extra_id_70>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"77008": {
"content": "<extra_id_69>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"77009": {
"content": "<extra_id_68>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"77010": {
"content": "<extra_id_67>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"77011": {
"content": "<extra_id_66>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"77012": {
"content": "<extra_id_65>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"77013": {
"content": "<extra_id_64>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"77014": {
"content": "<extra_id_63>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"77015": {
"content": "<extra_id_62>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"77016": {
"content": "<extra_id_61>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"77017": {
"content": "<extra_id_60>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"77018": {
"content": "<extra_id_59>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"77019": {
"content": "<extra_id_58>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"77020": {
"content": "<extra_id_57>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"77021": {
"content": "<extra_id_56>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"77022": {
"content": "<extra_id_55>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"77023": {
"content": "<extra_id_54>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"77024": {
"content": "<extra_id_53>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"77025": {
"content": "<extra_id_52>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"77026": {
"content": "<extra_id_51>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"77027": {
"content": "<extra_id_50>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"77028": {
"content": "<extra_id_49>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"77029": {
"content": "<extra_id_48>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"77030": {
"content": "<extra_id_47>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"77031": {
"content": "<extra_id_46>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"77032": {
"content": "<extra_id_45>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"77033": {
"content": "<extra_id_44>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"77034": {
"content": "<extra_id_43>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"77035": {
"content": "<extra_id_42>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"77036": {
"content": "<extra_id_41>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"77037": {
"content": "<extra_id_40>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"77038": {
"content": "<extra_id_39>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"77039": {
"content": "<extra_id_38>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"77040": {
"content": "<extra_id_37>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"77041": {
"content": "<extra_id_36>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"77042": {
"content": "<extra_id_35>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"77043": {
"content": "<extra_id_34>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"77044": {
"content": "<extra_id_33>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"77045": {
"content": "<extra_id_32>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"77046": {
"content": "<extra_id_31>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"77047": {
"content": "<extra_id_30>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"77048": {
"content": "<extra_id_29>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"77049": {
"content": "<extra_id_28>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"77050": {
"content": "<extra_id_27>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"77051": {
"content": "<extra_id_26>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"77052": {
"content": "<extra_id_25>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"77053": {
"content": "<extra_id_24>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"77054": {
"content": "<extra_id_23>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"77055": {
"content": "<extra_id_22>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"77056": {
"content": "<extra_id_21>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"77057": {
"content": "<extra_id_20>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"77058": {
"content": "<extra_id_19>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"77059": {
"content": "<extra_id_18>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"77060": {
"content": "<extra_id_17>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"77061": {
"content": "<extra_id_16>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"77062": {
"content": "<extra_id_15>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"77063": {
"content": "<extra_id_14>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"77064": {
"content": "<extra_id_13>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"77065": {
"content": "<extra_id_12>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"77066": {
"content": "<extra_id_11>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"77067": {
"content": "<extra_id_10>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"77068": {
"content": "<extra_id_9>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"77069": {
"content": "<extra_id_8>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"77070": {
"content": "<extra_id_7>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"77071": {
"content": "<extra_id_6>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"77072": {
"content": "<extra_id_5>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"77073": {
"content": "<extra_id_4>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"77074": {
"content": "<extra_id_3>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"77075": {
"content": "<extra_id_2>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"77076": {
"content": "<extra_id_1>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"77077": {
"content": "<extra_id_0>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
}
},
"additional_special_tokens": [
"<extra_id_0>",
"<extra_id_1>",
"<extra_id_2>",
"<extra_id_3>",
"<extra_id_4>",
"<extra_id_5>",
"<extra_id_6>",
"<extra_id_7>",
"<extra_id_8>",
"<extra_id_9>",
"<extra_id_10>",
"<extra_id_11>",
"<extra_id_12>",
"<extra_id_13>",
"<extra_id_14>",
"<extra_id_15>",
"<extra_id_16>",
"<extra_id_17>",
"<extra_id_18>",
"<extra_id_19>",
"<extra_id_20>",
"<extra_id_21>",
"<extra_id_22>",
"<extra_id_23>",
"<extra_id_24>",
"<extra_id_25>",
"<extra_id_26>",
"<extra_id_27>",
"<extra_id_28>",
"<extra_id_29>",
"<extra_id_30>",
"<extra_id_31>",
"<extra_id_32>",
"<extra_id_33>",
"<extra_id_34>",
"<extra_id_35>",
"<extra_id_36>",
"<extra_id_37>",
"<extra_id_38>",
"<extra_id_39>",
"<extra_id_40>",
"<extra_id_41>",
"<extra_id_42>",
"<extra_id_43>",
"<extra_id_44>",
"<extra_id_45>",
"<extra_id_46>",
"<extra_id_47>",
"<extra_id_48>",
"<extra_id_49>",
"<extra_id_50>",
"<extra_id_51>",
"<extra_id_52>",
"<extra_id_53>",
"<extra_id_54>",
"<extra_id_55>",
"<extra_id_56>",
"<extra_id_57>",
"<extra_id_58>",
"<extra_id_59>",
"<extra_id_60>",
"<extra_id_61>",
"<extra_id_62>",
"<extra_id_63>",
"<extra_id_64>",
"<extra_id_65>",
"<extra_id_66>",
"<extra_id_67>",
"<extra_id_68>",
"<extra_id_69>",
"<extra_id_70>",
"<extra_id_71>",
"<extra_id_72>",
"<extra_id_73>",
"<extra_id_74>",
"<extra_id_75>",
"<extra_id_76>",
"<extra_id_77>",
"<extra_id_78>",
"<extra_id_79>",
"<extra_id_80>",
"<extra_id_81>",
"<extra_id_82>",
"<extra_id_83>",
"<extra_id_84>",
"<extra_id_85>",
"<extra_id_86>",
"<extra_id_87>",
"<extra_id_88>",
"<extra_id_89>",
"<extra_id_90>",
"<extra_id_91>",
"<extra_id_92>",
"<extra_id_93>",
"<extra_id_94>",
"<extra_id_95>",
"<extra_id_96>",
"<extra_id_97>",
"<extra_id_98>",
"<extra_id_99>"
],
"clean_up_tokenization_spaces": true,
"eos_token": "</s>",
"extra_ids": 100,
"legacy": true,
"model_max_length": 1000000000000000019884624838656,
"pad_token": "<pad>",
"processor_class": "Pix2StructProcessor",
"sp_model_kwargs": {},
"tokenizer_class": "T5Tokenizer",
"unk_token": "<unk>"
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,39 @@
<thead>
<tr>
<td></td>
</tr>
</thead>
<tbody>
<eb></eb>
</tbody>
<td
colspan="5"
>
</td>
colspan="2"
colspan="3"
<eb2></eb2>
<eb1></eb1>
rowspan="2"
colspan="4"
colspan="6"
rowspan="3"
colspan="9"
colspan="10"
colspan="7"
rowspan="4"
rowspan="5"
rowspan="9"
colspan="8"
rowspan="8"
rowspan="6"
rowspan="7"
rowspan="10"
<eb3></eb3>
<eb4></eb4>
<eb5></eb5>
<eb6></eb6>
<eb7></eb7>
<eb8></eb8>
<eb9></eb9>
<eb10></eb10>