From 4e85e1bf2e9b2b7c8564b68fd9fe7ba6629ade5a Mon Sep 17 00:00:00 2001 From: xxl <505279206@qq.com> Date: Wed, 22 Jan 2025 17:19:32 +0800 Subject: [PATCH] first commit --- README.md | 74 +++- config.json | 3 + model_artifacts/layout/config.json | 130 ++++++ model_artifacts/layout/model.safetensors | 3 + .../layout/preprocessor_config.json | 26 ++ .../accurate/tableformer_accurate.safetensors | 3 + .../tableformer/accurate/tm_config.json | 369 ++++++++++++++++++ .../fast/tableformer_fast.safetensors | 3 + .../tableformer/fast/tm_config.json | 369 ++++++++++++++++++ 9 files changed, 978 insertions(+), 2 deletions(-) create mode 100644 config.json create mode 100644 model_artifacts/layout/config.json create mode 100644 model_artifacts/layout/model.safetensors create mode 100644 model_artifacts/layout/preprocessor_config.json create mode 100644 model_artifacts/tableformer/accurate/tableformer_accurate.safetensors create mode 100644 model_artifacts/tableformer/accurate/tm_config.json create mode 100644 model_artifacts/tableformer/fast/tableformer_fast.safetensors create mode 100644 model_artifacts/tableformer/fast/tm_config.json diff --git a/README.md b/README.md index 4a37a3e..d60ddbe 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,73 @@ -# docling-models +--- +license: cdla-permissive-2.0 +--- -docling-models \ No newline at end of file +# Docling Models + +This page contains models that power the PDF document conversion package [docling](https://github.com/DS4SD/docling). + +## Layout Model + +The layout model will take an image from a page and apply RT-DETR model in order to find different layout components. It currently detects the labels: Caption, Footnote, Formula, List-item, Page-footer, Page-header, Picture, Section-header, Table, Text, Title. 
As a reference (from the DocLayNet-paper), this is the performance of standard object detection methods on the DocLayNet dataset compared to human evaluation, + +| | human | MRCNN | MRCNN | FRCNN | YOLO | +|----------------|---------|---------|---------|---------|--------| +| | human | R50 | R101 | R101 | v5x6 | +| Caption | 84-89 | 68.4 | 71.5 | 70.1 | 77.7 | +| Footnote | 83-91 | 70.9 | 71.8 | 73.7 | 77.2 | +| Formula | 83-85 | 60.1 | 63.4 | 63.5 | 66.2 | +| List-item | 87-88 | 81.2 | 80.8 | 81.0 | 86.2 | +| Page-footer | 93-94 | 61.6 | 59.3 | 58.9 | 61.1 | +| Page-header | 85-89 | 71.9 | 70.0 | 72.0 | 67.9 | +| Picture | 69-71 | 71.7 | 72.7 | 72.0 | 77.1 | +| Section-header | 83-84 | 67.6 | 69.3 | 68.4 | 74.6 | +| Table | 77-81 | 82.2 | 82.9 | 82.2 | 86.3 | +| Text | 84-86 | 84.6 | 85.8 | 85.4 | 88.1 | +| Title | 60-72 | 76.7 | 80.4 | 79.9 | 82.7 | +| All | 82-83 | 72.4 | 73.5 | 73.4 | 76.8 | + +## TableFormer + +The tableformer model will identify the structure of the table, starting from an image of a table. It uses the predicted table regions of the layout model to identify the tables. 
Tableformer has SOTA table structure identification, + +| Model (TEDS) | Simple table | Complex table | All tables | +| ------------ | ------------ | ------------- | ---------- | +| Tabula | 78.0 | 57.8 | 67.9 | +| Traprange | 60.8 | 49.9 | 55.4 | +| Camelot | 80.0 | 66.0 | 73.0 | +| Acrobat Pro | 68.9 | 61.8 | 65.3 | +| EDD | 91.2 | 85.4 | 88.3 | +| TableFormer | 95.4 | 90.1 | 93.6 | + +## References + +``` +@techreport{Docling, + author = {Deep Search Team}, + month = {8}, + title = {{Docling Technical Report}}, + url={https://arxiv.org/abs/2408.09869}, + eprint={2408.09869}, + doi = "10.48550/arXiv.2408.09869", + version = {1.0.0}, + year = {2024} +} + +@article{doclaynet2022, + title = {DocLayNet: A Large Human-Annotated Dataset for Document-Layout Analysis}, + doi = {10.1145/3534678.3539043}, + url = {https://arxiv.org/abs/2206.01062}, + author = {Pfitzmann, Birgit and Auer, Christoph and Dolfi, Michele and Nassar, Ahmed S and Staar, Peter W J}, + year = {2022} +} + +@InProceedings{TableFormer2022, + author = {Nassar, Ahmed and Livathinos, Nikolaos and Lysak, Maksym and Staar, Peter}, + title = {TableFormer: Table Structure Understanding With Transformers}, + booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, + month = {June}, + year = {2022}, + pages = {4614-4623}, + doi = {10.1109/CVPR52688.2022.00457} +} +``` diff --git a/config.json b/config.json new file mode 100644 index 0000000..4dc84c9 --- /dev/null +++ b/config.json @@ -0,0 +1,3 @@ +{ + "_name_or_path": "docling-models" +} \ No newline at end of file diff --git a/model_artifacts/layout/config.json b/model_artifacts/layout/config.json new file mode 100644 index 0000000..d24c213 --- /dev/null +++ b/model_artifacts/layout/config.json @@ -0,0 +1,130 @@ +{ + "activation_dropout": 0.0, + "activation_function": "silu", + "anchor_image_size": null, + "architectures": [ + "RTDetrForObjectDetection" + ], + "attention_dropout": 0.0, + 
"auxiliary_loss": true, + "backbone": null, + "backbone_config": { + "model_type": "rt_detr_resnet", + "out_features": [ + "stage2", + "stage3", + "stage4" + ], + "out_indices": [ + 2, + 3, + 4 + ] + }, + "backbone_kwargs": null, + "batch_norm_eps": 1e-05, + "box_noise_scale": 1.0, + "d_model": 256, + "decoder_activation_function": "relu", + "decoder_attention_heads": 8, + "decoder_ffn_dim": 1024, + "decoder_in_channels": [ + 256, + 256, + 256 + ], + "decoder_layers": 6, + "decoder_n_points": 4, + "disable_custom_kernels": true, + "dropout": 0.0, + "encode_proj_layers": [ + 2 + ], + "encoder_activation_function": "gelu", + "encoder_attention_heads": 8, + "encoder_ffn_dim": 1024, + "encoder_hidden_dim": 256, + "encoder_in_channels": [ + 512, + 1024, + 2048 + ], + "encoder_layers": 1, + "eos_coefficient": 0.0001, + "eval_size": null, + "feat_strides": [ + 8, + 16, + 32 + ], + "focal_loss_alpha": 0.75, + "focal_loss_gamma": 2.0, + "freeze_backbone_batch_norms": true, + "hidden_expansion": 1.0, + "id2label": { + "0": "background", + "1": "Caption", + "10": "Text", + "11": "Title", + "12": "Document Index", + "13": "Code", + "14": "Checkbox-Selected", + "15": "Checkbox-Unselected", + "16": "Form", + "17": "Key-Value Region", + "2": "Footnote", + "3": "Formula", + "4": "List-item", + "5": "Page-footer", + "6": "Page-header", + "7": "Picture", + "8": "Section-header", + "9": "Table" + }, + "initializer_bias_prior_prob": null, + "initializer_range": 0.01, + "is_encoder_decoder": true, + "label2id": { + "Caption": "1", + "Checkbox-Selected": "14", + "Checkbox-Unselected": "15", + "Code": "13", + "Document Index": "12", + "Footnote": "2", + "Form": "16", + "Formula": "3", + "Key-Value Region": "17", + "List-item": "4", + "Page-footer": "5", + "Page-header": "6", + "Picture": "7", + "Section-header": "8", + "Table": "9", + "Text": "10", + "Title": "11", + "background": "0" + }, + "label_noise_ratio": 0.5, + "layer_norm_eps": 1e-05, + "learn_initial_query": false, + 
"matcher_alpha": 0.25, + "matcher_bbox_cost": 5.0, + "matcher_class_cost": 2.0, + "matcher_gamma": 2.0, + "matcher_giou_cost": 2.0, + "model_type": "rt_detr", + "normalize_before": false, + "num_denoising": 100, + "num_feature_levels": 3, + "num_queries": 300, + "positional_encoding_temperature": 10000, + "torch_dtype": "float32", + "transformers_version": "4.46.2", + "use_focal_loss": true, + "use_pretrained_backbone": false, + "use_timm_backbone": false, + "weight_loss_bbox": 5.0, + "weight_loss_giou": 2.0, + "weight_loss_vfl": 1.0, + "with_box_refine": true +} diff --git a/model_artifacts/layout/model.safetensors b/model_artifacts/layout/model.safetensors new file mode 100644 index 0000000..d4f528b --- /dev/null +++ b/model_artifacts/layout/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31e60b4709571b613bc8736a9c982fb550d8d7a1809160a68a8282af60c8910b +size 171666216 diff --git a/model_artifacts/layout/preprocessor_config.json b/model_artifacts/layout/preprocessor_config.json new file mode 100644 index 0000000..fcdff16 --- /dev/null +++ b/model_artifacts/layout/preprocessor_config.json @@ -0,0 +1,26 @@ +{ + "do_convert_annotations": true, + "do_normalize": false, + "do_pad": false, + "do_rescale": true, + "do_resize": true, + "format": "coco_detection", + "image_mean": [ + 0.485, + 0.456, + 0.406 + ], + "image_processor_type": "RTDetrImageProcessor", + "image_std": [ + 0.229, + 0.224, + 0.225 + ], + "pad_size": null, + "resample": 2, + "rescale_factor": 0.00392156862745098, + "size": { + "height": 640, + "width": 640 + } +} diff --git a/model_artifacts/tableformer/accurate/tableformer_accurate.safetensors b/model_artifacts/tableformer/accurate/tableformer_accurate.safetensors new file mode 100644 index 0000000..6a035c6 --- /dev/null +++ b/model_artifacts/tableformer/accurate/tableformer_accurate.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:66d8912f290375d3466f91be2048030a16317e84c8f1f69d3dbd7adc6d6cd2a9 +size 212758388 diff --git a/model_artifacts/tableformer/accurate/tm_config.json b/model_artifacts/tableformer/accurate/tm_config.json new file mode 100644 index 0000000..5036aa1 --- /dev/null +++ b/model_artifacts/tableformer/accurate/tm_config.json @@ -0,0 +1,369 @@ +{ + "dataset": { + "type": "PTN_prepared", + "name": "PubTabNet_300_100_512", + "raw_data_dir": "./tests/test_data/ccs_api/model/", + "load_cells": true, + "bbox_format": "5plet", + "resized_image": 448, + "keep_AR": false, + "up_scaling_enabled": true, + "down_scaling_enabled": true, + "padding_mode": "null", + "padding_color": [ + 0, + 0, + 0 + ], + "image_normalization": { + "state": true, + "mean": [ + 0.94247851, + 0.94254675, + 0.94292611 + ], + "std": [ + 0.17910956, + 0.17940403, + 0.17931663 + ] + }, + "color_jitter": true, + "rand_crop": true, + "rand_pad": true, + "image_grayscale": false + }, + "model": { + "type": "TableModel04_rs", + "name": "14_128_256_4_true", + "backbone": "resnet18", + "enc_image_size": 28, + "tag_embed_dim": 16, + "hidden_dim": 512, + "tag_decoder_dim": 512, + "bbox_embed_dim": 256, + "tag_attention_dim": 256, + "bbox_attention_dim": 512, + "enc_layers": 6, + "dec_layers": 6, + "nheads": 8, + "dropout": 0.1, + "bbox_classes": 2 + }, + "train": { + "bbox": true + }, + "predict": { + "max_steps": 1024, + "beam_size": 5, + "bbox": true, + "pdf_cell_iou_thres": 0.05, + "padding": false, + "padding_size": 50, + "disable_post_process": false, + "profiling": false + }, + "debug": { + "save_debug_images": false + }, + "dataset_wordmap": { + "word_map_tag": { + "<pad>": 0, + "<unk>": 1, + "<start>": 2, + "<end>": 3, + "ecel": 4, + "fcel": 5, + "lcel": 6, + "ucel": 7, + "xcel": 8, + "nl": 9, + "ched": 10, + "rhed": 11, + "srow": 12 + }, + "word_map_cell": { + " ": 13, + "!": 179, + "\"": 126, + "#": 101, + "$": 119, + "%": 18, + "&": 114, + "'": 108, + "(": 29, + ")": 32, + "*": 26, + "+": 97, + ",": 71, + "-": 63, + ".": 34, 
+ "/": 66, + "0": 33, + "1": 36, + "2": 43, + "3": 41, + "4": 45, + "5": 17, + "6": 37, + "7": 35, + "8": 40, + "9": 16, + ":": 88, + ";": 92, + "<": 73, + "": 9, + "": 23, + "": 219, + "": 233, + "": 94, + "": 77, + "": 151, + "": 1, + "": 280, + "": 21, + "": 218, + "": 0, + "": 279, + "": 232, + "": 93, + "": 75, + "": 150, + "": 278, + "=": 99, + ">": 39, + "?": 96, + "@": 125, + "A": 27, + "B": 86, + "C": 19, + "D": 57, + "E": 64, + "F": 47, + "G": 44, + "H": 10, + "I": 20, + "J": 80, + "K": 81, + "L": 52, + "M": 46, + "N": 69, + "O": 65, + "P": 62, + "Q": 59, + "R": 60, + "S": 58, + "T": 48, + "U": 55, + "V": 2, + "W": 83, + "X": 104, + "Y": 89, + "Z": 113, + "[": 70, + "\\": 165, + "]": 72, + "^": 132, + "_": 84, + "`": 196, + "a": 3, + "b": 6, + "c": 54, + "d": 12, + "e": 8, + "f": 50, + "g": 28, + "h": 56, + "i": 5, + "j": 82, + "k": 95, + "l": 7, + "m": 30, + "n": 31, + "o": 15, + "p": 22, + "q": 67, + "r": 4, + "s": 51, + "t": 14, + "u": 25, + "v": 24, + "w": 53, + "x": 61, + "y": 49, + "z": 11, + "{": 158, + "|": 139, + "}": 159, + "~": 147, + "\u00a2": 203, + "\u00a3": 162, + "\u00a4": 220, + "\u00a5": 176, + "\u00a7": 142, + "\u00a9": 268, + "\u00ab": 239, + "\u00ad": 275, + "\u00ae": 130, + "\u00b0": 100, + "\u00b1": 79, + "\u00b6": 171, + "\u00b7": 137, + "\u00bb": 240, + "\u00d7": 118, + "\u00d8": 192, + "\u00df": 197, + "\u00e6": 261, + "\u00f7": 225, + "\u00f8": 163, + "\u0131": 242, + "\u0142": 267, + "\u01c2": 211, + "\u025b": 223, + "\u02b9": 248, + "\u02c2": 195, + "\u02c3": 208, + "\u02c6": 253, + "\u0300": 209, + "\u0301": 131, + "\u0302": 138, + "\u0303": 156, + "\u0304": 152, + "\u0306": 222, + "\u0307": 247, + "\u0308": 103, + "\u030a": 102, + "\u030c": 254, + "\u0327": 155, + "\u0328": 269, + "\u0338": 170, + "\u0391": 173, + "\u0392": 169, + "\u0393": 180, + "\u0394": 85, + "\u0398": 243, + "\u0399": 271, + "\u039b": 272, + "\u03a0": 213, + "\u03a3": 185, + "\u03a6": 148, + "\u03a7": 212, + "\u03a8": 141, + "\u03a9": 161, + "\u03b1": 
90, + "\u03b2": 107, + "\u03b3": 110, + "\u03b4": 153, + "\u03b5": 166, + "\u03b6": 178, + "\u03b7": 146, + "\u03b8": 186, + "\u03b9": 229, + "\u03ba": 164, + "\u03bb": 91, + "\u03bc": 78, + "\u03bd": 230, + "\u03be": 244, + "\u03c0": 127, + "\u03c1": 149, + "\u03c3": 116, + "\u03c4": 198, + "\u03c5": 189, + "\u03c6": 140, + "\u03c7": 124, + "\u03c8": 216, + "\u03c9": 167, + "\u0410": 273, + "\u0421": 194, + "\u115f": 217, + "\u200b": 265, + "\u2010": 117, + "\u2012": 135, + "\u2013": 42, + "\u2014": 106, + "\u2015": 228, + "\u2016": 259, + "\u2018": 123, + "\u2019": 121, + "\u201c": 87, + "\u201d": 115, + "\u201e": 245, + "\u2020": 109, + "\u2021": 129, + "\u2022": 128, + "\u2028": 190, + "\u2030": 154, + "\u2032": 68, + "\u203b": 224, + "\u2044": 188, + "\u204e": 199, + "\u2061": 200, + "\u20ac": 184, + "\u2190": 202, + "\u2191": 112, + "\u2192": 120, + "\u2193": 111, + "\u2194": 183, + "\u21d1": 266, + "\u21d2": 264, + "\u21d3": 255, + "\u2205": 215, + "\u2206": 175, + "\u2208": 262, + "\u2211": 160, + "\u2212": 76, + "\u2216": 206, + "\u2217": 105, + "\u2218": 246, + "\u2219": 236, + "\u221a": 187, + "\u221e": 207, + "\u2223": 260, + "\u2225": 193, + "\u2227": 182, + "\u2229": 256, + "\u222b": 258, + "\u223c": 98, + "\u2248": 210, + "\u2264": 38, + "\u2265": 74, + "\u2266": 214, + "\u2267": 181, + "\u2295": 263, + "\u22c5": 174, + "\u22c6": 191, + "\u22ee": 277, + "\u22ef": 270, + "\u2500": 205, + "\u2551": 231, + "\u25a0": 250, + "\u25a1": 177, + "\u25aa": 145, + "\u25b2": 136, + "\u25b3": 143, + "\u25bc": 251, + "\u25c6": 226, + "\u25ca": 235, + "\u25cb": 227, + "\u25cf": 172, + "\u25e6": 274, + "\u2605": 204, + "\u2606": 144, + "\u2640": 133, + "\u2642": 134, + "\u2663": 252, + "\u2666": 157, + "\u266f": 221, + "\u2713": 122, + "\u2714": 249, + "\u2717": 201, + "\u2794": 168, + "\u27a2": 276, + "\u2a7d": 234, + "\u2a7e": 241, + "\u3008": 237, + "\u3009": 238, + "\ufeff": 257 + } + } +} diff --git 
a/model_artifacts/tableformer/fast/tableformer_fast.safetensors b/model_artifacts/tableformer/fast/tableformer_fast.safetensors new file mode 100644 index 0000000..f6653fe --- /dev/null +++ b/model_artifacts/tableformer/fast/tableformer_fast.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3119563aab5a7c96fda4d621119b63fd8806272b86c30936d15507616422f718 +size 145453276 diff --git a/model_artifacts/tableformer/fast/tm_config.json b/model_artifacts/tableformer/fast/tm_config.json new file mode 100644 index 0000000..d02c588 --- /dev/null +++ b/model_artifacts/tableformer/fast/tm_config.json @@ -0,0 +1,369 @@ +{ + "dataset": { + "type": "PTN_prepared", + "name": "PubTabNet_300_100_512", + "raw_data_dir": "./tests/test_data/ccs_api/model/", + "load_cells": true, + "bbox_format": "5plet", + "resized_image": 448, + "keep_AR": false, + "up_scaling_enabled": true, + "down_scaling_enabled": true, + "padding_mode": "null", + "padding_color": [ + 0, + 0, + 0 + ], + "image_normalization": { + "state": true, + "mean": [ + 0.94247851, + 0.94254675, + 0.94292611 + ], + "std": [ + 0.17910956, + 0.17940403, + 0.17931663 + ] + }, + "color_jitter": true, + "rand_crop": true, + "rand_pad": true, + "image_grayscale": false + }, + "model": { + "type": "TableModel04_rs", + "name": "14_128_256_4_true", + "backbone": "resnet18", + "enc_image_size": 28, + "tag_embed_dim": 16, + "hidden_dim": 512, + "tag_decoder_dim": 512, + "bbox_embed_dim": 256, + "tag_attention_dim": 256, + "bbox_attention_dim": 512, + "enc_layers": 4, + "dec_layers": 2, + "nheads": 8, + "dropout": 0.1, + "bbox_classes": 2 + }, + "train": { + "bbox": true + }, + "predict": { + "max_steps": 1024, + "beam_size": 5, + "bbox": true, + "pdf_cell_iou_thres": 0.05, + "padding": false, + "padding_size": 50, + "disable_post_process": false, + "profiling": false + }, + "debug": { + "save_debug_images": false + }, + "dataset_wordmap": { + "word_map_tag": { + "<pad>": 0, + "<unk>": 1, + "<start>": 2, + "<end>": 3, + 
"ecel": 4, + "fcel": 5, + "lcel": 6, + "ucel": 7, + "xcel": 8, + "nl": 9, + "ched": 10, + "rhed": 11, + "srow": 12 + }, + "word_map_cell": { + " ": 13, + "!": 179, + "\"": 126, + "#": 101, + "$": 119, + "%": 18, + "&": 114, + "'": 108, + "(": 29, + ")": 32, + "*": 26, + "+": 97, + ",": 71, + "-": 63, + ".": 34, + "/": 66, + "0": 33, + "1": 36, + "2": 43, + "3": 41, + "4": 45, + "5": 17, + "6": 37, + "7": 35, + "8": 40, + "9": 16, + ":": 88, + ";": 92, + "<": 73, + "": 9, + "": 23, + "": 219, + "": 233, + "": 94, + "": 77, + "": 151, + "": 1, + "": 280, + "": 21, + "": 218, + "": 0, + "": 279, + "": 232, + "": 93, + "": 75, + "": 150, + "": 278, + "=": 99, + ">": 39, + "?": 96, + "@": 125, + "A": 27, + "B": 86, + "C": 19, + "D": 57, + "E": 64, + "F": 47, + "G": 44, + "H": 10, + "I": 20, + "J": 80, + "K": 81, + "L": 52, + "M": 46, + "N": 69, + "O": 65, + "P": 62, + "Q": 59, + "R": 60, + "S": 58, + "T": 48, + "U": 55, + "V": 2, + "W": 83, + "X": 104, + "Y": 89, + "Z": 113, + "[": 70, + "\\": 165, + "]": 72, + "^": 132, + "_": 84, + "`": 196, + "a": 3, + "b": 6, + "c": 54, + "d": 12, + "e": 8, + "f": 50, + "g": 28, + "h": 56, + "i": 5, + "j": 82, + "k": 95, + "l": 7, + "m": 30, + "n": 31, + "o": 15, + "p": 22, + "q": 67, + "r": 4, + "s": 51, + "t": 14, + "u": 25, + "v": 24, + "w": 53, + "x": 61, + "y": 49, + "z": 11, + "{": 158, + "|": 139, + "}": 159, + "~": 147, + "\u00a2": 203, + "\u00a3": 162, + "\u00a4": 220, + "\u00a5": 176, + "\u00a7": 142, + "\u00a9": 268, + "\u00ab": 239, + "\u00ad": 275, + "\u00ae": 130, + "\u00b0": 100, + "\u00b1": 79, + "\u00b6": 171, + "\u00b7": 137, + "\u00bb": 240, + "\u00d7": 118, + "\u00d8": 192, + "\u00df": 197, + "\u00e6": 261, + "\u00f7": 225, + "\u00f8": 163, + "\u0131": 242, + "\u0142": 267, + "\u01c2": 211, + "\u025b": 223, + "\u02b9": 248, + "\u02c2": 195, + "\u02c3": 208, + "\u02c6": 253, + "\u0300": 209, + "\u0301": 131, + "\u0302": 138, + "\u0303": 156, + "\u0304": 152, + "\u0306": 222, + "\u0307": 247, + "\u0308": 103, + 
"\u030a": 102, + "\u030c": 254, + "\u0327": 155, + "\u0328": 269, + "\u0338": 170, + "\u0391": 173, + "\u0392": 169, + "\u0393": 180, + "\u0394": 85, + "\u0398": 243, + "\u0399": 271, + "\u039b": 272, + "\u03a0": 213, + "\u03a3": 185, + "\u03a6": 148, + "\u03a7": 212, + "\u03a8": 141, + "\u03a9": 161, + "\u03b1": 90, + "\u03b2": 107, + "\u03b3": 110, + "\u03b4": 153, + "\u03b5": 166, + "\u03b6": 178, + "\u03b7": 146, + "\u03b8": 186, + "\u03b9": 229, + "\u03ba": 164, + "\u03bb": 91, + "\u03bc": 78, + "\u03bd": 230, + "\u03be": 244, + "\u03c0": 127, + "\u03c1": 149, + "\u03c3": 116, + "\u03c4": 198, + "\u03c5": 189, + "\u03c6": 140, + "\u03c7": 124, + "\u03c8": 216, + "\u03c9": 167, + "\u0410": 273, + "\u0421": 194, + "\u115f": 217, + "\u200b": 265, + "\u2010": 117, + "\u2012": 135, + "\u2013": 42, + "\u2014": 106, + "\u2015": 228, + "\u2016": 259, + "\u2018": 123, + "\u2019": 121, + "\u201c": 87, + "\u201d": 115, + "\u201e": 245, + "\u2020": 109, + "\u2021": 129, + "\u2022": 128, + "\u2028": 190, + "\u2030": 154, + "\u2032": 68, + "\u203b": 224, + "\u2044": 188, + "\u204e": 199, + "\u2061": 200, + "\u20ac": 184, + "\u2190": 202, + "\u2191": 112, + "\u2192": 120, + "\u2193": 111, + "\u2194": 183, + "\u21d1": 266, + "\u21d2": 264, + "\u21d3": 255, + "\u2205": 215, + "\u2206": 175, + "\u2208": 262, + "\u2211": 160, + "\u2212": 76, + "\u2216": 206, + "\u2217": 105, + "\u2218": 246, + "\u2219": 236, + "\u221a": 187, + "\u221e": 207, + "\u2223": 260, + "\u2225": 193, + "\u2227": 182, + "\u2229": 256, + "\u222b": 258, + "\u223c": 98, + "\u2248": 210, + "\u2264": 38, + "\u2265": 74, + "\u2266": 214, + "\u2267": 181, + "\u2295": 263, + "\u22c5": 174, + "\u22c6": 191, + "\u22ee": 277, + "\u22ef": 270, + "\u2500": 205, + "\u2551": 231, + "\u25a0": 250, + "\u25a1": 177, + "\u25aa": 145, + "\u25b2": 136, + "\u25b3": 143, + "\u25bc": 251, + "\u25c6": 226, + "\u25ca": 235, + "\u25cb": 227, + "\u25cf": 172, + "\u25e6": 274, + "\u2605": 204, + "\u2606": 144, + "\u2640": 133, + 
"\u2642": 134, + "\u2663": 252, + "\u2666": 157, + "\u266f": 221, + "\u2713": 122, + "\u2714": 249, + "\u2717": 201, + "\u2794": 168, + "\u27a2": 276, + "\u2a7d": 234, + "\u2a7e": 241, + "\u3008": 237, + "\u3009": 238, + "\ufeff": 257 + } + } +}