first commit
parent a7efa2bf4b
commit 97dd36a344

README.md (63 lines changed)
@@ -1,3 +1,62 @@
---
license: apache-2.0
language: en
---

# BART (base-sized model)

BART model pre-trained on the English language. It was introduced in the paper [BART: Denoising Sequence-to-Sequence Pre-training for Natural Language Generation, Translation, and Comprehension](https://arxiv.org/abs/1910.13461) by Lewis et al. and first released in [this repository](https://github.com/pytorch/fairseq/tree/master/examples/bart).

Disclaimer: The team releasing BART did not write a model card for this model, so this model card has been written by the Hugging Face team.

## Model description

BART is a transformer encoder-decoder (seq2seq) model with a bidirectional (BERT-like) encoder and an autoregressive (GPT-like) decoder. BART is pre-trained by (1) corrupting text with an arbitrary noising function, and (2) learning a model to reconstruct the original text.

BART is particularly effective when fine-tuned for text generation (e.g. summarization, translation) but also works well for comprehension tasks (e.g. text classification, question answering).

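To make the encoder-decoder split concrete, here is a small sketch that is not part of the original model card: `BartModel` exposes both the bidirectional encoder's and the autoregressive decoder's final hidden states. The example sentence is arbitrary, and the printed shapes are illustrative assumptions based on the base configuration (`d_model = 768`).

```python
import torch
from transformers import BartTokenizer, BartModel

tokenizer = BartTokenizer.from_pretrained('facebook/bart-base')
model = BartModel.from_pretrained('facebook/bart-base')

inputs = tokenizer("BART pairs a bidirectional encoder with an autoregressive decoder.", return_tensors="pt")
with torch.no_grad():
    outputs = model(**inputs)

# Final hidden states of the bidirectional (BERT-like) encoder
print(outputs.encoder_last_hidden_state.shape)  # e.g. torch.Size([1, 16, 768])
# Final hidden states of the autoregressive (GPT-like) decoder
print(outputs.last_hidden_state.shape)          # e.g. torch.Size([1, 16, 768])
```
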
## Intended uses & limitations

You can use the raw model for text infilling. However, the model is mostly meant to be fine-tuned on a supervised dataset. See the [model hub](https://huggingface.co/models?search=bart) to look for fine-tuned versions on a task that interests you.

### How to use

Here is how to use this model in PyTorch:

```python
from transformers import BartTokenizer, BartModel

# Load the pre-trained tokenizer and base model
tokenizer = BartTokenizer.from_pretrained('facebook/bart-base')
model = BartModel.from_pretrained('facebook/bart-base')

# Encode a sentence and run a forward pass
inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
outputs = model(**inputs)

# Final hidden states of the decoder, one 768-dimensional vector per token
last_hidden_states = outputs.last_hidden_state
```
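
The text-infilling use mentioned under *Intended uses & limitations* can be tried with `BartForConditionalGeneration`. This is a minimal sketch that is not part of the original card; the masked sentence and the generation settings are arbitrary choices, and output quality from the raw base model will vary.

```python
from transformers import BartTokenizer, BartForConditionalGeneration

tokenizer = BartTokenizer.from_pretrained('facebook/bart-base')
model = BartForConditionalGeneration.from_pretrained('facebook/bart-base')

# Mask a span with the <mask> token and let the model regenerate the sentence
text = "My friends are <mask> but they eat too many carbs."
inputs = tokenizer(text, return_tensors="pt")
generated_ids = model.generate(inputs["input_ids"], num_beams=4, max_length=30)
print(tokenizer.decode(generated_ids[0], skip_special_tokens=True))
```
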

### BibTeX entry and citation info

```bibtex
@article{DBLP:journals/corr/abs-1910-13461,
  author    = {Mike Lewis and
               Yinhan Liu and
               Naman Goyal and
               Marjan Ghazvininejad and
               Abdelrahman Mohamed and
               Omer Levy and
               Veselin Stoyanov and
               Luke Zettlemoyer},
  title     = {{BART:} Denoising Sequence-to-Sequence Pre-training for Natural Language
               Generation, Translation, and Comprehension},
  journal   = {CoRR},
  volume    = {abs/1910.13461},
  year      = {2019},
  url       = {http://arxiv.org/abs/1910.13461},
  eprinttype = {arXiv},
  eprint    = {1910.13461},
  timestamp = {Thu, 31 Oct 2019 14:02:26 +0100},
  biburl    = {https://dblp.org/rec/journals/corr/abs-1910-13461.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
```

config.json
@@ -0,0 +1,75 @@
{
  "_name_or_path": "bart-base",
  "activation_dropout": 0.1,
  "activation_function": "gelu",
  "add_bias_logits": false,
  "add_final_layer_norm": false,
  "architectures": [
    "BartModel"
  ],
  "attention_dropout": 0.1,
  "bos_token_id": 0,
  "classif_dropout": 0.1,
  "classifier_dropout": 0.0,
  "d_model": 768,
  "decoder_attention_heads": 12,
  "decoder_ffn_dim": 3072,
  "decoder_layerdrop": 0.0,
  "decoder_layers": 6,
  "decoder_start_token_id": 2,
  "dropout": 0.1,
  "early_stopping": true,
  "encoder_attention_heads": 12,
  "encoder_ffn_dim": 3072,
  "encoder_layerdrop": 0.0,
  "encoder_layers": 6,
  "eos_token_id": 2,
  "forced_eos_token_id": 2,
  "forced_bos_token_id": 0,
  "gradient_checkpointing": false,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2"
  },
  "init_std": 0.02,
  "is_encoder_decoder": true,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2
  },
  "max_position_embeddings": 1024,
  "model_type": "bart",
  "no_repeat_ngram_size": 3,
  "normalize_before": false,
  "normalize_embedding": true,
  "num_beams": 4,
  "num_hidden_layers": 6,
  "pad_token_id": 1,
  "scale_embedding": false,
  "task_specific_params": {
    "summarization": {
      "length_penalty": 1.0,
      "max_length": 128,
      "min_length": 12,
      "num_beams": 4
    },
    "summarization_cnn": {
      "length_penalty": 2.0,
      "max_length": 142,
      "min_length": 56,
      "num_beams": 4
    },
    "summarization_xsum": {
      "length_penalty": 1.0,
      "max_length": 62,
      "min_length": 11,
      "num_beams": 6
    }
  },
  "torch_dtype": "float32",
  "transformers_version": "4.12.0.dev0",
  "use_cache": true,
  "vocab_size": 50265
}
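
For reference (not part of the original commit), the values above can be inspected programmatically through `BartConfig`. This is a minimal sketch assuming the `transformers` library and the published `facebook/bart-base` checkpoint.

```python
from transformers import BartConfig

# Load the configuration that ships with the checkpoint
config = BartConfig.from_pretrained('facebook/bart-base')

print(config.d_model)                                 # 768
print(config.encoder_layers, config.decoder_layers)   # 6 6
print(config.max_position_embeddings)                 # 1024
print(config.task_specific_params["summarization_cnn"])  # beam-search defaults for CNN/DM-style summarization
```
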

Binary and oversized files in this commit are not shown.