Merge pull request #3 from erenup/run_multiple_choice_merge
Run multiple choice merge
erenup authored Sep 18, 2019
2 parents 5a81e79 + 46ffc28 commit b57bfb5
Showing 20 changed files with 40 additions and 1,433 deletions.
2 changes: 1 addition & 1 deletion README.md
@@ -288,7 +288,7 @@ This is the model provided as `bert-large-uncased-whole-word-masking-finetuned-s
### `run_generation.py`: Text generation with GPT, GPT-2, Transformer-XL and XLNet

A conditional generation script is also included to generate text from a prompt.
-The generation script includes the [tricks](https://github.com/rusiaaman/XLNet-gen#methodology) proposed by by Aman Rusia to get high quality generation with memory models like Transformer-XL and XLNet (include a predefined text to make short inputs longer).
+The generation script includes the [tricks](https://github.com/rusiaaman/XLNet-gen#methodology) proposed by Aman Rusia to get high quality generation with memory models like Transformer-XL and XLNet (include a predefined text to make short inputs longer).

Here is how to run the script with the small version of OpenAI GPT-2 model:

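For context on the README passage above: a minimal sketch of prompt-conditioned sampling with GPT-2, not the `run_generation.py` script itself. The `transformers` package name (the contemporaneous release shipped as `pytorch_transformers`), the prompt, and the plain top-k sampling are assumptions standing in for the script's full top-k/top-p and temperature handling.

```python
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer

tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = GPT2LMHeadModel.from_pretrained("gpt2")
model.eval()

prompt = "The script generates a continuation of"
input_ids = torch.tensor([tokenizer.encode(prompt)])

with torch.no_grad():
    for _ in range(20):                            # generate 20 tokens
        logits = model(input_ids)[0]               # (batch, seq_len, vocab_size)
        next_token_logits = logits[0, -1, :]
        # plain top-k sampling; the script adds top-p filtering and temperature
        top_logits, top_indices = torch.topk(next_token_logits, k=40)
        probs = torch.softmax(top_logits, dim=-1)
        next_token = top_indices[torch.multinomial(probs, num_samples=1)]
        input_ids = torch.cat([input_ids, next_token.view(1, 1)], dim=1)

print(tokenizer.decode(input_ids[0].tolist()))
```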
10 changes: 5 additions & 5 deletions docs/source/pretrained_models.rst
@@ -79,10 +79,10 @@ Here is the full list of the currently provided pretrained models together with
| | | | XLM English model |
| +------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------+
| | ``xlm-mlm-ende-1024`` | | 6-layer, 1024-hidden, 8-heads |
-| | | | XLM English-German Multi-language model |
+| | | | XLM English-German model trained on the concatenation of English and German wikipedia |
| +------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------+
| | ``xlm-mlm-enfr-1024`` | | 6-layer, 1024-hidden, 8-heads |
-| | | | XLM English-French Multi-language model |
+| | | | XLM English-French model trained on the concatenation of English and French wikipedia |
| +------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------+
| | ``xlm-mlm-enro-1024`` | | 6-layer, 1024-hidden, 8-heads |
| | | | XLM English-Romanian Multi-language model |
@@ -93,11 +93,11 @@ Here is the full list of the currently provided pretrained models together with
| | ``xlm-mlm-tlm-xnli15-1024`` | | 12-layer, 1024-hidden, 8-heads |
| | | | XLM Model pre-trained with MLM + TLM on the `15 XNLI languages <https://github.com/facebookresearch/XNLI>`__. |
| +------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------+
-| | ``xlm-clm-enfr-1024`` | | 12-layer, 1024-hidden, 8-heads |
-| | | | XLM English model trained with CLM (Causal Language Modeling) |
+| | ``xlm-clm-enfr-1024`` | | 6-layer, 1024-hidden, 8-heads |
+| | | | XLM English-French model trained with CLM (Causal Language Modeling) on the concatenation of English and French wikipedia |
| +------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------+
| | ``xlm-clm-ende-1024`` | | 6-layer, 1024-hidden, 8-heads |
-| | | | XLM English-German Multi-language model trained with CLM (Causal Language Modeling) |
+| | | | XLM English-German model trained with CLM (Causal Language Modeling) on the concatenation of English and German wikipedia |
+-------------------+------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------+
| RoBERTa | ``roberta-base`` | | 12-layer, 768-hidden, 12-heads, 125M parameters |
| | | | RoBERTa using the BERT-base architecture |
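The rows above only re-describe existing checkpoints. As a hedged illustration, a sketch of loading one of the listed XLM checkpoints; the class names exist in the library, but the `transformers` package name and the example sentence are assumptions.

```python
import torch
from transformers import XLMTokenizer, XLMWithLMHeadModel

# Load one of the checkpoints listed in the table above.
tokenizer = XLMTokenizer.from_pretrained("xlm-mlm-enfr-1024")
model = XLMWithLMHeadModel.from_pretrained("xlm-mlm-enfr-1024")
model.eval()

input_ids = torch.tensor([tokenizer.encode("Hello, my dog is cute")])
with torch.no_grad():
    lm_logits = model(input_ids)[0]   # (batch, seq_len, vocab_size) language-modeling logits
print(lm_logits.shape)
```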
5 changes: 5 additions & 0 deletions examples/contrib/README.md
@@ -0,0 +1,5 @@
+# Community contributed examples
+
+This folder contains examples which are not actively maintained (mostly contributed by the community).
+
+Using these examples together with a recent version of the library usually requires to make small (sometimes big) adaptations to get the scripts working.
[diff of another changed file; filename not captured]
@@ -153,9 +153,11 @@ def main():
# This loading functions also add new tokens and embeddings called `special tokens`
# These new embeddings will be fine-tuned on the RocStories dataset
special_tokens = ['_start_', '_delimiter_', '_classify_']
-tokenizer = OpenAIGPTTokenizer.from_pretrained(args.model_name, special_tokens=special_tokens)
-special_tokens_ids = list(tokenizer.convert_tokens_to_ids(token) for token in special_tokens)
-model = OpenAIGPTDoubleHeadsModel.from_pretrained(args.model_name, num_special_tokens=len(special_tokens))
+tokenizer = OpenAIGPTTokenizer.from_pretrained(args.model_name)
+tokenizer.add_tokens(special_tokens)
+special_tokens_ids = tokenizer.convert_tokens_to_ids(special_tokens)
+model = OpenAIGPTDoubleHeadsModel.from_pretrained(args.model_name)
+model.resize_token_embeddings(len(tokenizer))
model.to(device)

# Load and encode the datasets
@@ -221,7 +223,7 @@ def tokenize_and_encode(obj):
for step, batch in enumerate(tqdm_bar):
batch = tuple(t.to(device) for t in batch)
input_ids, mc_token_ids, lm_labels, mc_labels = batch
-losses = model(input_ids, mc_token_ids, lm_labels, mc_labels)
+losses = model(input_ids, mc_token_ids=mc_token_ids, lm_labels=lm_labels, mc_labels=mc_labels)
loss = args.lm_coef * losses[0] + losses[1]
loss.backward()
scheduler.step()
@@ -258,7 +260,7 @@ def tokenize_and_encode(obj):
batch = tuple(t.to(device) for t in batch)
input_ids, mc_token_ids, lm_labels, mc_labels = batch
with torch.no_grad():
-_, mc_loss, _, mc_logits = model(input_ids, mc_token_ids, lm_labels, mc_labels)
+_, mc_loss, _, mc_logits = model(input_ids, mc_token_ids=mc_token_ids, lm_labels=lm_labels, mc_labels=mc_labels)

mc_logits = mc_logits.detach().cpu().numpy()
mc_labels = mc_labels.to('cpu').numpy()
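The hunks above move this example from the old `special_tokens`/`num_special_tokens` keyword arguments to the generic token-adding API. A minimal sketch of that pattern in isolation; the `transformers` package name and the `openai-gpt` checkpoint are assumptions, while the special-token strings come from the diff itself.

```python
from transformers import OpenAIGPTTokenizer, OpenAIGPTDoubleHeadsModel

special_tokens = ['_start_', '_delimiter_', '_classify_']

tokenizer = OpenAIGPTTokenizer.from_pretrained("openai-gpt")
num_added = tokenizer.add_tokens(special_tokens)          # register the new tokens with the tokenizer
special_tokens_ids = tokenizer.convert_tokens_to_ids(special_tokens)

model = OpenAIGPTDoubleHeadsModel.from_pretrained("openai-gpt")
model.resize_token_embeddings(len(tokenizer))             # grow the embedding matrix to match the tokenizer

print(num_added, special_tokens_ids)
# The updated training/eval code then passes tensors by keyword, e.g.
# model(input_ids, mc_token_ids=mc_token_ids, lm_labels=lm_labels, mc_labels=mc_labels)
```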
[diff of another changed file; filename not captured]
@@ -670,4 +670,4 @@ def main():


if __name__ == "__main__":
-main()
+main()
[diff of another changed file; filename not captured]
@@ -113,7 +113,7 @@ def evaluate(eval_iter):
with torch.no_grad():
mems = None
for idx, (data, target, seq_len) in enumerate(eval_iter):
-ret = model(data, target, mems)
+ret = model(data, lm_labels=target, mems=mems)
loss, _, mems = ret
loss = loss.mean()
total_loss += seq_len * loss.item()
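The one-line change above switches the Transformer-XL evaluation call to keyword arguments. For context, a sketch of the memory-carrying evaluation loop it sits in, reconstructed from the hunk; the function wrapper, device handling, and perplexity conversion are assumptions, and the keyword names mirror the diff rather than any particular library release.

```python
import math
import torch

def evaluate(model, eval_iter, device="cpu"):
    """Carry Transformer-XL memory across segments and report perplexity."""
    model.eval()
    total_loss, total_len = 0.0, 0
    with torch.no_grad():
        mems = None
        for data, target, seq_len in eval_iter:
            data, target = data.to(device), target.to(device)
            # keyword arguments as in the updated line of the diff
            ret = model(data, lm_labels=target, mems=mems)
            loss, _, mems = ret            # per-token losses, (ignored), updated memory
            loss = loss.mean()
            total_loss += seq_len * loss.item()
            total_len += seq_len
    return math.exp(total_loss / total_len)
```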
64 changes: 0 additions & 64 deletions examples/lm_finetuning/README.md

This file was deleted.

[diffs for the remaining changed files were not loaded]
