cecilemacaire committed
Commit 3f18d74
1 Parent(s): 9725eed

Update README.md

Files changed (1)
  1. README.md +11 -13
README.md CHANGED
@@ -12,8 +12,12 @@ tags:
 metrics:
 - bleu
 widget:
-- text: "Je vais à la piscine avec ma maman aujourd'hui"
-  example_title : "A simple sentence"
+- text: "je mange une pomme"
+  example_title: "A simple sentence"
+- text: "je ne pense pas à toi"
+  example_title: "Sentence with a negation"
+- text: "il y a 2 jours, les gendarmes ont vérifié ma licence"
+  example_title: "Sentence with a polylexical term"
 ---

 # t2p-t5-large-orféo
@@ -37,25 +41,19 @@ example_title : "A simple sentence"
 ## Using t2p-t5-large-orféo model with HuggingFace transformers

 ```python
-import torch
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
-import numpy as np

 source_lang = "fr"
 target_lang = "frp"
 max_input_length = 128
 max_target_length = 128

-def load_model(checkpoint):
-    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
-    model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
-    model = model.to("cuda:0")
-    return tokenizer, model
+tokenizer = AutoTokenizer.from_pretrained(checkpoint)
+model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)

-def generate(sentence, tokenizer, model):
-    inputs = tokenizer("Je mange une pomme", return_tensors="pt").input_ids
-    outputs = model.generate(inputs.to("cuda:0"), max_new_tokens=40, do_sample=True, top_k=30, top_p=0.95)
-    pred = tokenizer.decode(outputs[0], skip_special_tokens=True)
+inputs = tokenizer("Je mange une pomme", return_tensors="pt").input_ids
+outputs = model.generate(inputs.to("cuda:0"), max_new_tokens=40, do_sample=True, top_k=30, top_p=0.95)
+pred = tokenizer.decode(outputs[0], skip_special_tokens=True)
 ```

  - **Language(s):** French
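
Note that the snippet as committed is not self-contained: it references a `checkpoint` variable that is never defined, and it still moves the inputs to `cuda:0` even though the `model.to("cuda:0")` call was removed, so model and inputs end up on different devices when a GPU is present. A minimal runnable sketch follows; the checkpoint id is an assumption inferred from the repository name, and device placement is resolved at runtime so the code also runs on CPU.

```python
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Assumed checkpoint id, inferred from the repository name; adjust if it differs.
checkpoint = "cecilemacaire/t2p-t5-large-orfeo"

tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)

# Keep model and inputs on the same device: GPU if one is available, else CPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
model = model.to(device)

inputs = tokenizer("Je mange une pomme", return_tensors="pt").input_ids
outputs = model.generate(inputs.to(device), max_new_tokens=40, do_sample=True, top_k=30, top_p=0.95)
pred = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(pred)
```

Since `do_sample=True` makes generation stochastic, dropping it in favor of greedy or beam-search decoding may be preferable when deterministic output is wanted.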