Update README.md
README.md CHANGED
````diff
@@ -23,9 +23,23 @@ set a seed for reproducibility:
 
 ```python
 >>> from transformers import pipeline, set_seed
+>>> # It is important to include bad_words_ids=[[0,2]] if you want this model to stay on topic.
+>>> # Otherwise, the model may generate start and end tokens followed by text that is not relevant to
+>>> # the previous text.
 >>> generator = pipeline('text-generation', model='olm/olm-gpt2-oct-2022')
 >>> set_seed(42)
+>>> # This example also illustrates that sometimes our model generates
+>>> # bloggy/spammy/webby things, even though it gets higher evaluation results
+>>> # than the original GPT-2 across a variety of benchmarks. See the first output.
 >>> generator("Hello, I'm a language model,", max_length=30, num_return_sequences=5)
+Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
+[
+{'generated_text': "Hello, I'm a language model, but you can take me if I want.\nReplyDelete\nReplies\nReply\nAnonymous October 17, 2011"},
+{'generated_text': "Hello, I'm a language model, and here's some useful news for you all: The release date for the new release of"},
+{'generated_text': "Hello, I'm a language model, I'm not a developer or anybody who's working on those. I'm a freelancer... I"},
+{'generated_text': "Hello, I'm a language model, a language analyst, and a language system designer. I'm just curious about the"},
+{'generated_text': "Hello, I'm a language model, I'm passionate about languages, but I don't understand how my system works, the interaction"}
+]
 ```
 
 Here is how to use this model to get the features of a given text in PyTorch:
@@ -33,7 +47,7 @@ Here is how to use this model to get the features of a given text in PyTorch:
 ```python
 from transformers import AutoTokenizer, AutoModelForCausalLM
 tokenizer = AutoTokenizer.from_pretrained('olm/olm-gpt2-oct-2022')
-model = AutoModelForCausalLM.from_pretrained('gpt2')
+model = AutoModelForCausalLM.from_pretrained('olm/olm-gpt2-oct-2022')
 text = "Replace me by any text you'd like."
 encoded_input = tokenizer(text, return_tensors='pt')
 output = model(**encoded_input)
````
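The added comments recommend `bad_words_ids=[[0,2]]`, but the example call itself never passes it. For reference, here is a minimal sketch of one way to include it, assuming the usual `transformers` behavior of forwarding extra pipeline-call arguments to `generate`, and assuming token IDs 0 and 2 are the start and end tokens the comment refers to:

```python
from transformers import pipeline, set_seed

generator = pipeline('text-generation', model='olm/olm-gpt2-oct-2022')
set_seed(42)

# bad_words_ids=[[0, 2]] bans token IDs 0 and 2 (the start and end tokens,
# per the README comment) so the model is less likely to drift off topic.
generator(
    "Hello, I'm a language model,",
    max_length=30,
    num_return_sequences=5,
    bad_words_ids=[[0, 2]],
)
```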
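The captured line about `pad_token_id` being set to `eos_token_id`:50256 is a warning `transformers` emits for GPT-2-style models, which define no pad token. If that warning is unwanted, `pad_token_id` can be passed explicitly; continuing the sketch above and reusing the 50256 value reported in the warning itself:

```python
# Passing pad_token_id explicitly silences the open-end generation warning;
# 50256 is the eos_token_id the warning above falls back to.
generator("Hello, I'm a language model,", max_length=30, pad_token_id=50256)
```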
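The second hunk fixes a checkpoint mismatch: the tokenizer was loaded from `olm/olm-gpt2-oct-2022` while the model came from plain `gpt2`. Since this snippet is billed as a way to get the features of a given text, a short continuation showing where those features live may help; note that `output_hidden_states=True` and the `.logits` / `.hidden_states` fields are standard causal-LM outputs in `transformers`, not something this README itself shows:

```python
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained('olm/olm-gpt2-oct-2022')
model = AutoModelForCausalLM.from_pretrained('olm/olm-gpt2-oct-2022')

text = "Replace me by any text you'd like."
encoded_input = tokenizer(text, return_tensors='pt')
output = model(**encoded_input, output_hidden_states=True)

print(output.logits.shape)             # next-token scores: (batch, sequence_length, vocab_size)
print(output.hidden_states[-1].shape)  # final-layer token features: (batch, sequence_length, hidden_size)
```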
|