hugohrban committed
Commit 522d0fc · verified · 1 parent: 4e2e6d0

Update README.md

Files changed (1): README.md (+7 −9)
README.md CHANGED
@@ -15,20 +15,18 @@ Example usage:
 
 ```python
 from transformers import AutoModelForCausalLM
-from transformers import AutoTokenizer
-# optionally use local imports
-# from models.progen.modeling_progen import ProGenForCausalLM
-# from models.progen.configuration_progen import ProGenConfig
+from tokenizers import Tokenizer
 import torch
 import torch.nn.functional as F
 
 # load model and tokenizer
 model = AutoModelForCausalLM.from_pretrained("hugohrban/progen2-small-mix7-bidi", trust_remote_code=True)
-tokenizer = AutoTokenizer.from_pretrained("hugohrban/progen2-small-mix7-bidi", trust_remote_code=True)
+tokenizer = Tokenizer.from_pretrained("hugohrban/progen2-small-mix7-bidi")
+tokenizer.no_padding()
 
 # prepare input
-prompt = "<|pf00125|>2FDDDVSAVKSTGV"
-input_ids = torch.tensor(tokenizer.encode(prompt)).to(model.device)
+prompt = "<|pf00125|>2FDDDVSAVKSTGVSK"
+input_ids = torch.tensor(tokenizer.encode(prompt).ids).to(model.device)
 
 # forward pass
 logits = model(input_ids).logits
@@ -36,6 +34,6 @@ logits = model(input_ids).logits
 # print output probabilities
 next_token_logits = logits[-1, :]
 next_token_probs = F.softmax(next_token_logits, dim=-1)
-for i, prob in enumerate(next_token_probs):
-    print(f"{tokenizer.decode(i)}: {100 * prob:.2f}%")
+for i in range(tokenizer.get_vocab_size(with_added_tokens=False)):
+    print(f"{tokenizer.id_to_token(i)}: {round(100 * next_token_probs[i].item(), 2):.2f} %")
 ```
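
For reference, a minimal sketch (not part of this commit) of how the updated example could go one step further, from inspecting the next-token distribution to sampling a continuation. It assumes the names `tokenizer`, `input_ids`, and `next_token_probs` defined in the new snippet above are in scope:

```python
import torch

# sample one token id from the next-token distribution computed above
next_id = torch.multinomial(next_token_probs, num_samples=1).item()
print("sampled next token:", tokenizer.id_to_token(next_id))

# append the sampled id so a repeated forward pass continues the sequence
input_ids = torch.cat([input_ids, torch.tensor([next_id], device=input_ids.device)])
```

Repeating the forward pass on the extended `input_ids` gives simple autoregressive sampling; temperature scaling or top-k filtering of `next_token_logits` could be applied before the softmax in the usual way.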