alexchen4ai commited on
Commit
740b2a8
1 Parent(s): 313c2a6

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +0 -8
README.md CHANGED
@@ -48,11 +48,7 @@ You can run the model on a GPU using the following code.
48
  import torch
49
  from transformers import AutoModelForCausalLM, AutoTokenizer
50
  import time
51
- import warnings
52
- warnings.filterwarnings("ignore")
53
  torch.random.manual_seed(0)
54
- import json
55
-
56
 
57
  model = AutoModelForCausalLM.from_pretrained(
58
  "NexaAIDev/Octopus-v4",
@@ -62,8 +58,6 @@ model = AutoModelForCausalLM.from_pretrained(
62
  )
63
  tokenizer = AutoTokenizer.from_pretrained("NexaAIDev/octopus-v4-finetuned-v1")
64
 
65
-
66
-
67
  question = "Tell me the result of derivative of x^3 when x is 2?"
68
 
69
  inputs = f"<|system|>You are a router. Below is the query from the users, please call the correct function and generate the parameters to call the function.<|end|><|user|>{question}<|end|><|assistant|>"
@@ -71,7 +65,6 @@ inputs = f"<|system|>You are a router. Below is the query from the users, please
71
  print(inputs)
72
  print('\n============= Below is the response ==============\n')
73
 
74
-
75
  # You should consider using early stopping with the <nexa_end> token to accelerate
76
  input_ids = tokenizer(inputs, return_tensors="pt")['input_ids'].to(model.device)
77
 
@@ -83,7 +76,6 @@ for i in range(200):
83
  next_token = model(input_ids).logits[:, -1].argmax(-1)
84
  generated_token_ids.append(next_token.item())
85
 
86
- print(next_token.item())
87
  input_ids = torch.cat([input_ids, next_token.unsqueeze(1)], dim=-1)
88
 
89
  # 32041 is the token id of <nexa_end>
 
48
  import torch
49
  from transformers import AutoModelForCausalLM, AutoTokenizer
50
  import time
 
 
51
  torch.random.manual_seed(0)
 
 
52
 
53
  model = AutoModelForCausalLM.from_pretrained(
54
  "NexaAIDev/Octopus-v4",
 
58
  )
59
  tokenizer = AutoTokenizer.from_pretrained("NexaAIDev/octopus-v4-finetuned-v1")
60
 
 
 
61
  question = "Tell me the result of derivative of x^3 when x is 2?"
62
 
63
  inputs = f"<|system|>You are a router. Below is the query from the users, please call the correct function and generate the parameters to call the function.<|end|><|user|>{question}<|end|><|assistant|>"
 
65
  print(inputs)
66
  print('\n============= Below is the response ==============\n')
67
 
 
68
  # You should consider using early stopping with the <nexa_end> token to accelerate
69
  input_ids = tokenizer(inputs, return_tensors="pt")['input_ids'].to(model.device)
70
 
 
76
  next_token = model(input_ids).logits[:, -1].argmax(-1)
77
  generated_token_ids.append(next_token.item())
78
 
 
79
  input_ids = torch.cat([input_ids, next_token.unsqueeze(1)], dim=-1)
80
 
81
  # 32041 is the token id of <nexa_end>