BounharAbdelaziz committed on
Commit
3c73224
·
verified ·
1 Parent(s): 01090a1

fix of endless loop in generate

Browse files
Files changed (1) hide show
  1. app.py +6 -0
app.py CHANGED
@@ -17,6 +17,10 @@ MODEL_NAME = "BounharAbdelaziz/Al-Atlas-LLM-0.5B" # "atlasia/Al-Atlas-LLM-mid-tr
17
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME) # , token=token
18
  model = AutoModelForCausalLM.from_pretrained(MODEL_NAME).to(device)
19
 
 
 
 
 
20
  # Predefined examples
21
  examples = [
22
  ["الذكاء الاصطناعي هو فرع من علوم الكمبيوتر اللي كيركز"
@@ -42,6 +46,8 @@ def generate_text(prompt, max_length=256, temperature=0.7, top_p=0.9, top_k=150,
42
  num_beams=num_beams,
43
  top_k= top_k,
44
  early_stopping = True,
 
 
45
  )
46
  return tokenizer.decode(output[0], skip_special_tokens=True)
47
 
 
17
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME) # , token=token
18
  model = AutoModelForCausalLM.from_pretrained(MODEL_NAME).to(device)
19
 
20
+ # Fix tokenizer padding
21
+ if tokenizer.pad_token is None:
22
+ tokenizer.pad_token = tokenizer.eos_token # Set pad token
23
+
24
  # Predefined examples
25
  examples = [
26
  ["الذكاء الاصطناعي هو فرع من علوم الكمبيوتر اللي كيركز"
 
46
  num_beams=num_beams,
47
  top_k= top_k,
48
  early_stopping = True,
49
+ pad_token_id=tokenizer.pad_token_id, # Explicit pad token
50
+ eos_token_id=tokenizer.eos_token_id, # Explicit eos token
51
  )
52
  return tokenizer.decode(output[0], skip_special_tokens=True)
53