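# Streamlit app: generate job descriptions with the Aneeth/zephyr_10k fine-tuned model,
# loaded as a PEFT adapter with 4-bit GPTQ quantization and run on CPU.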
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer, GenerationConfig, GPTQConfig
import torch
import streamlit as st

# Alternative: load the Falcon-7B-Instruct base model directly (left commented out).
# model = AutoModelForCausalLM.from_pretrained(
#     "tiiuae/falcon-7b-instruct",
#     torch_dtype=torch.bfloat16,
#     trust_remote_code=True,
#     device_map="auto",
#     low_cpu_mem_usage=True,
# )

# 4-bit GPTQ quantization; ExLlama kernels are disabled since they require a GPU.
gptq_config = GPTQConfig(bits=4, disable_exllama=True)

# Load the fine-tuned PEFT adapter checkpoint on top of its quantized base model.
model = AutoPeftModelForCausalLM.from_pretrained(
    "Aneeth/zephyr_10k",
    return_dict=True,
    torch_dtype=torch.float32,
    trust_remote_code=True,
    quantization_config=gptq_config,
)
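
# Tokenizer from the same fine-tuned checkpoint so its special/chat tokens match the model.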
tokenizer = AutoTokenizer.from_pretrained("Aneeth/zephyr_10k")

# Decoding settings: top_k=1 with sampling is effectively greedy decoding.
generation_config = GenerationConfig(
    do_sample=True,
    top_k=1,
    temperature=0.5,
    max_new_tokens=5000,
    pad_token_id=tokenizer.eos_token_id,
)

def process_data_sample(example):
    # Wrap the instruction in the Zephyr <|system|>/<|user|>/<|assistant|> prompt format.
    processed_example = "<|system|>\n Generate an authentic job description using the given input.\n<|user|>\n" + example["instruction"] + "\n<|assistant|>\n"
    return processed_example

def generate_text(prompt):
    # Build the chat-formatted prompt, run generation on CPU, and decode the completion.
    inp_str = process_data_sample({"instruction": prompt})
    inputs = tokenizer(inp_str, return_tensors="pt").to("cpu")
    outputs = model.generate(**inputs, generation_config=generation_config)
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response

def main():
    st.title("Zephyr Inference")

    # Get input from user
    input_text = st.text_area("Input JD prompt", "Type here...")

    # Generate text on button click
    if st.button("Generate Text"):
        generated_text = generate_text(input_text)
        st.subheader("Generated Text:")
        st.write(generated_text)

if __name__ == "__main__":
    main()
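
# To run locally (assuming this file is saved as app.py): streamlit run app.py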