File size: 1,867 Bytes
99fc69e
ca838f6
99fc69e
 
 
ca838f6
99fc69e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cf058f4
942ae7e
ca838f6
 
 
 
 
 
 
 
cf058f4
ca838f6
 
c07b63c
ca838f6
 
 
 
 
e228c01
c07b63c
 
ca838f6
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import asyncio
import inspect
from threading import Thread
from typing import List, Type

import gradio as gr
from gradio import routes
from transformers import AutoTokenizer

loop = asyncio.get_event_loop()

# init code
def get_types(cls_set: List[Type], component: str):
    docset = []
    types = []
    if component == "input":
        for cls in cls_set:
            doc = inspect.getdoc(cls)
            doc_lines = doc.split("\n")
            docset.append(doc_lines[1].split(":")[-1])
            types.append(doc_lines[1].split(")")[0].split("(")[-1])
    else:
        for cls in cls_set:
            doc = inspect.getdoc(cls)
            doc_lines = doc.split("\n")
            docset.append(doc_lines[-1].split(":")[-1])
            types.append(doc_lines[-1].split(")")[0].split("(")[-1])
    return docset, types
routes.get_types = get_types

from petals import AutoDistributedModelForCausalLM
import npc_data

# Choose any model available at https://health.petals.dev
model_name = "daekeun-ml/Llama-2-ko-instruct-13B"

#daekeun-ml/Llama-2-ko-instruct-13B
#quantumaikr/llama-2-70b-fb16-korean

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoDistributedModelForCausalLM.from_pretrained(model_name)

# Run the model as if it were on your computer
def chat2(id, npc, text):
    prom = ""
    inputs = tokenizer(prom, return_tensors="pt")["input_ids"]
    outputs = model.generate(inputs, max_new_tokens=100)
    print(tokenizer.decode(outputs[0]))

    return text
def chat(id, npc, text):
    return f"{text}에 λŒ€ν•œ {npc}의 응닡"



with gr.Blocks() as demo:
    count = 0
    aa = gr.Interface(
      fn=chat,
      inputs=["text","text","text"],
      outputs="text",
      description="chat, ai 응닡을 λ°˜ν™˜ν•©λ‹ˆλ‹€. λ‚΄λΆ€μ μœΌλ‘œ νŠΈλžœμž­μ…˜ 생성. \n /run/predict",
    )

    
    demo.queue(max_size=32).launch(enable_queue=True)