|
import os |
|
import time |
|
import spaces |
|
from threading import Thread |
|
import torch |
|
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer |
|
import gradio as gr |
|
|
|
# Hugging Face Hub repository id of the checkpoint served by this app.
MODEL = "weblab-GENIAC/Tanuki-8B-dpo-v1.0"
# Optional Hub access token taken from the environment; None when unset.
HF_TOKEN = os.environ.get("HF_TOKEN")

# Page heading, rendered as raw HTML at the top of the UI.
TITLE = "<h1><center>Tanuki-8B-dpo-v1.0</center></h1>"
|
|
|
# Introductory blurb shown under the title. It is a single HTML <div> with no
# blank lines inside, styled by the `.model-description` CSS rules.
DESCRIPTION = """
<div class="model-description">
<p>
🦡 <a href="https://huggingface.co/weblab-GENIAC/Tanuki-8B-dpo-v1.0"><b>Tanuki 8B</b>(weblab-GENIAC/Tanuki-8B-dpo-v1.0)</a>は、
経産省及びNEDOが推進する日本国内の生成AI基盤モデル開発を推進する「GENIAC」プロジェクトにおいて、松尾・岩澤研究室が開発・公開したLLMとなります。
本プロジェクトは松尾研が提供する大規模言語モデル講座(2023年9月開催、2,000名が受講)の修了生及び一般公募によって集まった有志の開発者(民間企業・研究者・学生で構成)が、最新の研究成果や技術的な知見を取り入れ、開発を行ったモデルです。
</p>
<p>🤖 <s>このデモでは、Tanuki 8Bとチャットを行うことが可能です。</s>デモの公開期間は終了いたしました。(注:フルバージョンの<a href="https://huggingface.co/weblab-GENIAC/Tanuki-8x8B-dpo-v1.0">Tanuki 8x8B</a>ではございません。)</p>
<p>📄 モデルの詳細については、<a href="http://weblab.t.u-tokyo.ac.jp/2024-08-30">プレスリリース</a>をご覧ください。お問い合わせは<a href="https://weblab.t.u-tokyo.ac.jp/contact/">こちら</a>までどうぞ。</p>
<p>関連サイト: <a href="https://weblab.t.u-tokyo.ac.jp/geniac_llm">GENIAC 松尾研 LLM開発プロジェクト</a></p>
</div>
"""
|
|
|
# HTML passed as the `placeholder` of the chatbot component; styled by the
# `.image-placeholder` CSS rules.
PLACEHOLDER = """
<div class="image-placeholder">
<img src="https://weblab.t.u-tokyo.ac.jp/wp-content/uploads/2024/06/GENIAC-image-cutting3-1.jpg" alt="Tanuki-8B Image">
<h1>Tanuki-8B</h1>
</div>
"""
|
|
|
# Stylesheet handed to gr.Blocks(css=...). The selectors match the classes
# used by DESCRIPTION, PLACEHOLDER and the duplicate button.
CSS = """
.duplicate-button {
    margin: auto !important;
    color: white !important;
    background: black !important;
    border-radius: 100vh !important;
}

h3 {
    text-align: center;
}

.model-description {
    padding: 0.5em 1em;
    margin: 2em 0;
    border-top: solid 5px #5d627b;
    box-shadow: 0 1px 1px rgba(0, 0, 0, 0.22);
    border-radius: 5px;
}

.model-description p {
    margin: 0;
    padding: 0;
    color: #5d627b;
}

.image-placeholder {
    text-align: center;
    display: flex;
    flex-direction: column;
    align-items: center;
}

.image-placeholder img {
    width: 100%;
    height: auto;
    opacity: 0.55;
}

.image-placeholder h1 {
    font-size: 28px;
    margin-bottom: 2px;
    opacity: 0.55;
}
"""
|
|
|
# <head> fragment that loads the Google tag (gtag.js) library.
ANALYTICS_HEAD = """
<script async src="https://www.googletagmanager.com/gtag/js?id=G-JLBL393020"></script>
"""
# JavaScript function run on page load: sets up window.dataLayer and
# configures the same measurement id that ANALYTICS_HEAD loads.
ANALYTICS_JS = """
function() {
    window.dataLayer = window.dataLayer || [];
    function gtag(){dataLayer.push(arguments);}
    gtag('js', new Date());
    gtag('config', 'G-JLBL393020');
}
"""
|
|
|
# Load the tokenizer and the model once, at import time, so every call to
# stream_chat() reuses the same objects.
tokenizer = AutoTokenizer.from_pretrained(MODEL)
model = AutoModelForCausalLM.from_pretrained(
    MODEL,
    torch_dtype=torch.bfloat16,  # load the weights in bfloat16
    device_map="auto",  # let the library decide where to place the weights
)
# Dump the module tree to the log as a quick check of what was loaded.
print(model)
|
|
|
@spaces.GPU()
def stream_chat(
    message: str,
    history: list,
    system_prompt: str,
    temperature: float = 0.3,
    max_new_tokens: int = 1024,
    top_p: float = 1.0,
    top_k: int = 20,
):
    """Generate a reply to ``message`` and stream it as a growing string.

    Args:
        message: The new user turn.
        history: Earlier turns, iterated as ``(prompt, answer)`` pairs. A
            ``None`` prompt or answer is replaced by a single space.
        system_prompt: Content of the leading ``system`` message.
        temperature: Sampling temperature. ``0`` selects greedy decoding.
        max_new_tokens: Upper bound on the number of generated tokens.
        top_p: Nucleus-sampling threshold (used only when sampling).
        top_k: Top-k cutoff (used only when sampling).

    Yields:
        The reply accumulated so far, re-emitted each time the streamer
        delivers a new piece of text.
    """
    print(f'message: {message}')
    print(f'history: {history}')

    # Rebuild the whole dialogue in chat-template form: system prompt first,
    # then alternating user/assistant turns, then the new user message.
    conversation = [{"role": "system", "content": system_prompt}]
    for prompt, answer in history:
        conversation.extend([
            {"role": "user", "content": " " if prompt is None else prompt},
            {"role": "assistant", "content": " " if answer is None else answer},
        ])
    conversation.append({"role": "user", "content": message})

    input_ids = tokenizer.apply_chat_template(
        conversation,
        add_generation_prompt=True,
        return_tensors="pt",
    ).to(model.device)

    streamer = TextIteratorStreamer(
        tokenizer,
        timeout=60.0,
        skip_prompt=True,
        skip_special_tokens=True,
    )

    do_sample = temperature != 0
    generate_kwargs = dict(
        input_ids=input_ids,
        max_new_tokens=max_new_tokens,
        do_sample=do_sample,
        streamer=streamer,
    )
    if do_sample:
        # temperature/top_p/top_k only apply to sampling. Passing them for
        # greedy decoding (notably temperature=0) changes nothing in the
        # output and only makes the generation config complain.
        generate_kwargs.update(
            temperature=temperature,
            top_p=top_p,
            top_k=top_k,
        )

    def _generate():
        # torch.no_grad() is thread-local, so it has to be entered inside the
        # worker thread; wrapping Thread.start() in the caller has no effect
        # on the generation itself.
        with torch.no_grad():
            model.generate(**generate_kwargs)

    # Generation runs in the background so this generator can relay text as
    # soon as the streamer receives it.
    thread = Thread(target=_generate)
    thread.start()

    buffer = ""
    for new_text in streamer:
        buffer += new_text
        yield buffer
|
|
|
|
|
# Chat display component. NOTE(review): nothing in the layout below attaches
# it (or stream_chat) to the page, which matches the "demo period has ended"
# notice in DESCRIPTION. Confirm this is intentional.
chatbot = gr.Chatbot(height=600, placeholder=PLACEHOLDER)

# Static landing page: title, duplicate button and description only.
with gr.Blocks(head=ANALYTICS_HEAD, css=CSS, theme="soft") as demo:
    # Run the analytics bootstrap in the browser when the page loads.
    demo.load(None, js=ANALYTICS_JS)
    gr.HTML(TITLE)
    gr.DuplicateButton(value="Duplicate Space for private use", elem_classes="duplicate-button")
    gr.Markdown(DESCRIPTION)
|
|
|
|
|
# Start the Gradio server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()