import os
# import json
import numpy as np
import pandas as pd
import openai
from haystack.schema import Document
import streamlit as st
from tenacity import retry, stop_after_attempt, wait_random_exponential
from huggingface_hub import InferenceClient

# Get the Hugging Face API token from the environment
hf_token = os.environ["HF_API_KEY"]
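# NOTE (assumption, not part of the original code): on a Hugging Face Space this token is
# typically provided as a repository secret named HF_API_KEY; for local runs it can be
# exported beforehand, e.g. `export HF_API_KEY=hf_...` (placeholder value).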
# define a special function for putting the prompt together (as we can't use haystack)
def get_prompt(context, label):
    # build the summarization prompt for a given context and vulnerability label
    base_prompt = (
        "Summarize the following context efficiently in bullet points, the less the better - but keep concrete goals. "
        f"Summarize only elements of the context that address vulnerability of {label} to climate change. "
        f"If there is no mention of {label} in the context, return nothing. "
        "Do not include an introduction sentence, just the bullet points as per below. "
        "Formatting example: "
        "- Bullet point 1 "
        "- Bullet point 2 "
    )
    prompt = base_prompt + "; Context: " + context + "; Answer:"
    return prompt
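# For illustration only, a hypothetical call (values invented, not taken from the app):
#   get_prompt("Droughts reduce yields for smallholder women farmers ...", "women")
# returns a single string of the form
#   "Summarize the following context ... ; Context: Droughts reduce yields ... ; Answer:"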
# construct RAG query, send it to the Hugging Face Inference API, and stream the response
def run_query(context, label):
    '''
    Send the summarization prompt for the given context and vulnerability label
    to the inference endpoint and stream the response into a Streamlit placeholder.
    '''
    chatbot_role = """You are an analyst specializing in climate change impact assessments and producing insights from policy documents."""
    # Initialize the client, pointing it to one of the available models
    client = InferenceClient(token=hf_token)
    messages = [
        {"role": "system", "content": chatbot_role},
        {"role": "user", "content": get_prompt(context, label)},
    ]
    # request a streamed chat completion (stream=True returns a generator of chunks)
    response = client.chat.completions.create(
        model="meta-llama/Meta-Llama-3.1-8B-Instruct",
        messages=messages,
        stream=True,
        max_tokens=500,
    )
    # iterate through the streamed output
    report = []
    res_box = st.empty()
    for chunk in response:
        # extract the incremental text from the chunk (structure differs from non-streamed responses)
        delta = chunk.choices[0].delta
        # test to make sure there is text in the chunk (some don't have any)
        if delta.content:
            report.append(delta.content)  # extract the message fragment
            # merge the latest fragment with everything received so far
            result = "".join(report).strip()
            res_box.success(result)  # output to response text box
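# A minimal sketch of how this module might be driven from the Streamlit app
# (the dataframe and column names below are assumptions, not defined in this file):
#
#     df_label = df[df['Vulnerability Label'] == 'women']
#     context = ' - '.join(df_label['text'].tolist())
#     run_query(context, label='women')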