Spaces:
Paused
Paused
import pandas as pd | |
import chainlit as cl | |
from chainlit.prompt import Prompt, PromptMessage | |
from dotenv import load_dotenv | |
# Load environment variables | |
load_dotenv() | |
# ChatOpenAI Templates
# System prompt: fixes the assistant's role (log-field -> Chronicle UDM mapping)
# and the output format (a structured table).
# NOTE(review): steps 1 and 3 tell the model to use web search and to consult
# the UDM fields CSV, but the completion request in this file passes no tools
# and never adds the CSV contents to the messages -- confirm whether that
# context is meant to be supplied to the model.
system_template = """You are a cybersecurity expert specialized in log analysis and data normalization,
helping security teams to map security log fields to Google Chronicle's Unified Data Model (UDM).
Please follow these steps:
1. Use web search to consult the latest log documentation for the product provided and Google Chronicle UDM schema documentation.
2. Map each product log field to its corresponding UDM field.
3. Carefully consult the UDM fields csv file available to you as part of this code. Make sure you attempt to map to already defined fields in the UDM data model.
4. For fields that don't have a direct match in UDM, place them into custom fields.
5. Ensure each mapped field, including custom fields, is unique and accurate.
6. Organize the mapping into a structured table format.
You always provide detailed, accurate, and structured responses in a professional tone, focusing on precision.
"""
# User prompt: `{input}` is the only placeholder and is filled with the raw
# text of the incoming chat message via `str.format(input=...)`.
user_template = """Here is a sample log:
{input}
Please follow these steps:
1. Use web search to consult the latest documentation for this product/log source and Google Chronicle UDM schema documentation.
2. Map each log field to its corresponding UDM field.
3. For fields that don't have a direct match in UDM, place them into custom fields.
4. Ensure each mapped field, including custom fields, is unique and accurate.
5. Organize the mapping into a structured table format.
"""
# Load the UDM field definitions from the existing CSV file
def read_udm_fields(csv_file_path):
    """Return the contents of the CSV at ``csv_file_path`` as a DataFrame.

    Parsing is delegated to ``pandas.read_csv`` with its default options.
    """
    return pd.read_csv(csv_file_path)
@cl.on_chat_start  # Marks function to be executed at the start of a user session
async def start_chat():
    """Store the default completion settings in the user session.

    The message handler reads this dict back with
    ``cl.user_session.get("settings")`` and forwards it as keyword arguments
    to the chat-completions request.
    """
    settings = {
        "model": "gpt-3.5-turbo",
        "temperature": 0,
        "max_tokens": 500,
        "top_p": 1,
        "frequency_penalty": 0,
        "presence_penalty": 0,
    }
    cl.user_session.set("settings", settings)
@cl.on_message  # Marks function to run each time chatbot receives a message from a user
async def main(message: cl.Message):
    """Send the user's log sample to the model and stream the reply back.

    Builds a system + user prompt from the module-level templates, requests a
    streamed chat completion using the settings stored in the user session,
    and forwards each received token into a new chat message.

    Args:
        message: Incoming user message; its ``content`` is substituted for
            ``{input}`` in the user template.
    """
    settings = cl.user_session.get("settings")

    # NOTE(review): neither AsyncOpenAI nor ChatOpenAI is imported in the
    # visible part of this file -- confirm both are brought into scope.
    client = AsyncOpenAI()

    print(message.content)

    # Read UDM fields from the existing CSV file
    csv_file_path = 'udm_fields.csv'  # Ensure this file exists in the environment
    # NOTE(review): the loaded frame is not used below (it is never added to
    # the prompt). The read is kept so a missing file still surfaces as an
    # error -- confirm whether the fields should be included in the messages.
    udm_fields_df = read_udm_fields(csv_file_path)

    # Process and map log fields to UDM fields based on the CSV
    prompt = Prompt(
        provider=ChatOpenAI.id,
        messages=[
            PromptMessage(
                role="system",
                template=system_template,
                formatted=system_template,
            ),
            PromptMessage(
                role="user",
                template=user_template,
                formatted=user_template.format(input=message.content),
            ),
        ],
        inputs={"input": message.content},
        settings=settings,
    )

    # Convert once and reuse for both the debug print and the API request.
    openai_messages = [m.to_openai() for m in prompt.messages]
    print(openai_messages)

    msg = cl.Message(content="")

    stream = await client.chat.completions.create(
        messages=openai_messages, stream=True, **settings
    )
    async for stream_resp in stream:
        # A streamed chunk may carry an empty `choices` list; skip it instead
        # of failing with IndexError.
        if not stream_resp.choices:
            continue
        token = stream_resp.choices[0].delta.content
        # `delta.content` can be None (e.g. role-only chunks); stream "" then.
        await msg.stream_token(token or "")

    # Update prompt object with the completion
    prompt.completion = msg.content
    msg.prompt = prompt

    # Send and close the message stream
    await msg.send()