Spaces:

Galatea007
/

LLMopsDK

Paused

App Files Files Community

Galatea007 committed on Oct 5, 2024

Commit

22e26a4

verified ·

1 Parent(s): 0bf70b9

Update app_transformation.py

Browse files

Files changed (1) hide show

app_transformation.py +22 -90

app_transformation.py CHANGED Viewed

@@ -1,28 +1,21 @@
-# You can find this code for Chainlit python streaming here (https://docs.chainlit.io/concepts/streaming/python)
-# OpenAI Chat completion
-import os
-from openai import AsyncOpenAI  # importing openai for API usage
-import chainlit as cl  # importing chainlit for our app
-from chainlit.prompt import Prompt, PromptMessage  # importing prompt tools
-from chainlit.playground.providers import ChatOpenAI  # importing ChatOpenAI tools
 from dotenv import load_dotenv
 load_dotenv()
 # ChatOpenAI Templates
-system_template = """You are a cybersecurity expert specialized in log analysis and data normalization,/n
 helping security teams to map security log fields to Google Chronicle's Unified Data Model (UDM).
 Please follow these steps:
 1. Use web search to consult the latest log documentation for the product provided and Google Chronicle UDM schema documentation.
 2. Map each product log field to its corresponding UDM field.
 3. For fields that don't have a direct match in UDM, place them into custom fields.
-4. make sure the each mapped field including the custome fields are unique and accurate
 5. Organize the mapping into a structured table format.
-Remember to explain your reasoning for each field's mapping based on your findings from the product documentation.
-Think through your response step by step, and include references to the documentation you consulted during the process.
 You always provide detailed, accurate, and structured responses in a professional tone, focusing on precision.
 """
@@ -34,70 +27,16 @@ Please follow these steps:
 1. Use web search to consult the latest documentation for this product/log source and Google Chronicle UDM schema documentation.
 2. Map each log field to its corresponding UDM field.
 3. For fields that don't have a direct match in UDM, place them into custom fields.
-4. make sure the each mapped field including the custome fields are unique and accurate
 5. Organize the mapping into a structured table format.
-Here is an example of how to map FortiGate fields to UDM fields..
-Consider this example and related search into relevant URLs to identify best and most accurate mapping for other log sources against UB
-Log_Attribute	Simpliefied_Naming_for_Attribute	UBM_Attribute
-devname	devicehostname	intermediary.hostname
-devid	deviceexternalid	intermediary.asset.hardware.serial_number
-ap	customstring62	target.application
-dstport	destinationport	target.port
-tranport	destinationtranslatedport	target.nat_port
-policytype	customstring55	security_result.rule_type
-TransportProtocol	transportprotocol	network.ip_protocol
-srcip	sourceaddress	principal.ip
-dstmac	destinationmacaddress	target.mac
-eventtype	customstring28	security_result.rule_type
-level	deviceseverity	security_result.severity_details
-filename	filename	target.file.full_path
-action	deviceaction	security_result.action_details
-ipaddr	destinationtranslatedaddress	target.nat_ip
-srcuuid	flowsiemid	principal.asset.product_object_id
-policyid	customnumber22	additional.fields.policyid
-srcport	sourceport	principal.port
-service	applicationprotocol	network.application_protocol_version
-to	emailrecipient	network.email.to
-appid	customstring34	security_result.rule_id
-proto	customnumber6	network.ip_protocol
-ref	devicecustomstring6	network.http.referral_url
-method	requestmethod	network.http.method
-transport	translatedport	src.nat_port
-srcname	sourcehostname	principal.user.first_name
-duration	transactionstring4	network.session_duration.seconds
-hostname	destinationhostname	target.hostname
-Header2	deviceaddress	intermediary.hostname
-url	requesturl	target.url
-domain	emailsenderdomain	network.dns_domain
-agent	requestclientapplication	network.http.user_agent
-rcvdpkt	transactionnumber4	additional.fields.rcvdpkt
-catdesc	deviceeventcategory	security_result.category_details
-sentbyte	bytesout	network.sent_bytes
-sentpkt	transactionnumber3	additional.fields.sentpkt
-msg	message	metadata.description
-source_mac	sourcemacaddress	principal.mac
-subject	emailsubject	network.email.subject
-from	emailsender	network.email.from
-dstip	destinationaddress	target.ip
-logid	transactionstring6	metadata.product_log_id
-rcvdbyte	bytesin	network.received_bytes
-type	customstring1	metadata.product_event_type
-user	accountname	principal.user.user_display_name
-sessionid	sessionid	network.session_id
-dstuser	destinationusername	target.user.userid
-References:
-FortiGate Log Documentation, check all links in this URL: https://docs.cyderes.cloud/parser-knowledge-base/fortinet_firewall/
-Google Chronicle UDM Documentation:https://cloud.google.com/chronicle/docs/reference/udm-field-list
-The official documentation of Chronicle UDM, explaining the different fields and structures available for mapping.
-Think through your response step by step, and include references to the documentation you consulted during the process.
 """
-@cl.on_chat_start  # marks a function that will be executed at the start of a user session
 async def start_chat():
     settings = {
         "model": "gpt-3.5-turbo",
@@ -107,31 +46,26 @@ async def start_chat():
         "frequency_penalty": 0,
         "presence_penalty": 0,
     }
     cl.user_session.set("settings", settings)
-@cl.on_message  # marks a function that should be run each time the chatbot receives a message from a user
 async def main(message: cl.Message):
     settings = cl.user_session.get("settings")
     client = AsyncOpenAI()
     print(message.content)
     prompt = Prompt(
         provider=ChatOpenAI.id,
         messages=[
-            PromptMessage(
-                role="system",
-                template=system_template,
-                formatted=system_template,
-            ),
-            PromptMessage(
-                role="user",
-                template=user_template,
-                formatted=user_template.format(input=message.content),
-            ),
         ],
         inputs={"input": message.content},
         settings=settings,
@@ -140,8 +74,6 @@ async def main(message: cl.Message):
     print([m.to_openai() for m in prompt.messages])
     msg = cl.Message(content="")
-    # Call OpenAI
     async for stream_resp in await client.chat.completions.create(
         messages=[m.to_openai() for m in prompt.messages], stream=True, **settings
     ):
@@ -150,7 +82,7 @@ async def main(message: cl.Message):
             token = ""
         await msg.stream_token(token)
-    # Update the prompt object with the completion
     prompt.completion = msg.content
     msg.prompt = prompt

+import pandas as pd
+import chainlit as cl
+from chainlit.prompt import Prompt, PromptMessage
 from dotenv import load_dotenv
+# Load environment variables
 load_dotenv()
 # ChatOpenAI Templates
+system_template = """You are a cybersecurity expert specialized in log analysis and data normalization,
 helping security teams to map security log fields to Google Chronicle's Unified Data Model (UDM).
 Please follow these steps:
 1. Use web search to consult the latest log documentation for the product provided and Google Chronicle UDM schema documentation.
 2. Map each product log field to its corresponding UDM field.
 3. For fields that don't have a direct match in UDM, place them into custom fields.
+4. Ensure each mapped field, including custom fields, is unique and accurate.
 5. Organize the mapping into a structured table format.
 You always provide detailed, accurate, and structured responses in a professional tone, focusing on precision.
 """
 1. Use web search to consult the latest documentation for this product/log source and Google Chronicle UDM schema documentation.
 2. Map each log field to its corresponding UDM field.
 3. For fields that don't have a direct match in UDM, place them into custom fields.
+4. Ensure each mapped field, including custom fields, is unique and accurate.
 5. Organize the mapping into a structured table format.
 """
+# Function to read UDM fields from the existing CSV file
+def read_udm_fields(csv_file_path):
+    """Load the UDM field list from a CSV file.
+
+    Args:
+        csv_file_path: Path of the CSV file; passed unchanged to ``pd.read_csv``.
+
+    Returns:
+        The DataFrame produced by ``pd.read_csv`` for that file.
+    """
+    udm_fields_df = pd.read_csv(csv_file_path)
+    return udm_fields_df
+@cl.on_chat_start  # Marks function to be executed at the start of a user session
 async def start_chat():
     settings = {
         "model": "gpt-3.5-turbo",
         "frequency_penalty": 0,
         "presence_penalty": 0,
     }
     cl.user_session.set("settings", settings)
+@cl.on_message  # Marks function to run each time chatbot receives a message from a user
 async def main(message: cl.Message):
     settings = cl.user_session.get("settings")
     client = AsyncOpenAI()
     print(message.content)
+    # Read UDM fields from the existing CSV file
+    csv_file_path = 'udm_fields.csv'  # Ensure this file exists in the environment
+    udm_fields_df = read_udm_fields(csv_file_path)
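+    # Process and map log fields to UDM fields based on the CSV
+    # NOTE(review): udm_fields_df is not referenced in the visible lines below; the prompt is built from the templates only. Confirm the CSV content is actually passed to the model.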
     prompt = Prompt(
         provider=ChatOpenAI.id,
         messages=[
+            PromptMessage(role="system", template=system_template, formatted=system_template),
+            PromptMessage(role="user", template=user_template, formatted=user_template.format(input=message.content)),
         ],
         inputs={"input": message.content},
         settings=settings,
     print([m.to_openai() for m in prompt.messages])
     msg = cl.Message(content="")
     async for stream_resp in await client.chat.completions.create(
         messages=[m.to_openai() for m in prompt.messages], stream=True, **settings
     ):
             token = ""
         await msg.stream_token(token)
+    # Update prompt object with the completion
     prompt.completion = msg.content
     msg.prompt = prompt