Spaces:
Paused
Paused
Rename app.py to app_udm.py
Browse files- app.py +0 -81
- app_udm.py +135 -0
app.py
DELETED
@@ -1,81 +0,0 @@
|
|
1 |
-
# You can find this code for Chainlit python streaming here (https://docs.chainlit.io/concepts/streaming/python)
|
2 |
-
|
3 |
-
# OpenAI Chat completion
|
4 |
-
import os
|
5 |
-
from openai import AsyncOpenAI # importing openai for API usage
|
6 |
-
import chainlit as cl # importing chainlit for our app
|
7 |
-
from chainlit.prompt import Prompt, PromptMessage # importing prompt tools
|
8 |
-
from chainlit.playground.providers import ChatOpenAI # importing ChatOpenAI tools
|
9 |
-
from dotenv import load_dotenv
|
10 |
-
|
11 |
-
load_dotenv()
|
12 |
-
|
13 |
-
|
14 |
-
# ChatOpenAI Templates
# NOTE: both templates are sent verbatim to the model — the text below is
# runtime data, not documentation.
# Fixed persona instruction used as the system message for every request.
system_template = """You are a helpful assistant who always speaks in a pleasant tone!
"""

# User message template; `{input}` is filled with the raw user message in main().
user_template = """{input}
Think through your response step by step.
"""
|
21 |
-
|
22 |
-
|
23 |
-
@cl.on_chat_start  # marks a function that will be executed at the start of a user session
async def start_chat():
    """Store the default OpenAI chat-completion settings for this session."""
    cl.user_session.set(
        "settings",
        {
            "model": "gpt-3.5-turbo",
            "temperature": 0,
            "max_tokens": 500,
            "top_p": 1,
            "frequency_penalty": 0,
            "presence_penalty": 0,
        },
    )
|
35 |
-
|
36 |
-
|
37 |
-
@cl.on_message  # marks a function that should be run each time the chatbot receives a message from a user
async def main(message: cl.Message):
    """Stream an OpenAI chat completion for the incoming user message.

    Builds a Prompt from the module-level templates, streams the completion
    token-by-token into a Chainlit message, then attaches the completed
    Prompt to the message and sends it.
    """
    # Per-session settings written by start_chat.
    settings = cl.user_session.get("settings")

    # A fresh client per message; reads OPENAI_API_KEY from the environment.
    client = AsyncOpenAI()

    # Debug: echo the raw user input to the server console.
    print(message.content)

    # Prompt ties templates + formatted messages together so the Chainlit
    # playground can replay/edit the exchange.
    prompt = Prompt(
        provider=ChatOpenAI.id,
        messages=[
            PromptMessage(
                role="system",
                template=system_template,
                formatted=system_template,
            ),
            PromptMessage(
                role="user",
                template=user_template,
                formatted=user_template.format(input=message.content),
            ),
        ],
        inputs={"input": message.content},
        settings=settings,
    )

    # Debug: show the exact OpenAI-format payload being sent.
    print([m.to_openai() for m in prompt.messages])

    # Empty message that is filled token-by-token as the stream arrives.
    msg = cl.Message(content="")

    # Call OpenAI
    async for stream_resp in await client.chat.completions.create(
        messages=[m.to_openai() for m in prompt.messages], stream=True, **settings
    ):
        token = stream_resp.choices[0].delta.content
        # Role-only / final chunks carry None content; stream "" instead.
        if not token:
            token = ""
        await msg.stream_token(token)

    # Update the prompt object with the completion
    prompt.completion = msg.content
    msg.prompt = prompt

    # Send and close the message stream
    await msg.send()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app_udm.py
ADDED
@@ -0,0 +1,135 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# You can find this code for Chainlit python streaming here (https://docs.chainlit.io/concepts/streaming/python)

# OpenAI Chat completion
# NOTE(review): the original file imported os, AsyncOpenAI, chainlit,
# Prompt/PromptMessage and load_dotenv twice and called load_dotenv() twice;
# deduplicated here with no behavior change.
import os

import chainlit as cl  # importing chainlit for our app
import pandas as pd
from chainlit.playground.providers import ChatOpenAI  # importing ChatOpenAI tools
from chainlit.prompt import Prompt, PromptMessage  # importing prompt tools
from dotenv import load_dotenv
from openai import AsyncOpenAI  # importing openai for API usage

# Load environment variables (e.g. OPENAI_API_KEY) once at import time.
load_dotenv()
|
21 |
+
|
22 |
+
|
23 |
+
# Load UDM fields CSV file
def load_udm_fields(csv_path):
    """Read the UDM field reference CSV into a DataFrame."""
    udm_frame = pd.read_csv(csv_path)
    return udm_frame
|
26 |
+
|
27 |
+
# Map log fields to UDM fields
def map_log_fields_to_udm(log_fields, udm_fields):
    """Map raw log field names onto Chronicle UDM field names.

    Args:
        log_fields: iterable of raw field-name strings extracted from a log.
        udm_fields: DataFrame with a 'Field Name' column listing UDM fields
            (as produced by load_udm_fields).

    Returns:
        DataFrame with 'Log_Field' and 'UDM_Field' columns. Fields with no
        case-insensitive substring match in the reference are mapped to
        'custom_fields.<field>'.
    """
    mapped_fields = []
    for field in log_fields:
        # Try to find a matching UDM field. regex=False treats the log field
        # as a literal substring (raw log tokens like "src_ip(" would
        # otherwise be parsed as regex and crash); na=False keeps blank
        # reference rows from turning the boolean mask into NaN.
        udm_match = udm_fields[
            udm_fields['Field Name'].str.contains(field, case=False, regex=False, na=False)
        ]
        if not udm_match.empty:
            # First match wins — assumes the reference CSV is ordered by
            # relevance (TODO confirm against the UDM field list).
            mapped_fields.append({
                'Log_Field': field,
                'UDM_Field': udm_match.iloc[0]['Field Name']
            })
        else:
            # If no direct match, add to custom fields
            mapped_fields.append({
                'Log_Field': field,
                'UDM_Field': 'custom_fields.' + field
            })
    return pd.DataFrame(mapped_fields)
|
45 |
+
|
46 |
+
# Chainlit OpenAI Templates for multi-shot learning
# NOTE: both templates below are sent verbatim to the model — the string
# bodies are runtime data, not documentation.
# System message: fixed persona/instructions for the UDM-mapping assistant.
system_template = """You are a cybersecurity expert specialized in log analysis and data normalization,
helping security teams map security log fields to Google Chronicle's Unified Data Model (UDM).

Please follow these steps:
1. Map each product log field to its corresponding UDM field using the reference UDM CSV provided.
2. For fields that don't have a direct match in UDM, place them into custom fields.
3. Ensure each mapped field, including custom fields, is unique and accurate.
4. Organize the mapping into a structured table format.
"""

# Multi-shot learning examples for Fortinet and Palo Alto
# User message template; `{input}` is filled with the raw user message in main().
user_template = """Here is a sample log:

{input}

Please follow these steps:
1. Use the provided UDM CSV to map the log fields.
2. For fields that don't have a direct match, assign them to custom fields.
3. Organize the mapping into a structured table.

### Example 1: Fortinet Fields to UDM Mapping

Log Attribute | UDM Attribute
--------------|---------------
devname | intermediary.hostname
devid | intermediary.asset.hardware.serial_number
srcip | principal.ip
dstip | target.ip
dstport | target.port

### Example 2: Palo Alto Fields to UDM Mapping


Log Attribute | UDM Attribute
--------------|---------------
src_ip | principal.ip
dest_ip | target.ip
dest_port | target.port
action | security_result.action_details
severity | security_result.severity_details

Now proceed to map the given sample log:
"""
|
91 |
+
|
92 |
+
@cl.on_chat_start  # Marks a function that will be executed at the start of a user session
async def start_chat():
    """Store the default OpenAI chat-completion settings for this session."""
    cl.user_session.set(
        "settings",
        {
            "model": "gpt-3.5-turbo",
            "temperature": 0,
            "max_tokens": 500,
            "top_p": 1,
            "frequency_penalty": 0,
            "presence_penalty": 0,
        },
    )
|
103 |
+
|
104 |
+
@cl.on_message  # Marks a function that should be run each time the chatbot receives a message from a user
async def main(message: cl.Message):
    """Map the fields of an incoming log message to UDM and reply with a table.

    Side effects: reads the UDM reference CSV from the working directory and
    writes the resulting mapping to 'mapped_log_fields.csv'.
    """
    settings = cl.user_session.get("settings")

    # Load the UDM fields reference CSV
    udm_fields_csv = "udm_field_list_v2.csv"  # Replace with your actual CSV path
    udm_fields = load_udm_fields(udm_fields_csv)

    # Simulate log fields from the user's input (in real use case, you'd parse the input log)
    log_fields = message.content.split()  # Example: Splitting input log into fields for simplicity

    # Perform the mapping
    mapped_fields_df = map_log_fields_to_udm(log_fields, udm_fields)

    # Create a response showing the mapping
    mapped_fields_table = mapped_fields_df.to_string(index=False)

    # Prompt ties templates + formatted messages together for the Chainlit playground.
    prompt = Prompt(
        provider=ChatOpenAI.id,
        messages=[
            PromptMessage(role="system", template=system_template, formatted=system_template),
            PromptMessage(role="user", template=user_template, formatted=user_template.format(input=message.content)),
        ],
        inputs={"input": message.content},
        settings=settings,
    )

    msg = cl.Message(content=f"Here is the mapped log fields to UDM:\n\n{mapped_fields_table}")
    # Attach the prompt so the playground can inspect it. Previously the Prompt
    # was constructed and then silently discarded (dead code); the pre-rename
    # app.py attached it the same way.
    msg.prompt = prompt
    await msg.send()

    # Save the mapping to CSV for further analysis
    mapped_fields_df.to_csv('mapped_log_fields.csv', index=False)
|