Spaces:
Paused
Paused
Update app_udm.py
Browse files- app_udm.py +83 -76
app_udm.py
CHANGED
@@ -1,31 +1,58 @@
|
|
1 |
-
# You can find this code for Chainlit python streaming here (https://docs.chainlit.io/concepts/streaming/python)
|
2 |
-
|
3 |
-
# OpenAI Chat completion
|
4 |
-
import os
|
5 |
-
from openai import AsyncOpenAI # importing openai for API usage
|
6 |
-
import chainlit as cl # importing chainlit for our app
|
7 |
-
from chainlit.prompt import Prompt, PromptMessage # importing prompt tools
|
8 |
-
from chainlit.playground.providers import ChatOpenAI # importing ChatOpenAI tools
|
9 |
-
from dotenv import load_dotenv
|
10 |
-
|
11 |
-
load_dotenv()
|
12 |
import pandas as pd
|
13 |
import os
|
14 |
-
|
15 |
import chainlit as cl
|
16 |
-
from chainlit.prompt import Prompt, PromptMessage
|
17 |
from dotenv import load_dotenv
|
18 |
|
19 |
# Load environment variables
|
20 |
load_dotenv()
|
21 |
|
22 |
-
|
23 |
# Load UDM fields CSV file
|
24 |
def load_udm_fields(csv_path):
|
25 |
return pd.read_csv(csv_path)
|
26 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
27 |
# Map log fields to UDM fields
|
28 |
-
def map_log_fields_to_udm(log_fields, udm_fields):
|
29 |
mapped_fields = []
|
30 |
for field in log_fields:
|
31 |
# Try to find a matching UDM field
|
@@ -36,63 +63,47 @@ def map_log_fields_to_udm(log_fields, udm_fields):
|
|
36 |
'UDM_Field': udm_match.iloc[0]['Field Name']
|
37 |
})
|
38 |
else:
|
39 |
-
# If no direct match,
|
|
|
40 |
mapped_fields.append({
|
41 |
'Log_Field': field,
|
42 |
-
'UDM_Field': 'custom_fields.' + field
|
43 |
})
|
44 |
return pd.DataFrame(mapped_fields)
|
45 |
|
46 |
-
#
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
srcip | principal.ip
|
74 |
-
dstip | target.ip
|
75 |
-
dstport | target.port
|
76 |
-
|
77 |
-
### Example 2: Palo Alto Fields to UDM Mapping
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
Log Attribute | UDM Attribute
|
82 |
-
--------------|---------------
|
83 |
-
src_ip | principal.ip
|
84 |
-
dest_ip | target.ip
|
85 |
-
dest_port | target.port
|
86 |
-
action | security_result.action_details
|
87 |
-
severity | security_result.severity_details
|
88 |
-
|
89 |
-
Now proceed to map the given sample log:
|
90 |
"""
|
91 |
|
92 |
@cl.on_chat_start # Marks a function that will be executed at the start of a user session
|
93 |
async def start_chat():
|
94 |
settings = {
|
95 |
-
"model": "gpt-
|
96 |
"temperature": 0,
|
97 |
"max_tokens": 500,
|
98 |
"top_p": 1,
|
@@ -109,25 +120,21 @@ async def main(message: cl.Message):
|
|
109 |
udm_fields_csv = "udm_field_list_v2.csv" # Replace with your actual CSV path
|
110 |
udm_fields = load_udm_fields(udm_fields_csv)
|
111 |
|
112 |
-
#
|
113 |
-
|
|
|
|
|
|
|
|
|
|
|
114 |
|
115 |
# Perform the mapping
|
116 |
-
mapped_fields_df = map_log_fields_to_udm(
|
117 |
|
118 |
# Create a response showing the mapping
|
119 |
-
mapped_fields_table = mapped_fields_df
|
120 |
-
|
121 |
-
prompt = Prompt(
|
122 |
-
provider=ChatOpenAI.id,
|
123 |
-
messages=[
|
124 |
-
PromptMessage(role="system", template=system_template, formatted=system_template),
|
125 |
-
PromptMessage(role="user", template=user_template, formatted=user_template.format(input=message.content)),
|
126 |
-
],
|
127 |
-
inputs={"input": message.content},
|
128 |
-
settings=settings,
|
129 |
-
)
|
130 |
|
|
|
131 |
msg = cl.Message(content=f"Here is the mapped log fields to UDM:\n\n{mapped_fields_table}")
|
132 |
await msg.send()
|
133 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import pandas as pd
|
2 |
import os
|
3 |
+
import openai
|
4 |
import chainlit as cl
|
|
|
5 |
from dotenv import load_dotenv
|
6 |
|
7 |
# Load environment variables
|
8 |
load_dotenv()
|
9 |
|
|
|
10 |
# Load UDM fields CSV file
|
11 |
def load_udm_fields(csv_path):
|
12 |
return pd.read_csv(csv_path)
|
13 |
|
14 |
+
# Use OpenAI to assist in mapping log fields that are not directly mapped
|
15 |
+
async def ai_assisted_mapping(log_field, vendor, app, udm_fields):
|
16 |
+
# Few-shot learning examples included in the prompt, adaptable for any vendor/app
|
17 |
+
prompt = f"""
|
18 |
+
You are a cybersecurity expert specialized in {vendor} logs for {app} applications.
|
19 |
+
The log field '{log_field}' doesn't have a direct mapping in Google Chronicle UDM.
|
20 |
+
|
21 |
+
Here are examples of how fields from other vendors are mapped to UDM:
|
22 |
+
|
23 |
+
### Example 1: Fortinet Fields to UDM Mapping
|
24 |
+
Log Attribute | UDM Attribute
|
25 |
+
--------------|---------------
|
26 |
+
devname | intermediary.hostname
|
27 |
+
devid | intermediary.asset.hardware.serial_number
|
28 |
+
srcip | principal.ip
|
29 |
+
dstip | target.ip
|
30 |
+
dstport | target.port
|
31 |
+
|
32 |
+
### Example 2: Palo Alto Fields to UDM Mapping
|
33 |
+
Log Attribute | UDM Attribute
|
34 |
+
--------------|---------------
|
35 |
+
src_ip | principal.ip
|
36 |
+
dest_ip | target.ip
|
37 |
+
dest_port | target.port
|
38 |
+
action | security_result.action_details
|
39 |
+
severity | security_result.severity_details
|
40 |
+
|
41 |
+
Now, map the log field '{log_field}' from {vendor} {app} to the best possible UDM attribute.
|
42 |
+
Reference the provided UDM fields: {', '.join(udm_fields['Field Name'].tolist())}.
|
43 |
+
|
44 |
+
If no exact match, provide the closest UDM mapping or suggest a custom field (e.g., custom_fields.{log_field}).
|
45 |
+
"""
|
46 |
+
response = openai.Completion.create(
|
47 |
+
engine="gpt-4", # Use GPT-4 model
|
48 |
+
prompt=prompt,
|
49 |
+
max_tokens=100,
|
50 |
+
temperature=0.2
|
51 |
+
)
|
52 |
+
return response.choices[0].text.strip()
|
53 |
+
|
54 |
# Map log fields to UDM fields
|
55 |
+
async def map_log_fields_to_udm(log_fields, vendor, app, udm_fields):
|
56 |
mapped_fields = []
|
57 |
for field in log_fields:
|
58 |
# Try to find a matching UDM field
|
|
|
63 |
'UDM_Field': udm_match.iloc[0]['Field Name']
|
64 |
})
|
65 |
else:
|
66 |
+
# If no direct match, call OpenAI to suggest a mapping with few-shot examples
|
67 |
+
ai_suggestion = await ai_assisted_mapping(field, vendor, app, udm_fields)
|
68 |
mapped_fields.append({
|
69 |
'Log_Field': field,
|
70 |
+
'UDM_Field': ai_suggestion or 'custom_fields.' + field
|
71 |
})
|
72 |
return pd.DataFrame(mapped_fields)
|
73 |
|
74 |
+
# Function to format the DataFrame into a readable string format for display
|
75 |
+
def format_mapped_fields_for_display(mapped_fields_df):
|
76 |
+
# Convert the DataFrame to a formatted string
|
77 |
+
result = "Log Field | UDM Field\n"
|
78 |
+
result += "-----------------------\n"
|
79 |
+
for _, row in mapped_fields_df.iterrows():
|
80 |
+
result += f"{row['Log_Field']} | {row['UDM_Field']}\n"
|
81 |
+
return result
|
82 |
+
|
83 |
+
# Enhanced user prompt to guide the user
|
84 |
+
user_template = """To help with the mapping of your log fields to Google Chronicle's Unified Data Model (UDM),
|
85 |
+
please provide the following information:
|
86 |
+
|
87 |
+
1. **Vendor**: What is the vendor for this log (e.g., Palo Alto, Fortinet, etc.)?
|
88 |
+
2. **Application**: Which application does the log come from (e.g., Firewall, IDS, etc.)?
|
89 |
+
3. **Log Fields**: List the log fields you want to map (e.g., src_ip, dest_ip, action, etc.).
|
90 |
+
|
91 |
+
Example Input:
|
92 |
+
Vendor: Palo Alto
|
93 |
+
Application: Firewall
|
94 |
+
Log Fields:
|
95 |
+
src_ip
|
96 |
+
dest_ip
|
97 |
+
action
|
98 |
+
severity
|
99 |
+
|
100 |
+
Please provide this information in the format shown above, and I will help map the fields to UDM.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
101 |
"""
|
102 |
|
103 |
@cl.on_chat_start # Marks a function that will be executed at the start of a user session
|
104 |
async def start_chat():
|
105 |
settings = {
|
106 |
+
"model": "gpt-4", # Use GPT-4 here
|
107 |
"temperature": 0,
|
108 |
"max_tokens": 500,
|
109 |
"top_p": 1,
|
|
|
120 |
udm_fields_csv = "udm_field_list_v2.csv" # Replace with your actual CSV path
|
121 |
udm_fields = load_udm_fields(udm_fields_csv)
|
122 |
|
123 |
+
# Parse user input to dynamically capture log fields, vendor, and app
|
124 |
+
user_input = message.content.split("\n")
|
125 |
+
|
126 |
+
# Example: The first line contains the vendor, second the app, and remaining lines are log fields
|
127 |
+
vendor_line = user_input[0].split(":")[1].strip() # Extract Vendor
|
128 |
+
app_line = user_input[1].split(":")[1].strip() # Extract Application
|
129 |
+
log_lines = [line.strip() for line in user_input[2:] if line.strip()] # Extract log fields
|
130 |
|
131 |
# Perform the mapping
|
132 |
+
mapped_fields_df = await map_log_fields_to_udm(log_lines, vendor_line, app_line, udm_fields)
|
133 |
|
134 |
# Create a response showing the mapping
|
135 |
+
mapped_fields_table = format_mapped_fields_for_display(mapped_fields_df)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
136 |
|
137 |
+
# Display the output to the user
|
138 |
msg = cl.Message(content=f"Here is the mapped log fields to UDM:\n\n{mapped_fields_table}")
|
139 |
await msg.send()
|
140 |
|