Galatea007 commited on
Commit
91ba323
·
verified ·
1 Parent(s): 0e07bda

Update app_udm.py

Browse files
Files changed (1) hide show
  1. app_udm.py +83 -76
app_udm.py CHANGED
@@ -1,31 +1,58 @@
1
- # You can find this code for Chainlit python streaming here (https://docs.chainlit.io/concepts/streaming/python)
2
-
3
- # OpenAI Chat completion
4
- import os
5
- from openai import AsyncOpenAI # importing openai for API usage
6
- import chainlit as cl # importing chainlit for our app
7
- from chainlit.prompt import Prompt, PromptMessage # importing prompt tools
8
- from chainlit.playground.providers import ChatOpenAI # importing ChatOpenAI tools
9
- from dotenv import load_dotenv
10
-
11
- load_dotenv()
12
  import pandas as pd
13
  import os
14
- from openai import AsyncOpenAI
15
  import chainlit as cl
16
- from chainlit.prompt import Prompt, PromptMessage
17
  from dotenv import load_dotenv
18
 
19
  # Load environment variables
20
  load_dotenv()
21
 
22
-
23
  # Load UDM fields CSV file
24
  def load_udm_fields(csv_path):
25
  return pd.read_csv(csv_path)
26
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  # Map log fields to UDM fields
28
- def map_log_fields_to_udm(log_fields, udm_fields):
29
  mapped_fields = []
30
  for field in log_fields:
31
  # Try to find a matching UDM field
@@ -36,63 +63,47 @@ def map_log_fields_to_udm(log_fields, udm_fields):
36
  'UDM_Field': udm_match.iloc[0]['Field Name']
37
  })
38
  else:
39
- # If no direct match, add to custom fields
 
40
  mapped_fields.append({
41
  'Log_Field': field,
42
- 'UDM_Field': 'custom_fields.' + field
43
  })
44
  return pd.DataFrame(mapped_fields)
45
 
46
- # Chainlit OpenAI Templates for multi-shot learning
47
- system_template = """You are a cybersecurity expert specialized in log analysis and data normalization,
48
- helping security teams map security log fields to Google Chronicle's Unified Data Model (UDM).
49
-
50
- Please follow these steps:
51
- 1. Map each product log field to its corresponding UDM field using the reference UDM CSV provided.
52
- 2. For fields that don't have a direct match in UDM, place them into custom fields.
53
- 3. Ensure each mapped field, including custom fields, is unique and accurate.
54
- 4. Organize the mapping into a structured table format.
55
- """
56
-
57
- # Multi-shot learning examples for Fortinet and Palo Alto
58
- user_template = """Here is a sample log:
59
-
60
- {input}
61
-
62
- Please follow these steps:
63
- 1. Use the provided UDM CSV to map the log fields.
64
- 2. For fields that don't have a direct match, assign them to custom fields.
65
- 3. Organize the mapping into a structured table.
66
-
67
- ### Example 1: Fortinet Fields to UDM Mapping
68
-
69
- Log Attribute | UDM Attribute
70
- --------------|---------------
71
- devname | intermediary.hostname
72
- devid | intermediary.asset.hardware.serial_number
73
- srcip | principal.ip
74
- dstip | target.ip
75
- dstport | target.port
76
-
77
- ### Example 2: Palo Alto Fields to UDM Mapping
78
-
79
-
80
-
81
- Log Attribute | UDM Attribute
82
- --------------|---------------
83
- src_ip | principal.ip
84
- dest_ip | target.ip
85
- dest_port | target.port
86
- action | security_result.action_details
87
- severity | security_result.severity_details
88
-
89
- Now proceed to map the given sample log:
90
  """
91
 
92
  @cl.on_chat_start # Marks a function that will be executed at the start of a user session
93
  async def start_chat():
94
  settings = {
95
- "model": "gpt-3.5-turbo",
96
  "temperature": 0,
97
  "max_tokens": 500,
98
  "top_p": 1,
@@ -109,25 +120,21 @@ async def main(message: cl.Message):
109
  udm_fields_csv = "udm_field_list_v2.csv" # Replace with your actual CSV path
110
  udm_fields = load_udm_fields(udm_fields_csv)
111
 
112
- # Simulate log fields from the user's input (in real use case, you'd parse the input log)
113
- log_fields = message.content.split() # Example: Splitting input log into fields for simplicity
 
 
 
 
 
114
 
115
  # Perform the mapping
116
- mapped_fields_df = map_log_fields_to_udm(log_fields, udm_fields)
117
 
118
  # Create a response showing the mapping
119
- mapped_fields_table = mapped_fields_df.to_string(index=False)
120
-
121
- prompt = Prompt(
122
- provider=ChatOpenAI.id,
123
- messages=[
124
- PromptMessage(role="system", template=system_template, formatted=system_template),
125
- PromptMessage(role="user", template=user_template, formatted=user_template.format(input=message.content)),
126
- ],
127
- inputs={"input": message.content},
128
- settings=settings,
129
- )
130
 
 
131
  msg = cl.Message(content=f"Here is the mapped log fields to UDM:\n\n{mapped_fields_table}")
132
  await msg.send()
133
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import pandas as pd
2
  import os
3
+ import openai
4
  import chainlit as cl
 
5
  from dotenv import load_dotenv
6
 
7
  # Load environment variables
8
  load_dotenv()
9
 
 
10
  # Load UDM fields CSV file
11
  def load_udm_fields(csv_path):
12
  return pd.read_csv(csv_path)
13
 
14
+ # Use OpenAI to assist in mapping log fields that are not directly mapped
15
+ async def ai_assisted_mapping(log_field, vendor, app, udm_fields):
16
+ # Few-shot learning examples included in the prompt, adaptable for any vendor/app
17
+ prompt = f"""
18
+ You are a cybersecurity expert specialized in {vendor} logs for {app} applications.
19
+ The log field '{log_field}' doesn't have a direct mapping in Google Chronicle UDM.
20
+
21
+ Here are examples of how fields from other vendors are mapped to UDM:
22
+
23
+ ### Example 1: Fortinet Fields to UDM Mapping
24
+ Log Attribute | UDM Attribute
25
+ --------------|---------------
26
+ devname | intermediary.hostname
27
+ devid | intermediary.asset.hardware.serial_number
28
+ srcip | principal.ip
29
+ dstip | target.ip
30
+ dstport | target.port
31
+
32
+ ### Example 2: Palo Alto Fields to UDM Mapping
33
+ Log Attribute | UDM Attribute
34
+ --------------|---------------
35
+ src_ip | principal.ip
36
+ dest_ip | target.ip
37
+ dest_port | target.port
38
+ action | security_result.action_details
39
+ severity | security_result.severity_details
40
+
41
+ Now, map the log field '{log_field}' from {vendor} {app} to the best possible UDM attribute.
42
+ Reference the provided UDM fields: {', '.join(udm_fields['Field Name'].tolist())}.
43
+
44
+ If no exact match, provide the closest UDM mapping or suggest a custom field (e.g., custom_fields.{log_field}).
45
+ """
46
+ response = openai.Completion.create(
47
+ engine="gpt-4", # Use GPT-4 model
48
+ prompt=prompt,
49
+ max_tokens=100,
50
+ temperature=0.2
51
+ )
52
+ return response.choices[0].text.strip()
53
+
54
  # Map log fields to UDM fields
55
+ async def map_log_fields_to_udm(log_fields, vendor, app, udm_fields):
56
  mapped_fields = []
57
  for field in log_fields:
58
  # Try to find a matching UDM field
 
63
  'UDM_Field': udm_match.iloc[0]['Field Name']
64
  })
65
  else:
66
+ # If no direct match, call OpenAI to suggest a mapping with few-shot examples
67
+ ai_suggestion = await ai_assisted_mapping(field, vendor, app, udm_fields)
68
  mapped_fields.append({
69
  'Log_Field': field,
70
+ 'UDM_Field': ai_suggestion or 'custom_fields.' + field
71
  })
72
  return pd.DataFrame(mapped_fields)
73
 
74
+ # Function to format the DataFrame into a readable string format for display
75
+ def format_mapped_fields_for_display(mapped_fields_df):
76
+ # Convert the DataFrame to a formatted string
77
+ result = "Log Field | UDM Field\n"
78
+ result += "-----------------------\n"
79
+ for _, row in mapped_fields_df.iterrows():
80
+ result += f"{row['Log_Field']} | {row['UDM_Field']}\n"
81
+ return result
82
+
83
+ # Enhanced user prompt to guide the user
84
+ user_template = """To help with the mapping of your log fields to Google Chronicle's Unified Data Model (UDM),
85
+ please provide the following information:
86
+
87
+ 1. **Vendor**: What is the vendor for this log (e.g., Palo Alto, Fortinet, etc.)?
88
+ 2. **Application**: Which application does the log come from (e.g., Firewall, IDS, etc.)?
89
+ 3. **Log Fields**: List the log fields you want to map (e.g., src_ip, dest_ip, action, etc.).
90
+
91
+ Example Input:
92
+ Vendor: Palo Alto
93
+ Application: Firewall
94
+ Log Fields:
95
+ src_ip
96
+ dest_ip
97
+ action
98
+ severity
99
+
100
+ Please provide this information in the format shown above, and I will help map the fields to UDM.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
  """
102
 
103
  @cl.on_chat_start # Marks a function that will be executed at the start of a user session
104
  async def start_chat():
105
  settings = {
106
+ "model": "gpt-4", # Use GPT-4 here
107
  "temperature": 0,
108
  "max_tokens": 500,
109
  "top_p": 1,
 
120
  udm_fields_csv = "udm_field_list_v2.csv" # Replace with your actual CSV path
121
  udm_fields = load_udm_fields(udm_fields_csv)
122
 
123
+ # Parse user input to dynamically capture log fields, vendor, and app
124
+ user_input = message.content.split("\n")
125
+
126
+ # Example: The first line contains the vendor, second the app, and remaining lines are log fields
127
+ vendor_line = user_input[0].split(":")[1].strip() # Extract Vendor
128
+ app_line = user_input[1].split(":")[1].strip() # Extract Application
129
+ log_lines = [line.strip() for line in user_input[2:] if line.strip()] # Extract log fields
130
 
131
  # Perform the mapping
132
+ mapped_fields_df = await map_log_fields_to_udm(log_lines, vendor_line, app_line, udm_fields)
133
 
134
  # Create a response showing the mapping
135
+ mapped_fields_table = format_mapped_fields_for_display(mapped_fields_df)
 
 
 
 
 
 
 
 
 
 
136
 
137
+ # Display the output to the user
138
  msg = cl.Message(content=f"Here is the mapped log fields to UDM:\n\n{mapped_fields_table}")
139
  await msg.send()
140