Spaces:
Paused
Paused
Update app_transformation.py
Browse files- app_transformation.py +22 -90
app_transformation.py
CHANGED
@@ -1,28 +1,21 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
4 |
-
import os
|
5 |
-
from openai import AsyncOpenAI # importing openai for API usage
|
6 |
-
import chainlit as cl # importing chainlit for our app
|
7 |
-
from chainlit.prompt import Prompt, PromptMessage # importing prompt tools
|
8 |
-
from chainlit.playground.providers import ChatOpenAI # importing ChatOpenAI tools
|
9 |
from dotenv import load_dotenv
|
10 |
|
|
|
11 |
load_dotenv()
|
12 |
|
13 |
-
|
14 |
# ChatOpenAI Templates
|
15 |
-
system_template = """You are a cybersecurity expert specialized in log analysis and data normalization
|
16 |
helping security teams to map security log fields to Google Chronicle's Unified Data Model (UDM).
|
17 |
|
18 |
Please follow these steps:
|
19 |
1. Use web search to consult the latest log documentation for the product provided and Google Chronicle UDM schema documentation.
|
20 |
2. Map each product log field to its corresponding UDM field.
|
21 |
3. For fields that don't have a direct match in UDM, place them into custom fields.
|
22 |
-
4.
|
23 |
5. Organize the mapping into a structured table format.
|
24 |
-
Remember to explain your reasoning for each field's mapping based on your findings from the product documentation.
|
25 |
-
Think through your response step by step, and include references to the documentation you consulted during the process.
|
26 |
You always provide detailed, accurate, and structured responses in a professional tone, focusing on precision.
|
27 |
"""
|
28 |
|
@@ -34,70 +27,16 @@ Please follow these steps:
|
|
34 |
1. Use web search to consult the latest documentation for this product/log source and Google Chronicle UDM schema documentation.
|
35 |
2. Map each log field to its corresponding UDM field.
|
36 |
3. For fields that don't have a direct match in UDM, place them into custom fields.
|
37 |
-
4.
|
38 |
5. Organize the mapping into a structured table format.
|
39 |
-
|
40 |
-
Here is an example of how to map FortiGate fields to UDM fields..
|
41 |
-
Consider this example and related search into relevant URLs to identify best and most accurate mapping for other log sources against UB
|
42 |
-
Log_Attribute Simpliefied_Naming_for_Attribute UBM_Attribute
|
43 |
-
devname devicehostname intermediary.hostname
|
44 |
-
devid deviceexternalid intermediary.asset.hardware.serial_number
|
45 |
-
ap customstring62 target.application
|
46 |
-
dstport destinationport target.port
|
47 |
-
tranport destinationtranslatedport target.nat_port
|
48 |
-
policytype customstring55 security_result.rule_type
|
49 |
-
TransportProtocol transportprotocol network.ip_protocol
|
50 |
-
srcip sourceaddress principal.ip
|
51 |
-
dstmac destinationmacaddress target.mac
|
52 |
-
eventtype customstring28 security_result.rule_type
|
53 |
-
level deviceseverity security_result.severity_details
|
54 |
-
filename filename target.file.full_path
|
55 |
-
action deviceaction security_result.action_details
|
56 |
-
ipaddr destinationtranslatedaddress target.nat_ip
|
57 |
-
srcuuid flowsiemid principal.asset.product_object_id
|
58 |
-
policyid customnumber22 additional.fields.policyid
|
59 |
-
srcport sourceport principal.port
|
60 |
-
service applicationprotocol network.application_protocol_version
|
61 |
-
to emailrecipient network.email.to
|
62 |
-
appid customstring34 security_result.rule_id
|
63 |
-
proto customnumber6 network.ip_protocol
|
64 |
-
ref devicecustomstring6 network.http.referral_url
|
65 |
-
method requestmethod network.http.method
|
66 |
-
transport translatedport src.nat_port
|
67 |
-
srcname sourcehostname principal.user.first_name
|
68 |
-
duration transactionstring4 network.session_duration.seconds
|
69 |
-
hostname destinationhostname target.hostname
|
70 |
-
Header2 deviceaddress intermediary.hostname
|
71 |
-
url requesturl target.url
|
72 |
-
domain emailsenderdomain network.dns_domain
|
73 |
-
agent requestclientapplication network.http.user_agent
|
74 |
-
rcvdpkt transactionnumber4 additional.fields.rcvdpkt
|
75 |
-
catdesc deviceeventcategory security_result.category_details
|
76 |
-
sentbyte bytesout network.sent_bytes
|
77 |
-
sentpkt transactionnumber3 additional.fields.sentpkt
|
78 |
-
msg message metadata.description
|
79 |
-
source_mac sourcemacaddress principal.mac
|
80 |
-
subject emailsubject network.email.subject
|
81 |
-
from emailsender network.email.from
|
82 |
-
dstip destinationaddress target.ip
|
83 |
-
logid transactionstring6 metadata.product_log_id
|
84 |
-
rcvdbyte bytesin network.received_bytes
|
85 |
-
type customstring1 metadata.product_event_type
|
86 |
-
user accountname principal.user.user_display_name
|
87 |
-
sessionid sessionid network.session_id
|
88 |
-
dstuser destinationusername target.user.userid
|
89 |
-
|
90 |
-
References:
|
91 |
-
FortiGate Log Documentation, check all links in this URL: https://docs.cyderes.cloud/parser-knowledge-base/fortinet_firewall/
|
92 |
-
Google Chronicle UDM Documentation:https://cloud.google.com/chronicle/docs/reference/udm-field-list
|
93 |
-
|
94 |
-
The official documentation of Chronicle UDM, explaining the different fields and structures available for mapping.
|
95 |
-
|
96 |
-
Think through your response step by step, and include references to the documentation you consulted during the process.
|
97 |
"""
|
98 |
|
|
|
|
|
|
|
|
|
99 |
|
100 |
-
@cl.on_chat_start #
|
101 |
async def start_chat():
|
102 |
settings = {
|
103 |
"model": "gpt-3.5-turbo",
|
@@ -107,31 +46,26 @@ async def start_chat():
|
|
107 |
"frequency_penalty": 0,
|
108 |
"presence_penalty": 0,
|
109 |
}
|
110 |
-
|
111 |
cl.user_session.set("settings", settings)
|
112 |
|
113 |
-
|
114 |
-
@cl.on_message # marks a function that should be run each time the chatbot receives a message from a user
|
115 |
async def main(message: cl.Message):
|
116 |
settings = cl.user_session.get("settings")
|
117 |
-
|
118 |
client = AsyncOpenAI()
|
119 |
|
120 |
print(message.content)
|
121 |
|
|
|
|
|
|
|
|
|
|
|
|
|
122 |
prompt = Prompt(
|
123 |
provider=ChatOpenAI.id,
|
124 |
messages=[
|
125 |
-
PromptMessage(
|
126 |
-
|
127 |
-
template=system_template,
|
128 |
-
formatted=system_template,
|
129 |
-
),
|
130 |
-
PromptMessage(
|
131 |
-
role="user",
|
132 |
-
template=user_template,
|
133 |
-
formatted=user_template.format(input=message.content),
|
134 |
-
),
|
135 |
],
|
136 |
inputs={"input": message.content},
|
137 |
settings=settings,
|
@@ -140,8 +74,6 @@ async def main(message: cl.Message):
|
|
140 |
print([m.to_openai() for m in prompt.messages])
|
141 |
|
142 |
msg = cl.Message(content="")
|
143 |
-
|
144 |
-
# Call OpenAI
|
145 |
async for stream_resp in await client.chat.completions.create(
|
146 |
messages=[m.to_openai() for m in prompt.messages], stream=True, **settings
|
147 |
):
|
@@ -150,7 +82,7 @@ async def main(message: cl.Message):
|
|
150 |
token = ""
|
151 |
await msg.stream_token(token)
|
152 |
|
153 |
-
# Update
|
154 |
prompt.completion = msg.content
|
155 |
msg.prompt = prompt
|
156 |
|
|
|
1 |
+
import pandas as pd
|
2 |
+
import chainlit as cl
|
3 |
+
from chainlit.prompt import Prompt, PromptMessage
|
|
|
|
|
|
|
|
|
|
|
4 |
from dotenv import load_dotenv
|
5 |
|
6 |
+
# Load environment variables
|
7 |
load_dotenv()
|
8 |
|
|
|
9 |
# ChatOpenAI Templates
|
10 |
+
system_template = """You are a cybersecurity expert specialized in log analysis and data normalization,
|
11 |
helping security teams to map security log fields to Google Chronicle's Unified Data Model (UDM).
|
12 |
|
13 |
Please follow these steps:
|
14 |
1. Use web search to consult the latest log documentation for the product provided and Google Chronicle UDM schema documentation.
|
15 |
2. Map each product log field to its corresponding UDM field.
|
16 |
3. For fields that don't have a direct match in UDM, place them into custom fields.
|
17 |
+
4. Ensure each mapped field, including custom fields, is unique and accurate.
|
18 |
5. Organize the mapping into a structured table format.
|
|
|
|
|
19 |
You always provide detailed, accurate, and structured responses in a professional tone, focusing on precision.
|
20 |
"""
|
21 |
|
|
|
27 |
1. Use web search to consult the latest documentation for this product/log source and Google Chronicle UDM schema documentation.
|
28 |
2. Map each log field to its corresponding UDM field.
|
29 |
3. For fields that don't have a direct match in UDM, place them into custom fields.
|
30 |
+
4. Ensure each mapped field, including custom fields, is unique and accurate.
|
31 |
5. Organize the mapping into a structured table format.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
32 |
"""
|
33 |
|
34 |
+
# Function to read UDM fields from the existing CSV file
|
35 |
+
def read_udm_fields(csv_file_path) -> pd.DataFrame:
    """Load the UDM field list from a CSV file.

    Args:
        csv_file_path: Location of the CSV file; passed straight to
            ``pandas.read_csv``.

    Returns:
        The parsed CSV contents as a ``pandas.DataFrame``.

    Raises:
        Any exception raised by ``pandas.read_csv`` (for example when the
        file is missing) propagates unchanged.
    """
    return pd.read_csv(csv_file_path)
|
38 |
|
39 |
+
@cl.on_chat_start # Marks function to be executed at the start of a user session
|
40 |
async def start_chat():
|
41 |
settings = {
|
42 |
"model": "gpt-3.5-turbo",
|
|
|
46 |
"frequency_penalty": 0,
|
47 |
"presence_penalty": 0,
|
48 |
}
|
|
|
49 |
cl.user_session.set("settings", settings)
|
50 |
|
51 |
+
@cl.on_message # Marks function to run each time chatbot receives a message from a user
|
|
|
52 |
async def main(message: cl.Message):
|
53 |
settings = cl.user_session.get("settings")
|
|
|
54 |
client = AsyncOpenAI()
|
55 |
|
56 |
print(message.content)
|
57 |
|
58 |
+
# Read UDM fields from the existing CSV file
|
59 |
+
csv_file_path = 'udm_fields.csv' # Ensure this file exists in the environment
|
60 |
+
udm_fields_df = read_udm_fields(csv_file_path)
|
61 |
+
|
62 |
+
# TODO: udm_fields_df is loaded above but not yet used; the prompt built below does not reference the CSV contents
|
63 |
+
|
64 |
prompt = Prompt(
|
65 |
provider=ChatOpenAI.id,
|
66 |
messages=[
|
67 |
+
PromptMessage(role="system", template=system_template, formatted=system_template),
|
68 |
+
PromptMessage(role="user", template=user_template, formatted=user_template.format(input=message.content)),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
69 |
],
|
70 |
inputs={"input": message.content},
|
71 |
settings=settings,
|
|
|
74 |
print([m.to_openai() for m in prompt.messages])
|
75 |
|
76 |
msg = cl.Message(content="")
|
|
|
|
|
77 |
async for stream_resp in await client.chat.completions.create(
|
78 |
messages=[m.to_openai() for m in prompt.messages], stream=True, **settings
|
79 |
):
|
|
|
82 |
token = ""
|
83 |
await msg.stream_token(token)
|
84 |
|
85 |
+
# Update prompt object with the completion
|
86 |
prompt.completion = msg.content
|
87 |
msg.prompt = prompt
|
88 |
|