Galatea007 committed on
Commit
22e26a4
·
verified ·
1 Parent(s): 0bf70b9

Update app_transformation.py

Browse files
Files changed (1) hide show
  1. app_transformation.py +22 -90
app_transformation.py CHANGED
@@ -1,28 +1,21 @@
1
- # You can find this code for Chainlit python streaming here (https://docs.chainlit.io/concepts/streaming/python)
2
-
3
- # OpenAI Chat completion
4
- import os
5
- from openai import AsyncOpenAI # importing openai for API usage
6
- import chainlit as cl # importing chainlit for our app
7
- from chainlit.prompt import Prompt, PromptMessage # importing prompt tools
8
- from chainlit.playground.providers import ChatOpenAI # importing ChatOpenAI tools
9
  from dotenv import load_dotenv
10
 
 
11
  load_dotenv()
12
 
13
-
14
  # ChatOpenAI Templates
15
- system_template = """You are a cybersecurity expert specialized in log analysis and data normalization,/n
16
  helping security teams to map security log fields to Google Chronicle's Unified Data Model (UDM).
17
 
18
  Please follow these steps:
19
  1. Use web search to consult the latest log documentation for the product provided and Google Chronicle UDM schema documentation.
20
  2. Map each product log field to its corresponding UDM field.
21
  3. For fields that don't have a direct match in UDM, place them into custom fields.
22
- 4. make sure the each mapped field including the custome fields are unique and accurate
23
  5. Organize the mapping into a structured table format.
24
- Remember to explain your reasoning for each field's mapping based on your findings from the product documentation.
25
- Think through your response step by step, and include references to the documentation you consulted during the process.
26
  You always provide detailed, accurate, and structured responses in a professional tone, focusing on precision.
27
  """
28
 
@@ -34,70 +27,16 @@ Please follow these steps:
34
  1. Use web search to consult the latest documentation for this product/log source and Google Chronicle UDM schema documentation.
35
  2. Map each log field to its corresponding UDM field.
36
  3. For fields that don't have a direct match in UDM, place them into custom fields.
37
- 4. make sure the each mapped field including the custome fields are unique and accurate
38
  5. Organize the mapping into a structured table format.
39
-
40
- Here is an example of how to map FortiGate fields to UDM fields..
41
- Consider this example and related search into relevant URLs to identify best and most accurate mapping for other log sources against UB
42
- Log_Attribute Simpliefied_Naming_for_Attribute UBM_Attribute
43
- devname devicehostname intermediary.hostname
44
- devid deviceexternalid intermediary.asset.hardware.serial_number
45
- ap customstring62 target.application
46
- dstport destinationport target.port
47
- tranport destinationtranslatedport target.nat_port
48
- policytype customstring55 security_result.rule_type
49
- TransportProtocol transportprotocol network.ip_protocol
50
- srcip sourceaddress principal.ip
51
- dstmac destinationmacaddress target.mac
52
- eventtype customstring28 security_result.rule_type
53
- level deviceseverity security_result.severity_details
54
- filename filename target.file.full_path
55
- action deviceaction security_result.action_details
56
- ipaddr destinationtranslatedaddress target.nat_ip
57
- srcuuid flowsiemid principal.asset.product_object_id
58
- policyid customnumber22 additional.fields.policyid
59
- srcport sourceport principal.port
60
- service applicationprotocol network.application_protocol_version
61
- to emailrecipient network.email.to
62
- appid customstring34 security_result.rule_id
63
- proto customnumber6 network.ip_protocol
64
- ref devicecustomstring6 network.http.referral_url
65
- method requestmethod network.http.method
66
- transport translatedport src.nat_port
67
- srcname sourcehostname principal.user.first_name
68
- duration transactionstring4 network.session_duration.seconds
69
- hostname destinationhostname target.hostname
70
- Header2 deviceaddress intermediary.hostname
71
- url requesturl target.url
72
- domain emailsenderdomain network.dns_domain
73
- agent requestclientapplication network.http.user_agent
74
- rcvdpkt transactionnumber4 additional.fields.rcvdpkt
75
- catdesc deviceeventcategory security_result.category_details
76
- sentbyte bytesout network.sent_bytes
77
- sentpkt transactionnumber3 additional.fields.sentpkt
78
- msg message metadata.description
79
- source_mac sourcemacaddress principal.mac
80
- subject emailsubject network.email.subject
81
- from emailsender network.email.from
82
- dstip destinationaddress target.ip
83
- logid transactionstring6 metadata.product_log_id
84
- rcvdbyte bytesin network.received_bytes
85
- type customstring1 metadata.product_event_type
86
- user accountname principal.user.user_display_name
87
- sessionid sessionid network.session_id
88
- dstuser destinationusername target.user.userid
89
-
90
- References:
91
- FortiGate Log Documentation, check all links in this URL: https://docs.cyderes.cloud/parser-knowledge-base/fortinet_firewall/
92
- Google Chronicle UDM Documentation:https://cloud.google.com/chronicle/docs/reference/udm-field-list
93
-
94
- The official documentation of Chronicle UDM, explaining the different fields and structures available for mapping.
95
-
96
- Think through your response step by step, and include references to the documentation you consulted during the process.
97
  """
98
 
 
 
 
 
99
 
100
- @cl.on_chat_start # marks a function that will be executed at the start of a user session
101
  async def start_chat():
102
  settings = {
103
  "model": "gpt-3.5-turbo",
@@ -107,31 +46,26 @@ async def start_chat():
107
  "frequency_penalty": 0,
108
  "presence_penalty": 0,
109
  }
110
-
111
  cl.user_session.set("settings", settings)
112
 
113
-
114
- @cl.on_message # marks a function that should be run each time the chatbot receives a message from a user
115
  async def main(message: cl.Message):
116
  settings = cl.user_session.get("settings")
117
-
118
  client = AsyncOpenAI()
119
 
120
  print(message.content)
121
 
 
 
 
 
 
 
122
  prompt = Prompt(
123
  provider=ChatOpenAI.id,
124
  messages=[
125
- PromptMessage(
126
- role="system",
127
- template=system_template,
128
- formatted=system_template,
129
- ),
130
- PromptMessage(
131
- role="user",
132
- template=user_template,
133
- formatted=user_template.format(input=message.content),
134
- ),
135
  ],
136
  inputs={"input": message.content},
137
  settings=settings,
@@ -140,8 +74,6 @@ async def main(message: cl.Message):
140
  print([m.to_openai() for m in prompt.messages])
141
 
142
  msg = cl.Message(content="")
143
-
144
- # Call OpenAI
145
  async for stream_resp in await client.chat.completions.create(
146
  messages=[m.to_openai() for m in prompt.messages], stream=True, **settings
147
  ):
@@ -150,7 +82,7 @@ async def main(message: cl.Message):
150
  token = ""
151
  await msg.stream_token(token)
152
 
153
- # Update the prompt object with the completion
154
  prompt.completion = msg.content
155
  msg.prompt = prompt
156
 
 
1
+ import pandas as pd
2
+ import chainlit as cl
3
+ from chainlit.prompt import Prompt, PromptMessage
 
 
 
 
 
4
  from dotenv import load_dotenv
5
 
6
+ # Load environment variables
7
  load_dotenv()
8
 
 
9
  # ChatOpenAI Templates
10
+ system_template = """You are a cybersecurity expert specialized in log analysis and data normalization,
11
  helping security teams to map security log fields to Google Chronicle's Unified Data Model (UDM).
12
 
13
  Please follow these steps:
14
  1. Use web search to consult the latest log documentation for the product provided and Google Chronicle UDM schema documentation.
15
  2. Map each product log field to its corresponding UDM field.
16
  3. For fields that don't have a direct match in UDM, place them into custom fields.
17
+ 4. Ensure each mapped field, including custom fields, is unique and accurate.
18
  5. Organize the mapping into a structured table format.
 
 
19
  You always provide detailed, accurate, and structured responses in a professional tone, focusing on precision.
20
  """
21
 
 
27
  1. Use web search to consult the latest documentation for this product/log source and Google Chronicle UDM schema documentation.
28
  2. Map each log field to its corresponding UDM field.
29
  3. For fields that don't have a direct match in UDM, place them into custom fields.
30
+ 4. Ensure each mapped field, including custom fields, is unique and accurate.
31
  5. Organize the mapping into a structured table format.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  """
33
 
34
+ # Function to read UDM fields from the existing CSV file
35
+ def read_udm_fields(csv_file_path):
36
+ udm_fields_df = pd.read_csv(csv_file_path)
37
+ return udm_fields_df
38
 
39
+ @cl.on_chat_start # Marks function to be executed at the start of a user session
40
  async def start_chat():
41
  settings = {
42
  "model": "gpt-3.5-turbo",
 
46
  "frequency_penalty": 0,
47
  "presence_penalty": 0,
48
  }
 
49
  cl.user_session.set("settings", settings)
50
 
51
+ @cl.on_message # Marks function to run each time chatbot receives a message from a user
 
52
  async def main(message: cl.Message):
53
  settings = cl.user_session.get("settings")
 
54
  client = AsyncOpenAI()
55
 
56
  print(message.content)
57
 
58
+ # Read UDM fields from the existing CSV file
59
+ csv_file_path = 'udm_fields.csv' # Ensure this file exists in the environment
60
+ udm_fields_df = read_udm_fields(csv_file_path)
61
+
62
+ # TODO: process and map log fields to UDM fields based on the CSV (udm_fields_df is loaded above but not used yet)
63
+
64
  prompt = Prompt(
65
  provider=ChatOpenAI.id,
66
  messages=[
67
+ PromptMessage(role="system", template=system_template, formatted=system_template),
68
+ PromptMessage(role="user", template=user_template, formatted=user_template.format(input=message.content)),
 
 
 
 
 
 
 
 
69
  ],
70
  inputs={"input": message.content},
71
  settings=settings,
 
74
  print([m.to_openai() for m in prompt.messages])
75
 
76
  msg = cl.Message(content="")
 
 
77
  async for stream_resp in await client.chat.completions.create(
78
  messages=[m.to_openai() for m in prompt.messages], stream=True, **settings
79
  ):
 
82
  token = ""
83
  await msg.stream_token(token)
84
 
85
+ # Update prompt object with the completion
86
  prompt.completion = msg.content
87
  msg.prompt = prompt
88