File size: 4,742 Bytes
d404a15
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
# You can find this code for Chainlit python streaming here (https://docs.chainlit.io/concepts/streaming/python)

# OpenAI Chat completion
import os
from openai import AsyncOpenAI  # importing openai for API usage
import chainlit as cl  # importing chainlit for our app
from chainlit.prompt import Prompt, PromptMessage  # importing prompt tools
from chainlit.playground.providers import ChatOpenAI  # importing ChatOpenAI tools
from dotenv import load_dotenv

load_dotenv()
import pandas as pd
import os
from openai import AsyncOpenAI
import chainlit as cl
from chainlit.prompt import Prompt, PromptMessage
from dotenv import load_dotenv

# Load environment variables
load_dotenv()


# Read the UDM field reference CSV
def load_udm_fields(csv_path):
    """Return the CSV at ``csv_path`` parsed into a pandas DataFrame.

    Args:
        csv_path: Anything ``pandas.read_csv`` accepts as its first argument.

    Returns:
        The DataFrame produced by ``pandas.read_csv`` with default options.
    """
    udm_table = pd.read_csv(csv_path)
    return udm_table

# Map log fields to UDM fields
def map_log_fields_to_udm(log_fields, udm_fields):
    """Map each log field name to a UDM field name.

    A log field maps to the first entry of the ``'Field Name'`` column of
    ``udm_fields`` that contains it as a case-insensitive substring. When no
    entry matches, the field is mapped to ``'custom_fields.<field>'``.

    Args:
        log_fields: Iterable of log field names (strings).
        udm_fields: DataFrame with a ``'Field Name'`` column of UDM field names.

    Returns:
        A DataFrame with columns ``'Log_Field'`` and ``'UDM_Field'``, one row
        per entry of ``log_fields`` in input order. The columns are present
        even when ``log_fields`` is empty.

    Raises:
        KeyError: If ``udm_fields`` has no ``'Field Name'`` column.
    """
    # Loop-invariant: look the reference column up once.
    udm_names = udm_fields['Field Name']

    mapped_fields = []
    for field in log_fields:
        # regex=False: log tokens are arbitrary text, so characters such as
        # '(' or '.' must be matched literally, not parsed as a pattern.
        # na=False: missing names in the reference count as "no match"
        # instead of producing an NA mask that cannot be used for indexing.
        is_match = udm_names.str.contains(field, case=False, regex=False, na=False)
        udm_match = udm_names[is_match]
        if not udm_match.empty:
            udm_field = udm_match.iloc[0]
        else:
            # If no direct match, add to custom fields
            udm_field = 'custom_fields.' + field
        mapped_fields.append({'Log_Field': field, 'UDM_Field': udm_field})

    # Explicit columns keep the schema stable for empty input.
    return pd.DataFrame(mapped_fields, columns=['Log_Field', 'UDM_Field'])

# System prompt text: sets a cybersecurity-expert persona for mapping security
# log fields to Google Chronicle's UDM and lists the steps to follow (map via
# the reference CSV, fall back to custom fields, keep mappings unique, return
# a table). The text contains no format placeholders.
system_template = """You are a cybersecurity expert specialized in log analysis and data normalization, 
helping security teams map security log fields to Google Chronicle's Unified Data Model (UDM).

Please follow these steps:
1. Map each product log field to its corresponding UDM field using the reference UDM CSV provided.
2. For fields that don't have a direct match in UDM, place them into custom fields. 
3. Ensure each mapped field, including custom fields, is unique and accurate.
4. Organize the mapping into a structured table format.
"""

# User prompt text with two worked example mappings (Fortinet and Palo Alto).
# `{input}` is the only placeholder and is meant to receive the sample log via
# `str.format(input=...)`; any literal brace added to this text later must be
# doubled (`{{` / `}}`) so formatting does not fail.
user_template = """Here is a sample log:

{input}

Please follow these steps:
1. Use the provided UDM CSV to map the log fields.
2. For fields that don't have a direct match, assign them to custom fields.
3. Organize the mapping into a structured table.

### Example 1: Fortinet Fields to UDM Mapping

Log Attribute | UDM Attribute
--------------|---------------
devname       | intermediary.hostname
devid         | intermediary.asset.hardware.serial_number
srcip         | principal.ip
dstip         | target.ip
dstport       | target.port

### Example 2: Palo Alto Fields to UDM Mapping



Log Attribute | UDM Attribute
--------------|---------------
src_ip        | principal.ip
dest_ip       | target.ip
dest_port     | target.port
action        | security_result.action_details
severity      | security_result.severity_details

Now proceed to map the given sample log:
"""

@cl.on_chat_start  # Registered to run when a new user session starts
async def start_chat():
    """Store the default model settings under the ``"settings"`` session key."""
    cl.user_session.set(
        "settings",
        dict(
            model="gpt-3.5-turbo",
            temperature=0,
            max_tokens=500,
            top_p=1,
            frequency_penalty=0,
            presence_penalty=0,
        ),
    )

@cl.on_message  # Marks a function that should be run each time the chatbot receives a message from a user
async def main(message: cl.Message):
    """Map the tokens of a user message to UDM fields and reply with the table.

    The message text is split on whitespace and every token is treated as a
    log field name. The resulting mapping is sent back as a chat message and
    also written to ``mapped_log_fields.csv`` (relative path).

    An empty or whitespace-only message gets a short hint instead; in that
    case the reference CSV is not read and no output file is written.

    Args:
        message: The incoming Chainlit message; only ``message.content`` is read.
    """
    # NOTE(review): the mapping is computed locally with pandas. No model is
    # called from this handler, so no prompt object or session settings are
    # built here; add them back together with the actual completion call.

    # Simulate log fields from the user's input (in real use case, you'd parse the input log)
    log_fields = (message.content or "").split()

    # Nothing to map: answer with a hint rather than an "Empty DataFrame" dump.
    if not log_fields:
        await cl.Message(
            content="No log fields found in your message. "
                    "Please send the log field names separated by spaces."
        ).send()
        return

    # Load the UDM fields reference CSV
    udm_fields_csv = "udm_field_list_v2.csv"  # Replace with your actual CSV path
    udm_fields = load_udm_fields(udm_fields_csv)

    # Perform the mapping
    mapped_fields_df = map_log_fields_to_udm(log_fields, udm_fields)

    # Create a response showing the mapping
    mapped_fields_table = mapped_fields_df.to_string(index=False)

    msg = cl.Message(content=f"Here is the mapped log fields to UDM:\n\n{mapped_fields_table}")
    await msg.send()

    # Save the mapping to CSV for further analysis
    mapped_fields_df.to_csv('mapped_log_fields.csv', index=False)