AfshinMA committed on
Commit
d235162
·
verified ·
1 Parent(s): 9af7106

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +149 -0
  2. requirements.txt +8 -0
app.py ADDED
@@ -0,0 +1,149 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import joblib
3
+ import pandas as pd
4
+ from typing import Any, Dict, List
5
+ from imblearn.over_sampling import SMOTE
6
+ from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
7
+ from sklearn.model_selection import train_test_split
8
+ from sklearn.preprocessing import StandardScaler
9
+ import gradio as gr
10
+
11
# Constants for directories and file names.
#
# DIR is the project root that holds the 'models' and 'datasets' folders.
# It defaults to the original hard-coded local path, but can be overridden
# with the FRAUD_DETECTION_DIR environment variable so the app is not tied
# to a single machine.
DIR = os.environ.get(
    'FRAUD_DETECTION_DIR',
    'C:\\Users\\Afshin\\Desktop\\10_Projects\\Project_4_Fraud_Detection_Etherium\\',
)
MODEL_DIR = os.path.join(DIR, 'models')
DATA_DIR = os.path.join(DIR, 'datasets')
DATA_FILE = 'cleaned_transaction_dataset.csv'

# Display names of the models; load_models() strips the spaces from each
# name to derive the '<Name>.joblib' file it loads from MODEL_DIR.
MODEL_NAMES = [
    'Ada Boost Classifier',
    'Extra Trees Classifier',
    'Gradient Boosting Classifier',
    'LGBM Classifier',
    'Random Forest Classifier',
    'XGBoost Classifier',
]
24
+
25
# Read the cleaned transaction dataset once at start-up; `df` is used below
# both to build the training split and to pre-fill the Gradio inputs.
data_path = os.path.join(DATA_DIR, DATA_FILE)
df = pd.read_csv(data_path)
28
+
29
+ # Load models
30
def load_models(model_names: List[str]) -> Dict[str, Any]:
    """Load the serialized models named in ``model_names`` from ``MODEL_DIR``.

    Each display name is mapped to a file called ``<name without spaces>.joblib``.
    A model that fails to load is reported on stdout and left out of the
    result, so the returned mapping may hold fewer entries than were requested.

    Args:
        model_names: Display names of the models to load.

    Returns:
        Mapping from display name to the loaded model object.
    """
    loaded: Dict[str, Any] = {}
    for name in model_names:
        file_name = f"{name.replace(' ', '')}.joblib"
        path = os.path.join(MODEL_DIR, file_name)
        try:
            # NOTE: joblib.load unpickles the file; only load trusted model files.
            model = joblib.load(path)
        except Exception as e:
            # Best effort: report the failure and carry on with the next model.
            print(f"Error loading model {name}: {e!s}")
        else:
            loaded[name] = model
    return loaded
40
+
41
# Load every model listed in MODEL_NAMES (models that fail to load are skipped)
models = load_models(MODEL_NAMES)

# Split the dataset into the target column ('FLAG') and the feature columns
y = df['FLAG']
X = df.drop(columns=['FLAG'])

# Hold out 10% of the rows; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.1,
    random_state=123,
)

# Fit the standardizer on the training features only
scaler = StandardScaler()
scaler.fit(X_train)
50
+
51
+ # Prediction and metrics evaluation function
52
def calculate_metrics(y_true, y_pred, average_type='binary'):
    """Score predictions against the true labels.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels.
        average_type: Value passed as ``average`` to the recall, F1 and
            precision scorers.

    Returns:
        Tuple ``(accuracy, recall, f1, precision)``.
    """
    accuracy = accuracy_score(y_true, y_pred)
    # Recall, F1 and precision share the same call signature, so score them in one pass.
    recall, f1, precision = (
        scorer(y_true, y_pred, average=average_type)
        for scorer in (recall_score, f1_score, precision_score)
    )
    return accuracy, recall, f1, precision
59
+
60
# Per-model (accuracy, recall, f1, precision) on the SMOTE-resampled training
# split, keyed by model name. Filled on first use by _training_metrics().
_TRAIN_METRICS_CACHE = {}


def _training_metrics():
    """Return the cached training-set metrics for every loaded model.

    The resampling uses a fixed random_state and the models and training
    split do not change after start-up, so these values are identical on
    every request. They are therefore computed once instead of per request.
    """
    if not _TRAIN_METRICS_CACHE:
        # Using SMOTE to handle class imbalance
        X_resampled, y_resampled = SMOTE(random_state=123).fit_resample(
            scaler.transform(X_train), y_train
        )
        # Build the full table first so a failure part-way through does not
        # leave a partially filled cache behind.
        computed = {
            name: calculate_metrics(y_resampled, model.predict(X_resampled))
            for name, model in models.items()
        }
        _TRAIN_METRICS_CACHE.update(computed)
    return _TRAIN_METRICS_CACHE


def load_and_predict(input_data):
    """Classify one sample with every loaded model and report model metrics.

    Args:
        input_data: A single-row table of feature values; it is passed to
            the already-fitted ``scaler`` before prediction.

    Returns:
        A DataFrame with one row per model (the prediction for the sample
        plus accuracy, recall, F1 and precision in percent), sorted by
        accuracy in descending order. The metrics are measured on the
        SMOTE-resampled training split, not on the held-out test split.
        If anything fails, a string describing the error is returned instead.
    """
    # NOTE(review): the error path returns a plain string while the caller
    # feeds the result to a Dataframe output - confirm that this renders.
    try:
        # Scale the input sample using the already-fitted scaler
        sample_trans = scaler.transform(input_data)

        metrics_by_model = _training_metrics()

        results = []

        for name, model in models.items():
            flag_pred = model.predict(sample_trans)
            acc, rec, f1, prec = metrics_by_model[name]

            results.append({
                'Model': name,
                'Predicted Fraud': 'Yes' if flag_pred[0] == 1 else 'No',
                'Accuracy %': acc * 100,
                'Recall %': rec * 100,
                'F1 %': f1 * 100,
                'Precision %': prec * 100
            })

        return pd.DataFrame(results).sort_values(by='Accuracy %', ascending=False)

    except Exception as e:
        return f"An error occurred during prediction: {str(e)}"
88
+
89
+ # Gradio interface
90
def predict(avg_min_sent, avg_min_received, time_diff, sent_tnx, received_tnx, num_created_contracts,
            max_value_received, avg_value_received, avg_value_sent, total_sent,
            total_balance, erc20_received, erc20_sent, erc20_sent_contract,
            erc20_unique_sent, erc20_unique_received):
    """Gradio callback: pack the form values into one row and score it.

    The parameters are listed in the same order as the Gradio input fields
    and are forwarded in that order as a single-row DataFrame.

    Returns:
        Whatever ``load_and_predict`` returns for that row.
    """
    feature_row = [
        avg_min_sent, avg_min_received, time_diff, sent_tnx,
        received_tnx, num_created_contracts, max_value_received, avg_value_received,
        avg_value_sent, total_sent, total_balance, erc20_received,
        erc20_sent, erc20_sent_contract, erc20_unique_sent, erc20_unique_received,
    ]
    return load_and_predict(pd.DataFrame([feature_row]))
118
+
119
# Gradio inputs: one numeric field per feature column, labelled with the
# column name and pre-filled with that column's mean in the dataset.
_FEATURE_LABELS = [
    "Avg min between sent tnx",
    "Avg min between received tnx",
    "Time difference between first and last (mins)",
    "Sent tnx",
    "Received tnx",
    "Number of created contracts",
    "Max value received",
    "Avg value received",
    "Avg value sent",
    "Total either sent",
    "Total either balance",
    "ERC20 total either received",
    "ERC20 total either sent",
    "ERC20 total either sent contract",
    "ERC20 unique sent address",
    "ERC20 unique received token name",
]


def _default_value(label):
    """Return the dataset mean used as the initial value for ``label``."""
    mean_value = df[label].mean()
    # The contract-count default is truncated to a whole number.
    if label == "Number of created contracts":
        return int(mean_value)
    return mean_value


inputs = [
    gr.Number(label=label, value=_default_value(label))
    for label in _FEATURE_LABELS
]

output = gr.Dataframe(label="Prediction Results")

# Create the Gradio interface and start serving it
_interface = gr.Interface(
    fn=predict,
    inputs=inputs,
    outputs=output,
    title="Fraud Detection Etherium Prediction App",
    description="This application predicts fraud in Ethereum transactions using multiple machine learning models.",
    theme="compact",
)
_interface.launch()
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ pandas
2
+ numpy
3
+ joblib
4
+ scikit-learn
5
+ imbalanced-learn
6
+ lightgbm
7
+ xgboost
8
+ catboost