namanviber committed on
Commit
d8cfc37
·
verified ·
1 Parent(s): e18021c

Upload 3 files

Browse files
Files changed (3) hide show
  1. app.py +180 -0
  2. requirements.txt +4 -0
  3. rf.pkl +3 -0
app.py ADDED
@@ -0,0 +1,180 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import requests
4
+ import pickle
5
+
6
# Airport display name -> three-letter airport code used in the feature names.
airports = {
    "Hartsfield-Jackson Atlanta International Airport": "ATL",
    "Charlotte Douglas International Airport": "CLT",
    "Denver International Airport": "DEN",
    "Dallas/Fort Worth International Airport": "DFW",
    "George Bush Intercontinental Airport": "IAH",
    "Los Angeles International Airport": "LAX",
    "Chicago O'Hare International Airport": "ORD",
    "Phoenix Sky Harbor International Airport": "PHX",
    "San Francisco International Airport": "SFO",
}

# Airline display name -> carrier code used in the feature names.
airlines = {
    "American Airlines": "AA",
    "Delta Air Lines": "DL",
    "American Eagle Airlines": "OO",
    "United Airlines": "UA",
    "Southwest Airlines": "WN",
}

# Choices shown in the select boxes; same entries and order as the
# lookup tables above (dicts preserve insertion order).
Operating_Airline = list(airlines)
Origin = list(airports)
Dest = list(airports)
29
+
30
+
31
# Route distances as three parallel lists (one entry per origin/dest pair),
# laid out one origin airport per line.
data_pivot = {
    'origin': [
        'ATL', 'ATL', 'ATL', 'ATL', 'ATL', 'ATL', 'ATL',
        'CLT', 'CLT', 'CLT', 'CLT', 'CLT', 'CLT', 'CLT',
        'DEN', 'DEN', 'DEN', 'DEN', 'DEN', 'DEN', 'DEN',
        'DFW', 'DFW', 'DFW', 'DFW', 'DFW', 'DFW',
        'IAH', 'IAH', 'IAH', 'IAH', 'IAH', 'IAH',
        'LAX', 'LAX', 'LAX', 'LAX', 'LAX', 'LAX', 'LAX',
        'ORD', 'ORD', 'ORD', 'ORD', 'ORD', 'ORD', 'ORD',
        'PHX', 'PHX', 'PHX', 'PHX', 'PHX', 'PHX', 'PHX',
        'SFO', 'SFO', 'SFO', 'SFO', 'SFO', 'SFO', 'SFO',
    ],
    'dest': [
        'CLT', 'DEN', 'DFW', 'IAH', 'LAX', 'ORD', 'PHX',
        'ATL', 'DEN', 'DFW', 'IAH', 'LAX', 'ORD', 'PHX',
        'ATL', 'CLT', 'DFW', 'IAH', 'LAX', 'ORD', 'PHX',
        'ATL', 'CLT', 'DEN', 'IAH', 'LAX', 'ORD',
        'ATL', 'CLT', 'DEN', 'DFW', 'LAX', 'ORD',
        'ATL', 'CLT', 'DEN', 'DFW', 'IAH', 'ORD', 'PHX',
        'ATL', 'CLT', 'DEN', 'DFW', 'IAH', 'LAX', 'PHX',
        'ATL', 'CLT', 'DEN', 'DFW', 'IAH', 'LAX', 'SFO',
        'ATL', 'CLT', 'DEN', 'DFW', 'IAH', 'LAX', 'ORD',
    ],
    'distance': [
        226.0, 1199.0, 731.0, 689.0, 1947.0, 606.0, 1587.0,
        226.0, 1337.0, 936.0, 912.0, 2125.0, 599.0, 1773.0,
        1199.0, 1337.0, 641.0, 862.0, 862.0, 888.0, 602.0,
        731.0, 936.0, 641.0, 224.0, 1235.0, 801.0,
        689.0, 912.0, 862.0, 224.0, 1379.0, 925.0,
        1947.0, 2125.0, 862.0, 1235.0, 1379.0, 1744.0, 370.0,
        606.0, 599.0, 888.0, 802.0, 925.0, 1744.0, 1440.0,
        1587.0, 1773.0, 602.0, 868.0, 1009.0, 370.0, 651.0,
        2139.0, 2296.0, 967.0, 1464.0, 1635.0, 337.0, 1846.0,
    ],
}

df_pivot = pd.DataFrame(data_pivot)

# Distance matrix: one row per origin, one column per destination.
# Pairs that are not listed above are filled with 0.
pivot_table = df_pivot.pivot_table(
    values='distance',
    index=['origin'],
    columns=['dest'],
    fill_value=0,
)
39
+
40
filename = "rf.pkl"


@st.cache_resource
def _load_model(path):
    """Unpickle and return the trained model stored at ``path``.

    Streamlit re-executes the whole script on every widget interaction;
    ``st.cache_resource`` keeps the loaded model in memory so the file is
    read once per server process instead of once per rerun.
    """
    # NOTE: pickle.load can execute arbitrary code contained in the file,
    # so only a trusted model file must ever be placed at this path.
    with open(path, "rb") as pickle_file:
        return pickle.load(pickle_file)


model = _load_model(filename)
44
+
45
# Airport code -> station identifier that is sent as the "stations"
# value of the NOAA daily-summaries request.
airport_codes = dict(
    LAX='USW00023174',
    IAH='USW00012960',
    DEN='USW00003017',
    ORD='USW00094846',
    ATL='USW00013874',
    SFO='USW00023234',
    DFW='USW00003927',
    PHX='USW00023183',
    CLT='USW00013881',
)
56
+
57
def processResponse(a):
    """Parse a CSV response body into a DataFrame of weather observations.

    Args:
        a: Response-like object whose ``text`` attribute holds CSV data
            with a header line followed by zero or more data lines.

    Returns:
        A DataFrame with one row per data line. Every cell is stripped of
        surrounding whitespace. The columns ``AWND``, ``PRCP``, ``SNOW`` and
        ``TAVG`` are always present and numeric; values that are missing or
        cannot be parsed become 0. An empty body yields an empty DataFrame.
    """
    import csv  # local import: only this function needs the csv parser

    numeric_columns = ['AWND', 'PRCP', 'SNOW', 'TAVG']

    # csv.reader copes with quoted fields (including embedded commas); each
    # cell is stripped because values may be padded inside the quotes.
    records = [
        [cell.strip() for cell in record]
        for record in csv.reader(a.text.splitlines(), skipinitialspace=True)
        if any(cell.strip() for cell in record)
    ]
    if not records:
        return pd.DataFrame(columns=numeric_columns)

    header, rows = records[0], records[1:]
    df = pd.DataFrame(rows, columns=header)

    for column in numeric_columns:
        # A requested data type may be absent from the response; treat it
        # like any other missing value instead of raising KeyError.
        if column not in df.columns:
            df[column] = 0
        df[column] = pd.to_numeric(df[column], errors='coerce')

    return df.fillna(0)
76
+
77
def weather_info(origin, dest, date):
    """Fetch one day of weather observations for two stations.

    Args:
        origin: Station identifier for the origin airport.
        dest: Station identifier for the destination airport.
        date: Day to query; it is formatted into the request with ``str()``
            and used as both start and end date.

    Returns:
        The tuple ``(awnd_o, prcp_o, tavg_o, awnd_d, prcp_d, tavg_d, snow_o,
        snow_d)``. A station without a row in the response contributes zeros,
        and eight zeros are returned when the request fails, the status code
        is not 200, or the response cannot be parsed.
    """
    no_data = (0, 0, 0, 0, 0, 0, 0, 0)

    url = 'https://www.ncei.noaa.gov/access/services/data/v1'
    params = {
        'dataset': 'daily-summaries',
        # No space after the comma, so the second station id is sent clean.
        'stations': f'{origin},{dest}',
        'dataTypes': 'AWND,PRCP,SNOW,TAVG',
        'startDate': f'{date}',
        'endDate': f'{date}',
    }

    try:
        # A timeout keeps the app from hanging if the service stalls.
        response = requests.get(url, params=params, timeout=30)
    except requests.RequestException:
        return no_data

    if response.status_code != 200:
        return no_data

    try:
        df = processResponse(response)
    except (IndexError, KeyError, ValueError):
        # Empty or malformed body: fall back to the same "no data" result
        # that is used for a failed request.
        return no_data

    if 'STATION' not in df.columns:
        return no_data

    def readings(station):
        """Return (AWND, PRCP, TAVG, SNOW) for ``station``, or zeros."""
        rows = df[df['STATION'] == station]
        if rows.empty:
            return 0, 0, 0, 0
        first = rows.iloc[0]
        return first['AWND'], first['PRCP'], first['TAVG'], first['SNOW']

    # Rows are matched by station id rather than by position, so a response
    # with fewer than two rows (or rows in another order) is still handled.
    awnd_o, prcp_o, tavg_o, snow_o = readings(origin)
    awnd_d, prcp_d, tavg_d, snow_d = readings(dest)

    return awnd_o, prcp_o, tavg_o, awnd_d, prcp_d, tavg_d, snow_o, snow_d
99
+
100
+
101
def preprocess_input(date, operating_airline, origin, dest, dep_time, distance):
    """Build the single-row feature frame that is handed to the model.

    Args:
        date: Journey date; ``month``, ``day`` and ``weekday()`` are read.
        operating_airline: Carrier code used in the ``Operating_Airline _``
            indicator name.
        origin: Origin airport code (key of ``airport_codes``).
        dest: Destination airport code (key of ``airport_codes``).
        dep_time: Scheduled departure time; ``hour`` and ``minute`` are read.
        distance: Value stored in the ``Distance`` column.

    Returns:
        A one-row DataFrame (index 0) holding the numeric features followed
        by the boolean indicator columns, with the indicators matching the
        inputs set to True.
    """
    month = date.month
    quarter = (month - 1) // 3 + 1
    day_of_month = date.day
    day_of_week = date.weekday() + 1  # weekday() is 0 for Monday

    # Departure encoded as HHMM, then reduced to the hour component.
    dep_hour_of_day = int(dep_time.hour * 100 + dep_time.minute) // 100

    weather = weather_info(airport_codes[origin], airport_codes[dest], date)
    awnd_o, prcp_o, tavg_o, awnd_d, prcp_d, tavg_d, snow_o, snow_d = weather

    # Numeric features first, in the column order of the feature frame.
    features = {
        "Distance": distance,
        "DepHourofDay": dep_hour_of_day,
        "AWND_O": awnd_o,
        "PRCP_O": prcp_o,
        "TAVG_O": tavg_o,
        "AWND_D": awnd_d,
        "PRCP_D": prcp_d,
        "TAVG_D": tavg_d,
        "SNOW_O": snow_o,
        "SNOW_D": snow_d,
    }

    # Indicator columns, all False to begin with. The group order and the
    # level order inside each group fix the resulting column order.
    carrier_levels = ("AA", "DL", "OO", "UA", "WN")
    airport_levels = ("ATL", "CLT", "DEN", "DFW", "IAH", "LAX", "ORD", "PHX", "SFO")
    indicator_groups = (
        ("Quarter_", range(1, 5)),
        ("Month_", range(1, 13)),
        ("DayofMonth_", range(1, 32)),
        ("DayOfWeek_", range(1, 8)),
        ("Operating_Airline _", carrier_levels),
        ("Origin_", airport_levels),
        ("Dest_", airport_levels),
    )
    for prefix, levels in indicator_groups:
        for level in levels:
            features[f"{prefix}{level}"] = False

    # Switch on the indicator that corresponds to each input value.
    active_columns = (
        f"Quarter_{quarter}",
        f"Month_{month}",
        f"DayofMonth_{day_of_month}",
        f"DayOfWeek_{day_of_week}",
        f"Operating_Airline _{operating_airline}",
        f"Origin_{origin}",
        f"Dest_{dest}",
    )
    for column in active_columns:
        features[column] = True

    return pd.DataFrame(features, index=[0])
150
+
151
+
152
def predict(data):
    """Run the loaded model on ``data`` and return its first prediction.

    Args:
        data: Feature frame; all of its rows and columns are passed on.

    Returns:
        The first element of the result of ``model.predict``.
    """
    predictions = model.predict(data.iloc[:, :])
    return predictions[0]
156
+
157
# Streamlit Code

st.title("Flight Delay Prediction")

# Widgets return the display names chosen by the user.
input1 = st.selectbox("Please Select Your Airline", Operating_Airline)
input2 = st.selectbox("Please Select your Origin Airport", Origin)
input3 = st.selectbox("Please Select your Destination Airport", Dest)
date = st.date_input("Please Pick Date of your Journey")
time = st.time_input("Please Select Scheduled Departure Time")

# Translate display names into the codes used by the feature columns.
input1 = airlines[input1]
input2 = airports[input2]
input3 = airports[input3]

if st.button("Predict"):

    if input2 == input3:
        # There is no route (and no distance) from an airport to itself.
        st.warning("Please select different origin and destination airports")
    else:
        # pivot_table has one row per origin and one column per destination,
        # so the lookup is .loc[origin, dest].
        distance = pivot_table.loc[input2, input3]
        if distance == 0:
            # 0 is the fill value for pairs missing from the table; some
            # routes are only listed in the opposite direction.
            distance = pivot_table.loc[input3, input2]

        df = preprocess_input(date, input1, input2, input3, time, distance)

        prediction = predict(df)

        if prediction == 1:
            st.error("Your Flight is Most Likely to be delayed more than 15 minutes")
        else:
            st.success("Your flight is likely to be on time")
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ streamlit
2
+ pandas
3
+ requests
4
+ scikit-learn
rf.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69cac329e7b812cef2a55184153102bade00f2c5392706e0a3f43a5c789f4e03
3
+ size 1825297252