Upload 7 files
Browse files- Final.xlsx +0 -0
- app.py +422 -0
- lottie_2.json +1 -0
- metadata.json +267 -0
- tbl_dtl.csv +5 -0
- template.txt +254 -0
- ydata_config.yml +219 -0
Final.xlsx
ADDED
Binary file (9.06 kB). View file
|
|
app.py
ADDED
@@ -0,0 +1,422 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
import matplotlib.pyplot as plt
|
3 |
+
import networkx as nx
|
4 |
+
import numpy as np
|
5 |
+
import streamlit as st
|
6 |
+
import sdv
|
7 |
+
from sdv.datasets.local import load_csvs
|
8 |
+
from sdv.metadata import MultiTableMetadata
|
9 |
+
from sdv.multi_table import HMASynthesizer
|
10 |
+
import time
|
11 |
+
import os
|
12 |
+
import gc
|
13 |
+
import warnings
|
14 |
+
from PIL import Image
|
15 |
+
from sdv.metadata import SingleTableMetadata
|
16 |
+
import pyodbc
|
17 |
+
import google.generativeai as genai
|
18 |
+
from google.generativeai.types import HarmCategory, HarmBlockThreshold
|
19 |
+
import textwrap
|
20 |
+
from streamlit_extras.stylable_container import stylable_container
|
21 |
+
from streamlit_extras.stateful_button import button
|
22 |
+
import json
|
23 |
+
from io import BytesIO
|
24 |
+
# Configure the Gemini client. The API key is read from the GOOGLE_API_KEY
# environment variable so that no secret lives in source control; any key that
# was ever committed to the repository should be revoked and rotated.
genai.configure(api_key=os.environ.get("GOOGLE_API_KEY"))

# Shared model handle used to generate the table narrations.
genai_mod = genai.GenerativeModel(model_name='models/gemini-pro')
|
28 |
+
|
29 |
+
# Style overrides injected into the page: tighten the top padding, pull the
# main canvas upwards, and make the toolbar transparent while keeping its
# inner controls clickable.
_PAGE_CSS = """
<style>

/* Remove blank space at top and bottom */
.block-container {
padding-top: 2rem;
}

/* Remove blank space at the center canvas */
.st-emotion-cache-z5fcl4 {
position: relative;
top: -62px;
}

/* Make the toolbar transparent and the content below it clickable */
.st-emotion-cache-18ni7ap {
pointer-events: none;
background: rgb(255 255 255 / 0%)
}
.st-emotion-cache-zq5wmm {
pointer-events: auto;
background: rgb(255 255 255);
border-radius: 5px;
}
</style>
"""

# Page-level configuration must be issued before the CSS is rendered.
st.set_page_config(page_title='DATA DISCOVERY', layout='wide')
st.markdown(_PAGE_CSS, unsafe_allow_html=True)
|
56 |
+
def clear_cache():
    """Forget the relationship table stored under ``rdf`` in the session, if present.

    Used as the ``on_change`` callback of the source selectors so that a new
    selection does not keep showing relationships from the previous one.
    """
    state = st.session_state
    if 'rdf' not in state:
        return
    state.pop('rdf')
|
59 |
+
|
60 |
+
def create_er_diagram(df):
    """Build a directed relationship graph from a relationship table.

    Args:
        df: DataFrame with one row per relationship. The columns
            'PARENT TABLE', 'CHILD TABLE', 'PARENT TABLE RELATIONSHIP COLUMN'
            and 'CHILD TABLE RELATIONSHIP COLUMN' are read from every row;
            'CARDINALITY' is optional and falls back to '1:N'.

    Returns:
        A ``(G, table_columns)`` tuple. ``G`` is a ``networkx.DiGraph`` with
        one node per table and one parent -> child edge per relationship,
        whose ``label`` attribute holds the joined columns and the
        cardinality. ``table_columns`` maps each table name to the list of
        its relationship columns, in first-seen order and without repeats.
    """
    G = nx.DiGraph()  # Directed graph: edges point from parent to child
    table_columns = {}

    def _remember(table, column):
        # A table that takes part in several relationships through the same
        # column must list that column only once in the diagram.
        known = table_columns.setdefault(table, [])
        if column not in known:
            known.append(column)

    for _, row in df.iterrows():
        parent_table = row['PARENT TABLE']
        child_table = row['CHILD TABLE']
        parent_pk = row['PARENT TABLE RELATIONSHIP COLUMN']
        child_fk = row['CHILD TABLE RELATIONSHIP COLUMN']
        # Named so it does not shadow the module-level cardinality() function.
        relation_kind = row.get('CARDINALITY', '1:N')

        _remember(parent_table, parent_pk)
        _remember(child_table, child_fk)

        G.add_node(parent_table)
        G.add_node(child_table)
        G.add_edge(parent_table, child_table, label=f'{parent_pk} -> {child_fk}\n{relation_kind}')

    return G, table_columns
|
89 |
+
|
90 |
+
def draw_er_diagram(G, table_columns):
    """Render the relationship graph in the Streamlit page and return it as PNG.

    Args:
        G: Graph whose nodes are table names and whose edges carry a
            ``label`` attribute (as produced by ``create_er_diagram``).
        table_columns: Mapping of table name to the list of column names to
            print on that table's node. Every key must be a node of ``G``.

    Returns:
        A ``BytesIO`` positioned at offset 0 that contains the PNG image of
        the rendered diagram.
    """
    pos = nx.spring_layout(G, k=1.5, iterations=50)  # Use a layout that spreads out nodes

    # Keep a handle on the figure so it can be rendered, saved and released
    # explicitly; otherwise every Streamlit rerun leaves one more open figure.
    fig = plt.figure(figsize=(8, 8))
    try:
        nx.draw(G, pos, with_labels=False, node_size=2500, node_color='lightblue', edge_color='gray', font_size=8, font_weight='bold', arrows=True)

        # Table names in bold, slightly above the node centre
        for node, (x, y) in pos.items():
            plt.text(x, y + 0.13, node, fontsize=7, fontweight='bold', ha='center', va='center')

        # Relationship columns, one per line, at the node centre
        for node, columns in table_columns.items():
            x, y = pos[node]
            plt.text(x, y, '\n'.join(columns), fontsize=6, ha='center', va='center')

        # Edge labels (joined columns and cardinality)
        edge_labels = nx.get_edge_attributes(G, 'label')
        nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels, font_size=6)

        st.subheader("Schematic Representation")
        with st.container(border=True, height=350):
            st.pyplot(fig)

        img_bytes = BytesIO()
        fig.savefig(img_bytes, format='png')
    finally:
        plt.close(fig)

    img_bytes.seek(0)
    return img_bytes
|
116 |
+
|
117 |
+
def cardinality(parent_df, child_df, parent_column, child_column):
    """Classify a relationship by the uniqueness of its two key columns.

    Args:
        parent_df: DataFrame holding the parent table.
        child_df: DataFrame holding the child table.
        parent_column: Name of the key column in ``parent_df``.
        child_column: Name of the key column in ``child_df``.

    Returns:
        '1:1' when both columns are unique, '1:N' when only the parent column
        is unique, 'N:1' when only the child column is unique, and 'N:N' when
        neither is.
    """
    # Keyed by (parent column is unique, child column is unique).
    labels = {
        (True, True): '1:1',
        (True, False): '1:N',
        (False, True): 'N:1',
        (False, False): 'N:N',
    }
    parent_side = bool(parent_df[parent_column].is_unique)
    child_side = bool(child_df[child_column].is_unique)
    return labels[(parent_side, child_side)]
|
133 |
+
|
134 |
+
#st.title('AUTOMATED DATA CATALOGUE')
|
135 |
+
# --- Source selection -------------------------------------------------------
# Lets the user pick a database and schema, then lists the tables of that
# schema so they can choose which ones to catalogue and how many rows to sample.
st.subheader('SELECT SOURCE')
selectcol11, selectcol12 = st.columns(2)
with selectcol11:
    select1 = st.selectbox('SOURCE DB NAME', ('DB_10001', 'Marcopolo_db'), key='dbname', index=None, placeholder='Select database name', on_change=clear_cache)
with selectcol12:
    select2 = st.selectbox('SOURCE SCHEMA NAME', ('DBO', 'CLIENT'), key='SCHname', index=None, placeholder='Select schema name', on_change=clear_cache)

if select1 == 'DB_10001' and select2 is not None:
    with st.spinner("Loading Tables:"):
        # The password is taken from the environment so that it is never
        # committed with the code; server, database and user can be overridden
        # the same way and otherwise keep their development defaults.
        db_password = os.environ.get("SQL_DB_PASSWORD")
        if not db_password:
            st.error("Database password is not configured. Set the SQL_DB_PASSWORD environment variable.")
            st.stop()
        db_server = os.environ.get("SQL_DB_SERVER", "sql-ext-dev-uks-001.database.windows.net")
        db_name = os.environ.get("SQL_DB_NAME", "sqldb-ext-dev-uks-001")
        db_user = os.environ.get("SQL_DB_USER", "dbadmin")
        conn1 = pyodbc.connect("Driver={ODBC Driver 17 for SQL Server};"
                               f"Server={db_server};"
                               f"Database={db_name};"
                               f"UID={db_user};"
                               f"PWD={db_password}")

        # The schema name is bound as a parameter instead of being spliced
        # into the SQL text.
        query0_1 = "select TABLE_NAME from INFORMATION_SCHEMA.TABLES where TABLE_SCHEMA = ? ORDER BY TABLE_NAME ASC"
        st.session_state.tab_names_init = list(pd.read_sql_query(query0_1, con=conn1, params=[select2])['TABLE_NAME'])

    table_selector = st.multiselect('SOURCE TABLE NAME', st.session_state.tab_names_init, default=None, placeholder='Select table(s) for automated data cataloging', on_change=clear_cache)
    sample_selector = st.selectbox('SELECT SAMPLE SIZE', ['100', '10K', '100K', '1M', 'Full Table'], index=None, placeholder='Select sample size for the table(s)', on_change=clear_cache)

    # Stateful button: stays "pressed" across reruns once clicked.
    discover = button("Discover", key='discover')
|
156 |
+
|
157 |
+
# --- Ingestion --------------------------------------------------------------
# NOTE(review): SOURCE has lost its indentation; this section presumably runs
# inside the `if select1 == 'DB_10001' ...` branch that creates `conn1`,
# `table_selector`, `sample_selector` and `discover` - confirm nesting.
if discover:
    # An empty selection would yield an invalid "TABLE_NAME in ()" clause.
    if not table_selector:
        st.warning("Please select at least one table before starting the discovery.")
        st.stop()

    # Sample-size choice -> T-SQL TOP clause; any other choice reads all rows.
    sample_clauses = {
        '100': "top 100",
        '10K': "top 10000",
        '100K': "top 100000",
        '1M': "top 1000000",
    }
    count = sample_clauses.get(sample_selector, "")

    # Schema and table names are bound as parameters rather than interpolated.
    placeholders = ', '.join('?' for _ in table_selector)
    query1_1 = (
        "select TABLE_NAME from INFORMATION_SCHEMA.TABLES "
        f"where TABLE_SCHEMA = ? and TABLE_NAME in ({placeholders}) "
        "ORDER BY TABLE_NAME ASC"
    )
    st.session_state.tab_names = list(pd.read_sql_query(query1_1, con=conn1, params=[select2, *table_selector])['TABLE_NAME'])

    st.session_state.dataframes = {}
    loaded_columns = []
    # Identifiers cannot be bound as parameters, so closing brackets are
    # doubled to keep each name inside its [..] quoting.
    schema_ident = select2.replace(']', ']]')
    for tab in st.session_state.tab_names:
        table_ident = tab.replace(']', ']]')
        query2_2 = f"select {count} * from [{schema_ident}].[{table_ident}]"
        frame = pd.read_sql_query(query2_2, con=conn1)
        st.session_state.dataframes[f'{tab}'] = frame
        loaded_columns.extend(frame.columns)
    # Column names shared by several tables are offered only once.
    st.session_state.col_names = list(dict.fromkeys(loaded_columns))

    tab_names = st.session_state.tab_names
    dataframes = st.session_state.dataframes
    col_names = st.session_state.col_names

    st.markdown(f"System has ingested :orange[**{str(len(tab_names))} tables**] from the source. Please proceed with the discovery.")
    tab1, tab2 = st.tabs(["Explain Tables", "Show Relationships"])

    def view_callback():
        """Reset the ``tdet`` flag kept in the Streamlit session state."""
        st.session_state.tdet = False
|
193 |
+
# --- "Explain Tables" tab ---------------------------------------------------
# NOTE(review): SOURCE has lost its indentation; this section presumably runs
# inside the `if discover:` branch that defines `tab1` and `tab_names` -
# confirm nesting.
with tab1:
    st.session_state.table_list = pd.DataFrame(tab_names, columns=['TABLE NAME'])
    tab_names_shown = list(st.session_state.table_list['TABLE NAME'].values)
    tabs2 = st.tabs(tab_names_shown)

    # Instruction part of the prompt; the table details are appended per table.
    narration_prompt = textwrap.dedent("""
        You are a Data Migration expert. You can analyze and understand any table/data/ Please return a narration about the data. The narration should Include primary key name(if any) and a intellectual guess about the table schema. The data can be any kind of generic data. you have to guess the object name/class name/schema name etc. of that data. Don't add unnecessary details. Strictly stick to the informations provided only.
        Important: Please consider All fields are mandetorily during your analysis. Explain all fields precisely without unnecessary and irrelevant information. NO NEED TO PROVIDE THE SAMPLE DATA AGAIN.

        Here is the table details:

        """)

    for table_name, table_tab in zip(tab_names_shown, tabs2):
        with table_tab:
            with st.container(height=400, border=True):
                cole1, cole2 = st.columns([1, 1.5])
                with cole1:
                    # The table was already loaded during ingestion, so no
                    # database connection is needed here.
                    df = st.session_state.dataframes[table_name]

                    # Up to 10 non-null, non-blank values per column, padded
                    # with "" so that every column has exactly 10 entries.
                    sample_columns = {}
                    for col in df.columns:
                        non_null_values = df[col][df[col] != ''].dropna().astype(str).str.strip()
                        selected_values = list(non_null_values[:10])
                        sample_columns[col] = selected_values + [""] * (10 - len(selected_values))
                    selected_df = pd.DataFrame(sample_columns)

                    # Columns whose sample mixes padding with real values,
                    # i.e. that have some but fewer than 10 usable values.
                    null_columns = [col for col in selected_df.columns if (selected_df[col] == '').nunique() > 1]

                    null_mes = "**The Following columns have very few records(less than 10). You might exclude them (if they are redundant) for better table discovery:** \n\n"
                    highlighted = [f":orange[**{col}**]" for col in null_columns]
                    if len(highlighted) > 1:
                        null_mes += ', '.join(highlighted[:-1]) + ', **and** ' + highlighted[-1]
                    elif highlighted:
                        null_mes += highlighted[0]

                    if null_columns:
                        with st.expander("🛈 Potential redundant Columns Found in Terms of Data Completeness:", expanded=True):
                            st.markdown(null_mes)
                            # Explicit key: the same label and options can
                            # occur in several table tabs.
                            inf_filter = st.multiselect('Select Incomplete and Insignificant Columns to exclude:', list(null_columns), key=f"exclude_{table_name}")
                            run = st.button('Check', key=f"{table_name}")
                    else:
                        st.success("No redundant Columns Found in Terms of Data Completeness")
                        inf_filter = None
                        run = False

                    if inf_filter is not None:
                        # In place on purpose: the frame stored in the session
                        # state loses the excluded columns as well.
                        df.drop(columns=inf_filter, inplace=True)
                        selected_df.drop(columns=inf_filter, inplace=True)

                    narration_key = f'cont_{table_name}'
                    if run or not null_columns:
                        # Drop the ID / LOADID / FILE_NAME columns when they
                        # appear as a contiguous run, in that order.
                        main_list = df.columns.to_list()
                        sub_list = ['ID', 'LOADID', 'FILE_NAME']
                        width = len(sub_list)
                        if any(main_list[start:start + width] == sub_list for start in range(len(main_list) - width + 1)):
                            df = df.drop(sub_list, axis=1)

                        if narration_key not in st.session_state:
                            with st.spinner("Processing Table"):
                                sin_metadata = SingleTableMetadata()
                                sin_metadata.detect_from_dataframe(df)
                                python_dict = sin_metadata.to_dict()
                                primary_key = python_dict.get('primary_key', "Could Not be Identified")

                                story = (
                                    " Details of the table:\n"
                                    f"table columns: {list(df.columns)}\n"
                                    f"column datatypes: {df.dtypes.to_string()}\n"
                                    f"table sample data: {selected_df.head(10).to_string()}\n"
                                )
                                try:
                                    response = genai_mod.generate_content(
                                        narration_prompt + story + f"The Primary Key is:{primary_key}",
                                        safety_settings={
                                            HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
                                            HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
                                            HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
                                            HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
                                        })
                                    st.session_state[narration_key] = response.text
                                except Exception as exc:
                                    # UI boundary: report the failure (API error
                                    # or a response without text) and keep the
                                    # rest of the page usable.
                                    st.error(f"Could not generate the table narration: {exc}")

                    # Shown whenever a narration exists, so it does not vanish
                    # on the rerun that follows the "Check" click.
                    if narration_key in st.session_state:
                        st.markdown(st.session_state[narration_key])
                with cole2:
                    st.markdown("**DATA PREVIEW**")
                    st.dataframe(df, use_container_width=True)
|
294 |
+
|
295 |
+
# --- "Show Relationships" tab -----------------------------------------------
# NOTE(review): SOURCE has lost its indentation; this section presumably runs
# inside the `if discover:` branch that defines `tab2`, `tab_names` and
# `col_names` - confirm nesting.
with tab2:
    rel_columns = ['PARENT TABLE', 'CHILD TABLE', 'PARENT TABLE RELATIONSHIP COLUMN', 'CHILD TABLE RELATIONSHIP COLUMN', 'CARDINALITY']

    def _relationship_cardinalities(rel_df):
        """Return the cardinality label of every row of ``rel_df`` as a list."""
        # A plain list (instead of DataFrame.apply) also works for an empty
        # relationship table, where apply(axis=1) returns a DataFrame that
        # cannot be assigned to a single column.
        return [
            cardinality(
                st.session_state.dataframes[str(rel['PARENT TABLE'])],
                st.session_state.dataframes[str(rel['CHILD TABLE'])],
                str(rel['PARENT TABLE RELATIONSHIP COLUMN']),
                str(rel['CHILD TABLE RELATIONSHIP COLUMN']),
            )
            for _, rel in rel_df.iterrows()
        ]

    # Relationships detected automatically from the ingested tables.
    metadata1 = MultiTableMetadata()
    metadata1.detect_from_dataframes(data=st.session_state.dataframes)
    rlist1 = metadata1.to_dict().get('relationships', [])
    rdf = pd.DataFrame(
        [
            {
                'PARENT TABLE': rlist['parent_table_name'],
                'CHILD TABLE': rlist['child_table_name'],
                'PARENT TABLE RELATIONSHIP COLUMN': rlist['parent_primary_key'],
                'CHILD TABLE RELATIONSHIP COLUMN': rlist['child_foreign_key'],
            }
            for rlist in rlist1
        ],
        columns=rel_columns,
    )
    rdf['CARDINALITY'] = _relationship_cardinalities(rdf)

    if 'rdf' not in st.session_state:
        st.session_state.rdf = rdf

    edited_map_df = st.data_editor(
        st.session_state.rdf,
        column_config={
            "PARENT TABLE": st.column_config.SelectboxColumn(
                "Available Parent Table",
                width="medium",
                options=tab_names,
                required=True,
            ),
            "CHILD TABLE": st.column_config.SelectboxColumn(
                "Available Child Table",
                width="medium",
                options=tab_names,
                required=True,
            ),
            "PARENT TABLE RELATIONSHIP COLUMN": st.column_config.SelectboxColumn(
                "Available Parent Table Relationship Column",
                width="medium",
                options=col_names,
                required=True,
            ),
            "CHILD TABLE RELATIONSHIP COLUMN": st.column_config.SelectboxColumn(
                "Available Child Table Relationship Column",
                width="medium",
                options=col_names,
                required=True,
            ),
            "CARDINALITY": st.column_config.SelectboxColumn(
                "Cardinality",
                width="medium",
                options=['1:1', '1:N', 'N:1', 'N:N'],
                required=True,
            )
        },
        hide_index=True,
        num_rows='dynamic',
        use_container_width=True
    )

    # Validate every edited row: both columns must belong to their table and
    # the join must match at least one value.
    all_rows_valid = True
    for _, row in edited_map_df.iterrows():
        parent_frame = st.session_state.dataframes[str(row['PARENT TABLE'])]
        child_frame = st.session_state.dataframes[str(row['CHILD TABLE'])]
        parent_col = row['PARENT TABLE RELATIONSHIP COLUMN']
        child_col = row['CHILD TABLE RELATIONSHIP COLUMN']

        # Membership is checked before any values are read, so a wrong column
        # produces the message below instead of a KeyError.
        parent_ok = parent_col in parent_frame.columns
        child_ok = child_col in child_frame.columns
        if not parent_ok:
            st.error(f"{row['PARENT TABLE RELATIONSHIP COLUMN']} does not belong to {row['PARENT TABLE']}")
        if not child_ok:
            st.error(f"{row['CHILD TABLE RELATIONSHIP COLUMN']} does not belong to {row['CHILD TABLE']}")
        if not (parent_ok and child_ok):
            all_rows_valid = False
            continue

        # Missing values are ignored so that NaN never counts as a match.
        has_match = parent_frame[parent_col].dropna().isin(child_frame[child_col].dropna()).any()
        if not has_match:
            st.error(f"The Joining Condition Between column: {row['PARENT TABLE RELATIONSHIP COLUMN']} from Table: {row['PARENT TABLE']} and column: {row['CHILD TABLE RELATIONSHIP COLUMN']} from Table: {row['CHILD TABLE']} does not yield any record. ")
            all_rows_valid = False

    # The edited table is accepted only when every row passed validation;
    # otherwise the stored relationships are kept.
    add = st.button("Add Relationship", key='add')
    if add and all_rows_valid:
        add_df = edited_map_df
    else:
        add_df = st.session_state.rdf

    add_df['CARDINALITY'] = _relationship_cardinalities(add_df)
    st.session_state.add_df = add_df

    rel_tabs = set(add_df['PARENT TABLE'].values) | set(add_df['CHILD TABLE'].values)
    unrel_tabs = [tab for tab in tab_names if tab not in rel_tabs]
    st.info(f"""Unrelated tables due to undetected pattern: {', '.join(repr(tab) for tab in unrel_tabs)}""")

    G, table_columns = create_er_diagram(st.session_state.add_df)
    img_bytes = draw_er_diagram(G, table_columns)
    col21, col22 = st.columns([1, 8])
    with col21:
        # The spring layout is recomputed on every run, so a rerun redraws
        # the diagram.
        if st.button("Regenerate"):
            st.rerun()
    with col22:
        st.download_button(
            label="Download ER Diagram",
            data=img_bytes,
            file_name="er_diagram.png",
            mime="image/png"
        )
|
lottie_2.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"v":"5.4.1","fr":29.9700012207031,"ip":0,"op":132.00000537647,"w":1024,"h":768,"nm":"Comp 1","ddd":0,"assets":[],"layers":[{"ddd":0,"ind":1,"ty":3,"nm":"Null 1","sr":1,"ks":{"o":{"a":0,"k":0,"ix":11},"r":{"a":0,"k":0,"ix":10},"p":{"a":0,"k":[512,364,0],"ix":2},"a":{"a":0,"k":[0,0,0],"ix":1},"s":{"a":0,"k":[100,100,100],"ix":6}},"ao":0,"ip":0,"op":316.000012870944,"st":0,"bm":0,"completed":true},{"ddd":0,"ind":2,"ty":4,"nm":"Shape Layer 3","parent":4,"sr":1,"ks":{"o":{"a":0,"k":100,"ix":11},"r":{"a":0,"k":0,"ix":10},"p":{"a":0,"k":[15.069,93,0],"ix":2},"a":{"a":0,"k":[0,93,0],"ix":1},"s":{"a":0,"k":[100,100,100],"ix":6}},"ao":0,"shapes":[{"ty":"gr","it":[{"ind":0,"ty":"sh","ix":1,"ks":{"a":1,"k":[{"i":{"x":0.833,"y":0.833},"o":{"x":0.167,"y":0.167},"n":"0p833_0p833_0p167_0p167","t":30,"s":[{"i":[[101.919,-93],[119.5,-85.113],[119.5,70.823],[109.55199999999999,93],[-97.323,93],[-119.5,83.05199999999999],[-119.5,-40.823],[-109.55199999999999,-63],[25.468,-63],[34.5,-67.05199999999999],[34.5,-75.419],[42.387,-93]],"o":[[111.613,-93],[119.5,-75.419],[119.5,83.05199999999999],[97.323,93],[-109.55199999999999,93],[-119.5,70.823],[-119.5,-53.052],[-97.323,-63],[30.448,-63],[34.5,-72.032],[34.5,-85.113],[52.081,-93]],"v":[[101.919,-93],[119.5,-75.419],[119.5,70.823],[97.323,93],[-97.323,93],[-119.5,70.823],[-119.5,-40.823],[-97.323,-63],[25.468,-63],[34.5,-72.032],[34.5,-75.419],[52.081,-93]],"c":true}],"e":[{"i":[[135.824,-47.794],[151.816,-39.604],[119.5,70.823],[109.55199999999999,93],[-97.323,93],[-119.5,83.05199999999999],[-89.363,4.383],[-77.907,-17.794],[55.605,-17.794],[64.637,-21.846],[66.333,-30.213],[76.291,-47.794]],"o":[[145.518,-47.794],[149.637,-30.213],[119.5,83.05199999999999],[97.323,93],[-109.55199999999999,93],[-119.5,70.823],[-85.138,-8.337],[-65.678,-17.794],[60.584999999999994,-17.794],[64.637,-26.826],[69.505,-41.11],[85.985,-47.794]],"v":[[135.824,-47.794],[149.637,-30.213],[119.5,70.823],[97.323,93],[-97.323,93],[-119.5,70.823],[-89.363,4.383],[-65
.678,-17.794],[55.605,-17.794],[64.637,-26.826],[66.333,-30.213],[85.985,-47.794]],"c":true}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.167,"y":0.167},"n":"0p833_0p833_0p167_0p167","t":35,"s":[{"i":[[135.824,-47.794],[151.816,-39.604],[119.5,70.823],[109.55199999999999,93],[-97.323,93],[-119.5,83.05199999999999],[-89.363,4.383],[-77.907,-17.794],[55.605,-17.794],[64.637,-21.846],[66.333,-30.213],[76.291,-47.794]],"o":[[145.518,-47.794],[149.637,-30.213],[119.5,83.05199999999999],[97.323,93],[-109.55199999999999,93],[-119.5,70.823],[-85.138,-8.337],[-65.678,-17.794],[60.584999999999994,-17.794],[64.637,-26.826],[69.505,-41.11],[85.985,-47.794]],"v":[[135.824,-47.794],[149.637,-30.213],[119.5,70.823],[97.323,93],[-97.323,93],[-119.5,70.823],[-89.363,4.383],[-65.678,-17.794],[55.605,-17.794],[64.637,-26.826],[66.333,-30.213],[85.985,-47.794]],"c":true}],"e":[{"i":[[121.293,-67.168],[137.966,-59.108000000000004],[119.5,70.823],[109.55199999999999,93],[-97.323,93],[-119.5,83.05199999999999],[-102.279,-14.991],[-91.469,-37.168],[42.689,-37.168],[51.721,-41.22],[52.69,-49.587],[61.760999999999996,-67.168]],"o":[[130.987,-67.168],[136.721,-49.587],[119.5,83.05199999999999],[97.323,93],[-109.55199999999999,93],[-119.5,70.823],[-99.865,-27.5],[-79.24,-37.168],[47.669,-37.168],[51.721,-46.2],[54.501999999999995,-59.969],[71.455,-67.168]],"v":[[121.293,-67.168],[136.721,-49.587],[119.5,70.823],[97.323,93],[-97.323,93],[-119.5,70.823],[-102.279,-14.991],[-79.24,-37.168],[42.689,-37.168],[51.721,-46.2],[52.69,-49.587],[71.455,-67.168]],"c":true}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.167,"y":0.167},"n":"0p833_0p833_0p167_0p167","t":40,"s":[{"i":[[121.293,-67.168],[137.966,-59.108000000000004],[119.5,70.823],[109.55199999999999,93],[-97.323,93],[-119.5,83.05199999999999],[-102.279,-14.991],[-91.469,-37.168],[42.689,-37.168],[51.721,-41.22],[52.69,-49.587],[61.760999999999996,-67.168]],"o":[[130.987,-67.168],[136.721,-49.587],[119.5,83.05199999999999],[97.323,93],[-109.55199999999999
,93],[-119.5,70.823],[-99.865,-27.5],[-79.24,-37.168],[47.669,-37.168],[51.721,-46.2],[54.501999999999995,-59.969],[71.455,-67.168]],"v":[[121.293,-67.168],[136.721,-49.587],[119.5,70.823],[97.323,93],[-97.323,93],[-119.5,70.823],[-102.279,-14.991],[-79.24,-37.168],[42.689,-37.168],[51.721,-46.2],[52.69,-49.587],[71.455,-67.168]],"c":true}],"e":[{"i":[[135.824,-47.794],[151.816,-39.604],[119.5,70.823],[109.55199999999999,93],[-97.323,93],[-119.5,83.05199999999999],[-89.363,4.383],[-77.907,-17.794],[55.605,-17.794],[64.637,-21.846],[66.333,-30.213],[76.291,-47.794]],"o":[[145.518,-47.794],[149.637,-30.213],[119.5,83.05199999999999],[97.323,93],[-109.55199999999999,93],[-119.5,70.823],[-85.138,-8.337],[-65.678,-17.794],[60.584999999999994,-17.794],[64.637,-26.826],[69.505,-41.11],[85.985,-47.794]],"v":[[135.824,-47.794],[149.637,-30.213],[119.5,70.823],[97.323,93],[-97.323,93],[-119.5,70.823],[-89.363,4.383],[-65.678,-17.794],[55.605,-17.794],[64.637,-26.826],[66.333,-30.213],[85.985,-47.794]],"c":true}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.167,"y":0.167},"n":"0p833_0p833_0p167_0p167","t":45,"s":[{"i":[[135.824,-47.794],[151.816,-39.604],[119.5,70.823],[109.55199999999999,93],[-97.323,93],[-119.5,83.05199999999999],[-89.363,4.383],[-77.907,-17.794],[55.605,-17.794],[64.637,-21.846],[66.333,-30.213],[76.291,-47.794]],"o":[[145.518,-47.794],[149.637,-30.213],[119.5,83.05199999999999],[97.323,93],[-109.55199999999999,93],[-119.5,70.823],[-85.138,-8.337],[-65.678,-17.794],[60.584999999999994,-17.794],[64.637,-26.826],[69.505,-41.11],[85.985,-47.794]],"v":[[135.824,-47.794],[149.637,-30.213],[119.5,70.823],[97.323,93],[-97.323,93],[-119.5,70.823],[-89.363,4.383],[-65.678,-17.794],[55.605,-17.794],[64.637,-26.826],[66.333,-30.213],[85.985,-47.794]],"c":true}],"e":[{"i":[[135.824,-47.794],[151.816,-39.604],[119.5,70.823],[109.55199999999999,93],[-97.323,93],[-119.5,83.05199999999999],[-89.363,4.383],[-77.907,-17.794],[55.605,-17.794],[64.637,-21.846],[66.333,-30.213],[76.2
91,-47.794]],"o":[[145.518,-47.794],[149.637,-30.213],[119.5,83.05199999999999],[97.323,93],[-109.55199999999999,93],[-119.5,70.823],[-85.138,-8.337],[-65.678,-17.794],[60.584999999999994,-17.794],[64.637,-26.826],[69.505,-41.11],[85.985,-47.794]],"v":[[135.824,-47.794],[149.637,-30.213],[119.5,70.823],[97.323,93],[-97.323,93],[-119.5,70.823],[-89.363,4.383],[-65.678,-17.794],[55.605,-17.794],[64.637,-26.826],[66.333,-30.213],[85.985,-47.794]],"c":true}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.167,"y":0.167},"n":"0p833_0p833_0p167_0p167","t":124,"s":[{"i":[[135.824,-47.794],[151.816,-39.604],[119.5,70.823],[109.55199999999999,93],[-97.323,93],[-119.5,83.05199999999999],[-89.363,4.383],[-77.907,-17.794],[55.605,-17.794],[64.637,-21.846],[66.333,-30.213],[76.291,-47.794]],"o":[[145.518,-47.794],[149.637,-30.213],[119.5,83.05199999999999],[97.323,93],[-109.55199999999999,93],[-119.5,70.823],[-85.138,-8.337],[-65.678,-17.794],[60.584999999999994,-17.794],[64.637,-26.826],[69.505,-41.11],[85.985,-47.794]],"v":[[135.824,-47.794],[149.637,-30.213],[119.5,70.823],[97.323,93],[-97.323,93],[-119.5,70.823],[-89.363,4.383],[-65.678,-17.794],[55.605,-17.794],[64.637,-26.826],[66.333,-30.213],[85.985,-47.794]],"c":true}],"e":[{"i":[[101.919,-93],[119.5,-85.113],[119.5,70.823],[109.55199999999999,93],[-97.323,93],[-119.5,83.05199999999999],[-119.5,-40.823],[-109.55199999999999,-63],[25.468,-63],[34.5,-67.05199999999999],[34.5,-75.419],[42.387,-93]],"o":[[111.613,-93],[119.5,-75.419],[119.5,83.05199999999999],[97.323,93],[-109.55199999999999,93],[-119.5,70.823],[-119.5,-53.052],[-97.323,-63],[30.448,-63],[34.5,-72.032],[34.5,-85.113],[52.081,-93]],"v":[[101.919,-93],[119.5,-75.419],[119.5,70.823],[97.323,93],[-97.323,93],[-119.5,70.823],[-119.5,-40.823],[-97.323,-63],[25.468,-63],[34.5,-72.032],[34.5,-75.419],[52.081,-93]],"c":true}]},{"t":128.000005213547}],"ix":2},"nm":"Path 1","mn":"ADBE Vector Shape - 
Group","hd":false,"_render":true},{"ty":"st","c":{"a":0,"k":[0.5,0.5,0.5,1],"ix":3},"o":{"a":0,"k":100,"ix":4},"w":{"a":0,"k":6,"ix":5},"lc":1,"lj":1,"ml":4,"ml2":{"a":0,"k":4,"ix":8},"nm":"Stroke 1","mn":"ADBE Vector Graphic - Stroke","hd":false,"_render":true},{"ty":"fl","c":{"a":0,"k":[0.5,0.5,0.5,1],"ix":4},"o":{"a":0,"k":100,"ix":5},"r":1,"nm":"Fill 1","mn":"ADBE Vector Graphic - Fill","hd":false,"_render":true},{"ty":"tr","p":{"a":0,"k":[0,0],"ix":2},"a":{"a":0,"k":[0,0],"ix":1},"s":{"a":0,"k":[100,100],"ix":3},"r":{"a":0,"k":0,"ix":6},"o":{"a":0,"k":100,"ix":7},"sk":{"a":0,"k":0,"ix":4},"sa":{"a":0,"k":0,"ix":5},"nm":"Transform","_render":true}],"nm":"Shape 1","np":3,"cix":2,"ix":1,"mn":"ADBE Vector Group","hd":false,"_render":true}],"ip":0,"op":316.000012870944,"st":0,"bm":0,"completed":true},{"ddd":0,"ind":3,"ty":4,"nm":"Shape Layer 4","parent":1,"sr":1,"ks":{"o":{"a":0,"k":100,"ix":11},"r":{"a":0,"k":0,"ix":10},"p":{"a":0,"k":[0,0,0],"ix":2},"a":{"a":0,"k":[0,0,0],"ix":1},"s":{"a":0,"k":[100,100,100],"ix":6}},"ao":0,"shapes":[{"ty":"gr","it":[{"ind":0,"ty":"sh","ix":1,"ks":{"a":0,"k":{"i":[[128.5,-143],[160.291,-218.596],[185,-151],[179.5,-190.5]],"o":[[128.5,-143],[214,-176.5],[159.481,-178.482],[188.742,-168.549]],"v":[[128.5,-143],[195.5,-191],[172,-165],[183.5,-181]],"c":false},"ix":2},"nm":"Path 1","mn":"ADBE Vector Shape - Group","hd":false,"_render":true},{"ty":"gs","o":{"a":0,"k":100,"ix":9},"w":{"a":0,"k":6,"ix":10},"g":{"p":3,"k":{"a":0,"k":[0,0.5,0.5,0.5,0.5,0.46,0.46,0.46,1,0.5,0.5,0.5],"ix":8}},"s":{"a":0,"k":[0,0],"ix":4},"e":{"a":0,"k":[100,0],"ix":5},"t":1,"lc":2,"lj":1,"ml":4,"ml2":{"a":0,"k":4,"ix":13},"nm":"Gradient Stroke 1","mn":"ADBE Vector Graphic - G-Stroke","hd":false,"_render":true},{"ty":"fl","c":{"a":0,"k":[0.5,0.5,0.5,1],"ix":4},"o":{"a":0,"k":100,"ix":5},"r":1,"nm":"Fill 1","mn":"ADBE Vector Graphic - 
Fill","hd":true,"_render":true},{"ty":"tr","p":{"a":0,"k":[10,0],"ix":2},"a":{"a":0,"k":[0,0],"ix":1},"s":{"a":0,"k":[-67.81,84.615],"ix":3},"r":{"a":0,"k":0,"ix":6},"o":{"a":0,"k":100,"ix":7},"sk":{"a":0,"k":0,"ix":4},"sa":{"a":0,"k":0,"ix":5},"nm":"Transform","_render":true}],"nm":"Shape 5","np":3,"cix":2,"ix":1,"mn":"ADBE Vector Group","hd":false,"_render":true},{"ty":"gr","it":[{"ind":0,"ty":"sh","ix":1,"ks":{"a":0,"k":{"i":[[128.5,-143],[160.291,-218.596],[185,-151],[179.5,-190.5]],"o":[[128.5,-143],[214,-176.5],[159.481,-178.482],[188.742,-168.549]],"v":[[128.5,-143],[195.5,-191],[172,-165],[183.5,-181]],"c":false},"ix":2},"nm":"Path 1","mn":"ADBE Vector Shape - Group","hd":false,"_render":true},{"ty":"fl","c":{"a":0,"k":[0.5,0.5,0.5,1],"ix":4},"o":{"a":0,"k":100,"ix":5},"r":1,"nm":"Fill 1","mn":"ADBE Vector Graphic - Fill","hd":true,"_render":true},{"ty":"gs","o":{"a":0,"k":100,"ix":9},"w":{"a":0,"k":6,"ix":10},"g":{"p":3,"k":{"a":0,"k":[0,0.5,0.5,0.5,0.5,0.46,0.46,0.46,1,0.5,0.5,0.5],"ix":8}},"s":{"a":0,"k":[0,0],"ix":4},"e":{"a":0,"k":[100,0],"ix":5},"t":1,"lc":2,"lj":1,"ml":4,"ml2":{"a":0,"k":4,"ix":13},"nm":"Gradient Stroke 1","mn":"ADBE Vector Graphic - G-Stroke","hd":false,"_render":true},{"ty":"tr","p":{"a":0,"k":[0,0],"ix":2},"a":{"a":0,"k":[0,0],"ix":1},"s":{"a":0,"k":[100,100],"ix":3},"r":{"a":0,"k":0,"ix":6},"o":{"a":0,"k":100,"ix":7},"sk":{"a":0,"k":0,"ix":4},"sa":{"a":0,"k":0,"ix":5},"nm":"Transform","_render":true}],"nm":"Shape 4","np":3,"cix":2,"ix":2,"mn":"ADBE Vector Group","hd":false,"_render":true},{"ty":"gr","it":[{"ind":0,"ty":"sh","ix":1,"ks":{"a":0,"k":{"i":[[20,-36],[18,-162]],"o":[[20,-36],[82,-234]],"v":[[20,-36],[50,-198]],"c":false},"ix":2},"nm":"Path 1","mn":"ADBE Vector Shape - Group","hd":false,"_render":true},{"ty":"fl","c":{"a":0,"k":[0.5,0.5,0.5,1],"ix":4},"o":{"a":0,"k":100,"ix":5},"r":1,"nm":"Fill 1","mn":"ADBE Vector Graphic - 
Fill","hd":true,"_render":true},{"ty":"gs","o":{"a":0,"k":100,"ix":9},"w":{"a":0,"k":6,"ix":10},"g":{"p":3,"k":{"a":0,"k":[0,0.5,0.5,0.5,0.5,0.46,0.46,0.46,1,0.5,0.5,0.5],"ix":8}},"s":{"a":0,"k":[0,0],"ix":4},"e":{"a":0,"k":[100,0],"ix":5},"t":1,"lc":2,"lj":1,"ml":4,"ml2":{"a":0,"k":4,"ix":13},"nm":"Gradient Stroke 1","mn":"ADBE Vector Graphic - G-Stroke","hd":false,"_render":true},{"ty":"tr","p":{"a":0,"k":[0,0],"ix":2},"a":{"a":0,"k":[0,0],"ix":1},"s":{"a":0,"k":[100,100],"ix":3},"r":{"a":0,"k":0,"ix":6},"o":{"a":0,"k":100,"ix":7},"sk":{"a":0,"k":0,"ix":4},"sa":{"a":0,"k":0,"ix":5},"nm":"Transform","_render":true}],"nm":"Shape 3","np":3,"cix":2,"ix":3,"mn":"ADBE Vector Group","hd":false,"_render":true},{"ty":"gr","it":[{"ind":0,"ty":"sh","ix":1,"ks":{"a":0,"k":{"i":[[-74,-30],[-131,-133]],"o":[[-74,-30],[-285,-99]],"v":[[-74,-30],[-208,-116]],"c":false},"ix":2},"nm":"Path 1","mn":"ADBE Vector Shape - Group","hd":false,"_render":true},{"ty":"fl","c":{"a":0,"k":[0.5,0.5,0.5,1],"ix":4},"o":{"a":0,"k":100,"ix":5},"r":1,"nm":"Fill 1","mn":"ADBE Vector Graphic - Fill","hd":true,"_render":true},{"ty":"gs","o":{"a":0,"k":100,"ix":9},"w":{"a":0,"k":6,"ix":10},"g":{"p":3,"k":{"a":0,"k":[0,0.5,0.5,0.5,0.5,0.46,0.46,0.46,1,0.5,0.5,0.5],"ix":8}},"s":{"a":0,"k":[0,0],"ix":4},"e":{"a":0,"k":[100,0],"ix":5},"t":1,"lc":2,"lj":1,"ml":4,"ml2":{"a":0,"k":4,"ix":13},"nm":"Gradient Stroke 1","mn":"ADBE Vector Graphic - G-Stroke","hd":false,"_render":true},{"ty":"tr","p":{"a":0,"k":[0,0],"ix":2},"a":{"a":0,"k":[0,0],"ix":1},"s":{"a":0,"k":[100,100],"ix":3},"r":{"a":0,"k":0,"ix":6},"o":{"a":0,"k":100,"ix":7},"sk":{"a":0,"k":0,"ix":4},"sa":{"a":0,"k":0,"ix":5},"nm":"Transform","_render":true}],"nm":"Shape 2","np":3,"cix":2,"ix":4,"mn":"ADBE Vector Group","hd":false,"_render":true},{"ty":"gr","it":[{"ind":0,"ty":"sh","ix":1,"ks":{"a":0,"k":{"i":[[129,-67],[223,-164]],"o":[[129,-67],[393,-96]],"v":[[129,-67],[308,-130]],"c":false},"ix":2},"nm":"Path 1","mn":"ADBE Vector Shape - 
Group","hd":false,"_render":true},{"ty":"fl","c":{"a":0,"k":[0.5,0.5,0.5,1],"ix":4},"o":{"a":0,"k":100,"ix":5},"r":1,"nm":"Fill 1","mn":"ADBE Vector Graphic - Fill","hd":true,"_render":true},{"ty":"gs","o":{"a":0,"k":100,"ix":9},"w":{"a":0,"k":6,"ix":10},"g":{"p":3,"k":{"a":0,"k":[0,0.5,0.5,0.5,0.5,0.46,0.46,0.46,1,0.5,0.5,0.5],"ix":8}},"s":{"a":0,"k":[0,0],"ix":4},"e":{"a":0,"k":[100,0],"ix":5},"t":1,"lc":2,"lj":1,"ml":4,"ml2":{"a":0,"k":4,"ix":13},"nm":"Gradient Stroke 1","mn":"ADBE Vector Graphic - G-Stroke","hd":false,"_render":true},{"ty":"tr","p":{"a":0,"k":[0,0],"ix":2},"a":{"a":0,"k":[0,0],"ix":1},"s":{"a":0,"k":[100,100],"ix":3},"r":{"a":0,"k":0,"ix":6},"o":{"a":0,"k":100,"ix":7},"sk":{"a":0,"k":0,"ix":4},"sa":{"a":0,"k":0,"ix":5},"nm":"Transform","_render":true}],"nm":"Shape 1","np":3,"cix":2,"ix":5,"mn":"ADBE Vector Group","hd":false,"_render":true},{"ty":"tm","s":{"a":1,"k":[{"i":{"x":[0],"y":[1]},"o":{"x":[0.167],"y":[0.167]},"n":["0_1_0p167_0p167"],"t":40,"s":[0],"e":[100]},{"t":58.0000023623884}],"ix":1},"e":{"a":1,"k":[{"i":{"x":[0],"y":[1]},"o":{"x":[0.167],"y":[0.167]},"n":["0_1_0p167_0p167"],"t":36,"s":[0],"e":[100]},{"t":54.0000021994651}],"ix":2},"o":{"a":0,"k":0,"ix":3},"m":1,"ix":6,"nm":"Trim Paths 1","mn":"ADBE Vector Filter - Trim","hd":false,"_render":true}],"ip":0,"op":316.000012870944,"st":0,"bm":0,"completed":true},{"ddd":0,"ind":4,"ty":4,"nm":"Shape Layer 
5","parent":1,"sr":1,"ks":{"o":{"a":0,"k":50,"ix":11},"r":{"a":0,"k":0,"ix":10},"p":{"a":0,"k":[-13,138,0],"ix":2},"a":{"a":0,"k":[0,93,0],"ix":1},"s":{"a":1,"k":[{"i":{"x":[0.924,0.924,0.667],"y":[1,1,1]},"o":{"x":[0.751,0.751,0.333],"y":[0,0,0]},"n":["0p924_1_0p751_0","0p924_1_0p751_0","0p667_1_0p333_0"],"t":16,"s":[49.726,49.726,100],"e":[132.726,132.726,100]},{"i":{"x":[0.924,0.924,0.667],"y":[1,1,1]},"o":{"x":[0.333,0.333,0.333],"y":[0,0,0]},"n":["0p924_1_0p333_0","0p924_1_0p333_0","0p667_1_0p333_0"],"t":30,"s":[132.726,132.726,100],"e":[132.726,132.726,100]},{"i":{"x":[0.833,0.833,0.833],"y":[1,1,1]},"o":{"x":[0.333,0.333,0.333],"y":[0,0,0]},"n":["0p833_1_0p333_0","0p833_1_0p333_0","0p833_1_0p333_0"],"t":118,"s":[132.726,132.726,100],"e":[49.726,49.726,100]},{"t":128.000005213547}],"ix":6}},"ao":0,"shapes":[{"ty":"gr","it":[{"ind":0,"ty":"sh","ix":1,"ks":{"a":1,"k":[{"i":{"x":0.833,"y":0.833},"o":{"x":0.167,"y":0.167},"n":"0p833_0p833_0p167_0p167","t":32,"s":[{"i":[[101.919,-93],[119.5,-85.113],[119.5,70.823],[109.55199999999999,93],[-97.323,93],[-119.5,83.05199999999999],[-119.5,-40.823],[-109.55199999999999,-63],[25.468,-63],[34.5,-67.05199999999999],[34.5,-75.419],[42.387,-93]],"o":[[111.613,-93],[119.5,-75.419],[119.5,83.05199999999999],[97.323,93],[-109.55199999999999,93],[-119.5,70.823],[-119.5,-53.052],[-97.323,-63],[30.448,-63],[34.5,-72.032],[34.5,-85.113],[52.081,-93]],"v":[[101.919,-93],[119.5,-75.419],[119.5,70.823],[97.323,93],[-97.323,93],[-119.5,70.823],[-119.5,-40.823],[-97.323,-63],[25.468,-63],[34.5,-72.032],[34.5,-75.419],[52.081,-93]],"c":true}],"e":[{"i":[[109.669,-82.667],[126.887,-74.712],[119.5,70.823],[109.55199999999999,93],[-97.323,93],[-119.5,83.05199999999999],[-112.611,-30.49],[-102.319,-52.667],[32.356,-52.667],[41.389,-56.72],[41.776,-65.087],[50.135999999999996,-82.667]],"o":[[119.363,-82.667],[126.389,-65.087],[119.5,83.05199999999999],[97.323,93],[-109.55199999999999,93],[-119.5,70.823],[-111.64500000000001,-42.83099999999999
6],[-90.09,-52.667],[37.336,-52.667],[41.389,-61.7],[42.501000000000005,-75.056],[59.83,-82.667]],"v":[[109.669,-82.667],[126.389,-65.087],[119.5,70.823],[97.323,93],[-97.323,93],[-119.5,70.823],[-112.611,-30.49],[-90.09,-52.667],[32.356,-52.667],[41.389,-61.7],[41.776,-65.087],[59.83,-82.667]],"c":true}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.167,"y":0.167},"n":"0p833_0p833_0p167_0p167","t":36,"s":[{"i":[[109.669,-82.667],[126.887,-74.712],[119.5,70.823],[109.55199999999999,93],[-97.323,93],[-119.5,83.05199999999999],[-112.611,-30.49],[-102.319,-52.667],[32.356,-52.667],[41.389,-56.72],[41.776,-65.087],[50.135999999999996,-82.667]],"o":[[119.363,-82.667],[126.389,-65.087],[119.5,83.05199999999999],[97.323,93],[-109.55199999999999,93],[-119.5,70.823],[-111.64500000000001,-42.830999999999996],[-90.09,-52.667],[37.336,-52.667],[41.389,-61.7],[42.501000000000005,-75.056],[59.83,-82.667]],"v":[[109.669,-82.667],[126.389,-65.087],[119.5,70.823],[97.323,93],[-97.323,93],[-119.5,70.823],[-112.611,-30.49],[-90.09,-52.667],[32.356,-52.667],[41.389,-61.7],[41.776,-65.087],[59.83,-82.667]],"c":true}],"e":[{"i":[[101.919,-93],[119.5,-85.113],[119.5,70.823],[109.55199999999999,93],[-97.323,93],[-119.5,83.05199999999999],[-119.5,-40.823],[-109.55199999999999,-63],[25.468,-63],[34.5,-67.05199999999999],[34.5,-75.419],[42.387,-93]],"o":[[111.613,-93],[119.5,-75.419],[119.5,83.05199999999999],[97.323,93],[-109.55199999999999,93],[-119.5,70.823],[-119.5,-53.052],[-97.323,-63],[30.448,-63],[34.5,-72.032],[34.5,-85.113],[52.081,-93]],"v":[[101.919,-93],[119.5,-75.419],[119.5,70.823],[97.323,93],[-97.323,93],[-119.5,70.823],[-119.5,-40.823],[-97.323,-63],[25.468,-63],[34.5,-72.032],[34.5,-75.419],[52.081,-93]],"c":true}]},{"t":40.0000016292334}],"ix":2},"nm":"Path 1","mn":"ADBE Vector Shape - Group","hd":false,"_render":true},{"ty":"st","c":{"a":0,"k":[1,1,1,1],"ix":3},"o":{"a":0,"k":100,"ix":4},"w":{"a":0,"k":6,"ix":5},"lc":1,"lj":1,"ml":4,"ml2":{"a":0,"k":4,"ix":8},"nm":"Stroke 
1","mn":"ADBE Vector Graphic - Stroke","hd":false,"_render":true},{"ty":"fl","c":{"a":0,"k":[0.5,0.5,0.5,1],"ix":4},"o":{"a":0,"k":100,"ix":5},"r":1,"nm":"Fill 1","mn":"ADBE Vector Graphic - Fill","hd":true,"_render":true},{"ty":"tr","p":{"a":0,"k":[0,0],"ix":2},"a":{"a":0,"k":[0,0],"ix":1},"s":{"a":0,"k":[100,100],"ix":3},"r":{"a":0,"k":0,"ix":6},"o":{"a":0,"k":100,"ix":7},"sk":{"a":0,"k":0,"ix":4},"sa":{"a":0,"k":0,"ix":5},"nm":"Transform","_render":true}],"nm":"Shape 1","np":3,"cix":2,"ix":1,"mn":"ADBE Vector Group","hd":false,"_render":true}],"ip":0,"op":316.000012870944,"st":0,"bm":0,"completed":true},{"ddd":0,"ind":5,"ty":4,"nm":"NO DATA Outlines","sr":1,"ks":{"o":{"a":0,"k":100,"ix":11},"r":{"a":0,"k":0,"ix":10},"p":{"s":true,"x":{"a":0,"k":452,"ix":3},"y":{"a":1,"k":[{"i":{"x":[0.667],"y":[1]},"o":{"x":[0.03],"y":[0.238]},"n":["0p667_1_0p03_0p238"],"t":35,"s":[344],"e":[204]},{"i":{"x":[0.667],"y":[1]},"o":{"x":[0.333],"y":[0]},"n":["0p667_1_0p333_0"],"t":45,"s":[204],"e":[204]},{"i":{"x":[0.957],"y":[0.75]},"o":{"x":[0.333],"y":[0]},"n":["0p957_0p75_0p333_0"],"t":98,"s":[204],"e":[324]},{"t":105.000004276738}],"ix":4}},"a":{"a":0,"k":[0,0,0],"ix":1},"s":{"a":0,"k":[122.539,122.539,100],"ix":6}},"ao":0,"shapes":[{"ty":"gr","it":[{"ind":0,"ty":"sh","ix":1,"ks":{"a":0,"k":{"i":[[11.813,-28.459],[11.813,-15.645],[7.664,-28.459],[1.477,-28.459],[1.477,0],[7.664,0],[7.664,-12.938],[11.514,0],[18,0],[18,-28.459]],"o":[[11.813,-28.459],[11.813,-15.645],[7.664,-28.459],[1.477,-28.459],[1.477,0],[7.664,0],[7.664,-12.938],[11.514,0],[18,0],[18,-28.459]],"v":[[11.813,-28.459],[11.813,-15.645],[7.664,-28.459],[1.477,-28.459],[1.477,0],[7.664,0],[7.664,-12.938],[11.514,0],[18,0],[18,-28.459]],"c":true},"ix":2},"nm":"N","mn":"ADBE Vector Shape - Group","hd":false},{"ty":"st","c":{"a":0,"k":[0,0,0,1],"ix":3},"o":{"a":0,"k":100,"ix":4},"w":{"a":0,"k":2,"ix":5},"lc":1,"lj":1,"ml":4,"ml2":{"a":0,"k":4,"ix":8},"nm":"Stroke 1","mn":"ADBE Vector Graphic - 
Stroke","hd":true},{"ty":"fl","c":{"a":0,"k":[0.92,0.92,0.92,1],"ix":4},"o":{"a":0,"k":100,"ix":5},"r":1,"nm":"Fill 1","mn":"ADBE Vector Graphic - Fill","hd":false},{"ty":"tr","p":{"a":0,"k":[0,0],"ix":2},"a":{"a":0,"k":[0,0],"ix":1},"s":{"a":0,"k":[100,100],"ix":3},"r":{"a":0,"k":0,"ix":6},"o":{"a":0,"k":100,"ix":7},"sk":{"a":0,"k":0,"ix":4},"sa":{"a":0,"k":0,"ix":5},"nm":"Transform"}],"nm":"N","np":3,"cix":2,"ix":1,"mn":"ADBE Vector Group","hd":false},{"ty":"gr","it":[{"ind":0,"ty":"sh","ix":1,"ks":{"a":0,"k":{"i":[[37.881,-16.664],[37.811,-21.644],[37.089999999999996,-25.072000000000003],[34.638,-27.811999999999998],[30.843,-29.057],[26.309,-28.793],[22.937,-26.982],[21.097,-23.926],[20.76,-19.523000000000003],[20.76,-11.795],[20.830000000000002,-6.8149999999999995],[21.551000000000002,-3.387],[24.003,-0.647],[27.796,0.598],[32.332,0.334],[35.70399999999999,-1.4769999999999999],[37.544000000000004,-4.532],[37.881,-8.936]],"o":[[37.881,-19.594],[37.529,-23.988],[35.614,-27.064],[32.211999999999996,-28.808],[27.714,-29.057],[23.895,-27.738999999999997],[21.519000000000002,-25.014000000000003],[20.826999999999998,-21.546999999999997],[20.76,-16.664],[20.76,-8.865],[21.112,-4.471],[23.027,-1.395],[26.429000000000002,0.349],[30.925,0.598],[34.746,-0.72],[37.122,-3.445],[37.814,-6.9110000000000005],[37.881,-11.795]],"v":[[37.881,-16.664],[37.67,-22.816],[36.352,-26.068],[33.425,-28.31],[29.32,-29.057],[25.102,-28.266],[22.228,-25.998],[20.962,-22.737],[20.76,-16.664],[20.76,-11.795],[20.971,-5.643],[22.289,-2.391],[25.216,-0.149],[29.32,0.598],[33.539,-0.193],[36.413,-2.461],[37.679,-5.722],[37.881,-11.795]],"c":true},"ix":2},"nm":"O","mn":"ADBE Vector Shape - 
Group","hd":false},{"ind":1,"ty":"sh","ix":2,"ks":{"a":0,"k":{"i":[[30.48,-8.068],[30.410999999999998,-5.332],[29.807000000000002,-4.219],[28.497,-4.412000000000001],[28.16,-6.205],[28.16,-21.146],[28.263,-23.493000000000002],[28.968,-24.24],[30.114,-24.056],[30.48,-22.470000000000002]],"o":[[30.48,-6.3919999999999995],[30.129,-4.441999999999999],[28.811,-4.219],[28.226999999999997,-5.186],[28.16,-7.857],[28.16,-22.611],[28.673,-24.091],[29.812,-24.24],[30.407,-23.318],[30.48,-21.146]],"v":[[30.48,-8.068],[30.27,-4.887],[29.303,-4.219],[28.362,-4.799],[28.16,-7.857],[28.16,-21.146],[28.468,-23.792],[29.355,-24.24],[30.261,-23.687],[30.48,-21.146]],"c":true},"ix":2},"nm":"O","mn":"ADBE Vector Shape - Group","hd":false},{"ty":"mm","mm":1,"nm":"Merge Paths 1","mn":"ADBE Vector Filter - Merge","hd":false},{"ty":"st","c":{"a":0,"k":[0,0,0,1],"ix":3},"o":{"a":0,"k":100,"ix":4},"w":{"a":0,"k":2,"ix":5},"lc":1,"lj":1,"ml":4,"ml2":{"a":0,"k":4,"ix":8},"nm":"Stroke 1","mn":"ADBE Vector Graphic - Stroke","hd":true},{"ty":"fl","c":{"a":0,"k":[0.92,0.92,0.92,1],"ix":4},"o":{"a":0,"k":100,"ix":5},"r":1,"nm":"Fill 1","mn":"ADBE Vector Graphic - Fill","hd":false},{"ty":"tr","p":{"a":0,"k":[0,0],"ix":2},"a":{"a":0,"k":[0,0],"ix":1},"s":{"a":0,"k":[100,100],"ix":3},"r":{"a":0,"k":0,"ix":6},"o":{"a":0,"k":100,"ix":7},"sk":{"a":0,"k":0,"ix":4},"sa":{"a":0,"k":0,"ix":5},"nm":"Transform"}],"nm":"O","np":5,"cix":2,"ix":2,"mn":"ADBE Vector 
Group","hd":false},{"ty":"gr","it":[{"ind":0,"ty":"sh","ix":1,"ks":{"a":0,"k":{"i":[[46.986,0],[56.32,0],[59.414,-0.09699999999999998],[61.910000000000004,-0.8240000000000001],[63.525,-2.4699999999999998],[64.125,-5.9],[64.125,-18.422],[64.04299999999999,-22.924999999999997],[63.304,-25.6],[61.034,-27.639],[56.097,-28.459],[46.986,-28.459]],"o":[[46.986,0],[58.089,0],[61.172,-0.483],[63.106,-1.796],[64.005,-4.192],[64.125,-8.455],[64.125,-21.117],[63.714999999999996,-24.765],[61.992000000000004,-27.1],[58.514,-28.294999999999998],[52.523,-28.459],[46.986,-28.459]],"v":[[46.986,0],[56.32,0],[60.293,-0.29],[62.508,-1.31],[63.765,-3.331],[64.125,-8.455],[64.125,-18.422],[63.879,-23.845],[62.648,-26.35],[59.774,-27.967],[52.523,-28.459],[46.986,-28.459]],"c":true},"ix":2},"nm":"D","mn":"ADBE Vector Shape - Group","hd":false},{"ind":1,"ty":"sh","ix":2,"ks":{"a":0,"k":{"i":[[55.746,-23.505000000000003],[56.519999999999996,-22.898],[56.725,-21.346],[56.725,-9],[56.601,-5.939],[55.453,-4.869],[54.387,-23.59]],"o":[[56.332,-23.165],[56.684,-22.172],[56.725,-20.057],[56.725,-7.102],[56.108999999999995,-5.083],[54.387,-4.869],[55.196,-23.59]],"v":[[56.039,-23.335],[56.602,-22.535],[56.725,-20.057],[56.725,-9],[56.355,-5.511],[54.387,-4.869],[54.387,-23.59]],"c":true},"ix":2},"nm":"D","mn":"ADBE Vector Shape - Group","hd":false},{"ty":"mm","mm":1,"nm":"Merge Paths 1","mn":"ADBE Vector Filter - Merge","hd":false},{"ty":"st","c":{"a":0,"k":[0,0,0,1],"ix":3},"o":{"a":0,"k":100,"ix":4},"w":{"a":0,"k":2,"ix":5},"lc":1,"lj":1,"ml":4,"ml2":{"a":0,"k":4,"ix":8},"nm":"Stroke 1","mn":"ADBE Vector Graphic - Stroke","hd":true},{"ty":"fl","c":{"a":0,"k":[0.92,0.92,0.92,1],"ix":4},"o":{"a":0,"k":100,"ix":5},"r":1,"nm":"Fill 1","mn":"ADBE Vector Graphic - 
Fill","hd":false},{"ty":"tr","p":{"a":0,"k":[0,0],"ix":2},"a":{"a":0,"k":[0,0],"ix":1},"s":{"a":0,"k":[100,100],"ix":3},"r":{"a":0,"k":0,"ix":6},"o":{"a":0,"k":100,"ix":7},"sk":{"a":0,"k":0,"ix":4},"sa":{"a":0,"k":0,"ix":5},"nm":"Transform"}],"nm":"D","np":5,"cix":2,"ix":3,"mn":"ADBE Vector Group","hd":false},{"ty":"gr","it":[{"ind":0,"ty":"sh","ix":1,"ks":{"a":0,"k":{"i":[[68.981,-28.459],[65.206,0],[72.857,0],[73.302,-5.115],[75.949,-5.115],[76.346,0],[83.909,0],[79.677,-28.459]],"o":[[68.981,-28.459],[65.206,0],[72.857,0],[73.302,-5.115],[75.949,-5.115],[76.346,0],[83.909,0],[79.677,-28.459]],"v":[[68.981,-28.459],[65.206,0],[72.857,0],[73.302,-5.115],[75.949,-5.115],[76.346,0],[83.909,0],[79.677,-28.459]],"c":true},"ix":2},"nm":"A","mn":"ADBE Vector Shape - Group","hd":false},{"ind":1,"ty":"sh","ix":2,"ks":{"a":0,"k":{"i":[[73.208,-10.16],[73.874,-16.664],[75.38,-13.383]],"o":[[73.401,-12.679],[75.005,-17.366999999999997],[75.755,-10.16]],"v":[[73.208,-10.16],[74.628,-22.113],[75.755,-10.16]],"c":true},"ix":2},"nm":"A","mn":"ADBE Vector Shape - Group","hd":false},{"ty":"mm","mm":1,"nm":"Merge Paths 1","mn":"ADBE Vector Filter - Merge","hd":false},{"ty":"st","c":{"a":0,"k":[0,0,0,1],"ix":3},"o":{"a":0,"k":100,"ix":4},"w":{"a":0,"k":2,"ix":5},"lc":1,"lj":1,"ml":4,"ml2":{"a":0,"k":4,"ix":8},"nm":"Stroke 1","mn":"ADBE Vector Graphic - Stroke","hd":true},{"ty":"fl","c":{"a":0,"k":[0.92,0.92,0.92,1],"ix":4},"o":{"a":0,"k":100,"ix":5},"r":1,"nm":"Fill 1","mn":"ADBE Vector Graphic - Fill","hd":false},{"ty":"tr","p":{"a":0,"k":[0,0],"ix":2},"a":{"a":0,"k":[0,0],"ix":1},"s":{"a":0,"k":[100,100],"ix":3},"r":{"a":0,"k":0,"ix":6},"o":{"a":0,"k":100,"ix":7},"sk":{"a":0,"k":0,"ix":4},"sa":{"a":0,"k":0,"ix":5},"nm":"Transform"}],"nm":"A","np":5,"cix":2,"ix":4,"mn":"ADBE Vector 
Group","hd":false},{"ty":"gr","it":[{"ind":0,"ty":"sh","ix":1,"ks":{"a":0,"k":{"i":[[81.914,-28.459],[81.914,-22.764],[86.291,-22.764],[86.291,0],[93.691,0],[93.691,-22.764],[98.086,-22.764],[98.086,-28.459]],"o":[[81.914,-28.459],[81.914,-22.764],[86.291,-22.764],[86.291,0],[93.691,0],[93.691,-22.764],[98.086,-22.764],[98.086,-28.459]],"v":[[81.914,-28.459],[81.914,-22.764],[86.291,-22.764],[86.291,0],[93.691,0],[93.691,-22.764],[98.086,-22.764],[98.086,-28.459]],"c":true},"ix":2},"nm":"T","mn":"ADBE Vector Shape - Group","hd":false},{"ty":"st","c":{"a":0,"k":[0,0,0,1],"ix":3},"o":{"a":0,"k":100,"ix":4},"w":{"a":0,"k":2,"ix":5},"lc":1,"lj":1,"ml":4,"ml2":{"a":0,"k":4,"ix":8},"nm":"Stroke 1","mn":"ADBE Vector Graphic - Stroke","hd":true},{"ty":"fl","c":{"a":0,"k":[0.92,0.92,0.92,1],"ix":4},"o":{"a":0,"k":100,"ix":5},"r":1,"nm":"Fill 1","mn":"ADBE Vector Graphic - Fill","hd":false},{"ty":"tr","p":{"a":0,"k":[0,0],"ix":2},"a":{"a":0,"k":[0,0],"ix":1},"s":{"a":0,"k":[100,100],"ix":3},"r":{"a":0,"k":0,"ix":6},"o":{"a":0,"k":100,"ix":7},"sk":{"a":0,"k":0,"ix":4},"sa":{"a":0,"k":0,"ix":5},"nm":"Transform"}],"nm":"T","np":3,"cix":2,"ix":5,"mn":"ADBE Vector Group","hd":false},{"ty":"gr","it":[{"ind":0,"ty":"sh","ix":1,"ks":{"a":0,"k":{"i":[[100.428,-28.459],[96.653,0],[104.304,0],[104.749,-5.115],[107.396,-5.115],[107.793,0],[115.356,0],[111.125,-28.459]],"o":[[100.428,-28.459],[96.653,0],[104.304,0],[104.749,-5.115],[107.396,-5.115],[107.793,0],[115.356,0],[111.125,-28.459]],"v":[[100.428,-28.459],[96.653,0],[104.304,0],[104.749,-5.115],[107.396,-5.115],[107.793,0],[115.356,0],[111.125,-28.459]],"c":true},"ix":2},"nm":"A","mn":"ADBE Vector Shape - Group","hd":false},{"ind":1,"ty":"sh","ix":2,"ks":{"a":0,"k":{"i":[[104.655,-10.16],[105.321,-16.664],[106.827,-13.383]],"o":[[104.848,-12.679],[106.452,-17.366999999999997],[107.202,-10.16]],"v":[[104.655,-10.16],[106.075,-22.113],[107.202,-10.16]],"c":true},"ix":2},"nm":"A","mn":"ADBE Vector Shape - 
Group","hd":false},{"ty":"mm","mm":1,"nm":"Merge Paths 1","mn":"ADBE Vector Filter - Merge","hd":false},{"ty":"st","c":{"a":0,"k":[0,0,0,1],"ix":3},"o":{"a":0,"k":100,"ix":4},"w":{"a":0,"k":2,"ix":5},"lc":1,"lj":1,"ml":4,"ml2":{"a":0,"k":4,"ix":8},"nm":"Stroke 1","mn":"ADBE Vector Graphic - Stroke","hd":true},{"ty":"fl","c":{"a":0,"k":[0.92,0.92,0.92,1],"ix":4},"o":{"a":0,"k":100,"ix":5},"r":1,"nm":"Fill 1","mn":"ADBE Vector Graphic - Fill","hd":false},{"ty":"tr","p":{"a":0,"k":[0,0],"ix":2},"a":{"a":0,"k":[0,0],"ix":1},"s":{"a":0,"k":[100,100],"ix":3},"r":{"a":0,"k":0,"ix":6},"o":{"a":0,"k":100,"ix":7},"sk":{"a":0,"k":0,"ix":4},"sa":{"a":0,"k":0,"ix":5},"nm":"Transform"}],"nm":"A","np":5,"cix":2,"ix":6,"mn":"ADBE Vector Group","hd":false}],"ip":35.0000014255792,"op":106.000004317469,"st":25.0000010182709,"bm":0,"completed":true}],"markers":[],"__complete":true}
|
metadata.json
ADDED
@@ -0,0 +1,267 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"tables": {
|
3 |
+
"brands": {
|
4 |
+
"columns": {
|
5 |
+
"brand_id": {
|
6 |
+
"sdtype": "id"
|
7 |
+
},
|
8 |
+
"brand_name": {
|
9 |
+
"sdtype": "unknown",
|
10 |
+
"pii": true
|
11 |
+
}
|
12 |
+
},
|
13 |
+
"primary_key": "brand_id"
|
14 |
+
},
|
15 |
+
"categories": {
|
16 |
+
"columns": {
|
17 |
+
"category_id": {
|
18 |
+
"sdtype": "id"
|
19 |
+
},
|
20 |
+
"category_name": {
|
21 |
+
"sdtype": "unknown",
|
22 |
+
"pii": true
|
23 |
+
}
|
24 |
+
},
|
25 |
+
"primary_key": "category_id"
|
26 |
+
},
|
27 |
+
"customers": {
|
28 |
+
"columns": {
|
29 |
+
"customer_id": {
|
30 |
+
"sdtype": "id"
|
31 |
+
},
|
32 |
+
"first_name": {
|
33 |
+
"sdtype": "first_name",
|
34 |
+
"pii": true
|
35 |
+
},
|
36 |
+
"last_name": {
|
37 |
+
"sdtype": "last_name",
|
38 |
+
"pii": true
|
39 |
+
},
|
40 |
+
"phone": {
|
41 |
+
"sdtype": "categorical"
|
42 |
+
},
|
43 |
+
"email": {
|
44 |
+
"sdtype": "email"
|
45 |
+
},
|
46 |
+
"street": {
|
47 |
+
"sdtype": "unknown",
|
48 |
+
"pii": true
|
49 |
+
},
|
50 |
+
"city": {
|
51 |
+
"sdtype": "city",
|
52 |
+
"pii": true
|
53 |
+
},
|
54 |
+
"state": {
|
55 |
+
"sdtype": "administrative_unit",
|
56 |
+
"pii": true
|
57 |
+
},
|
58 |
+
"zip_code": {
|
59 |
+
"sdtype": "postcode",
|
60 |
+
"pii": true
|
61 |
+
}
|
62 |
+
},
|
63 |
+
"primary_key": "customer_id"
|
64 |
+
},
|
65 |
+
"orders": {
|
66 |
+
"columns": {
|
67 |
+
"order_id": {
|
68 |
+
"sdtype": "id"
|
69 |
+
},
|
70 |
+
"customer_id": {
|
71 |
+
"sdtype": "id"
|
72 |
+
},
|
73 |
+
"order_status": {
|
74 |
+
"sdtype": "categorical"
|
75 |
+
},
|
76 |
+
"order_date": {
|
77 |
+
"sdtype": "datetime",
|
78 |
+
"datetime_format": "%Y-%m-%d"
|
79 |
+
},
|
80 |
+
"required_date": {
|
81 |
+
"sdtype": "datetime",
|
82 |
+
"datetime_format": "%Y-%m-%d"
|
83 |
+
},
|
84 |
+
"shipped_date": {
|
85 |
+
"sdtype": "datetime",
|
86 |
+
"datetime_format": "%Y-%m-%d"
|
87 |
+
},
|
88 |
+
"store_id": {
|
89 |
+
"sdtype": "categorical"
|
90 |
+
},
|
91 |
+
"staff_id": {
|
92 |
+
"sdtype": "id"
|
93 |
+
}
|
94 |
+
},
|
95 |
+
"primary_key": "order_id"
|
96 |
+
},
|
97 |
+
"order_items": {
|
98 |
+
"columns": {
|
99 |
+
"order_id": {
|
100 |
+
"sdtype": "id"
|
101 |
+
},
|
102 |
+
"item_id": {
|
103 |
+
"sdtype": "categorical"
|
104 |
+
},
|
105 |
+
"product_id": {
|
106 |
+
"sdtype": "id"
|
107 |
+
},
|
108 |
+
"quantity": {
|
109 |
+
"sdtype": "categorical"
|
110 |
+
},
|
111 |
+
"list_price": {
|
112 |
+
"sdtype": "numerical"
|
113 |
+
},
|
114 |
+
"discount": {
|
115 |
+
"sdtype": "numerical"
|
116 |
+
}
|
117 |
+
}
|
118 |
+
},
|
119 |
+
"products": {
|
120 |
+
"columns": {
|
121 |
+
"product_id": {
|
122 |
+
"sdtype": "id"
|
123 |
+
},
|
124 |
+
"product_name": {
|
125 |
+
"sdtype": "unknown",
|
126 |
+
"pii": true
|
127 |
+
},
|
128 |
+
"brand_id": {
|
129 |
+
"sdtype": "id"
|
130 |
+
},
|
131 |
+
"category_id": {
|
132 |
+
"sdtype": "id"
|
133 |
+
},
|
134 |
+
"model_year": {
|
135 |
+
"sdtype": "categorical"
|
136 |
+
},
|
137 |
+
"list_price": {
|
138 |
+
"sdtype": "numerical"
|
139 |
+
}
|
140 |
+
},
|
141 |
+
"primary_key": "product_id"
|
142 |
+
},
|
143 |
+
"staffs": {
|
144 |
+
"columns": {
|
145 |
+
"staff_id": {
|
146 |
+
"sdtype": "id"
|
147 |
+
},
|
148 |
+
"first_name": {
|
149 |
+
"sdtype": "first_name",
|
150 |
+
"pii": true
|
151 |
+
},
|
152 |
+
"last_name": {
|
153 |
+
"sdtype": "last_name",
|
154 |
+
"pii": true
|
155 |
+
},
|
156 |
+
"email": {
|
157 |
+
"sdtype": "email"
|
158 |
+
},
|
159 |
+
"phone": {
|
160 |
+
"sdtype": "unknown",
|
161 |
+
"pii": true
|
162 |
+
},
|
163 |
+
"active": {
|
164 |
+
"sdtype": "categorical"
|
165 |
+
},
|
166 |
+
"store_id": {
|
167 |
+
"sdtype": "numerical"
|
168 |
+
},
|
169 |
+
"manager_id": {
|
170 |
+
"sdtype": "numerical"
|
171 |
+
}
|
172 |
+
},
|
173 |
+
"primary_key": "staff_id"
|
174 |
+
},
|
175 |
+
"stocks": {
|
176 |
+
"columns": {
|
177 |
+
"store_id": {
|
178 |
+
"sdtype": "categorical"
|
179 |
+
},
|
180 |
+
"product_id": {
|
181 |
+
"sdtype": "id"
|
182 |
+
},
|
183 |
+
"quantity": {
|
184 |
+
"sdtype": "numerical"
|
185 |
+
}
|
186 |
+
}
|
187 |
+
},
|
188 |
+
"stores": {
|
189 |
+
"columns": {
|
190 |
+
"store_id": {
|
191 |
+
"sdtype": "numerical"
|
192 |
+
},
|
193 |
+
"store_name": {
|
194 |
+
"sdtype": "categorical"
|
195 |
+
},
|
196 |
+
"phone": {
|
197 |
+
"sdtype": "categorical"
|
198 |
+
},
|
199 |
+
"email": {
|
200 |
+
"sdtype": "email",
|
201 |
+
"pii": true
|
202 |
+
},
|
203 |
+
"street": {
|
204 |
+
"sdtype": "categorical"
|
205 |
+
},
|
206 |
+
"city": {
|
207 |
+
"sdtype": "city",
|
208 |
+
"pii": true
|
209 |
+
},
|
210 |
+
"state": {
|
211 |
+
"sdtype": "administrative_unit",
|
212 |
+
"pii": true
|
213 |
+
},
|
214 |
+
"zip_code": {
|
215 |
+
"sdtype": "postcode",
|
216 |
+
"pii": true
|
217 |
+
}
|
218 |
+
},
|
219 |
+
"primary_key": "email"
|
220 |
+
}
|
221 |
+
},
|
222 |
+
"relationships": [
|
223 |
+
{
|
224 |
+
"parent_table_name": "brands",
|
225 |
+
"child_table_name": "products",
|
226 |
+
"parent_primary_key": "brand_id",
|
227 |
+
"child_foreign_key": "brand_id"
|
228 |
+
},
|
229 |
+
{
|
230 |
+
"parent_table_name": "categories",
|
231 |
+
"child_table_name": "products",
|
232 |
+
"parent_primary_key": "category_id",
|
233 |
+
"child_foreign_key": "category_id"
|
234 |
+
},
|
235 |
+
{
|
236 |
+
"parent_table_name": "customers",
|
237 |
+
"child_table_name": "orders",
|
238 |
+
"parent_primary_key": "customer_id",
|
239 |
+
"child_foreign_key": "customer_id"
|
240 |
+
},
|
241 |
+
{
|
242 |
+
"parent_table_name": "orders",
|
243 |
+
"child_table_name": "order_items",
|
244 |
+
"parent_primary_key": "order_id",
|
245 |
+
"child_foreign_key": "order_id"
|
246 |
+
},
|
247 |
+
{
|
248 |
+
"parent_table_name": "products",
|
249 |
+
"child_table_name": "order_items",
|
250 |
+
"parent_primary_key": "product_id",
|
251 |
+
"child_foreign_key": "product_id"
|
252 |
+
},
|
253 |
+
{
|
254 |
+
"parent_table_name": "products",
|
255 |
+
"child_table_name": "stocks",
|
256 |
+
"parent_primary_key": "product_id",
|
257 |
+
"child_foreign_key": "product_id"
|
258 |
+
},
|
259 |
+
{
|
260 |
+
"parent_table_name": "staffs",
|
261 |
+
"child_table_name": "orders",
|
262 |
+
"parent_primary_key": "staff_id",
|
263 |
+
"child_foreign_key": "staff_id"
|
264 |
+
}
|
265 |
+
],
|
266 |
+
"METADATA_SPEC_VERSION": "MULTI_TABLE_V1"
|
267 |
+
}
|
tbl_dtl.csv
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Table Name,ETL Timestamp,Change Timestamp,ETL Filter
|
2 |
+
TPM,TPM._hoodie_commit_time,TPM.prev_updt_ts,"coalesce(OPERATION,'NA')<>'D'"
|
3 |
+
TPP,TPP._hoodie_commit_time,TPP.prev_updt_ts,"coalesce(OPERATION,'NA')<>'D'"
|
4 |
+
TSM,TSM._hoodie_commit_time,TSM.prev_updt_ts,"coalesce(OPERATION,'NA')<>'D'"
|
5 |
+
TRM,TRM._hoodie_commit_time,TRM.prev_updt_ts,"coalesce(OPERATION,'NA')<>'D'"
|
template.txt
ADDED
@@ -0,0 +1,254 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import traceback
|
2 |
+
from pyspark.sql import SparkSession
|
3 |
+
from pyspark import SparkConf
|
4 |
+
from pyspark.sql.functions import col,regexp_replace, concat_ws, when, collect_list, lit, to_timestamp
|
5 |
+
from pyspark.sql.functions import year, month, date_format
|
6 |
+
from pyspark.sql import functions as F
|
7 |
+
from pyspark.sql.types import LongType,DecimalType,IntegerType,TimestampType,DoubleType
|
8 |
+
from pyspark.sql.functions import *
|
9 |
+
from pytz import timezone
|
10 |
+
from datetime import datetime,timedelta
|
11 |
+
from pyspark.sql.window import Window
|
12 |
+
import json
|
13 |
+
import sys
|
14 |
+
import logging
|
15 |
+
import datetime
|
16 |
+
import time
|
17 |
+
import os
|
18 |
+
import psycopg2
|
19 |
+
import requests
|
20 |
+
from requests.auth import HTTPBasicAuth
|
21 |
+
import base64
|
22 |
+
import functools
|
23 |
+
import boto3
|
24 |
+
|
25 |
+
# Packages downloaded onto the EMR master instance live under /home/hadoop,
# so that directory is added to the module search path.
sys.path.append('/home/hadoop')

# Second-resolution timestamp captured once at start-up.
curr_time = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')

# One log file per run; the microsecond suffix keeps names distinct.
log_file_name = f"job_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S_%f')}.log"

# The formatter references %(log_file_name)s, which is not a standard
# LogRecord attribute; it is supplied through the LoggerAdapter's `extra`.
extra = {'log_file_name': log_file_name}
formatter = logging.Formatter('%(log_file_name)s;%(asctime)s;%(levelname)s;%(message)s')

# File handler opened in 'w' mode: the file is created/truncated at start-up.
syslog = logging.FileHandler(log_file_name, mode='w')
syslog.setFormatter(formatter)

logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
logger.addHandler(syslog)

# From here on `logger` is the adapter, so every record carries log_file_name.
logger = logging.LoggerAdapter(logger, extra)
|
38 |
+
|
39 |
+
def read_config(config_path):
    """Load the job's JSON configuration from S3 or the local file system.

    Args:
        config_path: Location of the JSON config file. A value starting with
            's3' is read as an S3 object ('s3://<bucket>/<key>'); anything
            else is opened as a local file path.

    Returns:
        The parsed JSON content (callers index it like a dict).

    Raises:
        Exception: "Error reading config." when the file cannot be fetched
            or parsed; the underlying error is attached as the cause.
    """
    logger.info("Inside read config")
    try:
        # checking if config path provided as input is s3 path or file system path
        if config_path.startswith('s3'):
            logger.info("Reading config file from S3")
            # 's3://bucket/a/b.json' -> ['s3:', '', 'bucket', 'a', 'b.json']
            path_parts = config_path.split('/')
            s3 = boto3.resource('s3')
            file_object = s3.Object(path_parts[2], '/'.join(path_parts[3:]))
            file_content = file_object.get()['Body'].read().decode('utf-8')
            json_content = json.loads(file_content)
        else:
            logger.info("Reading config file from path : " + config_path)
            # Context manager closes the handle even when parsing fails.
            with open(config_path, 'r') as config_file:
                json_content = json.load(config_file)

        # Re-serialised only so the whole config lands in the log as one line.
        json_object = json.dumps(json_content)
        logger.info("Input Config Details:")
        logger.info(json_object)
        return json_content
    except Exception as e:
        raise Exception("Error reading config.") from e
|
63 |
+
|
64 |
+
def get_secret(secret, region_name="ap-south-1"):
    """Fetch a secret's value from AWS Secrets Manager.

    Args:
        secret: Name (SecretId) of the secret to read.
        region_name: AWS region of the Secrets Manager endpoint. Defaults to
            "ap-south-1", the region that was previously hard-coded.

    Returns:
        The 'SecretString' field of the response when present, otherwise the
        'SecretBinary' field.

    Raises:
        botocore.exceptions.ClientError: when the service call fails. A
            short diagnostic is printed for the known error codes first.
    """
    # Imported here because the file's top-level imports never bring
    # ClientError into scope (botocore ships as a dependency of boto3).
    from botocore.exceptions import ClientError

    secret_name = secret

    session = boto3.session.Session()
    client = session.client(
        service_name='secretsmanager',
        region_name=region_name,
    )

    try:
        get_secret_value_response = client.get_secret_value(SecretId=secret_name)
    except ClientError as e:
        error_code = e.response['Error']['Code']
        if error_code == 'ResourceNotFoundException':
            print("The requested secret " + secret_name + " was not found")
        elif error_code == 'InvalidRequestException':
            print("The request was invalid due to:", e)
        elif error_code == 'InvalidParameterException':
            print("The request had invalid params:", e)
        elif error_code == 'DecryptionFailure':
            print("The requested secret can't be decrypted using the provided KMS key:", e)
        elif error_code == 'InternalServiceError':
            print("An error occurred on service side:", e)
        # Propagate instead of implicitly returning None: the caller passes
        # the result straight to json.loads, which would only fail later
        # with a less informative error.
        raise

    logger.info("Secret manager read complete")
    # Secrets Manager decrypts the secret value using the associated KMS CMK.
    # Depending on whether the secret was a string or binary, only one of
    # these fields will be populated.
    if 'SecretString' in get_secret_value_response:
        return get_secret_value_response['SecretString']
    return get_secret_value_response['SecretBinary']
|
97 |
+
|
98 |
+
def create_spark_session(config):
    """Create (or reuse) the Hive-enabled SparkSession for this job.

    Args:
        config: Parsed job configuration. Must contain 'spark_properties'
            (a mapping of Spark property name -> value). May contain
            'application_name'; a missing or empty value falls back to
            'DefaultApp'.

    Returns:
        The SparkSession, with log level set to ERROR and the session-level
        SQL settings listed below applied.

    Raises:
        Exception: "Error in Spark Session Creation." on any failure; the
            underlying error is attached as the cause.
    """
    logger.info("Inside create spark session")
    try:
        conf = SparkConf()

        # setting spark configuration properties provided in config file
        for key, value in dict(config['spark_properties']).items():
            conf.set(key, value)
        logger.info("Spark properties from config applied")

        # Missing key and empty string both fall back to the default name.
        app_name = config.get('application_name') or 'DefaultApp'
        logger.info("Creating Spark session for application : " + str(app_name))

        # creating spark session
        spark = SparkSession.builder.config(conf=conf).appName(app_name).enableHiveSupport().getOrCreate()
        spark.sparkContext.setLogLevel("ERROR")

        # Session-level overrides, applied in this order after creation so
        # they take precedence over anything in 'spark_properties'.
        session_settings = (
            ("spark.sql.autoBroadcastJoinThreshold", -1),
            ("spark.sql.legacy.parquet.datetimeRebaseModeInRead", 'LEGACY'),
            ("spark.sql.legacy.timeParserPolicy", 'CORRECTED'),
            ("spark.sql.legacy.parquet.int96RebaseModeInWrite", 'CORRECTED'),
            ("spark.sql.legacy.parquet.datetimeRebaseModeInWrite", 'CORRECTED'),
            ("spark.sql.legacy.parquet.int96RebaseModeInRead", 'CORRECTED'),
            ("spark.sql.shuffle.partitions", 100),
        )
        for setting_name, setting_value in session_settings:
            spark.conf.set(setting_name, setting_value)

        logger.info("Spark session object created")
        return spark
    except Exception as e:
        raise Exception("Error in Spark Session Creation.") from e
|
130 |
+
|
131 |
+
def read_file(spark,config,table):
    """Load a Hudi table and convert its commit-time column to a timestamp.

    Args:
        spark: Active SparkSession.
        config: Parsed job configuration; config['Paths'][table] is the
            location handed to the Hudi reader.
        table: Key into config['Paths'] identifying the table to load.

    Returns:
        DataFrame of the table with '_hoodie_commit_time' replaced by
        to_timestamp('yyyy-MM-dd HH:mm:ss'), built from the first 14
        characters of the original value.
    """
    # NOTE(review): the previous version built a `readOptions` dict
    # ('hoodie.datasource.query.type': 'incremental', ...) but never passed
    # it to the reader, so no read options were ever applied. The dead dict
    # is removed; confirm whether an incremental read was actually intended.
    path = config['Paths'][table]
    df = spark.read.format("hudi").load(path)

    # Slice the compact commit-time string into date/time parts:
    # positions 1-4 year, 5-6 month, 7-8 day, 9-10 hour, 11-12 min, 13-14 sec.
    commit_time = col('_hoodie_commit_time')
    formatted_commit_time = F.concat(
        F.substring(commit_time, 1, 4), F.lit('-'),
        F.substring(commit_time, 5, 2), F.lit('-'),
        F.substring(commit_time, 7, 2), F.lit(' '),
        F.substring(commit_time, 9, 2), F.lit(':'),
        F.substring(commit_time, 11, 2), F.lit(':'),
        F.substring(commit_time, 13, 2),
    )
    return df.withColumn('_hoodie_commit_time', to_timestamp(formatted_commit_time))
|
146 |
+
|
147 |
+
def get_max_audit_batch(conn,job_name, config):
    """Return the next batch id for the audit table.

    Runs MAX(BATCH_ID) + 1 over config['audit_table'], treating NULL ids
    and an empty table as 0, so the first batch id is 1.

    Args:
        conn: Open DB-API connection to the audit database.
        job_name: Not used by the query; kept for the caller's signature.
        config: Parsed job configuration providing 'audit_table'.

    Returns:
        The value of the first column of the first result row.
    """
    audit_table = config['audit_table']
    batch_query = "SELECT COALESCE(MAX(COALESCE(BATCH_ID,0)),0)+1 FROM " + audit_table

    cursor = conn.cursor()
    cursor.execute(batch_query)
    rows = cursor.fetchall()

    # Single aggregate row, single column. The value is already max + 1,
    # even though the log message calls it the maximum.
    next_batch_id = rows[0][0]
    logger.info("Maximum batch id in Audit Table is :" + str(next_batch_id))
    return next_batch_id
|
153 |
+
|
154 |
+
def read_max_update_date(conn, job_name, table, config):
    """Read the latest recorded max_update_date for a mart/source table pair.

    Args:
        conn: Open psycopg2 connection to the audit database.
        job_name: Value matched against the mart_table_name column.
        table: Value matched against the src_table_name column.
        config: Parsed job configuration providing 'audit_table'.

    Returns:
        The cursor's fetchall() result: a list with one row whose single
        column is MAX(max_update_date) (NULL/None when nothing matches).

    Raises:
        Exception: "Error reading audit table." on any failure; the
            underlying error is attached as the cause.
    """
    try:
        # The table name comes from config and cannot be a bound parameter;
        # the two filter values are bound so quoting is handled by the driver.
        query = ("SELECT MAX(max_update_date) from " + config['audit_table'] +
                 " WHERE mart_table_name = %s AND src_table_name = %s")
        cur = conn.cursor()
        cur.execute(query, (job_name, table))
        query_results = cur.fetchall()
    except Exception as e:
        print("Database connection failed due to {}".format(e))
        raise Exception("Error reading audit table.") from e
    # Logged before returning; previously this line sat after `return`
    # and could never run.
    logger.info("Reading max of max_update_date from audit table complete")
    return query_results
|
164 |
+
|
165 |
+
def insert_max_update_date(spark,conn, job_name, table, max_update_date,source_reference_date, max_batch_id, config):
    """Append one audit row recording how far a source table has been loaded.

    The row is inserted but not committed here; the caller owns the
    transaction on `conn`.

    Args:
        spark: Not used; kept for the caller's signature.
        conn: Open psycopg2 connection to the audit database.
        job_name: Stored in mart_table_name.
        table: Stored in src_table_name.
        max_update_date: Stored (as text) in max_update_date.
        source_reference_date: Stored (as text) in source_reference_date.
        max_batch_id: Stored in batch_id after a cast to int.
        config: Parsed job configuration providing 'audit_table'.

    Raises:
        Exception: "Error Updating audit table." on any failure; the
            underlying error is attached as the cause.
    """
    try:
        # The previous statement put "AS <alias>" inside the VALUES list,
        # which is not valid SQL. Values are now bound parameters; each is
        # passed through str() exactly as the old string-built statement did.
        insert_sql = (
            "INSERT INTO " + config['audit_table'] +
            "(mart_table_name, src_table_name, max_update_date, load_timestamp,"
            "source_reference_date,batch_id) "
            "VALUES (%s, %s, %s, SYSDATE, %s, cast(%s as int))"
        )
        params = (
            str(job_name),
            str(table),
            str(max_update_date),
            str(source_reference_date),
            str(max_batch_id),
        )
        cur = conn.cursor()
        cur.execute(insert_sql, params)

    except Exception as e:
        print("Database connection failed due to {}".format(e))
        raise Exception("Error Updating audit table.") from e
    logger.info("Inserting max max_update_date into audit table complete")
|
174 |
+
|
175 |
+
def write_file(spark,conn,redshift_iam_role,resultdf_path, config, table_name):
    """Reload table ``int.<table_name>`` from Parquet files via COPY.

    The table is truncated, then refilled with a COPY ... FORMAT PARQUET
    statement. Both statements carry their own trailing "commit;".

    Args:
        spark: Not used; kept for the caller's signature.
        conn: Open DB-API connection to the target database.
        redshift_iam_role: IAM role placed in the COPY credentials clause.
        resultdf_path: Source location given to COPY ... FROM.
        config: Not used; kept for the caller's signature.
        table_name: Table (inside schema ``int``) to truncate and reload.

    Raises:
        Exception: "Error Inserting target table." on any failure.
    """
    # Writing resultant data into incr table using copy command
    logger.info("write data to redshift started")
    try:
        target_table = f"int.{table_name}"
        cursor = conn.cursor()

        truncate_statement = f"Truncate table {target_table};commit;"
        cursor.execute(truncate_statement)

        copy_statement = (
            f"COPY {target_table} FROM '{resultdf_path}' "
            f"credentials 'aws_iam_role={redshift_iam_role}' FORMAT PARQUET; commit;"
        )
        cursor.execute(copy_statement)

    except Exception as e:
        print("Database connection failed due to {}".format(e))
        raise Exception("Error Inserting target table.")
    print("write complete")
    logger.info("upsert data to rds completed")
|
190 |
+
|
191 |
+
def main():
    """Run the templated ETL job end to end.

    Expects exactly one command-line argument: the path of the JSON config
    file. Reads the config, creates the Spark session, opens the Redshift
    connection using credentials from Secrets Manager, runs the generated
    code at the INSERT_CODE_1 / INSERT_CODE_2 placeholders, and loads the
    two Parquet outputs into Redshift.

    Any exception raised by the job is logged and printed, not re-raised.
    Cleanup in `finally` only touches resources that were actually created.
    """
    logger.info("Inside main function")
    if len(sys.argv) != 2:
        logger.info(len(sys.argv))
        logger.info("Command line arguments : " + str(sys.argv))
        logger.info("Incorrect command line arguments.")
        sys.exit(1)

    config = {}
    # None (rather than '') lets the cleanup below tell whether the session
    # and connection exist; previously an early failure made `finally`
    # raise and hide the original error.
    spark = None
    redshift_conn = None
    job_status = ''

    try:
        # reading json config file
        logger.info("Calling function to read config file")
        config = read_config(sys.argv[1])
        logger.info("Calling function to create Spark session object")
        # creating spark session
        spark = create_spark_session(config)
        logger.info("Calling function to read input file")
        start_time = datetime.datetime.now(timezone("Asia/Kolkata")).strftime('%Y-%m-%d %H:%M:%S')

        # creating redshift database connection
        redshift_secret = get_secret(config['redshift_secret'])
        redshift_secret = json.loads(redshift_secret)
        redshift_user = redshift_secret['username']
        redshift_pwd = redshift_secret['password']
        redshift_host = redshift_secret['host']
        redshift_port = str(redshift_secret['port'])
        redshift_dbname = redshift_secret['dbname']
        # creating database connection
        redshift_conn = psycopg2.connect(dbname=redshift_dbname, host=redshift_host, port=redshift_port, user=redshift_user, password=redshift_pwd)
        # start_time, redshift_dburl and cur are not used below; they are
        # kept because the code substituted for the placeholders may use them.
        redshift_dburl = "jdbc:postgresql://"+redshift_host+":"+redshift_port+"/"+redshift_dbname
        cur = redshift_conn.cursor()
        max_batch_id = get_max_audit_batch(redshift_conn, config['application_name'], config)

        INSERT_CODE_1

        # writing from parquet to table in database
        write_file(spark, redshift_conn, config['redshift_iam_role'],config['incr2df_path'],config, config['incr2df'])
        write_file(spark, redshift_conn, config['redshift_iam_role'],config['resultdf_path'],config, config['resultdf'])

        INSERT_CODE_2

        print('Run Successful')
        print('End of Code')

    except Exception as e:
        # job gets error
        job_status = 'Failed'
        # Record the full traceback in the job log as well as stdout.
        logger.exception("Job failed")
        print(e)
        # NOTE(review): the failure is swallowed here, so the process still
        # exits with status 0 - confirm whether the scheduler relies on that.

    finally:
        if spark is not None:
            spark.catalog.clearCache()
        if redshift_conn is not None:
            # NOTE(review): this commits even after a failure, as before -
            # confirm whether a rollback is wanted on the error path.
            redshift_conn.commit()
            redshift_conn.close()
        if spark is not None:
            spark.stop()


if __name__ == "__main__":
    # calling main function
    logger.info("Calling main function")
    main()
|
ydata_config.yml
ADDED
@@ -0,0 +1,219 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Title of the document
|
2 |
+
title: "Pandas Profiling Report"
|
3 |
+
|
4 |
+
# Metadata
|
5 |
+
dataset:
|
6 |
+
description: ""
|
7 |
+
creator: ""
|
8 |
+
author: ""
|
9 |
+
copyright_holder: ""
|
10 |
+
copyright_year: ""
|
11 |
+
url: ""
|
12 |
+
|
13 |
+
variables:
|
14 |
+
descriptions: {}
|
15 |
+
|
16 |
+
# infer dtypes
|
17 |
+
infer_dtypes: false
|
18 |
+
|
19 |
+
# Show the description at each variable (in addition to the overview tab)
|
20 |
+
show_variable_description: true
|
21 |
+
|
22 |
+
# Number of workers (0=multiprocessing.cpu_count())
|
23 |
+
pool_size: 0
|
24 |
+
|
25 |
+
# Show the progress bar
|
26 |
+
progress_bar: true
|
27 |
+
|
28 |
+
# Per variable type description settings
|
29 |
+
vars:
|
30 |
+
num:
|
31 |
+
quantiles:
|
32 |
+
- 0.05
|
33 |
+
- 0.25
|
34 |
+
- 0.5
|
35 |
+
- 0.75
|
36 |
+
- 0.95
|
37 |
+
skewness_threshold: 20
|
38 |
+
low_categorical_threshold: 5
|
39 |
+
# Set to zero to disable
|
40 |
+
chi_squared_threshold: 0.0
|
41 |
+
cat:
|
42 |
+
length: false
|
43 |
+
characters: false
|
44 |
+
words: false
|
45 |
+
cardinality_threshold: 50
|
46 |
+
n_obs: 5
|
47 |
+
# Set to zero to disable
|
48 |
+
chi_squared_threshold: 0.0
|
49 |
+
coerce_str_to_date: false
|
50 |
+
redact: false
|
51 |
+
histogram_largest: 10
|
52 |
+
stop_words: []
|
53 |
+
|
54 |
+
bool:
|
55 |
+
n_obs: 3
|
56 |
+
# string to boolean mapping dict
|
57 |
+
mappings:
|
58 |
+
t: true
|
59 |
+
f: false
|
60 |
+
yes: true
|
61 |
+
no: false
|
62 |
+
y: true
|
63 |
+
n: false
|
64 |
+
true: true
|
65 |
+
false: false
|
66 |
+
path:
|
67 |
+
active: false
|
68 |
+
file:
|
69 |
+
active: false
|
70 |
+
image:
|
71 |
+
active: false
|
72 |
+
exif: false
|
73 |
+
hash: false
|
74 |
+
url:
|
75 |
+
active: false
|
76 |
+
timeseries:
|
77 |
+
active: false
|
78 |
+
autocorrelation: 0.7
|
79 |
+
lags: [1, 7, 12, 24, 30]
|
80 |
+
significance: 0.05
|
81 |
+
pacf_acf_lag: 100
|
82 |
+
|
83 |
+
# Sort the variables. Possible values: "ascending", "descending" or null (leaves original sorting)
|
84 |
+
sort: null
|
85 |
+
|
86 |
+
# which diagrams to show
|
87 |
+
missing_diagrams:
|
88 |
+
bar: false
|
89 |
+
matrix: false
|
90 |
+
heatmap: false
|
91 |
+
|
92 |
+
correlations:
|
93 |
+
pearson:
|
94 |
+
calculate: false
|
95 |
+
warn_high_correlations: true
|
96 |
+
threshold: 0.9
|
97 |
+
spearman:
|
98 |
+
calculate: false
|
99 |
+
warn_high_correlations: false
|
100 |
+
threshold: 0.9
|
101 |
+
kendall:
|
102 |
+
calculate: false
|
103 |
+
warn_high_correlations: false
|
104 |
+
threshold: 0.9
|
105 |
+
phi_k:
|
106 |
+
calculate: false
|
107 |
+
warn_high_correlations: false
|
108 |
+
threshold: 0.9
|
109 |
+
cramers:
|
110 |
+
calculate: false
|
111 |
+
warn_high_correlations: true
|
112 |
+
threshold: 0.9
|
113 |
+
auto:
|
114 |
+
calculate: false
|
115 |
+
warn_high_correlations: true
|
116 |
+
threshold: 0.9
|
117 |
+
|
118 |
+
|
119 |
+
# Bivariate / Pairwise relations
|
120 |
+
interactions:
|
121 |
+
targets: []
|
122 |
+
continuous: false
|
123 |
+
|
124 |
+
# For categorical
|
125 |
+
categorical_maximum_correlation_distinct: 100
|
126 |
+
|
127 |
+
report:
|
128 |
+
precision: 10
|
129 |
+
|
130 |
+
# Plot-specific settings
|
131 |
+
plot:
|
132 |
+
# Image format (svg or png)
|
133 |
+
image_format: "svg"
|
134 |
+
dpi: 800
|
135 |
+
|
136 |
+
scatter_threshold: 1000
|
137 |
+
|
138 |
+
correlation:
|
139 |
+
cmap: 'RdBu'
|
140 |
+
bad: '#000000'
|
141 |
+
|
142 |
+
missing:
|
143 |
+
cmap: 'RdBu'
|
144 |
+
# Force labels when there are > 50 variables
|
145 |
+
force_labels: true
|
146 |
+
|
147 |
+
cat_frequency:
|
148 |
+
show: true # if false, the category frequency plot is turned off
|
149 |
+
type: 'bar' # options: 'bar', 'pie'
|
150 |
+
max_unique: 0
|
151 |
+
colors: null # use null for default or give a list of matplotlib recognised strings
|
152 |
+
|
153 |
+
histogram:
|
154 |
+
x_axis_labels: true
|
155 |
+
|
156 |
+
# Number of bins (set to 0 to automatically detect the bin size)
|
157 |
+
bins: 50
|
158 |
+
|
159 |
+
# Maximum number of bins (when bins=0)
|
160 |
+
max_bins: 250
|
161 |
+
|
162 |
+
font_path: null
|
163 |
+
|
164 |
+
# The number of observations to show
|
165 |
+
n_obs_unique: 5
|
166 |
+
n_extreme_obs: 5
|
167 |
+
n_freq_table_max: 10
|
168 |
+
|
169 |
+
# Use `deep` flag for memory_usage
|
170 |
+
memory_deep: false
|
171 |
+
|
172 |
+
# Configuration related to the duplicates
|
173 |
+
duplicates:
|
174 |
+
head: 0
|
175 |
+
key: "# duplicates"
|
176 |
+
|
177 |
+
# Configuration related to the samples area
|
178 |
+
samples:
|
179 |
+
head: 0
|
180 |
+
tail: 0
|
181 |
+
random: 0
|
182 |
+
|
183 |
+
# Configuration related to the rejection of variables
|
184 |
+
reject_variables: true
|
185 |
+
|
186 |
+
# When in a Jupyter notebook
|
187 |
+
notebook:
|
188 |
+
iframe:
|
189 |
+
height: '800px'
|
190 |
+
width: '100%'
|
191 |
+
# or 'src'
|
192 |
+
attribute: 'srcdoc'
|
193 |
+
|
194 |
+
html:
|
195 |
+
# Minify the html
|
196 |
+
minify_html: true
|
197 |
+
|
198 |
+
# Offline support
|
199 |
+
use_local_assets: true
|
200 |
+
|
201 |
+
# If true, single file, else directory with assets
|
202 |
+
inline: true
|
203 |
+
|
204 |
+
# Show navbar
|
205 |
+
navbar_show: true
|
206 |
+
|
207 |
+
# Assets prefix if inline = true
|
208 |
+
assets_prefix: null
|
209 |
+
|
210 |
+
# Styling options for the HTML report
|
211 |
+
style:
|
212 |
+
theme: null
|
213 |
+
logo: ""
|
214 |
+
primary_colors:
|
215 |
+
- "#377eb8"
|
216 |
+
- "#e41a1c"
|
217 |
+
- "#4daf4a"
|
218 |
+
|
219 |
+
full_width: false
|