vishanth10 committed on
Commit
8959c46
·
1 Parent(s): 61f09d1

New UI and resolved bugs

Browse files
Files changed (2) hide show
  1. .txt +298 -0
  2. app.py +180 -211
.txt ADDED
@@ -0,0 +1,298 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # import streamlit as st
2
+ # from carbon import Carbon
3
+ # import requests
4
+ # import json
5
+
6
+ # # Carbon API Key
7
+ # CARBON_API_KEY = "a38ee1fe5fef56fc8e1ae2afc881378804bb902882442e1554adae4f82ee23ea"
8
+ # CUSTOMER_ID = "Candid"
9
+
10
+ # def get_google_drive_oauth(carbon):
11
+ # get_oauth_url_response = carbon.integrations.get_oauth_url(
12
+ # service="GOOGLE_DRIVE",
13
+ # scope="https://www.googleapis.com/auth/drive.readonly",
14
+ # connecting_new_account=True,
15
+ # )
16
+ # return get_oauth_url_response.oauth_url
17
+
18
+ # def get_dropbox_oauth(carbon):
19
+ # get_oauth_url_response = carbon.integrations.get_oauth_url(
20
+ # service="DROPBOX",
21
+ # connecting_new_account=True,
22
+ # )
23
+ # return get_oauth_url_response.oauth_url
24
+
25
+ # def get_notion_oauth(carbon):
26
+ # get_oauth_url_response = carbon.integrations.get_oauth_url(
27
+ # service="NOTION",
28
+ # connecting_new_account=True,
29
+ # )
30
+ # return get_oauth_url_response.oauth_url
31
+
32
+ # def sync_github(carbon, username, token):
33
+ # sync_response = carbon.integrations.sync_git_hub(
34
+ # username=username,
35
+ # token=token,
36
+ # sync_source_items=True
37
+ # )
38
+ # return sync_response
39
+
40
+ # def sync_gitbook(carbon, access_token, organization):
41
+ # sync_response = carbon.integrations.sync_git_book(
42
+ # access_token=access_token,
43
+ # organization=organization,
44
+ # sync_source_items=True
45
+ # )
46
+ # return sync_response
47
+
48
+ # def sync_s3(carbon, access_key, access_key_secret):
49
+ # sync_response = carbon.integrations.sync_s3(
50
+ # access_key=access_key,
51
+ # access_key_secret=access_key_secret,
52
+ # sync_source_items=True
53
+ # )
54
+ # return sync_response
55
+
56
+ # def sync_google_drive(carbon, data_source_id):
57
+ # sync_response = carbon.integrations.sync_data_source_items(data_source_id=int(data_source_id))
58
+ # return sync_response
59
+
60
+
61
+ # def list_files(carbon, data_source_id=None, service="GOOGLE_DRIVE"):
62
+ # if data_source_id:
63
+ # sync_google_drive(carbon, data_source_id)
64
+ # list_files_response = carbon.integrations.list_data_source_items(data_source_id=int(data_source_id))
65
+ # st.session_state['current_data_source'] = data_source_id # Store the current data source
66
+ # st.session_state['files'] = list_files_response.items # Store the fetched files
67
+ # else:
68
+ # if 'current_data_source' in st.session_state and st.session_state['current_data_source']:
69
+ # data_source_id = st.session_state['current_data_source']
70
+ # sync_google_drive(carbon, data_source_id)
71
+ # list_files_response = carbon.integrations.list_data_source_items(data_source_id=int(data_source_id))
72
+ # st.session_state['files'] = list_files_response.items # Store the fetched files
73
+ # else:
74
+ # query_user_data_sources_response = carbon.data_sources.query_user_data_sources(
75
+ # pagination={"limit": 100, "offset": 0},
76
+ # order_by="created_at",
77
+ # order_dir="desc",
78
+ # filters={"source": service},
79
+ # )
80
+ # if query_user_data_sources_response.results:
81
+ # data_source_id = query_user_data_sources_response.results[0].id
82
+ # sync_google_drive(carbon, data_source_id)
83
+ # list_files_response = carbon.integrations.list_data_source_items(data_source_id=int(data_source_id))
84
+ # st.session_state['current_data_source'] = data_source_id
85
+ # st.session_state['files'] = list_files_response.items # Store the fetched files
86
+ # else:
87
+ # list_files_response = None
88
+ # return list_files_response.items if list_files_response else None
89
+
90
+ # def list_all_files(carbon, data_source_id):
91
+ # url = "https://api.carbon.ai/user_files_v2"
92
+ # payload = {
93
+ # "pagination": {
94
+ # "limit": 100,
95
+ # "offset": 0
96
+ # },
97
+ # "order_by": "created_at",
98
+ # "order_dir": "desc",
99
+ # "filters": {
100
+ # "organization_user_data_source_id": [data_source_id],
101
+ # "embedding_generators": ["OPENAI"],
102
+ # "include_all_children": True,
103
+ # },
104
+ # "include_raw_file": True,
105
+ # "include_parsed_text_file": True,
106
+ # "include_additional_files": True
107
+ # }
108
+ # headers = {
109
+ # "authorization": f"Bearer {CARBON_API_KEY}",
110
+ # "customer-id": CUSTOMER_ID,
111
+ # "Content-Type": "application/json"
112
+ # }
113
+
114
+ # response = requests.request("POST", url, json=payload, headers=headers)
115
+ # res = json.loads(response.text)
116
+ # file_id= res['results'][0]['id']
117
+ # for i,document in enumerate(res['results']):
118
+ # print(document['name'])
119
+ # print(document['id'])
120
+
121
+ # file_id=res['results'][0]['id']
122
+ # print(file_id)
123
+ # return res['results']
124
+
125
+
126
+ # def list_user_documents(carbon):
127
+ # query_user_documents_response = carbon.documents.query_documents(
128
+ # pagination={"limit": 100, "offset": 0},
129
+ # order_by="created_at",
130
+ # order_dir="desc"
131
+ # )
132
+ # return query_user_documents_response.documents if query_user_documents_response else None
133
+
134
+ # def semantic_search_v2(carbon, query, tags_v2=None, hybrid_search=False):
135
+ # search_response = carbon.embeddings.get_documents(
136
+ # query=query,
137
+ # k=2,
138
+ # tags_v2=tags_v2 if tags_v2 else {},
139
+ # include_tags=True,
140
+ # include_vectors=True,
141
+ # include_raw_file=True,
142
+ # hybrid_search=hybrid_search,
143
+ # hybrid_search_tuning_parameters={
144
+ # "weight_a": 0.5,
145
+ # "weight_b": 0.5,
146
+ # },
147
+ # media_type="TEXT",
148
+ # embedding_model="OPENAI",
149
+ # )
150
+ # return search_response.documents
151
+
152
+
153
+ # def main():
154
+ # st.title('Data Connector using Carbon SDK')
155
+
156
+ # # Authenticate with Carbon API
157
+ # st.write('### Authenticate with Carbon API')
158
+ # carbon = Carbon(api_key=CARBON_API_KEY, customer_id=CUSTOMER_ID)
159
+ # token = carbon.auth.get_access_token()
160
+ # carbon = Carbon(access_token=token.access_token) # authenticated object
161
+
162
+ # # Connect to Data Source
163
+ # st.write('## Connect to Data Source')
164
+ # service = st.selectbox('Select Data Source for OAuth', ['GOOGLE_DRIVE', 'DROPBOX', 'NOTION'])
165
+ # if st.button('Get OAuth URL'):
166
+ # with st.spinner('Fetching OAuth URL...'):
167
+ # try:
168
+ # if service == "GOOGLE_DRIVE":
169
+ # oauth_url = get_google_drive_oauth(carbon)
170
+ # elif service == "DROPBOX":
171
+ # oauth_url = get_dropbox_oauth(carbon)
172
+ # elif service == "NOTION":
173
+ # oauth_url = get_notion_oauth(carbon)
174
+ # st.write(f"OAuth URL for {service}: {oauth_url}")
175
+ # st.session_state['current_data_source'] = None # Reset the current data source
176
+ # st.session_state['files'] = None # Clear the previous files
177
+ # st.session_state['oauth_fetched'] = True
178
+ # except Exception as e:
179
+ # st.error(f"An error occurred: {e}")
180
+
181
+ # if 'oauth_fetched' in st.session_state and st.session_state['oauth_fetched']:
182
+ # st.write("OAuth URL fetched. Please authenticate and then click 'Sync and Fetch Files'.")
183
+ # if st.button('Sync and Fetch Files'):
184
+ # with st.spinner('Syncing and fetching files...'):
185
+ # try:
186
+ # query_user_data_sources_response = carbon.data_sources.query_user_data_sources(
187
+ # pagination={"limit": 100, "offset": 0},
188
+ # order_by="created_at",
189
+ # order_dir="desc",
190
+ # filters={"source": service},
191
+ # )
192
+ # if query_user_data_sources_response.results:
193
+ # data_source_id = query_user_data_sources_response.results[0].id
194
+ # sync_google_drive(carbon, data_source_id)
195
+ # st.session_state['current_data_source'] = data_source_id
196
+ # st.session_state['oauth_fetched'] = False
197
+ # st.success("Synced successfully! Now you can list the files.")
198
+ # else:
199
+ # st.error("No data sources found. Please ensure the connection was successful.")
200
+ # except Exception as e:
201
+ # st.error(f"An error occurred: {e}")
202
+
203
+ # # List Files in Data Source
204
+ # st.write(f'## List Files in {service}')
205
+ # data_source_id = st.text_input('Enter Data Source ID (leave blank to list all files)')
206
+ # if st.button('List Files'):
207
+ # with st.spinner('Fetching files...'):
208
+ # try:
209
+ # if 'current_data_source' in st.session_state and st.session_state['current_data_source']:
210
+ # data_source_id = st.session_state['current_data_source']
211
+ # files = list_files(carbon, data_source_id if data_source_id else None, service)
212
+ # if files:
213
+ # st.write(f"Files in {service}:")
214
+ # for item in files:
215
+ # st.write(f"File ID: {item.id}, File Name: {item.name}, File Size: {item.size if hasattr(item, 'size') else 'N/A'}, Last Modified: {item.last_modified if hasattr(item, 'last_modified') else 'N/A'}")
216
+ # else:
217
+ # st.write("No files found.")
218
+ # except Exception as e:
219
+ # st.error(f"An error occurred: {e}")
220
+
221
+ # # List All Files
222
+ # st.write('### List All Files')
223
+ # if st.button('List All Files'):
224
+ # with st.spinner('Fetching all files...'):
225
+ # try:
226
+ # if 'current_data_source' in st.session_state and st.session_state['current_data_source']:
227
+ # data_source_id = st.session_state['current_data_source']
228
+ # all_files = list_all_files(carbon, data_source_id)
229
+ # if all_files:
230
+ # st.write("All files:")
231
+ # for i, document in enumerate(all_files):
232
+ # file_id = document['id']
233
+ # st.write(f"File ID: {document['id']}, File Name: {document['name']}")
234
+ # else:
235
+ # st.write("No files found.")
236
+ # except Exception as e:
237
+ # st.error(f"An error occurred: {e}")
238
+
239
+ # # Search in the Connected Data Source
240
+ # st.write('### Search in the Connected Data Source')
241
+ # query = st.text_input("Enter your query:", value="Type here...")
242
+ # if st.button('Search'):
243
+ # if query:
244
+ # with st.spinner('Searching...'):
245
+ # try:
246
+ # all_files = list_all_files(carbon, data_source_id)
247
+ # url = "https://api.carbon.ai/embeddings"
248
+ # payload = {
249
+ # "query": query,
250
+ # "k": 2,
251
+ # "file_ids": file_id,
252
+ # "include_all_children": True,
253
+ # "tags": {},
254
+ # "include_tags": True,
255
+ # "include_vectors": True,
256
+ # "include_raw_file": True,
257
+ # "hybrid_search": False,
258
+ # "media_type": "TEXT",
259
+ # "embedding_model": "OPENAI"
260
+ # }
261
+ # headers = {
262
+ # "authorization": f"Bearer {CARBON_API_KEY}",
263
+ # "customer-id": CUSTOMER_ID,
264
+ # "Content-Type": "application/json"
265
+ # }
266
+ # response_search = requests.post(url, json=payload, headers=headers)
267
+ # response_search_chunks = json.loads(response_search.text)
268
+
269
+ # st.write("Search results:")
270
+ # for i, doc in enumerate(response_search_chunks['documents']):
271
+ # st.write(f"Document {i+1}:")
272
+ # st.write(f"Content: {doc['content']}")
273
+ # st.write(f"Source: {doc['source']}")
274
+ # st.write(f"Match Percentage: {doc['score'] * 100}%")
275
+ # if 'file_url' in doc:
276
+ # st.markdown(f"[Download {doc['filename']}]({doc['file_url']})")
277
+ # st.write("-------------------------------------------------")
278
+ # except Exception as e:
279
+ # st.error(f"An error occurred: {e}")
280
+ # else:
281
+ # st.write("Please enter a query to search.")
282
+
283
+ # # Display Search History
284
+ # st.write('## Search History')
285
+ # if 'search_history' not in st.session_state:
286
+ # st.session_state['search_history'] = []
287
+
288
+ # if query and st.button('Add to Search History'):
289
+ # st.session_state['search_history'].append(query)
290
+
291
+ # if st.session_state['search_history']:
292
+ # st.write("Past Searches:")
293
+ # for past_query in st.session_state['search_history']:
294
+ # st.write(past_query)
295
+
296
+ # # Call the main function
297
+ # if __name__ == '__main__':
298
+ # main()
app.py CHANGED
@@ -1,12 +1,17 @@
1
  import streamlit as st
2
  from carbon import Carbon
 
3
  import requests
4
  import json
5
 
6
- # Carbon API Key
7
  CARBON_API_KEY = "a38ee1fe5fef56fc8e1ae2afc881378804bb902882442e1554adae4f82ee23ea"
8
- customer_id = "Candid"
9
 
 
 
 
 
10
  def get_google_drive_oauth(carbon):
11
  get_oauth_url_response = carbon.integrations.get_oauth_url(
12
  service="GOOGLE_DRIVE",
@@ -15,6 +20,7 @@ def get_google_drive_oauth(carbon):
15
  )
16
  return get_oauth_url_response.oauth_url
17
 
 
18
  def get_dropbox_oauth(carbon):
19
  get_oauth_url_response = carbon.integrations.get_oauth_url(
20
  service="DROPBOX",
@@ -22,6 +28,7 @@ def get_dropbox_oauth(carbon):
22
  )
23
  return get_oauth_url_response.oauth_url
24
 
 
25
  def get_notion_oauth(carbon):
26
  get_oauth_url_response = carbon.integrations.get_oauth_url(
27
  service="NOTION",
@@ -29,220 +36,182 @@ def get_notion_oauth(carbon):
29
  )
30
  return get_oauth_url_response.oauth_url
31
 
32
- def sync_github(carbon, username, token):
33
- sync_response = carbon.integrations.sync_git_hub(
34
- username=username,
35
- token=token,
36
- sync_source_items=True
 
 
37
  )
38
- return sync_response
39
-
40
- def sync_gitbook(carbon, access_token, organization):
41
- sync_response = carbon.integrations.sync_git_book(
42
- access_token=access_token,
43
- organization=organization,
44
- sync_source_items=True
 
45
  )
46
- return sync_response
47
 
48
- def sync_s3(carbon, access_key, access_key_secret):
49
- sync_response = carbon.integrations.sync_s3(
50
- access_key=access_key,
51
- access_key_secret=access_key_secret,
52
- sync_source_items=True
53
- )
54
- return sync_response
55
-
56
- def list_files(carbon, data_source_id=None, service="GOOGLE_DRIVE"):
57
- if data_source_id:
58
- list_files_response = carbon.integrations.list_data_source_items(data_source_id=int(data_source_id))
59
- else:
60
- query_user_data_sources_response = carbon.data_sources.query_user_data_sources(
61
- pagination={"limit": 100, "offset": 0},
62
- order_by="created_at",
63
- order_dir="desc",
64
- filters={"source": service},
65
- )
66
- if query_user_data_sources_response.results:
67
- data_source_id = query_user_data_sources_response.results[0].id
68
- list_files_response = carbon.integrations.list_data_source_items(data_source_id=int(data_source_id))
69
- else:
70
- list_files_response = None
71
- return list_files_response.items if list_files_response else None
72
-
73
- def list_user_documents(carbon):
74
- query_user_documents_response = carbon.documents.query_documents(
75
  pagination={"limit": 100, "offset": 0},
76
  order_by="created_at",
77
- order_dir="desc"
78
  )
79
- return query_user_documents_response.documents if query_user_documents_response else None
80
-
81
- def semantic_search_v2(carbon, query, tags_v2=None, hybrid_search=False):
82
- search_response = carbon.embeddings.get_documents(
83
- query=query,
84
- k=2,
85
- tags_v2=tags_v2 if tags_v2 else {},
86
- include_tags=True,
87
- include_vectors=True,
88
- include_raw_file=True,
89
- hybrid_search=hybrid_search,
90
- hybrid_search_tuning_parameters={
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
  "weight_a": 0.5,
92
- "weight_b": 0.5,
93
  },
94
- media_type="TEXT",
95
- embedding_model="OPENAI",
96
- )
97
- return search_response.documents
98
-
99
-
100
- def main():
101
- st.title('Data Connector using Carbon SDK')
102
-
103
- # Authenticate with Carbon API
104
- st.write('### Authenticate with Carbon API')
105
- carbon = Carbon(api_key=CARBON_API_KEY, customer_id=customer_id)
106
- token = carbon.auth.get_access_token()
107
- carbon = Carbon(access_token=token.access_token) # authenticated object
108
-
109
- # Connect to Data Source
110
- st.write('## Connect to Data Source')
111
- service = st.selectbox('Select Data Source for OAuth', ['GOOGLE_DRIVE', 'DROPBOX', 'NOTION'])
112
- if st.button('Get OAuth URL'):
113
- with st.spinner('Fetching OAuth URL...'):
114
- try:
115
- if service == "GOOGLE_DRIVE":
116
- oauth_url = get_google_drive_oauth(carbon)
117
- elif service == "DROPBOX":
118
- oauth_url = get_dropbox_oauth(carbon)
119
- elif service == "NOTION":
120
- oauth_url = get_notion_oauth(carbon)
121
- st.write(f"OAuth URL for {service}: {oauth_url}")
122
- except Exception as e:
123
- st.error(f"An error occurred: {e}")
124
-
125
- # Sync Data Sources
126
- st.write('## Sync Data Sources')
127
- sync_service = st.selectbox('Select Data Source for Sync', ['GITHUB', 'GITBOOK', 'S3'])
128
-
129
- if sync_service == 'GITHUB':
130
- github_username = st.text_input('Enter GitHub Username')
131
- github_token = st.text_input('Enter GitHub Token', type="password")
132
- if st.button('Sync GitHub'):
133
- with st.spinner('Syncing GitHub...'):
134
- try:
135
- sync_response = sync_github(carbon, github_username, github_token)
136
- st.write(f"GitHub sync response: {sync_response}")
137
- except Exception as e:
138
- st.error(f"An error occurred: {e}")
139
-
140
- elif sync_service == 'GITBOOK':
141
- gitbook_access_token = st.text_input('Enter GitBook Access Token', type="password")
142
- gitbook_organization = st.text_input('Enter GitBook Organization')
143
- if st.button('Sync GitBook'):
144
- with st.spinner('Syncing GitBook...'):
145
- try:
146
- sync_response = sync_gitbook(carbon, gitbook_access_token, gitbook_organization)
147
- st.write(f"GitBook sync response: {sync_response}")
148
- except Exception as e:
149
- st.error(f"An error occurred: {e}")
150
-
151
- elif sync_service == 'S3':
152
- s3_access_key = st.text_input('Enter S3 Access Key')
153
- s3_access_key_secret = st.text_input('Enter S3 Access Key Secret', type="password")
154
- if st.button('Sync S3'):
155
- with st.spinner('Syncing S3...'):
156
- try:
157
- sync_response = sync_s3(carbon, s3_access_key, s3_access_key_secret)
158
- st.write(f"S3 sync response: {sync_response}")
159
- except Exception as e:
160
- st.error(f"An error occurred: {e}")
161
-
162
- # List Files in Data Source
163
- st.write(f'## List Files in {service}')
164
- data_source_id = st.text_input('Enter Data Source ID (leave blank to list all files)')
165
- if st.button('List Files'):
166
- with st.spinner('Fetching files...'):
167
- try:
168
- files = list_files(carbon, data_source_id if data_source_id else None, service)
169
- if files:
170
- st.write(f"Files in {service}:")
171
- for item in files:
172
- st.write(f"File ID: {item.id}, File Name: {item.name}, File Size: {item.size if hasattr(item, 'size') else 'N/A'}, Last Modified: {item.last_modified if hasattr(item, 'last_modified') else 'N/A'}")
173
- else:
174
- st.write("No files found.")
175
- except Exception as e:
176
- st.error(f"An error occurred: {e}")
177
-
178
- # List All Documents Uploaded by User
179
- # st.write('## List All Documents Uploaded by User')
180
- # if st.button('List User Documents'):
181
- # with st.spinner('Fetching user documents...'):
182
- # try:
183
- # documents = list_user_documents(carbon)
184
- # if documents:
185
- # st.write("User Documents:")
186
- # for doc in documents:
187
- # st.write(f"Document ID: {doc.id}, Document Name: {doc.name}, Uploaded At: {doc.created_at}")
188
- # else:
189
- # st.write("No documents found.")
190
- # except Exception as e:
191
- # st.error(f"An error occurred: {e}")
192
-
193
- # Search in the Connected Data Source
194
- st.write('### Search in the Connected Data Source')
195
- query = st.text_input("Enter your query:", value="Type here...")
196
- if st.button('Search'):
197
- if query:
198
- with st.spinner('Searching...'):
199
- try:
200
- url = "https://api.carbon.ai/embeddings"
201
- payload = {
202
- "query": query,
203
- "k": 2,
204
- "file_ids": [], # Modify to include relevant file IDs if needed
205
- "include_all_children": True,
206
- "tags": {},
207
- "include_tags": True,
208
- "include_vectors": True,
209
- "include_raw_file": True,
210
- "hybrid_search": False,
211
- "media_type": "TEXT",
212
- "embedding_model": "OPENAI"
213
- }
214
- headers = {
215
- "authorization": f"Bearer {CARBON_API_KEY}",
216
- "customer-id": customer_id,
217
- "Content-Type": "application/json"
218
- }
219
- response_search = requests.post(url, json=payload, headers=headers)
220
- response_search_chunks = json.loads(response_search.text)
221
-
222
- st.write("Search results:")
223
- for i, doc in enumerate(response_search_chunks['documents']):
224
- st.write(f"Document {i+1}:")
225
- st.write(f"Content: {doc['content']}")
226
- st.write(f"Source: {doc['source']}")
227
- st.write(f"Match Percentage: {doc['score'] * 100}%")
228
- if 'file_url' in doc:
229
- st.markdown(f"[Download {doc['filename']}]({doc['file_url']})")
230
- st.write("-------------------------------------------------")
231
- except Exception as e:
232
- st.error(f"An error occurred: {e}")
233
- else:
234
- st.write("Please enter a query to search.")
235
-
236
- # Display Search History
237
- st.write('## Search History')
238
- if 'search_history' not in st.session_state:
239
- st.session_state['search_history'] = []
240
-
241
- if query and st.button('Add to Search History'):
242
- st.session_state['search_history'].append(query)
243
-
244
- if st.session_state['search_history']:
245
- st.write("Past Searches:")
246
-
247
- if __name__ == '__main__':
248
- main()
 
1
import json
import os
from decimal import Decimal

import requests
import streamlit as st
from carbon import Carbon
6
 
7
# Constants
# SECURITY NOTE(review): the literal below looks like a real API credential that
# has been committed to the repository. It is kept only as a fallback so that
# existing deployments keep working; rotate the key, provide it through the
# CARBON_API_KEY environment variable, and then remove the fallback.
CARBON_API_KEY = os.environ.get(
    "CARBON_API_KEY",
    "a38ee1fe5fef56fc8e1ae2afc881378804bb902882442e1554adae4f82ee23ea",
)
CUSTOMER_ID = os.environ.get("CARBON_CUSTOMER_ID", "Candid")

# Initialize Carbon SDK
# Module-level client shared by the helper functions below.
carbon = Carbon(api_key=CARBON_API_KEY, customer_id=CUSTOMER_ID)
13
+
14
+ # Authenticate and get OAuth URL for Google Drive
15
  def get_google_drive_oauth(carbon):
16
  get_oauth_url_response = carbon.integrations.get_oauth_url(
17
  service="GOOGLE_DRIVE",
 
20
  )
21
  return get_oauth_url_response.oauth_url
22
 
23
+ # Authenticate and get OAuth URL for Dropbox
24
  def get_dropbox_oauth(carbon):
25
  get_oauth_url_response = carbon.integrations.get_oauth_url(
26
  service="DROPBOX",
 
28
  )
29
  return get_oauth_url_response.oauth_url
30
 
31
+ # Authenticate and get OAuth URL for Notion
32
  def get_notion_oauth(carbon):
33
  get_oauth_url_response = carbon.integrations.get_oauth_url(
34
  service="NOTION",
 
36
  )
37
  return get_oauth_url_response.oauth_url
38
 
39
+ # Get data source ID
40
def get_data_source_id(service):
    """Return the ID of the first data source returned for ``service``.

    Queries the module-level ``carbon`` client for the user's data sources,
    filtered by ``source == service`` and ordered by ``created_at``
    descending (at most 100 results), and returns the ``id`` of the first one.

    Args:
        service: Value used for the ``source`` filter (the UI passes
            "GOOGLE_DRIVE", "DROPBOX" or "NOTION").

    Returns:
        The ``id`` attribute of the first result.

    Raises:
        IndexError: If the query returns no data sources for ``service``.
    """
    response = carbon.data_sources.query_user_data_sources(
        pagination={"limit": 100, "offset": 0},
        order_by="created_at",
        order_dir="desc",
        filters={"source": service},
    )
    results = response.results
    if not results:
        # Same exception type the bare ``results[0]`` raised before, but with
        # a message that tells the user what to do about it.
        raise IndexError(
            f"No {service} data source found; connect one via OAuth first."
        )
    return results[0].id
48
+
49
+ # List files in the data source
50
def list_files(data_source_id):
    """Return the items listed for the given data source.

    Calls ``list_data_source_items`` on the module-level ``carbon`` client
    with an empty filter and a single page (limit 250, offset 0), and returns
    the ``items`` attribute of the response.

    Args:
        data_source_id: Identifier of the data source to list.
    """
    listing = carbon.integrations.list_data_source_items(
        pagination={"limit": 250, "offset": 0},
        filters={},
        data_source_id=data_source_id,
    )
    return listing.items
57
 
58
+ # List all data sources associated with the user
59
def list_user_data_sources():
    """Return the data sources the ``carbon`` client reports for the user.

    Requests a single page (limit 100, offset 0) ordered by ``created_at``
    descending and returns the ``results`` attribute of the response.
    """
    query_args = {
        "pagination": {"limit": 100, "offset": 0},
        "order_by": "created_at",
        "order_dir": "desc",
    }
    return carbon.data_sources.query_user_data_sources(**query_args).results
66
+
67
+ # List files uploaded by the user
68
def list_uploaded_files(data_source_id):
    """Return the ``results`` list from Carbon's ``user_files_v2`` endpoint.

    POSTs a single-page query (limit 100, offset 0, newest first) filtered to
    the given data source and the "OPENAI" embedding generator, authenticating
    with ``CARBON_API_KEY`` and ``CUSTOMER_ID``.

    Args:
        data_source_id: Sent as the only element of the
            ``organization_user_data_source_id`` filter.

    Returns:
        The value stored under ``"results"`` in the JSON response.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not answer within the timeout.
    """
    url = "https://api.carbon.ai/user_files_v2"
    payload = {
        "pagination": {
            "limit": 100,
            "offset": 0,
        },
        "order_by": "created_at",
        "order_dir": "desc",
        "filters": {
            "organization_user_data_source_id": [data_source_id],
            "embedding_generators": ["OPENAI"],
            "include_all_children": True,
        },
        "include_raw_file": True,
        "include_parsed_text_file": True,
        "include_additional_files": True,
    }
    headers = {
        "authorization": f"Bearer {CARBON_API_KEY}",
        "customer-id": CUSTOMER_ID,
        "Content-Type": "application/json",
    }
    # Without a timeout a stalled connection would hang the Streamlit run.
    response = requests.post(url, json=payload, headers=headers, timeout=30)
    # Surface HTTP errors directly instead of a KeyError on the missing
    # "results" key of an error payload.
    response.raise_for_status()
    return response.json()["results"]
93
+
94
+ # Search function
95
def search_documents(query, file_ids):
    """Run a search against Carbon's ``embeddings`` endpoint.

    POSTs the query with ``k=2``, hybrid search disabled, media type "TEXT"
    and embedding model "OPENAI", authenticating with ``CARBON_API_KEY`` and
    ``CUSTOMER_ID``.

    Args:
        query: Search text sent as ``"query"``.
        file_ids: Sent unchanged as ``"file_ids"`` in the request body.

    Returns:
        The value stored under ``"documents"`` in the JSON response.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not answer within the timeout.
    """
    url = "https://api.carbon.ai/embeddings"
    payload = {
        "query": query,
        "k": 2,
        "file_ids": file_ids,
        "include_all_children": True,
        "include_tags": True,
        "include_vectors": True,
        "include_raw_file": True,
        "hybrid_search": False,
        "hybrid_search_tuning_parameters": {
            "weight_a": 0.5,
            "weight_b": 0.5,
        },
        "media_type": "TEXT",
        "embedding_model": "OPENAI",
    }
    headers = {
        "authorization": f"Bearer {CARBON_API_KEY}",
        "customer-id": CUSTOMER_ID,
        "Content-Type": "application/json",
    }
    # Without a timeout a stalled connection would hang the Streamlit run.
    response = requests.post(url, json=payload, headers=headers, timeout=30)
    # Surface HTTP errors directly instead of a KeyError on the missing
    # "documents" key of an error payload.
    response.raise_for_status()
    # Parse the body once; the debug print of the full response was dropped.
    return response.json()["documents"]
122
+
123
# Streamlit UI
st.title("Data Connector using Carbon SDK")

# Authenticate with Carbon API
st.header("Authenticate with Carbon API")

# Streamlit re-executes this script on every widget interaction, so a plain
# local list would be empty again by the time "Search" is clicked. The file IDs
# collected by "Show Uploaded Files" are therefore kept in session state.
if "file_ids" not in st.session_state:
    st.session_state["file_ids"] = []

# Connect to Data Source
st.subheader("Connect to Data Source")
data_source = st.selectbox("Select Data Source for OAuth", ["GOOGLE_DRIVE", "DROPBOX", "NOTION"])
if st.button("Get OAuth URL"):
    # One OAuth helper per option offered by the selectbox above.
    oauth_getters = {
        "GOOGLE_DRIVE": get_google_drive_oauth,
        "DROPBOX": get_dropbox_oauth,
        "NOTION": get_notion_oauth,
    }
    try:
        oauth_url = oauth_getters[data_source](carbon)
    except Exception as e:  # UI boundary: report the failure instead of crashing
        st.error(f"An error occurred: {e}")
    else:
        st.write(f"OAuth URL for {data_source}: {oauth_url}")
        st.markdown(f'<a href="{oauth_url}" target="_blank">Authenticate {data_source}</a>', unsafe_allow_html=True)

# List User Data Sources
st.subheader("List Data Sources")
if st.button("List Data Sources"):
    try:
        data_sources = list_user_data_sources()
    except Exception as e:  # UI boundary: report the failure instead of crashing
        st.error(f"An error occurred: {e}")
    else:
        st.write("Data Sources associated with the user:")
        for ds in data_sources:
            st.write(f"ID: {ds.id}, External ID: {ds.data_source_external_id}, Type: {ds.data_source_type}, "
                     f"Sync Status: {ds.sync_status}, Created At: {ds.created_at}, Updated At: {ds.updated_at}")

# List Files in Data Source
st.subheader(f"List Files in {data_source}")
if st.button("List Files"):
    try:
        data_source_id = get_data_source_id(data_source)
        files = list_files(data_source_id)
    except Exception as e:  # e.g. no data source connected yet
        st.error(f"An error occurred: {e}")
    else:
        st.write(f"Files in {data_source}:")
        for file in files:
            st.write(file.name)

# List Uploaded Files
st.subheader("Documents Uploaded Result")
if st.button("Show Uploaded Files"):
    try:
        data_source_id = get_data_source_id(data_source)
        uploaded_files = list_uploaded_files(data_source_id)
    except Exception as e:  # e.g. no data source connected yet, or HTTP failure
        st.error(f"An error occurred: {e}")
    else:
        st.write("Uploaded Files:")
        for file in uploaded_files:
            st.write(f"ID: {file['id']}, Organization Supplied User ID: {file['organization_supplied_user_id']}, "
                     f"Organization User Data Source ID: {file['organization_user_data_source_id']}, External URL: {file['external_url']}")
        # Remember the IDs so a later "Search" click (a new script run) can use them.
        st.session_state["file_ids"] = [file["id"] for file in uploaded_files]

# Search Documents
st.subheader("Search Documents")
query = st.text_input("Enter your search query:")
if st.button("Search"):
    if query:
        try:
            search_results = search_documents(query, st.session_state["file_ids"])
        except Exception as e:  # UI boundary: report the failure instead of crashing
            st.error(f"An error occurred: {e}")
        else:
            st.write("Search Results:")
            for result in search_results:
                st.write(f"Source: {result['source']}")
                st.write(f"Title: {result['content']}")
                st.write(f"Source URL: {result['source_url']}")
                st.write(f"Source Type: {result['source_type']}")
                st.write(f"Presigned URL: {result['presigned_url']}")
                st.write(f"Tags: {result['tags']}")
                st.write("-------------------------------------------------")
    else:
        st.write("Please enter a query to search.")

# # Add chat interface using custom HTML/CSS
# st.subheader("Chat Interface")
# chat_input = st.text_input("Enter your query:")
# if st.button("Send"):
#     if chat_input:
#         st.markdown(f'<div class="chat-bubble user">{chat_input}</div>', unsafe_allow_html=True)
#         # Placeholder for bot response (add your processing logic here)
#         bot_response = "This is a bot response."
#         st.markdown(f'<div class="chat-bubble bot">{bot_response}</div>', unsafe_allow_html=True)

# Custom CSS for chat bubbles
# Only the (currently commented-out) chat interface above uses these classes.
st.markdown("""
    <style>
    .chat-bubble {
        padding: 10px 15px;
        border-radius: 10px;
        margin: 5px 0;
        max-width: 60%;
    }
    .user {
        background-color: lightblue;
        align-self: flex-end;
    }
    .bot {
        background-color: darkgray;
        align-self: flex-start;
    }
    </style>
    """, unsafe_allow_html=True)