Spaces:
Sleeping
Sleeping
vishanth10
committed on
Commit
·
8959c46
1
Parent(s):
61f09d1
New UI and resolved bugs
Browse files
.txt
ADDED
@@ -0,0 +1,298 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# import streamlit as st
|
2 |
+
# from carbon import Carbon
|
3 |
+
# import requests
|
4 |
+
# import json
|
5 |
+
|
6 |
+
# # Carbon API Key
|
7 |
+
# CARBON_API_KEY = "a38ee1fe5fef56fc8e1ae2afc881378804bb902882442e1554adae4f82ee23ea"
|
8 |
+
# CUSTOMER_ID = "Candid"
|
9 |
+
|
10 |
+
# def get_google_drive_oauth(carbon):
|
11 |
+
# get_oauth_url_response = carbon.integrations.get_oauth_url(
|
12 |
+
# service="GOOGLE_DRIVE",
|
13 |
+
# scope="https://www.googleapis.com/auth/drive.readonly",
|
14 |
+
# connecting_new_account=True,
|
15 |
+
# )
|
16 |
+
# return get_oauth_url_response.oauth_url
|
17 |
+
|
18 |
+
# def get_dropbox_oauth(carbon):
|
19 |
+
# get_oauth_url_response = carbon.integrations.get_oauth_url(
|
20 |
+
# service="DROPBOX",
|
21 |
+
# connecting_new_account=True,
|
22 |
+
# )
|
23 |
+
# return get_oauth_url_response.oauth_url
|
24 |
+
|
25 |
+
# def get_notion_oauth(carbon):
|
26 |
+
# get_oauth_url_response = carbon.integrations.get_oauth_url(
|
27 |
+
# service="NOTION",
|
28 |
+
# connecting_new_account=True,
|
29 |
+
# )
|
30 |
+
# return get_oauth_url_response.oauth_url
|
31 |
+
|
32 |
+
# def sync_github(carbon, username, token):
|
33 |
+
# sync_response = carbon.integrations.sync_git_hub(
|
34 |
+
# username=username,
|
35 |
+
# token=token,
|
36 |
+
# sync_source_items=True
|
37 |
+
# )
|
38 |
+
# return sync_response
|
39 |
+
|
40 |
+
# def sync_gitbook(carbon, access_token, organization):
|
41 |
+
# sync_response = carbon.integrations.sync_git_book(
|
42 |
+
# access_token=access_token,
|
43 |
+
# organization=organization,
|
44 |
+
# sync_source_items=True
|
45 |
+
# )
|
46 |
+
# return sync_response
|
47 |
+
|
48 |
+
# def sync_s3(carbon, access_key, access_key_secret):
|
49 |
+
# sync_response = carbon.integrations.sync_s3(
|
50 |
+
# access_key=access_key,
|
51 |
+
# access_key_secret=access_key_secret,
|
52 |
+
# sync_source_items=True
|
53 |
+
# )
|
54 |
+
# return sync_response
|
55 |
+
|
56 |
+
# def sync_google_drive(carbon, data_source_id):
|
57 |
+
# sync_response = carbon.integrations.sync_data_source_items(data_source_id=int(data_source_id))
|
58 |
+
# return sync_response
|
59 |
+
|
60 |
+
|
61 |
+
# def list_files(carbon, data_source_id=None, service="GOOGLE_DRIVE"):
|
62 |
+
# if data_source_id:
|
63 |
+
# sync_google_drive(carbon, data_source_id)
|
64 |
+
# list_files_response = carbon.integrations.list_data_source_items(data_source_id=int(data_source_id))
|
65 |
+
# st.session_state['current_data_source'] = data_source_id # Store the current data source
|
66 |
+
# st.session_state['files'] = list_files_response.items # Store the fetched files
|
67 |
+
# else:
|
68 |
+
# if 'current_data_source' in st.session_state and st.session_state['current_data_source']:
|
69 |
+
# data_source_id = st.session_state['current_data_source']
|
70 |
+
# sync_google_drive(carbon, data_source_id)
|
71 |
+
# list_files_response = carbon.integrations.list_data_source_items(data_source_id=int(data_source_id))
|
72 |
+
# st.session_state['files'] = list_files_response.items # Store the fetched files
|
73 |
+
# else:
|
74 |
+
# query_user_data_sources_response = carbon.data_sources.query_user_data_sources(
|
75 |
+
# pagination={"limit": 100, "offset": 0},
|
76 |
+
# order_by="created_at",
|
77 |
+
# order_dir="desc",
|
78 |
+
# filters={"source": service},
|
79 |
+
# )
|
80 |
+
# if query_user_data_sources_response.results:
|
81 |
+
# data_source_id = query_user_data_sources_response.results[0].id
|
82 |
+
# sync_google_drive(carbon, data_source_id)
|
83 |
+
# list_files_response = carbon.integrations.list_data_source_items(data_source_id=int(data_source_id))
|
84 |
+
# st.session_state['current_data_source'] = data_source_id
|
85 |
+
# st.session_state['files'] = list_files_response.items # Store the fetched files
|
86 |
+
# else:
|
87 |
+
# list_files_response = None
|
88 |
+
# return list_files_response.items if list_files_response else None
|
89 |
+
|
90 |
+
# def list_all_files(carbon, data_source_id):
|
91 |
+
# url = "https://api.carbon.ai/user_files_v2"
|
92 |
+
# payload = {
|
93 |
+
# "pagination": {
|
94 |
+
# "limit": 100,
|
95 |
+
# "offset": 0
|
96 |
+
# },
|
97 |
+
# "order_by": "created_at",
|
98 |
+
# "order_dir": "desc",
|
99 |
+
# "filters": {
|
100 |
+
# "organization_user_data_source_id": [data_source_id],
|
101 |
+
# "embedding_generators": ["OPENAI"],
|
102 |
+
# "include_all_children": True,
|
103 |
+
# },
|
104 |
+
# "include_raw_file": True,
|
105 |
+
# "include_parsed_text_file": True,
|
106 |
+
# "include_additional_files": True
|
107 |
+
# }
|
108 |
+
# headers = {
|
109 |
+
# "authorization": f"Bearer {CARBON_API_KEY}",
|
110 |
+
# "customer-id": CUSTOMER_ID,
|
111 |
+
# "Content-Type": "application/json"
|
112 |
+
# }
|
113 |
+
|
114 |
+
# response = requests.request("POST", url, json=payload, headers=headers)
|
115 |
+
# res = json.loads(response.text)
|
116 |
+
# file_id= res['results'][0]['id']
|
117 |
+
# for i,document in enumerate(res['results']):
|
118 |
+
# print(document['name'])
|
119 |
+
# print(document['id'])
|
120 |
+
|
121 |
+
# file_id=res['results'][0]['id']
|
122 |
+
# print(file_id)
|
123 |
+
# return res['results']
|
124 |
+
|
125 |
+
|
126 |
+
# def list_user_documents(carbon):
|
127 |
+
# query_user_documents_response = carbon.documents.query_documents(
|
128 |
+
# pagination={"limit": 100, "offset": 0},
|
129 |
+
# order_by="created_at",
|
130 |
+
# order_dir="desc"
|
131 |
+
# )
|
132 |
+
# return query_user_documents_response.documents if query_user_documents_response else None
|
133 |
+
|
134 |
+
# def semantic_search_v2(carbon, query, tags_v2=None, hybrid_search=False):
|
135 |
+
# search_response = carbon.embeddings.get_documents(
|
136 |
+
# query=query,
|
137 |
+
# k=2,
|
138 |
+
# tags_v2=tags_v2 if tags_v2 else {},
|
139 |
+
# include_tags=True,
|
140 |
+
# include_vectors=True,
|
141 |
+
# include_raw_file=True,
|
142 |
+
# hybrid_search=hybrid_search,
|
143 |
+
# hybrid_search_tuning_parameters={
|
144 |
+
# "weight_a": 0.5,
|
145 |
+
# "weight_b": 0.5,
|
146 |
+
# },
|
147 |
+
# media_type="TEXT",
|
148 |
+
# embedding_model="OPENAI",
|
149 |
+
# )
|
150 |
+
# return search_response.documents
|
151 |
+
|
152 |
+
|
153 |
+
# def main():
|
154 |
+
# st.title('Data Connector using Carbon SDK')
|
155 |
+
|
156 |
+
# # Authenticate with Carbon API
|
157 |
+
# st.write('### Authenticate with Carbon API')
|
158 |
+
# carbon = Carbon(api_key=CARBON_API_KEY, customer_id=CUSTOMER_ID)
|
159 |
+
# token = carbon.auth.get_access_token()
|
160 |
+
# carbon = Carbon(access_token=token.access_token) # authenticated object
|
161 |
+
|
162 |
+
# # Connect to Data Source
|
163 |
+
# st.write('## Connect to Data Source')
|
164 |
+
# service = st.selectbox('Select Data Source for OAuth', ['GOOGLE_DRIVE', 'DROPBOX', 'NOTION'])
|
165 |
+
# if st.button('Get OAuth URL'):
|
166 |
+
# with st.spinner('Fetching OAuth URL...'):
|
167 |
+
# try:
|
168 |
+
# if service == "GOOGLE_DRIVE":
|
169 |
+
# oauth_url = get_google_drive_oauth(carbon)
|
170 |
+
# elif service == "DROPBOX":
|
171 |
+
# oauth_url = get_dropbox_oauth(carbon)
|
172 |
+
# elif service == "NOTION":
|
173 |
+
# oauth_url = get_notion_oauth(carbon)
|
174 |
+
# st.write(f"OAuth URL for {service}: {oauth_url}")
|
175 |
+
# st.session_state['current_data_source'] = None # Reset the current data source
|
176 |
+
# st.session_state['files'] = None # Clear the previous files
|
177 |
+
# st.session_state['oauth_fetched'] = True
|
178 |
+
# except Exception as e:
|
179 |
+
# st.error(f"An error occurred: {e}")
|
180 |
+
|
181 |
+
# if 'oauth_fetched' in st.session_state and st.session_state['oauth_fetched']:
|
182 |
+
# st.write("OAuth URL fetched. Please authenticate and then click 'Sync and Fetch Files'.")
|
183 |
+
# if st.button('Sync and Fetch Files'):
|
184 |
+
# with st.spinner('Syncing and fetching files...'):
|
185 |
+
# try:
|
186 |
+
# query_user_data_sources_response = carbon.data_sources.query_user_data_sources(
|
187 |
+
# pagination={"limit": 100, "offset": 0},
|
188 |
+
# order_by="created_at",
|
189 |
+
# order_dir="desc",
|
190 |
+
# filters={"source": service},
|
191 |
+
# )
|
192 |
+
# if query_user_data_sources_response.results:
|
193 |
+
# data_source_id = query_user_data_sources_response.results[0].id
|
194 |
+
# sync_google_drive(carbon, data_source_id)
|
195 |
+
# st.session_state['current_data_source'] = data_source_id
|
196 |
+
# st.session_state['oauth_fetched'] = False
|
197 |
+
# st.success("Synced successfully! Now you can list the files.")
|
198 |
+
# else:
|
199 |
+
# st.error("No data sources found. Please ensure the connection was successful.")
|
200 |
+
# except Exception as e:
|
201 |
+
# st.error(f"An error occurred: {e}")
|
202 |
+
|
203 |
+
# # List Files in Data Source
|
204 |
+
# st.write(f'## List Files in {service}')
|
205 |
+
# data_source_id = st.text_input('Enter Data Source ID (leave blank to list all files)')
|
206 |
+
# if st.button('List Files'):
|
207 |
+
# with st.spinner('Fetching files...'):
|
208 |
+
# try:
|
209 |
+
# if 'current_data_source' in st.session_state and st.session_state['current_data_source']:
|
210 |
+
# data_source_id = st.session_state['current_data_source']
|
211 |
+
# files = list_files(carbon, data_source_id if data_source_id else None, service)
|
212 |
+
# if files:
|
213 |
+
# st.write(f"Files in {service}:")
|
214 |
+
# for item in files:
|
215 |
+
# st.write(f"File ID: {item.id}, File Name: {item.name}, File Size: {item.size if hasattr(item, 'size') else 'N/A'}, Last Modified: {item.last_modified if hasattr(item, 'last_modified') else 'N/A'}")
|
216 |
+
# else:
|
217 |
+
# st.write("No files found.")
|
218 |
+
# except Exception as e:
|
219 |
+
# st.error(f"An error occurred: {e}")
|
220 |
+
|
221 |
+
# # List All Files
|
222 |
+
# st.write('### List All Files')
|
223 |
+
# if st.button('List All Files'):
|
224 |
+
# with st.spinner('Fetching all files...'):
|
225 |
+
# try:
|
226 |
+
# if 'current_data_source' in st.session_state and st.session_state['current_data_source']:
|
227 |
+
# data_source_id = st.session_state['current_data_source']
|
228 |
+
# all_files = list_all_files(carbon, data_source_id)
|
229 |
+
# if all_files:
|
230 |
+
# st.write("All files:")
|
231 |
+
# for i, document in enumerate(all_files):
|
232 |
+
# file_id = document['id']
|
233 |
+
# st.write(f"File ID: {document['id']}, File Name: {document['name']}")
|
234 |
+
# else:
|
235 |
+
# st.write("No files found.")
|
236 |
+
# except Exception as e:
|
237 |
+
# st.error(f"An error occurred: {e}")
|
238 |
+
|
239 |
+
# # Search in the Connected Data Source
|
240 |
+
# st.write('### Search in the Connected Data Source')
|
241 |
+
# query = st.text_input("Enter your query:", value="Type here...")
|
242 |
+
# if st.button('Search'):
|
243 |
+
# if query:
|
244 |
+
# with st.spinner('Searching...'):
|
245 |
+
# try:
|
246 |
+
# all_files = list_all_files(carbon, data_source_id)
|
247 |
+
# url = "https://api.carbon.ai/embeddings"
|
248 |
+
# payload = {
|
249 |
+
# "query": query,
|
250 |
+
# "k": 2,
|
251 |
+
# "file_ids": file_id,
|
252 |
+
# "include_all_children": True,
|
253 |
+
# "tags": {},
|
254 |
+
# "include_tags": True,
|
255 |
+
# "include_vectors": True,
|
256 |
+
# "include_raw_file": True,
|
257 |
+
# "hybrid_search": False,
|
258 |
+
# "media_type": "TEXT",
|
259 |
+
# "embedding_model": "OPENAI"
|
260 |
+
# }
|
261 |
+
# headers = {
|
262 |
+
# "authorization": f"Bearer {CARBON_API_KEY}",
|
263 |
+
# "customer-id": CUSTOMER_ID,
|
264 |
+
# "Content-Type": "application/json"
|
265 |
+
# }
|
266 |
+
# response_search = requests.post(url, json=payload, headers=headers)
|
267 |
+
# response_search_chunks = json.loads(response_search.text)
|
268 |
+
|
269 |
+
# st.write("Search results:")
|
270 |
+
# for i, doc in enumerate(response_search_chunks['documents']):
|
271 |
+
# st.write(f"Document {i+1}:")
|
272 |
+
# st.write(f"Content: {doc['content']}")
|
273 |
+
# st.write(f"Source: {doc['source']}")
|
274 |
+
# st.write(f"Match Percentage: {doc['score'] * 100}%")
|
275 |
+
# if 'file_url' in doc:
|
276 |
+
# st.markdown(f"[Download {doc['filename']}]({doc['file_url']})")
|
277 |
+
# st.write("-------------------------------------------------")
|
278 |
+
# except Exception as e:
|
279 |
+
# st.error(f"An error occurred: {e}")
|
280 |
+
# else:
|
281 |
+
# st.write("Please enter a query to search.")
|
282 |
+
|
283 |
+
# # Display Search History
|
284 |
+
# st.write('## Search History')
|
285 |
+
# if 'search_history' not in st.session_state:
|
286 |
+
# st.session_state['search_history'] = []
|
287 |
+
|
288 |
+
# if query and st.button('Add to Search History'):
|
289 |
+
# st.session_state['search_history'].append(query)
|
290 |
+
|
291 |
+
# if st.session_state['search_history']:
|
292 |
+
# st.write("Past Searches:")
|
293 |
+
# for past_query in st.session_state['search_history']:
|
294 |
+
# st.write(past_query)
|
295 |
+
|
296 |
+
# # Call the main function
|
297 |
+
# if __name__ == '__main__':
|
298 |
+
# main()
|
app.py
CHANGED
@@ -1,12 +1,17 @@
|
|
1 |
import streamlit as st
|
2 |
from carbon import Carbon
|
|
|
3 |
import requests
|
4 |
import json
|
5 |
|
6 |
-
#
|
7 |
CARBON_API_KEY = "a38ee1fe5fef56fc8e1ae2afc881378804bb902882442e1554adae4f82ee23ea"
|
8 |
-
|
9 |
|
|
|
|
|
|
|
|
|
10 |
def get_google_drive_oauth(carbon):
|
11 |
get_oauth_url_response = carbon.integrations.get_oauth_url(
|
12 |
service="GOOGLE_DRIVE",
|
@@ -15,6 +20,7 @@ def get_google_drive_oauth(carbon):
|
|
15 |
)
|
16 |
return get_oauth_url_response.oauth_url
|
17 |
|
|
|
18 |
def get_dropbox_oauth(carbon):
|
19 |
get_oauth_url_response = carbon.integrations.get_oauth_url(
|
20 |
service="DROPBOX",
|
@@ -22,6 +28,7 @@ def get_dropbox_oauth(carbon):
|
|
22 |
)
|
23 |
return get_oauth_url_response.oauth_url
|
24 |
|
|
|
25 |
def get_notion_oauth(carbon):
|
26 |
get_oauth_url_response = carbon.integrations.get_oauth_url(
|
27 |
service="NOTION",
|
@@ -29,220 +36,182 @@ def get_notion_oauth(carbon):
|
|
29 |
)
|
30 |
return get_oauth_url_response.oauth_url
|
31 |
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
|
|
|
|
37 |
)
|
38 |
-
return
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
|
|
45 |
)
|
46 |
-
return
|
47 |
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
access_key_secret=access_key_secret,
|
52 |
-
sync_source_items=True
|
53 |
-
)
|
54 |
-
return sync_response
|
55 |
-
|
56 |
-
def list_files(carbon, data_source_id=None, service="GOOGLE_DRIVE"):
|
57 |
-
if data_source_id:
|
58 |
-
list_files_response = carbon.integrations.list_data_source_items(data_source_id=int(data_source_id))
|
59 |
-
else:
|
60 |
-
query_user_data_sources_response = carbon.data_sources.query_user_data_sources(
|
61 |
-
pagination={"limit": 100, "offset": 0},
|
62 |
-
order_by="created_at",
|
63 |
-
order_dir="desc",
|
64 |
-
filters={"source": service},
|
65 |
-
)
|
66 |
-
if query_user_data_sources_response.results:
|
67 |
-
data_source_id = query_user_data_sources_response.results[0].id
|
68 |
-
list_files_response = carbon.integrations.list_data_source_items(data_source_id=int(data_source_id))
|
69 |
-
else:
|
70 |
-
list_files_response = None
|
71 |
-
return list_files_response.items if list_files_response else None
|
72 |
-
|
73 |
-
def list_user_documents(carbon):
|
74 |
-
query_user_documents_response = carbon.documents.query_documents(
|
75 |
pagination={"limit": 100, "offset": 0},
|
76 |
order_by="created_at",
|
77 |
-
order_dir="desc"
|
78 |
)
|
79 |
-
return
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
91 |
"weight_a": 0.5,
|
92 |
-
"weight_b": 0.5
|
93 |
},
|
94 |
-
media_type
|
95 |
-
embedding_model
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
|
165 |
-
|
166 |
-
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
-
|
176 |
-
|
177 |
-
|
178 |
-
|
179 |
-
|
180 |
-
|
181 |
-
|
182 |
-
|
183 |
-
|
184 |
-
|
185 |
-
|
186 |
-
|
187 |
-
|
188 |
-
|
189 |
-
|
190 |
-
|
191 |
-
|
192 |
-
|
193 |
-
|
194 |
-
|
195 |
-
|
196 |
-
|
197 |
-
|
198 |
-
|
199 |
-
|
200 |
-
|
201 |
-
|
202 |
-
"query": query,
|
203 |
-
"k": 2,
|
204 |
-
"file_ids": [], # Modify to include relevant file IDs if needed
|
205 |
-
"include_all_children": True,
|
206 |
-
"tags": {},
|
207 |
-
"include_tags": True,
|
208 |
-
"include_vectors": True,
|
209 |
-
"include_raw_file": True,
|
210 |
-
"hybrid_search": False,
|
211 |
-
"media_type": "TEXT",
|
212 |
-
"embedding_model": "OPENAI"
|
213 |
-
}
|
214 |
-
headers = {
|
215 |
-
"authorization": f"Bearer {CARBON_API_KEY}",
|
216 |
-
"customer-id": customer_id,
|
217 |
-
"Content-Type": "application/json"
|
218 |
-
}
|
219 |
-
response_search = requests.post(url, json=payload, headers=headers)
|
220 |
-
response_search_chunks = json.loads(response_search.text)
|
221 |
-
|
222 |
-
st.write("Search results:")
|
223 |
-
for i, doc in enumerate(response_search_chunks['documents']):
|
224 |
-
st.write(f"Document {i+1}:")
|
225 |
-
st.write(f"Content: {doc['content']}")
|
226 |
-
st.write(f"Source: {doc['source']}")
|
227 |
-
st.write(f"Match Percentage: {doc['score'] * 100}%")
|
228 |
-
if 'file_url' in doc:
|
229 |
-
st.markdown(f"[Download {doc['filename']}]({doc['file_url']})")
|
230 |
-
st.write("-------------------------------------------------")
|
231 |
-
except Exception as e:
|
232 |
-
st.error(f"An error occurred: {e}")
|
233 |
-
else:
|
234 |
-
st.write("Please enter a query to search.")
|
235 |
-
|
236 |
-
# Display Search History
|
237 |
-
st.write('## Search History')
|
238 |
-
if 'search_history' not in st.session_state:
|
239 |
-
st.session_state['search_history'] = []
|
240 |
-
|
241 |
-
if query and st.button('Add to Search History'):
|
242 |
-
st.session_state['search_history'].append(query)
|
243 |
-
|
244 |
-
if st.session_state['search_history']:
|
245 |
-
st.write("Past Searches:")
|
246 |
-
|
247 |
-
if __name__ == '__main__':
|
248 |
-
main()
|
|
|
1 |
import streamlit as st
|
2 |
from carbon import Carbon
|
3 |
+
from decimal import Decimal
|
4 |
import requests
|
5 |
import json
|
6 |
|
7 |
+
# Constants
# SECURITY: the Carbon API key was previously hard-coded on this line and
# committed to the repository. Credentials are now read from the environment
# (for example a Space secret); the key that was committed should be rotated.
import os

# An empty string is used when the variable is unset so that importing the
# module never raises; API calls will then fail with an authentication error.
CARBON_API_KEY = os.environ.get("CARBON_API_KEY", "")
# The customer ID keeps its previous value as the default.
CUSTOMER_ID = os.environ.get("CARBON_CUSTOMER_ID", "Candid")
|
10 |
|
11 |
+
# Initialize Carbon SDK
|
12 |
+
carbon = Carbon(api_key=CARBON_API_KEY, customer_id=CUSTOMER_ID)
|
13 |
+
|
14 |
+
# Authenticate and get OAuth URL for Google Drive
|
15 |
def get_google_drive_oauth(carbon):
|
16 |
get_oauth_url_response = carbon.integrations.get_oauth_url(
|
17 |
service="GOOGLE_DRIVE",
|
|
|
20 |
)
|
21 |
return get_oauth_url_response.oauth_url
|
22 |
|
23 |
+
# Authenticate and get OAuth URL for Dropbox
|
24 |
def get_dropbox_oauth(carbon):
|
25 |
get_oauth_url_response = carbon.integrations.get_oauth_url(
|
26 |
service="DROPBOX",
|
|
|
28 |
)
|
29 |
return get_oauth_url_response.oauth_url
|
30 |
|
31 |
+
# Authenticate and get OAuth URL for Notion
|
32 |
def get_notion_oauth(carbon):
|
33 |
get_oauth_url_response = carbon.integrations.get_oauth_url(
|
34 |
service="NOTION",
|
|
|
36 |
)
|
37 |
return get_oauth_url_response.oauth_url
|
38 |
|
39 |
+
# Get data source ID
|
40 |
+
def get_data_source_id(service):
    """Return the ID of the first data source Carbon reports for ``service``.

    The query filters on ``source == service`` and asks for results ordered by
    ``created_at`` descending, so the first result is expected to be the most
    recently created connection.

    Args:
        service: Source name used as the ``source`` filter (the UI passes
            "GOOGLE_DRIVE", "DROPBOX" or "NOTION").

    Returns:
        The ``id`` attribute of the first returned data source.

    Raises:
        IndexError: If the query returns no data source for ``service``.
    """
    response = carbon.data_sources.query_user_data_sources(
        pagination={"limit": 100, "offset": 0},
        order_by="created_at",
        order_dir="desc",
        filters={"source": service},
    )
    results = response.results
    if not results:
        # Same exception type the bare ``results[0]`` produced, but with a
        # message that tells the user what to do next.
        raise IndexError(
            f"No data source found for {service}; connect the account first."
        )
    return results[0].id
|
48 |
+
|
49 |
+
# List files in the data source
|
50 |
+
def list_files(data_source_id):
    """Return the items Carbon lists for one data source.

    Args:
        data_source_id: Identifier forwarded unchanged to
            ``carbon.integrations.list_data_source_items``.

    Returns:
        The ``items`` attribute of the SDK response (first page only: limit
        250, offset 0, no filters).
    """
    request_kwargs = {
        "data_source_id": data_source_id,
        "filters": {},
        "pagination": {"limit": 250, "offset": 0},
    }
    listing = carbon.integrations.list_data_source_items(**request_kwargs)
    return listing.items
|
57 |
|
58 |
+
# List all data sources associated with the user
|
59 |
+
def list_user_data_sources():
    """Return the data sources Carbon reports for the configured customer.

    Requests the first page (limit 100, offset 0) ordered by ``created_at``
    descending, without any source filter.

    Returns:
        The ``results`` attribute of the SDK response.
    """
    first_page = {"limit": 100, "offset": 0}
    reply = carbon.data_sources.query_user_data_sources(
        pagination=first_page,
        order_by="created_at",
        order_dir="desc",
    )
    return reply.results
|
66 |
+
|
67 |
+
# List files uploaded by the user
|
68 |
+
def list_uploaded_files(data_source_id):
    """Fetch the files Carbon holds for one data source via the REST API.

    Posts to ``/user_files_v2`` with a filter on
    ``organization_user_data_source_id`` and returns the first page
    (limit 100, offset 0) ordered by ``created_at`` descending.

    Args:
        data_source_id: ID placed in the ``organization_user_data_source_id``
            filter list.

    Returns:
        The ``results`` list from the JSON response, or an empty list when the
        response has no ``results`` key.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeout.
    """
    url = "https://api.carbon.ai/user_files_v2"
    payload = {
        "pagination": {
            "limit": 100,
            "offset": 0
        },
        "order_by": "created_at",
        "order_dir": "desc",
        "filters": {
            "organization_user_data_source_id": [data_source_id],
            "embedding_generators": ["OPENAI"],
            "include_all_children": True,
        },
        "include_raw_file": True,
        "include_parsed_text_file": True,
        "include_additional_files": True
    }
    headers = {
        "authorization": f"Bearer {CARBON_API_KEY}",
        "customer-id": CUSTOMER_ID,
        "Content-Type": "application/json"
    }
    # A timeout keeps a stalled connection from hanging the Streamlit run.
    response = requests.post(url, json=payload, headers=headers, timeout=30)
    # Surface HTTP errors directly instead of an opaque KeyError on the
    # error payload further down.
    response.raise_for_status()
    return response.json().get("results", [])
|
93 |
+
|
94 |
+
# Search function
|
95 |
+
def search_documents(query, file_ids):
    """Run an embeddings search against Carbon and return matching documents.

    Posts to ``/embeddings`` asking for the top 2 matches (``k=2``) with
    hybrid search disabled.

    Args:
        query: Free-text search query.
        file_ids: List of file IDs sent as the ``file_ids`` field of the
            request.

    Returns:
        The ``documents`` list from the JSON response, or an empty list when
        the response has no ``documents`` key.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeout.
    """
    url = "https://api.carbon.ai/embeddings"
    payload = {
        "query": query,
        "k": 2,
        "file_ids": file_ids,
        "include_all_children": True,
        "include_tags": True,
        "include_vectors": True,
        "include_raw_file": True,
        "hybrid_search": False,
        "hybrid_search_tuning_parameters": {
            "weight_a": 0.5,
            "weight_b": 0.5
        },
        "media_type": "TEXT",
        "embedding_model": "OPENAI"
    }
    headers = {
        "authorization": f"Bearer {CARBON_API_KEY}",
        "customer-id": CUSTOMER_ID,
        "Content-Type": "application/json"
    }
    # A timeout keeps a stalled connection from hanging the Streamlit run.
    response = requests.post(url, json=payload, headers=headers, timeout=30)
    # Fail with the HTTP error rather than a KeyError on an error payload.
    response.raise_for_status()
    # Parse once; the previous debug print dumped the whole response
    # (embedding vectors included) to the server log on every search.
    return response.json().get("documents", [])
|
122 |
+
|
123 |
+
# Streamlit UI
|
124 |
+
st.title("Data Connector using Carbon SDK")

# Authenticate with Carbon API
st.header("Authenticate with Carbon API")

# Connect to Data Source: pick a service and show its OAuth link.
st.subheader("Connect to Data Source")
data_source = st.selectbox("Select Data Source for OAuth", ["GOOGLE_DRIVE", "DROPBOX", "NOTION"])
if st.button("Get OAuth URL"):
    # One OAuth helper per selectable service.
    oauth_helpers = {
        "GOOGLE_DRIVE": get_google_drive_oauth,
        "DROPBOX": get_dropbox_oauth,
        "NOTION": get_notion_oauth,
    }
    oauth_url = oauth_helpers[data_source](carbon)
    st.write(f"OAuth URL for {data_source}: {oauth_url}")
    link_html = f'<a href="{oauth_url}" target="_blank">Authenticate {data_source}</a>'
    st.markdown(link_html, unsafe_allow_html=True)

# List User Data Sources: one summary line per connected source.
st.subheader("List Data Sources")
if st.button("List Data Sources"):
    data_sources = list_user_data_sources()
    st.write("Data Sources associated with the user:")
    for ds in data_sources:
        summary = (
            f"ID: {ds.id}, External ID: {ds.data_source_external_id}, Type: {ds.data_source_type}, "
            f"Sync Status: {ds.sync_status}, Created At: {ds.created_at}, Updated At: {ds.updated_at}"
        )
        st.write(summary)

# List Files in Data Source: names of the items in the selected service.
st.subheader(f"List Files in {data_source}")
if st.button("List Files"):
    data_source_id = get_data_source_id(data_source)
    files = list_files(data_source_id)
    st.write(f"Files in {data_source}:")
    for file in files:
        st.write(file.name)
|
159 |
+
|
160 |
+
# List Uploaded Files
|
161 |
+
st.subheader("Documents Uploaded Result")
# Streamlit re-executes this script from the top on every widget interaction,
# so a plain module-level list would be empty again by the time "Search" is
# clicked. The IDs are kept in session state so they survive reruns.
if "file_ids" not in st.session_state:
    st.session_state["file_ids"] = []
if st.button("Show Uploaded Files"):
    data_source_id = get_data_source_id(data_source)
    uploaded_files = list_uploaded_files(data_source_id)
    st.write("Uploaded Files:")
    for file in uploaded_files:
        st.write(f"ID: {file['id']}, Organization Supplied User ID: {file['organization_supplied_user_id']}, "
                 f"Organization User Data Source ID: {file['organization_user_data_source_id']}, External URL: {file['external_url']}")
    # Replace (rather than extend) so repeated clicks do not duplicate IDs.
    st.session_state["file_ids"] = [uploaded["id"] for uploaded in uploaded_files]
file_ids = st.session_state["file_ids"]

# Search Documents
st.subheader("Search Documents")
query = st.text_input("Enter your search query:")
if st.button("Search"):
    if query:
        search_results = search_documents(query, file_ids)
        st.write("Search Results:")
        for result in search_results:
            st.write(f"Source: {result['source']}")
            st.write(f"Title: {result['content']}")
            st.write(f"Source URL: {result['source_url']}")
            st.write(f"Source Type: {result['source_type']}")
            st.write(f"Presigned URL: {result['presigned_url']}")
            st.write(f"Tags: {result['tags']}")
            st.write("-------------------------------------------------")
    else:
        # Give feedback instead of silently ignoring the click.
        st.write("Please enter a query to search.")
|
188 |
+
|
189 |
+
# # Add chat interface using custom HTML/CSS
|
190 |
+
# st.subheader("Chat Interface")
|
191 |
+
# chat_input = st.text_input("Enter your query:")
|
192 |
+
# if st.button("Send"):
|
193 |
+
# if chat_input:
|
194 |
+
# st.markdown(f'<div class="chat-bubble user">{chat_input}</div>', unsafe_allow_html=True)
|
195 |
+
# # Placeholder for bot response (add your processing logic here)
|
196 |
+
# bot_response = "This is a bot response."
|
197 |
+
# st.markdown(f'<div class="chat-bubble bot">{bot_response}</div>', unsafe_allow_html=True)
|
198 |
+
|
199 |
+
# Custom CSS for chat bubbles
|
200 |
+
st.markdown("""
|
201 |
+
<style>
|
202 |
+
.chat-bubble {
|
203 |
+
padding: 10px 15px;
|
204 |
+
border-radius: 10px;
|
205 |
+
margin: 5px 0;
|
206 |
+
max-width: 60%;
|
207 |
+
}
|
208 |
+
.user {
|
209 |
+
background-color: lightblue;
|
210 |
+
align-self: flex-end;
|
211 |
+
}
|
212 |
+
.bot {
|
213 |
+
background-color: darkgray;
|
214 |
+
align-self: flex-start;
|
215 |
+
}
|
216 |
+
</style>
|
217 |
+
""", unsafe_allow_html=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|