# Spaces: Sleeping  (page-status residue captured along with the file; not part of the script)
# import streamlit as st | |
# from carbon import Carbon | |
# import requests | |
# import json | |
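# # Carbon API Key
# # NOTE(review): the key on the next line is a hard-coded secret. Treat it as leaked: rotate it,
# # and if this script is ever re-enabled, load it from an environment variable or Streamlit
# # secrets instead of committing it to source.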
# CARBON_API_KEY = "a38ee1fe5fef56fc8e1ae2afc881378804bb902882442e1554adae4f82ee23ea" | |
# CUSTOMER_ID = "Candid" | |
# def get_google_drive_oauth(carbon): | |
# get_oauth_url_response = carbon.integrations.get_oauth_url( | |
# service="GOOGLE_DRIVE", | |
# scope="https://www.googleapis.com/auth/drive.readonly", | |
# connecting_new_account=True, | |
# ) | |
# return get_oauth_url_response.oauth_url | |
# def get_dropbox_oauth(carbon): | |
# get_oauth_url_response = carbon.integrations.get_oauth_url( | |
# service="DROPBOX", | |
# connecting_new_account=True, | |
# ) | |
# return get_oauth_url_response.oauth_url | |
# def get_notion_oauth(carbon): | |
# get_oauth_url_response = carbon.integrations.get_oauth_url( | |
# service="NOTION", | |
# connecting_new_account=True, | |
# ) | |
# return get_oauth_url_response.oauth_url | |
# def sync_github(carbon, username, token): | |
# sync_response = carbon.integrations.sync_git_hub( | |
# username=username, | |
# token=token, | |
# sync_source_items=True | |
# ) | |
# return sync_response | |
# def sync_gitbook(carbon, access_token, organization): | |
# sync_response = carbon.integrations.sync_git_book( | |
# access_token=access_token, | |
# organization=organization, | |
# sync_source_items=True | |
# ) | |
# return sync_response | |
# def sync_s3(carbon, access_key, access_key_secret): | |
# sync_response = carbon.integrations.sync_s3( | |
# access_key=access_key, | |
# access_key_secret=access_key_secret, | |
# sync_source_items=True | |
# ) | |
# return sync_response | |
# def sync_google_drive(carbon, data_source_id): | |
# sync_response = carbon.integrations.sync_data_source_items(data_source_id=int(data_source_id)) | |
# return sync_response | |
# def list_files(carbon, data_source_id=None, service="GOOGLE_DRIVE"): | |
# if data_source_id: | |
# sync_google_drive(carbon, data_source_id) | |
# list_files_response = carbon.integrations.list_data_source_items(data_source_id=int(data_source_id)) | |
# st.session_state['current_data_source'] = data_source_id # Store the current data source | |
# st.session_state['files'] = list_files_response.items # Store the fetched files | |
# else: | |
# if 'current_data_source' in st.session_state and st.session_state['current_data_source']: | |
# data_source_id = st.session_state['current_data_source'] | |
# sync_google_drive(carbon, data_source_id) | |
# list_files_response = carbon.integrations.list_data_source_items(data_source_id=int(data_source_id)) | |
# st.session_state['files'] = list_files_response.items # Store the fetched files | |
# else: | |
# query_user_data_sources_response = carbon.data_sources.query_user_data_sources( | |
# pagination={"limit": 100, "offset": 0}, | |
# order_by="created_at", | |
# order_dir="desc", | |
# filters={"source": service}, | |
# ) | |
# if query_user_data_sources_response.results: | |
# data_source_id = query_user_data_sources_response.results[0].id | |
# sync_google_drive(carbon, data_source_id) | |
# list_files_response = carbon.integrations.list_data_source_items(data_source_id=int(data_source_id)) | |
# st.session_state['current_data_source'] = data_source_id | |
# st.session_state['files'] = list_files_response.items # Store the fetched files | |
# else: | |
# list_files_response = None | |
# return list_files_response.items if list_files_response else None | |
# def list_all_files(carbon, data_source_id): | |
# url = "https://api.carbon.ai/user_files_v2" | |
# payload = { | |
# "pagination": { | |
# "limit": 100, | |
# "offset": 0 | |
# }, | |
# "order_by": "created_at", | |
# "order_dir": "desc", | |
# "filters": { | |
# "organization_user_data_source_id": [data_source_id], | |
# "embedding_generators": ["OPENAI"], | |
# "include_all_children": True, | |
# }, | |
# "include_raw_file": True, | |
# "include_parsed_text_file": True, | |
# "include_additional_files": True | |
# } | |
# headers = { | |
# "authorization": f"Bearer {CARBON_API_KEY}", | |
# "customer-id": CUSTOMER_ID, | |
# "Content-Type": "application/json" | |
# } | |
# response = requests.request("POST", url, json=payload, headers=headers) | |
# res = json.loads(response.text) | |
# file_id= res['results'][0]['id'] | |
# for i,document in enumerate(res['results']): | |
# print(document['name']) | |
# print(document['id']) | |
# file_id=res['results'][0]['id'] | |
# print(file_id) | |
# return res['results'] | |
# def list_user_documents(carbon): | |
# query_user_documents_response = carbon.documents.query_documents( | |
# pagination={"limit": 100, "offset": 0}, | |
# order_by="created_at", | |
# order_dir="desc" | |
# ) | |
# return query_user_documents_response.documents if query_user_documents_response else None | |
# def semantic_search_v2(carbon, query, tags_v2=None, hybrid_search=False): | |
# search_response = carbon.embeddings.get_documents( | |
# query=query, | |
# k=2, | |
# tags_v2=tags_v2 if tags_v2 else {}, | |
# include_tags=True, | |
# include_vectors=True, | |
# include_raw_file=True, | |
# hybrid_search=hybrid_search, | |
# hybrid_search_tuning_parameters={ | |
# "weight_a": 0.5, | |
# "weight_b": 0.5, | |
# }, | |
# media_type="TEXT", | |
# embedding_model="OPENAI", | |
# ) | |
# return search_response.documents | |
# def main(): | |
# st.title('Data Connector using Carbon SDK') | |
# # Authenticate with Carbon API | |
# st.write('### Authenticate with Carbon API') | |
# carbon = Carbon(api_key=CARBON_API_KEY, customer_id=CUSTOMER_ID) | |
# token = carbon.auth.get_access_token() | |
# carbon = Carbon(access_token=token.access_token) # authenticated object | |
# # Connect to Data Source | |
# st.write('## Connect to Data Source') | |
# service = st.selectbox('Select Data Source for OAuth', ['GOOGLE_DRIVE', 'DROPBOX', 'NOTION']) | |
# if st.button('Get OAuth URL'): | |
# with st.spinner('Fetching OAuth URL...'): | |
# try: | |
# if service == "GOOGLE_DRIVE": | |
# oauth_url = get_google_drive_oauth(carbon) | |
# elif service == "DROPBOX": | |
# oauth_url = get_dropbox_oauth(carbon) | |
# elif service == "NOTION": | |
# oauth_url = get_notion_oauth(carbon) | |
# st.write(f"OAuth URL for {service}: {oauth_url}") | |
# st.session_state['current_data_source'] = None # Reset the current data source | |
# st.session_state['files'] = None # Clear the previous files | |
# st.session_state['oauth_fetched'] = True | |
# except Exception as e: | |
# st.error(f"An error occurred: {e}") | |
# if 'oauth_fetched' in st.session_state and st.session_state['oauth_fetched']: | |
# st.write("OAuth URL fetched. Please authenticate and then click 'Sync and Fetch Files'.") | |
# if st.button('Sync and Fetch Files'): | |
# with st.spinner('Syncing and fetching files...'): | |
# try: | |
# query_user_data_sources_response = carbon.data_sources.query_user_data_sources( | |
# pagination={"limit": 100, "offset": 0}, | |
# order_by="created_at", | |
# order_dir="desc", | |
# filters={"source": service}, | |
# ) | |
# if query_user_data_sources_response.results: | |
# data_source_id = query_user_data_sources_response.results[0].id | |
# sync_google_drive(carbon, data_source_id) | |
# st.session_state['current_data_source'] = data_source_id | |
# st.session_state['oauth_fetched'] = False | |
# st.success("Synced successfully! Now you can list the files.") | |
# else: | |
# st.error("No data sources found. Please ensure the connection was successful.") | |
# except Exception as e: | |
# st.error(f"An error occurred: {e}") | |
# # List Files in Data Source | |
# st.write(f'## List Files in {service}') | |
# data_source_id = st.text_input('Enter Data Source ID (leave blank to list all files)') | |
# if st.button('List Files'): | |
# with st.spinner('Fetching files...'): | |
# try: | |
# if 'current_data_source' in st.session_state and st.session_state['current_data_source']: | |
# data_source_id = st.session_state['current_data_source'] | |
# files = list_files(carbon, data_source_id if data_source_id else None, service) | |
# if files: | |
# st.write(f"Files in {service}:") | |
# for item in files: | |
# st.write(f"File ID: {item.id}, File Name: {item.name}, File Size: {item.size if hasattr(item, 'size') else 'N/A'}, Last Modified: {item.last_modified if hasattr(item, 'last_modified') else 'N/A'}") | |
# else: | |
# st.write("No files found.") | |
# except Exception as e: | |
# st.error(f"An error occurred: {e}") | |
# # List All Files | |
# st.write('### List All Files') | |
# if st.button('List All Files'): | |
# with st.spinner('Fetching all files...'): | |
# try: | |
# if 'current_data_source' in st.session_state and st.session_state['current_data_source']: | |
# data_source_id = st.session_state['current_data_source'] | |
# all_files = list_all_files(carbon, data_source_id) | |
# if all_files: | |
# st.write("All files:") | |
# for i, document in enumerate(all_files): | |
# file_id = document['id'] | |
# st.write(f"File ID: {document['id']}, File Name: {document['name']}") | |
# else: | |
# st.write("No files found.") | |
# except Exception as e: | |
# st.error(f"An error occurred: {e}") | |
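# # Search in the Connected Data Source
# # NOTE(review): within main(), `file_id` is only assigned inside the 'List All Files' branch,
# # so it appears to be unbound when 'Search' runs without that branch having executed; the
# # resulting error would be reported by the except handler below. The "file_ids" payload key
# # presumably expects a list of ids rather than a single id -- confirm against the Carbon API.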
# st.write('### Search in the Connected Data Source') | |
# query = st.text_input("Enter your query:", value="Type here...") | |
# if st.button('Search'): | |
# if query: | |
# with st.spinner('Searching...'): | |
# try: | |
# all_files = list_all_files(carbon, data_source_id) | |
# url = "https://api.carbon.ai/embeddings" | |
# payload = { | |
# "query": query, | |
# "k": 2, | |
# "file_ids": file_id, | |
# "include_all_children": True, | |
# "tags": {}, | |
# "include_tags": True, | |
# "include_vectors": True, | |
# "include_raw_file": True, | |
# "hybrid_search": False, | |
# "media_type": "TEXT", | |
# "embedding_model": "OPENAI" | |
# } | |
# headers = { | |
# "authorization": f"Bearer {CARBON_API_KEY}", | |
# "customer-id": CUSTOMER_ID, | |
# "Content-Type": "application/json" | |
# } | |
# response_search = requests.post(url, json=payload, headers=headers) | |
# response_search_chunks = json.loads(response_search.text) | |
# st.write("Search results:") | |
# for i, doc in enumerate(response_search_chunks['documents']): | |
# st.write(f"Document {i+1}:") | |
# st.write(f"Content: {doc['content']}") | |
# st.write(f"Source: {doc['source']}") | |
# st.write(f"Match Percentage: {doc['score'] * 100}%") | |
# if 'file_url' in doc: | |
# st.markdown(f"[Download {doc['filename']}]({doc['file_url']})") | |
# st.write("-------------------------------------------------") | |
# except Exception as e: | |
# st.error(f"An error occurred: {e}") | |
# else: | |
# st.write("Please enter a query to search.") | |
# # Display Search History | |
# st.write('## Search History') | |
# if 'search_history' not in st.session_state: | |
# st.session_state['search_history'] = [] | |
# if query and st.button('Add to Search History'): | |
# st.session_state['search_history'].append(query) | |
# if st.session_state['search_history']: | |
# st.write("Past Searches:") | |
# for past_query in st.session_state['search_history']: | |
# st.write(past_query) | |
# # Call the main function | |
# if __name__ == '__main__': | |
# main() | |