Spaces:

vhr1007
/

traversaal_api_test

Sleeping

vishanth10

New UI and resolved bugs

8959c46 8 months ago

13.2 kB

	# import streamlit as st
	# from carbon import Carbon
	# import requests
	# import json

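	# # Carbon API Key
	# # SECURITY NOTE(review): the value assigned below is a hard-coded credential committed in source.
	# # Treat it as exposed: rotate it, and load the key from an environment variable or Streamlit
	# # secrets instead of a string literal if this code is ever re-enabled.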
	# CARBON_API_KEY = "a38ee1fe5fef56fc8e1ae2afc881378804bb902882442e1554adae4f82ee23ea"
	# CUSTOMER_ID = "Candid"

	# def get_google_drive_oauth(carbon):
	# get_oauth_url_response = carbon.integrations.get_oauth_url(
	# service="GOOGLE_DRIVE",
	# scope="https://www.googleapis.com/auth/drive.readonly",
	# connecting_new_account=True,
	# )
	# return get_oauth_url_response.oauth_url

	# def get_dropbox_oauth(carbon):
	# get_oauth_url_response = carbon.integrations.get_oauth_url(
	# service="DROPBOX",
	# connecting_new_account=True,
	# )
	# return get_oauth_url_response.oauth_url

	# def get_notion_oauth(carbon):
	# get_oauth_url_response = carbon.integrations.get_oauth_url(
	# service="NOTION",
	# connecting_new_account=True,
	# )
	# return get_oauth_url_response.oauth_url

	# def sync_github(carbon, username, token):
	# sync_response = carbon.integrations.sync_git_hub(
	# username=username,
	# token=token,
	# sync_source_items=True
	# )
	# return sync_response

	# def sync_gitbook(carbon, access_token, organization):
	# sync_response = carbon.integrations.sync_git_book(
	# access_token=access_token,
	# organization=organization,
	# sync_source_items=True
	# )
	# return sync_response

	# def sync_s3(carbon, access_key, access_key_secret):
	# sync_response = carbon.integrations.sync_s3(
	# access_key=access_key,
	# access_key_secret=access_key_secret,
	# sync_source_items=True
	# )
	# return sync_response

	# def sync_google_drive(carbon, data_source_id):
	# sync_response = carbon.integrations.sync_data_source_items(data_source_id=int(data_source_id))
	# return sync_response


	# def list_files(carbon, data_source_id=None, service="GOOGLE_DRIVE"):
	# if data_source_id:
	# sync_google_drive(carbon, data_source_id)
	# list_files_response = carbon.integrations.list_data_source_items(data_source_id=int(data_source_id))
	# st.session_state['current_data_source'] = data_source_id # Store the current data source
	# st.session_state['files'] = list_files_response.items # Store the fetched files
	# else:
	# if 'current_data_source' in st.session_state and st.session_state['current_data_source']:
	# data_source_id = st.session_state['current_data_source']
	# sync_google_drive(carbon, data_source_id)
	# list_files_response = carbon.integrations.list_data_source_items(data_source_id=int(data_source_id))
	# st.session_state['files'] = list_files_response.items # Store the fetched files
	# else:
	# query_user_data_sources_response = carbon.data_sources.query_user_data_sources(
	# pagination={"limit": 100, "offset": 0},
	# order_by="created_at",
	# order_dir="desc",
	# filters={"source": service},
	# )
	# if query_user_data_sources_response.results:
	# data_source_id = query_user_data_sources_response.results[0].id
	# sync_google_drive(carbon, data_source_id)
	# list_files_response = carbon.integrations.list_data_source_items(data_source_id=int(data_source_id))
	# st.session_state['current_data_source'] = data_source_id
	# st.session_state['files'] = list_files_response.items # Store the fetched files
	# else:
	# list_files_response = None
	# return list_files_response.items if list_files_response else None

	# def list_all_files(carbon, data_source_id):
	# url = "https://api.carbon.ai/user_files_v2"
	# payload = {
	# "pagination": {
	# "limit": 100,
	# "offset": 0
	# },
	# "order_by": "created_at",
	# "order_dir": "desc",
	# "filters": {
	# "organization_user_data_source_id": [data_source_id],
	# "embedding_generators": ["OPENAI"],
	# "include_all_children": True,
	# },
	# "include_raw_file": True,
	# "include_parsed_text_file": True,
	# "include_additional_files": True
	# }
	# headers = {
	# "authorization": f"Bearer {CARBON_API_KEY}",
	# "customer-id": CUSTOMER_ID,
	# "Content-Type": "application/json"
	# }

	# response = requests.request("POST", url, json=payload, headers=headers)
	# res = json.loads(response.text)
	# file_id= res['results'][0]['id']
	# for i,document in enumerate(res['results']):
	# print(document['name'])
	# print(document['id'])

	# file_id=res['results'][0]['id']
	# print(file_id)
	# return res['results']


	# def list_user_documents(carbon):
	# query_user_documents_response = carbon.documents.query_documents(
	# pagination={"limit": 100, "offset": 0},
	# order_by="created_at",
	# order_dir="desc"
	# )
	# return query_user_documents_response.documents if query_user_documents_response else None

	# def semantic_search_v2(carbon, query, tags_v2=None, hybrid_search=False):
	# search_response = carbon.embeddings.get_documents(
	# query=query,
	# k=2,
	# tags_v2=tags_v2 if tags_v2 else {},
	# include_tags=True,
	# include_vectors=True,
	# include_raw_file=True,
	# hybrid_search=hybrid_search,
	# hybrid_search_tuning_parameters={
	# "weight_a": 0.5,
	# "weight_b": 0.5,
	# },
	# media_type="TEXT",
	# embedding_model="OPENAI",
	# )
	# return search_response.documents


	# def main():
	# st.title('Data Connector using Carbon SDK')

	# # Authenticate with Carbon API
	# st.write('### Authenticate with Carbon API')
	# carbon = Carbon(api_key=CARBON_API_KEY, customer_id=CUSTOMER_ID)
	# token = carbon.auth.get_access_token()
	# carbon = Carbon(access_token=token.access_token) # authenticated object

	# # Connect to Data Source
	# st.write('## Connect to Data Source')
	# service = st.selectbox('Select Data Source for OAuth', ['GOOGLE_DRIVE', 'DROPBOX', 'NOTION'])
	# if st.button('Get OAuth URL'):
	# with st.spinner('Fetching OAuth URL...'):
	# try:
	# if service == "GOOGLE_DRIVE":
	# oauth_url = get_google_drive_oauth(carbon)
	# elif service == "DROPBOX":
	# oauth_url = get_dropbox_oauth(carbon)
	# elif service == "NOTION":
	# oauth_url = get_notion_oauth(carbon)
	# st.write(f"OAuth URL for {service}: {oauth_url}")
	# st.session_state['current_data_source'] = None # Reset the current data source
	# st.session_state['files'] = None # Clear the previous files
	# st.session_state['oauth_fetched'] = True
	# except Exception as e:
	# st.error(f"An error occurred: {e}")

	# if 'oauth_fetched' in st.session_state and st.session_state['oauth_fetched']:
	# st.write("OAuth URL fetched. Please authenticate and then click 'Sync and Fetch Files'.")
	# if st.button('Sync and Fetch Files'):
	# with st.spinner('Syncing and fetching files...'):
	# try:
	# query_user_data_sources_response = carbon.data_sources.query_user_data_sources(
	# pagination={"limit": 100, "offset": 0},
	# order_by="created_at",
	# order_dir="desc",
	# filters={"source": service},
	# )
	# if query_user_data_sources_response.results:
	# data_source_id = query_user_data_sources_response.results[0].id
	# sync_google_drive(carbon, data_source_id)
	# st.session_state['current_data_source'] = data_source_id
	# st.session_state['oauth_fetched'] = False
	# st.success("Synced successfully! Now you can list the files.")
	# else:
	# st.error("No data sources found. Please ensure the connection was successful.")
	# except Exception as e:
	# st.error(f"An error occurred: {e}")

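	# # List Files in Data Source
	# # NOTE(review): the text-input label below says "leave blank to list all files", but that is not
	# # what the code does. A data source stored in st.session_state['current_data_source'] replaces
	# # whatever was typed, and a blank value makes list_files() fall back to the session's data source
	# # or the most recently created one for the selected service. Nesting is inferred because the
	# # original indentation is lost — TODO confirm before re-enabling.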
	# st.write(f'## List Files in {service}')
	# data_source_id = st.text_input('Enter Data Source ID (leave blank to list all files)')
	# if st.button('List Files'):
	# with st.spinner('Fetching files...'):
	# try:
	# if 'current_data_source' in st.session_state and st.session_state['current_data_source']:
	# data_source_id = st.session_state['current_data_source']
	# files = list_files(carbon, data_source_id if data_source_id else None, service)
	# if files:
	# st.write(f"Files in {service}:")
	# for item in files:
	# st.write(f"File ID: {item.id}, File Name: {item.name}, File Size: {item.size if hasattr(item, 'size') else 'N/A'}, Last Modified: {item.last_modified if hasattr(item, 'last_modified') else 'N/A'}")
	# else:
	# st.write("No files found.")
	# except Exception as e:
	# st.error(f"An error occurred: {e}")

	# # List All Files
	# st.write('### List All Files')
	# if st.button('List All Files'):
	# with st.spinner('Fetching all files...'):
	# try:
	# if 'current_data_source' in st.session_state and st.session_state['current_data_source']:
	# data_source_id = st.session_state['current_data_source']
	# all_files = list_all_files(carbon, data_source_id)
	# if all_files:
	# st.write("All files:")
	# for i, document in enumerate(all_files):
	# file_id = document['id']
	# st.write(f"File ID: {document['id']}, File Name: {document['name']}")
	# else:
	# st.write("No files found.")
	# except Exception as e:
	# st.error(f"An error occurred: {e}")

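	# # Search in the Connected Data Source
	# # NOTE(review): the request payload below sends `file_id`, which in this function is only assigned
	# # inside the 'List All Files' result loop. On a run where that loop has not executed, building the
	# # payload fails on the unassigned name and the error is surfaced by the st.error handler below.
	# # The return value of list_all_files() (bound to `all_files`) is fetched here but not used;
	# # presumably its ids were meant to populate "file_ids" — TODO confirm.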
	# st.write('### Search in the Connected Data Source')
	# query = st.text_input("Enter your query:", value="Type here...")
	# if st.button('Search'):
	# if query:
	# with st.spinner('Searching...'):
	# try:
	# all_files = list_all_files(carbon, data_source_id)
	# url = "https://api.carbon.ai/embeddings"
	# payload = {
	# "query": query,
	# "k": 2,
	# "file_ids": file_id,
	# "include_all_children": True,
	# "tags": {},
	# "include_tags": True,
	# "include_vectors": True,
	# "include_raw_file": True,
	# "hybrid_search": False,
	# "media_type": "TEXT",
	# "embedding_model": "OPENAI"
	# }
	# headers = {
	# "authorization": f"Bearer {CARBON_API_KEY}",
	# "customer-id": CUSTOMER_ID,
	# "Content-Type": "application/json"
	# }
	# response_search = requests.post(url, json=payload, headers=headers)
	# response_search_chunks = json.loads(response_search.text)

	# st.write("Search results:")
	# for i, doc in enumerate(response_search_chunks['documents']):
	# st.write(f"Document {i+1}:")
	# st.write(f"Content: {doc['content']}")
	# st.write(f"Source: {doc['source']}")
	# st.write(f"Match Percentage: {doc['score'] * 100}%")
	# if 'file_url' in doc:
	# st.markdown(f"[Download {doc['filename']}]({doc['file_url']})")
	# st.write("-------------------------------------------------")
	# except Exception as e:
	# st.error(f"An error occurred: {e}")
	# else:
	# st.write("Please enter a query to search.")

	# # Display Search History
	# st.write('## Search History')
	# if 'search_history' not in st.session_state:
	# st.session_state['search_history'] = []

	# if query and st.button('Add to Search History'):
	# st.session_state['search_history'].append(query)

	# if st.session_state['search_history']:
	# st.write("Past Searches:")
	# for past_query in st.session_state['search_history']:
	# st.write(past_query)

	# # Call the main function
	# if __name__ == '__main__':
	# main()