File size: 4,969 Bytes
1c744c7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bc3d855
1c744c7
 
bc3d855
1c744c7
bc3d855
1c744c7
bc3d855
1c744c7
bc3d855
1c744c7
bc3d855
1f6598f
bc3d855
1c744c7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
import streamlit as st
from streamlit_chat import message
import os
from utils import (
    parse_docx,
    parse_pdf,
    parse_txt,
    parse_csv,
    search_docs,
    embed_docs,
    text_to_docs,
    get_answer,
    get_sources,
    wrap_text_in_html,
)
from openai.error import OpenAIError

def clear_submit():
    st.session_state["submit"] = False

def set_openai_api_key(api_key: str):
    st.session_state["OPENAI_API_KEY"] = api_key

st.markdown('<h1>File GPT 🤖<small> by <a href="https://codegpt.co">Code GPT</a></small></h1>', unsafe_allow_html=True)

# Sidebar
index = None
doc = None
with st.sidebar:
    user_secret = st.text_input(
        "OpenAI API Key",
        type="password",
        placeholder="Paste your OpenAI API key here (sk-...)",
        help="You can get your API key from https://platform.openai.com/account/api-keys.",
        value=st.session_state.get("OPENAI_API_KEY", ""),
    )
    if user_secret:
        set_openai_api_key(user_secret)

    uploaded_file = st.file_uploader(
        "Upload a pdf, docx, or txt file",
        type=["pdf", "docx", "txt", "csv"],
        help="Scanned documents are not supported yet!",
        on_change=clear_submit,
    )

    if uploaded_file is not None:
        if uploaded_file.name.endswith(".pdf"):
            doc = parse_pdf(uploaded_file)
        elif uploaded_file.name.endswith(".docx"):
            doc = parse_docx(uploaded_file)
        elif uploaded_file.name.endswith(".csv"):
            doc = parse_csv(uploaded_file)
        elif uploaded_file.name.endswith(".txt"):
            doc = parse_txt(uploaded_file)
        else:
            st.error("File type not supported")
            doc = None
        text = text_to_docs(doc)
        try:
            with st.spinner("Indexing document... This may take a while⏳"):
                index = embed_docs(text)
                st.session_state["api_key_configured"] = True
        except OpenAIError as e:
            st.error(e._message)

tab1, tab2 = st.tabs(["Intro", "Chat with the File"])
with tab1:
    st.markdown("### How does it work?")
    st.markdown('<p>Read the article to know how it works: <a target="_blank" href="https://medium.com/@dan.avila7/file-gpt-conversaci%C3%B3n-por-chat-con-un-archivo-698d17570358">Medium Article</a></p>', unsafe_allow_html=True)
    st.write("File GPT was written with the following tools:")
    st.markdown("#### Code GPT")
    st.write('All code was written with the help of Code GPT. Visit https://codegpt.co to get the extension.')
    st.markdown("#### Streamlit")
    st.write('The design was written with <a target="_blank" href="https://streamlit.io/">Streamlit</a>.', unsafe_allow_html=True)
    st.markdown("#### LangChain")
    st.write('Question answering with source <a target="_blank" href="https://langchain.readthedocs.io/en/latest/use_cases/question_answering.html#adding-in-sources">Langchain QA</a>.', unsafe_allow_html=True)
    st.markdown("#### Embedding")
    st.write('<a target="_blank" href="https://platform.openai.com/docs/guides/embeddings">Embedding</a> is done via the OpenAI API with "text-embedding-ada-002"', unsafe_allow_html=True)
    st.markdown("""---""")
    st.write('Author:  <a target="_blank" href="https://www.linkedin.com/in/daniel-avila-arias/">Daniel Avila</a>', unsafe_allow_html=True)
    st.write('Repo: <a target="_blank" href="https://github.com/davila7/file-gpt">Github</a>', unsafe_allow_html=True)
    st.write("This software was developed with Code GPT, for more information visit: https://codegpt.co", unsafe_allow_html=True)

with tab2:
    st.write('To obtain an API Key you must create an OpenAI account at the following link: https://openai.com/api/')
    if 'generated' not in st.session_state:
        st.session_state['generated'] = []

    if 'past' not in st.session_state:
        st.session_state['past'] = []

    def get_text():
        if user_secret:
            st.header("Ask me something about the document:")
            input_text = st.text_area("You:", on_change=clear_submit)
            return input_text
    user_input = get_text()

    button = st.button("Submit")
    if button or st.session_state.get("submit"):
        if not user_input:
            st.error("Please enter a question!")
        else:
            st.session_state["submit"] = True
            sources = search_docs(index, user_input)
            try:
                answer = get_answer(sources, user_input)
                st.session_state.past.append(user_input)
                st.session_state.generated.append(answer["output_text"].split("SOURCES: ")[0])
            except OpenAIError as e:
                st.error(e._message)
            if st.session_state['generated']:
                for i in range(len(st.session_state['generated'])-1, -1, -1):
                    message(st.session_state["generated"][i], key=str(i))
                    message(st.session_state['past'][i], is_user=True, key=str(i) + '_user')