filius-Dei committed on
Commit b534b3f (verified)
1 Parent(s): 2527ef1

Upload 2 files


With Streamlit features

Files changed (2)
  1. CiPE_Streamlit-2.ipynb +0 -0
  2. cipe_streamlit-2.py +290 -0
CiPE_Streamlit-2.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
cipe_streamlit-2.py ADDED
@@ -0,0 +1,290 @@
+ # -*- coding: utf-8 -*-
+ """CiPE_Streamlit
+ 
+ Automatically generated by Colaboratory.
+ 
+ Original file is located at
+ https://colab.research.google.com/drive/1jACLFXfsdWM59lrfTQGcZVsTIHBO92R8
+ """
+ 
+ # Om Maa
+ 
+ # Install dependencies before running. Shell magics like `!pip` are not valid
+ # Python in a plain .py script, so run these in your environment instead:
+ #   pip install langchain predictionguard lancedb html2text sentence-transformers PyPDF2
+ #   pip install huggingface_hub transformers sentencepiece streamlit
+ 
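+ # How to launch (editor's note; assumes this file's name as the entry point):
+ #     streamlit run cipe_streamlit-2.py
+ 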
+ import os
+ 
+ import predictionguard as pg
+ from sentence_transformers import SentenceTransformer
+ import lancedb
+ from lancedb.embeddings import with_embeddings
+ import pandas as pd
+ 
+ 
+ # Set your Prediction Guard API token here; do not commit a real secret to
+ # source control (the hardcoded token is replaced with a placeholder).
+ os.environ['PREDICTIONGUARD_TOKEN'] = "<PREDICTIONGUARD_TOKEN>"
+ 
+ # Streamlit app initiation
+ 
+ import streamlit as st
+ 
+ # Streamlit input widgets replace the notebook's input() calls.
+ # Sidebar widgets for the patient's name, age, gender, and ethnicity
+ name = st.sidebar.text_input('Name')
+ age = st.sidebar.number_input('Age', min_value=0, max_value=120, step=1)
+ gender = st.sidebar.selectbox('Gender', ['Male', 'Female', 'Other'])
+ ethnicity = st.sidebar.text_input('Ethnicity')
+ 
+ # Main container
+ with st.form(key='patient_form'):
+     # Text input for the disease
+     disease = st.text_area('DISEASE', height=100)
+ 
+     # Text input for prescriptions (the source of drug_names)
+     prescriptions = st.text_area('PRESCRIPTIONS', height=100)
+ 
+     # Text input for additional information
+     additional_info = st.text_area('ADDITIONAL INFO', height=100)
+ 
+     # Submit button for the form
+     submit_button = st.form_submit_button(label='Predict Drug Effects')
+ 
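+ # Editor's sketch (not in the original): everything below this form, namely
+ # PDF parsing, the SentenceTransformer load, and LanceDB indexing, reruns on
+ # every Streamlit interaction. st.cache_resource runs such setup once per
+ # process; a minimal sketch, assuming a helper named load_embedding_model:
+ #
+ #     @st.cache_resource
+ #     def load_embedding_model(model_name="all-MiniLM-L12-v2"):
+ #         from sentence_transformers import SentenceTransformer
+ #         return SentenceTransformer(model_name)
+ #
+ #     model = load_embedding_model()
+ 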
+ from PyPDF2 import PdfReader
+ 
+ # Replace this with the path to your PDF file (this is a Colab path)
+ pdf_path = '/content/drug_side_effects_summary_cleaned.pdf'
+ reader = PdfReader(pdf_path)
+ 
+ # Initialize an empty string to accumulate text
+ text = ''
+ 
+ # Iterate over each page in the PDF
+ for page in reader.pages:
+     # Extract text from the page and append it to the text string
+     text += page.extract_text() + "\n"
+ 
+ # Now `text` contains the text content of the PDF.
+ print(text[:500])  # Example: print the first 500 characters to inspect the structure
+ 
+ import re
+ 
+ # Function to clean the extracted text
+ def clean_text(text):
+     # Correct unwanted line breaks and spaces
+     text = re.sub(r'-\n', '', text)   # Remove hyphenation
+     text = re.sub(r'\n', ' ', text)   # Replace newlines with spaces
+     text = re.sub(r'\s+', ' ', text)  # Collapse multiple spaces into one
+     return text.strip()               # Remove leading and trailing spaces
+ 
+ # Clean the extracted text
+ cleaned_text = clean_text(text)
+ 
+ # Inspect a portion of the cleaned text to verify the cleaning
+ # (a bare expression does nothing in a script, so print it)
+ print(cleaned_text[:500])
+ 
+ # Define a function to chunk text with a given size and overlap using standard Python
+ def chunk_text(text, chunk_size=700, overlap=50):
+     chunks = []
+     start = 0
+     while start < len(text):
+         # After the first chunk, step back 'overlap' characters for context
+         if start > 0:
+             start -= overlap
+         end = start + chunk_size
+         chunks.append(text[start:end])
+         start += chunk_size
+     return chunks
+ 
+ # Chunk the cleaned text into smaller pieces for LLM input
+ docs_alternative = chunk_text(cleaned_text, chunk_size=700, overlap=50)
+ 
+ # Display the first few chunks to verify the result
+ chunks_to_display_alt = 3
+ chunks_preview_alt = docs_alternative[:chunks_to_display_alt]
+ print(chunks_preview_alt)
+ 
+ # Replace '#' in every chunk (not just the preview) so the chunks cannot
+ # collide with the '###' markers in the prompt template
+ docs_alternative = [x.replace('#', '-') for x in docs_alternative]
+ 
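+ # Quick sanity check of the chunker on a toy string (illustrative only):
+ # chunk_text("abcdefghij", chunk_size=4, overlap=2) returns
+ # ['abcd', 'cdef', 'efgh', 'ghij']; each chunk re-reads the previous 2
+ # characters, so the effective stride is chunk_size - overlap.
+ 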
+ # Embeddings setup
+ # Use a distinct variable so we don't shadow the Streamlit 'name' input above
+ model_name = "all-MiniLM-L12-v2"
+ model = SentenceTransformer(model_name)
+ 
+ # Embedding functions
+ def embed_batch(batch):
+     return [model.encode(sentence) for sentence in batch]
+ 
+ def embed(sentence):
+     return model.encode(sentence)
+ 
+ # Create the LanceDB directory if it does not already exist
+ lancedb_dir = ".lancedb"
+ if not os.path.exists(lancedb_dir):
+     os.mkdir(lancedb_dir)
+ uri = lancedb_dir
+ db = lancedb.connect(uri)
+ 
+ # Prepare metadata for embedding (index every chunk, not just the preview)
+ metadata = [[i, chunk] for i, chunk in enumerate(docs_alternative)]
+ doc_df = pd.DataFrame(metadata, columns=["chunk", "text"])
+ 
+ # Embed the documents
+ data = with_embeddings(embed_batch, doc_df)
+ 
+ # LanceDB operations: create the table once, then reuse it on later runs
+ # (creating it and then re-adding the same data would duplicate every row)
+ if "pdf_data" not in db.table_names():
+     db.create_table("pdf_data", data=data)
+ table = db.open_table("pdf_data")
+ 
+ # Note: Adjust the table name above to match your dataset
+ 
+ message = "What are the side effects of doxycycline for treating Acne?"
+ results = table.search(embed(message)).limit(5).to_pandas()
+ # print(results.head())
+ 
+ # Assuming the setup for embeddings and LanceDB above is in place
+ 
+ # Parse the prescriptions field to extract the drug names; the exact parsing
+ # depends on how prescriptions are entered (comma-separated here)
+ drug_names = [d.strip() for d in prescriptions.split(',') if d.strip()]
+ # (additional_info is collected in the form but not yet used below)
+ 
+ 
+ def rag_answer_drug_side_effects(name, drug_names, disease):
+     # Formulate a question related to drug side effects
+     message = f"What are the potential side effects of using {', '.join(drug_names)} for treating {disease}? Please provide a list of side effects specific to the use of these drugs in the context of the mentioned disease for {name}."
+ 
+     # Search the database for relevant context
+     results = table.search(embed(message)).limit(10).to_pandas()
+     results.sort_values(by=['_distance'], inplace=True, ascending=True)
+     context = results['text'].iloc[0]  # Use the most relevant document
+ 
+     # Define the prompt template
+     template = """### Instruction:
+ Start with "Hi, {name}." Then give a compassionate answer as a bulleted list.
+ Read the input context below and respond with a mid-length answer to the given question. If you cannot find an exact answer, then look up something close to the medication and disease.
+ 
+ ### Input:
+ Context: {context}
+ 
+ Question: {question}
+ 
+ ### Response:
+ """
+ 
+     # Augment the prompt with the retrieved context
+     # (the template also references {name}, so pass it to format())
+     prompt = template.format(name=name, context=context, question=message)
+ 
+     # Get a response
+     result = pg.Completion.create(
+         model="Neural-Chat-7B",
+         prompt=prompt
+     )
+ 
+     return result['choices'][0]['text']
+ 
+ 
+ def rag_answer_drug_benefit_effects(name, drug_names, disease):
+     # Formulate a question related to drug benefits
+     message = f"What are the potential benefits of using {', '.join(drug_names)} for treating {disease}? Please provide a list of benefits specific to the use of these drugs in the context of the mentioned disease for {name}."
+ 
+     # Search the database for relevant context
+     results = table.search(embed(message)).limit(10).to_pandas()
+     results.sort_values(by=['_distance'], inplace=True, ascending=True)
+     context = results['text'].iloc[0]  # Use the most relevant document
+ 
+     # Define the prompt template
+     template = """### Instruction:
+ Start with "Hi, {name}." Then give a compassionate answer as a bulleted list.
+ Read the input context below and respond with a mid-length answer to the given question. If you cannot find an exact answer, then look up something close to the medication and disease.
+ 
+ ### Input:
+ Context: {context}
+ 
+ Question: {question}
+ 
+ ### Response:
+ """
+ 
+     # Augment the prompt with the retrieved context
+     prompt = template.format(name=name, context=context, question=message)
+ 
+     # Get a response
+     result = pg.Completion.create(
+         model="Neural-Chat-7B",
+         prompt=prompt
+     )
+ 
+     return result['choices'][0]['text']
+ 
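+ # Editor's sketch (not in the original): the two functions above differ only
+ # in the words "side effects" vs. "benefits"; one parameterized helper could
+ # serve both, along these lines:
+ #
+ #     def rag_answer(aspect, name, drug_names, disease):
+ #         # aspect is "side effects" or "benefits"
+ #         message = (f"What are the potential {aspect} of using "
+ #                    f"{', '.join(drug_names)} for treating {disease}?")
+ #         ...  # retrieval, prompt formatting, and pg.Completion.create as above
+ 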
+ # The form's submit button returns True on the run in which it was clicked;
+ # a separate st.button here would duplicate the handler, so the form's
+ # submit_button alone triggers the prediction.
+ if submit_button:
+     # Call the processing functions; validate input and handle errors as necessary
+     try:
+         side_effects_response = rag_answer_drug_side_effects(name, drug_names, disease)
+         benefits_response = rag_answer_drug_benefit_effects(name, drug_names, disease)
+         st.write("Side Effects:", side_effects_response)
+         st.write("Benefits:", benefits_response)
+     except Exception as e:
+         st.error(f"An error occurred: {e}")
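+ 
+ # Optional polish (editor's sketch): wrap the calls in st.spinner for feedback:
+ #
+ #     with st.spinner('Querying the model...'):
+ #         side_effects_response = rag_answer_drug_side_effects(name, drug_names, disease)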
+ 
+ # Hugging Face Hub login. Note: notebook_login() is designed for notebooks;
+ # when running this as a plain script, `huggingface-cli login` (or
+ # huggingface_hub.login()) is the usual alternative.
+ from huggingface_hub import notebook_login
+ 
+ notebook_login()
+ 
+ from transformers import AutoModelForSequenceClassification, AutoTokenizer
+ 
+ # Define the checkpoint as a repo id (from_pretrained expects "filius-Dei/CiPE",
+ # not the full URL https://huggingface.co/filius-Dei/CiPE)
+ checkpoint_path = "filius-Dei/CiPE"
+ 
+ # Load the model
+ # model = AutoModelForSequenceClassification.from_pretrained(checkpoint_path)
+ 
+ # Load the tokenizer
+ # tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")