sunbal7 committed
Commit 449bb7f · verified · 1 Parent(s): 88ca44f

Create app.py

Files changed (1)
  app.py  +175  -0
app.py ADDED
@@ -0,0 +1,175 @@
# app.py
import streamlit as st
import streamlit.components.v1 as components
import arxiv
import requests
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from keybert import KeyBERT
from pyvis.network import Network
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer

# Initialize models
@st.cache_resource
def load_models():
    # Summarization model
    tokenizer = AutoTokenizer.from_pretrained("facebook/bart-large-cnn")
    summarizer = AutoModelForSeq2SeqLM.from_pretrained("facebook/bart-large-cnn")

    # Keyword model
    kw_model = KeyBERT()

    # Research suggestion model
    suggestion_tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-base")
    suggestion_model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-base")

    return tokenizer, summarizer, kw_model, suggestion_tokenizer, suggestion_model

def fetch_arxiv_papers(query, max_results=10):
    client = arxiv.Client()
    search = arxiv.Search(
        query=query,
        max_results=max_results,
        sort_by=arxiv.SortCriterion.Relevance
    )
    results = []
    for result in client.results(search):
        results.append({
            "title": result.title,
            "abstract": result.summary,
            "authors": [a.name for a in result.authors],
            "published": result.published.strftime("%Y-%m-%d"),
            "pdf_url": result.pdf_url,
            "doi": result.doi
        })
    return results

def fetch_semantic_scholar(query, max_results=5):
    url = "https://api.semanticscholar.org/graph/v1/paper/search"
    params = {
        "query": query,
        "limit": max_results,
        "fields": "title,abstract,authors,year,references,url"
    }
    # An API key is optional and only raises rate limits; sending a
    # placeholder key would make the API reject the request outright.
    api_key = ""  # put your Semantic Scholar API key here if you have one
    headers = {"x-api-key": api_key} if api_key else {}
    response = requests.get(url, params=params, headers=headers)
    if response.status_code == 200:
        papers = response.json().get("data", [])
        # Normalize to the same shape as the arXiv results: authors come
        # back as {"authorId", "name"} dicts and abstracts can be null.
        for paper in papers:
            paper["authors"] = [a.get("name", "") for a in paper.get("authors") or []]
            paper["abstract"] = paper.get("abstract") or ""
        return papers
    return []

def generate_summary(text, tokenizer, model, max_length=300):
    inputs = tokenizer([text], max_length=1024, return_tensors="pt", truncation=True)
    summary_ids = model.generate(
        inputs.input_ids,
        max_length=max_length,
        min_length=50,
        length_penalty=2.0,
        num_beams=4,
        early_stopping=True
    )
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)

def generate_concept_map(texts, model):
    keywords = []
    for text in texts:
        kws = model.extract_keywords(text, keyphrase_ngram_range=(1, 2))
        keywords.extend([kw[0] for kw in kws])

    # Deduplicate before vectorizing so TF-IDF rows line up with graph nodes.
    unique_kws = list(set(keywords))

    vectorizer = TfidfVectorizer()
    X = vectorizer.fit_transform(unique_kws)

    net = Network(height="400px", width="100%")
    for kw in unique_kws:
        net.add_node(kw, label=kw)

    # TF-IDF rows are L2-normalized, so X * X.T gives cosine similarities.
    similarities = (X * X.T).toarray()
    np.fill_diagonal(similarities, 0)

    for i in range(len(unique_kws)):
        for j in range(i + 1, len(unique_kws)):
            if similarities[i, j] > 0.2:
                # Cast to float: pyvis serializes edge values to JSON.
                net.add_edge(unique_kws[i], unique_kws[j], value=float(similarities[i, j]))

    return net

def generate_citations(papers):
    citations = []
    for paper in papers:
        entry = {
            "title": paper.get("title", ""),
            "authors": paper.get("authors", []),
            # arXiv entries carry a "published" date, Semantic Scholar a "year"
            "year": paper.get("year") or paper.get("published", ""),
            "url": paper.get("pdf_url") or paper.get("url", "")
        }
        citations.append(entry)
    return citations

def generate_research_suggestions(context, tokenizer, model):
    input_text = f"Based on this research context: {context}\nGenerate three research questions:"
    # Truncate to the model's input window; three abstracts can exceed it.
    inputs = tokenizer(input_text, return_tensors="pt", truncation=True, max_length=512)
    outputs = model.generate(**inputs, max_length=200)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

def main():
    st.title("PaperPilot - Intelligent Academic Navigator")

    # Load models
    tokenizer, summarizer, kw_model, suggestion_tokenizer, suggestion_model = load_models()

    # User input
    query = st.text_input("Enter your research topic or question:")

    if query:
        with st.spinner("Searching academic databases..."):
            arxiv_results = fetch_arxiv_papers(query)
            ss_results = fetch_semantic_scholar(query)
            all_papers = arxiv_results + ss_results

        if not all_papers:
            st.warning("No papers found. Try a different query.")
            return

        # Display papers
        st.subheader("Relevant Papers")
        for paper in all_papers[:5]:
            with st.expander(f"{paper['title']}"):
                st.write(f"**Abstract:** {paper['abstract']}")

                # Generate summary (skip papers without an abstract)
                if paper['abstract']:
                    summary = generate_summary(paper['abstract'], tokenizer, summarizer)
                    st.write(f"**Summary:** {summary}")

                # Display metadata
                st.write(f"**Authors:** {', '.join(paper.get('authors', []))}")
                st.write(f"**Published:** {paper.get('published') or paper.get('year')}")
                st.write(f"**URL:** {paper.get('pdf_url') or paper.get('url')}")

        # Concept Map
        st.subheader("Research Concept Map")
        texts = [p['abstract'] for p in all_papers]
        net = generate_concept_map(texts, kw_model)
        net.save_graph("concept_map.html")
        with open("concept_map.html", "r", encoding="utf-8") as html_file:
            components.html(html_file.read(), height=500)

        # Citations
        st.subheader("Citation Management")
        citations = generate_citations(all_papers)
        # The selected style is not applied yet; every entry renders in the
        # same author-year format below.
        citation_format = st.selectbox("Select citation style:", ["APA", "MLA", "Chicago"])

        for cite in citations:
            first_author = cite['authors'][0] if cite['authors'] else "Unknown"
            st.code(f"{first_author} et al. ({cite['year']}). {cite['title']}. URL: {cite['url']}")

        # Research Suggestions
        st.subheader("Research Proposal Suggestions")
        context = " ".join([p['abstract'] for p in all_papers[:3]])
        suggestions = generate_research_suggestions(context, suggestion_tokenizer, suggestion_model)
        st.write(suggestions)

if __name__ == "__main__":
    main()
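
To try the app locally (a minimal sketch, assuming the packages the imports rely on are installed: streamlit, arxiv, requests, transformers, torch, keybert, pyvis, numpy, scikit-learn), run `streamlit run app.py` from the repository root.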