File size: 8,562 Bytes
832926c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
# research/arxiv_research.py
import asyncio
import aiohttp
import nest_asyncio
import xml.etree.ElementTree as ET  # For parsing Arxiv XML response
nest_asyncio.apply()

# API Endpoints
# OPENROUTER_URL is the chat-completions endpoint posted to by call_openrouter_async;
# ARXIV_API_URL is the query endpoint fetched by search_arxiv_async.
OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions"
ARXIV_API_URL = "http://export.arxiv.org/api/query"

# Global API Key (You'll set this in app.py)
# Sent as the Bearer token in the Authorization header of every OpenRouter request.
OPENROUTER_API_KEY = ""
# Model used by call_openrouter_async when the caller does not pass one.
DEFAULT_MODEL = "google/gemini-2.0-flash-lite-preview-02-05:free"

# NOTE(review): not referenced anywhere in the visible code; research_flow takes
# the paper count as an argument instead — confirm whether this is still needed.
FIXED_PAPER_COUNT = 70  
async def call_openrouter_async(session, messages, model=DEFAULT_MODEL):
    """
    Send a chat-completion request to OpenRouter and return the reply text.

    Args:
        session: HTTP client session whose ``post(...)`` result is used as an
            async context manager.
        messages: Chat messages forwarded unchanged in the request payload.
        model: Model identifier to request; defaults to DEFAULT_MODEL.

    Returns:
        The content of the first choice's message, or None when the API
        answers with a non-200 status or any exception occurs (the problem
        is printed in both cases).
    """
    request_headers = {
        "Authorization": f"Bearer {OPENROUTER_API_KEY}",
        "HTTP-Referer": "https://github.com/Pygen",
        "X-Title": "Arxiv Literature Review Assistant",
        "Content-Type": "application/json"
    }
    request_body = {
        "model": model,
        "messages": messages,
        "temperature": 0.7,
        "max_tokens": 4096
    }

    try:
        async with session.post(OPENROUTER_URL, headers=request_headers, json=request_body) as resp:
            # Guard clause: report and bail out on anything but success.
            if resp.status != 200:
                error_body = await resp.text()
                print(f"OpenRouter API error: {resp.status} - {error_body}")
                return None

            data = await resp.json()
            first_choice = data['choices'][0]
            return first_choice['message']['content']
    except Exception as exc:
        # Best-effort call: network failures and malformed payloads are
        # reported and turned into None rather than propagated.
        print("Error during OpenRouter call:", exc)
        return None

async def search_arxiv_async(session, query, max_results=100):
    """
    Search Arxiv API (no API key needed) and return paper entries.

    Args:
        session: HTTP client session whose ``get(url, params=...)`` result is
            used as an async context manager (e.g. an aiohttp.ClientSession).
        query: Search expression sent as the ``search_query`` parameter.
        max_results: Maximum number of entries to request from the API.

    Returns:
        A list of dicts with the keys 'title', 'abstract', 'url', 'authors'
        (names joined with ", ") and 'year'. A field that is missing or empty
        in the feed is reported as "N/A". An empty list is returned on a
        non-200 response or on any request/parsing error (the problem is
        printed in both cases).
    """
    params = {
        'search_query': query,
        'start': 0,
        'max_results': max_results,
        'sortBy': 'relevance',
        'sortOrder': 'descending'
    }
    namespace = {'atom': 'http://www.w3.org/2005/Atom'}

    def text_of(element):
        # An element can be present yet carry no text (e.g. <title/>), in
        # which case .text is None. Treat that like a missing element so one
        # sparse entry cannot raise and discard the whole result set.
        if element is None or element.text is None:
            return "N/A"
        return element.text.strip() or "N/A"

    paper_entries = []
    try:
        async with session.get(ARXIV_API_URL, params=params) as response:
            if response.status != 200:
                print(f"Arxiv API error: {response.status}")
                return []

            xml_content = await response.text()
            root = ET.fromstring(xml_content)

            for entry in root.findall('atom:entry', namespace):
                # Collapse runs of whitespace: long titles are line-wrapped
                # in the feed and would otherwise carry embedded newlines.
                title = ' '.join(text_of(entry.find('atom:title', namespace)).split())
                abstract = text_of(entry.find('atom:summary', namespace)).replace('\n', ' ')
                url = text_of(entry.find('atom:id', namespace))
                published = text_of(entry.find('atom:published', namespace))

                # Skip author elements without usable text so the join below
                # never sees None.
                authors = [
                    author.text.strip()
                    for author in entry.findall('atom:author/atom:name', namespace)
                    if author.text and author.text.strip()
                ] or ["N/A"]

                paper_entries.append({
                    'title': title,
                    'abstract': abstract,
                    'url': url,
                    'authors': ', '.join(authors),
                    # The timestamp starts with the four-digit year; the
                    # "N/A" placeholder is passed through unchanged.
                    'year': published[:4] if published != "N/A" else "N/A"
                })
    except Exception as e:
        # Best-effort search: network and XML errors yield an empty result.
        print(f"Error during Arxiv API call: {e}")
        return []
    return paper_entries

async def prepare_references(paper_entries):
    """
    Turn paper entries into a numbered reference list.

    Each reference copies 'authors', 'title', 'year', 'url' and 'abstract'
    from its paper and adds a 1-based 'citation_number' plus the matching
    bracketed 'citation_key' (e.g. "[3]").
    """
    copied_fields = ('authors', 'title', 'year', 'url', 'abstract')
    return [
        {
            'citation_number': number,
            **{field: entry[field] for field in copied_fields},
            'citation_key': f"[{number}]",
        }
        for number, entry in enumerate(paper_entries, start=1)
    ]

async def generate_bibtex_entry(ref):
    """
    Generate BibTeX entry for a paper.

    Args:
        ref: Reference dict with the keys 'url', 'authors' (names joined
            with ", "), 'title' and 'year'. An optional 'primary_class' key
            overrides the arXiv category written to ``primaryClass``; when
            absent the previous fixed value "cs.LG" is used.

    Returns:
        The ``@article`` entry as a string, terminated by a blank line so
        entries can be concatenated directly.
    """
    # The last URL path segment doubles as citation key and eprint id.
    arxiv_id = ref['url'].split('/')[-1]

    # BibTeX separates the names of an author list with " and "; a
    # comma-separated list is parsed as a single malformed name.
    # Assumes individual names contain no ", " themselves.
    authors = ref['authors'].replace(', ', ' and ')

    primary_class = ref.get('primary_class', 'cs.LG')

    bibtex = (
        f"@article{{{arxiv_id},\n"
        f"  author = {{{authors}}},\n"
        f"  title = {{{ref['title']}}},\n"
        f"  year = {{{ref['year']}}},\n"
        f"  eprint = {{{arxiv_id}}},\n"
        f"  archivePrefix = {{arXiv}},\n"
        f"  primaryClass = {{{primary_class}}},\n"
        f"  url = {{{ref['url']}}}\n"
        f"}}\n\n"  # Extra newline separates consecutive entries
    )
    return bibtex

async def generate_literature_review_async(session, user_query, paper_entries):
    """
    Generate literature review based on prepared references.

    Args:
        session: HTTP client session passed through to call_openrouter_async.
        user_query: Research topic inserted into the prompt.
        paper_entries: Paper dicts as produced by search_arxiv_async.

    Returns:
        The generated review followed by a "References" section and a
        "BibTeX Citations" section, each introduced by a separator line; or
        the string "Error generating literature review." when the model
        call yields no text.
    """
    # First prepare all references
    references = await prepare_references(paper_entries)
    paper_count = len(references)

    # Prepare paper information with citations
    papers_info = [
        (
            f"Paper {ref['citation_key']}:\n"
            f"Title: {ref['title']}\n"
            f"Abstract: {ref['abstract']}\n"
            f"Citation: Use {ref['citation_key']} to cite this paper"
        )
        for ref in references
    ]
    papers_block = "\n".join(papers_info)

    # Generate Nature-style review.
    # The length requirement must be an f-string so the model receives the
    # real paper count rather than a literal "{paper_count}" placeholder.
    review_prompt = (
        "Write a comprehensive literature review in Nature journal style. "
        "Requirements:\n"
        "1. Use formal Nature journal style\n"
        "2. Begin with a compelling introduction\n"
        "3. Organize findings into clear themes\n"
        "4. Use provided citation numbers [n] when discussing papers\n"
        "5. Each paper must be cited at least once\n"
        "6. Make connections between related papers\n"
        "7. Conclude with future directions\n"
        f"8. {paper_count} papers are provided. Make sure the literature review is "
        "at least 6000 words if there are more than 70 papers, at least 4000 words "
        "when there are 40 to 70 papers, and at least 2500 words when there are "
        "10 to 39 papers.\n"
        "9. DO NOT include references - they will be added separately\n"
        f"\nTopic: {user_query}\n\n"
        f"Available Papers:\n\n{papers_block}"
    )

    messages = [
        {"role": "system", "content": "You are a Nature journal editor writing a literature review."},
        {"role": "user", "content": review_prompt}
    ]

    literature_review = await call_openrouter_async(session, messages)
    if not literature_review:
        return "Error generating literature review."

    # Format references in Nature style and collect the BibTeX entries.
    reference_lines = []
    bibtex_entries = []
    for ref in references:
        arxiv_id = ref['url'].split('/')[-1]
        reference_lines.append(
            f"{ref['citation_number']}. {ref['authors']}. "
            f"{ref['title']}. "
            f"arXiv:{arxiv_id} ({ref['year']}). "
            f"Available at: {ref['url']}\n"
        )
        bibtex_entries.append(await generate_bibtex_entry(ref))

    refs_section = "\nReferences\n" + "".join(reference_lines)
    bibtex_section = "\nBibTeX Citations:\n\n" + "".join(bibtex_entries)

    # The same separator line precedes the references and the BibTeX block.
    separator = "\n" + "=" * 50 + "\n"
    return literature_review + separator + refs_section + separator + bibtex_section

async def research_flow(user_query, paper_count):
    """
    Run the full pipeline: search Arxiv, then write the literature review.

    Args:
        user_query: Research topic used for both the search and the review.
        paper_count: Number of papers to request and to pass to the review.

    Returns:
        The text returned by generate_literature_review_async, or a
        "no papers found" message when the search yields nothing.
    """
    async with aiohttp.ClientSession() as http:
        # Step 1: ask Arxiv for the requested number of papers.
        found = await search_arxiv_async(http, user_query, max_results=paper_count)

        # Step 2: build the review from at most paper_count of the hits.
        if found:
            selected = found[:paper_count]
            return await generate_literature_review_async(http, user_query, selected)

        return "No relevant papers found. Please try a different query."

# def main():
#     """CLI entry point."""
#     user_query = input("Enter your research topic/question: ").strip()
#     final_report = asyncio.run(research_flow(user_query))
#     print("\n==== LITERATURE REVIEW ====\n")
#     print(final_report)

# if __name__ == "__main__":
#     main()