#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Index local papers with PaperQA (backed by local Ollama models) and query them.

On first run the current directory tree is walked: every ``arXiv-*`` folder is
flattened into a single ``.main.tex`` with ``latexpand`` and added to the
index, and loose ``.pdf``/``.txt``/``.md``/``.tex`` files are added as-is.
The resulting ``Docs`` object is pickled to ``pqa_index.pkl`` and reused on
later runs. The command-line arguments are joined into the query string.
"""

import subprocess
import sys
from pathlib import Path
import pickle

from paperqa import Settings, Docs

local_llm_config = {
    "model_list": [
        {
            "model_name": "ollama/llama3.1",
            "litellm_params": {
                "model": "ollama/llama3.1",
            },
        },
    ]
}

local_emb_config = {
    "model_list": [
        {
            "model_name": "ollama/mxbai-embed-large",
            "litellm_params": {
                "model": "ollama/mxbai-embed-large",
            },
        }
    ]
}

settings = Settings(
    llm="ollama/llama3.1",
    llm_config=local_llm_config,
    summary_llm="ollama/llama3.1",
    summary_llm_config=local_llm_config,
    embedding="ollama/mxbai-embed-large",
    embedding_config=local_emb_config,
)

# Folders whose name starts with this prefix are treated as LaTeX source trees.
ARXIV_DIR_PREFIX = "arXiv-"
# Loose files with one of these (lower-cased) suffixes are indexed directly.
INDEXABLE_SUFFIXES = (".pdf", ".txt", ".md", ".tex")
# Exceptions from ``docs.add`` that skip one document instead of aborting.
ADD_ERRORS = (IOError, OSError, ValueError)


def find_main_tex_file(folder_path: Path) -> Path | None:
    """
    Find the main LaTeX file in the given folder.

    Candidates are all ``.tex`` files below ``folder_path`` (recursively),
    ordered shallowest-first so the choice is deterministic. The search
    prefers, in order: a file named 'main.tex', 'paper.tex' or 'article.tex';
    the first file whose text contains '\\documentclass'; the first candidate.

    Args:
        folder_path (Path): The path to the folder to search in.

    Returns:
        Path: The path to the main .tex file, or None if no .tex files are found.
    """
    tex_files = sorted(
        folder_path.glob("**/*.tex"),
        key=lambda path: (len(path.parts), path),
    )
    if not tex_files:
        return None

    # Map file name -> shallowest Path carrying that name. The candidates are
    # Path objects, so a bare string must be compared against ``Path.name``.
    first_by_name = {}
    for tex_file in tex_files:
        first_by_name.setdefault(tex_file.name, tex_file)

    # Check for common main file names
    for name in ("main.tex", "paper.tex", "article.tex"):
        if name in first_by_name:
            return first_by_name[name]

    # If no common name found, look for \documentclass
    for tex_file in tex_files:
        # Sources are not always valid UTF-8; undecodable bytes are dropped
        # because only the ASCII marker matters here.
        with open(tex_file, "r", encoding="utf-8", errors="ignore") as f:
            if "\\documentclass" in f.read():
                return tex_file

    # If still not found, return the first .tex file
    return tex_files[0]


def run_latexpand(input_file, output_file):
    """
    Run the latexpand command on the input file and write the result to the output file.

    The standard output of ``latexpand input_file`` is captured and written
    to ``output_file`` as UTF-8 text.

    Args:
        input_file (str or Path): The path to the input LaTeX file.
        output_file (str or Path): The path where the expanded LaTeX content will be written.

    Raises:
        subprocess.CalledProcessError: If latexpand exits with a non-zero status.
        FileNotFoundError: If the latexpand command is not found in the system PATH.
    """
    # Only the subprocess call is guarded, so a failure to open output_file is
    # not misreported as a missing latexpand binary.
    try:
        result = subprocess.run(
            ["latexpand", input_file],
            capture_output=True,
            text=True,
            check=True,
        )
    except subprocess.CalledProcessError as e:
        print(f"Error running latexpand: {e}")
        raise  # let the caller skip this document instead of using stale output
    except FileNotFoundError:
        print("latexpand not found. Please make sure it's installed and in your PATH.")
        raise

    with open(output_file, "w", encoding="utf-8") as output_file_handle:
        output_file_handle.write(result.stdout)
    print(f"Expanded LaTeX written to {output_file}")


def _add_arxiv_source(index, dir_path):
    """Flatten the LaTeX project in dir_path and add it to index; return True on success."""
    concat_main = dir_path / ".main.tex"
    try:
        # Step 1: Find the main entry TeX file
        main_file = find_main_tex_file(dir_path)
        if not main_file:
            raise ValueError("No main TeX file found.")
        # Step 2 & 3: Run latexpand and write output
        run_latexpand(main_file, concat_main)
    except (ValueError, subprocess.CalledProcessError, FileNotFoundError) as preprocess_error:
        print(f"Failed to pre-process {dir_path.name}: {preprocess_error}")
        return False

    print(f"adding {dir_path} (latex source)")
    try:
        index.add(concat_main, settings=settings, disable_check=True)
    except ADD_ERRORS as add_error:
        print(f"Failed to add {dir_path}: {add_error}")
        return False
    return True


def _build_index():
    """Walk the current directory tree and return a Docs index of everything found."""
    index = Docs()
    for root, dirs, files in Path(".").walk():
        # Iterate over a snapshot: successfully ingested arXiv folders are
        # removed from ``dirs`` in place so walk() does not descend into them
        # and index their fragments a second time. Folders that failed stay
        # in ``dirs`` and are indexed file-by-file as a fallback.
        for dir_name in list(dirs):
            if not dir_name.startswith(ARXIV_DIR_PREFIX):
                continue
            if _add_arxiv_source(index, root / dir_name):
                dirs.remove(dir_name)

        for file_name in files:
            if not file_name.lower().endswith(INDEXABLE_SUFFIXES):
                continue
            file_path = root / file_name
            print(f"adding {file_path}")
            try:
                index.add(file_path, settings=settings, disable_check=True)
            except ADD_ERRORS as add_error:
                # One unreadable document must not discard the whole index.
                print(f"Failed to add {file_path}: {add_error}")
    return index


cache_path = Path("pqa_index.pkl")
if cache_path.exists():
    # NOTE(security): unpickling executes arbitrary code embedded in the file.
    # Only reuse a pqa_index.pkl that this script created on this machine.
    with open(cache_path, "rb") as f:
        docs = pickle.load(f)
else:
    docs = _build_index()
    with open(cache_path, "wb") as f:
        pickle.dump(docs, f)


if __name__ == "__main__":
    if len(sys.argv) > 1:
        QUERY = " ".join(sys.argv[1:])
        answer = docs.query(QUERY, settings=settings)
        print(answer)
    else:
        print("Please provide a query as a command-line argument.")
        print("Usage: python script_name.py 'Your query here'")