File size: 5,305 Bytes
f1a2ec8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 |
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import subprocess
import sys
from pathlib import Path
import pickle
from paperqa import Settings, Docs
# Model list for the local chat model. The same dict is handed to Settings
# below as both `llm_config` and `summary_llm_config`.
local_llm_config = dict(
    model_list=[
        dict(
            model_name="ollama/llama3.1",
            litellm_params=dict(model="ollama/llama3.1"),
        ),
    ],
)
# Model list for the local embedding model; handed to Settings below as
# `embedding_config`.
local_emb_config = dict(
    model_list=[
        dict(
            model_name="ollama/mxbai-embed-large",
            litellm_params=dict(model="ollama/mxbai-embed-large"),
        ),
    ],
)
# Model identifiers shared by the Settings fields below.
_LOCAL_CHAT_MODEL = "ollama/llama3.1"
_LOCAL_EMBEDDING_MODEL = "ollama/mxbai-embed-large"

# One Settings object reused for both indexing (docs.add) and querying
# (docs.query). The answer and summary roles use the same chat model and the
# same model-list config; embeddings use their own model and config.
settings = Settings(
    embedding=_LOCAL_EMBEDDING_MODEL,
    embedding_config=local_emb_config,
    llm=_LOCAL_CHAT_MODEL,
    llm_config=local_llm_config,
    summary_llm=_LOCAL_CHAT_MODEL,
    summary_llm_config=local_llm_config,
)
def find_main_tex_file(folder_path: Path):
"""
Find the main LaTeX file in the given folder.
This function searches for a .tex file that is likely to be the main file
of a LaTeX project. It first checks for common names like 'main.tex',
then looks for files containing '\\documentclass', and finally returns
the first .tex file if no other criteria are met.
Args:
folder_path (Path): The path to the folder to search in.
Returns:
Path: The path to the main .tex file, or None if no .tex files are found.
"""
tex_files = list(folder_path.glob('**/*.tex'))
if not tex_files:
return None
# Check for common main file names
common_names = ['main.tex', 'paper.tex', 'article.tex']
for name in common_names:
if name in tex_files:
return name
# If no common name found, look for \documentclass
for file in tex_files:
with open(file, 'r', encoding='utf-8') as f:
content = f.read()
if '\\documentclass' in content:
return file
# If still not found, return the first .tex file
return tex_files[0]
def run_latexpand(input_file, output_file):
    """
    Run the latexpand command on the input file and write the result to the output file.

    latexpand's standard output (the flattened document) is captured and then
    written to the output file as UTF-8. The output file is only created when
    latexpand succeeds.

    Failures are reported on stdout and then re-raised, so that callers can
    skip the document instead of continuing with a missing or stale output
    file.

    Args:
        input_file (str or Path): The path to the input LaTeX file.
        output_file (str or Path): The path where the expanded LaTeX content will be written.

    Raises:
        subprocess.CalledProcessError: If latexpand exits with a non-zero status.
        FileNotFoundError: If the latexpand command is not found in the system PATH.
        OSError: If the output file cannot be written.
    """
    # Only the subprocess call is guarded, so that a failure to open the
    # output file is not misreported as "latexpand not found".
    try:
        # Decode as UTF-8 with replacement: the result is written back as
        # UTF-8 below, and strict decoding in the locale's encoding can fail
        # on sources in a different encoding.
        result = subprocess.run(['latexpand', input_file],
                                capture_output=True, text=True, check=True,
                                encoding='utf-8', errors='replace')
    except subprocess.CalledProcessError as e:
        print(f"Error running latexpand: {e}")
        # stderr was captured, so surface latexpand's own diagnostics.
        if e.stderr:
            print(e.stderr.strip())
        raise
    except FileNotFoundError:
        print("latexpand not found. Please make sure it's installed and in your PATH.")
        raise

    with open(output_file, 'w', encoding='utf-8') as output_file_handle:
        output_file_handle.write(result.stdout)
    print(f"Expanded LaTeX written to {output_file}")
# Build the document index once and cache it on disk; later runs reuse the
# cached index instead of re-reading every document.
cache_path = Path("pqa_index.pkl")
if cache_path.exists():
    # SECURITY NOTE: unpickling executes code embedded in the file. Only reuse
    # a pqa_index.pkl that was written by this script; delete it if in doubt.
    with open(cache_path, "rb") as f:
        docs = pickle.load(f)
else:
    docs = Docs()
    for root, dirs, files in Path(".").walk():
        added_latex_source = False
        # Iterate over a snapshot: `dirs` is pruned in place below (which
        # stops walk() from descending into that directory), and mutating a
        # list while iterating over it skips entries.
        for dir_name in list(dirs):
            if not dir_name.startswith("arXiv-"):
                continue
            dir_path = root / dir_name
            concat_main = dir_path / ".main.tex"
            try:
                # Step 1: Find the main entry TeX file
                main_file = find_main_tex_file(dir_path)
                if not main_file:
                    raise ValueError("No main TeX file found.")
                # Step 2 & 3: Run latexpand and write output
                run_latexpand(main_file, concat_main)
            except (ValueError, subprocess.CalledProcessError,
                    FileNotFoundError) as preprocess_error:
                print(f"Failed to pre-process {dir_name}: {preprocess_error}")
                # Not pruned: walk() still descends into this directory, so
                # its individual files are picked up by the plain-file branch.
                continue
            print(f"adding {dir_path} (latex source)")
            try:
                docs.add(concat_main, settings=settings, disable_check=True)
            except (IOError, OSError, ValueError) as add_error:
                print(f"Failed to add {dir_path}: {add_error}")
                continue
            # Indexed as one expanded document; do not also index its parts.
            dirs.remove(dir_name)
            added_latex_source = True
        # NOTE(review): as in the original for/else, the files that sit
        # directly in `root` are skipped whenever an arXiv-* directory in the
        # same folder was added. Confirm this is intended.
        if not added_latex_source:
            for file_name in files:
                if file_name.lower().endswith((".pdf", ".txt", ".md", ".tex")):
                    file_path = root / file_name
                    print(f"adding {file_path}")
                    # Same error handling as the LaTeX branch, so that one
                    # unreadable file does not abort the whole indexing run.
                    try:
                        docs.add(file_path, settings=settings, disable_check=True)
                    except (IOError, OSError, ValueError) as add_error:
                        print(f"Failed to add {file_path}: {add_error}")
    with open(cache_path, "wb") as f:
        pickle.dump(docs, f)
if __name__ == "__main__":
    # Without any argument there is nothing to ask: show the usage hint.
    if len(sys.argv) <= 1:
        print("Please provide a query as a command-line argument.")
        print("Usage: python script_name.py 'Your query here'")
    else:
        # All command-line words after the script name form one question.
        QUERY = " ".join(sys.argv[1:])
        answer = docs.query(QUERY, settings=settings)
        print(answer)
|