import asyncio
import subprocess
import json
import concurrent.futures
from fastapi import FastAPI, WebSocket, WebSocketDisconnect
from fastapi.responses import HTMLResponse
from jinja2 import Template
from llama_cpp import Llama
from contextlib import asynccontextmanager
import logging
from pathlib import Path

# Set up logging
logging.basicConfig(level=logging.INFO)

# Set up the log file and truncate any history from a previous run
log_path = Path("interaction_history.log")
log_path.write_text("")  # creates the file if needed and empties it
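# Both the /log endpoint and the /stream WebSocket defined below read from this
# file, so they only ever show output from the current run.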

# Global variable to keep track of the last read position in the log file
last_read_position = 0

# Define the models and their paths
models = {
    "production": {
        "file": "DeepSeek-R1-Distill-Llama-8B-Q4_K_L.gguf",
        "alias": "R1Llama8BQ4L",
        "template": "app/templates/Llama8bq4k.html"
    },
    "development": {
        "file": "DeepSeek-R1-Distill-Qwen-1.5B-Q2_K.gguf",
        "alias": "R1Qwen1.5BQ2",
        "template": "app/templates/Qwen5bq2k.html"
    },
}

model_in_use = models["production"]

with open(model_in_use["template"], "r") as jinja_template:
    CHAT_TEMPLATE = jinja_template.read()

#with open("app/templates/default.html", "r") as jinja_template:
#    CHAT_TEMPLATE = jinja_template.read()

# Define the shell execution tool
def execute_shell(arguments):
    """Execute a shell command."""
    try:
        args = json.loads(arguments)
        command = args.get("command", "")
        if not command:
            return json.dumps({"error": "No command provided."})
        process = subprocess.run(
            command,
            shell=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True
        )
        return json.dumps({"stdout": process.stdout, "stderr": process.stderr})
    except Exception as e:
        return json.dumps({"error": str(e)})
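# Example (illustrative): execute_shell('{"command": "echo hi"}') should return
# something like '{"stdout": "hi\n", "stderr": ""}'.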

# Define the tools available to the assistant
tools = {
    "shell": {
        "description": "Execute shell commands.",
        "example_input": '{"command": string"..."}',
        "example_output": '{"stdout": "...", "stderr": "..."}',
        "function": execute_shell,
    },
}

tools_list = [
    {
        "type": "function",
        "function": {
            "name": "shell",
            "description": "Execute shell commands.",
            "parameters": {
                "type": "object",
                "properties": {
                    "command": {
                        "type": "string",
                        "description": "The shell command to execute."
                    }
                },
                "required": ["command"]
            }
        }
    }
]
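# Note: tools_list uses the OpenAI-style function schema; llama-cpp-python's
# create_chat_completion accepts it via its `tools` argument, although whether the
# model emits structured tool_calls depends on the model and its chat format.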

expected_tool_usage = """\n\nYou are expected to demarcate tool calls between markers like:
       <|tool▁calls▁begin|> ... <|tool▁calls▁end|>
    and each individual call between:
       <|tool▁call▁begin|> ... <|tool▁sep|> ... "```json" ... "```" ... <|tool▁call▁end|>
    \nBut you should not make more than one call at a time.\n
    For example, to download the Google homepage, you could use the `wget` command:
        <|tool▁calls▁begin|>
        <|tool▁call▁begin|>function<|tool▁sep|>shell
        ```json
        {"command": "wget google.com"}
        ```
        <|tool▁call▁end|>
        <|tool▁calls▁end|>
   """

text_prompt = """You are an AI model expert in computational biology performing autonomous real research.
        Your goal is to discover something new and relevant in the field of nerve regeneration.
        You can execute shell commands in the Docker container with the following tools:\n\n"""

custom_prompt = """You are an AI model expert in computational biology performing autonomous real research.
        Your goal is to discover something new and relevant in the field of nerve regeneration.
        You can execute shell commands using a Json:
        ```json
        {
            "tool": "shell",
            "arguments": '{\"command\": "<INSERT-YOUR-COMMAND>"}'
        }
        ```
        """
# Dynamically generate the system prompt based on available tools.
def generate_system_prompt(tools):
    tool_descriptions = []
    for tool_name, tool_data in tools.items():
        description = tool_data.get("description", "No description available.")
        example_input = tool_data.get("example_input", "{}")
        example_output = tool_data.get("example_output", "{}")
        tool_descriptions.append(
            f"""- **{tool_name}**:
   - Description: {description}
   - Input: {example_input}
   - Output: {example_output}"""
        )
    return (
        text_prompt
        + "\n\n".join(tool_descriptions)
        + expected_tool_usage
    )
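# For the single "shell" tool above, the generated prompt is text_prompt followed by a
# block roughly like:
#   - **shell**:
#      - Description: Execute shell commands.
#      - Input: {"command": "..."}
#      - Output: {"stdout": "...", "stderr": "..."}
# and then expected_tool_usage.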

# Create the system prompt.
system_prompt = generate_system_prompt(tools)
with log_path.open("a") as f:
    #f.write("System prompt:\n\n"+system_prompt+"\n\n")
    f.write("System prompt:\n"+custom_prompt+"\n\n")

# Parse out any tool calls embedded in the model's output.
def extract_tool_calls(response_text):
    """
    Parse tool calls from model output.

    The model is expected to demarcate tool calls between markers like:
       <|tool▁calls▁begin|> ... <|tool▁calls▁end|>
    and each individual call between:
       <|tool▁call▁begin|> ... <|tool▁sep|> ... "```json" ... "```"
    """
    if "<|tool▁calls▁begin|>" not in response_text:
        return []

    tool_calls_part = response_text.split("<|tool▁calls▁begin|>")[1]
    tool_calls_part = tool_calls_part.split("<|tool▁calls▁end|>")[0]
    tool_calls = tool_calls_part.split("<|tool▁call▁begin|>")
    
    parsed_tool_calls = []
    for tool_call in tool_calls:
        tool_call = tool_call.strip()
        if tool_call:
            try:
                tool_type, tool_name_and_args = tool_call.split("<|tool▁sep|>")
                tool_name, tool_args = tool_name_and_args.split("\n```json\n", 1)
                tool_args = tool_args.split("\n```")[0]
                parsed_tool_calls.append({
                    "type": tool_type,
                    "name": tool_name.strip(),
                    "arguments": tool_args.strip()
                })
            except ValueError:
                logging.warning("Failed to parse tool call: %s", tool_call)
    return parsed_tool_calls
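# Example (illustrative): for a response containing
#   <|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>shell
#   ```json
#   {"command": "ls"}
#   ```<|tool▁call▁end|><|tool▁calls▁end|>
# extract_tool_calls returns
#   [{"type": "function", "name": "shell", "arguments": '{"command": "ls"}'}].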

def process_tool_call(tool_call):
    """Execute the requested tool and return its output."""
    tool_name = tool_call["name"]
    tool_args = tool_call["arguments"]

    if tool_name in tools:
        tool_function = tools[tool_name]["function"]
        return tool_function(tool_args)
    else:
        return json.dumps({"error": f"Tool {tool_name} not found."})

#
# Helper: Wrap a synchronous generator as an asynchronous generator.
#
async def async_generator_from_sync(sync_gen_func, *args, **kwargs):
    """
    Runs a synchronous generator function in a thread and yields items asynchronously.
    """
    loop = asyncio.get_running_loop()
    q = asyncio.Queue()

    def producer():
        try:
            for item in sync_gen_func(*args, **kwargs):
                loop.call_soon_threadsafe(q.put_nowait, item)
        except Exception as e:
            loop.call_soon_threadsafe(q.put_nowait, e)
        finally:
            # Signal the end of iteration with a sentinel (None)
            loop.call_soon_threadsafe(q.put_nowait, None)

    with concurrent.futures.ThreadPoolExecutor() as executor:
        logging.info("Inside executor")
        executor.submit(producer)
        while True:
            item = await q.get()
            if item is None:
                break
            if isinstance(item, Exception):
                raise item
            yield item
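# Example (illustrative): any synchronous generator or iterable factory can be wrapped,
#   async for n in async_generator_from_sync(range, 3):
#       ...  # yields 0, 1, 2 without blocking the event loop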

#
# Background response generator without requiring a WebSocket.
#
async def generate_response_background(conversation):
    logging.info(f"Starting generation with conversation: {conversation}")
    async for token_chunk in async_generator_from_sync(
        llm.create_chat_completion,
        messages=conversation,
        stream=True,
        tools=tools_list,
        max_tokens=2048,
        repeat_penalty=1.1
    ):
        logging.debug(f"Raw token chunk: {json.dumps(token_chunk, indent=2)}")
        yield token_chunk
        await asyncio.sleep(0)
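# With stream=True, llama-cpp-python yields OpenAI-style chunks, so each token_chunk
# should look like {"choices": [{"delta": {...}, "finish_reason": None}]}, which is
# the shape run_research_forever() below indexes into.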

#
# Main research loop running continuously in the background.
#
async def run_research_forever():
    global log_path
    logging.info("🚀 Autonomous computational biology research initiated!")
    with log_path.open("a") as f:
        f.write("🚀 Autonomous computational biology research initiated!\n")

    conversation = [{"role": "system", "content": custom_prompt}]
    while True:
        full_response = ""
        try:
            async for token in generate_response_background(conversation):
                # Safely extract delta
                delta = token["choices"][0].get("delta", {})
                
                # Handle text content
                token_text = delta.get("content", "")  # Default to empty string
                full_response += token_text
                
                # Handle tool calls (critical for container environment)
                if "tool_calls" in delta:
                    tool_calls = delta["tool_calls"]
                    # Process tool call deltas here (append to full_response or log)
                    tool_call_str = json.dumps(tool_calls)
                    full_response += f"\n🔧 Tool Call: {tool_call_str}\n"
                
                # Logging remains the same
                with open(log_path, "a") as f:
                    f.write(token_text)
                    f.flush()
                
                # Check for finish reason
                if token['choices'][0].get("finish_reason"):
                
                    # The presence of a finish reason (like "stop") indicates that generation is complete.
                    # Append the assistant's response to the conversation log.
                    conversation.append({"role": "assistant", "content": full_response})
                    try:
                        tool_output = parse_tool_calls(full_response)
                        conversation.append({"role": "tool", "content": tool_output})
                    except Exception as e:
                        logging.error(f"🛠️ Tool execution failed: {e} attempting tool execution fallback")
                        try:
                            tool_output = parse_tool_calls_fallback(full_response)
                            conversation.append({"role": "tool", "content": tool_output})
                        except:
                            logging.error(f"🛠️ Tool execution fallback also failed: {e}")
                            conversation.append({"role": "system", "content": """Error in function calling.
                                 You must work towards your goal using the proper format"""+custom_prompt})
                        continue
        except Exception as e:
            logging.error(f"Autonomous research error during response generation: {e}")
            continue
        
        # Delay before the next query iteration.
        await asyncio.sleep(1)

def parse_tool_calls_fallback(full_response):
    """Fallback parser for a bare ```json block in the format shown in custom_prompt."""
    tool_call = full_response.split("```json")[1]
    tool_call = tool_call.split("```")[0].strip()
    call = json.loads(tool_call)
    # Map the {"tool": ..., "arguments": ...} shape onto what process_tool_call expects.
    tool_output = process_tool_call({
        "type": "function",
        "name": call.get("tool", ""),
        "arguments": call.get("arguments", "{}"),
    })
    return tool_output
    
def parse_tool_calls(full_response):
    # Check for tool calls in the response and process them.
    logging.info(f"Full response: {full_response}")
    tool_calls = extract_tool_calls(full_response)
    logging.info(f"Tool calls: {tool_calls}")
    if not tool_calls:
        # Raise so the caller can try the bare-JSON fallback instead of
        # appending an empty tool message to the conversation.
        raise ValueError("No tool calls found in response.")
    # The prompt asks for one call at a time, so only the first call is executed.
    tool_output = process_tool_call(tool_calls[0])
    logging.info(f"🔧 Tool Execution: {tool_output}")
    with log_path.open("a") as f:
        f.write(f"🔧 Tool Execution: {tool_output}\n")
    return tool_output

# Automatically start the research process when the app starts.
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Start the background task when FastAPI starts."""
    logging.info("Starting run_research_forever()...")
    await asyncio.sleep(5) # Wait for the server to load
    asyncio.create_task(run_research_forever())  # Run in background
    yield
    logging.info("FastAPI shutdown: Cleaning up resources.")

# Initialize the FastAPI application
app = FastAPI(lifespan=lifespan)

# Load the Llama model (assumed to return a synchronous generator when stream=True)
llm = Llama(model_path=model_in_use["file"], n_ctx=2048)
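# Note: the .gguf path is resolved relative to the working directory, and n_ctx=2048
# matches max_tokens above, so a long-running conversation may eventually exceed the
# context window.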

@app.websocket("/stream")
async def stream(websocket: WebSocket):
    logging.info("WebSocket connection established.")
    global log_path, last_read_position
    await websocket.accept()
    
    # Send existing interaction history to the client.
    try:
        with open(log_path, "r") as log_file:
            interaction_history = log_file.read()
            last_read_position = log_file.tell()
        if interaction_history:
            await websocket.send_text(interaction_history)
    except Exception as e:
        logging.error(f"Error reading interaction history: {e}")

    # Continuously send updates from the log file.
    while True:
        await asyncio.sleep(0.1)
        try:
            with open(log_path, "r") as log_file:
                log_file.seek(last_read_position)
                new_content = log_file.read()
                if new_content:
                    await websocket.send_text(new_content)
                    last_read_position = log_file.tell()
        except (WebSocketDisconnect, RuntimeError):
            # Stop streaming once the client has gone away.
            logging.info("WebSocket client disconnected.")
            break
        except Exception as e:
            logging.error(f"Error reading interaction history: {e}")

# Endpoint to retrieve the interaction log.
@app.get("/log")
async def get_log():
    try:
        with open("interaction_history.log", "r") as f:
            log_content = f.read()
        # Return the log inside a <pre> block for readability.
        return HTMLResponse(content=f"<pre>{log_content}</pre>")
    except Exception as e:
        logging.error(f"Error reading log: {e}")
        return {"error": str(e)}

# A simple frontend page with a link to the log.
@app.get("/", response_class=HTMLResponse)
async def get():
    try:
        with open("app/templates/index.html", "r") as f:
            html_content = f.read()
    except Exception as e:
        logging.error(f"Error loading template: {e}")
        html_content = "<html><body><h1>Error loading template</h1></body></html>"
    return HTMLResponse(html_content)


# To run the app, use a command like:
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)
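# Alternatively (illustrative, assuming this file is importable as app.main):
#   uvicorn app.main:app --host 0.0.0.0 --port 8000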