Sergidev committed on
Commit
a42bad6
1 Parent(s): 2f6a972

Attempt Rollback

Files changed (1)
  1. modules/pmbl.py +8 -9
modules/pmbl.py CHANGED
@@ -1,5 +1,6 @@
 import sqlite3
 from datetime import datetime
+from llama_cpp import Llama
 from concurrent.futures import ThreadPoolExecutor
 
 class PMBL:
@@ -77,8 +78,7 @@ class PMBL:
         conn = sqlite3.connect('chat_history.db')
         c = conn.cursor()
         timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
-        c.execute("INSERT INTO chats (timestamp, prompt, response, topic) VALUES (?, ?, ?, 'Untitled')",
-                  (timestamp, prompt, response))
+        c.execute("INSERT INTO chats (timestamp, prompt, response, topic) VALUES (?, ?, ?, 'Untitled')", (timestamp, prompt, response))
         conn.commit()
         conn.close()
 
@@ -102,12 +102,12 @@ class PMBL:
             yield chunk
 
     def generate_response_task(self, system_prompt, prompt, n_ctx):
-        llm = Llama(model_path=self.model_path, n_ctx=n_ctx, n_threads=8, mlock=True)
+        llm = Llama(model_path=self.model_path, n_ctx=n_ctx, n_threads=8)
 
         response = llm(
            system_prompt,
            max_tokens=1500,
-           temperature=0.2,
+           temperature=0.7,
            stop=["</s>", "\nUser:", "\nuser:", "\nSystem:", "\nsystem:"],
            echo=False,
            stream=True
@@ -122,17 +122,16 @@ class PMBL:
         self.save_chat_history(prompt, response_text)
 
     def calculate_context(self, system_prompt, formatted_history):
-        system_prompt_tokens = len(system_prompt) // 3
-        history_tokens = len(formatted_history) // 2
+        system_prompt_tokens = len(system_prompt) // 4
+        history_tokens = len(formatted_history) // 4
         max_response_tokens = 1500
-        context_ceiling = 31690
+        context_ceiling = 32690
 
         available_tokens = context_ceiling - system_prompt_tokens - max_response_tokens
         if history_tokens <= available_tokens:
             return system_prompt_tokens + history_tokens + max_response_tokens
         else:
             return context_ceiling  # Return the maximum context size
-
     def sleep_mode(self):
         conn = sqlite3.connect('chat_history.db')
         c = conn.cursor()
@@ -148,7 +147,7 @@ class PMBL:
         conn.close()
 
     def generate_topic(self, prompt, response):
-        llm = Llama(model_path=self.model_path, n_ctx=1690, n_threads=8, mlock=True)
+        llm = Llama(model_path=self.model_path, n_ctx=2690, n_threads=8)
 
         system_prompt = f"Based on the following interaction between a user and an AI assistant, generate a concise topic for the conversation in 2-4 words:\n\nUser: {prompt}\nAssistant: {response}\n\nTopic:"
 
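
For reference, a minimal standalone sketch of the calculate_context heuristic as it stands after this rollback. The // 4 characters-per-token estimate, the 1500-token response budget, and the 32690-token ceiling are taken from the diff above; the sample string lengths below are invented purely for illustration.

    # Rolled-back heuristic: roughly 4 characters per token, a fixed
    # 1500-token response budget, and a 32690-token context ceiling.
    def calculate_context(system_prompt, formatted_history):
        system_prompt_tokens = len(system_prompt) // 4
        history_tokens = len(formatted_history) // 4
        max_response_tokens = 1500
        context_ceiling = 32690

        available_tokens = context_ceiling - system_prompt_tokens - max_response_tokens
        if history_tokens <= available_tokens:
            return system_prompt_tokens + history_tokens + max_response_tokens
        return context_ceiling  # clamp to the maximum context size

    # Example: a 400-character system prompt (~100 tokens) plus an
    # 8000-character history (~2000 tokens) requests a 3600-token context.
    print(calculate_context("x" * 400, "y" * 8000))  # -> 3600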