CintraAI committed on
Commit
4644b40
·
1 Parent(s): 6ca6c86

updated requirements

Browse files
Files changed (4) hide show
  1. .idea/code-chunker.iml +1 -0
  2. Chunker.py +15 -15
  3. app.py +19 -1
  4. requirements.txt +45 -0
.idea/code-chunker.iml CHANGED
@@ -10,5 +10,6 @@
10
  </component>
11
  <component name="PackageRequirementsSettings">
12
  <option name="removeUnused" value="true" />
 
13
  </component>
14
  </module>
 
10
  </component>
11
  <component name="PackageRequirementsSettings">
12
  <option name="removeUnused" value="true" />
13
+ <option name="modifyBaseFiles" value="true" />
14
  </component>
15
  </module>
Chunker.py CHANGED
@@ -1,7 +1,6 @@
1
  from abc import ABC, abstractmethod
2
  from CodeParser import CodeParser
3
- from Utils import count_tokens
4
-
5
 
6
 
7
  class Chunker(ABC):
@@ -20,19 +19,20 @@ class Chunker(ABC):
20
  def print_chunks(chunks):
21
  for chunk_number, chunk_code in chunks.items():
22
  print(f"Chunk {chunk_number}:")
23
- print("="*40)
24
  print(chunk_code)
25
- print("="*40)
26
 
27
  @staticmethod
28
  def consolidate_chunks_into_file(chunks):
29
  return "\n".join(chunks.values())
30
-
31
  @staticmethod
32
  def count_lines(consolidated_chunks):
33
  lines = consolidated_chunks.split("\n")
34
  return len(lines)
35
 
 
36
  class CodeChunker(Chunker):
37
  def __init__(self, file_extension, encoding_name="gpt-4"):
38
  super().__init__(encoding_name)
@@ -60,15 +60,16 @@ class CodeChunker(Chunker):
60
  if highest_comment_line: # If a highest comment line exists, add it
61
  adjusted_breakpoints.append(highest_comment_line)
62
  else:
63
- adjusted_breakpoints.append(bp) # If no comments were found before the breakpoint, add the original breakpoint
 
64
 
65
  breakpoints = sorted(set(adjusted_breakpoints)) # Ensure breakpoints are unique and sorted
66
-
67
  while i < len(lines):
68
  line = lines[i]
69
  new_token_count = count_tokens(line, self.encoding_name)
70
  if token_count + new_token_count > token_limit:
71
-
72
  # Set the stop line to the last breakpoint before the current line
73
  if i in breakpoints:
74
  stop_line = i
@@ -79,20 +80,20 @@ class CodeChunker(Chunker):
79
  if stop_line == start_line and i not in breakpoints:
80
  token_count += new_token_count
81
  i += 1
82
-
83
  # If the stop line is the same as the start line and the current line is a breakpoint, it means we can create a chunk with just the current line
84
  elif stop_line == start_line and i == stop_line:
85
  token_count += new_token_count
86
  i += 1
87
-
88
-
89
  # If the stop line is the same as the start line and the current line is a breakpoint, it means we can create a chunk with just the current line
90
  elif stop_line == start_line and i in breakpoints:
91
  current_chunk = "\n".join(lines[start_line:stop_line])
92
  if current_chunk.strip(): # If the current chunk is not just whitespace
93
  chunks[chunk_number] = current_chunk # Using chunk_number as key
94
  chunk_number += 1
95
-
96
  token_count = 0
97
  start_line = i
98
  i += 1
@@ -103,7 +104,7 @@ class CodeChunker(Chunker):
103
  if current_chunk.strip():
104
  chunks[chunk_number] = current_chunk # Using chunk_number as key
105
  chunk_number += 1
106
-
107
  i = stop_line
108
  token_count = 0
109
  start_line = stop_line
@@ -116,9 +117,8 @@ class CodeChunker(Chunker):
116
  current_chunk_code = "\n".join(lines[start_line:])
117
  if current_chunk_code.strip(): # Checks if the chunk is not just whitespace
118
  chunks[chunk_number] = current_chunk_code # Using chunk_number as key
119
-
120
  return chunks
121
 
122
  def get_chunk(self, chunked_codebase, chunk_number):
123
  return chunked_codebase[chunk_number]
124
-
 
1
  from abc import ABC, abstractmethod
2
  from CodeParser import CodeParser
3
+ from utils import count_tokens
 
4
 
5
 
6
  class Chunker(ABC):
 
19
  def print_chunks(chunks):
20
  for chunk_number, chunk_code in chunks.items():
21
  print(f"Chunk {chunk_number}:")
22
+ print("=" * 40)
23
  print(chunk_code)
24
+ print("=" * 40)
25
 
26
  @staticmethod
27
  def consolidate_chunks_into_file(chunks):
28
  return "\n".join(chunks.values())
29
+
30
  @staticmethod
31
  def count_lines(consolidated_chunks):
32
  lines = consolidated_chunks.split("\n")
33
  return len(lines)
34
 
35
+
36
  class CodeChunker(Chunker):
37
  def __init__(self, file_extension, encoding_name="gpt-4"):
38
  super().__init__(encoding_name)
 
60
  if highest_comment_line: # If a highest comment line exists, add it
61
  adjusted_breakpoints.append(highest_comment_line)
62
  else:
63
+ adjusted_breakpoints.append(
64
+ bp) # If no comments were found before the breakpoint, add the original breakpoint
65
 
66
  breakpoints = sorted(set(adjusted_breakpoints)) # Ensure breakpoints are unique and sorted
67
+
68
  while i < len(lines):
69
  line = lines[i]
70
  new_token_count = count_tokens(line, self.encoding_name)
71
  if token_count + new_token_count > token_limit:
72
+
73
  # Set the stop line to the last breakpoint before the current line
74
  if i in breakpoints:
75
  stop_line = i
 
80
  if stop_line == start_line and i not in breakpoints:
81
  token_count += new_token_count
82
  i += 1
83
+
84
  # If the stop line is the same as the start line and the current line is a breakpoint, it means we can create a chunk with just the current line
85
  elif stop_line == start_line and i == stop_line:
86
  token_count += new_token_count
87
  i += 1
88
+
89
+
90
  # If the stop line is the same as the start line and the current line is a breakpoint, it means we can create a chunk with just the current line
91
  elif stop_line == start_line and i in breakpoints:
92
  current_chunk = "\n".join(lines[start_line:stop_line])
93
  if current_chunk.strip(): # If the current chunk is not just whitespace
94
  chunks[chunk_number] = current_chunk # Using chunk_number as key
95
  chunk_number += 1
96
+
97
  token_count = 0
98
  start_line = i
99
  i += 1
 
104
  if current_chunk.strip():
105
  chunks[chunk_number] = current_chunk # Using chunk_number as key
106
  chunk_number += 1
107
+
108
  i = stop_line
109
  token_count = 0
110
  start_line = stop_line
 
117
  current_chunk_code = "\n".join(lines[start_line:])
118
  if current_chunk_code.strip(): # Checks if the chunk is not just whitespace
119
  chunks[chunk_number] = current_chunk_code # Using chunk_number as key
120
+
121
  return chunks
122
 
123
  def get_chunk(self, chunked_codebase, chunk_number):
124
  return chunked_codebase[chunk_number]
 
app.py CHANGED
@@ -1,10 +1,28 @@
1
  import streamlit as st
2
  from utils import load_json, count_tokens
3
  import json
 
4
 
5
  # Set up the Streamlit page configuration
6
  st.set_page_config(page_title="Cintra Code Chunker", layout="wide")
7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  def main():
9
  # Streamlit widgets for file selection
10
  st.title("Cintra Code Chunker")
@@ -38,4 +56,4 @@ def main():
38
 
39
 
40
  if __name__ == "__main__":
41
- main()
 
1
  import streamlit as st
2
  from utils import load_json, count_tokens
3
  import json
4
+ import os
5
 
6
  # Set up the Streamlit page configuration
7
  st.set_page_config(page_title="Cintra Code Chunker", layout="wide")
8
 
9
+ # Slider to select a value
10
+ x = st.slider("Select a value")
11
+ st.write(x, "squared is", x * x)
12
+
13
+
14
+ code_files_directory = "example_code_files"
15
+ code_files = os.listdir(code_files_directory)
16
+
17
+ # Dropdown menu for the user to select a code file
18
+ selected_file = st.selectbox("Select a code file", code_files)
19
+
20
+ file_path = os.path.join(code_files_directory, selected_file)
21
+ with open(file_path, "r") as file:
22
+ code_content = file.read()
23
+ st.code(code_content, language="python")
24
+
25
+
26
  def main():
27
  # Streamlit widgets for file selection
28
  st.title("Cintra Code Chunker")
 
56
 
57
 
58
  if __name__ == "__main__":
59
+ main()
requirements.txt CHANGED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ altair==5.3.0
2
+ attrs==23.2.0
3
+ blinker==1.7.0
4
+ cachetools==5.3.3
5
+ certifi==2024.2.2
6
+ charset-normalizer==3.3.2
7
+ click==8.1.7
8
+ colorama==0.4.6
9
+ gitdb==4.0.11
10
+ GitPython==3.1.43
11
+ idna==3.6
12
+ Jinja2==3.1.3
13
+ jsonschema==4.21.1
14
+ jsonschema-specifications==2023.12.1
15
+ markdown-it-py==3.0.0
16
+ MarkupSafe==2.1.5
17
+ mdurl==0.1.2
18
+ numpy==1.26.4
19
+ packaging==24.0
20
+ pandas==2.2.1
21
+ pillow==10.3.0
22
+ protobuf==4.25.3
23
+ pyarrow==15.0.2
24
+ pydeck==0.8.1b0
25
+ Pygments==2.17.2
26
+ python-dateutil==2.9.0.post0
27
+ pytz==2024.1
28
+ referencing==0.34.0
29
+ requests==2.31.0
30
+ rich==13.7.1
31
+ rpds-py==0.18.0
32
+ six==1.16.0
33
+ smmap==5.0.1
34
+ streamlit==1.33.0
35
+ tenacity==8.2.3
36
+ regex==2023.12.25
37
+ tiktoken==0.6.0
38
+ tree-sitter==0.21.3
39
+ toml==0.10.2
40
+ toolz==0.12.1
41
+ tornado==6.4
42
+ typing_extensions==4.11.0
43
+ tzdata==2024.1
44
+ urllib3==2.2.1
45
+ watchdog==4.0.0