Nanobit committed on
Commit
de2406c
·
1 Parent(s): 8b617cc

Lint convert.py

Browse files
Files changed (1) hide show
  1. src/axolotl/convert.py +32 -3
src/axolotl/convert.py CHANGED
@@ -1,47 +1,76 @@
 
 
 
1
  import json
2
  import sys
3
 
4
 
5
  class FileReader:
 
 
 
 
6
  def read(self, file_path):
7
- with open(file_path, "r") as file:
8
  return file.read()
9
 
10
 
11
  class FileWriter:
 
 
 
 
12
  def __init__(self, file_path):
13
  self.file_path = file_path
14
 
15
  def write(self, content):
16
- with open(self.file_path, "w") as file:
17
  file.write(content)
18
 
19
 
20
  class StdoutWriter:
 
 
 
 
21
  def write(self, content):
22
  sys.stdout.write(content)
23
  sys.stdout.write("\n")
24
 
25
 
26
  class JsonParser:
 
 
 
 
27
  def parse(self, content):
28
  return json.loads(content)
29
 
30
 
31
  class JsonlSerializer:
 
 
 
 
32
  def serialize(self, data):
33
  lines = [json.dumps(item) for item in data]
34
  return "\n".join(lines)
35
 
36
 
37
  class JsonToJsonlConverter:
 
 
 
 
38
  def __init__(self, file_reader, file_writer, json_parser, jsonl_serializer):
39
  self.file_reader = file_reader
40
  self.file_writer = file_writer
41
  self.json_parser = json_parser
42
  self.jsonl_serializer = jsonl_serializer
43
 
44
- def convert(self, input_file_path, output_file_path):
 
 
45
  content = self.file_reader.read(input_file_path)
46
  data = self.json_parser.parse(content)
47
  # data = [r for r in data if r["conversations"]] # vicuna cleaned has rows with empty conversations
 
1
+ """Module containing File Reader, File Writer, Json Parser, and Jsonl Serializer classes"""
2
+
3
+
4
  import json
5
  import sys
6
 
7
 
8
  class FileReader:
9
+ """
10
+ Reads a file and returns its contents as a string
11
+ """
12
+
13
  def read(self, file_path):
14
+ with open(file_path, encoding="utf-8") as file:
15
  return file.read()
16
 
17
 
18
  class FileWriter:
19
+ """
20
+ Writes a string to a file
21
+ """
22
+
23
  def __init__(self, file_path):
24
  self.file_path = file_path
25
 
26
  def write(self, content):
27
+ with open(self.file_path, "w", encoding="utf-8") as file:
28
  file.write(content)
29
 
30
 
31
  class StdoutWriter:
32
+ """
33
+ Writes a string to stdout
34
+ """
35
+
36
  def write(self, content):
37
  sys.stdout.write(content)
38
  sys.stdout.write("\n")
39
 
40
 
41
  class JsonParser:
42
+ """
43
+ Parses a string as JSON and returns the result
44
+ """
45
+
46
  def parse(self, content):
47
  return json.loads(content)
48
 
49
 
50
  class JsonlSerializer:
51
+ """
52
+ Serializes a list of JSON objects into a JSONL string
53
+ """
54
+
55
  def serialize(self, data):
56
  lines = [json.dumps(item) for item in data]
57
  return "\n".join(lines)
58
 
59
 
60
  class JsonToJsonlConverter:
61
+ """
62
+ Converts a JSON file to JSONL
63
+ """
64
+
65
  def __init__(self, file_reader, file_writer, json_parser, jsonl_serializer):
66
  self.file_reader = file_reader
67
  self.file_writer = file_writer
68
  self.json_parser = json_parser
69
  self.jsonl_serializer = jsonl_serializer
70
 
71
+ def convert(
72
+ self, input_file_path, output_file_path
73
+ ): # pylint: disable=unused-argument
74
  content = self.file_reader.read(input_file_path)
75
  data = self.json_parser.parse(content)
76
  # data = [r for r in data if r["conversations"]] # vicuna cleaned has rows with empty conversations