openfree committed on
Commit
407248f
·
verified ·
1 Parent(s): 7bcb745

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -16
app.py CHANGED
@@ -24,7 +24,7 @@ def dataset_converter(input_file, conversion_type, parquet_url):
24
  # Conversion: CSV to Parquet
25
  if conversion_type == "CSV to Parquet":
26
  if input_file is None or file_extension != "csv":
27
- raise ValueError("For CSV to Parquet conversion, please upload a CSV file.")
28
  df = pd.read_csv(BytesIO(file_bytes))
29
  output_file = "output.parquet"
30
  df.to_parquet(output_file, index=False)
@@ -34,7 +34,7 @@ def dataset_converter(input_file, conversion_type, parquet_url):
34
  # Conversion: Parquet to CSV
35
  elif conversion_type == "Parquet to CSV":
36
  if input_file is None or file_extension != "parquet":
37
- raise ValueError("For Parquet to CSV conversion, please upload a Parquet file.")
38
  df = pd.read_parquet(BytesIO(file_bytes))
39
  output_file = "output.csv"
40
  df.to_csv(output_file, index=False)
@@ -44,7 +44,7 @@ def dataset_converter(input_file, conversion_type, parquet_url):
44
  # Conversion: CSV to JSONL
45
  elif conversion_type == "CSV to JSONL":
46
  if input_file is None or file_extension != "csv":
47
- raise ValueError("For CSV to JSONL conversion, please upload a CSV file.")
48
  # Read CSV with latin1 encoding
49
  df = pd.read_csv(BytesIO(file_bytes), encoding='latin1')
50
  output_file = "metadata.jsonl"
@@ -70,14 +70,13 @@ def dataset_converter(input_file, conversion_type, parquet_url):
70
  # Use uploaded file if available; otherwise try the provided URL
71
  if input_file is not None:
72
  df = pd.read_parquet(BytesIO(file_bytes))
73
- # file_name is already set from the uploaded file
74
  elif parquet_url:
75
  response = requests.get(parquet_url)
76
  response.raise_for_status() # Ensure the request was successful
77
  df = pd.read_parquet(BytesIO(response.content))
78
  file_name = "from_url.parquet"
79
  else:
80
- raise ValueError("For Parquet to JSONL conversion, please upload a file or provide a URL.")
81
 
82
  output_file = "output.jsonl"
83
  # Recursive function to decode bytes to UTF-8 strings
@@ -100,12 +99,13 @@ def dataset_converter(input_file, conversion_type, parquet_url):
100
  preview_str = df.head(10).to_string(index=False)
101
 
102
  else:
103
- raise ValueError("Invalid conversion type selected.")
104
 
105
  info_message = (
106
  f"Input file: {file_name if file_name is not None else 'N/A'}\n"
107
  f"Converted file format: {converted_format}\n\n"
108
- f"Preview (Top 10 Rows):\n{preview_str}"
 
109
  )
110
  return output_file, info_message
111
 
@@ -143,35 +143,37 @@ h1, h2 {
143
  }
144
  """
145
 
146
- with gr.Blocks(css=custom_css, title="Comprehensive Dataset Converter") as demo:
147
- gr.Markdown("# Comprehensive Dataset Converter")
148
  gr.Markdown(
149
  "Upload a CSV or Parquet file (or provide a Parquet file URL for Parquet to JSONL conversion) "
150
- "and select the conversion type. The app converts the file to the desired format and displays a preview of the top 10 rows."
151
  )
152
 
153
  with gr.Row():
154
  with gr.Column(scale=1):
155
- input_file = gr.File(label="Upload CSV or Parquet File")
156
  with gr.Column(scale=1):
157
  conversion_type = gr.Radio(
158
  choices=["CSV to Parquet", "Parquet to CSV", "CSV to JSONL", "Parquet to JSONL"],
159
- label="Conversion Type"
160
  )
161
 
162
  # Optional URL input for Parquet to JSONL conversion
163
- parquet_url = gr.Textbox(label="Parquet File URL (Optional)", placeholder="Enter URL if not uploading a file")
164
 
165
- convert_button = gr.Button("Convert", elem_classes=["gradio-button"])
166
 
167
  with gr.Row():
168
- output_file = gr.File(label="Converted File")
169
- preview = gr.Textbox(label="Preview (Top 10 Rows)", lines=15)
170
 
171
  convert_button.click(
172
  fn=dataset_converter,
173
  inputs=[input_file, conversion_type, parquet_url],
174
  outputs=[output_file, preview]
175
  )
 
 
176
 
177
  demo.launch()
 
24
  # Conversion: CSV to Parquet
25
  if conversion_type == "CSV to Parquet":
26
  if input_file is None or file_extension != "csv":
27
+ raise ValueError("For CSV to Parquet conversion, please upload a CSV file. 📄")
28
  df = pd.read_csv(BytesIO(file_bytes))
29
  output_file = "output.parquet"
30
  df.to_parquet(output_file, index=False)
 
34
  # Conversion: Parquet to CSV
35
  elif conversion_type == "Parquet to CSV":
36
  if input_file is None or file_extension != "parquet":
37
+ raise ValueError("For Parquet to CSV conversion, please upload a Parquet file. 📄")
38
  df = pd.read_parquet(BytesIO(file_bytes))
39
  output_file = "output.csv"
40
  df.to_csv(output_file, index=False)
 
44
  # Conversion: CSV to JSONL
45
  elif conversion_type == "CSV to JSONL":
46
  if input_file is None or file_extension != "csv":
47
+ raise ValueError("For CSV to JSONL conversion, please upload a CSV file. 📄")
48
  # Read CSV with latin1 encoding
49
  df = pd.read_csv(BytesIO(file_bytes), encoding='latin1')
50
  output_file = "metadata.jsonl"
 
70
  # Use uploaded file if available; otherwise try the provided URL
71
  if input_file is not None:
72
  df = pd.read_parquet(BytesIO(file_bytes))
 
73
  elif parquet_url:
74
  response = requests.get(parquet_url)
75
  response.raise_for_status() # Ensure the request was successful
76
  df = pd.read_parquet(BytesIO(response.content))
77
  file_name = "from_url.parquet"
78
  else:
79
+ raise ValueError("For Parquet to JSONL conversion, please upload a file or provide a URL. 🌐")
80
 
81
  output_file = "output.jsonl"
82
  # Recursive function to decode bytes to UTF-8 strings
 
99
  preview_str = df.head(10).to_string(index=False)
100
 
101
  else:
102
+ raise ValueError("Invalid conversion type selected. ⚠️")
103
 
104
  info_message = (
105
  f"Input file: {file_name if file_name is not None else 'N/A'}\n"
106
  f"Converted file format: {converted_format}\n\n"
107
+ f"Preview (Top 10 Rows):\n{preview_str}\n\n"
108
+ "Community: https://discord.gg/openfreeai 🚀"
109
  )
110
  return output_file, info_message
111
 
 
143
  }
144
  """
145
 
146
+ with gr.Blocks(css=custom_css, title="Datasets Convertor") as demo:
147
+ gr.Markdown("# Datasets Convertor 🚀")
148
  gr.Markdown(
149
  "Upload a CSV or Parquet file (or provide a Parquet file URL for Parquet to JSONL conversion) "
150
+ "and select the conversion type. The app converts the file to the desired format and displays a preview of the top 10 rows. ✨"
151
  )
152
 
153
  with gr.Row():
154
  with gr.Column(scale=1):
155
+ input_file = gr.File(label="Upload CSV or Parquet File 📄")
156
  with gr.Column(scale=1):
157
  conversion_type = gr.Radio(
158
  choices=["CSV to Parquet", "Parquet to CSV", "CSV to JSONL", "Parquet to JSONL"],
159
+ label="Conversion Type 🔄"
160
  )
161
 
162
  # Optional URL input for Parquet to JSONL conversion
163
+ parquet_url = gr.Textbox(label="Parquet File URL (Optional) 🌐", placeholder="Enter URL if not uploading a file")
164
 
165
+ convert_button = gr.Button("Convert ⚑", elem_classes=["gradio-button"])
166
 
167
  with gr.Row():
168
+ output_file = gr.File(label="Converted File 💾")
169
+ preview = gr.Textbox(label="Preview (Top 10 Rows) 🔍", lines=15)
170
 
171
  convert_button.click(
172
  fn=dataset_converter,
173
  inputs=[input_file, conversion_type, parquet_url],
174
  outputs=[output_file, preview]
175
  )
176
+
177
+ gr.Markdown("**Join our Community:** [https://discord.gg/openfreeai](https://discord.gg/openfreeai) 🤝")
178
 
179
  demo.launch()