Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -24,7 +24,7 @@ def dataset_converter(input_file, conversion_type, parquet_url):
|
|
24 |
# Conversion: CSV to Parquet
|
25 |
if conversion_type == "CSV to Parquet":
|
26 |
if input_file is None or file_extension != "csv":
|
27 |
-
raise ValueError("For CSV to Parquet conversion, please upload a CSV file.")
|
28 |
df = pd.read_csv(BytesIO(file_bytes))
|
29 |
output_file = "output.parquet"
|
30 |
df.to_parquet(output_file, index=False)
|
@@ -34,7 +34,7 @@ def dataset_converter(input_file, conversion_type, parquet_url):
|
|
34 |
# Conversion: Parquet to CSV
|
35 |
elif conversion_type == "Parquet to CSV":
|
36 |
if input_file is None or file_extension != "parquet":
|
37 |
-
raise ValueError("For Parquet to CSV conversion, please upload a Parquet file.")
|
38 |
df = pd.read_parquet(BytesIO(file_bytes))
|
39 |
output_file = "output.csv"
|
40 |
df.to_csv(output_file, index=False)
|
@@ -44,7 +44,7 @@ def dataset_converter(input_file, conversion_type, parquet_url):
|
|
44 |
# Conversion: CSV to JSONL
|
45 |
elif conversion_type == "CSV to JSONL":
|
46 |
if input_file is None or file_extension != "csv":
|
47 |
-
raise ValueError("For CSV to JSONL conversion, please upload a CSV file.")
|
48 |
# Read CSV with latin1 encoding
|
49 |
df = pd.read_csv(BytesIO(file_bytes), encoding='latin1')
|
50 |
output_file = "metadata.jsonl"
|
@@ -70,14 +70,13 @@ def dataset_converter(input_file, conversion_type, parquet_url):
|
|
70 |
# Use uploaded file if available; otherwise try the provided URL
|
71 |
if input_file is not None:
|
72 |
df = pd.read_parquet(BytesIO(file_bytes))
|
73 |
-
# file_name is already set from the uploaded file
|
74 |
elif parquet_url:
|
75 |
response = requests.get(parquet_url)
|
76 |
response.raise_for_status() # Ensure the request was successful
|
77 |
df = pd.read_parquet(BytesIO(response.content))
|
78 |
file_name = "from_url.parquet"
|
79 |
else:
|
80 |
-
raise ValueError("For Parquet to JSONL conversion, please upload a file or provide a URL.")
|
81 |
|
82 |
output_file = "output.jsonl"
|
83 |
# Recursive function to decode bytes to UTF-8 strings
|
@@ -100,12 +99,13 @@ def dataset_converter(input_file, conversion_type, parquet_url):
|
|
100 |
preview_str = df.head(10).to_string(index=False)
|
101 |
|
102 |
else:
|
103 |
-
raise ValueError("Invalid conversion type selected.")
|
104 |
|
105 |
info_message = (
|
106 |
f"Input file: {file_name if file_name is not None else 'N/A'}\n"
|
107 |
f"Converted file format: {converted_format}\n\n"
|
108 |
-
f"Preview (Top 10 Rows):\n{preview_str}"
|
|
|
109 |
)
|
110 |
return output_file, info_message
|
111 |
|
@@ -143,35 +143,37 @@ h1, h2 {
|
|
143 |
}
|
144 |
"""
|
145 |
|
146 |
-
with gr.Blocks(css=custom_css, title="
|
147 |
-
gr.Markdown("#
|
148 |
gr.Markdown(
|
149 |
"Upload a CSV or Parquet file (or provide a Parquet file URL for Parquet to JSONL conversion) "
|
150 |
-
"and select the conversion type. The app converts the file to the desired format and displays a preview of the top 10 rows."
|
151 |
)
|
152 |
|
153 |
with gr.Row():
|
154 |
with gr.Column(scale=1):
|
155 |
-
input_file = gr.File(label="Upload CSV or Parquet File")
|
156 |
with gr.Column(scale=1):
|
157 |
conversion_type = gr.Radio(
|
158 |
choices=["CSV to Parquet", "Parquet to CSV", "CSV to JSONL", "Parquet to JSONL"],
|
159 |
-
label="Conversion Type"
|
160 |
)
|
161 |
|
162 |
# Optional URL input for Parquet to JSONL conversion
|
163 |
-
parquet_url = gr.Textbox(label="Parquet File URL (Optional)", placeholder="Enter URL if not uploading a file")
|
164 |
|
165 |
-
convert_button = gr.Button("Convert", elem_classes=["gradio-button"])
|
166 |
|
167 |
with gr.Row():
|
168 |
-
output_file = gr.File(label="Converted File")
|
169 |
-
preview = gr.Textbox(label="Preview (Top 10 Rows)", lines=15)
|
170 |
|
171 |
convert_button.click(
|
172 |
fn=dataset_converter,
|
173 |
inputs=[input_file, conversion_type, parquet_url],
|
174 |
outputs=[output_file, preview]
|
175 |
)
|
|
|
|
|
176 |
|
177 |
demo.launch()
|
|
|
24 |
# Conversion: CSV to Parquet
|
25 |
if conversion_type == "CSV to Parquet":
|
26 |
if input_file is None or file_extension != "csv":
|
27 |
+
raise ValueError("For CSV to Parquet conversion, please upload a CSV file. π")
|
28 |
df = pd.read_csv(BytesIO(file_bytes))
|
29 |
output_file = "output.parquet"
|
30 |
df.to_parquet(output_file, index=False)
|
|
|
34 |
# Conversion: Parquet to CSV
|
35 |
elif conversion_type == "Parquet to CSV":
|
36 |
if input_file is None or file_extension != "parquet":
|
37 |
+
raise ValueError("For Parquet to CSV conversion, please upload a Parquet file. π")
|
38 |
df = pd.read_parquet(BytesIO(file_bytes))
|
39 |
output_file = "output.csv"
|
40 |
df.to_csv(output_file, index=False)
|
|
|
44 |
# Conversion: CSV to JSONL
|
45 |
elif conversion_type == "CSV to JSONL":
|
46 |
if input_file is None or file_extension != "csv":
|
47 |
+
raise ValueError("For CSV to JSONL conversion, please upload a CSV file. π")
|
48 |
# Read CSV with latin1 encoding
|
49 |
df = pd.read_csv(BytesIO(file_bytes), encoding='latin1')
|
50 |
output_file = "metadata.jsonl"
|
|
|
70 |
# Use uploaded file if available; otherwise try the provided URL
|
71 |
if input_file is not None:
|
72 |
df = pd.read_parquet(BytesIO(file_bytes))
|
|
|
73 |
elif parquet_url:
|
74 |
response = requests.get(parquet_url)
|
75 |
response.raise_for_status() # Ensure the request was successful
|
76 |
df = pd.read_parquet(BytesIO(response.content))
|
77 |
file_name = "from_url.parquet"
|
78 |
else:
|
79 |
+
raise ValueError("For Parquet to JSONL conversion, please upload a file or provide a URL. π")
|
80 |
|
81 |
output_file = "output.jsonl"
|
82 |
# Recursive function to decode bytes to UTF-8 strings
|
|
|
99 |
preview_str = df.head(10).to_string(index=False)
|
100 |
|
101 |
else:
|
102 |
+
raise ValueError("Invalid conversion type selected. ⚠️")
|
103 |
|
104 |
info_message = (
|
105 |
f"Input file: {file_name if file_name is not None else 'N/A'}\n"
|
106 |
f"Converted file format: {converted_format}\n\n"
|
107 |
+
f"Preview (Top 10 Rows):\n{preview_str}\n\n"
|
108 |
+
"Community: https://discord.gg/openfreeai π"
|
109 |
)
|
110 |
return output_file, info_message
|
111 |
|
|
|
143 |
}
|
144 |
"""
|
145 |
|
146 |
+
with gr.Blocks(css=custom_css, title="Datasets Convertor") as demo:
|
147 |
+
gr.Markdown("# Datasets Convertor π")
|
148 |
gr.Markdown(
|
149 |
"Upload a CSV or Parquet file (or provide a Parquet file URL for Parquet to JSONL conversion) "
|
150 |
+
"and select the conversion type. The app converts the file to the desired format and displays a preview of the top 10 rows. ✨"
|
151 |
)
|
152 |
|
153 |
with gr.Row():
|
154 |
with gr.Column(scale=1):
|
155 |
+
input_file = gr.File(label="Upload CSV or Parquet File π")
|
156 |
with gr.Column(scale=1):
|
157 |
conversion_type = gr.Radio(
|
158 |
choices=["CSV to Parquet", "Parquet to CSV", "CSV to JSONL", "Parquet to JSONL"],
|
159 |
+
label="Conversion Type π"
|
160 |
)
|
161 |
|
162 |
# Optional URL input for Parquet to JSONL conversion
|
163 |
+
parquet_url = gr.Textbox(label="Parquet File URL (Optional) π", placeholder="Enter URL if not uploading a file")
|
164 |
|
165 |
+
convert_button = gr.Button("Convert ⚡", elem_classes=["gradio-button"])
|
166 |
|
167 |
with gr.Row():
|
168 |
+
output_file = gr.File(label="Converted File 💾")
|
169 |
+
preview = gr.Textbox(label="Preview (Top 10 Rows) π", lines=15)
|
170 |
|
171 |
convert_button.click(
|
172 |
fn=dataset_converter,
|
173 |
inputs=[input_file, conversion_type, parquet_url],
|
174 |
outputs=[output_file, preview]
|
175 |
)
|
176 |
+
|
177 |
+
gr.Markdown("**Join our Community:** [https://discord.gg/openfreeai](https://discord.gg/openfreeai) π€")
|
178 |
|
179 |
demo.launch()
|