openfree committed on
Commit
2bf1e25
·
verified ·
1 Parent(s): fcd8f70

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -22
app.py CHANGED
@@ -3,34 +3,52 @@ import pandas as pd
3
  import requests
4
  from io import BytesIO
5
 
6
def convert_csv_to_parquet(csv_file=None, csv_url=None):
    """Convert a CSV file (uploaded or fetched from a URL) to Parquet.

    Args:
        csv_file: Uploaded file. Either an object exposing the local path as
            ``.name`` or a plain path string. Takes precedence over
            ``csv_url`` when both are given.
        csv_url: URL of a CSV file to download. ``None``, an empty string and
            a whitespace-only string all count as "not provided".

    Returns:
        Path of the written ``output.parquet`` file.

    Raises:
        ValueError: If neither a file nor a non-blank URL is provided.
        requests.HTTPError: If the download answers with an error status.
    """
    import os
    import tempfile

    # An empty text box arrives as "" rather than None, so test for a
    # non-blank value instead of "is not None".
    url = (csv_url or "").strip()

    if csv_file is not None:
        df = pd.read_csv(getattr(csv_file, "name", csv_file))
    elif url:
        # Bound the wait so an unresponsive host cannot hang the request.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        df = pd.read_csv(BytesIO(response.content))
    else:
        raise ValueError("Either csv_file or csv_url must be provided")

    # Write into a fresh directory per call: a fixed path in the working
    # directory would be overwritten by any concurrent conversion.
    # NOTE: the directory is not removed here.
    output_file_path = os.path.join(tempfile.mkdtemp(), "output.parquet")
    df.to_parquet(output_file_path, index=False)

    return output_file_path
 
 
 
 
 
24
 
25
  demo = gr.Interface(
26
- fn=convert_csv_to_parquet,
27
  inputs=[
28
- gr.File(label="CSV File"),
29
- gr.Textbox(label="CSV File URL", placeholder="Enter a URL to a CSV file")
 
 
 
 
 
30
  ],
31
- outputs=[gr.File(label="Parquet Output")],
32
- title="CSV to Parquet Converter",
33
- description="Convert a CSV file to Parquet format from a downloadable link or file upload"
34
  )
35
 
36
  if __name__ == "__main__":
 
3
  import requests
4
  from io import BytesIO
5
 
6
def convert_file(input_file, file_url, conversion_type):
    """Convert a file between CSV and Parquet and build a short preview.

    The uploaded file is used when present; otherwise the file is downloaded
    from ``file_url``.

    Args:
        input_file: Uploaded file. Either an object exposing the local path
            as ``.name`` or a plain path string, or ``None``.
        file_url: URL of the file to download when no upload is given.
            ``None`` and blank strings count as "not provided"; surrounding
            whitespace is ignored.
        conversion_type: ``"CSV to Parquet"`` or ``"Parquet to CSV"``.

    Returns:
        A ``(output_file, preview)`` tuple: the path of the converted file
        (``output.parquet`` or ``output.csv``) and the first 10 rows rendered
        as plain text without the index.

    Raises:
        ValueError: If neither a file nor a URL is provided, or if
            ``conversion_type`` is not one of the two supported values
            (for example ``None`` when no option was selected).
        requests.HTTPError: If the download answers with an error status.
    """
    import os
    import tempfile

    url = (file_url or "").strip()
    if input_file is None and not url:
        raise ValueError("Please provide a file or a URL.")

    # Resolve the direction up front so an unselected/unknown option fails
    # with a clear message instead of being treated as "Parquet to CSV".
    if conversion_type == "CSV to Parquet":
        read_frame, output_name = pd.read_csv, "output.parquet"
    elif conversion_type == "Parquet to CSV":
        read_frame, output_name = pd.read_parquet, "output.csv"
    else:
        raise ValueError(
            "Please select a conversion type: 'CSV to Parquet' or 'Parquet to CSV'."
        )

    if input_file is not None:
        source = getattr(input_file, "name", input_file)
    else:
        # Use the stripped URL and bound the wait so an unresponsive host
        # cannot hang the request.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        source = BytesIO(response.content)
    df = read_frame(source)

    # Write into a fresh directory per call: a fixed path in the working
    # directory would be overwritten by any concurrent conversion.
    # NOTE: the directory is not removed here.
    output_file = os.path.join(tempfile.mkdtemp(), output_name)
    if conversion_type == "CSV to Parquet":
        df.to_parquet(output_file, index=False)
    else:
        df.to_csv(output_file, index=False)

    preview = df.head(10).to_string(index=False)

    return output_file, preview
38
 
39
  demo = gr.Interface(
40
+ fn=convert_file,
41
  inputs=[
42
+ gr.File(label="Input File (CSV or Parquet)"),
43
+ gr.Textbox(label="Input File URL (optional)", placeholder="Enter a URL to a CSV or Parquet file"),
44
+ gr.Radio(choices=["CSV to Parquet", "Parquet to CSV"], label="Conversion Type")
45
+ ],
46
+ outputs=[
47
+ gr.File(label="Converted File"),
48
+ gr.Textbox(label="Preview (Top 10 Rows)")
49
  ],
50
+ title="CSV <-> Parquet Converter",
51
+ description="Choose a conversion type, upload a file or enter a URL, and convert between CSV and Parquet formats. A preview of the top 10 rows will be shown."
 
52
  )
53
 
54
  if __name__ == "__main__":