Update app.py
Browse files
app.py
CHANGED
@@ -22,6 +22,33 @@ def extract_table_from_markdown(markdown_text, table_start):
|
|
22 |
return '\n'.join(table_content)
|
23 |
|
24 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
25 |
def markdown_table_to_df(table_content):
|
26 |
"""Convert markdown table to pandas DataFrame."""
|
27 |
# Split the table content into lines
|
@@ -42,9 +69,11 @@ def markdown_table_to_df(table_content):
|
|
42 |
# Create DataFrame
|
43 |
df = pd.DataFrame(data, columns=headers)
|
44 |
|
45 |
-
# Convert numeric columns to float
|
46 |
for col in df.columns:
|
47 |
-
if col not in ["Model Name", "Publisher", "Open?", "Basemodel", "Matryoshka"]:
|
|
|
|
|
48 |
df[col] = pd.to_numeric(df[col], errors='coerce')
|
49 |
|
50 |
return df
|
|
|
22 |
return '\n'.join(table_content)
|
23 |
|
24 |
|
25 |
+
# def markdown_table_to_df(table_content):
|
26 |
+
# """Convert markdown table to pandas DataFrame."""
|
27 |
+
# # Split the table content into lines
|
28 |
+
# lines = table_content.split('\n')
|
29 |
+
|
30 |
+
# # Extract headers
|
31 |
+
# headers = [h.strip() for h in lines[0].split('|') if h.strip()]
|
32 |
+
|
33 |
+
# # Extract data
|
34 |
+
# data = []
|
35 |
+
# for line in lines[2:]: # Skip the header separator line
|
36 |
+
# row = [cell.strip() for cell in line.split('|') if cell.strip()]
|
37 |
+
# if row: # Include any non-empty row
|
38 |
+
# # Pad the row with empty strings if it's shorter than the headers
|
39 |
+
# padded_row = row + [''] * (len(headers) - len(row))
|
40 |
+
# data.append(padded_row[:len(headers)]) # Trim if longer than headers
|
41 |
+
|
42 |
+
# # Create DataFrame
|
43 |
+
# df = pd.DataFrame(data, columns=headers)
|
44 |
+
|
45 |
+
# # Convert numeric columns to float
|
46 |
+
# for col in df.columns:
|
47 |
+
# if col not in ["Model Name", "Publisher", "Open?", "Basemodel", "Matryoshka"]:
|
48 |
+
# df[col] = pd.to_numeric(df[col], errors='coerce')
|
49 |
+
|
50 |
+
# return df
|
51 |
+
|
52 |
def markdown_table_to_df(table_content):
|
53 |
"""Convert markdown table to pandas DataFrame."""
|
54 |
# Split the table content into lines
|
|
|
69 |
# Create DataFrame
|
70 |
df = pd.DataFrame(data, columns=headers)
|
71 |
|
72 |
+
# Convert numeric columns to float and handle Dimension column
|
73 |
for col in df.columns:
|
74 |
+
if col == "Dimension":
|
75 |
+
df[col] = df[col].apply(lambda x: int(x) if x.isdigit() else "")
|
76 |
+
elif col not in ["Model Name", "Publisher", "Open?", "Basemodel", "Matryoshka"]:
|
77 |
df[col] = pd.to_numeric(df[col], errors='coerce')
|
78 |
|
79 |
return df
|