Mollel committed on
Commit
138fae9
·
verified ·
1 Parent(s): 228c3f8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -2
app.py CHANGED
@@ -22,6 +22,33 @@ def extract_table_from_markdown(markdown_text, table_start):
22
  return '\n'.join(table_content)
23
 
24
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  def markdown_table_to_df(table_content):
26
  """Convert markdown table to pandas DataFrame."""
27
  # Split the table content into lines
@@ -42,9 +69,11 @@ def markdown_table_to_df(table_content):
42
  # Create DataFrame
43
  df = pd.DataFrame(data, columns=headers)
44
 
45
- # Convert numeric columns to float
46
  for col in df.columns:
47
- if col not in ["Model Name", "Publisher", "Open?", "Basemodel", "Matryoshka"]:
 
 
48
  df[col] = pd.to_numeric(df[col], errors='coerce')
49
 
50
  return df
 
22
  return '\n'.join(table_content)
23
 
24
 
25
+ # def markdown_table_to_df(table_content):
26
+ # """Convert markdown table to pandas DataFrame."""
27
+ # # Split the table content into lines
28
+ # lines = table_content.split('\n')
29
+
30
+ # # Extract headers
31
+ # headers = [h.strip() for h in lines[0].split('|') if h.strip()]
32
+
33
+ # # Extract data
34
+ # data = []
35
+ # for line in lines[2:]: # Skip the header separator line
36
+ # row = [cell.strip() for cell in line.split('|') if cell.strip()]
37
+ # if row: # Include any non-empty row
38
+ # # Pad the row with empty strings if it's shorter than the headers
39
+ # padded_row = row + [''] * (len(headers) - len(row))
40
+ # data.append(padded_row[:len(headers)]) # Trim if longer than headers
41
+
42
+ # # Create DataFrame
43
+ # df = pd.DataFrame(data, columns=headers)
44
+
45
+ # # Convert numeric columns to float
46
+ # for col in df.columns:
47
+ # if col not in ["Model Name", "Publisher", "Open?", "Basemodel", "Matryoshka"]:
48
+ # df[col] = pd.to_numeric(df[col], errors='coerce')
49
+
50
+ # return df
51
+
52
  def markdown_table_to_df(table_content):
53
  """Convert markdown table to pandas DataFrame."""
54
  # Split the table content into lines
 
69
  # Create DataFrame
70
  df = pd.DataFrame(data, columns=headers)
71
 
72
+ # Convert numeric columns to float and handle Dimension column
73
  for col in df.columns:
74
+ if col == "Dimension":
75
+ df[col] = df[col].apply(lambda x: int(x) if x.isdigit() else "")
76
+ elif col not in ["Model Name", "Publisher", "Open?", "Basemodel", "Matryoshka"]:
77
  df[col] = pd.to_numeric(df[col], errors='coerce')
78
 
79
  return df