Spaces:

sartifyllc
/

Swahili-Text-Embeddings-Leaderboard

Running

App Files Community

Mollel committed on Jul 13, 2024

Commit

138fae9

verified ·

1 Parent(s): 228c3f8

Update app.py

Browse files

Files changed (1) hide show

app.py +31 -2

app.py CHANGED Viewed

@@ -22,6 +22,33 @@ def extract_table_from_markdown(markdown_text, table_start):
     return '\n'.join(table_content)
 def markdown_table_to_df(table_content):
     """Convert markdown table to pandas DataFrame."""
     # Split the table content into lines
@@ -42,9 +69,11 @@ def markdown_table_to_df(table_content):
     # Create DataFrame
     df = pd.DataFrame(data, columns=headers)
-    # Convert numeric columns to float
     for col in df.columns:
-        if col not in ["Model Name", "Publisher", "Open?", "Basemodel", "Matryoshka"]:
             df[col] = pd.to_numeric(df[col], errors='coerce')
     return df

     return '\n'.join(table_content)
+# def markdown_table_to_df(table_content):
+#     """Convert markdown table to pandas DataFrame."""
+#     # Split the table content into lines
+#     lines = table_content.split('\n')
+#     # Extract headers
+#     headers = [h.strip() for h in lines[0].split('|') if h.strip()]
+#     # Extract data
+#     data = []
+#     for line in lines[2:]:  # Skip the header separator line
+#         row = [cell.strip() for cell in line.split('|') if cell.strip()]
+#         if row:  # Include any non-empty row
+#             # Pad the row with empty strings if it's shorter than the headers
+#             padded_row = row + [''] * (len(headers) - len(row))
+#             data.append(padded_row[:len(headers)])  # Trim if longer than headers
+#     # Create DataFrame
+#     df = pd.DataFrame(data, columns=headers)
+#     # Convert numeric columns to float
+#     for col in df.columns:
+#         if col not in ["Model Name", "Publisher", "Open?", "Basemodel", "Matryoshka"]:
+#             df[col] = pd.to_numeric(df[col], errors='coerce')
+#     return df
 def markdown_table_to_df(table_content):
     """Convert markdown table to pandas DataFrame."""
     # Split the table content into lines
     # Create DataFrame
     df = pd.DataFrame(data, columns=headers)
+    # Convert numeric columns to float and handle Dimension column
     for col in df.columns:
+        if col == "Dimension":
+            df[col] = df[col].apply(lambda x: int(x) if x.isdigit() else "")
+        elif col not in ["Model Name", "Publisher", "Open?", "Basemodel", "Matryoshka"]:
             df[col] = pd.to_numeric(df[col], errors='coerce')
     return df