|
import gradio as gr |
|
import pandas as pd |
|
import re |
|
|
|
|
|
def check_spacing_around_dollar(df):
    """Report cells whose ``$`` matches are not flanked by spaces.

    Every cell is converted with ``str()`` and scanned with a regex that
    matches either a ``$`` together with at most one adjacent whitespace
    character on each side, or a run of non-whitespace characters ending
    in ``$``.  A match is accepted only when the character immediately
    before it and the character immediately after it are both a space.

    The start and end of the cell text count as "no space", so a match
    touching either boundary is reported instead of raising ``IndexError``
    or comparing against the character at the opposite end of the string.

    Args:
        df: DataFrame whose cells are checked.

    Returns:
        A list of messages, one per failing match (a cell with several
        failing matches appears several times).  The row number is the
        index label plus one, which assumes an integer index.
    """
    # NOTE(review): the pattern itself can consume one whitespace character
    # on each side of the `$`, so the characters inspected below are the
    # ones *outside* that whitespace (e.g. "a $ b" is reported). Confirm
    # this is the intended rule before relying on the results.
    pattern = re.compile(r'(\s?\$\S*?\s?|\s?\S*?\$)')
    errors = []

    for i, row in df.iterrows():
        for col in df.columns:
            text = str(row[col])
            for match in pattern.finditer(text):
                start, end = match.span()
                # Guard both boundaries: index -1 would silently wrap to
                # the last character, and index len(text) would raise.
                before = text[start - 1] if start > 0 else ''
                after = text[end] if end < len(text) else ''
                if not (before == ' ' and after == ' '):
                    errors.append(f"行 {i+1} 列 '{col}': '{text}'")

    return errors
|
|
|
|
|
def check_spacing_between_dollars(df):
    """Report ``$...$`` openings that are not preceded by a space.

    Every cell is converted with ``str()`` and scanned for a ``$`` followed
    by one or more non-whitespace characters up to (but not including) the
    next ``$``.  A match is reported when neither the character right after
    the match nor the character right before it is a space.

    The start of the cell text counts as "no space"; it is no longer
    compared against the last character of the string.

    Args:
        df: DataFrame whose cells are checked.

    Returns:
        A list of messages, one per failing match.  The row number is the
        index label plus one, which assumes an integer index.
    """
    pattern = re.compile(r'\$\S+?(?=\$)')
    errors = []

    for i, row in df.iterrows():
        for col in df.columns:
            text = str(row[col])
            for match in pattern.finditer(text):
                start, end = match.span()
                # Index -1 would wrap around to the last character, so a
                # match at the very start has no preceding character.
                before = text[start - 1] if start > 0 else ''
                # NOTE(review): the lookahead guarantees text[end] is the
                # closing `$`, so the first comparison is always true and
                # only the preceding character decides the outcome.
                # Confirm whether the character after the closing `$` was
                # meant to be checked instead.
                if text[end] != ' ' and before != ' ':
                    errors.append(f"行 {i+1} 列 '{col}': '{text}'")

    return errors
|
|
|
|
|
def check_numbers_surrounded_by_dollar(df):
    """Report digit runs that are not wrapped directly in ``$`` signs.

    Every cell is converted with ``str()`` and each whole-word run of
    digits (``\\b\\d+\\b``) must have a ``$`` immediately before it and a
    ``$`` immediately after it.

    A number at the very start or very end of the cell has no neighbour on
    that side and is therefore reported; previously such input either
    raised ``IndexError`` or was compared against the character at the
    opposite end of the string.

    Args:
        df: DataFrame whose cells are checked.

    Returns:
        A list of messages, one per failing number (a cell with several
        failing numbers appears several times).  The row number is the
        index label plus one, which assumes an integer index.
    """
    pattern = re.compile(r'\b\d+\b')
    errors = []

    for i, row in df.iterrows():
        for col in df.columns:
            text = str(row[col])
            for match in pattern.finditer(text):
                start, end = match.span()
                # Guard both boundaries: index -1 would silently wrap to
                # the last character, and index len(text) would raise.
                before = text[start - 1] if start > 0 else ''
                after = text[end] if end < len(text) else ''
                if not (before == '$' and after == '$'):
                    errors.append(f"行 {i+1} 列 '{col}': '{text}'")

    return errors
|
|
|
|
|
def process_file(file):
    """Load an uploaded CSV/XLSX file and run the three format checks.

    Args:
        file: The uploaded file.  Accepted forms are an object exposing the
            on-disk path as ``.name``, a plain path (``str`` or
            ``os.PathLike``), or ``None`` when nothing was uploaded.

    Returns:
        A dict mapping each check's label to its list of error messages,
        or the unsupported-format message string when the input is missing
        or is neither ``.csv`` nor ``.xlsx`` (matched case-insensitively).
    """
    import os

    unsupported = "只支持 CSV 和 XLSX 檔案"

    # Presumably the upload component passes either a temp-file wrapper or
    # a bare path depending on Gradio version/configuration — accept both.
    if isinstance(file, (str, os.PathLike)):
        path = os.fspath(file)
    else:
        path = getattr(file, "name", None)

    # Covers a missing upload (None) and anything without a usable path.
    if not isinstance(path, str):
        return unsupported

    lowered = path.lower()  # treat ".CSV" / ".XLSX" like their lowercase forms
    if lowered.endswith('.csv'):
        df = pd.read_csv(path)
    elif lowered.endswith('.xlsx'):
        df = pd.read_excel(path)
    else:
        # NOTE(review): this returns a bare string while the UI output is a
        # JSON component — verify the component renders a non-JSON string
        # as intended.
        return unsupported

    errors1 = check_spacing_around_dollar(df)
    errors2 = check_spacing_between_dollars(df)
    errors3 = check_numbers_surrounded_by_dollar(df)

    return {
        "第一個檢查": errors1,
        "第二個檢查": errors2,
        "第三個檢查": errors3
    }
|
|
|
|
|
# Gradio front end: a single file-upload input is passed to process_file and
# the value it returns is shown in a JSON output component.
_upload_widget = gr.File(label="上傳 CSV 或 XLSX 檔案")
_report_widget = gr.JSON(label="檢查結果")

iface = gr.Interface(
    fn=process_file,
    inputs=_upload_widget,
    outputs=_report_widget,
    title="校對系統",
    description="這個系統會檢查 CSV 或 XLSX 檔案中的格式錯誤,包括 $ 符號和數字的空格錯誤。",
)


# Start the web UI only when the file is executed as a script, not on import.
if __name__ == "__main__":
    iface.launch()
|
|