# 20122013 / app.py
# ChingCL's picture
# Update app.py
# 83afe02 verified
import gradio as gr
import pandas as pd
import re
# Check 1: verify that the text around each "$" match is padded with spaces.
def check_spacing_around_dollar(df):
    """Report cells where a ``$`` match is not flanked by a space on both sides.

    Every cell is converted with ``str()`` and scanned with the pattern
    ``(\\s?\\$\\S*?\\s?|\\s?\\S*?\\$)``.  For each match, the character
    immediately before the match and the character immediately after it
    must both be a single space; otherwise the cell is reported.

    Args:
        df: DataFrame whose cells are checked.  Row labels are used as
            ``i + 1`` in the message, so they are assumed to be integers
            (true for the default index) -- TODO confirm for other callers.

    Returns:
        A list of messages, one per offending match (a cell with several
        offending matches appears several times).
    """
    # NOTE(review): the pattern may itself consume one whitespace character
    # on either side of "$", so the characters compared below are the ones
    # *outside* that consumed whitespace.  Confirm this is the intended rule.
    pattern = re.compile(r'(\s?\$\S*?\s?|\s?\S*?\$)')
    errors = []
    for i, row in df.iterrows():
        for col in df.columns:
            text = str(row[col])
            for match in pattern.finditer(text):
                start, end = match.span()
                # A match touching either end of the text has no neighbour
                # there.  Use '' so it counts as "no space" instead of
                # wrapping around to text[-1] or raising IndexError.
                before = text[start - 1] if start > 0 else ''
                after = text[end] if end < len(text) else ''
                if not (before == ' ' and after == ' '):
                    errors.append(f"行 {i+1} 列 '{col}': '{text}'")
    return errors
# Check 2: verify that each "$...$" run is preceded by a space.
def check_spacing_between_dollars(df):
    """Report cells where a ``$...$`` run is not preceded by a space.

    Every cell is converted with ``str()`` and scanned with the pattern
    ``\\$\\S+?(?=\\$)``: a ``$``, one or more non-whitespace characters, and
    a lookahead requiring another ``$`` right after the match.  A match is
    reported when the character immediately before its opening ``$`` is
    not a space (including when the match starts the text).

    Args:
        df: DataFrame whose cells are checked.  Row labels are used as
            ``i + 1`` in the message, so they are assumed to be integers
            (true for the default index) -- TODO confirm for other callers.

    Returns:
        A list of messages, one per offending match.
    """
    pattern = re.compile(r'\$\S+?(?=\$)')
    errors = []
    for i, row in df.iterrows():
        for col in df.columns:
            text = str(row[col])
            for match in pattern.finditer(text):
                start = match.start()
                # The lookahead guarantees text[match.end()] is "$", so a
                # "character after is not a space" test is always true and
                # only the preceding character decides the outcome.
                # At start == 0 there is no preceding character; use '' so
                # the lookup does not wrap around to the last character.
                before = text[start - 1] if start > 0 else ''
                if before != ' ':
                    errors.append(f"行 {i+1} 列 '{col}': '{text}'")
    return errors
# Check 3: verify that every number is wrapped in "$" on both sides.
def check_numbers_surrounded_by_dollar(df):
    """Report cells containing a number not enclosed as ``$<digits>$``.

    Every cell is converted with ``str()`` and scanned for runs of digits
    delimited by word boundaries (``\\b\\d+\\b``).  Each run must have a
    ``$`` immediately before it and immediately after it; otherwise the
    cell is reported.

    Args:
        df: DataFrame whose cells are checked.  Row labels are used as
            ``i + 1`` in the message, so they are assumed to be integers
            (true for the default index) -- TODO confirm for other callers.

    Returns:
        A list of messages, one per offending number.
    """
    pattern = re.compile(r'\b\d+\b')
    errors = []
    for i, row in df.iterrows():
        for col in df.columns:
            text = str(row[col])
            for match in pattern.finditer(text):
                start, end = match.span()
                # A number at the very start or end of the text has no
                # neighbour on that side.  Use '' so it is reported, rather
                # than wrapping to text[-1] or raising IndexError.
                before = text[start - 1] if start > 0 else ''
                after = text[end] if end < len(text) else ''
                if not (before == '$' and after == '$'):
                    errors.append(f"行 {i+1} 列 '{col}': '{text}'")
    return errors
# Load the uploaded file and run all three checks on it.
def process_file(file):
    """Read an uploaded CSV/XLSX file and return the results of every check.

    Args:
        file: The upload handed over by the UI.  Either an object exposing
            the file path as ``.name`` or a plain path string is accepted;
            ``None`` (nothing uploaded) is treated as an unsupported file.

    Returns:
        A dict mapping each check's label to its list of error messages,
        or the "unsupported file" message string when the input is missing
        or is neither ``.csv`` nor ``.xlsx``.
    """
    unsupported = "只支持 CSV 和 XLSX 檔案"
    if file is None:
        return unsupported

    # Use ``.name`` when present, otherwise treat the value itself as the
    # path, so both upload shapes work.
    path = str(getattr(file, "name", file))
    # Compare the extension case-insensitively so "DATA.CSV" is accepted.
    lowered = path.lower()
    if lowered.endswith('.csv'):
        df = pd.read_csv(path)
    elif lowered.endswith('.xlsx'):
        df = pd.read_excel(path)
    else:
        return unsupported

    # Run the checks in order; the keys are the labels shown in the UI.
    return {
        "第一個檢查": check_spacing_around_dollar(df),
        "第二個檢查": check_spacing_between_dollars(df),
        "第三個檢查": check_numbers_surrounded_by_dollar(df),
    }
# Gradio interface: one file upload in, JSON report of the checks out.
iface = gr.Interface(
    title="校對系統",
    description="這個系統會檢查 CSV 或 XLSX 檔案中的格式錯誤,包括 $ 符號和數字的空格錯誤。",
    fn=process_file,
    inputs=gr.File(label="上傳 CSV 或 XLSX 檔案"),
    outputs=gr.JSON(label="檢查結果"),
)

# Start the web UI only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()