openfree committed on
Commit
cb7278a
·
verified ·
1 Parent(s): 36a263e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -619
app.py CHANGED
@@ -8,622 +8,14 @@ import re
8
  import uuid
9
  import pymupdf
10
 
11
- ###############################
12
- # 환경 설정
13
- ###############################
14
- os.system('pip uninstall -y magic-pdf')
15
- os.system('pip install git+https://github.com/opendatalab/MinerU.git@dev')
16
- os.system('wget https://github.com/opendatalab/MinerU/raw/dev/scripts/download_models_hf.py -O download_models_hf.py')
17
- os.system('python download_models_hf.py')
18
-
19
- with open('/home/user/magic-pdf.json', 'r') as file:
20
- data = json.load(file)
21
-
22
- data['device-mode'] = "cuda"
23
- if os.getenv('apikey'):
24
- data['llm-aided-config']['title_aided']['api_key'] = os.getenv('apikey')
25
- data['llm-aided-config']['title_aided']['enable'] = True
26
-
27
- with open('/home/user/magic-pdf.json', 'w') as file:
28
- json.dump(data, file, indent=4)
29
-
30
- os.system('cp -r paddleocr /home/user/.paddleocr')
31
-
32
- ###############################
33
- # 그 외 라이브러리
34
- ###############################
35
- import gradio as gr
36
- from loguru import logger
37
- from gradio_pdf import PDF
38
-
39
- ###############################
40
- # magic_pdf 관련 모듈
41
- ###############################
42
- from magic_pdf.data.data_reader_writer import FileBasedDataReader
43
- from magic_pdf.libs.hash_utils import compute_sha256
44
- from magic_pdf.tools.common import do_parse, prepare_env
45
-
46
- ###############################
47
- # 공통 함수들
48
- ###############################
49
def create_css():
    """Return the base CSS stylesheet used by the Gradio app."""
    stylesheet = """
    .gradio-container {
        width: 100vw !important;
        min-height: 100vh !important;
        margin: 0 !important;
        padding: 0 !important;
        background: linear-gradient(135deg, #EFF6FF 0%, #F5F3FF 100%);
        display: flex;
        flex-direction: column;
        overflow-y: auto !important;
    }
    .title-area {
        text-align: center;
        margin: 1rem auto;
        padding: 1rem;
        background: white;
        border-radius: 1rem;
        box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1);
        max-width: 800px;
    }
    .title-area h1 {
        background: linear-gradient(90deg, #2563EB 0%, #7C3AED 100%);
        -webkit-background-clip: text;
        -webkit-text-fill-color: transparent;
        font-size: 2.5rem;
        font-weight: bold;
        margin-bottom: 0.5rem;
    }
    .title-area p {
        color: #6B7280;
        font-size: 1.1rem;
    }
    .invisible {
        display: none !important;
    }
    .gr-block, .gr-box {
        padding: 0.5rem !important;
    }
    """
    return stylesheet
92
-
93
def read_fn(path):
    """Read the file at *path* through a FileBasedDataReader.

    The reader is rooted at the file's parent directory and asked for the
    file's base name; whatever ``read`` returns is passed back unchanged.
    """
    parent_dir, base_name = os.path.split(path)
    reader = FileBasedDataReader(parent_dir)
    return reader.read(base_name)
96
-
97
def parse_pdf(doc_path, output_dir, end_page_id, is_ocr, layout_mode, formula_enable, table_enable, language):
    """Run the magic-pdf pipeline on one document.

    Args:
        doc_path: Path of the PDF to parse.
        output_dir: Directory that receives the results (created if missing).
        end_page_id: Forwarded to ``do_parse`` as ``end_page_id``.
        is_ocr: When truthy the parse method is "ocr", otherwise "auto".
        layout_mode: Forwarded to ``do_parse`` as ``layout_model``.
        formula_enable: Forwarded to ``do_parse``.
        table_enable: Forwarded to ``do_parse``.
        language: Forwarded to ``do_parse`` as ``lang``.

    Returns:
        A ``(local_md_dir, file_name)`` tuple, where ``local_md_dir`` comes
        from ``prepare_env`` and ``file_name`` is the document stem with the
        current timestamp appended.

    Raises:
        Exception: Any failure is logged and then re-raised. Previously the
            function fell through and returned ``None``, which made every
            caller fail with an unrelated tuple-unpacking ``TypeError``.
    """
    os.makedirs(output_dir, exist_ok=True)
    try:
        # The timestamp suffix keeps repeated uploads of the same file apart.
        file_name = f"{str(Path(doc_path).stem)}_{time.time()}"
        pdf_data = read_fn(doc_path)
        parse_method = "ocr" if is_ocr else "auto"
        local_image_dir, local_md_dir = prepare_env(output_dir, file_name, parse_method)
        do_parse(
            output_dir,
            file_name,
            pdf_data,
            [],
            parse_method,
            False,
            end_page_id=end_page_id,
            layout_model=layout_mode,
            formula_enable=formula_enable,
            table_enable=table_enable,
            lang=language,
            f_dump_orig_pdf=False
        )
        return local_md_dir, file_name
    except Exception as e:
        logger.exception(e)
        # Surface the real error instead of an implicit ``None`` return.
        raise
121
-
122
def compress_directory_to_zip(directory_path, output_zip_path):
    """Zip every file under *directory_path* into *output_zip_path*.

    Entries are stored with paths relative to *directory_path*. If the
    archive itself is located inside the directory being walked, it is
    skipped so the half-written archive is never added to itself.

    Args:
        directory_path: Root directory to compress.
        output_zip_path: Destination path of the zip archive.

    Returns:
        0 on success, -1 if any exception occurred (the exception is logged).
    """
    try:
        archive_abspath = os.path.abspath(output_zip_path)
        with zipfile.ZipFile(output_zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
            for root, dirs, files in os.walk(directory_path):
                for file in files:
                    file_path = os.path.join(root, file)
                    if os.path.abspath(file_path) == archive_abspath:
                        # Do not zip the archive into itself.
                        continue
                    arcname = os.path.relpath(file_path, directory_path)
                    zipf.write(file_path, arcname)
        return 0
    except Exception as e:
        logger.exception(e)
        return -1
134
-
135
def image_to_base64(image_path):
    """Return the contents of *image_path* as a base64-encoded UTF-8 string."""
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return str(encoded, 'utf-8')
138
-
139
def replace_image_with_base64(markdown_text, image_dir_path):
    """Inline local images referenced in markdown as base64 data URIs.

    Every ``![alt](target)`` whose target resolves to an existing file under
    *image_dir_path* is rewritten to ``![target](data:<mime>;base64,...)``.
    The MIME type is guessed from the file extension and falls back to
    ``image/jpeg`` (the previously hard-coded value) when it cannot be
    determined. References that do not resolve to a local file (remote URLs,
    existing data URIs, missing files) are left untouched instead of raising.

    Args:
        markdown_text: Markdown source to process.
        image_dir_path: Directory against which image targets are resolved.

    Returns:
        The markdown text with resolvable images embedded.
    """
    import mimetypes

    pattern = r'\!\[(?:[^\]]*)\]\(([^)]+)\)'

    def replace(match):
        relative_path = match.group(1)
        full_path = os.path.join(image_dir_path, relative_path)
        if not os.path.isfile(full_path):
            # Nothing to embed; keep the original reference as written.
            return match.group(0)
        mime_type = mimetypes.guess_type(full_path)[0]
        if not mime_type or not mime_type.startswith("image/"):
            mime_type = "image/jpeg"
        with open(full_path, "rb") as image_file:
            base64_image = base64.b64encode(image_file.read()).decode('utf-8')
        return f"![{relative_path}](data:{mime_type};base64,{base64_image})"

    return re.sub(pattern, replace, markdown_text)
147
-
148
def to_pdf(file_path):
    """Return a PDF path for *file_path*, converting images (JPG/PNG etc.) when needed.

    A file that pymupdf already reports as a PDF is returned unchanged.
    Anything else is converted with ``convert_to_pdf`` and written next to
    the source file under a random ``<uuid4>.pdf`` name, whose path is
    returned.
    """
    with pymupdf.open(file_path) as document:
        if document.is_pdf:
            return file_path

        converted = document.convert_to_pdf()
        target_path = os.path.join(os.path.dirname(file_path), f"{uuid.uuid4()}.pdf")
        with open(target_path, 'wb') as pdf_out:
            pdf_out.write(converted)
        return target_path
162
-
163
def to_markdown(file_path, end_pages, is_ocr, layout_mode, formula_enable, table_enable, language, progress=gr.Progress(track_tqdm=False)):
    """Convert an uploaded PDF/image to markdown, reporting progress.

    Steps: convert the upload to PDF, parse it into ``./output``, zip the
    result directory, read the generated markdown and inline its images as
    base64.

    Progress is reported as a fraction in the 0-1 range. The previous values
    (20, 50, 70, 90, 100) were percentages, which is not the scale
    ``gr.Progress`` expects for a float.

    Args:
        file_path: Path of the uploaded PDF or image.
        end_pages: Maximum number of pages to convert (capped at 20).
        is_ocr, layout_mode, formula_enable, table_enable, language:
            Forwarded to ``parse_pdf``.
        progress: Gradio progress tracker.

    Returns:
        The markdown text with images embedded as base64 data URIs.
    """
    progress(0, "PDF로 변환 중...")
    file_path = to_pdf(file_path)
    time.sleep(0.5)  # short pause between stages, kept from the original flow

    end_pages = min(end_pages, 20)

    progress(0.2, "문서 파싱 중...")
    # parse_pdf takes the index of the last page, hence the -1.
    local_md_dir, file_name = parse_pdf(file_path, './output', end_pages - 1, is_ocr,
                                        layout_mode, formula_enable, table_enable, language)
    time.sleep(0.5)

    progress(0.5, "압축(zip) 생성 중...")
    archive_zip_path = os.path.join("./output", compute_sha256(local_md_dir) + ".zip")
    zip_archive_success = compress_directory_to_zip(local_md_dir, archive_zip_path)
    # A failed archive is only logged; the markdown is still returned.
    if zip_archive_success == 0:
        logger.info("압축 성공")
    else:
        logger.error("압축 실패")
    time.sleep(0.5)

    progress(0.7, "마크다운 읽는 중...")
    md_path = os.path.join(local_md_dir, file_name + ".md")
    with open(md_path, 'r', encoding='utf-8') as f:
        txt_content = f.read()
    time.sleep(0.5)

    progress(0.9, "이미지 base64 변환 중...")
    md_content = replace_image_with_base64(txt_content, local_md_dir)
    time.sleep(0.5)

    progress(1.0, "변환 완료!")
    return md_content
201
-
202
def init_model():
    """Warm up the magic-pdf models.

    Requests the text model (``get_model(False, False)``) and then the OCR
    model (``get_model(True, False)``) from ``ModelSingleton``.

    Returns:
        0 when both models were obtained, -1 if an exception was raised
        (the exception is logged).
    """
    from magic_pdf.model.doc_analyze_by_custom_model import ModelSingleton
    try:
        manager = ModelSingleton()
        for use_ocr, label in ((False, "txt_model"), (True, "ocr_model")):
            manager.get_model(use_ocr, False)
            logger.info(f"{label} init final")
    except Exception as err:
        logger.exception(err)
        return -1
    return 0


model_init = init_model()
logger.info(f"model_init: {model_init}")
220
-
221
- ###############################
222
- # 언어 목록
223
- ###############################
224
# Language codes grouped by script; all_lang is what the dropdowns offer.
latin_lang = (
    'af az bs cs cy da de es et fr ga hr hu id is it ku '
    'la lt lv mi ms mt nl no oc pi pl pt ro rs_latin sk sl '
    'sq sv sw tl tr uz vi french german'
).split()
arabic_lang = 'ar fa ug ur'.split()
cyrillic_lang = 'ru rs_cyrillic be bg uk mn abq ady kbd ava dar inh che lbe lez tab'.split()
devanagari_lang = 'hi mr ne bh mai ang bho mah sck new gom sa bgc'.split()
other_lang = 'ch en korean japan chinese_cht ta te ka'.split()

# '' and 'auto' come first, followed by every group in a fixed order.
all_lang = (
    ['', 'auto']
    + other_lang
    + latin_lang
    + arabic_lang
    + cyrillic_lang
    + devanagari_lang
)
236
-
237
- ###############################
238
- # (1) PDF Chat 용 LLM 관련
239
- ###############################
240
- import google.generativeai as genai
241
- from gradio import ChatMessage
242
- from typing import Iterator
243
-
244
# Gemini client setup: the key is read from the GEMINI_API_KEY environment
# variable (None when unset) and one shared model handle is created.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
genai.configure(api_key=GEMINI_API_KEY)

model = genai.GenerativeModel("gemini-2.0-flash-thinking-exp-1219")
247
-
248
def format_chat_history(messages: list) -> list:
    """Convert chat messages into the ``{"role", "parts"}`` dicts Gemini expects.

    Assistant messages that carry a metadata title are the "Thinking" traces
    created while streaming, and are left out. The previous check,
    ``hasattr(message, "metadata")``, was true for every ``ChatMessage``, so
    all assistant turns, including real answers, were dropped. Assistant
    turns are labelled ``"model"``, the role name used by the Gemini API.

    Args:
        messages: Objects exposing ``role``, ``content`` and optionally
            ``metadata`` (a mapping that may contain ``"title"``).

    Returns:
        A list of ``{"role": "user" | "model", "parts": [content]}`` dicts.
    """
    formatted_history = []
    for message in messages:
        metadata = getattr(message, "metadata", None) or {}
        if message.role == "assistant" and metadata.get("title"):
            # Thinking trace: shown in the UI only, not part of the dialogue.
            continue
        formatted_history.append({
            "role": "user" if message.role == "user" else "model",
            "parts": [message.content],
        })
    return formatted_history
260
-
261
def convert_chat_messages_to_gradio_format(messages):
    """Collapse a ChatMessage list into ``[(user_text, bot_text), ...]`` pairs.

    Each user message opens a new pair. Consecutive non-user messages are
    concatenated into the bot side of the current pair. Missing sides are
    emitted as empty strings.
    """
    pairs = []
    question = None
    answer = None

    for entry in messages:
        if entry.role != "user":
            # Bot text with no preceding user turn gets an empty question.
            if question is None:
                question = ""
            answer = entry.content if answer is None else answer + entry.content
            continue

        # A new user turn: flush whatever pair is pending first.
        if not (question is None and answer is None):
            pairs.append((question or "", answer or ""))
        question, answer = entry.content, None

    if not (question is None and answer is None):
        pairs.append((question or "", answer or ""))

    return pairs
286
-
287
def stream_gemini_response(user_message: str, messages: list) -> Iterator[list]:
    """Stream a Gemini reply, yielding the chat in Gradio tuple format.

    A blank *user_message* is replaced by a placeholder sentence. The
    function appends a "Thinking" assistant message to *messages*, fills it
    while chunks arrive, and once a chunk with two parts shows up switches to
    a second assistant message that accumulates the final answer. Any
    exception is printed and reported as an assistant message.

    Yields:
        The result of ``convert_chat_messages_to_gradio_format(messages)``
        after every update.
    """
    thinking_title = "⚙️ Thinking: *The thoughts produced by the model are experimental"

    def thinking_message(text):
        # Assistant message flagged as a thinking trace via its metadata title.
        return ChatMessage(
            role="assistant",
            content=text,
            metadata={"title": thinking_title}
        )

    if not user_message.strip():
        user_message = "...(No content from user)..."

    try:
        print(f"\n=== [Gemini] New Request ===\nUser message: '{user_message}'")
        history_for_model = format_chat_history(messages)
        chat = model.start_chat(history=history_for_model)
        response = chat.send_message(user_message, stream=True)

        thoughts = ""
        answer = ""
        answering = False

        # Placeholder for the thinking trace.
        messages.append(thinking_message(""))
        yield convert_chat_messages_to_gradio_format(messages)

        for chunk in response:
            parts = chunk.candidates[0].content.parts
            piece = parts[0].text

            if answering:
                # Already streaming the final answer.
                answer += piece
                messages[-1] = ChatMessage(role="assistant", content=answer)
            elif len(parts) == 2:
                # Two parts: parts[0] closes the thinking trace and parts[1]
                # opens the final answer.
                thoughts += piece
                messages[-1] = thinking_message(thoughts)
                yield convert_chat_messages_to_gradio_format(messages)

                answer = parts[1].text
                messages.append(ChatMessage(role="assistant", content=answer))
                answering = True
            else:
                # Still thinking.
                thoughts += piece
                messages[-1] = thinking_message(thoughts)

            yield convert_chat_messages_to_gradio_format(messages)

        print(f"\n=== [Gemini] Final Response ===\n{answer}")

    except Exception as e:
        print(f"\n=== [Gemini] Error ===\n{str(e)}")
        messages.append(ChatMessage(role="assistant", content=f"I encountered an error: {str(e)}"))
        yield convert_chat_messages_to_gradio_format(messages)
353
-
354
def user_message(msg: str, history: list, doc_text: str) -> tuple[str, list]:
    """Append the user's question to *history*, prefixed by the document if any.

    When *doc_text* (the converted markdown) is non-blank, the question is
    wrapped in a prompt that quotes the document; otherwise the raw question
    is used.

    Returns:
        ``("", history)``: an empty string for the input box and the same
        history list, now holding the new user message.
    """
    user_query = msg
    if doc_text.strip():
        user_query = f"다음 문서를 참고하여 답변:\n\n{doc_text}\n\n질문: {msg}"

    new_turn = ChatMessage(role="user", content=user_query)
    history.append(new_turn)
    return "", history
365
-
366
def reset_states(_):
    """Return cleared values for a newly uploaded file.

    The single argument is ignored. The returned triple is, in order:
    chat_history (empty list), md_state (empty string) and chatbot
    (empty list of tuples).
    """
    cleared_history = []
    cleared_markdown = ""
    cleared_chatbot = []
    return cleared_history, cleared_markdown, cleared_chatbot
374
-
375
- ###############################
376
- # (2) OCR FLEX 전용 (스니펫)
377
- ###############################
378
- # 별도의 LaTeX 설정
379
# LaTeX delimiters: "$$...$$" renders as display math, "$...$" as inline math.
latex_delimiters = [
    dict(left="$$", right="$$", display=True),
    dict(left='$', right='$', display=False),
]
383
-
384
def to_markdown_ocr_flex(file_path, end_pages, is_ocr, layout_mode, formula_enable, table_enable, language):
    """Convert an uploaded PDF/image for the OCR FLEX tab.

    Args:
        file_path: Path of the uploaded PDF or image.
        end_pages: Maximum number of pages to convert (capped at 20).
        is_ocr, layout_mode, formula_enable, table_enable, language:
            Forwarded to ``parse_pdf``.

    Returns:
        A 4-tuple: markdown with base64-embedded images (for rendering), the
        raw markdown text, the path of the zip archive, and the path of the
        ``<file_name>_layout.pdf`` file used for the PDF preview.
    """
    pdf_path = to_pdf(file_path)
    page_limit = min(end_pages, 20)

    # parse_pdf takes the index of the last page, hence the -1.
    local_md_dir, file_name = parse_pdf(
        pdf_path, './output', page_limit - 1, is_ocr,
        layout_mode, formula_enable, table_enable, language
    )

    archive_zip_path = os.path.join("./output", compute_sha256(local_md_dir) + ".zip")
    # A failed archive is only logged; the path is returned either way.
    if compress_directory_to_zip(local_md_dir, archive_zip_path) == 0:
        logger.info("압축 성공")
    else:
        logger.error("압축 실패")

    with open(os.path.join(local_md_dir, file_name + ".md"), 'r', encoding='utf-8') as md_file:
        txt_content = md_file.read()

    md_content = replace_image_with_base64(txt_content, local_md_dir)
    new_pdf_path = os.path.join(local_md_dir, file_name + "_layout.pdf")

    return md_content, txt_content, archive_zip_path, new_pdf_path
412
-
413
- ###############################
414
- # UI 통합
415
- ###############################
416
if __name__ == "__main__":
    # Values shared by several components below.
    upload_types = [".pdf", ".png", ".jpeg", ".jpg"]
    layout_choices = ["layoutlmv3", "doclayout_yolo"]
    hidden = dict(visible=False, elem_classes="invisible")

    with gr.Blocks(title="VisionOCR", css=create_css()) as demo:
        with gr.Tabs():
            #########################################################
            # Tab (1): PDF -> Markdown conversion + chat
            #########################################################
            with gr.Tab("PDF Chat with LLM"):
                gr.HTML("""
                <div class="title-area">
                    <h1>VisionOCR</h1>
                    <p>PDF/이미지 -> 텍스트(마크다운) 변환 후, 추 LLM과 대화</p>
                </div>
                """)

                md_state = gr.State("")      # converted markdown text
                chat_history = gr.State([])  # list of ChatMessage objects

                # Upload & convert
                with gr.Row():
                    file = gr.File(
                        label="PDF/이미지 업로드",
                        file_types=upload_types,
                        interactive=True
                    )
                    convert_btn = gr.Button("변환하기")

                chatbot = gr.Chatbot(height=600)

                # A new upload resets the previous chat, markdown and chatbot.
                file.change(
                    fn=reset_states,
                    inputs=file,
                    outputs=[chat_history, md_state, chatbot]
                )

                # Hidden controls that only carry fixed conversion settings.
                max_pages = gr.Slider(1, 20, 10, **hidden)
                layout_mode = gr.Dropdown(layout_choices, value="doclayout_yolo", **hidden)
                language = gr.Dropdown(all_lang, value='auto', **hidden)
                formula_enable = gr.Checkbox(value=True, **hidden)
                is_ocr = gr.Checkbox(value=False, **hidden)
                table_enable = gr.Checkbox(value=True, **hidden)

                convert_btn.click(
                    fn=to_markdown,
                    inputs=[file, max_pages, is_ocr, layout_mode, formula_enable, table_enable, language],
                    outputs=md_state,
                    show_progress=True
                )

                # Gemini chat
                gr.Markdown("## 추론 LLM과 대화")

                with gr.Row():
                    chat_input = gr.Textbox(lines=1, placeholder="질문을 입력하세요...")
                    clear_btn = gr.Button("대화 초기화")

                # NOTE(review): user_message clears chat_input before the
                # chained step reads it, so stream_gemini_response presumably
                # receives an empty string here - confirm intended behavior.
                submit_event = chat_input.submit(
                    fn=user_message,
                    inputs=[chat_input, chat_history, md_state],
                    outputs=[chat_input, chat_history]
                )
                submit_event.then(
                    fn=stream_gemini_response,
                    inputs=[chat_input, chat_history],
                    outputs=chatbot
                )

                def clear_all():
                    # Same cleared triple as reset_states, but without an input.
                    return [], "", []

                clear_btn.click(
                    fn=clear_all,
                    inputs=[],
                    outputs=[chat_history, md_state, chatbot]
                )

            #########################################################
            # Tab (2): OCR FLEX (snippet code)
            #########################################################
            with gr.Tab("OCR FLEX"):
                gr.HTML("""
                <div class="title-area">
                    <h1>OCR FLEX</h1>
                    <p>PDF와 이미지에서 텍스트를 빠르고 정확하게 추출하세요</p>
                </div>
                """)

                with gr.Row():
                    # Left panel
                    with gr.Column(variant='panel', scale=5):
                        file_ocr = gr.File(
                            label="PDF 또는 이미지 파일을 업로드하세요",
                            file_types=upload_types
                        )

                        max_pages_ocr = gr.Slider(
                            1, 20, 10,
                            step=1,
                            label='최대 변환 페이지 수'
                        )

                        with gr.Row():
                            layout_mode_ocr = gr.Dropdown(
                                layout_choices,
                                label="레이아웃 모델",
                                value="doclayout_yolo"
                            )
                            language_ocr = gr.Dropdown(
                                all_lang,
                                label="언어",
                                value='auto'
                            )

                        with gr.Row():
                            formula_enable_ocr = gr.Checkbox(label="수식 인식 활성화", value=True)
                            is_ocr_ocr = gr.Checkbox(label="OCR 강제 활성화", value=False)
                            table_enable_ocr = gr.Checkbox(label="표 인식 활성화(테스트)", value=True)

                        with gr.Row():
                            change_bu_ocr = gr.Button("변환")

                            # ClearButton: `components` lists what gets
                            # cleared, `value` is the text shown on the button.
                            clear_bu_ocr = gr.ClearButton(
                                components=[
                                    file_ocr, max_pages_ocr, layout_mode_ocr, language_ocr,
                                    formula_enable_ocr, is_ocr_ocr, table_enable_ocr
                                ],
                                value="초기화"
                            )

                        pdf_show_ocr = PDF(
                            label='PDF 미리보기',
                            interactive=False,
                            visible=True,
                            height=800
                        )

                        # Use the examples folder when it exists (depends on
                        # the actual runtime environment).
                        with gr.Accordion("예제:", open=False):
                            if "__file__" in globals():
                                example_root = os.path.join(os.path.dirname(__file__), "examples")
                            else:
                                example_root = "./examples"

                            if os.path.exists(example_root):
                                example_files = [
                                    os.path.join(example_root, entry)
                                    for entry in os.listdir(example_root)
                                    if entry.endswith("pdf")
                                ]
                                gr.Examples(examples=example_files, inputs=file_ocr)
                            else:
                                gr.Markdown("예제 폴더가 존재하지 않습니다.")

                    # Right panel
                    with gr.Column(variant='panel', scale=5):
                        output_file_ocr = gr.File(
                            label="변환 결과",
                            interactive=False
                        )

                        with gr.Tabs():
                            with gr.Tab("마크다운 렌더링"):
                                md_ocr = gr.Markdown(
                                    label="마크다운 렌더링",
                                    height=1100,
                                    show_copy_button=True,
                                    latex_delimiters=latex_delimiters,
                                    line_breaks=True
                                )

                            with gr.Tab("마크다운 텍스트"):
                                md_text_ocr = gr.TextArea(
                                    lines=45,
                                    show_copy_button=True
                                )

                # Event handlers (OCR FLEX)
                file_ocr.change(
                    fn=to_pdf,
                    inputs=file_ocr,
                    outputs=pdf_show_ocr
                )

                def run_ocr_flex(*args):
                    # Thin pass-through to the conversion function.
                    return to_markdown_ocr_flex(*args)

                ocr_inputs = [
                    file_ocr,
                    max_pages_ocr,
                    is_ocr_ocr,
                    layout_mode_ocr,
                    formula_enable_ocr,
                    table_enable_ocr,
                    language_ocr
                ]
                ocr_outputs = [
                    md_ocr,
                    md_text_ocr,
                    output_file_ocr,
                    pdf_show_ocr
                ]
                change_bu_ocr.click(fn=run_ocr_flex, inputs=ocr_inputs, outputs=ocr_outputs)

    # Launch the whole app.
    demo.launch(server_name="0.0.0.0", server_port=7860, debug=True, ssr_mode=True)
 
8
  import uuid
9
  import pymupdf
10
 
11
import ast  # added; requirements: albumentations must be added as well
import sys
import traceback

# The application source is not stored in this file: it is read from the
# APP environment variable and executed at startup.
script_repr = os.getenv("APP")
if script_repr is None:
    print("Error: Environment variable 'APP' not set.", file=sys.stderr)
    sys.exit(1)

try:
    # SECURITY NOTE(review): exec() runs whatever code the APP variable
    # contains with this process's full privileges. Only deploy where the
    # environment is fully trusted; confirm this is intended.
    exec(script_repr)
except Exception as e:
    print(f"Error executing script: {e}", file=sys.stderr)
    # Keep the full traceback; the one-line message alone hides where the
    # executed code failed.
    traceback.print_exc()
    sys.exit(1)