Spaces:
Sleeping
Sleeping
Commit
·
789b57f
1
Parent(s):
49cde8e
add: alias to document loader artifacts and datasets + enable mps fallback for marker
Browse files
medrag_multi_modal/document_loader/image_loader/base_img_loader.py
CHANGED
@@ -100,7 +100,11 @@ class BaseImageLoader(BaseTextLoader):
|
|
100 |
await task
|
101 |
|
102 |
if wandb_artifact_name:
|
103 |
-
artifact = wandb.Artifact(
|
|
|
|
|
|
|
|
|
104 |
artifact.add_dir(local_path=image_save_dir)
|
105 |
artifact.save()
|
106 |
rich.print("Artifact saved and uploaded to wandb!")
|
|
|
100 |
await task
|
101 |
|
102 |
if wandb_artifact_name:
|
103 |
+
artifact = wandb.Artifact(
|
104 |
+
name=wandb_artifact_name,
|
105 |
+
type="dataset",
|
106 |
+
metadata={"loader_name": self.__class__.__name__},
|
107 |
+
)
|
108 |
artifact.add_dir(local_path=image_save_dir)
|
109 |
artifact.save()
|
110 |
rich.print("Artifact saved and uploaded to wandb!")
|
medrag_multi_modal/document_loader/image_loader/marker_img_loader.py
CHANGED
@@ -6,6 +6,8 @@ from marker.models import load_all_models
|
|
6 |
|
7 |
from .base_img_loader import BaseImageLoader
|
8 |
|
|
|
|
|
9 |
|
10 |
class MarkerImageLoader(BaseImageLoader):
|
11 |
"""
|
|
|
6 |
|
7 |
from .base_img_loader import BaseImageLoader
|
8 |
|
9 |
+
os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"
|
10 |
+
|
11 |
|
12 |
class MarkerImageLoader(BaseImageLoader):
|
13 |
"""
|
medrag_multi_modal/document_loader/text_loader/base_text_loader.py
CHANGED
@@ -131,6 +131,7 @@ class BaseTextLoader(ABC):
|
|
131 |
async def process_page(page_idx):
|
132 |
nonlocal processed_pages_counter
|
133 |
page_data = await self.extract_page_data(page_idx, **kwargs)
|
|
|
134 |
pages.append(page_data)
|
135 |
rich.print(
|
136 |
f"Processed page idx: {page_idx}, progress: {processed_pages_counter}/{total_pages}"
|
|
|
131 |
async def process_page(page_idx):
|
132 |
nonlocal processed_pages_counter
|
133 |
page_data = await self.extract_page_data(page_idx, **kwargs)
|
134 |
+
page_data["loader_name"] = self.__class__.__name__
|
135 |
pages.append(page_data)
|
136 |
rich.print(
|
137 |
f"Processed page idx: {page_idx}, progress: {processed_pages_counter}/{total_pages}"
|
medrag_multi_modal/document_loader/text_loader/marker_text_loader.py
CHANGED
@@ -1,3 +1,4 @@
|
|
|
|
1 |
from typing import Dict
|
2 |
|
3 |
from marker.convert import convert_single_pdf
|
@@ -5,6 +6,8 @@ from marker.models import load_all_models
|
|
5 |
|
6 |
from .base_text_loader import BaseTextLoader
|
7 |
|
|
|
|
|
8 |
|
9 |
class MarkerTextLoader(BaseTextLoader):
|
10 |
"""
|
|
|
1 |
+
import os
|
2 |
from typing import Dict
|
3 |
|
4 |
from marker.convert import convert_single_pdf
|
|
|
6 |
|
7 |
from .base_text_loader import BaseTextLoader
|
8 |
|
9 |
+
os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"
|
10 |
+
|
11 |
|
12 |
class MarkerTextLoader(BaseTextLoader):
|
13 |
"""
|