Resolved issue with tempfile.NamedTemporaryFile
Browse files
app.py
CHANGED
@@ -98,7 +98,10 @@ def main(dataset, label):
|
|
98 |
timestamp = time.time()
|
99 |
seed = int(timestamp * 1000) % 1000000
|
100 |
|
101 |
-
|
|
|
|
|
|
|
102 |
|
103 |
# first get PDF file
|
104 |
for sample in shuffled_dataset:
|
@@ -110,9 +113,11 @@ def main(dataset, label):
|
|
110 |
grid = pdf_to_grid(BytesIO(pdf_path))
|
111 |
if grid is None:
|
112 |
continue
|
113 |
-
PDF = tempfile.NamedTemporaryFile(suffix=".pdf")
|
114 |
-
PDF
|
115 |
-
|
|
|
|
|
116 |
|
117 |
|
118 |
_CLASSES = [
|
@@ -139,7 +144,7 @@ _CLASSES = [
|
|
139 |
DATASETS = OrderedDict(
|
140 |
{
|
141 |
# "rvl_cdip": load_dataset("bdpc/rvl_cdip_mp", split="test", streaming=True),
|
142 |
-
"rvl_cdip_N": load_dataset("bdpc/rvl_cdip_n_mp", split="test"
|
143 |
}
|
144 |
)
|
145 |
|
@@ -166,6 +171,7 @@ The first time that the app is launched, it will download the datasets, which ca
|
|
166 |
For fastest response, choose the rvl_cdip_N dataset, which is considerably smaller to iterate over.
|
167 |
"""
|
168 |
|
|
|
169 |
iface = gr.Interface(
|
170 |
fn=main,
|
171 |
inputs=sliders,
|
|
|
98 |
timestamp = time.time()
|
99 |
seed = int(timestamp * 1000) % 1000000
|
100 |
|
101 |
+
try:
|
102 |
+
shuffled_dataset = DATASETS[dataset].shuffle(buffer_size=10, seed=seed)
|
103 |
+
except: # lazy
|
104 |
+
shuffled_dataset = DATASETS[dataset].shuffle(seed=seed)
|
105 |
|
106 |
# first get PDF file
|
107 |
for sample in shuffled_dataset:
|
|
|
113 |
grid = pdf_to_grid(BytesIO(pdf_path))
|
114 |
if grid is None:
|
115 |
continue
|
116 |
+
PDF = tempfile.NamedTemporaryFile(suffix=".pdf", delete=False)
|
117 |
+
with PDF as tmp_file:
|
118 |
+
# pdf_path.to_file(tmp_file.name)
|
119 |
+
tmp_file.write(pdf_path)
|
120 |
+
return filelabel, grid, tmp_file.name
|
121 |
|
122 |
|
123 |
_CLASSES = [
|
|
|
144 |
DATASETS = OrderedDict(
|
145 |
{
|
146 |
# "rvl_cdip": load_dataset("bdpc/rvl_cdip_mp", split="test", streaming=True),
|
147 |
+
"rvl_cdip_N": load_dataset("bdpc/rvl_cdip_n_mp", split="test"),
|
148 |
}
|
149 |
)
|
150 |
|
|
|
171 |
For fastest response, choose the rvl_cdip_N dataset, which is considerably smaller to iterate over.
|
172 |
"""
|
173 |
|
174 |
+
# main("rvl_cdip_N", "letter")
|
175 |
iface = gr.Interface(
|
176 |
fn=main,
|
177 |
inputs=sliders,
|