Spaces:
Sleeping
Sleeping
Update fn.py
Browse files
fn.py
CHANGED
@@ -14,8 +14,11 @@ import numpy as np
|
|
14 |
from scipy.spatial.distance import cdist
|
15 |
from duckduckgo_search import DDGS
|
16 |
from bs4 import BeautifulSoup
|
|
|
17 |
|
18 |
-
model_name = "
|
|
|
|
|
19 |
input_dir = 'input'
|
20 |
vectors_dir = 'vectors'
|
21 |
|
@@ -54,6 +57,13 @@ def bs4(url):
|
|
54 |
|
55 |
return text
|
56 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
57 |
def upload(name, filename, content):
|
58 |
os.makedirs(f"{input_dir}/{name}", exist_ok=True)
|
59 |
srcpath = f"{input_dir}/{name}/{filename}"
|
|
|
14 |
from scipy.spatial.distance import cdist
|
15 |
from duckduckgo_search import DDGS
|
16 |
from bs4 import BeautifulSoup
|
17 |
+
from markdownify import markdownify
|
18 |
|
19 |
+
model_name = "cl-nagoya/ruri-large"
|
20 |
+
# Alternative model_name candidate (inactive): "mixedbread-ai/mxbai-embed-large-v1"
|
21 |
+
# Alternative model_name candidate (inactive): "intfloat/multilingual-e5-large"
|
22 |
input_dir = 'input'
|
23 |
vectors_dir = 'vectors'
|
24 |
|
|
|
57 |
|
58 |
return text
|
59 |
|
60 |
+
def md(url, timeout=30):
    """Fetch a web page and return its content converted to Markdown.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server, forwarded to
            ``requests.get``. Pass ``None`` to wait indefinitely, which
            was the behaviour before this parameter existed.

    Returns:
        The downloaded HTML converted by ``markdownify``, with carriage
        returns removed and every run of consecutive newlines collapsed
        into a single newline.

    Raises:
        requests.exceptions.RequestException: If the request fails or
            the timeout elapses.
    """
    # Without a timeout, requests may block forever on an unresponsive host.
    response = requests.get(url, timeout=timeout)
    # Strip carriage returns before handing the HTML to the converter.
    html = response.text.replace("\r", "")
    mdtxt = markdownify(html)
    # No flags needed: the pattern contains no ^, $ or '.', so
    # MULTILINE and DOTALL never affected the match.
    return re.sub(r"\n+", "\n", mdtxt)
|
66 |
+
|
67 |
def upload(name, filename, content):
|
68 |
os.makedirs(f"{input_dir}/{name}", exist_ok=True)
|
69 |
srcpath = f"{input_dir}/{name}/{filename}"
|