aka7774 committed on
Commit
00847b8
·
verified ·
1 Parent(s): b16615c

Update fn.py

Browse files
Files changed (1) hide show
  1. fn.py +11 -1
fn.py CHANGED
@@ -14,8 +14,11 @@ import numpy as np
14
  from scipy.spatial.distance import cdist
15
  from duckduckgo_search import DDGS
16
  from bs4 import BeautifulSoup
 
17
 
18
- model_name = "intfloat/multilingual-e5-large"
 
 
19
  input_dir = 'input'
20
  vectors_dir = 'vectors'
21
 
@@ -54,6 +57,13 @@ def bs4(url):
54
 
55
  return text
56
 
 
 
 
 
 
 
 
57
  def upload(name, filename, content):
58
  os.makedirs(f"{input_dir}/{name}", exist_ok=True)
59
  srcpath = f"{input_dir}/{name}/{filename}"
 
14
  from scipy.spatial.distance import cdist
15
  from duckduckgo_search import DDGS
16
  from bs4 import BeautifulSoup
17
+ from markdownify import markdownify
18
 
19
+ model_name = "cl-nagoya/ruri-large"
20
+ # "mixedbread-ai/mxbai-embed-large-v1"
21
+ # "intfloat/multilingual-e5-large"
22
  input_dir = 'input'
23
  vectors_dir = 'vectors'
24
 
 
57
 
58
  return text
59
 
60
+ def md(url):
61
+ html = requests.get(url).text.replace("\r", '')
62
+ mdtxt = markdownify(html)
63
+ mdtxt = re.sub("\n+", "\n", mdtxt, flags=(re.MULTILINE | re.DOTALL))
64
+
65
+ return mdtxt
66
+
67
  def upload(name, filename, content):
68
  os.makedirs(f"{input_dir}/{name}", exist_ok=True)
69
  srcpath = f"{input_dir}/{name}/{filename}"