Spaces:
No application file
No application file
from typing import Union, List | |
import hashlib | |
import os | |
def get_md5sum(data: str, length: int = None, blocksize: int = 2**23) -> str:
    """Return an MD5 signature for a file path or for a plain string.

    When ``data`` names an existing regular file, the signature is obtained
    from ``get_md5sum_of_file``; otherwise ``data`` itself is passed to
    ``get_signature_of_string``.

    Args:
        data (str): a file path, or arbitrary text to sign.
        length (int, optional): forwarded to the helper to limit the
            signature to its first ``length`` characters. Defaults to None.
        blocksize (int, optional): chunk size forwarded to the file helper;
            not used when ``data`` is treated as plain text.
            Defaults to 2**23.

    Returns:
        str: the signature produced by the selected helper.

    Raises:
        ValueError: if ``data`` is not a ``str``.
    """
    # Guard clause: reject anything that is not a string up front.
    if not isinstance(data, str):
        raise ValueError(
            "only support str or file path str, but given {}".format(type(data))
        )

    # A string that points at an existing file is hashed by content.
    if os.path.isfile(data):
        return get_md5sum_of_file(path=data, length=length, blocksize=blocksize)

    # Anything else is hashed as literal text.
    return get_signature_of_string(data, length=length)
def get_md5sum_of_file(path: str, length: int = None, blocksize: int = 2**23) -> str:
    """Compute the MD5 hex digest of a file, reading it in chunks.

    Args:
        path (str): path of the file to hash.
        length (int, optional): if given, only the first ``length``
            characters of the hex digest are returned. Defaults to None.
        blocksize (int, optional): number of bytes read per chunk.
            ``None`` or a negative value reads the whole file in one call.
            Must not be 0. Defaults to 2**23.

    Returns:
        str: the hex digest, truncated to ``length`` characters when provided.

    Raises:
        ValueError: if ``blocksize`` is 0.
        OSError: if the file cannot be opened or read.
    """
    # read(0) returns b"" immediately, which would end the loop before any
    # data is hashed and silently yield the digest of empty input.
    if blocksize is not None and blocksize == 0:
        raise ValueError("blocksize must be non-zero")

    m = hashlib.md5()
    with open(path, "rb") as f:
        while chunk := f.read(blocksize):
            m.update(chunk)
    sig = m.hexdigest()
    if length is not None:
        sig = sig[:length]
    return sig
def get_signature_of_string(string: str, length: int = None) -> str:
    """Compute the MD5 hex digest of a string.

    Args:
        string (str): text to hash; it is encoded with ``str.encode()``
            (UTF-8 by default) before hashing.
        length (int, optional): if given, only the first ``length``
            characters of the digest are returned. Defaults to None.

    Returns:
        str: the hex digest, truncated to ``length`` characters when provided.
    """
    encoded = string.encode()
    digest = hashlib.md5(encoded).hexdigest()
    return digest if length is None else digest[:length]