|
import gradio as gr |
|
from huggingface_hub import InferenceClient |
|
import PyPDF2 |
|
from PIL import Image, ImageEnhance, ImageFont, ImageDraw |
|
import cv2 |
|
import numpy as np |
|
from pydub import AudioSegment |
|
from langdetect import detect |
|
from rembg import remove |
|
import torch |
|
import torchvision |
|
from torchvision.models.detection import fasterrcnn_resnet50_fpn |
|
from torchvision.transforms import functional as F |
|
import tempfile |
|
import time |
|
import requests |
|
import zipfile |
|
import os |
|
import torchaudio |
|
from transformers import pipeline |
|
|
|
|
|
# Chat-completion client used by respond(); bound to the Zephyr-7B-beta model id.
client = InferenceClient(model="HuggingFaceH4/zephyr-7b-beta")

# Faster R-CNN detector used by count_objects(). nn.Module.eval() returns the
# module itself, so the model is put into inference mode in the same expression.
model = fasterrcnn_resnet50_fpn(pretrained=True).eval()
|
|
|
|
|
def process_pdf(file):
    """Return the text of every page of a PDF, concatenated in page order.

    ``file`` is passed unchanged to ``PyPDF2.PdfReader``.
    """
    reader = PyPDF2.PdfReader(file)
    return "".join(page.extract_text() for page in reader.pages)
|
|
|
|
|
def process_image(file):
    """Return a one-line description of an image (pixel size and format).

    Args:
        file: Anything ``PIL.Image.open`` accepts (path or file object).

    Returns:
        A user-facing string with the width, height and format of the image.
    """
    # Image.open keeps the underlying file open until the image is closed;
    # the context manager releases it once the metadata has been read.
    with Image.open(file) as image:
        width, height = image.size
        image_format = image.format
    return f"Изображение: {width}x{height} пикселей, формат: {image_format}"
|
|
|
|
|
def process_video(file):
    """Return a one-line description of a video (duration and frame count).

    Args:
        file: Object whose ``name`` attribute is the path of the video file.

    Returns:
        A user-facing string. When the frame rate cannot be read (for example
        the file could not be opened), the duration is reported as unknown
        instead of dividing by zero.
    """
    cap = cv2.VideoCapture(file.name)
    try:
        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        fps = cap.get(cv2.CAP_PROP_FPS)
    finally:
        # Always release the capture, even if reading a property fails.
        cap.release()

    # `not fps > 0` also rejects NaN, which some backends report for bad files.
    if not fps > 0:
        return f"Видео: длительность неизвестна, {frame_count} кадров"

    duration = frame_count / fps
    return f"Видео: длительность {duration:.2f} секунд, {frame_count} кадров"
|
|
|
|
|
def process_audio(file):
    """Return a one-line description of an audio file (duration and frame rate)."""
    segment = AudioSegment.from_file(file)
    # len() of an AudioSegment is its length in milliseconds.
    seconds = len(segment) / 1000
    sample_rate = segment.frame_rate
    return f"Аудио: длительность {seconds:.2f} секунд, частота {sample_rate} Гц"
|
|
|
|
|
def process_txt(file):
    """Read the file at ``file.name`` as UTF-8 and return its full contents."""
    with open(file.name, mode="r", encoding="utf-8") as handle:
        return handle.read()
|
|
|
|
|
def remove_background(image):
    """Run ``rembg.remove`` on the image; a missing image yields ``None``."""
    return None if image is None else remove(image)
|
|
|
|
|
def count_objects(image, score_threshold=0.5):
    """Count the objects the module-level detector finds in an image.

    Args:
        image: Object whose ``name`` attribute is the image path, or ``None``.
        score_threshold: Minimum detection confidence for a detection to be
            counted. The detector also returns low-confidence candidates;
            counting all of them would inflate the result.

    Returns:
        A user-facing string with the number of detected objects, or a
        message saying that no image was uploaded.
    """
    if image is None:
        return "Изображение не загружено."

    # convert() produces an independent in-memory copy, so the source file
    # can be closed as soon as the conversion is done.
    with Image.open(image.name) as source:
        img = source.convert("RGB")
    img_tensor = F.to_tensor(img).unsqueeze(0)

    with torch.no_grad():
        predictions = model(img_tensor)

    scores = predictions[0]["scores"]
    num_objects = int((scores >= score_threshold).sum())
    return f"Количество объектов на изображении: {num_objects}"
|
|
|
|
|
def convert_image(image, target_format):
    """Convert an uploaded image to ``target_format`` and return the new path.

    Args:
        image: Object whose ``name`` attribute is the image path, or ``None``.
        target_format: Pillow format name such as ``"JPEG"`` or ``"PNG"``.

    Returns:
        Path of a temporary file holding the converted image, or ``None``
        when no image was supplied. The temporary file is not deleted here.
    """
    if image is None:
        return None

    # Modes the JPEG writer accepts; anything else (RGBA, P, LA, ...) has to
    # be flattened to RGB first or Pillow raises OSError on save.
    jpeg_modes = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")

    with Image.open(image.name) as source:
        needs_rgb = target_format.upper() == "JPEG" and source.mode not in jpeg_modes
        img = source.convert("RGB") if needs_rgb else source
        with tempfile.NamedTemporaryFile(
            delete=False, suffix=f".{target_format.lower()}"
        ) as tmp_file:
            img.save(tmp_file, format=target_format)
    return tmp_file.name
|
|
|
|
|
def detect_language(text):
    """Return the language code detected for ``text``, or ``"en"`` on failure.

    Detection is best-effort: any ordinary error raised by the detector falls
    back to English.
    """
    try:
        return detect(text)
    except Exception:
        # A bare `except:` would also swallow KeyboardInterrupt/SystemExit;
        # only ordinary errors should trigger the fallback.
        return "en"
|
|
|
|
|
def _describe_file(file):
    """Return the text/description for an uploaded file, chosen by extension."""
    file_type = file.name.rsplit(".", 1)[-1].lower()
    if file_type == "pdf":
        return process_pdf(file)
    if file_type in ("jpg", "jpeg", "png", "bmp", "gif"):
        return process_image(file)
    if file_type in ("mp4", "avi", "mov"):
        return process_video(file)
    if file_type in ("mp3", "wav", "ogg"):
        return process_audio(file)
    if file_type == "txt":
        return process_txt(file)
    return "Неизвестный тип файла"


def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
    file=None,
):
    """Stream a chat reply for ``message``, optionally enriched with a file.

    Args:
        message: The user's new message.
        history: Earlier (user, assistant) message pairs.
        system_message: Accepted for interface compatibility. It is replaced
            below by a prompt chosen from the detected language.
        max_tokens: Generation limit forwarded to the chat client.
        temperature: Sampling temperature forwarded to the chat client.
        top_p: Nucleus-sampling value forwarded to the chat client.
        file: Optional uploaded file; its description or content is appended
            to the message.

    Yields:
        The response text accumulated so far, once per streamed chunk.
    """
    if file is not None:
        file_info = _describe_file(file)
        message += f"\n[Пользователь загрузил файл: {file.name}]\n{file_info}"

    language = detect_language(message)

    # The system prompt is selected by language, overriding the argument.
    if language == "ru":
        system_message = "Вы дружелюбный чат-бот, который понимает русский язык."
    else:
        system_message = "You are a friendly chatbot."

    messages = [{"role": "system", "content": system_message}]
    for user_text, assistant_text in history:
        if user_text:
            messages.append({"role": "user", "content": user_text})
        if assistant_text:
            messages.append({"role": "assistant", "content": assistant_text})
    messages.append({"role": "user", "content": message})

    response = ""
    # A distinct loop name avoids shadowing the `message` parameter.
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        if not chunk.choices:
            continue
        token = chunk.choices[0].delta.content
        # Streamed chunks may carry no content (e.g. the final one);
        # concatenating None would raise TypeError.
        if token:
            response += token
        yield response
|
|
|
|
|
def reset_chat():
    """Return a fresh empty list, used to clear the chat display."""
    empty_history = list()
    return empty_history
|
|
|
|
|
def analyze_txt(file):
    """Return the contents of an uploaded text file behind a short header.

    When no file is supplied, a message saying so is returned instead.
    """
    if file is not None:
        contents = process_txt(file)
        return f"Содержимое файла:\n{contents}"
    return "Файл не загружен."
|
|
|
|
|
def _load_font_from_zip(zip_path, font_size):
    """Load the first .ttf/.otf member of a zip archive, or None if there is none."""
    with zipfile.ZipFile(zip_path, "r") as archive:
        font_members = [
            member
            for member in archive.namelist()
            if member.lower().endswith((".ttf", ".otf"))
            # Skip hidden entries such as macOS "._Font.ttf" resource forks.
            and not os.path.basename(member).startswith(".")
        ]
        if not font_members:
            return None
        # Read the font straight from the archive: nothing is extracted to
        # disk, so there is no shared directory with stale fonts from earlier
        # uploads and no path-traversal risk from crafted member names.
        with archive.open(font_members[0]) as font_stream:
            return ImageFont.truetype(font_stream, font_size)


def generate_text_image(text, font_file, font_size=40, bg_color="#FFFFFF", text_color="#000000"):
    """Render ``text`` centered on an 800x200 image and return the PNG path.

    Args:
        text: Text to draw.
        font_file: ``None`` for Pillow's default font, or an object whose
            ``name`` attribute is the path of a font file or of a ``.zip``
            archive containing one.
        font_size: Size passed to ``ImageFont.truetype``.
        bg_color: Background color of the image.
        text_color: Fill color of the text.

    Returns:
        Path of a temporary PNG file, or a user-facing message string when
        the text is empty or the archive contains no fonts.
    """
    if not text:
        return "Введите текст."

    if font_file is None:
        font = ImageFont.load_default()
    elif font_file.name.lower().endswith(".zip"):
        font = _load_font_from_zip(font_file.name, font_size)
        if font is None:
            return "В архиве нет файлов шрифтов (.ttf, .otf)."
    else:
        font = ImageFont.truetype(font_file.name, font_size)

    canvas_width, canvas_height = 800, 200
    image = Image.new("RGB", (canvas_width, canvas_height), color=bg_color)
    draw = ImageDraw.Draw(image)

    try:
        # ImageDraw.textsize was removed in Pillow 10; textbbox replaces it.
        left, top, right, bottom = draw.textbbox((0, 0), text, font=font)
    except (AttributeError, ValueError):
        # Older Pillow: textbbox is missing or rejects bitmap fonts.
        text_width, text_height = draw.textsize(text, font=font)
        left, top, right, bottom = 0, 0, text_width, text_height

    # Subtract the bbox origin so the visible glyphs, not the anchor, are centered.
    x = (canvas_width - (right - left)) / 2 - left
    y = (canvas_height - (bottom - top)) / 2 - top
    draw.text((x, y), text, font=font, fill=text_color)

    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp_file:
        image.save(tmp_file, format="PNG")
    return tmp_file.name
|
|
|
|
|
def enhance_image(image, contrast_factor=1.5, brightness_factor=1.2, sharpness_factor=1.5):
    """Apply contrast, brightness and sharpness enhancement; return a JPEG path.

    Args:
        image: Object whose ``name`` attribute is the image path, or ``None``.
        contrast_factor: Factor passed to ``ImageEnhance.Contrast``.
        brightness_factor: Factor passed to ``ImageEnhance.Brightness``.
        sharpness_factor: Factor passed to ``ImageEnhance.Sharpness``.

    Returns:
        Path of a temporary ``.jpg`` file with the result, or ``None`` when
        no image was supplied. The temporary file is not deleted here.
    """
    if image is None:
        return None

    enhancement_steps = (
        (ImageEnhance.Contrast, contrast_factor),
        (ImageEnhance.Brightness, brightness_factor),
        (ImageEnhance.Sharpness, sharpness_factor),
    )

    with Image.open(image.name) as source:
        # The result is saved as JPEG, which cannot store alpha or palette
        # data; flatten such inputs up front instead of failing on save.
        img = source if source.mode in ("RGB", "L") else source.convert("RGB")
        for enhancer_cls, factor in enhancement_steps:
            img = enhancer_cls(img).enhance(factor)

    with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as tmp_file:
        img.save(tmp_file, format="JPEG")
    return tmp_file.name
|
|
|
|
|
def trim_audio(file, start_time: float, end_time: float):
    """Cut the [start_time, end_time] range out of an audio file.

    Args:
        file: Object whose ``name`` attribute is the audio path, or ``None``.
        start_time: Start of the range in seconds; ``None`` means the start.
        end_time: End of the range in seconds; ``None`` means the end.

    Returns:
        Path of a temporary ``.wav`` file with the trimmed audio, or a
        message string when no file was supplied.
    """
    if file is None:
        return "Файл не загружен."

    audio = AudioSegment.from_file(file.name)
    total_ms = len(audio)  # AudioSegment length is in milliseconds

    # Number inputs can arrive empty (None); negative values would be
    # interpreted as offsets from the end. Normalize to 0 <= start <= end <= total.
    start_ms = 0 if start_time is None else start_time * 1000
    end_ms = total_ms if end_time is None else end_time * 1000
    start_ms = min(max(start_ms, 0), total_ms)
    end_ms = min(max(end_ms, start_ms), total_ms)

    trimmed_audio = audio[start_ms:end_ms]

    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
        trimmed_audio.export(tmp_file.name, format="wav")
    return tmp_file.name
|
|
|
|
|
# Lazily created summarization pipeline, shared by all summarize_text() calls.
_SUMMARIZER = None


def _get_summarizer():
    """Build the summarization pipeline on first use and reuse it afterwards."""
    global _SUMMARIZER
    if _SUMMARIZER is None:
        _SUMMARIZER = pipeline("summarization", model="facebook/bart-large-cnn")
    return _SUMMARIZER


def summarize_text(text: str, max_length: int = 100):
    """Summarize ``text`` with the facebook/bart-large-cnn pipeline.

    Args:
        text: Text to summarize.
        max_length: Upper bound on the summary length passed to the pipeline.

    Returns:
        The summary text, or a prompt to enter text when ``text`` is blank.
    """
    if not text or not text.strip():
        return "Введите текст."

    # UI sliders may deliver floats; the pipeline expects integer lengths.
    max_length = int(max_length)
    # Keep min_length from exceeding max_length for small limits.
    min_length = min(30, max_length)

    # The pipeline is built once and reused rather than recreated per call.
    summary = _get_summarizer()(
        text, max_length=max_length, min_length=min_length, do_sample=False
    )
    return summary[0]["summary_text"]
|
|
|
|
|
# Gradio UI: logo header, "new chat" button, a tab of tools, and the chat itself.
# NOTE(review): nesting below is reconstructed from syntax; confirm the tool
# tabs are meant to live inside the "Felguk Tools" tab.
with gr.Blocks() as demo:
    gr.HTML("""
    <div style="text-align: center;">
        <img src="https://huggingface.co/spaces/Felguk/Felguk-v0/resolve/main/hd_crop_4c480c6a2c7e176289b0dfcb64a30603_67753ddec8355.png" alt="Felguk Logo" style="width: 300px;">
    </div>
    """)
    gr.Markdown("Чат-бот Felguk v0. Отвечает на том же языке, на котором вы написали. Задавайте вопросы и загружайте файлы (PDF, изображения, видео, аудио, txt)!")

    with gr.Row():
        new_chat_button = gr.Button("Новый чат", variant="secondary")

    with gr.Tab("Felguk Tools"):
        # Text analyzer: shows the contents of an uploaded .txt file.
        with gr.Tab("Анализатор текста"):
            gr.Markdown("## Анализатор текста")
            txt_file = gr.File(label="Загрузите txt файл", file_types=[".txt"])
            txt_output = gr.Textbox(label="Содержимое файла", interactive=False)
            analyze_button = gr.Button("Анализировать")
            analyze_button.click(analyze_txt, txt_file, txt_output)

        # Background removal.
        with gr.Tab("Удаление фона"):
            gr.Markdown("## Удаление фона с изображения")
            image_input = gr.Image(type="pil", label="Загрузите изображение")
            image_output = gr.Image(type="pil", label="Результат (без фона)")
            remove_bg_button = gr.Button("Удалить фон")
            remove_bg_button.click(remove_background, image_input, image_output)

        # Object counting.
        with gr.Tab("Numage"):
            gr.Markdown("## Numage: Подсчет объектов на изображении")
            numage_input = gr.File(label="Загрузите изображение", file_types=["image"])
            numage_output = gr.Textbox(label="Результат", interactive=False)
            numage_button = gr.Button("Определить количество объектов")
            numage_button.click(count_objects, numage_input, numage_output)

        # Image format conversion.
        with gr.Tab("Конвертер изображений"):
            gr.Markdown("## Конвертер изображений")
            img_input = gr.File(label="Загрузите изображение", file_types=["image"])
            img_format = gr.Dropdown(
                label="Выберите формат для конвертации",
                choices=["JPEG", "PNG", "BMP", "GIF", "TIFF"],
                value="JPEG",
            )
            img_output = gr.File(label="Результат конвертации")
            convert_button = gr.Button("Конвертировать")
            convert_button.click(convert_image, [img_input, img_format], img_output)

        # Image enhancement. image_input/image_output are rebound here, as in
        # the original; the earlier tab's handler is already registered.
        with gr.Tab("Felguk-ImageEnhancer"):
            gr.Markdown("## Felguk-ImageEnhancer: Улучшение качества изображений")
            image_input = gr.File(label="Загрузите изображение", file_types=["image"])
            contrast_slider = gr.Slider(minimum=0.5, maximum=2.0, value=1.5, label="Контраст")
            brightness_slider = gr.Slider(minimum=0.5, maximum=2.0, value=1.2, label="Яркость")
            sharpness_slider = gr.Slider(minimum=0.5, maximum=2.0, value=1.5, label="Резкость")
            enhance_button = gr.Button("Улучшить изображение")
            image_output = gr.Image(type="filepath", label="Результат")
            enhance_button.click(
                enhance_image,
                [image_input, contrast_slider, brightness_slider, sharpness_slider],
                image_output,
            )

        # Audio trimming.
        with gr.Tab("Felguk-AudioTrimmer"):
            gr.Markdown("## Felguk-AudioTrimmer: Обрезка аудиофайлов")
            audio_input = gr.File(label="Загрузите аудиофайл", file_types=[".mp3", ".wav"])
            start_time = gr.Number(value=0, label="Начало обрезки (сек)")
            end_time = gr.Number(value=10, label="Конец обрезки (сек)")
            trim_button = gr.Button("Обрезать аудио")
            audio_output = gr.Audio(label="Результат")
            trim_button.click(trim_audio, [audio_input, start_time, end_time], audio_output)

        # Text summarization.
        with gr.Tab("Felguk-TextSummarizer"):
            gr.Markdown("## Felguk-TextSummarizer: Суммаризация текста")
            text_input = gr.Textbox(label="Введите текст для суммаризации", lines=5)
            max_length_slider = gr.Slider(
                minimum=30, maximum=200, value=100, label="Максимальная длина текста"
            )
            summarize_button = gr.Button("Суммаризировать")
            summary_output = gr.Textbox(label="Результат", interactive=False)
            summarize_button.click(summarize_text, [text_input, max_length_slider], summary_output)

    # Chat: the extra inputs map, in order, onto respond()'s parameters after
    # (message, history).
    chat_interface = gr.ChatInterface(
        respond,
        additional_inputs=[
            gr.Textbox(label="System message", value="You are a friendly Chatbot."),
            gr.Slider(label="Max new tokens", minimum=1, maximum=2048, step=1, value=512),
            gr.Slider(label="Temperature", minimum=0.1, maximum=4.0, step=0.1, value=0.7),
            gr.Slider(
                label="Top-p (nucleus sampling)",
                minimum=0.1,
                maximum=1.0,
                step=0.05,
                value=0.95,
            ),
            gr.File(label="Загрузите файл (опционально)"),
        ],
    )

    # "New chat" replaces the chatbot contents with an empty history.
    new_chat_button.click(reset_chat, outputs=chat_interface.chatbot)


if __name__ == "__main__":
    demo.launch()