Spaces:
Sleeping
Sleeping
File size: 2,802 Bytes
8fdf34e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 |
from __future__ import annotations
import os
import llama_index
from llama_index import (
LLMPredictor,
GPTTreeIndex,
Document,
GPTSimpleVectorIndex,
SimpleDirectoryReader,
RefinePrompt,
QuestionAnswerPrompt,
GPTListIndex,
PromptHelper,
)
from pathlib import Path
from docx import Document as DocxDocument
from tqdm import tqdm
import re
from langchain.llms import OpenAIChat, OpenAI
from llama_index.composability import ComposableGraph
from IPython.display import Markdown, display
import json
from llama_index import Prompt
from typing import TYPE_CHECKING, Any, Callable, Dict, List, Tuple, Type
import logging
import sys
from typing import TYPE_CHECKING, Any, Callable, Dict, List, Tuple, Type
import logging
import json
import gradio as gr
# import openai
import os
import traceback
import requests
# import markdown
import csv
import mdtex2html
from pypinyin import lazy_pinyin
from presets import *
from llama_func import *
import tiktoken
from tqdm import tqdm
import colorama
import os
from llama_index import (
GPTSimpleVectorIndex,
GPTTreeIndex,
GPTKeywordTableIndex,
GPTListIndex,
)
from llama_index import SimpleDirectoryReader, download_loader
from llama_index import (
Document,
LLMPredictor,
PromptHelper,
QuestionAnswerPrompt,
RefinePrompt,
)
from langchain.llms import OpenAIChat, OpenAI
from duckduckgo_search import ddg
import datetime
def compact_text_chunks(self, prompt: Prompt, text_chunks: List[str]) -> List[str]:
    """Merge text chunks into one numbered string and split it again.

    Every chunk is stripped of surrounding whitespace, and chunks that are
    empty after stripping are discarded. The remaining chunks are labelled
    ``[1]``, ``[2]``, ... in their original order, joined with a blank line
    between them, and handed to the splitter returned by
    ``self.get_text_splitter_given_prompt``.

    Args:
        prompt: Prompt forwarded to ``get_text_splitter_given_prompt``.
        text_chunks: Raw text chunks to compact.

    Returns:
        Whatever the splitter's ``split_text`` returns for the merged text.
    """
    logging.debug("Compacting text chunks...πππ")

    # Number only the chunks that survive stripping, so labels stay contiguous.
    numbered = []
    for chunk in text_chunks:
        stripped = chunk.strip()
        if stripped:
            numbered.append(f"[{len(numbered) + 1}] {stripped}")
    merged = "\n\n".join(numbered)

    # Re-split the merged text with the splitter built for this prompt.
    splitter = self.get_text_splitter_given_prompt(prompt, 1, padding=1)
    return splitter.split_text(merged)
def postprocess(
    self, y: List[Tuple[str | None, str | None]]
) -> List[Tuple[str | None, str | None]]:
    """Convert the response of every (message, response) pair to HTML.

    The list is updated in place: each entry is replaced by a tuple whose
    message is passed through unchanged and whose response, when it is not
    None, is the result of ``mdtex2html.convert`` with the ``fenced_code``,
    ``codehilite`` and ``tables`` extensions.

    Parameters:
        y: List of (message, response) pairs; either element may be None.
    Returns:
        An empty list when ``y`` is None, otherwise the same list object
        ``y`` after its entries have been rewritten.
    """
    if y is None:
        return []

    for position, pair in enumerate(y):
        message, response = pair
        if response is not None:
            # Only the response side is rendered; the message stays raw.
            response = mdtex2html.convert(
                response, extensions=['fenced_code', 'codehilite', 'tables']
            )
        y[position] = (message, response)
    return y
|