"""Helpers that turn links, text, or PDFs into PPTX slides (and slides into a
presentation script) using a hosted Llama-2 chat model and pandoc."""

import os
import subprocess
import sys
from io import BytesIO

import PyPDF2
import requests
from pptx import Presentation

# Read eagerly: os.environ[...] raises KeyError at import time when the
# variable is not set, so a missing token is noticed immediately.
hf_token = os.environ['MY_HF_TOKEN']
API_URL = "https://api-inference.huggingface.co/models/meta-llama/Llama-2-70b-chat-hf"
headers = {"Authorization": "Bearer "+hf_token}

# Fixed locations used by the pandoc conversion step in text2ppt().
_PANDOC_BIN = "/home/user/app/pandoc-2.14.2/bin/pandoc"
_MARKDOWN_PATH = "/home/user/app/text2ppt_input.md"
_PPTX_PATH = "/home/user/app/text2ppt_output.pptx"
_TEMPLATE_DIR = "/home/user/app/template/"

sys.path.append("/home/user/app")


def query(payload):
    """POST *payload* as JSON to API_URL and return the decoded JSON reply."""
    response = requests.post(API_URL, headers=headers, json=payload)
    return response.json()


def _generate(prompt):
    """Send *prompt* to the model and return the generated text.

    The reply is indexed as ``output[0]['generated_text']``; any other reply
    shape raises the corresponding KeyError/IndexError/TypeError.
    """
    output = query({
        "inputs": prompt,
        "parameters": {
            "return_full_text": False,
            "max_new_tokens": 1000,
        },
    })
    return output[0]['generated_text']


def _read_pdf_text(pdf_path, mark_pages=False):
    """Return the text of every page of the PDF at *pdf_path*, concatenated.

    When *mark_pages* is true each page's text is preceded by a
    ``[PAGE_NUM n]`` marker (1-based).
    """
    parts = []
    # Extraction happens while the file is still open because the reader
    # pulls page data from the file object.
    with open(pdf_path, 'rb') as pdf_file:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        for page_num, page in enumerate(pdf_reader.pages, start=1):
            if mark_pages:
                parts.append("[PAGE_NUM " + str(page_num) + "]")
            parts.append(page.extract_text())
    return "".join(parts)


def _read_pptx_text(pptx_path):
    """Return the run text of every slide in *pptx_path*, concatenated.

    Each slide's text is preceded by a ``[PAGE_NUM n]`` marker (1-based).
    Shapes without a text frame are skipped.
    """
    parts = []
    prs = Presentation(pptx_path)
    for page_num, slide in enumerate(prs.slides, start=1):
        parts.append("[PAGE_NUM " + str(page_num) + "]")
        for shape in slide.shapes:
            if not shape.has_text_frame:
                continue
            for paragraph in shape.text_frame.paragraphs:
                parts.extend(run.text for run in paragraph.runs)
    return "".join(parts)


# Function to generate text2ppt input prompt
def generate_text2ppt_input_prompt(input_type, input_value, input_pages):
    """Build the prompt that asks the model for a markdown slide deck.

    Args:
        input_type: "Link", "Text", or "PDF". Any other value prints an
            error and produces a prompt with no content section.
        input_value: the link or text itself, or the path of a PDF file
            whose extracted text becomes the content.
        input_pages: requested number of slides; interpolated with ``%s``.

    Returns:
        The header, the content followed by a newline, and the rule section
        concatenated into one string.
    """
    # NOTE(review): the prompt text below is reproduced exactly as it appears
    # in this file. If the rules/bullets were meant to sit on separate lines,
    # confirm against the intended prompt and restore the line breaks.
    header = (
        " Assume you are a designer creating a PPT using markdown syntax,"
        " and write a PPT of %s pages."
        " +++ Summarize the content or link below in markdown language,"
        " adhering to the rules in ===,"
        " and refer to the slide examples in ~~~."
        " +++ "
    ) % input_pages

    if input_type in ("Link", "Text"):
        summary_value = input_value + "\n"
    elif input_type == "PDF":
        summary_value = _read_pdf_text(input_value) + "\n"
    else:
        # Best effort: report the problem and continue with empty content.
        print("ERROR: Invalid input")
        summary_value = ""

    rule_value = (
        " ==="
        " - Always use '---' as a slide divider."
        " - Write factually only about the content or link provided."
        " - Design and arrange the slides diversely with appropriate shapes,"
        " images(![Image](Image link),"
        " https://unsplash.com/ko/images/stock/non-copyrighted for actual use),"
        " tables(|-|), quotes(>), emphasis(bold, ``),"
        " emojis(https://kr.piliapp.com/twitter-symbols/),"
        " icons (https://kr.piliapp.com/symbol/#popular)."
        " - Use emojis only once in every two pages,"
        " and use various other designs."
        " - When using images and tables, specify the size considering the"
        " page size so that all the text content appears. \n"
        "- Make Slide 1 the title, for a total of %s pages."
        " - Write the content of the PPT richly in markdown."
        " - Don't explain slide by slide, just write the code."
        " - Don't write using the content of the example,"
        " just refer to the format."
        " ~~~"
        " # Slide Title"
        " ![Image link](https://huggingface.co/datasets/huggingface/brand-assets/resolve/main/hf-logo-with-title.png)"
        " - This is 🤗**TEXT2PPT service PA!** using llama2."
        " - Converts `link`,`text`, `PDF` input or upload into PPT. "
    ) % input_pages

    return header + summary_value + rule_value


# Function to execute text2ppt
def text2ppt(input_prompt, input_theme):
    """Generate slide markdown from *input_prompt* and convert it to PPTX.

    The model reply is written to the markdown file that pandoc reads, then
    pandoc converts it to the output PPTX path.

    Args:
        input_prompt: prompt text, typically from
            generate_text2ppt_input_prompt().
        input_theme: 'default' for pandoc's built-in look; otherwise the
            base name of a ``.pptx`` in the template directory, passed to
            pandoc as ``--reference-doc``.

    Returns:
        None. A non-zero pandoc exit status is reported with ``print``.
    """
    reply = _generate("You are a kind helpful PPT designer. "+input_prompt)

    # Drop a leading slide divider together with the character after it.
    md_text = reply[4:] if reply[:3] == "---" else reply
    lines = [line.strip() + "\n" for line in md_text.split('\n')]

    # Write to the exact path pandoc is told to read (the relative name used
    # before only matched when the working directory was the app directory),
    # as UTF-8 because the reply may contain emoji and pandoc expects UTF-8.
    with open(_MARKDOWN_PATH, 'w', encoding='utf-8') as md_file:
        md_file.writelines(lines)

    command = [_PANDOC_BIN, _MARKDOWN_PATH, "-t", "pptx"]
    if input_theme != 'default':
        command.append("--reference-doc=" + _TEMPLATE_DIR + input_theme + ".pptx")
    command += ["-o", _PPTX_PATH]

    result = subprocess.run(command, capture_output=True)
    if result.returncode != 0:
        # Surface conversion failures instead of dropping them silently.
        print("ERROR: pandoc failed: " + result.stderr.decode(errors="replace"))


def ppt2script(input_file, input_type):
    """Ask the model for a presentation script for a PDF or PPTX deck.

    Args:
        input_file: path of the file to read.
        input_type: "PDF" to read *input_file* with PyPDF2; any other value
            reads it as a PPTX with python-pptx.

    Returns:
        The text generated by the model.
    """
    if input_type == "PDF":
        text = _read_pdf_text(input_file, mark_pages=True)
    else:
        text = _read_pptx_text(input_file)

    # NOTE(review): reproduced exactly as it appears in this file; confirm
    # whether the bullets were meant to be on separate lines.
    header = (
        " You are an assistant helping with PPT presentations."
        " ~~~Follow the rules below and write a presentation script"
        " for the PPT content below."
        " ~~~"
        " - When [PAGE_NUM 1], where 1 is the page number,"
        " write a presentation script for each page number."
        " - Write only in text without using markdown language."
        " - Add additional explanations or examples to the PPT content."
        " --- "
    )
    input_prompt = header + text

    return _generate("You are a kind helpful PPT Assistant."+input_prompt)