Dolly-v2

Runtime error

App Files Files Community

Lenery

RamAnanth1 committed on Apr 24, 2023

Commit

c08e081

0 Parent(s):

Duplicate from RamAnanth1/Dolly-v2

Browse files

Co-authored-by: Ram Ananth <[email protected]>

Files changed (5) hide show

.gitattributes +34 -0
README.md +13 -0
app.py +133 -0
instruct_pipeline.py +158 -0
requirements.txt +3 -0

.gitattributes ADDED Viewed

	@@ -0,0 +1,34 @@

+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text

README.md ADDED Viewed

	@@ -0,0 +1,13 @@

+---
+title: Dolly V2
+emoji: 🐠
+colorFrom: purple
+colorTo: indigo
+sdk: gradio
+sdk_version: 3.24.1
+app_file: app.py
+pinned: false
+duplicated_from: RamAnanth1/Dolly-v2
+---
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app.py ADDED Viewed

	@@ -0,0 +1,133 @@

+from __future__ import annotations
+from typing import Iterable
+import gradio as gr
+from gradio.themes.base import Base
+from gradio.themes.utils import colors, fonts, sizes
+from instruct_pipeline import InstructionTextGenerationPipeline
+from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+import torch
+# Stock Monochrome theme with custom hues, radius and fonts.
+# Note: the Blocks UI further down is created with theme=seafoam, not with this object.
+theme = gr.themes.Monochrome(
+    primary_hue="indigo",
+    secondary_hue="blue",
+    neutral_hue="slate",
+    radius_size=gr.themes.sizes.radius_sm,
+    font=[gr.themes.GoogleFont("Open Sans"), "ui-sans-serif", "system-ui", "sans-serif"],
+)
+# Load the databricks/dolly-v2-12b tokenizer (left padding) and model at import time.
+# device_map="auto" and load_in_8bit=True are handed to from_pretrained as-is.
+tokenizer = AutoTokenizer.from_pretrained("databricks/dolly-v2-12b", padding_side="left")
+model = AutoModelForCausalLM.from_pretrained("databricks/dolly-v2-12b", device_map="auto", load_in_8bit=True)
+# Instruction pipeline (from instruct_pipeline.py) used by generate() below.
+generate_text = InstructionTextGenerationPipeline(model=model, tokenizer=tokenizer)
+#generate_text = pipeline(model="databricks/dolly-v2-12b", torch_dtype=torch.bfloat16, trust_remote_code=True, device_map="auto")
+def generate(instruction):
+    response = generate_text(instruction)
+    result = ""
+    for word in response.split(" "):
+        result += word + " "
+        yield result
+# Sample prompts passed to gr.Examples in the UI below.
+examples = [
+    "Instead of making a peanut butter and jelly sandwich, what else could I combine peanut butter with in a sandwich? Give five ideas",
+    "How do I make a campfire?",
+    "Write me a tweet about the release of Dolly 2.0, a new LLM",
+    "Explain to me the difference between nuclear fission and fusion.",
+    "I'm selling my Nikon D-750, write a short blurb for my ad."
+]
+def process_example(args):
+    for x in generate(args):
+        pass
+    return x
+# Extra CSS handed to gr.Blocks: hides elements that carry the "generating" class.
+css = ".generating {visibility: hidden}"
+# Based on the gradio theming guide and borrowed from https://huggingface.co/spaces/shivi/dolly-v2-demo
+class SeafoamCustom(Base):
+    """Gradio theme derived from ``Base`` with custom button, shadow and input styling.
+    All constructor arguments are keyword-only and are forwarded unchanged to
+    ``Base.__init__``; the style overrides are then applied through ``set``.
+    """
+    def __init__(
+        self,
+        *,
+        primary_hue: colors.Color | str = colors.emerald,
+        secondary_hue: colors.Color | str = colors.blue,
+        neutral_hue: colors.Color | str = colors.blue,
+        spacing_size: sizes.Size | str = sizes.spacing_md,
+        radius_size: sizes.Size | str = sizes.radius_md,
+        font: fonts.Font
+        | str
+        | Iterable[fonts.Font | str] = (
+            fonts.GoogleFont("Quicksand"),
+            "ui-sans-serif",
+            "sans-serif",
+        ),
+        font_mono: fonts.Font
+        | str
+        | Iterable[fonts.Font | str] = (
+            fonts.GoogleFont("IBM Plex Mono"),
+            "ui-monospace",
+            "monospace",
+        ),
+    ):
+        # Pass the core palette, sizing and font choices straight to the base theme.
+        super().__init__(
+            primary_hue=primary_hue,
+            secondary_hue=secondary_hue,
+            neutral_hue=neutral_hue,
+            spacing_size=spacing_size,
+            radius_size=radius_size,
+            font=font,
+            font_mono=font_mono,
+        )
+        # Override individual style variables; values starting with "*" refer to
+        # other theme variables (e.g. *primary_300).
+        super().set(
+            button_primary_background_fill="linear-gradient(90deg, *primary_300, *secondary_400)",
+            button_primary_background_fill_hover="linear-gradient(90deg, *primary_200, *secondary_300)",
+            button_primary_text_color="white",
+            button_primary_background_fill_dark="linear-gradient(90deg, *primary_600, *secondary_800)",
+            block_shadow="*shadow_drop_lg",
+            button_shadow="*shadow_drop_lg",
+            # NOTE(review): "zinc" is not a CSS named color - confirm the intended value.
+            input_background_fill="zinc",
+            input_border_color="*secondary_300",
+            input_shadow="*shadow_drop",
+            input_shadow_focus="*shadow_drop_lg",
+        )
+# Instantiate the custom theme and build the single-page UI with it.
+seafoam = SeafoamCustom()
+with gr.Blocks(theme=seafoam, analytics_enabled=False, css=css) as demo:
+    with gr.Column():
+        # Intro text shown at the top of the page.
+        gr.Markdown(
+            """ ## Dolly 2.0
+            Dolly 2.0 is a 12B parameter language model based on the EleutherAI pythia model family and fine-tuned exclusively on a new, high-quality human generated instruction following dataset, crowdsourced among Databricks employees. For more details, please refer to the [model card](https://huggingface.co/databricks/dolly-v2-12b)
+            Type in the box below and click the button to generate answers to your most pressing questions!
+      """
+        )
+        # "Duplicate Space" badge linking to the original Space.
+        gr.HTML("<p>You can duplicate this Space to run it privately without a queue for shorter queue times  : <a style='display:inline-block' href='https://huggingface.co/spaces/RamAnanth1/Dolly-v2?duplicate=true'><img src='https://img.shields.io/badge/-Duplicate%20Space-blue?labelColor=white&style=flat&logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAAAXNSR0IArs4c6QAAAP5JREFUOE+lk7FqAkEURY+ltunEgFXS2sZGIbXfEPdLlnxJyDdYB62sbbUKpLbVNhyYFzbrrA74YJlh9r079973psed0cvUD4A+4HoCjsA85X0Dfn/RBLBgBDxnQPfAEJgBY+A9gALA4tcbamSzS4xq4FOQAJgCDwV2CPKV8tZAJcAjMMkUe1vX+U+SMhfAJEHasQIWmXNN3abzDwHUrgcRGmYcgKe0bxrblHEB4E/pndMazNpSZGcsZdBlYJcEL9Afo75molJyM2FxmPgmgPqlWNLGfwZGG6UiyEvLzHYDmoPkDDiNm9JR9uboiONcBXrpY1qmgs21x1QwyZcpvxt9NS09PlsPAAAAAElFTkSuQmCC&logoWidth=14' alt='Duplicate Space'></a> </p>")
+        with gr.Row():
+            with gr.Column(scale=3):
+                # Question input, answer area, submit button and clickable example prompts.
+                instruction = gr.Textbox(placeholder="Enter your question here", label="Question", elem_id="q-input")
+                with gr.Box():
+                    gr.Markdown("**Answer**")
+                    output = gr.Markdown(elem_id="q-output")
+                submit = gr.Button("Generate", variant="primary")
+                gr.Examples(
+                    examples=examples,
+                    inputs=[instruction],
+                    cache_examples=False,
+                    fn=process_example,
+                    outputs=[output],
+                )
+    # Both the button click and submitting the textbox run generate() and send its output to the answer area.
+    submit.click(generate, inputs=[instruction], outputs=[output])
+    instruction.submit(generate, inputs=[instruction], outputs=[output])
+# Enable the request queue with concurrency_count=16 and start the app in debug mode.
+demo.queue(concurrency_count=16).launch(debug=True)

instruct_pipeline.py ADDED Viewed

	@@ -0,0 +1,158 @@

+import logging
+import re
+import numpy as np
+from transformers import Pipeline, PreTrainedTokenizer
+# Module-level logger used for the warnings emitted during postprocessing.
+logger = logging.getLogger(__name__)
+# Marker strings that delimit the sections of the prompt and of the generated text.
+INSTRUCTION_KEY = "### Instruction:"
+RESPONSE_KEY = "### Response:"
+END_KEY = "### End"
+# Fixed introduction placed at the start of every prompt.
+INTRO_BLURB = (
+    "Below is an instruction that describes a task. Write a response that appropriately completes the request."
+)
+# This is the prompt that is used for generating responses using an already trained model.  It ends with the response
+# key, where the job of the model is to provide the completion that follows it (i.e. the response itself).
+# The first .format() call fills in the fixed parts and re-inserts a literal "{instruction}" placeholder, so the
+# resulting string can be formatted again later with the actual instruction.
+PROMPT_FOR_GENERATION_FORMAT = """{intro}
+{instruction_key}
+{instruction}
+{response_key}
+""".format(
+    intro=INTRO_BLURB,
+    instruction_key=INSTRUCTION_KEY,
+    instruction="{instruction}",
+    response_key=RESPONSE_KEY,
+)
+def get_special_token_id(tokenizer: PreTrainedTokenizer, key: str) -> int:
+    """Gets the token ID for a given string that has been added to the tokenizer as a special token.
+    When training, we configure the tokenizer so that the sequences like "### Instruction:" and "### End" are
+    treated specially and converted to a single, new token.  This retrieves the token ID each of these keys map to.
+    Args:
+        tokenizer (PreTrainedTokenizer): the tokenizer
+        key (str): the key to convert to a single token
+    Raises:
+        RuntimeError: if more than one ID was generated
+    Returns:
+        int: the token ID for the given key
+    """
+    token_ids = tokenizer.encode(key)
+    if len(token_ids) > 1:
+        raise ValueError(f"Expected only a single token for '{key}' but found {token_ids}")
+    return token_ids[0]
+class InstructionTextGenerationPipeline(Pipeline):
+    def __init__(
+        self, *args, do_sample: bool = True, max_new_tokens: int = 256, top_p: float = 0.92, top_k: int = 0, **kwargs
+    ):
+        super().__init__(*args, do_sample=do_sample, max_new_tokens=max_new_tokens, top_p=top_p, top_k=top_k, **kwargs)
+    def _sanitize_parameters(self, return_instruction_text=False, **generate_kwargs):
+        preprocess_params = {}
+        # newer versions of the tokenizer configure the response key as a special token.  newer versions still may
+        # append a newline to yield a single token.  find whatever token is configured for the response key.
+        tokenizer_response_key = next(
+            (token for token in self.tokenizer.additional_special_tokens if token.startswith(RESPONSE_KEY)), None
+        )
+        response_key_token_id = None
+        end_key_token_id = None
+        if tokenizer_response_key:
+            try:
+                response_key_token_id = get_special_token_id(self.tokenizer, tokenizer_response_key)
+                end_key_token_id = get_special_token_id(self.tokenizer, END_KEY)
+                # Ensure generation stops once it generates "### End"
+                generate_kwargs["eos_token_id"] = end_key_token_id
+            except ValueError:
+                pass
+        forward_params = generate_kwargs
+        postprocess_params = {
+            "response_key_token_id": response_key_token_id,
+            "end_key_token_id": end_key_token_id,
+            "return_instruction_text": return_instruction_text,
+        }
+        return preprocess_params, forward_params, postprocess_params
+    def preprocess(self, instruction_text, **generate_kwargs):
+        prompt_text = PROMPT_FOR_GENERATION_FORMAT.format(instruction=instruction_text)
+        inputs = self.tokenizer(
+            prompt_text,
+            return_tensors="pt",
+        )
+        inputs["prompt_text"] = prompt_text
+        inputs["instruction_text"] = instruction_text
+        return inputs
+    def _forward(self, model_inputs, **generate_kwargs):
+        input_ids = model_inputs["input_ids"]
+        attention_mask = model_inputs.get("attention_mask", None)
+        generated_sequence = self.model.generate(
+            input_ids=input_ids.to(self.model.device),
+            attention_mask=attention_mask,
+            pad_token_id=self.tokenizer.pad_token_id,
+            **generate_kwargs,
+        )[0].cpu()
+        instruction_text = model_inputs.pop("instruction_text")
+        return {"generated_sequence": generated_sequence, "input_ids": input_ids, "instruction_text": instruction_text}
+    def postprocess(self, model_outputs, response_key_token_id, end_key_token_id, return_instruction_text):
+        sequence = model_outputs["generated_sequence"]
+        instruction_text = model_outputs["instruction_text"]
+        # The response will be set to this variable if we can identify it.
+        decoded = None
+        # If we have token IDs for the response and end, then we can find the tokens and only decode between them.
+        if response_key_token_id and end_key_token_id:
+            # Find where "### Response:" is first found in the generated tokens.  Considering this is part of the
+            # prompt, we should definitely find it.  We will return the tokens found after this token.
+            response_pos = None
+            response_positions = np.where(sequence == response_key_token_id)[0]
+            if len(response_positions) == 0:
+                logger.warn(f"Could not find response key {response_key_token_id} in: {sequence}")
+            else:
+                response_pos = response_positions[0]
+            if response_pos:
+                # Next find where "### End" is located.  The model has been trained to end its responses with this
+                # sequence (or actually, the token ID it maps to, since it is a special token).  We may not find
+                # this token, as the response could be truncated.  If we don't find it then just return everything
+                # to the end.  Note that even though we set eos_token_id, we still see the this token at the end.
+                end_pos = None
+                end_positions = np.where(sequence == end_key_token_id)[0]
+                if len(end_positions) > 0:
+                    end_pos = end_positions[0]
+                decoded = self.tokenizer.decode(sequence[response_pos + 1 : end_pos]).strip()
+        else:
+            # Otherwise we'll decode everything and use a regex to find the response and end.
+            fully_decoded = self.tokenizer.decode(sequence)
+            # The response appears after "### Response:".  The model has been trained to append "### End" at the
+            # end.
+            m = re.search(r"#+\s*Response:\s*(.+?)#+\s*End", fully_decoded, flags=re.DOTALL)
+            if m:
+                decoded = m.group(1).strip()
+            else:
+                # The model might not generate the "### End" sequence before reaching the max tokens.  In this case,
+                # return everything after "### Response:".
+                m = re.search(r"#+\s*Response:\s*(.+)", fully_decoded, flags=re.DOTALL)
+                if m:
+                    decoded = m.group(1).strip()
+                else:
+                    logger.warn(f"Failed to find response in:\n{fully_decoded}")
+        if return_instruction_text:
+            return {"instruction_text": instruction_text, "generated_text": decoded}
+        return decoded

requirements.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+accelerate>=0.12.0
+transformers[torch]==4.25.1
+bitsandbytes