jannisborn committed on
Commit
c564047
·
unverified ·
1 Parent(s): e3475d1
Files changed (3) hide show
  1. README.md +1 -1
  2. app.py +51 -50
  3. model_cards/examples.csv +5 -2
README.md CHANGED
@@ -1,5 +1,5 @@
1
  ---
2
- title: GT4SD - HuggingFace transformers
3
  emoji: 💡
4
  colorFrom: green
5
  colorTo: blue
 
1
  ---
2
+ title: GT4SD - Patent Generative Transformers
3
  emoji: 💡
4
  colorFrom: green
5
  colorTo: blue
app.py CHANGED
@@ -2,14 +2,11 @@ import logging
2
  import pathlib
3
  import gradio as gr
4
  import pandas as pd
5
- from gt4sd.algorithms.generation.hugging_face import (
6
- HuggingFaceCTRLGenerator,
7
- HuggingFaceGenerationAlgorithm,
8
- HuggingFaceGPT2Generator,
9
- HuggingFaceTransfoXLGenerator,
10
- HuggingFaceOpenAIGPTGenerator,
11
- HuggingFaceXLMGenerator,
12
- HuggingFaceXLNetGenerator,
13
  )
14
  from gt4sd.algorithms.registry import ApplicationsRegistry
15
 
@@ -18,42 +15,36 @@ logger = logging.getLogger(__name__)
18
  logger.addHandler(logging.NullHandler())
19
 
20
  MODEL_FN = {
21
- "HuggingFaceCTRLGenerator": HuggingFaceCTRLGenerator,
22
- "HuggingFaceGPT2Generator": HuggingFaceGPT2Generator,
23
- "HuggingFaceTransfoXLGenerator": HuggingFaceTransfoXLGenerator,
24
- "HuggingFaceOpenAIGPTGenerator": HuggingFaceOpenAIGPTGenerator,
25
- "HuggingFaceXLMGenerator": HuggingFaceXLMGenerator,
26
- "HuggingFaceXLNetGenerator": HuggingFaceXLNetGenerator,
27
  }
28
 
29
 
30
  def run_inference(
31
  model_type: str,
 
 
 
32
  prompt: str,
33
- length: float,
34
- temperature: float,
35
- prefix: str,
36
- k: float,
37
  p: float,
38
- repetition_penalty: float,
39
  ):
40
- model = model_type.split("_")[0]
41
- version = model_type.split("_")[1]
42
-
43
- if model not in MODEL_FN.keys():
44
- raise ValueError(f"Model type {model} not supported")
45
- config = MODEL_FN[model](
46
- algorithm_version=version,
47
- prompt=prompt,
48
- length=length,
49
- temperature=temperature,
50
- repetition_penalty=repetition_penalty,
51
- k=k,
52
- p=p,
53
- prefix=prefix,
54
- )
55
 
56
- model = HuggingFaceGenerationAlgorithm(config)
 
 
 
 
 
 
 
 
 
 
 
57
  text = list(model.sample(1))[0]
58
 
59
  return text
@@ -64,8 +55,8 @@ if __name__ == "__main__":
64
  # Preparation (retrieve all available algorithms)
65
  all_algos = ApplicationsRegistry.list_available()
66
  algos = [
67
- x["algorithm_application"] + "_" + x["algorithm_version"]
68
- for x in list(filter(lambda x: "HuggingFace" in x["algorithm_name"], all_algos))
69
  ]
70
 
71
  # Load metadata
@@ -81,30 +72,40 @@ if __name__ == "__main__":
81
  with open(metadata_root.joinpath("description.md"), "r") as f:
82
  description = f.read()
83
 
 
 
 
 
 
 
 
84
  demo = gr.Interface(
85
  fn=run_inference,
86
- title="HuggingFace language models",
87
  inputs=[
 
 
 
88
  gr.Dropdown(
89
- algos,
90
- label="Language model",
91
- value="HuggingFaceGPT2Generator_gpt2",
92
  ),
93
  gr.Textbox(
94
  label="Text prompt",
95
- placeholder="I'm a stochastic parrot.",
96
- lines=1,
97
- ),
98
- gr.Slider(minimum=5, maximum=100, value=20, label="Maximal length", step=1),
99
- gr.Slider(
100
- minimum=0.6, maximum=1.5, value=1.1, label="Decoding temperature"
101
  ),
102
  gr.Textbox(
103
- label="Prefix", placeholder="Some prefix (before the prompt)", lines=1
 
 
 
 
 
104
  ),
105
  gr.Slider(minimum=2, maximum=500, value=50, label="Top-k", step=1),
106
- gr.Slider(minimum=0.5, maximum=1, value=1.0, label="Decoding-p", step=1),
107
- gr.Slider(minimum=0.5, maximum=5, value=1.0, label="Repetition penalty"),
108
  ],
109
  outputs=gr.Textbox(label="Output"),
110
  article=article,
 
2
  import pathlib
3
  import gradio as gr
4
  import pandas as pd
5
+ from gt4sd.algorithms.generation.pgt import (
6
+ PGT,
7
+ PGTCoherenceChecker,
8
+ PGTEditor,
9
+ PGTGenerator,
 
 
 
10
  )
11
  from gt4sd.algorithms.registry import ApplicationsRegistry
12
 
 
15
  logger.addHandler(logging.NullHandler())
16
 
17
  MODEL_FN = {
18
+ "PGTGenerator": PGTGenerator,
19
+ "PGTEditor": PGTEditor,
20
+ "PGTCoherenceChecker": PGTCoherenceChecker,
 
 
 
21
  }
22
 
23
 
24
  def run_inference(
25
  model_type: str,
26
+ generator_task: str,
27
+ editor_task: str,
28
+ checker_task: str,
29
  prompt: str,
30
+ second_prompt: str,
31
+ length: int,
32
+ k: int,
 
33
  p: float,
 
34
  ):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
 
36
+ kwargs = {"max_length": length, "top_k": k, "top_p": p}
37
+
38
+ if model_type == "PGTGenerator":
39
+ config = PGTGenerator(task=generator_task, input_text=prompt, **kwargs)
40
+ elif model_type == "PGTEditor":
41
+ config = PGTEditor(input_type=editor_task, input_text=prompt, **kwargs)
42
+ elif model_type == "PGTCoherenceChecker":
43
+ config = PGTCoherenceChecker(
44
+ coherence_type=checker_task, input_a=prompt, input_b=second_prompt, **kwargs
45
+ )
46
+
47
+ model = PGT(config)
48
  text = list(model.sample(1))[0]
49
 
50
  return text
 
55
  # Preparation (retrieve all available algorithms)
56
  all_algos = ApplicationsRegistry.list_available()
57
  algos = [
58
+ x["algorithm_application"]
59
+ for x in list(filter(lambda x: "PGT" in x["algorithm_name"], all_algos))
60
  ]
61
 
62
  # Load metadata
 
72
  with open(metadata_root.joinpath("description.md"), "r") as f:
73
  description = f.read()
74
 
75
+ gen_tasks = [
76
+ "title-to-abstract",
77
+ "abstract-to-title",
78
+ "abstract-to-claim",
79
+ "claim-to-abstract",
80
+ ]
81
+
82
  demo = gr.Interface(
83
  fn=run_inference,
84
+ title="Patent Generative Transformer",
85
  inputs=[
86
+ gr.Dropdown(algos, label="Model type", value="PGTGenerator"),
87
+ gr.Dropdown(gen_tasks, label="Generator task", value="title-to-abstract"),
88
+ gr.Dropdown(["abstract", "claim"], label="Editor task", value="abstract"),
89
  gr.Dropdown(
90
+ ["title-abstract", "title-claim", "abstract-claim"],
91
+ label="Checker task",
92
+ value="title-abstract",
93
  ),
94
  gr.Textbox(
95
  label="Text prompt",
96
+ placeholder="Artificial intelligence and machine learning infrastructure",
97
+ lines=5,
 
 
 
 
98
  ),
99
  gr.Textbox(
100
+ label="Secondary text prompt (only for coherence checker)",
101
+ placeholder="",
102
+ lines=1
103
+ ),
104
+ gr.Slider(
105
+ minimum=5, maximum=1024, value=512, label="Maximal length", step=1
106
  ),
107
  gr.Slider(minimum=2, maximum=500, value=50, label="Top-k", step=1),
108
+ gr.Slider(minimum=0.5, maximum=1, value=1.0, label="Top-p", step=1),
 
109
  ],
110
  outputs=gr.Textbox(label="Output"),
111
  article=article,
model_cards/examples.csv CHANGED
@@ -1,2 +1,5 @@
1
- HuggingFaceGPT2Generator_gpt2, The role of generative models is,20,1.1,,50,1,1
2
- HuggingFaceOpenAIGPTGenerator_openai-gpt, The best country in the world is,10,0.9,,50,1,1
 
 
 
 
1
+ PGTGenerator|title-to-abstract|||Artificial intelligence and machine learning infrastructure||512|50|1.0
2
+ PGTGenerator|title-to-abstract|||Artificial intelligence and machine learning infrastructure||756|20|0.95
3
+ PGTEditor||abstract||In one step of a method for infusing an [MASK], the infusion fluid is pumped through a fluid delivery line of an infusion system. In another step, measurements are taken with at least one sensor connected to the infusion system. In an additional step, an air determination is determined with at least one processor. The air determination is related to air in the fluid delivery line. The air determination is based on the measurements taken by the at least one sensor. The air determination is further based on: (1) [MASK] information regarding the infusion of the infusion fluid; or (2) multi-channel filtering of the measurements from the at least one sensor or non-linear mapping of the measurements from the at least one sensor; and statistical process control charts applied to the multi-channel filtered measurements or applied to the non-linear mapped measurements.|512|50|1
4
+ PGTCoherenceChecker|||title-abstract|Artificial intelligence and machine learning infrastructure|An artificial intelligence and machine learning infrastructure system, including: one or more storage systems comprising, respectively, one or more storage devices; and one or more graphical processing units, wherein the graphical processing units are configured to communicate with the one or more storage systems over a communication fabric; where the one or more storage systems, the one or more graphical processing units, and the communication fabric are implemented within a single chassis.|512|50|1
5
+ PGTCoherenceChecker|||title-abstract|Analog image processing|An artificial intelligence and machine learning infrastructure system for image classification, including: one or more storage systems comprising, respectively, one or more storage devices; and one or more graphical processing units, wherein the graphical processing units are configured to communicate with the one or more storage systems over a communication fabric; where the one or more storage systems, the one or more graphical processing units, and the communication fabric are implemented within a single chassis.|512|50|1