Spaces:
Runtime error
Runtime error
File size: 3,577 Bytes
48d28d5 6a00324 745b80e 9e0ab0f 6d99166 48d28d5 98b274f 9e0ab0f 745b80e 96664a2 2ebfb5d 745b80e 16e66e6 04e7c76 16e66e6 04e7c76 16e66e6 10520fd c1157cb 24890d4 16e66e6 f59d886 b5bcb4b f0dea8f 92abcaa ab39a91 f1da055 0ac4d87 4185abb 6d16879 f1da055 41c8dfe dc2b889 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 |
import gradio as gr
import sentence_transformers
from sentence_transformers import SentenceTransformer
import torch
from sentence_transformers.util import semantic_search
import pandas as pd
# Identifier of the sentence-embedding model used to encode queries.
# NOTE(review): the trailing id looks like an earlier/alternative model — confirm.
MODEL_NAME = 'JoBeer/eng-distelBERT-se-autogen'  # gart-labor/eng-distilBERT-se-eclass

# JSON Lines file (one JSON record per line) holding the searchable corpus;
# predict() reads its "embeddings" column and looks up rows by position.
CORPUS_PATH = 'corpus.jsonl'

model = SentenceTransformer(MODEL_NAME)
corpus = pd.read_json(CORPUS_PATH, lines=True, encoding='utf-8')
# Scores below this value are reported to the user as not reliable.
_RELIABILITY_THRESHOLD = 0.5

# Tensor form of corpus["embeddings"], built on first use (see helper below).
_corpus_embeddings_cache = None


def _get_corpus_embeddings():
    """Return the corpus embeddings as a tensor, converting them only once.

    The corpus is loaded once at import time and not modified afterwards, so
    the conversion result can be reused across requests instead of being
    rebuilt on every call to ``predict``.
    """
    global _corpus_embeddings_cache
    if _corpus_embeddings_cache is None:
        _corpus_embeddings_cache = torch.Tensor(corpus["embeddings"])
    return _corpus_embeddings_cache


def predict(name, description):
    """Find the corpus entry that best matches a property name and description.

    The two inputs are combined into one query string, encoded with the
    module-level ``model`` and compared against the corpus embeddings with
    ``semantic_search``. Only the best-scoring hit is reported.

    Args:
        name: Name of the property to look up. ``None`` is treated as "".
        description: Free-text description of the property. ``None`` is
            treated as "".

    Returns:
        A 4-tuple ``(preferedName, definition, IRDI, scoreOutput)``. The first
        three values are read from corpus columns 2, 1 and 4 (by position) of
        the best-matching row; ``scoreOutput`` is ``'TRUE'`` or ``'FALSE'``
        (``'FALSE'`` when the score is below 0.5) followed by the raw score.
    """
    # `or ''` keeps a missing field from crashing the string formatting.
    text = f"Description: {description or ''}; Name: {name or ''}"
    query_embedding = model.encode(text, convert_to_tensor=True)

    hits = semantic_search(query_embedding, _get_corpus_embeddings(), top_k=5)
    # hits[0] holds the results for the single query, ordered best first.
    best_hit = hits[0][0]
    row_index = best_hit['corpus_id']
    score = best_hit['score']

    # Columns are addressed by position, exactly as the corpus file lays them out.
    prefered_name = corpus.iloc[row_index, 2]
    definition = corpus.iloc[row_index, 1]
    irdi = corpus.iloc[row_index, 4]

    reliable = 'FALSE' if score < _RELIABILITY_THRESHOLD else 'TRUE'
    score_output = reliable + ' (score = ' + str(score) + ')'
    return prefered_name, definition, irdi, score_output
# Gradio UI definition: the two text inputs are passed to `predict` as
# (name, description) and its four return values fill the four output boxes
# in order.
interface = gr.Interface(
    fn=predict,
    inputs=[
        gr.Textbox(label="Name:", placeholder="Name of the Pump Property", lines=1),
        gr.Textbox(label="Description:", placeholder="Description of the Pump Property", lines=1),
    ],
    outputs=[
        gr.Textbox(label='preferedName'),
        gr.Textbox(label='definition'),
        # Label was misspelled 'IDRI'; the value shown here is the IRDI.
        gr.Textbox(label='IRDI'),
        gr.Textbox(label='prediction reliable'),
    ],
    #outputs = [gr.Dataframe(row_count = (5, "fixed"), col_count=(3, "fixed"), label="Predictions", headers=['ECLASS preferedName', 'ECLASS IRDI', 'simularity score'])],
    # Each example is [name, description], matching the order of `inputs`.
    examples=[
        ['Device type', 'describing a set of common specific characteristics in products or goods'],
        ['Item type', 'the type of product, an item can be assigned to'],
        ['Nominal power', 'power being consumed by or dissipated within an electric component as a variable'],
        ['Power consumption', 'power that is typically taken from the auxiliary power supply when the device is operating normally'],
    ],
    #theme = 'huggingface',
    title='ECLASS-Property-Search',
    description="This is a semantic search algorithm that maps unknown pump properties to the ECLASS standard. It is created by the GART-labortory ot the cologne university of applied science for the usecase of semantic interoperable asset administration shells (industry 4.0).",
    article="""<center><Strong><font size="5em">Functionality and further development of the demo</font></strong></center>
This demo is based on a sentence-transformer <a href="https://huggingface.co/gart-labor/eng-distilBERT-se-eclass">language model</a>, which is trained on a ECLASS specific <a href="https://huggingface.co/datasets/gart-labor/eclassTrainST">dataset</a>. This dataset consists of manually generated paraphrases of ECLASS pump properties. During training the language model learns to map these paraphrases to the eclass pump properties. In future work, this approach can be extended to additional ECLASS properties (e.g. heating systems, ventilation, etc.) and thus a general language model can be trained. To reduce the manual effort, the integration of chatGPT is suitable for the automated creation of the paraphrases required for training.
<br>
<br>
<center><img src='https://imagizer.imageshack.com/img923/6324/WOXHiX.png' width=900p></center>""",
)
# Start the Gradio web server for the interface defined above.
interface.launch()