Adding the necessary points · Edgar404/Candy

	@@ -0,0 +1,99 @@

+# -*- coding: utf-8 -*-
+"""Demo.ipynb
+Automatically generated by Colaboratory.
+Original file is located at
+    https://colab.research.google.com/drive/1Icb8zeoaudyTDOKM1QySNay1cXzltRAp
+"""
+import gradio as gr
+from PIL import Image
+import re
+import torch
+import torch.nn as nn
+from warnings import simplefilter
+simplefilter('ignore')
+device = 'cuda' if torch.cuda.is_available() else 'cpu'
+# Seting up the model
+from transformers import  DonutProcessor, VisionEncoderDecoderModel
+print('Loading the base model ....')
+base_model = VisionEncoderDecoderModel.from_pretrained('Edgar404/donut-shivi-recognition')
+base_processor = DonutProcessor.from_pretrained('Edgar404/donut-shivi-recognition')
+print('Loading complete')
+print('Loading the optimized model ....')
+optimized_model = VisionEncoderDecoderModel.from_pretrained('Edgar404/donut-shivi-cheques_KD_320', torch_dtype = torch.bfloat16 )
+optimized_processor = DonutProcessor.from_pretrained('Edgar404/donut-shivi-cheques_KD_320')
+print('Loading complete')
+# Inference: run the selected Donut model on a single image
+def process_image(image , mode = 'optimized' ):
+    """ Function that takes an image and perform an OCR using the model DonUT via the task document
+    parsing
+    parameters
+    __________
+    image : a machine readable image of class PIL or numpy"""
+    model = optimized_model if mode == 'optimized' else base_model
+    processor = optimized_processor if mode == 'optimized' else base_processor
+    d_type = torch.bfloat16 if (mode == 'optimized' & device =='cuda') else torch.float32
+    model.to(device)
+    model.eval()
+    task_prompt = "<s_cord-v2>"
+    decoder_input_ids = processor.tokenizer(task_prompt, add_special_tokens=False, return_tensors="pt").input_ids
+    pixel_values = processor(image, return_tensors="pt").pixel_values
+    outputs = model.generate(
+        pixel_values.to(device , dtype  = d_type),
+        decoder_input_ids=decoder_input_ids.to(device),
+        max_length=model.decoder.config.max_position_embeddings,
+        pad_token_id=processor.tokenizer.pad_token_id,
+        eos_token_id=processor.tokenizer.eos_token_id,
+        use_cache=True,
+        bad_words_ids=[[processor.tokenizer.unk_token_id]],
+        return_dict_in_generate=True,
+    )
+    sequence = processor.batch_decode(outputs.sequences)[0]
+    sequence = sequence.replace(processor.tokenizer.eos_token, "").replace(processor.tokenizer.pad_token, "")
+    sequence = re.sub(r"<.*?>", "", sequence, count=1).strip()
+    output = processor.token2json(sequence)
+    return output
+def image_classifier(image , mode):
+    return process_image(image , mode)
+examples_list = [['./test_images/test_0.jpg' ,"base"] ,
+                 ['./test_images/test_1.jpg','base'],
+                 ['./test_images/test_2.jpg' ,"base"],
+                 ['./test_images/test_3.jpg','base'],
+                 ['./test_images/test_4.jpg','base'],
+                 ['./test_images/test_5.jpg' ,"base"],
+                 ['./test_images/test_6.jpg' ,"base"],
+                 ['./test_images/test_7.jpg','base'],
+                 ['./test_images/test_8.jpg','base'],
+                 ['./test_images/test_9.jpg','base']
+                 ]
+demo = gr.Interface(fn=image_classifier, inputs=["image",
+                                                 gr.Radio(["base" , "optimized"], label="mode")],
+                     outputs="text",
+                    examples = examples_list )
+demo.launch(share = True , debug = True)

@@ -1,10 +1,10 @@
 ---
-title: Candy Prototype
-emoji: 🌖
-colorFrom: pink
-colorTo: purple
 sdk: gradio
-sdk_version: 4.36.0
 app_file: app.py
 pinned: false
 ---

 ---
+title: Donut Prototype
+emoji: 🏃
+colorFrom: red
+colorTo: red
 sdk: gradio
+sdk_version: 4.24.0
 app_file: app.py
 pinned: false
 ---

	@@ -0,0 +1,5 @@