spdraptor committed
Commit e2e727d · 1 Parent(s): d42634f

file added

Files changed (3)
  1. app.py +35 -4
  2. modules/masking_module.py +168 -0
  3. requirements.txt +10 -0
app.py CHANGED
@@ -1,7 +1,38 @@
+# Script added by SPDraptor
+
+from typing import Optional
+import subprocess
+# install the flash-attn wheel at startup; the env flag skips the CUDA build on Spaces
+subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
+import torch
+print("cuda present = ", torch.cuda.is_available())
+import os
+import sys
 import gradio as gr
-
-def greet(name):
-    return "Hello " + name + "!!"
-
-demo = gr.Interface(fn=greet, inputs="text", outputs="text")
-demo.launch()
+from PIL import Image
+from modules import masking_module
+
+DESCRIPTION = "Welcome to Raptor APIs"
+
+css = """
+#output {
+    height: 500px;
+    overflow: auto;
+    border: 1px solid #ccc;
+}
+"""
+
+with gr.Blocks(css=css) as demo:
+    gr.Markdown(DESCRIPTION)
+    with gr.Tab(label="OBJ_mask"):
+        with gr.Row():
+            with gr.Column():
+                image = gr.Image(label="Input main Picture", type="pil")
+                image_object = gr.Textbox(label="object name")
+                mask_btn = gr.Button(value="createMask")
+            with gr.Column():
+                output_mask = gr.Image(label="mask")
+        # wired up after both columns so output_mask exists; Button.click takes inputs=/outputs= (plural)
+        mask_btn.click(masking_module.masking_process, inputs=[image, image_object], outputs=output_mask, api_name="masking_step")
+
+demo.launch(debug=True)
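Because the click handler is registered with api_name="masking_step", the Space also exposes this flow as a callable endpoint. A minimal client-side sketch (the Space id and image path are placeholders, not part of this commit; on older gradio_client versions a plain filepath replaces handle_file):

    from gradio_client import Client, handle_file

    client = Client("spdraptor/raptor-apis")  # hypothetical Space id
    result = client.predict(
        handle_file("tiger.jpeg"),            # image input
        "Tiger",                              # object name textbox
        api_name="/masking_step",
    )
    print(result)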
modules/masking_module.py ADDED
@@ -0,0 +1,168 @@
+# Script added by SPDraptor
+
+import numpy as np
+import spaces
+import torch
+from PIL import Image, ImageDraw
+from transformers import AutoProcessor, AutoModelForCausalLM
+from typing import Any
+import supervision as sv
+from sam2.build_sam import build_sam2
+from sam2.sam2_image_predictor import SAM2ImagePredictor
+
+device = torch.device('cuda')
+
+model_id = 'microsoft/Florence-2-large'
+
+# load Florence-2 once at import time so every request reuses the same weights
+models_dict = {
+    'Florence_model': AutoModelForCausalLM.from_pretrained(model_id,
+                                                           trust_remote_code=True,
+                                                           attn_implementation="flash_attention_2",
+                                                           device_map=device).eval(),
+    'Florence_processor': AutoProcessor.from_pretrained(model_id, trust_remote_code=True),
+}
+
+SAM_CHECKPOINT = "/home/user/app/sam2_hiera_large.pt"
+SAM_CONFIG = "sam2_hiera_l.yaml"
+
+def load_sam_image_model(
+    device: torch.device,
+    config: str = SAM_CONFIG,
+    checkpoint: str = SAM_CHECKPOINT
+) -> SAM2ImagePredictor:
+    model = build_sam2(config, checkpoint, device=device)
+    return SAM2ImagePredictor(sam_model=model)
+
+SAM_IMAGE_MODEL = load_sam_image_model(device=device)
+
+def run_sam_inference(
+    model: Any,
+    image: Image.Image,
+    detections: sv.Detections
+) -> dict:
+    image = np.array(image.convert("RGB"))
+    model.set_image(image)
+    if detections.xyxy.size == 0:
+        return {
+            'code': 400,
+            'data': None,
+            'message': 'The AI couldn’t detect the object you want to mask.'
+        }
+
+    mask, score, _ = model.predict(box=detections.xyxy, multimask_output=False)
+
+    # SAM2 returns (N, 1, H, W) for batched boxes; drop the channel axis but keep (N, H, W)
+    if mask.ndim == 4:
+        mask = np.squeeze(mask, axis=1)
+
+    detections.mask = mask.astype(bool)
+    return {
+        'code': 200,
+        'data': detections,
+        'message': 'Mask created successfully.'
+    }
+
+def florence2(image, task_prompt, text_input=None):
+    """
+    Run the Microsoft Florence-2 model on an image with the given task prompt.
+    """
+    model = models_dict['Florence_model']
+    processor = models_dict['Florence_processor']
+    if text_input is None:
+        prompt = task_prompt
+    else:
+        prompt = task_prompt + text_input
+
+    input_florence = processor(text=prompt, images=image, return_tensors="pt").to(torch.float16).to("cuda")
+    generated_ids = model.generate(
+        input_ids=input_florence["input_ids"],
+        pixel_values=input_florence["pixel_values"],
+        max_new_tokens=1024,
+        early_stopping=False,
+        do_sample=False,
+        num_beams=3,
+    )
+    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
+    parsed_answer = processor.post_process_generation(
+        generated_text,
+        task=task_prompt,
+        image_size=(image.width, image.height))
+
+    return parsed_answer
+
+def draw_MASK(image, prediction, fill_mask=False):
+    """
+    Draws segmentation masks with polygons on a black canvas of the same size as the input image.
+
+    Parameters:
+    - image: PIL image whose dimensions the mask canvas copies.
+    - prediction: Dictionary containing 'polygons' and 'labels' keys.
+      'polygons' is a list of lists, each containing vertices of a polygon.
+      'labels' is a list of labels corresponding to each polygon.
+    - fill_mask: Boolean indicating whether to fill the polygons with color.
+    """
+    width = image.width
+    height = image.height
+    new_image = Image.new("RGB", (width, height), color="black")
+    draw = ImageDraw.Draw(new_image)
+    scale = 1
+
+    for polygons, label in zip(prediction['polygons'], prediction['labels']):
+        color = "white"
+        fill_color = "white" if fill_mask else None
+
+        for _polygon in polygons:
+            _polygon = np.array(_polygon).reshape(-1, 2)
+            if len(_polygon) < 3:
+                print('Invalid polygon:', _polygon)
+                continue
+
+            _polygon = (_polygon * scale).reshape(-1).tolist()
+            if fill_mask:
+                draw.polygon(_polygon, outline=color, fill=fill_color)
+            else:
+                draw.polygon(_polygon, outline=color)
+            draw.text((_polygon[0] + 8, _polygon[1] + 2), label, fill=color)
+
+    return new_image
+
+# @spaces.GPU
+def masking_process(image, obj):
+    """Detect `obj` with Florence-2 open-vocabulary detection, then segment it with SAM2."""
+    # app.py declares gr.Image(type="pil"), so this receives a PIL image
+    image = image.convert("RGB")
+
+    task_prompt = '<OPEN_VOCABULARY_DETECTION>'
+    Florence_results = florence2(image, task_prompt, text_input=obj)
+
+    detections = sv.Detections.from_lmm(
+        lmm=sv.LMM.FLORENCE_2,
+        result=Florence_results,
+        resolution_wh=image.size
+    )
+    response = run_sam_inference(SAM_IMAGE_MODEL, image, detections)
+    if response['code'] == 400:
+        return response
+    detections2 = response['data']
+    # boolean mask -> 8-bit grayscale so PIL can build an image from it
+    mask = Image.fromarray((detections2.mask[0] * 255).astype(np.uint8))
+    response['data'] = mask
+    torch.cuda.empty_cache()
+    return response
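For a quick local check of the detect-then-segment pipeline above, something along these lines should work on a CUDA machine with the SAM2 checkpoint at SAM_CHECKPOINT (the image path is a placeholder; this sketch is not part of the commit):

    from PIL import Image
    from modules import masking_module

    img = Image.open("tiger.jpeg").convert("RGB")   # placeholder input image
    response = masking_module.masking_process(img, "Tiger")
    if response['code'] == 200:
        response['data'].save("mask.png")           # binary mask as a PIL image
    else:
        print(response['message'])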
requirements.txt ADDED
@@ -0,0 +1,10 @@
+tqdm
+einops
+spaces
+timm
+transformers
+samv2
+gradio
+supervision
+opencv-python
+pytest
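Note that flash-attn is installed at runtime by app.py rather than pinned here. requirements.txt also pulls in pytest, though the commit ships no tests; a minimal sketch of one (hypothetical file test_masking.py; importing modules.masking_module eagerly loads Florence-2 and SAM2, so this only runs on the deployed GPU hardware):

    import numpy as np
    from PIL import Image

    def test_draw_mask_fills_polygon():
        from modules import masking_module  # heavyweight import: loads both models
        img = Image.new("RGB", (64, 64))
        pred = {'polygons': [[[8, 8, 56, 8, 56, 56, 8, 56]]], 'labels': ['box']}
        out = masking_module.draw_MASK(img, pred, fill_mask=True)
        assert np.array(out).sum() > 0  # the filled polygon left white pixels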