Spaces:
Running
Running
File size: 5,031 Bytes
10364bf c2ba438 36703af c2ba438 32389f0 c2ba438 10364bf c2ba438 10364bf c2ba438 5d3a749 c2ba438 f77ed0a c2ba438 ce75747 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 |
import json
import os
import gradio as gr
import pandas as pd
from PIL import Image
from google import genai
# Client and prompt setup.
# The API key is read from the GOOGLE_API_KEY environment variable; os.getenv
# returns None when the variable is unset.
client = genai.Client(api_key=os.getenv('GOOGLE_API_KEY'))
# Other models can be substituted, but their responses may be structured
# differently from what parse_json/parse_info expect.
model_name = "gemini-2.0-flash-exp"
# Safety settings forwarded with every generate_content request in estimate_co2.
safety_settings = [
    genai.types.SafetySetting(
        category="HARM_CATEGORY_DANGEROUS_CONTENT",
        threshold="BLOCK_ONLY_HIGH",
    ),
]
# System instruction sent alongside the user prompt.
# NOTE(review): this says "Limit to 5 objects" while `prompt` below allows up
# to 10 objects - confirm which limit is intended.
bounding_box_system_instructions = """Return bounding boxes as a JSON array with labels, CO2 estimate, and an explanation. Never return masks or code fencing. Limit to 5 objects."""
# User prompt sent together with the image.
# NOTE(review): parse_info reads the keys "box_2d", "label", "co2_grams" and
# "explanation", but neither prompt names them - presumably the model is
# relied on to choose these keys; confirm.
prompt = """Provide an estimation of how much CO2 is involved in all activities in this picture. Give CO2 in grams.
As examples, think of transport, smoking, meat, and other similar emission activities.
Do not provide actions that don't have CO2 emissions.
Be comprehensive, but don't list more than 10 objects. Detect the 2D bounding boxes of these activities,
including the label, the CO2 gram quantity, and a short explanation explaining the estimation
for each activity.
"""
def parse_json(json_output):
    """Decode the model's text reply as JSON, tolerating Markdown code fencing.

    The reply is scanned line by line for an opening fence. A line that is
    "```json" (ignoring case and surrounding whitespace) is preferred; if
    there is none, the first line that starts with three backticks is used.
    When a fence is found, only the text between that line and the next
    triple backtick is parsed. Without any fence the whole string is parsed.

    Args:
        json_output: Raw text returned by the model.

    Returns:
        The decoded JSON value.

    Raises:
        json.JSONDecodeError: If the extracted text is not valid JSON.
    """
    # Based on https://github.com/google-gemini/cookbook/blob/main/gemini-2/spatial_understanding.ipynb
    lines = json_output.splitlines()
    # Compare on stripped, lower-cased lines so trailing spaces, indentation
    # or an upper-case tag do not hide the fence.
    markers = [line.strip().lower() for line in lines]

    start = next((i for i, m in enumerate(markers) if m == "```json"), None)
    if start is None:
        # Fall back to an untagged (or differently tagged) fence.
        start = next(
            (i for i, m in enumerate(markers) if m.startswith("```")), None
        )

    if start is not None:
        after_fence = "\n".join(lines[start + 1:])
        # Keep only what precedes the closing fence.
        json_output = after_fence.split("```")[0]

    return json.loads(json_output)
def _to_pixel_box(box_2d, width, height):
    """Scale a ``[y1, x1, y2, x2]`` box given in thousandths to pixels.

    Returns an ``(x_min, y_min, x_max, y_max)`` tuple, or ``None`` when the
    box is not a sequence of exactly four numbers.
    """
    if not isinstance(box_2d, (list, tuple)) or len(box_2d) != 4:
        return None
    try:
        y1, x1, y2, x2 = (
            int(value / 1000 * extent)
            for value, extent in zip(box_2d, (height, width, height, width))
        )
    except (TypeError, ValueError, OverflowError):
        return None
    # The corners may arrive in either order; always emit min before max.
    return min(x1, x2), min(y1, y2), max(x1, x2), max(y1, y2)


def parse_info(image, json_data):
    """Turn the decoded model reply into annotation boxes and a results table.

    Each usable entry must be a dict with a non-empty "label", a non-empty
    "explanation", a "co2_grams" value that is not None (0 is kept), and a
    four-number "box_2d". Entries that do not satisfy this are skipped
    rather than aborting the whole reply.

    Args:
        image: Object exposing ``size`` as ``(width, height)``; the boxes are
            scaled to these dimensions.
        json_data: Decoded JSON reply. A list of entries is expected; a single
            dict is treated as one entry and anything else as no entries.

    Returns:
        A ``(boxes_with_labels, dataframe)`` tuple. ``boxes_with_labels`` is a
        list of ``[(x_min, y_min, x_max, y_max), label]`` items in pixels, and
        ``dataframe`` always has the columns "label", "co2", "explanation",
        even when no entry was usable.
    """
    width, height = image.size

    if isinstance(json_data, dict):
        json_data = [json_data]
    elif not isinstance(json_data, list):
        json_data = []

    boxes_with_labels = []
    df_data = []
    # Iterate over each detected action
    for action in json_data:
        if not isinstance(action, dict):
            continue

        label = action.get("label")
        co2_grams = action.get("co2_grams")
        explanation = action.get("explanation")
        # Only None counts as a missing CO2 value: 0 grams is a valid estimate.
        if not label or not explanation or co2_grams is None:
            continue

        # Convert normalized coordinates to absolute coordinates
        pixel_box = _to_pixel_box(action.get("box_2d"), width, height)
        if pixel_box is None:
            continue

        boxes_with_labels.append([pixel_box, label])
        df_data.append({
            "label": label,
            "co2": co2_grams,
            "explanation": explanation,
        })

    # Explicit columns keep the table schema stable for an empty result.
    table = pd.DataFrame(df_data, columns=["label", "co2", "explanation"])
    return boxes_with_labels, table
def estimate_co2(image):
    """Ask the model for CO2 estimates on an image and format them for the UI.

    The image is resized to a width of 1024 px (aspect ratio preserved), sent
    to the model together with the module-level prompt and system
    instruction, and the reply is parsed into annotation boxes and a table.

    Args:
        image: PIL image to analyse, or None when no image is set.

    Returns:
        A ``(annotated, table)`` tuple. ``annotated`` is
        ``[resized_image, boxes_with_labels]``, or None when ``image`` is
        None; ``table`` is a pandas DataFrame with the columns "label",
        "co2" and "explanation".

    Raises:
        gr.Error: If the model returns no text or text that is not valid JSON.
    """
    if image is None:
        # The change listener also runs when the image is cleared; return an
        # empty result instead of failing on None.resize.
        return None, pd.DataFrame(columns=["label", "co2", "explanation"])

    width, height = image.size
    # max(1, ...) keeps the height valid for extremely wide images, where the
    # scaled height would otherwise truncate to 0.
    target_size = (1024, max(1, int(1024 * height / width)))
    resized_image = image.resize(target_size, Image.Resampling.LANCZOS)

    # Get results from model
    response = client.models.generate_content(
        model=model_name,
        contents=[prompt, resized_image],
        config=genai.types.GenerateContentConfig(
            system_instruction=bounding_box_system_instructions,
            temperature=0.4,
            safety_settings=safety_settings,
        ),
    )

    reply_text = response.text
    if not reply_text:
        raise gr.Error("The model returned no text for this image. Please try another image.")

    try:
        json_data = parse_json(reply_text)
    except json.JSONDecodeError as err:
        raise gr.Error("The model reply could not be parsed as JSON. Please try again.") from err

    boxes_with_labels, data = parse_info(resized_image, json_data)
    return [resized_image, boxes_with_labels], data
# Single-function gr.Interface wiring of estimate_co2: one image in, an
# annotated image plus a results table out.
iface = gr.Interface(
    fn=estimate_co2,
    inputs=gr.Image(type="pil"),
    outputs=[
        gr.AnnotatedImage(),
        gr.Dataframe(
            label="CO2 Estimation Data",
            interactive=False,
            # Same names and order as the DataFrame columns built in
            # parse_info, so the empty table and the filled table agree.
            headers=["label", "co2", "explanation"],
        ),
    ],
    title="CO2 Estimation from Images",
    description="Upload an image and get an estimation of the CO2 involved in the activities depicted.",
    article="This is a very rough estimate, and can be misleading or factually inaccurate. Take this as a demo project and not as scientific/exact results.",
    # examples=[
    #     ["example.jpeg"]  # Add an example image if you have one
    # ],
)
# Introductory text rendered at the top of the Blocks UI.
markdown = """# CO2 Estimation
Upload an image and get an **estimation** of the CO2 involved in the activities depicted. This is a very rough estimate, and can be misleading or factually inaccurate. Take this as a demo project and not as scientific/exact results.
Powered by [the Gemini API](https://ai.google.dev/gemini-api/docs) and [AI Studio](https://aistudio.google.com/).
"""

# Blocks layout: intro text, then input and annotated output side by side,
# then the results table.
with gr.Blocks() as demo:
    with gr.Row():
        gr.Markdown(markdown)
    with gr.Row():
        input_image = gr.Image(type="pil", label="Input Image")
        output_image = gr.AnnotatedImage(label="Output Image")
    with gr.Row():
        output_dataframe = gr.Dataframe(
            label="CO2 Estimated Data",
            interactive=False,
            # Same names and order as the DataFrame columns built in
            # parse_info, so the empty table and the filled table agree.
            headers=["label", "co2", "explanation"],
        )
    # Re-run the estimation whenever the input image changes.
    input_image.change(
        fn=estimate_co2,
        inputs=input_image,
        outputs=[output_image, output_dataframe],
    )