co2_estimator / app.py
osanseviero's picture
Update app.py
00b0cb2 verified
import json
import os
import gradio as gr
import pandas as pd
from PIL import Image
from google import genai
# Client and prompt setup.
# The API key is read from the GOOGLE_API_KEY environment variable.
client = genai.Client(api_key=os.getenv('GOOGLE_API_KEY'))
model_name = "gemini-2.0-flash-exp"  # Change to other models, but be careful as response might be with different structure

# Relax only the dangerous-content category to BLOCK_ONLY_HIGH.
safety_settings = [
    genai.types.SafetySetting(
        category="HARM_CATEGORY_DANGEROUS_CONTENT",
        threshold="BLOCK_ONLY_HIGH",
    ),
]

# Single source of truth for the object cap. The system instruction and the
# user prompt previously disagreed (5 vs 10); the stricter value is the only
# one that satisfies both, so it is kept.
MAX_OBJECTS = 5

# The key names and the box layout are spelled out because parse_info() reads
# exactly "box_2d", "label", "co2_grams" and "explanation", and interprets
# box_2d as [ymin, xmin, ymax, xmax] on a 0-1000 scale.
bounding_box_system_instructions = (
    "Return bounding boxes as a JSON array with labels, CO2 estimate, and an explanation. "
    "Each array element must be a JSON object with exactly these keys: "
    '"box_2d" ([ymin, xmin, ymax, xmax], normalized to 0-1000), '
    '"label" (string), "co2_grams" (number), and "explanation" (string). '
    "Never return masks or code fencing. "
    f"Limit to {MAX_OBJECTS} objects."
)

prompt = f"""Provide an estimation of how much CO2 is involved in all activities in this picture. Give CO2 in grams.
As examples, think of transport, smoking, meat, and other similar emission activities.
Do not provide actions that don't have CO2 emissions.
Be comprehensive, but don't list more than {MAX_OBJECTS} objects. Detect the 2D bounding boxes of these activities,
including the label, the CO2 gram quantity, and a short explanation explaining the estimation
for each activity.
"""
def parse_json(json_output):
    """Parse the model's text response as JSON, stripping Markdown fencing.

    Based on
    https://github.com/google-gemini/cookbook/blob/main/gemini-2/spatial_understanding.ipynb

    If the text contains a fenced block, only the content between the opening
    fence and the next "```" is parsed. A "```json" fence is preferred; a
    bare "```" fence is used as a fallback. Fence lines are matched after
    trimming surrounding whitespace and ignoring case, so "```json  " or
    "  ```JSON" are recognised too. Without any fence the whole text is
    parsed as-is.

    Args:
        json_output: Raw text returned by the model.

    Returns:
        The decoded JSON value (typically a list of dicts).

    Raises:
        json.JSONDecodeError: If the extracted text is not valid JSON.
    """
    lines = json_output.splitlines()
    trimmed = [line.strip() for line in lines]

    # Prefer an explicit ```json fence, then fall back to any ``` fence.
    fence_index = next(
        (i for i, text in enumerate(trimmed) if text.lower() == "```json"),
        None,
    )
    if fence_index is None:
        fence_index = next(
            (i for i, text in enumerate(trimmed) if text.startswith("```")),
            None,
        )

    if fence_index is not None:
        # Drop everything up to and including the opening fence ...
        fenced_body = "\n".join(lines[fence_index + 1:])
        # ... and everything from the closing fence onwards.
        json_output = fenced_body.split("```")[0]

    return json.loads(json_output)
def parse_info(image, json_data):
    """Turn the model's detections into pixel boxes and a summary table.

    Each usable entry of ``json_data`` is a dict with the keys ``box_2d``,
    ``label``, ``co2_grams`` and ``explanation``. ``box_2d`` is read as
    ``[y1, x1, y2, x2]`` on a 0-1000 scale and converted to absolute pixel
    coordinates using ``image.size``. Entries that are not dicts, that miss a
    field, or whose ``box_2d`` has fewer than four values are skipped.

    Args:
        image: Object exposing ``size`` as ``(width, height)`` (a PIL image
            in this app).
        json_data: Iterable of detection entries decoded from the model.

    Returns:
        A tuple ``(boxes_with_labels, dataframe)``. ``boxes_with_labels`` is
        a list of ``[(x1, y1, x2, y2), label]`` items; ``dataframe`` has the
        columns ``label``, ``co2`` and ``explanation`` (present even when no
        entry was usable).
    """
    width, height = image.size
    columns = ["label", "co2", "explanation"]
    rows = []
    boxes_with_labels = []

    # Iterate over each detected action
    for action in json_data:
        if not isinstance(action, dict):
            continue

        box_2d = action.get("box_2d")
        label = action.get("label")
        co2_grams = action.get("co2_grams")
        explanation = action.get("explanation")

        # co2_grams is compared against None rather than tested for
        # truthiness so that a legitimate estimate of 0 grams is kept.
        if not box_2d or not label or not explanation or co2_grams is None:
            continue
        # A truncated or malformed box cannot be drawn; skip it instead of
        # raising and losing every other detection.
        if not isinstance(box_2d, (list, tuple)) or len(box_2d) < 4:
            continue

        # Convert normalized coordinates to absolute coordinates
        abs_y1 = int(box_2d[0] / 1000 * height)
        abs_x1 = int(box_2d[1] / 1000 * width)
        abs_y2 = int(box_2d[2] / 1000 * height)
        abs_x2 = int(box_2d[3] / 1000 * width)

        # Normalise corner order so (x1, y1) is top-left and (x2, y2) is
        # bottom-right even if the model swapped them.
        abs_x1, abs_x2 = min(abs_x1, abs_x2), max(abs_x1, abs_x2)
        abs_y1, abs_y2 = min(abs_y1, abs_y2), max(abs_y1, abs_y2)

        boxes_with_labels.append([(abs_x1, abs_y1, abs_x2, abs_y2), label])
        rows.append({
            "label": label,
            "co2": co2_grams,
            "explanation": explanation,
        })

    # Passing columns explicitly keeps the table schema stable when rows is
    # empty.
    return boxes_with_labels, pd.DataFrame(rows, columns=columns)
def estimate_co2(image):
    """Ask the model for CO2 estimates on an image and format the results.

    The image is resized to a width of 1024 pixels (aspect ratio preserved),
    sent to the model together with the prompt, and the JSON answer is
    converted into annotation boxes and a table.

    Args:
        image: PIL image supplied by the Gradio input, or ``None`` when the
            input component holds no image.

    Returns:
        A tuple ``(annotated, dataframe)`` where ``annotated`` is
        ``[resized_image, boxes_with_labels]`` and ``dataframe`` is the
        table built by ``parse_info``. ``(None, None)`` is returned when
        ``image`` is ``None``.

    Raises:
        gr.Error: If the model returns no text or text that is not valid
            JSON, so the user sees a readable message instead of a traceback.
    """
    # This function is wired to the image's change event, which also fires
    # when the image is cleared; there is nothing to analyse in that case.
    if image is None:
        return None, None

    source_width, source_height = image.size
    # Clamp to 1 so an extremely wide image never yields a zero height.
    target_height = max(1, int(1024 * source_height / source_width))
    resized_image = image.resize((1024, target_height), Image.Resampling.LANCZOS)

    # Get results from model
    response = client.models.generate_content(
        model=model_name,
        contents=[prompt, resized_image],
        config=genai.types.GenerateContentConfig(
            system_instruction=bounding_box_system_instructions,
            temperature=0.4,
            safety_settings=safety_settings,
        ),
    )

    response_text = response.text
    if not response_text:
        raise gr.Error("The model returned no text for this image. Please try another image.")

    try:
        json_data = parse_json(response_text)
    except json.JSONDecodeError as err:
        raise gr.Error("The model response could not be parsed as JSON. Please try again.") from err

    boxes_with_labels, data = parse_info(resized_image, json_data)
    return [resized_image, boxes_with_labels], data
# Single-function Interface wrapper around estimate_co2.
# NOTE(review): the visible code only launches `demo`; confirm whether this
# Interface is still needed.
iface = gr.Interface(
    fn=estimate_co2,
    inputs=gr.Image(type="pil"),
    outputs=[
        gr.AnnotatedImage(),
        gr.Dataframe(
            label="CO2 Estimation Data",
            interactive=False,
            # Must match the DataFrame columns produced by parse_info().
            headers=["label", "co2", "explanation"],
        ),
    ],
    title="CO2 Estimation from Images",
    description="Upload an image and get an estimation of the CO2 involved in the activities depicted.",
    article="This is a very rough estimate, and can be misleading or factually inaccurate. Take this as a demo project and not as scientific/exact results.",
)
# Intro text rendered at the top of the Blocks UI.
markdown = """# CO2 Estimation
Upload an image and get an **estimation** of the CO2 involved in the activities depicted. This is a very rough estimate, and can be misleading or factually inaccurate. Take this as a demo project and not as scientific/exact results.
Powered by [the Gemini API](https://ai.google.dev/gemini-api/docs) and [AI Studio](https://aistudio.google.com/).
"""

# Layout: intro text, then input/annotated output side by side, then the
# results table and clickable example images.
with gr.Blocks() as demo:
    with gr.Row():
        gr.Markdown(markdown)

    with gr.Row():
        input_image = gr.Image(type="pil", label="Input Image")
        output_image = gr.AnnotatedImage(label="Output Image")

    with gr.Row():
        output_dataframe = gr.Dataframe(
            label="CO2 Estimated Data",
            interactive=False,
            # Must match the DataFrame columns produced by parse_info().
            headers=["label", "co2", "explanation"],
        )

    gr.Examples(
        examples=[
            "car_smoke.jpg",
            "grill.jpeg",
        ],
        inputs=input_image,
        label="Try these examples:",
    )

    # Re-run the estimation whenever the input image value changes.
    input_image.change(
        fn=estimate_co2,
        inputs=input_image,
        outputs=[output_image, output_dataframe],
    )

demo.launch()