Spaces:

ReefNet
/

reefnet-demo

Sleeping

shakesbeardz

Update .gitattributes to track large and binary files with Git LFS

b40e563 about 2 months ago

4.63 kB

	import io
	import boto3
	import requests
	import numpy as np
	import polars as pl
	from PIL import Image
	from botocore.config import Config
	import logging

	# Module-level logger, named after this module.
	logger = logging.getLogger(__name__)

	# S3 client for the sample-image bucket, pinned to the us-east-1 region.
	my_config = Config(region_name="us-east-1")
	s3_client = boto3.client("s3", config=my_config)

	# Base URL of EOL taxon pages; a page ID is appended to form a full link.
	EOL_URL = "https://eol.org/pages/"

	# Taxonomic rank names; the `rank` arguments in this module index into this list.
	RANKS = [
	    "kingdom",
	    "phylum",
	    "class",
	    "order",
	    "family",
	    "genus",
	    "species",
	]

	# Seconds to wait for the sample-image download before giving up, so a
	# stalled connection cannot hang the caller indefinitely.
	_SAMPLE_REQUEST_TIMEOUT = 30


	def get_sample(df, pred_taxon, rank):
	    '''
	    Retrieve a sample image of the predicted taxon and an EOL page link for more info.

	    Parameters:
	    -----------
	    df : DataFrame
	        DataFrame with all sample images listed and their filepaths (in "file_path" column).
	    pred_taxon : str
	        Predicted taxon of the uploaded image.
	    rank : int
	        Index of rank in RANKS chosen for prediction.

	    Returns:
	    --------
	    img : PIL.Image or None
	        Sample image of predicted taxon for display. None when no sample exists
	        for the taxon or when any step of the retrieval fails.
	    eol_page : str
	        On success, an HTML paragraph linking to the EOL page for the taxon
	        (may be a lower rank, e.g., species sample). Otherwise a plain-text
	        message explaining why no image is available.

	    Notes:
	    ------
	    Errors are never raised to the caller; they are logged with a traceback and
	    reported through the returned message instead.
	    '''
	    logger.info("Getting sample for taxon: %s at rank: %s", pred_taxon, rank)
	    try:
	        filepath, eol_page_id, full_name, is_exact = get_sample_data(df, pred_taxon, rank)
	    except Exception as e:
	        # logger.exception keeps the traceback, which logger.error would drop.
	        logger.exception("Error retrieving sample data: %s", e)
	        return None, f"We encountered the following error trying to retrieve a sample image: {e}."
	    if filepath is None:
	        logger.warning("No sample image found for taxon: %s", pred_taxon)
	        return None, f"Sorry, our EOL images do not include {pred_taxon}."

	    # Get sample image of selected individual
	    try:
	        img_src = s3_client.generate_presigned_url(
	            'get_object',
	            Params={'Bucket': 'treeoflife-10m-sample-images', 'Key': filepath},
	        )
	        img_resp = requests.get(img_src, timeout=_SAMPLE_REQUEST_TIMEOUT)
	        # Fail on HTTP errors here; otherwise the error body would be passed to
	        # Image.open and reported as an unidentifiable image.
	        img_resp.raise_for_status()
	        img = Image.open(io.BytesIO(img_resp.content))
	        full_eol_url = EOL_URL + eol_page_id
	        if is_exact:
	            eol_page = f"<p>Check out the EOL entry for {pred_taxon} to learn more: <a href={full_eol_url} target='_blank'>{full_eol_url}</a>.</p>"
	        else:
	            eol_page = f"<p>Check out an example EOL entry within {pred_taxon} to learn more: {full_name} <a href={full_eol_url} target='_blank'>{full_eol_url}</a>.</p>"
	        logger.info("Successfully retrieved sample image and EOL page for %s", pred_taxon)
	        return img, eol_page
	    except Exception as e:
	        logger.exception("Error retrieving sample image: %s", e)
	        return None, f"We encountered the following error trying to retrieve a sample image: {e}."

	def get_sample_data(df, pred_taxon, rank):
	    '''
	    Randomly select a sample image record for the given taxon.

	    Rows are first narrowed to those matching the predicted taxon at every rank
	    up to and including `rank`. Rows whose lower ranks are all null or empty
	    (an entry for exactly this taxon) are preferred; if none exist, any
	    matching row is sampled instead.

	    Parameters:
	    -----------
	    df : DataFrame
	        DataFrame with all sample images listed and their filepaths (in "file_path" column).
	        Must also contain an "eol_page_id" column and one column per entry in RANKS.
	    pred_taxon : str
	        Predicted taxon of the uploaded image: space-separated names, one per
	        rank, in RANKS order.
	    rank : int
	        Index of rank in RANKS chosen for prediction.

	    Returns:
	    --------
	    filepath : str or None
	        Filepath of selected sample image for predicted taxon; None when no row matches.
	    eol_page_id : str
	        EOL page ID of the selected sample, cast to a string; np.nan when no row matches.
	    full_name : str
	        Space-separated taxonomic name of the selected sample (null and empty
	        ranks are skipped); "" when no row matches.
	    is_exact : bool
	        Flag indicating if the match is exact (i.e., with empty lower ranks).

	    Raises:
	    -------
	    IndexError
	        If `pred_taxon` has fewer than `rank + 1` names or `rank` is outside RANKS.
	    '''
	    # Split once instead of on every loop iteration.
	    taxon_names = pred_taxon.split(" ")
	    for idx in range(rank + 1):
	        df = df.filter(pl.col(RANKS[idx]) == taxon_names[idx])

	    if df.is_empty():
	        return None, np.nan, "", False

	    # First, try to find entries with empty lower ranks
	    exact_df = df
	    for lower_rank in RANKS[rank + 1:]:
	        exact_df = exact_df.filter(pl.col(lower_rank).is_null() | (pl.col(lower_rank) == ""))

	    # If no exact matches, fall back to any entry with the specified rank
	    is_exact = not exact_df.is_empty()
	    chosen = (exact_df if is_exact else df).sample(n=1)

	    # An exact match is named by its ranks down to `rank`; a fallback sample is
	    # named by its full lineage. Null or empty ranks are skipped so the join
	    # cannot fail on None or leave doubled spaces.
	    name_ranks = RANKS[:rank + 1] if is_exact else RANKS
	    full_name = " ".join(part for part in chosen.select(name_ranks).row(0) if part)

	    return chosen["file_path"][0], chosen["eol_page_id"].cast(pl.String)[0], full_name, is_exact