Spaces:

ReefNet
/

reefnet-demo

Runtime error

App Files Files Community

shakesbeardz committed on Sep 30, 2024

Commit

b40e563

1 Parent(s): deda75b

Update .gitattributes to track large and binary files with Git LFS

Browse files

Files changed (29) hide show

.gitattributes +5 -0
.gitignore +2 -0
README.md +6 -9
app.py +334 -4
components/metadata.csv +3 -0
components/metadata_readme.md +11 -0
components/query.py +116 -0
components/sync_samples_to_s3.bash +34 -0
embed_texts.sh +12 -0
examples/Actinostola-abyssorum.png +3 -0
examples/Amanita-muscaria.jpeg +3 -0
examples/Carnegiea-gigantea.png +3 -0
examples/Felis-catus.jpeg +3 -0
examples/Onoclea-hintonii.jpg +0 -0
examples/Onoclea-sensibilis.jpg +0 -0
examples/Phoca-vitulina.png +3 -0
examples/Sarcoscypha-coccinea.jpeg +3 -0
examples/Ursus-arctos.jpeg +3 -0
examples/coral-snake.jpeg +3 -0
examples/milk-snake.png +3 -0
lib.py +170 -0
make_txt_embedding.py +193 -0
name_lookup.json +3 -0
requirements.txt +7 -0
templates.py +82 -0
test_lib.py +481 -0
txt_emb.npy +3 -0
txt_emb_species.json +3 -0
txt_emb_species.npy +3 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,8 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.json filter=lfs diff=lfs merge=lfs -text
+*.jpeg filter=lfs diff=lfs merge=lfs -text
+*.png filter=lfs diff=lfs merge=lfs -text
+components/metadata.csv filter=lfs diff=lfs merge=lfs -text
+txt_emb_species.json filter=lfs diff=lfs merge=lfs -text

.gitignore ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ .venv/
2	+ __pycache__/

README.md CHANGED Viewed

@@ -1,14 +1,11 @@
 ---
-title: Reefnet Demo
-emoji: 🐨
-colorFrom: yellow
-colorTo: indigo
 sdk: gradio
-sdk_version: 4.44.0
 app_file: app.py
 pinned: false
-license: cc-by-4.0
-short_description: ReefNet Demo
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: Bioclip Demo
+emoji: 🐘
+colorFrom: indigo
+colorTo: purple
 sdk: gradio
+sdk_version: 4.36.1
 app_file: app.py
 pinned: false
+license: mit
 ---

app.py CHANGED Viewed

@@ -1,7 +1,337 @@
 import gradio as gr
-def greet(name):
-    return "Hello " + name + "!!"
-demo = gr.Interface(fn=greet, inputs="text", outputs="text")
-demo.launch()

+import collections
+import heapq
+import json
+import os
+import logging
 import gradio as gr
+import numpy as np
+import polars as pl
+import torch
+import torch.nn.functional as F
+from open_clip import create_model, get_tokenizer
+from torchvision import transforms
+from templates import openai_imagenet_template
+from components.query import  get_sample
+# Configure the root logger once for the whole app (timestamp, level, logger name).
+log_format = "[%(asctime)s] [%(levelname)s] [%(name)s] %(message)s"
+logging.basicConfig(level=logging.INFO, format=log_format)
+logger = logging.getLogger()
+# Only referenced by the flagging code below, which is currently disabled.
+hf_token = os.getenv("HF_TOKEN")
+# For sample images
+METADATA_PATH = "components/metadata.csv"
+# Read the metadata and cast the EOL page ID column to int.
+# NOTE(review): an earlier comment mentioned filtering out a duplicated ablation
+# training split, but no filter is applied here - confirm whether one is needed.
+metadata_df = pl.read_csv(METADATA_PATH, low_memory = False)
+metadata_df = metadata_df.with_columns(pl.col("eol_page_id").cast(pl.Int64))
+# Model / tokenizer identifiers handed to open_clip in the __main__ block.
+model_str = "hf-hub:imageomics/bioclip"
+tokenizer_str = "ViT-B-16"
+# Precomputed species text embeddings and their names, loaded in the __main__ block.
+txt_emb_npy = "txt_emb_species.npy"
+txt_names_json = "txt_emb_species.json"
+# Species with a probability at or below this value are ignored when summing per rank.
+min_prob = 1e-9
+# Number of predictions shown in the output labels.
+k = 5
+device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+# Image pipeline: to tensor, resize to 224x224, then per-channel normalization.
+# NOTE(review): mean/std look like the standard CLIP statistics - confirm.
+preprocess_img = transforms.Compose(
+    [
+        transforms.ToTensor(),
+        transforms.Resize((224, 224), antialias=True),
+        transforms.Normalize(
+            mean=(0.48145466, 0.4578275, 0.40821073),
+            std=(0.26862954, 0.26130258, 0.27577711),
+        ),
+    ]
+)
+# Taxonomic ranks from broadest to most specific; the rank dropdown returns an index into this tuple.
+ranks = ("Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species")
+# Examples for the "Open-Ended" tab: [image path, rank shown in the dropdown].
+open_domain_examples = [
+    ["examples/Ursus-arctos.jpeg", "Species"],
+    ["examples/Phoca-vitulina.png", "Species"],
+    ["examples/Felis-catus.jpeg", "Genus"],
+    ["examples/Sarcoscypha-coccinea.jpeg", "Order"],
+]
+# Examples for the "Zero-Shot" tab: [image path, newline-separated candidate classes].
+zero_shot_examples = [
+    [
+        "examples/Ursus-arctos.jpeg",
+        "brown bear\nblack bear\npolar bear\nkoala bear\ngrizzly bear",
+    ],
+    ["examples/milk-snake.png", "coral snake\nmilk snake"],
+    ["examples/coral-snake.jpeg", "coral snake\nmilk snake"],
+    [
+        "examples/Carnegiea-gigantea.png",
+        "Carnegiea gigantea\nSchlumbergera opuntioides\nMammillaria albicoma",
+    ],
+    [
+        "examples/Amanita-muscaria.jpeg",
+        "Amanita fulva\nAmanita vaginata (grisette)\nAmanita calyptrata (coccoli)\nAmanita crocea\nAmanita rubescens (blusher)\nAmanita caesarea (Caesar's mushroom)\nAmanita jacksonii (American Caesar's mushroom)\nAmanita muscaria (fly agaric)\nAmanita pantherina (panther cap)",
+    ],
+    [
+        "examples/Actinostola-abyssorum.png",
+        "Animalia Cnidaria Hexacorallia Actiniaria Actinostolidae Actinostola abyssorum\nAnimalia Cnidaria Hexacorallia Actiniaria Actinostolidae Actinostola bulbosa\nAnimalia Cnidaria Hexacorallia Actiniaria Actinostolidae Actinostola callosa\nAnimalia Cnidaria Hexacorallia Actiniaria Actinostolidae Actinostola capensis\nAnimalia Cnidaria Hexacorallia Actiniaria Actinostolidae Actinostola carlgreni",
+    ],
+    [
+        "examples/Sarcoscypha-coccinea.jpeg",
+        "scarlet elf cup (coccinea)\nscharlachroter kelchbecherling (austriaca)\ncrimson cup (dudleyi)\nstalked scarlet cup (occidentalis)",
+    ],
+    [
+        "examples/Onoclea-hintonii.jpg",
+        "Onoclea attenuata\nOnoclea boryana\nOnoclea hintonii\nOnoclea intermedia\nOnoclea sensibilis",
+    ],
+    [
+        "examples/Onoclea-sensibilis.jpg",
+        "Onoclea attenuata\nOnoclea boryana\nOnoclea hintonii\nOnoclea intermedia\nOnoclea sensibilis",
+    ],
+]
+def indexed(lst, indices):
+    """Return the elements of ``lst`` found at ``indices``, in the order given."""
+    picked = []
+    for position in indices:
+        picked.append(lst[position])
+    return picked
+@torch.no_grad()
+def get_txt_features(classnames, templates):
+    """
+    Build one text embedding per class name by averaging over prompt templates.
+    Each class name is rendered through every template, encoded with the text
+    tower, L2-normalized per prompt, averaged, and re-normalized. The per-class
+    vectors are stacked along dim=1, i.e. one column per class.
+    """
+    def embed(classname):
+        prompts = tokenizer([render(classname) for render in templates])
+        encoded = model.encode_text(prompts.to(device))
+        averaged = F.normalize(encoded, dim=-1).mean(dim=0)
+        # Averaging unit vectors shortens them; bring the mean back to unit length.
+        averaged /= averaged.norm()
+        return averaged
+    return torch.stack([embed(classname) for classname in classnames], dim=1)
+@torch.no_grad()
+def zero_shot_classification(img, cls_str: str) -> dict[str, float]:
+    """
+    Classify ``img`` among the user-supplied classes.
+    Parameters:
+        img: image as delivered by the Gradio image input.
+        cls_str: candidate classes, one per line; blank lines are ignored.
+    Returns:
+        Mapping from class name to its softmax probability.
+    Raises:
+        gr.Error: when no image or no class name was provided.
+    """
+    if img is None:
+        raise gr.Error("Please upload an image first.")
+    classes = [cls.strip() for cls in (cls_str or "").split("\n") if cls.strip()]
+    if not classes:
+        raise gr.Error("Please enter at least one class name.")
+    txt_features = get_txt_features(classes, openai_imagenet_template)
+    img = preprocess_img(img).to(device)
+    img_features = model.encode_image(img.unsqueeze(0))
+    img_features = F.normalize(img_features, dim=-1)
+    # Drop only the batch dimension: a bare squeeze() would also collapse a single
+    # class to a 0-d tensor, and tolist() would then return a float instead of a list.
+    logits = (model.logit_scale.exp() * img_features @ txt_features).squeeze(0)
+    probs = F.softmax(logits, dim=0).to("cpu").tolist()
+    return dict(zip(classes, probs))
+def format_name(taxon, common):
+    """Join the taxon parts with spaces and append ``(common)`` when a common name is given."""
+    scientific = " ".join(taxon)
+    return f"{scientific} ({common})" if common else scientific
+@torch.no_grad()
+def open_domain_classification(img, rank: int, return_all=False):
+    """
+    Predicts from the entire tree of life.
+    If targeting a higher rank than species, then this function predicts among all
+    species, then sums up species-level probabilities for the given rank.
+    Parameters:
+        img: image as delivered by the Gradio image input.
+        rank: index into ``ranks`` of the taxonomic rank to predict.
+        return_all: when True, also return a sample image and an info snippet
+            for the top prediction (fetched through ``get_sample``).
+    Returns:
+        ``{name: probability}`` for the top ``k`` predictions, or the tuple
+        ``(predictions, sample_img, taxon_url)`` when ``return_all`` is True.
+    Raises:
+        gr.Error: when no image was provided.
+    """
+    if img is None:
+        raise gr.Error("Please upload an image first.")
+    logger.info(f"Starting open domain classification for rank: {rank}")
+    img = preprocess_img(img).to(device)
+    img_features = model.encode_image(img.unsqueeze(0))
+    img_features = F.normalize(img_features, dim=-1)
+    logits = (model.logit_scale.exp() * img_features @ txt_emb).squeeze(0)
+    probs = F.softmax(logits, dim=0)
+    if rank + 1 == len(ranks):
+        # Species level: the embeddings are already per species, so take the top k directly.
+        topk = probs.topk(k)
+        top_indices = topk.indices.tolist()
+        # tolist() yields plain floats/ints so the label component never receives tensors.
+        prediction_dict = {
+            format_name(*txt_names[i]): prob
+            for i, prob in zip(top_indices, topk.values.tolist())
+        }
+        # Strip the "(common name)" suffix before looking up a sample image.
+        top_prediction_name = format_name(*txt_names[top_indices[0]]).split("(")[0]
+    else:
+        output = collections.defaultdict(float)
+        # flatten() keeps a 1-D index tensor even when a single species passes the
+        # threshold; squeeze() would return a 0-d tensor that cannot be iterated.
+        kept = torch.nonzero(probs > min_prob).flatten()
+        for i, prob in zip(kept.tolist(), probs[kept].tolist()):
+            output[" ".join(txt_names[i][0][: rank + 1])] += prob
+        topk_names = heapq.nlargest(k, output, key=output.get)
+        prediction_dict = {name: output[name] for name in topk_names}
+        top_prediction_name = topk_names[0]
+    logger.info(f"Prediction dictionary: {prediction_dict}")
+    logger.info(f"Top prediction name: {top_prediction_name}")
+    if not return_all:
+        # Nothing would display the sample, so skip the network round trip.
+        return prediction_dict
+    sample_img, taxon_url = get_sample(metadata_df, top_prediction_name, rank)
+    logger.info(f"Sample image and taxon URL: {sample_img}, {taxon_url}")
+    return prediction_dict, sample_img, taxon_url
+def change_output(choice):
+    """Build an empty prediction label titled with the rank at index ``choice``."""
+    rank_name = ranks[choice]
+    return gr.Label(num_top_classes=k, label=rank_name, show_label=True, value=None)
+# Script entry point: load the model and precomputed text embeddings, then build
+# and launch the two-tab Gradio UI. The names assigned here (model, tokenizer,
+# txt_emb, txt_names) are the module globals read by the functions above.
+if __name__ == "__main__":
+    logger.info("Starting.")
+    model = create_model(model_str, output_dict=True, require_pretrained=True)
+    model = model.to(device)
+    logger.info("Created model.")
+    model = torch.compile(model)
+    logger.info("Compiled model.")
+    tokenizer = get_tokenizer(tokenizer_str)
+    # Memory-map the embedding file for reading, then move it to the compute device.
+    txt_emb = torch.from_numpy(np.load(txt_emb_npy, mmap_mode="r")).to(device)
+    with open(txt_names_json) as fd:
+        txt_names = json.load(fd)
+    # Count the columns that contain at least one non-zero value and compare with
+    # the total number of columns.
+    done = txt_emb.any(axis=0).sum().item()
+    total = txt_emb.shape[1]
+    # NOTE(review): status_msg is computed but never displayed in this block.
+    status_msg = ""
+    if done != total:
+        status_msg = f"{done}/{total} ({done / total * 100:.1f}%) indexed"
+    with gr.Blocks() as app:
+        with gr.Tab("Open-Ended"):
+            with gr.Row(variant = "panel", elem_id = "images_panel"):
+                with gr.Column():
+                    img_input = gr.Image(height = 400, sources=["upload"])
+                with gr.Column():
+                    # display sample image of top predicted taxon
+                    sample_img = gr.Image(label = "Sample Image of Predicted Taxon",
+                                        height = 400,
+                                        show_download_button = False)
+                    taxon_url = gr.HTML(label = "More Information",
+                                    elem_id = "url"
+                                    )
+            with gr.Row():
+                with gr.Column():
+                    # type="index" makes the dropdown pass the position in `ranks`, not the label.
+                    rank_dropdown = gr.Dropdown(
+                        label="Taxonomic Rank",
+                        info="Which taxonomic rank to predict. Fine-grained ranks (genus, species) are more challenging.",
+                        choices=ranks,
+                        value="Species",
+                        type="index",
+                    )
+                    open_domain_btn = gr.Button("Submit", variant="primary")
+                with gr.Column():
+                    open_domain_output = gr.Label(
+                        num_top_classes=k,
+                        label="Prediction",
+                        show_label=True,
+                        value=None,
+                    )
+                  #  open_domain_flag_btn = gr.Button("Flag Mistake", variant="primary")
+            with gr.Row():
+                # Examples only fill the prediction label, hence return_all=False.
+                gr.Examples(
+                    examples=open_domain_examples,
+                    inputs=[img_input, rank_dropdown],
+                    cache_examples=True,
+                    fn=lambda img, rank: open_domain_classification(img, rank, return_all=False),
+                    outputs=[open_domain_output],
+                )
+            # The string literal below holds disabled flagging code; it is never executed.
+            '''
+            # Flagging Code
+            open_domain_callback = gr.HuggingFaceDatasetSaver(
+                hf_token, "bioclip-demo-open-domain-mistakes", private=True
+            )
+            open_domain_callback.setup(
+                [img_input, rank_dropdown, open_domain_output],
+                flagging_dir="bioclip-demo-open-domain-mistakes/logs/flagged",
+            )
+            open_domain_flag_btn.click(
+                lambda *args: open_domain_callback.flag(args),
+                [img_input, rank_dropdown, open_domain_output],
+                None,
+                preprocess=False,
+            )
+            '''
+        with gr.Tab("Zero-Shot"):
+            with gr.Row():
+                img_input_zs = gr.Image(height = 400, sources=["upload"])
+            with gr.Row():
+                with gr.Column():
+                    classes_txt = gr.Textbox(
+                        placeholder="Canis familiaris (dog)\nFelis catus (cat)\n...",
+                        lines=3,
+                        label="Classes",
+                        show_label=True,
+                        info="Use taxonomic names where possible; include common names if possible.",
+                    )
+                    zero_shot_btn = gr.Button("Submit", variant="primary")
+                with gr.Column():
+                    zero_shot_output = gr.Label(
+                        num_top_classes=k, label="Prediction", show_label=True
+                    )
+             #       zero_shot_flag_btn = gr.Button("Flag Mistake", variant="primary")
+            with gr.Row():
+                gr.Examples(
+                    examples=zero_shot_examples,
+                    inputs=[img_input_zs, classes_txt],
+                    cache_examples=True,
+                    fn=zero_shot_classification,
+                    outputs=[zero_shot_output],
+                )
+        # The string literal below holds disabled flagging code; it is never executed.
+        '''
+        # Flagging Code
+        zero_shot_callback = gr.HuggingFaceDatasetSaver(
+            hf_token, "bioclip-demo-zero-shot-mistakes", private=True
+        )
+        zero_shot_callback.setup(
+            [img_input, zero_shot_output], flagging_dir="bioclip-demo-zero-shot-mistakes/logs/flagged"
+        )
+        zero_shot_flag_btn.click(
+            lambda *args: zero_shot_callback.flag(args),
+            [img_input, zero_shot_output],
+            None,
+            preprocess=False,
+        )
+        '''
+        # Event wiring: changing the rank resets the label; the buttons run the classifiers.
+        rank_dropdown.change(
+            fn=change_output, inputs=rank_dropdown, outputs=[open_domain_output]
+        )
+        # The interactive path also fills the sample image and info panel (return_all=True).
+        open_domain_btn.click(
+            fn=lambda img, rank: open_domain_classification(img, rank, return_all=True),
+            inputs=[img_input, rank_dropdown],
+            outputs=[open_domain_output, sample_img, taxon_url],
+        )
+        zero_shot_btn.click(
+            fn=zero_shot_classification,
+            inputs=[img_input_zs, classes_txt],
+            outputs=zero_shot_output,
+        )
+        # Footer to point out to model and data from app page.
+        gr.Markdown(
+            """
+            For more information on the [BioCLIP Model](https://huggingface.co/imageomics/bioclip) creation, see our [BioCLIP Project GitHub](https://github.com/Imageomics/bioclip), and
+            for easier integration of BioCLIP, checkout [pybioclip](https://github.com/Imageomics/pybioclip).
+            To learn more about the data, check out our [TreeOfLife-10M Dataset](https://huggingface.co/datasets/imageomics/TreeOfLife-10M).
+            """
+        )
+    # Allow at most 20 queued requests, then start the server.
+    app.queue(max_size=20)
+    app.launch(share=True)

components/metadata.csv ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d8576f6ca106f35387506369a70df01fb92192a740c3b5da2a12ad8303976aad
+size 233934143

components/metadata_readme.md ADDED Viewed

	@@ -0,0 +1,11 @@

+---
+title: Bioclip Demo
+emoji: 🐘
+colorFrom: indigo
+colorTo: purple
+sdk: gradio
+sdk_version: 4.36.1
+app_file: app.py
+pinned: false
+license: mit
+---

components/query.py ADDED Viewed

	@@ -0,0 +1,116 @@

+import io
+import boto3
+import requests
+import numpy as np
+import polars as pl
+from PIL import Image
+from botocore.config import Config
+import logging
+logger = logging.getLogger(__name__)
+# S3 for sample images
+my_config = Config(
+    region_name='us-east-1'
+)
+# The client is created once, when this module is imported.
+s3_client = boto3.client('s3', config=my_config)
+# Set basepath for EOL pages for info
+EOL_URL = "https://eol.org/pages/"
+# Rank names from broadest to most specific; also used as DataFrame column names
+# when filtering the metadata in get_sample_data.
+RANKS = ["kingdom", "phylum", "class", "order", "family", "genus", "species"]
+def get_sample(df, pred_taxon, rank):
+    '''
+    Function to retrieve a sample image of the predicted taxon and EOL page link for more info.
+    Failures are reported through the returned message instead of being raised.
+    Parameters:
+    -----------
+    df : DataFrame
+        DataFrame with all sample images listed and their filepaths (in "file_path" column).
+    pred_taxon : str
+        Predicted taxon of the uploaded image.
+    rank : int
+        Index of rank in RANKS chosen for prediction.
+    Returns:
+    --------
+    img : PIL.Image or None
+        Sample image of predicted taxon for display; None when no image could be retrieved.
+    eol_page : str
+        HTML snippet linking to the EOL page for the taxon (may be a lower rank, e.g., species sample),
+        or a plain-text explanation when no image is returned.
+    '''
+    logger.info(f"Getting sample for taxon: {pred_taxon} at rank: {rank}")
+    try:
+        filepath, eol_page_id, full_name, is_exact = get_sample_data(df, pred_taxon, rank)
+    except Exception as e:
+        # logger.exception keeps the traceback, which logger.error dropped.
+        logger.exception(f"Error retrieving sample data: {e}")
+        return None, f"We encountered the following error trying to retrieve a sample image: {e}."
+    if filepath is None:
+        logger.warning(f"No sample image found for taxon: {pred_taxon}")
+        return None, f"Sorry, our EOL images do not include {pred_taxon}."
+    # Get sample image of selected individual
+    try:
+        img_src = s3_client.generate_presigned_url('get_object',
+                                                   Params={'Bucket': 'treeoflife-10m-sample-images',
+                                                           'Key': filepath}
+                                                   )
+        # Bound the wait so a stalled download cannot hang the request indefinitely.
+        img_resp = requests.get(img_src, timeout=30)
+        # Surface HTTP failures (e.g. 403/404) instead of feeding an error body to PIL.
+        img_resp.raise_for_status()
+        img = Image.open(io.BytesIO(img_resp.content))
+        full_eol_url = EOL_URL + eol_page_id
+        if is_exact:
+            eol_page = f"<p>Check out the EOL entry for {pred_taxon} to learn more: <a href={full_eol_url} target='_blank'>{full_eol_url}</a>.</p>"
+        else:
+            eol_page = f"<p>Check out an example EOL entry within {pred_taxon} to learn more: {full_name} <a href={full_eol_url} target='_blank'>{full_eol_url}</a>.</p>"
+        logger.info(f"Successfully retrieved sample image and EOL page for {pred_taxon}")
+        return img, eol_page
+    except Exception as e:
+        logger.exception(f"Error retrieving sample image: {e}")
+        return None, f"We encountered the following error trying to retrieve a sample image: {e}."
+def get_sample_data(df, pred_taxon, rank):
+    '''
+    Function to randomly select a sample individual of the given taxon.
+    Rows whose ranks below the requested one are all empty are preferred ("exact"
+    matches); otherwise any row within the taxon is sampled.
+    Parameters:
+    -----------
+    df : DataFrame
+        DataFrame with all sample images listed and their filepaths (in "file_path" column).
+    pred_taxon : str
+        Predicted taxon of the uploaded image, as space-separated rank values starting at kingdom.
+    rank : int
+        Index of rank in RANKS chosen for prediction.
+    Returns:
+    --------
+    filepath : str
+        Filepath of selected sample image for predicted taxon (None when nothing matches).
+    eol_page_id : str
+        EOL page ID associated with predicted taxon for more information (np.nan when nothing matches).
+    full_name : str
+        Full taxonomic name of the selected sample.
+    is_exact : bool
+        Flag indicating if the match is exact (i.e., with empty lower ranks).
+    Raises:
+    -------
+    IndexError
+        If pred_taxon has fewer space-separated parts than rank + 1.
+    '''
+    taxon_parts = pred_taxon.split(" ")
+    for idx in range(rank + 1):
+        df = df.filter(pl.col(RANKS[idx]) == taxon_parts[idx])
+    if df.shape[0] == 0:
+        return None, np.nan, "", False
+    # First, try to find entries with empty lower ranks
+    exact_df = df
+    for lower_rank in RANKS[rank + 1:]:
+        exact_df = exact_df.filter((pl.col(lower_rank).is_null()) | (pl.col(lower_rank) == ""))
+    is_exact = exact_df.shape[0] > 0
+    # If no exact matches, fall back to any entry within the requested taxon
+    df_filtered = (exact_df if is_exact else df).sample()
+    shown_ranks = RANKS[: rank + 1] if is_exact else RANKS
+    # Lower ranks may be null or empty in the sampled row; skipping them avoids a
+    # TypeError from str.join and stray double spaces in the displayed name.
+    full_name = " ".join(str(part) for part in df_filtered.select(shown_ranks).row(0) if part)
+    return df_filtered["file_path"][0], df_filtered["eol_page_id"].cast(pl.String)[0], full_name, is_exact

components/sync_samples_to_s3.bash ADDED Viewed

	@@ -0,0 +1,34 @@

+#!/bin/bash
+# Sync every immediate sub-directory of <BASE_DIR> to the sample-image S3 bucket,
+# keeping each sub-directory name as the key prefix.
+<<COMMENT
+Usage:
+bash sync_samples_to_s3.bash <BASE_DIR>
+Dependencies:
+- awscli (https://aws.amazon.com/cli/)
+Credentials to export as environment variables:
+- AWS_ACCESS_KEY_ID
+- AWS_SECRET_ACCESS_KEY
+COMMENT
+# Check if a valid directory is provided as an argument
+if [ -z "$1" ]; then
+  echo "Usage: $0 <BASE_DIR>"
+  exit 1
+fi
+if [ ! -d "$1" ]; then
+  echo "Error: $1 is not a valid directory"
+  exit 1
+fi
+BASE_DIR="$1"
+S3_BUCKET="s3://treeoflife-10m-sample-images"
+# Loop through all directories and sync them to S3.
+# BASE_DIR is quoted so paths containing spaces are not split into several words.
+for dir in "$BASE_DIR"/*; do
+  if [ -d "$dir" ]; then
+    dir_name=$(basename "$dir")
+    aws s3 sync "$dir" "$S3_BUCKET/$dir_name/"
+  fi
+done

embed_texts.sh ADDED Viewed

	@@ -0,0 +1,12 @@

+#!/usr/bin/env bash
+#SBATCH --nodes=1
+#SBATCH --account=PAS2136
+#SBATCH --gpus-per-node=1
+#SBATCH --ntasks-per-node=10
+#SBATCH --job-name=embed-treeoflife
+#SBATCH --time=12:00:00
+#SBATCH --partition=gpu
+# Slurm batch job that builds the text embeddings with make_txt_embedding.py.
+# The output path ends in .npy because np.save appends that suffix to any other
+# name, after which the script's own file-existence checks would miss the file.
+# With this name the run yields txt_emb.npy, txt_emb_avgs.npy, txt_emb_species.npy
+# and txt_emb_species.json.
+python make_txt_embedding.py \
+    --catalog-path /fs/ess/PAS2136/open_clip/data/evobio10m-v3.3/predicted-statistics.csv \
+    --out-path txt_emb.npy

examples/Actinostola-abyssorum.png ADDED Viewed

Git LFS Details

SHA256: cc56a3aedc6966da7add6093506ba3fc792b6dd2d3178878968c9c6978a4535a
Pointer size: 132 Bytes
Size of remote file: 1.13 MB

examples/Amanita-muscaria.jpeg ADDED Viewed

Git LFS Details

SHA256: c633755d4d45bc8bf86b4f4b889fc3f7acbeaa0e86cc69fce5f25165e21063eb
Pointer size: 132 Bytes
Size of remote file: 1.23 MB

examples/Carnegiea-gigantea.png ADDED Viewed

Git LFS Details

SHA256: 8e55ff224c0b9421b66c2feaf592f20ba473425b79a5e79abca1c8ca8a001e67
Pointer size: 131 Bytes
Size of remote file: 419 kB

examples/Felis-catus.jpeg ADDED Viewed

Git LFS Details

SHA256: 4d68c295156ee782524cc9f4269e3111743f7a12441f49c095b975000512829f
Pointer size: 131 Bytes
Size of remote file: 650 kB

examples/Onoclea-hintonii.jpg ADDED Viewed

examples/Onoclea-sensibilis.jpg ADDED Viewed

examples/Phoca-vitulina.png ADDED Viewed

Git LFS Details

SHA256: c717b35bfc07ebc9b9afd041f62bd1744f69e7e40ed9a6eac3a14f11f1ebc7fc
Pointer size: 131 Bytes
Size of remote file: 455 kB

examples/Sarcoscypha-coccinea.jpeg ADDED Viewed

Git LFS Details

SHA256: 84dfec1fe373d375cd31f129dfd961dfa9d0b400575f9dd9610a08d900fd1cf9
Pointer size: 131 Bytes
Size of remote file: 409 kB

examples/Ursus-arctos.jpeg ADDED Viewed

Git LFS Details

SHA256: b1ead956025e2ef9afa71e352326a299881e575bfb42fae65ae2c157196e2e73
Pointer size: 131 Bytes
Size of remote file: 610 kB

examples/coral-snake.jpeg ADDED Viewed

Git LFS Details

SHA256: 871066d1d902bbc5ab9fffa38b2a2d5117bf1b5eacc932188b782cdb6a6eed01
Pointer size: 130 Bytes
Size of remote file: 51.8 kB

examples/milk-snake.png ADDED Viewed

Git LFS Details

SHA256: 4c5820dfcdaa056903767cc7a3dade6e9e9d24c686fab9d457889879e80fa3ab
Pointer size: 131 Bytes
Size of remote file: 411 kB

lib.py ADDED Viewed

	@@ -0,0 +1,170 @@

+"""
+Mostly a TaxonomicTree class that implements a taxonomy and some helpers for easily
+walking and looking in the tree.
+A tree is an arrangement of TaxonomicNodes.
+"""
+import itertools
+import json
+class TaxonomicNode:
+    """
+    One named node of the taxonomy.
+    ``index`` is the integer assigned when the node was created, ``root`` is the
+    owning tree (its ``size`` counter supplies the index for each new node) and
+    ``_children`` maps child names to child nodes.
+    """
+    __slots__ = ("name", "index", "root", "_children")
+    def __init__(self, name, index, root):
+        self.name = name
+        self.index = index
+        self.root = root
+        self._children = {}
+    def add(self, name):
+        """Insert the lineage ``name`` below this node, creating any missing nodes."""
+        if not name:
+            return
+        first, rest = name[0], name[1:]
+        if first not in self._children:
+            # The root's running size doubles as the next free index.
+            self._children[first] = TaxonomicNode(first, self.root.size, self.root)
+            self.root.size += 1
+        self._children[first].add(rest)
+    def children(self, name):
+        """Return ``{(child name, child index)}`` for the node reached by following ``name``.
+        An unknown path yields an empty set.
+        """
+        if not name:
+            return {(child.name, child.index) for child in self._children.values()}
+        first, rest = name[0], name[1:]
+        if first not in self._children:
+            return set()
+        return self._children[first].children(rest)
+    def descendants(self, prefix=None):
+        """Iterates over all values in the subtree that match prefix.
+        Yields ``(name tuple, index)`` pairs; every name tuple starts with this node's name.
+        """
+        if not prefix:
+            yield (self.name,), self.index
+            for child in self._children.values():
+                for name, i in child.descendants():
+                    yield (self.name, *name), i
+            return
+        first, rest = prefix[0], prefix[1:]
+        if first not in self._children:
+            return
+        for name, i in self._children[first].descendants(rest):
+            yield (self.name, *name), i
+    def values(self):
+        """Iterates over all (name, i) pairs in the tree."""
+        yield (self.name,), self.index
+        for child in self._children.values():
+            for name, index in child.values():
+                yield (self.name, *name), index
+    @classmethod
+    def from_dict(cls, dct, root):
+        """Rebuild a node and its subtree from a dict with "name", "index" and "children" keys."""
+        node = cls(dct["name"], dct["index"], root)
+        node._children = {
+            child["name"]: cls.from_dict(child, root) for child in dct["children"]
+        }
+        return node
+class TaxonomicTree:
+    """
+    Taxonomy rooted at a set of kingdoms, with lookups by name prefix.
+    Every name added receives an integer index; ``size`` is the next index to
+    hand out and therefore the number of nodes created so far.
+    """
+    def __init__(self):
+        self.kingdoms = {}
+        self.size = 0
+    def add(self, name: list[str]):
+        """Insert the lineage ``name`` (kingdom first), creating any missing nodes."""
+        if name:
+            head, tail = name[0], name[1:]
+            if head not in self.kingdoms:
+                self.kingdoms[head] = TaxonomicNode(head, self.size, self)
+                self.size += 1
+            self.kingdoms[head].add(tail)
+    def children(self, name=None):
+        """Return ``{(name, index)}`` of the direct children below ``name`` (the kingdoms when empty)."""
+        if name:
+            kingdom = self.kingdoms.get(name[0])
+            return set() if kingdom is None else kingdom.children(name[1:])
+        return {(kingdom.name, kingdom.index) for kingdom in self.kingdoms.values()}
+    def descendants(self, prefix=None):
+        """Yield every ``(name, index)`` pair in the tree whose name starts with ``prefix``."""
+        if prefix:
+            kingdom = self.kingdoms.get(prefix[0])
+            if kingdom is not None:
+                yield from kingdom.descendants(prefix[1:])
+        else:
+            # No prefix: walk every kingdom's whole subtree.
+            for kingdom in self.kingdoms.values():
+                yield from kingdom.descendants()
+    def values(self):
+        """Yield every ``(name, index)`` pair stored in the tree."""
+        for kingdom in self.kingdoms.values():
+            for pair in kingdom.values():
+                yield pair
+    def __len__(self):
+        return self.size
+    @classmethod
+    def from_dict(cls, dct):
+        """Rebuild a tree from a dict with "kingdoms" and "size" keys."""
+        tree = cls()
+        rebuilt = {}
+        for kingdom in dct["kingdoms"]:
+            rebuilt[kingdom["name"]] = TaxonomicNode.from_dict(kingdom, tree)
+        tree.kingdoms = rebuilt
+        tree.size = dct["size"]
+        return tree
+class TaxonomicJsonEncoder(json.JSONEncoder):
+    """
+    JSON encoder for TaxonomicTree and TaxonomicNode objects.
+    Nodes become {"name", "index", "children"} dicts and trees become
+    {"kingdoms", "size"} dicts, the same keys the ``from_dict`` constructors read.
+    """
+    def default(self, obj):
+        if isinstance(obj, TaxonomicNode):
+            return {
+                "name": obj.name,
+                "index": obj.index,
+                "children": list(obj._children.values()),
+            }
+        if isinstance(obj, TaxonomicTree):
+            return {
+                "kingdoms": list(obj.kingdoms.values()),
+                "size": obj.size,
+            }
+        # Delegate to the base class so unsupported objects raise the standard
+        # "not JSON serializable" TypeError. super() already binds self.
+        return super().default(obj)
+def batched(iterable, n):
+    """
+    Yield the items of ``iterable`` in chunks of at most ``n``, each chunk transposed.
+    Every chunk is emitted as ``zip(*chunk)``, so an iterable of (name, index)
+    pairs produces, per chunk, a tuple of names followed by a tuple of indices:
+    batched([(a, 1), (b, 2), (c, 3)], 2) --> ((a, b), (1, 2)) then ((c,), (3,))
+    Raises ValueError when ``n`` is smaller than one.
+    """
+    if n < 1:
+        raise ValueError("n must be at least one")
+    source = iter(iterable)
+    while True:
+        chunk = tuple(itertools.islice(source, n))
+        if not chunk:
+            return
+        yield zip(*chunk)

make_txt_embedding.py ADDED Viewed

	@@ -0,0 +1,193 @@

+"""
+Makes the entire set of text embeddings for all possible names in the tree of life.
+Uses the catalog.csv file from TreeOfLife-10M.
+"""
+import argparse
+import csv
+import json
+import os
+import logging
+import numpy as np
+import torch
+import torch.nn.functional as F
+from open_clip import create_model, get_tokenizer
+from tqdm import tqdm
+import lib
+from templates import openai_imagenet_template
+# Configure the root logger (timestamp, level, logger name) for the whole script.
+log_format = "[%(asctime)s] [%(levelname)s] [%(name)s] %(message)s"
+logging.basicConfig(level=logging.INFO, format=log_format)
+logger = logging.getLogger()
+# Model / tokenizer identifiers handed to open_clip in the __main__ block.
+model_str = "hf-hub:imageomics/bioclip"
+tokenizer_str = "ViT-B-16"
+device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+# Taxonomic ranks from broadest to most specific; a name of length i + 1 belongs to ranks[i].
+ranks = ("Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species")
+@torch.no_grad()
+def write_txt_features(name_lookup):
+    """
+    Embed every name in ``name_lookup`` and store the result at ``args.out_path``.
+    The output is a float32 array with 512 rows and one column per name index.
+    Batches whose columns already hold non-zero values are skipped, so an
+    interrupted run can resume from the file on disk. The array is written
+    every 100 batches and once more at the end.
+    Reads the module-level ``args``, ``model`` and ``tokenizer``.
+    """
+    def save_checkpoint(features):
+        # np.save appends ".npy" to string paths that lack it, which would make the
+        # isfile(args.out_path) checks miss the file; an open handle is written as-is.
+        with open(args.out_path, "wb") as fd:
+            np.save(fd, features)
+    if os.path.isfile(args.out_path):
+        all_features = np.load(args.out_path)
+    else:
+        all_features = np.zeros((512, len(name_lookup)), dtype=np.float32)
+    # Every name expands into one prompt per template; keep at least one name per
+    # batch so a small --batch-size cannot produce a batch size of zero.
+    batch_size = max(1, args.batch_size // len(openai_imagenet_template))
+    # Round up so a final partial batch is counted in the progress bar.
+    total_batches = (len(name_lookup) + batch_size - 1) // batch_size
+    for batch, (names, indices) in enumerate(
+        tqdm(
+            lib.batched(name_lookup.values(), batch_size),
+            desc="txt feats",
+            total=total_batches,
+        )
+    ):
+        indices = list(indices)
+        # Skip if any non-zero elements
+        if all_features[:, indices].any():
+            logger.info(f"Skipping batch {batch}")
+            continue
+        txts = [
+            template(name) for name in names for template in openai_imagenet_template
+        ]
+        txts = tokenizer(txts).to(device)
+        txt_features = model.encode_text(txts)
+        # Regroup the flat prompt batch as (name, template, feature).
+        txt_features = torch.reshape(
+            txt_features, (len(names), len(openai_imagenet_template), 512)
+        )
+        # Normalize each prompt, average over templates, then re-normalize per name.
+        txt_features = F.normalize(txt_features, dim=2).mean(dim=1)
+        txt_features /= txt_features.norm(dim=1, keepdim=True)
+        all_features[:, indices] = txt_features.T.cpu().numpy()
+        if batch % 100 == 0:
+            save_checkpoint(all_features)
+    save_checkpoint(all_features)
+def convert_txt_features_to_avgs(name_lookup):
+    """
+    Replace the embedding of every non-species name with a normalized average of
+    its descendants' embeddings, and save the result next to ``args.out_path``
+    with an ``_avgs`` suffix.
+    Descendants with at least six name parts are averaged. Names without such
+    descendants are zeroed and counted. Ranks are processed from the most
+    specific to the broadest, and the array is updated in place as it goes.
+    Reads the module-level ``args``.
+    """
+    assert os.path.isfile(args.out_path)
+    # Put that big boy on the GPU. We're going fast.
+    all_features = torch.from_numpy(np.load(args.out_path)).to(device)
+    logger.info("Loaded text features from disk to %s.", device)
+    # Bucket every (name, index) pair by rank; a name's length determines its rank.
+    names_by_rank = [set() for rank in ranks]
+    for name, index in tqdm(name_lookup.values()):
+        i = len(name) - 1
+        names_by_rank[i].add((name, index))
+    zeroed = 0
+    for i, rank in reversed(list(enumerate(ranks))):
+        if rank == "Species":
+            continue
+        for name, index in tqdm(names_by_rank[i], desc=rank):
+            # Transpose the matching (name, index) pairs into a tuple of names and
+            # a tuple of indices; the result is an empty tuple when nothing matches.
+            species = tuple(
+                zip(
+                    *(
+                        (d, i)
+                        for d, i in name_lookup.descendants(prefix=name)
+                        if len(d) >= 6
+                    )
+                )
+            )
+            if not species:
+                logger.warning("No species for %s.", " ".join(name))
+                all_features[:, index] = 0.0
+                zeroed += 1
+                continue
+            # Only the indices are needed below; `values` is unused.
+            values, indices = species
+            mean = all_features[:, indices].mean(dim=1)
+            all_features[:, index] = F.normalize(mean, dim=0)
+    out_path, ext = os.path.splitext(args.out_path)
+    np.save(f"{out_path}_avgs{ext}", all_features.cpu().numpy())
+    if zeroed:
+        logger.warning(
+            "Zeroed out %d nodes because they didn't have any genus or species-level labels.",
+            zeroed,
+        )
+def convert_txt_features_to_species_only(name_lookup):
+    """
+    Extract the embeddings of full seven-part names into their own files.
+    Writes the reduced array next to ``args.out_path`` with a ``_species``
+    suffix, plus a ``_species.json`` file listing the names in column order.
+    Reads the module-level ``args``.
+    """
+    assert os.path.isfile(args.out_path)
+    all_features = np.load(args.out_path)
+    logger.info("Loaded text features from disk.")
+    # Keep only the entries whose name spans all seven ranks.
+    species = [(taxon, column) for taxon, column in name_lookup.descendants() if len(taxon) == 7]
+    species_features = np.zeros((512, len(species)), dtype=np.float32)
+    species_names = []
+    for target_column, (taxon, source_column) in enumerate(tqdm(species)):
+        species_features[:, target_column] = all_features[:, source_column]
+        species_names.append(taxon)
+    out_path, ext = os.path.splitext(args.out_path)
+    np.save(f"{out_path}_species{ext}", species_features)
+    with open(f"{out_path}_species.json", "w") as fd:
+        json.dump(species_names, fd, indent=2)
+def get_name_lookup(catalog_path, cache_path):
+    """
+    Load the taxonomic tree from ``cache_path``, or build it from the catalog.
+    When the cache file does not exist, every catalog row is added to a new tree
+    (truncated at its first empty rank) and the tree is written to ``cache_path``.
+    Parameters:
+        catalog_path: CSV file with kingdom ... species columns.
+        cache_path: JSON file used to cache the built tree.
+    Returns:
+        lib.TaxonomicTree
+    """
+    if os.path.isfile(cache_path):
+        with open(cache_path) as fd:
+            return lib.TaxonomicTree.from_dict(json.load(fd))
+    rank_columns = ("kingdom", "phylum", "class", "order", "family", "genus", "species")
+    lookup = lib.TaxonomicTree()
+    # newline="" is what the csv module expects for files it parses itself.
+    with open(catalog_path, newline="") as fd:
+        for row in tqdm(csv.DictReader(fd), desc="catalog"):
+            # Keep the ranks up to, but not including, the first empty value.
+            name = []
+            for column in rank_columns:
+                value = row[column]
+                if not value:
+                    break
+                name.append(value)
+            lookup.add(name)
+    # Write to the path that was passed in rather than the global CLI argument, so
+    # the cache lands where the next call will look for it.
+    with open(cache_path, "w") as fd:
+        json.dump(lookup, fd, cls=lib.TaxonomicJsonEncoder)
+    return lookup
+# Script entry point. The names assigned here (args, model, tokenizer) are the
+# module globals read by the functions above.
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--catalog-path",
+        help="Path to the catalog.csv file from TreeOfLife-10M.",
+        required=True,
+    )
+    parser.add_argument("--out-path", help="Path to the output file.", required=True)
+    parser.add_argument(
+        "--name-cache-path",
+        help="Path to the name cache file.",
+        default="name_lookup.json",
+    )
+    # Counted in prompts: write_txt_features divides it by the number of templates.
+    parser.add_argument("--batch-size", help="Batch size.", default=2**15, type=int)
+    args = parser.parse_args()
+    name_lookup = get_name_lookup(args.catalog_path, cache_path=args.name_cache_path)
+    logger.info("Got name lookup.")
+    model = create_model(model_str, output_dict=True, require_pretrained=True)
+    model = model.to(device)
+    logger.info("Created model.")
+    model = torch.compile(model)
+    logger.info("Compiled model.")
+    tokenizer = get_tokenizer(tokenizer_str)
+    # Embed every name first; the two conversions below read the file this writes.
+    write_txt_features(name_lookup)
+    convert_txt_features_to_avgs(name_lookup)
+    convert_txt_features_to_species_only(name_lookup)

name_lookup.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:20d731d9d901f1c17927187bc87e4a2513279845a1a6ba5982dbf779f2ac1434
+size 26462858

requirements.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+open_clip_torch
+torchvision
+torch
+gradio
+polars
+pillow
+boto3

templates.py ADDED Viewed

	@@ -0,0 +1,82 @@

+# Prompt templates: a list of one-argument callables, each of which formats
+# its argument `c` into a short English caption (for example
+# "a photo of a {c}."). The argument is interpolated with an f-string, so any
+# value that can be formatted is accepted.
+# NOTE(review): the name suggests these mirror the OpenAI ImageNet prompt set;
+# the wording (including "a origami" / "a embroidered") is presumably kept
+# verbatim on purpose -- confirm before "fixing" the grammar.
+openai_imagenet_template = [
+    lambda c: f"a bad photo of a {c}.",
+    lambda c: f"a photo of many {c}.",
+    lambda c: f"a sculpture of a {c}.",
+    lambda c: f"a photo of the hard to see {c}.",
+    lambda c: f"a low resolution photo of the {c}.",
+    lambda c: f"a rendering of a {c}.",
+    lambda c: f"graffiti of a {c}.",
+    lambda c: f"a bad photo of the {c}.",
+    lambda c: f"a cropped photo of the {c}.",
+    lambda c: f"a tattoo of a {c}.",
+    lambda c: f"the embroidered {c}.",
+    lambda c: f"a photo of a hard to see {c}.",
+    lambda c: f"a bright photo of a {c}.",
+    lambda c: f"a photo of a clean {c}.",
+    lambda c: f"a photo of a dirty {c}.",
+    lambda c: f"a dark photo of the {c}.",
+    lambda c: f"a drawing of a {c}.",
+    lambda c: f"a photo of my {c}.",
+    lambda c: f"the plastic {c}.",
+    lambda c: f"a photo of the cool {c}.",
+    lambda c: f"a close-up photo of a {c}.",
+    lambda c: f"a black and white photo of the {c}.",
+    lambda c: f"a painting of the {c}.",
+    lambda c: f"a painting of a {c}.",
+    lambda c: f"a pixelated photo of the {c}.",
+    lambda c: f"a sculpture of the {c}.",
+    lambda c: f"a bright photo of the {c}.",
+    lambda c: f"a cropped photo of a {c}.",
+    lambda c: f"a plastic {c}.",
+    lambda c: f"a photo of the dirty {c}.",
+    lambda c: f"a jpeg corrupted photo of a {c}.",
+    lambda c: f"a blurry photo of the {c}.",
+    lambda c: f"a photo of the {c}.",
+    lambda c: f"a good photo of the {c}.",
+    lambda c: f"a rendering of the {c}.",
+    lambda c: f"a {c} in a video game.",
+    lambda c: f"a photo of one {c}.",
+    lambda c: f"a doodle of a {c}.",
+    lambda c: f"a close-up photo of the {c}.",
+    lambda c: f"a photo of a {c}.",
+    lambda c: f"the origami {c}.",
+    lambda c: f"the {c} in a video game.",
+    lambda c: f"a sketch of a {c}.",
+    lambda c: f"a doodle of the {c}.",
+    lambda c: f"a origami {c}.",
+    lambda c: f"a low resolution photo of a {c}.",
+    lambda c: f"the toy {c}.",
+    lambda c: f"a rendition of the {c}.",
+    lambda c: f"a photo of the clean {c}.",
+    lambda c: f"a photo of a large {c}.",
+    lambda c: f"a rendition of a {c}.",
+    lambda c: f"a photo of a nice {c}.",
+    lambda c: f"a photo of a weird {c}.",
+    lambda c: f"a blurry photo of a {c}.",
+    lambda c: f"a cartoon {c}.",
+    lambda c: f"art of a {c}.",
+    lambda c: f"a sketch of the {c}.",
+    lambda c: f"a embroidered {c}.",
+    lambda c: f"a pixelated photo of a {c}.",
+    lambda c: f"itap of the {c}.",
+    lambda c: f"a jpeg corrupted photo of the {c}.",
+    lambda c: f"a good photo of a {c}.",
+    lambda c: f"a plushie {c}.",
+    lambda c: f"a photo of the nice {c}.",
+    lambda c: f"a photo of the small {c}.",
+    lambda c: f"a photo of the weird {c}.",
+    lambda c: f"the cartoon {c}.",
+    lambda c: f"art of the {c}.",
+    lambda c: f"a drawing of the {c}.",
+    lambda c: f"a photo of the large {c}.",
+    lambda c: f"a black and white photo of a {c}.",
+    lambda c: f"the plushie {c}.",
+    lambda c: f"a dark photo of a {c}.",
+    lambda c: f"itap of a {c}.",
+    lambda c: f"graffiti of the {c}.",
+    lambda c: f"a toy {c}.",
+    lambda c: f"itap of my {c}.",
+    lambda c: f"a photo of a cool {c}.",
+    lambda c: f"a photo of a small {c}.",
+    lambda c: f"a tattoo of the {c}.",
+]

test_lib.py ADDED Viewed

	@@ -0,0 +1,481 @@

+import lib
+def test_taxonomiclookup_empty():
+    lookup = lib.TaxonomicTree()
+    assert lookup.size == 0
+def test_taxonomiclookup_kingdom_size():
+    lookup = lib.TaxonomicTree()
+    lookup.add(("Animalia",))
+    assert lookup.size == 1
+def test_taxonomiclookup_genus_size():
+    lookup = lib.TaxonomicTree()
+    lookup.add(
+        (
+            "Animalia",
+            "Chordata",
+            "Aves",
+            "Accipitriformes",
+            "Accipitridae",
+            "Halieaeetus",
+        )
+    )
+    assert lookup.size == 6
+def test_taxonomictree_kingdom_children():
+    lookup = lib.TaxonomicTree()
+    lookup.add(
+        (
+            "Animalia",
+            "Chordata",
+            "Aves",
+            "Accipitriformes",
+            "Accipitridae",
+            "Halieaeetus",
+        )
+    )
+    expected = set([("Animalia", 0)])
+    actual = lookup.children()
+    assert actual == expected
+def test_taxonomiclookup_children_of_animal_only_birds():
+    lookup = lib.TaxonomicTree()
+    lookup.add(
+        (
+            "Animalia",
+            "Chordata",
+            "Aves",
+            "Accipitriformes",
+            "Accipitridae",
+            "Halieaeetus",
+            "leucocephalus",
+        )
+    )
+    lookup.add(
+        (
+            "Animalia",
+            "Chordata",
+            "Aves",
+            "Strigiformes",
+            "Strigidae",
+            "Ninox",
+            "scutulata",
+        )
+    )
+    lookup.add(
+        (
+            "Animalia",
+            "Chordata",
+            "Aves",
+            "Strigiformes",
+            "Strigidae",
+            "Ninox",
+            "plesseni",
+        )
+    )
+    actual = lookup.children(("Animalia",))
+    expected = set([("Chordata", 1)])
+    assert actual == expected
+def test_taxonomiclookup_children_of_animal():
+    lookup = lib.TaxonomicTree()
+    lookup.add(
+        (
+            "Animalia",
+            "Chordata",
+            "Aves",
+            "Accipitriformes",
+            "Accipitridae",
+            "Halieaeetus",
+            "leucocephalus",
+        )
+    )
+    lookup.add(
+        (
+            "Animalia",
+            "Chordata",
+            "Aves",
+            "Strigiformes",
+            "Strigidae",
+            "Ninox",
+            "scutulata",
+        )
+    )
+    lookup.add(
+        (
+            "Animalia",
+            "Chordata",
+            "Aves",
+            "Strigiformes",
+            "Strigidae",
+            "Ninox",
+            "plesseni",
+        )
+    )
+    lookup.add(
+        (
+            "Animalia",
+            "Chordata",
+            "Mammalia",
+            "Primates",
+            "Hominidae",
+            "Gorilla",
+            "gorilla",
+        )
+    )
+    lookup.add(
+        (
+            "Animalia",
+            "Arthropoda",
+            "Insecta",
+            "Hymenoptera",
+            "Apidae",
+            "Bombus",
+            "balteatus",
+        )
+    )
+    actual = lookup.children(("Animalia",))
+    expected = set([("Chordata", 1), ("Arthropoda", 17)])
+    assert actual == expected
+def test_taxonomiclookup_children_of_chordata():
+    lookup = lib.TaxonomicTree()
+    lookup.add(
+        (
+            "Animalia",
+            "Chordata",
+            "Aves",
+            "Accipitriformes",
+            "Accipitridae",
+            "Halieaeetus",
+            "leucocephalus",
+        )
+    )
+    lookup.add(
+        (
+            "Animalia",
+            "Chordata",
+            "Aves",
+            "Strigiformes",
+            "Strigidae",
+            "Ninox",
+            "scutulata",
+        )
+    )
+    lookup.add(
+        (
+            "Animalia",
+            "Chordata",
+            "Aves",
+            "Strigiformes",
+            "Strigidae",
+            "Ninox",
+            "plesseni",
+        )
+    )
+    lookup.add(
+        (
+            "Animalia",
+            "Chordata",
+            "Mammalia",
+            "Primates",
+            "Hominidae",
+            "Gorilla",
+            "gorilla",
+        )
+    )
+    lookup.add(
+        (
+            "Animalia",
+            "Arthropoda",
+            "Insecta",
+            "Hymenoptera",
+            "Apidae",
+            "Bombus",
+            "balteatus",
+        )
+    )
+    actual = lookup.children(("Animalia", "Chordata"))
+    expected = set([("Aves", 2), ("Mammalia", 12)])
+    assert actual == expected
+def test_taxonomiclookup_children_of_strigiformes():
+    lookup = lib.TaxonomicTree()
+    lookup.add(
+        (
+            "Animalia",
+            "Chordata",
+            "Aves",
+            "Accipitriformes",
+            "Accipitridae",
+            "Halieaeetus",
+            "leucocephalus",
+        )
+    )
+    lookup.add(
+        (
+            "Animalia",
+            "Chordata",
+            "Aves",
+            "Strigiformes",
+            "Strigidae",
+            "Ninox",
+            "scutulata",
+        )
+    )
+    lookup.add(
+        (
+            "Animalia",
+            "Chordata",
+            "Aves",
+            "Strigiformes",
+            "Strigidae",
+            "Ninox",
+            "plesseni",
+        )
+    )
+    lookup.add(
+        (
+            "Animalia",
+            "Chordata",
+            "Mammalia",
+            "Primates",
+            "Hominidae",
+            "Gorilla",
+            "gorilla",
+        )
+    )
+    lookup.add(
+        (
+            "Animalia",
+            "Arthropoda",
+            "Insecta",
+            "Hymenoptera",
+            "Apidae",
+            "Bombus",
+            "balteatus",
+        )
+    )
+    actual = lookup.children(("Animalia", "Chordata", "Aves", "Strigiformes"))
+    expected = set([("Strigidae", 8)])
+    assert actual == expected
+def test_taxonomiclookup_children_of_ninox():
+    lookup = lib.TaxonomicTree()
+    lookup.add(
+        (
+            "Animalia",
+            "Chordata",
+            "Aves",
+            "Accipitriformes",
+            "Accipitridae",
+            "Halieaeetus",
+            "leucocephalus",
+        )
+    )
+    lookup.add(
+        (
+            "Animalia",
+            "Chordata",
+            "Aves",
+            "Strigiformes",
+            "Strigidae",
+            "Ninox",
+            "scutulata",
+        )
+    )
+    lookup.add(
+        (
+            "Animalia",
+            "Chordata",
+            "Aves",
+            "Strigiformes",
+            "Strigidae",
+            "Ninox",
+            "plesseni",
+        )
+    )
+    lookup.add(
+        (
+            "Animalia",
+            "Chordata",
+            "Mammalia",
+            "Primates",
+            "Hominidae",
+            "Gorilla",
+            "gorilla",
+        )
+    )
+    lookup.add(
+        (
+            "Animalia",
+            "Arthropoda",
+            "Insecta",
+            "Hymenoptera",
+            "Apidae",
+            "Bombus",
+            "balteatus",
+        )
+    )
+    actual = lookup.children(
+        ("Animalia", "Chordata", "Aves", "Strigiformes", "Strigidae", "Ninox")
+    )
+    expected = set([("scutulata", 10), ("plesseni", 11)])
+    assert actual == expected
+def test_taxonomiclookup_children_of_gorilla():
+    lookup = lib.TaxonomicTree()
+    lookup.add(
+        (
+            "Animalia",
+            "Chordata",
+            "Aves",
+            "Accipitriformes",
+            "Accipitridae",
+            "Halieaeetus",
+            "leucocephalus",
+        )
+    )
+    lookup.add(
+        (
+            "Animalia",
+            "Chordata",
+            "Aves",
+            "Strigiformes",
+            "Strigidae",
+            "Ninox",
+            "scutulata",
+        )
+    )
+    lookup.add(
+        (
+            "Animalia",
+            "Chordata",
+            "Aves",
+            "Strigiformes",
+            "Strigidae",
+            "Ninox",
+            "plesseni",
+        )
+    )
+    lookup.add(
+        (
+            "Animalia",
+            "Chordata",
+            "Mammalia",
+            "Primates",
+            "Hominidae",
+            "Gorilla",
+            "gorilla",
+        )
+    )
+    lookup.add(
+        (
+            "Animalia",
+            "Arthropoda",
+            "Insecta",
+            "Hymenoptera",
+            "Apidae",
+            "Bombus",
+            "balteatus",
+        )
+    )
+    actual = lookup.children(
+        (
+            "Animalia",
+            "Chordata",
+            "Mammalia",
+            "Primates",
+            "Hominidae",
+            "Gorilla",
+            "gorilla",
+        )
+    )
+    expected = set()
+    assert actual == expected
+def test_taxonomictree_descendants_last():
+    lookup = lib.TaxonomicTree()
+    lookup.add(("A", "B", "C", "D", "E", "F", "G"))
+    actual = list(lookup.descendants(("A", "B", "C", "D", "E", "F", "G")))
+    expected = [
+        (("A", "B", "C", "D", "E", "F", "G"), 6),
+    ]
+    assert actual == expected
+def test_taxonomictree_descendants_entire_tree():
+    lookup = lib.TaxonomicTree()
+    lookup.add(("A", "B"))
+    actual = list(lookup.descendants())
+    expected = [
+        (("A",), 0),
+        (("A", "B"), 1),
+    ]
+    assert actual == expected
+def test_taxonomictree_descendants_entire_tree_with_prefix():
+    lookup = lib.TaxonomicTree()
+    lookup.add(("A", "B"))
+    actual = list(lookup.descendants(prefix=("A",)))
+    expected = [
+        (("A",), 0),
+        (("A", "B"), 1),
+    ]
+    assert actual == expected
+def test_taxonomictree_descendants_general():
+    lookup = lib.TaxonomicTree()
+    lookup.add(("A", "B", "C", "D", "E", "F", "G"))
+    actual = list(lookup.descendants(("A", "B", "C", "D")))
+    expected = [
+        (("A", "B", "C", "D"), 3),
+        (("A", "B", "C", "D", "E"), 4),
+        (("A", "B", "C", "D", "E", "F"), 5),
+        (("A", "B", "C", "D", "E", "F", "G"), 6),
+    ]
+    assert actual == expected

txt_emb.npy ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b4a3c3412c3dae49cf92cc760aba5ee84227362adf1eb08f04dd50ee2a756e43
+size 969818240

txt_emb_species.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:844e6fabc06cac072214d566b78f40825b154efa9479eb11285030ca038b2ece
+size 65731052

txt_emb_species.npy ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:91ce02dff2433222e3138b8bf7eefa1dd74b30f4d406c16cd3301f66d65ab4ed
+size 787435648