Spaces:

runa91
/

barc_gradio

Runtime error

barc_gradio / src /configs /dog_breeds /dog_breed_class.py

Nadine Rueegg

initial commit for barc

7629b39 over 2 years ago

8.1 kB


	import os
	import warnings
	warnings.filterwarnings("ignore", category=DeprecationWarning)
	import pandas as pd
	import difflib
	import json
	import pickle as pkl
	import csv
	import numpy as np


	# ----------------------------------------------------------------------------------------------------------------- #
	class DogBreed(object):
	    """Record describing one dog breed across up to three data sources.

	    A breed is identified by its abbreviation and may carry a name (plus
	    source-specific extras) for each of the 'akc', 'stanext' and 'xlsx'
	    datasets. A dataset counts as present when its name attribute is not None.

	    Note: ``_path_akc``, ``_ind_in_stanext``, ``_ind_in_xlsx``,
	    ``_ind_in_xlsx_matrix``, ``_clade`` and the ``_text_*`` attributes are only
	    created by the corresponding ``add_*`` method; they do not exist on an
	    instance for which that method was never called.
	    """

	    def __init__(
	        self,
	        abbrev,
	        name_akc=None,
	        name_stanext=None,
	        name_xlsx=None,
	        path_akc=None,
	        path_stanext=None,
	        ind_in_xlsx=None,
	        ind_in_xlsx_matrix=None,
	        ind_in_stanext=None,
	        clade=None,
	    ):
	        """Create a breed and register every dataset whose name is given.

	        For each dataset name that is not None, the matching ``add_*_info``
	        method is called, which asserts that the dataset's other arguments
	        are provided as well.
	        """
	        # The assignment order below defines the order of ``__dict__`` and
	        # therefore of the saved/printed attributes; keep it stable.
	        self._abbrev = abbrev
	        self._name_xlsx = name_xlsx
	        self._name_akc = name_akc
	        self._name_stanext = name_stanext
	        self._path_stanext = path_stanext
	        self._additional_names = set()
	        if self._name_akc is not None:
	            self.add_akc_info(name_akc, path_akc)
	        if self._name_stanext is not None:
	            self.add_stanext_info(name_stanext, path_stanext, ind_in_stanext)
	        if self._name_xlsx is not None:
	            self.add_xlsx_info(name_xlsx, ind_in_xlsx, ind_in_xlsx_matrix, clade)

	    def add_xlsx_info(self, name_xlsx, ind_in_xlsx, ind_in_xlsx_matrix, clade):
	        """Store the 'xlsx' dataset information; all arguments are required."""
	        assert (name_xlsx is not None) and (ind_in_xlsx is not None) and (ind_in_xlsx_matrix is not None) and (clade is not None)
	        self._name_xlsx = name_xlsx
	        self._ind_in_xlsx = ind_in_xlsx
	        self._ind_in_xlsx_matrix = ind_in_xlsx_matrix
	        self._clade = clade

	    def add_stanext_info(self, name_stanext, path_stanext, ind_in_stanext):
	        """Store the 'stanext' dataset information; all arguments are required."""
	        assert (name_stanext is not None) and (path_stanext is not None) and (ind_in_stanext is not None)
	        self._name_stanext = name_stanext
	        self._path_stanext = path_stanext
	        self._ind_in_stanext = ind_in_stanext

	    def add_akc_info(self, name_akc, path_akc):
	        """Store the 'akc' dataset information; both arguments are required."""
	        assert (name_akc is not None) and (path_akc is not None)
	        self._name_akc = name_akc
	        self._path_akc = path_akc

	    def add_additional_names(self, name_list):
	        """Add every entry of ``name_list`` to the set of additional names."""
	        self._additional_names = self._additional_names.union(set(name_list))

	    def add_text_info(self, text_height, text_weight, text_life_exp):
	        """Attach the height, weight and life-expectancy texts to the breed."""
	        self._text_height = text_height
	        self._text_weight = text_weight
	        self._text_life_exp = text_life_exp

	    def get_datasets(self):
	        """Return the set of dataset keys ('akc', 'stanext', 'xlsx') containing this breed."""
	        name_by_dataset = (
	            ('akc', self._name_akc),
	            ('stanext', self._name_stanext),
	            ('xlsx', self._name_xlsx),
	        )
	        return {dataset for dataset, name in name_by_dataset if name is not None}

	    def get_names(self):
	        """Return the set of all known names for this breed.

	        Includes the abbreviation, the per-dataset names, the stanext path and
	        all additional names; unset (None) entries are left out.
	        """
	        names = {self._abbrev, self._name_akc, self._name_stanext, self._name_xlsx, self._path_stanext}.union(self._additional_names)
	        names.discard(None)
	        return names

	    def get_names_as_pointing_dict(self):
	        """Return a dict in which every known name points to the abbreviation."""
	        return {name: self._abbrev for name in self.get_names()}

	    def print_overview(self):
	        """Print names, datasets and all instance attributes of this breed."""
	        # Title preference: akc name, then xlsx name, then stanext name.
	        # A breed built from its abbreviation alone has none of them, so fall
	        # back to the abbreviation instead of failing on ``str + None``.
	        candidates = (self._name_akc, self._name_xlsx, self._name_stanext, self._abbrev)
	        name = next((candidate for candidate in candidates if candidate is not None), None)
	        print('----------------------------------------------------')
	        print('----- dog breed: ' + str(name))
	        print('----------------------------------------------------')
	        print('[names]')
	        print(self.get_names())
	        print('[datasets]')
	        print(self.get_datasets())
	        # see https://stackoverflow.com/questions/9058305/getting-attributes-of-a-class
	        print('[instance attributes]')
	        for attribute, value in self.__dict__.items():
	            print(attribute, '=', value)

	    def use_dict_to_save_class_instance(self):
	        """Return a shallow copy of the instance attributes as a plain dict."""
	        return dict(self.__dict__)

	    def use_dict_to_load_class_instance(self, my_dict):
	        """Set every key/value pair of ``my_dict`` as an attribute on this instance."""
	        for attribute, value in my_dict.items():
	            setattr(self, attribute, value)
	        return

	# ----------------------------------------------------------------------------------------------------------------- #
	def get_name_list_from_summary(summary):
	    """Map each breed abbreviation to a list of all names of that breed.

	    ``summary`` is a dict whose values are breed objects exposing ``_abbrev``
	    and ``get_names()``; its keys are ignored. Despite the function name the
	    result is a dict ``{abbrev: [name, ...]}``; the order inside each list is
	    that of the underlying name set and therefore not defined.
	    """
	    return {
	        breed._abbrev: list(breed.get_names())
	        for breed in summary.values()
	    }
	def get_partial_summary(summary, part):
	    """Return the entries of ``summary`` whose breed belongs to dataset ``part``.

	    ``part`` must be one of 'xlsx', 'akc' or 'stanext' (checked with an
	    assert). A breed belongs to a dataset when the matching name attribute
	    (``_name_xlsx``, ``_name_akc``, ``_name_stanext``) is not None. Only the
	    attribute of the requested dataset is read.
	    """
	    assert part in ['xlsx', 'akc', 'stanext']
	    attribute_by_part = (
	        ('xlsx', '_name_xlsx'),
	        ('akc', '_name_akc'),
	        ('stanext', '_name_stanext'),
	    )
	    # Resolve the attribute once; it stays None only if the assert above is
	    # disabled and ``part`` is unknown, in which case nothing is selected.
	    name_attribute = next(
	        (attribute for known_part, attribute in attribute_by_part if part == known_part),
	        None,
	    )
	    return {
	        key: breed
	        for key, breed in summary.items()
	        if name_attribute is not None and getattr(breed, name_attribute) is not None
	    }
	def get_akc_but_not_stanext_partial_summary(summary):
	    """Return the entries of ``summary`` that have an akc name but no stanext name.

	    An entry is kept when ``_name_akc`` is not None and ``_name_stanext`` is
	    None; ``_name_stanext`` is only read for entries that have an akc name.
	    """
	    return {
	        key: breed
	        for key, breed in summary.items()
	        if breed._name_akc is not None and breed._name_stanext is None
	    }

	# ----------------------------------------------------------------------------------------------------------------- #
	def main_load_dog_breed_classes(path_complete_abbrev_dict_v1, path_complete_summary_breeds_v1):
	    """Load the pickled abbreviation dict and rebuild the breed summary.

	    The second pickle holds, per key, a dict of instance attributes. For each
	    one a ``DogBreed`` is created from its ``'_abbrev'`` entry and then all
	    stored attributes are set on it.

	    Returns a tuple ``(complete_abbrev_dict, complete_summary_breeds)``, where
	    the second element maps the pickle's keys to ``DogBreed`` instances.

	    NOTE: unpickling executes code from the file; only use trusted files.
	    """
	    def _unpickle(path):
	        with open(path, 'rb') as handle:
	            return pkl.load(handle)

	    complete_abbrev_dict = _unpickle(path_complete_abbrev_dict_v1)
	    attributes_by_key = _unpickle(path_complete_summary_breeds_v1)

	    complete_summary_breeds = {}
	    for key, attributes_only in attributes_by_key.items():
	        breed = DogBreed(abbrev=attributes_only['_abbrev'])
	        breed.use_dict_to_load_class_instance(attributes_only)
	        complete_summary_breeds[key] = breed
	    return complete_abbrev_dict, complete_summary_breeds


	# ----------------------------------------------------------------------------------------------------------------- #
	def load_similarity_matrix_raw(xlsx_path):
	    """Read the breed similarity matrix from the Excel file at ``xlsx_path``.

	    Layout read from the sheet (as parsed by ``pd.read_excel``): the third
	    column holds the breed abbreviations and the 168 columns after it hold the
	    values; data rows 1..168 are used (row 0 is skipped, presumably a second
	    header row; TODO confirm).

	    Returns ``(matrix_raw, abbrev_indices)``:
	    - ``matrix_raw`` is a 168x168 float array whose first index is the sheet
	      column offset and second index is the sheet row offset.
	    - ``abbrev_indices`` maps each abbreviation to its 0-based position.
	    """
	    # --- LOAD EXCEL FILE FROM DOG BREED PAPER
	    n_breeds = 168
	    xlsx = pd.read_excel(xlsx_path)
	    column_labels = xlsx.columns

	    # abbreviation -> 0-based index (sheet row 1 becomes index 0)
	    abbrev_column = xlsx[column_labels[2]]
	    abbrev_indices = {}
	    for sheet_row in range(1, n_breeds + 1):
	        abbrev_indices[abbrev_column[sheet_row]] = sheet_row - 1

	    # fill the matrix column by column; each column is fetched once
	    matrix_raw = np.zeros((n_breeds, n_breeds))
	    for col_offset in range(n_breeds):
	        value_column = xlsx[column_labels[3 + col_offset]]
	        for row_offset in range(n_breeds):
	            matrix_raw[col_offset, row_offset] = float(value_column[1 + row_offset])
	    return matrix_raw, abbrev_indices



	# ----------------------------------------------------------------------------------------------------------------- #
	# ----------------------------------------------------------------------------------------------------------------- #
	# load the (in advance created) final dict of dog breed classes
	# NOTE: all statements below run at import time and read the data files from disk.
	# The data directory is resolved relative to this file: <dir of this file>/../../../data/breed_data
	ROOT_PATH_BREED_DATA = os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', '..', '..', 'data', 'breed_data')
	# NOTE(review): the variable names end in _v1 while the files are the *_v2.pkl versions;
	# presumably the names were kept unchanged when the data was regenerated. Confirm.
	path_complete_abbrev_dict_v1 = os.path.join(ROOT_PATH_BREED_DATA, 'complete_abbrev_dict_v2.pkl')
	path_complete_summary_breeds_v1 = os.path.join(ROOT_PATH_BREED_DATA, 'complete_summary_breeds_v2.pkl')
	# COMPLETE_ABBREV_DICT: the unpickled abbreviation dict; COMPLETE_SUMMARY_BREEDS: key -> DogBreed instance
	COMPLETE_ABBREV_DICT, COMPLETE_SUMMARY_BREEDS = main_load_dog_breed_classes(path_complete_abbrev_dict_v1, path_complete_summary_breeds_v1)
	# load similarity matrix, data from:
	# Parker H. G., Dreger D. L., Rimbault M., Davis B. W., Mullen A. B., Carpintero-Ramirez G., and Ostrander E. A.
	# Genomic analyses reveal the influence of geographic origin, migration, and hybridization on modern dog breed
	# development. Cell Reports, 19(4):697–708, 2017.
	xlsx_path = os.path.join(ROOT_PATH_BREED_DATA, 'NIHMS866262-supplement-2.xlsx')
	# SIM_MATRIX_RAW: 168x168 float array; SIM_ABBREV_INDICES: breed abbreviation -> 0-based index
	SIM_MATRIX_RAW, SIM_ABBREV_INDICES = load_similarity_matrix_raw(xlsx_path)