from typing import List, Iterable, Union
from pprint import pprint


class SingleCluster:
    """A single cluster: the ids of its members plus keyphrase counts."""

    def __init__(self):
        # Member ids, in insertion order.
        self.__container__ = []
        # Maps keyphrase -> number of times it has been added.
        self.__keyphrases__ = {}

    def add(self, id: int):
        """Append the member ``id`` to this cluster."""
        self.__container__.append(id)

    def __str__(self) -> str:
        """Return the string form of the list of member ids."""
        return str(self.__container__)

    def elements(self) -> List:
        """Return the internal list of member ids (not a copy)."""
        return self.__container__

    def get_keyphrases(self):
        """Return the internal keyphrase -> count dict (not a copy)."""
        return self.__keyphrases__

    def add_keyphrase(self, keyphrase: Union[str, Iterable]):
        """Count one keyphrase, or every keyphrase in a nested iterable.

        A ``str`` increments its own counter by one. Any other iterable is
        walked recursively and each item is handled the same way. Values
        that are neither strings nor iterables are ignored.
        """
        # Strings are iterable too, so they must be checked first.
        if isinstance(keyphrase, str):
            counts = self.__keyphrases__
            counts[keyphrase] = counts.get(keyphrase, 0) + 1
        elif isinstance(keyphrase, Iterable):
            for item in keyphrase:
                self.add_keyphrase(item)

    def __len__(self):
        """Return the number of member ids in this cluster."""
        return len(self.__container__)

    def print_keyphrases(self):
        """Pretty-print the keyphrase -> count dict."""
        pprint(self.__keyphrases__)


class ClusterList:
    """A fixed-size, list-like collection of ``SingleCluster`` objects."""

    def __init__(self, k: int):
        """Create ``k`` empty clusters."""
        self.__clusters__ = [SingleCluster() for _ in range(k)]

    def __getitem__(self, idx: Union[int, slice]):
        """Return one cluster for an index, or a list of clusters for a slice.

        Indexing is delegated to the underlying list, so negative indices,
        omitted slice bounds and negative steps follow normal list
        semantics. An unsupported index type raises ``TypeError``.
        """
        # Pass the index through untouched: substituting 0 for a missing
        # step is invalid (a slice step cannot be zero), and forcing start
        # to 0 breaks reversed slices.
        return self.__clusters__[idx]

    def instantiate(self, labels: Iterable):
        """Assign each position in ``labels`` to the cluster its label names.

        The element at position ``i`` with label ``l`` results in ``i``
        being added to cluster ``l``. Labels are used directly as list
        indices, so a label outside the range raises ``IndexError`` and a
        negative label counts from the end of the cluster list.
        """
        for position, label in enumerate(labels):
            self.__clusters__[label].add(position)

    def __str__(self):
        """Return a summary with one line per cluster."""
        header = f'There are {len(self.__clusters__)} clusters:\n'
        body = ''.join(
            f'cluster {index} contains: {cluster}.\n'
            for index, cluster in enumerate(self.__clusters__)
        )
        return header + body

    def __iter__(self):
        """Return an iterator over the clusters, e.g. for use in ``for`` loops."""
        return iter(self.__clusters__)

    def __len__(self):
        """Return the number of clusters."""
        return len(self.__clusters__)

    def sort(self):
        """Sort the clusters in place, largest (by member count) first."""
        self.__clusters__.sort(key=len, reverse=True)