# MEIRa/model/memory/base_memory.py
import torch
import torch.nn as nn
from pytorch_utils.modules import MLP
import math
from omegaconf import DictConfig
from typing import Dict, Iterator, Optional, Tuple
from torch import Tensor
LOG2 = math.log(2)
class BaseMemory(nn.Module):
"""Base clustering module."""
def __init__(self, config: DictConfig, span_emb_size: int, drop_module: nn.Module):
        super().__init__()
self.config = config
self.mem_size = span_emb_size
self.drop_module = drop_module
if self.config.sim_func == "endpoint":
num_embs = 2 # Span start, Span end
else:
num_embs = 3 # Span start, Span end, Hadamard product between the two
self.mem_coref_mlp = MLP(
num_embs * self.mem_size + config.num_feats * config.emb_size,
config.mlp_size,
1,
drop_module=drop_module,
num_hidden_layers=config.mlp_depth,
bias=True,
)
if config.entity_rep == "learned_avg":
# Parameter for updating the cluster representation
self.alpha = MLP(
2 * self.mem_size,
config.mlp_size,
1,
num_hidden_layers=1,
bias=True,
drop_module=drop_module,
)
        if config.pseudo_dist:
            # Reserve an extra slot (index num_embeds) for the pseudo-distance
            # used when a cluster has no prior mention in the document
            self.distance_embeddings = nn.Embedding(
                self.config.num_embeds + 1, config.emb_size
            )
else:
self.distance_embeddings = nn.Embedding(
self.config.num_embeds, config.emb_size
)
self.counter_embeddings = nn.Embedding(self.config.num_embeds, config.emb_size)
@property
def device(self) -> torch.device:
return next(self.mem_coref_mlp.parameters()).device
    def initialize_memory(
        self,
        mem: Optional[Tensor] = None,
        mem_init: Optional[Tensor] = None,
        ent_counter: Optional[Tensor] = None,
        last_mention_start: Optional[Tensor] = None,
        rep=(),
        **kwargs
    ) -> Tuple[Tensor, Tensor, Tensor, Tensor]:
        """Method to initialize the clusters and related bookkeeping variables."""
        # Check for uninitialized memory
if mem is None or ent_counter is None or last_mention_start is None:
mem = torch.zeros(len(rep), self.mem_size).to(self.device)
mem_init = torch.zeros(len(rep), self.mem_size).to(self.device)
for idx, rep_vec in enumerate(rep):
mem[idx] = rep_vec
mem_init[idx] = rep_vec
ent_counter = torch.tensor([1.0] * len(rep)).to(self.device)
last_mention_start = -torch.ones(len(rep)).long().to(self.device)
elif len(rep):
for rep_emb in rep:
mem = torch.cat([mem, rep_emb.unsqueeze(0).to(self.device)], dim=0)
mem_init = torch.cat(
[mem_init, rep_emb.unsqueeze(0).to(self.device)], dim=0
)
ent_counter = torch.cat(
[ent_counter, torch.tensor([1.0]).to(self.device)]
)
last_mention_start = torch.cat(
[last_mention_start, torch.tensor([-1]).to(self.device)]
)
return mem, mem_init, ent_counter, last_mention_start
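    # For E tracked entities the method returns mem [E, mem_size],
    # mem_init [E, mem_size], ent_counter [E] (mentions seen per cluster),
    # and last_mention_start [E] (-1 until a mention is observed).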
@staticmethod
def get_bucket(count: Tensor) -> Tensor:
"""Bucket distance and entity counters using the same logic."""
logspace_idx = (
torch.floor(
torch.log(torch.max(count.float(), torch.tensor(1.0))) / LOG2
).long()
+ 3
)
        # Identity buckets for values up to 4, log-spaced buckets above
        use_identity = (count <= 4).long()
        combined_idx = use_identity * count.long() + (1 - use_identity) * logspace_idx
return torch.clamp(combined_idx, 0, 9)
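    # Worked example of the mapping: 0-4 keep their own bucket, then
    # 5-7 -> 5, 8-15 -> 6, 16-31 -> 7, 32-63 -> 8, and 64+ -> 9, i.e. ten
    # buckets in total (presumably config.num_embeds = 10, matching the
    # clamp above).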
@staticmethod
def get_distance_bucket(distances: Tensor) -> Tensor:
return BaseMemory.get_bucket(distances)
@staticmethod
def get_counter_bucket(count: Tensor) -> Tensor:
return BaseMemory.get_bucket(count)
def get_distance_emb(self, distance: Tensor) -> Tensor:
distance_tens = self.get_distance_bucket(distance)
distance_embs = self.distance_embeddings(distance_tens)
return distance_embs
def get_counter_emb(self, ent_counter: Tensor) -> Tensor:
counter_buckets = self.get_counter_bucket(ent_counter.long())
counter_embs = self.counter_embeddings(counter_buckets)
return counter_embs
@staticmethod
def get_coref_mask(ent_counter: Tensor) -> Tensor:
"""Mask for whether the cluster representation corresponds to any entity or not."""
cell_mask = (ent_counter > 0.0).float()
return cell_mask
def get_feature_embs_tensorized(
self,
ment_start: Tensor, ## [B]
last_mention_start: Tensor, ## [E]
ent_counter: Tensor, ## [E]
metadata: Dict, ## [Assuming no metadata]
    ) -> Tensor:
        ## Returns [B, E, num_feats * emb_size]: distance + counter embeddings
        ## Get distance embeddings: every pair uses the pseudo-distance slot
        ## (index num_embeds), which only exists when config.pseudo_dist is set
        distance_embs = self.distance_embeddings(
            torch.tensor(self.config.num_embeds).long().to(self.device)
        ).repeat(
            ment_start.shape[0], last_mention_start.shape[0], 1
        )  ## [B, E, 20]
## Get counter embeddings
ent_counter_batch = ent_counter.unsqueeze(0).repeat(
ment_start.shape[0], 1
) ## [B, E]
counter_embs = self.get_counter_emb(ent_counter_batch) ## [B, E, 20]
feature_embs_list = [distance_embs, counter_embs]
feature_embs = self.drop_module(torch.cat(feature_embs_list, dim=-1))
return feature_embs
def get_feature_embs(
self,
ment_start: Tensor,
last_mention_start: Tensor,
ent_counter: Tensor,
metadata: Dict,
) -> Tensor:
distance_embs = self.get_distance_emb(ment_start - last_mention_start)
        if self.config.pseudo_dist:
            # For clusters with no prior mention (last_mention_start < 0),
            # substitute the dedicated pseudo-distance embedding
            rep_distance_mask = (last_mention_start < 0).unsqueeze(1).float()
rep_distance_embs = self.distance_embeddings(
torch.tensor(self.config.num_embeds).long().to(self.device)
).repeat(last_mention_start.shape[0], 1)
distance_embs = (
distance_embs * (1 - rep_distance_mask)
+ rep_distance_embs * rep_distance_mask
)
counter_embs = self.get_counter_emb(ent_counter)
feature_embs_list = [distance_embs, counter_embs]
if "genre" in metadata:
genre_emb = metadata["genre"]
num_ents = distance_embs.shape[0]
genre_emb = torch.unsqueeze(genre_emb, dim=0).repeat(num_ents, 1)
feature_embs_list.append(genre_emb)
feature_embs = self.drop_module(torch.cat(feature_embs_list, dim=-1))
return feature_embs
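    # Note: the concatenated feature width here is num_feats * emb_size and
    # must match the feature slice of mem_coref_mlp's input; num_feats is
    # presumably 2 (distance + counter), or 3 when metadata carries a genre
    # embedding.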
def get_coref_new_scores_tensorized(
self,
ment_emb: Tensor, ## [B,D]
mem_vectors: Tensor, ## [E,D]
mem_vectors_init: Tensor, ## [E,D] ## Not used here
ent_counter: Tensor, ## not used here
feature_embs: Tensor, ## [B,E,20]
) -> Tensor:
rep_ment_emb = ment_emb.unsqueeze(1).repeat(
1, mem_vectors.shape[0], 1
) ## [B,E,D]
rep_mem_vectors = mem_vectors.unsqueeze(0).repeat(
ment_emb.shape[0], 1, 1
) ## [B,E,D]
pair_vec = torch.cat(
[
rep_mem_vectors,
rep_ment_emb,
rep_mem_vectors * rep_ment_emb,
feature_embs,
],
dim=-1,
        )  ## [B, E, 3D + num_feats * emb_size]
pair_score = self.mem_coref_mlp(pair_vec)
coref_score = torch.squeeze(pair_score, dim=-1) # [B,E]
        # Score for forming a new cluster: a fixed threshold (config.thresh)
        base_col = (
            torch.ones(coref_score.shape[0], 1).to(self.device) * self.config.thresh
        )
coref_new_score = torch.cat([coref_score, base_col], dim=-1) ## [B,E+1]
return coref_new_score
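    # Unlike get_coref_new_scores below, this tensorized variant scores a
    # whole mention batch against every memory slot at once and appends the
    # new-cluster threshold as a final column; it applies no mask for
    # empty clusters.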
def get_coref_new_scores(
self,
ment_emb: Tensor,
mem_vectors: Tensor,
mem_vectors_init: Tensor,
ent_counter: Tensor,
feature_embs: Tensor,
) -> Tensor:
"""Calculate the coreference score with existing clusters.
For creating a new cluster we use a dummy score of 0.
This is a free variable and this idea is borrowed from Lee et al 2017
Args:
ment_emb (d'): Mention representation
mem_vectors (M x d'): Cluster representations
ent_counter (M): Mention counter of clusters.
feature_embs (M x p): Embedding of features such as distance from last
mention of the cluster.
Returns:
coref_new_score (M + 1):
Coref scores concatenated with the score of forming a new cluster.
"""
# Repeat the query vector for comparison against all cells
num_ents = mem_vectors.shape[0]
rep_ment_emb = ment_emb.repeat(num_ents, 1) # M x H
# Coref Score
if self.config.sim_func == "endpoint":
pair_vec = torch.cat([mem_vectors, rep_ment_emb, feature_embs], dim=-1)
pair_score = self.mem_coref_mlp(pair_vec)
if self.config.type == "hybrid":
## Adding pairwise similarity with initial memory
pair_vec_init = torch.cat(
[mem_vectors_init, rep_ment_emb, feature_embs], dim=-1
)
pair_score_init = self.mem_coref_mlp(pair_vec_init)
pair_score = pair_score + pair_score_init
        else:
            ## Pairwise similarity computed against mem, which is updated
            ## dynamically unless config.type is "static"
pair_vec = torch.cat(
[mem_vectors, rep_ment_emb, mem_vectors * rep_ment_emb, feature_embs],
dim=-1,
)
pair_score = self.mem_coref_mlp(pair_vec)
if self.config.type == "hybrid":
## Adding pairwise similarity with initial memory
pair_vec_init = torch.cat(
[
mem_vectors_init,
rep_ment_emb,
mem_vectors_init * rep_ment_emb,
feature_embs,
],
dim=-1,
)
pair_score_init = self.mem_coref_mlp(pair_vec_init) ## Static score
pair_score = (
pair_score + pair_score_init
) ## Similarity score with current repr. and initial repr.
coref_score = torch.squeeze(pair_score, dim=-1) # M
coref_new_mask = torch.cat(
[self.get_coref_mask(ent_counter), torch.tensor([1.0], device=self.device)],
dim=0,
)
        # Use a fixed threshold score (config.thresh) for forming a new cluster
        coref_new_score = torch.cat(
            [coref_score, torch.tensor([self.config.thresh], device=self.device)],
            dim=0,
        )
coref_new_score = coref_new_score * coref_new_mask + (1 - coref_new_mask) * (
-1e4
)
return coref_new_score
    @staticmethod
    def assign_cluster_tensorized(coref_new_scores: Tensor) -> Iterator[Tuple[int, str]]:
        """Decode the actions from the argmax of clustering scores."""
        ## coref_new_scores : [B, E+1]
        num_ents = coref_new_scores.shape[-1] - 1
        pred_max_idx = torch.argmax(coref_new_scores, dim=-1).tolist()  ## [B]
        action_str = ["c" if idx < num_ents else "o" for idx in pred_max_idx]
        return zip(pred_max_idx, action_str)
@staticmethod
def assign_cluster(coref_new_scores: Tensor) -> Tuple[int, str]:
"""Decode the action from argmax of clustering scores"""
num_ents = coref_new_scores.shape[0] - 1
pred_max_idx = torch.argmax(coref_new_scores).item()
if pred_max_idx < num_ents:
# Coref
return pred_max_idx, "c"
else:
# New cluster
return num_ents, "o"
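    # Example: for scores [2.1, -0.5, 0.3] over M = 2 clusters plus the
    # new-cluster slot, the argmax is index 0 < 2, so assign_cluster returns
    # (0, "c"); had the final slot won, it would return (2, "o").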
def coref_update(
self, ment_emb: Tensor, mem_vectors: Tensor, cell_idx: int, ent_counter: Tensor
) -> Tensor:
"""Updates the cluster representation given the new mention representation."""
if self.config.entity_rep == "learned_avg":
alpha_wt = torch.sigmoid(
self.alpha(torch.cat([mem_vectors[cell_idx], ment_emb], dim=0))
)
coref_vec = alpha_wt * mem_vectors[cell_idx] + (1 - alpha_wt) * ment_emb
elif self.config.entity_rep == "max":
coref_vec = torch.max(mem_vectors[cell_idx], ment_emb)
        else:
            # Default: running average of all mention representations so far
            cluster_count = ent_counter[cell_idx].item()
coref_vec = (mem_vectors[cell_idx] * cluster_count + ment_emb) / (
cluster_count + 1
)
return coref_vec
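

# A minimal smoke-test sketch (not part of the original module), assuming
# pytorch_utils.modules.MLP accepts the arguments used above and that the
# config values below (e.g. sim_func="hadamard", num_embeds=10) are valid
# settings for this repo; treat it as an illustration, not canonical usage.
if __name__ == "__main__":
    config = DictConfig(
        {
            "sim_func": "hadamard",  # any value other than "endpoint"
            "type": "static",
            "num_feats": 2,  # distance + counter features
            "emb_size": 20,
            "mlp_size": 64,
            "mlp_depth": 1,
            "entity_rep": "avg",  # falls through to the running-average update
            "pseudo_dist": True,
            "num_embeds": 10,
            "thresh": 0.0,
        }
    )
    memory = BaseMemory(config, span_emb_size=32, drop_module=nn.Dropout(0.1))

    # Start two clusters from dummy representations, then score a new mention
    mem, mem_init, ent_counter, last_start = memory.initialize_memory(
        rep=[torch.randn(32), torch.randn(32)]
    )
    feature_embs = memory.get_feature_embs(
        torch.tensor(50), last_start, ent_counter, metadata={}
    )
    scores = memory.get_coref_new_scores(
        torch.randn(32), mem, mem_init, ent_counter, feature_embs
    )
    print(BaseMemory.assign_cluster(scores))  # e.g. (0, "c") or (2, "o")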