|
import time |
|
import torch |
|
import pandas as pd |
|
from fvcore.nn import FlopCountAnalysis, ActivationCountAnalysis |
|
|
|
|
|
def profile_model(model, input_tensor, display=False):
    """Collect a one-row summary of model cost metrics.

    FLOPs, activations, and speed are measured on a single-sample slice of
    ``input_tensor``; memory is measured on the full batch.

    Args:
        model: the torch module to profile.
        input_tensor: a batched input; ``input_tensor[0:1, ...]`` must be a
            valid forward input.
        display: when True, also print the table as a markdown grid.

    Returns:
        A single-row ``pd.DataFrame`` with one column per metric.
    """
    sample = input_tensor[0:1, ...]

    # NOTE: keep this call order — calculate_speed() leaves the model in
    # eval mode and calculate_memory() toggles train mode, so reordering
    # would change the model's final train/eval state.
    flops_g = calculate_flops(model, sample)
    acts_m = calculate_activations(model, sample)
    params_m = calculate_params(model)
    throughput = calculate_speed(model, sample)
    mem_gb = calculate_memory(model, input_tensor)

    profile_df = (
        pd.DataFrame(
            {
                "Metric": [
                    "FLOPs (G)",
                    "Activations (M)",
                    "Params (M)",
                    "Memory (GB)",
                    "Speed (A/S)",
                ],
                "Value": [flops_g, acts_m, params_m, mem_gb, throughput],
            }
        )
        .set_index("Metric")
        .T
    )

    if display:
        print(profile_df.to_markdown(index=False, tablefmt="grid"))
    return profile_df
|
|
|
|
|
def calculate_speed(model, input_tensor, num_runs=100, warmup_runs=5):
    """Measure average inference throughput in forward passes per second.

    Puts the model in eval mode (and leaves it there), runs ``warmup_runs``
    untimed forward passes, then times ``num_runs`` passes under
    ``torch.no_grad()``.

    Args:
        model: torch module to benchmark.
        input_tensor: input fed to every forward pass (assumed to already be
            on the same device as the model).
        num_runs: number of timed forward passes.
        warmup_runs: untimed passes to absorb lazy init / autotune costs.

    Returns:
        Throughput as 1 / mean-latency (forward passes per second).
    """
    model.eval()

    if torch.cuda.is_available():
        # Warm-up: absorbs CUDA context init, cuDNN autotuning, allocator
        # growth, etc., so they don't pollute the timed region.
        with torch.no_grad():
            for _ in range(warmup_runs):
                _ = model(input_tensor)

        # Ensure all warm-up kernels have finished before timing starts.
        torch.cuda.synchronize()

        start = torch.cuda.Event(enable_timing=True)
        end = torch.cuda.Event(enable_timing=True)

        start.record()
        with torch.no_grad():
            for _ in range(num_runs):
                _ = model(input_tensor)
        end.record()

        # elapsed_time() is only valid once both events have completed.
        torch.cuda.synchronize()

        elapsed_time = start.elapsed_time(end)  # milliseconds
        latency = elapsed_time / num_runs / 1000.0  # seconds per pass
    else:
        with torch.no_grad():
            for _ in range(warmup_runs):
                _ = model(input_tensor)

        # perf_counter() is monotonic and high-resolution; time.time() can
        # step backwards under NTP adjustment and has coarser resolution.
        start = time.perf_counter()
        with torch.no_grad():
            for _ in range(num_runs):
                _ = model(input_tensor)
        end = time.perf_counter()

        latency = (end - start) / num_runs

    return 1.0 / latency
|
|
|
|
|
def calculate_flops(model, input_tensor):
    """Return the model's total FLOPs for one forward pass, in GigaFLOPs.

    Note that many sources report MACs under the name "FLOPs" (e.g.
    ConvNeXt, the timm library), so numbers from elsewhere may differ by 2x.

    Reference:
    1. https://github.com/huggingface/pytorch-image-models/blob/main/benchmark.py#L206
    2. https://github.com/facebookresearch/fvcore/issues/69
    """
    analysis = FlopCountAnalysis(model, input_tensor)
    return analysis.total() / 1e9
|
|
|
|
|
def calculate_activations(model, input_tensor):
    """Return the activation count of one forward pass, in millions."""
    counter = ActivationCountAnalysis(model, input_tensor)
    return counter.total() / 1e6
|
|
|
|
|
def calculate_params(model):
    """Return the total number of model parameters, in millions."""
    total = 0
    for param in model.parameters():
        total += param.numel()
    return total / 1e6
|
|
|
|
|
def calculate_memory(model, input_tensor):
    """Return the peak GPU memory delta (GB) of one forward pass in train mode.

    Runs a single forward pass with the model in train mode (so autograd
    bookkeeping is included in the measurement) and reports the growth in
    CUDA peak allocated memory. Returns 0 when CUDA is unavailable — no CPU
    memory measurement is attempted.

    Args:
        model: torch module to measure (assumed to already be on the GPU
            when CUDA is available — TODO confirm with callers).
        input_tensor: full-batch input for the forward pass.

    Returns:
        Peak allocated-memory growth in GiB, or 0 without CUDA.
    """
    if not torch.cuda.is_available():
        return 0

    torch.cuda.empty_cache()
    torch.cuda.reset_peak_memory_stats(device=None)
    start_memory = torch.cuda.max_memory_allocated(device=None)

    # Measure in train mode, but restore the caller's train/eval state
    # afterwards instead of silently leaving the model in train() — the
    # original left this side effect behind.
    was_training = model.training
    try:
        model.train()
        _ = model(input_tensor)
        end_memory = torch.cuda.max_memory_allocated(device=None)
    finally:
        model.train(was_training)

    torch.cuda.empty_cache()
    torch.cuda.reset_peak_memory_stats(device=None)
    return (end_memory - start_memory) / (1024**3)
|
|