from sentence_transformers import SentenceTransformer
import numpy as np
import pandas as pd


# Sanity-check values for this specific dataset/model pairing. A mismatch
# means the TSV or the model is not the one this script was written against.
EXPECTED_PAPER_COUNT = 778
EXPECTED_EMBEDDING_DIM = 1792
EXPECTED_FIRST_TITLE = "LLMのアテンションヘッドに着目したジェイルブレイク攻撃の分析と防御手法の提案"
EXPECTED_LAST_TITLE = "ニュース記事中の企業名のEntity LinkingにおけるQuestion Answeringを用いた曖昧性解消"


def _require(condition, message):
    """Raise ``ValueError(message)`` unless *condition* is true.

    Used in place of ``assert`` so the checks still run under ``python -O``
    and report the offending value instead of a bare ``AssertionError``.
    """
    if not condition:
        raise ValueError(message)


# Column names are supplied explicitly, so the first line of the file is
# read as data: column 0 becomes "pid" and column 1 becomes "title".
paper_df = pd.read_csv('anlp2025.tsv', names=["pid", "title"], sep="\t")
_require(
    len(paper_df) == EXPECTED_PAPER_COUNT,
    f"expected {EXPECTED_PAPER_COUNT} rows in anlp2025.tsv, got {len(paper_df)}",
)

# Series.tolist() already returns a fresh Python list; no extra copy needed.
input_texts = paper_df["title"].tolist()
_require(
    input_texts[0] == EXPECTED_FIRST_TITLE,
    f"unexpected first title: {input_texts[0]!r}",
)
_require(
    input_texts[-1] == EXPECTED_LAST_TITLE,
    f"unexpected last title: {input_texts[-1]!r}",
)

# Encode every title in file order, so row i of `embeddings` corresponds to
# row i of `paper_df`.
model = SentenceTransformer("sbintuitions/sarashina-embedding-v1-1b")
embeddings = model.encode(input_texts)
_require(
    embeddings.shape == (EXPECTED_PAPER_COUNT, EXPECTED_EMBEDDING_DIM),
    f"expected embeddings of shape "
    f"{(EXPECTED_PAPER_COUNT, EXPECTED_EMBEDDING_DIM)}, got {embeddings.shape}",
)

# np.savez appends ".npz" to the name; because the array is passed
# positionally it is stored under the default key "arr_0".
np.savez("anlp2025", embeddings)