"""Embed the paper titles listed in ``anlp2025.tsv`` and save them to disk.

Steps:
  1. Read the tab-separated file into a DataFrame with columns ``pid`` and
     ``title``.
  2. Encode every title with the ``sbintuitions/sarashina-embedding-v1-1b``
     SentenceTransformer model.
  3. Write the resulting matrix with ``np.savez``.

The assertions are sanity checks that pin the expected input file (row count,
first and last title) and the expected embedding shape, so a wrong or
truncated input is caught before anything is written.
"""
import numpy as np
import pandas as pd
from sentence_transformers import SentenceTransformer

# ``names`` is given without ``header``, so pandas treats the first line of
# the file as data rather than as a header row.
paper_df = pd.read_csv('anlp2025.tsv', names=["pid", "title"], sep="\t")
assert len(paper_df) == 778, f"expected 778 papers, got {len(paper_df)}"

# ``tolist()`` already returns a fresh Python list; no extra copy is needed.
input_texts = paper_df["title"].tolist()
assert input_texts[0] == "LLMのアテンションヘッドに着目したジェイルブレイク攻撃の分析と防御手法の提案", (
    f"unexpected first title: {input_texts[0]!r}"
)
assert input_texts[-1] == "ニュース記事中の企業名のEntity LinkingにおけるQuestion Answeringを用いた曖昧性解消", (
    f"unexpected last title: {input_texts[-1]!r}"
)

model = SentenceTransformer("sbintuitions/sarashina-embedding-v1-1b")
embeddings = model.encode(input_texts)
# One row per title; 1792 is the embedding width this script expects from
# the model.
assert embeddings.shape == (778, 1792), (
    f"unexpected embedding shape: {embeddings.shape}"
)

# ``np.savez`` appends ".npz" to the file name, and an array passed
# positionally is stored under the key "arr_0".
np.savez("anlp2025", embeddings)