Spaces:
Running
Running
#!/usr/bin/env python | |
# coding: utf-8 | |
# # AgentReview | |
# | |
# | |
# | |
# In this tutorial, you will explore customizing the AgentReview experiment. | |
# | |
# * Venue: EMNLP 2024 (Oral)
#
# * arXiv: [https://arxiv.org/abs/2406.12708](https://arxiv.org/abs/2406.12708)
#
# * Website: [https://agentreview.github.io/](https://agentreview.github.io/)
# | |
# ```bibtex | |
# @inproceedings{jin2024agentreview, | |
# title={AgentReview: Exploring Peer Review Dynamics with LLM Agents}, | |
# author={Jin, Yiqiao and Zhao, Qinlin and Wang, Yiyang and Chen, Hao and Zhu, Kaijie and Xiao, Yijia and Wang, Jindong}, | |
# booktitle={EMNLP}, | |
# year={2024} | |
# } | |
# ``` | |
# | |
# In[2]: | |
import os | |
import numpy as np | |
from agentreview import const | |
# NOTE(review): OPENAI_API_VERSION is assigned again further down from
# args.api_version ('2023-05-15'), so this value only applies until then.
os.environ["OPENAI_API_VERSION"] = "2024-06-01-preview"
# ## Overview | |
# | |
# AgentReview features a range of customizable variables, such as the characteristics of reviewers, authors, and area chairs (ACs), as well as the reviewing mechanisms.
# In[3]: | |
# ## Review Pipeline | |
# | |
# The simulation adopts a structured, 5-phase pipeline (Section 2 in the [paper](https://arxiv.org/abs/2406.12708)): | |
# | |
# * **I. Reviewer Assessment.** Each manuscript is evaluated by three reviewers independently. | |
# * **II. Author-Reviewer Discussion.** Authors submit rebuttals to address reviewers' concerns.
# * **III. Reviewer-AC Discussion.** The AC facilitates discussions among reviewers, prompting updates to their initial assessments. | |
# * **IV. Meta-Review Compilation.** The AC synthesizes the discussions into a meta-review. | |
# * **V. Paper Decision.** The AC makes the final decision on whether to accept or reject the paper, based on all gathered inputs. | |
# In[2]: | |
# In[4]: | |
import os

# When launched from a directory named "notebooks", step up one level so the
# remaining relative paths resolve against the parent directory instead.
if os.path.basename(os.getcwd()) == "notebooks":
    os.chdir("..")
    # Report the directory we ended up in.
    print(f"Changing the current working directory to {os.path.basename(os.getcwd())}")
# In[5]: | |
from argparse import Namespace

# Experiment configuration held in an argparse Namespace; the functions below
# read it through the module-level `args`.
args = Namespace(
    openai_key=None,
    deployment=None,
    openai_client_type="azure_openai",
    endpoint=None,
    api_version="2023-05-15",
    ac_scoring_method="ranking",
    conference="ICLR2024",
    num_reviewers_per_paper=3,
    ignore_missing_metareviews=False,
    overwrite=False,
    num_papers_per_area_chair=10,
    model_name="gpt-4o",
    output_dir="outputs",
    max_num_words=16384,
    visual_dir="outputs/visual",
    device="cuda",
    data_dir="./data",  # Directory containing the paper PDFs
    acceptance_rate=0.32,
    task="paper_review",
)

# Export the configured API version; this replaces the OPENAI_API_VERSION value
# assigned near the top of the file.
os.environ["OPENAI_API_VERSION"] = args.api_version
# In[13]: | |
# Role line-up for this experiment: one AC, three reviewers (the first one
# "malicious", the other two "BASELINE") and one author, plus global switches.
malicious_Rx1_setting = {
    "AC": ["BASELINE"],
    "reviewer": ["malicious", "BASELINE", "BASELINE"],
    "author": ["BASELINE"],
    "global_settings": {
        "provides_numeric_rating": ["reviewer", "ac"],
        "persons_aware_of_authors_identities": [],
    },
}

# Registry of settings keyed by experiment name.
all_settings = {"malicious_Rx1": malicious_Rx1_setting}

# NOTE(review): this name differs from the "malicious_Rx1" key used in
# all_settings above — confirm which spelling is intended.
args.experiment_name = "malicious_Rx1_setting"
# | |
# `malicious_Rx1` means 1 reviewer is a malicious reviewer, and the other reviewers are default (i.e. `BASELINE`) reviewers. | |
# | |
# | |
# ## Reviews | |
# | |
# Define the review pipeline | |
# In[10]: | |
from agentreview.environments import PaperReview | |
def review_one_paper(paper_id, setting):
    """Run the review simulation for one paper under one experiment setting.

    Looks up the paper's recorded decision in the module-level
    ``paper_id2decision`` mapping, builds the experiment settings and the
    players, wraps them in a ``PaperReview`` environment and launches a
    non-interactive ``PaperReviewArena`` CLI session.

    Args:
        paper_id: Key into ``paper_id2decision`` identifying the paper.
        setting: Setting forwarded to ``get_experiment_settings``
            (for example ``malicious_Rx1_setting``).

    Returns:
        None. Nothing is returned from the arena run.
    """
    decision = paper_id2decision[paper_id]
    settings_for_paper = get_experiment_settings(
        paper_id=paper_id,
        paper_decision=decision,
        setting=setting,
    )
    print(f"Paper ID: {paper_id} (Decision in {args.conference}: {decision})")

    participants = initialize_players(experiment_setting=settings_for_paper, args=args)
    environment = PaperReview(
        player_names=[participant.name for participant in participants],
        paper_decision=decision,
        paper_id=paper_id,
        args=args,
        experiment_setting=settings_for_paper,
    )

    # interactive=False: run the whole session without prompting.
    PaperReviewArena(players=participants, environment=environment, args=args).launch_cli(interactive=False)
# In[11]: | |
import os | |
import sys | |
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "agentreview"))) | |
from agentreview.paper_review_settings import get_experiment_settings | |
from agentreview.paper_review_arena import PaperReviewArena | |
from agentreview.utility.experiment_utils import initialize_players | |
from agentreview.utility.utils import project_setup, get_paper_decision_mapping | |
# In[14]: | |
# Papers to push through the review pipeline in this demo run.
sampled_paper_ids = [39]

# Decision lookups for the configured conference; review_one_paper reads
# paper_id2decision as a module-level name.
paper_id2decision, paper_decision2ids = get_paper_decision_mapping(args.data_dir, args.conference)

for paper_id in sampled_paper_ids:
    review_one_paper(paper_id, malicious_Rx1_setting)
def run_paper_decision():
    """Pick the papers that still need an AC decision for the current experiment.

    Switches ``args.task`` to ``"paper_decision"`` (a lasting change to the
    module-level ``args``), shuffles the paper IDs registered for
    ``args.conference`` and drops every ID found in the stored LLM AC
    decisions.

    Returns:
        list: The shuffled paper IDs that are not in the existing AC decisions.
    """
    # The file's top-level imports never bring this helper into scope, so the
    # call below would otherwise fail with NameError.
    # NOTE(review): presumably it lives next to get_paper_decision_mapping in
    # agentreview.utility.utils — confirm the module path.
    from agentreview.utility.utils import load_llm_ac_decisions

    args.task = "paper_decision"

    # The intent is that the same papers go through the same AC whichever
    # setting is chosen. The shuffle draws from NumPy's global RNG, so the
    # order is only repeatable if that RNG has been seeded beforehand.
    conference_paper_ids = const.year2paper_ids[args.conference]
    num_papers = len(conference_paper_ids)
    order = np.random.choice(range(num_papers), size=num_papers, replace=False)

    print("Shuffling paper IDs")
    shuffled_paper_ids = np.array(conference_paper_ids)[order]

    # Exclude papers that already have AC decisions.
    existing_ac_decisions = load_llm_ac_decisions(
        output_dir=args.output_dir,
        conference=args.conference,
        model_name=args.model_name,
        ac_scoring_method=args.ac_scoring_method,
        experiment_name=args.experiment_name,
        num_papers_per_area_chair=args.num_papers_per_area_chair,
    )
    # NOTE(review): assumes existing_ac_decisions supports membership tests by
    # paper ID — confirm the helper's return type.
    return [paper_id for paper_id in shuffled_paper_ids if paper_id not in existing_ac_decisions]
# In[ ]: | |