#!/usr/bin/env python
# coding: utf-8
# # AgentReview
#
#
#
# In this tutorial, you will explore customizing the AgentReview experiment.
#
# 📑 Venue: EMNLP 2024 (Oral)
#
# 🔗 arXiv: [https://arxiv.org/abs/2406.12708](https://arxiv.org/abs/2406.12708)
#
# 🌐 Website: [https://agentreview.github.io/](https://agentreview.github.io/)
#
# ```bibtex
# @inproceedings{jin2024agentreview,
#   title={AgentReview: Exploring Peer Review Dynamics with LLM Agents},
#   author={Jin, Yiqiao and Zhao, Qinlin and Wang, Yiyang and Chen, Hao and Zhu, Kaijie and Xiao, Yijia and Wang, Jindong},
#   booktitle={EMNLP},
#   year={2024}
# }
# ```
#
# In[2]:
import os
import numpy as np
from agentreview import const
os.environ["OPENAI_API_VERSION"] = "2024-06-01-preview"
# ## Overview
#
# AgentReview features a range of customizable variables, such as the characteristics of reviewers, authors, and area chairs (ACs), as well as the reviewing mechanisms themselves.
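#
# For instance, the paper varies reviewers along commitment (responsible vs. irresponsible), intention (benign vs. malicious), and knowledgeability (knowledgeable vs. unknowledgeable), and varies AC styles (e.g., authoritarian, conformist, inclusive). The constants below are a reference list for this tutorial only; the demo itself uses the labels `malicious` and `BASELINE` shown later.

# In[ ]:
# Axes of variation studied in the paper. Illustrative reference only; the exact
# label strings accepted by the released code may differ.
REVIEWER_TRAITS = {
    "commitment": ["responsible", "irresponsible"],
    "intention": ["benign", "malicious"],
    "knowledgeability": ["knowledgeable", "unknowledgeable"],
}
AC_STYLES = ["authoritarian", "conformist", "inclusive"]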
# ## Review Pipeline
#
# The simulation adopts a structured, 5-phase pipeline (Section 2 in the [paper](https://arxiv.org/abs/2406.12708)); see the sketch after this list:
#
# * **I. Reviewer Assessment.** Each manuscript is independently evaluated by three reviewers.
# * **II. Author-Reviewer Discussion.** Authors submit rebuttals to address the reviewers' concerns.
# * **III. Reviewer-AC Discussion.** The AC facilitates discussions among reviewers, prompting updates to their initial assessments.
# * **IV. Meta-Review Compilation.** The AC synthesizes the discussions into a meta-review.
# * **V. Paper Decision.** The AC makes the final decision to accept or reject the paper, based on all gathered inputs.
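#
# The five phase names, collected as descriptive labels (for this tutorial only; the `PaperReview` environment used below manages its phase transitions internally):

# In[ ]:
REVIEW_PIPELINE_PHASES = [
    "I. Reviewer Assessment",
    "II. Author-Reviewer Discussion",
    "III. Reviewer-AC Discussion",
    "IV. Meta-Review Compilation",
    "V. Paper Decision",
]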
# In[4]:
import os
# Change the working directory to the AgentReview project root
if os.path.basename(os.getcwd()) == "notebooks":
    os.chdir("..")
    print(f"Changing the current working directory to {os.path.basename(os.getcwd())}")
# In[5]:
from argparse import Namespace
args = Namespace(openai_key=None,
                 deployment=None,
                 openai_client_type='azure_openai',
                 endpoint=None,
                 api_version='2023-05-15',
                 ac_scoring_method='ranking',
                 conference='ICLR2024',
                 num_reviewers_per_paper=3,
                 ignore_missing_metareviews=False,
                 overwrite=False,
                 num_papers_per_area_chair=10,
                 model_name='gpt-4o',
                 output_dir='outputs',
                 max_num_words=16384,
                 visual_dir='outputs/visual',
                 device='cuda',
                 data_dir='./data',  # Directory containing all paper PDFs
                 acceptance_rate=0.32,
                 task='paper_review')
os.environ['OPENAI_API_VERSION'] = args.api_version
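#
# The credentials above are left as `None`. A minimal sketch for supplying them from environment variables (the variable names are assumptions for this tutorial, not part of the original demo; adapt them to your Azure OpenAI resource):

# In[ ]:
# Assumed environment variable names; export them in your shell before running.
args.openai_key = os.environ.get("AZURE_OPENAI_API_KEY")
args.endpoint = os.environ.get("AZURE_OPENAI_ENDPOINT")      # e.g. "https://<resource>.openai.azure.com/"
args.deployment = os.environ.get("AZURE_OPENAI_DEPLOYMENT")  # name of your gpt-4o deployment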
# In[13]:
malicious_Rx1_setting = {
    "AC": [
        "BASELINE"
    ],
    "reviewer": [
        "malicious",
        "BASELINE",
        "BASELINE"
    ],
    "author": [
        "BASELINE"
    ],
    "global_settings": {
        "provides_numeric_rating": ['reviewer', 'ac'],
        "persons_aware_of_authors_identities": []
    }
}
all_settings = {"malicious_Rx1": malicious_Rx1_setting}
args.experiment_name = "malicious_Rx1_setting"
#
# `malicious_Rx1` means that one of the three reviewers is malicious, while the other two are default (i.e. `BASELINE`) reviewers.
#
#
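# To compare against an unperturbed run, you can register a second configuration in which every participant keeps the default behavior. This block mirrors the schema of `malicious_Rx1_setting` above and is an illustrative addition, not part of the original demo.

# In[ ]:
baseline_setting = {
    "AC": ["BASELINE"],
    "reviewer": ["BASELINE", "BASELINE", "BASELINE"],
    "author": ["BASELINE"],
    "global_settings": {
        "provides_numeric_rating": ['reviewer', 'ac'],
        "persons_aware_of_authors_identities": []
    }
}
all_settings["BASELINE"] = baseline_setting
#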
# ## Reviews
#
# Define the review pipeline
# In[10]:
from agentreview.environments import PaperReview


def review_one_paper(paper_id, setting):
    """Run the full five-phase review pipeline for a single paper under `setting`."""
    paper_decision = paper_id2decision[paper_id]

    experiment_setting = get_experiment_settings(paper_id=paper_id,
                                                 paper_decision=paper_decision,
                                                 setting=setting)

    print(f"Paper ID: {paper_id} (Decision in {args.conference}: {paper_decision})")

    players = initialize_players(experiment_setting=experiment_setting, args=args)
    player_names = [player.name for player in players]

    env = PaperReview(player_names=player_names, paper_decision=paper_decision, paper_id=paper_id,
                      args=args, experiment_setting=experiment_setting)

    arena = PaperReviewArena(players=players, environment=env, args=args)
    arena.launch_cli(interactive=False)
# In[11]:
import os
import sys
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "agentreview")))
from agentreview.paper_review_settings import get_experiment_settings
from agentreview.paper_review_arena import PaperReviewArena
from agentreview.utility.experiment_utils import initialize_players
from agentreview.utility.utils import project_setup, get_paper_decision_mapping
# load_llm_ac_decisions is called in run_paper_decision below; it is assumed here to
# live with the other helpers in agentreview.utility.utils
from agentreview.utility.utils import load_llm_ac_decisions
# In[14]:
# Run the pipeline on a single sampled paper
sampled_paper_ids = [39]

paper_id2decision, paper_decision2ids = get_paper_decision_mapping(args.data_dir, args.conference)

for paper_id in sampled_paper_ids:
    review_one_paper(paper_id, malicious_Rx1_setting)
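#
# To sweep every registered setting over the sampled papers, you could iterate over `all_settings` (an illustrative sketch; the original demo runs only the single `malicious_Rx1` setting above, so this is left commented out to avoid duplicate API calls):

# In[ ]:
# for setting_name, setting in all_settings.items():
#     args.experiment_name = setting_name
#     for paper_id in sampled_paper_ids:
#         review_one_paper(paper_id, setting)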
def run_paper_decision():
    args.task = "paper_decision"

    # Make sure the same set of papers always goes through the same AC no matter which setting we choose
    NUM_PAPERS = len(const.year2paper_ids[args.conference])
    order = np.random.choice(range(NUM_PAPERS), size=NUM_PAPERS, replace=False)

    # Paper IDs we actually used in experiments
    experimental_paper_ids = []

    # For papers that have not been decided yet, load their metareviews
    metareviews = []

    print("Shuffling paper IDs")
    sampled_paper_ids = np.array(const.year2paper_ids[args.conference])[order]

    # Exclude papers that already have AC decisions
    existing_ac_decisions = load_llm_ac_decisions(output_dir=args.output_dir,
                                                  conference=args.conference,
                                                  model_name=args.model_name,
                                                  ac_scoring_method=args.ac_scoring_method,
                                                  experiment_name=args.experiment_name,
                                                  num_papers_per_area_chair=args.num_papers_per_area_chair)

    sampled_paper_ids = [paper_id for paper_id in sampled_paper_ids if paper_id not in existing_ac_decisions]
# In[ ]: