#!/usr/bin/env python
# coding: utf-8
# # AgentReview
#
# In this tutorial, you will explore customizing the AgentReview experiment.
#
# Venue: EMNLP 2024 (Oral)
#
# arXiv: [https://arxiv.org/abs/2406.12708](https://arxiv.org/abs/2406.12708)
#
# Website: [https://agentreview.github.io/](https://agentreview.github.io/)
#
# ```bibtex
# @inproceedings{jin2024agentreview,
#   title={AgentReview: Exploring Peer Review Dynamics with LLM Agents},
#   author={Jin, Yiqiao and Zhao, Qinlin and Wang, Yiyang and Chen, Hao and Zhu, Kaijie and Xiao, Yijia and Wang, Jindong},
#   booktitle={EMNLP},
#   year={2024}
# }
# ```
#
# In[2]:
import os
import numpy as np
from agentreview import const
os.environ["OPENAI_API_VERSION"] = "2024-06-01-preview"
# ## Overview
#
# AgentReview features a range of customizable variables, such as the characteristics of reviewers, authors, and area chairs (ACs), as well as the reviewing mechanisms themselves. An illustrative sketch of the reviewer persona dimensions follows below.
# In[3]:
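# The dictionary below is a small illustration only (dimension names are adapted
# from the paper, not constants exported by the `agentreview` package): the paper
# varies reviewer commitment, intention, and knowledgeability against BASELINE agents.
example_reviewer_personas = {
    "commitment": ["responsible", "irresponsible"],
    "intention": ["benign", "malicious"],
    "knowledgeability": ["knowledgeable", "unknowledgeable"],
}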
# ## Review Pipeline
#
# The simulation adopts a structured, five-phase pipeline (Section 2 in the [paper](https://arxiv.org/abs/2406.12708)); a minimal sketch of the phases follows the list:
#
# * **I. Reviewer Assessment.** Each manuscript is evaluated by three reviewers independently.
# * **II. Author-Reviewer Discussion.** Authors submit rebuttals to address the reviewers' concerns.
# * **III. Reviewer-AC Discussion.** The AC facilitates discussions among reviewers, prompting updates to their initial assessments.
# * **IV. Meta-Review Compilation.** The AC synthesizes the discussions into a meta-review.
# * **V. Paper Decision.** The AC makes the final decision on whether to accept or reject the paper, based on all gathered inputs.
# In[2]:
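# A minimal sketch of the pipeline as an ordered list of phases (the phase names
# here are illustrative; the actual transitions are driven by the PaperReview
# environment used later in this notebook):
REVIEW_PHASES = [
    "reviewer_assessment",         # I.   independent reviews
    "author_reviewer_discussion",  # II.  rebuttal
    "reviewer_ac_discussion",      # III. reviewers update assessments
    "meta_review_compilation",     # IV.  AC writes the meta-review
    "paper_decision",              # V.   accept / reject
]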
# In[4]:
import os
if os.path.basename(os.getcwd()) == "notebooks":
os.chdir("..")
# Change the working directory to AgentReview
print(f"Changing the current working directory to {os.path.basename(os.getcwd())}")
# In[5]:
from argparse import Namespace
args = Namespace(openai_key=None,
deployment=None,
openai_client_type='azure_openai',
endpoint=None,
api_version='2023-05-15',
ac_scoring_method='ranking',
conference='ICLR2024',
num_reviewers_per_paper=3,
ignore_missing_metareviews=False,
overwrite=False,
num_papers_per_area_chair=10,
model_name='gpt-4o',
output_dir='outputs',
max_num_words=16384,
visual_dir='outputs/visual',
device='cuda',
                 data_dir='./data', # Directory containing all paper PDFs
acceptance_rate=0.32,
task='paper_review')
os.environ['OPENAI_API_VERSION'] = args.api_version  # overrides the version set at the top of this notebook
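# For reference, this is how arguments like `api_version` would typically map onto
# an Azure OpenAI client. An illustrative sketch only, not AgentReview's internal
# wiring; the two environment variables are standard openai-SDK names that you
# must set yourself, so the block is guarded to stay a no-op otherwise.
if os.environ.get("AZURE_OPENAI_API_KEY") and os.environ.get("AZURE_OPENAI_ENDPOINT"):
    from openai import AzureOpenAI
    client = AzureOpenAI(
        api_key=os.environ["AZURE_OPENAI_API_KEY"],
        api_version=args.api_version,
        azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
    )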
# In[13]:
malicious_Rx1_setting = {
"AC": [
"BASELINE"
],
"reviewer": [
"malicious",
"BASELINE",
"BASELINE"
],
"author": [
"BASELINE"
],
"global_settings":{
"provides_numeric_rating": ['reviewer', 'ac'],
"persons_aware_of_authors_identities": []
}
}
all_settings = {"malicious_Rx1": malicious_Rx1_setting}
args.experiment_name = "malicious_Rx1_setting"
#
# `malicious_Rx1` means one of the three reviewers is malicious, while the other two are default (i.e., `BASELINE`) reviewers.
#
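#
# Additional variants can be registered in `all_settings` the same way. Below is a
# hypothetical variant (not from the original notebook) in which all three
# reviewers are malicious:
malicious_Rx3_setting = {
    "AC": ["BASELINE"],
    "reviewer": ["malicious", "malicious", "malicious"],
    "author": ["BASELINE"],
    "global_settings": {
        "provides_numeric_rating": ['reviewer', 'ac'],
        "persons_aware_of_authors_identities": []
    }
}
all_settings["malicious_Rx3"] = malicious_Rx3_setting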
#
# ## Reviews
#
# Define the review pipeline
# In[10]:
from agentreview.environments import PaperReview

def review_one_paper(paper_id, setting):
    # Look up the paper's real-world decision (e.g. from ICLR 2024) for reference
    paper_decision = paper_id2decision[paper_id]

    # Build the per-paper experiment settings under the chosen agent setting
    experiment_setting = get_experiment_settings(paper_id=paper_id,
                                                 paper_decision=paper_decision,
                                                 setting=setting)
    print(f"Paper ID: {paper_id} (Decision in {args.conference}: {paper_decision})")

    # Instantiate the reviewer, author, and AC agents
    players = initialize_players(experiment_setting=experiment_setting, args=args)
    player_names = [player.name for player in players]

    # Create the review environment and run the full pipeline non-interactively
    env = PaperReview(player_names=player_names, paper_decision=paper_decision, paper_id=paper_id,
                      args=args, experiment_setting=experiment_setting)
    arena = PaperReviewArena(players=players, environment=env, args=args)
    arena.launch_cli(interactive=False)
# In[11]:
import os
import sys
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "agentreview")))
from agentreview.paper_review_settings import get_experiment_settings
from agentreview.paper_review_arena import PaperReviewArena
from agentreview.utility.experiment_utils import initialize_players
from agentreview.utility.utils import project_setup, get_paper_decision_mapping, load_llm_ac_decisions
# In[14]:
sampled_paper_ids = [39]
paper_id2decision, paper_decision2ids = get_paper_decision_mapping(args.data_dir, args.conference)
for paper_id in sampled_paper_ids:
review_one_paper(paper_id, malicious_Rx1_setting)
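# `all_settings` can hold multiple configurations. Below is a sketch of sweeping
# every registered setting over the same sampled papers (skip it if you only need
# the single run above; deriving `experiment_name` from the dict key is an
# assumption, not something the original notebook does):
for setting_name, setting in all_settings.items():
    args.experiment_name = setting_name
    for paper_id in sampled_paper_ids:
        review_one_paper(paper_id, setting)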
def run_paper_decision():
    args.task = "paper_decision"

    # Make sure the same set of papers always go through the same AC no matter
    # which setting we choose. Note: this shuffle is only reproducible across
    # settings if the NumPy random seed has been fixed beforehand.
    NUM_PAPERS = len(const.year2paper_ids[args.conference])
    order = np.random.choice(range(NUM_PAPERS), size=NUM_PAPERS, replace=False)

    # Paper IDs we actually used in experiments
    experimental_paper_ids = []

    # For papers that have not been decided yet, load their metareviews
    metareviews = []

    print("Shuffling paper IDs")
    sampled_paper_ids = np.array(const.year2paper_ids[args.conference])[order]

    # Exclude papers that already have AC decisions
    existing_ac_decisions = load_llm_ac_decisions(output_dir=args.output_dir,
                                                  conference=args.conference,
                                                  model_name=args.model_name,
                                                  ac_scoring_method=args.ac_scoring_method,
                                                  experiment_name=args.experiment_name,
                                                  num_papers_per_area_chair=args.num_papers_per_area_chair)
    sampled_paper_ids = [paper_id for paper_id in sampled_paper_ids
                         if paper_id not in existing_ac_decisions]
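    # Hypothetical continuation (not in the original notebook): group the remaining
    # papers into one batch per area chair; the actual decision phase would then be
    # run on each batch via AgentReview's "paper_decision" task.
    batch_size = args.num_papers_per_area_chair
    ac_batches = [sampled_paper_ids[i:i + batch_size]
                  for i in range(0, len(sampled_paper_ids), batch_size)]
    print(f"{len(ac_batches)} AC batch(es) of up to {batch_size} papers each")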
# In[ ]: