Spaces:

Ahren09
/

AgentReview

Running

AgentReview / demo.py

Yiqiao Jin

Update demo

53709ed 4 months ago

6.16 kB

	#!/usr/bin/env python
	# coding: utf-8

	# # AgentReview
	#
	#
	#
	# In this tutorial, you will explore customizing the AgentReview experiment.
	#
	# 📑 Venue: EMNLP 2024 (Oral)
	#
	# 🔗 arXiv: [https://arxiv.org/abs/2406.12708](https://arxiv.org/abs/2406.12708)
	#
	# 🌐 Website: [https://agentreview.github.io/](https://agentreview.github.io/)
	#
	# ```bibtex
	# @inproceedings{jin2024agentreview,
	# title={AgentReview: Exploring Peer Review Dynamics with LLM Agents},
	# author={Jin, Yiqiao and Zhao, Qinlin and Wang, Yiyang and Chen, Hao and Zhu, Kaijie and Xiao, Yijia and Wang, Jindong},
	# booktitle={EMNLP},
	# year={2024}
	# }
	# ```
	#

	# In[2]:


	import os

	import numpy as np

	from agentreview import const

	os.environ["OPENAI_API_VERSION"] = "2024-06-01-preview"


	# ## Overview
	#
	# AgentReview features a range of customizable variables, such as characteristics of reviewers, authors, area chairs (ACs), as well as the reviewing mechanisms

	# In[3]:



	# ## Review Pipeline
	#
	# The simulation adopts a structured, 5-phase pipeline (Section 2 in the [paper](https://arxiv.org/abs/2406.12708)):
	#
	# * I. Reviewer Assessment. Each manuscript is evaluated by three reviewers independently.
	# * II. Author-Reviewer Discussion. Authors submit rebuttals to address reviewers' concerns;
	# * III. Reviewer-AC Discussion. The AC facilitates discussions among reviewers, prompting updates to their initial assessments.
	# * IV. Meta-Review Compilation. The AC synthesizes the discussions into a meta-review.
	# * V. Paper Decision. The AC makes the final decision on whether to accept or reject the paper, based on all gathered inputs.

	# In[2]:



	# In[4]:


	import os

	if os.path.basename(os.getcwd()) == "notebooks":
	os.chdir("..")
	# Change the working directory to AgentReview
	print(f"Changing the current working directory to {os.path.basename(os.getcwd())}")


	# In[5]:


	from argparse import Namespace

	args = Namespace(openai_key=None,
	deployment=None,
	openai_client_type='azure_openai',
	endpoint=None,
	api_version='2023-05-15',
	ac_scoring_method='ranking',
	conference='ICLR2024',
	num_reviewers_per_paper=3,
	ignore_missing_metareviews=False,
	overwrite=False,
	num_papers_per_area_chair=10,
	model_name='gpt-4o',
	output_dir='outputs',
	max_num_words=16384,
	visual_dir='outputs/visual',
	device='cuda',
	data_dir='./data', # Directory to all paper PDF
	acceptance_rate=0.32,
	task='paper_review')

	os.environ['OPENAI_API_VERSION'] = args.api_version

	# In[13]:


	malicious_Rx1_setting = {
	"AC": [
	"BASELINE"
	],

	"reviewer": [
	"malicious",
	"BASELINE",
	"BASELINE"
	],

	"author": [
	"BASELINE"
	],
	"global_settings":{
	"provides_numeric_rating": ['reviewer', 'ac'],
	"persons_aware_of_authors_identities": []
	}
	}

	all_settings = {"malicious_Rx1": malicious_Rx1_setting}
	args.experiment_name = "malicious_Rx1_setting"


	#
	# `malicious_Rx1` means 1 reviewer is a malicious reviewer, and the other reviewers are default (i.e. `BASELINE`) reviewers.
	#
	#

	# ## Reviews
	#
	# Define the review pipeline

	# In[10]:


	from agentreview.environments import PaperReview

	def review_one_paper(paper_id, setting):
	paper_decision = paper_id2decision[paper_id]

	experiment_setting = get_experiment_settings(paper_id=paper_id,
	paper_decision=paper_decision,
	setting=setting)
	print(f"Paper ID: {paper_id} (Decision in {args.conference}: {paper_decision})")

	players = initialize_players(experiment_setting=experiment_setting, args=args)

	player_names = [player.name for player in players]

	env = PaperReview(player_names=player_names, paper_decision=paper_decision, paper_id=paper_id,
	args=args, experiment_setting=experiment_setting)

	arena = PaperReviewArena(players=players, environment=env, args=args)
	arena.launch_cli(interactive=False)


	# In[11]:


	import os
	import sys

	sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "agentreview")))

	from agentreview.paper_review_settings import get_experiment_settings
	from agentreview.paper_review_arena import PaperReviewArena
	from agentreview.utility.experiment_utils import initialize_players
	from agentreview.utility.utils import project_setup, get_paper_decision_mapping


	# In[14]:


	sampled_paper_ids = [39]

	paper_id2decision, paper_decision2ids = get_paper_decision_mapping(args.data_dir, args.conference)

	for paper_id in sampled_paper_ids:
	review_one_paper(paper_id, malicious_Rx1_setting)



	def run_paper_decision():
	args.task = "paper_decision"

	# Make sure the same set of papers always go through the same AC no matter which setting we choose
	NUM_PAPERS = len(const.year2paper_ids[args.conference])
	order = np.random.choice(range(NUM_PAPERS), size=NUM_PAPERS, replace=False)


	# Paper IDs we actually used in experiments
	experimental_paper_ids = []

	# For papers that have not been decided yet, load their metareviews
	metareviews = []
	print("Shuffling paper IDs")
	sampled_paper_ids = np.array(const.year2paper_ids[args.conference])[order]

	# Exclude papers that already have AC decisions
	existing_ac_decisions = load_llm_ac_decisions(output_dir=args.output_dir,
	conference=args.conference,
	model_name=args.model_name,
	ac_scoring_method=args.ac_scoring_method,
	experiment_name=args.experiment_name,
	num_papers_per_area_chair=args.num_papers_per_area_chair)

	sampled_paper_ids = [paper_id for paper_id in sampled_paper_ids if paper_id not in existing_ac_decisions]




	# In[ ]: