Spaces:

Ahren09
/

AgentReview

Running

File size: 6,160 Bytes

53709ed

#!/usr/bin/env python
# coding: utf-8

# # AgentReview
# 
# 
# 
# In this tutorial, you will explore customizing the AgentReview experiment.
# 
# 📑 Venue: EMNLP 2024 (Oral)
# 
# 🔗 arXiv: [https://arxiv.org/abs/2406.12708](https://arxiv.org/abs/2406.12708)
# 
# 🌐 Website: [https://agentreview.github.io/](https://agentreview.github.io/)
# 
# ```bibtex
# @inproceedings{jin2024agentreview,
#   title={AgentReview: Exploring Peer Review Dynamics with LLM Agents},
#   author={Jin, Yiqiao and Zhao, Qinlin and Wang, Yiyang and Chen, Hao and Zhu, Kaijie and Xiao, Yijia and Wang, Jindong},
#   booktitle={EMNLP},
#   year={2024}
# }
# ```
# 

# In[2]:


import os

import numpy as np

from agentreview import const

os.environ["OPENAI_API_VERSION"] = "2024-06-01-preview"


# ## Overview
# 
# AgentReview features a range of customizable variables, such as characteristics of reviewers, authors, area chairs (ACs), as well as the reviewing mechanisms 

# In[3]:



# ## Review Pipeline
# 
# The simulation adopts a structured, 5-phase pipeline (Section 2 in the [paper](https://arxiv.org/abs/2406.12708)):
# 
# * **I. Reviewer Assessment.** Each manuscript is evaluated by three reviewers independently.
# * **II. Author-Reviewer Discussion.** Authors submit rebuttals to address reviewers' concerns;
# * **III. Reviewer-AC Discussion.** The AC facilitates discussions among reviewers, prompting updates to their initial assessments.
# * **IV. Meta-Review Compilation.** The AC synthesizes the discussions into a meta-review.
# * **V. Paper Decision.** The AC makes the final decision on whether to accept or reject the paper, based on all gathered inputs.

# In[2]:



# In[4]:


import os

if os.path.basename(os.getcwd()) == "notebooks":
    os.chdir("..")
# Change the working directory to AgentReview
print(f"Changing the current working directory to {os.path.basename(os.getcwd())}")


# In[5]:


from argparse import Namespace

args = Namespace(openai_key=None, 
          deployment=None, 
          openai_client_type='azure_openai', 
          endpoint=None, 
          api_version='2023-05-15',
          ac_scoring_method='ranking', 
          conference='ICLR2024', 
          num_reviewers_per_paper=3,  
          ignore_missing_metareviews=False, 
          overwrite=False, 
          num_papers_per_area_chair=10, 
          model_name='gpt-4o', 
          output_dir='outputs', 
          max_num_words=16384, 
          visual_dir='outputs/visual', 
          device='cuda', 
          data_dir='./data', # Directory to all paper PDF
          acceptance_rate=0.32, 
          task='paper_review')

os.environ['OPENAI_API_VERSION'] = args.api_version

# In[13]:


malicious_Rx1_setting = {
    "AC": [
        "BASELINE"
    ],

    "reviewer": [
        "malicious",
        "BASELINE",
        "BASELINE"
    ],

    "author": [
        "BASELINE"
    ],
    "global_settings":{
        "provides_numeric_rating": ['reviewer', 'ac'],
        "persons_aware_of_authors_identities": []
    }
}

all_settings = {"malicious_Rx1": malicious_Rx1_setting}
args.experiment_name = "malicious_Rx1_setting"


# 
# `malicious_Rx1` means 1 reviewer is a malicious reviewer, and the other reviewers are default (i.e. `BASELINE`) reviewers.
# 
# 

# ## Reviews
# 
# Define the review pipeline

# In[10]:


from agentreview.environments import PaperReview

def review_one_paper(paper_id, setting):
    paper_decision = paper_id2decision[paper_id]

    experiment_setting = get_experiment_settings(paper_id=paper_id,
                                                 paper_decision=paper_decision,
                                                 setting=setting)
    print(f"Paper ID: {paper_id} (Decision in {args.conference}: {paper_decision})")

    players = initialize_players(experiment_setting=experiment_setting, args=args)

    player_names = [player.name for player in players]

    env = PaperReview(player_names=player_names, paper_decision=paper_decision, paper_id=paper_id,
                          args=args, experiment_setting=experiment_setting)

    arena = PaperReviewArena(players=players, environment=env, args=args)
    arena.launch_cli(interactive=False)


# In[11]:


import os
import sys

sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "agentreview")))

from agentreview.paper_review_settings import get_experiment_settings
from agentreview.paper_review_arena import PaperReviewArena
from agentreview.utility.experiment_utils import initialize_players
from agentreview.utility.utils import project_setup, get_paper_decision_mapping


# In[14]:


sampled_paper_ids = [39]

paper_id2decision, paper_decision2ids = get_paper_decision_mapping(args.data_dir, args.conference)

for paper_id in sampled_paper_ids:
    review_one_paper(paper_id, malicious_Rx1_setting)



def run_paper_decision():
    args.task = "paper_decision"

    # Make sure the same set of papers always go through the same AC no matter which setting we choose
    NUM_PAPERS = len(const.year2paper_ids[args.conference])
    order = np.random.choice(range(NUM_PAPERS), size=NUM_PAPERS, replace=False)


    # Paper IDs we actually used in experiments
    experimental_paper_ids = []

    # For papers that have not been decided yet, load their metareviews
    metareviews = []
    print("Shuffling paper IDs")
    sampled_paper_ids = np.array(const.year2paper_ids[args.conference])[order]

    # Exclude papers that already have AC decisions
    existing_ac_decisions = load_llm_ac_decisions(output_dir=args.output_dir,
                                                             conference=args.conference,
                                                             model_name=args.model_name,
                                                             ac_scoring_method=args.ac_scoring_method,
                                                             experiment_name=args.experiment_name,
                                                             num_papers_per_area_chair=args.num_papers_per_area_chair)

    sampled_paper_ids = [paper_id for paper_id in sampled_paper_ids if paper_id not in existing_ac_decisions]




# In[ ]: