Эх сурвалжийг харах

remove unused swebench scripts

Xingyao Wang 1 жил өмнө
parent
commit
7b6ae3638e

+ 0 - 39
evaluation/swe_bench/BUILD_TESTBED_AND_ENV.md

@@ -1,39 +0,0 @@
-# Pre-build Testbed and Env
-
-In the original SWE-Bench implementation, the conda environment for evaluation is typically installed from scratch while evaluating on a particular instance. This poses several challenges:
-
-- Efficiency: most of the evaluation time is wasted on downloading packages
-- Stability: setup could fail due to bad internet connectivity
-- Reliability: it is possible that an instance is considered failed not because the agent did badly, but because the environment setup failed.
-
-In the OpenDevin-SWE-Bench fork, we try to pre-build the **testbed** (i.e., the code of the repository we want the agent to edit) AND the **conda environment**, so that at evaluation (inference) time, we can directly leverage the existing environments for efficient evaluation.
-
-NOTE: We only support SWE-Bench Lite for now, but modifying our existing scripts for the full SWE-Bench should be quite straightforward.
-
-## How to pre-build your testbed
-
-### Setup Eval Workspace (Util + Data)
-
-Setup your eval workspace by:
-1. Clone OpenDevin SWE-Bench [fork](https://github.com/OpenDevin/OD-SWE-bench.git)
-2. Prepare SWE-Bench data
-
-Run the following command to do the above two steps. The results will be saved to `evaluation/SWE-bench/eval_workspace`.
-
-```bash
-./evaluation/swe_bench/scripts/setup/prepare_swe_utils.sh
-```
-
-### Pre-build Conda Env and Test Bed
-
-```bash
-./evaluation/swe_bench/scripts/setup/swe_env_setup.sh
-```
-
-### Build the pre-built conda env and testbed into ONE docker image
-
-```bash
-pushd evaluation/swe_bench
-docker build -t ghcr.io/opendevin/eval-swe-bench:full-v1.2.1 -f ./scripts/docker/Dockerfile.full.v1.2.1 .
-docker push ghcr.io/opendevin/eval-swe-bench:full-v1.2.1
-```

+ 0 - 17
evaluation/swe_bench/scripts/docker/Dockerfile.builder

@@ -1,17 +0,0 @@
-FROM ghcr.io/opendevin/sandbox:main
-
-RUN apt-get update && \
-    apt-get install -y libffi-dev bash gcc git jq wget pkg-config libfreetype-dev libfreetype6 libfreetype6-dev rsync && \
-    apt-get clean && \
-    rm -rf /var/lib/apt/lists/*
-
-RUN ln -sfn /bin/bash /bin/sh
-RUN mkdir -p /opendevin/logs && chmod 777 /opendevin/logs
-
-# Setup Git
-RUN git config --global user.email "swebench@swebench.ai"
-RUN git config --global user.name "swebench"
-
-CMD ["/bin/bash"]
-# pushd evaluation/swe_bench
-# docker build -t ghcr.io/opendevin/eval-swe-bench:builder -f ./scripts/docker/Dockerfile.builder .

+ 0 - 19
evaluation/swe_bench/scripts/docker/Dockerfile.builder_with_conda

@@ -1,19 +0,0 @@
-FROM ghcr.io/opendevin/eval-swe-bench:builder
-
-# # Install Mamba/Conda
-RUN wget "https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-$(uname)-$(uname -m).sh"
-# install to /opt/miniforge3
-RUN mkdir /swe_util
-RUN bash Miniforge3-$(uname)-$(uname -m).sh -b -p /swe_util/miniforge3
-RUN export PATH=/swe_util/miniforge3/bin:$PATH
-RUN /swe_util/miniforge3/bin/mamba init bash
-
-# Setup SWE-Bench Eval Env
-RUN /bin/bash -c "/swe_util/miniforge3/bin/mamba create -n swe-bench-eval python==3.11.5 -y"
-RUN /bin/bash -c ". /swe_util/miniforge3/etc/profile.d/conda.sh && conda activate swe-bench-eval && \
-pip install requests python-dotenv GitPython datasets pandas beautifulsoup4 ghapi"
-RUN /bin/bash -c ". /swe_util/miniforge3/etc/profile.d/conda.sh && conda config --set changeps1 False && conda config --append channels conda-forge"
-
-CMD ["/bin/bash"]
-# pushd evaluation/swe_bench
-# docker build -t ghcr.io/opendevin/eval-swe-bench:builder_with_conda -f ./scripts/docker/Dockerfile.builder_with_conda .

+ 0 - 13
evaluation/swe_bench/scripts/docker/Dockerfile.full.v1.1

@@ -1,13 +0,0 @@
-FROM ghcr.io/opendevin/eval-swe-bench:full_deps
-
-# ================== COPY Smaller things ==================
-# copy everything except the folder of `eval_data` or `miniforge3`
-# typically, this should be the OD codebase
-RUN --mount=type=bind,source=./eval_workspace,target=/eval_workspace \
-    rsync -ar --progress \
-    --exclude='eval_data' \
-    --exclude='miniforge3' \
-    /eval_workspace/ /swe_util/
-
-# pushd evaluation/SWE-bench
-# docker build -t ghcr.io/opendevin/eval-swe-bench:full-v1.1 -f ./scripts/docker/Dockerfile.full.v1.1 .

+ 0 - 12
evaluation/swe_bench/scripts/docker/Dockerfile.full.v1.2

@@ -1,12 +0,0 @@
-FROM ghcr.io/opendevin/eval-swe-bench:full-v1.1
-
-RUN apt-get update && apt-get install -y \
-    libgl1-mesa-glx \
-    && rm -rf /var/lib/apt/lists/*
-
-# install basic dependencies for CodeActAgent
-RUN pip3 install --upgrade pip
-RUN pip3 install jupyterlab notebook jupyter_kernel_gateway flake8
-# TODO: those dependencies are needed for agentskills, we should pack them in a new sandbox image
-RUN pip3 install python-docx PyPDF2 python-pptx pylatexenc openai opencv-python
-# docker build -t ghcr.io/opendevin/eval-swe-bench:full-v1.2 -f ./scripts/docker/Dockerfile.full.v1.2 .

+ 0 - 10
evaluation/swe_bench/scripts/docker/Dockerfile.full.v1.2.1

@@ -1,10 +0,0 @@
-FROM ghcr.io/opendevin/eval-swe-bench:full-v1.2
-
-# ================== Update OD-SWE-Bench ==================
-# copy everything except the folder of `eval_data` or `miniforge3`
-# typically, this should be the OD codebase
-RUN --mount=type=bind,source=./eval_workspace,target=/eval_workspace \
-    rsync -ar --progress /eval_workspace/OD-SWE-bench/ /swe_util/OD-SWE-bench
-
-# pushd evaluation/SWE-bench
-# docker build -t ghcr.io/opendevin/eval-swe-bench:full-v1.2.1 -f ./scripts/docker/Dockerfile.full.v1.2.1 .

+ 0 - 72
evaluation/swe_bench/scripts/docker/Dockerfile.full_deps

@@ -1,72 +0,0 @@
-FROM ghcr.io/opendevin/eval-swe-bench:builder
-
-# This Dockerfile is used to build the Docker image for the evaluation of the SWE-Bench.
-# YOU SHOULD ENSURE ./eval_workspace CONTAINS THE EVALUATION WORKSPACE (testbed, conda)
-# Check BUILD_TESTBED_AND_ENV.md for more details.
-
-RUN mkdir -p /swe_util
-
-# Use https://github.com/moby/moby/issues/15771#issuecomment-1762893340
-# to copy files from host to container with --exclude
-
-# # ================== Prepare Eval Data ==================
-# Copy everything in eval_data except the "testbeds"
-RUN --mount=type=bind,source=./eval_workspace,target=/eval_workspace \
-    rsync -ar --progress \
-    --exclude='testbeds' \
-    /eval_workspace/eval_data /swe_util/
-
-RUN --mount=type=bind,source=./eval_workspace,target=/eval_workspace \
-    rsync -ar --progress \
-    --exclude='matplotlib*' \
-    --exclude='scikit-learn*' \
-    /eval_workspace/eval_data/testbeds /swe_util/eval_data/
-
-# # copy the larger ones in separate layers
-# COPY ./eval_workspace/eval_data/testbeds/matplotlib* /swe_util/eval_data/testbeds/
-RUN --mount=type=bind,source=./eval_workspace,target=/eval_workspace \
-    rsync -ar --progress \
-    /eval_workspace/eval_data/testbeds/matplotlib* /swe_util/eval_data/testbeds/
-
-# COPY ./eval_workspace/eval_data/testbeds/scikit-learn* /swe_util/eval_data/testbeds/
-RUN --mount=type=bind,source=./eval_workspace,target=/eval_workspace \
-    rsync -ar --progress \
-    /eval_workspace/eval_data/testbeds/scikit-learn* /swe_util/eval_data/testbeds/
-
-# ================== Prepare Miniconda3 ==================
-# Copy the Miniconda3 environment
-# copy everything except the folder of `envs` & `pkgs` (two large folders)
-RUN --mount=type=bind,source=./eval_workspace,target=/eval_workspace \
-    rsync -ar --progress \
-    --exclude='envs' \
-    --exclude='pkgs' \
-    /eval_workspace/miniforge3 /swe_util/
-
-# copy pkgs in separate layers (~9.4GB)
-RUN --mount=type=bind,source=./eval_workspace,target=/eval_workspace \
-    rsync -ar --progress \
-    /eval_workspace/miniforge3/pkgs /swe_util/miniforge3/
-
-# copy envs in separate layers (except matplotlib & scikit-learn - larger ones)
-RUN --mount=type=bind,source=./eval_workspace,target=/eval_workspace \
-    rsync -ar --progress \
-    --exclude='matplotlib*' \
-    --exclude='scikit-learn*' \
-    --exclude='pydata*' \
-    /eval_workspace/miniforge3/envs /swe_util/miniforge3/
-
-# copy the larger ones in separate layers
-RUN --mount=type=bind,source=./eval_workspace,target=/eval_workspace \
-    rsync -ar --progress \
-    /eval_workspace/miniforge3/envs/matplotlib* /swe_util/miniforge3/envs/
-
-RUN --mount=type=bind,source=./eval_workspace,target=/eval_workspace \
-    rsync -ar --progress \
-    /eval_workspace/miniforge3/envs/scikit-learn* /swe_util/miniforge3/envs/
-
-RUN --mount=type=bind,source=./eval_workspace,target=/eval_workspace \
-    rsync -ar --progress \
-    /eval_workspace/miniforge3/envs/pydata* /swe_util/miniforge3/envs/
-
-# pushd evaluation/SWE-bench
-# docker build -t ghcr.io/opendevin/eval-swe-bench:full_deps -f ./scripts/docker/Dockerfile.full_deps .

+ 0 - 13
evaluation/swe_bench/scripts/docker/README.md

@@ -1,13 +0,0 @@
-# Docker Build Guide
-
-## Builder
-
-This builds the Docker image used by `evaluation/swe_bench/scripts/setup/prepare_swe_utils.sh`, which downloads the datasets.
-
-```bash
-pushd evaluation/swe_bench
-# This builds base image with basic dependencies
-docker build -t ghcr.io/opendevin/eval-swe-bench:builder -f ./scripts/docker/Dockerfile.builder .
-# This builds image with SWE-Bench conda environment pre-installed
-docker build -t ghcr.io/opendevin/eval-swe-bench:builder_with_conda -f ./scripts/docker/Dockerfile.builder_with_conda .
-```

+ 0 - 34
evaluation/swe_bench/scripts/eval/download_swe_bench_data.py

@@ -1,34 +0,0 @@
-import argparse
-import json
-
-import pandas as pd
-from datasets import load_dataset
-
-parser = argparse.ArgumentParser()
-parser.add_argument(
-    'output_dir',
-    type=str,
-    default='eval_data/instances',
-    help='Path to the directory to save the instances.',
-)
-args = parser.parse_args()
-
-dataset = load_dataset('princeton-nlp/SWE-bench')
-test = dataset['test'].to_pandas()
-test['FAIL_TO_PASS'] = test['FAIL_TO_PASS'].apply(json.loads)
-test['PASS_TO_PASS'] = test['PASS_TO_PASS'].apply(json.loads)
-test.to_json(f'{args.output_dir}/swe-bench-test.json', orient='records')
-
-dataset = load_dataset('princeton-nlp/SWE-bench_Lite')
-test = dataset['test'].to_pandas()
-test['FAIL_TO_PASS'] = test['FAIL_TO_PASS'].apply(json.loads)
-test['PASS_TO_PASS'] = test['PASS_TO_PASS'].apply(json.loads)
-test.to_json(f'{args.output_dir}/swe-bench-lite-test.json', orient='records')
-
-dev = dataset['dev'].to_pandas()
-dev['FAIL_TO_PASS'] = dev['FAIL_TO_PASS'].apply(json.loads)
-dev['PASS_TO_PASS'] = dev['PASS_TO_PASS'].apply(json.loads)
-dev.to_json(f'{args.output_dir}/swe-bench-lite-dev.json', orient='records')
-
-all_data = pd.concat([test, dev])
-all_data.to_json(f'{args.output_dir}/swe-bench-lite-all.json', orient='records')

+ 0 - 81
evaluation/swe_bench/scripts/setup/_swe_env_setup.sh

@@ -1,81 +0,0 @@
-#!/bin/bash
-# THIS SCRIPT ONLY NEED TO BE RUN ONCE BEFORE EVALUATION
-set -e
-
-function setup_environment_and_testbed {
-    local instance_file_name=$1
-
-    # throw error if user name is not opendevin
-    if [ "$USER" != "opendevin" ]; then
-        echo "Error: This script is intended to be run by the 'opendevin' user only." >&2
-        exit 1
-    fi
-
-    # =======================================================
-    # Install & Setup Conda
-
-    # assume /swe_util/miniforge3 already exists
-    # install if swe-util does NOT have conda
-    if [ ! -d /swe_util/miniforge3 ]; then
-        pushd /swe_util
-        echo "Downloading and installing Miniforge3"
-        wget "https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-$(uname)-$(uname -m).sh"
-        bash Miniforge3-$(uname)-$(uname -m).sh -b -p /swe_util/miniforge3
-    fi
-
-    echo 'export PATH=/swe_util/miniforge3/bin:$PATH' >> ~/.bashrc
-    eval "$(/swe_util/miniforge3/bin/conda shell.bash hook)"
-    conda init bash
-    source ~/.bashrc
-    conda config --set changeps1 False
-    conda config --append channels conda-forge
-
-    # =======================================================
-    # Install swe-bench-eval environment if it does not exist
-    ENV_EXISTS=$(conda info --envs | awk '/swe-bench-eval/ {print $1}')
-    echo "ENV_EXISTS: $ENV_EXISTS"
-    if [ -z "$ENV_EXISTS" ]; then
-        echo "Environment swe-bench-eval does not exist. Creating the environment."
-        conda create -n swe-bench-eval python==3.11.5 -y
-        conda activate swe-bench-eval
-        pip install requests python-dotenv GitPython datasets pandas beautifulsoup4 ghapi
-    fi
-    conda activate swe-bench-eval
-    echo 'swe-bench-eval environment is ready.'
-
-    # =======================================================
-    # Read the swe-bench-test-lite.json / swe-bench-test.json file and extract the required item based on instance_id
-    INSTANCE_DATA_FILE=/swe_util/eval_data/instances/$instance_file_name
-    echo "Instance data file loaded: $INSTANCE_DATA_FILE"
-
-    # =======================================================
-    # generate testbed & conda environment for ALL instances in the test file
-    echo "Generating testbed & conda environment for all instances in the test file"
-    export PYTHONPATH=/swe_util/OD-SWE-bench:$PYTHONPATH
-    python3 /swe_util/OD-SWE-bench/swebench/harness/engine_testbed.py \
-        --instances_path $INSTANCE_DATA_FILE \
-        --log_dir /swe_util/eval_data/testbed_logs \
-        --conda_path /swe_util/miniforge3 \
-        --testbed /swe_util/eval_data/testbeds \
-        --timeout 1000
-
-    # Check every log in /swe_util/eval_data/testbed_logs to see if they contains "Init Succeeded"
-    # If not, print the log file name and exit
-    for log_file in /swe_util/eval_data/testbed_logs/*; do
-        if ! grep -q "Init Succeeded" $log_file; then
-            echo "Error: $log_file does not contain 'Init Succeeded'"
-            exit 1
-        fi
-    done
-    echo "All logs contain 'Init Succeeded'. Testbed & conda environment setup is successful."
-}
-
-# check if $1 is either swe-bench-test-lite.json or swe-bench-test.json
-if [ "$1" != "swe-bench-test-lite.json" ] && [ "$1" != "swe-bench-test.json" ]; then
-    echo "Error: Invalid input file name. Please provide either swe-bench-test-lite.json or swe-bench-test.json"
-    exit 1
-fi
-
-# call the function
-echo "Calling setup_environment_and_testbed with $1"
-setup_environment_and_testbed $1

+ 0 - 86
evaluation/swe_bench/scripts/setup/get_agent_report.sh

@@ -1,86 +0,0 @@
-#!/bin/bash
-
-# Initialize variables
-output_file=""
-agent_name=""
-dataset=""
-num_processes=15
-experiment_name=""
-merge_report=false
-
-# Parse command-line arguments
-while [[ "$#" -gt 0 ]]; do
-    case $1 in
-        --output-file) output_file="$2"; shift ;;
-        --agent-name) agent_name="$2"; shift ;;
-        --dataset) dataset="$2"; shift ;;
-        --num-processes) num_processes="$2"; shift ;;
-        --experiment-name) experiment_name="$2"; shift ;;
-        --merge-report) merge_report=true ;;
-        *) echo "Unknown parameter passed: $1"; exit 1 ;;
-    esac
-    shift
-done
-
-# Check if arguments are provided
-if [[ -z "$output_file" || -z "$agent_name" || -z "$dataset" ]]; then
-    echo "output-file, agent-name and dataset are required!"
-    exit 1
-fi
-echo "output file: $output_file"
-echo "agent name: $agent_name"
-echo "dataset: $dataset"
-echo "num processes: $num_processes"
-if [ ! -z "$experiment_name" ]
-then
-    echo "use provided experiment name: $experiment_name"
-else
-    current_folder=$(basename $(dirname $output_file))
-    parent_foler=$(basename $(dirname $(dirname $output_file)))
-    experiment_name="${parent_foler}_${current_folder}"
-    echo "use generated experiment name: $experiment_name"
-fi
-
-# Convert the agent output to the SWE-Bench format
-if [ -z "$EVAL_DATA_DIR" ]; then
-    echo "EVAL_DATA_DIR is not set."
-    exit 1
-fi
-target_file="${EVAL_DATA_DIR}/outputs/${experiment_name}_${dataset}.json"
-python process_output_json_file.py $output_file $agent_name $target_file
-
-# Run the evaluation script
-if [ -z "$OD_SWE_BENCH" ]; then
-    echo "OD_SWE_BENCH is not set."
-    exit 1
-fi
-if [ -z "$MINICONDA3" ]; then
-    echo "MINICONDA3 is not set."
-    exit 1
-fi
-mkdir -p $EVAL_DATA_DIR/eval_logs/$experiment_name
-export PYTHONPATH=$OD_SWE_BENCH && cd $OD_SWE_BENCH && . $MINICONDA3/etc/profile.d/conda.sh && conda activate $MINICONDA3/envs/swe-bench-eval && python swebench/harness/run_evaluation.py \
-    --swe_bench_tasks $EVAL_DATA_DIR/instances/$dataset.json \
-    --temp_dir $EVAL_DATA_DIR/eval_temp \
-    --testbed $EVAL_DATA_DIR/testbeds \
-    --conda_path $MINICONDA3 \
-    --predictions_path $target_file \
-    --log_dir $EVAL_DATA_DIR/eval_logs/$experiment_name \
-    --num_processes 15 \
-    --skip_existing \
-    --timeout 1600 \
-    --verbose
-
-# Get the report
-cp $target_file $EVAL_DATA_DIR/eval_logs/$experiment_name
-export PYTHONPATH=$OD_SWE_BENCH && cd $OD_SWE_BENCH && . $MINICONDA3/etc/profile.d/conda.sh && conda activate $MINICONDA3/envs/swe-bench-eval && python swebench/metrics/get_model_report.py \
-	--model $agent_name \
-    --swe_bench_tasks $EVAL_DATA_DIR/instances/$dataset.json \
-    --predictions_path $EVAL_DATA_DIR/eval_logs/$experiment_name/${experiment_name}_${dataset}.json \
-    --log_dir $EVAL_DATA_DIR/eval_logs/$experiment_name/$agent_name
-
-# Merge report to the agent output
-if [ "$merge_report" = true ]; then
-    cd /swe_util && python merge_fine_grained_report.py --od_output_file $output_file \
-    --fine_grained_report_file $EVAL_DATA_DIR/eval_logs/$experiment_name/${experiment_name}_${dataset}.report.json
-fi

+ 0 - 61
evaluation/swe_bench/scripts/setup/get_model_report.sh

@@ -1,61 +0,0 @@
-#!/bin/bash
-
-# Input arguments
-output_file=""
-model_name=""
-dataset=""
-num_processes=15
-experiment_name=""
-
-# Parse command-line arguments
-while [[ "$#" -gt 0 ]]; do
-    case $1 in
-        --output-file) output_file="$2"; shift ;;
-        --model-name) model_name="$2"; shift ;;
-        --dataset) dataset="$2"; shift ;;
-        --num-processes) num_processes="$2"; shift ;;
-        --experiment-name) experiment_name="$2"; shift ;;
-        *) echo "Unknown parameter passed: $1"; exit 1 ;;
-    esac
-    shift
-done
-
-# Check if arguments are provided
-if [[ -z "$output_file" || -z "$model_name" || -z "$dataset" ]]; then
-    echo "output-file, model-name and dataset are required!"
-    exit 1
-fi
-echo "output file: $output_file"
-echo "model name: $model_name"
-echo "dataset: $dataset"
-echo "num processes: $num_processes"
-if [ ! -z "$experiment_name" ]
-then
-    echo "use provided experiment name: $experiment_name"
-else
-    experiment_name=${model_name}__${dataset}
-    echo "use generated experiment name: $experiment_name"
-fi
-
-# Run the evaluation script
-mkdir -p $EVAL_DATA_DIR/eval_logs/$experiment_name
-export PYTHONPATH=$OD_SWE_BENCH && cd $OD_SWE_BENCH && . $MINICONDA3/etc/profile.d/conda.sh && conda activate $MINICONDA3/envs/swe-bench-eval && python swebench/harness/run_evaluation.py \
-    --swe_bench_tasks $EVAL_DATA_DIR/instances/$dataset.json \
-    --temp_dir $EVAL_DATA_DIR/eval_temp \
-    --testbed $EVAL_DATA_DIR/testbeds \
-    --conda_path $MINICONDA3 \
-    --predictions_path $output_file \
-    --log_dir $EVAL_DATA_DIR/eval_logs/$experiment_name \
-    --num_processes $num_processes \
-    --skip_existing \
-    --timeout 1600 \
-    --verbose
-
-# Get the report
-predictions_fname=$(basename $output_file)
-cp $output_file $EVAL_DATA_DIR/eval_logs/$experiment_name
-export PYTHONPATH=$OD_SWE_BENCH && cd $OD_SWE_BENCH && . $MINICONDA3/etc/profile.d/conda.sh && conda activate $MINICONDA3/envs/swe-bench-eval && python swebench/metrics/get_model_report.py \
-	--model $model_name \
-    --swe_bench_tasks $EVAL_DATA_DIR/instances/$dataset.json \
-    --predictions_path $EVAL_DATA_DIR/eval_logs/$experiment_name/$predictions_fname \
-    --log_dir $EVAL_DATA_DIR/eval_logs/$experiment_name/$model_name

+ 0 - 29
evaluation/swe_bench/scripts/setup/merge_fine_grained_report.py

@@ -1,29 +0,0 @@
-import argparse
-import json
-
-
-def merge_fine_grained_report(od_output_file, fine_grained_report_file):
-    merged_od_output_file = od_output_file.replace('.jsonl', '.merged.jsonl')
-    merged_report = []
-    fine_grained_report = json.load(open(fine_grained_report_file))
-    for line in open(od_output_file):
-        line = json.loads(line)
-        instance_id = line['instance_id']
-        line['fine_grained_report'] = fine_grained_report[instance_id]
-        merged_report.append(line)
-    # dump the merged report as a jsonl file
-    with open(merged_od_output_file, 'w') as f:
-        for line in merged_report:
-            f.write(json.dumps(line) + '\n')
-    print(f'Agent output with report merged created at {merged_od_output_file}')
-
-
-if __name__ == '__main__':
-    parser = argparse.ArgumentParser()
-    parser.add_argument('--od_output_file', help='Path to the OD output file')
-    parser.add_argument(
-        '--fine_grained_report_file', help='Path to the fine grained report file'
-    )
-    args = parser.parse_args()
-
-    merge_fine_grained_report(args.od_output_file, args.fine_grained_report_file)

+ 0 - 35
evaluation/swe_bench/scripts/setup/process_output_json_file.py

@@ -1,35 +0,0 @@
-import json
-import sys
-
-
-def process_jsonl(input_file, model_name, output_file):
-    try:
-        with open(input_file, 'r') as infile, open(output_file, 'w') as outfile:
-            data = []
-            for line in infile:
-                if line.strip():  # Ensure the line is not empty
-                    json_obj = json.loads(line)
-                    # Create new object with required fields and new model_name
-                    new_obj = {
-                        'instance_id': json_obj['instance_id'],
-                        'model_patch': json_obj['git_patch'],
-                        'model_name_or_path': model_name,
-                    }
-                    data.append(new_obj)
-            json.dump(
-                data, outfile, indent=2
-            )  # Write the list of JSON objects to a file
-        print(f'Output JSON list created at {output_file}')
-    except Exception as e:
-        print(f'Error: {e}')
-
-
-# Usage: python script.py input.jsonl model_name output.json
-if __name__ == '__main__':
-    if len(sys.argv) != 4:
-        print('Usage: python script.py <input_file> <model_name> <output_file>')
-    else:
-        input_file = sys.argv[1]
-        model_name = sys.argv[2]
-        output_file = sys.argv[3]
-        process_jsonl(input_file, model_name, output_file)

+ 0 - 31
evaluation/swe_bench/scripts/setup/swe_env_setup.sh

@@ -1,31 +0,0 @@
-#!/bin/bash
-# THIS SCRIPT ONLY NEED TO BE RUN ONCE BEFORE EVALUATION
-
-EVAL_DOCKER_IMAGE=ghcr.io/opendevin/eval-swe-bench:builder
-EVAL_WORKSPACE="evaluation/swe_bench/eval_workspace"
-EVAL_WORKSPACE=$(realpath $EVAL_WORKSPACE)
-
-SETUP_INSTANCE_FILENAME=swe-bench-test.json # OR swe-bench-test-lite.json
-
-if [ ! -d $EVAL_WORKSPACE ]; then
-    mkdir -p $EVAL_WORKSPACE
-fi
-
-if [ -f $EVAL_WORKSPACE/swe_env_setup.sh ]; then
-    rm $EVAL_WORKSPACE/swe_env_setup.sh
-fi
-SCRIPT_DIR=evaluation/swe_bench/scripts/setup
-
-cp $SCRIPT_DIR/_swe_env_setup.sh $EVAL_WORKSPACE/swe_env_setup.sh
-cp $SCRIPT_DIR/swe_entry.sh $EVAL_WORKSPACE/swe_entry.sh
-cp $SCRIPT_DIR/get_model_report.sh $EVAL_WORKSPACE/get_model_report.sh
-cp $SCRIPT_DIR/get_agent_report.sh $EVAL_WORKSPACE/get_agent_report.sh
-cp $SCRIPT_DIR/process_output_json_file.py $EVAL_WORKSPACE/process_output_json_file.py
-cp $SCRIPT_DIR/merge_fine_grained_report.py $EVAL_WORKSPACE/merge_fine_grained_report.py
-
-docker run \
-    -v $EVAL_WORKSPACE:/swe_util \
-    -e UID=$(id -u) \
-    --rm -it $EVAL_DOCKER_IMAGE \
-    bash -c "useradd -rm -d /home/opendevin -s /bin/bash -u $(id -u) opendevin && su opendevin -c 'bash /swe_util/swe_env_setup.sh $SETUP_INSTANCE_FILENAME'"
-#