|
|
@@ -11,6 +11,17 @@ unset SANDBOX_ENV_OPENAI_API_KEY
|
|
|
unset OPENAI_BASE_URL
|
|
|
unset OPENAI_MODEL
|
|
|
|
|
|
+# Get the absolute path of the script directory
|
|
|
+get_script_dir() {
|
|
|
+ local source="${BASH_SOURCE[0]}"
|
|
|
+ while [ -h "$source" ]; do
|
|
|
+ local dir="$( cd -P "$( dirname "$source" )" && pwd )"
|
|
|
+ source="$(readlink "$source")"
|
|
|
+ [[ $source != /* ]] && source="$dir/$source"
|
|
|
+ done
|
|
|
+ echo "$( cd -P "$( dirname "$source" )" && pwd )"
|
|
|
+}
|
|
|
+
|
|
|
TMP_FILE="${TMP_FILE:-tmp.log}"
|
|
|
|
|
|
if [ -z $WORKSPACE_MOUNT_PATH ]; then
|
|
|
@@ -20,14 +31,23 @@ if [ -z $WORKSPACE_BASE ]; then
|
|
|
WORKSPACE_BASE=$(pwd)
|
|
|
fi
|
|
|
|
|
|
-WORKSPACE_MOUNT_PATH+="/_test_workspace"
|
|
|
-WORKSPACE_BASE+="/_test_workspace"
|
|
|
+export SCRIPT_DIR=$(get_script_dir)
|
|
|
+export PROJECT_ROOT=$(realpath "$SCRIPT_DIR/../..")
|
|
|
+
|
|
|
+WORKSPACE_MOUNT_PATH=$(realpath "${WORKSPACE_MOUNT_PATH}/_test_workspace")
|
|
|
+WORKSPACE_BASE=$(realpath "${WORKSPACE_BASE}/_test_workspace")
|
|
|
WORKSPACE_MOUNT_PATH_IN_SANDBOX="/workspace"
|
|
|
|
|
|
+echo "Current working directory: $(pwd)"
|
|
|
+echo "SCRIPT_DIR: $SCRIPT_DIR"
|
|
|
+echo "PROJECT_ROOT: $PROJECT_ROOT"
|
|
|
echo "WORKSPACE_BASE: $WORKSPACE_BASE"
|
|
|
echo "WORKSPACE_MOUNT_PATH: $WORKSPACE_MOUNT_PATH"
|
|
|
echo "WORKSPACE_MOUNT_PATH_IN_SANDBOX: $WORKSPACE_MOUNT_PATH_IN_SANDBOX"
|
|
|
|
|
|
+# Ensure we're in the correct directory
|
|
|
+cd "$PROJECT_ROOT" || exit 1
|
|
|
+
|
|
|
mkdir -p $WORKSPACE_BASE
|
|
|
|
|
|
# use environmental variable if exists, otherwise use "ssh"
|
|
|
@@ -71,14 +91,18 @@ num_of_agents=${#agents[@]}
|
|
|
|
|
|
# run integration test against a specific agent & test
|
|
|
run_test() {
|
|
|
- local pytest_cmd="poetry run pytest -s ./tests/integration/test_agent.py::$test_name"
|
|
|
+ # Ensure we're in the correct directory
|
|
|
+ cd "$PROJECT_ROOT" || exit 1
|
|
|
|
|
|
+ local pytest_cmd="poetry run pytest --cache-clear -s $SCRIPT_DIR/test_agent.py::$test_name"
|
|
|
# Check if TEST_IN_CI is defined
|
|
|
if [ -n "$TEST_IN_CI" ]; then
|
|
|
pytest_cmd+=" --cov=agenthub --cov=opendevin --cov-report=xml --cov-append"
|
|
|
fi
|
|
|
|
|
|
- SANDBOX_BOX_TYPE=$SANDBOX_BOX_TYPE \
|
|
|
+ env SCRIPT_DIR="$SCRIPT_DIR" \
|
|
|
+ PROJECT_ROOT="$PROJECT_ROOT" \
|
|
|
+ SANDBOX_BOX_TYPE="$SANDBOX_BOX_TYPE" \
|
|
|
PERSIST_SANDBOX=$PERSIST_SANDBOX \
|
|
|
WORKSPACE_BASE=$WORKSPACE_BASE \
|
|
|
WORKSPACE_MOUNT_PATH=$WORKSPACE_MOUNT_PATH \
|
|
|
@@ -120,7 +144,7 @@ run_test() {
|
|
|
|
|
|
# browsing capability needs a local http server
|
|
|
launch_http_server() {
|
|
|
- poetry run python tests/integration/start_http_server.py &
|
|
|
+ poetry run python $SCRIPT_DIR/start_http_server.py &
|
|
|
HTTP_SERVER_PID=$!
|
|
|
echo "Test http server launched, PID = $HTTP_SERVER_PID"
|
|
|
sleep 10
|
|
|
@@ -147,15 +171,17 @@ trap cleanup EXIT
|
|
|
regenerate_without_llm() {
|
|
|
# set -x to print the command being executed
|
|
|
set -x
|
|
|
- SANDBOX_BOX_TYPE=$SANDBOX_BOX_TYPE \
|
|
|
- PERSIST_SANDBOX=$PERSIST_SANDBOX \
|
|
|
- WORKSPACE_BASE=$WORKSPACE_BASE \
|
|
|
- WORKSPACE_MOUNT_PATH=$WORKSPACE_MOUNT_PATH \
|
|
|
- WORKSPACE_MOUNT_PATH_IN_SANDBOX=$WORKSPACE_MOUNT_PATH_IN_SANDBOX \
|
|
|
- MAX_ITERATIONS=$MAX_ITERATIONS \
|
|
|
- FORCE_APPLY_PROMPTS=true \
|
|
|
- DEFAULT_AGENT=$agent \
|
|
|
- poetry run pytest -s ./tests/integration/test_agent.py::$test_name
|
|
|
+ env SCRIPT_DIR="$SCRIPT_DIR" \
|
|
|
+ PROJECT_ROOT="$PROJECT_ROOT" \
|
|
|
+ SANDBOX_BOX_TYPE="$SANDBOX_BOX_TYPE" \
|
|
|
+ PERSIST_SANDBOX=$PERSIST_SANDBOX \
|
|
|
+ WORKSPACE_BASE=$WORKSPACE_BASE \
|
|
|
+ WORKSPACE_MOUNT_PATH=$WORKSPACE_MOUNT_PATH \
|
|
|
+ WORKSPACE_MOUNT_PATH_IN_SANDBOX=$WORKSPACE_MOUNT_PATH_IN_SANDBOX \
|
|
|
+ MAX_ITERATIONS=$MAX_ITERATIONS \
|
|
|
+ FORCE_APPLY_PROMPTS=true \
|
|
|
+ DEFAULT_AGENT=$agent \
|
|
|
+ poetry run pytest -s $SCRIPT_DIR/test_agent.py::$test_name
|
|
|
set +x
|
|
|
}
|
|
|
|
|
|
@@ -165,29 +191,32 @@ regenerate_with_llm() {
|
|
|
fi
|
|
|
|
|
|
rm -rf $WORKSPACE_BASE/*
|
|
|
- if [ -d "tests/integration/workspace/$test_name" ]; then
|
|
|
- cp -r tests/integration/workspace/$test_name/* $WORKSPACE_BASE
|
|
|
+ if [ -d "$SCRIPT_DIR/workspace/$test_name" ]; then
|
|
|
+ cp -r "$SCRIPT_DIR/workspace/$test_name"/* $WORKSPACE_BASE
|
|
|
fi
|
|
|
|
|
|
rm -rf logs
|
|
|
- rm -rf tests/integration/mock/$agent/$test_name/*
|
|
|
+ rm -rf "$SCRIPT_DIR/mock/$agent/$test_name/*"
|
|
|
# set -x to print the command being executed
|
|
|
set -x
|
|
|
echo -e "/exit\n" | \
|
|
|
- DEBUG=true \
|
|
|
- SANDBOX_BOX_TYPE=$SANDBOX_BOX_TYPE \
|
|
|
- PERSIST_SANDBOX=$PERSIST_SANDBOX \
|
|
|
- WORKSPACE_BASE=$WORKSPACE_BASE \
|
|
|
- WORKSPACE_MOUNT_PATH=$WORKSPACE_MOUNT_PATH AGENT=$agent \
|
|
|
- WORKSPACE_MOUNT_PATH_IN_SANDBOX=$WORKSPACE_MOUNT_PATH_IN_SANDBOX \
|
|
|
- poetry run python ./opendevin/core/main.py \
|
|
|
- -i $MAX_ITERATIONS \
|
|
|
- -t "$task Do not ask me for confirmation at any point." \
|
|
|
- -c $agent
|
|
|
+ env SCRIPT_DIR="$SCRIPT_DIR" \
|
|
|
+ PROJECT_ROOT="$PROJECT_ROOT" \
|
|
|
+ DEBUG=true \
|
|
|
+ SANDBOX_BOX_TYPE="$SANDBOX_BOX_TYPE" \
|
|
|
+ PERSIST_SANDBOX=$PERSIST_SANDBOX \
|
|
|
+ WORKSPACE_BASE=$WORKSPACE_BASE \
|
|
|
+ WORKSPACE_MOUNT_PATH=$WORKSPACE_MOUNT_PATH \
|
|
|
+ AGENT=$agent \
|
|
|
+ WORKSPACE_MOUNT_PATH_IN_SANDBOX=$WORKSPACE_MOUNT_PATH_IN_SANDBOX \
|
|
|
+ poetry run python "$PROJECT_ROOT/opendevin/core/main.py" \
|
|
|
+ -i $MAX_ITERATIONS \
|
|
|
+ -t "$task Do not ask me for confirmation at any point." \
|
|
|
+ -c $agent
|
|
|
set +x
|
|
|
|
|
|
- mkdir -p tests/integration/mock/$agent/$test_name/
|
|
|
- mv logs/llm/**/* tests/integration/mock/$agent/$test_name/
|
|
|
+ mkdir -p "$SCRIPT_DIR/mock/$agent/$test_name/"
|
|
|
+ mv logs/llm/**/* "$SCRIPT_DIR/mock/$agent/$test_name/"
|
|
|
|
|
|
}
|
|
|
|
|
|
@@ -195,7 +224,6 @@ regenerate_with_llm() {
|
|
|
## MAIN PROGRAM ##
|
|
|
##############################################################
|
|
|
|
|
|
-
|
|
|
if [ "$num_of_tests" -ne "${#test_names[@]}" ]; then
|
|
|
echo "Every task must correspond to one test case"
|
|
|
exit 1
|
|
|
@@ -222,8 +250,8 @@ for ((i = 0; i < num_of_tests; i++)); do
|
|
|
|
|
|
echo -e "\n\n\n\n========STEP 1: Running $test_name for $agent========\n\n\n\n"
|
|
|
rm -rf $WORKSPACE_BASE/*
|
|
|
- if [ -d "tests/integration/workspace/$test_name" ]; then
|
|
|
- cp -r "tests/integration/workspace/$test_name"/* $WORKSPACE_BASE
|
|
|
+ if [ -d "$SCRIPT_DIR/workspace/$test_name" ]; then
|
|
|
+ cp -r "$SCRIPT_DIR/workspace/$test_name"/* $WORKSPACE_BASE
|
|
|
fi
|
|
|
|
|
|
if [ "$TEST_ONLY" = true ]; then
|
|
|
@@ -245,7 +273,7 @@ for ((i = 0; i < num_of_tests; i++)); do
|
|
|
|
|
|
if [ "$FORCE_USE_LLM" = true ]; then
|
|
|
echo -e "\n\n\n\n========FORCE_USE_LLM, skipping step 2 & 3========\n\n\n\n"
|
|
|
- elif [ ! -d "tests/integration/mock/$agent/$test_name" ]; then
|
|
|
+ elif [ ! -d "$SCRIPT_DIR/mock/$agent/$test_name" ]; then
|
|
|
echo -e "\n\n\n\n========No existing mock responses for $agent/$test_name, skipping step 2 & 3========\n\n\n\n"
|
|
|
else
|
|
|
echo -e "\n\n\n\n========STEP 2: $test_name failed, regenerating prompts for $agent WITHOUT money cost========\n\n\n\n"
|