|
| 1 | +import contextlib |
| 2 | +import json |
| 3 | +import os |
| 4 | +import subprocess |
| 5 | +from pathlib import Path |
| 6 | +from subprocess import CompletedProcess, TimeoutExpired |
| 7 | +from tempfile import TemporaryDirectory |
| 8 | + |
| 9 | +import pytest |
| 10 | + |
| 11 | +from app.task import Task, SweTask, PlainTask |
| 12 | +import app.utils as apputils |
| 13 | +import app.config as config |
| 14 | +from app.data_structures import ReproResult |
| 15 | +from test.pytest_utils import * |
| 16 | + |
| 17 | +from test.pytest_utils import DummyTask as BaseDummyTask |
| 18 | + |
| 19 | +# Extend the existing DummyTask to accept extra arguments without breaking behavior. |
| 20 | +# This new class is “open for extension” per the Open/Closed Principle. |
class DummyTask(BaseDummyTask):
    """Test double that adds an ``env_name`` attribute to the shared ``DummyTask``.

    The base-class constructor is still called with ``project_path`` and
    ``issue``; ``env_name`` is stored on the instance afterwards.
    """

    def __init__(self, project_path="dummy_project", issue="dummy issue", env_name="dummy_env"):
        super().__init__(project_path, issue)
        self.env_name = env_name
| 25 | + |
| 26 | +# ----------------------------------------------------------------------------- |
| 27 | +# Helper: Fake cd context manager that does nothing |
| 28 | +# ----------------------------------------------------------------------------- |
@contextlib.contextmanager
def fake_cd(newdir):
    """No-op replacement for ``apputils.cd`` that never changes directory.

    Args:
        newdir: Target directory; accepted for signature compatibility and
            otherwise ignored.

    Yields:
        None, exactly once.
    """
    yield
| 32 | + |
| 33 | +# ----------------------------------------------------------------------------- |
| 34 | +# Tests for PlainTask |
| 35 | +# ----------------------------------------------------------------------------- |
def test_plain_task_getters_and_reset(monkeypatch, tmp_path):
    """PlainTask returns its inputs and resets to its commit on setup and reset."""
    # Use a temporary directory as the project directory.
    project_dir = tmp_path / "plain_project"
    project_dir.mkdir()
    commit = "dummy_commit"
    problem = "dummy problem"
    task = PlainTask(
        commit_hash=commit,
        local_path=str(project_dir),
        problem_statement=problem,
    )

    # Getters hand back exactly what the constructor received.
    assert task.get_issue_statement() == problem
    assert task.project_path == str(project_dir)

    # Replace cd with a no-op so no real directory change happens.
    monkeypatch.setattr(apputils, "cd", fake_cd)

    # Record every commit the task asks to be checked out.
    recorded = []

    def fake_reset(commit_arg):
        recorded.append(commit_arg)

    monkeypatch.setattr(apputils, "repo_reset_and_clean_checkout", fake_reset)

    task.setup_project()
    assert recorded == [commit]

    recorded.clear()
    task.reset_project()
    assert recorded == [commit]

    # validate is not implemented for PlainTask.
    with pytest.raises(NotImplementedError):
        task.validate("dummy patch")
| 67 | + |
| 68 | + |
| 69 | +# ----------------------------------------------------------------------------- |
| 70 | +# Fixture: Create a dummy SweTask instance |
| 71 | +# ----------------------------------------------------------------------------- |
@pytest.fixture
def dummy_swe_task(monkeypatch, tmp_path):
    """Return a SweTask rooted in a throwaway repo dir, with git/conda helpers stubbed.

    All patches, including the ``config.enable_sbfl`` flag, go through
    ``monkeypatch`` so they are undone when the requesting test finishes.
    """
    # Create a temporary directory to simulate a repository.
    repo_dir = tmp_path / "swe_repo"
    repo_dir.mkdir()
    # Create a dummy .gitignore (required for make_noop_patch).
    (repo_dir / ".gitignore").write_text("ignored.txt")

    params = {
        "task_id": "dummy_task",
        "problem_statement": "dummy problem",
        "repo_path": str(repo_dir),
        "commit": "dummy_commit",
        "env_name": "dummy_env",
        "repo_name": "dummy_repo",
        "repo_version": "v1.0",
        "pre_install_cmds": [],
        "install_cmd": "",
        "test_cmd": "echo test",
        "test_patch": "dummy_patch",
        "testcases_passing": [],
        "testcases_failing": [],
    }

    # Set the config flag so that _do_install is executed. Using monkeypatch
    # (rather than plain assignment) restores the previous value on teardown,
    # so the flag does not leak into unrelated tests. raising=False keeps the
    # old "create it if missing" behaviour of a direct assignment.
    monkeypatch.setattr(config, "enable_sbfl", True, raising=False)

    # Patch cd and the git helpers to avoid running real commands.
    monkeypatch.setattr(apputils, "cd", fake_cd)
    monkeypatch.setattr(apputils, "repo_reset_and_clean_checkout", lambda commit: None)
    monkeypatch.setattr(apputils, "repo_commit_current_changes", lambda: None)

    # Simulate successful install commands inside the conda environment.
    def fake_run_string_cmd_in_conda(cmd, env, **kwargs):
        return CompletedProcess(cmd, 0, stdout="ok", stderr="")

    monkeypatch.setattr(apputils, "run_string_cmd_in_conda", fake_run_string_cmd_in_conda)

    return SweTask(**params)
| 108 | + |
| 109 | + |
def test_swe_task_getters_and_setup(dummy_swe_task, monkeypatch):
    """SweTask getters work, setup runs the install step, reset checks out the commit."""
    # Getters.
    assert dummy_swe_task.get_issue_statement() == "dummy problem"
    assert dummy_swe_task.project_path == dummy_swe_task.repo_path

    # Replace _do_install with a stub that records that it ran.
    install_calls = []

    def fake_install():
        install_calls.append(True)

    monkeypatch.setattr(dummy_swe_task, "_do_install", fake_install)

    dummy_swe_task.setup_project()
    assert install_calls, "Expected _do_install to be called by setup_project"

    # reset_project must request a checkout of the task's own commit.
    checkouts = []
    monkeypatch.setattr(
        apputils,
        "repo_reset_and_clean_checkout",
        lambda commit: checkouts.append(commit),
    )
    dummy_swe_task.reset_project()
    assert checkouts == [dummy_swe_task.commit]
| 130 | + |
| 131 | + |
def test_swe_task_validate(monkeypatch, dummy_swe_task, tmp_path):
    """validate() forwards the regression-run result and returns a log file path."""

    # Stub for _run_test_suite_for_regression_docker; accepts both
    # patch_content and log_file, and returns
    # (tests_passed, message, orig_log_file).
    def fake_run_test_suite_docker(patch_content, log_file):
        return (True, "dummy message", "dummy_orig_log_file")

    monkeypatch.setattr(
        dummy_swe_task,
        "_run_test_suite_for_regression_docker",
        fake_run_test_suite_docker,
    )

    # Make apply_patch a no-op by returning a null context manager.
    monkeypatch.setattr(dummy_swe_task, "apply_patch", lambda patch: contextlib.nullcontext())

    # NOTE(review): validate is understood to create log_file via mkstemp,
    # so its exact value cannot be predicted here.
    tests_passed, msg, log_file, orig_log_file = dummy_swe_task.validate("patch")
    try:
        assert tests_passed is True
        assert msg == "dummy message"
        # orig_log_file is whatever the stub returned.
        assert orig_log_file == "dummy_orig_log_file"
        assert isinstance(log_file, str)
    finally:
        # Best-effort removal of the temporary log so test runs do not
        # accumulate files; a missing file is not an error.
        if isinstance(log_file, str):
            with contextlib.suppress(OSError):
                os.remove(log_file)
| 152 | + |
| 153 | + |
def test_swe_task_make_noop_patch(monkeypatch, tmp_path):
    """SweTask.make_noop_patch returns the output of the stubbed ``git diff HEAD~``."""
    repo_dir = tmp_path / "dummy_repo"
    repo_dir.mkdir()
    # Create a dummy .gitignore file.
    (repo_dir / ".gitignore").write_text("ignored_file.txt")

    # Stand in for subprocess.run: only the `git diff HEAD~` call yields output,
    # every other command succeeds silently.
    def fake_run(cmd, cwd, **kwargs):
        is_diff = cmd[:3] == ["git", "diff", "HEAD~"]
        return CompletedProcess(cmd, 0, stdout="noop diff" if is_diff else "", stderr="")

    monkeypatch.setattr(subprocess, "run", fake_run)

    noop_patch = SweTask.make_noop_patch(str(repo_dir))
    assert "noop diff" in noop_patch
| 170 | + |
| 171 | + |
| 172 | +# ----------------------------------------------------------------------------- |
| 173 | +# Test for Task.apply_patch context manager. |
| 174 | +# ----------------------------------------------------------------------------- |
def test_apply_patch(monkeypatch, tmp_path):
    """Task.apply_patch calls repo_clean_changes once its context has been exited."""
    # Create a dummy project directory containing one file.
    proj_dir = tmp_path / "dummy_project"
    proj_dir.mkdir()
    (proj_dir / "dummy.txt").write_text("original content")

    # Minimal concrete Task so the inherited apply_patch can be exercised.
    class DummyApplyTask(Task):
        @property
        def project_path(self) -> str:
            return str(proj_dir)

        def get_issue_statement(self) -> str:
            return "dummy issue"

        def setup_project(self) -> None:
            pass

        def reset_project(self) -> None:
            pass

        def validate(self, patch_content: str):
            return True, "", "", ""

    # Patch cd to be a no-op.
    monkeypatch.setattr(apputils, "cd", fake_cd)

    # Simulate a successful `git apply` for the subprocess.run call in apply_patch.
    def fake_run(cmd, **kwargs):
        return CompletedProcess(cmd, 0, stdout="applied", stderr="")

    monkeypatch.setattr(subprocess, "run", fake_run)

    # Record whether the cleanup helper was invoked.
    clean_calls = []

    def fake_clean():
        clean_calls.append(True)

    monkeypatch.setattr(apputils, "repo_clean_changes", fake_clean)

    dummy = DummyApplyTask()
    with dummy.apply_patch("patch content"):
        # Nothing to do inside the context.
        pass
    assert clean_calls, "Expected repo_clean_changes to be called after apply_patch"
| 215 | + |
| 216 | +# Dummy values for required fields in SweTask. |
# Baseline keyword arguments for constructing a SweTask in the tests below.
# Tests copy this mapping and fill in "repo_path" themselves.
DUMMY_TASK_PARAMS = {
    "task_id": "dummy_task",
    "problem_statement": "dummy problem",
    # Placeholder: each test substitutes its own temporary directory.
    "repo_path": None,
    "commit": "dummy_commit",
    "env_name": "dummy_env",
    "repo_name": "dummy_repo",
    "repo_version": "v1.0",
    "pre_install_cmds": [],
    "install_cmd": "",
    "test_cmd": "echo test",  # Not used in execute_reproducer
    "test_patch": "dummy_patch",
    "testcases_passing": [],
    "testcases_failing": [],
}
| 233 | + |
| 234 | +# ----------------------------------------------------------------------------- |
| 235 | +# Test execute_reproducer - normal execution case. |
| 236 | +# ----------------------------------------------------------------------------- |
def test_execute_reproducer_normal(monkeypatch, tmp_path):
    """execute_reproducer passes through stdout, stderr and returncode of a normal run."""
    # Create a temporary, non-empty directory to simulate a repository.
    repo_dir = tmp_path / "dummy_repo"
    repo_dir.mkdir()
    (repo_dir / "dummy.txt").write_text("original content")

    # Shared dummy parameters with this test's repo path filled in.
    task = SweTask(**{**DUMMY_TASK_PARAMS, "repo_path": str(repo_dir)})

    # Simulate a successful script run; args is expected to be a list
    # holding a temporary filename.
    def fake_run_script_in_conda(args, env_name, cwd, **kwargs):
        return CompletedProcess(args, 0, stdout="dummy stdout", stderr="dummy stderr")

    # Patch in the module where execute_reproducer looks the helper up.
    monkeypatch.setattr("app.task.run_script_in_conda", fake_run_script_in_conda)

    # No patch content is supplied, so apply_patch is not used.
    result = task.execute_reproducer("print('hello world')", patch_content=None)

    # The returned result must mirror the fake CompletedProcess.
    assert result.stdout == "dummy stdout"
    assert result.stderr == "dummy stderr"
    assert result.returncode == 0
| 266 | + |
| 267 | +# ----------------------------------------------------------------------------- |
| 268 | +# Test execute_reproducer - timeout case. |
| 269 | +# ----------------------------------------------------------------------------- |
def test_execute_reproducer_timeout(monkeypatch, tmp_path):
    """execute_reproducer turns a TimeoutExpired into the timeout result values."""
    # Create a temporary dummy repo directory.
    repo_dir = tmp_path / "dummy_repo"
    repo_dir.mkdir()

    task = SweTask(**{**DUMMY_TASK_PARAMS, "repo_path": str(repo_dir)})

    # Simulate the script run exceeding its time limit.
    def fake_run_script_in_conda(args, env_name, cwd, **kwargs):
        raise TimeoutExpired(cmd=args, timeout=kwargs.get("timeout", 120))

    monkeypatch.setattr("app.task.run_script_in_conda", fake_run_script_in_conda)

    result = task.execute_reproducer("print('hello world')", patch_content=None)

    # On timeout we expect empty stdout, a fixed stderr message, and returncode -1.
    assert result.stdout == ""
    assert result.stderr == "Test execution timeout."
    assert result.returncode == -1
0 commit comments