diff --git a/benchkit/benchmark.py b/benchkit/benchmark.py index a34e1e70..372cb556 100644 --- a/benchkit/benchmark.py +++ b/benchkit/benchmark.py @@ -30,14 +30,7 @@ ) from benchkit.utils.system import get_boot_args from benchkit.utils.tee import teeprint -from benchkit.utils.types import ( - Command, - Constants, - Environment, - PathType, - Pretty, - SplitCommand, -) +from benchkit.utils.types import Command, Constants, Environment, PathType, Pretty, SplitCommand from benchkit.utils.variables import list_groupby RecordKey = str diff --git a/benchkit/cli/init.py b/benchkit/cli/init.py index 1b3d22d2..9c71cf63 100644 --- a/benchkit/cli/init.py +++ b/benchkit/cli/init.py @@ -7,11 +7,7 @@ import black import isort -from benchkit.cli.generate import ( - generate_benchmark, - generate_campaign, - get_gitignore_content, -) +from benchkit.cli.generate import generate_benchmark, generate_campaign, get_gitignore_content from benchkit.utils.misc import get_benchkit_temp_folder_str _DOTGIT_DIR = Path(".git") diff --git a/benchkit/commandwrappers/javaperf.py b/benchkit/commandwrappers/javaperf.py index 33cd4b40..48905385 100644 --- a/benchkit/commandwrappers/javaperf.py +++ b/benchkit/commandwrappers/javaperf.py @@ -14,11 +14,7 @@ from typing import Dict, List, Optional from benchkit.benchmark import RecordResult, WriteRecordFileFunction -from benchkit.commandwrappers.perf import ( - PerfRecordWrap, - PerfStatWrap, - _perf_command_prefix, -) +from benchkit.commandwrappers.perf import PerfRecordWrap, PerfStatWrap, _perf_command_prefix from benchkit.helpers.linux import ps from benchkit.platforms import Platform from benchkit.shell.shell import shell_interactive, shell_out diff --git a/benchkit/commandwrappers/perf.py b/benchkit/commandwrappers/perf.py index 4d4f979c..0132da66 100644 --- a/benchkit/commandwrappers/perf.py +++ b/benchkit/commandwrappers/perf.py @@ -805,6 +805,31 @@ def post_run_hook_report( if self._report_interactive: shell_interactive(command=command, ignore_ret_codes=(-13,)) # ignore broken pipe error + def post_run_hook_script( + self, + experiment_results_lines: List[RecordResult], + record_data_dir: PathType, + write_record_file_fun: WriteRecordFileFunction, + ) -> Optional[Dict[str, Any]]: + """Post run hook to generate extension of result dict holding the results of perf script. + + Args: + experiment_results_lines (List[RecordResult]): the record results. + record_data_dir (PathType): path to the record data directory. + write_record_file_fun (WriteRecordFileFunction): callback to record a file into data + directory. + """ + assert experiment_results_lines and record_data_dir + + perf_data_pathname = self.latest_perf_path + self._chown(pathname=perf_data_pathname) + + command = self._perf_script_command(perf_data_pathname=perf_data_pathname) + + output = shell_out(command, print_output=False) + + write_record_file_fun(file_content=output.strip(), filename="perf.script") + def fetch_flamegraph( self, flamegraph_commit: str = "41fee1f99f9276008b7cd112fca19dc3ea84ac32", @@ -1024,6 +1049,16 @@ def _perf_report_command(self, perf_data_pathname: PathType) -> SplitCommand: return command + def _perf_script_command(self, perf_data_pathname: PathType) -> SplitCommand: + command = [ + self._perf_bin, + "script", + "--input", + f"{perf_data_pathname}", + ] + self.perf_report_options + + return command + def _fzf( self, search_dir: PathType, diff --git a/benchkit/commandwrappers/speedupstack.py b/benchkit/commandwrappers/speedupstack.py index 72ce6cdd..8b218b82 100644 --- a/benchkit/commandwrappers/speedupstack.py +++ b/benchkit/commandwrappers/speedupstack.py @@ -1,16 +1,28 @@ # Copyright (C) 2025 Vrije Universiteit Brussel. All rights reserved. # SPDX-License-Identifier: MIT +import pathlib +import re +import time +from collections import defaultdict from signal import SIGCONT, SIGSTOP -from typing import List +from threading import Thread +from typing import Any, Dict, List, Optional +from benchkit.benchmark import RecordResult, WriteRecordFileFunction from benchkit.commandattachments.klockstat import Klockstat from benchkit.commandattachments.llcstat import Llcstat from benchkit.commandattachments.offcputime import Offcputime from benchkit.commandattachments.signal import Signal from benchkit.commandwrappers import CommandWrapper +from benchkit.commandwrappers.perf import PerfRecordWrap, _perf_command_prefix from benchkit.commandwrappers.strace import StraceWrap from benchkit.dependencies.packages import PackageDependency +from benchkit.helpers.linux import ps +from benchkit.platforms import get_current_platform +from benchkit.platforms.generic import Platform +from benchkit.shell.shell import shell_out +from benchkit.shell.shellasync import AsyncProcess from benchkit.utils.types import PathType @@ -23,9 +35,18 @@ def __init__(self, libbpf_tools_dir: PathType) -> None: self._llcstat = Llcstat(libbpf_tools_dir) self._strace = StraceWrap(pid=True, summary=False, summary_only=True) + self._perf_record_lock = PerfRecordLockWrap( + perf_record_options=["-e", "syscalls:sys_enter_futex,syscalls:sys_exit_futex"], + perf_report_options=["--ns"], + report_file=True, + report_interactive=False, + ) + self._sigstop = Signal(signal_type=SIGSTOP) self._sigcont = Signal(signal_type=SIGCONT) + self._perf_record_lock_attachment_thread = None + def command_wrappers(self): return [] @@ -36,6 +57,11 @@ def command_attachments(self): self._offcputime.attachment, self._llcstat.attachment, self._strace.attachment, + lambda process, record_data_dir: self._perf_record_lock.attach_every_thread( + platform=get_current_platform(), + process=process, + record_data_dir=record_data_dir, + ), self._sigcont.attachment, ] @@ -45,6 +71,7 @@ def post_run_hooks(self): self._offcputime.post_run_hook, self._llcstat.post_run_hook, self._strace.post_run_hook, + self._perf_record_lock.post_run_hook_script, ] def dependencies(self) -> List[PackageDependency]: @@ -59,3 +86,136 @@ def dependencies(self) -> List[PackageDependency]: deps.extend(self._llcstat.dependencies()) return deps + + +class PerfRecordLockWrap(PerfRecordWrap): + """ + This is a command wrapper for the `perf record` utility on multi-threaded benchmarks. + """ + + def __init__(self, **kwargs): + super().__init__(**kwargs) + self.attachment_thread = None + self._data_paths = {} + + def attach_every_thread(self, **kwargs): + self.attachment_thread = Thread(target=self.attach_every_thread_worker, kwargs=kwargs) + self.attachment_thread.start() + + def attach_every_thread_worker( + self, + process: AsyncProcess, + platform: Platform, + record_data_dir: pathlib.Path, + poll_ms: int = 10, + ): + """Command attachment that will attach to every thread of the wrapped process. + + Args: + process (AsyncProcess): the process to attach perf-stat to. + platform (Platform): the platform where the process is running. + record_data_dir (pathlib.Path): the path to the record data directory of the benchmark. + poll_ms (int, optional): the period at which to poll the process to detect newly created + threads. Defaults to 10. + """ + perf_prefix = _perf_command_prefix(perf_bin=self._perf_bin, platform=platform) + prefix = ["sudo"] + perf_prefix + ["record"] + self.perf_record_options + ["-t"] + + tids2perf_cmd = {} + + while not process.is_finished(): + current_tids = ps.get_threads_of_process(pid=process.pid, ignore_any_error_code=True) + for tid in current_tids: + if tid not in tids2perf_cmd: + perf_data_pathname = record_data_dir / f"perf-record-lock-val-tid-{tid}.txt" + self._data_paths[tid] = perf_data_pathname + tids2perf_cmd[tid] = Thread( + target=self.attach_on_thread_worker, + kwargs={ + "prefix": prefix, + "tid": tid, + "perf_data_pathname": perf_data_pathname, + }, + ) + tids2perf_cmd[tid].start() + + time.sleep(poll_ms / 1000) + + for current_process in tids2perf_cmd.values(): + try: + current_process.join() + except AsyncProcess.AsyncProcessError: + pass + + def attach_on_thread_worker( + self, prefix: list[str], tid: int, perf_data_pathname: pathlib.Path + ): + cmd = prefix + [f"{tid}", "--output", f"{perf_data_pathname}"] + self.platform.comm.shell(command=cmd) + + def post_run_hook_script( + self, + experiment_results_lines: List[RecordResult], + record_data_dir: PathType, + write_record_file_fun: WriteRecordFileFunction, + ) -> Optional[Dict[str, Any]]: + """Post run hook to generate extension of result dict holding the results of perf report. + + Args: + experiment_results_lines (List[RecordResult]): the record results. + record_data_dir (PathType): path to the record data directory. + write_record_file_fun (WriteRecordFileFunction): callback to record a file into data + directory. + """ + assert experiment_results_lines and record_data_dir + assert self.attachment_thread is not None + + self.attachment_thread.join() + + row_re = re.compile(r"^\s*(\S+)\s+(\d+)\s+\[(\d+)\]\s+(\S+):\s+(\S+):\s+(.*)\s*$") + + wait_start: Dict[int, int] = {} + total_wait_time: int = 0 + total_wait_time_per_thread: Dict[int, int] = defaultdict(lambda: 0) + + for tid_of_file, data_path in self._data_paths.items(): + self._chown(pathname=data_path) + command = self._perf_script_command(perf_data_pathname=data_path) + script_file = ( + record_data_dir / pathlib.Path(f"perf-record-lock-tid-{tid_of_file}.script") + ).as_posix() + + output = shell_out(command, print_output=False) + + write_record_file_fun(file_content=output.strip(), filename=script_file) + + for line in output.splitlines(): + line = line.rstrip() + m = row_re.search(line) + if m: + # comm = m.group(1) + tid = int(m.group(2)) + # cpu = int(m.group(3)) + timestamp = int(round(float(m.group(4)) * 1e9)) + event = m.group(5) + # data = m.group(6) + + if event == "syscalls:sys_enter_futex": + wait_start[tid] = timestamp + + if event == "syscalls:sys_exit_futex": + if tid in wait_start: + wait_time_ns = timestamp - wait_start[tid] + total_wait_time += wait_time_ns + total_wait_time_per_thread[tid] += wait_time_ns + del wait_start[tid] + + self._data_paths = {} + return { + "perf_record_lock_total_wait_ns": total_wait_time, + "perf_record_lock_thread_avg_wait_ns": ( + (total_wait_time / len(total_wait_time_per_thread)) + if len(total_wait_time_per_thread) != 0 + else 0 + ), + } diff --git a/benchkit/helpers/linux/kernel.py b/benchkit/helpers/linux/kernel.py index 7070bd53..a82d2f34 100644 --- a/benchkit/helpers/linux/kernel.py +++ b/benchkit/helpers/linux/kernel.py @@ -8,12 +8,7 @@ import pathlib from typing import Dict, Iterable, List -from benchkit.helpers.linux.build import ( - KernelEntry, - LinuxBuild, - Option, - configure_standard_kernel, -) +from benchkit.helpers.linux.build import KernelEntry, LinuxBuild, Option, configure_standard_kernel from benchkit.shell.shell import shell_out from benchkit.utils.types import PathType diff --git a/benchkit/helpers/linux/ps.py b/benchkit/helpers/linux/ps.py index 37a20179..f9901b11 100644 --- a/benchkit/helpers/linux/ps.py +++ b/benchkit/helpers/linux/ps.py @@ -10,7 +10,7 @@ from benchkit.shell.shell import shell_out -def get_threads_of_process(pid: int) -> List[int]: +def get_threads_of_process(pid: int, ignore_any_error_code: bool = False) -> List[int]: """Get thread identifiers (TIDs) of the given process identifier (PID). Args: @@ -26,6 +26,7 @@ def get_threads_of_process(pid: int) -> List[int]: f"ps -T -p {pid}", print_input=False, print_output=False, + ignore_any_error_code=ignore_any_error_code, ) tids = [] diff --git a/benchkit/hooks/stressNg.py b/benchkit/hooks/stressNg.py index ac74c662..5780fc6f 100644 --- a/benchkit/hooks/stressNg.py +++ b/benchkit/hooks/stressNg.py @@ -3,12 +3,7 @@ from typing import List, Optional -from benchkit.benchmark import ( - PostRunHook, - PreRunHook, - RecordResult, - WriteRecordFileFunction, -) +from benchkit.benchmark import PostRunHook, PreRunHook, RecordResult, WriteRecordFileFunction from benchkit.platforms import get_current_platform from benchkit.shell.shellasync import shell_async from benchkit.utils.types import PathType diff --git a/benchkit/platforms/generic.py b/benchkit/platforms/generic.py index 2a1d68bb..4c233b00 100644 --- a/benchkit/platforms/generic.py +++ b/benchkit/platforms/generic.py @@ -8,11 +8,7 @@ from benchkit.communication import CommunicationLayer from benchkit.platforms import evenorder -from benchkit.platforms.utils import ( - get_nb_cpus_active, - get_nb_cpus_isolated, - get_nb_cpus_total, -) +from benchkit.platforms.utils import get_nb_cpus_active, get_nb_cpus_isolated, get_nb_cpus_total from benchkit.utils import lscpu diff --git a/benchkit/sharedlibs/assignlib.py b/benchkit/sharedlibs/assignlib.py index de11dbd6..65ff9890 100644 --- a/benchkit/sharedlibs/assignlib.py +++ b/benchkit/sharedlibs/assignlib.py @@ -8,11 +8,7 @@ import os from typing import Iterable, Optional, Tuple -from benchkit.sharedlibs import ( - EnvironmentVariables, - FromSourceSharedLib, - LdPreloadLibraries, -) +from benchkit.sharedlibs import EnvironmentVariables, FromSourceSharedLib, LdPreloadLibraries from benchkit.shell.shell import shell_out from benchkit.utils.dir import ( caller_file_abs_path, diff --git a/benchkit/sharedlibs/tiltlib.py b/benchkit/sharedlibs/tiltlib.py index 6b23ce63..944d522e 100644 --- a/benchkit/sharedlibs/tiltlib.py +++ b/benchkit/sharedlibs/tiltlib.py @@ -10,11 +10,7 @@ from typing import Tuple from benchkit.platforms import Platform -from benchkit.sharedlibs import ( - EnvironmentVariables, - FromSourceSharedLib, - LdPreloadLibraries, -) +from benchkit.sharedlibs import EnvironmentVariables, FromSourceSharedLib, LdPreloadLibraries def cmake_configure_build( diff --git a/examples/cameracampaign/camera_campaign.py b/examples/cameracampaign/camera_campaign.py index 99653261..f69c20de 100644 --- a/examples/cameracampaign/camera_campaign.py +++ b/examples/cameracampaign/camera_campaign.py @@ -6,10 +6,7 @@ from typing import Any, Dict, Iterable, List from pythainer.examples.builders import get_user_gui_builder -from pythainer.examples.installs import ( - opencv_lib_install_from_src, - realsense2_lib_install_from_src, -) +from pythainer.examples.installs import opencv_lib_install_from_src, realsense2_lib_install_from_src from pythainer.examples.runners import camera_runner, gui_runner, personal_runner from pythainer.runners import ConcreteDockerRunner diff --git a/examples/ipc/ipc.py b/examples/ipc/ipc.py index f102abbb..50a65604 100644 --- a/examples/ipc/ipc.py +++ b/examples/ipc/ipc.py @@ -161,10 +161,7 @@ def main() -> None: match target: case Target.HARMONY: - from benchkit.devices.hdc import ( - OpenHarmonyCommLayer, - OpenHarmonyDeviceConnector, - ) + from benchkit.devices.hdc import OpenHarmonyCommLayer, OpenHarmonyDeviceConnector bench_dir = "/data/local/tmp" device = list(OpenHarmonyDeviceConnector.query_devices())[0] diff --git a/examples/rocksdb/README.md b/examples/rocksdb/README.md index c6c0240c..38202ed8 100644 --- a/examples/rocksdb/README.md +++ b/examples/rocksdb/README.md @@ -55,7 +55,10 @@ cd ../../.. Running the speedup stack campaign. ``` +sudo -v +while true; do sudo -v; sleep 60; done & ./campaign_rocksdb_speedup_stacks.py +kill %1 ``` @@ -88,9 +91,9 @@ sudo setcap cap_sys_resource,cap_sys_admin+eip ./klockstat sudo setcap cap_sys_resource,cap_sys_admin+eip ./offcputime sudo setcap cap_sys_resource,cap_sys_admin+eip ./llcstat sudo setcap cap_sys_ptrace+ep /usr/bin/strace -kill %1 cd ../../.. ./campaign_rocksdb_speedup_stacks.py +kill %1 ``` diff --git a/examples/rocksdb/campaign_rocksdb_speedup_stacks.py b/examples/rocksdb/campaign_rocksdb_speedup_stacks.py index ee7dddcc..a6c8508c 100755 --- a/examples/rocksdb/campaign_rocksdb_speedup_stacks.py +++ b/examples/rocksdb/campaign_rocksdb_speedup_stacks.py @@ -8,12 +8,16 @@ from rocksdb import rocksdb_campaign from benchkit.campaign import CampaignSuite +from benchkit.commandwrappers.perf import enable_non_sudo_perf from benchkit.commandwrappers.speedupstack import SpeedupStackWrapper +from benchkit.platforms import get_current_platform from benchkit.utils.dir import get_curdir def main() -> None: """Main function of the campaign script.""" + platform = get_current_platform() + enable_non_sudo_perf(comm_layer=platform.comm) rocksdb_src_dir = (get_curdir(__file__) / "deps/rocksdb/").resolve() libbpf_tools_dir = (get_curdir(__file__) / "deps/bcc/libbpf-tools/").resolve() @@ -22,10 +26,16 @@ def main() -> None: campaign = rocksdb_campaign( src_dir=rocksdb_src_dir, - bench_name=["readrandom"], + bench_name=[ + "readrandom", + # "readmissing", + # "seekrandom", + # "multireadrandom", + # "readwhilewriting", + ], nb_runs=5, - benchmark_duration_seconds=3, - nb_threads=[2, 4, 8], + benchmark_duration_seconds=10, + nb_threads=[1, 2, 4, 8], command_wrappers=([speedupstackwrapper] + speedupstackwrapper.command_wrappers()), command_attachments=speedupstackwrapper.command_attachments(), post_run_hooks=speedupstackwrapper.post_run_hooks(), @@ -78,6 +88,22 @@ def main() -> None: hue="bench_name", ) + suite.generate_graph( + title="Perf Record Lock", + plot_name="lineplot", + x="nb_threads", + y="perf_record_lock_total_wait_ns", + hue="bench_name", + ) + + suite.generate_graph( + title="Perf Record Lock Thread Avg", + plot_name="lineplot", + x="nb_threads", + y="perf_record_lock_thread_avg_wait_ns", + hue="bench_name", + ) + if __name__ == "__main__": main() diff --git a/examples/rocksdb/kit/rocksdb.py b/examples/rocksdb/kit/rocksdb.py index fa531932..e90304f5 100644 --- a/examples/rocksdb/kit/rocksdb.py +++ b/examples/rocksdb/kit/rocksdb.py @@ -11,13 +11,7 @@ import re from typing import Any, Dict, Iterable, List -from benchkit.benchmark import ( - Benchmark, - CommandAttachment, - CommandWrapper, - PostRunHook, - PreRunHook, -) +from benchkit.benchmark import Benchmark, CommandAttachment, CommandWrapper, PostRunHook, PreRunHook from benchkit.campaign import CampaignCartesianProduct from benchkit.dependencies.packages import PackageDependency from benchkit.platforms import Platform diff --git a/tests/test_commlayer.py b/tests/test_commlayer.py index c39269c9..b7f50bde 100644 --- a/tests/test_commlayer.py +++ b/tests/test_commlayer.py @@ -23,10 +23,7 @@ def main() -> None: platform = get_current_platform() print(platform) case Target.HARMONY: - from benchkit.devices.hdc import ( - OpenHarmonyCommLayer, - OpenHarmonyDeviceConnector, - ) + from benchkit.devices.hdc import OpenHarmonyCommLayer, OpenHarmonyDeviceConnector device = list(OpenHarmonyDeviceConnector.query_devices())[0] hdc = OpenHarmonyDeviceConnector.from_device(device) diff --git a/tests/test_logging.py b/tests/test_logging.py index 09734d95..54e27df9 100644 --- a/tests/test_logging.py +++ b/tests/test_logging.py @@ -13,11 +13,7 @@ import logging from pathlib import Path -from benchkit.utils.logging import ( - bkpprint, - bkprint, - configure_logging, -) +from benchkit.utils.logging import bkpprint, bkprint, configure_logging def main() -> None: