Fix nsys subfield merging behavior
#795
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
```diff
@@ -1,5 +1,5 @@
 # SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
-# Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
```
```diff
@@ -515,3 +515,160 @@ def test_get_reporters_nccl(self):
         assert len(reporters) == 2
         assert NcclTestPerformanceReportGenerationStrategy in reporters
         assert NcclTestPredictionReportGenerationStrategy in reporters
```

The same hunk adds a new `TestNsysMerging` class covering the nsys merge behavior:
```python
class TestNsysMerging:
    def test_nsys_partial_override_preserves_base_config(
        self, test_scenario_parser: TestScenarioParser, slurm_system: SlurmSystem
    ):
        from cloudai.core import NsysConfiguration

        test_scenario_parser.test_mapping = {
            "nccl": NCCLTestDefinition(
                name="nccl",
                description="desc",
                test_template_name="NcclTest",
                cmd_args=NCCLCmdArgs(docker_image_url="fake://url/nccl"),
                nsys=NsysConfiguration(
                    enable=True,
                    nsys_binary="/custom/nsys",
                    output="/base/output",
                    trace="cuda,nvtx",
                    sample="cpu",
                ),
            )
        }
        model = TestScenarioModel.model_validate(
            toml.loads(
                """
name = "test"

[[Tests]]
id = "1"
test_name = "nccl"

[Tests.nsys]
output = "/scenario/output"
"""
            )
        )
        tdef = test_scenario_parser._prepare_tdef(model.tests[0])

        assert tdef.nsys is not None
        assert tdef.nsys.output == "/scenario/output"
        assert tdef.nsys.nsys_binary == "/custom/nsys"
        assert tdef.nsys.trace == "cuda,nvtx"
        assert tdef.nsys.sample == "cpu"
        assert tdef.nsys.enable is True

    def test_nsys_multiple_fields_override(self, test_scenario_parser: TestScenarioParser, slurm_system: SlurmSystem):
        from cloudai.core import NsysConfiguration
```
Contributor (review comment on the in-function import):

Can it be imported at the top of this file? (same for all tests)

@juntaowww I assume you used an AI assistant to help you with the code; they really tend to bring imports deep inside. Could you please share which assistant it was, so we could prepare some common agent config (if there's such an option) to let the agent know about the code style?
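A minimal sketch of the suggested style, assuming the import can simply be hoisted to module level in this test file (hypothetical placement, not part of the diff):

```python
# Hypothetical: imported once at the top of the test module, next to the
# existing imports, instead of being repeated inside each test method.
from cloudai.core import NsysConfiguration
```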
The added tests then continue:

```python
        test_scenario_parser.test_mapping = {
            "nccl": NCCLTestDefinition(
                name="nccl",
                description="desc",
                test_template_name="NcclTest",
                cmd_args=NCCLCmdArgs(docker_image_url="fake://url/nccl"),
                nsys=NsysConfiguration(
                    enable=True,
                    nsys_binary="/base/nsys",
                    output="/base/output",
                    trace="cuda",
                    force_overwrite=False,
                ),
            )
        }
        model = TestScenarioModel.model_validate(
            toml.loads(
                """
name = "test"

[[Tests]]
id = "1"
test_name = "nccl"

[Tests.nsys]
output = "/new/output"
force_overwrite = true
"""
            )
        )
        tdef = test_scenario_parser._prepare_tdef(model.tests[0])

        assert tdef.nsys is not None
        assert tdef.nsys.output == "/new/output"
        assert tdef.nsys.force_overwrite is True
        assert tdef.nsys.nsys_binary == "/base/nsys"
        assert tdef.nsys.trace == "cuda"
        assert tdef.nsys.enable is True

    def test_nsys_scenario_adds_to_base_without_nsys(
        self, test_scenario_parser: TestScenarioParser, slurm_system: SlurmSystem
    ):
        test_scenario_parser.test_mapping = {
            "nccl": NCCLTestDefinition(
                name="nccl",
                description="desc",
                test_template_name="NcclTest",
                cmd_args=NCCLCmdArgs(docker_image_url="fake://url/nccl"),
                # No nsys in base config
            )
        }
        model = TestScenarioModel.model_validate(
            toml.loads(
                """
name = "test"

[[Tests]]
id = "1"
test_name = "nccl"

[Tests.nsys]
output = "/scenario/output"
trace = "cuda,nvtx"
"""
            )
        )
        tdef = test_scenario_parser._prepare_tdef(model.tests[0])

        assert tdef.nsys is not None
        assert tdef.nsys.output == "/scenario/output"
        assert tdef.nsys.trace == "cuda,nvtx"
        assert tdef.nsys.enable is True
        assert tdef.nsys.nsys_binary == "nsys"

    def test_nsys_disable_override(self, test_scenario_parser: TestScenarioParser, slurm_system: SlurmSystem):
        from cloudai.core import NsysConfiguration

        test_scenario_parser.test_mapping = {
            "nccl": NCCLTestDefinition(
                name="nccl",
                description="desc",
                test_template_name="NcclTest",
                cmd_args=NCCLCmdArgs(docker_image_url="fake://url/nccl"),
                nsys=NsysConfiguration(
                    enable=True,
                    output="/base/output",
                ),
            )
        }
        model = TestScenarioModel.model_validate(
            toml.loads(
                """
name = "test"

[[Tests]]
id = "1"
test_name = "nccl"

[Tests.nsys]
enable = false
"""
            )
        )
        tdef = test_scenario_parser._prepare_tdef(model.tests[0])

        assert tdef.nsys is not None
        assert tdef.nsys.enable is False
        assert tdef.nsys.output == "/base/output"
```
Review comment (on the merge implementation):

Can't that lead to a dict without required fields set?

BTW, I think this is similar to what you did for reports. Maybe we should have a function `merge_models(tdef, tscenario)` or something like that? Do you think that would be useful? Our current approach is "all or nothing", meaning that everything should be set either on one level or the other. If we start doing a smarter merge, I'd prefer we do it consistently. Overall this feels like a better and friendlier approach.

cc @podkidyshev

Reply:

Still learning, so my opinion should be rather optional here :)

The change seems safe and positive. I agree with Andrey that it would be nice to make it consistent across all model merges, but I would assume that's on the CloudAI dev team's shoulders.

Seems safe, if this is the risk you meant.
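For illustration only, here is a minimal sketch of what a shared `merge_models` helper along the lines suggested above might look like. The function name, its location, and the use of pydantic's `model_fields_set` and `model_copy(update=...)` are assumptions for this sketch, not something this PR adds:

```python
from typing import Optional, TypeVar

from pydantic import BaseModel

M = TypeVar("M", bound=BaseModel)


def merge_models(base: Optional[M], override: Optional[M]) -> Optional[M]:
    """Hypothetical helper: merge two models of the same type, field by field.

    Fields explicitly set on ``override`` win; everything else is kept from
    ``base``. If either side is missing, the other is returned unchanged.
    """
    if base is None:
        return override
    if override is None:
        return base
    # model_fields_set contains only the fields the caller actually provided,
    # so defaults on the override do not clobber values set on the base model.
    updates = {name: getattr(override, name) for name in override.model_fields_set}
    return base.model_copy(update=updates)
```

Because only explicitly set override fields are applied on top of an already validated base model, required fields on the base are never dropped, which is the risk raised in the question above.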