From 607438a6830e6e7daf935704741cc654869ad06b Mon Sep 17 00:00:00 2001
From: Denys Fridman
Date: Tue, 2 Dec 2025 12:17:13 +0100
Subject: [PATCH 1/9] add deepseek constant

---
 mlperf_logging/mllog/constants.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/mlperf_logging/mllog/constants.py b/mlperf_logging/mllog/constants.py
index 57972a6f..758b94d5 100644
--- a/mlperf_logging/mllog/constants.py
+++ b/mlperf_logging/mllog/constants.py
@@ -57,6 +57,7 @@
 LLAMA31_405B = "llama31_405b"
 LLAMA31_8B = "llama31_8b"
 FLUX1 = "flux1"
+DEEPSEEK_V3 = "deepseek_v3"
 
 # Constant values - model info
 ADAGRAD = "adagrad"

From d2bf303e22af81eefd4bf10a963d768ca412fca7 Mon Sep 17 00:00:00 2001
From: Denys Fridman
Date: Tue, 2 Dec 2025 15:28:00 +0100
Subject: [PATCH 2/9] add deepseek to compliance check

---
 .../training_6.0.0/closed_bert.yaml           | 48 ++++++++++++
 .../training_6.0.0/closed_common.yaml         |  2 +-
 .../training_6.0.0/closed_deepseek_v3.yaml    | 74 +++++++++++++++++++
 .../training_6.0.0/closed_retinanet.yaml      | 35 +++++++++
 .../training_6.0.0/closed_rgat.yaml           | 21 ++++++
 .../training_6.0.0/open_bert.yaml             |  7 ++
 .../training_6.0.0/open_common.yaml           |  2 +-
 .../training_6.0.0/open_deepseek_v3.yaml      | 65 ++++++++++++++++
 .../training_6.0.0/open_retinanet.yaml        |  7 ++
 .../training_6.0.0/open_rgat.yaml             |  7 ++
 10 files changed, 266 insertions(+), 2 deletions(-)
 create mode 100644 mlperf_logging/compliance_checker/training_6.0.0/closed_bert.yaml
 create mode 100644 mlperf_logging/compliance_checker/training_6.0.0/closed_deepseek_v3.yaml
 create mode 100644 mlperf_logging/compliance_checker/training_6.0.0/closed_retinanet.yaml
 create mode 100644 mlperf_logging/compliance_checker/training_6.0.0/closed_rgat.yaml
 create mode 100644 mlperf_logging/compliance_checker/training_6.0.0/open_bert.yaml
 create mode 100644 mlperf_logging/compliance_checker/training_6.0.0/open_deepseek_v3.yaml
 create mode 100644 mlperf_logging/compliance_checker/training_6.0.0/open_retinanet.yaml
 create mode 100644 mlperf_logging/compliance_checker/training_6.0.0/open_rgat.yaml

diff --git a/mlperf_logging/compliance_checker/training_6.0.0/closed_bert.yaml b/mlperf_logging/compliance_checker/training_6.0.0/closed_bert.yaml
new file mode 100644
index 00000000..408f669b
--- /dev/null
+++ b/mlperf_logging/compliance_checker/training_6.0.0/closed_bert.yaml
@@ -0,0 +1,48 @@
+- KEY:
+    NAME: global_batch_size
+    REQ: EXACTLY_ONE
+    POST: >
+        s['global_batch_size'] = v['value']
+
+- KEY:
+    NAME: opt_base_learning_rate
+    REQ: EXACTLY_ONE
+
+- KEY:
+    NAME: opt_lamb_epsilon
+    REQ: EXACTLY_ONE
+
+- KEY:
+    NAME: opt_learning_rate_training_steps
+    REQ: EXACTLY_ONE
+
+- KEY:
+    NAME: opt_learning_rate_warmup_steps
+    REQ: EXACTLY_ONE
+
+- KEY:
+    NAME: num_warmup_steps
+    REQ: EXACTLY_ONE
+
+- KEY:
+    NAME: start_warmup_step
+    REQ: EXACTLY_ONE
+
+- KEY:
+    NAME: opt_lamb_beta_1
+    REQ: EXACTLY_ONE
+
+- KEY:
+    NAME: opt_lamb_beta_2
+    REQ: EXACTLY_ONE
+
+- KEY:
+    NAME: opt_lamb_weight_decay_rate
+    REQ: EXACTLY_ONE
+
+- KEY:
+    NAME: eval_accuracy
+    REQ: AT_LEAST_ONE
+    CHECK:
+        - "'epoch_num' in v['metadata']"
+    ATLEAST_ONE_CHECK: "(v['value'] >= 0.720) and v['value'] < 1.0"
diff --git a/mlperf_logging/compliance_checker/training_6.0.0/closed_common.yaml b/mlperf_logging/compliance_checker/training_6.0.0/closed_common.yaml
index c17d1432..2b11ae6e 100755
--- a/mlperf_logging/compliance_checker/training_6.0.0/closed_common.yaml
+++ b/mlperf_logging/compliance_checker/training_6.0.0/closed_common.yaml
@@ -2,7 +2,7 @@
 - KEY:
     NAME: submission_benchmark
     REQ: EXACTLY_ONE
-    CHECK: " v['value'] in ['flux1', 'dlrm_dcnv2', 'llama31_8b', 'llama2_70b_lora', 'llama31_405b'] "
+    CHECK: " v['value'] in ['flux1', 'dlrm_dcnv2', 'llama31_8b', 'llama2_70b_lora', 'llama31_405b', 'deepseek_v3'] "
     POST: " enqueue_config('training_6.0.0/closed_{}.yaml'.format(v['value'])) "
 
 - KEY:
diff --git a/mlperf_logging/compliance_checker/training_6.0.0/closed_deepseek_v3.yaml b/mlperf_logging/compliance_checker/training_6.0.0/closed_deepseek_v3.yaml
new file mode 100644
index 00000000..d71468fd
--- /dev/null
+++ b/mlperf_logging/compliance_checker/training_6.0.0/closed_deepseek_v3.yaml
@@ -0,0 +1,74 @@
+- KEY:
+    NAME: global_batch_size
+    REQ: EXACTLY_ONE
+    POST: >
+        s['global_batch_size'] = v['value']
+
+- KEY:
+    NAME: max_sequence_length
+    REQ: EXACTLY_ONE
+    CHECK: " v['value'] == 4096 "
+
+- KEY:
+    NAME: opt_name
+    REQ: EXACTLY_ONE
+    CHECK: " v['value'] == 'adamw' "
+
+- KEY:
+    NAME: opt_base_learning_rate
+    REQ: EXACTLY_ONE
+
+- KEY:
+    NAME: opt_learning_rate_warmup_steps
+    REQ: EXACTLY_ONE
+
+- KEY:
+    NAME: opt_learning_rate_decay_steps
+    REQ: EXACTLY_ONE
+
+- KEY:
+    NAME: opt_learning_rate_decay_schedule
+    REQ: EXACTLY_ONE
+    CHECK: " v['value'] == 'cosine with linear warmup' "
+
+- KEY:
+    NAME: opt_adamw_beta_1
+    REQ: EXACTLY_ONE
+    CHECK: " v['value'] == 0.9 "
+
+- KEY:
+    NAME: opt_adamw_beta_2
+    REQ: EXACTLY_ONE
+    CHECK: " v['value'] == 0.95 "
+
+- KEY:
+    NAME: opt_adamw_epsilon
+    REQ: EXACTLY_ONE
+    CHECK: " v['value'] == 1e-05 "
+
+- KEY:
+    NAME: opt_adamw_weight_decay
+    REQ: EXACTLY_ONE
+    CHECK: " v['value'] == 0.1 "
+
+- KEY:
+    NAME: opt_gradient_clip_norm
+    REQ: EXACTLY_ONE
+    CHECK: " v['value'] == 1.0 "
+
+- KEY:
+    NAME: gradient_accumulation_steps
+    REQ: EXACTLY_ONE
+    CHECK: " v['value'] > 0 "
+
+- KEY:
+    NAME: eval_samples
+    REQ: EXACTLY_ONE
+    CHECK: " v['value'] == 6144 "
+
+- KEY:
+    NAME: eval_accuracy
+    REQ: AT_LEAST_ONE
+    CHECK:
+        - "'samples_count' in v['metadata']"
+    ATLEAST_ONE_CHECK: "(v['value'] <= 4.0) and v['value'] > 0.0"
diff --git a/mlperf_logging/compliance_checker/training_6.0.0/closed_retinanet.yaml b/mlperf_logging/compliance_checker/training_6.0.0/closed_retinanet.yaml
new file mode 100644
index 00000000..794ab7ab
--- /dev/null
+++ b/mlperf_logging/compliance_checker/training_6.0.0/closed_retinanet.yaml
@@ -0,0 +1,35 @@
+- KEY:
+    NAME: global_batch_size
+    REQ: EXACTLY_ONE
+
+- KEY:
+    NAME: opt_name
+    REQ: EXACTLY_ONE
+    CHECK: " v['value'] == 'adam' "
+
+- KEY:
+    NAME: opt_base_learning_rate
+    REQ: EXACTLY_ONE
+    CHECK: " v['value'] >= 0.0"
+
+- KEY:
+    NAME: opt_weight_decay
+    REQ: EXACTLY_ONE
+    CHECK: " v['value'] == 0.0"
+
+- KEY:
+    NAME: opt_learning_rate_warmup_epochs
+    REQ: EXACTLY_ONE
+    CHECK: " v['value'] >= 0.0"
+
+- KEY:
+    NAME: opt_learning_rate_warmup_factor
+    REQ: EXACTLY_ONE
+    CHECK: " v['value'] >= 0.0"
+
+- KEY:
+    NAME: eval_accuracy
+    REQ: AT_LEAST_ONE
+    CHECK:
+        - "'epoch_num' in v['metadata']"
+    ATLEAST_ONE_CHECK: "v['value'] >= 0.340 and v['value'] < 1.0"
diff --git a/mlperf_logging/compliance_checker/training_6.0.0/closed_rgat.yaml b/mlperf_logging/compliance_checker/training_6.0.0/closed_rgat.yaml
new file mode 100644
index 00000000..2c1f7286
--- /dev/null
+++ b/mlperf_logging/compliance_checker/training_6.0.0/closed_rgat.yaml
@@ -0,0 +1,21 @@
+- KEY:
+    NAME: global_batch_size
+    REQ: EXACTLY_ONE
+    CHECK: " v['value'] > 0"
+
+- KEY:
+    NAME: opt_name
+    REQ: EXACTLY_ONE
+    CHECK: " v['value'] == 'adam' "
+
+- KEY:
+    NAME: opt_base_learning_rate
+    REQ: EXACTLY_ONE
+    CHECK: " v['value'] >= 0.0"
+
+- KEY:
+    NAME: eval_accuracy
+    REQ: AT_LEAST_ONE
+    CHECK:
+        - "'epoch_num' in v['metadata']"
+    ATLEAST_ONE_CHECK: "v['value'] >= 0.72 and v['value'] < 1.0"
diff --git a/mlperf_logging/compliance_checker/training_6.0.0/open_bert.yaml b/mlperf_logging/compliance_checker/training_6.0.0/open_bert.yaml
new file mode 100644
index 00000000..14c4176d
--- /dev/null
+++ b/mlperf_logging/compliance_checker/training_6.0.0/open_bert.yaml
@@ -0,0 +1,7 @@
+
+- KEY:
+    NAME: eval_accuracy
+    REQ: AT_LEAST_ONE
+    CHECK:
+        - "'epoch_num' in v['metadata']"
+    ATLEAST_ONE_CHECK: "v['value'] < 1.0"
diff --git a/mlperf_logging/compliance_checker/training_6.0.0/open_common.yaml b/mlperf_logging/compliance_checker/training_6.0.0/open_common.yaml
index ab82d076..1f5c54a0 100644
--- a/mlperf_logging/compliance_checker/training_6.0.0/open_common.yaml
+++ b/mlperf_logging/compliance_checker/training_6.0.0/open_common.yaml
@@ -2,5 +2,5 @@
 - KEY:
     NAME: submission_benchmark
     REQ: EXACTLY_ONE
-    CHECK: " v['value'] in ['flux1', 'dlrm_dcnv2', 'llama31_8b', 'llama2_70b_lora', 'llama31_405b'] "
+    CHECK: " v['value'] in ['flux1', 'dlrm_dcnv2', 'llama31_8b', 'llama2_70b_lora', 'llama31_405b', 'deepseek_v3'] "
     POST: " enqueue_config('training_6.0.0/open_{}.yaml'.format(v['value'])) "
diff --git a/mlperf_logging/compliance_checker/training_6.0.0/open_deepseek_v3.yaml b/mlperf_logging/compliance_checker/training_6.0.0/open_deepseek_v3.yaml
new file mode 100644
index 00000000..84512fea
--- /dev/null
+++ b/mlperf_logging/compliance_checker/training_6.0.0/open_deepseek_v3.yaml
@@ -0,0 +1,65 @@
+- KEY:
+    NAME: global_batch_size
+    REQ: EXACTLY_ONE
+    POST: >
+        s['global_batch_size'] = v['value']
+
+- KEY:
+    NAME: max_sequence_length
+    REQ: EXACTLY_ONE
+    CHECK: " v['value'] == 4096 "
+
+- KEY:
+    NAME: opt_name
+    REQ: EXACTLY_ONE
+    CHECK: " v['value'] == 'adamw' "
+
+- KEY:
+    NAME: opt_base_learning_rate
+    REQ: EXACTLY_ONE
+
+- KEY:
+    NAME: opt_learning_rate_warmup_steps
+    REQ: EXACTLY_ONE
+
+- KEY:
+    NAME: opt_learning_rate_decay_schedule
+    REQ: EXACTLY_ONE
+
+- KEY:
+    NAME: opt_adamw_beta_1
+    REQ: EXACTLY_ONE
+
+- KEY:
+    NAME: opt_adamw_beta_2
+    REQ: EXACTLY_ONE
+
+- KEY:
+    NAME: opt_adamw_epsilon
+    REQ: EXACTLY_ONE
+
+- KEY:
+    NAME: opt_adamw_weight_decay
+    REQ: EXACTLY_ONE
+
+- KEY:
+    NAME: opt_gradient_clip_norm
+    REQ: EXACTLY_ONE
+
+- KEY:
+    NAME: gradient_accumulation_steps
+    REQ: EXACTLY_ONE
+    CHECK: " v['value'] > 0 "
+
+- KEY:
+    NAME: eval_samples
+    REQ: EXACTLY_ONE
+    CHECK: " v['value'] == 6144 "
+
+- KEY:
+    NAME: eval_accuracy
+    REQ: AT_LEAST_ONE
+    CHECK:
+        - "'epoch_num' in v['metadata']"
+    ATLEAST_ONE_CHECK: "(v['value'] <= 4.0) and v['value'] > 0.0"
+
diff --git a/mlperf_logging/compliance_checker/training_6.0.0/open_retinanet.yaml b/mlperf_logging/compliance_checker/training_6.0.0/open_retinanet.yaml
new file mode 100644
index 00000000..14c4176d
--- /dev/null
+++ b/mlperf_logging/compliance_checker/training_6.0.0/open_retinanet.yaml
@@ -0,0 +1,7 @@
+
+- KEY:
+    NAME: eval_accuracy
+    REQ: AT_LEAST_ONE
+    CHECK:
+        - "'epoch_num' in v['metadata']"
+    ATLEAST_ONE_CHECK: "v['value'] < 1.0"
diff --git a/mlperf_logging/compliance_checker/training_6.0.0/open_rgat.yaml b/mlperf_logging/compliance_checker/training_6.0.0/open_rgat.yaml
new file mode 100644
index 00000000..14c4176d
--- /dev/null
+++ b/mlperf_logging/compliance_checker/training_6.0.0/open_rgat.yaml
@@ -0,0 +1,7 @@
+
+- KEY:
+    NAME: eval_accuracy
+    REQ: AT_LEAST_ONE
+    CHECK:
+        - "'epoch_num' in v['metadata']"
+    ATLEAST_ONE_CHECK: "v['value'] < 1.0"
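As a rough illustration of how these rules are applied: each CHECK, POST, and ATLEAST_ONE_CHECK string is a Python expression that the compliance checker evaluates against the parsed log record `v` (with `s` as shared state filled in by POST hooks), while REQ constrains how often the key must appear. A minimal sketch follows (not the checker's actual code); the event values are hypothetical.

# Simplified sketch of rule evaluation, assuming a hypothetical eval_accuracy
# event for the new deepseek_v3 benchmark (illustrative values only).
import json

s = {}  # shared state, e.g. s['global_batch_size'] set by the POST expression above

line = ':::MLLOG {"key": "eval_accuracy", "value": 3.2, "metadata": {"samples_count": 1572864}}'
v = json.loads(line.split(":::MLLOG", 1)[1])

check = "'samples_count' in v['metadata']"
at_least_one_check = "(v['value'] <= 4.0) and v['value'] > 0.0"

# Both expressions must hold for at least one eval_accuracy event (REQ: AT_LEAST_ONE).
assert eval(check, {"v": v, "s": s})
assert eval(at_least_one_check, {"v": v, "s": s})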
From 28270a6052cf650d8e2fbe31113dc234c2dc3b08 Mon Sep 17 00:00:00 2001
From: Denys Fridman
Date: Fri, 6 Feb 2026 14:17:44 +0100
Subject: [PATCH 3/9] rm closed_bert.yaml

---
 .../training_6.0.0/closed_bert.yaml           | 48 -------------------
 1 file changed, 48 deletions(-)
 delete mode 100644 mlperf_logging/compliance_checker/training_6.0.0/closed_bert.yaml

diff --git a/mlperf_logging/compliance_checker/training_6.0.0/closed_bert.yaml b/mlperf_logging/compliance_checker/training_6.0.0/closed_bert.yaml
deleted file mode 100644
index 408f669b..00000000
--- a/mlperf_logging/compliance_checker/training_6.0.0/closed_bert.yaml
+++ /dev/null
@@ -1,48 +0,0 @@
-- KEY:
-    NAME: global_batch_size
-    REQ: EXACTLY_ONE
-    POST: >
-        s['global_batch_size'] = v['value']
-
-- KEY:
-    NAME: opt_base_learning_rate
-    REQ: EXACTLY_ONE
-
-- KEY:
-    NAME: opt_lamb_epsilon
-    REQ: EXACTLY_ONE
-
-- KEY:
-    NAME: opt_learning_rate_training_steps
-    REQ: EXACTLY_ONE
-
-- KEY:
-    NAME: opt_learning_rate_warmup_steps
-    REQ: EXACTLY_ONE
-
-- KEY:
-    NAME: num_warmup_steps
-    REQ: EXACTLY_ONE
-
-- KEY:
-    NAME: start_warmup_step
-    REQ: EXACTLY_ONE
-
-- KEY:
-    NAME: opt_lamb_beta_1
-    REQ: EXACTLY_ONE
-
-- KEY:
-    NAME: opt_lamb_beta_2
-    REQ: EXACTLY_ONE
-
-- KEY:
-    NAME: opt_lamb_weight_decay_rate
-    REQ: EXACTLY_ONE
-
-- KEY:
-    NAME: eval_accuracy
-    REQ: AT_LEAST_ONE
-    CHECK:
-        - "'epoch_num' in v['metadata']"
-    ATLEAST_ONE_CHECK: "(v['value'] >= 0.720) and v['value'] < 1.0"

From ff964eb2ddfaea5984622e81e2287860f74ecdbc Mon Sep 17 00:00:00 2001
From: Denys Fridman
Date: Fri, 6 Feb 2026 14:19:28 +0100
Subject: [PATCH 4/9] update deepseek values

---
 .../training_6.0.0/closed_deepseek_v3.yaml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/mlperf_logging/compliance_checker/training_6.0.0/closed_deepseek_v3.yaml b/mlperf_logging/compliance_checker/training_6.0.0/closed_deepseek_v3.yaml
index d71468fd..07059d7e 100644
--- a/mlperf_logging/compliance_checker/training_6.0.0/closed_deepseek_v3.yaml
+++ b/mlperf_logging/compliance_checker/training_6.0.0/closed_deepseek_v3.yaml
@@ -44,7 +44,7 @@
 - KEY:
     NAME: opt_adamw_epsilon
     REQ: EXACTLY_ONE
-    CHECK: " v['value'] == 1e-05 "
+    CHECK: " v['value'] == 1e-08 "
 
 - KEY:
     NAME: opt_adamw_weight_decay
@@ -64,11 +64,11 @@
 - KEY:
     NAME: eval_samples
     REQ: EXACTLY_ONE
-    CHECK: " v['value'] == 6144 "
+    CHECK: " v['value'] == 1024 "
 
 - KEY:
     NAME: eval_accuracy
     REQ: AT_LEAST_ONE
     CHECK:
         - "'samples_count' in v['metadata']"
-    ATLEAST_ONE_CHECK: "(v['value'] <= 4.0) and v['value'] > 0.0"
+    ATLEAST_ONE_CHECK: "(v['value'] <= 2.7) and v['value'] > 0.0" # TODO(dfridman): Update this once we have the exact value

From 6119f6954b852e64ff76edfe35a7978520ee60ea Mon Sep 17 00:00:00 2001
From: Denys Fridman
Date: Fri, 6 Feb 2026 14:19:46 +0100
Subject: [PATCH 5/9] rm closed_retinanet.yaml

---
 .../training_6.0.0/closed_retinanet.yaml      | 35 ------------------
 1 file changed, 35 deletions(-)
 delete mode 100644 mlperf_logging/compliance_checker/training_6.0.0/closed_retinanet.yaml

diff --git a/mlperf_logging/compliance_checker/training_6.0.0/closed_retinanet.yaml b/mlperf_logging/compliance_checker/training_6.0.0/closed_retinanet.yaml
deleted file mode 100644
index 794ab7ab..00000000
--- a/mlperf_logging/compliance_checker/training_6.0.0/closed_retinanet.yaml
+++ /dev/null
@@ -1,35 +0,0 @@
-- KEY:
-    NAME: global_batch_size
-    REQ: EXACTLY_ONE
-
-- KEY:
-    NAME: opt_name
-    REQ: EXACTLY_ONE
-    CHECK: " v['value'] == 'adam' "
-
-- KEY:
-    NAME: opt_base_learning_rate
-    REQ: EXACTLY_ONE
-    CHECK: " v['value'] >= 0.0"
-
-- KEY:
-    NAME: opt_weight_decay
-    REQ: EXACTLY_ONE
-    CHECK: " v['value'] == 0.0"
-
-- KEY:
-    NAME: opt_learning_rate_warmup_epochs
-    REQ: EXACTLY_ONE
-    CHECK: " v['value'] >= 0.0"
-
-- KEY:
-    NAME: opt_learning_rate_warmup_factor
-    REQ: EXACTLY_ONE
-    CHECK: " v['value'] >= 0.0"
-
-- KEY:
-    NAME: eval_accuracy
-    REQ: AT_LEAST_ONE
-    CHECK:
-        - "'epoch_num' in v['metadata']"
-    ATLEAST_ONE_CHECK: "v['value'] >= 0.340 and v['value'] < 1.0"
From a071aeb555ba596f70351fe6caf338215b9c8560 Mon Sep 17 00:00:00 2001
From: Denys Fridman
Date: Fri, 6 Feb 2026 14:21:22 +0100
Subject: [PATCH 6/9] rm unused configs + update deepseek

---
 .../training_6.0.0/closed_rgat.yaml           | 21 -------------------
 .../training_6.0.0/open_bert.yaml             |  7 -------
 .../training_6.0.0/open_deepseek_v3.yaml      |  4 ++--
 .../training_6.0.0/open_retinanet.yaml        |  7 -------
 .../training_6.0.0/open_rgat.yaml             |  7 -------
 5 files changed, 2 insertions(+), 44 deletions(-)
 delete mode 100644 mlperf_logging/compliance_checker/training_6.0.0/closed_rgat.yaml
 delete mode 100644 mlperf_logging/compliance_checker/training_6.0.0/open_bert.yaml
 delete mode 100644 mlperf_logging/compliance_checker/training_6.0.0/open_retinanet.yaml
 delete mode 100644 mlperf_logging/compliance_checker/training_6.0.0/open_rgat.yaml

diff --git a/mlperf_logging/compliance_checker/training_6.0.0/closed_rgat.yaml b/mlperf_logging/compliance_checker/training_6.0.0/closed_rgat.yaml
deleted file mode 100644
index 2c1f7286..00000000
--- a/mlperf_logging/compliance_checker/training_6.0.0/closed_rgat.yaml
+++ /dev/null
@@ -1,21 +0,0 @@
-- KEY:
-    NAME: global_batch_size
-    REQ: EXACTLY_ONE
-    CHECK: " v['value'] > 0"
-
-- KEY:
-    NAME: opt_name
-    REQ: EXACTLY_ONE
-    CHECK: " v['value'] == 'adam' "
-
-- KEY:
-    NAME: opt_base_learning_rate
-    REQ: EXACTLY_ONE
-    CHECK: " v['value'] >= 0.0"
-
-- KEY:
-    NAME: eval_accuracy
-    REQ: AT_LEAST_ONE
-    CHECK:
-        - "'epoch_num' in v['metadata']"
-    ATLEAST_ONE_CHECK: "v['value'] >= 0.72 and v['value'] < 1.0"
diff --git a/mlperf_logging/compliance_checker/training_6.0.0/open_bert.yaml b/mlperf_logging/compliance_checker/training_6.0.0/open_bert.yaml
deleted file mode 100644
index 14c4176d..00000000
--- a/mlperf_logging/compliance_checker/training_6.0.0/open_bert.yaml
+++ /dev/null
@@ -1,7 +0,0 @@
-
-- KEY:
-    NAME: eval_accuracy
-    REQ: AT_LEAST_ONE
-    CHECK:
-        - "'epoch_num' in v['metadata']"
-    ATLEAST_ONE_CHECK: "v['value'] < 1.0"
diff --git a/mlperf_logging/compliance_checker/training_6.0.0/open_deepseek_v3.yaml b/mlperf_logging/compliance_checker/training_6.0.0/open_deepseek_v3.yaml
index 84512fea..a9f73830 100644
--- a/mlperf_logging/compliance_checker/training_6.0.0/open_deepseek_v3.yaml
+++ b/mlperf_logging/compliance_checker/training_6.0.0/open_deepseek_v3.yaml
@@ -54,12 +54,12 @@
 - KEY:
     NAME: eval_samples
     REQ: EXACTLY_ONE
-    CHECK: " v['value'] == 6144 "
+    CHECK: " v['value'] == 1024 "
 
 - KEY:
     NAME: eval_accuracy
     REQ: AT_LEAST_ONE
     CHECK:
         - "'epoch_num' in v['metadata']"
-    ATLEAST_ONE_CHECK: "(v['value'] <= 4.0) and v['value'] > 0.0"
+    ATLEAST_ONE_CHECK: "(v['value'] <= 2.7) and v['value'] > 0.0" # TODO(dfridman): Update this once we have the exact value
 
diff --git a/mlperf_logging/compliance_checker/training_6.0.0/open_retinanet.yaml b/mlperf_logging/compliance_checker/training_6.0.0/open_retinanet.yaml
deleted file mode 100644
index 14c4176d..00000000
--- a/mlperf_logging/compliance_checker/training_6.0.0/open_retinanet.yaml
+++ /dev/null
@@ -1,7 +0,0 @@
-
-- KEY:
-    NAME: eval_accuracy
-    REQ: AT_LEAST_ONE
-    CHECK:
-        - "'epoch_num' in v['metadata']"
-    ATLEAST_ONE_CHECK: "v['value'] < 1.0"
diff --git a/mlperf_logging/compliance_checker/training_6.0.0/open_rgat.yaml b/mlperf_logging/compliance_checker/training_6.0.0/open_rgat.yaml
deleted file mode 100644
index 14c4176d..00000000
--- a/mlperf_logging/compliance_checker/training_6.0.0/open_rgat.yaml
+++ /dev/null
@@ -1,7 +0,0 @@
-
-- KEY:
-    NAME: eval_accuracy
-    REQ: AT_LEAST_ONE
-    CHECK:
-        - "'epoch_num' in v['metadata']"
-    ATLEAST_ONE_CHECK: "v['value'] < 1.0"

From 58a5d1ebc5d6de6fdb5ca3e1afeae95518a7fc87 Mon Sep 17 00:00:00 2001
From: Denys Fridman
Date: Fri, 6 Feb 2026 14:25:58 +0100
Subject: [PATCH 7/9] fix deepseek name

---
 .../compliance_checker/training_6.0.0/closed_common.yaml       | 2 +-
 .../{closed_deepseek_v3.yaml => closed_deepseek_v3_671b.yaml}  | 0
 .../compliance_checker/training_6.0.0/open_common.yaml         | 2 +-
 .../{open_deepseek_v3.yaml => open_deepseek_v3_671b.yaml}      | 0
 4 files changed, 2 insertions(+), 2 deletions(-)
 rename mlperf_logging/compliance_checker/training_6.0.0/{closed_deepseek_v3.yaml => closed_deepseek_v3_671b.yaml} (100%)
 rename mlperf_logging/compliance_checker/training_6.0.0/{open_deepseek_v3.yaml => open_deepseek_v3_671b.yaml} (100%)

diff --git a/mlperf_logging/compliance_checker/training_6.0.0/closed_common.yaml b/mlperf_logging/compliance_checker/training_6.0.0/closed_common.yaml
index 2b11ae6e..7b5c4b12 100755
--- a/mlperf_logging/compliance_checker/training_6.0.0/closed_common.yaml
+++ b/mlperf_logging/compliance_checker/training_6.0.0/closed_common.yaml
@@ -2,7 +2,7 @@
 - KEY:
     NAME: submission_benchmark
     REQ: EXACTLY_ONE
-    CHECK: " v['value'] in ['flux1', 'dlrm_dcnv2', 'llama31_8b', 'llama2_70b_lora', 'llama31_405b', 'deepseek_v3'] "
+    CHECK: " v['value'] in ['flux1', 'dlrm_dcnv2', 'llama31_8b', 'llama2_70b_lora', 'llama31_405b', 'deepseek_v3_671b'] "
     POST: " enqueue_config('training_6.0.0/closed_{}.yaml'.format(v['value'])) "
 
 - KEY:
diff --git a/mlperf_logging/compliance_checker/training_6.0.0/closed_deepseek_v3.yaml b/mlperf_logging/compliance_checker/training_6.0.0/closed_deepseek_v3_671b.yaml
similarity index 100%
rename from mlperf_logging/compliance_checker/training_6.0.0/closed_deepseek_v3.yaml
rename to mlperf_logging/compliance_checker/training_6.0.0/closed_deepseek_v3_671b.yaml
diff --git a/mlperf_logging/compliance_checker/training_6.0.0/open_common.yaml b/mlperf_logging/compliance_checker/training_6.0.0/open_common.yaml
index 1f5c54a0..8b0b43a6 100644
--- a/mlperf_logging/compliance_checker/training_6.0.0/open_common.yaml
+++ b/mlperf_logging/compliance_checker/training_6.0.0/open_common.yaml
@@ -2,5 +2,5 @@
 - KEY:
     NAME: submission_benchmark
     REQ: EXACTLY_ONE
-    CHECK: " v['value'] in ['flux1', 'dlrm_dcnv2', 'llama31_8b', 'llama2_70b_lora', 'llama31_405b', 'deepseek_v3'] "
+    CHECK: " v['value'] in ['flux1', 'dlrm_dcnv2', 'llama31_8b', 'llama2_70b_lora', 'llama31_405b', 'deepseek_v3_671b'] "
     POST: " enqueue_config('training_6.0.0/open_{}.yaml'.format(v['value'])) "
diff --git a/mlperf_logging/compliance_checker/training_6.0.0/open_deepseek_v3.yaml b/mlperf_logging/compliance_checker/training_6.0.0/open_deepseek_v3_671b.yaml
similarity index 100%
rename from mlperf_logging/compliance_checker/training_6.0.0/open_deepseek_v3.yaml
rename to mlperf_logging/compliance_checker/training_6.0.0/open_deepseek_v3_671b.yaml

From 51db55544bc484f4fa67a480d55c661455ba91ba Mon Sep 17 00:00:00 2001
From: Denys Fridman
Date: Fri, 6 Feb 2026 14:27:11 +0100
Subject: [PATCH 8/9] fix deepseek name

---
 mlperf_logging/mllog/constants.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mlperf_logging/mllog/constants.py b/mlperf_logging/mllog/constants.py
index 758b94d5..a59a1ae2 100644
--- a/mlperf_logging/mllog/constants.py
+++ b/mlperf_logging/mllog/constants.py
@@ -57,7 +57,7 @@
 LLAMA31_405B = "llama31_405b"
 LLAMA31_8B = "llama31_8b"
 FLUX1 = "flux1"
-DEEPSEEK_V3 = "deepseek_v3"
+DEEPSEEK_V3 = "deepseek_v3_671b"
 
 # Constant values - model info
 ADAGRAD = "adagrad"

From 039c2fa6c4e0b0ead485637fb64949d3c5e392b5 Mon Sep 17 00:00:00 2001
From: Denys Fridman
Date: Fri, 6 Feb 2026 15:01:32 +0100
Subject: [PATCH 9/9] +DEEPSEEK_V3 -> +DEEPSEEK_V3_671B

---
 mlperf_logging/mllog/constants.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mlperf_logging/mllog/constants.py b/mlperf_logging/mllog/constants.py
index a59a1ae2..3016f0a3 100644
--- a/mlperf_logging/mllog/constants.py
+++ b/mlperf_logging/mllog/constants.py
@@ -57,7 +57,7 @@
 LLAMA31_405B = "llama31_405b"
 LLAMA31_8B = "llama31_8b"
 FLUX1 = "flux1"
-DEEPSEEK_V3 = "deepseek_v3_671b"
+DEEPSEEK_V3_671B = "deepseek_v3_671b"
 
 # Constant values - model info
 ADAGRAD = "adagrad"
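Taken together, the series means a DeepSeek submission logs submission_benchmark = "deepseek_v3_671b" and the hyperparameter keys that closed_deepseek_v3_671b.yaml checks. A rough usage sketch under the final state of this series follows; the constant names for the adamw keys are written as plain strings matching the YAML rule names, and the batch size, learning rate, and eval values are illustrative, not reference hyperparameters.

# Rough sketch of emitting the keys the new closed-division rules check,
# using the mllog API from this package. Values marked "illustrative" are
# assumptions, not taken from the reference implementation.
from mlperf_logging import mllog
from mlperf_logging.mllog import constants

mllog.config(filename="deepseek_v3_671b.log")
mllogger = mllog.get_mllogger()

mllogger.event(key=constants.SUBMISSION_BENCHMARK, value=constants.DEEPSEEK_V3_671B)
mllogger.event(key=constants.GLOBAL_BATCH_SIZE, value=2048)          # illustrative
mllogger.event(key=constants.OPT_NAME, value="adamw")
mllogger.event(key=constants.OPT_BASE_LEARNING_RATE, value=2.3e-4)   # illustrative
mllogger.event(key="opt_adamw_beta_1", value=0.9)
mllogger.event(key="opt_adamw_beta_2", value=0.95)
mllogger.event(key="opt_adamw_epsilon", value=1e-08)
mllogger.event(key="opt_adamw_weight_decay", value=0.1)
mllogger.event(key="opt_gradient_clip_norm", value=1.0)
mllogger.event(key=constants.EVAL_SAMPLES, value=1024)

# Evaluation result checked by the eval_accuracy rule; samples_count metadata is required.
mllogger.event(key=constants.EVAL_ACCURACY, value=2.65,              # illustrative
               metadata={"samples_count": 2097152})                  # illustrative

The resulting log can then be run through the package's compliance checker with the training 6.0.0 ruleset; closed_common.yaml validates the benchmark name and, via its POST enqueue_config hook, pulls in the closed_deepseek_v3_671b.yaml rules shown above.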