From a7c5904304749a4dcf760cc1b734c5646565e9f1 Mon Sep 17 00:00:00 2001
From: Thomas Krijnen
Date: Tue, 25 Nov 2025 12:50:38 +0100
Subject: [PATCH 1/3] MAX_OUTCOMES_PER_RULE

---
 backend/apps/ifc_validation/checks/check_gherkin.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/backend/apps/ifc_validation/checks/check_gherkin.py b/backend/apps/ifc_validation/checks/check_gherkin.py
index 16d3131d..fc77d110 100644
--- a/backend/apps/ifc_validation/checks/check_gherkin.py
+++ b/backend/apps/ifc_validation/checks/check_gherkin.py
@@ -7,7 +7,7 @@ except:
     import apps.ifc_validation.checks.ifc_gherkin_rules as gherkin_rules # tests
 
 
-def perform(ifc_fn, task_id, rule_type, verbose, purepythonparser=False):
+def perform(ifc_fn, task_id, rule_type, max_outcomes: int, verbose, purepythonparser=False):
 
     try:
 
@@ -15,6 +15,7 @@ def perform(ifc_fn, task_id, rule_type, verbose, purepythonparser=False):
         rules_run = gherkin_rules.run(
             filename=ifc_fn,
             rule_type=gherkin_rule_type,
+            max_outcomes=max_outcomes,
             task_id=task_id,
             with_console_output=verbose,
             purepythonparser=purepythonparser
@@ -34,6 +35,7 @@ def perform(ifc_fn, task_id, rule_type, verbose, purepythonparser=False):
     parser.add_argument("--file-name", "-f", type=str, required=True)
     parser.add_argument("--task-id", "-t", type=int, required=False, default=None)
     parser.add_argument("--rule-type", "-r", type=str, default='ALL')
+    parser.add_argument('--max-outcomes', "-m", type=int, default=0)
     parser.add_argument("--verbose", "-v", action='store_true')
     parser.add_argument("--purepythonparser", "-p", action="store_true")
     args = parser.parse_args()
@@ -42,6 +44,7 @@ def perform(ifc_fn, task_id, rule_type, verbose, purepythonparser=False):
         ifc_fn=args.file_name,
         task_id=args.task_id,
         rule_type=args.rule_type,
+        max_outcomes=args.max_outcomes,
         verbose=args.verbose,
         purepythonparser=args.purepythonparser
     )
\ No newline at end of file

From 8f8666bf51c62f35e5a058350f043b0b331adc00 Mon Sep 17 00:00:00 2001
From: Thomas Krijnen
Date: Tue, 25 Nov 2025 14:25:43 +0100
Subject: [PATCH 2/3] MAX_OUTCOMES_PER_RULE

---
 .../apps/ifc_validation/tasks/check_programs.py | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/backend/apps/ifc_validation/tasks/check_programs.py b/backend/apps/ifc_validation/tasks/check_programs.py
index 94f4cf22..9aa5fc0d 100644
--- a/backend/apps/ifc_validation/tasks/check_programs.py
+++ b/backend/apps/ifc_validation/tasks/check_programs.py
@@ -12,7 +12,7 @@
 from apps.ifc_validation_models.settings import TASK_TIMEOUT_LIMIT
 from apps.ifc_validation_models.models import ValidationTask
 
-from core.settings import MAX_FILE_SIZE_IN_MB
+from core.settings import MAX_FILE_SIZE_IN_MB, MAX_OUTCOMES_PER_RULE
 
 from .logger import logger
 from .context import TaskContext
@@ -204,8 +204,9 @@ def check_prerequisites(context:TaskContext):
             os.path.join(checks_dir, "check_gherkin.py"),
             "--file-name", context.file_path,
             "--task-id", str(context.task.id),
-            "--rule-type", "CRITICAL",
-            "--purepythonparser"
+            "--rule-type", "CRITICAL",
+            "--max-outcomes", str(MAX_OUTCOMES_PER_RULE),
+            "--purepythonparser",
         ]
     )
     raw_output = check_proc_success_or_fail(proc, context.task)
@@ -220,7 +221,8 @@ def check_normative_ia(context:TaskContext):
             os.path.join(checks_dir, "check_gherkin.py"),
             "--file-name", context.file_path,
             "--task-id", str(context.task.id),
-            "--rule-type", "IMPLEMENTER_AGREEMENT"
+            "--rule-type", "IMPLEMENTER_AGREEMENT",
+            "--max-outcomes", str(MAX_OUTCOMES_PER_RULE),
         ]
     )
     raw_output = check_proc_success_or_fail(proc, context.task)
@@ -235,7 +237,8 @@ def check_normative_ip(context:TaskContext):
             os.path.join(checks_dir, "check_gherkin.py"),
             "--file-name", context.file_path,
             "--task-id", str(context.task.id),
-            "--rule-type", "INFORMAL_PROPOSITION"
+            "--rule-type", "INFORMAL_PROPOSITION",
+            "--max-outcomes", str(MAX_OUTCOMES_PER_RULE),
         ]
     )
     raw_output = check_proc_success_or_fail(proc, context.task)
@@ -250,7 +253,8 @@ def check_industry_practices(context:TaskContext):
             os.path.join(checks_dir, "check_gherkin.py"),
             "--file-name", context.file_path,
             "--task-id", str(context.task.id),
-            "--rule-type", "INDUSTRY_PRACTICE"
+            "--rule-type", "INDUSTRY_PRACTICE",
+            "--max-outcomes", str(MAX_OUTCOMES_PER_RULE),
         ]
     )
     raw_output = check_proc_success_or_fail(proc, context.task)

From f780499356fad0274256908c073c9a2fb47d596e Mon Sep 17 00:00:00 2001
From: Thomas Krijnen
Date: Wed, 26 Nov 2025 13:03:41 +0100
Subject: [PATCH 3/3] Run instance completion in subprocess to prevent celery mem from increasing

---
 .../tasks/processing/instance_completion.py | 49 +++++++++++++++----
 1 file changed, 40 insertions(+), 9 deletions(-)

diff --git a/backend/apps/ifc_validation/tasks/processing/instance_completion.py b/backend/apps/ifc_validation/tasks/processing/instance_completion.py
index 1e26e6c5..4ecbfadb 100644
--- a/backend/apps/ifc_validation/tasks/processing/instance_completion.py
+++ b/backend/apps/ifc_validation/tasks/processing/instance_completion.py
@@ -1,21 +1,52 @@
+import json
+import subprocess
+import sys
+import textwrap
 import ifcopenshell
 
 from .. import TaskContext, logger
 from apps.ifc_validation_models.models import ModelInstance
-from django.db import transaction
+from django.db import transaction
+
+_completion_script_str = textwrap.dedent(
+    """
+    import sys
+    import json
+    import ifcopenshell
+
+    file_path, step_ids = json.load(sys.stdin)
+    ifc_file = ifcopenshell.open(file_path)
+    json.dump([ifc_file[step_id].is_a() for step_id in step_ids], sys.stdout)
+    """
+)
+
+
+def _obtain_ifc_types(file_path: str, step_ids: list[int]) -> list[str]:
+    return json.loads(subprocess.run(
+        [sys.executable, "-u", "-c", _completion_script_str],
+        input=json.dumps([file_path, step_ids]),
+        capture_output=True,
+        text=True,
+        check=True,
+    ).stdout)
+
 
 def process_instance_completion(context:TaskContext):
 
     # the current task doesn't have any execution layer and links instance ids to outcomes
-    ifc_file = ifcopenshell.open(context.file_path)
+    model_id = context.request.model.id
+    model_instances = ModelInstance.objects.filter(model_id=model_id, ifc_type__in=[None, ''])
+    instance_count = model_instances.count()
+    logger.info(f'Retrieved {instance_count:,} ModelInstance record(s)')
+
+    step_ids = list(
+        model_instances.values_list("stepfile_id", flat=True)
+    )
+    step_id_to_type = dict(zip(step_ids, _obtain_ifc_types(context.file_path, step_ids)))
+
     with transaction.atomic():
-        model_id = context.request.model.id
-        model_instances = ModelInstance.objects.filter(model_id=model_id, ifc_type__in=[None, ''])
-        instance_count = model_instances.count()
-        logger.info(f'Retrieved {instance_count:,} ModelInstance record(s)')
-
         for inst in model_instances.iterator():
-            inst.ifc_type = ifc_file[inst.stepfile_id].is_a()
+            inst.ifc_type = step_id_to_type[inst.stepfile_id]
             inst.save()
 
-    return f'Updated {instance_count:,} ModelInstance record(s)'
+    return f'Updated {instance_count:,} ModelInstance record(s)'
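
Patches 1/3 and 2/3 import MAX_OUTCOMES_PER_RULE from core.settings, but its definition there is not part of this series. Below is a minimal sketch of what that setting could look like, assuming an environment-variable override; the env-var name, the fallback value, and the reading of 0 as "no cap" are assumptions that only mirror the argparse default (--max-outcomes, default=0) in check_gherkin.py:

    # core/settings.py -- sketch only, not part of the patches above
    import os

    # Cap on the number of outcomes recorded per gherkin rule.
    # The default of 0 mirrors the --max-outcomes argparse default in
    # check_gherkin.py and is assumed here to mean "no cap".
    MAX_OUTCOMES_PER_RULE = int(os.environ.get("MAX_OUTCOMES_PER_RULE", "0"))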
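
Patch 3/3 moves the ifcopenshell work into a short-lived child process so the memory used to parse the model is returned to the operating system when that process exits, rather than staying resident in the long-running Celery worker. A hypothetical direct call to the new helper, with an illustrative file path and STEP ids:

    # Runs ifcopenshell in a subprocess and returns one entity type per STEP id.
    types = _obtain_ifc_types("/tmp/model.ifc", [1, 5, 42])
    # e.g. ["IfcProject", "IfcWall", "IfcDoor"]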