Skip to content

Commit f8fd743

Browse files
chg: make name uniqueness rules more consistent
- main changes: - case-insensitive duplicate is a warning rather than an error since it is not a hard requirement and only applies to some use cases - group names can be re-used in a different context rather than having to be unique anywhere in the survey structure (like repeats) - if pyxform reference variables ${} are used the target question, group, or repeat still has to be unique anywhere - add uniqueness validation to xls2json so the errors can use row refs - add an error message specific to the 'meta' reserved name - use same validation funcs in xls2json and section.py/survey.py - update test assertions to use message templates
1 parent a4e05b1 commit f8fd743

File tree

12 files changed

+526
-245
lines changed

12 files changed

+526
-245
lines changed

pyxform/constants.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,7 @@
8686
OSM_TYPE = "binary"
8787

8888
NAMESPACES = "namespaces"
89+
META = "meta"
8990

9091
# The following are the possible sheet names:
9192
SUPPORTED_SHEET_NAMES = {
@@ -125,6 +126,7 @@ class EntityColumns(StrEnum):
125126
ENTITY_ID = "entity_id"
126127
CREATE_IF = "create_if"
127128
UPDATE_IF = "update_if"
129+
REPEAT = "repeat"
128130
LABEL = "label"
129131

130132

@@ -169,3 +171,4 @@ class EntityColumns(StrEnum):
169171
}
170172
SUPPORTED_MEDIA_TYPES = {"image", "big-image", "audio", "video"}
171173
OR_OTHER_CHOICE = {NAME: "other", LABEL: "Other"}
174+
RESERVED_NAMES_SURVEY_SHEET = {META}

pyxform/errors.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,41 @@
22
Common base classes for pyxform exceptions.
33
"""
44

5+
from string import Formatter
6+
7+
8+
class _ErrorFormatter(Formatter):
9+
"""Allows specifying a default for missing format keys."""
10+
11+
def __init__(self, default_value: str = "unknown"):
12+
self.default_value: str = default_value
13+
14+
def get_value(self, key, args, kwargs):
15+
if isinstance(key, str):
16+
value = kwargs.get(key, None)
17+
if value is None:
18+
return self.default_value
19+
else:
20+
return value
21+
else:
22+
return super().get_value(key, args, kwargs)
23+
24+
25+
_ERROR_FORMATTER = _ErrorFormatter()
26+
27+
28+
class Detail:
29+
"""ErrorCode details."""
30+
31+
__slots__ = ("msg", "name")
32+
33+
def __init__(self, name: str, msg: str) -> None:
34+
self.name: str = name
35+
self.msg: str = msg
36+
37+
def format(self, **kwargs):
38+
return _ERROR_FORMATTER.format(self.msg, **kwargs)
39+
540

641
class PyXFormError(Exception):
742
"""Common base class for pyxform exceptions."""

pyxform/section.py

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,10 @@
77
from typing import TYPE_CHECKING
88

99
from pyxform import constants
10-
from pyxform.errors import PyXFormError
1110
from pyxform.external_instance import ExternalInstance
1211
from pyxform.survey_element import SURVEY_ELEMENT_FIELDS, SurveyElement
1312
from pyxform.utils import DetachableElement, node
13+
from pyxform.validators.pyxform import unique_names
1414

1515
if TYPE_CHECKING:
1616
from pyxform.question import Question
@@ -94,18 +94,18 @@ def iter_descendants(
9494
iter_into_section_items=iter_into_section_items,
9595
)
9696

97-
# there's a stronger test of this when creating the xpath
98-
# dictionary for a survey.
9997
def _validate_uniqueness_of_element_names(self):
100-
element_slugs = set()
98+
child_names = set()
99+
child_names_lower = set()
100+
warnings = []
101101
for element in self.children:
102-
elem_lower = element.name.lower()
103-
if elem_lower in element_slugs:
104-
raise PyXFormError(
105-
f"There are more than one survey elements named '{elem_lower}' "
106-
f"(case-insensitive) in the section named '{self.name}'."
107-
)
108-
element_slugs.add(elem_lower)
102+
unique_names.validate_question_group_repeat_name(
103+
name=element.name,
104+
seen_names=child_names,
105+
seen_names_lower=child_names_lower,
106+
warnings=warnings,
107+
check_reserved=False,
108+
)
109109

110110
def xml_instance(self, survey: "Survey", **kwargs):
111111
"""

pyxform/survey.py

Lines changed: 12 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
from pyxform.parsing.expression import has_last_saved
2222
from pyxform.parsing.instance_expression import replace_with_output
2323
from pyxform.question import Itemset, MultipleChoiceQuestion, Option, Question, Tag
24-
from pyxform.section import SECTION_EXTRA_FIELDS, Section
24+
from pyxform.section import SECTION_EXTRA_FIELDS, RepeatingSection, Section
2525
from pyxform.survey_element import SURVEY_ELEMENT_FIELDS, SurveyElement
2626
from pyxform.utils import (
2727
BRACKETED_TAG_REGEX,
@@ -31,6 +31,7 @@
3131
node,
3232
)
3333
from pyxform.validators import enketo_validate, odk_validate
34+
from pyxform.validators.pyxform import unique_names
3435
from pyxform.validators.pyxform.iana_subtags.validation import get_languages_with_bad_tags
3536

3637
RE_BRACKET = re.compile(r"\[([^]]+)\]")
@@ -292,21 +293,16 @@ def validate(self):
292293

293294
def _validate_uniqueness_of_section_names(self):
294295
root_node_name = self.name
295-
section_names = set()
296-
for element in self.iter_descendants(condition=lambda i: isinstance(i, Section)):
297-
if element.name in section_names:
298-
if element.name == root_node_name:
299-
# The root node name is rarely explictly set; explain
300-
# the problem in a more helpful way (#510)
301-
msg = (
302-
f"The name '{element.name}' is the same as the form name. "
303-
"Use a different section name (or change the form name in "
304-
"the 'name' column of the settings sheet)."
305-
)
306-
raise PyXFormError(msg)
307-
msg = f"There are two sections with the name {element.name}."
308-
raise PyXFormError(msg)
309-
section_names.add(element.name)
296+
repeat_names = set()
297+
for element in self.iter_descendants(
298+
condition=lambda i: isinstance(i, RepeatingSection)
299+
):
300+
unique_names.validate_repeat_name(
301+
name=element.name,
302+
control_type=constants.REPEAT,
303+
instance_element_name=root_node_name,
304+
seen_names=repeat_names,
305+
)
310306

311307
def get_nsmap(self):
312308
"""Add additional namespaces"""
Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
from pyxform import constants as const
2+
from pyxform.errors import Detail, PyXFormError
3+
4+
NAMES001 = Detail(
5+
name="Invalid duplicate name in same context",
6+
msg=(
7+
"[row : {row}] On the 'survey' sheet, the 'name' value '{value}' is invalid. "
8+
"Questions, groups, and repeats must be unique within their context i.e. nearest "
9+
"parent group or repeat, or the survey if not inside a group or repeat."
10+
),
11+
)
12+
NAMES002 = Detail(
13+
name="Invalid duplicate name in context (case-insensitive)",
14+
msg=(
15+
"[row : {row}] On the 'survey' sheet, the 'name' value '{value}' is problematic. "
16+
"The name is a case-insensitive match to another name. Questions, groups, and "
17+
"repeats must be unique within their context i.e. nearest parent group or repeat, "
18+
"or the survey if not inside a group or repeat. Some data processing tools are not "
19+
"case-sensitive, so the current names may make analysis difficult."
20+
),
21+
)
22+
NAMES003 = Detail(
23+
name="Invalid repeat name same as survey",
24+
msg=(
25+
"[row : {row}] On the 'survey' sheet, the 'name' value '{value}' is invalid. "
26+
"Repeat names must not be the same as the survey root (which defaults to 'data')."
27+
),
28+
)
29+
NAMES004 = Detail(
30+
name="Invalid duplicate repeat name in the survey",
31+
msg=(
32+
"[row : {row}] On the 'survey' sheet, the 'name' value '{value}' is invalid. "
33+
"Repeat names must unique anywhere the survey, at all levels of group or repeat nesting."
34+
),
35+
)
36+
NAMES005 = Detail(
37+
name="Invalid duplicate meta name in the survey",
38+
msg=(
39+
"[row : {row}] On the 'survey' sheet, the 'name' value 'meta' is invalid. "
40+
"The name 'meta' is reserved for form metadata."
41+
),
42+
)
43+
44+
45+
def validate_question_group_repeat_name(
46+
name: str | None,
47+
seen_names: set[str],
48+
seen_names_lower: set[str],
49+
warnings: list[str],
50+
row_number: int | None = None,
51+
check_reserved: bool = True,
52+
):
53+
"""
54+
Warn about duplicate or problematic names on the survey sheet.
55+
56+
May append the name to `seen_names` and `neen_names_lower`. May append to `warnings`.
57+
58+
:param name: Question or group name.
59+
:param seen_names: Names already processed in the sheet.
60+
:param seen_names_lower: Same as seen_names but lower case.
61+
:param warnings: Warnings list.
62+
:param row_number: Current sheet row number.
63+
:param check_reserved: If True, check the name against any reserved names. When
64+
checking names in the context of SurveyElement processing, it's difficult to
65+
differentiate user-specified names from names added by pyxform.
66+
"""
67+
if not name:
68+
return
69+
70+
if check_reserved and not seen_names >= const.RESERVED_NAMES_SURVEY_SHEET:
71+
seen_names.update(const.RESERVED_NAMES_SURVEY_SHEET)
72+
73+
if name in seen_names:
74+
if name == const.META:
75+
raise PyXFormError(NAMES005.format(row=row_number))
76+
else:
77+
raise PyXFormError(NAMES001.format(row=row_number, value=name))
78+
seen_names.add(name)
79+
80+
question_name_lower = name.lower()
81+
if question_name_lower in seen_names_lower:
82+
# No case-insensitive warning for 'meta' since it's not an exported data table.
83+
warnings.append(NAMES002.format(row=row_number, value=name))
84+
seen_names_lower.add(question_name_lower)
85+
86+
87+
def validate_repeat_name(
88+
name: str | None,
89+
control_type: str,
90+
instance_element_name: str,
91+
seen_names: set[str],
92+
row_number: int | None = None,
93+
):
94+
"""
95+
Warn about duplicate or problematic names.
96+
97+
May appends the name to `seen_names` and `neen_names_lower`. May append to `warnings`.
98+
These checks are additional to the above in validate_survey_sheet_name so does not
99+
re-check reserved names etc.
100+
101+
:param row_number: Current sheet row number.
102+
:param name: Question or group name.
103+
:param control_type: E.g. group, repeat, or loop.
104+
:param instance_element_name: Name of the main survey instance element.
105+
:param seen_names: Names already processed in the sheet.
106+
"""
107+
if control_type == const.REPEAT:
108+
if name == instance_element_name:
109+
raise PyXFormError(NAMES003.format(row=row_number, value=name))
110+
elif name in seen_names:
111+
raise PyXFormError(NAMES004.format(row=row_number, value=name))
112+
seen_names.add(name)

pyxform/xls2json.py

Lines changed: 31 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828
default_is_dynamic,
2929
print_pyobj_to_json,
3030
)
31-
from pyxform.validators.pyxform import parameters_generic, select_from_file
31+
from pyxform.validators.pyxform import parameters_generic, select_from_file, unique_names
3232
from pyxform.validators.pyxform import question_types as qt
3333
from pyxform.validators.pyxform.android_package_name import validate_android_package_name
3434
from pyxform.validators.pyxform.choices import validate_and_clean_choices
@@ -527,6 +527,8 @@ def workbook_to_json(
527527
"control_type": None,
528528
"control_name": None,
529529
"parent_children": json_dict.get(constants.CHILDREN),
530+
"child_names": set(),
531+
"child_names_lower": set(),
530532
"row_number": None,
531533
}
532534
]
@@ -539,11 +541,14 @@ def workbook_to_json(
539541
# To check that questions with triggers refer to other questions that exist.
540542
question_names = set()
541543
trigger_references = []
544+
repeat_names = set()
542545

543546
# row by row, validate questions, throwing errors and adding warnings where needed.
544547
for row_number, row in enumerate(survey_sheet.data, start=2):
545548
prev_control_type = stack[-1]["control_type"]
546549
parent_children_array = stack[-1]["parent_children"]
550+
child_names = stack[-1]["child_names"]
551+
child_names_lower = stack[-1]["child_names_lower"]
547552

548553
# Disabled should probably be first
549554
# so the attributes below can be disabled.
@@ -789,9 +794,9 @@ def workbook_to_json(
789794

790795
# Make sure the row has a valid name
791796
if constants.NAME not in row:
792-
if row["type"] == "note":
797+
if row[constants.TYPE] == "note":
793798
# autogenerate names for notes without them
794-
row["name"] = "generated_note_name_" + str(row_number)
799+
row[constants.NAME] = "generated_note_name_" + str(row_number)
795800
else:
796801
raise PyXFormError(
797802
ROW_FORMAT_STRING % row_number + " Question or group with no name."
@@ -805,7 +810,17 @@ def workbook_to_json(
805810
f"{ROW_FORMAT_STRING % row_number} Invalid question name '{question_name}'. Names {XML_IDENTIFIER_ERROR_MESSAGE}"
806811
)
807812

808-
in_repeat = any(ancestor["control_type"] == "repeat" for ancestor in stack)
813+
unique_names.validate_question_group_repeat_name(
814+
row_number=row_number,
815+
name=question_name,
816+
seen_names=child_names,
817+
seen_names_lower=child_names_lower,
818+
warnings=warnings,
819+
)
820+
821+
in_repeat = any(
822+
ancestor["control_type"] == constants.REPEAT for ancestor in stack
823+
)
809824
validate_entity_saveto(row, row_number, in_repeat, entity_declaration)
810825

811826
# Try to parse question as begin control statement
@@ -820,7 +835,14 @@ def workbook_to_json(
820835
# (so following questions are nested under it)
821836
# until an end command is encountered.
822837
control_type = aliases.control[parse_dict["type"]]
823-
control_name = question_name
838+
839+
unique_names.validate_repeat_name(
840+
row_number=row_number,
841+
name=question_name,
842+
control_type=control_type,
843+
instance_element_name=json_dict[constants.NAME],
844+
seen_names=repeat_names,
845+
)
824846

825847
# Check if the control item has a label, if applicable.
826848
# This label check used to apply to all items, but no longer is
@@ -854,7 +876,7 @@ def workbook_to_json(
854876
new_json_dict[constants.TYPE] = control_type
855877
child_list = []
856878
new_json_dict[constants.CHILDREN] = child_list
857-
if control_type is constants.LOOP:
879+
if control_type == constants.LOOP:
858880
if not parse_dict.get(constants.LIST_NAME_U):
859881
# TODO: Perhaps warn and make repeat into a group?
860882
raise PyXFormError(
@@ -932,8 +954,10 @@ def workbook_to_json(
932954
stack.append(
933955
{
934956
"control_type": control_type,
935-
"control_name": control_name,
957+
"control_name": question_name,
936958
"parent_children": child_list,
959+
"child_names": set(),
960+
"child_names_lower": set(),
937961
"row_number": row_number,
938962
}
939963
)

tests/test_external_instances.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66

77
from textwrap import dedent
88

9+
from pyxform.validators.pyxform import unique_names
10+
911
from tests.pyxform_test_case import PyxformTestCase, PyxformTestError
1012
from tests.xpath_helpers.choices import xpc
1113

@@ -48,9 +50,7 @@ def test_cannot__use_same_external_xml_id_in_same_section(self):
4850
self.assertPyxformXform(
4951
md=md,
5052
errored=True,
51-
error__contains=[
52-
"There are more than one survey elements named 'mydata' (case-insensitive) in the section named 'test_name'"
53-
],
53+
error__contains=[unique_names.NAMES001.format(row=3, value="mydata")],
5454
)
5555

5656
def test_can__use_unique_external_xml_in_same_section(self):

0 commit comments

Comments
 (0)