diff --git a/pyxform/xls2json.py b/pyxform/xls2json.py
index fe1dedab..59ded054 100644
--- a/pyxform/xls2json.py
+++ b/pyxform/xls2json.py
@@ -355,7 +355,9 @@ def get_entity_declaration(workbook_dict: Dict, warnings: List) -> Dict:
     entities_sheet = workbook_dict.get(constants.ENTITIES, [])
 
     if len(entities_sheet) == 0:
-        similar = find_sheet_misspellings(key=constants.ENTITIES, keys=workbook_dict.keys())
+        similar = find_sheet_misspellings(
+            key=constants.ENTITIES, keys=workbook_dict.keys()
+        )
         if similar is not None:
             warnings.append(similar + _MSG_SUPPRESS_SPELLING)
         return {}
@@ -365,6 +367,20 @@ def get_entity_declaration(workbook_dict: Dict, warnings: List) -> Dict:
         )
 
     entity = entities_sheet[0]
+    dataset = entity["dataset"]
+    # Normalise bytes to text first: the checks and messages below expect str.
+    if isinstance(dataset, bytes):
+        dataset = dataset.decode("utf-8")
+
+    if dataset.startswith(ENTITIES_RESERVED_PREFIX):
+        raise PyXFormError(
+            f"Invalid dataset name: '{dataset}' starts with reserved prefix {ENTITIES_RESERVED_PREFIX}."
+        )
+
+    if not is_valid_xml_tag(dataset):
+        raise PyXFormError(
+            f"Invalid dataset name: '{dataset}'. Dataset names {XML_IDENTIFIER_ERROR_MESSAGE}"
+        )
 
     if not ("label" in entity):
         raise PyXFormError("The entities sheet is missing the required label column.")
@@ -375,7 +391,7 @@ def get_entity_declaration(workbook_dict: Dict, warnings: List) -> Dict:
         "name": "entity",
         "type": "entity",
         "parameters": {
-            "dataset": entities_sheet[0]["dataset"],
+            "dataset": dataset,
             "create": creation_condition,
             "label": entity["label"],
         },
diff --git a/tests/test_entities.py b/tests/test_entities.py
index 40026f80..2b99db69 100644
--- a/tests/test_entities.py
+++ b/tests/test_entities.py
@@ -51,6 +51,40 @@ def test_dataset_in_entities_sheet__adds_dataset_attribute_to_entity(self):
             ],
         )
 
+    def test_dataset_with_reserved_prefix__errors(self):
+        self.assertPyxformXform(
+            name="data",
+            md="""
+            | survey | | | |
+            | | type | name | label |
+            | | text | a | A |
+            | entities | | | |
+            | | dataset | label | |
+            | | __sweet | a | |
+            """,
+            errored=True,
+            error__contains=[
+                "Invalid dataset name: '__sweet' starts with reserved prefix __."
+            ],
+        )
+
+    def test_dataset_with_invalid_xml_name__errors(self):
+        self.assertPyxformXform(
+            name="data",
+            md="""
+            | survey | | | |
+            | | type | name | label |
+            | | text | a | A |
+            | entities | | | |
+            | | dataset | label | |
+            | | $sweet | a | |
+            """,
+            errored=True,
+            error__contains=[
+                "Invalid dataset name: '$sweet'. Dataset names must begin with a letter, colon, or underscore. Other characters can include numbers, dashes, and periods."
+            ],
+        )
+
     def test_worksheet_name_close_to_entities__produces_warning(self):
         self.assertPyxformXform(
             name="data",
@@ -62,7 +96,9 @@ def test_worksheet_name_close_to_entities__produces_warning(self):
             | | dataset | label | |
             | | trees | a | |
             """,
-            warnings__contains=["When looking for a sheet named 'entities', the following sheets with similar names were found: 'entitoes'."]
+            warnings__contains=[
+                "When looking for a sheet named 'entities', the following sheets with similar names were found: 'entitoes'."
+            ],
         )
 
     def test_dataset_in_entities_sheet__defaults_to_always_creating(self):