Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Test HTML validation for project status updates #1538

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 22 additions & 22 deletions moped-etl/arcgis/components_to_agol.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,31 +53,31 @@ def is_valid_HTML_tag(html_string_to_check):
return html_string_to_check == str(soup)


def handle_status_updates(features):
"""Check project status updates for valid or invalid HTML; escape HTML if needed.
Project status updates can be plain text, valid HTML, or invalid HTML. If invalid HTML is found,
the content of the update is escaped to prevent it from being rejected by AGOL (504 or 400 error).
# def handle_status_updates(features):
# """Check project status updates for valid or invalid HTML; escape HTML if needed.
# Project status updates can be plain text, valid HTML, or invalid HTML. If invalid HTML is found,
# the content of the update is escaped to prevent it from being rejected by AGOL (504 or 400 error).

Args:
features (list): list of Esri feature objects
# Args:
# features (list): list of Esri feature objects

Returns:
list: list of Esri feature objects with status update HTML escaped if needed
"""
for record in features:
id = record["attributes"]["project_id"]
# Returns:
# list: list of Esri feature objects with status update HTML escaped if needed
# """
# for record in features:
# id = record["attributes"]["project_id"]

status_update = record["attributes"]["project_status_update"]
# status_update = record["attributes"]["project_status_update"]

if status_update != None and has_html_tags(status_update):
if not is_valid_HTML_tag(status_update):
logger.info(
f"Invalid HTML tag found in project_id: {id}. Getting HTML text..."
)
html_text = BeautifulSoup(status_update, "html.parser").get_text()
record["attributes"]["project_status_update"] = html_text
# if status_update != None and has_html_tags(status_update):
# if not is_valid_HTML_tag(status_update):
# logger.info(
# f"Invalid HTML tag found in project_id: {id}. Getting HTML text..."
# )
# html_text = BeautifulSoup(status_update, "html.parser").get_text()
# record["attributes"]["project_status_update"] = html_text

return features
# return features


def get_esri_geometry_key(geometry):
Expand Down Expand Up @@ -247,7 +247,7 @@ def main(args):
for feature_type in ["points", "lines", "combined", "exploded"]:
logger.info(f"Processing {feature_type} features...")
features_of_type = all_features[feature_type]
features = handle_status_updates(features_of_type)
features = features_of_type

logger.info("Deleting all existing features...")
if not args.test:
Expand All @@ -268,7 +268,7 @@ def main(args):
for feature_type in ["points", "lines", "combined", "exploded"]:
logger.info(f"Processing {feature_type} features...")
features_of_type = all_features[feature_type]
features = handle_status_updates(features_of_type)
features = features_of_type

logger.info(
f"Deleting all existing features in {feature_type} layer for updated projects in chunks of {UPLOAD_CHUNK_SIZE}..."
Expand Down