From 83f9bb7637014282fabef7dc82919c207fdaf0d3 Mon Sep 17 00:00:00 2001 From: Andrew Hayzen Date: Thu, 1 Feb 2024 11:50:52 +0900 Subject: [PATCH] analyze_html: display the last url of each shortcode This allows for finding examples of where they are used in the existing site. --- wagtail_wordpress_import/analysis.py | 6 +++++- wagtail_wordpress_import/importers/wordpress.py | 4 ++-- .../management/commands/analyze_html_content.py | 3 ++- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/wagtail_wordpress_import/analysis.py b/wagtail_wordpress_import/analysis.py index f7d96289..d40615a5 100644 --- a/wagtail_wordpress_import/analysis.py +++ b/wagtail_wordpress_import/analysis.py @@ -20,6 +20,7 @@ def __init__(self): self.styles_unique_pages = Counter() self.classes_unique_pages = Counter() self.shortcodes_unique_pages = Counter() + self.shortcodes_page_url = {} @classmethod def find_all_tags(cls, dom): @@ -91,7 +92,7 @@ def find_all_shortcodes(cls, dom): return shortcodes - def analyze(self, html): + def analyze(self, html, page_url): self.total += 1 try: @@ -117,3 +118,6 @@ def analyze(self, html): self.styles_unique_pages.update(styles.keys()) self.classes_unique_pages.update(classes.keys()) self.shortcodes_unique_pages.update(shortcodes.keys()) + + for shortcode in shortcodes.keys(): + self.shortcodes_page_url[shortcode] = page_url diff --git a/wagtail_wordpress_import/importers/wordpress.py b/wagtail_wordpress_import/importers/wordpress.py index 8c056580..263c2d04 100644 --- a/wagtail_wordpress_import/importers/wordpress.py +++ b/wagtail_wordpress_import/importers/wordpress.py @@ -263,9 +263,9 @@ def analyze_html(self, html_analyzer, *, page_types, page_statuses): item.get("wp:post_type") in page_types and item.get("wp:status") in page_statuses ): - html_analyzer.analyze( - filter_linebreaks_wp(item.get("content:encoded")) + filter_linebreaks_wp(item.get("content:encoded")), + item.get("link"), ) def connect_richtext_page_links(self, imported_pages): diff --git a/wagtail_wordpress_import/management/commands/analyze_html_content.py b/wagtail_wordpress_import/management/commands/analyze_html_content.py index 314ab404..b7fa6816 100644 --- a/wagtail_wordpress_import/management/commands/analyze_html_content.py +++ b/wagtail_wordpress_import/management/commands/analyze_html_content.py @@ -112,10 +112,11 @@ def handle(self, **options): "Shortcode", "Pages used on", "Total occurrences", + "Last URL", ] for shortcode, total_pages in analyzer.shortcodes_unique_pages.most_common(): shortcodes_table.add_row( - [shortcode, total_pages, analyzer.shortcodes_total[shortcode]] + [shortcode, total_pages, analyzer.shortcodes_total[shortcode], analyzer.shortcodes_page_url[shortcode]] ) self.stdout.write("Most commonly used shortcodes")