diff --git a/wagtail_wordpress_import/analysis.py b/wagtail_wordpress_import/analysis.py index f7d96289..d40615a5 100644 --- a/wagtail_wordpress_import/analysis.py +++ b/wagtail_wordpress_import/analysis.py @@ -20,6 +20,7 @@ def __init__(self): self.styles_unique_pages = Counter() self.classes_unique_pages = Counter() self.shortcodes_unique_pages = Counter() + self.shortcodes_page_url = {} @classmethod def find_all_tags(cls, dom): @@ -91,7 +92,7 @@ def find_all_shortcodes(cls, dom): return shortcodes - def analyze(self, html): + def analyze(self, html, page_url): self.total += 1 try: @@ -117,3 +118,6 @@ def analyze(self, html): self.styles_unique_pages.update(styles.keys()) self.classes_unique_pages.update(classes.keys()) self.shortcodes_unique_pages.update(shortcodes.keys()) + + for shortcode in shortcodes.keys(): + self.shortcodes_page_url[shortcode] = page_url diff --git a/wagtail_wordpress_import/importers/wordpress.py b/wagtail_wordpress_import/importers/wordpress.py index 8c056580..f8629b27 100644 --- a/wagtail_wordpress_import/importers/wordpress.py +++ b/wagtail_wordpress_import/importers/wordpress.py @@ -265,7 +265,8 @@ def analyze_html(self, html_analyzer, *, page_types, page_statuses): ): html_analyzer.analyze( - filter_linebreaks_wp(item.get("content:encoded")) + filter_linebreaks_wp(item.get("content:encoded")), + item.get("link"), ) def connect_richtext_page_links(self, imported_pages): diff --git a/wagtail_wordpress_import/management/commands/analyze_html_content.py b/wagtail_wordpress_import/management/commands/analyze_html_content.py index 314ab404..b7fa6816 100644 --- a/wagtail_wordpress_import/management/commands/analyze_html_content.py +++ b/wagtail_wordpress_import/management/commands/analyze_html_content.py @@ -112,10 +112,11 @@ def handle(self, **options): "Shortcode", "Pages used on", "Total occurrences", + "Last URL", ] for shortcode, total_pages in analyzer.shortcodes_unique_pages.most_common(): shortcodes_table.add_row( - [shortcode, total_pages, analyzer.shortcodes_total[shortcode]] + [shortcode, total_pages, analyzer.shortcodes_total[shortcode], analyzer.shortcodes_page_url[shortcode]] ) self.stdout.write("Most commonly used shortcodes")