From 84e3719806751e128bbf6f3a2b8981ea9e8660a5 Mon Sep 17 00:00:00 2001 From: Matthieu Gautier Date: Mon, 4 Apr 2022 18:05:38 +0200 Subject: [PATCH] Correctly detect the number of article for zim version <= 6 --- src/book.cpp | 2 +- src/tools/archiveTools.cpp | 24 ++++++++++++++++++++++++ src/tools/archiveTools.h | 1 + 3 files changed, 26 insertions(+), 1 deletion(-) diff --git a/src/book.cpp b/src/book.cpp index c49ab2f19..32766b6f2 100644 --- a/src/book.cpp +++ b/src/book.cpp @@ -83,7 +83,7 @@ void Book::update(const zim::Archive& archive) { m_flavour = getMetaFlavour(archive); m_tags = getMetaTags(archive); m_category = getCategoryFromTags(); - m_articleCount = archive.getArticleCount(); + m_articleCount = getArchiveArticleCount(archive); m_mediaCount = getArchiveMediaCount(archive); m_size = static_cast<uint64_t>(getArchiveFileSize(archive)) << 10; diff --git a/src/tools/archiveTools.cpp b/src/tools/archiveTools.cpp index 5ec5bd9e9..5971ec495 100644 --- a/src/tools/archiveTools.cpp +++ b/src/tools/archiveTools.cpp @@ -125,6 +125,30 @@ unsigned int getArchiveMediaCount(const zim::Archive& archive) { return counter; } +unsigned int getArchiveArticleCount(const zim::Archive& archive) { + // [HACK] + // getArticleCount() returns different things depending on the "version" of the zim. + // On old zim (<=6), it returns the number of entries in the `A` namespace + // On recent zim (>=7), it returns: + // - the number of entries in the `C` namespace (==getEntryCount) if no frontArticleIndex is present + // - the number of front articles if a frontArticleIndex is present + // The use case >=7 without frontArticleIndex is pretty rare so we don't care + // We can detect that we are reading a zim <= 6 by checking that it does not have the new namespace scheme. 
+ if (archive.hasNewNamespaceScheme()) { + //The articleCount is "good" + return archive.getArticleCount(); + } else { + // We have to parse the `M/Counter` metadata + unsigned int counter = 0; + for(const auto& pair:parseArchiveCounter(archive)) { + if (startsWith(pair.first, "text/html")) { + counter += pair.second; + } + } + return counter; + } +} + unsigned int getArchiveFileSize(const zim::Archive& archive) { return archive.getFilesize() / 1024; } diff --git a/src/tools/archiveTools.h b/src/tools/archiveTools.h index cfcf1ced1..0e2108045 100644 --- a/src/tools/archiveTools.h +++ b/src/tools/archiveTools.h @@ -46,6 +46,7 @@ namespace kiwix std::string& content, std::string& mimeType); unsigned int getArchiveMediaCount(const zim::Archive& archive); + unsigned int getArchiveArticleCount(const zim::Archive& archive); unsigned int getArchiveFileSize(const zim::Archive& archive); zim::Item getFinalItem(const zim::Archive& archive, const zim::Entry& entry);