From ab912a5586b3187ea91484c25c4980de4dede57c Mon Sep 17 00:00:00 2001 From: John Hawthorn Date: Sat, 17 Feb 2024 22:30:46 -0800 Subject: [PATCH] Canonicalize and dedup URLs in to_rb JSON::LD::Context#parse will only look in the PRELOADED hash with a fully canonicalized URL including replacing https with http. This means that any preloads or aliases under non-canonicalized names can't be used and will just waste memory. This commit fully canonicalizes both the base and alias URLs (including changing https to http) and removes any duplicates. --- lib/json/ld/context.rb | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/lib/json/ld/context.rb b/lib/json/ld/context.rb index 3334974..5d052c2 100644 --- a/lib/json/ld/context.rb +++ b/lib/json/ld/context.rb @@ -1724,6 +1724,16 @@ def compact_value(property, value, base: nil) # @return [String] def to_rb(*aliases) canon_base = RDF::URI(context_base).canonicalize + canon_base.scheme = 'http' if canon_base.scheme == 'https' + + aliases = aliases.map do |url| + url = RDF::URI(url).canonicalize + url.scheme = 'http' if url.scheme == 'https' + url.to_s + end.uniq + + aliases.delete(canon_base.to_s) + defn = [] defn << "base: #{base.to_s.inspect}" if base