diff --git a/rb/lib/twitter-text/extractor.rb b/rb/lib/twitter-text/extractor.rb index a2fd7db7b..25734fe58 100644 --- a/rb/lib/twitter-text/extractor.rb +++ b/rb/lib/twitter-text/extractor.rb @@ -3,7 +3,7 @@ # http://www.apache.org/licenses/LICENSE-2.0 # encoding: utf-8 -require 'idn' +require 'simpleidn' class String # Helper function to count the character length by first converting to an @@ -57,6 +57,8 @@ module Extractor extend self # Maximum URL length as defined by Twitter's backend. MAX_URL_LENGTH = 4096 + MAX_DOMAIN_LABEL_LENGTH = 63 + # The maximum t.co path length that the Twitter backend supports. MAX_TCO_SLUG_LENGTH = 40 @@ -373,7 +375,12 @@ def is_valid_domain(url_length, domain, protocol) begin raise ArgumentError.new("invalid empty domain") unless domain original_domain_length = domain.length - encoded_domain = IDN::Idna.toASCII(domain) + encoded_domain = SimpleIDN.to_ascii(domain) + # If the domain starts with xn-- but is not only ASCII characters, it's invalid. + return false if domain.start_with?("xn--") && !domain.ascii_only? + labels = encoded_domain.split('.') + # If any label of the domain is longer than 63 characters, it's invalid. + return false if labels.any?{|label| label.length > MAX_DOMAIN_LABEL_LENGTH} updated_domain_length = encoded_domain.length url_length += (updated_domain_length - original_domain_length) if (updated_domain_length > original_domain_length) url_length += URL_PROTOCOL_LENGTH unless protocol diff --git a/rb/spec/test_urls.rb b/rb/spec/test_urls.rb index dc2b5c7ff..942194f3a 100644 --- a/rb/spec/test_urls.rb +++ b/rb/spec/test_urls.rb @@ -41,7 +41,9 @@ module TestUrls "http://foobar.中国", "http://foobar.پاکستان", "https://www.youtube.com/playlist?list=PL0ZPu8XSRTB7wZzn0mLHMvyzVFeRxbWn-", - "http://ああ.com" + "http://ああ.com", + "twitter.联通", + "https://🌈🌈🌈.st" ] unless defined?(TestUrls::VALID) INVALID = [ diff --git a/rb/twitter-text.gemspec b/rb/twitter-text.gemspec index e145b6362..2eef1410c 100644 --- a/rb/twitter-text.gemspec +++ b/rb/twitter-text.gemspec @@ -19,14 +19,13 @@ Gem::Specification.new do |s| s.add_development_dependency "test-unit" s.add_development_dependency "multi_json", "~> 1.3" - s.add_development_dependency "nokogiri", "~> 1.10.9" + s.add_development_dependency "nokogiri", "~> 1.15.3" s.add_development_dependency "rake" s.add_development_dependency "rdoc" s.add_development_dependency "rspec", "~> 3.0" s.add_development_dependency "simplecov" s.add_runtime_dependency "unf", "~> 0.1.0" - # Use of idn-ruby requires libidn to be installed separately - s.add_runtime_dependency "idn-ruby" + s.add_runtime_dependency "simpleidn" s.files = `git ls-files`.split("\n") + ['lib/assets/tld_lib.yml'] + Dir['config/*'] s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")