Skip to content

Commit

Permalink
Merge branch 'unicode-12.1'
Browse files Browse the repository at this point in the history
  • Loading branch information
mislav committed Nov 29, 2019
2 parents 59b3fde + d1002aa commit cefc4b0
Show file tree
Hide file tree
Showing 7 changed files with 4,602 additions and 4,210 deletions.
2 changes: 1 addition & 1 deletion Rakefile
Original file line number Diff line number Diff line change
Expand Up @@ -20,5 +20,5 @@ namespace :db do
end

file 'vendor/unicode-emoji-test.txt' do |t|
- system 'curl', '-fsSL', 'http://unicode.org/Public/emoji/12.0/emoji-test.txt', '-o', t.name
+ system 'curl', '-fsSL', 'http://unicode.org/Public/emoji/12.1/emoji-test.txt', '-o', t.name
end
26 changes: 12 additions & 14 deletions db/dump.rb
Original file line number Diff line number Diff line change
Expand Up @@ -12,34 +12,32 @@
for category in categories
for sub_category in category[:emoji]
for emoji_item in sub_category[:emoji]
- unicodes = emoji_item[:sequences].sort_by(&:bytesize)
- existing_emoji = nil
- unicodes.detect do |raw|
- existing_emoji = Emoji.find_by_unicode(raw)
+ raw = emoji_item[:sequences][0]
+ existing_emoji = Emoji.find_by_unicode(raw) || Emoji.find_by_unicode("#{raw}\u{fe0f}")
+ if seen_existing.key?(existing_emoji)
+ existing_emoji = nil
+ else
+ seen_existing[existing_emoji] = true
end
- existing_emoji = nil if seen_existing.key?(existing_emoji)
+ description = emoji_item[:description].sub(/^E\d+(\.\d+)? /, '')
output_item = {
- emoji: unicodes[0],
- description: emoji_item[:description],
+ emoji: raw,
+ description: description,
category: category[:name],
}
if existing_emoji
- eu = existing_emoji.unicode_aliases
- preferred_raw = eu.size == 2 && eu[0] == "#{eu[1]}\u{fe0f}" ? eu[1] : eu[0]
output_item.update(
- emoji: preferred_raw,
aliases: existing_emoji.aliases,
tags: existing_emoji.tags,
unicode_version: existing_emoji.unicode_version,
ios_version: existing_emoji.ios_version,
)
- seen_existing[existing_emoji] = true
else
output_item.update(
- aliases: [emoji_item[:description].gsub(/\W+/, '_').downcase],
+ aliases: [description.gsub(/\W+/, '_').downcase],
tags: [],
- unicode_version: "12.0",
- ios_version: "13.0",
+ unicode_version: "12.1",
+ ios_version: "13.2",
)
end
output_item[:skin_tones] = true if emoji_item[:skin_tones]
Expand Down
14 changes: 5 additions & 9 deletions db/emoji-test-parser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,8 @@ module EmojiTestParser
"\u{1F3FE}", # medium-dark skin tone
"\u{1F3FF}", # dark skin tone
]
- HAIR_MODIFIERS = [
- "\u{1F9B0}", # red-haired
- "\u{1F9B1}", # curly-haired
- "\u{1F9B2}", # bald
- "\u{1F9B3}", # white-haired
- ]
+ SKIN_TONES_RE = /(#{SKIN_TONES.join("|")})/o
+ SKIP_TYPES = ["unqualified", "component"]

module_function

Expand Down Expand Up @@ -52,12 +48,12 @@ def parse_file(io)
else
row, desc = line.split("#", 2)
desc = desc.strip.split(" ", 2)[1]
- codepoints, _ = row.split(";", 2)
+ codepoints, qualification = row.split(";", 2)
+ next if SKIP_TYPES.include?(qualification.strip)
emoji_raw = codepoints.strip.split.map { |c| c.hex }.pack("U*")
- next if HAIR_MODIFIERS.include?(emoji_raw)
emoji_normalized = emoji_raw
.gsub(VARIATION_SELECTOR_16, "")
- .gsub(/(#{SKIN_TONES.join("|")})/o, "")
+ .gsub(SKIN_TONES_RE, "")
emoji_item = emoji_map[emoji_normalized]
if SKIN_TONES.any? { |s| emoji_raw.include?(s) }
emoji_item[:skin_tones] = true if emoji_item
Expand Down
Loading

0 comments on commit cefc4b0

Please sign in to comment.