Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement emoji support #508

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
Implement emoji support
Liberatys committed Jun 5, 2021
commit 69492873ffeee77081ae313070b45b1e8be672e4
84 changes: 62 additions & 22 deletions ext/psych/extconf.rb
Original file line number Diff line number Diff line change
@@ -7,35 +7,75 @@

dir_config 'libyaml'

if enable_config("bundled-libyaml", false) || !(find_header('yaml.h') && find_library('yaml', 'yaml_get_version'))
# Embed libyaml since we could not find it.
$VPATH << "$(srcdir)/yaml"
$INCFLAGS << " -I$(srcdir)/yaml"

$VPATH << "$(srcdir)/yaml"
$INCFLAGS << " -I$(srcdir)/yaml"
$srcs = Dir.glob("#{$srcdir}/{,yaml/}*.c").map {|n| File.basename(n)}.sort

$srcs = Dir.glob("#{$srcdir}/{,yaml/}*.c").map {|n| File.basename(n)}.sort
header = 'yaml/yaml.h'
header = "{$(VPATH)}#{header}" if $nmake
if have_macro("_WIN32")
$CPPFLAGS << " -DYAML_DECLARE_STATIC -DHAVE_CONFIG_H"
end

have_header 'dlfcn.h'
have_header 'inttypes.h'
have_header 'memory.h'
have_header 'stdint.h'
have_header 'stdlib.h'
have_header 'strings.h'
have_header 'string.h'
have_header 'sys/stat.h'
have_header 'sys/types.h'
have_header 'unistd.h'
have_header 'unicode/ucol.h'

find_header 'yaml.h'
have_header 'config.h'

header = 'yaml/yaml.h'
header = "{$(VPATH)}#{header}" if $nmake
if have_macro("_WIN32")
$CPPFLAGS << " -DYAML_DECLARE_STATIC -DHAVE_CONFIG_H"
##
# ICU dependency
#

# Default ICU library / include directories handed to dir_config below.
# They stay nil unless a usable Homebrew icu4c prefix is found.
ldflags = cppflags = nil

if RbConfig::CONFIG["host_os"] =~ /darwin/
  begin
    brew_prefix = `brew --prefix icu4c`.chomp

    # Only trust the answer when brew succeeded and the prefix really exists;
    # otherwise an empty prefix would turn into the bogus paths "/lib" and
    # "/include".
    if $?.success? && File.directory?(brew_prefix)
      ldflags  = "#{brew_prefix}/lib"
      cppflags = "#{brew_prefix}/include"
      pkg_conf = "#{brew_prefix}/lib/pkgconfig"
      # pkg_config should be less error prone than parsing compiler
      # commandline options, but we need to set default ldflags and cpp flags
      # in case the user doesn't have pkg-config installed
      ENV['PKG_CONFIG_PATH'] ||= pkg_conf
    end
  rescue SystemCallError
    # brew could not be executed (e.g. it is not installed); this probe is
    # best effort, so keep the nil defaults.
  end
end

have_header 'dlfcn.h'
have_header 'inttypes.h'
have_header 'memory.h'
have_header 'stdint.h'
have_header 'stdlib.h'
have_header 'strings.h'
have_header 'string.h'
have_header 'sys/stat.h'
have_header 'sys/types.h'
have_header 'unistd.h'

find_header 'yaml.h'
have_header 'config.h'
# Register the ICU directory options, using the include / lib directories
# computed earlier as defaults (either may be nil).
dir_config('icu', cppflags, ldflags)

# Ask pkg-config about each ICU component in turn.
%w[icu-i18n icu-io icu-uc].each { |icu_package| pkg_config(icu_package) }

# Request C++11 only when no -std= option has been supplied already.
cxx_standard_chosen = $CXXFLAGS.include?("-std=")
$CXXFLAGS << ' -std=c++11' unless cxx_standard_chosen

# The build cannot continue without the ICU i18n library and the converter
# header: print a banner with install hints and exit with status 1.
icu_available = have_library('icui18n') && have_header('unicode/ucnv.h')

unless icu_available
  STDERR.puts(
    "\n\n",
    "***************************************************************************************",
    "*********** icu required (brew install icu4c or apt-get install libicu-dev) ***********",
    "***************************************************************************************"
  )
  exit(1)
end

# Remaining link-time dependencies, checked in order; abort on the first one
# that cannot be linked.
{
  'z'       => 'libz missing',
  'icuuc'   => 'libicuuc missing',
  'icudata' => 'libicudata missing'
}.each do |library, failure_message|
  abort failure_message unless have_library(library)
end

# Warning / optimisation flags that are always added, followed by the
# debugging flags when the DEBUG environment variable is set.
extra_cflags = ' -Wall -funroll-loops'
extra_cflags += ' -Wextra -O0 -ggdb3' if ENV['DEBUG']
$CFLAGS << extra_cflags

# Generate the Makefile and append a YAML_H variable naming the libyaml header.
create_makefile('psych') do |makefile|
  # strip removes the trailing space left behind when header is nil or empty.
  yaml_h_assignment = "YAML_H = #{header}".strip
  makefile << yaml_h_assignment << "\n"
end
44 changes: 41 additions & 3 deletions ext/psych/yaml/emitter.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@

#include "yaml_private.h"

#include <unicode/utf8.h>
#include <unicode/uchar.h>
/*
* Flush the buffer if needed.
*/
@@ -416,6 +417,43 @@ yaml_emitter_increase_indent(yaml_emitter_t *emitter,
return 1;
}

/*
 * Check if the UTF-8 encoded character at the current position of the string
 * can be printed unescaped.
 *
 * The fast path accepts line feed, printable ASCII, and the two- and
 * three-byte ranges #xA0-#xD7FF and #xE000-#xFFFD (minus the BOM #xFEFF).
 * Everything else is decoded to a code point and handed to ICU's
 * u_isprint(); this is the path four-byte sequences take.
 *
 * Returns 1 if the character is printable, 0 otherwise.  A lead byte that
 * cannot start a sequence, or a sequence that would run past string.end, is
 * reported as not printable.
 */

static inline int
yaml_emitter_is_printable(yaml_string_t string)
{
    unsigned char octet;
    unsigned int width;
    unsigned int value;
    unsigned int k;

    octet = string.pointer[0];

    /* Classify the lead byte once: sequence length and payload bits. */
    if ((octet & 0x80) == 0x00) {
        width = 1;
        value = octet & 0x7F;
    }
    else if ((octet & 0xE0) == 0xC0) {
        width = 2;
        value = octet & 0x1F;
    }
    else if ((octet & 0xF0) == 0xE0) {
        width = 3;
        value = octet & 0x0F;
    }
    else if ((octet & 0xF8) == 0xF0) {
        width = 4;
        value = octet & 0x07;
    }
    else {
        /* Continuation byte or 0xF8..0xFF: no fast-path clause matches. */
        return 0;
    }

    /* Never look at continuation bytes that lie beyond the buffer. */
    if (string.end - string.pointer < (int)width)
        return 0;

    /* Fast path: byte patterns that need no decoding. */
    if (octet == 0x0A                                   /* . == #x0A */
            || (octet >= 0x20 && octet <= 0x7E)         /* #x20 <= . <= #x7E */
            || (octet == 0xC2                           /* #xA0 <= . <= #xD7FF */
                && string.pointer[1] >= 0xA0)
            || (octet > 0xC2 && octet < 0xED)
            || (octet == 0xED && string.pointer[1] < 0xA0)
            || octet == 0xEE
            || (octet == 0xEF                           /* #xE000 <= . <= #xFFFD */
                && !(string.pointer[1] == 0xBB          /* && . != #xFEFF */
                    && string.pointer[2] == 0xBF)
                && !(string.pointer[1] == 0xBF
                    && (string.pointer[2] == 0xBE
                        || string.pointer[2] == 0xBF)))) {
        return 1;
    }

    /* Slow path: decode the code point and ask ICU. */
    for (k = 1; k < width; k ++) {
        value = (value << 6) + (string.pointer[k] & 0x3F);
    }

    return u_isprint(value) ? 1 : 0;
}

/*
* State dispatcher.
*/
@@ -1598,7 +1636,7 @@ yaml_emitter_analyze_scalar(yaml_emitter_t *emitter,
}
}

if (!IS_PRINTABLE(string)
if (!yaml_emitter_is_printable(string)
|| (!IS_ASCII(string) && !emitter->unicode)) {
special_characters = 1;
}
@@ -2061,7 +2099,7 @@ yaml_emitter_write_double_quoted_scalar(yaml_emitter_t *emitter,

while (string.pointer != string.end)
{
if (!IS_PRINTABLE(string) || (!emitter->unicode && !IS_ASCII(string))
if (!yaml_emitter_is_printable(string) || (!emitter->unicode && !IS_ASCII(string))
|| IS_BOM(string) || IS_BREAK(string)
|| CHECK(string, '"') || CHECK(string, '\\'))
{
24 changes: 0 additions & 24 deletions ext/psych/yaml/yaml_private.h
Original file line number Diff line number Diff line change
@@ -258,30 +258,6 @@ yaml_string_join(

#define IS_ASCII(string) IS_ASCII_AT((string),0)

/*
 * Check if the character can be printed unescaped.
 *
 * The test looks at the lead byte at `offset` and at most the two bytes
 * that follow it; no bounds check is performed.  Lead bytes of four-byte
 * sequences (0xF0 and above) match none of the clauses, so characters
 * above #xFFFF are never reported as printable.
 */

#define IS_PRINTABLE_AT(string,offset) \
(((string).pointer[offset] == 0x0A) /* . == #x0A */ \
|| ((string).pointer[offset] >= 0x20 /* #x20 <= . <= #x7E */ \
&& (string).pointer[offset] <= 0x7E) \
|| ((string).pointer[offset] == 0xC2 /* #xA0 <= . <= #xD7FF */ \
&& (string).pointer[offset+1] >= 0xA0) \
|| ((string).pointer[offset] > 0xC2 \
&& (string).pointer[offset] < 0xED) \
|| ((string).pointer[offset] == 0xED \
&& (string).pointer[offset+1] < 0xA0) \
|| ((string).pointer[offset] == 0xEE) \
|| ((string).pointer[offset] == 0xEF /* #xE000 <= . <= #xFFFD */ \
&& !((string).pointer[offset+1] == 0xBB /* && . != #xFEFF */ \
&& (string).pointer[offset+2] == 0xBF) \
&& !((string).pointer[offset+1] == 0xBF \
&& ((string).pointer[offset+2] == 0xBE \
|| (string).pointer[offset+2] == 0xBF))))

#define IS_PRINTABLE(string) IS_PRINTABLE_AT((string),0)

/*
* Check if the character at the specified position is NUL.
*/
5 changes: 5 additions & 0 deletions test/psych/test_string.rb
Original file line number Diff line number Diff line change
@@ -165,6 +165,11 @@ def test_string_with_base_60
assert_equal '01:03:05', Psych.load(yaml)
end

def test_unicode_string
  # The emoji itself must appear in the dumped YAML document.
  dumped = Psych.dump('😃'.encode('utf-8'))
  assert_match "😃", dumped
end

def test_nonascii_string_as_binary
string = "hello \x80 world!".dup
string.force_encoding 'ascii-8bit'