From dcb7f37410d67ef5ab9c5c798a19d7fef7ceaa21 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Fri, 27 Jan 2017 10:23:36 -0800 Subject: [PATCH 1/4] allow identifiers to start with category No (Number, other) --- NEWS.md | 4 ++++ doc/src/manual/variables.md | 4 ++-- src/flisp/julia_extensions.c | 4 ++-- test/parse.jl | 6 ++++++ 4 files changed, 14 insertions(+), 4 deletions(-) diff --git a/NEWS.md b/NEWS.md index ae54fe133b9ae..2db8b3b2977e0 100644 --- a/NEWS.md +++ b/NEWS.md @@ -46,6 +46,10 @@ New language features * Keyword arguments can be required: if a default value is omitted, then an exception is thrown if the caller does not assign the keyword a value ([#25830]). + + * Identifiers can now start with numeric symbols in category + [No: Number, other](http://www.fileformat.info/info/unicode/category/No/list.htm), + allowing you to have variables with names like `⅓x` or `¹x₂` ([#20278]). Language changes ---------------- diff --git a/doc/src/manual/variables.md b/doc/src/manual/variables.md index 317b1622173bf..f7852324bec89 100644 --- a/doc/src/manual/variables.md +++ b/doc/src/manual/variables.md @@ -94,9 +94,9 @@ ERROR: cannot assign variable Base.sqrt from module Main Variable names must begin with a letter (A-Z or a-z), underscore, or a subset of Unicode code points greater than 00A0; in particular, [Unicode character categories](http://www.fileformat.info/info/unicode/category/index.htm) -Lu/Ll/Lt/Lm/Lo/Nl (letters), Sc/So (currency and other symbols), and a few other letter-like characters +Lu/Ll/Lt/Lm/Lo/Nl/No (letters and certain numeric symbols), Sc/So (currency and other symbols), and a few other letter-like characters (e.g. a subset of the Sm math symbols) are allowed. Subsequent characters may also include ! 
and -digits (0-9 and other characters in categories Nd/No), as well as other Unicode code points: diacritics +digits (0-9 and other decimal digits in category Nd), as well as other Unicode code points: diacritics and other modifying marks (categories Mn/Mc/Me/Sk), some punctuation connectors (category Pc), primes, and a few other characters. diff --git a/src/flisp/julia_extensions.c b/src/flisp/julia_extensions.c index 7b9ac64a81dfc..684bc5618031d 100644 --- a/src/flisp/julia_extensions.c +++ b/src/flisp/julia_extensions.c @@ -60,7 +60,8 @@ static int is_wc_cat_id_start(uint32_t wc, utf8proc_category_t cat) { return (cat == UTF8PROC_CATEGORY_LU || cat == UTF8PROC_CATEGORY_LL || cat == UTF8PROC_CATEGORY_LT || cat == UTF8PROC_CATEGORY_LM || - cat == UTF8PROC_CATEGORY_LO || cat == UTF8PROC_CATEGORY_NL || + cat == UTF8PROC_CATEGORY_LO || + cat == UTF8PROC_CATEGORY_NL || cat == UTF8PROC_CATEGORY_NO || cat == UTF8PROC_CATEGORY_SC || // allow currency symbols // other symbols, but not arrows (cat == UTF8PROC_CATEGORY_SO && !(wc >= 0x2190 && wc <= 0x21FF)) || @@ -131,7 +132,6 @@ JL_DLLEXPORT int jl_id_char(uint32_t wc) if (cat == UTF8PROC_CATEGORY_MN || cat == UTF8PROC_CATEGORY_MC || cat == UTF8PROC_CATEGORY_ND || cat == UTF8PROC_CATEGORY_PC || cat == UTF8PROC_CATEGORY_SK || cat == UTF8PROC_CATEGORY_ME || - cat == UTF8PROC_CATEGORY_NO || // primes (single, double, triple, their reverses, and quadruple) (wc >= 0x2032 && wc <= 0x2037) || (wc == 0x2057) || // Other_ID_Continue diff --git a/test/parse.jl b/test/parse.jl index 077d0fb508b66..059a22e602302 100644 --- a/test/parse.jl +++ b/test/parse.jl @@ -255,6 +255,12 @@ end @test_throws ArgumentError parse(Complex{Int}, "3 + 4.2im") end +# identifiers starting with category No: +let ½x = 1/2, ¹x = 12 + @test ½x === 1/2 + @test ¹x === 12 +end + # added ⟂ to operator precedence (#24404) @test Meta.parse("a ⟂ b ⟂ c") == Expr(:comparison, :a, :⟂, :b, :⟂, :c) @test Meta.parse("a ⟂ b ∥ c") == Expr(:comparison, :a, :⟂, :b, :∥, :c) 
From 86b680fa6c4974ce70c3f26f81a46749ea796a54 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Fri, 27 Jan 2017 10:39:27 -0800 Subject: [PATCH 2/4] add NEWS link --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 2db8b3b2977e0..91621944b3d14 100644 --- a/NEWS.md +++ b/NEWS.md @@ -46,7 +46,7 @@ New language features * Keyword arguments can be required: if a default value is omitted, then an exception is thrown if the caller does not assign the keyword a value ([#25830]). - + * Identifiers can now start with numeric symbols in category [No: Number, other](http://www.fileformat.info/info/unicode/category/No/list.htm), allowing you to have variables with names like `⅓x` or `¹x₂` ([#20278]). From cebd113fa5915ff8fd2d7bfccf04ac20aefcf0a1 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Thu, 19 Oct 2017 17:16:14 -0400 Subject: [PATCH 3/4] allow No to start identifier, but disallow all-No identifiers --- src/flisp/julia_extensions.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/flisp/julia_extensions.c b/src/flisp/julia_extensions.c index 684bc5618031d..a064eb2047880 100644 --- a/src/flisp/julia_extensions.c +++ b/src/flisp/julia_extensions.c @@ -305,10 +305,11 @@ value_t fl_accum_julia_symbol(fl_context_t *fl_ctx, value_t *args, uint32_t narg type_error(fl_ctx, "accum-julia-symbol", "wchar", args[0]); uint32_t wc = *(uint32_t*)cp_data((cprim_t*)ptr(args[0])); ios_t str; - int allascii=1; + int allascii=1, allNo=1; ios_mem(&str, 0); do { allascii &= (wc <= 0x7f); + allNo = allNo && UTF8PROC_CATEGORY_NO == utf8proc_category((utf8proc_int32_t) wc); ios_getutf8(s, &wc); if (wc == '!') { uint32_t nwc; @@ -324,6 +325,8 @@ value_t fl_accum_julia_symbol(fl_context_t *fl_ctx, value_t *args, uint32_t narg break; } while (jl_id_char(wc)); ios_pututf8(&str, 0); + if (allNo) /* identifiers cannot consist only of category-No */ + lerrorf(fl_ctx, symbol(fl_ctx, "error"), "invalid identifier 
%s", str.buf); return symbol(fl_ctx, allascii ? str.buf : normalize(fl_ctx, str.buf)); } From fffb1fc6c0cac25a4213555a250f9687b18d4707 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Mon, 12 Feb 2018 13:33:33 -0500 Subject: [PATCH 4/4] =?UTF-8?q?make=20sure=20=C2=BD=20is=20still=20invalid?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/parse.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/test/parse.jl b/test/parse.jl index 059a22e602302..0a66660996524 100644 --- a/test/parse.jl +++ b/test/parse.jl @@ -259,6 +259,7 @@ end let ½x = 1/2, ¹x = 12 @test ½x === 1/2 @test ¹x === 12 + @test Meta.parse("½ = 0.5",raise=false) == Expr(:error, "invalid identifier ½") end # added ⟂ to operator precedence (#24404)