diff --git a/NEWS.md b/NEWS.md index fc3d975091212..f9c4fbb829645 100644 --- a/NEWS.md +++ b/NEWS.md @@ -35,6 +35,10 @@ New language features which can be overloaded to customize its behavior for different collection types ([#37410]). + * Identifiers can now start with numeric symbols in category + [No: Number, other](http://www.fileformat.info/info/unicode/category/No/list.htm), + allowing you to have variables with names like `⅓x` or `¹x₂` ([#20278]). + Language changes ---------------- diff --git a/doc/src/manual/variables.md b/doc/src/manual/variables.md index e7b2899dd931b..a52720d3edd72 100644 --- a/doc/src/manual/variables.md +++ b/doc/src/manual/variables.md @@ -94,9 +94,9 @@ ERROR: cannot assign a value to variable Base.sqrt from module Main Variable names must begin with a letter (A-Z or a-z), underscore, or a subset of Unicode code points greater than 00A0; in particular, [Unicode character categories](http://www.fileformat.info/info/unicode/category/index.htm) -Lu/Ll/Lt/Lm/Lo/Nl (letters), Sc/So (currency and other symbols), and a few other letter-like characters +Lu/Ll/Lt/Lm/Lo/Nl/No (letters and certain numeric symbols), Sc/So (currency and other symbols), and a few other letter-like characters (e.g. a subset of the Sm math symbols) are allowed. Subsequent characters may also include ! and -digits (0-9 and other characters in categories Nd/No), as well as other Unicode code points: diacritics +digits (0-9 and other decimal digits in category Nd), as well as other Unicode code points: diacritics and other modifying marks (categories Mn/Mc/Me/Sk), some punctuation connectors (category Pc), primes, and a few other characters. 
diff --git a/src/flisp/julia_extensions.c b/src/flisp/julia_extensions.c index e6ffcfcde131c..ef616bba0ca6d 100644 --- a/src/flisp/julia_extensions.c +++ b/src/flisp/julia_extensions.c @@ -68,7 +68,8 @@ static int is_wc_cat_id_start(uint32_t wc, utf8proc_category_t cat) { return (cat == UTF8PROC_CATEGORY_LU || cat == UTF8PROC_CATEGORY_LL || cat == UTF8PROC_CATEGORY_LT || cat == UTF8PROC_CATEGORY_LM || - cat == UTF8PROC_CATEGORY_LO || cat == UTF8PROC_CATEGORY_NL || + cat == UTF8PROC_CATEGORY_LO || + cat == UTF8PROC_CATEGORY_NL || cat == UTF8PROC_CATEGORY_NO || cat == UTF8PROC_CATEGORY_SC || // allow currency symbols // other symbols, but not arrows or replacement characters (cat == UTF8PROC_CATEGORY_SO && !(wc >= 0x2190 && wc <= 0x21FF) && @@ -144,7 +145,6 @@ JL_DLLEXPORT int jl_id_char(uint32_t wc) if (cat == UTF8PROC_CATEGORY_MN || cat == UTF8PROC_CATEGORY_MC || cat == UTF8PROC_CATEGORY_ND || cat == UTF8PROC_CATEGORY_PC || cat == UTF8PROC_CATEGORY_SK || cat == UTF8PROC_CATEGORY_ME || - cat == UTF8PROC_CATEGORY_NO || // primes (single, double, triple, their reverses, and quadruple) (wc >= 0x2032 && wc <= 0x2037) || (wc == 0x2057)) return 1; @@ -329,10 +329,11 @@ value_t fl_accum_julia_symbol(fl_context_t *fl_ctx, value_t *args, uint32_t narg type_error(fl_ctx, "accum-julia-symbol", "wchar", args[0]); uint32_t wc = *(uint32_t*)cp_data((cprim_t*)ptr(args[0])); ios_t str; - int allascii = 1; + int allascii = 1, allNo = 1; ios_mem(&str, 0); do { allascii &= (wc <= 0x7f); + allNo = allNo && UTF8PROC_CATEGORY_NO == utf8proc_category((utf8proc_int32_t) wc); ios_getutf8(s, &wc); if (wc == '!') { uint32_t nwc = 0; @@ -348,6 +349,8 @@ value_t fl_accum_julia_symbol(fl_context_t *fl_ctx, value_t *args, uint32_t narg break; } while (jl_id_char(wc)); ios_pututf8(&str, 0); + if (allNo) /* identifiers cannot consist only of category-No */ + lerrorf(fl_ctx, symbol(fl_ctx, "error"), "invalid identifier %s", str.buf); return symbol(fl_ctx, allascii ? 
str.buf : normalize(fl_ctx, str.buf));
 }
 
diff --git a/test/parse.jl b/test/parse.jl
index 2deeecd516f2a..49c88dca70c6d 100644
--- a/test/parse.jl
+++ b/test/parse.jl
@@ -281,6 +281,21 @@ end
     @test_throws ArgumentError parse(Complex{Int}, "3 + 4.2im")
 end
 
+# identifiers may start with category-No characters, but a name made only of them is a parse error:
+let ½x = 1/2, ¹x = 12
+    @test ½x === 1/2
+    @test ¹x === 12
+    @test Meta.parse("½ = 0.5",raise=false) == Expr(:error, "invalid identifier ½")
+end
+
+# added ⟂ to operator precedence (#24404)
+@test Meta.parse("a ⟂ b ⟂ c") == Expr(:comparison, :a, :⟂, :b, :⟂, :c)
+@test Meta.parse("a ⟂ b ∥ c") == Expr(:comparison, :a, :⟂, :b, :∥, :c)
+
+# only allow certain characters after interpolated vars (#25231)
+@test Meta.parse("\"\$x෴ \"",raise=false) == Expr(:error, "interpolated variable \$x ends with invalid character \"෴\"; use \"\$(x)\" instead.")
+@test Base.incomplete_tag(Meta.parse("\"\$foo", raise=false)) == :string
+
 @testset "parse and tryparse type inference" begin
     @inferred parse(Int, "12")
     @inferred parse(Float64, "12")