Skip to content

allow identifiers to start with category No (Number, other) #20278

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,10 @@ New language features
which can be overloaded to customize its behavior for different collection types
([#37410]).

* Identifiers can now start with numeric symbols in category
[No: Number, other](http://www.fileformat.info/info/unicode/category/No/list.htm),
allowing you to have variables with names like `⅓x` or `¹x₂`. An identifier consisting
solely of category-No characters (e.g. `½` on its own) is still a parse error ([#20278]).

Language changes
----------------

Expand Down
4 changes: 2 additions & 2 deletions doc/src/manual/variables.md
Original file line number Diff line number Diff line change
Expand Up @@ -94,9 +94,9 @@ ERROR: cannot assign a value to variable Base.sqrt from module Main

Variable names must begin with a letter (A-Z or a-z), underscore, or a subset of Unicode code
points greater than 00A0; in particular, [Unicode character categories](http://www.fileformat.info/info/unicode/category/index.htm)
Lu/Ll/Lt/Lm/Lo/Nl (letters), Sc/So (currency and other symbols), and a few other letter-like characters
Lu/Ll/Lt/Lm/Lo/Nl/No (letters and certain numeric symbols), Sc/So (currency and other symbols), and a few other letter-like characters
(e.g. a subset of the Sm math symbols) are allowed. Subsequent characters may also include ! and
digits (0-9 and other characters in categories Nd/No), as well as other Unicode code points: diacritics
digits (0-9 and other decimal digits in category Nd), as well as other Unicode code points: diacritics
and other modifying marks (categories Mn/Mc/Me/Sk), some punctuation connectors (category Pc),
primes, and a few other characters.

Expand Down
9 changes: 6 additions & 3 deletions src/flisp/julia_extensions.c
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,8 @@ static int is_wc_cat_id_start(uint32_t wc, utf8proc_category_t cat)
{
return (cat == UTF8PROC_CATEGORY_LU || cat == UTF8PROC_CATEGORY_LL ||
cat == UTF8PROC_CATEGORY_LT || cat == UTF8PROC_CATEGORY_LM ||
cat == UTF8PROC_CATEGORY_LO || cat == UTF8PROC_CATEGORY_NL ||
cat == UTF8PROC_CATEGORY_LO ||
cat == UTF8PROC_CATEGORY_NL || cat == UTF8PROC_CATEGORY_NO ||
cat == UTF8PROC_CATEGORY_SC || // allow currency symbols
// other symbols, but not arrows or replacement characters
(cat == UTF8PROC_CATEGORY_SO && !(wc >= 0x2190 && wc <= 0x21FF) &&
Expand Down Expand Up @@ -144,7 +145,6 @@ JL_DLLEXPORT int jl_id_char(uint32_t wc)
if (cat == UTF8PROC_CATEGORY_MN || cat == UTF8PROC_CATEGORY_MC ||
cat == UTF8PROC_CATEGORY_ND || cat == UTF8PROC_CATEGORY_PC ||
cat == UTF8PROC_CATEGORY_SK || cat == UTF8PROC_CATEGORY_ME ||
cat == UTF8PROC_CATEGORY_NO ||
// primes (single, double, triple, their reverses, and quadruple)
(wc >= 0x2032 && wc <= 0x2037) || (wc == 0x2057))
return 1;
Expand Down Expand Up @@ -329,10 +329,11 @@ value_t fl_accum_julia_symbol(fl_context_t *fl_ctx, value_t *args, uint32_t narg
type_error(fl_ctx, "accum-julia-symbol", "wchar", args[0]);
uint32_t wc = *(uint32_t*)cp_data((cprim_t*)ptr(args[0]));
ios_t str;
int allascii = 1;
int allascii = 1, allNo = 1;
ios_mem(&str, 0);
do {
allascii &= (wc <= 0x7f);
allNo = allNo && UTF8PROC_CATEGORY_NO == utf8proc_category((utf8proc_int32_t) wc);
ios_getutf8(s, &wc);
if (wc == '!') {
uint32_t nwc = 0;
Expand All @@ -348,6 +349,8 @@ value_t fl_accum_julia_symbol(fl_context_t *fl_ctx, value_t *args, uint32_t narg
break;
} while (jl_id_char(wc));
ios_pututf8(&str, 0);
if (allNo) /* identifiers cannot consist only of category-No */
lerrorf(fl_ctx, symbol(fl_ctx, "error"), "invalid identifier %s", str.buf);
return symbol(fl_ctx, allascii ? str.buf : normalize(fl_ctx, str.buf));
}

Expand Down
15 changes: 15 additions & 0 deletions test/parse.jl
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,21 @@ end
@test_throws ArgumentError parse(Complex{Int}, "3 + 4.2im")
end

# identifiers starting with category No:
let ½x = 1/2, ¹x = 12
    # a category-No character may begin an identifier as long as other
    # identifier characters follow it
    @test ½x === 1/2
    @test ¹x === 12
    # an identifier made up solely of category-No characters is rejected;
    # the comparison must be wrapped in @test or its result is silently dropped
    @test Meta.parse("½ = 0.5",raise=false) == Expr(:error, "invalid identifier ½")
end

# added ⟂ to operator precedence (#24404): it parses as a chained comparison,
# both on its own and when mixed with ∥
@test Meta.parse("a ⟂ b ⟂ c") == Expr(:comparison, :a, :⟂, :b, :⟂, :c)
@test Meta.parse("a ⟂ b ∥ c") == Expr(:comparison, :a, :⟂, :b, :∥, :c)

# only allow certain characters after interpolated vars (#25231): an invalid
# trailing character yields an error expression, and an unterminated string
# literal is tagged as an incomplete :string
@test Meta.parse("\"\$x෴ \"",raise=false) == Expr(:error, "interpolated variable \$x ends with invalid character \"෴\"; use \"\$(x)\" instead.")
@test Base.incomplete_tag(Meta.parse("\"\$foo", raise=false)) == :string

@testset "parse and tryparse type inference" begin
@inferred parse(Int, "12")
@inferred parse(Float64, "12")
Expand Down