Skip to content

Commit

Permalink
fix obsolete assumptions that Char == UTF-32 (#55)
Browse files Browse the repository at this point in the history
  • Loading branch information
stevengj committed May 3, 2023
1 parent 5cb70b1 commit ec1d540
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 8 deletions.
17 changes: 9 additions & 8 deletions src/utf32.jl
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# This file includes code that was formerly a part of Julia. License is MIT: http://julialang.org/license

UTF32String(data::Vector{Char}) = UTF32String(reinterpret(UInt32, data))
UTF32String(data::AbstractVector{<:AbstractChar}) = convert(UTF32String, data)

# UTF-32 basic functions
next(s::UTF32String, i::Int) = (Char(s.data[i]), i+1)
Expand Down Expand Up @@ -137,10 +137,10 @@ end
convert(::Type{UTF32String}, data::AbstractVector{Int32}) =
convert(UTF32String, reinterpret(UInt32, convert(Vector{Int32}, data)))

convert(::Type{UTF32String}, data::AbstractVector{Char}) =
convert(::Type{UTF32String}, data::AbstractVector{<:AbstractChar}) =
convert(UTF32String, map(UInt32, data))

convert(::Type{T}, v::AbstractVector{S}) where {T<:AbstractString, S<:Union{UInt32,Char,Int32}} =
convert(::Type{T}, v::AbstractVector{S}) where {T<:Union{ASCIIString,UTF16String}, S<:Union{UInt32,AbstractChar,Int32}} =
convert(T, utf32(v))

# specialize for performance reasons:
Expand Down Expand Up @@ -206,10 +206,11 @@ isvalid(str::Vector{Char}) = isvalid(UTF32String, str)
utf32(x) = convert(UTF32String, x)

utf32(p::Ptr{UInt32}, len::Integer) = utf32(unsafe_wrap(Array, p, len))
utf32(p::Union{Ptr{Char}, Ptr{Int32}}, len::Integer) = utf32(convert(Ptr{UInt32}, p), len)
function utf32(p::Union{Ptr{UInt32}, Ptr{Char}, Ptr{Int32}})
utf32(p::Ptr{Int32}, len::Integer) = utf32(convert(Ptr{UInt32}, p), len)
utf32(p::Ptr{Char}, len::Integer) = utf32(unsafe_wrap(Array, p, len))
function utf32(p::Ptr{T}) where {T<:Union{UInt32,Char,Int32}}
len = 0
while unsafe_load(p, len+1) != 0; len += 1; end
while unsafe_load(p, len+1) != T(0); len += 1; end
utf32(p, len)
end

Expand All @@ -223,7 +224,7 @@ function map(f, s::UTF32String)
if !isa(c2, Char)
throw(UnicodeError(UTF_ERR_MAP_CHAR, 0, 0))
end
out[i] = (c2::Char)
out[i] = UInt32(c2::Char)
end
UTF32String(out)
end
Expand Down Expand Up @@ -273,7 +274,7 @@ Note that the resulting `UTF32String` data is terminated by the NUL codepoint (3
which is not treated as a character in the string (so that it is mostly invisible in Julia);
this allows the string to be passed directly to external functions requiring NUL-terminated
data. This NUL is appended automatically by the `utf32(s)` conversion function. If you have
a `Char` or `UInt32` array `A` that is already NUL-terminated UTF-32 data, then you can
a `UInt32` array `A` that is already NUL-terminated UTF-32 data, then you can
instead use `UTF32String(A)` to construct the string without making a copy of the data and
treating the NUL as a terminator rather than as part of the string.
"""
Expand Down
5 changes: 5 additions & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -585,7 +585,12 @@ for s in ["", "a", "â", "Julia", "줄리아"]
for u in [LegacyStrings.ascii, utf8, utf16, utf32]
u == LegacyStrings.ascii && !isascii(s) && continue
@test length(s) == length(u(s))
@test map(uppercase, s) == map(uppercase, u(s))
end
c = collect(s)
c0 = [c; Char(0)]
@test utf32(c) == UTF32String(c) == s
GC.@preserve c0 @test utf32(pointer(c0)) == s
end


Expand Down

2 comments on commit ec1d540

@stevengj
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/82822

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the GitHub interface, or via:

git tag -a v1.1.0 -m "<description of version>" ec1d540bf1db0e6ce182f658af76ed25e4ef891c
git push origin v1.1.0

Please sign in to comment.