71
71
}
72
72
73
73
#[ inline]
74
+ #[ allow( unsafe_code) ]
74
75
fn decompose < D , F > ( c : char , decompose_char : D , mut emit_char : F )
75
76
where
76
77
D : Fn ( char ) -> Option < & ' static [ char ] > ,
84
85
85
86
// Perform decomposition for Hangul
86
87
if is_hangul_syllable ( c) {
87
- decompose_hangul ( c, emit_char) ;
88
+ // Safety: Hangul Syllables invariant checked by is_hangul_syllable above
89
+ unsafe {
90
+ decompose_hangul ( c, emit_char) ;
91
+ }
88
92
return ;
89
93
}
90
94
@@ -127,27 +131,37 @@ const T_LAST: u32 = T_BASE + T_COUNT - 1;
127
131
// i.e. `T_BASE + 1 ..= T_LAST`.
128
132
const T_FIRST : u32 = T_BASE + 1 ;
129
133
134
+ // Safety-usable invariant: This ensures that c is a valid Hangul Syllable character (U+AC00..U+D7AF)
130
135
pub ( crate ) fn is_hangul_syllable ( c : char ) -> bool {
136
+ // Safety: This checks the range 0xAC00 (S_BASE) to 0xD7A4 (S_BASE + S_COUNT), upholding the safety-usable invariant
131
137
( c as u32 ) >= S_BASE && ( c as u32 ) < ( S_BASE + S_COUNT )
132
138
}
133
139
134
140
// Decompose a precomposed Hangul syllable
135
- #[ allow( unsafe_code) ]
141
+ // Safety: `s` MUST be a valid Hangul Syllable character, between U+AC00..U+D7AF
142
+ #[ allow( unsafe_code, unused_unsafe) ]
136
143
#[ inline( always) ]
137
- fn decompose_hangul < F > ( s : char , mut emit_char : F )
144
+ unsafe fn decompose_hangul < F > ( s : char , mut emit_char : F )
138
145
where
139
146
F : FnMut ( char ) ,
140
147
{
148
+ // This will be at most 0x2baf, the size of the Hangul Syllables block
141
149
let s_index = s as u32 - S_BASE ;
150
+ // This will be at most 0x2baf / (21 * 28), 19
142
151
let l_index = s_index / N_COUNT ;
143
152
unsafe {
153
+ // Safety: L_BASE (0x1100) plus at most 19 is still going to be in range for a valid Unicode code point in the BMP (< 0xD800)
144
154
emit_char ( char:: from_u32_unchecked ( L_BASE + l_index) ) ;
145
155
156
+ // Safety: This will be at most (N_COUNT - 1) / T_COUNT = (V*T - 1) / T, which is V_COUNT - 1 = 20
146
157
let v_index = ( s_index % N_COUNT ) / T_COUNT ;
158
+ // Safety: V_BASE (0x1161) plus at most 21 is still going to be in range for a valid Unicode code point in the BMP (< 0xD800)
147
159
emit_char ( char:: from_u32_unchecked ( V_BASE + v_index) ) ;
148
160
161
+ // Safety: This will be at most T_COUNT - 1 (27)
149
162
let t_index = s_index % T_COUNT ;
150
163
if t_index > 0 {
164
+ // Safety: T_BASE (0x11A7) plus at most 27 is still going to be in range for a valid Unicode code point in the BMP (< 0xD800)
151
165
emit_char ( char:: from_u32_unchecked ( T_BASE + t_index) ) ;
152
166
}
153
167
}
@@ -173,14 +187,23 @@ fn compose_hangul(a: char, b: char) -> Option<char> {
173
187
match ( a, b) {
174
188
// Compose a leading consonant and a vowel together into an LV_Syllable
175
189
( L_BASE ..=L_LAST , V_BASE ..=V_LAST ) => {
190
+ // Safety: based on the above bounds, l_index will be at most L_COUNT - 1 (18)
191
+ // and v_index will be at most V_COUNT - 1 (20), assuming L_LAST and V_LAST are defined like T_LAST (base + count - 1)
176
192
let l_index = a - L_BASE ;
177
193
let v_index = b - V_BASE ;
194
+ // Safety: With l_index <= 18 and v_index <= 20, this will be <= 18 * (21 * 28) + (20 * 28), which is 11144 (0x2B88).
178
195
let lv_index = l_index * N_COUNT + v_index * T_COUNT ;
196
+ // Safety: lv_index is at most 18 * (21 * 28) + 20 * 28 = 0x2B88, so this is between 0xAC00 and 0xD788, which is in range for Hangul Syllables (U+AC00..U+D7AF)
197
+ // and also in range for BMP unicode (below the surrogates at 0xD800)
179
198
let s = S_BASE + lv_index;
199
+ // Safety: We've verified this is in-range
180
200
Some ( unsafe { char:: from_u32_unchecked ( s) } )
181
201
}
182
202
// Compose an LV_Syllable and a trailing consonant into an LVT_Syllable
183
203
( S_BASE ..=S_LAST , T_FIRST ..=T_LAST ) if ( a - S_BASE ) % T_COUNT == 0 => {
204
+ // Safety: a is between 0xAC00 and (0xAC00 + 19 * 21 * 28). b - T_BASE is between 1 and 27 (T_FIRST..=T_LAST).
205
+ // Adding a number 1 to 27 to a number that is at largest 0xD7A4 will not go out of bounds to 0xD800 (where the
206
+ // surrogates start), so this is safe.
184
207
Some ( unsafe { char:: from_u32_unchecked ( a + ( b - T_BASE ) ) } )
185
208
}
186
209
_ => None ,
0 commit comments