diff --git a/vidyut-lipi/src/reshape.rs b/vidyut-lipi/src/reshape.rs
index 7686094..37b9d13 100644
--- a/vidyut-lipi/src/reshape.rs
+++ b/vidyut-lipi/src/reshape.rs
@@ -39,6 +39,9 @@ const BENGALI_VIRAMA: char = '\u{09cd}';
 /// Used instead of space (' ') in Bhaiksuki.
 const BHAIKSUKI_WORD_SEPARATOR: char = '\u{11c43}';
 
+/// Used to mark pluta in Grantha.
+const GRANTHA_SIGN_PLUTA: char = '\u{1135d}';
+
 /// Javanese virama.
 const JAVANESE_PANGKON: char = '\u{a9c0}';
 
@@ -77,12 +80,15 @@ const MYANMAR_SIGN_VIRAMA: char = '\u{1039}';
 
 const MYANMAR_SIGN_ASAT: char = '\u{103a}';
 
-// Tai Tham virama.
+/// Tai Tham virama.
 const TAI_THAM_SIGN_RA_HAAM: char = '\u{1a7a}';
 
-// Tai Tham combiner.
+/// Tai Tham combiner.
 const TAI_THAM_SIGN_SAKOT: char = '\u{1a60}';
 
+/// Tamil digit 3 (also used in Grantha).
+const TAMIL_DIGIT_THREE: char = '\u{0be9}';
+
 /// Used instead of space (' ') in Tibetan
 const TIBETAN_MARK_INTERSYLLABLIC_TSHEG: char = '\u{0f0b}';
 
@@ -106,6 +112,31 @@ fn is_svara(c: char) -> bool {
     matches!(c, '\u{0951}' | '\u{0952}' | '\u{1cda}')
 }
 
+/// Returns whether `c` lies in one of the Bengali code point ranges listed below.
+fn is_bengali_sound(c: char) -> bool {
+    match c {
+        // Signs, vowels, consonants
+        '\u{0981}'..='\u{09bc}' => true,
+        // Dependent vowels
+        '\u{09be}'..='\u{09cc}' => true,
+        // Other consonants and signs
+        '\u{09ce}'..='\u{09e3}' => true,
+        // Assamese
+        '\u{09f0}'..='\u{09f1}' => true,
+        _ => false,
+    }
+}
+
+/// Returns whether a Tamil digit three directly after `c` should become the Grantha pluta sign.
+fn accepts_grantha_pluta_marker(c: char) -> bool {
+    // Independent vowels, consonants
+    ('\u{11305}'..='\u{11339}').contains(&c)
+        // Dependent vowel signs
+        || ('\u{1133e}'..='\u{1134c}').contains(&c)
+        // Vocalic RR and LL (letters), vocalic L and LL (vowel signs)
+        || ('\u{11360}'..='\u{11363}').contains(&c)
+}
+
 fn is_grantha_svara(c: char) -> bool {
     matches!(c, '\u{1cf4}' | '\u{0951}' | '\u{0952}')
 }
@@ -611,6 +642,9 @@ pub fn reshape_before(input: &str, from: Scheme) -> String {
             while m.not_empty() {
                 if m.match_2(|x, y| is_grantha_ayogavaha(x) && is_grantha_svara(y)) {
                     m.take_2(|buf, x, y| buf.extend(&[y, x]));
+                } else if m.match_1(|x| x == GRANTHA_SIGN_PLUTA) {
+                    // Convert back to 3 for other schemes.
+                    m.take_1(|buf, _| buf.extend(&[TAMIL_DIGIT_THREE]));
                 } else {
                     m.push_next();
                 }
@@ -807,20 +841,6 @@ pub fn reshape_before(input: &str, from: Scheme) -> String {
     }
 }
 
-fn is_bengali_sound(c: char) -> bool {
-    match c {
-        // Signs, vowels, consonants
-        '\u{0981}'..='\u{09bc}' => true,
-        // Dependent vowels
-        '\u{09be}'..='\u{09cc}' => true,
-        // Other consonants and signs
-        '\u{09ce}'..='\u{09e3}' => true,
-        // Assamese
-        '\u{09f0}'..='\u{09f1}' => true,
-        _ => false,
-    }
-}
-
 /// Reshapes `output` after we run the main transliteration function.
 pub fn reshape_after(output: String, to: Scheme) -> String {
     let mut m = Matcher::new(output);
@@ -933,6 +953,10 @@ pub fn reshape_after(output: String, to: Scheme) -> String {
             while m.not_empty() {
                 if m.match_2(|x, y| is_grantha_svara(x) && is_grantha_ayogavaha(y)) {
                     m.take_2(|buf, x, y| buf.extend(&[y, x]));
+                } else if m
+                    .match_2(|x, y| accepts_grantha_pluta_marker(x) && y == TAMIL_DIGIT_THREE)
+                {
+                    m.take_2(|buf, x, _| buf.extend(&[x, GRANTHA_SIGN_PLUTA]));
                 } else {
                     m.push_next();
                 }
diff --git a/vidyut-lipi/tests/basic.rs b/vidyut-lipi/tests/basic.rs
index ec4d01e..e400fc3 100644
--- a/vidyut-lipi/tests/basic.rs
+++ b/vidyut-lipi/tests/basic.rs
@@ -272,6 +272,46 @@ fn sanskrit_dependent_vowels() {
     );
 }
 
+// TODO: not very familiar with pluta, check with someone who knows better.
+#[test]
+fn sanskrit_pluta() {
+    // Independent vowels.
+    assert_two_way_pairwise(&[
+        (
+            HarvardKyoto,
+            "a3 A3 i3 I3 u3 U3 R3 RR3 lR3 lRR3 e3 ai3 o3 au3",
+        ),
+        (Slp1, "a3 A3 i3 I3 u3 U3 f3 F3 x3 X3 e3 E3 o3 O3"),
+        (Devanagari, "अ३ आ३ इ३ ई३ उ३ ऊ३ ऋ३ ॠ३ ऌ३ ॡ३ ए३ ऐ३ ओ३ औ३"),
+        (Grantha, "𑌅𑍝 𑌆𑍝 𑌇𑍝 𑌈𑍝 𑌉𑍝 𑌊𑍝 𑌋𑍝 𑍠𑍝 𑌌𑍝 𑍡𑍝 𑌏𑍝 𑌐𑍝 𑌓𑍝 𑌔𑍝"),
+    ]);
+
+    // Dependent vowels.
+    assert_two_way_pairwise(&[
+        (
+            HarvardKyoto,
+            "ka3 kA3 ki3 kI3 ku3 kU3 kR3 kRR3 klR3 klRR3 ke3 kai3 ko3 kau3",
+        ),
+        (
+            Slp1,
+            "ka3 kA3 ki3 kI3 ku3 kU3 kf3 kF3 kx3 kX3 ke3 kE3 ko3 kO3",
+        ),
+        (Devanagari, "क३ का३ कि३ की३ कु३ कू३ कृ३ कॄ३ कॢ३ कॣ३ के३ कै३ को३ कौ३"),
+        (
+            Grantha,
+            "𑌕𑍝 𑌕𑌾𑍝 𑌕𑌿𑍝 𑌕𑍀𑍝 𑌕𑍁𑍝 𑌕𑍂𑍝 𑌕𑍃𑍝 𑌕𑍄𑍝 𑌕𑍢𑍝 𑌕𑍣𑍝 𑌕𑍇𑍝 𑌕𑍈𑍝 𑌕𑍋𑍝 𑌕𑍌𑍝",
+        ),
+    ]);
+
+    // Candrabindu.
+    assert_two_way_pairwise(&[
+        (HarvardKyoto, "a~3 ka~3"),
+        (Slp1, "a~3 ka~3"),
+        (Devanagari, "अँ३ कँ३"),
+        (Grantha, "𑌅𑌁௩ 𑌕𑌁௩"),
+    ]);
+}
+
 #[test]
 fn sanskrit_ayogavahas() {
     assert_two_way_pairwise(&[