Skip to content

Commit

Permalink
[lipi] Add basic support for Grantha pluta
Browse files Browse the repository at this point in the history
  • Loading branch information
akprasad committed Jan 22, 2025
1 parent 188ffec commit 804b62c
Show file tree
Hide file tree
Showing 2 changed files with 78 additions and 16 deletions.
54 changes: 38 additions & 16 deletions vidyut-lipi/src/reshape.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@ const BENGALI_VIRAMA: char = '\u{09cd}';
/// Used instead of space (' ') in Bhaiksuki.
const BHAIKSUKI_WORD_SEPARATOR: char = '\u{11c43}';

/// Grantha pluta sign (U+1135D), used to mark pluta in Grantha.
const GRANTHA_SIGN_PLUTA: char = '\u{1135d}';

/// Javanese virama.
const JAVANESE_PANGKON: char = '\u{a9c0}';

Expand Down Expand Up @@ -77,12 +80,15 @@ const MYANMAR_SIGN_VIRAMA: char = '\u{1039}';

const MYANMAR_SIGN_ASAT: char = '\u{103a}';

// Tai Tham virama.
/// Tai Tham virama.
const TAI_THAM_SIGN_RA_HAAM: char = '\u{1a7a}';

// Tai Tham combiner.
/// Tai Tham combiner.
const TAI_THAM_SIGN_SAKOT: char = '\u{1a60}';

/// Tamil digit 3 (U+0BE9), which is also used in Grantha.
const TAMIL_DIGIT_THREE: char = '\u{0be9}';

/// Used instead of space (' ') in Tibetan
const TIBETAN_MARK_INTERSYLLABLIC_TSHEG: char = '\u{0f0b}';

Expand All @@ -106,6 +112,29 @@ fn is_svara(c: char) -> bool {
matches!(c, '\u{0951}' | '\u{0952}' | '\u{1cda}')
}

/// Returns whether `c` lies in one of the Bengali code point ranges below.
///
/// The ranges deliberately skip U+09BD and U+09CD, so those two code points
/// are reported as `false`.
fn is_bengali_sound(c: char) -> bool {
    matches!(
        c,
        // Signs, independent vowels, and consonants.
        '\u{0981}'..='\u{09bc}'
        // Dependent vowel signs.
        | '\u{09be}'..='\u{09cc}'
        // Remaining consonants and signs.
        | '\u{09ce}'..='\u{09e3}'
        // Assamese letters.
        | '\u{09f0}'..='\u{09f1}'
    )
}

/// Returns whether `c` is a Grantha code point that may be followed by the
/// pluta sign.
///
/// Callers use this to decide whether a following digit three should be
/// rewritten as the Grantha pluta sign.
fn accepts_grantha_pluta_marker(c: char) -> bool {
    matches!(
        c,
        // Independent vowels and consonants.
        '\u{11305}'..='\u{11339}'
        // Dependent vowel signs.
        | '\u{1133e}'..='\u{1134c}'
        // Independent vocalic RR and LL, then the dependent vocalic L and LL signs.
        | '\u{11360}'..='\u{11363}'
    )
}

/// Returns whether `c` is one of the three accent marks (U+1CF4, U+0951,
/// U+0952) that this module treats as a Grantha svara.
fn is_grantha_svara(c: char) -> bool {
    const GRANTHA_SVARAS: [char; 3] = ['\u{1cf4}', '\u{0951}', '\u{0952}'];
    GRANTHA_SVARAS.contains(&c)
}
Expand Down Expand Up @@ -611,6 +640,9 @@ pub fn reshape_before(input: &str, from: Scheme) -> String {
while m.not_empty() {
if m.match_2(|x, y| is_grantha_ayogavaha(x) && is_grantha_svara(y)) {
m.take_2(|buf, x, y| buf.extend(&[y, x]));
} else if m.match_1(|x| x == GRANTHA_SIGN_PLUTA) {
// Convert back to 3 for other schemes.
m.take_1(|buf, _| buf.extend(&[TAMIL_DIGIT_THREE]));
} else {
m.push_next();
}
Expand Down Expand Up @@ -807,20 +839,6 @@ pub fn reshape_before(input: &str, from: Scheme) -> String {
}
}

/// Returns whether `c` lies in one of the Bengali code point ranges listed in
/// the table below. U+09BD and U+09CD fall between the ranges and are rejected.
fn is_bengali_sound(c: char) -> bool {
    let sound_ranges = [
        // Signs, independent vowels, and consonants.
        '\u{0981}'..='\u{09bc}',
        // Dependent vowel signs.
        '\u{09be}'..='\u{09cc}',
        // Remaining consonants and signs.
        '\u{09ce}'..='\u{09e3}',
        // Assamese letters.
        '\u{09f0}'..='\u{09f1}',
    ];
    sound_ranges.iter().any(|range| range.contains(&c))
}

/// Reshapes `output` after we run the main transliteration function.
pub fn reshape_after(output: String, to: Scheme) -> String {
let mut m = Matcher::new(output);
Expand Down Expand Up @@ -933,6 +951,10 @@ pub fn reshape_after(output: String, to: Scheme) -> String {
while m.not_empty() {
if m.match_2(|x, y| is_grantha_svara(x) && is_grantha_ayogavaha(y)) {
m.take_2(|buf, x, y| buf.extend(&[y, x]));
} else if m
.match_2(|x, y| accepts_grantha_pluta_marker(x) && y == TAMIL_DIGIT_THREE)
{
m.take_2(|buf, x, _| buf.extend(&[x, GRANTHA_SIGN_PLUTA]));
} else {
m.push_next();
}
Expand Down
40 changes: 40 additions & 0 deletions vidyut-lipi/tests/basic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,46 @@ fn sanskrit_dependent_vowels() {
);
}

// TODO: not very familiar with pluta, check with someone who knows better.
/// Checks that pluta (a vowel followed by the digit three) round-trips between
/// every pair of schemes in each group below.
#[test]
fn sanskrit_pluta() {
    // Independent vowels.
    assert_two_way_pairwise(&[
        (
            HarvardKyoto,
            "a3 A3 i3 I3 u3 U3 R3 RR3 lR3 lRR3 e3 ai3 o3 au3",
        ),
        (Slp1, "a3 A3 i3 I3 u3 U3 f3 F3 x3 X3 e3 E3 o3 O3"),
        (Devanagari, "अ३ आ३ इ३ ई३ उ३ ऊ३ ऋ३ ॠ३ ऌ३ ॡ३ ए३ ऐ३ ओ३ औ३"),
        (Grantha, "𑌅𑍝 𑌆𑍝 𑌇𑍝 𑌈𑍝 𑌉𑍝 𑌊𑍝 𑌋𑍝 𑍠𑍝 𑌌𑍝 𑍡𑍝 𑌏𑍝 𑌐𑍝 𑌓𑍝 𑌔𑍝"),
    ]);

    // Dependent vowels.
    assert_two_way_pairwise(&[
        (
            HarvardKyoto,
            "ka3 kA3 ki3 kI3 ku3 kU3 kR3 kRR3 klR3 klRR3 ke3 kai3 ko3 kau3",
        ),
        (
            Slp1,
            "ka3 kA3 ki3 kI3 ku3 kU3 kf3 kF3 kx3 kX3 ke3 kE3 ko3 kO3",
        ),
        (Devanagari, "क३ का३ कि३ की३ कु३ कू३ कृ३ कॄ३ कॢ३ कॣ३ के३ कै३ को३ कौ३"),
        (
            Grantha,
            "𑌕𑍝 𑌕𑌾𑍝 𑌕𑌿𑍝 𑌕𑍀𑍝 𑌕𑍁𑍝 𑌕𑍂𑍝 𑌕𑍃𑍝 𑌕𑍄𑍝 𑌕𑍢𑍝 𑌕𑍣𑍝 𑌕𑍇𑍝 𑌕𑍈𑍝 𑌕𑍋𑍝 𑌕𑍌𑍝",
        ),
    ]);

    // Candrabindu. The Grantha text keeps the plain digit three here instead
    // of the pluta sign.
    // NOTE(review): the second entry was a duplicate HarvardKyoto row; it is
    // now Slp1 to mirror the groups above. Confirm SLP1 also writes
    // candrabindu as `~`.
    assert_two_way_pairwise(&[
        (HarvardKyoto, "a~3 ka~3"),
        (Slp1, "a~3 ka~3"),
        (Devanagari, "अँ३ कँ३"),
        (Grantha, "𑌅𑌁௩ 𑌕𑌁௩"),
    ]);
}

#[test]
fn sanskrit_ayogavahas() {
assert_two_way_pairwise(&[
Expand Down

0 comments on commit 804b62c

Please sign in to comment.