This repository has been archived by the owner on Aug 2, 2020. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathpinyin.js
153 lines (116 loc) · 5.59 KB
/
pinyin.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
/**
 * Converter between numeric pinyin ("ni3 hao3") and tone-marked pinyin
 * ("nǐ hǎo").
 *
 * Public members:
 *   pinyinChars   - tone number (1-4) -> list of accented vowels
 *   tonelessChars - plain vowels, index-aligned with every pinyinChars row
 *   accentsMap    - vowel cluster -> the same cluster with "*" placed after
 *                   the vowel that carries the tone mark
 *   vowels        - "*"-marked vowels, index-aligned with pinyinChars rows
 *   makeObject    - flag initialised to false (not read by convert/revert)
 *   convert(words, makeObject)
 *   revert(syllables)
 */
function PinyinJs() {
  // Accented pinyin vowels, keyed by tone number
  this.pinyinChars = {
    1: ['ā','ē','ī','ō','ū','ǖ','Ā','Ē','Ī','Ō'],
    2: ['á','é','í','ó','ú','ǘ','Á','É','Í','Ó'],
    3: ['ǎ','ě','ǐ','ǒ','ǔ','ǚ','Ǎ','Ě','Ǐ','Ǒ'],
    4: ['à','è','ì','ò','ù','ǜ','À','È','Ì','Ò']
  };

  // Toneless vowels; position k here corresponds to position k in each row above
  this.tonelessChars = ['a','e','i','o','u','ü','A','E','I','O'];

  // Asterisks mark which vowel of a cluster receives the accent.
  // Key order matters: the first key found in the syllable wins, so longer
  // clusters are listed before the single vowels they contain.
  this.accentsMap = {
    iao: 'ia*o', uai: 'ua*i',
    ai: 'a*i', ao: 'a*o', ei: 'e*i', ia: 'ia*', ie: 'ie*',
    io: 'io*', iu: 'iu*', Ai: 'A*i', Ao: 'A*o', Ei: 'E*i',
    ou: 'o*u', ua: 'ua*', ue: 'ue*', ui: 'ui*', uo: 'uo*',
    ve: 'üe*', Ou: 'O*u',
    a: 'a*', e: 'e*', i: 'i*', o: 'o*', u: 'u*', v: 'v*',
    // A literal "ü" is handled like its ASCII stand-in "v"
    'ü': 'v*',
    A: 'A*', E: 'E*', O: 'O*'
  };

  // Marked vowels to swap for accented characters. Must stay index-aligned
  // with the pinyinChars rows, hence the 'I*' placeholder at position 8.
  this.vowels = ['a*','e*','i*','o*','u*','v*','A*','E*','I*','O*'];

  this.makeObject = false;

  /**
   * Convert numeric pinyin into tone-marked pinyin.
   *
   * @param {string} words - text containing syllables such as "hao3"
   * @param {boolean} [makeObject] - when truthy, return an array of
   *   {tone, syllable, originalSyllable} objects (one per space-separated
   *   token) instead of a string
   * @returns {string|Array} the converted string, or the array of objects.
   *   In object mode `tone` is the digit character ('1'-'4') for toned
   *   syllables and the number 5 for everything else.
   */
  this.convert = function(words, makeObject) {
    // Keep a handle on the instance for the nested helper
    var self = this;

    // Converts one syllable; also used as a String.replace callback, where
    // the first argument is the full match.
    var convertSyllable = function(match) {
      var lastChar = match.slice(-1);
      var hasToneDigit = lastChar >= '0' && lastChar <= '9';
      // Strip the trailing digit whenever there is one (including 0)
      var syllable = hasToneDigit ? match.slice(0, -1) : match;
      var tone = hasToneDigit ? parseInt(lastChar, 10) : 0;

      // No digit, 0, or anything above 4 is treated as the fifth (neutral) tone
      if (tone < 1 || tone > 4) {
        return makeObject
          ? {tone: 5, syllable: syllable, originalSyllable: match}
          : syllable;
      }

      // Put an asterisk inside the first vowel cluster found (in map order)
      var clusters = self.accentsMap;
      for (var cluster in clusters) {
        if (!Object.prototype.hasOwnProperty.call(clusters, cluster)) {
          continue;
        }
        if (syllable.indexOf(cluster) !== -1) {
          syllable = syllable.replace(cluster, clusters[cluster]);
          break;
        }
      }

      // Swap the asterisk-marked vowel for its accented form
      var accented = self.pinyinChars[tone];
      for (var k = 0; k < self.vowels.length; k++) {
        syllable = syllable.replace(self.vowels[k], accented[k]);
      }

      return makeObject
        ? {tone: lastChar, syllable: syllable, originalSyllable: match}
        : syllable;
    };

    // String mode: rewrite every "letters + digit" run in place
    if (!makeObject) {
      return words.replace(/([a-zA-ZüÜ]+)(\d)/g, convertSyllable);
    }

    // Object mode: make sure every tone digit is followed by a space. The
    // lookahead leaves the next character unconsumed so that one-letter
    // syllables ("a1e2o3") are separated as well.
    var spaced = words.replace(/([a-zA-ZüÜ]+\d)(?=[^ ])/g, '$1 ');
    var tokens = spaced.split(' ');
    var results = [];
    for (var t = 0; t < tokens.length; t++) {
      results.push(convertSyllable(tokens[t]));
    }
    return results;
  };

  /**
   * Strip tone marks from space-separated tone-marked syllables.
   *
   * @param {string} syllables - e.g. "nǐ hǎo"
   * @returns {Array} one {tone, syllable, originalSyllable} object per
   *   token; `tone` is a number 1-4, or 5 when no accented vowel is found
   */
  this.revert = function(syllables) {
    var tokens = syllables.split(' ');
    var results = [];

    for (var t = 0; t < tokens.length; t++) {
      var original = tokens[t];
      var clean = original;
      var foundTone = 0;

      // Try tones 1-4 in order and stop at the first accented vowel found
      for (var tone = 1; tone <= 4 && foundTone === 0; tone++) {
        var accented = this.pinyinChars[tone];
        for (var k = 0; k < accented.length; k++) {
          if (clean.indexOf(accented[k]) !== -1) {
            clean = clean.replace(accented[k], this.tonelessChars[k]);
            foundTone = tone;
            break;
          }
        }
      }

      // No accent found: report the neutral (fifth) tone
      results.push({
        tone: foundTone === 0 ? 5 : foundTone,
        syllable: clean,
        originalSyllable: original
      });
    }

    return results;
  };
}
// Shared, ready-to-use converter instance
var pinyinJs = new PinyinJs();