cws.bib


@inproceedings{zhang_type-supervised_2014,
	address = {Gothenburg, Sweden},
	title = {Type-Supervised Domain Adaptation for Joint Segmentation and {POS}-Tagging},
	url = {http://www.aclweb.org/anthology/E14-1062},
	urldate = {2014-07-25},
	booktitle = {Proceedings of the 14th Conference of the European Chapter of the Association for Computational Linguistics},
	publisher = {Association for Computational Linguistics},
	author = {Zhang, Meishan and Zhang, Yue and Che, Wanxiang and Liu, Ting},
	month = apr,
	year = {2014},
	pages = {588--597},
	file = {Zhang+ 2014.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\7EAJWU43\\Zhang 等. - 2014 - Type-Supervised Domain Adaptation for Joint Segmen.pdf:application/pdf}
}

@inproceedings{qiu_automatic_2014,
	address = {Dublin, Ireland},
	title = {Automatic Corpus Expansion for {Chinese} Word Segmentation by Exploiting the Redundancy of Web Information},
	url = {http://www.aclweb.org/anthology/C14-1109},
	urldate = {2014-08-20},
	booktitle = {Proceedings of {COLING} 2014, the 25th International Conference on Computational Linguistics: Technical Papers},
	author = {Qiu, Xipeng and Huang, ChaoChao and Huang, Xuanjing},
	month = aug,
	year = {2014},
	annote = {拿不准的到网上去搜索},
	file = {[PDF] from aclweb.org:E\:\\Dropbox\\Others\\zotero\\storage\\JP29B6GG\\Qiu 等. - Automatic Corpus Expansion for Chinese Word Segmen.pdf:application/pdf}
}

@inproceedings{zhang_exploring_2013,
	address = {Seattle, Washington, {USA}},
	title = {Exploring Representations from Unlabeled Data with Co-training for {Chinese} Word Segmentation},
	url = {http://www.aclweb.org/anthology/D13-1031},
	urldate = {2013-12-03},
	booktitle = {Proceedings of the 2013 Conference on Empirical Methods in Natural Language Processing},
	publisher = {Association for Computational Linguistics},
	author = {Zhang, Longkai and Wang, Houfeng and Sun, Xu and Mansur, Mairgup},
	month = oct,
	year = {2013},
	pages = {311--321},
	file = {Zhang+ 2013.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\HKW9A2R7\\Zhang et al. - 2013 - Exploring Representations from Unlabeled Data with.pdf:application/pdf}
}

@inproceedings{zhang_improving_2013,
	address = {Sofia, Bulgaria},
	title = {Improving {Chinese} Word Segmentation on Micro-blog Using Rich Punctuations},
	url = {http://www.aclweb.org/anthology/P13-2032},
	urldate = {2013-08-06},
	booktitle = {Proceedings of the 51st Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)},
	publisher = {Association for Computational Linguistics},
	author = {Zhang, Longkai and Li, Li and He, Zhengyan and Wang, Houfeng and Sun, Ni},
	month = aug,
	year = {2013},
	pages = {177--182},
	file = {Zhang+ 2013.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\3SQP2B5I\\Zhang et al. - 2013 - Improving Chinese Word Segmentation on Micro-blog .pdf:application/pdf}
}

@inproceedings{zeng_graph-based_2013,
	address = {Sofia, Bulgaria},
	title = {Graph-based Semi-Supervised Model for Joint {Chinese} Word Segmentation and Part-of-Speech Tagging},
	url = {http://www.aclweb.org/anthology/P13-1076},
	urldate = {2013-08-06},
	booktitle = {Proceedings of the 51st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
	publisher = {Association for Computational Linguistics},
	author = {Zeng, Xiaodong and Wong, Derek F. and Chao, Lidia S. and Trancoso, Isabel},
	month = aug,
	year = {2013},
	pages = {770--779},
	file = {Zeng+ 2013.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\76SWQ76I\\Zeng et al. - 2013 - Graph-based Semi-Supervised Model for Joint Chines.pdf:application/pdf}
}

@inproceedings{zeng_co-regularizing_2013,
	address = {Sofia, Bulgaria},
	title = {Co-regularizing character-based and word-based models for semi-supervised {Chinese} word segmentation},
	url = {http://www.aclweb.org/anthology/P13-2031},
	urldate = {2013-08-06},
	booktitle = {Proceedings of the 51st Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)},
	publisher = {Association for Computational Linguistics},
	author = {Zeng, Xiaodong and Wong, Derek F. and Chao, Lidia S. and Trancoso, Isabel},
	month = aug,
	year = {2013},
	pages = {171--176},
	file = {Zeng+ 2013.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\DCFF4QW7\\Zeng et al. - 2013 - Co-regularizing character-based and word-based mod.pdf:application/pdf}
}

@incollection{wu_text_2013,
	title = {Text Window Denoising Autoencoder: Building Deep Architecture for {Chinese} Word Segmentation},
	shorttitle = {Text Window Denoising Autoencoder},
	url = {http://link.springer.com/chapter/10.1007/978-3-642-41644-6_1},
	urldate = {2013-12-03},
	booktitle = {Natural Language Processing and Chinese Computing},
	publisher = {Springer},
	author = {Wu, Ke and Gao, Zhiqiang and Peng, Cheng and Wen, Xiao},
	year = {2013},
	pages = {1--12},
	file = {10.1007-978-3-642-41644-6_1.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\UQD5JQ45\\10.1007-978-3-642-41644-6_1.pdf:application/pdf;Snapshot:E\:\\Dropbox\\Others\\zotero\\storage\\N4PXNSB2\\978-3-642-41644-6_1.html:text/html}
}

@inproceedings{wang_lattice-based_2013,
	address = {Sofia, Bulgaria},
	title = {A Lattice-based Framework for Joint {Chinese} Word Segmentation, {POS} Tagging and Parsing},
	url = {http://www.aclweb.org/anthology/P13-2110},
	urldate = {2013-08-06},
	booktitle = {Proceedings of the 51st Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)},
	publisher = {Association for Computational Linguistics},
	author = {Wang, Zhiguo and Zong, Chengqing and Xue, Nianwen},
	month = aug,
	year = {2013},
	pages = {623--627},
	file = {Wang+ 2013.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\CS2S2IZZ\\Wang et al. - 2013 - A Lattice-based Framework for Joint Chinese Word S.pdf:application/pdf}
}

@inproceedings{wang_mining_2013,
	address = {Sofia, Bulgaria},
	title = {Mining Informal Language from {Chinese} Microtext: Joint Word Recognition and Segmentation},
	shorttitle = {Mining Informal Language from {Chinese} Microtext},
	url = {http://www.aclweb.org/anthology/P13-1072},
	urldate = {2013-08-06},
	booktitle = {Proceedings of the 51st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
	publisher = {Association for Computational Linguistics},
	author = {Wang, Aobo and Kan, Min-Yen},
	month = aug,
	year = {2013},
	pages = {731--741},
	file = {Wang & Kan 2013.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\NCR43BK3\\Wang 和 Kan - 2013 - Mining Informal Language from Chinese Microtext J.pdf:application/pdf}
}

@inproceedings{jiang_discriminative_2013,
	address = {Sofia, Bulgaria},
	title = {Discriminative Learning with Natural Annotations: Word Segmentation as a Case Study},
	shorttitle = {Discriminative Learning with Natural Annotations},
	url = {http://www.aclweb.org/anthology/P13-1075},
	urldate = {2013-08-06},
	booktitle = {Proceedings of the 51st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
	publisher = {Association for Computational Linguistics},
	author = {Jiang, Wenbin and Sun, Meng and Lü, Yajuan and Yang, Yating and Liu, Qun},
	month = aug,
	year = {2013},
	pages = {761--769},
	file = {Jiang+ 2013.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\EK8FX6CM\\Jiang et al. - 2013 - Discriminative Learning with Natural Annotations .pdf:application/pdf}
}

@inproceedings{hagiwara_accurate_2013,
	address = {Sofia, Bulgaria},
	title = {Accurate Word Segmentation using Transliteration and Language Model Projection},
	url = {http://www.aclweb.org/anthology/P13-2033},
	urldate = {2013-08-06},
	booktitle = {Proceedings of the 51st Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)},
	publisher = {Association for Computational Linguistics},
	author = {Hagiwara, Masato and Sekine, Satoshi},
	month = aug,
	year = {2013},
	pages = {183--189},
	file = {Hagiwara & Sekine 2013.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\HTH64K8T\\Hagiwara 和 Sekine - 2013 - Accurate Word Segmentation using Transliteration a.pdf:application/pdf}
}

@phdthesis{__2012,
	author = {张, 开旭},
	title = {使用压缩表示的中文分词词性标注研究},
	school = {清华大学},
	year = {2012}
}

@inproceedings{sun_fast_2012,
	address = {Jeju Island, Korea},
	title = {Fast Online Training with Frequency-Adaptive Learning Rates for {Chinese} Word Segmentation and New Word Detection},
	url = {http://www.aclweb.org/anthology/P12-1027},
	urldate = {2012-07-24},
	booktitle = {Proceedings of the 50th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
	publisher = {Association for Computational Linguistics},
	author = {Sun, Xu and Wang, Houfeng and Li, Wenjie},
	month = jul,
	year = {2012},
	pages = {253--262},
	annote = {{基于CRF的改进}
{引入新Feature：基于word} unigram和word bigram的，基于两个标签的复杂特征
引入新的权重更新算法，步长与特征频度有关，高频特征步长衰减快},
	file = {Sun+ 2012.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\26S4ZKME\\Sun et al. - 2012 - Fast Online Training with Frequency-Adaptive Learn.pdf:application/pdf}
}

@inproceedings{sun_reducing_2012,
	address = {Jeju Island, Korea},
	title = {Reducing Approximation and Estimation Errors for {Chinese} Lexical Processing with Heterogeneous Annotations},
	url = {http://www.aclweb.org/anthology/P12-1025},
	urldate = {2012-07-24},
	booktitle = {Proceedings of the 50th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
	publisher = {Association for Computational Linguistics},
	author = {Sun, Weiwei and Wan, Xiaojun},
	month = jul,
	year = {2012},
	pages = {232--241},
	file = {Sun & Wan 2012.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\BCQHNZME\\Sun 和 Wan - 2012 - Reducing Approximation and Estimation Errors for C.pdf:application/pdf}
}

@inproceedings{sun_capturing_2012,
	address = {Jeju Island, Korea},
	title = {Capturing Paradigmatic and Syntagmatic Lexical Relations: Towards Accurate {Chinese} Part-of-Speech Tagging},
	shorttitle = {Capturing Paradigmatic and Syntagmatic Lexical Relations},
	url = {http://www.aclweb.org/anthology/P12-1026},
	urldate = {2012-07-24},
	booktitle = {Proceedings of the 50th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
	publisher = {Association for Computational Linguistics},
	author = {Sun, Weiwei and Uszkoreit, Hans},
	month = jul,
	year = {2012},
	pages = {242--252},
	file = {Sun & Uszkoreit 2012.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\V69XSEDK\\Sun 和 Uszkoreit - 2012 - Capturing Paradigmatic and Syntagmatic Lexical Rel.pdf:application/pdf}
}

@inproceedings{qian_joint_2012,
	address = {Jeju Island, Korea},
	title = {Joint {Chinese} Word Segmentation, {POS} Tagging and Parsing},
	url = {http://www.aclweb.org/anthology/D12-1046},
	urldate = {2012-07-24},
	booktitle = {Proceedings of the 2012 Joint Conference on Empirical Methods in Natural Language Processing and Computational Natural Language Learning},
	publisher = {Association for Computational Linguistics},
	author = {Qian, Xian and Liu, Yang},
	month = jul,
	year = {2012},
	pages = {501--511},
	annote = {分词、词性标注、句法分析三个模型分别训练
然后解码的时候再合起来},
	file = {Qian & Liu 2012.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\KSIDXTSX\\Qian 和 Liu - 2012 - Joint Chinese Word Segmentation, POS Tagging and P.pdf:application/pdf}
}

@inproceedings{li_unified_2012,
	address = {Jeju Island, Korea},
	title = {Unified Dependency Parsing of {Chinese} Morphological and Syntactic Structures},
	url = {http://www.aclweb.org/anthology/D12-1132},
	urldate = {2012-07-24},
	booktitle = {Proceedings of the 2012 Joint Conference on Empirical Methods in Natural Language Processing and Computational Natural Language Learning},
	publisher = {Association for Computational Linguistics},
	author = {Li, Zhongguo and Zhou, Guodong},
	month = jul,
	year = {2012},
	pages = {1445--1454},
	file = {Li & Zhou 2012.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\V645P37I\\Li 和 Zhou - 2012 - Unified Dependency Parsing of Chinese Morphologica.pdf:application/pdf}
}

@inproceedings{li_integrating_2012,
	address = {Mumbai, India},
	title = {Integrating Surface and Abstract Features for Robust Cross-Domain {Chinese} Word Segmentation},
	url = {http://www.aclweb.org/anthology/C12-1101},
	booktitle = {Proceedings of {COLING} 2012},
	publisher = {The {COLING} 2012 Organizing Committee},
	author = {Li, Xiaoqing and Wang, Kun and Zong, Chengqing and Su, Keh-Yih},
	month = dec,
	year = {2012},
	pages = {1653--1670},
	file = {C12-1101.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\5GZT9V6S\\C12-1101.pdf:application/pdf}
}

@inproceedings{li_active_2012,
	address = {Mumbai, India},
	title = {Active Learning for {Chinese} Word Segmentation},
	url = {http://www.aclweb.org/anthology/C12-2067},
	booktitle = {Proceedings of {COLING} 2012: Posters},
	publisher = {The {COLING} 2012 Organizing Committee},
	author = {Li, Shoushan and Zhou, Guodong and Huang, Chu-Ren},
	month = dec,
	year = {2012},
	pages = {683--692},
	file = {C12-2067.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\W7X9QT8P\\C12-2067.pdf:application/pdf}
}

@inproceedings{jiang_iterative_2012,
	address = {Jeju Island, Korea},
	title = {Iterative Annotation Transformation with Predict-Self Reestimation for {Chinese} Word Segmentation},
	url = {http://www.aclweb.org/anthology/D12-1038},
	urldate = {2012-07-24},
	booktitle = {Proceedings of the 2012 Joint Conference on Empirical Methods in Natural Language Processing and Computational Natural Language Learning},
	publisher = {Association for Computational Linguistics},
	author = {Jiang, Wenbin and Meng, Fandong and Liu, Qun and Lü, Yajuan},
	month = jul,
	year = {2012},
	pages = {412--420},
	file = {Jiang+ 2012.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\9JEIXMU9\\Jiang et al. - 2012 - Iterative Annotation Transformation with Predict-S.pdf:application/pdf}
}

@inproceedings{hatori_incremental_2012,
	address = {Jeju Island, Korea},
	title = {Incremental Joint Approach to Word Segmentation, {POS} Tagging, and Dependency Parsing in {Chinese}},
	url = {http://www.aclweb.org/anthology/P12-1110},
	urldate = {2012-07-24},
	booktitle = {Proceedings of the 50th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
	publisher = {Association for Computational Linguistics},
	author = {Hatori, Jun and Matsuzaki, Takuya and Miyao, Yusuke and Tsujii, Jun'ichi},
	month = jul,
	year = {2012},
	pages = {1045--1053},
	file = {Hatori+ 2012.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\VVTJ9R7I\\Hatori et al. - 2012 - Incremental Joint Approach to Word Segmentation, P.pdf:application/pdf}
}

@inproceedings{duan_cips-sighan_2012,
	address = {Tianjin, China},
	title = {The {CIPS-SIGHAN} {CLP} 2012 {Chinese} Word Segmentation on {MicroBlog} Corpora Bakeoff},
	url = {http://www.aclweb.org/anthology/W12-6307},
	booktitle = {Proceedings of the Second {CIPS-SIGHAN} Joint Conference on Chinese Language Processing},
	publisher = {Association for Computational Linguistics},
	author = {Duan, Huiming and Sui, Zhifang and Tian, Ye and Li, Wenjie},
	month = dec,
	year = {2012},
	pages = {35--40}
}

@article{zhang_syntactic_2011,
	title = {Syntactic Processing using the Generalized Perceptron and Beam Search},
	volume = {37},
	number = {1},
	journal = {Computational Linguistics},
	author = {Zhang, Yue and Clark, Stephen},
	year = {2011},
	keywords = {{CL}, perceptron},
	pages = {105--151},
	annote = {之前工作的总结。
将平均感知器，应用于汉语的词法分析、句法分析。
使用beam search。},
	file = {J11-1005.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\5VUSJ55Z\\J11-1005.pdf:application/pdf}
}

@inproceedings{wang_improving_2011,
	address = {Chiang Mai, Thailand},
	title = {Improving {Chinese} Word Segmentation and {POS} Tagging with Semi-supervised Methods Using Large Auto-Analyzed Data},
	url = {http://www.aclweb.org/anthology/I11-1035},
	booktitle = {Proceedings of 5th International Joint Conference on Natural Language Processing},
	publisher = {Asian Federation of Natural Language Processing},
	author = {Wang, Yiou and Kazama, Jun'ichi and Tsuruoka, Yoshimasa and Chen, Wenliang and Zhang, Yujie and Torisawa, Kentaro},
	month = nov,
	year = {2011},
	pages = {309--317},
	file = {I11-1035.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\GCPGXMC7\\I11-1035.pdf:application/pdf}
}

@article{wang_new_2011,
	title = {A New Unsupervised Approach to Word Segmentation},
	volume = {37},
	number = {3},
	journal = {Computational Linguistics},
	author = {Wang, H. and Zhu, J. and Tang, S. and Fan, X.},
	year = {2011},
	keywords = {{CL}, unsupervised},
	pages = {421--454},
	file = {J11-3001.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\JTSKXP9F\\J11-3001.pdf:application/pdf}
}

@inproceedings{sun_stacked_2011,
	address = {Portland, Oregon, {USA}},
	title = {A Stacked Sub-Word Model for Joint {Chinese} Word Segmentation and Part-of-Speech Tagging},
	url = {http://www.aclweb.org/anthology/P11-1139},
	urldate = {2011-06-17},
	booktitle = {Proceedings of the 49th Annual Meeting of the Association for Computational Linguistics: Human Language Technologies},
	publisher = {Association for Computational Linguistics},
	author = {Sun, Weiwei},
	month = jun,
	year = {2011},
	keywords = {{ACL}, stacked},
	pages = {1385--1394},
	annote = {使用stacked learning这种meta-learning algorithm，有机制避免两层在训练时使用重叠的训练数据，但也能最大限度利用数据。
第一层使用了三个模型，基于词的，基于字序列标注的，基于单字分类的。},
	file = {P11-1139.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\8MWJ725K\\P11-1139.pdf:application/pdf}
}

@inproceedings{sun_enhancing_2011,
	address = {Edinburgh, Scotland, {UK}},
	title = {Enhancing {Chinese} Word Segmentation Using Unlabeled Data},
	url = {http://www.aclweb.org/anthology/D11-1090},
	urldate = {2011-08-04},
	booktitle = {Proceedings of the 2011 Conference on Empirical Methods in Natural Language Processing},
	publisher = {Association for Computational Linguistics},
	author = {Sun, Weiwei and Xu, Jia},
	month = jul,
	year = {2011},
	keywords = {{EMNLP}, semi-supervised},
	pages = {970--979},
	annote = {feature engineering，使用in-domain的未标注数据帮助中文分词。
{增加的特征有：互信息；Accessor} Variety；基于标点符号的特征；篇章级的特征。
另外一个结论是使用实数值作为特征值不如用binary的。},
	file = {Sun & Xu 2011.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\ZN9PGR2V\\Sun and Xu - 2011 - Enhancing Chinese Word Segmentation Using Unlabele.pdf:application/pdf}
}

@inproceedings{li_parsing_2011,
	address = {Portland, Oregon, {USA}},
	title = {Parsing the Internal Structure of Words: A New Paradigm for {Chinese} Word Segmentation},
	shorttitle = {Parsing the Internal Structure of Words},
	url = {http://www.aclweb.org/anthology/P11-1141},
	urldate = {2011-06-17},
	booktitle = {Proceedings of the 49th Annual Meeting of the Association for Computational Linguistics: Human Language Technologies},
	publisher = {Association for Computational Linguistics},
	author = {Li, Zhongguo},
	month = jun,
	year = {2011},
	keywords = {{ACL}, parsing},
	pages = {1405--1414},
	annote = {将词法分析与句法分析结合。在同一棵树下使用不同的“成分”标签。
使用句法分析的算法解码。},
	file = {P11-1141.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\9DMUAUZF\\P11-1141.pdf:application/pdf}
}

@inproceedings{zhao_cips-sighan_2010,
	title = {The {CIPS-SIGHAN} {CLP2010} {Chinese} Word Segmentation Backoff},
	booktitle = {{CIPS-SIGHAN} Joint Conference on Chinese Language Processing},
	author = {Zhao, Hongmei and Liu, Qun},
	year = {2010}
}

@inproceedings{zhang_fast_2010,
	address = {Cambridge, {MA}},
	title = {A Fast Decoder for Joint Word Segmentation and {POS}-Tagging Using a Single Discriminative Model},
	url = {http://www.aclweb.org/anthology/D10-1082},
	booktitle = {Proceedings of the 2010 Conference on Empirical Methods in Natural Language Processing},
	publisher = {Association for Computational Linguistics},
	author = {Zhang, Yue and Clark, Stephen},
	month = oct,
	year = {2010},
	pages = {843--852},
	annote = {解码速度从每秒2.24句，提高到每秒24.94句},
	file = {emnlp10yue.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\3C75ZFUB\\emnlp10yue.pdf:application/pdf}
}

@incollection{zhang_local_2010,
	title = {A Local Generative Model for {Chinese} Word Segmentation},
	booktitle = {Information Retrieval Technology},
	publisher = {Springer},
	author = {Zhang, K. and Sun, M. and Xue, P.},
	year = {2010},
	pages = {420--431},
	annote = {提出一种用局部的语言模型做分词的方法。
{提出一种构造切分二叉树的方法，处理分词粒度问题，该方法也可直接利用CRF的输出构造二叉树。}},
	file = {fulltext.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\SUGAIZG4\\fulltext.pdf:application/pdf;Snapshot:E\:\\Dropbox\\Others\\zotero\\storage\\GC53NMIV\\60u60u58k06m426p.html:text/html}
}

@inproceedings{xiao_joint_2010,
	address = {Beijing, China},
	title = {Joint Tokenization and Translation},
	url = {http://www.aclweb.org/anthology/C10-1135},
	booktitle = {Proceedings of the 23rd International Conference on Computational Linguistics (Coling 2010)},
	publisher = {Coling 2010 Organizing Committee},
	author = {Xiao, Xinyan and Liu, Yang and Hwang, YoungSook and Liu, Qun and Lin, Shouxun},
	month = aug,
	year = {2010},
	pages = {1200--1208},
	file = {C10-1135.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\RFRTHEI5\\C10-1135.pdf:application/pdf}
}

@inproceedings{wang_character-based_2010,
	address = {Beijing, China},
	title = {A Character-Based Joint Model for {Chinese} Word Segmentation},
	url = {http://www.aclweb.org/anthology/C10-1132},
	booktitle = {Proceedings of the 23rd International Conference on Computational Linguistics (Coling 2010)},
	publisher = {Coling 2010 Organizing Committee},
	author = {Wang, Kun and Zong, Chengqing and Su, Keh-Yih},
	month = aug,
	year = {2010},
	pages = {1173--1181},
	annote = {整合一个产生式模型和判别式模型
另外发现将某些binary特征值的权重改一下，可以提高效果。},
	file = {2010.08 COLING-wangkun.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\JSS9PX3J\\2010.08 COLING-wangkun.pdf:application/pdf}
}

@inproceedings{qian_joint_2010,
	title = {Joint training and decoding using virtual nodes for cascaded segmentation and tagging tasks},
	booktitle = {Proceedings of the 2010 Conference on Empirical Methods in Natural Language Processing},
	author = {Qian, X. and Zhang, Q. and Zhou, Y. and Huang, X. and Wu, L.},
	year = {2010},
	keywords = {{EMNLP}},
	pages = {187--195},
	file = {D10-1019.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\87DEKU63\\D10-1019.pdf:application/pdf}
}

@inproceedings{_-_2009,
	author = {赵, 海 and 揭, 春雨 and 宋, 彦},
	title = {基于字依存树的中文词法-句法一体化分析},
	booktitle = {中国计算机语言学研究前沿进展 (2007-2009)},
	year = {2009},
	file = {基于字依存树的中文词法_句法一体化分析.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\ZIQKTMVJ\\基于字依存树的中文词法_句法一体化分析.pdf:application/pdf}
}

@phdthesis{__2009,
	author = {滕, 少华},
	title = {基于 {CRFs} 的中文分词和短文本分类技术},
	year = {2009},
	annote = {{就分词来说，用Chi方做特征选择，一半的特征仍然可以保持性能。}
个别字（如“的”，“和”，“了”）的有无对整句切分的正确性有帮助与干扰。
{使用CRF的置信度输出，低置信度产生高错误率。}
基于规则的、基于篇章上下文统计的低置信度后处理过程。},
	file = {thesis.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\EMD8MJVG\\thesis.pdf:application/pdf}
}

@inproceedings{zhao_character-level_2009,
	address = {Athens, Greece},
	title = {Character-Level Dependencies in {Chinese}: Usefulness and Learning},
	url = {http://www.aclweb.org/anthology/E09-1100},
	booktitle = {Proceedings of the 12th Conference of the European Chapter of the {ACL} ({EACL} 2009)},
	publisher = {Association for Computational Linguistics},
	author = {Zhao, Hai},
	month = mar,
	year = {2009},
	pages = {879--887},
	annote = {用字的依存树做分词。
最后系统，词内是词法字依存关系，词之间是线性依存关系。
当然最终效果没有现有最优系统好。},
	file = {E09-1100.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\XXR2ZEU4\\E09-1100.pdf:application/pdf}
}

@inproceedings{zhao_simple_2009,
	title = {A Simple and Efficient Model Pruning Method for Conditional Random Fields},
	booktitle = {Computer Processing of Oriental Languages. Language Technology for the Knowledge-based Economy},
	publisher = {Springer},
	author = {Zhao, Hai and Kit, Chunyu},
	year = {2009},
	pages = {145--155},
	annote = {{CRF训练后，按参数值去掉大部分特征，性能都不会下降，用事实证明CRF有太多冗余。}},
	file = {PruneCRFs-20090107-ICCPOL09-final.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\T2GM3AQD\\PruneCRFs-20090107-ICCPOL09-final.pdf:application/pdf}
}

@article{tsai_chinese_2009,
	author = {Tsai, R. T. H.},
	title = {Chinese text segmentation: A hybrid approach using transductive learning and statistical association measures},
	journal = {Expert Systems with Applications},
	year = {2009},
	annote = {{多种加入各种特征提高CRF性能的方法。}},
	file = {sdarticle-1.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\GG9CIJ26\\sdarticle-1.pdf:application/pdf}
}

@inproceedings{mochihashi_bayesian_2009,
	title = {Bayesian Unsupervised Word Segmentation with Nested {Pitman-Yor} Language Modeling},
	booktitle = {Proceedings of the Joint Conference of the 47th Annual Meeting of the {ACL} and the 4th International Joint Conference on Natural Language Processing of the {AFNLP}},
	author = {Mochihashi, Daichi and Yamada, Takeshi and Ueda, Naonori},
	year = {2009},
	keywords = {{ACL}, unsupervised},
	pages = {100--108},
	annote = {{用Pitman-Yor，建立了两层语言模型，一个是词的，一个是} 句子的。},
	file = {P09-1012.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\KNTWK2T7\\P09-1012.pdf:application/pdf}
}

@article{li_punctuation_2009,
	title = {Punctuation as Implicit Annotations for {Chinese} Word Segmentation},
	volume = {35},
	number = {4},
	journal = {Computational Linguistics},
	author = {Li, Zhongguo and Sun, Maosong},
	year = {2009},
	keywords = {{CL}, {ME}, semi-supervised},
	pages = {505--512},
	file = {coli.2009.35.4.35403.lowlink.pdf_v03.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\56DRVTTK\\coli.2009.35.4.35403.lowlink.pdf_v03.pdf:application/pdf}
}

@inproceedings{kruengkrai_error-driven_2009,
	address = {Suntec, Singapore},
	title = {An Error-Driven Word-Character Hybrid Model for Joint {Chinese} Word Segmentation and {POS} Tagging},
	url = {http://www.aclweb.org/anthology/P/P09/P09-1058},
	booktitle = {Proceedings of the Joint Conference of the 47th Annual Meeting of the {ACL} and the 4th International Joint Conference on Natural Language Processing of the {AFNLP}},
	publisher = {Association for Computational Linguistics},
	author = {Kruengkrai, Canasai and Uchimoto, Kiyotaka and Kazama, Jun'ichi and Wang, Yiou and Torisawa, Kentaro and Isahara, Hitoshi},
	month = aug,
	year = {2009},
	keywords = {{ACL}},
	pages = {513--521},
	annote = {词典词与生词分别对待},
	file = {P09-1058.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\UV5HTZU4\\P09-1058.pdf:application/pdf}
}

@inproceedings{jiang_automatic_2009,
	address = {Suntec, Singapore},
	title = {Automatic Adaptation of Annotation Standards: {Chinese} Word Segmentation and {POS} Tagging -- A Case Study},
	url = {http://www.aclweb.org/anthology/P/P09/P09-1059},
	booktitle = {Proceedings of the Joint Conference of the 47th Annual Meeting of the {ACL} and the 4th International Joint Conference on Natural Language Processing of the {AFNLP}},
	publisher = {Association for Computational Linguistics},
	author = {Jiang, Wenbin and Huang, Liang and Liu, Qun},
	month = aug,
	year = {2009},
	keywords = {{ACL}, perceptron},
	pages = {522--530},
	annote = {Perceptron，分词与词性标注结合。将一种标注体系下的参数，转移到另一种标注体系中使用。},
	file = {P09-1059.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\CDPGIDHM\\P09-1059.pdf:application/pdf}
}

@inproceedings{zhao_unsupervised_2008,
	title = {Unsupervised segmentation helps supervised learning of character tagging for word segmentation and named entity recognition},
	booktitle = {Proceedings of the Sixth {SIGHAN} Workshop on Chinese Language Processing},
	author = {Zhao, Hai and Kit, Chunyu},
	year = {2008},
	pages = {106--111},
	annote = {将accessor variety ({AV)的结果离散化，然后分散到字，给为CRF的输入，可以提高分词效果。}},
	file = {I08-4017.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\M9VWFCWI\\I08-4017.pdf:application/pdf}
}

@inproceedings{zhao_empirical_2008,
	address = {Hyderabad, India},
	title = {An Empirical Comparison of Goodness Measures for Unsupervised {Chinese} Word Segmentation with a Unified Framework},
	booktitle = {The Third International Joint Conference on Natural Language Processing ({IJCNLP}-2008)},
	author = {Zhao, Hai and Kit, Chunyu},
	year = {2008},
	annote = {{描述了四种用于无监督中文分词的判别量：Frequency} of Substring with {ReductionDescription} Length Gain ({DLG)Accessor} Variety ({AV)Boundary} Entropy (Branching Entropy, {BE)}},
	file = {I08-1002.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\4K9T24X9\\I08-1002.pdf:application/pdf}
}

@inproceedings{zhang_joint_2008,
	address = {Columbus, Ohio},
	title = {Joint Word Segmentation and {POS} Tagging Using a Single Perceptron},
	url = {http://www.aclweb.org/anthology/P/P08/P08-1101},
	booktitle = {Proceedings of {ACL}-08: {HLT}},
	publisher = {Association for Computational Linguistics},
	author = {Zhang, Yue and Clark, Stephen},
	month = jun,
	year = {2008},
	pages = {888--896},
	file = {url.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\TV26HWGD\\url.pdf:application/pdf}
}

@inproceedings{xu_bayesian_2008,
	title = {Bayesian semi-supervised {Chinese} word segmentation for statistical machine translation},
	booktitle = {Proceedings of the 22nd International Conference on Computational Linguistics (Coling 2008)},
	publisher = {Association for Computational Linguistics},
	author = {Xu, J. and Gao, J. and Toutanova, K. and Ney, H.},
	year = {2008},
	keywords = {{COLING}},
	pages = {1017--1024},
	file = {C08-1128.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\IFEN978A\\C08-1128.pdf:application/pdf}
}

@inproceedings{qiao_statistical_2008,
	title = {Statistical Properties of Overlapping Ambiguities in {Chinese} Word Segmentation and a Strategy for Their Disambiguation},
	booktitle = {Text, Speech and Dialogue},
	publisher = {Springer},
	author = {Qiao, W. and Sun, M. and Menzel, W.},
	year = {2008},
	pages = {177--186},
	file = {21.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\A6M5D3VX\\21.pdf:application/pdf}
}

@inproceedings{liu_information_2008,
	title = {Information retrieval oriented word segmentation based on character associative strength ranking},
	booktitle = {Proceedings of the 2008 Conference on Empirical Methods in Natural Language Processing},
	publisher = {Association for Computational Linguistics},
	author = {Liu, Y. and Wang, B. and Ding, F. and Xu, S.},
	year = {2008},
	keywords = {{EMNLP}},
	pages = {1061--1069},
	annote = {{用了RankingSVM的方法分词，用于IR}},
	file = {D08-1111.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\E3BV5RVM\\D08-1111.pdf:application/pdf;D08-1111.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\PJVB3N7N\\D08-1111.pdf:application/pdf}
}

@inproceedings{jin_fourth_2008,
	title = {The Fourth International {Chinese} Language Processing Bakeoff: {Chinese} Word Segmentation, Named Entity Recognition and {Chinese} {POS} Tagging},
	booktitle = {Proceedings of the Sixth {SIGHAN} Workshop on Chinese Language Processing},
	author = {Jin, Guangjin and Chen, Xiao},
	year = {2008}
}

@inproceedings{jiang_word_2008,
	address = {Manchester, {UK}},
	title = {Word Lattice Reranking for {Chinese} Word Segmentation and Part-of-Speech Tagging},
	url = {http://www.aclweb.org/anthology/C08-1049},
	booktitle = {Proceedings of the 22nd International Conference on Computational Linguistics (Coling 2008)},
	publisher = {Coling 2008 Organizing Committee},
	author = {Jiang, Wenbin and Mi, Haitao and Liu, Qun},
	month = aug,
	year = {2008},
	pages = {385--392},
	annote = {使用reranking。有别于top-n的reranking，使用指数规模的word lattice reranking。至少看oracle，后者比前者就好。
解决的问题有：如何构造lattice，如何算oracle，有哪些特征，以及reranking的时候的cube剪枝。},
	file = {C08-1049.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\ER5ZXIG9\\C08-1049.pdf:application/pdf}
}

@inproceedings{jiang_cascaded_2008,
	address = {Columbus, Ohio},
	title = {A Cascaded Linear Model for Joint {Chinese} Word Segmentation and Part-of-Speech Tagging},
	url = {http://www.aclweb.org/anthology/P/P08/P08-1102},
	booktitle = {Proceedings of {ACL}-08: {HLT}},
	publisher = {Association for Computational Linguistics},
	author = {Jiang, Wenbin and Huang, Liang and Liu, Qun and Lü, Yajuan},
	month = jun,
	year = {2008},
	pages = {897--904},
	file = {P08-1102.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\D6D9TK2G\\P08-1102.pdf:application/pdf}
}

@article{__2007,
	title = {基于有效子串标注的中文分词},
	volume = {21},
	number = {5},
	journal = {中文信息学报},
	author = {赵, 海 and 揭, 春雨},
	year = {2007},
	pages = {8--13},
	file = {基于有效子串标注的中文分词.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\JTK394VB\\基于有效子串标注的中文分词.pdf:application/pdf}
}

@article{__2007-1,
	title = {中文分词十年回顾},
	volume = {21},
	number = {3},
	journal = {中文信息学报},
	author = {黄, 昌宁 and 赵, 海},
	year = {2007},
	pages = {8--19},
	annote = {中文词的认同度。从863、973到sig {han评测。语料库的质量控制（包括对“心理词”的规则制定）。基于语法的、基于规则的不如基于词的，又被基于字的取代。大规模真实文本中未登录词造成的分词精度失落比歧义切分造成的精度失落至少大5倍以上。基于字的，最大熵，SVM，CRF等。词位转移，2标注，4标注，微软的6标注。5字窗口足够了。}},
	file = {中文分词十年回顾.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\DBHJZ9BU\\中文分词十年回顾.pdf:application/pdf}
}

@inproceedings{zhang_chinese_2007,
	address = {Prague, Czech Republic},
	title = {Chinese Segmentation with a Word-Based Perceptron Algorithm},
	url = {http://www.aclweb.org/anthology/P/P07/P07-1106},
	booktitle = {Proceedings of the 45th Annual Meeting of the Association of Computational Linguistics},
	publisher = {Association for Computational Linguistics},
	author = {Zhang, Yue and Clark, Stephen},
	month = jun,
	year = {2007},
	keywords = {{ACL}},
	pages = {840--847},
	annote = {采用average perceptron,然后用一种lazy update的方法。
采用了基于词的特征，所以解码使用柱搜索，而不能用贪心或者动态规划。},
	file = {P07-1106.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\SXMSM8UQ\\P07-1106.pdf:application/pdf;P07-1106.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\GQ6KETAD\\P07-1106.pdf:application/pdf}
}

@inproceedings{shi_dual-layer_2007,
	author = {Shi, Y. and Wang, M.},
	title = {A dual-layer {CRFs} based joint decoding method for cascaded segmentation and labeling tasks},
	booktitle = {Proceedings of {IJCAI}},
	volume = {7},
	pages = {1707--1712},
	year = {2007},
	keywords = {{IJCAI}},
	annote = {{双层CRF做分词与词性标注，中规中矩。}
第一层基于字信息分词；第二层基于词，以及字信息标注词性。
{两层CRF分开训练，联合测试。第一层找N-best，再综合第一层第二层的结果重新排序。}},
	file = {IJCAI07-276.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\KBRZKT5Z\\IJCAI07-276.pdf:application/pdf}
}

@inproceedings{nakagawa_hybrid_2007,
	title = {A hybrid approach to word segmentation and {POS} tagging},
	booktitle = {Proceedings of the 45th Annual Meeting of the {ACL} on Interactive Poster and Demonstration Sessions},
	author = {Nakagawa, Tetsuji and Uchimoto, Kiyotaka},
	year = {2007},
	pages = {217--220},
	annote = {{字与词结合的Lattice，然后分词与标注结合。仍然用马尔可夫模型}},
	file = {P07-2055.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\ZHCMGXP6\\P07-2055.pdf:application/pdf}
}

@inproceedings{huang_rethinking_2007,
	address = {Morristown, {NJ}, {USA}},
	title = {Rethinking Chinese word segmentation: tokenization, character classification, or wordbreak identification},
	booktitle = {Proceedings of the 45th Annual Meeting of the {ACL} on Interactive Poster and Demonstration Sessions},
	publisher = {Association for Computational Linguistics},
	author = {Huang, Chu-Ren and Simon, Petr and Hsieh, Shu-Kai and Prévot, L.},
	year = {2007},
	keywords = {{ACL}},
	pages = {69--72},
	annote = {不使用字标注，直接关心字间间隔（断开与不断开）。
使用滑动窗口的方法进行判断。},
	file = {HKPolyU070411.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\GK9TZS89\\HKPolyU070411.pdf:application/pdf;P07-2018.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\IFQWMT45\\P07-2018.pdf:application/pdf}
}

@article{__2006,
	author = {李, 江波 and 周, 强 and 陈, 祖舜},
	title = {汉语词典的快速查询算法研究},
	journal = {中文信息学报},
	year = {2006},
	annote = {{双数组Trie数是相当高效的词典查询算法，适合中文分词。简单说是逐字哈希，而哈希函数是平凡的f(x)=x，而且不会有冲突。所以很快。但维护双数组也很难。}}
}

@inproceedings{zhao_improved_2006,
	address = {Sydney, Australia},
	title = {An improved Chinese word segmentation system with conditional random field},
	booktitle = {Proceedings of the Fifth {SIGHAN} Workshop on Chinese Language Processing},
	publisher = {Association for Computational Linguistics},
	author = {Zhao, H. and Huang, C. N. and Li, M.},
	month = jul,
	year = {2006},
	pages = {162--165},
	annote = {6-tag set; tone feature; assistant segmenters},
	file = {CSB-SIGHAN5_20071015-rev.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\C87THDC2\\CSB-SIGHAN5_20071015-rev.pdf:application/pdf}
}

@inproceedings{zhang_subword-based_2006,
	address = {Sydney, Australia},
	title = {Subword-Based Tagging for Confidence-Dependent Chinese Word Segmentation},
	url = {http://www.aclweb.org/anthology/P/P06/P06-2123},
	booktitle = {Proceedings of the {COLING/ACL} 2006 Main Conference Poster Sessions},
	publisher = {Association for Computational Linguistics},
	author = {Zhang, Ruiqiang and Kikui, Genichiro and Sumita, Eiichiro},
	month = jul,
	year = {2006},
	pages = {961--968},
	annote = {subword-based tagging, 比如北京市 标注为 北京/l 市/r
不过还是用的三标注系统
{使用CRF中的置信度，与基于词典的方法融合}
{CRF倾向于较高的OOV的F1，而较低的IV的F1}},
	file = {N06-2049.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\4KBGBZVR\\N06-2049.pdf:application/pdf}
}

@inproceedings{li_discriminative_2006,
	title = {Discriminative pruning of language models for Chinese word segmentation},
	booktitle = {Proceedings of the 21st International Conference on Computational Linguistics and the 44th annual meeting of the Association for Computational Linguistics},
	publisher = {Association for Computational Linguistics},
	author = {Li, J. and Wang, H. and Ren, D. and Li, G.},
	year = {2006},
	pages = {1001--1008},
	file = {P06-1126.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\36S6QKRR\\P06-1126.pdf:application/pdf}
}

@inproceedings{levow_third_2006,
	address = {Sydney, Australia},
	title = {The Third International Chinese Language Processing Bakeoff: Word Segmentation and Named Entity Recognition},
	url = {http://www.aclweb.org/anthology/W/W06/W06-0115},
	booktitle = {Proceedings of the Fifth {SIGHAN} Workshop on Chinese Language Processing},
	publisher = {Association for Computational Linguistics},
	author = {Levow, Gina-Anne},
	month = jul,
	year = {2006},
	pages = {108--117}
}

@inproceedings{jin_unsupervised_2006,
	address = {Sydney, Australia},
	title = {Unsupervised Segmentation of Chinese Text by Use of Branching Entropy},
	url = {http://www.aclweb.org/anthology/P/P06/P06-2056},
	booktitle = {Proceedings of the {COLING/ACL} 2006 Main Conference Poster Sessions},
	publisher = {Association for Computational Linguistics},
	author = {Jin, Zhihui and Tanaka-Ishii, Kumiko},
	month = jul,
	year = {2006},
	pages = {428--435},
	annote = {如nature，随着字母的读入，nature后面跟的字母的不确定性比natur大得多，所以认为前者是一个可能的词边界。

论文中以此为基础，算出句子每个子序列的边界熵（前向后先两个方向）以此为判据，禁欲},
	file = {P06-2056.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\KIS3H56R\\P06-2056.pdf:application/pdf}
}

@inproceedings{goldwater_contextual_2006,
	title = {Contextual Dependencies in Unsupervised Word Segmentation},
	booktitle = {Proceedings of the 21st International Conference on Computational Linguistics and the 44th annual meeting of the Association for Computational Linguistics},
	publisher = {Association for Computational Linguistics},
	author = {Goldwater, Sharon and Griffiths, Thomas L. and Johnson, Mark},
	year = {2006},
	pages = {673--680},
	annote = {{基于D过程的语言模型与词法模型两个词两个词的Gibbs采样}},
	file = {P06-1085.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\W8ZFPVTH\\P06-1085.pdf:application/pdf}
}

@article{__2005,
	title = {现代汉语语料库建设及深加工},
	volume = {2},
	url = {http://www.corpus4u.org/forum/upload/forum/2005072322202796.pdf},
	urldate = {2012-08-17},
	journal = {语言文字应用},
	author = {靳光瑾 and 肖航 and 富丽 and 章云帆},
	year = {2005},
	pages = {111--120},
	annote = {国家语委的语料库介绍},
	file = {[PDF] from corpus4u.org:E\:\\Dropbox\\Others\\zotero\\storage\\H4ZDBP95\\靳光瑾 et al. - 2005 - 现代汉语语料库建设及深加工.pdf:application/pdf}
}

@inproceedings{tseng_conditional_2005,
	address = {Jeju Island, Korea},
	title = {A conditional random field word segmenter for {SIGHAN} bakeoff 2005},
	booktitle = {Proceedings of the Fourth {SIGHAN} Workshop on Chinese Language Processing},
	author = {Tseng, H. and Chang, P. and Andrew, G. and Jurafsky, D. and Manning, C.},
	year = {2005},
	pages = {168--171},
	annote = {{SIGHAN} bakeoff 2005 中相当好的一个系统
{加了简单的词缀和叠字的feature在CRF里面}},
	file = {I05-3027.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\XE28MPSH\\I05-3027.pdf:application/pdf}
}

@inproceedings{li_perceptron_2005,
	title = {Perceptron Learning for Chinese Word Segmentation},
	booktitle = {Proceedings of Fourth {SIGHAN} Workshop on Chinese Language processing (Sighan-05)},
	author = {Li, Y. and Miao, C. and Bontcheva, K. and Cunningham, H.},
	year = {2005},
	pages = {154--157},
	file = {I05-3023.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\GG6F67ST\\I05-3023.pdf:application/pdf}
}

@article{gao_chinese_2005,
	author = {Gao, Jianfeng and Li, Mu and Huang, Chang-Ning and Wu, Andi},
	title = {Chinese Word Segmentation and Named Entity Recognition: A Pragmatic Approach},
	shorttitle = {Chinese Word Segmentation and Named Entity Recognition},
	journal = {Computational Linguistics},
	volume = {31},
	number = {4},
	pages = {531--574},
	year = {2005},
	month = dec,
	doi = {10.1162/089120105775299177},
	url = {http://dx.doi.org/10.1162/089120105775299177},
	urldate = {2009-03-04},
	keywords = {{CL}, perceptron},
	abstract = {This article presents a pragmatic approach to Chinese word segmentation. It differs from most previous approaches mainly in three respects. First, while theoretical linguists have defined Chinese words using various linguistic criteria, Chinese words in this study are defined pragmatically as segmentation units whose definition depends on how they are used and processed in realistic computer applications. Second, we propose a pragmatic mathematical framework in which segmenting known words and detecting unknown words of different types (i.e., morphologically derived words, factoids, named entities, and other unlisted words) can be performed simultaneously in a unified way. These tasks are usually conducted separately in other systems. Finally, we do not assume the existence of a universal word segmentation standard that is application-independent. Instead, we argue for the necessity of multiple segmentation standards due to the pragmatic fact that different natural language processing applications might require different granularities of Chinese words.},
	annote = {使用perceptron学习线性模型与基于字标注不同，解码前构造word lattice。相当于事先缩小了可能的字标注结果集合的大小。将词分为若干类，每一类会按概率计算一些概率值，作为perceptron的参数。perceptron的参数全是非binary的。只有词类的trigram的概率，不涉及任何具体字。},
	file = {089120105775299177.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\U342JTWV\\089120105775299177.pdf:application/pdf}
}

@inproceedings{emerson_second_2005,
	address = {Jeju Island, Korea},
	title = {The second international {Chinese} word segmentation bakeoff},
	booktitle = {Proceedings of the Fourth {SIGHAN} Workshop on Chinese Language Processing},
	author = {Emerson, Thomas},
	year = {2005},
	pages = {123--133},
	file = {I05-3017.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\JUU4DEM5\\I05-3017.pdf:application/pdf}
}

% NOTE(review): the author value below looks like several authors' initials
% fused into one name by the export tool; confirm against the attached PDF.
@article{duan_statistic_2005,
	author = {Duan, ZWXZH},
	title = {A Statistic Study of Three-character Unknown Words in Chinese},
	journal = {Journal of Chinese Language and Computing},
	volume = {15},
	number = {2},
	pages = {113--123},
	year = {2005},
	file = {JCLC_V15_N2_5.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\NER3EJ4J\\JCLC_V15_N2_5.pdf:application/pdf}
}

@inproceedings{chen_unigram_2005,
	title = {Unigram language model for Chinese word segmentation},
	url = {http://acl.ldc.upenn.edu/I/I05/I05-3019.pdf},
	urldate = {2012-10-22},
	booktitle = {Proceedings of the 4th {SIGHAN} Workshop on Chinese Language Processing},
	author = {Chen, A. and Zhou, Y. and Zhang, A. and Sun, G.},
	year = {2005},
	pages = {138--141},
	file = {Full Text:E\:\\Dropbox\\Others\\zotero\\storage\\I7GTIX2H\\Chen et al. - 2005 - Unigram language model for Chinese word segmentati.pdf:application/pdf}
}

@inproceedings{asahara_combination_2005,
	title = {Combination of machine learning methods for optimum {Chinese} word segmentation},
	url = {http://acl.ldc.upenn.edu/I/I05/I05-3018.pdf},
	urldate = {2012-10-22},
	booktitle = {Proc. Fourth {SIGHAN} Workshop on Chinese Language Processing},
	author = {Asahara, M. and Fukuoka, K. and Azuma, A. and Goh, C. L. and Watanabe, Y. and Matsumoto, Y. and Tsuzuki, T.},
	year = {2005},
	pages = {134--137},
	file = {Full Text:E\:\\Dropbox\\Others\\zotero\\storage\\MQFQ2Z3A\\Asahara et al. - 2005 - Combination of machine learning methods for optimu.pdf:application/pdf}
}

@article{__2004,
	title = {基于无指导学习策略的无词表条件下的汉语自动分词},
	volume = {27},
	number = {6},
	journal = {计算机学报},
	author = {孙, 茂松 and 肖, 明 and 邹, 嘉彦},
	year = {2004},
	pages = {736--742},
	annote = {使用互信息与t测试差当作两个判据以字为单位进行无监督分词。以字算的标注准确度可到85\%左右。},
	file = {基于词频统计的中文分词的研究.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\TBAU7EVF\\基于词频统计的中文分词的研究.pdf:application/pdf;基于无指导学习策略的无词表条件下的汉语自动分词.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\B6K4UGEW\\基于无指导学习策略的无词表条件下的汉语自动分词.pdf:application/pdf}
}

@inproceedings{peng_chinese_2004,
	address = {Geneva, Switzerland},
	title = {Chinese Segmentation and New Word Detection using Conditional Random Fields},
	booktitle = {Proceedings of Coling 2004},
	publisher = {{COLING}},
	author = {Peng, Fuchun and Feng, Fangfang and McCallum, Andrew},
	month = aug,
	year = {2004},
	pages = {562--568},
	annote = {{将CRF引入中文分词}},
	file = {C04-1081.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\Z95MIN2M\\C04-1081.pdf:application/pdf}
}

@inproceedings{ng_chinese_2004,
	address = {Barcelona, Spain},
	title = {Chinese Part-of-Speech Tagging: One-at-a-Time or All-at-Once? Word-Based or Character-Based?},
	booktitle = {Proceedings of {EMNLP} 2004},
	publisher = {Association for Computational Linguistics},
	author = {Ng, Hwee Tou and Low, Jin Kiat},
	editor = {Lin, Dekang and Wu, Dekai},
	month = jul,
	year = {2004},
	pages = {277--284},
	annote = {用最大熵模型试了三种方法，分开做分词与标注或者同时做，词性标注用基于字的特征或者用基于词的特征：
同时的基于字的最好，但是时间慢很多。
分开基于字的稍差，但快很多。
分开基于词的，分词性能当然与基于字的一样，但词性标注差很多，总时间快一点。词性标注差是因为词之中的字对确定词性很重要。
没有同时而且基于词的，估计是因为机器跑不动。也没有实验在分词阶段用基于词的特征。},
	file = {Ng.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\KDHXD98B\\Ng.pdf:application/pdf}
}

@inproceedings{kudo_applying_2004,
	author = {Kudo, T. and Yamamoto, K. and Matsumoto, Y.},
	title = {Applying conditional random fields to Japanese morphological analysis},
	booktitle = {Proc. of {EMNLP}},
	volume = {2004},
	year = {2004},
	annote = {{用改造过的CRF模型做日文分词。以词为单位，即y长度与x不一定相等。}},
	file = {W04-3230.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\IEFZXRIT\\W04-3230.pdf:application/pdf}
}

@inproceedings{gao_adaptive_2004,
	author = {Gao, J. and Wu, A. and Li, M. and Huang, C. N. and Li, H. and Xia, X. and Qin, H.},
	title = {Adaptive Chinese word segmentation},
	booktitle = {Proceedings of {ACL-2004}},
	year = {2004},
	file = {133_pdf_2-col.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\AC3KXQNK\\133_pdf_2-col.pdf:application/pdf}
}

@article{feng_unsupervised_2004,
	author = {Feng, Haodi and Chen, Kang and Kit, Chunyu and Deng, Xiaotie},
	title = {Unsupervised segmentation of Chinese corpus using accessor variety},
	journal = {Natural Language Processing {IJCNLP} 2004},
	pages = {694--703},
	year = {2004},
	annote = {{如何用Accessor} variety 构造一个分词器。如何设计目标函数。},
	file = {download.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\RTTCFJH3\\download.pdf:application/pdf}
}

@article{feng_accessor_2004,
	author = {Feng, Haodi and Chen, Kang and Deng, Xiaotie and Zheng, Weimin},
	title = {Accessor variety criteria for Chinese word extraction},
	journal = {Computational Linguistics},
	volume = {30},
	number = {1},
	pages = {75--93},
	year = {2004},
	keywords = {{CL}},
	file = {089120104773633394.lowlink.pdf_v03.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\NZPE2TZZ\\089120104773633394.lowlink.pdf_v03.pdf:application/pdf}
}

@inproceedings{zhang_hhmm-based_2003,
	title = {{HHMM-based} Chinese lexical analyzer {ICTCLAS}},
	booktitle = {Proceedings of the second {SIGHAN} workshop on Chinese language processing-Volume 17},
	publisher = {Association for Computational Linguistics},
	author = {Zhang, H. P. and Yu, H. K. and Xiong, D. Y. and Liu, Q.},
	year = {2003},
	pages = {184--187},
	annote = {{实用化的分词工具包ICTCLAS的介绍性论文。}},
	file = {W03-1730.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\3NUB4A8H\\W03-1730.pdf:application/pdf}
}

@inproceedings{zhang_chinese_2003,
	title = {Chinese lexical analysis using hierarchical hidden {Markov} model},
	booktitle = {Proceedings of the second {SIGHAN} workshop on Chinese language processing-Volume 17},
	publisher = {Association for Computational Linguistics},
	author = {Zhang, H. P. and Liu, Q. and Cheng, X. Q. and Zhang, H. and Yu, H. K.},
	year = {2003},
	pages = {63--70},
	file = {ft_gateway.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\H2MV6RQP\\ft_gateway.pdf:application/pdf}
}

@article{xue_chinese_2003,
	author = {Xue, Nianwen},
	title = {Chinese Word Segmentation as Character Tagging},
	journal = {Computational Linguistics and Chinese Language Processing},
	volume = {8},
	number = {1},
	pages = {29--48},
	year = {2003},
	file = {v8n1a2.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\FVNFAIQN\\v8n1a2.pdf:application/pdf}
}

@inproceedings{xue_chinese_2003-1,
	author = {Xue, Nianwen and Shen, Libin},
	title = {Chinese Word Segmentation as {LMR} Tagging},
	booktitle = {Proceedings of the second {SIGHAN} workshop on Chinese language processing-Volume 17},
	pages = {176--179},
	year = {2003},
	publisher = {Association for Computational Linguistics},
	file = {Xue 和 Shen - 2003 - Chinese Word Segmentation as LMR Tagging.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\FD6TT8VP\\Xue 和 Shen - 2003 - Chinese Word Segmentation as LMR Tagging.pdf:application/pdf}
}

@inproceedings{sproat_first_2003,
	address = {Sapporo, Japan},
	title = {The first international Chinese word segmentation bakeoff},
	booktitle = {Proceedings of the second {SIGHAN} workshop on Chinese language processing},
	publisher = {Association for Computational Linguistics},
	author = {Sproat, R. and Emerson, T.},
	month = jul,
	year = {2003},
	pages = {133--143},
	file = {Sproat.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\ISTB7KV5\\Sproat.pdf:application/pdf}
}

@inproceedings{luo_maximum_2003,
	address = {Morristown, {NJ}, {USA}},
	title = {A maximum entropy Chinese character-based parser},
	booktitle = {Proceedings of the 2003 Conference on Empirical Methods in Natural Language Processing},
	publisher = {Association for Computational Linguistics},
	author = {Luo, X.},
	year = {2003},
	pages = {192--199},
	file = {Luo.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\P8U9QSD7\\Luo.pdf:application/pdf;W03-1025.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\3WXTGNCN\\W03-1025.pdf:application/pdf}
}

@inproceedings{gao_improved_2003,
	author = {Gao, J. and Li, M. and Huang, C. N.},
	title = {Improved source-channel models for Chinese word segmentation},
	booktitle = {Proceedings of the 41st Annual Meeting on Association for Computational Linguistics},
	pages = {272--279},
	year = {2003}
}

@inproceedings{chen_chinese_2003,
	address = {Sapporo, Japan},
	title = {Chinese word segmentation using minimal linguistic knowledge},
	booktitle = {Proceedings of the second {SIGHAN} workshop on Chinese language processing},
	publisher = {Association for Computational Linguistics},
	author = {Chen, A.},
	month = jul,
	year = {2003},
	pages = {148--151},
	file = {Chen.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\KHQIWTH9\\Chen.pdf:application/pdf}
}

@inproceedings{asahara_combining_2003,
	title = {Combining segmenter and chunker for Chinese word segmentation},
	booktitle = {Proceedings of the 2nd {SIGHAN} Workshop on Chinese Language Processing},
	author = {Asahara, M. and Goh, C. L. and Wang, X. and Matsumoto, Y.},
	year = {2003},
	pages = {144--147},
	file = {Asahara.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\ZG5254F8\\Asahara.pdf:application/pdf}
}

@article{__2002,
	title = {北京大学现代汉语语料库基本加工规范},
	volume = {16},
	url = {http://www.icl.pku.cn/icl_tr/papers_2000-2003/2002/0409_22_%B1%B1%BE%A9%B4%F3%D1%A7%CF%D6%B4%FA%BA%BA%D3%EF%D3%EF%C1%CF%BF%E2%BB%F9%B1%BE%BC%D3%B9%A4%B9%E6%B7%B6.pdf},
	number = {5},
	urldate = {2012-08-17},
	journal = {中文信息学报},
	author = {俞士汶 and 段慧明 and 朱学锋 and 孙斌},
	year = {2002},
	pages = {49--64},
	file = {[PDF] from pku.cn:E\:\\Dropbox\\Others\\zotero\\storage\\IHK3PTF3\\俞士汶 et al. - 2002 - 北京大学现代汉语语料库基本加工规范.pdf:application/pdf}
}

@inproceedings{xue_combining_2002,
	title = {Combining classifiers for Chinese word segmentation},
	booktitle = {Proceedings of the 1st {SIGHAN} Workshop on Chinese Language Processing},
	author = {Xue, Nianwen and Converse, Susan},
	year = {2002},
	pages = {63--70},
	annote = {里程碑，第一次提出字标注的分词模型},
	file = {w09-15.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\24VZMJEP\\w09-15.pdf:application/pdf}
}

@article{sproat_corpus-based_2002,
	title = {Corpus-based methods in Chinese morphology and phonology},
	journal = {{COLING} 2002},
	author = {Sproat, R. and Shih, C.},
	year = {2002},
	file = {lecture2.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\EQNP7NU2\\lecture2.pdf:application/pdf}
}

@article{sproat_corpus-based_2002-1,
	author = {Sproat, R. and Shih, C.},
	title = {Corpus-based methods in Chinese morphology},
	journal = {Tutorial at the 19th {COLING}},
	year = {2002},
	file = {10.1.1.80.5661.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\X5W8T6MZ\\10.1.1.80.5661.pdf:application/pdf}
}

@article{__2001,
	title = {汉语自动分词研究评述},
	volume = {3},
	number = {1},
	journal = {当代语言学},
	author = {孙, 茂松 and 邹, 嘉彦},
	year = {2001},
	pages = {22--32},
	annote = {{对上世纪中文分词研究的一个较好的回顾及评论。歧义，交集歧义与覆盖歧义；OOV。}},
	file = {汉语自动分词研究评述.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\6MH6EPF5\\汉语自动分词研究评述.pdf:application/pdf}
}

@phdthesis{xue_defining_2001,
	author = {Xue, Nianwen},
	title = {Defining and automatically identifying words in Chinese},
	school = {University of Delaware},
	year = {2001},
	file = {xue_diss.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\2NGGTTCP\\xue_diss.pdf:application/pdf}
}

@article{peng_self-supervised_2001,
	author = {Peng, F. and Schuurmans, D.},
	title = {Self-supervised Chinese word segmentation},
	journal = {Advances in Intelligent Data Analysis},
	pages = {238--247},
	year = {2001},
	annote = {{纯无监督分词，EM算法}
self-supervised，分两个词典。
{MI词典剪枝}},
	file = {paper_peng01selfsupervised.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\9E7WVNAA\\paper_peng01selfsupervised.pdf:application/pdf}
}

@book{__2000,
	author = {刘开瑛},
	title = {中文文本自动分词和标注},
	publisher = {商务印书馆},
	year = {2000}
}

@article{teahan_compression-based_2000,
	title = {A compression-based algorithm for Chinese word segmentation},
	volume = {26},
	url = {http://portal.acm.org/citation.cfm?id=971873&dl=},
	abstract = {Chinese is written without using spaces or other word delimiters. Although a text may be thought of as a corresponding sequence of words, there is considerable ambiguity in the placement of boundaries. Interpreting a text as a sequence of words is beneficial for some information retrieval and storage tasks:for example, fulltext search, word-based compression, and keyphrase extraction. We describe a scheme that infers appropriate positions for word boundaries using an adaptive language model that is standard in text compression. It is trained on a corpus of presegmented text, and when applied to new text, interpolates word boundaries so as to maximize the compression obtained. This simple and general method performs well with respect to specialized schemes for Chinese language segmentation.},
	number = {3},
	urldate = {2008-03-27},
	journal = {Computational Linguistics},
	author = {Teahan, W. J. and McNab, Rodger and Wen, Yingying and Witten, Ian H.},
	year = {2000},
	pages = {375--393},
	file = {ACM Snapshot:E\:\\Dropbox\\Others\\zotero\\storage\\X8SFM47K\\blank.html:text/html;p375-teahan.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\MUGDTQNG\\p375-teahan.pdf:application/pdf}
}

@article{__1999,
	title = {谈谈汉语分词语料库的一致性问题},
	volume = {2},
	url = {http://www.fed.cuhk.edu.hk/en/lang/b2g_lang.phtml?URL=/en%2Flang%2F199900000002%2F0088c.htm},
	urldate = {2012-08-17},
	journal = {语言文字应用},
	author = {孙茂松},
	year = {1999},
	pages = {88--91},
	annote = {清华大学分词语料库标注},
	file = {Snapshot:E\:\\Dropbox\\Others\\zotero\\storage\\K33P8D7F\\b2g_lang.html:text/html}
}

@inproceedings{ge_discovering_1999,
	title = {Discovering Chinese words from unsegmented text (poster abstract)},
	booktitle = {Proceedings of the 22nd annual international {ACM} {SIGIR} conference on Research and development in information retrieval},
	publisher = {{ACM}},
	author = {Ge, X. and Pratt, W. and Smyth, P.},
	year = {1999},
	pages = {271--272},
	annote = {{纯无监督分词，EM算法，0阶隐马尔可夫链}},
	file = {p271-ge.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\ATCE6H5Z\\p271-ge.pdf:application/pdf}
}

@article{__1998,
	title = {串频统计和词形匹配相结合的汉语自动分词系统},
	volume = {12},
	number = {1},
	journal = {中文信息学报},
	author = {刘, 挺 and 吴, 岩},
	year = {1998},
	pages = {17--25},
	file = {串频统计和词形匹配相结合的汉语自动分词系统.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\M35BBPQ8\\串频统计和词形匹配相结合的汉语自动分词系统.pdf:application/pdf}
}

@inproceedings{sun_chinese_1998,
	address = {Morristown, {NJ}, {USA}},
	title = {Chinese Word Segmentation without Using Lexicon and Hand-crafted Training Data},
	booktitle = {Proceedings of the 17th international conference on Computational linguistics-Volume 2},
	publisher = {Association for Computational Linguistics},
	author = {Sun, Maosong and Shen, Dayang and Tsou, Benjamin K.},
	year = {1998},
	pages = {1265--1271},
	file = {C98-2201.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\CPXFA4BX\\C98-2201.pdf:application/pdf}
}

@article{kazakov_hybrid_1998,
	author = {Kazakov, D. and Manandhar, S.},
	title = {A hybrid approach to word segmentation},
	journal = {Lecture notes in computer science},
	pages = {125--134},
	year = {1998}
}

@inproceedings{__1997,
	author = {邱超捷 and 宋柔 and others},
	title = {大规模语料库中词语接续对的统计与分析},
	booktitle = {第四届计算语言学会议论文集 (语言工程)},
	year = {1997},
	url = {http://scholar.ilib.cn/A-%E4%BC%9A%E8%AE%AE%E8%AE%B0%E5%BD%95ID~42520.html},
	urldate = {2012-08-17},
	annote = {北语的语料库},
	file = {Snapshot:E\:\\Dropbox\\Others\\zotero\\storage\\W86E2XUC\\A-会议记录ID~42520.html:text/html}
}

@article{__1997-1,
	author = {黄, 昌宁},
	title = {中文信息处理中的分词问题},
	journal = {Applied Linguistics},
	volume = {1},
	pages = {72--78},
	year = {1997},
	file = {中文信息处理中的分词问题.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\HJNXENTN\\中文信息处理中的分词问题.pdf:application/pdf}
}

@article{chang_unsupervised_1997,
	title = {An unsupervised iterative method for Chinese new lexicon extraction},
	volume = {1},
	number = {1},
	journal = {International Journal of Computational Linguistics \& Chinese Language Processing},
	author = {Chang, J. S. and Su, K. Y.},
	year = {1997},
	pages = {101--157}
}

@article{sproat_stochastic_1996,
	author = {Sproat, R. and Gale, W. and Shih, C. and Chang, N.},
	title = {A stochastic finite-state word-segmentation algorithm for Chinese},
	journal = {Computational Linguistics},
	volume = {22},
	number = {3},
	pages = {377--404},
	year = {1996},
	file = {J96-3004.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\U6J3H6DN\\J96-3004.pdf:application/pdf}
}

@inproceedings{ponte_useg:_1996,
	author = {Ponte, J. M. and Croft, W. B.},
	title = {Useg: A retargetable word segmentation procedure for information retrieval},
	booktitle = {Symposium on Document Analysis and Information Retrieval},
	volume = {96},
	year = {1996},
	publisher = {Citeseer},
	file = {10.1.1.53.2710.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\BUB3PKTG\\10.1.1.53.2710.pdf:application/pdf}
}

@article{chen_sinica_1996,
	author = {Chen, K. J. and Huang, C. R. and Chang, L. P. and Hsu, H. L.},
	title = {Sinica corpus: Design methodology for balanced corpora},
	shorttitle = {Sinica corpus},
	journal = {Language},
	volume = {167},
	pages = {176},
	year = {1996},
	url = {http://www.aclweb.org/anthology/Y/Y96/Y96-1018.pdf},
	urldate = {2012-08-21},
	annote = {台湾中研院语料库
 },
	file = {[PDF] from aclweb.org:E\:\\Dropbox\\Others\\zotero\\storage\\K9A9TKSX\\Chen et al. - 1996 - Sinica corpus Design methodology for balanced cor.pdf:application/pdf}
}

@inproceedings{chen_word_1992,
	title = {Word identification for Mandarin Chinese sentences},
	booktitle = {Proceedings of the 14th conference on Computational linguistics-Volume 1},
	author = {Chen, K. J. and Liu, S. H.},
	year = {1992},
	pages = {101--107},
	file = {ft_gateway.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\GVETIT32\\ft_gateway.pdf:application/pdf}
}

@article{aoe_efficient_1992,
	title = {An efficient implementation of trie structures},
	volume = {22},
	issn = {1097-024X},
	url = {http://onlinelibrary.wiley.com/doi/10.1002/spe.4380220902/abstract},
	doi = {10.1002/spe.4380220902},
	abstract = {A new internal array structure, called a double-array, implementing a trie structure is presented. The double-array combines the fast access of a matrix form with the compactness of a list form. The algorithms for retrieval, insertion and deletion are introduced through examples. Although insertion is rather slow, it is still practical, and both the deletion and the retrieval time can be improved from the list form. From the comparison with the list for various large sets of keys, it is shown that the size of the double-array can be about 17 per cent smaller than that of the list, and that the retrieval speed of the double-array can be from 3–1 to 5–1 times faster than that of the list.},
	language = {en},
	number = {9},
	urldate = {2011-05-05},
	journal = {Software: Practice and Experience},
	author = {Aoe, Jun-Ichi and Morimoto, Katsushi and Sato, Takashi},
	month = sep,
	year = {1992},
	keywords = {Dictionary, Information retrieval, Key retrieval strategies, Natural language processing},
	pages = {695--721},
	annote = {双数组trie书}
}

@article{sproat_statistical_1990,
	author = {Sproat, R. and Shih, C.},
	title = {A statistical method for finding word boundaries in Chinese text},
	journal = {Computer Processing of Chinese and Oriental Languages},
	volume = {4},
	number = {4},
	pages = {336--351},
	year = {1990},
	file = {cpcol.pdf:E\:\\Dropbox\\Others\\zotero\\storage\\DE3UPG4Q\\cpcol.pdf:application/pdf}
}