From 0645a81bcc7b071974c3c671ab00d9f3be221501 Mon Sep 17 00:00:00 2001
From: xu-song
Date: Fri, 15 Jan 2021 18:11:49 +0800
Subject: [PATCH] refactor finalseg

---
 jieba/__init__.py | 38 +++++++++++++++++---------------------
 1 file changed, 17 insertions(+), 21 deletions(-)

diff --git a/jieba/__init__.py b/jieba/__init__.py
index 90f0bcd5..6c0308dc 100644
--- a/jieba/__init__.py
+++ b/jieba/__init__.py
@@ -247,6 +247,18 @@ def __cut_DAG_NO_HMM(self, sentence):
             buf = ''
 
     def __cut_DAG(self, sentence):
+
+        def _cut_finalseg(buf):
+            if len(buf) == 1:
+                yield buf
+            elif not self.FREQ.get(buf):
+                recognized = finalseg.cut(buf)
+                for t in recognized:
+                    yield t
+            else:
+                for elem in buf:
+                    yield elem
+
         DAG = self.get_DAG(sentence)
         route = {}
         self.calc(sentence, DAG, route)
@@ -260,31 +272,15 @@ def __cut_DAG(self, sentence):
                 buf += l_word
             else:
                 if buf:
-                    if len(buf) == 1:
-                        yield buf
-                        buf = ''
-                    else:
-                        if not self.FREQ.get(buf):
-                            recognized = finalseg.cut(buf)
-                            for t in recognized:
-                                yield t
-                        else:
-                            for elem in buf:
-                                yield elem
-                        buf = ''
+                    for buf_cut in _cut_finalseg(buf):
+                        yield buf_cut
+                    buf = ''
                 yield l_word
             x = y
 
         if buf:
-            if len(buf) == 1:
-                yield buf
-            elif not self.FREQ.get(buf):
-                recognized = finalseg.cut(buf)
-                for t in recognized:
-                    yield t
-            else:
-                for elem in buf:
-                    yield elem
+            for buf_cut in _cut_finalseg(buf):
+                yield buf_cut
 
     def cut(self, sentence, cut_all=False, HMM=True, use_paddle=False):
         """