From 6c573c2d477cf3d45ed8629402c429a2352bca6b Mon Sep 17 00:00:00 2001 From: syimyuzya Date: Thu, 26 Dec 2024 21:32:10 +0800 Subject: [PATCH] =?UTF-8?q?Check=20=E5=8F=8D=E5=88=87=20&=20=E9=87=8B?= =?UTF-8?q?=E7=BE=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- check.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/check.py b/check.py index facdb91..e678842 100644 --- a/check.py +++ b/check.py @@ -9,6 +9,12 @@ PATTERN_描述 = re.compile( f'([{所有母}])([開合])?([{所有等}])([ABC])?([{所有韻}])([{所有聲}])' ) +PATTERN_反切 = re.compile( + r"""(?x)( + \[.\] | # 脫字 + . ( <.> | ⦉.⦊ | \(.\) | ⦅.⦆ )* # 原貌及校正 + ){2}""" +) def contains_ascii(s: str): @@ -35,12 +41,16 @@ def contains_ascii(s: str): 釋義, 釋義補充, ) = line.rstrip('\n').split(',') + assert ( PATTERN_描述.fullmatch(音韻地位描述) is not None ), f'invalid 音韻地位: {音韻地位描述}' - # TODO 反切 + + if 反切: + assert PATTERN_反切.fullmatch(反切) is not None, f'invalid 反切: {反切}' assert len(字頭) == 1, 'The length of 字頭 should be 1' + + assert 釋義 + 釋義補充, '釋義 and 釋義補充 should not be both empty' assert not contains_ascii( 釋義 ), '釋義 should not contain any ASCII characters' - # TODO 釋義 should not be empty