From ad7f6b8c188bfda400458466bd427a4e31c5b269 Mon Sep 17 00:00:00 2001 From: Will Harris Date: Tue, 13 Sep 2022 11:42:28 +0100 Subject: [PATCH 1/6] Add Cython to PyPI classifiers --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index 86d777a..8552239 100644 --- a/setup.py +++ b/setup.py @@ -39,6 +39,7 @@ "GNU Library or Lesser General Public License (LGPL)", "Operating System :: POSIX", "Programming Language :: C", + "Programming Language :: Cython", "Programming Language :: Python", "Topic :: Software Development :: Libraries :: Python Modules", "Topic :: Text Processing :: Indexing", From db17e872c93f9e5e3889ead947f1638a72ff9172 Mon Sep 17 00:00:00 2001 From: Will Harris Date: Tue, 13 Sep 2022 11:48:55 +0100 Subject: [PATCH 2/6] Update homepage This package moved to GitHub when Google Code shut down circa 2016. --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 8552239..7548faa 100644 --- a/setup.py +++ b/setup.py @@ -47,7 +47,7 @@ install_requires=["setuptools"], author="Will Harris", author_email="esmre@greatlibrary.net", - url="http://code.google.com/p/esmre/", + url="https://github.com/wharris/esmre", license="GNU LGPL", platforms=["POSIX"], ext_modules=cythonize([module1]), From ea4531319c1f8cf386cc33b17e4a1a64e09a5c2d Mon Sep 17 00:00:00 2001 From: Will Harris Date: Tue, 13 Sep 2022 11:58:25 +0100 Subject: [PATCH 3/6] Bump version to 1.0 esmre has been used in production for about 15 years. I'm now confident it can leave beta testing.
--- setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 7548faa..e5b0797 100644 --- a/setup.py +++ b/setup.py @@ -28,12 +28,12 @@ setup( name="esmre", - version="0.5.2", + version="1.0", description="Regular expression accelerator", long_description=long_description, long_description_content_type="text/markdown", classifiers=[ - "Development Status :: 4 - Beta", + "Development Status :: 5 - Production/Stable", "Intended Audience :: Developers", "License :: OSI Approved :: " "GNU Library or Lesser General Public License (LGPL)", From 2a53e13349784183df03b0014c4079130cb5497f Mon Sep 17 00:00:00 2001 From: Will Harris Date: Tue, 13 Sep 2022 12:35:54 +0100 Subject: [PATCH 4/6] Apply consistent code style with black (v22.8.0) I used `black --line-length 79 .` --- src/esmre.py | 108 ++++++++++++------------ test/test_esm.py | 59 ++++++------- test/test_esmre.py | 201 ++++++++++++++++++++++----------------------- 3 files changed, 185 insertions(+), 183 deletions(-) diff --git a/src/esmre.py b/src/esmre.py index 304fba9..9e78158 100644 --- a/src/esmre.py +++ b/src/esmre.py @@ -3,17 +3,17 @@ # esmre.py - clue-indexed regular expressions module # Copyright (C) 2007-2008 Tideway Systems Limited. -# +# # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public # License as published by the Free Software Foundation; either # version 2.1 of the License, or (at your option) any later version. -# +# # This library is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # Lesser General Public License for more details.
-# +# # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 @@ -22,10 +22,11 @@ import esm import threading + class InBackslashState(object): def __init__(self, parent_state): self.parent_state = parent_state - + def process_byte(self, ch): return self.parent_state @@ -33,14 +34,14 @@ def process_byte(self, ch): class InClassState(object): def __init__(self, parent_state): self.parent_state = parent_state - + def process_byte(self, ch): if ch == "]": return self.parent_state - + elif ch == "\\": return InBackslashState(self) - + else: return self @@ -48,11 +49,11 @@ def process_byte(self, ch): class InBracesState(object): def __init__(self, parent_state): self.parent_state = parent_state - + def process_byte(self, ch): if ch == "}": return self.parent_state - + else: return self @@ -64,56 +65,56 @@ def __init__(self): def process_byte(self, ch): self.update_hints(ch) return self.next_state(ch) - + def bank_current_hint_with_last_byte(self): self.hints.append("") - + def bank_current_hint_and_forget_last_byte(self): if isinstance(self.hints[-1], list): del self.hints[-1] else: self.hints[-1] = self.hints[-1][:-1] - + self.hints.append("") - + def forget_all_hints(self): self.hints = [""] - + def append_to_current_hint(self, ch): self.hints[-1] += ch - + def update_hints(self, ch): if ch in "?*{": self.bank_current_hint_and_forget_last_byte() - + elif ch in "+.^$([\\": self.bank_current_hint_with_last_byte() - + elif ch == "|": self.forget_all_hints() - + else: self.append_to_current_hint(ch) - + def next_state(self, ch): if ch == "(": return StartOfGroupState(self) - + elif ch == "[": return InClassState(self) - + elif ch == "{": return InBracesState(self) - + elif ch == "\\": return InBackslashState(self) - + elif ch == "|": return self.alternation_state() - + else: return self - + def alternation_state(self): 
raise NotImplementedError @@ -126,7 +127,7 @@ def alternation_state(self): class StartOfGroupState(object): def __init__(self, parent_state): self.parent_state = parent_state - + def process_byte(self, ch): if ch == "?": return StartOfExtensionGroupState(self.parent_state) @@ -139,23 +140,23 @@ def __init__(self, parent_state): CollectingState.__init__(self) self.parent_state = parent_state self.had_alternation = False - + def update_hints(self, ch): if ch == ")": if not self.had_alternation: self.parent_state.hints.append(self.hints) else: CollectingState.update_hints(self, ch) - + def next_state(self, ch): if ch == ")": return self.close_group_state() else: return CollectingState.next_state(self, ch) - + def close_group_state(self): return self.parent_state - + def alternation_state(self): self.had_alternation = True return self @@ -164,7 +165,7 @@ def alternation_state(self): class StartOfExtensionGroupState(object): def __init__(self, parent_state): self.parent_state = parent_state - + def process_byte(self, ch): if ch == "P": return MaybeStartOfNamedGroupState(self.parent_state) @@ -175,7 +176,7 @@ def process_byte(self, ch): class MaybeStartOfNamedGroupState(object): def __init__(self, parent_state): self.parent_state = parent_state - + def process_byte(self, ch): if ch == "<": return InNamedGroupNameState(self.parent_state) @@ -186,7 +187,7 @@ def process_byte(self, ch): class InNamedGroupNameState(object): def __init__(self, parent_state): self.parent_state = parent_state - + def process_byte(self, ch): if ch == ">": return InGroupState(self.parent_state) @@ -201,14 +202,14 @@ def update_hints(self, ch): def hints(regex): state = RootState() - + try: for ch in regex: state = state.process_byte(ch) - + except StopIteration: pass - + def flattened(l): for item in l: if isinstance(item, list): @@ -216,20 +217,20 @@ def flattened(l): yield i else: yield item - + return [hint for hint in flattened(state.hints) if hint] def shortlist(hints): if not hints: return 
[] - + best = "" - + for hint in hints: if len(hint) > len(best): best = hint - + return [best] @@ -239,37 +240,36 @@ def __init__(self): self.hintless_objects = list() self.fixed = False self.lock = threading.Lock() - - + def enter(self, regex, obj): self.lock.acquire() try: - + if self.fixed: raise TypeError("enter() cannot be called after query()") - + keywords = shortlist(hints(regex)) - + if not keywords: self.hintless_objects.append(obj) - + for hint in shortlist(hints(regex)): self.esm.enter(hint.lower(), obj) - + finally: self.lock.release() - - + def query(self, string): self.lock.acquire() try: - + if not self.fixed: self.esm.fix() self.fixed = True - + finally: self.lock.release() - - return self.hintless_objects + \ - [obj for (_, obj) in self.esm.query(string.lower())] + + return self.hintless_objects + [ + obj for (_, obj) in self.esm.query(string.lower()) + ] diff --git a/test/test_esm.py b/test/test_esm.py index 4d528f3..695fe80 100644 --- a/test/test_esm.py +++ b/test/test_esm.py @@ -3,17 +3,17 @@ # esm_tests.py - tests for esm extension module # Copyright (C) 2007 Tideway Systems Limited. -# +# # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public # License as published by the Free Software Foundation; either # version 2.1 of the License, or (at your option) any later version. -# +# # This library is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # Lesser General Public License for more details. 
-# +# # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 @@ -23,22 +23,23 @@ import esm import esmre + class IndexTests(unittest.TestCase): def testHasConstructor(self): self.assertTrue(esm.Index()) def testIndexHasEnterMethod(self): esm.Index().enter("keyword") - + def testIndexMustHaveStringForFirstArgument(self): self.assertRaises(TypeError, esm.Index().enter, 0) self.assertRaises(TypeError, esm.Index().enter, []) self.assertRaises(TypeError, esm.Index().enter) - + def testIndexFix(self): index = esm.Index() index.fix() - + def testQuery(self): index = esm.Index() index.enter("he") @@ -46,65 +47,67 @@ def testQuery(self): index.enter("his") index.enter("hers") index.fix() - - self.assertEqual([((1, 4), "his"), - ((5, 7), "he"), - ((13, 16), "his")], - index.query("this here is history")) -# 0123456789.123456789 + + self.assertEqual( + [((1, 4), "his"), ((5, 7), "he"), ((13, 16), "his")], + index.query("this here is history"), + ) + + # 0123456789.123456789 def testEnterObject(self): index = esm.Index() - + mint_object = dict() index.enter("mint", mint_object) - + pepper_object = dict() index.enter("pepper", pepper_object) - + index.fix() results = index.query("mint sauce") - + self.assertEqual(1, len(results)) self.assertTrue(isinstance(results[0], tuple)) - + slice_indices, associated_object = results[0] - - self.assertEqual((0,4), slice_indices) + + self.assertEqual((0, 4), slice_indices) self.assertTrue(associated_object is mint_object) - + def testCantFixIndexWhenAlreadyFixed(self): index = esm.Index() index.fix() - + self.assertRaises(TypeError, index.fix) def testCantEnterWhenAlreadyFixed(self): index = esm.Index() index.fix() - + self.assertRaises(TypeError, index.enter, "foo") def testQueryUntilFixed(self): index = esm.Index() self.assertRaises(TypeError, index.query, "foo") - + def 
testObjectsForCommonEndingsAreDecrefedCorrectly(self): o = "Owt" import sys + initial_ref_count = sys.getrefcount(o) - + index = esm.Index() index.enter("food", o) index.enter("ood", o) index.fix() index.query("blah") - + index = None del index - + self.assertEqual(initial_ref_count, sys.getrefcount(o)) -if __name__ == '__main__': - unittest.main() \ No newline at end of file +if __name__ == "__main__": + unittest.main() diff --git a/test/test_esmre.py b/test/test_esmre.py index 330d75d..1460a84 100644 --- a/test/test_esmre.py +++ b/test/test_esmre.py @@ -3,17 +3,17 @@ # esmre_tests.py - tests for esmre module # Copyright (C) 2007-2008 Tideway Systems Limited. -# +# # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public # License as published by the Free Software Foundation; either # version 2.1 of the License, or (at your option) any later version. -# +# # This library is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # Lesser General Public License for more details. 
-# +# # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 @@ -22,142 +22,136 @@ import unittest import esmre + class HintExtractionTests(unittest.TestCase): def checkHints(self, expected_hints, regex): self.assertEqual(set(expected_hints), set(esmre.hints(regex))) - + def testSimpleString(self): self.checkHints(["yarr"], r"yarr") - + def testSkipsOptionalCharacter(self): self.checkHints(["dubloon"], r"dubloons?") - + def testStartsNewStringAfterOptionalCharacter(self): self.checkHints(["ship", "shape"], r"ship ?shape") - + def testSkipsOptionalRepeatedCharacter(self): self.checkHints(["bristol", "fasion"], r"bristol *fasion") - + def testIncludesRepeatedCharacterButStartsNewHint(self): - self.checkHints(["ava", "st me harties"], - r"ava+st me harties") - + self.checkHints(["ava", "st me harties"], r"ava+st me harties") + def testSkipsGroupsWithAlternation(self): - self.checkHints(["Hoist the ", ", ye ", "!"], - r"Hoist the (mizzen mast|main brace), " - r"ye (landlubbers|scurvy dogs)!") - + self.checkHints( + ["Hoist the ", ", ye ", "!"], + r"Hoist the (mizzen mast|main brace), " + r"ye (landlubbers|scurvy dogs)!", + ) + def testSkipsAny(self): - self.checkHints(["After 10 paces, ", " marks the spot"], - r"After 10 paces, . marks the spot") - + self.checkHints( + ["After 10 paces, ", " marks the spot"], + r"After 10 paces, . 
marks the spot", + ) + def testSkipsOneOrMoreAny(self): - self.checkHints(["Hard to ", "!"], - r"Hard to .+!") - + self.checkHints(["Hard to ", "!"], r"Hard to .+!") + def testSkipsNestedGroups(self): - self.checkHints(["Squark!", " Pieces of ", "!"], - r"Squark!( Pieces of (.+)!)") - + self.checkHints( + ["Squark!", " Pieces of ", "!"], r"Squark!( Pieces of (.+)!)" + ) + def testSkipsCharacterClass(self): - self.checkHints(["r"], - r"[ya]a*r+") + self.checkHints(["r"], r"[ya]a*r+") def testRightBracketDoesNotCloseGroupIfInClass(self): - self.checkHints([":=", "X"], - r":=([)D])X") - + self.checkHints([":=", "X"], r":=([)D])X") + def testSkipsBackslashMetacharacters(self): - self.checkHints(["Cap'n", " ", " Beard"], - r"Cap'n\b ([\S] Beard)") - + self.checkHints(["Cap'n", " ", " Beard"], r"Cap'n\b ([\S] Beard)") + def testBackslashBracketDoesNotCloseGroup(self): - self.checkHints([":=", "X"], - r":=(\)|D)X") - + self.checkHints([":=", "X"], r":=(\)|D)X") + def testBackslashSquareBracketDoesNotCloseClass(self): - self.checkHints([":=", "X"], - r":=[)D\]]X") - + self.checkHints([":=", "X"], r":=[)D\]]X") + def testSkipsMetacharactersAfterGroups(self): - self.checkHints(["Yo ", "ho ", " and a bottle of rum"], - r"Yo (ho )+ and a bottle of rum") - + self.checkHints( + ["Yo ", "ho ", " and a bottle of rum"], + r"Yo (ho )+ and a bottle of rum", + ) + def testSkipsRepetionBraces(self): - self.checkHints(["A", ", me harties"], - r"Ar{2-10}, me harties") - + self.checkHints(["A", ", me harties"], r"Ar{2-10}, me harties") + def testAlternationCausesEmptyResult(self): self.checkHints([], r"rum|grog") - + def testSkipMatchBeginning(self): self.checkHints(["The black perl"], r"^The black perl") - + def testSkipMatchEnd(self): self.checkHints(["Davey Jones' Locker"], r"Davey Jones' Locker$") - + def testOnlyGroupGivesEmptyResult(self): self.checkHints([], r"(rum|grog)") - + def testGetsHintsFromGroups(self): self.checkHints(["/"], r"([0-3][0-9]/[0-1][0-9]/[1-2][0-9]{3})") - + 
def testSkipsOptionalGroups(self): - self.checkHints(["Shiver me timbers!"], - r"Shiver me timbers!( Arrr!)?") - + self.checkHints(["Shiver me timbers!"], r"Shiver me timbers!( Arrr!)?") + def testSkipsMostExtensionGroups(self): for regex in [ - # set flag - r"(?i)(?L)(?m)(?s)(?u)(?x)", - - # non-grouping paren - r"(?:foo)", - - # previous named group - r"(?P=foo)", - - # comment - r"(?#foo)", - - # lookahead - r"(?=foo)", - - # negative lookahead - r"(?!foo)", - - # lookbehind - r"(?<=foo)", - - # negative lookbehind - r"(?[0-3][0-9]/[0-1][0-9]/[1-2][0-9]{3})") + ["/"], r"(?P[0-3][0-9]/[0-1][0-9]/[1-2][0-9]{3})" + ) class ShortlistTests(unittest.TestCase): def checkShortlist(self, expected_shortlist, hints): self.assertEqual(expected_shortlist, esmre.shortlist(hints)) - + def testShortlistIsEmptyForEmptyCandidates(self): self.checkShortlist([], []) - + def testShortlistIsOnlyCandidate(self): self.checkShortlist(["Blue Beard"], ["Blue Beard"]) - + def testShorlistSelectsLongestCandidate(self): self.checkShortlist(["Black Beard"], ["Black Beard", "Blue Beard"]) - + def testShorlistSelectsLongestCandidateAtEnd(self): - self.checkShortlist(["Yellow Beard"], - ["Black Beard", "Blue Beard", "Yellow Beard"]) + self.checkShortlist( + ["Yellow Beard"], ["Black Beard", "Blue Beard", "Yellow Beard"] + ) class IndexTests(unittest.TestCase): @@ -165,25 +159,30 @@ def setUp(self): self.index = esmre.Index() self.index.enter(r"Major-General\W*$", "savoy opera") self.index.enter(r"\bway\W+haye?\b", "sea shanty") - + def testSingleQuery(self): - self.assertEqual(["savoy opera"], self.index.query( - "I am the very model of a modern Major-General.")) - + self.assertEqual( + ["savoy opera"], + self.index.query("I am the very model of a modern Major-General."), + ) + def testCannotEnterAfterQuery(self): self.index.query("blah") self.assertRaises(TypeError, self.index.enter, "foo", "bar") - + def testCaseInsensitive(self): - self.assertEqual(["sea shanty"], self.index.query( - "Way, 
hay up she rises,")) - self.assertEqual(["sea shanty"], self.index.query( - "To my way haye, blow the man down,")) - + self.assertEqual( + ["sea shanty"], self.index.query("Way, hay up she rises,") + ) + self.assertEqual( + ["sea shanty"], + self.index.query("To my way haye, blow the man down,"), + ) + def testAlwaysReportsOpjectForHintlessExpressions(self): self.index.enter(r"(\d+\s)*(paces|yards)", "distance") self.assertTrue("distance" in self.index.query("'til morning")) - - -if __name__ == '__main__': - unittest.main() \ No newline at end of file + + +if __name__ == "__main__": + unittest.main() From ccf49aeb1f4920c31d7b7a0e317ff8445eaf8aa0 Mon Sep 17 00:00:00 2001 From: Will Harris Date: Tue, 13 Sep 2022 11:40:52 +0000 Subject: [PATCH 5/6] Clarify an ambiguous argument name Fixes flake8 report E741 --- src/esmre.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/esmre.py b/src/esmre.py index 9e78158..51f6950 100644 --- a/src/esmre.py +++ b/src/esmre.py @@ -210,8 +210,8 @@ def hints(regex): except StopIteration: pass - def flattened(l): - for item in l: + def flattened(hints): + for item in hints: if isinstance(item, list): for i in flattened(item): yield i From 3cf2f45072995d1bc62f41e952a453de3d39f692 Mon Sep 17 00:00:00 2001 From: Will Harris Date: Tue, 13 Sep 2022 12:42:54 +0100 Subject: [PATCH 6/6] Remove an unused import Fixes flake8 report F401 --- test/test_esm.py | 1 - 1 file changed, 1 deletion(-) diff --git a/test/test_esm.py b/test/test_esm.py index 695fe80..ac834c2 100644 --- a/test/test_esm.py +++ b/test/test_esm.py @@ -21,7 +21,6 @@ import unittest import esm -import esmre class IndexTests(unittest.TestCase):