-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathblitztext.pyi
159 lines (125 loc) · 4.7 KB
/
blitztext.pyi
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
from typing import Dict, List, Optional, Tuple
class KeywordMatch:
    """
    Represents a matched keyword in the text.

    Attributes:
        keyword (str): The matched keyword or its clean name.
        similarity (float): The similarity score of the match
            (1.0 for exact matches).
        start (int): The starting index of the match in the text.
        end (int): The ending index of the match in the text.
    """

    keyword: str
    similarity: float
    start: int
    end: int

    def __repr__(self) -> str:
        """Return a string representation of the KeywordMatch object."""
        ...
class KeywordProcessor:
    """
    A processor for efficient keyword matching and replacement in text.

    Based on AhoCorasick for fast keyword operations,
    including fuzzy matching capabilities.
    """

    def __init__(
        self,
        case_sensitive: Optional[bool] = None,
        allow_overlaps: Optional[bool] = None,
    ) -> None:
        """
        Initialize a new KeywordProcessor.

        Args:
            case_sensitive: If True, matching will be case-sensitive.
                Default is False.
            allow_overlaps: If True, allows overlapping matches.
                Default is True.
        """
        ...

    def add_keyword(self, keyword: str, clean_name: Optional[str] = None) -> None:
        """
        Add a keyword to the processor.

        Args:
            keyword: The keyword to add.
            clean_name: An optional 'clean' version of the keyword to
                return when found.
        """
        ...

    def remove_keyword(self, keyword: str) -> bool:
        """
        Remove a keyword from the processor.

        Args:
            keyword: The keyword to remove.

        Returns:
            True if the keyword was found and removed, False otherwise.
        """
        ...

    def extract_keywords(
        self, text: str, threshold: Optional[float] = None
    ) -> List[KeywordMatch]:
        """
        Extract keywords from the given text.

        Args:
            text: The text to search for keywords.
            threshold: Threshold for fuzzy matching (0.0 to 1.0).
                Default is 1.0 (exact matches only).

        Returns:
            A list of KeywordMatch objects representing the found keywords.
        """
        ...

    def parallel_extract_keywords_from_texts(
        self, texts: List[str], threshold: Optional[float] = None
    ) -> List[List[KeywordMatch]]:
        """
        Extract keywords from multiple texts in parallel.

        Args:
            texts: A list of texts to search for keywords.
            threshold: Threshold for fuzzy matching (0.0 to 1.0).
                Default is 1.0 (exact matches only).

        Returns:
            A list of lists of KeywordMatch objects, one list per input text.
        """
        ...

    def replace_keywords(self, text: str, threshold: Optional[float] = None) -> str:
        """
        Replace keywords in the given text with their clean names.

        Args:
            text: The text in which to replace keywords.
            threshold: Threshold for fuzzy matching (0.0 to 1.0).
                Default is 1.0 (exact matches only).

        Returns:
            The text with keywords replaced by their clean names.
        """
        ...

    def parallel_replace_keywords_from_texts(
        self, texts: List[str], threshold: Optional[float] = None
    ) -> List[str]:
        """
        Replace keywords in multiple texts in parallel.

        Args:
            texts: A list of texts in which to replace keywords.
            threshold: Threshold for fuzzy matching (0.0 to 1.0).
                Default is 1.0 (exact matches only).

        Returns:
            A list of strings with keywords replaced by their clean names.
        """
        ...

    def get_all_keywords(self) -> Dict[str, str]:
        """
        Get all keywords and their clean names.

        Returns:
            A dictionary where keys are keywords and values are their
            clean names.
        """
        ...

    def set_non_word_boundaries(self, boundaries: List[str]) -> None:
        """
        Set the characters that are considered part of a word.

        Args:
            boundaries: A list of characters to be set as non-word boundaries.
        """
        ...

    def add_non_word_boundary(self, boundary: str) -> None:
        """
        Add a single character as a non-word boundary.

        Args:
            boundary: The character to be added as a non-word boundary.
        """
        ...

    def __len__(self) -> int:
        """
        Get the number of keywords in the processor.

        Returns:
            The number of keywords.
        """
        ...

    def __repr__(self) -> str:
        """
        Get a string representation of the KeywordProcessor.

        Returns:
            A string representation including the number of keywords.
        """
        ...