-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathutil.py
117 lines (87 loc) · 3.48 KB
/
util.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
#!/usr/bin/python3.4
#
# util.py - Utility functions
#
# Copyright (C) 2016 Alicia González Martínez, [email protected]
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
############################################################################
import sys
import os
import re
import itertools as it
import operator as op
from configparser import ConfigParser
CURRENT_PATH = os.path.dirname(os.path.realpath(__file__))
config = ConfigParser(inline_comment_prefixes=('#'))
config.read(os.path.join(CURRENT_PATH, 'config.ini'))
def striplines(lines):
""" Removes empty strings only at the beginning and the end of a list of strings.
Empty strings in the middle are kept without change.
All strings are stripped.
Args:
lines (list): Lines of text.
yields:
str: Line of modified list.
Examples:
>>> list(striplines(['','', 'a','b','', 'c', '', '', 'd', '']))
['a', 'b', '', 'c', '', 'd']
"""
lines = [list(g) for k,g in it.groupby(l.strip() for l in lines)]
if not any(l.strip() for l in lines[0]): lines.pop(0)
if not any(l.strip() for l in lines[-1]): lines.pop(-1)
for line in lines:
yield line[0]
def subsequences(seq):
""" Arrange list of numbers in ordered subsequnces.
Args:
seq (iterable): List of numbers containing subsequences.
Returns:
list: Sublists of numbers groupped in sequences.
Examples:
>>> subsequences([1,2, 1,2,3, 1, 1, 3,4, 4,5,6, 1])
[[1, 2], [1, 2, 3], [1], [1], [3, 4], [4, 5, 6], [1]]
"""
groupped = it.groupby(enumerate(seq), lambda x: x[0]-x[1])
return [list(map(op.itemgetter(1), g)) for k,g in groupped]
def tochar(*args):
""" Convert each hex str given in args to corresponding char.
Args:
args: Variable number of strings.
Returns:
tuple: Chars corresponding to input hex strings.
"""
return tuple(chr(int(s, 16)) for s in args)
def isArabicalpha(c, ignore_dir=True, ignore_tatweel=True):
""" Tells if a character is inside the Arabic alphabetic range.
Arabic range is considered from U+0621 (hamza) until U+0652 (sukun).
Args:
c (str): Character to check.
ignore_dir (bool): Include RTL and LTR directionality marks as valid Arabic characters.
ignore_tatweel (bool): Include tatweel as a valid Arabic character.
Returns:
bool: True if the character is inside the Arabic alphabetic tange,
False otherwise.
"""
# char is in Arabic range
if ord(c)>=ord('ء') and ord(c)<=ord('ْ'):
if ignore_dir:
if c == tochar(config.get('unicode', 'ltr'))[0] or \
c == tochar(config.get('unicode', 'rtl'))[0]:
return True
if ignore_tatweel:
if c == tochar(config.get('unicode', 'tatweel'))[0]:
return True
return False