Skip to content

Commit 014f515

Browse files
committed
global library refactor + fixes + improvements + new additions
1 parent 5dfe43c commit 014f515

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

45 files changed

+4018
-1993
lines changed

string_utils.py

-630
This file was deleted.

string_utils/__init__.py

+9
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
# -*- coding: utf-8 -*-
2+
3+
# Version string of the package.
__version__ = '1.0.0'
4+
5+
# makes all the functions available at string_utils level
6+
# as they were in older versions (before 1.0.0), when it was a single python module
7+
from .validation import *
8+
from .manipulation import *
9+
from .generation import *

string_utils/_regex.py

+151
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,151 @@
1+
# -*- coding: utf-8 -*-
2+
3+
import re
4+
5+
# INTERNAL USE ONLY REGEX!
6+
7+
# Signed number, anchored to the whole string: an optional "+"/"-" followed by either
# digits with an optional fractional part and an optional unsigned lowercase "e" exponent
# (e.g. "1.5e3"), or a bare fractional part (e.g. ".5").
NUMBER_RE = re.compile(r'^([+\-]?)((\d+)(\.\d+)?(e\d+)?|\.\d+)$')
8+
9+
# Unanchored URL pattern source, assembled from adjacent raw-string pieces (one per URL part).
# It is shared by the two compiled patterns defined right after it.
URLS_RAW_STRING = (
10+
r'([a-z-]+://)' # scheme
11+
r'([a-z_\d-]+:[a-z_\d-]+@)?' # user:password
12+
r'(www\.)?' # www.
13+
r'((?<!\.)[a-z\d]+[a-z\d.-]+\.[a-z]{2,6}|\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}|localhost)' # domain
14+
r'(:\d{2,})?' # port number
15+
r'(/[a-z\d_%+-]*)*' # folders
16+
r'(\.[a-z\d_%+-]+)*' # file extension
17+
# NOTE(review): inside the character class below, "%-=" is parsed as a range (from "%" to "="),
# not as the three literal characters "%", "-" and "=" -- confirm this is intended.
r'(\?[a-z\d_+%-=]*)?' # query string
18+
r'(#\S*)?' # hash
19+
)
20+
21+
# Anchored variant: the whole string must be a URL (case-insensitive).
URL_RE = re.compile(r'^{}$'.format(URLS_RAW_STRING), re.IGNORECASE)
22+
23+
# Unanchored variant, wrapped in one extra outer capturing group (case-insensitive).
URLS_RE = re.compile(r'({})'.format(URLS_RAW_STRING), re.IGNORECASE)
24+
25+
# Unanchored e-mail pattern source. The local part accepts ASCII letters of both cases, digits and
# ". _ + -"; the domain part and the 2-4 letter suffix only list lowercase letters, and the
# patterns compiled from it are not given re.IGNORECASE.
EMAILS_RAW_STRING = r'[a-zA-Z\d._+-]+@[a-z\d-]+\.?[a-z\d-]+\.[a-z]{2,4}'
26+
27+
# Anchored variant: the whole string must be an e-mail address.
EMAIL_RE = re.compile(r'^{}$'.format(EMAILS_RAW_STRING))
28+
29+
# Unanchored variant, wrapped in one capturing group.
EMAILS_RE = re.compile(r'({})'.format(EMAILS_RAW_STRING))
30+
31+
# Whole-string camel case test: ASCII letters and digits only, starting with a letter and containing
# at least one lowercase->uppercase or uppercase->lowercase transition. Digits may only appear after
# that transition.
CAMEL_CASE_TEST_RE = re.compile(r'^[a-zA-Z]*([a-z]+[A-Z]+|[A-Z]+[a-z]+)[a-zA-Z\d]*$')
32+
33+
# Captures a single lowercase letter, or a run of uppercase letters, that is immediately followed by
# an uppercase letter (the lookahead leaves that following letter unconsumed).
CAMEL_CASE_REPLACE_RE = re.compile(r'([a-z]|[A-Z]+)(?=[A-Z])')
34+
35+
# Whole-string snake case test (case-insensitive): either letters, optional digits and an underscore
# followed by any letters/digits/underscores, or one or more leading underscores followed by
# letters/digits and then any letters/digits/underscores.
SNAKE_CASE_TEST_RE = re.compile(r'^([a-z]+\d*_[a-z\d_]*|_+[a-z\d]+[a-z\d_]*)$', re.IGNORECASE)
36+
37+
# Dash-separated variant of the snake case test (case-insensitive). Anchored at both ends, so the
# outcome is the same whether it is used with match() or search(): the whole string must be either
# letters, optional digits and a dash followed by any letters/digits/dashes, or one or more leading
# dashes followed by letters/digits and then any letters/digits/dashes.
SNAKE_CASE_TEST_DASH_RE = re.compile(r'^([a-z]+\d*-[a-z\d-]*|-+[a-z\d]+[a-z\d-]*)$', re.IGNORECASE)
38+
39+
# An underscore followed by a lowercase letter or a digit, captured as two separate groups.
SNAKE_CASE_REPLACE_RE = re.compile(r'(_)([a-z\d])')
40+
41+
# Same shape with a dash in place of the underscore.
SNAKE_CASE_REPLACE_DASH_RE = re.compile(r'(-)([a-z\d])')
42+
43+
# Card-number patterns keyed by card brand name. Every pattern is anchored to the whole string and
# accepts digits only (no spaces or dashes); only the prefix and the length are checked.
CREDIT_CARDS = {
44+
# "4" followed by 12 digits and an optional 3 more (13 or 16 digits in total)
'VISA': re.compile(r'^4\d{12}(?:\d{3})?$'),
45+
# "51".."55" followed by 14 digits (16 digits in total)
'MASTERCARD': re.compile(r'^5[1-5]\d{14}$'),
46+
# "34" or "37" followed by 13 digits (15 digits in total)
'AMERICAN_EXPRESS': re.compile(r'^3[47]\d{13}$'),
47+
# "300".."305", "36x" or "38x" followed by 11 digits (14 digits in total)
'DINERS_CLUB': re.compile(r'^3(?:0[0-5]|[68]\d)\d{11}$'),
48+
# "6011" or "65xx" followed by 12 digits (16 digits in total)
'DISCOVER': re.compile(r'^6(?:011|5\d{2})\d{12}$'),
49+
# "2131" or "1800" followed by 11 digits (15 in total), or "35xxx" followed by 11 digits (16 in total)
'JCB': re.compile(r'^(?:2131|1800|35\d{3})\d{11}$')
50+
}
51+
52+
# Shape check only: text that (ignoring surrounding whitespace) opens with "[" or "{" and closes with
# "}" or "]"; whatever lies in between is captured as group 1. The pattern does not require the
# closing bracket to be of the same kind as the opening one.
JSON_WRAPPER_RE = re.compile(r'^\s*[\[{]\s*(.*)\s*[\}\]]\s*$', re.MULTILINE | re.DOTALL)
53+
54+
# Whole-string, case-insensitive 8-4-4-4-12 grouping of hexadecimal digits separated by dashes.
UUID_RE = re.compile(r'^[a-f\d]{8}-[a-f\d]{4}-[a-f\d]{4}-[a-f\d]{4}-[a-f\d]{12}$', re.IGNORECASE)
55+
56+
# Whole-string check for four dot-separated groups of 1 to 3 digits. The value of each group is not
# range-checked here (e.g. "999" matches).
SHALLOW_IP_V4_RE = re.compile(r'^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$')
57+
58+
# Whole-string, case-insensitive check for eight colon-separated groups of 0 to 4 hexadecimal digits.
# Only hex digits (0-9, a-f) are accepted inside a group: letters g-z are not valid in an IPv6 address.
IP_V6_RE = re.compile(r'^([a-f\d]{0,4}:){7}[a-f\d]{0,4}$', re.IGNORECASE)
59+
60+
# One "word": a run of word characters excluding the underscore, together with any non-word
# characters directly around it.
WORDS_COUNT_RE = re.compile(r'\W*[^\W_]+\W*', re.IGNORECASE | re.MULTILINE | re.UNICODE)
61+
62+
# An opening tag (optionally namespace-prefixed) together with, when present, the content up to a
# closing tag; or an html comment; or a doctype declaration.
# NOTE(review): "<!--.*-->" and "<!doctype.*>" are greedy and re.DOTALL is set, so a single match can
# run up to the last "-->" / ">" in the text -- confirm this is intended.
HTML_RE = re.compile(
63+
r'((<([a-z]+:)?[a-z]+[^>]*/?>)(.*?(</([a-z]+:)?[a-z]+>))?|<!--.*-->|<!doctype.*>)',
64+
re.IGNORECASE | re.MULTILINE | re.DOTALL
65+
)
66+
67+
# Like the pattern above, but opening and closing tags are matched individually and the content
# between them is not part of the match.
HTML_TAG_ONLY_RE = re.compile(
68+
r'(<([a-z]+:)?[a-z]+[^>]*/?>|</([a-z]+:)?[a-z]+>|<!--.*-->|<!doctype.*>)',
69+
re.IGNORECASE | re.MULTILINE | re.DOTALL
70+
)
71+
72+
# A single whitespace character.
SPACES_RE = re.compile(r'\s')
73+
74+
# Table of compiled patterns keyed by name; each entry describes one kind of spacing, punctuation or
# capitalisation irregularity in a text.
PRETTIFY_RE = {
75+
# match repetitions of signs that should not be repeated (like multiple spaces or duplicated quotes)
76+
'DUPLICATES': re.compile(
77+
r'(\({2,}|\){2,}|\[{2,}|\]{2,}|{{2,}|\}{2,}|:{2,}|,{2,}|;{2,}|\+{2,}|-{2,}|\s{2,}|%{2,}|={2,}|"{2,}|\'{2,})',
78+
re.MULTILINE
79+
),
80+
81+
# check that a sign cannot have a space before or missing a space after,
82+
# unless it is a dot or a comma, where numbers may follow (5.5 or 5,5 is ok)
83+
'RIGHT_SPACE': re.compile(
84+
r'('
85+
r'(?<=[^\s\d]),(?=[^\s\d])|\s,\s|\s,(?=[^\s\d])|\s,(?!.)|' # comma (,)
86+
r'(?<=[^\s\d.])\.+(?=[^\s\d.])|\s\.+\s|\s\.+(?=[^\s\d])|\s\.+(?!\.)|' # dot (.)
87+
r'(?<=\S);(?=\S)|\s;\s|\s;(?=\S)|\s;(?!.)|' # semicolon (;)
88+
r'(?<=\S):(?=\S)|\s:\s|\s:(?=\S)|\s:(?!.)|' # colon (:)
89+
r'(?<=[^\s!])!+(?=[^\s!])|\s!+\s|\s!+(?=[^\s!])|\s!+(?!!)|' # exclamation (!)
90+
r'(?<=[^\s?])\?+(?=[^\s?])|\s\?+\s|\s\?+(?=[^\s?])|\s\?+(?!\?)|' # question (?)
91+
r'\d%(?=\S)|(?<=\d)\s%\s|(?<=\d)\s%(?=\S)|(?<=\d)\s%(?!.)' # percentage (%)
92+
r')',
93+
re.MULTILINE | re.DOTALL
94+
),
95+
96+
# match quoted or round-bracketed text that is followed by a punctuation sign, or that is glued to
# the preceding character and followed by a whitespace
'LEFT_SPACE': re.compile(
97+
r'('
98+
99+
# quoted text ("hello world")
100+
r'\s"[^"]+"(?=[?.:!,;])|(?<=\S)"[^"]+"\s|(?<=\S)"[^"]+"(?=[?.:!,;])|'
101+
102+
# text in round brackets
103+
r'\s\([^)]+\)(?=[?.:!,;])|(?<=\S)\([^)]+\)\s|(?<=\S)(\([^)]+\))(?=[?.:!,;])'
104+
105+
r')',
106+
re.MULTILINE | re.DOTALL
107+
),
108+
109+
# finds the first char in the string (therefore this must not be MULTILINE)
110+
'UPPERCASE_FIRST_LETTER': re.compile(r'^\s*\w', re.UNICODE),
111+
112+
# match chars that must be followed by uppercase letters (like ".", "?"...)
113+
'UPPERCASE_AFTER_SIGN': re.compile(r'([.?!]\s\w)', re.MULTILINE | re.UNICODE),
114+
115+
# match arithmetic/equal signs that lack a whitespace on at least one side, and quoted or
# round-bracketed text that is glued to the neighbouring text on at least one side
'SPACES_AROUND': re.compile(
116+
r'('
117+
r'(?<=\S)\+(?=\S)|(?<=\S)\+\s|\s\+(?=\S)|' # plus (+)
118+
r'(?<=\S)-(?=\S)|(?<=\S)-\s|\s-(?=\S)|' # minus (-)
119+
r'(?<=\S)/(?=\S)|(?<=\S)/\s|\s/(?=\S)|' # division (/)
120+
r'(?<=\S)\*(?=\S)|(?<=\S)\*\s|\s\*(?=\S)|' # multiplication (*)
121+
r'(?<=\S)=(?=\S)|(?<=\S)=\s|\s=(?=\S)|' # equal (=)
122+
123+
# quoted text ("hello world")
124+
r'\s"[^"]+"(?=[^\s?.:!,;])|(?<=\S)"[^"]+"\s|(?<=\S)"[^"]+"(?=[^\s?.:!,;])|'
125+
126+
# text in round brackets
127+
r'\s\([^)]+\)(?=[^\s?.:!,;])|(?<=\S)\([^)]+\)\s|(?<=\S)(\([^)]+\))(?=[^\s?.:!,;])'
128+
129+
r')',
130+
re.MULTILINE | re.DOTALL
131+
),
132+
133+
# match the content found between double quotes or between round brackets; the delimiters are only
# checked through lookbehind/lookahead, so they are not part of the match
'SPACES_INSIDE': re.compile(
134+
r'('
135+
r'(?<=")[^"]+(?=")|' # quoted text ("hello world")
136+
r'(?<=\()[^)]+(?=\))' # text in round brackets
137+
r')',
138+
re.MULTILINE | re.DOTALL
139+
),
140+
141+
# match "'s" forms written with stray whitespace: an apostrophe separated from the "s" by a
# whitespace, or a "'s" that is separated from the preceding word by a whitespace
'SAXON_GENITIVE': re.compile(
142+
r'('
143+
r'(?<=\w)\'\ss\s|(?<=\w)\s\'s(?=\w)|(?<=\w)\s\'s\s(?=\w)'
144+
r')',
145+
re.MULTILINE | re.UNICODE
146+
)
147+
}
148+
149+
# A run of characters that are not word characters, or a run of underscores.
NO_LETTERS_OR_NUMBERS_RE = re.compile(r'[^\w\d]+|_+', re.IGNORECASE | re.UNICODE)
150+
151+
# Leading whitespace other than carriage return / line feed. No re.MULTILINE flag is passed, so "^"
# only matches at the very start of the string.
MARGIN_RE = re.compile(r'^[^\S\r\n]+')

string_utils/errors.py

+13
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# -*- coding: utf-8 -*-
2+
3+
4+
class InvalidInputError(TypeError):
    """
    Error raised when an object that was expected to be a string is of another type.
    """

    def __init__(self, input_data):
        """
        Builds the error message out of the type name of the received object.

        :param input_data: Any received object
        """
        super().__init__('Expected "str", received "{}"'.format(type(input_data).__name__))

string_utils/generation.py

+63
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
import binascii
2+
import os
3+
import random
4+
from uuid import uuid4
5+
import string
6+
7+
from string_utils import is_integer
8+
9+
# Names exported by this module when it is star-imported.
__all__ = [
10+
'uuid',
11+
'random_string',
12+
'secure_random_hex',
13+
]
14+
15+
16+
def uuid():
    """
    Generates a random UUID (through uuid.uuid4()) and returns its string representation.

    :return: uuid string.
    :rtype: str
    """
    generated = uuid4()

    return str(generated)
24+
25+
26+
def random_string(size: int) -> str:
    """
    Returns a string of the specified size containing random characters (uppercase/lowercase ascii letters and digits).

    Characters are picked with the `random` module, which is not meant for security purposes.

    Example:

    >>> random_string(9) # possible output: "cx3QQbzYg"

    :param size: Desired string size
    :return: Random string
    :raises ValueError: If size is not an integer or is lower than 1
    """
    # The type is checked directly: validating the stringified value let numeric strings such as "5"
    # through, which then failed with a TypeError on the comparison instead of the ValueError below.
    # bool is a subclass of int but is not a meaningful size, so it is rejected explicitly.
    if not isinstance(size, int) or isinstance(size, bool) or size < 1:
        raise ValueError('size must be >= 1')

    chars = string.ascii_letters + string.digits

    return ''.join([random.choice(chars) for _ in range(size)])
45+
46+
47+
def secure_random_hex(byte_count: int) -> str:
    """
    Generates a random string using secure low level random generator (os.urandom).

    BEAR IN MIND: due to hex conversion, the returned string will have a size that is exactly
    the double of the given `byte_count`.

    :param byte_count: Number of random bytes to generate
    :return: Hexadecimal string representation of generated random bytes
    :raises ValueError: If byte_count is not an integer or is lower than 1
    """
    # The type is checked directly: validating the stringified value let numeric strings such as "4"
    # through, which then failed with a TypeError on the comparison instead of the ValueError below.
    # bool is a subclass of int but is not a meaningful byte count, so it is rejected explicitly.
    if not isinstance(byte_count, int) or isinstance(byte_count, bool) or byte_count < 1:
        raise ValueError('byte_count must be >= 1')

    # bytes.hex() yields the same lowercase hexadecimal text as hexlify() followed by decode()
    return os.urandom(byte_count).hex()

0 commit comments

Comments
 (0)