Skip to content

Commit

Permalink
Fix complex, exponent, binary, octal and hex numbers with underscores (
Browse files Browse the repository at this point in the history
  • Loading branch information
sobolevn authored Dec 14, 2024
1 parent 6716807 commit fe2335d
Show file tree
Hide file tree
Showing 5 changed files with 83 additions and 62 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,16 @@
'1000_000.000_001',
'10_000_000.000000001',
'10000000.000_000_001',
'0b1_01',
'0o10_11',
'0x10_2',
'1_234.157_001e-1123',
'1_234.1_57001e-1_123',
'12_34.157_001e-1123',
'1e1_1',
'1_1e1',
'3_3j',
'0.5_6j',
],
)
def test_underscored_number(
Expand All @@ -56,12 +66,8 @@ def test_underscored_number(
@pytest.mark.parametrize(
'primitive',
[
'1000',
'1000.0',
'-333555',
'-333555.5',
'1_000',
'1_000.0',
'+1_000',
'+1_000.0',
'-1_000',
Expand All @@ -70,23 +76,19 @@ def test_underscored_number(
'100_000',
'100_000_000',
'0.0_005',
'100_000.000_001',
'10_000_000.000_000_001',
'1_000_000.000_000_001',
'100_000.000_000_001',
'10_000.000_000_001',
'1_000.000_000_001',
'100.000_000_001',
'10.000_000_001',
'1.000_000_001',
'10_000_000.00_000_001',
'10_000_000.0_000_001',
'10_000_000.000_001',
'10_000_000.00_001',
'10_000_000.0_001',
'10_000_000.001',
'10_000_000.01',
'10_000_000.1',
'0b1_001',
'0o10_101',
'0x100_234',
'1_234.157_001e-1_123',
'3_333j',
'3j',
'0.5_655j',
'0.555j',
],
)
def test_correct_number(
Expand All @@ -104,3 +106,23 @@ def test_correct_number(
visitor.run()

assert_errors(visitor, [])


def test_numbers_do_not_error(
    parse_tokens,
    default_options,
    primitives_usages,
    mode,
):
    """
    Smoke-test the visitor against CPython's own underscore literals.

    No violations are asserted here: the test only makes sure that running
    the visitor over every literal from ``VALID_UNDERSCORE_LITERALS``
    finishes without raising. The whole test is skipped when that list
    cannot be imported from CPython's ``test`` package.
    """
    try:
        from test.test_grammar import (  # noqa: WPS433
            VALID_UNDERSCORE_LITERALS,
        )
    except Exception:
        pytest.skip('VALID_UNDERSCORE_LITERALS did not import')
    else:
        for literal in VALID_UNDERSCORE_LITERALS:
            source_code = mode(primitives_usages.format(literal))
            tokens = parse_tokens(source_code)

            # Running the visitor is the whole check: it must not crash.
            WrongNumberTokenVisitor(
                default_options,
                file_tokens=tokens,
            ).run()
36 changes: 36 additions & 0 deletions wemake_python_styleguide/logic/tokens/numbers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import re
from typing import Final

# Decimal digits in groups of three, the leading group may be shorter.
_UNDERSCORE_PATTERN: Final = re.compile(r'^\d{1,3}(_\d{3})*$')
# Same grouping rule for (lowercased) hexadecimal digits.
_HEX_UNDERSCORE_PATTERN: Final = re.compile(
    r'^[0-9a-f]{1,3}(_[0-9a-f]{3})*$',
)
# Separates the integer, fractional and exponent parts of a decimal number.
_SPLIT_PATTERN: Final = re.compile(r'\.|e[\+-]?')


def has_correct_underscores(number: str) -> bool:
    """
    Check that underscores split a number literal into groups of three.

    Every part of the literal (integer, fractional, exponent) is validated
    on its own: the first group of a part may hold one to three digits,
    every following group must hold exactly three. Radix prefixes
    (``0b``, ``0o``, ``0x``) and the complex suffix ``j`` are ignored.

    Hexadecimal literals are validated against hexadecimal digits and are
    never split on ``e``, because ``e`` is a regular digit there.
    Empty parts, like the fractional part of ``1_000.``, are skipped.

    The caller must pass a literal that contains at least one underscore.

    >>> has_correct_underscores('1_234.157_000e-1_123')
    True

    >>> has_correct_underscores('0b1_001')
    True

    >>> has_correct_underscores('12_345.987_654_321')
    True

    >>> has_correct_underscores('0xf_fff')
    True

    >>> has_correct_underscores('1_000.')
    True

    >>> has_correct_underscores('10000_000_00')
    False

    """
    assert '_' in number  # noqa: S101
    normalized = number.strip().lower()

    if normalized.startswith('0x'):
        # Hex has neither a fractional part nor an exponent,
        # so the whole remainder is a single group sequence.
        hex_digits = normalized.removeprefix('0x')
        return bool(_HEX_UNDERSCORE_PATTERN.match(hex_digits))

    number_cleared = (
        normalized.removeprefix('0b').removeprefix('0o').removesuffix('j')
    )
    return all(
        _UNDERSCORE_PATTERN.match(number_part)
        for number_part in _SPLIT_PATTERN.split(number_cleared)
        if number_part  # `1_000.` and `.000_1` have an empty side
    )
36 changes: 0 additions & 36 deletions wemake_python_styleguide/logic/tokens/strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,39 +32,3 @@ def has_triple_string_quotes(string_contents: str) -> bool:
def get_comment_text(token: tokenize.TokenInfo) -> str:
"""Returns comment without `#` char from comment tokens."""
return token.string[1:].strip()


def format_with_thousands(
    number_str: str,
    thousands_separator: str = '_',
    decimal_separator: str = '.',
) -> str:
    """
    Regroup the characters of a number string into blocks of three.

    Existing thousands separators and surrounding whitespace are dropped
    first. Grouping then runs from right to left and starts over at each
    decimal separator, so the fractional part is grouped from its right
    edge as well.

    >>> format_with_thousands('123456789')
    '123_456_789'

    >>> format_with_thousands('123456789.987654321')
    '123_456_789.987_654_321'

    >>> format_with_thousands('1000.00001')
    '1_000.00_001'

    >>> format_with_thousands('10000_000_00')
    '1_000_000_000'

    """
    bare_number = number_str.strip().replace(thousands_separator, '')
    pieces = []  # collected right-to-left, flipped back when joining
    group_size = 0
    for symbol in reversed(bare_number):
        if symbol == decimal_separator:
            pieces.append(symbol)
            group_size = 0
        else:
            if group_size == 3:
                pieces.append(thousands_separator)
                group_size = 0
            pieces.append(symbol)
            group_size += 1
    return ''.join(reversed(pieces))
11 changes: 5 additions & 6 deletions wemake_python_styleguide/violations/consistency.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,15 +277,14 @@ class UnderscoredNumberViolation(TokenizeViolation):
# Correct:
phone = 88313443
million = 1000000
decimal = 0.00001
formatted_million = 1_000_000
formatted_decimal = 0.00_001
million = 1_000_000.50_001
exponent = 1_234.157_000e-1_123
binary = 0b1_001_001
# Wrong:
phone = 8_83_134_43
million = 100_00_00
decimal = 0.000_01
million = 100_00_00.1_0
octal = 0o00_11
.. versionadded:: 0.1.0
.. versionchanged:: 1.0.0
Expand Down
8 changes: 4 additions & 4 deletions wemake_python_styleguide/visitors/tokenize/primitives.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,10 @@
from wemake_python_styleguide.logic.tokens.docstrings import (
get_docstring_tokens,
)
from wemake_python_styleguide.logic.tokens.numbers import (
has_correct_underscores,
)
from wemake_python_styleguide.logic.tokens.strings import (
format_with_thousands,
has_triple_string_quotes,
split_prefixes,
)
Expand Down Expand Up @@ -55,9 +57,7 @@ def visit_number(self, token: tokenize.TokenInfo) -> None:
self._check_float_zeros(token)

def _check_underscored_number(self, token: tokenize.TokenInfo) -> None:
if '_' in token.string and token.string != format_with_thousands(
number_str=token.string,
):
if '_' in token.string and not has_correct_underscores(token.string):
self.add_violation(
consistency.UnderscoredNumberViolation(
token,
Expand Down

0 comments on commit fe2335d

Please sign in to comment.