Skip to content

Commit

Permalink
Merge pull request #2620 from sopel-irc/irc.utils.safe-remove-nulls
Browse files Browse the repository at this point in the history
irc.utils, test: also remove null (\x00) in safe()
  • Loading branch information
dgw authored Oct 22, 2024
2 parents 61b217f + db4b296 commit cebfb38
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 31 deletions.
24 changes: 15 additions & 9 deletions sopel/irc/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,34 +17,40 @@
from sopel.lifecycle import deprecated


# Characters that may never appear in an IRC message parameter: LF, CR and
# NUL. Built once so ``safe()`` strips all of them in a single pass.
_IRC_DISALLOWED = str.maketrans('', '', '\n\r\x00')


def safe(string: str) -> str:
    """Remove disallowed bytes from a string, and ensure Unicode.

    :param string: input text to process
    :return: the string as Unicode without characters prohibited in IRC
             messages
    :raises TypeError: when ``string`` is ``None``

    This function removes newlines and null bytes from a string. It will
    always return a Unicode ``str``, even if given non-Unicode input, but
    doesn't strip or alter the string in any other way::

        >>> safe('some \\x00text\\r\\n')
        'some text'

    This is useful to ensure a string can be used in an IRC message.
    Parameters can **never** contain NUL, CR, or LF octets, per
    :rfc:`2812#section-2.3.1`.

    .. versionchanged:: 7.1

        This function now raises a :exc:`TypeError` instead of an
        unpredictable behaviour when given ``None``.

    .. versionchanged:: 8.0.1

        Also remove NUL (``\\x00``) in addition to CR/LF.

    """
    if string is None:
        raise TypeError('safe function requires a string, not NoneType')
    if isinstance(string, bytes):
        # Bytes input is decoded as UTF-8 so the result is always ``str``.
        string = string.decode("utf8")
    # One translate pass drops every LF, CR and NUL wherever they occur.
    return string.translate(_IRC_DISALLOWED)


Expand Down
53 changes: 31 additions & 22 deletions test/irc/test_irc_utils.py
Original file line number Diff line number Diff line change
@@ -1,43 +1,52 @@
"""Tests for core ``sopel.irc.utils``"""
from __future__ import annotations

from itertools import permutations

import pytest

from sopel.irc import utils


@pytest.mark.parametrize('s1, s2, s3', permutations(('\n', '\r', '\x00')))
def test_safe(s1, s2, s3):
    """Check ``safe()`` strips LF, CR and NUL from ``str`` input.

    Parametrized over every ordering of the three disallowed characters,
    which are placed after, before, inside and scattered through the text.
    """
    text = 'some text'
    seq = ''.join((s1, s2, s3))

    assert utils.safe(text + seq) == text
    assert utils.safe(seq + text) == text
    assert utils.safe('some ' + seq + 'text') == text
    # One disallowed character at each of start, middle and end.
    assert utils.safe(
        s1
        + 'some '
        + s2
        + 'text'
        + s3
    ) == text


def test_safe_empty():
    """Check ``safe()`` returns an empty string unchanged."""
    text = ''
    assert utils.safe(text) == text


def test_safe_none():
    """Check ``safe()`` raises ``TypeError`` when given ``None``."""
    with pytest.raises(TypeError):
        utils.safe(None)


@pytest.mark.parametrize('b1, b2, b3', permutations((b'\n', b'\r', b'\x00')))
def test_safe_bytes(b1, b2, b3):
    """Check ``safe()`` decodes ``bytes`` input and strips LF, CR and NUL.

    Parametrized over every ordering of the three disallowed bytes; the
    expected value is always the UTF-8 decoded text.
    """
    text = b'some text'
    expected = text.decode('utf-8')
    seq = b''.join((b1, b2, b3))

    assert utils.safe(text) == expected
    assert utils.safe(text + seq) == expected
    assert utils.safe(seq + text) == expected
    assert utils.safe(b'some ' + seq + b'text') == expected
    # One disallowed byte at each of start, middle and end.
    assert utils.safe(
        b1
        + b'some '
        + b2
        + b'text'
        + b3
    ) == expected

0 comments on commit cebfb38

Please sign in to comment.