Merge pull request #11320 from pymedusa/release/release-1.0.16

Release/release 1.0.16
pymedusa · May 27, 2023 · e98773f · e98773f
2 parents d45f98c + 8c6f701
commit e98773f
Show file tree

Hide file tree

Showing 141 changed files with 4,413 additions and 2,693 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,3 +1,11 @@
+## 1.0.16 (27-05-2023)
+
+#### Improvements
+- Raise warning when TVDB returns malformed data
+- Update many JavaScript and Python dependencies
+
+-----
+
 ## 1.0.15 (21-05-2023)
 
 #### Fixes

diff --git a/ext/bs4/__init__.py b/ext/bs4/__init__.py
@@ -15,7 +15,7 @@
 """
 
 __author__ = "Leonard Richardson (leonardr@segfault.org)"
-__version__ = "4.11.2"
+__version__ = "4.12.2"
 __copyright__ = "Copyright (c) 2004-2023 Leonard Richardson"
 # Use of this source code is governed by the MIT license.
 __license__ = "MIT"
@@ -38,11 +38,13 @@
     builder_registry,
     ParserRejectedMarkup,
     XMLParsedAsHTMLWarning,
+    HTMLParserTreeBuilder
 )
 from .dammit import UnicodeDammit
 from .element import (
     CData,
     Comment,
+    CSS,
     DEFAULT_OUTPUT_ENCODING,
     Declaration,
     Doctype,
@@ -116,7 +118,7 @@ class BeautifulSoup(Tag):
     ASCII_SPACES = '\x20\x0a\x09\x0c\x0d'
 
     NO_PARSER_SPECIFIED_WARNING = "No parser was explicitly specified, so I'm using the best available %(markup_type)s parser for this system (\"%(parser)s\"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.\n\nThe code that caused this warning is on line %(line_number)s of the file %(filename)s. To get rid of this warning, pass the additional argument 'features=\"%(parser)s\"' to the BeautifulSoup constructor.\n"
-    
+
     def __init__(self, markup="", features=None, builder=None,
                  parse_only=None, from_encoding=None, exclude_encodings=None,
                  element_classes=None, **kwargs):
@@ -348,25 +350,49 @@ def deprecated_argument(old_name, new_name):
         self.markup = None
         self.builder.soup = None
 
-    def __copy__(self):
-        """Copy a BeautifulSoup object by converting the document to a string and parsing it again."""
-        copy = type(self)(
-            self.encode('utf-8'), builder=self.builder, from_encoding='utf-8'
-        )
+    def _clone(self):
+        """Create a new BeautifulSoup object with the same TreeBuilder,
+        but not associated with any markup.
 
-        # Although we encoded the tree to UTF-8, that may not have
-        # been the encoding of the original markup. Set the copy's
-        # .original_encoding to reflect the original object's
-        # .original_encoding.
-        copy.original_encoding = self.original_encoding
-        return copy
+        This is the first step of the deepcopy process.
+        """
+        clone = type(self)("", None, self.builder)
 
+        # Keep track of the encoding of the original document,
+        # since we won't be parsing it again.
+        clone.original_encoding = self.original_encoding
+        return clone
+
     def __getstate__(self):
         # Frequently a tree builder can't be pickled.
         d = dict(self.__dict__)
         if 'builder' in d and d['builder'] is not None and not self.builder.picklable:
-            d['builder'] = None
+            d['builder'] = type(self.builder)
+        # Store the contents as a Unicode string.
+        d['contents'] = []
+        d['markup'] = self.decode()
+
+        # If _most_recent_element is present, it's a Tag object left
+        # over from initial parse. It might not be picklable and we
+        # don't need it.
+        if '_most_recent_element' in d:
+            del d['_most_recent_element']
         return d
+
+    def __setstate__(self, state):
+        # If necessary, restore the TreeBuilder by looking it up.
+        self.__dict__ = state
+        if isinstance(self.builder, type):
+            self.builder = self.builder()
+        elif not self.builder:
+            # We don't know which builder was used to build this
+            # parse tree, so use a default we know is always available.
+            self.builder = HTMLParserTreeBuilder()
+        self.builder.soup = self
+        self.reset()
+        self._feed()
+        return state
+
 
     @classmethod
     def _decode_markup(cls, markup):
@@ -468,6 +494,7 @@ def reset(self):
         self.open_tag_counter = Counter()
         self.preserve_whitespace_tag_stack = []
         self.string_container_stack = []
+        self._most_recent_element = None
         self.pushTag(self)
 
     def new_tag(self, name, namespace=None, nsprefix=None, attrs={},
@@ -749,7 +776,7 @@ def handle_data(self, data):
 
     def decode(self, pretty_print=False,
                eventual_encoding=DEFAULT_OUTPUT_ENCODING,
-               formatter="minimal"):
+               formatter="minimal", iterator=None):
         """Returns a string or Unicode representation of the parse tree
             as an HTML or XML document.
 
@@ -776,7 +803,7 @@ def decode(self, pretty_print=False,
         else:
             indent_level = 0
         return prefix + super(BeautifulSoup, self).decode(
-            indent_level, eventual_encoding, formatter)
+            indent_level, eventual_encoding, formatter, iterator)
 
 # Aliases to make it easier to get started quickly, e.g. 'from bs4 import _soup'
 _s = BeautifulSoup

diff --git a/ext/bs4/builder/_htmlparser.py b/ext/bs4/builder/_htmlparser.py
@@ -24,6 +24,7 @@
 
 from bs4.builder import (
     DetectsXMLParsedAsHTML,
+    ParserRejectedMarkup,
     HTML,
     HTMLTreeBuilder,
     STRICT,
@@ -70,6 +71,22 @@ def __init__(self, *args, **kwargs):
 
         self._initialize_xml_detector()
 
+    def error(self, message):
+        # NOTE: This method is required so long as Python 3.9 is
+        # supported. The corresponding code is removed from HTMLParser
+        # in 3.5, but not removed from ParserBase until 3.10.
+        # https://github.com/python/cpython/issues/76025
+        #
+        # The original implementation turned the error into a warning,
+        # but in every case I discovered, this made HTMLParser
+        # immediately crash with an error message that was less
+        # helpful than the warning. The new implementation makes it
+        # more clear that html.parser just can't parse this
+        # markup. The 3.10 implementation does the same, though it
+        # raises AssertionError rather than calling a method. (We
+        # catch this error and wrap it in a ParserRejectedMarkup.)
+        raise ParserRejectedMarkup(message)
+
     def handle_startendtag(self, name, attrs):
         """Handle an incoming empty-element tag.
 
@@ -359,6 +376,12 @@ def feed(self, markup):
         args, kwargs = self.parser_args
         parser = BeautifulSoupHTMLParser(*args, **kwargs)
         parser.soup = self.soup
-        parser.feed(markup)
+        try:
+            parser.feed(markup)
+        except AssertionError as e:
+            # html.parser raises AssertionError in rare cases to
+            # indicate a fatal problem with the markup, especially
+            # when there's an error in the doctype declaration.
+            raise ParserRejectedMarkup(e)
         parser.close()
         parser.already_closed_empty_element = []