`_
+* Releases: https://pypi.org/project/MarkupSafe/
+* Code: https://github.com/pallets/markupsafe
+* Issue tracker: https://github.com/pallets/markupsafe/issues
+* Test status:
+
+ * Linux, Mac: https://travis-ci.org/pallets/markupsafe
+ * Windows: https://ci.appveyor.com/project/pallets/markupsafe
+
+* Test coverage: https://codecov.io/gh/pallets/markupsafe
+* Official chat: https://discord.gg/t6rrQZH
+
+
diff --git a/venv/Lib/site-packages/MarkupSafe-1.1.1.dist-info/RECORD b/venv/Lib/site-packages/MarkupSafe-1.1.1.dist-info/RECORD
new file mode 100644
index 0000000..f728e1f
--- /dev/null
+++ b/venv/Lib/site-packages/MarkupSafe-1.1.1.dist-info/RECORD
@@ -0,0 +1,15 @@
+MarkupSafe-1.1.1.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
+MarkupSafe-1.1.1.dist-info/LICENSE.rst,sha256=RjHsDbX9kKVH4zaBcmTGeYIUM4FG-KyUtKV_lu6MnsQ,1503
+MarkupSafe-1.1.1.dist-info/METADATA,sha256=IFCP4hCNGjXJgMoSvdjPiKDLAMUTTWoxKXQsQvmyMNU,3653
+MarkupSafe-1.1.1.dist-info/RECORD,,
+MarkupSafe-1.1.1.dist-info/WHEEL,sha256=jovIjvNuo6l5lHtTPdXyjKVQ_5SCkmdptE5fkPNfjyM,101
+MarkupSafe-1.1.1.dist-info/top_level.txt,sha256=qy0Plje5IJuvsCBjejJyhDCjEAdcDLK_2agVcex8Z6U,11
+markupsafe/__init__.py,sha256=UAy1UKlykemnSZWIVn8RDqY0wvjV6lkeRwYOMNhw4bA,10453
+markupsafe/__pycache__/__init__.cpython-38.pyc,,
+markupsafe/__pycache__/_compat.cpython-38.pyc,,
+markupsafe/__pycache__/_constants.cpython-38.pyc,,
+markupsafe/__pycache__/_native.cpython-38.pyc,,
+markupsafe/_compat.py,sha256=XweNhJEcyTP_wIBUaIO6nxzIb6XFwweriXyZfiTpkdw,591
+markupsafe/_constants.py,sha256=IXLUQkLM6CTustG5vEQTEy6pBB3z5pm84NkYU1aW9qI,4954
+markupsafe/_native.py,sha256=LwsYk-GHoPsPboRD_tNC6_jTmCj3MLtsnDFis7HjE50,1942
+markupsafe/_speedups.cp38-win32.pyd,sha256=8nGEdcR_DUewvF72FxW6d-aD5SYoLMe2prWwEEnH9ck,12800
diff --git a/venv/Lib/site-packages/MarkupSafe-1.1.1.dist-info/WHEEL b/venv/Lib/site-packages/MarkupSafe-1.1.1.dist-info/WHEEL
new file mode 100644
index 0000000..117ab3f
--- /dev/null
+++ b/venv/Lib/site-packages/MarkupSafe-1.1.1.dist-info/WHEEL
@@ -0,0 +1,5 @@
+Wheel-Version: 1.0
+Generator: bdist_wheel (0.33.6)
+Root-Is-Purelib: false
+Tag: cp38-cp38-win32
+
diff --git a/venv/Lib/site-packages/MarkupSafe-1.1.1.dist-info/top_level.txt b/venv/Lib/site-packages/MarkupSafe-1.1.1.dist-info/top_level.txt
new file mode 100644
index 0000000..75bf729
--- /dev/null
+++ b/venv/Lib/site-packages/MarkupSafe-1.1.1.dist-info/top_level.txt
@@ -0,0 +1 @@
+markupsafe
diff --git a/venv/Lib/site-packages/Werkzeug-0.14.1.dist-info/DESCRIPTION.rst b/venv/Lib/site-packages/Werkzeug-0.14.1.dist-info/DESCRIPTION.rst
new file mode 100644
index 0000000..675f08d
--- /dev/null
+++ b/venv/Lib/site-packages/Werkzeug-0.14.1.dist-info/DESCRIPTION.rst
@@ -0,0 +1,80 @@
+Werkzeug
+========
+
+Werkzeug is a comprehensive `WSGI`_ web application library. It began as
+a simple collection of various utilities for WSGI applications and has
+become one of the most advanced WSGI utility libraries.
+
+It includes:
+
+* An interactive debugger that allows inspecting stack traces and source
+ code in the browser with an interactive interpreter for any frame in
+ the stack.
+* A full-featured request object with objects to interact with headers,
+ query args, form data, files, and cookies.
+* A response object that can wrap other WSGI applications and handle
+ streaming data.
+* A routing system for matching URLs to endpoints and generating URLs
+ for endpoints, with an extensible system for capturing variables from
+ URLs.
+* HTTP utilities to handle entity tags, cache control, dates, user
+ agents, cookies, files, and more.
+* A threaded WSGI server for use while developing applications locally.
+* A test client for simulating HTTP requests during testing without
+ requiring running a server.
+
+Werkzeug is Unicode aware and doesn't enforce any dependencies. It is up
+to the developer to choose a template engine, database adapter, and even
+how to handle requests. It can be used to build all sorts of end user
+applications such as blogs, wikis, or bulletin boards.
+
+`Flask`_ wraps Werkzeug, using it to handle the details of WSGI while
+providing more structure and patterns for defining powerful
+applications.
+
+
+Installing
+----------
+
+Install and update using `pip`_:
+
+.. code-block:: text
+
+ pip install -U Werkzeug
+
+
+A Simple Example
+----------------
+
+.. code-block:: python
+
+ from werkzeug.wrappers import Request, Response
+
+ @Request.application
+ def application(request):
+ return Response('Hello, World!')
+
+ if __name__ == '__main__':
+ from werkzeug.serving import run_simple
+ run_simple('localhost', 4000, application)
+
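+The routing system can also be used on its own; a minimal sketch,
+assuming the 0.14 ``Map``/``Rule`` API:
+
+.. code-block:: python
+
+    from werkzeug.routing import Map, Rule
+
+    url_map = Map([
+        Rule('/', endpoint='index'),
+        Rule('/user/<username>', endpoint='user'),
+    ])
+
+    # Bind the map to a host, then match a path against the rules.
+    urls = url_map.bind('example.com')
+    endpoint, args = urls.match('/user/alice')
+    # endpoint == 'user', args == {'username': 'alice'}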
+
+Links
+-----
+
+* Website: https://www.palletsprojects.com/p/werkzeug/
+* Releases: https://pypi.org/project/Werkzeug/
+* Code: https://github.com/pallets/werkzeug
+* Issue tracker: https://github.com/pallets/werkzeug/issues
+* Test status:
+
+ * Linux, Mac: https://travis-ci.org/pallets/werkzeug
+ * Windows: https://ci.appveyor.com/project/davidism/werkzeug
+
+* Test coverage: https://codecov.io/gh/pallets/werkzeug
+
+.. _WSGI: https://wsgi.readthedocs.io/en/latest/
+.. _Flask: https://www.palletsprojects.com/p/flask/
+.. _pip: https://pip.pypa.io/en/stable/quickstart/
+
+
diff --git a/venv/Lib/site-packages/Werkzeug-0.14.1.dist-info/INSTALLER b/venv/Lib/site-packages/Werkzeug-0.14.1.dist-info/INSTALLER
new file mode 100644
index 0000000..a1b589e
--- /dev/null
+++ b/venv/Lib/site-packages/Werkzeug-0.14.1.dist-info/INSTALLER
@@ -0,0 +1 @@
+pip
diff --git a/venv/Lib/site-packages/Werkzeug-0.14.1.dist-info/LICENSE.txt b/venv/Lib/site-packages/Werkzeug-0.14.1.dist-info/LICENSE.txt
new file mode 100644
index 0000000..1cc75bb
--- /dev/null
+++ b/venv/Lib/site-packages/Werkzeug-0.14.1.dist-info/LICENSE.txt
@@ -0,0 +1,31 @@
+Copyright © 2007 by the Pallets team.
+
+Some rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+* Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+* Neither the name of the copyright holder nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE AND DOCUMENTATION IS PROVIDED BY THE COPYRIGHT HOLDERS AND
+CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING,
+BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+THIS SOFTWARE AND DOCUMENTATION, EVEN IF ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGE.
diff --git a/venv/Lib/site-packages/Werkzeug-0.14.1.dist-info/METADATA b/venv/Lib/site-packages/Werkzeug-0.14.1.dist-info/METADATA
new file mode 100644
index 0000000..bfc3c4e
--- /dev/null
+++ b/venv/Lib/site-packages/Werkzeug-0.14.1.dist-info/METADATA
@@ -0,0 +1,116 @@
+Metadata-Version: 2.0
+Name: Werkzeug
+Version: 0.14.1
+Summary: The comprehensive WSGI web application library.
+Home-page: https://www.palletsprojects.org/p/werkzeug/
+Author: Armin Ronacher
+Author-email: armin.ronacher@active-4.com
+License: BSD
+Description-Content-Type: UNKNOWN
+Platform: any
+Classifier: Development Status :: 5 - Production/Stable
+Classifier: Environment :: Web Environment
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: BSD License
+Classifier: Operating System :: OS Independent
+Classifier: Programming Language :: Python
+Classifier: Programming Language :: Python :: 2
+Classifier: Programming Language :: Python :: 2.6
+Classifier: Programming Language :: Python :: 2.7
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.3
+Classifier: Programming Language :: Python :: 3.4
+Classifier: Programming Language :: Python :: 3.5
+Classifier: Programming Language :: Python :: 3.6
+Classifier: Topic :: Internet :: WWW/HTTP :: Dynamic Content
+Classifier: Topic :: Software Development :: Libraries :: Python Modules
+Provides-Extra: dev
+Requires-Dist: coverage; extra == 'dev'
+Requires-Dist: pytest; extra == 'dev'
+Requires-Dist: sphinx; extra == 'dev'
+Requires-Dist: tox; extra == 'dev'
+Provides-Extra: termcolor
+Requires-Dist: termcolor; extra == 'termcolor'
+Provides-Extra: watchdog
+Requires-Dist: watchdog; extra == 'watchdog'
+
+Werkzeug
+========
+
+Werkzeug is a comprehensive `WSGI`_ web application library. It began as
+a simple collection of various utilities for WSGI applications and has
+become one of the most advanced WSGI utility libraries.
+
+It includes:
+
+* An interactive debugger that allows inspecting stack traces and source
+ code in the browser with an interactive interpreter for any frame in
+ the stack.
+* A full-featured request object with objects to interact with headers,
+ query args, form data, files, and cookies.
+* A response object that can wrap other WSGI applications and handle
+ streaming data.
+* A routing system for matching URLs to endpoints and generating URLs
+ for endpoints, with an extensible system for capturing variables from
+ URLs.
+* HTTP utilities to handle entity tags, cache control, dates, user
+ agents, cookies, files, and more.
+* A threaded WSGI server for use while developing applications locally.
+* A test client for simulating HTTP requests during testing without
+ requiring running a server.
+
+Werkzeug is Unicode aware and doesn't enforce any dependencies. It is up
+to the developer to choose a template engine, database adapter, and even
+how to handle requests. It can be used to build all sorts of end user
+applications such as blogs, wikis, or bulletin boards.
+
+`Flask`_ wraps Werkzeug, using it to handle the details of WSGI while
+providing more structure and patterns for defining powerful
+applications.
+
+
+Installing
+----------
+
+Install and update using `pip`_:
+
+.. code-block:: text
+
+ pip install -U Werkzeug
+
+
+A Simple Example
+----------------
+
+.. code-block:: python
+
+ from werkzeug.wrappers import Request, Response
+
+ @Request.application
+ def application(request):
+ return Response('Hello, World!')
+
+ if __name__ == '__main__':
+ from werkzeug.serving import run_simple
+ run_simple('localhost', 4000, application)
+
+
+Links
+-----
+
+* Website: https://www.palletsprojects.com/p/werkzeug/
+* Releases: https://pypi.org/project/Werkzeug/
+* Code: https://github.com/pallets/werkzeug
+* Issue tracker: https://github.com/pallets/werkzeug/issues
+* Test status:
+
+ * Linux, Mac: https://travis-ci.org/pallets/werkzeug
+ * Windows: https://ci.appveyor.com/project/davidism/werkzeug
+
+* Test coverage: https://codecov.io/gh/pallets/werkzeug
+
+.. _WSGI: https://wsgi.readthedocs.io/en/latest/
+.. _Flask: https://www.palletsprojects.com/p/flask/
+.. _pip: https://pip.pypa.io/en/stable/quickstart/
+
+
diff --git a/venv/Lib/site-packages/Werkzeug-0.14.1.dist-info/RECORD b/venv/Lib/site-packages/Werkzeug-0.14.1.dist-info/RECORD
new file mode 100644
index 0000000..9067b88
--- /dev/null
+++ b/venv/Lib/site-packages/Werkzeug-0.14.1.dist-info/RECORD
@@ -0,0 +1,98 @@
+Werkzeug-0.14.1.dist-info/DESCRIPTION.rst,sha256=rOCN36jwsWtWsTpqPG96z7FMilB5qI1CIARSKRuUmz8,2452
+Werkzeug-0.14.1.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
+Werkzeug-0.14.1.dist-info/LICENSE.txt,sha256=xndz_dD4m269AF9l_Xbl5V3tM1N3C1LoZC2PEPxWO-8,1534
+Werkzeug-0.14.1.dist-info/METADATA,sha256=FbfadrPdJNUWAxMOKxGUtHe5R3IDSBKYYmAz3FvI3uY,3872
+Werkzeug-0.14.1.dist-info/RECORD,,
+Werkzeug-0.14.1.dist-info/REQUESTED,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+Werkzeug-0.14.1.dist-info/WHEEL,sha256=GrqQvamwgBV4nLoJe0vhYRSWzWsx7xjlt74FT0SWYfE,110
+Werkzeug-0.14.1.dist-info/metadata.json,sha256=4489UTt6HBp2NQil95-pBkjU4Je93SMHvMxZ_rjOpqA,1452
+Werkzeug-0.14.1.dist-info/top_level.txt,sha256=QRyj2VjwJoQkrwjwFIOlB8Xg3r9un0NtqVHQF-15xaw,9
+werkzeug/__init__.py,sha256=NR0d4n_-U9BLVKlOISean3zUt2vBwhvK-AZE6M0sC0k,6842
+werkzeug/__pycache__/__init__.cpython-38.pyc,,
+werkzeug/__pycache__/_compat.cpython-38.pyc,,
+werkzeug/__pycache__/_internal.cpython-38.pyc,,
+werkzeug/__pycache__/_reloader.cpython-38.pyc,,
+werkzeug/__pycache__/datastructures.cpython-38.pyc,,
+werkzeug/__pycache__/exceptions.cpython-38.pyc,,
+werkzeug/__pycache__/filesystem.cpython-38.pyc,,
+werkzeug/__pycache__/formparser.cpython-38.pyc,,
+werkzeug/__pycache__/http.cpython-38.pyc,,
+werkzeug/__pycache__/local.cpython-38.pyc,,
+werkzeug/__pycache__/posixemulation.cpython-38.pyc,,
+werkzeug/__pycache__/routing.cpython-38.pyc,,
+werkzeug/__pycache__/script.cpython-38.pyc,,
+werkzeug/__pycache__/security.cpython-38.pyc,,
+werkzeug/__pycache__/serving.cpython-38.pyc,,
+werkzeug/__pycache__/test.cpython-38.pyc,,
+werkzeug/__pycache__/testapp.cpython-38.pyc,,
+werkzeug/__pycache__/urls.cpython-38.pyc,,
+werkzeug/__pycache__/useragents.cpython-38.pyc,,
+werkzeug/__pycache__/utils.cpython-38.pyc,,
+werkzeug/__pycache__/websocket.cpython-38.pyc,,
+werkzeug/__pycache__/wrappers.cpython-38.pyc,,
+werkzeug/__pycache__/wsgi.cpython-38.pyc,,
+werkzeug/_compat.py,sha256=8c4U9o6A_TR9nKCcTbpZNxpqCXcXDVIbFawwKM2s92c,6311
+werkzeug/_internal.py,sha256=GhEyGMlsSz_tYjsDWO9TG35VN7304MM8gjKDrXLEdVc,13873
+werkzeug/_reloader.py,sha256=AyPphcOHPbu6qzW0UbrVvTDJdre5WgpxbhIJN_TqzUc,9264
+werkzeug/contrib/__init__.py,sha256=f7PfttZhbrImqpr5Ezre8CXgwvcGUJK7zWNpO34WWrw,623
+werkzeug/contrib/__pycache__/__init__.cpython-38.pyc,,
+werkzeug/contrib/__pycache__/atom.cpython-38.pyc,,
+werkzeug/contrib/__pycache__/cache.cpython-38.pyc,,
+werkzeug/contrib/__pycache__/fixers.cpython-38.pyc,,
+werkzeug/contrib/__pycache__/iterio.cpython-38.pyc,,
+werkzeug/contrib/__pycache__/jsrouting.cpython-38.pyc,,
+werkzeug/contrib/__pycache__/limiter.cpython-38.pyc,,
+werkzeug/contrib/__pycache__/lint.cpython-38.pyc,,
+werkzeug/contrib/__pycache__/profiler.cpython-38.pyc,,
+werkzeug/contrib/__pycache__/securecookie.cpython-38.pyc,,
+werkzeug/contrib/__pycache__/sessions.cpython-38.pyc,,
+werkzeug/contrib/__pycache__/testtools.cpython-38.pyc,,
+werkzeug/contrib/__pycache__/wrappers.cpython-38.pyc,,
+werkzeug/contrib/atom.py,sha256=qqfJcfIn2RYY-3hO3Oz0aLq9YuNubcPQ_KZcNsDwVJo,15575
+werkzeug/contrib/cache.py,sha256=xBImHNj09BmX_7kC5NUCx8f_l4L8_O7zi0jCL21UZKE,32163
+werkzeug/contrib/fixers.py,sha256=gR06T-w71ur-tHQ_31kP_4jpOncPJ4Wc1dOqTvYusr8,10179
+werkzeug/contrib/iterio.py,sha256=RlqDvGhz0RneTpzE8dVc-yWCUv4nkPl1jEc_EDp2fH0,10814
+werkzeug/contrib/jsrouting.py,sha256=QTmgeDoKXvNK02KzXgx9lr3cAH6fAzpwF5bBdPNvJPs,8564
+werkzeug/contrib/limiter.py,sha256=iS8-ahPZ-JLRnmfIBzxpm7O_s3lPsiDMVWv7llAIDCI,1334
+werkzeug/contrib/lint.py,sha256=Mj9NeUN7s4zIUWeQOAVjrmtZIcl3Mm2yDe9BSIr9YGE,12558
+werkzeug/contrib/profiler.py,sha256=ISwCWvwVyGpDLRBRpLjo_qUWma6GXYBrTAco4PEQSHY,5151
+werkzeug/contrib/securecookie.py,sha256=uWMyHDHY3lkeBRiCSayGqWkAIy4a7xAbSE_Hln9ecqc,12196
+werkzeug/contrib/sessions.py,sha256=39LVNvLbm5JWpbxM79WC2l87MJFbqeISARjwYbkJatw,12577
+werkzeug/contrib/testtools.py,sha256=G9xN-qeihJlhExrIZMCahvQOIDxdL9NiX874jiiHFMs,2453
+werkzeug/contrib/wrappers.py,sha256=v7OYlz7wQtDlS9fey75UiRZ1IkUWqCpzbhsLy4k14Hw,10398
+werkzeug/datastructures.py,sha256=3IgNKNqrz-ZjmAG7y3YgEYK-enDiMT_b652PsypWcYg,90080
+werkzeug/debug/__init__.py,sha256=uSn9BqCZ5E3ySgpoZtundpROGsn-uYvZtSFiTfAX24M,17452
+werkzeug/debug/__pycache__/__init__.cpython-38.pyc,,
+werkzeug/debug/__pycache__/console.cpython-38.pyc,,
+werkzeug/debug/__pycache__/repr.cpython-38.pyc,,
+werkzeug/debug/__pycache__/tbtools.cpython-38.pyc,,
+werkzeug/debug/console.py,sha256=n3-dsKk1TsjnN-u4ZgmuWCU_HO0qw5IA7ttjhyyMM6I,5607
+werkzeug/debug/repr.py,sha256=bKqstDYGfECpeLerd48s_hxuqK4b6UWnjMu3d_DHO8I,9340
+werkzeug/debug/shared/FONT_LICENSE,sha256=LwAVEI1oYnvXiNMT9SnCH_TaLCxCpeHziDrMg0gPkAI,4673
+werkzeug/debug/shared/console.png,sha256=bxax6RXXlvOij_KeqvSNX0ojJf83YbnZ7my-3Gx9w2A,507
+werkzeug/debug/shared/debugger.js,sha256=PKPVYuyO4SX1hkqLOwCLvmIEO5154WatFYaXE-zIfKI,6264
+werkzeug/debug/shared/jquery.js,sha256=7LkWEzqTdpEfELxcZZlS6wAx5Ff13zZ83lYO2_ujj7g,95957
+werkzeug/debug/shared/less.png,sha256=-4-kNRaXJSONVLahrQKUxMwXGm9R4OnZ9SxDGpHlIR4,191
+werkzeug/debug/shared/more.png,sha256=GngN7CioHQoV58rH6ojnkYi8c_qED2Aka5FO5UXrReY,200
+werkzeug/debug/shared/source.png,sha256=RoGcBTE4CyCB85GBuDGTFlAnUqxwTBiIfDqW15EpnUQ,818
+werkzeug/debug/shared/style.css,sha256=IEO0PC2pWmh2aEyGCaN--txuWsRCliuhlbEhPDFwh0A,6270
+werkzeug/debug/shared/ubuntu.ttf,sha256=1eaHFyepmy4FyDvjLVzpITrGEBu_CZYY94jE0nED1c0,70220
+werkzeug/debug/tbtools.py,sha256=rBudXCmkVdAKIcdhxANxgf09g6kQjJWW9_5bjSpr4OY,18451
+werkzeug/exceptions.py,sha256=3wp95Hqj9FqV8MdikV99JRcHse_fSMn27V8tgP5Hw2c,20505
+werkzeug/filesystem.py,sha256=hHWeWo_gqLMzTRfYt8-7n2wWcWUNTnDyudQDLOBEICE,2175
+werkzeug/formparser.py,sha256=mUuCwjzjb8_E4RzrAT2AioLuZSYpqR1KXTK6LScRYzA,21722
+werkzeug/http.py,sha256=RQg4MJuhRv2isNRiEh__Phh09ebpfT3Kuu_GfrZ54_c,40079
+werkzeug/local.py,sha256=QdQhWV5L8p1Y1CJ1CDStwxaUs24SuN5aebHwjVD08C8,14553
+werkzeug/posixemulation.py,sha256=xEF2Bxc-vUCPkiu4IbfWVd3LW7DROYAT-ExW6THqyzw,3519
+werkzeug/routing.py,sha256=2JVtdSgxKGeANy4Z_FP-dKESvKtkYGCZ1J2fARCLGCY,67214
+werkzeug/script.py,sha256=DwaVDcXdaOTffdNvlBdLitxWXjKaRVT32VbhDtljFPY,11365
+werkzeug/security.py,sha256=0m107exslz4QJLWQCpfQJ04z3re4eGHVggRvrQVAdWc,9193
+werkzeug/serving.py,sha256=A0flnIJHufdn2QJ9oeuHfrXwP3LzP8fn3rNW6hbxKUg,31926
+werkzeug/test.py,sha256=XmECSmnpASiYQTct4oMiWr0LT5jHWCtKqnpYKZd2ui8,36100
+werkzeug/testapp.py,sha256=3HQRW1sHZKXuAjCvFMet4KXtQG3loYTFnvn6LWt-4zI,9396
+werkzeug/urls.py,sha256=dUeLg2IeTm0WLmSvFeD4hBZWGdOs-uHudR5-t8n9zPo,36771
+werkzeug/useragents.py,sha256=BhYMf4cBTHyN4U0WsQedePIocmNlH_34C-UwqSThGCc,5865
+werkzeug/utils.py,sha256=BrY1j0DHQ8RTb0K1StIobKuMJhN9SQQkWEARbrh2qpk,22972
+werkzeug/websocket.py,sha256=PpSeDxXD_0UsPAa5hQhQNM6mxibeUgn8lA8eRqiS0vM,11344
+werkzeug/wrappers.py,sha256=kbyL_aFjxELwPgMwfNCYjKu-CR6kNkh-oO8wv3GXbk8,84511
+werkzeug/wsgi.py,sha256=1Nob-aeChWQf7MsiicO8RZt6J90iRzEcik44ev9Qu8s,49347
diff --git a/venv/Lib/site-packages/Werkzeug-0.14.1.dist-info/REQUESTED b/venv/Lib/site-packages/Werkzeug-0.14.1.dist-info/REQUESTED
new file mode 100644
index 0000000..e69de29
diff --git a/venv/Lib/site-packages/Werkzeug-0.14.1.dist-info/WHEEL b/venv/Lib/site-packages/Werkzeug-0.14.1.dist-info/WHEEL
new file mode 100644
index 0000000..0de529b
--- /dev/null
+++ b/venv/Lib/site-packages/Werkzeug-0.14.1.dist-info/WHEEL
@@ -0,0 +1,6 @@
+Wheel-Version: 1.0
+Generator: bdist_wheel (0.26.0)
+Root-Is-Purelib: true
+Tag: py2-none-any
+Tag: py3-none-any
+
diff --git a/venv/Lib/site-packages/Werkzeug-0.14.1.dist-info/metadata.json b/venv/Lib/site-packages/Werkzeug-0.14.1.dist-info/metadata.json
new file mode 100644
index 0000000..bca8d12
--- /dev/null
+++ b/venv/Lib/site-packages/Werkzeug-0.14.1.dist-info/metadata.json
@@ -0,0 +1 @@
+{"generator": "bdist_wheel (0.26.0)", "summary": "The comprehensive WSGI web application library.", "classifiers": ["Development Status :: 5 - Production/Stable", "Environment :: Web Environment", "Intended Audience :: Developers", "License :: OSI Approved :: BSD License", "Operating System :: OS Independent", "Programming Language :: Python", "Programming Language :: Python :: 2", "Programming Language :: Python :: 2.6", "Programming Language :: Python :: 2.7", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.3", "Programming Language :: Python :: 3.4", "Programming Language :: Python :: 3.5", "Programming Language :: Python :: 3.6", "Topic :: Internet :: WWW/HTTP :: Dynamic Content", "Topic :: Software Development :: Libraries :: Python Modules"], "description_content_type": "UNKNOWN", "extensions": {"python.details": {"project_urls": {"Home": "https://www.palletsprojects.org/p/werkzeug/"}, "contacts": [{"email": "armin.ronacher@active-4.com", "name": "Armin Ronacher", "role": "author"}], "document_names": {"description": "DESCRIPTION.rst", "license": "LICENSE.txt"}}}, "license": "BSD", "metadata_version": "2.0", "name": "Werkzeug", "platform": "any", "extras": ["dev", "termcolor", "watchdog"], "run_requires": [{"requires": ["coverage", "pytest", "sphinx", "tox"], "extra": "dev"}, {"requires": ["termcolor"], "extra": "termcolor"}, {"requires": ["watchdog"], "extra": "watchdog"}], "version": "0.14.1"}
\ No newline at end of file
diff --git a/venv/Lib/site-packages/Werkzeug-0.14.1.dist-info/top_level.txt b/venv/Lib/site-packages/Werkzeug-0.14.1.dist-info/top_level.txt
new file mode 100644
index 0000000..6fe8da8
--- /dev/null
+++ b/venv/Lib/site-packages/Werkzeug-0.14.1.dist-info/top_level.txt
@@ -0,0 +1 @@
+werkzeug
diff --git a/venv/Lib/site-packages/__pycache__/easy_install.cpython-38.pyc b/venv/Lib/site-packages/__pycache__/easy_install.cpython-38.pyc
new file mode 100644
index 0000000..a5eb713
Binary files /dev/null and b/venv/Lib/site-packages/__pycache__/easy_install.cpython-38.pyc differ
diff --git a/venv/Lib/site-packages/__pycache__/itsdangerous.cpython-38.pyc b/venv/Lib/site-packages/__pycache__/itsdangerous.cpython-38.pyc
new file mode 100644
index 0000000..7a43cbb
Binary files /dev/null and b/venv/Lib/site-packages/__pycache__/itsdangerous.cpython-38.pyc differ
diff --git a/venv/Lib/site-packages/_distutils_hack/__init__.py b/venv/Lib/site-packages/_distutils_hack/__init__.py
new file mode 100644
index 0000000..c31edfe
--- /dev/null
+++ b/venv/Lib/site-packages/_distutils_hack/__init__.py
@@ -0,0 +1,123 @@
+import sys
+import os
+import re
+import importlib
+import warnings
+
+
+is_pypy = '__pypy__' in sys.builtin_module_names
+
+
+def warn_distutils_present():
+ if 'distutils' not in sys.modules:
+ return
+ if is_pypy and sys.version_info < (3, 7):
+ # PyPy for 3.6 unconditionally imports distutils, so bypass the warning
+ # https://foss.heptapod.net/pypy/pypy/-/blob/be829135bc0d758997b3566062999ee8b23872b4/lib-python/3/site.py#L250
+ return
+ warnings.warn(
+ "Distutils was imported before Setuptools, but importing Setuptools "
+ "also replaces the `distutils` module in `sys.modules`. This may lead "
+ "to undesirable behaviors or errors. To avoid these issues, avoid "
+ "using distutils directly, ensure that setuptools is installed in the "
+ "traditional way (e.g. not an editable install), and/or make sure "
+ "that setuptools is always imported before distutils.")
+
+
+def clear_distutils():
+ if 'distutils' not in sys.modules:
+ return
+ warnings.warn("Setuptools is replacing distutils.")
+ mods = [name for name in sys.modules if re.match(r'distutils\b', name)]
+ for name in mods:
+ del sys.modules[name]
+
+
+def enabled():
+ """
+ Allow selection of distutils by environment variable.
+ """
+ which = os.environ.get('SETUPTOOLS_USE_DISTUTILS', 'stdlib')
+ return which == 'local'
+
+
+def ensure_local_distutils():
+ clear_distutils()
+ distutils = importlib.import_module('setuptools._distutils')
+ distutils.__name__ = 'distutils'
+ sys.modules['distutils'] = distutils
+
+ # sanity check that submodules load as expected
+ core = importlib.import_module('distutils.core')
+ assert '_distutils' in core.__file__, core.__file__
+
+
+def do_override():
+ """
+ Ensure that the local copy of distutils is preferred over stdlib.
+
+ See https://github.com/pypa/setuptools/issues/417#issuecomment-392298401
+ for more motivation.
+ """
+ if enabled():
+ warn_distutils_present()
+ ensure_local_distutils()
+
+
+class DistutilsMetaFinder:
+ def find_spec(self, fullname, path, target=None):
+ if path is not None:
+ return
+
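+        # Dispatch on the top-level module name: 'distutils' is handled
+        # by spec_for_distutils() and 'pip' by spec_for_pip(); any other
+        # name falls through to the lambda and yields no spec.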
+ method_name = 'spec_for_{fullname}'.format(**locals())
+ method = getattr(self, method_name, lambda: None)
+ return method()
+
+ def spec_for_distutils(self):
+ import importlib.abc
+ import importlib.util
+
+ class DistutilsLoader(importlib.abc.Loader):
+
+ def create_module(self, spec):
+ return importlib.import_module('setuptools._distutils')
+
+ def exec_module(self, module):
+ pass
+
+ return importlib.util.spec_from_loader('distutils', DistutilsLoader())
+
+ def spec_for_pip(self):
+ """
+ Ensure stdlib distutils when running under pip.
+ See pypa/pip#8761 for rationale.
+ """
+ if self.pip_imported_during_build():
+ return
+ clear_distutils()
+ self.spec_for_distutils = lambda: None
+
+ @staticmethod
+ def pip_imported_during_build():
+ """
+ Detect if pip is being imported in a build script. Ref #2355.
+ """
+ import traceback
+ return any(
+ frame.f_globals['__file__'].endswith('setup.py')
+ for frame, line in traceback.walk_stack(None)
+ )
+
+
+DISTUTILS_FINDER = DistutilsMetaFinder()
+
+
+def add_shim():
+ sys.meta_path.insert(0, DISTUTILS_FINDER)
+
+
+def remove_shim():
+ try:
+ sys.meta_path.remove(DISTUTILS_FINDER)
+ except ValueError:
+ pass
diff --git a/venv/Lib/site-packages/_distutils_hack/__pycache__/__init__.cpython-38.pyc b/venv/Lib/site-packages/_distutils_hack/__pycache__/__init__.cpython-38.pyc
new file mode 100644
index 0000000..5df4856
Binary files /dev/null and b/venv/Lib/site-packages/_distutils_hack/__pycache__/__init__.cpython-38.pyc differ
diff --git a/venv/Lib/site-packages/_distutils_hack/__pycache__/override.cpython-38.pyc b/venv/Lib/site-packages/_distutils_hack/__pycache__/override.cpython-38.pyc
new file mode 100644
index 0000000..0949718
Binary files /dev/null and b/venv/Lib/site-packages/_distutils_hack/__pycache__/override.cpython-38.pyc differ
diff --git a/venv/Lib/site-packages/_distutils_hack/override.py b/venv/Lib/site-packages/_distutils_hack/override.py
new file mode 100644
index 0000000..2cc433a
--- /dev/null
+++ b/venv/Lib/site-packages/_distutils_hack/override.py
@@ -0,0 +1 @@
+__import__('_distutils_hack').do_override()
diff --git a/venv/Lib/site-packages/beautifulsoup4-4.6.0.dist-info/DESCRIPTION.rst b/venv/Lib/site-packages/beautifulsoup4-4.6.0.dist-info/DESCRIPTION.rst
new file mode 100644
index 0000000..30379a1
--- /dev/null
+++ b/venv/Lib/site-packages/beautifulsoup4-4.6.0.dist-info/DESCRIPTION.rst
@@ -0,0 +1,3 @@
+Beautiful Soup sits atop an HTML or XML parser, providing Pythonic idioms for iterating, searching, and modifying the parse tree.
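+
+A minimal usage sketch, using the stdlib ``html.parser`` backend:
+
+.. code-block:: python
+
+    from bs4 import BeautifulSoup
+
+    soup = BeautifulSoup("<p class='a b'>Hello</p>", "html.parser")
+    print(soup.p.get_text())  # Hello
+    print(soup.p['class'])    # ['a', 'b'] -- class is multi-valued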
+
+
diff --git a/venv/Lib/site-packages/beautifulsoup4-4.6.0.dist-info/INSTALLER b/venv/Lib/site-packages/beautifulsoup4-4.6.0.dist-info/INSTALLER
new file mode 100644
index 0000000..a1b589e
--- /dev/null
+++ b/venv/Lib/site-packages/beautifulsoup4-4.6.0.dist-info/INSTALLER
@@ -0,0 +1 @@
+pip
diff --git a/venv/Lib/site-packages/beautifulsoup4-4.6.0.dist-info/METADATA b/venv/Lib/site-packages/beautifulsoup4-4.6.0.dist-info/METADATA
new file mode 100644
index 0000000..cc2f64b
--- /dev/null
+++ b/venv/Lib/site-packages/beautifulsoup4-4.6.0.dist-info/METADATA
@@ -0,0 +1,28 @@
+Metadata-Version: 2.0
+Name: beautifulsoup4
+Version: 4.6.0
+Summary: Screen-scraping library
+Home-page: http://www.crummy.com/software/BeautifulSoup/bs4/
+Author: Leonard Richardson
+Author-email: leonardr@segfault.org
+License: MIT
+Download-URL: http://www.crummy.com/software/BeautifulSoup/bs4/download/
+Platform: UNKNOWN
+Classifier: Development Status :: 5 - Production/Stable
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Programming Language :: Python
+Classifier: Programming Language :: Python :: 2.7
+Classifier: Programming Language :: Python :: 3
+Classifier: Topic :: Text Processing :: Markup :: HTML
+Classifier: Topic :: Text Processing :: Markup :: XML
+Classifier: Topic :: Text Processing :: Markup :: SGML
+Classifier: Topic :: Software Development :: Libraries :: Python Modules
+Provides-Extra: html5lib
+Requires-Dist: html5lib; extra == 'html5lib'
+Provides-Extra: lxml
+Requires-Dist: lxml; extra == 'lxml'
+
+Beautiful Soup sits atop an HTML or XML parser, providing Pythonic idioms for iterating, searching, and modifying the parse tree.
+
+
diff --git a/venv/Lib/site-packages/beautifulsoup4-4.6.0.dist-info/RECORD b/venv/Lib/site-packages/beautifulsoup4-4.6.0.dist-info/RECORD
new file mode 100644
index 0000000..4a0db44
--- /dev/null
+++ b/venv/Lib/site-packages/beautifulsoup4-4.6.0.dist-info/RECORD
@@ -0,0 +1,42 @@
+beautifulsoup4-4.6.0.dist-info/DESCRIPTION.rst,sha256=HReC3om4K1XIgRKEVjgzdTEKfTTE3F83hEHkQQJwuU0,132
+beautifulsoup4-4.6.0.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
+beautifulsoup4-4.6.0.dist-info/METADATA,sha256=MygZZNNKIcXfK60bDPVe3zpyJeyqRoTy1PBE2uqb4TY,1109
+beautifulsoup4-4.6.0.dist-info/RECORD,,
+beautifulsoup4-4.6.0.dist-info/REQUESTED,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+beautifulsoup4-4.6.0.dist-info/WHEEL,sha256=dXGL5yz26tu5uNsUy9EBoBYhrvMYqmFH9Vm82OQUT-8,95
+beautifulsoup4-4.6.0.dist-info/metadata.json,sha256=5165aEwyWzVfTK8Rs17tqfMof2bpgGLGbjGAo24oFNc,1110
+beautifulsoup4-4.6.0.dist-info/top_level.txt,sha256=H8VT-IuPWLzQqwG9_eChjXDJ1z0H9RRebdSR90Bjnkw,4
+bs4/__init__.py,sha256=tO59vxn6pDf_-5iy_a_rG65rDph3TyY7wKqRu9zNQ_4,20394
+bs4/__pycache__/__init__.cpython-38.pyc,,
+bs4/__pycache__/dammit.cpython-38.pyc,,
+bs4/__pycache__/diagnose.cpython-38.pyc,,
+bs4/__pycache__/element.cpython-38.pyc,,
+bs4/__pycache__/testing.cpython-38.pyc,,
+bs4/builder/__init__.py,sha256=ECq2riLT2cie_wig7cTgMQU6YAuZ6cjZTFKWJC1st7g,11552
+bs4/builder/__pycache__/__init__.cpython-38.pyc,,
+bs4/builder/__pycache__/_html5lib.cpython-38.pyc,,
+bs4/builder/__pycache__/_htmlparser.cpython-38.pyc,,
+bs4/builder/__pycache__/_lxml.cpython-38.pyc,,
+bs4/builder/_html5lib.py,sha256=LZeT3YMgTWWKzydl48fkbwdB6IQC3nV67Ej_nbmJrcs,16688
+bs4/builder/_htmlparser.py,sha256=7ytwx-cp8Ju4nVvZqsUtL7bGNv0c3KgT44lbnyqQvHk,11609
+bs4/builder/_lxml.py,sha256=xPBXWAVfqRvWlAYoykIGVmp8JhhADxriYrXfmEjSqNE,9470
+bs4/dammit.py,sha256=T91drgzqXmIrH---Qm5tG_jvOqv1QaYdJPOt9lRJucw,29910
+bs4/diagnose.py,sha256=k_dyxYqq52gaikV1hfiwh0_PoWGbx2fsnjm5IAMS7PA,6773
+bs4/element.py,sha256=2EZ_aM5jWf7tREyxXjvziW23Q59tSijCacC8-hfyx5M,68798
+bs4/testing.py,sha256=GEdA91wNzzJ5XewPEgz1oCzDT1ihT-UYphgah2CyXDM,30800
+bs4/tests/__init__.py,sha256=bdUBDE750n7qNEfue7-3a1fBaUxJlvZMkvJvZa-lbYs,27
+bs4/tests/__pycache__/__init__.cpython-38.pyc,,
+bs4/tests/__pycache__/test_builder_registry.cpython-38.pyc,,
+bs4/tests/__pycache__/test_docs.cpython-38.pyc,,
+bs4/tests/__pycache__/test_html5lib.cpython-38.pyc,,
+bs4/tests/__pycache__/test_htmlparser.cpython-38.pyc,,
+bs4/tests/__pycache__/test_lxml.cpython-38.pyc,,
+bs4/tests/__pycache__/test_soup.cpython-38.pyc,,
+bs4/tests/__pycache__/test_tree.cpython-38.pyc,,
+bs4/tests/test_builder_registry.py,sha256=pllfRpArh9TYhjjRUiu1wITr9Ryyv4hiaAtRjij-k4E,5582
+bs4/tests/test_docs.py,sha256=FXfz2bGL4Xe0q6duwpmg9hmFiZuU4DVJPNZ0hTb6aH4,1067
+bs4/tests/test_html5lib.py,sha256=MYtpDf9mkYxHUNxBeRVmwqG5E0-R2b_NlpX3lOW30Zs,4907
+bs4/tests/test_htmlparser.py,sha256=22Ivw1wno80DD3j7NxZhc8rrBKSfgwBaAH4tzYIT7lM,1191
+bs4/tests/test_lxml.py,sha256=7ge3DMNPQIBibALPWvqn-hAdOBUCoQAVOHIu8rQhSFg,2379
+bs4/tests/test_soup.py,sha256=ugOafuY7DoCZFaxjMoBivnYNE_Iz0CRN2ZPkChLy6VY,20313
+bs4/tests/test_tree.py,sha256=psSal7EQIeDEoimz_DVloKKG1D_8CSAFJY-Yvov2bx0,78204
diff --git a/venv/Lib/site-packages/beautifulsoup4-4.6.0.dist-info/REQUESTED b/venv/Lib/site-packages/beautifulsoup4-4.6.0.dist-info/REQUESTED
new file mode 100644
index 0000000..e69de29
diff --git a/venv/Lib/site-packages/beautifulsoup4-4.6.0.dist-info/WHEEL b/venv/Lib/site-packages/beautifulsoup4-4.6.0.dist-info/WHEEL
new file mode 100644
index 0000000..a68f088
--- /dev/null
+++ b/venv/Lib/site-packages/beautifulsoup4-4.6.0.dist-info/WHEEL
@@ -0,0 +1,5 @@
+Wheel-Version: 1.0
+Generator: bdist_wheel (0.30.0.a0)
+Root-Is-Purelib: true
+Tag: py3-none-any
+
diff --git a/venv/Lib/site-packages/beautifulsoup4-4.6.0.dist-info/metadata.json b/venv/Lib/site-packages/beautifulsoup4-4.6.0.dist-info/metadata.json
new file mode 100644
index 0000000..830af35
--- /dev/null
+++ b/venv/Lib/site-packages/beautifulsoup4-4.6.0.dist-info/metadata.json
@@ -0,0 +1 @@
+{"classifiers": ["Development Status :: 5 - Production/Stable", "Intended Audience :: Developers", "License :: OSI Approved :: MIT License", "Programming Language :: Python", "Programming Language :: Python :: 2.7", "Programming Language :: Python :: 3", "Topic :: Text Processing :: Markup :: HTML", "Topic :: Text Processing :: Markup :: XML", "Topic :: Text Processing :: Markup :: SGML", "Topic :: Software Development :: Libraries :: Python Modules"], "download_url": "http://www.crummy.com/software/BeautifulSoup/bs4/download/", "extensions": {"python.details": {"contacts": [{"email": "leonardr@segfault.org", "name": "Leonard Richardson", "role": "author"}], "document_names": {"description": "DESCRIPTION.rst"}, "project_urls": {"Home": "http://www.crummy.com/software/BeautifulSoup/bs4/"}}}, "extras": ["html5lib", "lxml"], "generator": "bdist_wheel (0.30.0.a0)", "license": "MIT", "metadata_version": "2.0", "name": "beautifulsoup4", "run_requires": [{"extra": "html5lib", "requires": ["html5lib"]}, {"extra": "lxml", "requires": ["lxml"]}], "summary": "Screen-scraping library", "version": "4.6.0"}
\ No newline at end of file
diff --git a/venv/Lib/site-packages/beautifulsoup4-4.6.0.dist-info/top_level.txt b/venv/Lib/site-packages/beautifulsoup4-4.6.0.dist-info/top_level.txt
new file mode 100644
index 0000000..1315442
--- /dev/null
+++ b/venv/Lib/site-packages/beautifulsoup4-4.6.0.dist-info/top_level.txt
@@ -0,0 +1 @@
+bs4
diff --git a/venv/Lib/site-packages/bs4/__init__.py b/venv/Lib/site-packages/bs4/__init__.py
new file mode 100644
index 0000000..62e9e5d
--- /dev/null
+++ b/venv/Lib/site-packages/bs4/__init__.py
@@ -0,0 +1,529 @@
+"""Beautiful Soup
+Elixir and Tonic
+"The Screen-Scraper's Friend"
+http://www.crummy.com/software/BeautifulSoup/
+
+Beautiful Soup uses a pluggable XML or HTML parser to parse a
+(possibly invalid) document into a tree representation. Beautiful Soup
+provides methods and Pythonic idioms that make it easy to navigate,
+search, and modify the parse tree.
+
+Beautiful Soup works with Python 2.7 and up. It works better if lxml
+and/or html5lib is installed.
+
+For more than you ever wanted to know about Beautiful Soup, see the
+documentation:
+http://www.crummy.com/software/BeautifulSoup/bs4/doc/
+
+"""
+
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+__author__ = "Leonard Richardson (leonardr@segfault.org)"
+__version__ = "4.6.0"
+__copyright__ = "Copyright (c) 2004-2017 Leonard Richardson"
+__license__ = "MIT"
+
+__all__ = ['BeautifulSoup']
+
+import os
+import re
+import traceback
+import warnings
+
+from .builder import builder_registry, ParserRejectedMarkup
+from .dammit import UnicodeDammit
+from .element import (
+ CData,
+ Comment,
+ DEFAULT_OUTPUT_ENCODING,
+ Declaration,
+ Doctype,
+ NavigableString,
+ PageElement,
+ ProcessingInstruction,
+ ResultSet,
+ SoupStrainer,
+ Tag,
+ )
+
+# The very first thing we do is give a useful error if someone is
+# running this code under Python 3 without converting it.
+'You are trying to run the Python 2 version of Beautiful Soup under Python 3. This will not work.'!='You need to convert the code, either by installing it (`python setup.py install`) or by running 2to3 (`2to3 -w bs4`).'
+
+class BeautifulSoup(Tag):
+ """
+ This class defines the basic interface called by the tree builders.
+
+ These methods will be called by the parser:
+ reset()
+ feed(markup)
+
+ The tree builder may call these methods from its feed() implementation:
+ handle_starttag(name, attrs) # See note about return value
+ handle_endtag(name)
+ handle_data(data) # Appends to the current data node
+ endData(containerClass=NavigableString) # Ends the current data node
+
+ No matter how complicated the underlying parser is, you should be
+ able to build a tree using 'start tag' events, 'end tag' events,
+ 'data' events, and "done with data" events.
+
+ If you encounter an empty-element tag (aka a self-closing tag,
+    like HTML's <br> tag), call handle_starttag and then
+ handle_endtag.
+ """
+ ROOT_TAG_NAME = '[document]'
+
+ # If the end-user gives no indication which tree builder they
+ # want, look for one with these features.
+ DEFAULT_BUILDER_FEATURES = ['html', 'fast']
+
+ ASCII_SPACES = '\x20\x0a\x09\x0c\x0d'
+
+    NO_PARSER_SPECIFIED_WARNING = "No parser was explicitly specified, so I'm using the best available %(markup_type)s parser for this system (\"%(parser)s\"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.\n\nThe code that caused this warning is on line %(line_number)s of the file %(filename)s. To get rid of this warning, change code that looks like this:\n\n BeautifulSoup(YOUR_MARKUP)\n\nto this:\n\n BeautifulSoup(YOUR_MARKUP, \"%(parser)s\")\n"
+
+ def __init__(self, markup="", features=None, builder=None,
+ parse_only=None, from_encoding=None, exclude_encodings=None,
+ **kwargs):
+ """The Soup object is initialized as the 'root tag', and the
+ provided markup (which can be a string or a file-like object)
+ is fed into the underlying parser."""
+
+ if 'convertEntities' in kwargs:
+ warnings.warn(
+ "BS4 does not respect the convertEntities argument to the "
+ "BeautifulSoup constructor. Entities are always converted "
+ "to Unicode characters.")
+
+ if 'markupMassage' in kwargs:
+ del kwargs['markupMassage']
+ warnings.warn(
+ "BS4 does not respect the markupMassage argument to the "
+ "BeautifulSoup constructor. The tree builder is responsible "
+ "for any necessary markup massage.")
+
+ if 'smartQuotesTo' in kwargs:
+ del kwargs['smartQuotesTo']
+ warnings.warn(
+ "BS4 does not respect the smartQuotesTo argument to the "
+ "BeautifulSoup constructor. Smart quotes are always converted "
+ "to Unicode characters.")
+
+ if 'selfClosingTags' in kwargs:
+ del kwargs['selfClosingTags']
+ warnings.warn(
+ "BS4 does not respect the selfClosingTags argument to the "
+ "BeautifulSoup constructor. The tree builder is responsible "
+ "for understanding self-closing tags.")
+
+ if 'isHTML' in kwargs:
+ del kwargs['isHTML']
+ warnings.warn(
+ "BS4 does not respect the isHTML argument to the "
+ "BeautifulSoup constructor. Suggest you use "
+ "features='lxml' for HTML and features='lxml-xml' for "
+ "XML.")
+
+ def deprecated_argument(old_name, new_name):
+ if old_name in kwargs:
+ warnings.warn(
+ 'The "%s" argument to the BeautifulSoup constructor '
+ 'has been renamed to "%s."' % (old_name, new_name))
+ value = kwargs[old_name]
+ del kwargs[old_name]
+ return value
+ return None
+
+ parse_only = parse_only or deprecated_argument(
+ "parseOnlyThese", "parse_only")
+
+ from_encoding = from_encoding or deprecated_argument(
+ "fromEncoding", "from_encoding")
+
+ if from_encoding and isinstance(markup, str):
+ warnings.warn("You provided Unicode markup but also provided a value for from_encoding. Your from_encoding will be ignored.")
+ from_encoding = None
+
+ if len(kwargs) > 0:
+ arg = list(kwargs.keys()).pop()
+ raise TypeError(
+ "__init__() got an unexpected keyword argument '%s'" % arg)
+
+ if builder is None:
+ original_features = features
+ if isinstance(features, str):
+ features = [features]
+ if features is None or len(features) == 0:
+ features = self.DEFAULT_BUILDER_FEATURES
+ builder_class = builder_registry.lookup(*features)
+ if builder_class is None:
+ raise FeatureNotFound(
+ "Couldn't find a tree builder with the features you "
+ "requested: %s. Do you need to install a parser library?"
+ % ",".join(features))
+ builder = builder_class()
+ if not (original_features == builder.NAME or
+ original_features in builder.ALTERNATE_NAMES):
+ if builder.is_xml:
+ markup_type = "XML"
+ else:
+ markup_type = "HTML"
+
+ caller = traceback.extract_stack()[0]
+ filename = caller[0]
+ line_number = caller[1]
+ warnings.warn(self.NO_PARSER_SPECIFIED_WARNING % dict(
+ filename=filename,
+ line_number=line_number,
+ parser=builder.NAME,
+ markup_type=markup_type))
+
+ self.builder = builder
+ self.is_xml = builder.is_xml
+ self.known_xml = self.is_xml
+ self.builder.soup = self
+
+ self.parse_only = parse_only
+
+ if hasattr(markup, 'read'): # It's a file-type object.
+ markup = markup.read()
+ elif len(markup) <= 256 and (
+ (isinstance(markup, bytes) and not b'<' in markup)
+ or (isinstance(markup, str) and not '<' in markup)
+ ):
+ # Print out warnings for a couple beginner problems
+ # involving passing non-markup to Beautiful Soup.
+ # Beautiful Soup will still parse the input as markup,
+ # just in case that's what the user really wants.
+ if (isinstance(markup, str)
+ and not os.path.supports_unicode_filenames):
+ possible_filename = markup.encode("utf8")
+ else:
+ possible_filename = markup
+ is_file = False
+ try:
+ is_file = os.path.exists(possible_filename)
+ except Exception as e:
+ # This is almost certainly a problem involving
+ # characters not valid in filenames on this
+ # system. Just let it go.
+ pass
+ if is_file:
+ if isinstance(markup, str):
+ markup = markup.encode("utf8")
+ warnings.warn(
+ '"%s" looks like a filename, not markup. You should'
+ ' probably open this file and pass the filehandle into'
+ ' Beautiful Soup.' % markup)
+ self._check_markup_is_url(markup)
+
+ for (self.markup, self.original_encoding, self.declared_html_encoding,
+ self.contains_replacement_characters) in (
+ self.builder.prepare_markup(
+ markup, from_encoding, exclude_encodings=exclude_encodings)):
+ self.reset()
+ try:
+ self._feed()
+ break
+ except ParserRejectedMarkup:
+ pass
+
+ # Clear out the markup and remove the builder's circular
+ # reference to this object.
+ self.markup = None
+ self.builder.soup = None
+
+ def __copy__(self):
+ copy = type(self)(
+ self.encode('utf-8'), builder=self.builder, from_encoding='utf-8'
+ )
+
+ # Although we encoded the tree to UTF-8, that may not have
+ # been the encoding of the original markup. Set the copy's
+ # .original_encoding to reflect the original object's
+ # .original_encoding.
+ copy.original_encoding = self.original_encoding
+ return copy
+
+ def __getstate__(self):
+ # Frequently a tree builder can't be pickled.
+ d = dict(self.__dict__)
+ if 'builder' in d and not self.builder.picklable:
+ d['builder'] = None
+ return d
+
+ @staticmethod
+ def _check_markup_is_url(markup):
+ """
+ Check if markup looks like it's actually a url and raise a warning
+ if so. Markup can be unicode or str (py2) / bytes (py3).
+ """
+ if isinstance(markup, bytes):
+ space = b' '
+ cant_start_with = (b"http:", b"https:")
+ elif isinstance(markup, str):
+ space = ' '
+ cant_start_with = ("http:", "https:")
+ else:
+ return
+
+ if any(markup.startswith(prefix) for prefix in cant_start_with):
+ if not space in markup:
+ if isinstance(markup, bytes):
+ decoded_markup = markup.decode('utf-8', 'replace')
+ else:
+ decoded_markup = markup
+ warnings.warn(
+ '"%s" looks like a URL. Beautiful Soup is not an'
+ ' HTTP client. You should probably use an HTTP client like'
+ ' requests to get the document behind the URL, and feed'
+ ' that document to Beautiful Soup.' % decoded_markup
+ )
+
+ def _feed(self):
+ # Convert the document to Unicode.
+ self.builder.reset()
+
+ self.builder.feed(self.markup)
+ # Close out any unfinished strings and close all the open tags.
+ self.endData()
+ while self.currentTag.name != self.ROOT_TAG_NAME:
+ self.popTag()
+
+ def reset(self):
+ Tag.__init__(self, self, self.builder, self.ROOT_TAG_NAME)
+ self.hidden = 1
+ self.builder.reset()
+ self.current_data = []
+ self.currentTag = None
+ self.tagStack = []
+ self.preserve_whitespace_tag_stack = []
+ self.pushTag(self)
+
+ def new_tag(self, name, namespace=None, nsprefix=None, **attrs):
+ """Create a new tag associated with this soup."""
+ return Tag(None, self.builder, name, namespace, nsprefix, attrs)
+
+ def new_string(self, s, subclass=NavigableString):
+ """Create a new NavigableString associated with this soup."""
+ return subclass(s)
+
+ def insert_before(self, successor):
+ raise NotImplementedError("BeautifulSoup objects don't support insert_before().")
+
+ def insert_after(self, successor):
+ raise NotImplementedError("BeautifulSoup objects don't support insert_after().")
+
+ def popTag(self):
+ tag = self.tagStack.pop()
+ if self.preserve_whitespace_tag_stack and tag == self.preserve_whitespace_tag_stack[-1]:
+ self.preserve_whitespace_tag_stack.pop()
+ #print "Pop", tag.name
+ if self.tagStack:
+ self.currentTag = self.tagStack[-1]
+ return self.currentTag
+
+ def pushTag(self, tag):
+ #print "Push", tag.name
+ if self.currentTag:
+ self.currentTag.contents.append(tag)
+ self.tagStack.append(tag)
+ self.currentTag = self.tagStack[-1]
+ if tag.name in self.builder.preserve_whitespace_tags:
+ self.preserve_whitespace_tag_stack.append(tag)
+
+ def endData(self, containerClass=NavigableString):
+ if self.current_data:
+ current_data = ''.join(self.current_data)
+ # If whitespace is not preserved, and this string contains
+ # nothing but ASCII spaces, replace it with a single space
+ # or newline.
+ if not self.preserve_whitespace_tag_stack:
+ strippable = True
+ for i in current_data:
+ if i not in self.ASCII_SPACES:
+ strippable = False
+ break
+ if strippable:
+ if '\n' in current_data:
+ current_data = '\n'
+ else:
+ current_data = ' '
+
+ # Reset the data collector.
+ self.current_data = []
+
+ # Should we add this string to the tree at all?
+ if self.parse_only and len(self.tagStack) <= 1 and \
+ (not self.parse_only.text or \
+ not self.parse_only.search(current_data)):
+ return
+
+ o = containerClass(current_data)
+ self.object_was_parsed(o)
+
+ def object_was_parsed(self, o, parent=None, most_recent_element=None):
+ """Add an object to the parse tree."""
+ parent = parent or self.currentTag
+ previous_element = most_recent_element or self._most_recent_element
+
+ next_element = previous_sibling = next_sibling = None
+ if isinstance(o, Tag):
+ next_element = o.next_element
+ next_sibling = o.next_sibling
+ previous_sibling = o.previous_sibling
+ if not previous_element:
+ previous_element = o.previous_element
+
+ o.setup(parent, previous_element, next_element, previous_sibling, next_sibling)
+
+ self._most_recent_element = o
+ parent.contents.append(o)
+
+ if parent.next_sibling:
+ # This node is being inserted into an element that has
+ # already been parsed. Deal with any dangling references.
+ index = len(parent.contents)-1
+ while index >= 0:
+ if parent.contents[index] is o:
+ break
+ index -= 1
+ else:
+ raise ValueError(
+ "Error building tree: supposedly %r was inserted "
+ "into %r after the fact, but I don't see it!" % (
+ o, parent
+ )
+ )
+ if index == 0:
+ previous_element = parent
+ previous_sibling = None
+ else:
+ previous_element = previous_sibling = parent.contents[index-1]
+ if index == len(parent.contents)-1:
+ next_element = parent.next_sibling
+ next_sibling = None
+ else:
+ next_element = next_sibling = parent.contents[index+1]
+
+ o.previous_element = previous_element
+ if previous_element:
+ previous_element.next_element = o
+ o.next_element = next_element
+ if next_element:
+ next_element.previous_element = o
+ o.next_sibling = next_sibling
+ if next_sibling:
+ next_sibling.previous_sibling = o
+ o.previous_sibling = previous_sibling
+ if previous_sibling:
+ previous_sibling.next_sibling = o
+
+ def _popToTag(self, name, nsprefix=None, inclusivePop=True):
+ """Pops the tag stack up to and including the most recent
+ instance of the given tag. If inclusivePop is false, pops the tag
+ stack up to but *not* including the most recent instqance of
+ the given tag."""
+ #print "Popping to %s" % name
+ if name == self.ROOT_TAG_NAME:
+ # The BeautifulSoup object itself can never be popped.
+ return
+
+ most_recently_popped = None
+
+ stack_size = len(self.tagStack)
+ for i in range(stack_size - 1, 0, -1):
+ t = self.tagStack[i]
+ if (name == t.name and nsprefix == t.prefix):
+ if inclusivePop:
+ most_recently_popped = self.popTag()
+ break
+ most_recently_popped = self.popTag()
+
+ return most_recently_popped
+
+ def handle_starttag(self, name, namespace, nsprefix, attrs):
+ """Push a start tag on to the stack.
+
+ If this method returns None, the tag was rejected by the
+ SoupStrainer. You should proceed as if the tag had not occurred
+ in the document. For instance, if this was a self-closing tag,
+ don't call handle_endtag.
+ """
+
+ # print "Start tag %s: %s" % (name, attrs)
+ self.endData()
+
+ if (self.parse_only and len(self.tagStack) <= 1
+ and (self.parse_only.text
+ or not self.parse_only.search_tag(name, attrs))):
+ return None
+
+ tag = Tag(self, self.builder, name, namespace, nsprefix, attrs,
+ self.currentTag, self._most_recent_element)
+ if tag is None:
+ return tag
+ if self._most_recent_element:
+ self._most_recent_element.next_element = tag
+ self._most_recent_element = tag
+ self.pushTag(tag)
+ return tag
+
+ def handle_endtag(self, name, nsprefix=None):
+ #print "End tag: " + name
+ self.endData()
+ self._popToTag(name, nsprefix)
+
+ def handle_data(self, data):
+ self.current_data.append(data)
+
+ def decode(self, pretty_print=False,
+ eventual_encoding=DEFAULT_OUTPUT_ENCODING,
+ formatter="minimal"):
+ """Returns a string or Unicode representation of this document.
+ To get Unicode, pass None for encoding."""
+
+ if self.is_xml:
+ # Print the XML declaration
+ encoding_part = ''
+ if eventual_encoding != None:
+ encoding_part = ' encoding="%s"' % eventual_encoding
+            prefix = '<?xml version="1.0"%s?>\n' % encoding_part
+ else:
+ prefix = ''
+ if not pretty_print:
+ indent_level = None
+ else:
+ indent_level = 0
+ return prefix + super(BeautifulSoup, self).decode(
+ indent_level, eventual_encoding, formatter)
+
+# Alias to make it easier to type import: 'from bs4 import _soup'
+_s = BeautifulSoup
+_soup = BeautifulSoup
+
+class BeautifulStoneSoup(BeautifulSoup):
+ """Deprecated interface to an XML parser."""
+
+ def __init__(self, *args, **kwargs):
+ kwargs['features'] = 'xml'
+ warnings.warn(
+ 'The BeautifulStoneSoup class is deprecated. Instead of using '
+ 'it, pass features="xml" into the BeautifulSoup constructor.')
+ super(BeautifulStoneSoup, self).__init__(*args, **kwargs)
+
+
+class StopParsing(Exception):
+ pass
+
+class FeatureNotFound(ValueError):
+ pass
+
+
+#By default, act as an HTML pretty-printer.
+if __name__ == '__main__':
+ import sys
+ soup = BeautifulSoup(sys.stdin)
+ print(soup.prettify())
diff --git a/venv/Lib/site-packages/bs4/__pycache__/__init__.cpython-38.pyc b/venv/Lib/site-packages/bs4/__pycache__/__init__.cpython-38.pyc
new file mode 100644
index 0000000..c306482
Binary files /dev/null and b/venv/Lib/site-packages/bs4/__pycache__/__init__.cpython-38.pyc differ
diff --git a/venv/Lib/site-packages/bs4/__pycache__/dammit.cpython-38.pyc b/venv/Lib/site-packages/bs4/__pycache__/dammit.cpython-38.pyc
new file mode 100644
index 0000000..d5ede25
Binary files /dev/null and b/venv/Lib/site-packages/bs4/__pycache__/dammit.cpython-38.pyc differ
diff --git a/venv/Lib/site-packages/bs4/__pycache__/diagnose.cpython-38.pyc b/venv/Lib/site-packages/bs4/__pycache__/diagnose.cpython-38.pyc
new file mode 100644
index 0000000..f64aba2
Binary files /dev/null and b/venv/Lib/site-packages/bs4/__pycache__/diagnose.cpython-38.pyc differ
diff --git a/venv/Lib/site-packages/bs4/__pycache__/element.cpython-38.pyc b/venv/Lib/site-packages/bs4/__pycache__/element.cpython-38.pyc
new file mode 100644
index 0000000..fb535e2
Binary files /dev/null and b/venv/Lib/site-packages/bs4/__pycache__/element.cpython-38.pyc differ
diff --git a/venv/Lib/site-packages/bs4/__pycache__/testing.cpython-38.pyc b/venv/Lib/site-packages/bs4/__pycache__/testing.cpython-38.pyc
new file mode 100644
index 0000000..8baa71d
Binary files /dev/null and b/venv/Lib/site-packages/bs4/__pycache__/testing.cpython-38.pyc differ
diff --git a/venv/Lib/site-packages/bs4/builder/__init__.py b/venv/Lib/site-packages/bs4/builder/__init__.py
new file mode 100644
index 0000000..4d6f96d
--- /dev/null
+++ b/venv/Lib/site-packages/bs4/builder/__init__.py
@@ -0,0 +1,333 @@
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+from collections import defaultdict
+import itertools
+import sys
+from bs4.element import (
+ CharsetMetaAttributeValue,
+ ContentMetaAttributeValue,
+ HTMLAwareEntitySubstitution,
+ whitespace_re
+ )
+
+__all__ = [
+ 'HTMLTreeBuilder',
+ 'SAXTreeBuilder',
+ 'TreeBuilder',
+ 'TreeBuilderRegistry',
+ ]
+
+# Some useful features for a TreeBuilder to have.
+FAST = 'fast'
+PERMISSIVE = 'permissive'
+STRICT = 'strict'
+XML = 'xml'
+HTML = 'html'
+HTML_5 = 'html5'
+
+
+class TreeBuilderRegistry(object):
+
+ def __init__(self):
+ self.builders_for_feature = defaultdict(list)
+ self.builders = []
+
+ def register(self, treebuilder_class):
+ """Register a treebuilder based on its advertised features."""
+ for feature in treebuilder_class.features:
+ self.builders_for_feature[feature].insert(0, treebuilder_class)
+ self.builders.insert(0, treebuilder_class)
+
+ def lookup(self, *features):
+ if len(self.builders) == 0:
+ # There are no builders at all.
+ return None
+
+ if len(features) == 0:
+ # They didn't ask for any features. Give them the most
+ # recently registered builder.
+ return self.builders[0]
+
+ # Go down the list of features in order, and eliminate any builders
+ # that don't match every feature.
+ features = list(features)
+ features.reverse()
+ candidates = None
+ candidate_set = None
+ while len(features) > 0:
+ feature = features.pop()
+ we_have_the_feature = self.builders_for_feature.get(feature, [])
+ if len(we_have_the_feature) > 0:
+ if candidates is None:
+ candidates = we_have_the_feature
+ candidate_set = set(candidates)
+ else:
+ # Eliminate any candidates that don't have this feature.
+ candidate_set = candidate_set.intersection(
+ set(we_have_the_feature))
+
+ # The only valid candidates are the ones in candidate_set.
+ # Go through the original list of candidates and pick the first one
+ # that's in candidate_set.
+ if candidate_set is None:
+ return None
+ for candidate in candidates:
+ if candidate in candidate_set:
+ return candidate
+ return None
+
+# The BeautifulSoup class will take feature lists from developers and use them
+# to look up builders in this registry.
+builder_registry = TreeBuilderRegistry()
+
+class TreeBuilder(object):
+ """Turn a document into a Beautiful Soup object tree."""
+
+ NAME = "[Unknown tree builder]"
+ ALTERNATE_NAMES = []
+ features = []
+
+ is_xml = False
+ picklable = False
+ preserve_whitespace_tags = set()
+ empty_element_tags = None # A tag will be considered an empty-element
+ # tag when and only when it has no contents.
+
+ # A value for these tag/attribute combinations is a space- or
+ # comma-separated list of CDATA, rather than a single CDATA.
+ cdata_list_attributes = {}
+
+
+ def __init__(self):
+ self.soup = None
+
+ def reset(self):
+ pass
+
+ def can_be_empty_element(self, tag_name):
+ """Might a tag with this name be an empty-element tag?
+
+ The final markup may or may not actually present this tag as
+ self-closing.
+
+    For instance: an HTMLBuilder does not consider a <p> tag to be
+    an empty-element tag (it's not in
+    HTMLBuilder.empty_element_tags). This means an empty <p> tag
+    will be presented as "<p></p>", not "<p/>".
+
+ The default implementation has no opinion about which tags are
+ empty-element tags, so a tag will be presented as an
+ empty-element tag if and only if it has no contents.
+ "" will become "", and "bar" will
+ be left alone.
+ """
+ if self.empty_element_tags is None:
+ return True
+ return tag_name in self.empty_element_tags
+
+ def feed(self, markup):
+ raise NotImplementedError()
+
+ def prepare_markup(self, markup, user_specified_encoding=None,
+ document_declared_encoding=None):
+ return markup, None, None, False
+
+ def test_fragment_to_document(self, fragment):
+ """Wrap an HTML fragment to make it look like a document.
+
+ Different parsers do this differently. For instance, lxml
+    introduces an empty <head> tag, and html5lib
+ doesn't. Abstracting this away lets us write simple tests
+ which run HTML fragments through the parser and compare the
+ results against other HTML fragments.
+
+ This method should not be used outside of tests.
+ """
+ return fragment
+
+ def set_up_substitutions(self, tag):
+ return False
+
+ def _replace_cdata_list_attribute_values(self, tag_name, attrs):
+ """Replaces class="foo bar" with class=["foo", "bar"]
+
+ Modifies its input in place.
+ """
+ if not attrs:
+ return attrs
+ if self.cdata_list_attributes:
+ universal = self.cdata_list_attributes.get('*', [])
+ tag_specific = self.cdata_list_attributes.get(
+ tag_name.lower(), None)
+ for attr in list(attrs.keys()):
+ if attr in universal or (tag_specific and attr in tag_specific):
+ # We have a "class"-type attribute whose string
+ # value is a whitespace-separated list of
+ # values. Split it into a list.
+ value = attrs[attr]
+ if isinstance(value, str):
+ values = whitespace_re.split(value)
+ else:
+ # html5lib sometimes calls setAttributes twice
+ # for the same tag when rearranging the parse
+ # tree. On the second call the attribute value
+ # here is already a list. If this happens,
+ # leave the value alone rather than trying to
+ # split it again.
+ values = value
+ attrs[attr] = values
+ return attrs
+
+class SAXTreeBuilder(TreeBuilder):
+ """A Beautiful Soup treebuilder that listens for SAX events."""
+
+ def feed(self, markup):
+ raise NotImplementedError()
+
+ def close(self):
+ pass
+
+ def startElement(self, name, attrs):
+ attrs = dict((key[1], value) for key, value in list(attrs.items()))
+ #print "Start %s, %r" % (name, attrs)
+ self.soup.handle_starttag(name, attrs)
+
+ def endElement(self, name):
+ #print "End %s" % name
+ self.soup.handle_endtag(name)
+
+ def startElementNS(self, nsTuple, nodeName, attrs):
+ # Throw away (ns, nodeName) for now.
+ self.startElement(nodeName, attrs)
+
+ def endElementNS(self, nsTuple, nodeName):
+ # Throw away (ns, nodeName) for now.
+ self.endElement(nodeName)
+ #handler.endElementNS((ns, node.nodeName), node.nodeName)
+
+ def startPrefixMapping(self, prefix, nodeValue):
+ # Ignore the prefix for now.
+ pass
+
+ def endPrefixMapping(self, prefix):
+ # Ignore the prefix for now.
+ # handler.endPrefixMapping(prefix)
+ pass
+
+ def characters(self, content):
+ self.soup.handle_data(content)
+
+ def startDocument(self):
+ pass
+
+ def endDocument(self):
+ pass
+
+
+class HTMLTreeBuilder(TreeBuilder):
+ """This TreeBuilder knows facts about HTML.
+
+ Such as which tags are empty-element tags.
+ """
+
+ preserve_whitespace_tags = HTMLAwareEntitySubstitution.preserve_whitespace_tags
+ empty_element_tags = set([
+ # These are from HTML5.
+ 'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'keygen', 'link', 'menuitem', 'meta', 'param', 'source', 'track', 'wbr',
+
+ # These are from HTML4, removed in HTML5.
+ 'spacer', 'frame'
+ ])
+
+ # The HTML standard defines these attributes as containing a
+ # space-separated list of values, not a single value. That is,
+ # class="foo bar" means that the 'class' attribute has two values,
+ # 'foo' and 'bar', not the single value 'foo bar'. When we
+ # encounter one of these attributes, we will parse its value into
+ # a list of values if possible. Upon output, the list will be
+ # converted back into a string.
+ cdata_list_attributes = {
+ "*" : ['class', 'accesskey', 'dropzone'],
+ "a" : ['rel', 'rev'],
+ "link" : ['rel', 'rev'],
+ "td" : ["headers"],
+ "th" : ["headers"],
+ "td" : ["headers"],
+ "form" : ["accept-charset"],
+ "object" : ["archive"],
+
+ # These are HTML5 specific, as are *.accesskey and *.dropzone above.
+ "area" : ["rel"],
+ "icon" : ["sizes"],
+ "iframe" : ["sandbox"],
+ "output" : ["for"],
+ }
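+
+ # For example (html.parser shown; the splitting happens in
+ # _replace_cdata_list_attribute_values above):
+ # BeautifulSoup('<td headers="a b"></td>', 'html.parser').td['headers']
+ # evaluates to ['a', 'b'].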
+
+ def set_up_substitutions(self, tag):
+ # We are only interested in <meta> tags
+ if tag.name != 'meta':
+ return False
+
+ http_equiv = tag.get('http-equiv')
+ content = tag.get('content')
+ charset = tag.get('charset')
+
+ # We are interested in <meta> tags that say what encoding the
+ # document was originally in. This means HTML 5-style <meta>
+ # tags that provide the "charset" attribute. It also means
+ # HTML 4-style <meta> tags that provide the "content"
+ # attribute and have "http-equiv" set to "content-type".
+ #
+ # In both cases we will replace the value of the appropriate
+ # attribute with a standin object that can take on any
+ # encoding.
+ meta_encoding = None
+ if charset is not None:
+ # HTML 5 style:
+ # <meta charset="utf8">
+ meta_encoding = charset
+ tag['charset'] = CharsetMetaAttributeValue(charset)
+
+ elif (content is not None and http_equiv is not None
+ and http_equiv.lower() == 'content-type'):
+ # HTML 4 style:
+ # <meta http-equiv="content-type" content="text/html;charset=utf8">
+ tag['content'] = ContentMetaAttributeValue(content)
+
+ return (meta_encoding is not None)
+
+def register_treebuilders_from(module):
+ """Copy TreeBuilders from the given module into this module."""
+ # I'm fairly sure this is not the best way to do this.
+ this_module = sys.modules['bs4.builder']
+ for name in module.__all__:
+ obj = getattr(module, name)
+
+ if issubclass(obj, TreeBuilder):
+ setattr(this_module, name, obj)
+ this_module.__all__.append(name)
+ # Register the builder while we're at it.
+ this_module.builder_registry.register(obj)
+
+class ParserRejectedMarkup(Exception):
+ pass
+
+# Builders are registered in reverse order of priority, so that custom
+# builder registrations will take precedence. In general, we want lxml
+# to take precedence over html5lib, because it's faster. And we only
+# want to use HTMLParser as a last resort.
+from . import _htmlparser
+register_treebuilders_from(_htmlparser)
+try:
+ from . import _html5lib
+ register_treebuilders_from(_html5lib)
+except ImportError:
+ # They don't have html5lib installed.
+ pass
+try:
+ from . import _lxml
+ register_treebuilders_from(_lxml)
+except ImportError:
+ # They don't have lxml installed.
+ pass
diff --git a/venv/Lib/site-packages/bs4/builder/__pycache__/__init__.cpython-38.pyc b/venv/Lib/site-packages/bs4/builder/__pycache__/__init__.cpython-38.pyc
new file mode 100644
index 0000000..50c18ab
Binary files /dev/null and b/venv/Lib/site-packages/bs4/builder/__pycache__/__init__.cpython-38.pyc differ
diff --git a/venv/Lib/site-packages/bs4/builder/__pycache__/_html5lib.cpython-38.pyc b/venv/Lib/site-packages/bs4/builder/__pycache__/_html5lib.cpython-38.pyc
new file mode 100644
index 0000000..1e795ce
Binary files /dev/null and b/venv/Lib/site-packages/bs4/builder/__pycache__/_html5lib.cpython-38.pyc differ
diff --git a/venv/Lib/site-packages/bs4/builder/__pycache__/_htmlparser.cpython-38.pyc b/venv/Lib/site-packages/bs4/builder/__pycache__/_htmlparser.cpython-38.pyc
new file mode 100644
index 0000000..4a30866
Binary files /dev/null and b/venv/Lib/site-packages/bs4/builder/__pycache__/_htmlparser.cpython-38.pyc differ
diff --git a/venv/Lib/site-packages/bs4/builder/__pycache__/_lxml.cpython-38.pyc b/venv/Lib/site-packages/bs4/builder/__pycache__/_lxml.cpython-38.pyc
new file mode 100644
index 0000000..be81b2d
Binary files /dev/null and b/venv/Lib/site-packages/bs4/builder/__pycache__/_lxml.cpython-38.pyc differ
diff --git a/venv/Lib/site-packages/bs4/builder/_html5lib.py b/venv/Lib/site-packages/bs4/builder/_html5lib.py
new file mode 100644
index 0000000..d9d468f
--- /dev/null
+++ b/venv/Lib/site-packages/bs4/builder/_html5lib.py
@@ -0,0 +1,426 @@
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+__all__ = [
+ 'HTML5TreeBuilder',
+ ]
+
+import warnings
+import re
+from bs4.builder import (
+ PERMISSIVE,
+ HTML,
+ HTML_5,
+ HTMLTreeBuilder,
+ )
+from bs4.element import (
+ NamespacedAttribute,
+ whitespace_re,
+)
+import html5lib
+from html5lib.constants import (
+ namespaces,
+ prefixes,
+ )
+from bs4.element import (
+ Comment,
+ Doctype,
+ NavigableString,
+ Tag,
+ )
+
+try:
+ # Pre-0.99999999
+ from html5lib.treebuilders import _base as treebuilder_base
+ new_html5lib = False
+except ImportError as e:
+ # 0.99999999 and up
+ from html5lib.treebuilders import base as treebuilder_base
+ new_html5lib = True
+
+class HTML5TreeBuilder(HTMLTreeBuilder):
+ """Use html5lib to build a tree."""
+
+ NAME = "html5lib"
+
+ features = [NAME, PERMISSIVE, HTML_5, HTML]
+
+ def prepare_markup(self, markup, user_specified_encoding,
+ document_declared_encoding=None, exclude_encodings=None):
+ # Store the user-specified encoding for use later on.
+ self.user_specified_encoding = user_specified_encoding
+
+ # document_declared_encoding and exclude_encodings aren't used
+ # ATM because the html5lib TreeBuilder doesn't use
+ # UnicodeDammit.
+ if exclude_encodings:
+ warnings.warn("You provided a value for exclude_encoding, but the html5lib tree builder doesn't support exclude_encoding.")
+ yield (markup, None, None, False)
+
+ # These methods are defined by Beautiful Soup.
+ def feed(self, markup):
+ if self.soup.parse_only is not None:
+ warnings.warn("You provided a value for parse_only, but the html5lib tree builder doesn't support parse_only. The entire document will be parsed.")
+ parser = html5lib.HTMLParser(tree=self.create_treebuilder)
+
+ extra_kwargs = dict()
+ if not isinstance(markup, str):
+ if new_html5lib:
+ extra_kwargs['override_encoding'] = self.user_specified_encoding
+ else:
+ extra_kwargs['encoding'] = self.user_specified_encoding
+ doc = parser.parse(markup, **extra_kwargs)
+
+ # Set the character encoding detected by the tokenizer.
+ if isinstance(markup, str):
+ # We need to special-case this because html5lib sets
+ # charEncoding to UTF-8 if it gets Unicode input.
+ doc.original_encoding = None
+ else:
+ original_encoding = parser.tokenizer.stream.charEncoding[0]
+ if not isinstance(original_encoding, str):
+ # In 0.99999999 and up, the encoding is an html5lib
+ # Encoding object. We want to use a string for compatibility
+ # with other tree builders.
+ original_encoding = original_encoding.name
+ doc.original_encoding = original_encoding
+
+ def create_treebuilder(self, namespaceHTMLElements):
+ self.underlying_builder = TreeBuilderForHtml5lib(
+ namespaceHTMLElements, self.soup)
+ return self.underlying_builder
+
+ def test_fragment_to_document(self, fragment):
+ """See `TreeBuilder`."""
+ return '<html><head></head><body>%s</body></html>' % fragment
+
+
+class TreeBuilderForHtml5lib(treebuilder_base.TreeBuilder):
+
+ def __init__(self, namespaceHTMLElements, soup=None):
+ if soup:
+ self.soup = soup
+ else:
+ from bs4 import BeautifulSoup
+ self.soup = BeautifulSoup("", "html.parser")
+ super(TreeBuilderForHtml5lib, self).__init__(namespaceHTMLElements)
+
+ def documentClass(self):
+ self.soup.reset()
+ return Element(self.soup, self.soup, None)
+
+ def insertDoctype(self, token):
+ name = token["name"]
+ publicId = token["publicId"]
+ systemId = token["systemId"]
+
+ doctype = Doctype.for_name_and_ids(name, publicId, systemId)
+ self.soup.object_was_parsed(doctype)
+
+ def elementClass(self, name, namespace):
+ tag = self.soup.new_tag(name, namespace)
+ return Element(tag, self.soup, namespace)
+
+ def commentClass(self, data):
+ return TextNode(Comment(data), self.soup)
+
+ def fragmentClass(self):
+ from bs4 import BeautifulSoup
+ self.soup = BeautifulSoup("", "html.parser")
+ self.soup.name = "[document_fragment]"
+ return Element(self.soup, self.soup, None)
+
+ def appendChild(self, node):
+ # XXX This code is not covered by the BS4 tests.
+ self.soup.append(node.element)
+
+ def getDocument(self):
+ return self.soup
+
+ def getFragment(self):
+ return treebuilder_base.TreeBuilder.getFragment(self).element
+
+ def testSerializer(self, element):
+ from bs4 import BeautifulSoup
+ rv = []
+ doctype_re = re.compile(r'^(.*?)(?: PUBLIC "(.*?)"(?: "(.*?)")?| SYSTEM "(.*?)")?$')
+
+ def serializeElement(element, indent=0):
+ if isinstance(element, BeautifulSoup):
+ pass
+ if isinstance(element, Doctype):
+ m = doctype_re.match(element)
+ if m:
+ name = m.group(1)
+ if m.lastindex > 1:
+ publicId = m.group(2) or ""
+ systemId = m.group(3) or m.group(4) or ""
+ rv.append("""|%s""" %
+ (' ' * indent, name, publicId, systemId))
+ else:
+ rv.append("|%s" % (' ' * indent, name))
+ else:
+ rv.append("|%s" % (' ' * indent,))
+ elif isinstance(element, Comment):
+ rv.append("|%s" % (' ' * indent, element))
+ elif isinstance(element, NavigableString):
+ rv.append("|%s\"%s\"" % (' ' * indent, element))
+ else:
+ if element.namespace:
+ name = "%s %s" % (prefixes[element.namespace],
+ element.name)
+ else:
+ name = element.name
+ rv.append("|%s<%s>" % (' ' * indent, name))
+ if element.attrs:
+ attributes = []
+ for name, value in list(element.attrs.items()):
+ if isinstance(name, NamespacedAttribute):
+ name = "%s %s" % (prefixes[name.namespace], name.name)
+ if isinstance(value, list):
+ value = " ".join(value)
+ attributes.append((name, value))
+
+ for name, value in sorted(attributes):
+ rv.append('|%s%s="%s"' % (' ' * (indent + 2), name, value))
+ indent += 2
+ for child in element.children:
+ serializeElement(child, indent)
+ serializeElement(element, 0)
+
+ return "\n".join(rv)
+
+class AttrList(object):
+ def __init__(self, element):
+ self.element = element
+ self.attrs = dict(self.element.attrs)
+ def __iter__(self):
+ return list(self.attrs.items()).__iter__()
+ def __setitem__(self, name, value):
+ # If this attribute is a multi-valued attribute for this element,
+ # turn its value into a list.
+ list_attr = HTML5TreeBuilder.cdata_list_attributes
+ if (name in list_attr['*']
+ or (self.element.name in list_attr
+ and name in list_attr[self.element.name])):
+ # A node that is being cloned may have already undergone
+ # this procedure.
+ if not isinstance(value, list):
+ value = whitespace_re.split(value)
+ self.element[name] = value
+ def items(self):
+ return list(self.attrs.items())
+ def keys(self):
+ return list(self.attrs.keys())
+ def __len__(self):
+ return len(self.attrs)
+ def __getitem__(self, name):
+ return self.attrs[name]
+ def __contains__(self, name):
+ return name in list(self.attrs.keys())
+
+
+class Element(treebuilder_base.Node):
+ def __init__(self, element, soup, namespace):
+ treebuilder_base.Node.__init__(self, element.name)
+ self.element = element
+ self.soup = soup
+ self.namespace = namespace
+
+ def appendChild(self, node):
+ string_child = child = None
+ if isinstance(node, str):
+ # Some other piece of code decided to pass in a string
+ # instead of creating a TextElement object to contain the
+ # string.
+ string_child = child = node
+ elif isinstance(node, Tag):
+ # Some other piece of code decided to pass in a Tag
+ # instead of creating an Element object to contain the
+ # Tag.
+ child = node
+ elif node.element.__class__ == NavigableString:
+ string_child = child = node.element
+ node.parent = self
+ else:
+ child = node.element
+ node.parent = self
+
+ if not isinstance(child, str) and child.parent is not None:
+ node.element.extract()
+
+ if (string_child and self.element.contents
+ and self.element.contents[-1].__class__ == NavigableString):
+ # We are appending a string onto another string.
+ # TODO This has O(n^2) performance, for input like
+ # "aaa..."
+ old_element = self.element.contents[-1]
+ new_element = self.soup.new_string(old_element + string_child)
+ old_element.replace_with(new_element)
+ self.soup._most_recent_element = new_element
+ else:
+ if isinstance(node, str):
+ # Create a brand new NavigableString from this string.
+ child = self.soup.new_string(node)
+
+ # Tell Beautiful Soup to act as if it parsed this element
+ # immediately after the parent's last descendant. (Or
+ # immediately after the parent, if it has no children.)
+ if self.element.contents:
+ most_recent_element = self.element._last_descendant(False)
+ elif self.element.next_element is not None:
+ # Something from further ahead in the parse tree is
+ # being inserted into this earlier element. This is
+ # very annoying because it means an expensive search
+ # for the last element in the tree.
+ most_recent_element = self.soup._last_descendant()
+ else:
+ most_recent_element = self.element
+
+ self.soup.object_was_parsed(
+ child, parent=self.element,
+ most_recent_element=most_recent_element)
+
+ def getAttributes(self):
+ if isinstance(self.element, Comment):
+ return {}
+ return AttrList(self.element)
+
+ def setAttributes(self, attributes):
+
+ if attributes is not None and len(attributes) > 0:
+
+ converted_attributes = []
+ for name, value in list(attributes.items()):
+ if isinstance(name, tuple):
+ new_name = NamespacedAttribute(*name)
+ del attributes[name]
+ attributes[new_name] = value
+
+ self.soup.builder._replace_cdata_list_attribute_values(
+ self.name, attributes)
+ for name, value in list(attributes.items()):
+ self.element[name] = value
+
+ # The attributes may contain variables that need substitution.
+ # Call set_up_substitutions manually.
+ #
+ # The Tag constructor called this method when the Tag was created,
+ # but we just set/changed the attributes, so call it again.
+ self.soup.builder.set_up_substitutions(self.element)
+ attributes = property(getAttributes, setAttributes)
+
+ def insertText(self, data, insertBefore=None):
+ text = TextNode(self.soup.new_string(data), self.soup)
+ if insertBefore:
+ self.insertBefore(text, insertBefore)
+ else:
+ self.appendChild(text)
+
+ def insertBefore(self, node, refNode):
+ index = self.element.index(refNode.element)
+ if (node.element.__class__ == NavigableString and self.element.contents
+ and self.element.contents[index-1].__class__ == NavigableString):
+ # (See comments in appendChild)
+ old_node = self.element.contents[index-1]
+ new_str = self.soup.new_string(old_node + node.element)
+ old_node.replace_with(new_str)
+ else:
+ self.element.insert(index, node.element)
+ node.parent = self
+
+ def removeChild(self, node):
+ node.element.extract()
+
+ def reparentChildren(self, new_parent):
+ """Move all of this tag's children into another tag."""
+ # print "MOVE", self.element.contents
+ # print "FROM", self.element
+ # print "TO", new_parent.element
+
+ element = self.element
+ new_parent_element = new_parent.element
+ # Determine what this tag's next_element will be once all the children
+ # are removed.
+ final_next_element = element.next_sibling
+
+ new_parents_last_descendant = new_parent_element._last_descendant(False, False)
+ if len(new_parent_element.contents) > 0:
+ # The new parent already contains children. We will be
+ # appending this tag's children to the end.
+ new_parents_last_child = new_parent_element.contents[-1]
+ new_parents_last_descendant_next_element = new_parents_last_descendant.next_element
+ else:
+ # The new parent contains no children.
+ new_parents_last_child = None
+ new_parents_last_descendant_next_element = new_parent_element.next_element
+
+ to_append = element.contents
+ if len(to_append) > 0:
+ # Set the first child's previous_element and previous_sibling
+ # to elements within the new parent
+ first_child = to_append[0]
+ if new_parents_last_descendant:
+ first_child.previous_element = new_parents_last_descendant
+ else:
+ first_child.previous_element = new_parent_element
+ first_child.previous_sibling = new_parents_last_child
+ if new_parents_last_descendant:
+ new_parents_last_descendant.next_element = first_child
+ else:
+ new_parent_element.next_element = first_child
+ if new_parents_last_child:
+ new_parents_last_child.next_sibling = first_child
+
+ # Find the very last element being moved. It is now the
+ # parent's last descendant. It has no .next_sibling and
+ # its .next_element is whatever the previous last
+ # descendant had.
+ last_childs_last_descendant = to_append[-1]._last_descendant(False, True)
+
+ last_childs_last_descendant.next_element = new_parents_last_descendant_next_element
+ if new_parents_last_descendant_next_element:
+ # TODO: This code has no test coverage and I'm not sure
+ # how to get html5lib to go through this path, but it's
+ # just the other side of the previous line.
+ new_parents_last_descendant_next_element.previous_element = last_childs_last_descendant
+ last_childs_last_descendant.next_sibling = None
+
+ for child in to_append:
+ child.parent = new_parent_element
+ new_parent_element.contents.append(child)
+
+ # Now that this element has no children, change its .next_element.
+ element.contents = []
+ element.next_element = final_next_element
+
+ # print "DONE WITH MOVE"
+ # print "FROM", self.element
+ # print "TO", new_parent_element
+
+ def cloneNode(self):
+ tag = self.soup.new_tag(self.element.name, self.namespace)
+ node = Element(tag, self.soup, self.namespace)
+ for key,value in self.attributes:
+ node.attributes[key] = value
+ return node
+
+ def hasContent(self):
+ return self.element.contents
+
+ def getNameTuple(self):
+ if self.namespace is None:
+ return namespaces["html"], self.name
+ else:
+ return self.namespace, self.name
+
+ nameTuple = property(getNameTuple)
+
+class TextNode(Element):
+ def __init__(self, element, soup):
+ treebuilder_base.Node.__init__(self, None)
+ self.element = element
+ self.soup = soup
+
+ def cloneNode(self):
+ raise NotImplementedError
diff --git a/venv/Lib/site-packages/bs4/builder/_htmlparser.py b/venv/Lib/site-packages/bs4/builder/_htmlparser.py
new file mode 100644
index 0000000..907d355
--- /dev/null
+++ b/venv/Lib/site-packages/bs4/builder/_htmlparser.py
@@ -0,0 +1,314 @@
+"""Use the HTMLParser library to parse HTML files that aren't too bad."""
+
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+__all__ = [
+ 'HTMLParserTreeBuilder',
+ ]
+
+from html.parser import HTMLParser
+
+try:
+ from html.parser import HTMLParseError
+except ImportError as e:
+ # HTMLParseError is removed in Python 3.5. Since it can never be
+ # thrown in 3.5, we can just define our own class as a placeholder.
+ class HTMLParseError(Exception):
+ pass
+
+import sys
+import warnings
+
+# Starting in Python 3.2, the HTMLParser constructor takes a 'strict'
+# argument, which we'd like to set to False. Unfortunately,
+# http://bugs.python.org/issue13273 makes strict=True a better bet
+# before Python 3.2.3.
+#
+# At the end of this file, we monkeypatch HTMLParser so that
+# strict=True works well on Python 3.2.2.
+major, minor, release = sys.version_info[:3]
+CONSTRUCTOR_TAKES_STRICT = major == 3 and minor == 2 and release >= 3
+CONSTRUCTOR_STRICT_IS_DEPRECATED = major == 3 and minor == 3
+CONSTRUCTOR_TAKES_CONVERT_CHARREFS = major == 3 and minor >= 4
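+# (Under Python 3.8, which this vendored wheel targets, only
+# CONSTRUCTOR_TAKES_CONVERT_CHARREFS is True.)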
+
+
+from bs4.element import (
+ CData,
+ Comment,
+ Declaration,
+ Doctype,
+ ProcessingInstruction,
+ )
+from bs4.dammit import EntitySubstitution, UnicodeDammit
+
+from bs4.builder import (
+ HTML,
+ HTMLTreeBuilder,
+ STRICT,
+ )
+
+
+HTMLPARSER = 'html.parser'
+
+class BeautifulSoupHTMLParser(HTMLParser):
+
+ def __init__(self, *args, **kwargs):
+ HTMLParser.__init__(self, *args, **kwargs)
+
+ # Keep a list of empty-element tags that were encountered
+ # without an explicit closing tag. If we encounter a closing tag
+ # of this type, we'll associate it with one of those entries.
+ #
+ # This isn't a stack because we don't care about the
+ # order. It's a list of closing tags we've already handled and
+ # will ignore, assuming they ever show up.
+ self.already_closed_empty_element = []
+
+ def handle_startendtag(self, name, attrs):
+ # This is only called when the markup looks like
+ # <tag/>.
+
+ # is_startend() tells handle_starttag not to close the tag
+ # just because its name matches a known empty-element tag. We
+ # know that this is an empty-element tag and we want to call
+ # handle_endtag ourselves.
+ tag = self.handle_starttag(name, attrs, handle_empty_element=False)
+ self.handle_endtag(name)
+
+ def handle_starttag(self, name, attrs, handle_empty_element=True):
+ # XXX namespace
+ attr_dict = {}
+ for key, value in attrs:
+ # Change None attribute values to the empty string
+ # for consistency with the other tree builders.
+ if value is None:
+ value = ''
+ attr_dict[key] = value
+ attrvalue = '""'
+ #print "START", name
+ tag = self.soup.handle_starttag(name, None, None, attr_dict)
+ if tag and tag.is_empty_element and handle_empty_element:
+ # Unlike other parsers, html.parser doesn't send separate end tag
+ # events for empty-element tags. (It's handled in
+ # handle_startendtag, but only if the original markup looked like
+ # <tag/>.)
+ #
+ # So we need to call handle_endtag() ourselves. Since we
+ # know the start event is identical to the end event, we
+ # don't want handle_endtag() to cross off any previous end
+ # events for tags of this name.
+ self.handle_endtag(name, check_already_closed=False)
+
+ # But we might encounter an explicit closing tag for this tag
+ # later on. If so, we want to ignore it.
+ self.already_closed_empty_element.append(name)
+
+ def handle_endtag(self, name, check_already_closed=True):
+ #print "END", name
+ if check_already_closed and name in self.already_closed_empty_element:
+ # This is a redundant end tag for an empty-element tag.
+ # We've already called handle_endtag() for it, so just
+ # check it off the list.
+ # print "ALREADY CLOSED", name
+ self.already_closed_empty_element.remove(name)
+ else:
+ self.soup.handle_endtag(name)
+
+ def handle_data(self, data):
+ self.soup.handle_data(data)
+
+ def handle_charref(self, name):
+ # XXX workaround for a bug in HTMLParser. Remove this once
+ # it's fixed in all supported versions.
+ # http://bugs.python.org/issue13633
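+ # For example, the reference "&#x41;" arrives here as name='x41',
+ # so real_name becomes 0x41 and the data handled below is 'A'.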
+ if name.startswith('x'):
+ real_name = int(name.lstrip('x'), 16)
+ elif name.startswith('X'):
+ real_name = int(name.lstrip('X'), 16)
+ else:
+ real_name = int(name)
+
+ try:
+ data = chr(real_name)
+ except (ValueError, OverflowError) as e:
+ data = "\N{REPLACEMENT CHARACTER}"
+
+ self.handle_data(data)
+
+ def handle_entityref(self, name):
+ character = EntitySubstitution.HTML_ENTITY_TO_CHARACTER.get(name)
+ if character is not None:
+ data = character
+ else:
+ data = "&%s;" % name
+ self.handle_data(data)
+
+ def handle_comment(self, data):
+ self.soup.endData()
+ self.soup.handle_data(data)
+ self.soup.endData(Comment)
+
+ def handle_decl(self, data):
+ self.soup.endData()
+ if data.startswith("DOCTYPE "):
+ data = data[len("DOCTYPE "):]
+ elif data == 'DOCTYPE':
+ # i.e. ""
+ data = ''
+ self.soup.handle_data(data)
+ self.soup.endData(Doctype)
+
+ def unknown_decl(self, data):
+ if data.upper().startswith('CDATA['):
+ cls = CData
+ data = data[len('CDATA['):]
+ else:
+ cls = Declaration
+ self.soup.endData()
+ self.soup.handle_data(data)
+ self.soup.endData(cls)
+
+ def handle_pi(self, data):
+ self.soup.endData()
+ self.soup.handle_data(data)
+ self.soup.endData(ProcessingInstruction)
+
+
+class HTMLParserTreeBuilder(HTMLTreeBuilder):
+
+ is_xml = False
+ picklable = True
+ NAME = HTMLPARSER
+ features = [NAME, HTML, STRICT]
+
+ def __init__(self, *args, **kwargs):
+ if CONSTRUCTOR_TAKES_STRICT and not CONSTRUCTOR_STRICT_IS_DEPRECATED:
+ kwargs['strict'] = False
+ if CONSTRUCTOR_TAKES_CONVERT_CHARREFS:
+ kwargs['convert_charrefs'] = False
+ self.parser_args = (args, kwargs)
+
+ def prepare_markup(self, markup, user_specified_encoding=None,
+ document_declared_encoding=None, exclude_encodings=None):
+ """
+ :return: A 4-tuple (markup, original encoding, encoding
+ declared within markup, whether any characters had to be
+ replaced with REPLACEMENT CHARACTER).
+ """
+ if isinstance(markup, str):
+ yield (markup, None, None, False)
+ return
+
+ try_encodings = [user_specified_encoding, document_declared_encoding]
+ dammit = UnicodeDammit(markup, try_encodings, is_html=True,
+ exclude_encodings=exclude_encodings)
+ yield (dammit.markup, dammit.original_encoding,
+ dammit.declared_html_encoding,
+ dammit.contains_replacement_characters)
+
+ def feed(self, markup):
+ args, kwargs = self.parser_args
+ parser = BeautifulSoupHTMLParser(*args, **kwargs)
+ parser.soup = self.soup
+ try:
+ parser.feed(markup)
+ except HTMLParseError as e:
+ warnings.warn(RuntimeWarning(
+ "Python's built-in HTMLParser cannot parse the given document. This is not a bug in Beautiful Soup. The best solution is to install an external parser (lxml or html5lib), and use Beautiful Soup with that parser. See http://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-a-parser for help."))
+ raise e
+ parser.already_closed_empty_element = []
+
+# Patch 3.2 versions of HTMLParser earlier than 3.2.3 to use some
+# 3.2.3 code. This ensures they don't treat markup like
+# <a href="http://foo.com/"> as a string.
+#
+# XXX This code can be removed once most Python 3 users are on 3.2.3.
+if major == 3 and minor == 2 and not CONSTRUCTOR_TAKES_STRICT:
+ import re
+ attrfind_tolerant = re.compile(
+ r'\s*((?<=[\'"\s])[^\s/>][^\s/=>]*)(\s*=+\s*'
+ r'(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?')
+ HTMLParserTreeBuilder.attrfind_tolerant = attrfind_tolerant
+
+ locatestarttagend = re.compile(r"""
+ <[a-zA-Z][-.a-zA-Z0-9:_]* # tag name
+ (?:\s+ # whitespace before attribute name
+ (?:[a-zA-Z_][-.:a-zA-Z0-9_]* # attribute name
+ (?:\s*=\s* # value indicator
+ (?:'[^']*' # LITA-enclosed value
+ |\"[^\"]*\" # LIT-enclosed value
+ |[^'\">\s]+ # bare value
+ )
+ )?
+ )
+ )*
+ \s* # trailing whitespace
+""", re.VERBOSE)
+ BeautifulSoupHTMLParser.locatestarttagend = locatestarttagend
+
+ from html.parser import tagfind, attrfind
+
+ def parse_starttag(self, i):
+ self.__starttag_text = None
+ endpos = self.check_for_whole_start_tag(i)
+ if endpos < 0:
+ return endpos
+ rawdata = self.rawdata
+ self.__starttag_text = rawdata[i:endpos]
+
+ # Now parse the data between i+1 and j into a tag and attrs
+ attrs = []
+ match = tagfind.match(rawdata, i+1)
+ assert match, 'unexpected call to parse_starttag()'
+ k = match.end()
+ self.lasttag = tag = rawdata[i+1:k].lower()
+ while k < endpos:
+ if self.strict:
+ m = attrfind.match(rawdata, k)
+ else:
+ m = attrfind_tolerant.match(rawdata, k)
+ if not m:
+ break
+ attrname, rest, attrvalue = m.group(1, 2, 3)
+ if not rest:
+ attrvalue = None
+ elif attrvalue[:1] == '\'' == attrvalue[-1:] or \
+ attrvalue[:1] == '"' == attrvalue[-1:]:
+ attrvalue = attrvalue[1:-1]
+ if attrvalue:
+ attrvalue = self.unescape(attrvalue)
+ attrs.append((attrname.lower(), attrvalue))
+ k = m.end()
+
+ end = rawdata[k:endpos].strip()
+ if end not in (">", "/>"):
+ lineno, offset = self.getpos()
+ if "\n" in self.__starttag_text:
+ lineno = lineno + self.__starttag_text.count("\n")
+ offset = len(self.__starttag_text) \
+ - self.__starttag_text.rfind("\n")
+ else:
+ offset = offset + len(self.__starttag_text)
+ if self.strict:
+ self.error("junk characters in start tag: %r"
+ % (rawdata[k:endpos][:20],))
+ self.handle_data(rawdata[i:endpos])
+ return endpos
+ if end.endswith('/>'):
+ # XHTML-style empty tag: <span attr="value" />
+ self.handle_startendtag(tag, attrs)
+ else:
+ self.handle_starttag(tag, attrs)
+ if tag in self.CDATA_CONTENT_ELEMENTS:
+ self.set_cdata_mode(tag)
+ return endpos
+
+ def set_cdata_mode(self, elem):
+ self.cdata_elem = elem.lower()
+ self.interesting = re.compile(r'</\s*%s\s*>' % self.cdata_elem, re.I)
+
+ BeautifulSoupHTMLParser.parse_starttag = parse_starttag
+ BeautifulSoupHTMLParser.set_cdata_mode = set_cdata_mode
+
+ CONSTRUCTOR_TAKES_STRICT = True
diff --git a/venv/Lib/site-packages/bs4/builder/_lxml.py b/venv/Lib/site-packages/bs4/builder/_lxml.py
new file mode 100644
index 0000000..244d457
--- /dev/null
+++ b/venv/Lib/site-packages/bs4/builder/_lxml.py
@@ -0,0 +1,258 @@
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+__all__ = [
+ 'LXMLTreeBuilderForXML',
+ 'LXMLTreeBuilder',
+ ]
+
+from io import BytesIO
+from io import StringIO
+import collections
+from lxml import etree
+from bs4.element import (
+ Comment,
+ Doctype,
+ NamespacedAttribute,
+ ProcessingInstruction,
+ XMLProcessingInstruction,
+)
+from bs4.builder import (
+ FAST,
+ HTML,
+ HTMLTreeBuilder,
+ PERMISSIVE,
+ ParserRejectedMarkup,
+ TreeBuilder,
+ XML)
+from bs4.dammit import EncodingDetector
+
+LXML = 'lxml'
+
+class LXMLTreeBuilderForXML(TreeBuilder):
+ DEFAULT_PARSER_CLASS = etree.XMLParser
+
+ is_xml = True
+ processing_instruction_class = XMLProcessingInstruction
+
+ NAME = "lxml-xml"
+ ALTERNATE_NAMES = ["xml"]
+
+ # Well, it's permissive by XML parser standards.
+ features = [NAME, LXML, XML, FAST, PERMISSIVE]
+
+ CHUNK_SIZE = 512
+
+ # This namespace mapping is specified in the XML Namespace
+ # standard.
+ DEFAULT_NSMAPS = {'http://www.w3.org/XML/1998/namespace' : "xml"}
+
+ def default_parser(self, encoding):
+ # This can either return a parser object or a class, which
+ # will be instantiated with default arguments.
+ if self._default_parser is not None:
+ return self._default_parser
+ return etree.XMLParser(
+ target=self, strip_cdata=False, recover=True, encoding=encoding)
+
+ def parser_for(self, encoding):
+ # Use the default parser.
+ parser = self.default_parser(encoding)
+
+ if callable(parser):
+ # Instantiate the parser with default arguments
+ parser = parser(target=self, strip_cdata=False, encoding=encoding)
+ return parser
+
+ def __init__(self, parser=None, empty_element_tags=None):
+ # TODO: Issue a warning if parser is present but not a
+ # callable, since that means there's no way to create new
+ # parsers for different encodings.
+ self._default_parser = parser
+ if empty_element_tags is not None:
+ self.empty_element_tags = set(empty_element_tags)
+ self.soup = None
+ self.nsmaps = [self.DEFAULT_NSMAPS]
+
+ def _getNsTag(self, tag):
+ # Split the namespace URL out of a fully-qualified lxml tag
+ # name. Copied from lxml's src/lxml/sax.py.
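+ # e.g. '{http://www.w3.org/1999/xhtml}body' ->
+ # ('http://www.w3.org/1999/xhtml', 'body')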
+ if tag[0] == '{':
+ return tuple(tag[1:].split('}', 1))
+ else:
+ return (None, tag)
+
+ def prepare_markup(self, markup, user_specified_encoding=None,
+ exclude_encodings=None,
+ document_declared_encoding=None):
+ """
+ :yield: A series of 4-tuples.
+ (markup, encoding, declared encoding,
+ has undergone character replacement)
+
+ Each 4-tuple represents a strategy for parsing the document.
+ """
+ # Instead of using UnicodeDammit to convert the bytestring to
+ # Unicode using different encodings, use EncodingDetector to
+ # iterate over the encodings, and tell lxml to try to parse
+ # the document as each one in turn.
+ is_html = not self.is_xml
+ if is_html:
+ self.processing_instruction_class = ProcessingInstruction
+ else:
+ self.processing_instruction_class = XMLProcessingInstruction
+
+ if isinstance(markup, str):
+ # We were given Unicode. Maybe lxml can parse Unicode on
+ # this system?
+ yield markup, None, document_declared_encoding, False
+
+ if isinstance(markup, str):
+ # No, apparently not. Convert the Unicode to UTF-8 and
+ # tell lxml to parse it as UTF-8.
+ yield (markup.encode("utf8"), "utf8",
+ document_declared_encoding, False)
+
+ try_encodings = [user_specified_encoding, document_declared_encoding]
+ detector = EncodingDetector(
+ markup, try_encodings, is_html, exclude_encodings)
+ for encoding in detector.encodings:
+ yield (detector.markup, encoding, document_declared_encoding, False)
+
+ def feed(self, markup):
+ if isinstance(markup, bytes):
+ markup = BytesIO(markup)
+ elif isinstance(markup, str):
+ markup = StringIO(markup)
+
+ # Call feed() at least once, even if the markup is empty,
+ # or the parser won't be initialized.
+ data = markup.read(self.CHUNK_SIZE)
+ try:
+ self.parser = self.parser_for(self.soup.original_encoding)
+ self.parser.feed(data)
+ while len(data) != 0:
+ # Now call feed() on the rest of the data, chunk by chunk.
+ data = markup.read(self.CHUNK_SIZE)
+ if len(data) != 0:
+ self.parser.feed(data)
+ self.parser.close()
+ except (UnicodeDecodeError, LookupError, etree.ParserError) as e:
+ raise ParserRejectedMarkup(str(e))
+
+ def close(self):
+ self.nsmaps = [self.DEFAULT_NSMAPS]
+
+ def start(self, name, attrs, nsmap={}):
+ # Make sure attrs is a mutable dict--lxml may send an immutable dictproxy.
+ attrs = dict(attrs)
+ nsprefix = None
+ # Invert each namespace map as it comes in.
+ if len(self.nsmaps) > 1:
+ # There are no new namespaces for this tag, but
+ # non-default namespaces are in play, so we need a
+ # separate tag stack to know when they end.
+ self.nsmaps.append(None)
+ elif len(nsmap) > 0:
+ # A new namespace mapping has come into play.
+ inverted_nsmap = dict((value, key) for key, value in list(nsmap.items()))
+ self.nsmaps.append(inverted_nsmap)
+ # Also treat the namespace mapping as a set of attributes on the
+ # tag, so we can recreate it later.
+ attrs = attrs.copy()
+ for prefix, namespace in list(nsmap.items()):
+ attribute = NamespacedAttribute(
+ "xmlns", prefix, "http://www.w3.org/2000/xmlns/")
+ attrs[attribute] = namespace
+
+ # Namespaces are in play. Find any attributes that came in
+ # from lxml with namespaces attached to their names, and
+ # turn them into NamespacedAttribute objects.
+ new_attrs = {}
+ for attr, value in list(attrs.items()):
+ namespace, attr = self._getNsTag(attr)
+ if namespace is None:
+ new_attrs[attr] = value
+ else:
+ nsprefix = self._prefix_for_namespace(namespace)
+ attr = NamespacedAttribute(nsprefix, attr, namespace)
+ new_attrs[attr] = value
+ attrs = new_attrs
+
+ namespace, name = self._getNsTag(name)
+ nsprefix = self._prefix_for_namespace(namespace)
+ self.soup.handle_starttag(name, namespace, nsprefix, attrs)
+
+ def _prefix_for_namespace(self, namespace):
+ """Find the currently active prefix for the given namespace."""
+ if namespace is None:
+ return None
+ for inverted_nsmap in reversed(self.nsmaps):
+ if inverted_nsmap is not None and namespace in inverted_nsmap:
+ return inverted_nsmap[namespace]
+ return None
+
+ def end(self, name):
+ self.soup.endData()
+ completed_tag = self.soup.tagStack[-1]
+ namespace, name = self._getNsTag(name)
+ nsprefix = None
+ if namespace is not None:
+ for inverted_nsmap in reversed(self.nsmaps):
+ if inverted_nsmap is not None and namespace in inverted_nsmap:
+ nsprefix = inverted_nsmap[namespace]
+ break
+ self.soup.handle_endtag(name, nsprefix)
+ if len(self.nsmaps) > 1:
+ # This tag, or one of its parents, introduced a namespace
+ # mapping, so pop it off the stack.
+ self.nsmaps.pop()
+
+ def pi(self, target, data):
+ self.soup.endData()
+ self.soup.handle_data(target + ' ' + data)
+ self.soup.endData(self.processing_instruction_class)
+
+ def data(self, content):
+ self.soup.handle_data(content)
+
+ def doctype(self, name, pubid, system):
+ self.soup.endData()
+ doctype = Doctype.for_name_and_ids(name, pubid, system)
+ self.soup.object_was_parsed(doctype)
+
+ def comment(self, content):
+ "Handle comments as Comment objects."
+ self.soup.endData()
+ self.soup.handle_data(content)
+ self.soup.endData(Comment)
+
+ def test_fragment_to_document(self, fragment):
+ """See `TreeBuilder`."""
+ return '<?xml version="1.0" encoding="utf-8"?>\n%s' % fragment
+
+
+class LXMLTreeBuilder(HTMLTreeBuilder, LXMLTreeBuilderForXML):
+
+ NAME = LXML
+ ALTERNATE_NAMES = ["lxml-html"]
+
+ features = ALTERNATE_NAMES + [NAME, HTML, FAST, PERMISSIVE]
+ is_xml = False
+ processing_instruction_class = ProcessingInstruction
+
+ def default_parser(self, encoding):
+ return etree.HTMLParser
+
+ def feed(self, markup):
+ encoding = self.soup.original_encoding
+ try:
+ self.parser = self.parser_for(encoding)
+ self.parser.feed(markup)
+ self.parser.close()
+ except (UnicodeDecodeError, LookupError, etree.ParserError) as e:
+ raise ParserRejectedMarkup(str(e))
+
+
+ def test_fragment_to_document(self, fragment):
+ """See `TreeBuilder`."""
+ return '<html><body>%s</body></html>' % fragment
diff --git a/venv/Lib/site-packages/bs4/dammit.py b/venv/Lib/site-packages/bs4/dammit.py
new file mode 100644
index 0000000..8e399e0
--- /dev/null
+++ b/venv/Lib/site-packages/bs4/dammit.py
@@ -0,0 +1,842 @@
+# -*- coding: utf-8 -*-
+"""Beautiful Soup bonus library: Unicode, Dammit
+
+This library converts a bytestream to Unicode through any means
+necessary. It is heavily based on code from Mark Pilgrim's Universal
+Feed Parser. It works best on XML and HTML, but it does not rewrite the
+XML or HTML to reflect a new encoding; that's the tree builder's job.
+"""
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+__license__ = "MIT"
+
+import codecs
+from html.entities import codepoint2name
+import re
+import logging
+import string
+
+# Import a library to autodetect character encodings.
+chardet_type = None
+try:
+ # First try the fast C implementation.
+ # PyPI package: cchardet
+ import cchardet
+ def chardet_dammit(s):
+ return cchardet.detect(s)['encoding']
+except ImportError:
+ try:
+ # Fall back to the pure Python implementation
+ # Debian package: python-chardet
+ # PyPI package: chardet
+ import chardet
+ def chardet_dammit(s):
+ return chardet.detect(s)['encoding']
+ #import chardet.constants
+ #chardet.constants._debug = 1
+ except ImportError:
+ # No chardet available.
+ def chardet_dammit(s):
+ return None
+
+# Available from http://cjkpython.i18n.org/.
+try:
+ import iconv_codec
+except ImportError:
+ pass
+
+xml_encoding_re = re.compile(
+ r'^<\?.*encoding=[\'"](.*?)[\'"].*\?>'.encode(), re.I)
+html_meta_re = re.compile(
+ r'<\s*meta[^>]+charset\s*=\s*["\']?([^>]*?)[ /;\'">]'.encode(), re.I)
+
+class EntitySubstitution(object):
+
+ """Substitute XML or HTML entities for the corresponding characters."""
+
+ def _populate_class_variables():
+ lookup = {}
+ reverse_lookup = {}
+ characters_for_re = []
+ for codepoint, name in list(codepoint2name.items()):
+ character = chr(codepoint)
+ if codepoint != 34:
+ # There's no point in turning the quotation mark into
+ # ", unless it happens within an attribute value, which
+ # is handled elsewhere.
+ characters_for_re.append(character)
+ lookup[character] = name
+ # But we do want to turn &quot; into the quotation mark.
+ reverse_lookup[name] = character
+ re_definition = "[%s]" % "".join(characters_for_re)
+ return lookup, reverse_lookup, re.compile(re_definition)
+ (CHARACTER_TO_HTML_ENTITY, HTML_ENTITY_TO_CHARACTER,
+ CHARACTER_TO_HTML_ENTITY_RE) = _populate_class_variables()
+
+ CHARACTER_TO_XML_ENTITY = {
+ "'": "apos",
+ '"': "quot",
+ "&": "amp",
+ "<": "lt",
+ ">": "gt",
+ }
+
+ BARE_AMPERSAND_OR_BRACKET = re.compile(r"([<>]|"
+ r"&(?!#\d+;|#x[0-9a-fA-F]+;|\w+;)"
+ r")")
+
+ AMPERSAND_OR_BRACKET = re.compile("([<>&])")
+
+ @classmethod
+ def _substitute_html_entity(cls, matchobj):
+ entity = cls.CHARACTER_TO_HTML_ENTITY.get(matchobj.group(0))
+ return "&%s;" % entity
+
+ @classmethod
+ def _substitute_xml_entity(cls, matchobj):
+ """Used with a regular expression to substitute the
+ appropriate XML entity for an XML special character."""
+ entity = cls.CHARACTER_TO_XML_ENTITY[matchobj.group(0)]
+ return "&%s;" % entity
+
+ @classmethod
+ def quoted_attribute_value(self, value):
+ """Make a value into a quoted XML attribute, possibly escaping it.
+
+ Most strings will be quoted using double quotes.
+
+ Bob's Bar -> "Bob's Bar"
+
+ If a string contains double quotes, it will be quoted using
+ single quotes.
+
+ Welcome to "my bar" -> 'Welcome to "my bar"'
+
+ If a string contains both single and double quotes, the
+ double quotes will be escaped, and the string will be quoted
+ using double quotes.
+
+ Welcome to "Bob's Bar" -> "Welcome to "Bob's bar"
+ """
+ quote_with = '"'
+ if '"' in value:
+ if "'" in value:
+ # The string contains both single and double
+ # quotes. Turn the double quotes into
+ # entities. We quote the double quotes rather than
+ # the single quotes because the entity name is
+ # """ whether this is HTML or XML. If we
+ # quoted the single quotes, we'd have to decide
+ # between ' and &squot;.
+ replace_with = """
+ value = value.replace('"', replace_with)
+ else:
+ # There are double quotes but no single quotes.
+ # We can use single quotes to quote the attribute.
+ quote_with = "'"
+ return quote_with + value + quote_with
+
+ @classmethod
+ def substitute_xml(cls, value, make_quoted_attribute=False):
+ """Substitute XML entities for special XML characters.
+
+ :param value: A string to be substituted. The less-than sign
+ will become &lt;, the greater-than sign will become &gt;,
+ and any ampersands will become &amp;. If you want ampersands
+ that appear to be part of an entity definition to be left
+ alone, use substitute_xml_containing_entities() instead.
+
+ :param make_quoted_attribute: If True, then the string will be
+ quoted, as befits an attribute value.
+ """
+ # Escape angle brackets and ampersands.
+ value = cls.AMPERSAND_OR_BRACKET.sub(
+ cls._substitute_xml_entity, value)
+
+ if make_quoted_attribute:
+ value = cls.quoted_attribute_value(value)
+ return value
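+
+ # For example, substitute_xml('AT&T <designs>') returns
+ # 'AT&amp;T &lt;designs&gt;'; with make_quoted_attribute=True the
+ # result is additionally wrapped by quoted_attribute_value().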
+
+ @classmethod
+ def substitute_xml_containing_entities(
+ cls, value, make_quoted_attribute=False):
+ """Substitute XML entities for special XML characters.
+
+ :param value: A string to be substituted. The less-than sign will
+ become &lt;, the greater-than sign will become &gt;, and any
+ ampersands that are not part of an entity definition will
+ become &amp;.
+
+ :param make_quoted_attribute: If True, then the string will be
+ quoted, as befits an attribute value.
+ """
+ # Escape angle brackets, and ampersands that aren't part of
+ # entities.
+ value = cls.BARE_AMPERSAND_OR_BRACKET.sub(
+ cls._substitute_xml_entity, value)
+
+ if make_quoted_attribute:
+ value = cls.quoted_attribute_value(value)
+ return value
+
+ @classmethod
+ def substitute_html(cls, s):
+ """Replace certain Unicode characters with named HTML entities.
+
+ This differs from data.encode(encoding, 'xmlcharrefreplace')
+ in that the goal is to make the result more readable (to those
+ with ASCII displays) rather than to recover from
+ errors. There's absolutely nothing wrong with a UTF-8 string
+ containing a LATIN SMALL LETTER E WITH ACUTE, but replacing that
+ character with "&eacute;" will make it more readable to some
+ people.
+ """
+ return cls.CHARACTER_TO_HTML_ENTITY_RE.sub(
+ cls._substitute_html_entity, s)
+
+
+class EncodingDetector:
+ """Suggests a number of possible encodings for a bytestring.
+
+ Order of precedence:
+
+ 1. Encodings you specifically tell EncodingDetector to try first
+ (the override_encodings argument to the constructor).
+
+ 2. An encoding declared within the bytestring itself, either in an
+ XML declaration (if the bytestring is to be interpreted as an XML
+ document), or in a <meta> tag (if the bytestring is to be
+ interpreted as an HTML document.)
+
+ 3. An encoding detected through textual analysis by chardet,
+ cchardet, or a similar external library.
+
+ 4. UTF-8.
+
+ 5. Windows-1252.
+ """
+ def __init__(self, markup, override_encodings=None, is_html=False,
+ exclude_encodings=None):
+ self.override_encodings = override_encodings or []
+ exclude_encodings = exclude_encodings or []
+ self.exclude_encodings = set([x.lower() for x in exclude_encodings])
+ self.chardet_encoding = None
+ self.is_html = is_html
+ self.declared_encoding = None
+
+ # First order of business: strip a byte-order mark.
+ self.markup, self.sniffed_encoding = self.strip_byte_order_mark(markup)
+
+ def _usable(self, encoding, tried):
+ if encoding is not None:
+ encoding = encoding.lower()
+ if encoding in self.exclude_encodings:
+ return False
+ if encoding not in tried:
+ tried.add(encoding)
+ return True
+ return False
+
+ @property
+ def encodings(self):
+ """Yield a number of encodings that might work for this markup."""
+ tried = set()
+ for e in self.override_encodings:
+ if self._usable(e, tried):
+ yield e
+
+ # Did the document originally start with a byte-order mark
+ # that indicated its encoding?
+ if self._usable(self.sniffed_encoding, tried):
+ yield self.sniffed_encoding
+
+ # Look within the document for an XML or HTML encoding
+ # declaration.
+ if self.declared_encoding is None:
+ self.declared_encoding = self.find_declared_encoding(
+ self.markup, self.is_html)
+ if self._usable(self.declared_encoding, tried):
+ yield self.declared_encoding
+
+ # Use third-party character set detection to guess at the
+ # encoding.
+ if self.chardet_encoding is None:
+ self.chardet_encoding = chardet_dammit(self.markup)
+ if self._usable(self.chardet_encoding, tried):
+ yield self.chardet_encoding
+
+ # As a last-ditch effort, try utf-8 and windows-1252.
+ for e in ('utf-8', 'windows-1252'):
+ if self._usable(e, tried):
+ yield e
+
+ @classmethod
+ def strip_byte_order_mark(cls, data):
+ """If a byte-order mark is present, strip it and return the encoding it implies."""
+ encoding = None
+ if isinstance(data, str):
+ # Unicode data cannot have a byte-order mark.
+ return data, encoding
+ if (len(data) >= 4) and (data[:2] == b'\xfe\xff') \
+ and (data[2:4] != b'\x00\x00'):
+ encoding = 'utf-16be'
+ data = data[2:]
+ elif (len(data) >= 4) and (data[:2] == b'\xff\xfe') \
+ and (data[2:4] != b'\x00\x00'):
+ encoding = 'utf-16le'
+ data = data[2:]
+ elif data[:3] == b'\xef\xbb\xbf':
+ encoding = 'utf-8'
+ data = data[3:]
+ elif data[:4] == b'\x00\x00\xfe\xff':
+ encoding = 'utf-32be'
+ data = data[4:]
+ elif data[:4] == b'\xff\xfe\x00\x00':
+ encoding = 'utf-32le'
+ data = data[4:]
+ return data, encoding
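+
+ # For example, strip_byte_order_mark(b'\xef\xbb\xbfhi') returns
+ # (b'hi', 'utf-8'); str input comes back unchanged with encoding None.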
+
+ @classmethod
+ def find_declared_encoding(cls, markup, is_html=False, search_entire_document=False):
+ """Given a document, tries to find its declared encoding.
+
+ An XML encoding is declared at the beginning of the document.
+
+ An HTML encoding is declared in a <meta> tag, hopefully near the
+ beginning of the document.
+ """
+ if search_entire_document:
+ xml_endpos = html_endpos = len(markup)
+ else:
+ xml_endpos = 1024
+ html_endpos = max(2048, int(len(markup) * 0.05))
+
+ declared_encoding = None
+ declared_encoding_match = xml_encoding_re.search(markup, endpos=xml_endpos)
+ if not declared_encoding_match and is_html:
+ declared_encoding_match = html_meta_re.search(markup, endpos=html_endpos)
+ if declared_encoding_match is not None:
+ declared_encoding = declared_encoding_match.groups()[0].decode(
+ 'ascii', 'replace')
+ if declared_encoding:
+ return declared_encoding.lower()
+ return None
+
+class UnicodeDammit:
+ """A class for detecting the encoding of a *ML document and
+ converting it to a Unicode string. If the source encoding is
+ windows-1252, can replace MS smart quotes with their HTML or XML
+ equivalents."""
+
+ # This dictionary maps commonly seen values for "charset" in HTML
+ # meta tags to the corresponding Python codec names. It only covers
+ # values that aren't in Python's aliases and can't be determined
+ # by the heuristics in find_codec.
+ CHARSET_ALIASES = {"macintosh": "mac-roman",
+ "x-sjis": "shift-jis"}
+
+ ENCODINGS_WITH_SMART_QUOTES = [
+ "windows-1252",
+ "iso-8859-1",
+ "iso-8859-2",
+ ]
+
+ def __init__(self, markup, override_encodings=[],
+ smart_quotes_to=None, is_html=False, exclude_encodings=[]):
+ self.smart_quotes_to = smart_quotes_to
+ self.tried_encodings = []
+ self.contains_replacement_characters = False
+ self.is_html = is_html
+ self.log = logging.getLogger(__name__)
+ self.detector = EncodingDetector(
+ markup, override_encodings, is_html, exclude_encodings)
+
+ # Short-circuit if the data is in Unicode to begin with.
+ if isinstance(markup, str) or markup == '':
+ self.markup = markup
+ self.unicode_markup = str(markup)
+ self.original_encoding = None
+ return
+
+ # The encoding detector may have stripped a byte-order mark.
+ # Use the stripped markup from this point on.
+ self.markup = self.detector.markup
+
+ u = None
+ for encoding in self.detector.encodings:
+ markup = self.detector.markup
+ u = self._convert_from(encoding)
+ if u is not None:
+ break
+
+ if not u:
+ # None of the encodings worked. As an absolute last resort,
+ # try them again with character replacement.
+
+ for encoding in self.detector.encodings:
+ if encoding != "ascii":
+ u = self._convert_from(encoding, "replace")
+ if u is not None:
+ self.log.warning(
+ "Some characters could not be decoded, and were "
+ "replaced with REPLACEMENT CHARACTER."
+ )
+ self.contains_replacement_characters = True
+ break
+
+ # If none of that worked, we could at this point force it to
+ # ASCII, but that would destroy so much data that I think
+ # giving up is better.
+ self.unicode_markup = u
+ if not u:
+ self.original_encoding = None
+
+ def _sub_ms_char(self, match):
+ """Changes a MS smart quote character to an XML or HTML
+ entity, or an ASCII character."""
+ orig = match.group(1)
+ if self.smart_quotes_to == 'ascii':
+ sub = self.MS_CHARS_TO_ASCII.get(orig).encode()
+ else:
+ sub = self.MS_CHARS.get(orig)
+ if type(sub) == tuple:
+ if self.smart_quotes_to == 'xml':
+ sub = '&#x'.encode() + sub[1].encode() + ';'.encode()
+ else:
+ sub = '&'.encode() + sub[0].encode() + ';'.encode()
+ else:
+ sub = sub.encode()
+ return sub
+
+ def _convert_from(self, proposed, errors="strict"):
+ proposed = self.find_codec(proposed)
+ if not proposed or (proposed, errors) in self.tried_encodings:
+ return None
+ self.tried_encodings.append((proposed, errors))
+ markup = self.markup
+ # Convert smart quotes to HTML if coming from an encoding
+ # that might have them.
+ if (self.smart_quotes_to is not None
+ and proposed in self.ENCODINGS_WITH_SMART_QUOTES):
+ smart_quotes_re = b"([\x80-\x9f])"
+ smart_quotes_compiled = re.compile(smart_quotes_re)
+ markup = smart_quotes_compiled.sub(self._sub_ms_char, markup)
+
+ try:
+ #print "Trying to convert document to %s (errors=%s)" % (
+ # proposed, errors)
+ u = self._to_unicode(markup, proposed, errors)
+ self.markup = u
+ self.original_encoding = proposed
+ except Exception as e:
+ #print "That didn't work!"
+ #print e
+ return None
+ #print "Correct encoding: %s" % proposed
+ return self.markup
+
+ def _to_unicode(self, data, encoding, errors="strict"):
+ '''Given a string and its encoding, decodes the string into Unicode.
+ %encoding is a string recognized by encodings.aliases'''
+ return str(data, encoding, errors)
+
+ @property
+ def declared_html_encoding(self):
+ if not self.is_html:
+ return None
+ return self.detector.declared_encoding
+
+ def find_codec(self, charset):
+ value = (self._codec(self.CHARSET_ALIASES.get(charset, charset))
+ or (charset and self._codec(charset.replace("-", "")))
+ or (charset and self._codec(charset.replace("-", "_")))
+ or (charset and charset.lower())
+ or charset
+ )
+ if value:
+ return value.lower()
+ return None
+
+ def _codec(self, charset):
+ if not charset:
+ return charset
+ codec = None
+ try:
+ codecs.lookup(charset)
+ codec = charset
+ except (LookupError, ValueError):
+ pass
+ return codec
+
+
+ # A partial mapping of ISO-Latin-1 to HTML entities/XML numeric entities.
+ MS_CHARS = {b'\x80': ('euro', '20AC'),
+ b'\x81': ' ',
+ b'\x82': ('sbquo', '201A'),
+ b'\x83': ('fnof', '192'),
+ b'\x84': ('bdquo', '201E'),
+ b'\x85': ('hellip', '2026'),
+ b'\x86': ('dagger', '2020'),
+ b'\x87': ('Dagger', '2021'),
+ b'\x88': ('circ', '2C6'),
+ b'\x89': ('permil', '2030'),
+ b'\x8A': ('Scaron', '160'),
+ b'\x8B': ('lsaquo', '2039'),
+ b'\x8C': ('OElig', '152'),
+ b'\x8D': '?',
+ b'\x8E': ('#x17D', '17D'),
+ b'\x8F': '?',
+ b'\x90': '?',
+ b'\x91': ('lsquo', '2018'),
+ b'\x92': ('rsquo', '2019'),
+ b'\x93': ('ldquo', '201C'),
+ b'\x94': ('rdquo', '201D'),
+ b'\x95': ('bull', '2022'),
+ b'\x96': ('ndash', '2013'),
+ b'\x97': ('mdash', '2014'),
+ b'\x98': ('tilde', '2DC'),
+ b'\x99': ('trade', '2122'),
+ b'\x9a': ('scaron', '161'),
+ b'\x9b': ('rsaquo', '203A'),
+ b'\x9c': ('oelig', '153'),
+ b'\x9d': '?',
+ b'\x9e': ('#x17E', '17E'),
+ b'\x9f': ('Yuml', ''),}
+
+ # A parochial partial mapping of ISO-Latin-1 to ASCII. Contains
+ # horrors like stripping diacritical marks to turn á into a, but also
+ # contains non-horrors like turning “ into ".
+ MS_CHARS_TO_ASCII = {
+ b'\x80' : 'EUR',
+ b'\x81' : ' ',
+ b'\x82' : ',',
+ b'\x83' : 'f',
+ b'\x84' : ',,',
+ b'\x85' : '...',
+ b'\x86' : '+',
+ b'\x87' : '++',
+ b'\x88' : '^',
+ b'\x89' : '%',
+ b'\x8a' : 'S',
+ b'\x8b' : '<',
+ b'\x8c' : 'OE',
+ b'\x8d' : '?',
+ b'\x8e' : 'Z',
+ b'\x8f' : '?',
+ b'\x90' : '?',
+ b'\x91' : "'",
+ b'\x92' : "'",
+ b'\x93' : '"',
+ b'\x94' : '"',
+ b'\x95' : '*',
+ b'\x96' : '-',
+ b'\x97' : '--',
+ b'\x98' : '~',
+ b'\x99' : '(TM)',
+ b'\x9a' : 's',
+ b'\x9b' : '>',
+ b'\x9c' : 'oe',
+ b'\x9d' : '?',
+ b'\x9e' : 'z',
+ b'\x9f' : 'Y',
+ b'\xa0' : ' ',
+ b'\xa1' : '!',
+ b'\xa2' : 'c',
+ b'\xa3' : 'GBP',
+ b'\xa4' : '$', #This approximation is especially parochial--this is the
+ #generic currency symbol.
+ b'\xa5' : 'YEN',
+ b'\xa6' : '|',
+ b'\xa7' : 'S',
+ b'\xa8' : '..',
+ b'\xa9' : '',
+ b'\xaa' : '(th)',
+ b'\xab' : '<<',
+ b'\xac' : '!',
+ b'\xad' : ' ',
+ b'\xae' : '(R)',
+ b'\xaf' : '-',
+ b'\xb0' : 'o',
+ b'\xb1' : '+-',
+ b'\xb2' : '2',
+ b'\xb3' : '3',
+ b'\xb4' : ("'", 'acute'),
+ b'\xb5' : 'u',
+ b'\xb6' : 'P',
+ b'\xb7' : '*',
+ b'\xb8' : ',',
+ b'\xb9' : '1',
+ b'\xba' : '(th)',
+ b'\xbb' : '>>',
+ b'\xbc' : '1/4',
+ b'\xbd' : '1/2',
+ b'\xbe' : '3/4',
+ b'\xbf' : '?',
+ b'\xc0' : 'A',
+ b'\xc1' : 'A',
+ b'\xc2' : 'A',
+ b'\xc3' : 'A',
+ b'\xc4' : 'A',
+ b'\xc5' : 'A',
+ b'\xc6' : 'AE',
+ b'\xc7' : 'C',
+ b'\xc8' : 'E',
+ b'\xc9' : 'E',
+ b'\xca' : 'E',
+ b'\xcb' : 'E',
+ b'\xcc' : 'I',
+ b'\xcd' : 'I',
+ b'\xce' : 'I',
+ b'\xcf' : 'I',
+ b'\xd0' : 'D',
+ b'\xd1' : 'N',
+ b'\xd2' : 'O',
+ b'\xd3' : 'O',
+ b'\xd4' : 'O',
+ b'\xd5' : 'O',
+ b'\xd6' : 'O',
+ b'\xd7' : '*',
+ b'\xd8' : 'O',
+ b'\xd9' : 'U',
+ b'\xda' : 'U',
+ b'\xdb' : 'U',
+ b'\xdc' : 'U',
+ b'\xdd' : 'Y',
+ b'\xde' : 'b',
+ b'\xdf' : 'B',
+ b'\xe0' : 'a',
+ b'\xe1' : 'a',
+ b'\xe2' : 'a',
+ b'\xe3' : 'a',
+ b'\xe4' : 'a',
+ b'\xe5' : 'a',
+ b'\xe6' : 'ae',
+ b'\xe7' : 'c',
+ b'\xe8' : 'e',
+ b'\xe9' : 'e',
+ b'\xea' : 'e',
+ b'\xeb' : 'e',
+ b'\xec' : 'i',
+ b'\xed' : 'i',
+ b'\xee' : 'i',
+ b'\xef' : 'i',
+ b'\xf0' : 'o',
+ b'\xf1' : 'n',
+ b'\xf2' : 'o',
+ b'\xf3' : 'o',
+ b'\xf4' : 'o',
+ b'\xf5' : 'o',
+ b'\xf6' : 'o',
+ b'\xf7' : '/',
+ b'\xf8' : 'o',
+ b'\xf9' : 'u',
+ b'\xfa' : 'u',
+ b'\xfb' : 'u',
+ b'\xfc' : 'u',
+ b'\xfd' : 'y',
+ b'\xfe' : 'b',
+ b'\xff' : 'y',
+ }
+
+ # A map used when removing rogue Windows-1252/ISO-8859-1
+ # characters in otherwise UTF-8 documents.
+ #
+ # Note that \x81, \x8d, \x8f, \x90, and \x9d are undefined in
+ # Windows-1252.
+ WINDOWS_1252_TO_UTF8 = {
+ 0x80 : b'\xe2\x82\xac', # €
+ 0x82 : b'\xe2\x80\x9a', # ‚
+ 0x83 : b'\xc6\x92', # ƒ
+ 0x84 : b'\xe2\x80\x9e', # „
+ 0x85 : b'\xe2\x80\xa6', # …
+ 0x86 : b'\xe2\x80\xa0', # †
+ 0x87 : b'\xe2\x80\xa1', # ‡
+ 0x88 : b'\xcb\x86', # ˆ
+ 0x89 : b'\xe2\x80\xb0', # ‰
+ 0x8a : b'\xc5\xa0', # Š
+ 0x8b : b'\xe2\x80\xb9', # ‹
+ 0x8c : b'\xc5\x92', # Œ
+ 0x8e : b'\xc5\xbd', # Ž
+ 0x91 : b'\xe2\x80\x98', # ‘
+ 0x92 : b'\xe2\x80\x99', # ’
+ 0x93 : b'\xe2\x80\x9c', # “
+ 0x94 : b'\xe2\x80\x9d', # ”
+ 0x95 : b'\xe2\x80\xa2', # •
+ 0x96 : b'\xe2\x80\x93', # –
+ 0x97 : b'\xe2\x80\x94', # —
+ 0x98 : b'\xcb\x9c', # ˜
+ 0x99 : b'\xe2\x84\xa2', # ™
+ 0x9a : b'\xc5\xa1', # š
+ 0x9b : b'\xe2\x80\xba', # ›
+ 0x9c : b'\xc5\x93', # œ
+ 0x9e : b'\xc5\xbe', # ž
+ 0x9f : b'\xc5\xb8', # Ÿ
+ 0xa0 : b'\xc2\xa0', #
+ 0xa1 : b'\xc2\xa1', # ¡
+ 0xa2 : b'\xc2\xa2', # ¢
+ 0xa3 : b'\xc2\xa3', # £
+ 0xa4 : b'\xc2\xa4', # ¤
+ 0xa5 : b'\xc2\xa5', # ¥
+ 0xa6 : b'\xc2\xa6', # ¦
+ 0xa7 : b'\xc2\xa7', # §
+ 0xa8 : b'\xc2\xa8', # ¨
+ 0xa9 : b'\xc2\xa9', # ©
+ 0xaa : b'\xc2\xaa', # ª
+ 0xab : b'\xc2\xab', # «
+ 0xac : b'\xc2\xac', # ¬
+ 0xad : b'\xc2\xad', #
+ 0xae : b'\xc2\xae', # ®
+ 0xaf : b'\xc2\xaf', # ¯
+ 0xb0 : b'\xc2\xb0', # °
+ 0xb1 : b'\xc2\xb1', # ±
+ 0xb2 : b'\xc2\xb2', # ²
+ 0xb3 : b'\xc2\xb3', # ³
+ 0xb4 : b'\xc2\xb4', # ´
+ 0xb5 : b'\xc2\xb5', # µ
+ 0xb6 : b'\xc2\xb6', # ¶
+ 0xb7 : b'\xc2\xb7', # ·
+ 0xb8 : b'\xc2\xb8', # ¸
+ 0xb9 : b'\xc2\xb9', # ¹
+ 0xba : b'\xc2\xba', # º
+ 0xbb : b'\xc2\xbb', # »
+ 0xbc : b'\xc2\xbc', # ¼
+ 0xbd : b'\xc2\xbd', # ½
+ 0xbe : b'\xc2\xbe', # ¾
+ 0xbf : b'\xc2\xbf', # ¿
+ 0xc0 : b'\xc3\x80', # À
+ 0xc1 : b'\xc3\x81', # Á
+ 0xc2 : b'\xc3\x82', # Â
+ 0xc3 : b'\xc3\x83', # Ã
+ 0xc4 : b'\xc3\x84', # Ä
+ 0xc5 : b'\xc3\x85', # Å
+ 0xc6 : b'\xc3\x86', # Æ
+ 0xc7 : b'\xc3\x87', # Ç
+ 0xc8 : b'\xc3\x88', # È
+ 0xc9 : b'\xc3\x89', # É
+ 0xca : b'\xc3\x8a', # Ê
+ 0xcb : b'\xc3\x8b', # Ë
+ 0xcc : b'\xc3\x8c', # Ì
+ 0xcd : b'\xc3\x8d', # Í
+ 0xce : b'\xc3\x8e', # Î
+ 0xcf : b'\xc3\x8f', # Ï
+ 0xd0 : b'\xc3\x90', # Ð
+ 0xd1 : b'\xc3\x91', # Ñ
+ 0xd2 : b'\xc3\x92', # Ò
+ 0xd3 : b'\xc3\x93', # Ó
+ 0xd4 : b'\xc3\x94', # Ô
+ 0xd5 : b'\xc3\x95', # Õ
+ 0xd6 : b'\xc3\x96', # Ö
+ 0xd7 : b'\xc3\x97', # ×
+ 0xd8 : b'\xc3\x98', # Ø
+ 0xd9 : b'\xc3\x99', # Ù
+ 0xda : b'\xc3\x9a', # Ú
+ 0xdb : b'\xc3\x9b', # Û
+ 0xdc : b'\xc3\x9c', # Ü
+ 0xdd : b'\xc3\x9d', # Ý
+ 0xde : b'\xc3\x9e', # Þ
+ 0xdf : b'\xc3\x9f', # ß
+ 0xe0 : b'\xc3\xa0', # à
+    0xe1 : b'\xc3\xa1', # á
+ 0xe2 : b'\xc3\xa2', # â
+ 0xe3 : b'\xc3\xa3', # ã
+ 0xe4 : b'\xc3\xa4', # ä
+ 0xe5 : b'\xc3\xa5', # å
+ 0xe6 : b'\xc3\xa6', # æ
+ 0xe7 : b'\xc3\xa7', # ç
+ 0xe8 : b'\xc3\xa8', # è
+ 0xe9 : b'\xc3\xa9', # é
+ 0xea : b'\xc3\xaa', # ê
+ 0xeb : b'\xc3\xab', # ë
+ 0xec : b'\xc3\xac', # ì
+ 0xed : b'\xc3\xad', # í
+ 0xee : b'\xc3\xae', # î
+ 0xef : b'\xc3\xaf', # ï
+ 0xf0 : b'\xc3\xb0', # ð
+ 0xf1 : b'\xc3\xb1', # ñ
+ 0xf2 : b'\xc3\xb2', # ò
+ 0xf3 : b'\xc3\xb3', # ó
+ 0xf4 : b'\xc3\xb4', # ô
+ 0xf5 : b'\xc3\xb5', # õ
+ 0xf6 : b'\xc3\xb6', # ö
+ 0xf7 : b'\xc3\xb7', # ÷
+ 0xf8 : b'\xc3\xb8', # ø
+ 0xf9 : b'\xc3\xb9', # ù
+ 0xfa : b'\xc3\xba', # ú
+ 0xfb : b'\xc3\xbb', # û
+ 0xfc : b'\xc3\xbc', # ü
+ 0xfd : b'\xc3\xbd', # ý
+ 0xfe : b'\xc3\xbe', # þ
+ }
+
+ MULTIBYTE_MARKERS_AND_SIZES = [
+ (0xc2, 0xdf, 2), # 2-byte characters start with a byte C2-DF
+ (0xe0, 0xef, 3), # 3-byte characters start with E0-EF
+ (0xf0, 0xf4, 4), # 4-byte characters start with F0-F4
+ ]
+
+ FIRST_MULTIBYTE_MARKER = MULTIBYTE_MARKERS_AND_SIZES[0][0]
+ LAST_MULTIBYTE_MARKER = MULTIBYTE_MARKERS_AND_SIZES[-1][1]
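+    # For example, 0xe2, the lead byte of the UTF-8 encoding of "€"
+    # (b'\xe2\x82\xac'), falls in the E0-EF range above, so detwingle()
+    # below skips three bytes instead of misreading the continuation
+    # bytes as Windows-1252 characters.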
+
+ @classmethod
+ def detwingle(cls, in_bytes, main_encoding="utf8",
+ embedded_encoding="windows-1252"):
+ """Fix characters from one encoding embedded in some other encoding.
+
+ Currently the only situation supported is Windows-1252 (or its
+ subset ISO-8859-1), embedded in UTF-8.
+
+ The input must be a bytestring. If you've already converted
+ the document to Unicode, you're too late.
+
+ The output is a bytestring in which `embedded_encoding`
+ characters have been converted to their `main_encoding`
+ equivalents.
+ """
+ if embedded_encoding.replace('_', '-').lower() not in (
+ 'windows-1252', 'windows_1252'):
+ raise NotImplementedError(
+ "Windows-1252 and ISO-8859-1 are the only currently supported "
+ "embedded encodings.")
+
+ if main_encoding.lower() not in ('utf8', 'utf-8'):
+ raise NotImplementedError(
+ "UTF-8 is the only currently supported main encoding.")
+
+ byte_chunks = []
+
+ chunk_start = 0
+ pos = 0
+ while pos < len(in_bytes):
+ byte = in_bytes[pos]
+ if not isinstance(byte, int):
+ # Python 2.x
+ byte = ord(byte)
+ if (byte >= cls.FIRST_MULTIBYTE_MARKER
+ and byte <= cls.LAST_MULTIBYTE_MARKER):
+ # This is the start of a UTF-8 multibyte character. Skip
+ # to the end.
+ for start, end, size in cls.MULTIBYTE_MARKERS_AND_SIZES:
+ if byte >= start and byte <= end:
+ pos += size
+ break
+ elif byte >= 0x80 and byte in cls.WINDOWS_1252_TO_UTF8:
+ # We found a Windows-1252 character!
+ # Save the string up to this point as a chunk.
+ byte_chunks.append(in_bytes[chunk_start:pos])
+
+ # Now translate the Windows-1252 character into UTF-8
+ # and add it as another, one-byte chunk.
+ byte_chunks.append(cls.WINDOWS_1252_TO_UTF8[byte])
+ pos += 1
+ chunk_start = pos
+ else:
+ # Go on to the next character.
+ pos += 1
+ if chunk_start == 0:
+ # The string is unchanged.
+ return in_bytes
+ else:
+ # Store the final chunk.
+ byte_chunks.append(in_bytes[chunk_start:])
+ return b''.join(byte_chunks)
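+    # Usage sketch, adapted from the Beautiful Soup documentation's
+    # detwingle example (illustrative only):
+    #
+    #   snowmen = ("\N{SNOWMAN}" * 3).encode("utf8")
+    #   quote = b"\x93I like snowmen!\x94"   # Windows-1252 curly quotes
+    #   doc = snowmen + quote + snowmen
+    #   doc.decode("utf8")                           # raises UnicodeDecodeError
+    #   UnicodeDammit.detwingle(doc).decode("utf8")  # decodes cleanly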
+
diff --git a/venv/Lib/site-packages/bs4/diagnose.py b/venv/Lib/site-packages/bs4/diagnose.py
new file mode 100644
index 0000000..1254861
--- /dev/null
+++ b/venv/Lib/site-packages/bs4/diagnose.py
@@ -0,0 +1,219 @@
+"""Diagnostic functions, mainly for use when doing tech support."""
+
+# Use of this source code is governed by the MIT license.
+__license__ = "MIT"
+
+import cProfile
+from io import StringIO
+from html.parser import HTMLParser
+import bs4
+from bs4 import BeautifulSoup, __version__
+from bs4.builder import builder_registry
+
+import os
+import pstats
+import random
+import tempfile
+import time
+import traceback
+import sys
+
+def diagnose(data):
+ """Diagnostic suite for isolating common problems."""
+ print("Diagnostic running on Beautiful Soup %s" % __version__)
+ print("Python version %s" % sys.version)
+
+ basic_parsers = ["html.parser", "html5lib", "lxml"]
+    # Iterate over a copy: removing an entry from the list being iterated
+    # over would silently skip the parser that follows it.
+    for name in list(basic_parsers):
+        for builder in builder_registry.builders:
+            if name in builder.features:
+                break
+        else:
+            basic_parsers.remove(name)
+            print(
+                "I noticed that %s is not installed. Installing it may help." %
+                name)
+
+ if 'lxml' in basic_parsers:
+ basic_parsers.append(["lxml", "xml"])
+ try:
+ from lxml import etree
+ print("Found lxml version %s" % ".".join(map(str,etree.LXML_VERSION)))
+ except ImportError as e:
+ print (
+ "lxml is not installed or couldn't be imported.")
+
+
+ if 'html5lib' in basic_parsers:
+ try:
+ import html5lib
+ print("Found html5lib version %s" % html5lib.__version__)
+        except ImportError:
+            print("html5lib is not installed or couldn't be imported.")
+
+ if hasattr(data, 'read'):
+ data = data.read()
+ elif os.path.exists(data):
+ print('"%s" looks like a filename. Reading data from the file.' % data)
+ with open(data) as fp:
+ data = fp.read()
+ elif data.startswith("http:") or data.startswith("https:"):
+ print('"%s" looks like a URL. Beautiful Soup is not an HTTP client.' % data)
+ print("You need to use some other library to get the document behind the URL, and feed that document to Beautiful Soup.")
+ return
+ print()
+
+ for parser in basic_parsers:
+ print("Trying to parse your markup with %s" % parser)
+ success = False
+ try:
+ soup = BeautifulSoup(data, parser)
+ success = True
+ except Exception as e:
+ print("%s could not parse the markup." % parser)
+ traceback.print_exc()
+ if success:
+ print("Here's what %s did with the markup:" % parser)
+ print(soup.prettify())
+
+ print("-" * 80)
+
+def lxml_trace(data, html=True, **kwargs):
+ """Print out the lxml events that occur during parsing.
+
+ This lets you see how lxml parses a document when no Beautiful
+ Soup code is running.
+ """
+ from lxml import etree
+ for event, element in etree.iterparse(StringIO(data), html=html, **kwargs):
+ print(("%s, %4s, %s" % (event, element.tag, element.text)))
+
+class AnnouncingParser(HTMLParser):
+ """Announces HTMLParser parse events, without doing anything else."""
+
+ def _p(self, s):
+ print(s)
+
+ def handle_starttag(self, name, attrs):
+ self._p("%s START" % name)
+
+ def handle_endtag(self, name):
+ self._p("%s END" % name)
+
+ def handle_data(self, data):
+ self._p("%s DATA" % data)
+
+ def handle_charref(self, name):
+ self._p("%s CHARREF" % name)
+
+ def handle_entityref(self, name):
+ self._p("%s ENTITYREF" % name)
+
+ def handle_comment(self, data):
+ self._p("%s COMMENT" % data)
+
+ def handle_decl(self, data):
+ self._p("%s DECL" % data)
+
+ def unknown_decl(self, data):
+ self._p("%s UNKNOWN-DECL" % data)
+
+ def handle_pi(self, data):
+ self._p("%s PI" % data)
+
+def htmlparser_trace(data):
+ """Print out the HTMLParser events that occur during parsing.
+
+ This lets you see how HTMLParser parses a document when no
+ Beautiful Soup code is running.
+ """
+ parser = AnnouncingParser()
+ parser.feed(data)
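+# Similarly, htmlparser_trace("<p>aaa<b>bbb</b>ccc</p>") announces each
+# handle_* event as html.parser encounters it.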
+
+_vowels = "aeiou"
+_consonants = "bcdfghjklmnpqrstvwxyz"
+
+def rword(length=5):
+ "Generate a random word-like string."
+ s = ''
+ for i in range(length):
+ if i % 2 == 0:
+ t = _consonants
+ else:
+ t = _vowels
+ s += random.choice(t)
+ return s
+
+def rsentence(length=4):
+ "Generate a random sentence-like string."
+ return " ".join(rword(random.randint(4,9)) for i in range(length))
+
+def rdoc(num_elements=1000):
+ """Randomly generate an invalid HTML document."""
+ tag_names = ['p', 'div', 'span', 'i', 'b', 'script', 'table']
+ elements = []
+ for i in range(num_elements):
+ choice = random.randint(0,3)
+ if choice == 0:
+ # New tag.
+ tag_name = random.choice(tag_names)
+ elements.append("<%s>" % tag_name)
+ elif choice == 1:
+ elements.append(rsentence(random.randint(1,4)))
+ elif choice == 2:
+ # Close a tag.
+ tag_name = random.choice(tag_names)
+ elements.append("%s>" % tag_name)
+ return "" + "\n".join(elements) + ""
+
+def benchmark_parsers(num_elements=100000):
+ """Very basic head-to-head performance benchmark."""
+ print("Comparative parser benchmark on Beautiful Soup %s" % __version__)
+ data = rdoc(num_elements)
+ print("Generated a large invalid HTML document (%d bytes)." % len(data))
+
+ for parser in ["lxml", ["lxml", "html"], "html5lib", "html.parser"]:
+ success = False
+ try:
+ a = time.time()
+ soup = BeautifulSoup(data, parser)
+ b = time.time()
+ success = True
+ except Exception as e:
+ print("%s could not parse the markup." % parser)
+ traceback.print_exc()
+ if success:
+ print("BS4+%s parsed the markup in %.2fs." % (parser, b-a))
+
+ from lxml import etree
+ a = time.time()
+ etree.HTML(data)
+ b = time.time()
+ print("Raw lxml parsed the markup in %.2fs." % (b-a))
+
+ import html5lib
+ parser = html5lib.HTMLParser()
+ a = time.time()
+ parser.parse(data)
+ b = time.time()
+ print("Raw html5lib parsed the markup in %.2fs." % (b-a))
+
+def profile(num_elements=100000, parser="lxml"):
+    """Profile the parsing of a randomly generated document and print
+    the functions where the time is spent."""
+ filehandle = tempfile.NamedTemporaryFile()
+ filename = filehandle.name
+
+ data = rdoc(num_elements)
+ vars = dict(bs4=bs4, data=data, parser=parser)
+ cProfile.runctx('bs4.BeautifulSoup(data, parser)' , vars, vars, filename)
+
+ stats = pstats.Stats(filename)
+ # stats.strip_dirs()
+ stats.sort_stats("cumulative")
+ stats.print_stats('_html5lib|bs4', 50)
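+# Usage sketch (illustrative): profile a parse of a 10,000-element random
+# document with the pure-Python parser.
+#
+#   from bs4.diagnose import profile
+#   profile(10000, parser="html.parser")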
+
+if __name__ == '__main__':
+ diagnose(sys.stdin.read())
diff --git a/venv/Lib/site-packages/bs4/element.py b/venv/Lib/site-packages/bs4/element.py
new file mode 100644
index 0000000..a4a750d
--- /dev/null
+++ b/venv/Lib/site-packages/bs4/element.py
@@ -0,0 +1,1808 @@
+# Use of this source code is governed by the MIT license.
+__license__ = "MIT"
+
+import collections
+import re
+import shlex
+import sys
+import warnings
+from bs4.dammit import EntitySubstitution
+
+DEFAULT_OUTPUT_ENCODING = "utf-8"
+PY3K = (sys.version_info[0] > 2)
+
+whitespace_re = re.compile(r"\s+")
+
+def _alias(attr):
+ """Alias one attribute name to another for backward compatibility"""
+ @property
+ def alias(self):
+ return getattr(self, attr)
+
+ @alias.setter
+    def alias(self, value):
+        # A property setter receives the value being assigned.
+        return setattr(self, attr, value)
+ return alias
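+# _alias is how old camelCase names stay usable; for instance, a class body
+# can declare (as bs4's PageElement does):
+#
+#   nextSibling = _alias("next_sibling")
+#   previousSibling = _alias("previous_sibling")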
+
+
+class NamespacedAttribute(str):
+
+ def __new__(cls, prefix, name, namespace=None):
+ if name is None:
+ obj = str.__new__(cls, prefix)
+ elif prefix is None:
+ # Not really namespaced.
+ obj = str.__new__(cls, name)
+ else:
+ obj = str.__new__(cls, prefix + ":" + name)
+ obj.prefix = prefix
+ obj.name = name
+ obj.namespace = namespace
+ return obj
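+    # Illustrative sketch: a NamespacedAttribute is the plain string
+    # "prefix:name" that also remembers its parts.
+    #
+    #   attr = NamespacedAttribute("xml", "lang", "http://www.w3.org/XML/1998/namespace")
+    #   attr == "xml:lang"        # True; it is still a str
+    #   (attr.prefix, attr.name)  # ("xml", "lang")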
+
+class AttributeValueWithCharsetSubstitution(str):
+ """A stand-in object for a character encoding specified in HTML."""
+
+class CharsetMetaAttributeValue(AttributeValueWithCharsetSubstitution):
+ """A generic stand-in for the value of a meta tag's 'charset' attribute.
+
+    When Beautiful Soup parses the markup '<meta charset="utf8">', the
+    value of the 'charset' attribute will be one of these objects.
+ """
+
+ def __new__(cls, original_value):
+ obj = str.__new__(cls, original_value)
+ obj.original_value = original_value
+ return obj
+
+ def encode(self, encoding):
+ return encoding
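+    # Illustrative: whatever encoding the document is re-encoded to becomes
+    # the attribute's value, e.g.
+    #   CharsetMetaAttributeValue("euc-jp").encode("utf8")  # -> "utf8"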
+
+
+class ContentMetaAttributeValue(AttributeValueWithCharsetSubstitution):
+ """A generic stand-in for the value of a meta tag's 'content' attribute.
+
+    When Beautiful Soup parses the markup:
+     <meta http-equiv="content-type" content="text/html; charset=utf8">
+
+    The value of the 'content' attribute will be one of these objects.
+ """
+
+    CHARSET_RE = re.compile(r"((^|;)\s*charset=)([^;]*)", re.M)
+
+ def __new__(cls, original_value):
+ match = cls.CHARSET_RE.search(original_value)
+ if match is None:
+ # No substitution necessary.
+ return str.__new__(str, original_value)
+
+ obj = str.__new__(cls, original_value)
+ obj.original_value = original_value
+ return obj
+
+ def encode(self, encoding):
+ def rewrite(match):
+ return match.group(1) + encoding
+ return self.CHARSET_RE.sub(rewrite, self.original_value)
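+    # Illustrative sketch: when the document is re-encoded, encode() rewrites
+    # the charset declaration embedded in the attribute value.
+    #
+    #   value = ContentMetaAttributeValue("text/html; charset=euc-jp")
+    #   value.encode("utf8")   # -> "text/html; charset=utf8"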
+
+class HTMLAwareEntitySubstitution(EntitySubstitution):
+
+ """Entity substitution rules that are aware of some HTML quirks.
+
+    Specifically, the contents of <pre> and <textarea> tags should not
+    undergo entity substitution.
+    """
+
+<p>Hello, world!</p>
+</body>
+</html>
+'''
+        soup = self.soup(html)
+        self.assertEqual("text/javascript", soup.find('script')['type'])
+
+ def test_comment(self):
+ # Comments are represented as Comment objects.
+ markup = "foobaz
"
+ self.assertSoupEquals(markup)
+
+ soup = self.soup(markup)
+ comment = soup.find(text="foobar")
+ self.assertEqual(comment.__class__, Comment)
+
+ # The comment is properly integrated into the tree.
+ foo = soup.find(text="foo")
+ self.assertEqual(comment, foo.next_element)
+ baz = soup.find(text="baz")
+ self.assertEqual(comment, baz.previous_element)
+
+ def test_preserved_whitespace_in_pre_and_textarea(self):
+ """Whitespace must be preserved in and