diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..6ef5940 --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,8 @@ +# Default ignored files +/shelf/ +/workspace.xml +# Datasource local storage ignored files +/../../../../../../:\_Python230\Assignments\Assignment05\ustjay-ethay-actsfayig\.idea/dataSources/ +/dataSources.local.xml +# Editor-based HTTP Client requests +/httpRequests/ diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml new file mode 100644 index 0000000..6854e0d --- /dev/null +++ b/.idea/inspectionProfiles/Project_Default.xml @@ -0,0 +1,19 @@ + + + + \ No newline at end of file diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml new file mode 100644 index 0000000..105ce2d --- /dev/null +++ b/.idea/inspectionProfiles/profiles_settings.xml @@ -0,0 +1,6 @@ + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..89ad2d6 --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 0000000..e7c7e7b --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/ustjay-ethay-actsfayig.iml b/.idea/ustjay-ethay-actsfayig.iml new file mode 100644 index 0000000..66cd148 --- /dev/null +++ b/.idea/ustjay-ethay-actsfayig.iml @@ -0,0 +1,16 @@ + + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..94a25f7 --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/Procfile b/Procfile index 629b83a..aa3b02c 100644 --- a/Procfile +++ b/Procfile @@ -1 +1 @@ -web: python main.py +web: python -u main.py diff --git a/main.py b/main.py index bc096fb..aa93479 100644 --- a/main.py +++ b/main.py @@ -1,28 +1,56 @@ +#!/usr/bin/env python +""" +# ------------------------------------------------------------------------ # +# Title: Lesson 5 - 'Mashups & Microservices'. This script demonstrates +# how to scrape a random fact from http://unkno.com (Link to an external site) +# and then send it to a pig latin web application running on Heroku. +# Description: This is the main "main.py" script. Use this command 'python -u main.py' +# to start the server. Let the fun begin!! +# ChangeLog (Who,When,What): +# ASimpson, 10/27/2020, Modified code to complete lesson5 - Assignment05 +# ------------------------------------------------------------------------ # +""" +# Import Modules here import os - import requests from flask import Flask, send_file, Response from bs4 import BeautifulSoup +# Create the Flask app object here app = Flask(__name__) def get_fact(): - + """Get the ransom factoid from this website http://unkno.com""" response = requests.get("http://unkno.com") - + # Parse the response from the random factoid website using an HTML parser soup = BeautifulSoup(response.content, "html.parser") + # Pick out the HTML tags that have the random fact using soup facts = soup.find_all("div", id="content") - return facts[0].getText() +def pig_latinizer(message): + """pig latin address function generator""" + # Make a POST request to the /piglatinize/ site, set 'allow_redirects' to False + response = requests.post("https://hidden-journey-62459.herokuapp.com/piglatinize/", + data={'input_text': message}, allow_redirects=False, stream=True) + # Pull out the response 'Location' item + pig_latin_message = response.headers['Location'] + return pig_latin_message + + @app.route('/') def home(): - return "FILL ME!" + """Take a random fact from one website, convert it to Pig Latin and then show the address""" + str_fact_message = str(get_fact()) + str_pig_latin_addr = pig_latinizer(str_fact_message) + response_body = '

Crazy Factoid:

"{0}"

' \ + '

Convert this to Pig Latin (addr) ===> {1}


' \ + 'Click here for Pig Latin version!!'.format(str_fact_message, str_pig_latin_addr) + return response_body if __name__ == "__main__": port = int(os.environ.get("PORT", 6787)) app.run(host='0.0.0.0', port=port) - diff --git a/requirements.txt b/requirements.txt index 0ade18d..f54fd35 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,11 +2,21 @@ beautifulsoup4==4.6.0 certifi==2018.4.16 chardet==3.0.4 click==6.7 +ecdsa==0.16.0 +esptool==2.8 Flask==1.0.1 +guess-my-x==0.1 +gunicorn==20.0.4 idna==2.6 itsdangerous==0.24 Jinja2==2.10 MarkupSafe==1.0 +peewee==3.13.3 +psycopg2==2.8.6 +psycopg2-binary==2.8.6 +pyaes==1.6.1 +pyserial==3.4 requests==2.18.4 +six==1.15.0 urllib3==1.22 Werkzeug==0.14.1 diff --git a/venv/Lib/site-packages/Flask-1.0.1.dist-info/INSTALLER b/venv/Lib/site-packages/Flask-1.0.1.dist-info/INSTALLER new file mode 100644 index 0000000..a1b589e --- /dev/null +++ b/venv/Lib/site-packages/Flask-1.0.1.dist-info/INSTALLER @@ -0,0 +1 @@ +pip diff --git a/venv/Lib/site-packages/Flask-1.0.1.dist-info/LICENSE.txt b/venv/Lib/site-packages/Flask-1.0.1.dist-info/LICENSE.txt new file mode 100644 index 0000000..8f9252f --- /dev/null +++ b/venv/Lib/site-packages/Flask-1.0.1.dist-info/LICENSE.txt @@ -0,0 +1,31 @@ +Copyright © 2010 by the Pallets team. + +Some rights reserved. + +Redistribution and use in source and binary forms of the software as +well as documentation, with or without modification, are permitted +provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE AND DOCUMENTATION IS PROVIDED BY THE COPYRIGHT HOLDERS AND +CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, +BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND +FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF +USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +THIS SOFTWARE AND DOCUMENTATION, EVEN IF ADVISED OF THE POSSIBILITY OF +SUCH DAMAGE. diff --git a/venv/Lib/site-packages/Flask-1.0.1.dist-info/METADATA b/venv/Lib/site-packages/Flask-1.0.1.dist-info/METADATA new file mode 100644 index 0000000..74508be --- /dev/null +++ b/venv/Lib/site-packages/Flask-1.0.1.dist-info/METADATA @@ -0,0 +1,130 @@ +Metadata-Version: 2.1 +Name: Flask +Version: 1.0.1 +Summary: A simple framework for building complex web applications. +Home-page: https://www.palletsprojects.com/p/flask/ +Author: Armin Ronacher +Author-email: armin.ronacher@active-4.com +Maintainer: Pallets team +Maintainer-email: contact@palletsprojects.com +License: BSD +Project-URL: Documentation, http://flask.pocoo.org/docs/ +Project-URL: Code, https://github.com/pallets/pallets-sphinx-themes +Project-URL: Issue tracker, https://github.com/pallets/pallets-sphinx-themes/issues +Platform: any +Classifier: Development Status :: 5 - Production/Stable +Classifier: Environment :: Web Environment +Classifier: Framework :: Flask +Classifier: Intended Audience :: Developers +Classifier: License :: OSI Approved :: BSD License +Classifier: Operating System :: OS Independent +Classifier: Programming Language :: Python +Classifier: Programming Language :: Python :: 2 +Classifier: Programming Language :: Python :: 2.7 +Classifier: Programming Language :: Python :: 3 +Classifier: Programming Language :: Python :: 3.4 +Classifier: Programming Language :: Python :: 3.5 +Classifier: Programming Language :: Python :: 3.6 +Classifier: Topic :: Internet :: WWW/HTTP :: Dynamic Content +Classifier: Topic :: Internet :: WWW/HTTP :: WSGI :: Application +Classifier: Topic :: Software Development :: Libraries :: Application Frameworks +Classifier: Topic :: Software Development :: Libraries :: Python Modules +Provides-Extra: docs +Provides-Extra: dev +Provides-Extra: dotenv +Requires-Dist: Werkzeug (>=0.14) +Requires-Dist: Jinja2 (>=2.10) +Requires-Dist: itsdangerous (>=0.24) +Requires-Dist: click (>=5.1) +Provides-Extra: dev +Requires-Dist: pytest (>=3); extra == 'dev' +Requires-Dist: coverage; extra == 'dev' +Requires-Dist: tox; extra == 'dev' +Requires-Dist: sphinx; extra == 'dev' +Requires-Dist: pallets-sphinx-themes; extra == 'dev' +Requires-Dist: sphinxcontrib-log-cabinet; extra == 'dev' +Provides-Extra: docs +Requires-Dist: sphinx; extra == 'docs' +Requires-Dist: pallets-sphinx-themes; extra == 'docs' +Requires-Dist: sphinxcontrib-log-cabinet; extra == 'docs' +Provides-Extra: dotenv +Requires-Dist: python-dotenv; extra == 'dotenv' + +Flask +===== + +Flask is a lightweight `WSGI`_ web application framework. It is designed +to make getting started quick and easy, with the ability to scale up to +complex applications. It began as a simple wrapper around `Werkzeug`_ +and `Jinja`_ and has become one of the most popular Python web +application frameworks. + +Flask offers suggestions, but doesn't enforce any dependencies or +project layout. It is up to the developer to choose the tools and +libraries they want to use. There are many extensions provided by the +community that make adding new functionality easy. + + +Installing +---------- + +Install and update using `pip`_: + +.. code-block:: text + + pip install -U Flask + + +A Simple Example +---------------- + +.. code-block:: python + + from flask import Flask + + app = Flask(__name__) + + @app.route('/') + def hello(): + return 'Hello, World!' + +.. code-block:: text + + $ FLASK_APP=hello.py flask run + * Serving Flask app "hello" + * Running on http://127.0.0.1:5000/ (Press CTRL+C to quit) + + +Donate +------ + +The Pallets organization develops and supports Flask and the libraries +it uses. In order to grow the community of contributors and users, and +allow the maintainers to devote more time to the projects, `please +donate today`_. + +.. _please donate today: https://psfmember.org/civicrm/contribute/transact?reset=1&id=20 + + +Links +----- + +* Website: https://www.palletsprojects.com/p/flask/ +* Documentation: http://flask.pocoo.org/docs/ +* License: `BSD `_ +* Releases: https://pypi.org/project/Flask/ +* Code: https://github.com/pallets/flask +* Issue tracker: https://github.com/pallets/flask/issues +* Test status: + + * Linux, Mac: https://travis-ci.org/pallets/flask + * Windows: https://ci.appveyor.com/project/pallets/flask + +* Test coverage: https://codecov.io/gh/pallets/flask + +.. _WSGI: https://wsgi.readthedocs.io +.. _Werkzeug: https://www.palletsprojects.com/p/werkzeug/ +.. _Jinja: https://www.palletsprojects.com/p/jinja/ +.. _pip: https://pip.pypa.io/en/stable/quickstart/ + + diff --git a/venv/Lib/site-packages/Flask-1.0.1.dist-info/RECORD b/venv/Lib/site-packages/Flask-1.0.1.dist-info/RECORD new file mode 100644 index 0000000..36f7212 --- /dev/null +++ b/venv/Lib/site-packages/Flask-1.0.1.dist-info/RECORD @@ -0,0 +1,49 @@ +../../Scripts/flask.exe,sha256=s2wRrQXGGJbcwiA0fj649R2A9bsfk7aqvjkmnXsOfKY,97167 +Flask-1.0.1.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4 +Flask-1.0.1.dist-info/LICENSE.txt,sha256=ziEXA3AIuaiUn1qe4cd1XxCESWTYrk4TjN7Qb06J3l8,1575 +Flask-1.0.1.dist-info/METADATA,sha256=dQhGUR2ccX3kL8VGYzbNrvEo6F8bi1ER11xrv0CNqqQ,4214 +Flask-1.0.1.dist-info/RECORD,, +Flask-1.0.1.dist-info/REQUESTED,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +Flask-1.0.1.dist-info/WHEEL,sha256=J3CsTk7Mf2JNUyhImI-mjX-fmI4oDjyiXgWT4qgZiCE,110 +Flask-1.0.1.dist-info/entry_points.txt,sha256=gBLA1aKg0OYR8AhbAfg8lnburHtKcgJLDU52BBctN0k,42 +Flask-1.0.1.dist-info/top_level.txt,sha256=dvi65F6AeGWVU0TBpYiC04yM60-FX1gJFkK31IKQr5c,6 +flask/__init__.py,sha256=NRZvAstllu2tdNupKD6ombhD_ONrXXYIlJK0ZW77zhM,1673 +flask/__main__.py,sha256=pgIXrHhxM5MAMvgzAqWpw_t6AXZ1zG38us4JRgJKtxk,291 +flask/__pycache__/__init__.cpython-38.pyc,, +flask/__pycache__/__main__.cpython-38.pyc,, +flask/__pycache__/_compat.cpython-38.pyc,, +flask/__pycache__/app.cpython-38.pyc,, +flask/__pycache__/blueprints.cpython-38.pyc,, +flask/__pycache__/cli.cpython-38.pyc,, +flask/__pycache__/config.cpython-38.pyc,, +flask/__pycache__/ctx.cpython-38.pyc,, +flask/__pycache__/debughelpers.cpython-38.pyc,, +flask/__pycache__/globals.cpython-38.pyc,, +flask/__pycache__/helpers.cpython-38.pyc,, +flask/__pycache__/logging.cpython-38.pyc,, +flask/__pycache__/sessions.cpython-38.pyc,, +flask/__pycache__/signals.cpython-38.pyc,, +flask/__pycache__/templating.cpython-38.pyc,, +flask/__pycache__/testing.cpython-38.pyc,, +flask/__pycache__/views.cpython-38.pyc,, +flask/__pycache__/wrappers.cpython-38.pyc,, +flask/_compat.py,sha256=UDFGhosh6mOdNB-4evKPuneHum1OpcAlwTNJCRm0irQ,2892 +flask/app.py,sha256=ahpe3T8w98rQd_Er5d7uDxK57S1nnqGQx3V3hirBovU,94147 +flask/blueprints.py,sha256=OsYjq0FDZ8fOfQYxx1Ly4n52ndXyNRLlt1yJ3QmMDRY,18252 +flask/cli.py,sha256=2oX2UTmfz05055cIJq7dgsPUwhDrDzhPh8YGIcRKJHo,29359 +flask/config.py,sha256=kznUhj4DLYxsTF_4kfDG8GEHto1oZG_kqblyrLFtpqQ,9951 +flask/ctx.py,sha256=leFzS9fzmo0uaLCdxpHc5_iiJZ1H0X_Ig4yPCOvT--g,16224 +flask/debughelpers.py,sha256=1ceC-UyqZTd4KsJkf0OObHPsVt5R3T6vnmYhiWBjV-w,6479 +flask/globals.py,sha256=pGg72QW_-4xUfsI33I5L_y76c21AeqfSqXDcbd8wvXU,1649 +flask/helpers.py,sha256=YCl8D1plTO1evEYP4KIgaY3H8Izww5j4EdgRJ89oHTw,40106 +flask/json/__init__.py,sha256=Ns1Hj805XIxuBMh2z0dYnMVfb_KUgLzDmP3WoUYaPhw,10729 +flask/json/__pycache__/__init__.cpython-38.pyc,, +flask/json/__pycache__/tag.cpython-38.pyc,, +flask/json/tag.py,sha256=9ehzrmt5k7hxf7ZEK0NOs3swvQyU9fWNe-pnYe69N60,8223 +flask/logging.py,sha256=qV9h0vt7NIRkKM9OHDWndzO61E5CeBMlqPJyTt-W2Wc,2231 +flask/sessions.py,sha256=2XHV4ASREhSEZ8bsPQW6pNVNuFtbR-04BzfKg0AfvHo,14452 +flask/signals.py,sha256=BGQbVyCYXnzKK2DVCzppKFyWN1qmrtW1QMAYUs-1Nr8,2211 +flask/templating.py,sha256=FDfWMbpgpC3qObW8GGXRAVrkHFF8K4CHOJymB1wvULI,4914 +flask/testing.py,sha256=XD3gWNvLUV8dqVHwKd9tZzsj81fSHtjOphQ1wTNtlMs,9379 +flask/views.py,sha256=Wy-_WkUVtCfE2zCXYeJehNgHuEtviE4v3HYfJ--MpbY,5733 +flask/wrappers.py,sha256=1Z9hF5-hXQajn_58XITQFRY8efv3Vy3uZ0avBfZu6XI,7511 diff --git a/venv/Lib/site-packages/Flask-1.0.1.dist-info/REQUESTED b/venv/Lib/site-packages/Flask-1.0.1.dist-info/REQUESTED new file mode 100644 index 0000000..e69de29 diff --git a/venv/Lib/site-packages/Flask-1.0.1.dist-info/WHEEL b/venv/Lib/site-packages/Flask-1.0.1.dist-info/WHEEL new file mode 100644 index 0000000..f21b51c --- /dev/null +++ b/venv/Lib/site-packages/Flask-1.0.1.dist-info/WHEEL @@ -0,0 +1,6 @@ +Wheel-Version: 1.0 +Generator: bdist_wheel (0.31.0) +Root-Is-Purelib: true +Tag: py2-none-any +Tag: py3-none-any + diff --git a/venv/Lib/site-packages/Flask-1.0.1.dist-info/entry_points.txt b/venv/Lib/site-packages/Flask-1.0.1.dist-info/entry_points.txt new file mode 100644 index 0000000..1eb0252 --- /dev/null +++ b/venv/Lib/site-packages/Flask-1.0.1.dist-info/entry_points.txt @@ -0,0 +1,3 @@ +[console_scripts] +flask = flask.cli:main + diff --git a/venv/Lib/site-packages/Flask-1.0.1.dist-info/top_level.txt b/venv/Lib/site-packages/Flask-1.0.1.dist-info/top_level.txt new file mode 100644 index 0000000..7e10602 --- /dev/null +++ b/venv/Lib/site-packages/Flask-1.0.1.dist-info/top_level.txt @@ -0,0 +1 @@ +flask diff --git a/venv/Lib/site-packages/Jinja2-2.10.dist-info/DESCRIPTION.rst b/venv/Lib/site-packages/Jinja2-2.10.dist-info/DESCRIPTION.rst new file mode 100644 index 0000000..1594da5 --- /dev/null +++ b/venv/Lib/site-packages/Jinja2-2.10.dist-info/DESCRIPTION.rst @@ -0,0 +1,37 @@ + +Jinja2 +~~~~~~ + +Jinja2 is a template engine written in pure Python. It provides a +`Django`_ inspired non-XML syntax but supports inline expressions and +an optional `sandboxed`_ environment. + +Nutshell +-------- + +Here a small example of a Jinja template:: + + {% extends 'base.html' %} + {% block title %}Memberlist{% endblock %} + {% block content %} + + {% endblock %} + +Philosophy +---------- + +Application logic is for the controller but don't try to make the life +for the template designer too hard by giving him too few functionality. + +For more informations visit the new `Jinja2 webpage`_ and `documentation`_. + +.. _sandboxed: https://en.wikipedia.org/wiki/Sandbox_(computer_security) +.. _Django: https://www.djangoproject.com/ +.. _Jinja2 webpage: http://jinja.pocoo.org/ +.. _documentation: http://jinja.pocoo.org/2/documentation/ + + diff --git a/venv/Lib/site-packages/Jinja2-2.10.dist-info/INSTALLER b/venv/Lib/site-packages/Jinja2-2.10.dist-info/INSTALLER new file mode 100644 index 0000000..a1b589e --- /dev/null +++ b/venv/Lib/site-packages/Jinja2-2.10.dist-info/INSTALLER @@ -0,0 +1 @@ +pip diff --git a/venv/Lib/site-packages/Jinja2-2.10.dist-info/LICENSE.txt b/venv/Lib/site-packages/Jinja2-2.10.dist-info/LICENSE.txt new file mode 100644 index 0000000..31bf900 --- /dev/null +++ b/venv/Lib/site-packages/Jinja2-2.10.dist-info/LICENSE.txt @@ -0,0 +1,31 @@ +Copyright (c) 2009 by the Jinja Team, see AUTHORS for more details. + +Some rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + + * The names of the contributors may not be used to endorse or + promote products derived from this software without specific + prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/venv/Lib/site-packages/Jinja2-2.10.dist-info/METADATA b/venv/Lib/site-packages/Jinja2-2.10.dist-info/METADATA new file mode 100644 index 0000000..40f2b46 --- /dev/null +++ b/venv/Lib/site-packages/Jinja2-2.10.dist-info/METADATA @@ -0,0 +1,68 @@ +Metadata-Version: 2.0 +Name: Jinja2 +Version: 2.10 +Summary: A small but fast and easy to use stand-alone template engine written in pure python. +Home-page: http://jinja.pocoo.org/ +Author: Armin Ronacher +Author-email: armin.ronacher@active-4.com +License: BSD +Description-Content-Type: UNKNOWN +Platform: UNKNOWN +Classifier: Development Status :: 5 - Production/Stable +Classifier: Environment :: Web Environment +Classifier: Intended Audience :: Developers +Classifier: License :: OSI Approved :: BSD License +Classifier: Operating System :: OS Independent +Classifier: Programming Language :: Python +Classifier: Programming Language :: Python :: 2 +Classifier: Programming Language :: Python :: 2.6 +Classifier: Programming Language :: Python :: 2.7 +Classifier: Programming Language :: Python :: 3 +Classifier: Programming Language :: Python :: 3.3 +Classifier: Programming Language :: Python :: 3.4 +Classifier: Programming Language :: Python :: 3.5 +Classifier: Programming Language :: Python :: 3.6 +Classifier: Topic :: Internet :: WWW/HTTP :: Dynamic Content +Classifier: Topic :: Software Development :: Libraries :: Python Modules +Classifier: Topic :: Text Processing :: Markup :: HTML +Requires-Dist: MarkupSafe (>=0.23) +Provides-Extra: i18n +Requires-Dist: Babel (>=0.8); extra == 'i18n' + + +Jinja2 +~~~~~~ + +Jinja2 is a template engine written in pure Python. It provides a +`Django`_ inspired non-XML syntax but supports inline expressions and +an optional `sandboxed`_ environment. + +Nutshell +-------- + +Here a small example of a Jinja template:: + + {% extends 'base.html' %} + {% block title %}Memberlist{% endblock %} + {% block content %} + + {% endblock %} + +Philosophy +---------- + +Application logic is for the controller but don't try to make the life +for the template designer too hard by giving him too few functionality. + +For more informations visit the new `Jinja2 webpage`_ and `documentation`_. + +.. _sandboxed: https://en.wikipedia.org/wiki/Sandbox_(computer_security) +.. _Django: https://www.djangoproject.com/ +.. _Jinja2 webpage: http://jinja.pocoo.org/ +.. _documentation: http://jinja.pocoo.org/2/documentation/ + + diff --git a/venv/Lib/site-packages/Jinja2-2.10.dist-info/RECORD b/venv/Lib/site-packages/Jinja2-2.10.dist-info/RECORD new file mode 100644 index 0000000..87c37da --- /dev/null +++ b/venv/Lib/site-packages/Jinja2-2.10.dist-info/RECORD @@ -0,0 +1,64 @@ +Jinja2-2.10.dist-info/DESCRIPTION.rst,sha256=b5ckFDoM7vVtz_mAsJD4OPteFKCqE7beu353g4COoYI,978 +Jinja2-2.10.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4 +Jinja2-2.10.dist-info/LICENSE.txt,sha256=JvzUNv3Io51EiWrAPm8d_SXjhJnEjyDYvB3Tvwqqils,1554 +Jinja2-2.10.dist-info/METADATA,sha256=18EgU8zR6-av-0-5y_gXebzK4GnBB_76lALUsl-6QHM,2258 +Jinja2-2.10.dist-info/RECORD,, +Jinja2-2.10.dist-info/REQUESTED,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +Jinja2-2.10.dist-info/WHEEL,sha256=kdsN-5OJAZIiHN-iO4Rhl82KyS0bDWf4uBwMbkNafr8,110 +Jinja2-2.10.dist-info/entry_points.txt,sha256=NdzVcOrqyNyKDxD09aERj__3bFx2paZhizFDsKmVhiA,72 +Jinja2-2.10.dist-info/metadata.json,sha256=NPUJ9TMBxVQAv_kTJzvU8HwmP-4XZvbK9mz6_4YUVl4,1473 +Jinja2-2.10.dist-info/top_level.txt,sha256=PkeVWtLb3-CqjWi1fO29OCbj55EhX_chhKrCdrVe_zs,7 +jinja2/__init__.py,sha256=xJHjaMoy51_KXn1wf0cysH6tUUifUxZCwSOfcJGEYZw,2614 +jinja2/__pycache__/__init__.cpython-38.pyc,, +jinja2/__pycache__/_compat.cpython-38.pyc,, +jinja2/__pycache__/_identifier.cpython-38.pyc,, +jinja2/__pycache__/asyncfilters.cpython-38.pyc,, +jinja2/__pycache__/asyncsupport.cpython-38.pyc,, +jinja2/__pycache__/bccache.cpython-38.pyc,, +jinja2/__pycache__/compiler.cpython-38.pyc,, +jinja2/__pycache__/constants.cpython-38.pyc,, +jinja2/__pycache__/debug.cpython-38.pyc,, +jinja2/__pycache__/defaults.cpython-38.pyc,, +jinja2/__pycache__/environment.cpython-38.pyc,, +jinja2/__pycache__/exceptions.cpython-38.pyc,, +jinja2/__pycache__/ext.cpython-38.pyc,, +jinja2/__pycache__/filters.cpython-38.pyc,, +jinja2/__pycache__/idtracking.cpython-38.pyc,, +jinja2/__pycache__/lexer.cpython-38.pyc,, +jinja2/__pycache__/loaders.cpython-38.pyc,, +jinja2/__pycache__/meta.cpython-38.pyc,, +jinja2/__pycache__/nativetypes.cpython-38.pyc,, +jinja2/__pycache__/nodes.cpython-38.pyc,, +jinja2/__pycache__/optimizer.cpython-38.pyc,, +jinja2/__pycache__/parser.cpython-38.pyc,, +jinja2/__pycache__/runtime.cpython-38.pyc,, +jinja2/__pycache__/sandbox.cpython-38.pyc,, +jinja2/__pycache__/tests.cpython-38.pyc,, +jinja2/__pycache__/utils.cpython-38.pyc,, +jinja2/__pycache__/visitor.cpython-38.pyc,, +jinja2/_compat.py,sha256=xP60CE5Qr8FTYcDE1f54tbZLKGvMwYml4-8T7Q4KG9k,2596 +jinja2/_identifier.py,sha256=W1QBSY-iJsyt6oR_nKSuNNCzV95vLIOYgUNPUI1d5gU,1726 +jinja2/asyncfilters.py,sha256=cTDPvrS8Hp_IkwsZ1m9af_lr5nHysw7uTa5gV0NmZVE,4144 +jinja2/asyncsupport.py,sha256=UErQ3YlTLaSjFb94P4MVn08-aVD9jJxty2JVfMRb-1M,7878 +jinja2/bccache.py,sha256=nQldx0ZRYANMyfvOihRoYFKSlUdd5vJkS7BjxNwlOZM,12794 +jinja2/compiler.py,sha256=BqC5U6JxObSRhblyT_a6Tp5GtEU5z3US1a4jLQaxxgo,65386 +jinja2/constants.py,sha256=uwwV8ZUhHhacAuz5PTwckfsbqBaqM7aKfyJL7kGX5YQ,1626 +jinja2/debug.py,sha256=WTVeUFGUa4v6ReCsYv-iVPa3pkNB75OinJt3PfxNdXs,12045 +jinja2/defaults.py,sha256=Em-95hmsJxIenDCZFB1YSvf9CNhe9rBmytN3yUrBcWA,1400 +jinja2/environment.py,sha256=VnkAkqw8JbjZct4tAyHlpBrka2vqB-Z58RAP-32P1ZY,50849 +jinja2/exceptions.py,sha256=_Rj-NVi98Q6AiEjYQOsP8dEIdu5AlmRHzcSNOPdWix4,4428 +jinja2/ext.py,sha256=atMQydEC86tN1zUsdQiHw5L5cF62nDbqGue25Yiu3N4,24500 +jinja2/filters.py,sha256=yOAJk0MsH-_gEC0i0U6NweVQhbtYaC-uE8xswHFLF4w,36528 +jinja2/idtracking.py,sha256=2GbDSzIvGArEBGLkovLkqEfmYxmWsEf8c3QZwM4uNsw,9197 +jinja2/lexer.py,sha256=ySEPoXd1g7wRjsuw23uimS6nkGN5aqrYwcOKxCaVMBQ,28559 +jinja2/loaders.py,sha256=xiTuURKAEObyym0nU8PCIXu_Qp8fn0AJ5oIADUUm-5Q,17382 +jinja2/meta.py,sha256=fmKHxkmZYAOm9QyWWy8EMd6eefAIh234rkBMW2X4ZR8,4340 +jinja2/nativetypes.py,sha256=_sJhS8f-8Q0QMIC0dm1YEdLyxEyoO-kch8qOL5xUDfE,7308 +jinja2/nodes.py,sha256=L10L_nQDfubLhO3XjpF9qz46FSh2clL-3e49ogVlMmA,30853 +jinja2/optimizer.py,sha256=MsdlFACJ0FRdPtjmCAdt7JQ9SGrXFaDNUaslsWQaG3M,1722 +jinja2/parser.py,sha256=lPzTEbcpTRBLw8ii6OYyExHeAhaZLMA05Hpv4ll3ULk,35875 +jinja2/runtime.py,sha256=DHdD38Pq8gj7uWQC5usJyWFoNWL317A9AvXOW_CLB34,27755 +jinja2/sandbox.py,sha256=TVyZHlNqqTzsv9fv2NvJNmSdWRHTguhyMHdxjWms32U,16708 +jinja2/tests.py,sha256=iJQLwbapZr-EKquTG_fVOVdwHUUKf3SX9eNkjQDF8oU,4237 +jinja2/utils.py,sha256=q24VupGZotQ-uOyrJxCaXtDWhZC1RgsQG7kcdmjck2Q,20629 +jinja2/visitor.py,sha256=JD1H1cANA29JcntFfN5fPyqQxB4bI4wC00BzZa-XHks,3316 diff --git a/venv/Lib/site-packages/Jinja2-2.10.dist-info/REQUESTED b/venv/Lib/site-packages/Jinja2-2.10.dist-info/REQUESTED new file mode 100644 index 0000000..e69de29 diff --git a/venv/Lib/site-packages/Jinja2-2.10.dist-info/WHEEL b/venv/Lib/site-packages/Jinja2-2.10.dist-info/WHEEL new file mode 100644 index 0000000..7332a41 --- /dev/null +++ b/venv/Lib/site-packages/Jinja2-2.10.dist-info/WHEEL @@ -0,0 +1,6 @@ +Wheel-Version: 1.0 +Generator: bdist_wheel (0.30.0) +Root-Is-Purelib: true +Tag: py2-none-any +Tag: py3-none-any + diff --git a/venv/Lib/site-packages/Jinja2-2.10.dist-info/entry_points.txt b/venv/Lib/site-packages/Jinja2-2.10.dist-info/entry_points.txt new file mode 100644 index 0000000..32e6b75 --- /dev/null +++ b/venv/Lib/site-packages/Jinja2-2.10.dist-info/entry_points.txt @@ -0,0 +1,4 @@ + + [babel.extractors] + jinja2 = jinja2.ext:babel_extract[i18n] + \ No newline at end of file diff --git a/venv/Lib/site-packages/Jinja2-2.10.dist-info/metadata.json b/venv/Lib/site-packages/Jinja2-2.10.dist-info/metadata.json new file mode 100644 index 0000000..7f5dc38 --- /dev/null +++ b/venv/Lib/site-packages/Jinja2-2.10.dist-info/metadata.json @@ -0,0 +1 @@ +{"classifiers": ["Development Status :: 5 - Production/Stable", "Environment :: Web Environment", "Intended Audience :: Developers", "License :: OSI Approved :: BSD License", "Operating System :: OS Independent", "Programming Language :: Python", "Programming Language :: Python :: 2", "Programming Language :: Python :: 2.6", "Programming Language :: Python :: 2.7", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.3", "Programming Language :: Python :: 3.4", "Programming Language :: Python :: 3.5", "Programming Language :: Python :: 3.6", "Topic :: Internet :: WWW/HTTP :: Dynamic Content", "Topic :: Software Development :: Libraries :: Python Modules", "Topic :: Text Processing :: Markup :: HTML"], "description_content_type": "UNKNOWN", "extensions": {"python.details": {"contacts": [{"email": "armin.ronacher@active-4.com", "name": "Armin Ronacher", "role": "author"}], "document_names": {"description": "DESCRIPTION.rst", "license": "LICENSE.txt"}, "project_urls": {"Home": "http://jinja.pocoo.org/"}}, "python.exports": {"babel.extractors": {"jinja2": "jinja2.ext:babel_extract [i18n]"}}}, "extras": ["i18n"], "generator": "bdist_wheel (0.30.0)", "license": "BSD", "metadata_version": "2.0", "name": "Jinja2", "run_requires": [{"extra": "i18n", "requires": ["Babel (>=0.8)"]}, {"requires": ["MarkupSafe (>=0.23)"]}], "summary": "A small but fast and easy to use stand-alone template engine written in pure python.", "version": "2.10"} \ No newline at end of file diff --git a/venv/Lib/site-packages/Jinja2-2.10.dist-info/top_level.txt b/venv/Lib/site-packages/Jinja2-2.10.dist-info/top_level.txt new file mode 100644 index 0000000..7f7afbf --- /dev/null +++ b/venv/Lib/site-packages/Jinja2-2.10.dist-info/top_level.txt @@ -0,0 +1 @@ +jinja2 diff --git a/venv/Lib/site-packages/MarkupSafe-1.1.1.dist-info/INSTALLER b/venv/Lib/site-packages/MarkupSafe-1.1.1.dist-info/INSTALLER new file mode 100644 index 0000000..a1b589e --- /dev/null +++ b/venv/Lib/site-packages/MarkupSafe-1.1.1.dist-info/INSTALLER @@ -0,0 +1 @@ +pip diff --git a/venv/Lib/site-packages/MarkupSafe-1.1.1.dist-info/LICENSE.rst b/venv/Lib/site-packages/MarkupSafe-1.1.1.dist-info/LICENSE.rst new file mode 100644 index 0000000..9d227a0 --- /dev/null +++ b/venv/Lib/site-packages/MarkupSafe-1.1.1.dist-info/LICENSE.rst @@ -0,0 +1,28 @@ +Copyright 2010 Pallets + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/venv/Lib/site-packages/MarkupSafe-1.1.1.dist-info/METADATA b/venv/Lib/site-packages/MarkupSafe-1.1.1.dist-info/METADATA new file mode 100644 index 0000000..c50370d --- /dev/null +++ b/venv/Lib/site-packages/MarkupSafe-1.1.1.dist-info/METADATA @@ -0,0 +1,105 @@ +Metadata-Version: 2.1 +Name: MarkupSafe +Version: 1.1.1 +Summary: Safely add untrusted strings to HTML/XML markup. +Home-page: https://palletsprojects.com/p/markupsafe/ +Author: Armin Ronacher +Author-email: armin.ronacher@active-4.com +Maintainer: The Pallets Team +Maintainer-email: contact@palletsprojects.com +License: BSD-3-Clause +Project-URL: Documentation, https://markupsafe.palletsprojects.com/ +Project-URL: Code, https://github.com/pallets/markupsafe +Project-URL: Issue tracker, https://github.com/pallets/markupsafe/issues +Platform: UNKNOWN +Classifier: Development Status :: 5 - Production/Stable +Classifier: Environment :: Web Environment +Classifier: Intended Audience :: Developers +Classifier: License :: OSI Approved :: BSD License +Classifier: Operating System :: OS Independent +Classifier: Programming Language :: Python +Classifier: Programming Language :: Python :: 2 +Classifier: Programming Language :: Python :: 2.7 +Classifier: Programming Language :: Python :: 3 +Classifier: Programming Language :: Python :: 3.4 +Classifier: Programming Language :: Python :: 3.5 +Classifier: Programming Language :: Python :: 3.6 +Classifier: Programming Language :: Python :: 3.7 +Classifier: Topic :: Internet :: WWW/HTTP :: Dynamic Content +Classifier: Topic :: Software Development :: Libraries :: Python Modules +Classifier: Topic :: Text Processing :: Markup :: HTML +Requires-Python: >=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.* +Description-Content-Type: text/x-rst + +MarkupSafe +========== + +MarkupSafe implements a text object that escapes characters so it is +safe to use in HTML and XML. Characters that have special meanings are +replaced so that they display as the actual characters. This mitigates +injection attacks, meaning untrusted user input can safely be displayed +on a page. + + +Installing +---------- + +Install and update using `pip`_: + +.. code-block:: text + + pip install -U MarkupSafe + +.. _pip: https://pip.pypa.io/en/stable/quickstart/ + + +Examples +-------- + +.. code-block:: pycon + + >>> from markupsafe import Markup, escape + >>> # escape replaces special characters and wraps in Markup + >>> escape('') + Markup(u'<script>alert(document.cookie);</script>') + >>> # wrap in Markup to mark text "safe" and prevent escaping + >>> Markup('Hello') + Markup('hello') + >>> escape(Markup('Hello')) + Markup('hello') + >>> # Markup is a text subclass (str on Python 3, unicode on Python 2) + >>> # methods and operators escape their arguments + >>> template = Markup("Hello %s") + >>> template % '"World"' + Markup('Hello "World"') + + +Donate +------ + +The Pallets organization develops and supports MarkupSafe and other +libraries that use it. In order to grow the community of contributors +and users, and allow the maintainers to devote more time to the +projects, `please donate today`_. + +.. _please donate today: https://palletsprojects.com/donate + + +Links +----- + +* Website: https://palletsprojects.com/p/markupsafe/ +* Documentation: https://markupsafe.palletsprojects.com/ +* License: `BSD-3-Clause `_ +* Releases: https://pypi.org/project/MarkupSafe/ +* Code: https://github.com/pallets/markupsafe +* Issue tracker: https://github.com/pallets/markupsafe/issues +* Test status: + + * Linux, Mac: https://travis-ci.org/pallets/markupsafe + * Windows: https://ci.appveyor.com/project/pallets/markupsafe + +* Test coverage: https://codecov.io/gh/pallets/markupsafe +* Official chat: https://discord.gg/t6rrQZH + + diff --git a/venv/Lib/site-packages/MarkupSafe-1.1.1.dist-info/RECORD b/venv/Lib/site-packages/MarkupSafe-1.1.1.dist-info/RECORD new file mode 100644 index 0000000..f728e1f --- /dev/null +++ b/venv/Lib/site-packages/MarkupSafe-1.1.1.dist-info/RECORD @@ -0,0 +1,15 @@ +MarkupSafe-1.1.1.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4 +MarkupSafe-1.1.1.dist-info/LICENSE.rst,sha256=RjHsDbX9kKVH4zaBcmTGeYIUM4FG-KyUtKV_lu6MnsQ,1503 +MarkupSafe-1.1.1.dist-info/METADATA,sha256=IFCP4hCNGjXJgMoSvdjPiKDLAMUTTWoxKXQsQvmyMNU,3653 +MarkupSafe-1.1.1.dist-info/RECORD,, +MarkupSafe-1.1.1.dist-info/WHEEL,sha256=jovIjvNuo6l5lHtTPdXyjKVQ_5SCkmdptE5fkPNfjyM,101 +MarkupSafe-1.1.1.dist-info/top_level.txt,sha256=qy0Plje5IJuvsCBjejJyhDCjEAdcDLK_2agVcex8Z6U,11 +markupsafe/__init__.py,sha256=UAy1UKlykemnSZWIVn8RDqY0wvjV6lkeRwYOMNhw4bA,10453 +markupsafe/__pycache__/__init__.cpython-38.pyc,, +markupsafe/__pycache__/_compat.cpython-38.pyc,, +markupsafe/__pycache__/_constants.cpython-38.pyc,, +markupsafe/__pycache__/_native.cpython-38.pyc,, +markupsafe/_compat.py,sha256=XweNhJEcyTP_wIBUaIO6nxzIb6XFwweriXyZfiTpkdw,591 +markupsafe/_constants.py,sha256=IXLUQkLM6CTustG5vEQTEy6pBB3z5pm84NkYU1aW9qI,4954 +markupsafe/_native.py,sha256=LwsYk-GHoPsPboRD_tNC6_jTmCj3MLtsnDFis7HjE50,1942 +markupsafe/_speedups.cp38-win32.pyd,sha256=8nGEdcR_DUewvF72FxW6d-aD5SYoLMe2prWwEEnH9ck,12800 diff --git a/venv/Lib/site-packages/MarkupSafe-1.1.1.dist-info/WHEEL b/venv/Lib/site-packages/MarkupSafe-1.1.1.dist-info/WHEEL new file mode 100644 index 0000000..117ab3f --- /dev/null +++ b/venv/Lib/site-packages/MarkupSafe-1.1.1.dist-info/WHEEL @@ -0,0 +1,5 @@ +Wheel-Version: 1.0 +Generator: bdist_wheel (0.33.6) +Root-Is-Purelib: false +Tag: cp38-cp38-win32 + diff --git a/venv/Lib/site-packages/MarkupSafe-1.1.1.dist-info/top_level.txt b/venv/Lib/site-packages/MarkupSafe-1.1.1.dist-info/top_level.txt new file mode 100644 index 0000000..75bf729 --- /dev/null +++ b/venv/Lib/site-packages/MarkupSafe-1.1.1.dist-info/top_level.txt @@ -0,0 +1 @@ +markupsafe diff --git a/venv/Lib/site-packages/Werkzeug-0.14.1.dist-info/DESCRIPTION.rst b/venv/Lib/site-packages/Werkzeug-0.14.1.dist-info/DESCRIPTION.rst new file mode 100644 index 0000000..675f08d --- /dev/null +++ b/venv/Lib/site-packages/Werkzeug-0.14.1.dist-info/DESCRIPTION.rst @@ -0,0 +1,80 @@ +Werkzeug +======== + +Werkzeug is a comprehensive `WSGI`_ web application library. It began as +a simple collection of various utilities for WSGI applications and has +become one of the most advanced WSGI utility libraries. + +It includes: + +* An interactive debugger that allows inspecting stack traces and source + code in the browser with an interactive interpreter for any frame in + the stack. +* A full-featured request object with objects to interact with headers, + query args, form data, files, and cookies. +* A response object that can wrap other WSGI applications and handle + streaming data. +* A routing system for matching URLs to endpoints and generating URLs + for endpoints, with an extensible system for capturing variables from + URLs. +* HTTP utilities to handle entity tags, cache control, dates, user + agents, cookies, files, and more. +* A threaded WSGI server for use while developing applications locally. +* A test client for simulating HTTP requests during testing without + requiring running a server. + +Werkzeug is Unicode aware and doesn't enforce any dependencies. It is up +to the developer to choose a template engine, database adapter, and even +how to handle requests. It can be used to build all sorts of end user +applications such as blogs, wikis, or bulletin boards. + +`Flask`_ wraps Werkzeug, using it to handle the details of WSGI while +providing more structure and patterns for defining powerful +applications. + + +Installing +---------- + +Install and update using `pip`_: + +.. code-block:: text + + pip install -U Werkzeug + + +A Simple Example +---------------- + +.. code-block:: python + + from werkzeug.wrappers import Request, Response + + @Request.application + def application(request): + return Response('Hello, World!') + + if __name__ == '__main__': + from werkzeug.serving import run_simple + run_simple('localhost', 4000, application) + + +Links +----- + +* Website: https://www.palletsprojects.com/p/werkzeug/ +* Releases: https://pypi.org/project/Werkzeug/ +* Code: https://github.com/pallets/werkzeug +* Issue tracker: https://github.com/pallets/werkzeug/issues +* Test status: + + * Linux, Mac: https://travis-ci.org/pallets/werkzeug + * Windows: https://ci.appveyor.com/project/davidism/werkzeug + +* Test coverage: https://codecov.io/gh/pallets/werkzeug + +.. _WSGI: https://wsgi.readthedocs.io/en/latest/ +.. _Flask: https://www.palletsprojects.com/p/flask/ +.. _pip: https://pip.pypa.io/en/stable/quickstart/ + + diff --git a/venv/Lib/site-packages/Werkzeug-0.14.1.dist-info/INSTALLER b/venv/Lib/site-packages/Werkzeug-0.14.1.dist-info/INSTALLER new file mode 100644 index 0000000..a1b589e --- /dev/null +++ b/venv/Lib/site-packages/Werkzeug-0.14.1.dist-info/INSTALLER @@ -0,0 +1 @@ +pip diff --git a/venv/Lib/site-packages/Werkzeug-0.14.1.dist-info/LICENSE.txt b/venv/Lib/site-packages/Werkzeug-0.14.1.dist-info/LICENSE.txt new file mode 100644 index 0000000..1cc75bb --- /dev/null +++ b/venv/Lib/site-packages/Werkzeug-0.14.1.dist-info/LICENSE.txt @@ -0,0 +1,31 @@ +Copyright © 2007 by the Pallets team. + +Some rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +* Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE AND DOCUMENTATION IS PROVIDED BY THE COPYRIGHT HOLDERS AND +CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, +BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND +FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF +USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +THIS SOFTWARE AND DOCUMENTATION, EVEN IF ADVISED OF THE POSSIBILITY OF +SUCH DAMAGE. diff --git a/venv/Lib/site-packages/Werkzeug-0.14.1.dist-info/METADATA b/venv/Lib/site-packages/Werkzeug-0.14.1.dist-info/METADATA new file mode 100644 index 0000000..bfc3c4e --- /dev/null +++ b/venv/Lib/site-packages/Werkzeug-0.14.1.dist-info/METADATA @@ -0,0 +1,116 @@ +Metadata-Version: 2.0 +Name: Werkzeug +Version: 0.14.1 +Summary: The comprehensive WSGI web application library. +Home-page: https://www.palletsprojects.org/p/werkzeug/ +Author: Armin Ronacher +Author-email: armin.ronacher@active-4.com +License: BSD +Description-Content-Type: UNKNOWN +Platform: any +Classifier: Development Status :: 5 - Production/Stable +Classifier: Environment :: Web Environment +Classifier: Intended Audience :: Developers +Classifier: License :: OSI Approved :: BSD License +Classifier: Operating System :: OS Independent +Classifier: Programming Language :: Python +Classifier: Programming Language :: Python :: 2 +Classifier: Programming Language :: Python :: 2.6 +Classifier: Programming Language :: Python :: 2.7 +Classifier: Programming Language :: Python :: 3 +Classifier: Programming Language :: Python :: 3.3 +Classifier: Programming Language :: Python :: 3.4 +Classifier: Programming Language :: Python :: 3.5 +Classifier: Programming Language :: Python :: 3.6 +Classifier: Topic :: Internet :: WWW/HTTP :: Dynamic Content +Classifier: Topic :: Software Development :: Libraries :: Python Modules +Provides-Extra: dev +Requires-Dist: coverage; extra == 'dev' +Requires-Dist: pytest; extra == 'dev' +Requires-Dist: sphinx; extra == 'dev' +Requires-Dist: tox; extra == 'dev' +Provides-Extra: termcolor +Requires-Dist: termcolor; extra == 'termcolor' +Provides-Extra: watchdog +Requires-Dist: watchdog; extra == 'watchdog' + +Werkzeug +======== + +Werkzeug is a comprehensive `WSGI`_ web application library. It began as +a simple collection of various utilities for WSGI applications and has +become one of the most advanced WSGI utility libraries. + +It includes: + +* An interactive debugger that allows inspecting stack traces and source + code in the browser with an interactive interpreter for any frame in + the stack. +* A full-featured request object with objects to interact with headers, + query args, form data, files, and cookies. +* A response object that can wrap other WSGI applications and handle + streaming data. +* A routing system for matching URLs to endpoints and generating URLs + for endpoints, with an extensible system for capturing variables from + URLs. +* HTTP utilities to handle entity tags, cache control, dates, user + agents, cookies, files, and more. +* A threaded WSGI server for use while developing applications locally. +* A test client for simulating HTTP requests during testing without + requiring running a server. + +Werkzeug is Unicode aware and doesn't enforce any dependencies. It is up +to the developer to choose a template engine, database adapter, and even +how to handle requests. It can be used to build all sorts of end user +applications such as blogs, wikis, or bulletin boards. + +`Flask`_ wraps Werkzeug, using it to handle the details of WSGI while +providing more structure and patterns for defining powerful +applications. + + +Installing +---------- + +Install and update using `pip`_: + +.. code-block:: text + + pip install -U Werkzeug + + +A Simple Example +---------------- + +.. code-block:: python + + from werkzeug.wrappers import Request, Response + + @Request.application + def application(request): + return Response('Hello, World!') + + if __name__ == '__main__': + from werkzeug.serving import run_simple + run_simple('localhost', 4000, application) + + +Links +----- + +* Website: https://www.palletsprojects.com/p/werkzeug/ +* Releases: https://pypi.org/project/Werkzeug/ +* Code: https://github.com/pallets/werkzeug +* Issue tracker: https://github.com/pallets/werkzeug/issues +* Test status: + + * Linux, Mac: https://travis-ci.org/pallets/werkzeug + * Windows: https://ci.appveyor.com/project/davidism/werkzeug + +* Test coverage: https://codecov.io/gh/pallets/werkzeug + +.. _WSGI: https://wsgi.readthedocs.io/en/latest/ +.. _Flask: https://www.palletsprojects.com/p/flask/ +.. _pip: https://pip.pypa.io/en/stable/quickstart/ + + diff --git a/venv/Lib/site-packages/Werkzeug-0.14.1.dist-info/RECORD b/venv/Lib/site-packages/Werkzeug-0.14.1.dist-info/RECORD new file mode 100644 index 0000000..9067b88 --- /dev/null +++ b/venv/Lib/site-packages/Werkzeug-0.14.1.dist-info/RECORD @@ -0,0 +1,98 @@ +Werkzeug-0.14.1.dist-info/DESCRIPTION.rst,sha256=rOCN36jwsWtWsTpqPG96z7FMilB5qI1CIARSKRuUmz8,2452 +Werkzeug-0.14.1.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4 +Werkzeug-0.14.1.dist-info/LICENSE.txt,sha256=xndz_dD4m269AF9l_Xbl5V3tM1N3C1LoZC2PEPxWO-8,1534 +Werkzeug-0.14.1.dist-info/METADATA,sha256=FbfadrPdJNUWAxMOKxGUtHe5R3IDSBKYYmAz3FvI3uY,3872 +Werkzeug-0.14.1.dist-info/RECORD,, +Werkzeug-0.14.1.dist-info/REQUESTED,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +Werkzeug-0.14.1.dist-info/WHEEL,sha256=GrqQvamwgBV4nLoJe0vhYRSWzWsx7xjlt74FT0SWYfE,110 +Werkzeug-0.14.1.dist-info/metadata.json,sha256=4489UTt6HBp2NQil95-pBkjU4Je93SMHvMxZ_rjOpqA,1452 +Werkzeug-0.14.1.dist-info/top_level.txt,sha256=QRyj2VjwJoQkrwjwFIOlB8Xg3r9un0NtqVHQF-15xaw,9 +werkzeug/__init__.py,sha256=NR0d4n_-U9BLVKlOISean3zUt2vBwhvK-AZE6M0sC0k,6842 +werkzeug/__pycache__/__init__.cpython-38.pyc,, +werkzeug/__pycache__/_compat.cpython-38.pyc,, +werkzeug/__pycache__/_internal.cpython-38.pyc,, +werkzeug/__pycache__/_reloader.cpython-38.pyc,, +werkzeug/__pycache__/datastructures.cpython-38.pyc,, +werkzeug/__pycache__/exceptions.cpython-38.pyc,, +werkzeug/__pycache__/filesystem.cpython-38.pyc,, +werkzeug/__pycache__/formparser.cpython-38.pyc,, +werkzeug/__pycache__/http.cpython-38.pyc,, +werkzeug/__pycache__/local.cpython-38.pyc,, +werkzeug/__pycache__/posixemulation.cpython-38.pyc,, +werkzeug/__pycache__/routing.cpython-38.pyc,, +werkzeug/__pycache__/script.cpython-38.pyc,, +werkzeug/__pycache__/security.cpython-38.pyc,, +werkzeug/__pycache__/serving.cpython-38.pyc,, +werkzeug/__pycache__/test.cpython-38.pyc,, +werkzeug/__pycache__/testapp.cpython-38.pyc,, +werkzeug/__pycache__/urls.cpython-38.pyc,, +werkzeug/__pycache__/useragents.cpython-38.pyc,, +werkzeug/__pycache__/utils.cpython-38.pyc,, +werkzeug/__pycache__/websocket.cpython-38.pyc,, +werkzeug/__pycache__/wrappers.cpython-38.pyc,, +werkzeug/__pycache__/wsgi.cpython-38.pyc,, +werkzeug/_compat.py,sha256=8c4U9o6A_TR9nKCcTbpZNxpqCXcXDVIbFawwKM2s92c,6311 +werkzeug/_internal.py,sha256=GhEyGMlsSz_tYjsDWO9TG35VN7304MM8gjKDrXLEdVc,13873 +werkzeug/_reloader.py,sha256=AyPphcOHPbu6qzW0UbrVvTDJdre5WgpxbhIJN_TqzUc,9264 +werkzeug/contrib/__init__.py,sha256=f7PfttZhbrImqpr5Ezre8CXgwvcGUJK7zWNpO34WWrw,623 +werkzeug/contrib/__pycache__/__init__.cpython-38.pyc,, +werkzeug/contrib/__pycache__/atom.cpython-38.pyc,, +werkzeug/contrib/__pycache__/cache.cpython-38.pyc,, +werkzeug/contrib/__pycache__/fixers.cpython-38.pyc,, +werkzeug/contrib/__pycache__/iterio.cpython-38.pyc,, +werkzeug/contrib/__pycache__/jsrouting.cpython-38.pyc,, +werkzeug/contrib/__pycache__/limiter.cpython-38.pyc,, +werkzeug/contrib/__pycache__/lint.cpython-38.pyc,, +werkzeug/contrib/__pycache__/profiler.cpython-38.pyc,, +werkzeug/contrib/__pycache__/securecookie.cpython-38.pyc,, +werkzeug/contrib/__pycache__/sessions.cpython-38.pyc,, +werkzeug/contrib/__pycache__/testtools.cpython-38.pyc,, +werkzeug/contrib/__pycache__/wrappers.cpython-38.pyc,, +werkzeug/contrib/atom.py,sha256=qqfJcfIn2RYY-3hO3Oz0aLq9YuNubcPQ_KZcNsDwVJo,15575 +werkzeug/contrib/cache.py,sha256=xBImHNj09BmX_7kC5NUCx8f_l4L8_O7zi0jCL21UZKE,32163 +werkzeug/contrib/fixers.py,sha256=gR06T-w71ur-tHQ_31kP_4jpOncPJ4Wc1dOqTvYusr8,10179 +werkzeug/contrib/iterio.py,sha256=RlqDvGhz0RneTpzE8dVc-yWCUv4nkPl1jEc_EDp2fH0,10814 +werkzeug/contrib/jsrouting.py,sha256=QTmgeDoKXvNK02KzXgx9lr3cAH6fAzpwF5bBdPNvJPs,8564 +werkzeug/contrib/limiter.py,sha256=iS8-ahPZ-JLRnmfIBzxpm7O_s3lPsiDMVWv7llAIDCI,1334 +werkzeug/contrib/lint.py,sha256=Mj9NeUN7s4zIUWeQOAVjrmtZIcl3Mm2yDe9BSIr9YGE,12558 +werkzeug/contrib/profiler.py,sha256=ISwCWvwVyGpDLRBRpLjo_qUWma6GXYBrTAco4PEQSHY,5151 +werkzeug/contrib/securecookie.py,sha256=uWMyHDHY3lkeBRiCSayGqWkAIy4a7xAbSE_Hln9ecqc,12196 +werkzeug/contrib/sessions.py,sha256=39LVNvLbm5JWpbxM79WC2l87MJFbqeISARjwYbkJatw,12577 +werkzeug/contrib/testtools.py,sha256=G9xN-qeihJlhExrIZMCahvQOIDxdL9NiX874jiiHFMs,2453 +werkzeug/contrib/wrappers.py,sha256=v7OYlz7wQtDlS9fey75UiRZ1IkUWqCpzbhsLy4k14Hw,10398 +werkzeug/datastructures.py,sha256=3IgNKNqrz-ZjmAG7y3YgEYK-enDiMT_b652PsypWcYg,90080 +werkzeug/debug/__init__.py,sha256=uSn9BqCZ5E3ySgpoZtundpROGsn-uYvZtSFiTfAX24M,17452 +werkzeug/debug/__pycache__/__init__.cpython-38.pyc,, +werkzeug/debug/__pycache__/console.cpython-38.pyc,, +werkzeug/debug/__pycache__/repr.cpython-38.pyc,, +werkzeug/debug/__pycache__/tbtools.cpython-38.pyc,, +werkzeug/debug/console.py,sha256=n3-dsKk1TsjnN-u4ZgmuWCU_HO0qw5IA7ttjhyyMM6I,5607 +werkzeug/debug/repr.py,sha256=bKqstDYGfECpeLerd48s_hxuqK4b6UWnjMu3d_DHO8I,9340 +werkzeug/debug/shared/FONT_LICENSE,sha256=LwAVEI1oYnvXiNMT9SnCH_TaLCxCpeHziDrMg0gPkAI,4673 +werkzeug/debug/shared/console.png,sha256=bxax6RXXlvOij_KeqvSNX0ojJf83YbnZ7my-3Gx9w2A,507 +werkzeug/debug/shared/debugger.js,sha256=PKPVYuyO4SX1hkqLOwCLvmIEO5154WatFYaXE-zIfKI,6264 +werkzeug/debug/shared/jquery.js,sha256=7LkWEzqTdpEfELxcZZlS6wAx5Ff13zZ83lYO2_ujj7g,95957 +werkzeug/debug/shared/less.png,sha256=-4-kNRaXJSONVLahrQKUxMwXGm9R4OnZ9SxDGpHlIR4,191 +werkzeug/debug/shared/more.png,sha256=GngN7CioHQoV58rH6ojnkYi8c_qED2Aka5FO5UXrReY,200 +werkzeug/debug/shared/source.png,sha256=RoGcBTE4CyCB85GBuDGTFlAnUqxwTBiIfDqW15EpnUQ,818 +werkzeug/debug/shared/style.css,sha256=IEO0PC2pWmh2aEyGCaN--txuWsRCliuhlbEhPDFwh0A,6270 +werkzeug/debug/shared/ubuntu.ttf,sha256=1eaHFyepmy4FyDvjLVzpITrGEBu_CZYY94jE0nED1c0,70220 +werkzeug/debug/tbtools.py,sha256=rBudXCmkVdAKIcdhxANxgf09g6kQjJWW9_5bjSpr4OY,18451 +werkzeug/exceptions.py,sha256=3wp95Hqj9FqV8MdikV99JRcHse_fSMn27V8tgP5Hw2c,20505 +werkzeug/filesystem.py,sha256=hHWeWo_gqLMzTRfYt8-7n2wWcWUNTnDyudQDLOBEICE,2175 +werkzeug/formparser.py,sha256=mUuCwjzjb8_E4RzrAT2AioLuZSYpqR1KXTK6LScRYzA,21722 +werkzeug/http.py,sha256=RQg4MJuhRv2isNRiEh__Phh09ebpfT3Kuu_GfrZ54_c,40079 +werkzeug/local.py,sha256=QdQhWV5L8p1Y1CJ1CDStwxaUs24SuN5aebHwjVD08C8,14553 +werkzeug/posixemulation.py,sha256=xEF2Bxc-vUCPkiu4IbfWVd3LW7DROYAT-ExW6THqyzw,3519 +werkzeug/routing.py,sha256=2JVtdSgxKGeANy4Z_FP-dKESvKtkYGCZ1J2fARCLGCY,67214 +werkzeug/script.py,sha256=DwaVDcXdaOTffdNvlBdLitxWXjKaRVT32VbhDtljFPY,11365 +werkzeug/security.py,sha256=0m107exslz4QJLWQCpfQJ04z3re4eGHVggRvrQVAdWc,9193 +werkzeug/serving.py,sha256=A0flnIJHufdn2QJ9oeuHfrXwP3LzP8fn3rNW6hbxKUg,31926 +werkzeug/test.py,sha256=XmECSmnpASiYQTct4oMiWr0LT5jHWCtKqnpYKZd2ui8,36100 +werkzeug/testapp.py,sha256=3HQRW1sHZKXuAjCvFMet4KXtQG3loYTFnvn6LWt-4zI,9396 +werkzeug/urls.py,sha256=dUeLg2IeTm0WLmSvFeD4hBZWGdOs-uHudR5-t8n9zPo,36771 +werkzeug/useragents.py,sha256=BhYMf4cBTHyN4U0WsQedePIocmNlH_34C-UwqSThGCc,5865 +werkzeug/utils.py,sha256=BrY1j0DHQ8RTb0K1StIobKuMJhN9SQQkWEARbrh2qpk,22972 +werkzeug/websocket.py,sha256=PpSeDxXD_0UsPAa5hQhQNM6mxibeUgn8lA8eRqiS0vM,11344 +werkzeug/wrappers.py,sha256=kbyL_aFjxELwPgMwfNCYjKu-CR6kNkh-oO8wv3GXbk8,84511 +werkzeug/wsgi.py,sha256=1Nob-aeChWQf7MsiicO8RZt6J90iRzEcik44ev9Qu8s,49347 diff --git a/venv/Lib/site-packages/Werkzeug-0.14.1.dist-info/REQUESTED b/venv/Lib/site-packages/Werkzeug-0.14.1.dist-info/REQUESTED new file mode 100644 index 0000000..e69de29 diff --git a/venv/Lib/site-packages/Werkzeug-0.14.1.dist-info/WHEEL b/venv/Lib/site-packages/Werkzeug-0.14.1.dist-info/WHEEL new file mode 100644 index 0000000..0de529b --- /dev/null +++ b/venv/Lib/site-packages/Werkzeug-0.14.1.dist-info/WHEEL @@ -0,0 +1,6 @@ +Wheel-Version: 1.0 +Generator: bdist_wheel (0.26.0) +Root-Is-Purelib: true +Tag: py2-none-any +Tag: py3-none-any + diff --git a/venv/Lib/site-packages/Werkzeug-0.14.1.dist-info/metadata.json b/venv/Lib/site-packages/Werkzeug-0.14.1.dist-info/metadata.json new file mode 100644 index 0000000..bca8d12 --- /dev/null +++ b/venv/Lib/site-packages/Werkzeug-0.14.1.dist-info/metadata.json @@ -0,0 +1 @@ +{"generator": "bdist_wheel (0.26.0)", "summary": "The comprehensive WSGI web application library.", "classifiers": ["Development Status :: 5 - Production/Stable", "Environment :: Web Environment", "Intended Audience :: Developers", "License :: OSI Approved :: BSD License", "Operating System :: OS Independent", "Programming Language :: Python", "Programming Language :: Python :: 2", "Programming Language :: Python :: 2.6", "Programming Language :: Python :: 2.7", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.3", "Programming Language :: Python :: 3.4", "Programming Language :: Python :: 3.5", "Programming Language :: Python :: 3.6", "Topic :: Internet :: WWW/HTTP :: Dynamic Content", "Topic :: Software Development :: Libraries :: Python Modules"], "description_content_type": "UNKNOWN", "extensions": {"python.details": {"project_urls": {"Home": "https://www.palletsprojects.org/p/werkzeug/"}, "contacts": [{"email": "armin.ronacher@active-4.com", "name": "Armin Ronacher", "role": "author"}], "document_names": {"description": "DESCRIPTION.rst", "license": "LICENSE.txt"}}}, "license": "BSD", "metadata_version": "2.0", "name": "Werkzeug", "platform": "any", "extras": ["dev", "termcolor", "watchdog"], "run_requires": [{"requires": ["coverage", "pytest", "sphinx", "tox"], "extra": "dev"}, {"requires": ["termcolor"], "extra": "termcolor"}, {"requires": ["watchdog"], "extra": "watchdog"}], "version": "0.14.1"} \ No newline at end of file diff --git a/venv/Lib/site-packages/Werkzeug-0.14.1.dist-info/top_level.txt b/venv/Lib/site-packages/Werkzeug-0.14.1.dist-info/top_level.txt new file mode 100644 index 0000000..6fe8da8 --- /dev/null +++ b/venv/Lib/site-packages/Werkzeug-0.14.1.dist-info/top_level.txt @@ -0,0 +1 @@ +werkzeug diff --git a/venv/Lib/site-packages/__pycache__/easy_install.cpython-38.pyc b/venv/Lib/site-packages/__pycache__/easy_install.cpython-38.pyc new file mode 100644 index 0000000..a5eb713 Binary files /dev/null and b/venv/Lib/site-packages/__pycache__/easy_install.cpython-38.pyc differ diff --git a/venv/Lib/site-packages/__pycache__/itsdangerous.cpython-38.pyc b/venv/Lib/site-packages/__pycache__/itsdangerous.cpython-38.pyc new file mode 100644 index 0000000..7a43cbb Binary files /dev/null and b/venv/Lib/site-packages/__pycache__/itsdangerous.cpython-38.pyc differ diff --git a/venv/Lib/site-packages/_distutils_hack/__init__.py b/venv/Lib/site-packages/_distutils_hack/__init__.py new file mode 100644 index 0000000..c31edfe --- /dev/null +++ b/venv/Lib/site-packages/_distutils_hack/__init__.py @@ -0,0 +1,123 @@ +import sys +import os +import re +import importlib +import warnings + + +is_pypy = '__pypy__' in sys.builtin_module_names + + +def warn_distutils_present(): + if 'distutils' not in sys.modules: + return + if is_pypy and sys.version_info < (3, 7): + # PyPy for 3.6 unconditionally imports distutils, so bypass the warning + # https://foss.heptapod.net/pypy/pypy/-/blob/be829135bc0d758997b3566062999ee8b23872b4/lib-python/3/site.py#L250 + return + warnings.warn( + "Distutils was imported before Setuptools, but importing Setuptools " + "also replaces the `distutils` module in `sys.modules`. This may lead " + "to undesirable behaviors or errors. To avoid these issues, avoid " + "using distutils directly, ensure that setuptools is installed in the " + "traditional way (e.g. not an editable install), and/or make sure " + "that setuptools is always imported before distutils.") + + +def clear_distutils(): + if 'distutils' not in sys.modules: + return + warnings.warn("Setuptools is replacing distutils.") + mods = [name for name in sys.modules if re.match(r'distutils\b', name)] + for name in mods: + del sys.modules[name] + + +def enabled(): + """ + Allow selection of distutils by environment variable. + """ + which = os.environ.get('SETUPTOOLS_USE_DISTUTILS', 'stdlib') + return which == 'local' + + +def ensure_local_distutils(): + clear_distutils() + distutils = importlib.import_module('setuptools._distutils') + distutils.__name__ = 'distutils' + sys.modules['distutils'] = distutils + + # sanity check that submodules load as expected + core = importlib.import_module('distutils.core') + assert '_distutils' in core.__file__, core.__file__ + + +def do_override(): + """ + Ensure that the local copy of distutils is preferred over stdlib. + + See https://github.com/pypa/setuptools/issues/417#issuecomment-392298401 + for more motivation. + """ + if enabled(): + warn_distutils_present() + ensure_local_distutils() + + +class DistutilsMetaFinder: + def find_spec(self, fullname, path, target=None): + if path is not None: + return + + method_name = 'spec_for_{fullname}'.format(**locals()) + method = getattr(self, method_name, lambda: None) + return method() + + def spec_for_distutils(self): + import importlib.abc + import importlib.util + + class DistutilsLoader(importlib.abc.Loader): + + def create_module(self, spec): + return importlib.import_module('setuptools._distutils') + + def exec_module(self, module): + pass + + return importlib.util.spec_from_loader('distutils', DistutilsLoader()) + + def spec_for_pip(self): + """ + Ensure stdlib distutils when running under pip. + See pypa/pip#8761 for rationale. + """ + if self.pip_imported_during_build(): + return + clear_distutils() + self.spec_for_distutils = lambda: None + + @staticmethod + def pip_imported_during_build(): + """ + Detect if pip is being imported in a build script. Ref #2355. + """ + import traceback + return any( + frame.f_globals['__file__'].endswith('setup.py') + for frame, line in traceback.walk_stack(None) + ) + + +DISTUTILS_FINDER = DistutilsMetaFinder() + + +def add_shim(): + sys.meta_path.insert(0, DISTUTILS_FINDER) + + +def remove_shim(): + try: + sys.meta_path.remove(DISTUTILS_FINDER) + except ValueError: + pass diff --git a/venv/Lib/site-packages/_distutils_hack/__pycache__/__init__.cpython-38.pyc b/venv/Lib/site-packages/_distutils_hack/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..5df4856 Binary files /dev/null and b/venv/Lib/site-packages/_distutils_hack/__pycache__/__init__.cpython-38.pyc differ diff --git a/venv/Lib/site-packages/_distutils_hack/__pycache__/override.cpython-38.pyc b/venv/Lib/site-packages/_distutils_hack/__pycache__/override.cpython-38.pyc new file mode 100644 index 0000000..0949718 Binary files /dev/null and b/venv/Lib/site-packages/_distutils_hack/__pycache__/override.cpython-38.pyc differ diff --git a/venv/Lib/site-packages/_distutils_hack/override.py b/venv/Lib/site-packages/_distutils_hack/override.py new file mode 100644 index 0000000..2cc433a --- /dev/null +++ b/venv/Lib/site-packages/_distutils_hack/override.py @@ -0,0 +1 @@ +__import__('_distutils_hack').do_override() diff --git a/venv/Lib/site-packages/beautifulsoup4-4.6.0.dist-info/DESCRIPTION.rst b/venv/Lib/site-packages/beautifulsoup4-4.6.0.dist-info/DESCRIPTION.rst new file mode 100644 index 0000000..30379a1 --- /dev/null +++ b/venv/Lib/site-packages/beautifulsoup4-4.6.0.dist-info/DESCRIPTION.rst @@ -0,0 +1,3 @@ +Beautiful Soup sits atop an HTML or XML parser, providing Pythonic idioms for iterating, searching, and modifying the parse tree. + + diff --git a/venv/Lib/site-packages/beautifulsoup4-4.6.0.dist-info/INSTALLER b/venv/Lib/site-packages/beautifulsoup4-4.6.0.dist-info/INSTALLER new file mode 100644 index 0000000..a1b589e --- /dev/null +++ b/venv/Lib/site-packages/beautifulsoup4-4.6.0.dist-info/INSTALLER @@ -0,0 +1 @@ +pip diff --git a/venv/Lib/site-packages/beautifulsoup4-4.6.0.dist-info/METADATA b/venv/Lib/site-packages/beautifulsoup4-4.6.0.dist-info/METADATA new file mode 100644 index 0000000..cc2f64b --- /dev/null +++ b/venv/Lib/site-packages/beautifulsoup4-4.6.0.dist-info/METADATA @@ -0,0 +1,28 @@ +Metadata-Version: 2.0 +Name: beautifulsoup4 +Version: 4.6.0 +Summary: Screen-scraping library +Home-page: http://www.crummy.com/software/BeautifulSoup/bs4/ +Author: Leonard Richardson +Author-email: leonardr@segfault.org +License: MIT +Download-URL: http://www.crummy.com/software/BeautifulSoup/bs4/download/ +Platform: UNKNOWN +Classifier: Development Status :: 5 - Production/Stable +Classifier: Intended Audience :: Developers +Classifier: License :: OSI Approved :: MIT License +Classifier: Programming Language :: Python +Classifier: Programming Language :: Python :: 2.7 +Classifier: Programming Language :: Python :: 3 +Classifier: Topic :: Text Processing :: Markup :: HTML +Classifier: Topic :: Text Processing :: Markup :: XML +Classifier: Topic :: Text Processing :: Markup :: SGML +Classifier: Topic :: Software Development :: Libraries :: Python Modules +Provides-Extra: html5lib +Requires-Dist: html5lib; extra == 'html5lib' +Provides-Extra: lxml +Requires-Dist: lxml; extra == 'lxml' + +Beautiful Soup sits atop an HTML or XML parser, providing Pythonic idioms for iterating, searching, and modifying the parse tree. + + diff --git a/venv/Lib/site-packages/beautifulsoup4-4.6.0.dist-info/RECORD b/venv/Lib/site-packages/beautifulsoup4-4.6.0.dist-info/RECORD new file mode 100644 index 0000000..4a0db44 --- /dev/null +++ b/venv/Lib/site-packages/beautifulsoup4-4.6.0.dist-info/RECORD @@ -0,0 +1,42 @@ +beautifulsoup4-4.6.0.dist-info/DESCRIPTION.rst,sha256=HReC3om4K1XIgRKEVjgzdTEKfTTE3F83hEHkQQJwuU0,132 +beautifulsoup4-4.6.0.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4 +beautifulsoup4-4.6.0.dist-info/METADATA,sha256=MygZZNNKIcXfK60bDPVe3zpyJeyqRoTy1PBE2uqb4TY,1109 +beautifulsoup4-4.6.0.dist-info/RECORD,, +beautifulsoup4-4.6.0.dist-info/REQUESTED,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +beautifulsoup4-4.6.0.dist-info/WHEEL,sha256=dXGL5yz26tu5uNsUy9EBoBYhrvMYqmFH9Vm82OQUT-8,95 +beautifulsoup4-4.6.0.dist-info/metadata.json,sha256=5165aEwyWzVfTK8Rs17tqfMof2bpgGLGbjGAo24oFNc,1110 +beautifulsoup4-4.6.0.dist-info/top_level.txt,sha256=H8VT-IuPWLzQqwG9_eChjXDJ1z0H9RRebdSR90Bjnkw,4 +bs4/__init__.py,sha256=tO59vxn6pDf_-5iy_a_rG65rDph3TyY7wKqRu9zNQ_4,20394 +bs4/__pycache__/__init__.cpython-38.pyc,, +bs4/__pycache__/dammit.cpython-38.pyc,, +bs4/__pycache__/diagnose.cpython-38.pyc,, +bs4/__pycache__/element.cpython-38.pyc,, +bs4/__pycache__/testing.cpython-38.pyc,, +bs4/builder/__init__.py,sha256=ECq2riLT2cie_wig7cTgMQU6YAuZ6cjZTFKWJC1st7g,11552 +bs4/builder/__pycache__/__init__.cpython-38.pyc,, +bs4/builder/__pycache__/_html5lib.cpython-38.pyc,, +bs4/builder/__pycache__/_htmlparser.cpython-38.pyc,, +bs4/builder/__pycache__/_lxml.cpython-38.pyc,, +bs4/builder/_html5lib.py,sha256=LZeT3YMgTWWKzydl48fkbwdB6IQC3nV67Ej_nbmJrcs,16688 +bs4/builder/_htmlparser.py,sha256=7ytwx-cp8Ju4nVvZqsUtL7bGNv0c3KgT44lbnyqQvHk,11609 +bs4/builder/_lxml.py,sha256=xPBXWAVfqRvWlAYoykIGVmp8JhhADxriYrXfmEjSqNE,9470 +bs4/dammit.py,sha256=T91drgzqXmIrH---Qm5tG_jvOqv1QaYdJPOt9lRJucw,29910 +bs4/diagnose.py,sha256=k_dyxYqq52gaikV1hfiwh0_PoWGbx2fsnjm5IAMS7PA,6773 +bs4/element.py,sha256=2EZ_aM5jWf7tREyxXjvziW23Q59tSijCacC8-hfyx5M,68798 +bs4/testing.py,sha256=GEdA91wNzzJ5XewPEgz1oCzDT1ihT-UYphgah2CyXDM,30800 +bs4/tests/__init__.py,sha256=bdUBDE750n7qNEfue7-3a1fBaUxJlvZMkvJvZa-lbYs,27 +bs4/tests/__pycache__/__init__.cpython-38.pyc,, +bs4/tests/__pycache__/test_builder_registry.cpython-38.pyc,, +bs4/tests/__pycache__/test_docs.cpython-38.pyc,, +bs4/tests/__pycache__/test_html5lib.cpython-38.pyc,, +bs4/tests/__pycache__/test_htmlparser.cpython-38.pyc,, +bs4/tests/__pycache__/test_lxml.cpython-38.pyc,, +bs4/tests/__pycache__/test_soup.cpython-38.pyc,, +bs4/tests/__pycache__/test_tree.cpython-38.pyc,, +bs4/tests/test_builder_registry.py,sha256=pllfRpArh9TYhjjRUiu1wITr9Ryyv4hiaAtRjij-k4E,5582 +bs4/tests/test_docs.py,sha256=FXfz2bGL4Xe0q6duwpmg9hmFiZuU4DVJPNZ0hTb6aH4,1067 +bs4/tests/test_html5lib.py,sha256=MYtpDf9mkYxHUNxBeRVmwqG5E0-R2b_NlpX3lOW30Zs,4907 +bs4/tests/test_htmlparser.py,sha256=22Ivw1wno80DD3j7NxZhc8rrBKSfgwBaAH4tzYIT7lM,1191 +bs4/tests/test_lxml.py,sha256=7ge3DMNPQIBibALPWvqn-hAdOBUCoQAVOHIu8rQhSFg,2379 +bs4/tests/test_soup.py,sha256=ugOafuY7DoCZFaxjMoBivnYNE_Iz0CRN2ZPkChLy6VY,20313 +bs4/tests/test_tree.py,sha256=psSal7EQIeDEoimz_DVloKKG1D_8CSAFJY-Yvov2bx0,78204 diff --git a/venv/Lib/site-packages/beautifulsoup4-4.6.0.dist-info/REQUESTED b/venv/Lib/site-packages/beautifulsoup4-4.6.0.dist-info/REQUESTED new file mode 100644 index 0000000..e69de29 diff --git a/venv/Lib/site-packages/beautifulsoup4-4.6.0.dist-info/WHEEL b/venv/Lib/site-packages/beautifulsoup4-4.6.0.dist-info/WHEEL new file mode 100644 index 0000000..a68f088 --- /dev/null +++ b/venv/Lib/site-packages/beautifulsoup4-4.6.0.dist-info/WHEEL @@ -0,0 +1,5 @@ +Wheel-Version: 1.0 +Generator: bdist_wheel (0.30.0.a0) +Root-Is-Purelib: true +Tag: py3-none-any + diff --git a/venv/Lib/site-packages/beautifulsoup4-4.6.0.dist-info/metadata.json b/venv/Lib/site-packages/beautifulsoup4-4.6.0.dist-info/metadata.json new file mode 100644 index 0000000..830af35 --- /dev/null +++ b/venv/Lib/site-packages/beautifulsoup4-4.6.0.dist-info/metadata.json @@ -0,0 +1 @@ +{"classifiers": ["Development Status :: 5 - Production/Stable", "Intended Audience :: Developers", "License :: OSI Approved :: MIT License", "Programming Language :: Python", "Programming Language :: Python :: 2.7", "Programming Language :: Python :: 3", "Topic :: Text Processing :: Markup :: HTML", "Topic :: Text Processing :: Markup :: XML", "Topic :: Text Processing :: Markup :: SGML", "Topic :: Software Development :: Libraries :: Python Modules"], "download_url": "http://www.crummy.com/software/BeautifulSoup/bs4/download/", "extensions": {"python.details": {"contacts": [{"email": "leonardr@segfault.org", "name": "Leonard Richardson", "role": "author"}], "document_names": {"description": "DESCRIPTION.rst"}, "project_urls": {"Home": "http://www.crummy.com/software/BeautifulSoup/bs4/"}}}, "extras": ["html5lib", "lxml"], "generator": "bdist_wheel (0.30.0.a0)", "license": "MIT", "metadata_version": "2.0", "name": "beautifulsoup4", "run_requires": [{"extra": "html5lib", "requires": ["html5lib"]}, {"extra": "lxml", "requires": ["lxml"]}], "summary": "Screen-scraping library", "version": "4.6.0"} \ No newline at end of file diff --git a/venv/Lib/site-packages/beautifulsoup4-4.6.0.dist-info/top_level.txt b/venv/Lib/site-packages/beautifulsoup4-4.6.0.dist-info/top_level.txt new file mode 100644 index 0000000..1315442 --- /dev/null +++ b/venv/Lib/site-packages/beautifulsoup4-4.6.0.dist-info/top_level.txt @@ -0,0 +1 @@ +bs4 diff --git a/venv/Lib/site-packages/bs4/__init__.py b/venv/Lib/site-packages/bs4/__init__.py new file mode 100644 index 0000000..62e9e5d --- /dev/null +++ b/venv/Lib/site-packages/bs4/__init__.py @@ -0,0 +1,529 @@ +"""Beautiful Soup +Elixir and Tonic +"The Screen-Scraper's Friend" +http://www.crummy.com/software/BeautifulSoup/ + +Beautiful Soup uses a pluggable XML or HTML parser to parse a +(possibly invalid) document into a tree representation. Beautiful Soup +provides methods and Pythonic idioms that make it easy to navigate, +search, and modify the parse tree. + +Beautiful Soup works with Python 2.7 and up. It works better if lxml +and/or html5lib is installed. + +For more than you ever wanted to know about Beautiful Soup, see the +documentation: +http://www.crummy.com/software/BeautifulSoup/bs4/doc/ + +""" + +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +__author__ = "Leonard Richardson (leonardr@segfault.org)" +__version__ = "4.6.0" +__copyright__ = "Copyright (c) 2004-2017 Leonard Richardson" +__license__ = "MIT" + +__all__ = ['BeautifulSoup'] + +import os +import re +import traceback +import warnings + +from .builder import builder_registry, ParserRejectedMarkup +from .dammit import UnicodeDammit +from .element import ( + CData, + Comment, + DEFAULT_OUTPUT_ENCODING, + Declaration, + Doctype, + NavigableString, + PageElement, + ProcessingInstruction, + ResultSet, + SoupStrainer, + Tag, + ) + +# The very first thing we do is give a useful error if someone is +# running this code under Python 3 without converting it. +'You are trying to run the Python 2 version of Beautiful Soup under Python 3. This will not work.'!='You need to convert the code, either by installing it (`python setup.py install`) or by running 2to3 (`2to3 -w bs4`).' + +class BeautifulSoup(Tag): + """ + This class defines the basic interface called by the tree builders. + + These methods will be called by the parser: + reset() + feed(markup) + + The tree builder may call these methods from its feed() implementation: + handle_starttag(name, attrs) # See note about return value + handle_endtag(name) + handle_data(data) # Appends to the current data node + endData(containerClass=NavigableString) # Ends the current data node + + No matter how complicated the underlying parser is, you should be + able to build a tree using 'start tag' events, 'end tag' events, + 'data' events, and "done with data" events. + + If you encounter an empty-element tag (aka a self-closing tag, + like HTML's
tag), call handle_starttag and then + handle_endtag. + """ + ROOT_TAG_NAME = '[document]' + + # If the end-user gives no indication which tree builder they + # want, look for one with these features. + DEFAULT_BUILDER_FEATURES = ['html', 'fast'] + + ASCII_SPACES = '\x20\x0a\x09\x0c\x0d' + + NO_PARSER_SPECIFIED_WARNING = "No parser was explicitly specified, so I'm using the best available %(markup_type)s parser for this system (\"%(parser)s\"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.\n\nThe code that caused this warning is on line %(line_number)s of the file %(filename)s. To get rid of this warning, change code that looks like this:\n\n BeautifulSoup(YOUR_MARKUP})\n\nto this:\n\n BeautifulSoup(YOUR_MARKUP, \"%(parser)s\")\n" + + def __init__(self, markup="", features=None, builder=None, + parse_only=None, from_encoding=None, exclude_encodings=None, + **kwargs): + """The Soup object is initialized as the 'root tag', and the + provided markup (which can be a string or a file-like object) + is fed into the underlying parser.""" + + if 'convertEntities' in kwargs: + warnings.warn( + "BS4 does not respect the convertEntities argument to the " + "BeautifulSoup constructor. Entities are always converted " + "to Unicode characters.") + + if 'markupMassage' in kwargs: + del kwargs['markupMassage'] + warnings.warn( + "BS4 does not respect the markupMassage argument to the " + "BeautifulSoup constructor. The tree builder is responsible " + "for any necessary markup massage.") + + if 'smartQuotesTo' in kwargs: + del kwargs['smartQuotesTo'] + warnings.warn( + "BS4 does not respect the smartQuotesTo argument to the " + "BeautifulSoup constructor. Smart quotes are always converted " + "to Unicode characters.") + + if 'selfClosingTags' in kwargs: + del kwargs['selfClosingTags'] + warnings.warn( + "BS4 does not respect the selfClosingTags argument to the " + "BeautifulSoup constructor. The tree builder is responsible " + "for understanding self-closing tags.") + + if 'isHTML' in kwargs: + del kwargs['isHTML'] + warnings.warn( + "BS4 does not respect the isHTML argument to the " + "BeautifulSoup constructor. Suggest you use " + "features='lxml' for HTML and features='lxml-xml' for " + "XML.") + + def deprecated_argument(old_name, new_name): + if old_name in kwargs: + warnings.warn( + 'The "%s" argument to the BeautifulSoup constructor ' + 'has been renamed to "%s."' % (old_name, new_name)) + value = kwargs[old_name] + del kwargs[old_name] + return value + return None + + parse_only = parse_only or deprecated_argument( + "parseOnlyThese", "parse_only") + + from_encoding = from_encoding or deprecated_argument( + "fromEncoding", "from_encoding") + + if from_encoding and isinstance(markup, str): + warnings.warn("You provided Unicode markup but also provided a value for from_encoding. Your from_encoding will be ignored.") + from_encoding = None + + if len(kwargs) > 0: + arg = list(kwargs.keys()).pop() + raise TypeError( + "__init__() got an unexpected keyword argument '%s'" % arg) + + if builder is None: + original_features = features + if isinstance(features, str): + features = [features] + if features is None or len(features) == 0: + features = self.DEFAULT_BUILDER_FEATURES + builder_class = builder_registry.lookup(*features) + if builder_class is None: + raise FeatureNotFound( + "Couldn't find a tree builder with the features you " + "requested: %s. Do you need to install a parser library?" + % ",".join(features)) + builder = builder_class() + if not (original_features == builder.NAME or + original_features in builder.ALTERNATE_NAMES): + if builder.is_xml: + markup_type = "XML" + else: + markup_type = "HTML" + + caller = traceback.extract_stack()[0] + filename = caller[0] + line_number = caller[1] + warnings.warn(self.NO_PARSER_SPECIFIED_WARNING % dict( + filename=filename, + line_number=line_number, + parser=builder.NAME, + markup_type=markup_type)) + + self.builder = builder + self.is_xml = builder.is_xml + self.known_xml = self.is_xml + self.builder.soup = self + + self.parse_only = parse_only + + if hasattr(markup, 'read'): # It's a file-type object. + markup = markup.read() + elif len(markup) <= 256 and ( + (isinstance(markup, bytes) and not b'<' in markup) + or (isinstance(markup, str) and not '<' in markup) + ): + # Print out warnings for a couple beginner problems + # involving passing non-markup to Beautiful Soup. + # Beautiful Soup will still parse the input as markup, + # just in case that's what the user really wants. + if (isinstance(markup, str) + and not os.path.supports_unicode_filenames): + possible_filename = markup.encode("utf8") + else: + possible_filename = markup + is_file = False + try: + is_file = os.path.exists(possible_filename) + except Exception as e: + # This is almost certainly a problem involving + # characters not valid in filenames on this + # system. Just let it go. + pass + if is_file: + if isinstance(markup, str): + markup = markup.encode("utf8") + warnings.warn( + '"%s" looks like a filename, not markup. You should' + ' probably open this file and pass the filehandle into' + ' Beautiful Soup.' % markup) + self._check_markup_is_url(markup) + + for (self.markup, self.original_encoding, self.declared_html_encoding, + self.contains_replacement_characters) in ( + self.builder.prepare_markup( + markup, from_encoding, exclude_encodings=exclude_encodings)): + self.reset() + try: + self._feed() + break + except ParserRejectedMarkup: + pass + + # Clear out the markup and remove the builder's circular + # reference to this object. + self.markup = None + self.builder.soup = None + + def __copy__(self): + copy = type(self)( + self.encode('utf-8'), builder=self.builder, from_encoding='utf-8' + ) + + # Although we encoded the tree to UTF-8, that may not have + # been the encoding of the original markup. Set the copy's + # .original_encoding to reflect the original object's + # .original_encoding. + copy.original_encoding = self.original_encoding + return copy + + def __getstate__(self): + # Frequently a tree builder can't be pickled. + d = dict(self.__dict__) + if 'builder' in d and not self.builder.picklable: + d['builder'] = None + return d + + @staticmethod + def _check_markup_is_url(markup): + """ + Check if markup looks like it's actually a url and raise a warning + if so. Markup can be unicode or str (py2) / bytes (py3). + """ + if isinstance(markup, bytes): + space = b' ' + cant_start_with = (b"http:", b"https:") + elif isinstance(markup, str): + space = ' ' + cant_start_with = ("http:", "https:") + else: + return + + if any(markup.startswith(prefix) for prefix in cant_start_with): + if not space in markup: + if isinstance(markup, bytes): + decoded_markup = markup.decode('utf-8', 'replace') + else: + decoded_markup = markup + warnings.warn( + '"%s" looks like a URL. Beautiful Soup is not an' + ' HTTP client. You should probably use an HTTP client like' + ' requests to get the document behind the URL, and feed' + ' that document to Beautiful Soup.' % decoded_markup + ) + + def _feed(self): + # Convert the document to Unicode. + self.builder.reset() + + self.builder.feed(self.markup) + # Close out any unfinished strings and close all the open tags. + self.endData() + while self.currentTag.name != self.ROOT_TAG_NAME: + self.popTag() + + def reset(self): + Tag.__init__(self, self, self.builder, self.ROOT_TAG_NAME) + self.hidden = 1 + self.builder.reset() + self.current_data = [] + self.currentTag = None + self.tagStack = [] + self.preserve_whitespace_tag_stack = [] + self.pushTag(self) + + def new_tag(self, name, namespace=None, nsprefix=None, **attrs): + """Create a new tag associated with this soup.""" + return Tag(None, self.builder, name, namespace, nsprefix, attrs) + + def new_string(self, s, subclass=NavigableString): + """Create a new NavigableString associated with this soup.""" + return subclass(s) + + def insert_before(self, successor): + raise NotImplementedError("BeautifulSoup objects don't support insert_before().") + + def insert_after(self, successor): + raise NotImplementedError("BeautifulSoup objects don't support insert_after().") + + def popTag(self): + tag = self.tagStack.pop() + if self.preserve_whitespace_tag_stack and tag == self.preserve_whitespace_tag_stack[-1]: + self.preserve_whitespace_tag_stack.pop() + #print "Pop", tag.name + if self.tagStack: + self.currentTag = self.tagStack[-1] + return self.currentTag + + def pushTag(self, tag): + #print "Push", tag.name + if self.currentTag: + self.currentTag.contents.append(tag) + self.tagStack.append(tag) + self.currentTag = self.tagStack[-1] + if tag.name in self.builder.preserve_whitespace_tags: + self.preserve_whitespace_tag_stack.append(tag) + + def endData(self, containerClass=NavigableString): + if self.current_data: + current_data = ''.join(self.current_data) + # If whitespace is not preserved, and this string contains + # nothing but ASCII spaces, replace it with a single space + # or newline. + if not self.preserve_whitespace_tag_stack: + strippable = True + for i in current_data: + if i not in self.ASCII_SPACES: + strippable = False + break + if strippable: + if '\n' in current_data: + current_data = '\n' + else: + current_data = ' ' + + # Reset the data collector. + self.current_data = [] + + # Should we add this string to the tree at all? + if self.parse_only and len(self.tagStack) <= 1 and \ + (not self.parse_only.text or \ + not self.parse_only.search(current_data)): + return + + o = containerClass(current_data) + self.object_was_parsed(o) + + def object_was_parsed(self, o, parent=None, most_recent_element=None): + """Add an object to the parse tree.""" + parent = parent or self.currentTag + previous_element = most_recent_element or self._most_recent_element + + next_element = previous_sibling = next_sibling = None + if isinstance(o, Tag): + next_element = o.next_element + next_sibling = o.next_sibling + previous_sibling = o.previous_sibling + if not previous_element: + previous_element = o.previous_element + + o.setup(parent, previous_element, next_element, previous_sibling, next_sibling) + + self._most_recent_element = o + parent.contents.append(o) + + if parent.next_sibling: + # This node is being inserted into an element that has + # already been parsed. Deal with any dangling references. + index = len(parent.contents)-1 + while index >= 0: + if parent.contents[index] is o: + break + index -= 1 + else: + raise ValueError( + "Error building tree: supposedly %r was inserted " + "into %r after the fact, but I don't see it!" % ( + o, parent + ) + ) + if index == 0: + previous_element = parent + previous_sibling = None + else: + previous_element = previous_sibling = parent.contents[index-1] + if index == len(parent.contents)-1: + next_element = parent.next_sibling + next_sibling = None + else: + next_element = next_sibling = parent.contents[index+1] + + o.previous_element = previous_element + if previous_element: + previous_element.next_element = o + o.next_element = next_element + if next_element: + next_element.previous_element = o + o.next_sibling = next_sibling + if next_sibling: + next_sibling.previous_sibling = o + o.previous_sibling = previous_sibling + if previous_sibling: + previous_sibling.next_sibling = o + + def _popToTag(self, name, nsprefix=None, inclusivePop=True): + """Pops the tag stack up to and including the most recent + instance of the given tag. If inclusivePop is false, pops the tag + stack up to but *not* including the most recent instqance of + the given tag.""" + #print "Popping to %s" % name + if name == self.ROOT_TAG_NAME: + # The BeautifulSoup object itself can never be popped. + return + + most_recently_popped = None + + stack_size = len(self.tagStack) + for i in range(stack_size - 1, 0, -1): + t = self.tagStack[i] + if (name == t.name and nsprefix == t.prefix): + if inclusivePop: + most_recently_popped = self.popTag() + break + most_recently_popped = self.popTag() + + return most_recently_popped + + def handle_starttag(self, name, namespace, nsprefix, attrs): + """Push a start tag on to the stack. + + If this method returns None, the tag was rejected by the + SoupStrainer. You should proceed as if the tag had not occurred + in the document. For instance, if this was a self-closing tag, + don't call handle_endtag. + """ + + # print "Start tag %s: %s" % (name, attrs) + self.endData() + + if (self.parse_only and len(self.tagStack) <= 1 + and (self.parse_only.text + or not self.parse_only.search_tag(name, attrs))): + return None + + tag = Tag(self, self.builder, name, namespace, nsprefix, attrs, + self.currentTag, self._most_recent_element) + if tag is None: + return tag + if self._most_recent_element: + self._most_recent_element.next_element = tag + self._most_recent_element = tag + self.pushTag(tag) + return tag + + def handle_endtag(self, name, nsprefix=None): + #print "End tag: " + name + self.endData() + self._popToTag(name, nsprefix) + + def handle_data(self, data): + self.current_data.append(data) + + def decode(self, pretty_print=False, + eventual_encoding=DEFAULT_OUTPUT_ENCODING, + formatter="minimal"): + """Returns a string or Unicode representation of this document. + To get Unicode, pass None for encoding.""" + + if self.is_xml: + # Print the XML declaration + encoding_part = '' + if eventual_encoding != None: + encoding_part = ' encoding="%s"' % eventual_encoding + prefix = '\n' % encoding_part + else: + prefix = '' + if not pretty_print: + indent_level = None + else: + indent_level = 0 + return prefix + super(BeautifulSoup, self).decode( + indent_level, eventual_encoding, formatter) + +# Alias to make it easier to type import: 'from bs4 import _soup' +_s = BeautifulSoup +_soup = BeautifulSoup + +class BeautifulStoneSoup(BeautifulSoup): + """Deprecated interface to an XML parser.""" + + def __init__(self, *args, **kwargs): + kwargs['features'] = 'xml' + warnings.warn( + 'The BeautifulStoneSoup class is deprecated. Instead of using ' + 'it, pass features="xml" into the BeautifulSoup constructor.') + super(BeautifulStoneSoup, self).__init__(*args, **kwargs) + + +class StopParsing(Exception): + pass + +class FeatureNotFound(ValueError): + pass + + +#By default, act as an HTML pretty-printer. +if __name__ == '__main__': + import sys + soup = BeautifulSoup(sys.stdin) + print(soup.prettify()) diff --git a/venv/Lib/site-packages/bs4/__pycache__/__init__.cpython-38.pyc b/venv/Lib/site-packages/bs4/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..c306482 Binary files /dev/null and b/venv/Lib/site-packages/bs4/__pycache__/__init__.cpython-38.pyc differ diff --git a/venv/Lib/site-packages/bs4/__pycache__/dammit.cpython-38.pyc b/venv/Lib/site-packages/bs4/__pycache__/dammit.cpython-38.pyc new file mode 100644 index 0000000..d5ede25 Binary files /dev/null and b/venv/Lib/site-packages/bs4/__pycache__/dammit.cpython-38.pyc differ diff --git a/venv/Lib/site-packages/bs4/__pycache__/diagnose.cpython-38.pyc b/venv/Lib/site-packages/bs4/__pycache__/diagnose.cpython-38.pyc new file mode 100644 index 0000000..f64aba2 Binary files /dev/null and b/venv/Lib/site-packages/bs4/__pycache__/diagnose.cpython-38.pyc differ diff --git a/venv/Lib/site-packages/bs4/__pycache__/element.cpython-38.pyc b/venv/Lib/site-packages/bs4/__pycache__/element.cpython-38.pyc new file mode 100644 index 0000000..fb535e2 Binary files /dev/null and b/venv/Lib/site-packages/bs4/__pycache__/element.cpython-38.pyc differ diff --git a/venv/Lib/site-packages/bs4/__pycache__/testing.cpython-38.pyc b/venv/Lib/site-packages/bs4/__pycache__/testing.cpython-38.pyc new file mode 100644 index 0000000..8baa71d Binary files /dev/null and b/venv/Lib/site-packages/bs4/__pycache__/testing.cpython-38.pyc differ diff --git a/venv/Lib/site-packages/bs4/builder/__init__.py b/venv/Lib/site-packages/bs4/builder/__init__.py new file mode 100644 index 0000000..4d6f96d --- /dev/null +++ b/venv/Lib/site-packages/bs4/builder/__init__.py @@ -0,0 +1,333 @@ +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +from collections import defaultdict +import itertools +import sys +from bs4.element import ( + CharsetMetaAttributeValue, + ContentMetaAttributeValue, + HTMLAwareEntitySubstitution, + whitespace_re + ) + +__all__ = [ + 'HTMLTreeBuilder', + 'SAXTreeBuilder', + 'TreeBuilder', + 'TreeBuilderRegistry', + ] + +# Some useful features for a TreeBuilder to have. +FAST = 'fast' +PERMISSIVE = 'permissive' +STRICT = 'strict' +XML = 'xml' +HTML = 'html' +HTML_5 = 'html5' + + +class TreeBuilderRegistry(object): + + def __init__(self): + self.builders_for_feature = defaultdict(list) + self.builders = [] + + def register(self, treebuilder_class): + """Register a treebuilder based on its advertised features.""" + for feature in treebuilder_class.features: + self.builders_for_feature[feature].insert(0, treebuilder_class) + self.builders.insert(0, treebuilder_class) + + def lookup(self, *features): + if len(self.builders) == 0: + # There are no builders at all. + return None + + if len(features) == 0: + # They didn't ask for any features. Give them the most + # recently registered builder. + return self.builders[0] + + # Go down the list of features in order, and eliminate any builders + # that don't match every feature. + features = list(features) + features.reverse() + candidates = None + candidate_set = None + while len(features) > 0: + feature = features.pop() + we_have_the_feature = self.builders_for_feature.get(feature, []) + if len(we_have_the_feature) > 0: + if candidates is None: + candidates = we_have_the_feature + candidate_set = set(candidates) + else: + # Eliminate any candidates that don't have this feature. + candidate_set = candidate_set.intersection( + set(we_have_the_feature)) + + # The only valid candidates are the ones in candidate_set. + # Go through the original list of candidates and pick the first one + # that's in candidate_set. + if candidate_set is None: + return None + for candidate in candidates: + if candidate in candidate_set: + return candidate + return None + +# The BeautifulSoup class will take feature lists from developers and use them +# to look up builders in this registry. +builder_registry = TreeBuilderRegistry() + +class TreeBuilder(object): + """Turn a document into a Beautiful Soup object tree.""" + + NAME = "[Unknown tree builder]" + ALTERNATE_NAMES = [] + features = [] + + is_xml = False + picklable = False + preserve_whitespace_tags = set() + empty_element_tags = None # A tag will be considered an empty-element + # tag when and only when it has no contents. + + # A value for these tag/attribute combinations is a space- or + # comma-separated list of CDATA, rather than a single CDATA. + cdata_list_attributes = {} + + + def __init__(self): + self.soup = None + + def reset(self): + pass + + def can_be_empty_element(self, tag_name): + """Might a tag with this name be an empty-element tag? + + The final markup may or may not actually present this tag as + self-closing. + + For instance: an HTMLBuilder does not consider a

tag to be + an empty-element tag (it's not in + HTMLBuilder.empty_element_tags). This means an empty

tag + will be presented as "

", not "

". + + The default implementation has no opinion about which tags are + empty-element tags, so a tag will be presented as an + empty-element tag if and only if it has no contents. + "" will become "", and "bar" will + be left alone. + """ + if self.empty_element_tags is None: + return True + return tag_name in self.empty_element_tags + + def feed(self, markup): + raise NotImplementedError() + + def prepare_markup(self, markup, user_specified_encoding=None, + document_declared_encoding=None): + return markup, None, None, False + + def test_fragment_to_document(self, fragment): + """Wrap an HTML fragment to make it look like a document. + + Different parsers do this differently. For instance, lxml + introduces an empty tag, and html5lib + doesn't. Abstracting this away lets us write simple tests + which run HTML fragments through the parser and compare the + results against other HTML fragments. + + This method should not be used outside of tests. + """ + return fragment + + def set_up_substitutions(self, tag): + return False + + def _replace_cdata_list_attribute_values(self, tag_name, attrs): + """Replaces class="foo bar" with class=["foo", "bar"] + + Modifies its input in place. + """ + if not attrs: + return attrs + if self.cdata_list_attributes: + universal = self.cdata_list_attributes.get('*', []) + tag_specific = self.cdata_list_attributes.get( + tag_name.lower(), None) + for attr in list(attrs.keys()): + if attr in universal or (tag_specific and attr in tag_specific): + # We have a "class"-type attribute whose string + # value is a whitespace-separated list of + # values. Split it into a list. + value = attrs[attr] + if isinstance(value, str): + values = whitespace_re.split(value) + else: + # html5lib sometimes calls setAttributes twice + # for the same tag when rearranging the parse + # tree. On the second call the attribute value + # here is already a list. If this happens, + # leave the value alone rather than trying to + # split it again. + values = value + attrs[attr] = values + return attrs + +class SAXTreeBuilder(TreeBuilder): + """A Beautiful Soup treebuilder that listens for SAX events.""" + + def feed(self, markup): + raise NotImplementedError() + + def close(self): + pass + + def startElement(self, name, attrs): + attrs = dict((key[1], value) for key, value in list(attrs.items())) + #print "Start %s, %r" % (name, attrs) + self.soup.handle_starttag(name, attrs) + + def endElement(self, name): + #print "End %s" % name + self.soup.handle_endtag(name) + + def startElementNS(self, nsTuple, nodeName, attrs): + # Throw away (ns, nodeName) for now. + self.startElement(nodeName, attrs) + + def endElementNS(self, nsTuple, nodeName): + # Throw away (ns, nodeName) for now. + self.endElement(nodeName) + #handler.endElementNS((ns, node.nodeName), node.nodeName) + + def startPrefixMapping(self, prefix, nodeValue): + # Ignore the prefix for now. + pass + + def endPrefixMapping(self, prefix): + # Ignore the prefix for now. + # handler.endPrefixMapping(prefix) + pass + + def characters(self, content): + self.soup.handle_data(content) + + def startDocument(self): + pass + + def endDocument(self): + pass + + +class HTMLTreeBuilder(TreeBuilder): + """This TreeBuilder knows facts about HTML. + + Such as which tags are empty-element tags. + """ + + preserve_whitespace_tags = HTMLAwareEntitySubstitution.preserve_whitespace_tags + empty_element_tags = set([ + # These are from HTML5. + 'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'keygen', 'link', 'menuitem', 'meta', 'param', 'source', 'track', 'wbr', + + # These are from HTML4, removed in HTML5. + 'spacer', 'frame' + ]) + + # The HTML standard defines these attributes as containing a + # space-separated list of values, not a single value. That is, + # class="foo bar" means that the 'class' attribute has two values, + # 'foo' and 'bar', not the single value 'foo bar'. When we + # encounter one of these attributes, we will parse its value into + # a list of values if possible. Upon output, the list will be + # converted back into a string. + cdata_list_attributes = { + "*" : ['class', 'accesskey', 'dropzone'], + "a" : ['rel', 'rev'], + "link" : ['rel', 'rev'], + "td" : ["headers"], + "th" : ["headers"], + "td" : ["headers"], + "form" : ["accept-charset"], + "object" : ["archive"], + + # These are HTML5 specific, as are *.accesskey and *.dropzone above. + "area" : ["rel"], + "icon" : ["sizes"], + "iframe" : ["sandbox"], + "output" : ["for"], + } + + def set_up_substitutions(self, tag): + # We are only interested in tags + if tag.name != 'meta': + return False + + http_equiv = tag.get('http-equiv') + content = tag.get('content') + charset = tag.get('charset') + + # We are interested in tags that say what encoding the + # document was originally in. This means HTML 5-style + # tags that provide the "charset" attribute. It also means + # HTML 4-style tags that provide the "content" + # attribute and have "http-equiv" set to "content-type". + # + # In both cases we will replace the value of the appropriate + # attribute with a standin object that can take on any + # encoding. + meta_encoding = None + if charset is not None: + # HTML 5 style: + # + meta_encoding = charset + tag['charset'] = CharsetMetaAttributeValue(charset) + + elif (content is not None and http_equiv is not None + and http_equiv.lower() == 'content-type'): + # HTML 4 style: + # + tag['content'] = ContentMetaAttributeValue(content) + + return (meta_encoding is not None) + +def register_treebuilders_from(module): + """Copy TreeBuilders from the given module into this module.""" + # I'm fairly sure this is not the best way to do this. + this_module = sys.modules['bs4.builder'] + for name in module.__all__: + obj = getattr(module, name) + + if issubclass(obj, TreeBuilder): + setattr(this_module, name, obj) + this_module.__all__.append(name) + # Register the builder while we're at it. + this_module.builder_registry.register(obj) + +class ParserRejectedMarkup(Exception): + pass + +# Builders are registered in reverse order of priority, so that custom +# builder registrations will take precedence. In general, we want lxml +# to take precedence over html5lib, because it's faster. And we only +# want to use HTMLParser as a last result. +from . import _htmlparser +register_treebuilders_from(_htmlparser) +try: + from . import _html5lib + register_treebuilders_from(_html5lib) +except ImportError: + # They don't have html5lib installed. + pass +try: + from . import _lxml + register_treebuilders_from(_lxml) +except ImportError: + # They don't have lxml installed. + pass diff --git a/venv/Lib/site-packages/bs4/builder/__pycache__/__init__.cpython-38.pyc b/venv/Lib/site-packages/bs4/builder/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..50c18ab Binary files /dev/null and b/venv/Lib/site-packages/bs4/builder/__pycache__/__init__.cpython-38.pyc differ diff --git a/venv/Lib/site-packages/bs4/builder/__pycache__/_html5lib.cpython-38.pyc b/venv/Lib/site-packages/bs4/builder/__pycache__/_html5lib.cpython-38.pyc new file mode 100644 index 0000000..1e795ce Binary files /dev/null and b/venv/Lib/site-packages/bs4/builder/__pycache__/_html5lib.cpython-38.pyc differ diff --git a/venv/Lib/site-packages/bs4/builder/__pycache__/_htmlparser.cpython-38.pyc b/venv/Lib/site-packages/bs4/builder/__pycache__/_htmlparser.cpython-38.pyc new file mode 100644 index 0000000..4a30866 Binary files /dev/null and b/venv/Lib/site-packages/bs4/builder/__pycache__/_htmlparser.cpython-38.pyc differ diff --git a/venv/Lib/site-packages/bs4/builder/__pycache__/_lxml.cpython-38.pyc b/venv/Lib/site-packages/bs4/builder/__pycache__/_lxml.cpython-38.pyc new file mode 100644 index 0000000..be81b2d Binary files /dev/null and b/venv/Lib/site-packages/bs4/builder/__pycache__/_lxml.cpython-38.pyc differ diff --git a/venv/Lib/site-packages/bs4/builder/_html5lib.py b/venv/Lib/site-packages/bs4/builder/_html5lib.py new file mode 100644 index 0000000..d9d468f --- /dev/null +++ b/venv/Lib/site-packages/bs4/builder/_html5lib.py @@ -0,0 +1,426 @@ +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +__all__ = [ + 'HTML5TreeBuilder', + ] + +import warnings +import re +from bs4.builder import ( + PERMISSIVE, + HTML, + HTML_5, + HTMLTreeBuilder, + ) +from bs4.element import ( + NamespacedAttribute, + whitespace_re, +) +import html5lib +from html5lib.constants import ( + namespaces, + prefixes, + ) +from bs4.element import ( + Comment, + Doctype, + NavigableString, + Tag, + ) + +try: + # Pre-0.99999999 + from html5lib.treebuilders import _base as treebuilder_base + new_html5lib = False +except ImportError as e: + # 0.99999999 and up + from html5lib.treebuilders import base as treebuilder_base + new_html5lib = True + +class HTML5TreeBuilder(HTMLTreeBuilder): + """Use html5lib to build a tree.""" + + NAME = "html5lib" + + features = [NAME, PERMISSIVE, HTML_5, HTML] + + def prepare_markup(self, markup, user_specified_encoding, + document_declared_encoding=None, exclude_encodings=None): + # Store the user-specified encoding for use later on. + self.user_specified_encoding = user_specified_encoding + + # document_declared_encoding and exclude_encodings aren't used + # ATM because the html5lib TreeBuilder doesn't use + # UnicodeDammit. + if exclude_encodings: + warnings.warn("You provided a value for exclude_encoding, but the html5lib tree builder doesn't support exclude_encoding.") + yield (markup, None, None, False) + + # These methods are defined by Beautiful Soup. + def feed(self, markup): + if self.soup.parse_only is not None: + warnings.warn("You provided a value for parse_only, but the html5lib tree builder doesn't support parse_only. The entire document will be parsed.") + parser = html5lib.HTMLParser(tree=self.create_treebuilder) + + extra_kwargs = dict() + if not isinstance(markup, str): + if new_html5lib: + extra_kwargs['override_encoding'] = self.user_specified_encoding + else: + extra_kwargs['encoding'] = self.user_specified_encoding + doc = parser.parse(markup, **extra_kwargs) + + # Set the character encoding detected by the tokenizer. + if isinstance(markup, str): + # We need to special-case this because html5lib sets + # charEncoding to UTF-8 if it gets Unicode input. + doc.original_encoding = None + else: + original_encoding = parser.tokenizer.stream.charEncoding[0] + if not isinstance(original_encoding, str): + # In 0.99999999 and up, the encoding is an html5lib + # Encoding object. We want to use a string for compatibility + # with other tree builders. + original_encoding = original_encoding.name + doc.original_encoding = original_encoding + + def create_treebuilder(self, namespaceHTMLElements): + self.underlying_builder = TreeBuilderForHtml5lib( + namespaceHTMLElements, self.soup) + return self.underlying_builder + + def test_fragment_to_document(self, fragment): + """See `TreeBuilder`.""" + return '%s' % fragment + + +class TreeBuilderForHtml5lib(treebuilder_base.TreeBuilder): + + def __init__(self, namespaceHTMLElements, soup=None): + if soup: + self.soup = soup + else: + from bs4 import BeautifulSoup + self.soup = BeautifulSoup("", "html.parser") + super(TreeBuilderForHtml5lib, self).__init__(namespaceHTMLElements) + + def documentClass(self): + self.soup.reset() + return Element(self.soup, self.soup, None) + + def insertDoctype(self, token): + name = token["name"] + publicId = token["publicId"] + systemId = token["systemId"] + + doctype = Doctype.for_name_and_ids(name, publicId, systemId) + self.soup.object_was_parsed(doctype) + + def elementClass(self, name, namespace): + tag = self.soup.new_tag(name, namespace) + return Element(tag, self.soup, namespace) + + def commentClass(self, data): + return TextNode(Comment(data), self.soup) + + def fragmentClass(self): + from bs4 import BeautifulSoup + self.soup = BeautifulSoup("", "html.parser") + self.soup.name = "[document_fragment]" + return Element(self.soup, self.soup, None) + + def appendChild(self, node): + # XXX This code is not covered by the BS4 tests. + self.soup.append(node.element) + + def getDocument(self): + return self.soup + + def getFragment(self): + return treebuilder_base.TreeBuilder.getFragment(self).element + + def testSerializer(self, element): + from bs4 import BeautifulSoup + rv = [] + doctype_re = re.compile(r'^(.*?)(?: PUBLIC "(.*?)"(?: "(.*?)")?| SYSTEM "(.*?)")?$') + + def serializeElement(element, indent=0): + if isinstance(element, BeautifulSoup): + pass + if isinstance(element, Doctype): + m = doctype_re.match(element) + if m: + name = m.group(1) + if m.lastindex > 1: + publicId = m.group(2) or "" + systemId = m.group(3) or m.group(4) or "" + rv.append("""|%s""" % + (' ' * indent, name, publicId, systemId)) + else: + rv.append("|%s" % (' ' * indent, name)) + else: + rv.append("|%s" % (' ' * indent,)) + elif isinstance(element, Comment): + rv.append("|%s" % (' ' * indent, element)) + elif isinstance(element, NavigableString): + rv.append("|%s\"%s\"" % (' ' * indent, element)) + else: + if element.namespace: + name = "%s %s" % (prefixes[element.namespace], + element.name) + else: + name = element.name + rv.append("|%s<%s>" % (' ' * indent, name)) + if element.attrs: + attributes = [] + for name, value in list(element.attrs.items()): + if isinstance(name, NamespacedAttribute): + name = "%s %s" % (prefixes[name.namespace], name.name) + if isinstance(value, list): + value = " ".join(value) + attributes.append((name, value)) + + for name, value in sorted(attributes): + rv.append('|%s%s="%s"' % (' ' * (indent + 2), name, value)) + indent += 2 + for child in element.children: + serializeElement(child, indent) + serializeElement(element, 0) + + return "\n".join(rv) + +class AttrList(object): + def __init__(self, element): + self.element = element + self.attrs = dict(self.element.attrs) + def __iter__(self): + return list(self.attrs.items()).__iter__() + def __setitem__(self, name, value): + # If this attribute is a multi-valued attribute for this element, + # turn its value into a list. + list_attr = HTML5TreeBuilder.cdata_list_attributes + if (name in list_attr['*'] + or (self.element.name in list_attr + and name in list_attr[self.element.name])): + # A node that is being cloned may have already undergone + # this procedure. + if not isinstance(value, list): + value = whitespace_re.split(value) + self.element[name] = value + def items(self): + return list(self.attrs.items()) + def keys(self): + return list(self.attrs.keys()) + def __len__(self): + return len(self.attrs) + def __getitem__(self, name): + return self.attrs[name] + def __contains__(self, name): + return name in list(self.attrs.keys()) + + +class Element(treebuilder_base.Node): + def __init__(self, element, soup, namespace): + treebuilder_base.Node.__init__(self, element.name) + self.element = element + self.soup = soup + self.namespace = namespace + + def appendChild(self, node): + string_child = child = None + if isinstance(node, str): + # Some other piece of code decided to pass in a string + # instead of creating a TextElement object to contain the + # string. + string_child = child = node + elif isinstance(node, Tag): + # Some other piece of code decided to pass in a Tag + # instead of creating an Element object to contain the + # Tag. + child = node + elif node.element.__class__ == NavigableString: + string_child = child = node.element + node.parent = self + else: + child = node.element + node.parent = self + + if not isinstance(child, str) and child.parent is not None: + node.element.extract() + + if (string_child and self.element.contents + and self.element.contents[-1].__class__ == NavigableString): + # We are appending a string onto another string. + # TODO This has O(n^2) performance, for input like + # "aaa..." + old_element = self.element.contents[-1] + new_element = self.soup.new_string(old_element + string_child) + old_element.replace_with(new_element) + self.soup._most_recent_element = new_element + else: + if isinstance(node, str): + # Create a brand new NavigableString from this string. + child = self.soup.new_string(node) + + # Tell Beautiful Soup to act as if it parsed this element + # immediately after the parent's last descendant. (Or + # immediately after the parent, if it has no children.) + if self.element.contents: + most_recent_element = self.element._last_descendant(False) + elif self.element.next_element is not None: + # Something from further ahead in the parse tree is + # being inserted into this earlier element. This is + # very annoying because it means an expensive search + # for the last element in the tree. + most_recent_element = self.soup._last_descendant() + else: + most_recent_element = self.element + + self.soup.object_was_parsed( + child, parent=self.element, + most_recent_element=most_recent_element) + + def getAttributes(self): + if isinstance(self.element, Comment): + return {} + return AttrList(self.element) + + def setAttributes(self, attributes): + + if attributes is not None and len(attributes) > 0: + + converted_attributes = [] + for name, value in list(attributes.items()): + if isinstance(name, tuple): + new_name = NamespacedAttribute(*name) + del attributes[name] + attributes[new_name] = value + + self.soup.builder._replace_cdata_list_attribute_values( + self.name, attributes) + for name, value in list(attributes.items()): + self.element[name] = value + + # The attributes may contain variables that need substitution. + # Call set_up_substitutions manually. + # + # The Tag constructor called this method when the Tag was created, + # but we just set/changed the attributes, so call it again. + self.soup.builder.set_up_substitutions(self.element) + attributes = property(getAttributes, setAttributes) + + def insertText(self, data, insertBefore=None): + text = TextNode(self.soup.new_string(data), self.soup) + if insertBefore: + self.insertBefore(text, insertBefore) + else: + self.appendChild(text) + + def insertBefore(self, node, refNode): + index = self.element.index(refNode.element) + if (node.element.__class__ == NavigableString and self.element.contents + and self.element.contents[index-1].__class__ == NavigableString): + # (See comments in appendChild) + old_node = self.element.contents[index-1] + new_str = self.soup.new_string(old_node + node.element) + old_node.replace_with(new_str) + else: + self.element.insert(index, node.element) + node.parent = self + + def removeChild(self, node): + node.element.extract() + + def reparentChildren(self, new_parent): + """Move all of this tag's children into another tag.""" + # print "MOVE", self.element.contents + # print "FROM", self.element + # print "TO", new_parent.element + + element = self.element + new_parent_element = new_parent.element + # Determine what this tag's next_element will be once all the children + # are removed. + final_next_element = element.next_sibling + + new_parents_last_descendant = new_parent_element._last_descendant(False, False) + if len(new_parent_element.contents) > 0: + # The new parent already contains children. We will be + # appending this tag's children to the end. + new_parents_last_child = new_parent_element.contents[-1] + new_parents_last_descendant_next_element = new_parents_last_descendant.next_element + else: + # The new parent contains no children. + new_parents_last_child = None + new_parents_last_descendant_next_element = new_parent_element.next_element + + to_append = element.contents + if len(to_append) > 0: + # Set the first child's previous_element and previous_sibling + # to elements within the new parent + first_child = to_append[0] + if new_parents_last_descendant: + first_child.previous_element = new_parents_last_descendant + else: + first_child.previous_element = new_parent_element + first_child.previous_sibling = new_parents_last_child + if new_parents_last_descendant: + new_parents_last_descendant.next_element = first_child + else: + new_parent_element.next_element = first_child + if new_parents_last_child: + new_parents_last_child.next_sibling = first_child + + # Find the very last element being moved. It is now the + # parent's last descendant. It has no .next_sibling and + # its .next_element is whatever the previous last + # descendant had. + last_childs_last_descendant = to_append[-1]._last_descendant(False, True) + + last_childs_last_descendant.next_element = new_parents_last_descendant_next_element + if new_parents_last_descendant_next_element: + # TODO: This code has no test coverage and I'm not sure + # how to get html5lib to go through this path, but it's + # just the other side of the previous line. + new_parents_last_descendant_next_element.previous_element = last_childs_last_descendant + last_childs_last_descendant.next_sibling = None + + for child in to_append: + child.parent = new_parent_element + new_parent_element.contents.append(child) + + # Now that this element has no children, change its .next_element. + element.contents = [] + element.next_element = final_next_element + + # print "DONE WITH MOVE" + # print "FROM", self.element + # print "TO", new_parent_element + + def cloneNode(self): + tag = self.soup.new_tag(self.element.name, self.namespace) + node = Element(tag, self.soup, self.namespace) + for key,value in self.attributes: + node.attributes[key] = value + return node + + def hasContent(self): + return self.element.contents + + def getNameTuple(self): + if self.namespace == None: + return namespaces["html"], self.name + else: + return self.namespace, self.name + + nameTuple = property(getNameTuple) + +class TextNode(Element): + def __init__(self, element, soup): + treebuilder_base.Node.__init__(self, None) + self.element = element + self.soup = soup + + def cloneNode(self): + raise NotImplementedError diff --git a/venv/Lib/site-packages/bs4/builder/_htmlparser.py b/venv/Lib/site-packages/bs4/builder/_htmlparser.py new file mode 100644 index 0000000..907d355 --- /dev/null +++ b/venv/Lib/site-packages/bs4/builder/_htmlparser.py @@ -0,0 +1,314 @@ +"""Use the HTMLParser library to parse HTML files that aren't too bad.""" + +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +__all__ = [ + 'HTMLParserTreeBuilder', + ] + +from html.parser import HTMLParser + +try: + from html.parser import HTMLParseError +except ImportError as e: + # HTMLParseError is removed in Python 3.5. Since it can never be + # thrown in 3.5, we can just define our own class as a placeholder. + class HTMLParseError(Exception): + pass + +import sys +import warnings + +# Starting in Python 3.2, the HTMLParser constructor takes a 'strict' +# argument, which we'd like to set to False. Unfortunately, +# http://bugs.python.org/issue13273 makes strict=True a better bet +# before Python 3.2.3. +# +# At the end of this file, we monkeypatch HTMLParser so that +# strict=True works well on Python 3.2.2. +major, minor, release = sys.version_info[:3] +CONSTRUCTOR_TAKES_STRICT = major == 3 and minor == 2 and release >= 3 +CONSTRUCTOR_STRICT_IS_DEPRECATED = major == 3 and minor == 3 +CONSTRUCTOR_TAKES_CONVERT_CHARREFS = major == 3 and minor >= 4 + + +from bs4.element import ( + CData, + Comment, + Declaration, + Doctype, + ProcessingInstruction, + ) +from bs4.dammit import EntitySubstitution, UnicodeDammit + +from bs4.builder import ( + HTML, + HTMLTreeBuilder, + STRICT, + ) + + +HTMLPARSER = 'html.parser' + +class BeautifulSoupHTMLParser(HTMLParser): + + def __init__(self, *args, **kwargs): + HTMLParser.__init__(self, *args, **kwargs) + + # Keep a list of empty-element tags that were encountered + # without an explicit closing tag. If we encounter a closing tag + # of this type, we'll associate it with one of those entries. + # + # This isn't a stack because we don't care about the + # order. It's a list of closing tags we've already handled and + # will ignore, assuming they ever show up. + self.already_closed_empty_element = [] + + def handle_startendtag(self, name, attrs): + # This is only called when the markup looks like + # . + + # is_startend() tells handle_starttag not to close the tag + # just because its name matches a known empty-element tag. We + # know that this is an empty-element tag and we want to call + # handle_endtag ourselves. + tag = self.handle_starttag(name, attrs, handle_empty_element=False) + self.handle_endtag(name) + + def handle_starttag(self, name, attrs, handle_empty_element=True): + # XXX namespace + attr_dict = {} + for key, value in attrs: + # Change None attribute values to the empty string + # for consistency with the other tree builders. + if value is None: + value = '' + attr_dict[key] = value + attrvalue = '""' + #print "START", name + tag = self.soup.handle_starttag(name, None, None, attr_dict) + if tag and tag.is_empty_element and handle_empty_element: + # Unlike other parsers, html.parser doesn't send separate end tag + # events for empty-element tags. (It's handled in + # handle_startendtag, but only if the original markup looked like + # .) + # + # So we need to call handle_endtag() ourselves. Since we + # know the start event is identical to the end event, we + # don't want handle_endtag() to cross off any previous end + # events for tags of this name. + self.handle_endtag(name, check_already_closed=False) + + # But we might encounter an explicit closing tag for this tag + # later on. If so, we want to ignore it. + self.already_closed_empty_element.append(name) + + def handle_endtag(self, name, check_already_closed=True): + #print "END", name + if check_already_closed and name in self.already_closed_empty_element: + # This is a redundant end tag for an empty-element tag. + # We've already called handle_endtag() for it, so just + # check it off the list. + # print "ALREADY CLOSED", name + self.already_closed_empty_element.remove(name) + else: + self.soup.handle_endtag(name) + + def handle_data(self, data): + self.soup.handle_data(data) + + def handle_charref(self, name): + # XXX workaround for a bug in HTMLParser. Remove this once + # it's fixed in all supported versions. + # http://bugs.python.org/issue13633 + if name.startswith('x'): + real_name = int(name.lstrip('x'), 16) + elif name.startswith('X'): + real_name = int(name.lstrip('X'), 16) + else: + real_name = int(name) + + try: + data = chr(real_name) + except (ValueError, OverflowError) as e: + data = "\N{REPLACEMENT CHARACTER}" + + self.handle_data(data) + + def handle_entityref(self, name): + character = EntitySubstitution.HTML_ENTITY_TO_CHARACTER.get(name) + if character is not None: + data = character + else: + data = "&%s;" % name + self.handle_data(data) + + def handle_comment(self, data): + self.soup.endData() + self.soup.handle_data(data) + self.soup.endData(Comment) + + def handle_decl(self, data): + self.soup.endData() + if data.startswith("DOCTYPE "): + data = data[len("DOCTYPE "):] + elif data == 'DOCTYPE': + # i.e. "" + data = '' + self.soup.handle_data(data) + self.soup.endData(Doctype) + + def unknown_decl(self, data): + if data.upper().startswith('CDATA['): + cls = CData + data = data[len('CDATA['):] + else: + cls = Declaration + self.soup.endData() + self.soup.handle_data(data) + self.soup.endData(cls) + + def handle_pi(self, data): + self.soup.endData() + self.soup.handle_data(data) + self.soup.endData(ProcessingInstruction) + + +class HTMLParserTreeBuilder(HTMLTreeBuilder): + + is_xml = False + picklable = True + NAME = HTMLPARSER + features = [NAME, HTML, STRICT] + + def __init__(self, *args, **kwargs): + if CONSTRUCTOR_TAKES_STRICT and not CONSTRUCTOR_STRICT_IS_DEPRECATED: + kwargs['strict'] = False + if CONSTRUCTOR_TAKES_CONVERT_CHARREFS: + kwargs['convert_charrefs'] = False + self.parser_args = (args, kwargs) + + def prepare_markup(self, markup, user_specified_encoding=None, + document_declared_encoding=None, exclude_encodings=None): + """ + :return: A 4-tuple (markup, original encoding, encoding + declared within markup, whether any characters had to be + replaced with REPLACEMENT CHARACTER). + """ + if isinstance(markup, str): + yield (markup, None, None, False) + return + + try_encodings = [user_specified_encoding, document_declared_encoding] + dammit = UnicodeDammit(markup, try_encodings, is_html=True, + exclude_encodings=exclude_encodings) + yield (dammit.markup, dammit.original_encoding, + dammit.declared_html_encoding, + dammit.contains_replacement_characters) + + def feed(self, markup): + args, kwargs = self.parser_args + parser = BeautifulSoupHTMLParser(*args, **kwargs) + parser.soup = self.soup + try: + parser.feed(markup) + except HTMLParseError as e: + warnings.warn(RuntimeWarning( + "Python's built-in HTMLParser cannot parse the given document. This is not a bug in Beautiful Soup. The best solution is to install an external parser (lxml or html5lib), and use Beautiful Soup with that parser. See http://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-a-parser for help.")) + raise e + parser.already_closed_empty_element = [] + +# Patch 3.2 versions of HTMLParser earlier than 3.2.3 to use some +# 3.2.3 code. This ensures they don't treat markup like

as a +# string. +# +# XXX This code can be removed once most Python 3 users are on 3.2.3. +if major == 3 and minor == 2 and not CONSTRUCTOR_TAKES_STRICT: + import re + attrfind_tolerant = re.compile( + r'\s*((?<=[\'"\s])[^\s/>][^\s/=>]*)(\s*=+\s*' + r'(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?') + HTMLParserTreeBuilder.attrfind_tolerant = attrfind_tolerant + + locatestarttagend = re.compile(r""" + <[a-zA-Z][-.a-zA-Z0-9:_]* # tag name + (?:\s+ # whitespace before attribute name + (?:[a-zA-Z_][-.:a-zA-Z0-9_]* # attribute name + (?:\s*=\s* # value indicator + (?:'[^']*' # LITA-enclosed value + |\"[^\"]*\" # LIT-enclosed value + |[^'\">\s]+ # bare value + ) + )? + ) + )* + \s* # trailing whitespace +""", re.VERBOSE) + BeautifulSoupHTMLParser.locatestarttagend = locatestarttagend + + from html.parser import tagfind, attrfind + + def parse_starttag(self, i): + self.__starttag_text = None + endpos = self.check_for_whole_start_tag(i) + if endpos < 0: + return endpos + rawdata = self.rawdata + self.__starttag_text = rawdata[i:endpos] + + # Now parse the data between i+1 and j into a tag and attrs + attrs = [] + match = tagfind.match(rawdata, i+1) + assert match, 'unexpected call to parse_starttag()' + k = match.end() + self.lasttag = tag = rawdata[i+1:k].lower() + while k < endpos: + if self.strict: + m = attrfind.match(rawdata, k) + else: + m = attrfind_tolerant.match(rawdata, k) + if not m: + break + attrname, rest, attrvalue = m.group(1, 2, 3) + if not rest: + attrvalue = None + elif attrvalue[:1] == '\'' == attrvalue[-1:] or \ + attrvalue[:1] == '"' == attrvalue[-1:]: + attrvalue = attrvalue[1:-1] + if attrvalue: + attrvalue = self.unescape(attrvalue) + attrs.append((attrname.lower(), attrvalue)) + k = m.end() + + end = rawdata[k:endpos].strip() + if end not in (">", "/>"): + lineno, offset = self.getpos() + if "\n" in self.__starttag_text: + lineno = lineno + self.__starttag_text.count("\n") + offset = len(self.__starttag_text) \ + - self.__starttag_text.rfind("\n") + else: + offset = offset + len(self.__starttag_text) + if self.strict: + self.error("junk characters in start tag: %r" + % (rawdata[k:endpos][:20],)) + self.handle_data(rawdata[i:endpos]) + return endpos + if end.endswith('/>'): + # XHTML-style empty tag: + self.handle_startendtag(tag, attrs) + else: + self.handle_starttag(tag, attrs) + if tag in self.CDATA_CONTENT_ELEMENTS: + self.set_cdata_mode(tag) + return endpos + + def set_cdata_mode(self, elem): + self.cdata_elem = elem.lower() + self.interesting = re.compile(r'' % self.cdata_elem, re.I) + + BeautifulSoupHTMLParser.parse_starttag = parse_starttag + BeautifulSoupHTMLParser.set_cdata_mode = set_cdata_mode + + CONSTRUCTOR_TAKES_STRICT = True diff --git a/venv/Lib/site-packages/bs4/builder/_lxml.py b/venv/Lib/site-packages/bs4/builder/_lxml.py new file mode 100644 index 0000000..244d457 --- /dev/null +++ b/venv/Lib/site-packages/bs4/builder/_lxml.py @@ -0,0 +1,258 @@ +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. +__all__ = [ + 'LXMLTreeBuilderForXML', + 'LXMLTreeBuilder', + ] + +from io import BytesIO +from io import StringIO +import collections +from lxml import etree +from bs4.element import ( + Comment, + Doctype, + NamespacedAttribute, + ProcessingInstruction, + XMLProcessingInstruction, +) +from bs4.builder import ( + FAST, + HTML, + HTMLTreeBuilder, + PERMISSIVE, + ParserRejectedMarkup, + TreeBuilder, + XML) +from bs4.dammit import EncodingDetector + +LXML = 'lxml' + +class LXMLTreeBuilderForXML(TreeBuilder): + DEFAULT_PARSER_CLASS = etree.XMLParser + + is_xml = True + processing_instruction_class = XMLProcessingInstruction + + NAME = "lxml-xml" + ALTERNATE_NAMES = ["xml"] + + # Well, it's permissive by XML parser standards. + features = [NAME, LXML, XML, FAST, PERMISSIVE] + + CHUNK_SIZE = 512 + + # This namespace mapping is specified in the XML Namespace + # standard. + DEFAULT_NSMAPS = {'http://www.w3.org/XML/1998/namespace' : "xml"} + + def default_parser(self, encoding): + # This can either return a parser object or a class, which + # will be instantiated with default arguments. + if self._default_parser is not None: + return self._default_parser + return etree.XMLParser( + target=self, strip_cdata=False, recover=True, encoding=encoding) + + def parser_for(self, encoding): + # Use the default parser. + parser = self.default_parser(encoding) + + if isinstance(parser, collections.Callable): + # Instantiate the parser with default arguments + parser = parser(target=self, strip_cdata=False, encoding=encoding) + return parser + + def __init__(self, parser=None, empty_element_tags=None): + # TODO: Issue a warning if parser is present but not a + # callable, since that means there's no way to create new + # parsers for different encodings. + self._default_parser = parser + if empty_element_tags is not None: + self.empty_element_tags = set(empty_element_tags) + self.soup = None + self.nsmaps = [self.DEFAULT_NSMAPS] + + def _getNsTag(self, tag): + # Split the namespace URL out of a fully-qualified lxml tag + # name. Copied from lxml's src/lxml/sax.py. + if tag[0] == '{': + return tuple(tag[1:].split('}', 1)) + else: + return (None, tag) + + def prepare_markup(self, markup, user_specified_encoding=None, + exclude_encodings=None, + document_declared_encoding=None): + """ + :yield: A series of 4-tuples. + (markup, encoding, declared encoding, + has undergone character replacement) + + Each 4-tuple represents a strategy for parsing the document. + """ + # Instead of using UnicodeDammit to convert the bytestring to + # Unicode using different encodings, use EncodingDetector to + # iterate over the encodings, and tell lxml to try to parse + # the document as each one in turn. + is_html = not self.is_xml + if is_html: + self.processing_instruction_class = ProcessingInstruction + else: + self.processing_instruction_class = XMLProcessingInstruction + + if isinstance(markup, str): + # We were given Unicode. Maybe lxml can parse Unicode on + # this system? + yield markup, None, document_declared_encoding, False + + if isinstance(markup, str): + # No, apparently not. Convert the Unicode to UTF-8 and + # tell lxml to parse it as UTF-8. + yield (markup.encode("utf8"), "utf8", + document_declared_encoding, False) + + try_encodings = [user_specified_encoding, document_declared_encoding] + detector = EncodingDetector( + markup, try_encodings, is_html, exclude_encodings) + for encoding in detector.encodings: + yield (detector.markup, encoding, document_declared_encoding, False) + + def feed(self, markup): + if isinstance(markup, bytes): + markup = BytesIO(markup) + elif isinstance(markup, str): + markup = StringIO(markup) + + # Call feed() at least once, even if the markup is empty, + # or the parser won't be initialized. + data = markup.read(self.CHUNK_SIZE) + try: + self.parser = self.parser_for(self.soup.original_encoding) + self.parser.feed(data) + while len(data) != 0: + # Now call feed() on the rest of the data, chunk by chunk. + data = markup.read(self.CHUNK_SIZE) + if len(data) != 0: + self.parser.feed(data) + self.parser.close() + except (UnicodeDecodeError, LookupError, etree.ParserError) as e: + raise ParserRejectedMarkup(str(e)) + + def close(self): + self.nsmaps = [self.DEFAULT_NSMAPS] + + def start(self, name, attrs, nsmap={}): + # Make sure attrs is a mutable dict--lxml may send an immutable dictproxy. + attrs = dict(attrs) + nsprefix = None + # Invert each namespace map as it comes in. + if len(self.nsmaps) > 1: + # There are no new namespaces for this tag, but + # non-default namespaces are in play, so we need a + # separate tag stack to know when they end. + self.nsmaps.append(None) + elif len(nsmap) > 0: + # A new namespace mapping has come into play. + inverted_nsmap = dict((value, key) for key, value in list(nsmap.items())) + self.nsmaps.append(inverted_nsmap) + # Also treat the namespace mapping as a set of attributes on the + # tag, so we can recreate it later. + attrs = attrs.copy() + for prefix, namespace in list(nsmap.items()): + attribute = NamespacedAttribute( + "xmlns", prefix, "http://www.w3.org/2000/xmlns/") + attrs[attribute] = namespace + + # Namespaces are in play. Find any attributes that came in + # from lxml with namespaces attached to their names, and + # turn then into NamespacedAttribute objects. + new_attrs = {} + for attr, value in list(attrs.items()): + namespace, attr = self._getNsTag(attr) + if namespace is None: + new_attrs[attr] = value + else: + nsprefix = self._prefix_for_namespace(namespace) + attr = NamespacedAttribute(nsprefix, attr, namespace) + new_attrs[attr] = value + attrs = new_attrs + + namespace, name = self._getNsTag(name) + nsprefix = self._prefix_for_namespace(namespace) + self.soup.handle_starttag(name, namespace, nsprefix, attrs) + + def _prefix_for_namespace(self, namespace): + """Find the currently active prefix for the given namespace.""" + if namespace is None: + return None + for inverted_nsmap in reversed(self.nsmaps): + if inverted_nsmap is not None and namespace in inverted_nsmap: + return inverted_nsmap[namespace] + return None + + def end(self, name): + self.soup.endData() + completed_tag = self.soup.tagStack[-1] + namespace, name = self._getNsTag(name) + nsprefix = None + if namespace is not None: + for inverted_nsmap in reversed(self.nsmaps): + if inverted_nsmap is not None and namespace in inverted_nsmap: + nsprefix = inverted_nsmap[namespace] + break + self.soup.handle_endtag(name, nsprefix) + if len(self.nsmaps) > 1: + # This tag, or one of its parents, introduced a namespace + # mapping, so pop it off the stack. + self.nsmaps.pop() + + def pi(self, target, data): + self.soup.endData() + self.soup.handle_data(target + ' ' + data) + self.soup.endData(self.processing_instruction_class) + + def data(self, content): + self.soup.handle_data(content) + + def doctype(self, name, pubid, system): + self.soup.endData() + doctype = Doctype.for_name_and_ids(name, pubid, system) + self.soup.object_was_parsed(doctype) + + def comment(self, content): + "Handle comments as Comment objects." + self.soup.endData() + self.soup.handle_data(content) + self.soup.endData(Comment) + + def test_fragment_to_document(self, fragment): + """See `TreeBuilder`.""" + return '\n%s' % fragment + + +class LXMLTreeBuilder(HTMLTreeBuilder, LXMLTreeBuilderForXML): + + NAME = LXML + ALTERNATE_NAMES = ["lxml-html"] + + features = ALTERNATE_NAMES + [NAME, HTML, FAST, PERMISSIVE] + is_xml = False + processing_instruction_class = ProcessingInstruction + + def default_parser(self, encoding): + return etree.HTMLParser + + def feed(self, markup): + encoding = self.soup.original_encoding + try: + self.parser = self.parser_for(encoding) + self.parser.feed(markup) + self.parser.close() + except (UnicodeDecodeError, LookupError, etree.ParserError) as e: + raise ParserRejectedMarkup(str(e)) + + + def test_fragment_to_document(self, fragment): + """See `TreeBuilder`.""" + return '%s' % fragment diff --git a/venv/Lib/site-packages/bs4/dammit.py b/venv/Lib/site-packages/bs4/dammit.py new file mode 100644 index 0000000..8e399e0 --- /dev/null +++ b/venv/Lib/site-packages/bs4/dammit.py @@ -0,0 +1,842 @@ +# -*- coding: utf-8 -*- +"""Beautiful Soup bonus library: Unicode, Dammit + +This library converts a bytestream to Unicode through any means +necessary. It is heavily based on code from Mark Pilgrim's Universal +Feed Parser. It works best on XML and HTML, but it does not rewrite the +XML or HTML to reflect a new encoding; that's the tree builder's job. +""" +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. +__license__ = "MIT" + +import codecs +from html.entities import codepoint2name +import re +import logging +import string + +# Import a library to autodetect character encodings. +chardet_type = None +try: + # First try the fast C implementation. + # PyPI package: cchardet + import cchardet + def chardet_dammit(s): + return cchardet.detect(s)['encoding'] +except ImportError: + try: + # Fall back to the pure Python implementation + # Debian package: python-chardet + # PyPI package: chardet + import chardet + def chardet_dammit(s): + return chardet.detect(s)['encoding'] + #import chardet.constants + #chardet.constants._debug = 1 + except ImportError: + # No chardet available. + def chardet_dammit(s): + return None + +# Available from http://cjkpython.i18n.org/. +try: + import iconv_codec +except ImportError: + pass + +xml_encoding_re = re.compile( + '^<\?.*encoding=[\'"](.*?)[\'"].*\?>'.encode(), re.I) +html_meta_re = re.compile( + '<\s*meta[^>]+charset\s*=\s*["\']?([^>]*?)[ /;\'">]'.encode(), re.I) + +class EntitySubstitution(object): + + """Substitute XML or HTML entities for the corresponding characters.""" + + def _populate_class_variables(): + lookup = {} + reverse_lookup = {} + characters_for_re = [] + for codepoint, name in list(codepoint2name.items()): + character = chr(codepoint) + if codepoint != 34: + # There's no point in turning the quotation mark into + # ", unless it happens within an attribute value, which + # is handled elsewhere. + characters_for_re.append(character) + lookup[character] = name + # But we do want to turn " into the quotation mark. + reverse_lookup[name] = character + re_definition = "[%s]" % "".join(characters_for_re) + return lookup, reverse_lookup, re.compile(re_definition) + (CHARACTER_TO_HTML_ENTITY, HTML_ENTITY_TO_CHARACTER, + CHARACTER_TO_HTML_ENTITY_RE) = _populate_class_variables() + + CHARACTER_TO_XML_ENTITY = { + "'": "apos", + '"': "quot", + "&": "amp", + "<": "lt", + ">": "gt", + } + + BARE_AMPERSAND_OR_BRACKET = re.compile("([<>]|" + "&(?!#\d+;|#x[0-9a-fA-F]+;|\w+;)" + ")") + + AMPERSAND_OR_BRACKET = re.compile("([<>&])") + + @classmethod + def _substitute_html_entity(cls, matchobj): + entity = cls.CHARACTER_TO_HTML_ENTITY.get(matchobj.group(0)) + return "&%s;" % entity + + @classmethod + def _substitute_xml_entity(cls, matchobj): + """Used with a regular expression to substitute the + appropriate XML entity for an XML special character.""" + entity = cls.CHARACTER_TO_XML_ENTITY[matchobj.group(0)] + return "&%s;" % entity + + @classmethod + def quoted_attribute_value(self, value): + """Make a value into a quoted XML attribute, possibly escaping it. + + Most strings will be quoted using double quotes. + + Bob's Bar -> "Bob's Bar" + + If a string contains double quotes, it will be quoted using + single quotes. + + Welcome to "my bar" -> 'Welcome to "my bar"' + + If a string contains both single and double quotes, the + double quotes will be escaped, and the string will be quoted + using double quotes. + + Welcome to "Bob's Bar" -> "Welcome to "Bob's bar" + """ + quote_with = '"' + if '"' in value: + if "'" in value: + # The string contains both single and double + # quotes. Turn the double quotes into + # entities. We quote the double quotes rather than + # the single quotes because the entity name is + # """ whether this is HTML or XML. If we + # quoted the single quotes, we'd have to decide + # between ' and &squot;. + replace_with = """ + value = value.replace('"', replace_with) + else: + # There are double quotes but no single quotes. + # We can use single quotes to quote the attribute. + quote_with = "'" + return quote_with + value + quote_with + + @classmethod + def substitute_xml(cls, value, make_quoted_attribute=False): + """Substitute XML entities for special XML characters. + + :param value: A string to be substituted. The less-than sign + will become <, the greater-than sign will become >, + and any ampersands will become &. If you want ampersands + that appear to be part of an entity definition to be left + alone, use substitute_xml_containing_entities() instead. + + :param make_quoted_attribute: If True, then the string will be + quoted, as befits an attribute value. + """ + # Escape angle brackets and ampersands. + value = cls.AMPERSAND_OR_BRACKET.sub( + cls._substitute_xml_entity, value) + + if make_quoted_attribute: + value = cls.quoted_attribute_value(value) + return value + + @classmethod + def substitute_xml_containing_entities( + cls, value, make_quoted_attribute=False): + """Substitute XML entities for special XML characters. + + :param value: A string to be substituted. The less-than sign will + become <, the greater-than sign will become >, and any + ampersands that are not part of an entity defition will + become &. + + :param make_quoted_attribute: If True, then the string will be + quoted, as befits an attribute value. + """ + # Escape angle brackets, and ampersands that aren't part of + # entities. + value = cls.BARE_AMPERSAND_OR_BRACKET.sub( + cls._substitute_xml_entity, value) + + if make_quoted_attribute: + value = cls.quoted_attribute_value(value) + return value + + @classmethod + def substitute_html(cls, s): + """Replace certain Unicode characters with named HTML entities. + + This differs from data.encode(encoding, 'xmlcharrefreplace') + in that the goal is to make the result more readable (to those + with ASCII displays) rather than to recover from + errors. There's absolutely nothing wrong with a UTF-8 string + containg a LATIN SMALL LETTER E WITH ACUTE, but replacing that + character with "é" will make it more readable to some + people. + """ + return cls.CHARACTER_TO_HTML_ENTITY_RE.sub( + cls._substitute_html_entity, s) + + +class EncodingDetector: + """Suggests a number of possible encodings for a bytestring. + + Order of precedence: + + 1. Encodings you specifically tell EncodingDetector to try first + (the override_encodings argument to the constructor). + + 2. An encoding declared within the bytestring itself, either in an + XML declaration (if the bytestring is to be interpreted as an XML + document), or in a tag (if the bytestring is to be + interpreted as an HTML document.) + + 3. An encoding detected through textual analysis by chardet, + cchardet, or a similar external library. + + 4. UTF-8. + + 5. Windows-1252. + """ + def __init__(self, markup, override_encodings=None, is_html=False, + exclude_encodings=None): + self.override_encodings = override_encodings or [] + exclude_encodings = exclude_encodings or [] + self.exclude_encodings = set([x.lower() for x in exclude_encodings]) + self.chardet_encoding = None + self.is_html = is_html + self.declared_encoding = None + + # First order of business: strip a byte-order mark. + self.markup, self.sniffed_encoding = self.strip_byte_order_mark(markup) + + def _usable(self, encoding, tried): + if encoding is not None: + encoding = encoding.lower() + if encoding in self.exclude_encodings: + return False + if encoding not in tried: + tried.add(encoding) + return True + return False + + @property + def encodings(self): + """Yield a number of encodings that might work for this markup.""" + tried = set() + for e in self.override_encodings: + if self._usable(e, tried): + yield e + + # Did the document originally start with a byte-order mark + # that indicated its encoding? + if self._usable(self.sniffed_encoding, tried): + yield self.sniffed_encoding + + # Look within the document for an XML or HTML encoding + # declaration. + if self.declared_encoding is None: + self.declared_encoding = self.find_declared_encoding( + self.markup, self.is_html) + if self._usable(self.declared_encoding, tried): + yield self.declared_encoding + + # Use third-party character set detection to guess at the + # encoding. + if self.chardet_encoding is None: + self.chardet_encoding = chardet_dammit(self.markup) + if self._usable(self.chardet_encoding, tried): + yield self.chardet_encoding + + # As a last-ditch effort, try utf-8 and windows-1252. + for e in ('utf-8', 'windows-1252'): + if self._usable(e, tried): + yield e + + @classmethod + def strip_byte_order_mark(cls, data): + """If a byte-order mark is present, strip it and return the encoding it implies.""" + encoding = None + if isinstance(data, str): + # Unicode data cannot have a byte-order mark. + return data, encoding + if (len(data) >= 4) and (data[:2] == b'\xfe\xff') \ + and (data[2:4] != '\x00\x00'): + encoding = 'utf-16be' + data = data[2:] + elif (len(data) >= 4) and (data[:2] == b'\xff\xfe') \ + and (data[2:4] != '\x00\x00'): + encoding = 'utf-16le' + data = data[2:] + elif data[:3] == b'\xef\xbb\xbf': + encoding = 'utf-8' + data = data[3:] + elif data[:4] == b'\x00\x00\xfe\xff': + encoding = 'utf-32be' + data = data[4:] + elif data[:4] == b'\xff\xfe\x00\x00': + encoding = 'utf-32le' + data = data[4:] + return data, encoding + + @classmethod + def find_declared_encoding(cls, markup, is_html=False, search_entire_document=False): + """Given a document, tries to find its declared encoding. + + An XML encoding is declared at the beginning of the document. + + An HTML encoding is declared in a tag, hopefully near the + beginning of the document. + """ + if search_entire_document: + xml_endpos = html_endpos = len(markup) + else: + xml_endpos = 1024 + html_endpos = max(2048, int(len(markup) * 0.05)) + + declared_encoding = None + declared_encoding_match = xml_encoding_re.search(markup, endpos=xml_endpos) + if not declared_encoding_match and is_html: + declared_encoding_match = html_meta_re.search(markup, endpos=html_endpos) + if declared_encoding_match is not None: + declared_encoding = declared_encoding_match.groups()[0].decode( + 'ascii', 'replace') + if declared_encoding: + return declared_encoding.lower() + return None + +class UnicodeDammit: + """A class for detecting the encoding of a *ML document and + converting it to a Unicode string. If the source encoding is + windows-1252, can replace MS smart quotes with their HTML or XML + equivalents.""" + + # This dictionary maps commonly seen values for "charset" in HTML + # meta tags to the corresponding Python codec names. It only covers + # values that aren't in Python's aliases and can't be determined + # by the heuristics in find_codec. + CHARSET_ALIASES = {"macintosh": "mac-roman", + "x-sjis": "shift-jis"} + + ENCODINGS_WITH_SMART_QUOTES = [ + "windows-1252", + "iso-8859-1", + "iso-8859-2", + ] + + def __init__(self, markup, override_encodings=[], + smart_quotes_to=None, is_html=False, exclude_encodings=[]): + self.smart_quotes_to = smart_quotes_to + self.tried_encodings = [] + self.contains_replacement_characters = False + self.is_html = is_html + self.log = logging.getLogger(__name__) + self.detector = EncodingDetector( + markup, override_encodings, is_html, exclude_encodings) + + # Short-circuit if the data is in Unicode to begin with. + if isinstance(markup, str) or markup == '': + self.markup = markup + self.unicode_markup = str(markup) + self.original_encoding = None + return + + # The encoding detector may have stripped a byte-order mark. + # Use the stripped markup from this point on. + self.markup = self.detector.markup + + u = None + for encoding in self.detector.encodings: + markup = self.detector.markup + u = self._convert_from(encoding) + if u is not None: + break + + if not u: + # None of the encodings worked. As an absolute last resort, + # try them again with character replacement. + + for encoding in self.detector.encodings: + if encoding != "ascii": + u = self._convert_from(encoding, "replace") + if u is not None: + self.log.warning( + "Some characters could not be decoded, and were " + "replaced with REPLACEMENT CHARACTER." + ) + self.contains_replacement_characters = True + break + + # If none of that worked, we could at this point force it to + # ASCII, but that would destroy so much data that I think + # giving up is better. + self.unicode_markup = u + if not u: + self.original_encoding = None + + def _sub_ms_char(self, match): + """Changes a MS smart quote character to an XML or HTML + entity, or an ASCII character.""" + orig = match.group(1) + if self.smart_quotes_to == 'ascii': + sub = self.MS_CHARS_TO_ASCII.get(orig).encode() + else: + sub = self.MS_CHARS.get(orig) + if type(sub) == tuple: + if self.smart_quotes_to == 'xml': + sub = '&#x'.encode() + sub[1].encode() + ';'.encode() + else: + sub = '&'.encode() + sub[0].encode() + ';'.encode() + else: + sub = sub.encode() + return sub + + def _convert_from(self, proposed, errors="strict"): + proposed = self.find_codec(proposed) + if not proposed or (proposed, errors) in self.tried_encodings: + return None + self.tried_encodings.append((proposed, errors)) + markup = self.markup + # Convert smart quotes to HTML if coming from an encoding + # that might have them. + if (self.smart_quotes_to is not None + and proposed in self.ENCODINGS_WITH_SMART_QUOTES): + smart_quotes_re = b"([\x80-\x9f])" + smart_quotes_compiled = re.compile(smart_quotes_re) + markup = smart_quotes_compiled.sub(self._sub_ms_char, markup) + + try: + #print "Trying to convert document to %s (errors=%s)" % ( + # proposed, errors) + u = self._to_unicode(markup, proposed, errors) + self.markup = u + self.original_encoding = proposed + except Exception as e: + #print "That didn't work!" + #print e + return None + #print "Correct encoding: %s" % proposed + return self.markup + + def _to_unicode(self, data, encoding, errors="strict"): + '''Given a string and its encoding, decodes the string into Unicode. + %encoding is a string recognized by encodings.aliases''' + return str(data, encoding, errors) + + @property + def declared_html_encoding(self): + if not self.is_html: + return None + return self.detector.declared_encoding + + def find_codec(self, charset): + value = (self._codec(self.CHARSET_ALIASES.get(charset, charset)) + or (charset and self._codec(charset.replace("-", ""))) + or (charset and self._codec(charset.replace("-", "_"))) + or (charset and charset.lower()) + or charset + ) + if value: + return value.lower() + return None + + def _codec(self, charset): + if not charset: + return charset + codec = None + try: + codecs.lookup(charset) + codec = charset + except (LookupError, ValueError): + pass + return codec + + + # A partial mapping of ISO-Latin-1 to HTML entities/XML numeric entities. + MS_CHARS = {b'\x80': ('euro', '20AC'), + b'\x81': ' ', + b'\x82': ('sbquo', '201A'), + b'\x83': ('fnof', '192'), + b'\x84': ('bdquo', '201E'), + b'\x85': ('hellip', '2026'), + b'\x86': ('dagger', '2020'), + b'\x87': ('Dagger', '2021'), + b'\x88': ('circ', '2C6'), + b'\x89': ('permil', '2030'), + b'\x8A': ('Scaron', '160'), + b'\x8B': ('lsaquo', '2039'), + b'\x8C': ('OElig', '152'), + b'\x8D': '?', + b'\x8E': ('#x17D', '17D'), + b'\x8F': '?', + b'\x90': '?', + b'\x91': ('lsquo', '2018'), + b'\x92': ('rsquo', '2019'), + b'\x93': ('ldquo', '201C'), + b'\x94': ('rdquo', '201D'), + b'\x95': ('bull', '2022'), + b'\x96': ('ndash', '2013'), + b'\x97': ('mdash', '2014'), + b'\x98': ('tilde', '2DC'), + b'\x99': ('trade', '2122'), + b'\x9a': ('scaron', '161'), + b'\x9b': ('rsaquo', '203A'), + b'\x9c': ('oelig', '153'), + b'\x9d': '?', + b'\x9e': ('#x17E', '17E'), + b'\x9f': ('Yuml', ''),} + + # A parochial partial mapping of ISO-Latin-1 to ASCII. Contains + # horrors like stripping diacritical marks to turn á into a, but also + # contains non-horrors like turning “ into ". + MS_CHARS_TO_ASCII = { + b'\x80' : 'EUR', + b'\x81' : ' ', + b'\x82' : ',', + b'\x83' : 'f', + b'\x84' : ',,', + b'\x85' : '...', + b'\x86' : '+', + b'\x87' : '++', + b'\x88' : '^', + b'\x89' : '%', + b'\x8a' : 'S', + b'\x8b' : '<', + b'\x8c' : 'OE', + b'\x8d' : '?', + b'\x8e' : 'Z', + b'\x8f' : '?', + b'\x90' : '?', + b'\x91' : "'", + b'\x92' : "'", + b'\x93' : '"', + b'\x94' : '"', + b'\x95' : '*', + b'\x96' : '-', + b'\x97' : '--', + b'\x98' : '~', + b'\x99' : '(TM)', + b'\x9a' : 's', + b'\x9b' : '>', + b'\x9c' : 'oe', + b'\x9d' : '?', + b'\x9e' : 'z', + b'\x9f' : 'Y', + b'\xa0' : ' ', + b'\xa1' : '!', + b'\xa2' : 'c', + b'\xa3' : 'GBP', + b'\xa4' : '$', #This approximation is especially parochial--this is the + #generic currency symbol. + b'\xa5' : 'YEN', + b'\xa6' : '|', + b'\xa7' : 'S', + b'\xa8' : '..', + b'\xa9' : '', + b'\xaa' : '(th)', + b'\xab' : '<<', + b'\xac' : '!', + b'\xad' : ' ', + b'\xae' : '(R)', + b'\xaf' : '-', + b'\xb0' : 'o', + b'\xb1' : '+-', + b'\xb2' : '2', + b'\xb3' : '3', + b'\xb4' : ("'", 'acute'), + b'\xb5' : 'u', + b'\xb6' : 'P', + b'\xb7' : '*', + b'\xb8' : ',', + b'\xb9' : '1', + b'\xba' : '(th)', + b'\xbb' : '>>', + b'\xbc' : '1/4', + b'\xbd' : '1/2', + b'\xbe' : '3/4', + b'\xbf' : '?', + b'\xc0' : 'A', + b'\xc1' : 'A', + b'\xc2' : 'A', + b'\xc3' : 'A', + b'\xc4' : 'A', + b'\xc5' : 'A', + b'\xc6' : 'AE', + b'\xc7' : 'C', + b'\xc8' : 'E', + b'\xc9' : 'E', + b'\xca' : 'E', + b'\xcb' : 'E', + b'\xcc' : 'I', + b'\xcd' : 'I', + b'\xce' : 'I', + b'\xcf' : 'I', + b'\xd0' : 'D', + b'\xd1' : 'N', + b'\xd2' : 'O', + b'\xd3' : 'O', + b'\xd4' : 'O', + b'\xd5' : 'O', + b'\xd6' : 'O', + b'\xd7' : '*', + b'\xd8' : 'O', + b'\xd9' : 'U', + b'\xda' : 'U', + b'\xdb' : 'U', + b'\xdc' : 'U', + b'\xdd' : 'Y', + b'\xde' : 'b', + b'\xdf' : 'B', + b'\xe0' : 'a', + b'\xe1' : 'a', + b'\xe2' : 'a', + b'\xe3' : 'a', + b'\xe4' : 'a', + b'\xe5' : 'a', + b'\xe6' : 'ae', + b'\xe7' : 'c', + b'\xe8' : 'e', + b'\xe9' : 'e', + b'\xea' : 'e', + b'\xeb' : 'e', + b'\xec' : 'i', + b'\xed' : 'i', + b'\xee' : 'i', + b'\xef' : 'i', + b'\xf0' : 'o', + b'\xf1' : 'n', + b'\xf2' : 'o', + b'\xf3' : 'o', + b'\xf4' : 'o', + b'\xf5' : 'o', + b'\xf6' : 'o', + b'\xf7' : '/', + b'\xf8' : 'o', + b'\xf9' : 'u', + b'\xfa' : 'u', + b'\xfb' : 'u', + b'\xfc' : 'u', + b'\xfd' : 'y', + b'\xfe' : 'b', + b'\xff' : 'y', + } + + # A map used when removing rogue Windows-1252/ISO-8859-1 + # characters in otherwise UTF-8 documents. + # + # Note that \x81, \x8d, \x8f, \x90, and \x9d are undefined in + # Windows-1252. + WINDOWS_1252_TO_UTF8 = { + 0x80 : b'\xe2\x82\xac', # € + 0x82 : b'\xe2\x80\x9a', # ‚ + 0x83 : b'\xc6\x92', # ƒ + 0x84 : b'\xe2\x80\x9e', # „ + 0x85 : b'\xe2\x80\xa6', # … + 0x86 : b'\xe2\x80\xa0', # † + 0x87 : b'\xe2\x80\xa1', # ‡ + 0x88 : b'\xcb\x86', # ˆ + 0x89 : b'\xe2\x80\xb0', # ‰ + 0x8a : b'\xc5\xa0', # Š + 0x8b : b'\xe2\x80\xb9', # ‹ + 0x8c : b'\xc5\x92', # Œ + 0x8e : b'\xc5\xbd', # Ž + 0x91 : b'\xe2\x80\x98', # ‘ + 0x92 : b'\xe2\x80\x99', # ’ + 0x93 : b'\xe2\x80\x9c', # “ + 0x94 : b'\xe2\x80\x9d', # ” + 0x95 : b'\xe2\x80\xa2', # • + 0x96 : b'\xe2\x80\x93', # – + 0x97 : b'\xe2\x80\x94', # — + 0x98 : b'\xcb\x9c', # ˜ + 0x99 : b'\xe2\x84\xa2', # ™ + 0x9a : b'\xc5\xa1', # š + 0x9b : b'\xe2\x80\xba', # › + 0x9c : b'\xc5\x93', # œ + 0x9e : b'\xc5\xbe', # ž + 0x9f : b'\xc5\xb8', # Ÿ + 0xa0 : b'\xc2\xa0', #   + 0xa1 : b'\xc2\xa1', # ¡ + 0xa2 : b'\xc2\xa2', # ¢ + 0xa3 : b'\xc2\xa3', # £ + 0xa4 : b'\xc2\xa4', # ¤ + 0xa5 : b'\xc2\xa5', # ¥ + 0xa6 : b'\xc2\xa6', # ¦ + 0xa7 : b'\xc2\xa7', # § + 0xa8 : b'\xc2\xa8', # ¨ + 0xa9 : b'\xc2\xa9', # © + 0xaa : b'\xc2\xaa', # ª + 0xab : b'\xc2\xab', # « + 0xac : b'\xc2\xac', # ¬ + 0xad : b'\xc2\xad', # ­ + 0xae : b'\xc2\xae', # ® + 0xaf : b'\xc2\xaf', # ¯ + 0xb0 : b'\xc2\xb0', # ° + 0xb1 : b'\xc2\xb1', # ± + 0xb2 : b'\xc2\xb2', # ² + 0xb3 : b'\xc2\xb3', # ³ + 0xb4 : b'\xc2\xb4', # ´ + 0xb5 : b'\xc2\xb5', # µ + 0xb6 : b'\xc2\xb6', # ¶ + 0xb7 : b'\xc2\xb7', # · + 0xb8 : b'\xc2\xb8', # ¸ + 0xb9 : b'\xc2\xb9', # ¹ + 0xba : b'\xc2\xba', # º + 0xbb : b'\xc2\xbb', # » + 0xbc : b'\xc2\xbc', # ¼ + 0xbd : b'\xc2\xbd', # ½ + 0xbe : b'\xc2\xbe', # ¾ + 0xbf : b'\xc2\xbf', # ¿ + 0xc0 : b'\xc3\x80', # À + 0xc1 : b'\xc3\x81', # Á + 0xc2 : b'\xc3\x82', #  + 0xc3 : b'\xc3\x83', # à + 0xc4 : b'\xc3\x84', # Ä + 0xc5 : b'\xc3\x85', # Å + 0xc6 : b'\xc3\x86', # Æ + 0xc7 : b'\xc3\x87', # Ç + 0xc8 : b'\xc3\x88', # È + 0xc9 : b'\xc3\x89', # É + 0xca : b'\xc3\x8a', # Ê + 0xcb : b'\xc3\x8b', # Ë + 0xcc : b'\xc3\x8c', # Ì + 0xcd : b'\xc3\x8d', # Í + 0xce : b'\xc3\x8e', # Î + 0xcf : b'\xc3\x8f', # Ï + 0xd0 : b'\xc3\x90', # Ð + 0xd1 : b'\xc3\x91', # Ñ + 0xd2 : b'\xc3\x92', # Ò + 0xd3 : b'\xc3\x93', # Ó + 0xd4 : b'\xc3\x94', # Ô + 0xd5 : b'\xc3\x95', # Õ + 0xd6 : b'\xc3\x96', # Ö + 0xd7 : b'\xc3\x97', # × + 0xd8 : b'\xc3\x98', # Ø + 0xd9 : b'\xc3\x99', # Ù + 0xda : b'\xc3\x9a', # Ú + 0xdb : b'\xc3\x9b', # Û + 0xdc : b'\xc3\x9c', # Ü + 0xdd : b'\xc3\x9d', # Ý + 0xde : b'\xc3\x9e', # Þ + 0xdf : b'\xc3\x9f', # ß + 0xe0 : b'\xc3\xa0', # à + 0xe1 : b'\xa1', # á + 0xe2 : b'\xc3\xa2', # â + 0xe3 : b'\xc3\xa3', # ã + 0xe4 : b'\xc3\xa4', # ä + 0xe5 : b'\xc3\xa5', # å + 0xe6 : b'\xc3\xa6', # æ + 0xe7 : b'\xc3\xa7', # ç + 0xe8 : b'\xc3\xa8', # è + 0xe9 : b'\xc3\xa9', # é + 0xea : b'\xc3\xaa', # ê + 0xeb : b'\xc3\xab', # ë + 0xec : b'\xc3\xac', # ì + 0xed : b'\xc3\xad', # í + 0xee : b'\xc3\xae', # î + 0xef : b'\xc3\xaf', # ï + 0xf0 : b'\xc3\xb0', # ð + 0xf1 : b'\xc3\xb1', # ñ + 0xf2 : b'\xc3\xb2', # ò + 0xf3 : b'\xc3\xb3', # ó + 0xf4 : b'\xc3\xb4', # ô + 0xf5 : b'\xc3\xb5', # õ + 0xf6 : b'\xc3\xb6', # ö + 0xf7 : b'\xc3\xb7', # ÷ + 0xf8 : b'\xc3\xb8', # ø + 0xf9 : b'\xc3\xb9', # ù + 0xfa : b'\xc3\xba', # ú + 0xfb : b'\xc3\xbb', # û + 0xfc : b'\xc3\xbc', # ü + 0xfd : b'\xc3\xbd', # ý + 0xfe : b'\xc3\xbe', # þ + } + + MULTIBYTE_MARKERS_AND_SIZES = [ + (0xc2, 0xdf, 2), # 2-byte characters start with a byte C2-DF + (0xe0, 0xef, 3), # 3-byte characters start with E0-EF + (0xf0, 0xf4, 4), # 4-byte characters start with F0-F4 + ] + + FIRST_MULTIBYTE_MARKER = MULTIBYTE_MARKERS_AND_SIZES[0][0] + LAST_MULTIBYTE_MARKER = MULTIBYTE_MARKERS_AND_SIZES[-1][1] + + @classmethod + def detwingle(cls, in_bytes, main_encoding="utf8", + embedded_encoding="windows-1252"): + """Fix characters from one encoding embedded in some other encoding. + + Currently the only situation supported is Windows-1252 (or its + subset ISO-8859-1), embedded in UTF-8. + + The input must be a bytestring. If you've already converted + the document to Unicode, you're too late. + + The output is a bytestring in which `embedded_encoding` + characters have been converted to their `main_encoding` + equivalents. + """ + if embedded_encoding.replace('_', '-').lower() not in ( + 'windows-1252', 'windows_1252'): + raise NotImplementedError( + "Windows-1252 and ISO-8859-1 are the only currently supported " + "embedded encodings.") + + if main_encoding.lower() not in ('utf8', 'utf-8'): + raise NotImplementedError( + "UTF-8 is the only currently supported main encoding.") + + byte_chunks = [] + + chunk_start = 0 + pos = 0 + while pos < len(in_bytes): + byte = in_bytes[pos] + if not isinstance(byte, int): + # Python 2.x + byte = ord(byte) + if (byte >= cls.FIRST_MULTIBYTE_MARKER + and byte <= cls.LAST_MULTIBYTE_MARKER): + # This is the start of a UTF-8 multibyte character. Skip + # to the end. + for start, end, size in cls.MULTIBYTE_MARKERS_AND_SIZES: + if byte >= start and byte <= end: + pos += size + break + elif byte >= 0x80 and byte in cls.WINDOWS_1252_TO_UTF8: + # We found a Windows-1252 character! + # Save the string up to this point as a chunk. + byte_chunks.append(in_bytes[chunk_start:pos]) + + # Now translate the Windows-1252 character into UTF-8 + # and add it as another, one-byte chunk. + byte_chunks.append(cls.WINDOWS_1252_TO_UTF8[byte]) + pos += 1 + chunk_start = pos + else: + # Go on to the next character. + pos += 1 + if chunk_start == 0: + # The string is unchanged. + return in_bytes + else: + # Store the final chunk. + byte_chunks.append(in_bytes[chunk_start:]) + return b''.join(byte_chunks) + diff --git a/venv/Lib/site-packages/bs4/diagnose.py b/venv/Lib/site-packages/bs4/diagnose.py new file mode 100644 index 0000000..1254861 --- /dev/null +++ b/venv/Lib/site-packages/bs4/diagnose.py @@ -0,0 +1,219 @@ +"""Diagnostic functions, mainly for use when doing tech support.""" + +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. +__license__ = "MIT" + +import cProfile +from io import StringIO +from html.parser import HTMLParser +import bs4 +from bs4 import BeautifulSoup, __version__ +from bs4.builder import builder_registry + +import os +import pstats +import random +import tempfile +import time +import traceback +import sys +import cProfile + +def diagnose(data): + """Diagnostic suite for isolating common problems.""" + print("Diagnostic running on Beautiful Soup %s" % __version__) + print("Python version %s" % sys.version) + + basic_parsers = ["html.parser", "html5lib", "lxml"] + for name in basic_parsers: + for builder in builder_registry.builders: + if name in builder.features: + break + else: + basic_parsers.remove(name) + print(( + "I noticed that %s is not installed. Installing it may help." % + name)) + + if 'lxml' in basic_parsers: + basic_parsers.append(["lxml", "xml"]) + try: + from lxml import etree + print("Found lxml version %s" % ".".join(map(str,etree.LXML_VERSION))) + except ImportError as e: + print ( + "lxml is not installed or couldn't be imported.") + + + if 'html5lib' in basic_parsers: + try: + import html5lib + print("Found html5lib version %s" % html5lib.__version__) + except ImportError as e: + print ( + "html5lib is not installed or couldn't be imported.") + + if hasattr(data, 'read'): + data = data.read() + elif os.path.exists(data): + print('"%s" looks like a filename. Reading data from the file.' % data) + with open(data) as fp: + data = fp.read() + elif data.startswith("http:") or data.startswith("https:"): + print('"%s" looks like a URL. Beautiful Soup is not an HTTP client.' % data) + print("You need to use some other library to get the document behind the URL, and feed that document to Beautiful Soup.") + return + print() + + for parser in basic_parsers: + print("Trying to parse your markup with %s" % parser) + success = False + try: + soup = BeautifulSoup(data, parser) + success = True + except Exception as e: + print("%s could not parse the markup." % parser) + traceback.print_exc() + if success: + print("Here's what %s did with the markup:" % parser) + print(soup.prettify()) + + print("-" * 80) + +def lxml_trace(data, html=True, **kwargs): + """Print out the lxml events that occur during parsing. + + This lets you see how lxml parses a document when no Beautiful + Soup code is running. + """ + from lxml import etree + for event, element in etree.iterparse(StringIO(data), html=html, **kwargs): + print(("%s, %4s, %s" % (event, element.tag, element.text))) + +class AnnouncingParser(HTMLParser): + """Announces HTMLParser parse events, without doing anything else.""" + + def _p(self, s): + print(s) + + def handle_starttag(self, name, attrs): + self._p("%s START" % name) + + def handle_endtag(self, name): + self._p("%s END" % name) + + def handle_data(self, data): + self._p("%s DATA" % data) + + def handle_charref(self, name): + self._p("%s CHARREF" % name) + + def handle_entityref(self, name): + self._p("%s ENTITYREF" % name) + + def handle_comment(self, data): + self._p("%s COMMENT" % data) + + def handle_decl(self, data): + self._p("%s DECL" % data) + + def unknown_decl(self, data): + self._p("%s UNKNOWN-DECL" % data) + + def handle_pi(self, data): + self._p("%s PI" % data) + +def htmlparser_trace(data): + """Print out the HTMLParser events that occur during parsing. + + This lets you see how HTMLParser parses a document when no + Beautiful Soup code is running. + """ + parser = AnnouncingParser() + parser.feed(data) + +_vowels = "aeiou" +_consonants = "bcdfghjklmnpqrstvwxyz" + +def rword(length=5): + "Generate a random word-like string." + s = '' + for i in range(length): + if i % 2 == 0: + t = _consonants + else: + t = _vowels + s += random.choice(t) + return s + +def rsentence(length=4): + "Generate a random sentence-like string." + return " ".join(rword(random.randint(4,9)) for i in range(length)) + +def rdoc(num_elements=1000): + """Randomly generate an invalid HTML document.""" + tag_names = ['p', 'div', 'span', 'i', 'b', 'script', 'table'] + elements = [] + for i in range(num_elements): + choice = random.randint(0,3) + if choice == 0: + # New tag. + tag_name = random.choice(tag_names) + elements.append("<%s>" % tag_name) + elif choice == 1: + elements.append(rsentence(random.randint(1,4))) + elif choice == 2: + # Close a tag. + tag_name = random.choice(tag_names) + elements.append("" % tag_name) + return "" + "\n".join(elements) + "" + +def benchmark_parsers(num_elements=100000): + """Very basic head-to-head performance benchmark.""" + print("Comparative parser benchmark on Beautiful Soup %s" % __version__) + data = rdoc(num_elements) + print("Generated a large invalid HTML document (%d bytes)." % len(data)) + + for parser in ["lxml", ["lxml", "html"], "html5lib", "html.parser"]: + success = False + try: + a = time.time() + soup = BeautifulSoup(data, parser) + b = time.time() + success = True + except Exception as e: + print("%s could not parse the markup." % parser) + traceback.print_exc() + if success: + print("BS4+%s parsed the markup in %.2fs." % (parser, b-a)) + + from lxml import etree + a = time.time() + etree.HTML(data) + b = time.time() + print("Raw lxml parsed the markup in %.2fs." % (b-a)) + + import html5lib + parser = html5lib.HTMLParser() + a = time.time() + parser.parse(data) + b = time.time() + print("Raw html5lib parsed the markup in %.2fs." % (b-a)) + +def profile(num_elements=100000, parser="lxml"): + + filehandle = tempfile.NamedTemporaryFile() + filename = filehandle.name + + data = rdoc(num_elements) + vars = dict(bs4=bs4, data=data, parser=parser) + cProfile.runctx('bs4.BeautifulSoup(data, parser)' , vars, vars, filename) + + stats = pstats.Stats(filename) + # stats.strip_dirs() + stats.sort_stats("cumulative") + stats.print_stats('_html5lib|bs4', 50) + +if __name__ == '__main__': + diagnose(sys.stdin.read()) diff --git a/venv/Lib/site-packages/bs4/element.py b/venv/Lib/site-packages/bs4/element.py new file mode 100644 index 0000000..a4a750d --- /dev/null +++ b/venv/Lib/site-packages/bs4/element.py @@ -0,0 +1,1808 @@ +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. +__license__ = "MIT" + +import collections +import re +import shlex +import sys +import warnings +from bs4.dammit import EntitySubstitution + +DEFAULT_OUTPUT_ENCODING = "utf-8" +PY3K = (sys.version_info[0] > 2) + +whitespace_re = re.compile("\s+") + +def _alias(attr): + """Alias one attribute name to another for backward compatibility""" + @property + def alias(self): + return getattr(self, attr) + + @alias.setter + def alias(self): + return setattr(self, attr) + return alias + + +class NamespacedAttribute(str): + + def __new__(cls, prefix, name, namespace=None): + if name is None: + obj = str.__new__(cls, prefix) + elif prefix is None: + # Not really namespaced. + obj = str.__new__(cls, name) + else: + obj = str.__new__(cls, prefix + ":" + name) + obj.prefix = prefix + obj.name = name + obj.namespace = namespace + return obj + +class AttributeValueWithCharsetSubstitution(str): + """A stand-in object for a character encoding specified in HTML.""" + +class CharsetMetaAttributeValue(AttributeValueWithCharsetSubstitution): + """A generic stand-in for the value of a meta tag's 'charset' attribute. + + When Beautiful Soup parses the markup '', the + value of the 'charset' attribute will be one of these objects. + """ + + def __new__(cls, original_value): + obj = str.__new__(cls, original_value) + obj.original_value = original_value + return obj + + def encode(self, encoding): + return encoding + + +class ContentMetaAttributeValue(AttributeValueWithCharsetSubstitution): + """A generic stand-in for the value of a meta tag's 'content' attribute. + + When Beautiful Soup parses the markup: + + + The value of the 'content' attribute will be one of these objects. + """ + + CHARSET_RE = re.compile("((^|;)\s*charset=)([^;]*)", re.M) + + def __new__(cls, original_value): + match = cls.CHARSET_RE.search(original_value) + if match is None: + # No substitution necessary. + return str.__new__(str, original_value) + + obj = str.__new__(cls, original_value) + obj.original_value = original_value + return obj + + def encode(self, encoding): + def rewrite(match): + return match.group(1) + encoding + return self.CHARSET_RE.sub(rewrite, self.original_value) + +class HTMLAwareEntitySubstitution(EntitySubstitution): + + """Entity substitution rules that are aware of some HTML quirks. + + Specifically, the contents of + +Hello, world! + + +''' + soup = self.soup(html) + self.assertEqual("text/javascript", soup.find('script')['type']) + + def test_comment(self): + # Comments are represented as Comment objects. + markup = "

foobaz

" + self.assertSoupEquals(markup) + + soup = self.soup(markup) + comment = soup.find(text="foobar") + self.assertEqual(comment.__class__, Comment) + + # The comment is properly integrated into the tree. + foo = soup.find(text="foo") + self.assertEqual(comment, foo.next_element) + baz = soup.find(text="baz") + self.assertEqual(comment, baz.previous_element) + + def test_preserved_whitespace_in_pre_and_textarea(self): + """Whitespace must be preserved in
 and "
+        self.assertSoupEquals(pre_markup)
+        self.assertSoupEquals(textarea_markup)
+
+        soup = self.soup(pre_markup)
+        self.assertEqual(soup.pre.prettify(), pre_markup)
+
+        soup = self.soup(textarea_markup)
+        self.assertEqual(soup.textarea.prettify(), textarea_markup)
+
+        soup = self.soup("")
+        self.assertEqual(soup.textarea.prettify(), "")
+
+    def test_nested_inline_elements(self):
+        """Inline elements can be nested indefinitely."""
+        b_tag = "Inside a B tag"
+        self.assertSoupEquals(b_tag)
+
+        nested_b_tag = "

A nested tag

" + self.assertSoupEquals(nested_b_tag) + + double_nested_b_tag = "

A doubly nested tag

" + self.assertSoupEquals(nested_b_tag) + + def test_nested_block_level_elements(self): + """Block elements can be nested.""" + soup = self.soup('

Foo

') + blockquote = soup.blockquote + self.assertEqual(blockquote.p.b.string, 'Foo') + self.assertEqual(blockquote.b.string, 'Foo') + + def test_correctly_nested_tables(self): + """One table can go inside another one.""" + markup = ('' + '' + "') + + self.assertSoupEquals( + markup, + '
Here's another table:" + '' + '' + '
foo
Here\'s another table:' + '
foo
' + '
') + + self.assertSoupEquals( + "" + "" + "
Foo
Bar
Baz
") + + def test_deeply_nested_multivalued_attribute(self): + # html5lib can set the attributes of the same tag many times + # as it rearranges the tree. This has caused problems with + # multivalued attributes. + markup = '
' + soup = self.soup(markup) + self.assertEqual(["css"], soup.div.div['class']) + + def test_multivalued_attribute_on_html(self): + # html5lib uses a different API to set the attributes ot the + # tag. This has caused problems with multivalued + # attributes. + markup = '' + soup = self.soup(markup) + self.assertEqual(["a", "b"], soup.html['class']) + + def test_angle_brackets_in_attribute_values_are_escaped(self): + self.assertSoupEquals('', '') + + def test_entities_in_attributes_converted_to_unicode(self): + expect = '

' + self.assertSoupEquals('

', expect) + self.assertSoupEquals('

', expect) + self.assertSoupEquals('

', expect) + self.assertSoupEquals('

', expect) + + def test_entities_in_text_converted_to_unicode(self): + expect = '

pi\N{LATIN SMALL LETTER N WITH TILDE}ata

' + self.assertSoupEquals("

piñata

", expect) + self.assertSoupEquals("

piñata

", expect) + self.assertSoupEquals("

piñata

", expect) + self.assertSoupEquals("

piñata

", expect) + + def test_quot_entity_converted_to_quotation_mark(self): + self.assertSoupEquals("

I said "good day!"

", + '

I said "good day!"

') + + def test_out_of_range_entity(self): + expect = "\N{REPLACEMENT CHARACTER}" + self.assertSoupEquals("�", expect) + self.assertSoupEquals("�", expect) + self.assertSoupEquals("�", expect) + + def test_multipart_strings(self): + "Mostly to prevent a recurrence of a bug in the html5lib treebuilder." + soup = self.soup("

\nfoo

") + self.assertEqual("p", soup.h2.string.next_element.name) + self.assertEqual("p", soup.p.name) + self.assertConnectedness(soup) + + def test_empty_element_tags(self): + """Verify consistent handling of empty-element tags, + no matter how they come in through the markup. + """ + self.assertSoupEquals('


', "


") + self.assertSoupEquals('


', "


") + + def test_head_tag_between_head_and_body(self): + "Prevent recurrence of a bug in the html5lib treebuilder." + content = """ + + foo + +""" + soup = self.soup(content) + self.assertNotEqual(None, soup.html.body) + self.assertConnectedness(soup) + + def test_multiple_copies_of_a_tag(self): + "Prevent recurrence of a bug in the html5lib treebuilder." + content = """ + + + + + +""" + soup = self.soup(content) + self.assertConnectedness(soup.article) + + def test_basic_namespaces(self): + """Parsers don't need to *understand* namespaces, but at the + very least they should not choke on namespaces or lose + data.""" + + markup = b'4' + soup = self.soup(markup) + self.assertEqual(markup, soup.encode()) + html = soup.html + self.assertEqual('http://www.w3.org/1999/xhtml', soup.html['xmlns']) + self.assertEqual( + 'http://www.w3.org/1998/Math/MathML', soup.html['xmlns:mathml']) + self.assertEqual( + 'http://www.w3.org/2000/svg', soup.html['xmlns:svg']) + + def test_multivalued_attribute_value_becomes_list(self): + markup = b'' + soup = self.soup(markup) + self.assertEqual(['foo', 'bar'], soup.a['class']) + + # + # Generally speaking, tests below this point are more tests of + # Beautiful Soup than tests of the tree builders. But parsers are + # weird, so we run these tests separately for every tree builder + # to detect any differences between them. + # + + def test_can_parse_unicode_document(self): + # A seemingly innocuous document... but it's in Unicode! And + # it contains characters that can't be represented in the + # encoding found in the declaration! The horror! + markup = 'Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!' + soup = self.soup(markup) + self.assertEqual('Sacr\xe9 bleu!', soup.body.string) + + def test_soupstrainer(self): + """Parsers should be able to work with SoupStrainers.""" + strainer = SoupStrainer("b") + soup = self.soup("A bold statement", + parse_only=strainer) + self.assertEqual(soup.decode(), "bold") + + def test_single_quote_attribute_values_become_double_quotes(self): + self.assertSoupEquals("", + '') + + def test_attribute_values_with_nested_quotes_are_left_alone(self): + text = """a""" + self.assertSoupEquals(text) + + def test_attribute_values_with_double_nested_quotes_get_quoted(self): + text = """a""" + soup = self.soup(text) + soup.foo['attr'] = 'Brawls happen at "Bob\'s Bar"' + self.assertSoupEquals( + soup.foo.decode(), + """a""") + + def test_ampersand_in_attribute_value_gets_escaped(self): + self.assertSoupEquals('', + '') + + self.assertSoupEquals( + 'foo', + 'foo') + + def test_escaped_ampersand_in_attribute_value_is_left_alone(self): + self.assertSoupEquals('') + + def test_entities_in_strings_converted_during_parsing(self): + # Both XML and HTML entities are converted to Unicode characters + # during parsing. + text = "

<<sacré bleu!>>

" + expected = "

<<sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>

" + self.assertSoupEquals(text, expected) + + def test_smart_quotes_converted_on_the_way_in(self): + # Microsoft smart quotes are converted to Unicode characters during + # parsing. + quote = b"

\x91Foo\x92

" + soup = self.soup(quote) + self.assertEqual( + soup.p.string, + "\N{LEFT SINGLE QUOTATION MARK}Foo\N{RIGHT SINGLE QUOTATION MARK}") + + def test_non_breaking_spaces_converted_on_the_way_in(self): + soup = self.soup("  ") + self.assertEqual(soup.a.string, "\N{NO-BREAK SPACE}" * 2) + + def test_entities_converted_on_the_way_out(self): + text = "

<<sacré bleu!>>

" + expected = "

<<sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>

".encode("utf-8") + soup = self.soup(text) + self.assertEqual(soup.p.encode("utf-8"), expected) + + def test_real_iso_latin_document(self): + # Smoke test of interrelated functionality, using an + # easy-to-understand document. + + # Here it is in Unicode. Note that it claims to be in ISO-Latin-1. + unicode_html = '

Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!

' + + # That's because we're going to encode it into ISO-Latin-1, and use + # that to test. + iso_latin_html = unicode_html.encode("iso-8859-1") + + # Parse the ISO-Latin-1 HTML. + soup = self.soup(iso_latin_html) + # Encode it to UTF-8. + result = soup.encode("utf-8") + + # What do we expect the result to look like? Well, it would + # look like unicode_html, except that the META tag would say + # UTF-8 instead of ISO-Latin-1. + expected = unicode_html.replace("ISO-Latin-1", "utf-8") + + # And, of course, it would be in UTF-8, not Unicode. + expected = expected.encode("utf-8") + + # Ta-da! + self.assertEqual(result, expected) + + def test_real_shift_jis_document(self): + # Smoke test to make sure the parser can handle a document in + # Shift-JIS encoding, without choking. + shift_jis_html = ( + b'
'
+            b'\x82\xb1\x82\xea\x82\xcdShift-JIS\x82\xc5\x83R\x81[\x83f'
+            b'\x83B\x83\x93\x83O\x82\xb3\x82\xea\x82\xbd\x93\xfa\x96{\x8c'
+            b'\xea\x82\xcc\x83t\x83@\x83C\x83\x8b\x82\xc5\x82\xb7\x81B'
+            b'
') + unicode_html = shift_jis_html.decode("shift-jis") + soup = self.soup(unicode_html) + + # Make sure the parse tree is correctly encoded to various + # encodings. + self.assertEqual(soup.encode("utf-8"), unicode_html.encode("utf-8")) + self.assertEqual(soup.encode("euc_jp"), unicode_html.encode("euc_jp")) + + def test_real_hebrew_document(self): + # A real-world test to make sure we can convert ISO-8859-9 (a + # Hebrew encoding) to UTF-8. + hebrew_document = b'Hebrew (ISO 8859-8) in Visual Directionality

Hebrew (ISO 8859-8) in Visual Directionality

\xed\xe5\xec\xf9' + soup = self.soup( + hebrew_document, from_encoding="iso8859-8") + # Some tree builders call it iso8859-8, others call it iso-8859-9. + # That's not a difference we really care about. + assert soup.original_encoding in ('iso8859-8', 'iso-8859-8') + self.assertEqual( + soup.encode('utf-8'), + hebrew_document.decode("iso8859-8").encode("utf-8")) + + def test_meta_tag_reflects_current_encoding(self): + # Here's the tag saying that a document is + # encoded in Shift-JIS. + meta_tag = ('') + + # Here's a document incorporating that meta tag. + shift_jis_html = ( + '\n%s\n' + '' + 'Shift-JIS markup goes here.') % meta_tag + soup = self.soup(shift_jis_html) + + # Parse the document, and the charset is seemingly unaffected. + parsed_meta = soup.find('meta', {'http-equiv': 'Content-type'}) + content = parsed_meta['content'] + self.assertEqual('text/html; charset=x-sjis', content) + + # But that value is actually a ContentMetaAttributeValue object. + self.assertTrue(isinstance(content, ContentMetaAttributeValue)) + + # And it will take on a value that reflects its current + # encoding. + self.assertEqual('text/html; charset=utf8', content.encode("utf8")) + + # For the rest of the story, see TestSubstitutions in + # test_tree.py. + + def test_html5_style_meta_tag_reflects_current_encoding(self): + # Here's the tag saying that a document is + # encoded in Shift-JIS. + meta_tag = ('') + + # Here's a document incorporating that meta tag. + shift_jis_html = ( + '\n%s\n' + '' + 'Shift-JIS markup goes here.') % meta_tag + soup = self.soup(shift_jis_html) + + # Parse the document, and the charset is seemingly unaffected. + parsed_meta = soup.find('meta', id="encoding") + charset = parsed_meta['charset'] + self.assertEqual('x-sjis', charset) + + # But that value is actually a CharsetMetaAttributeValue object. + self.assertTrue(isinstance(charset, CharsetMetaAttributeValue)) + + # And it will take on a value that reflects its current + # encoding. + self.assertEqual('utf8', charset.encode("utf8")) + + def test_tag_with_no_attributes_can_have_attributes_added(self): + data = self.soup("text") + data.a['foo'] = 'bar' + self.assertEqual('text', data.a.decode()) + +class XMLTreeBuilderSmokeTest(object): + + def test_pickle_and_unpickle_identity(self): + # Pickling a tree, then unpickling it, yields a tree identical + # to the original. + tree = self.soup("foo") + dumped = pickle.dumps(tree, 2) + loaded = pickle.loads(dumped) + self.assertEqual(loaded.__class__, BeautifulSoup) + self.assertEqual(loaded.decode(), tree.decode()) + + def test_docstring_generated(self): + soup = self.soup("") + self.assertEqual( + soup.encode(), b'\n') + + def test_xml_declaration(self): + markup = b"""\n""" + soup = self.soup(markup) + self.assertEqual(markup, soup.encode("utf8")) + + def test_processing_instruction(self): + markup = b"""\n""" + soup = self.soup(markup) + self.assertEqual(markup, soup.encode("utf8")) + + def test_real_xhtml_document(self): + """A real XHTML document should come out *exactly* the same as it went in.""" + markup = b""" + + +Hello. +Goodbye. +""" + soup = self.soup(markup) + self.assertEqual( + soup.encode("utf-8"), markup) + + def test_formatter_processes_script_tag_for_xml_documents(self): + doc = """ + +""" + soup = BeautifulSoup(doc, "lxml-xml") + # lxml would have stripped this while parsing, but we can add + # it later. + soup.script.string = 'console.log("< < hey > > ");' + encoded = soup.encode() + self.assertTrue(b"< < hey > >" in encoded) + + def test_can_parse_unicode_document(self): + markup = 'Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!' + soup = self.soup(markup) + self.assertEqual('Sacr\xe9 bleu!', soup.root.string) + + def test_popping_namespaced_tag(self): + markup = 'b2012-07-02T20:33:42Zcd' + soup = self.soup(markup) + self.assertEqual( + str(soup.rss), markup) + + def test_docstring_includes_correct_encoding(self): + soup = self.soup("") + self.assertEqual( + soup.encode("latin1"), + b'\n') + + def test_large_xml_document(self): + """A large XML document should come out the same as it went in.""" + markup = (b'\n' + + b'0' * (2**12) + + b'') + soup = self.soup(markup) + self.assertEqual(soup.encode("utf-8"), markup) + + + def test_tags_are_empty_element_if_and_only_if_they_are_empty(self): + self.assertSoupEquals("

", "

") + self.assertSoupEquals("

foo

") + + def test_namespaces_are_preserved(self): + markup = 'This tag is in the a namespaceThis tag is in the b namespace' + soup = self.soup(markup) + root = soup.root + self.assertEqual("http://example.com/", root['xmlns:a']) + self.assertEqual("http://example.net/", root['xmlns:b']) + + def test_closing_namespaced_tag(self): + markup = '

20010504

' + soup = self.soup(markup) + self.assertEqual(str(soup.p), markup) + + def test_namespaced_attributes(self): + markup = '' + soup = self.soup(markup) + self.assertEqual(str(soup.foo), markup) + + def test_namespaced_attributes_xml_namespace(self): + markup = 'bar' + soup = self.soup(markup) + self.assertEqual(str(soup.foo), markup) + + def test_find_by_prefixed_name(self): + doc = """ +foo + bar + baz + +""" + soup = self.soup(doc) + + # There are three tags. + self.assertEqual(3, len(soup.find_all('tag'))) + + # But two of them are ns1:tag and one of them is ns2:tag. + self.assertEqual(2, len(soup.find_all('ns1:tag'))) + self.assertEqual(1, len(soup.find_all('ns2:tag'))) + + self.assertEqual(1, len(soup.find_all('ns2:tag', key='value'))) + self.assertEqual(3, len(soup.find_all(['ns1:tag', 'ns2:tag']))) + + def test_copy_tag_preserves_namespace(self): + xml = """ +""" + + soup = self.soup(xml) + tag = soup.document + duplicate = copy.copy(tag) + + # The two tags have the same namespace prefix. + self.assertEqual(tag.prefix, duplicate.prefix) + + +class HTML5TreeBuilderSmokeTest(HTMLTreeBuilderSmokeTest): + """Smoke test for a tree builder that supports HTML5.""" + + def test_real_xhtml_document(self): + # Since XHTML is not HTML5, HTML5 parsers are not tested to handle + # XHTML documents in any particular way. + pass + + def test_html_tags_have_namespace(self): + markup = "" + soup = self.soup(markup) + self.assertEqual("http://www.w3.org/1999/xhtml", soup.a.namespace) + + def test_svg_tags_have_namespace(self): + markup = '' + soup = self.soup(markup) + namespace = "http://www.w3.org/2000/svg" + self.assertEqual(namespace, soup.svg.namespace) + self.assertEqual(namespace, soup.circle.namespace) + + + def test_mathml_tags_have_namespace(self): + markup = '5' + soup = self.soup(markup) + namespace = 'http://www.w3.org/1998/Math/MathML' + self.assertEqual(namespace, soup.math.namespace) + self.assertEqual(namespace, soup.msqrt.namespace) + + def test_xml_declaration_becomes_comment(self): + markup = '' + soup = self.soup(markup) + self.assertTrue(isinstance(soup.contents[0], Comment)) + self.assertEqual(soup.contents[0], '?xml version="1.0" encoding="utf-8"?') + self.assertEqual("html", soup.contents[0].next_element.name) + +def skipIf(condition, reason): + def nothing(test, *args, **kwargs): + return None + + def decorator(test_item): + if condition: + return nothing + else: + return test_item + + return decorator diff --git a/venv/Lib/site-packages/bs4/tests/__init__.py b/venv/Lib/site-packages/bs4/tests/__init__.py new file mode 100644 index 0000000..142c8cc --- /dev/null +++ b/venv/Lib/site-packages/bs4/tests/__init__.py @@ -0,0 +1 @@ +"The beautifulsoup tests." diff --git a/venv/Lib/site-packages/bs4/tests/__pycache__/__init__.cpython-38.pyc b/venv/Lib/site-packages/bs4/tests/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..0d1d2cb Binary files /dev/null and b/venv/Lib/site-packages/bs4/tests/__pycache__/__init__.cpython-38.pyc differ diff --git a/venv/Lib/site-packages/bs4/tests/__pycache__/test_builder_registry.cpython-38.pyc b/venv/Lib/site-packages/bs4/tests/__pycache__/test_builder_registry.cpython-38.pyc new file mode 100644 index 0000000..2444156 Binary files /dev/null and b/venv/Lib/site-packages/bs4/tests/__pycache__/test_builder_registry.cpython-38.pyc differ diff --git a/venv/Lib/site-packages/bs4/tests/__pycache__/test_docs.cpython-38.pyc b/venv/Lib/site-packages/bs4/tests/__pycache__/test_docs.cpython-38.pyc new file mode 100644 index 0000000..969a0a4 Binary files /dev/null and b/venv/Lib/site-packages/bs4/tests/__pycache__/test_docs.cpython-38.pyc differ diff --git a/venv/Lib/site-packages/bs4/tests/__pycache__/test_html5lib.cpython-38.pyc b/venv/Lib/site-packages/bs4/tests/__pycache__/test_html5lib.cpython-38.pyc new file mode 100644 index 0000000..c2de051 Binary files /dev/null and b/venv/Lib/site-packages/bs4/tests/__pycache__/test_html5lib.cpython-38.pyc differ diff --git a/venv/Lib/site-packages/bs4/tests/__pycache__/test_htmlparser.cpython-38.pyc b/venv/Lib/site-packages/bs4/tests/__pycache__/test_htmlparser.cpython-38.pyc new file mode 100644 index 0000000..9408d7b Binary files /dev/null and b/venv/Lib/site-packages/bs4/tests/__pycache__/test_htmlparser.cpython-38.pyc differ diff --git a/venv/Lib/site-packages/bs4/tests/__pycache__/test_lxml.cpython-38.pyc b/venv/Lib/site-packages/bs4/tests/__pycache__/test_lxml.cpython-38.pyc new file mode 100644 index 0000000..d59ff2e Binary files /dev/null and b/venv/Lib/site-packages/bs4/tests/__pycache__/test_lxml.cpython-38.pyc differ diff --git a/venv/Lib/site-packages/bs4/tests/__pycache__/test_soup.cpython-38.pyc b/venv/Lib/site-packages/bs4/tests/__pycache__/test_soup.cpython-38.pyc new file mode 100644 index 0000000..e9f5a0d Binary files /dev/null and b/venv/Lib/site-packages/bs4/tests/__pycache__/test_soup.cpython-38.pyc differ diff --git a/venv/Lib/site-packages/bs4/tests/__pycache__/test_tree.cpython-38.pyc b/venv/Lib/site-packages/bs4/tests/__pycache__/test_tree.cpython-38.pyc new file mode 100644 index 0000000..c478a4d Binary files /dev/null and b/venv/Lib/site-packages/bs4/tests/__pycache__/test_tree.cpython-38.pyc differ diff --git a/venv/Lib/site-packages/bs4/tests/test_builder_registry.py b/venv/Lib/site-packages/bs4/tests/test_builder_registry.py new file mode 100644 index 0000000..90cad82 --- /dev/null +++ b/venv/Lib/site-packages/bs4/tests/test_builder_registry.py @@ -0,0 +1,147 @@ +"""Tests of the builder registry.""" + +import unittest +import warnings + +from bs4 import BeautifulSoup +from bs4.builder import ( + builder_registry as registry, + HTMLParserTreeBuilder, + TreeBuilderRegistry, +) + +try: + from bs4.builder import HTML5TreeBuilder + HTML5LIB_PRESENT = True +except ImportError: + HTML5LIB_PRESENT = False + +try: + from bs4.builder import ( + LXMLTreeBuilderForXML, + LXMLTreeBuilder, + ) + LXML_PRESENT = True +except ImportError: + LXML_PRESENT = False + + +class BuiltInRegistryTest(unittest.TestCase): + """Test the built-in registry with the default builders registered.""" + + def test_combination(self): + if LXML_PRESENT: + self.assertEqual(registry.lookup('fast', 'html'), + LXMLTreeBuilder) + + if LXML_PRESENT: + self.assertEqual(registry.lookup('permissive', 'xml'), + LXMLTreeBuilderForXML) + self.assertEqual(registry.lookup('strict', 'html'), + HTMLParserTreeBuilder) + if HTML5LIB_PRESENT: + self.assertEqual(registry.lookup('html5lib', 'html'), + HTML5TreeBuilder) + + def test_lookup_by_markup_type(self): + if LXML_PRESENT: + self.assertEqual(registry.lookup('html'), LXMLTreeBuilder) + self.assertEqual(registry.lookup('xml'), LXMLTreeBuilderForXML) + else: + self.assertEqual(registry.lookup('xml'), None) + if HTML5LIB_PRESENT: + self.assertEqual(registry.lookup('html'), HTML5TreeBuilder) + else: + self.assertEqual(registry.lookup('html'), HTMLParserTreeBuilder) + + def test_named_library(self): + if LXML_PRESENT: + self.assertEqual(registry.lookup('lxml', 'xml'), + LXMLTreeBuilderForXML) + self.assertEqual(registry.lookup('lxml', 'html'), + LXMLTreeBuilder) + if HTML5LIB_PRESENT: + self.assertEqual(registry.lookup('html5lib'), + HTML5TreeBuilder) + + self.assertEqual(registry.lookup('html.parser'), + HTMLParserTreeBuilder) + + def test_beautifulsoup_constructor_does_lookup(self): + + with warnings.catch_warnings(record=True) as w: + # This will create a warning about not explicitly + # specifying a parser, but we'll ignore it. + + # You can pass in a string. + BeautifulSoup("", features="html") + # Or a list of strings. + BeautifulSoup("", features=["html", "fast"]) + + # You'll get an exception if BS can't find an appropriate + # builder. + self.assertRaises(ValueError, BeautifulSoup, + "", features="no-such-feature") + +class RegistryTest(unittest.TestCase): + """Test the TreeBuilderRegistry class in general.""" + + def setUp(self): + self.registry = TreeBuilderRegistry() + + def builder_for_features(self, *feature_list): + cls = type('Builder_' + '_'.join(feature_list), + (object,), {'features' : feature_list}) + + self.registry.register(cls) + return cls + + def test_register_with_no_features(self): + builder = self.builder_for_features() + + # Since the builder advertises no features, you can't find it + # by looking up features. + self.assertEqual(self.registry.lookup('foo'), None) + + # But you can find it by doing a lookup with no features, if + # this happens to be the only registered builder. + self.assertEqual(self.registry.lookup(), builder) + + def test_register_with_features_makes_lookup_succeed(self): + builder = self.builder_for_features('foo', 'bar') + self.assertEqual(self.registry.lookup('foo'), builder) + self.assertEqual(self.registry.lookup('bar'), builder) + + def test_lookup_fails_when_no_builder_implements_feature(self): + builder = self.builder_for_features('foo', 'bar') + self.assertEqual(self.registry.lookup('baz'), None) + + def test_lookup_gets_most_recent_registration_when_no_feature_specified(self): + builder1 = self.builder_for_features('foo') + builder2 = self.builder_for_features('bar') + self.assertEqual(self.registry.lookup(), builder2) + + def test_lookup_fails_when_no_tree_builders_registered(self): + self.assertEqual(self.registry.lookup(), None) + + def test_lookup_gets_most_recent_builder_supporting_all_features(self): + has_one = self.builder_for_features('foo') + has_the_other = self.builder_for_features('bar') + has_both_early = self.builder_for_features('foo', 'bar', 'baz') + has_both_late = self.builder_for_features('foo', 'bar', 'quux') + lacks_one = self.builder_for_features('bar') + has_the_other = self.builder_for_features('foo') + + # There are two builders featuring 'foo' and 'bar', but + # the one that also features 'quux' was registered later. + self.assertEqual(self.registry.lookup('foo', 'bar'), + has_both_late) + + # There is only one builder featuring 'foo', 'bar', and 'baz'. + self.assertEqual(self.registry.lookup('foo', 'bar', 'baz'), + has_both_early) + + def test_lookup_fails_when_cannot_reconcile_requested_features(self): + builder1 = self.builder_for_features('foo', 'bar') + builder2 = self.builder_for_features('foo', 'baz') + self.assertEqual(self.registry.lookup('bar', 'baz'), None) diff --git a/venv/Lib/site-packages/bs4/tests/test_docs.py b/venv/Lib/site-packages/bs4/tests/test_docs.py new file mode 100644 index 0000000..5b9f677 --- /dev/null +++ b/venv/Lib/site-packages/bs4/tests/test_docs.py @@ -0,0 +1,36 @@ +"Test harness for doctests." + +# pylint: disable-msg=E0611,W0142 + +__metaclass__ = type +__all__ = [ + 'additional_tests', + ] + +import atexit +import doctest +import os +#from pkg_resources import ( +# resource_filename, resource_exists, resource_listdir, cleanup_resources) +import unittest + +DOCTEST_FLAGS = ( + doctest.ELLIPSIS | + doctest.NORMALIZE_WHITESPACE | + doctest.REPORT_NDIFF) + + +# def additional_tests(): +# "Run the doc tests (README.txt and docs/*, if any exist)" +# doctest_files = [ +# os.path.abspath(resource_filename('bs4', 'README.txt'))] +# if resource_exists('bs4', 'docs'): +# for name in resource_listdir('bs4', 'docs'): +# if name.endswith('.txt'): +# doctest_files.append( +# os.path.abspath( +# resource_filename('bs4', 'docs/%s' % name))) +# kwargs = dict(module_relative=False, optionflags=DOCTEST_FLAGS) +# atexit.register(cleanup_resources) +# return unittest.TestSuite(( +# doctest.DocFileSuite(*doctest_files, **kwargs))) diff --git a/venv/Lib/site-packages/bs4/tests/test_html5lib.py b/venv/Lib/site-packages/bs4/tests/test_html5lib.py new file mode 100644 index 0000000..81fb7d3 --- /dev/null +++ b/venv/Lib/site-packages/bs4/tests/test_html5lib.py @@ -0,0 +1,130 @@ +"""Tests to ensure that the html5lib tree builder generates good trees.""" + +import warnings + +try: + from bs4.builder import HTML5TreeBuilder + HTML5LIB_PRESENT = True +except ImportError as e: + HTML5LIB_PRESENT = False +from bs4.element import SoupStrainer +from bs4.testing import ( + HTML5TreeBuilderSmokeTest, + SoupTest, + skipIf, +) + +@skipIf( + not HTML5LIB_PRESENT, + "html5lib seems not to be present, not testing its tree builder.") +class HTML5LibBuilderSmokeTest(SoupTest, HTML5TreeBuilderSmokeTest): + """See ``HTML5TreeBuilderSmokeTest``.""" + + @property + def default_builder(self): + return HTML5TreeBuilder() + + def test_soupstrainer(self): + # The html5lib tree builder does not support SoupStrainers. + strainer = SoupStrainer("b") + markup = "

A bold statement.

" + with warnings.catch_warnings(record=True) as w: + soup = self.soup(markup, parse_only=strainer) + self.assertEqual( + soup.decode(), self.document_for(markup)) + + self.assertTrue( + "the html5lib tree builder doesn't support parse_only" in + str(w[0].message)) + + def test_correctly_nested_tables(self): + """html5lib inserts tags where other parsers don't.""" + markup = ('' + '' + "') + + self.assertSoupEquals( + markup, + '
Here's another table:" + '' + '' + '
foo
Here\'s another table:' + '
foo
' + '
') + + self.assertSoupEquals( + "" + "" + "
Foo
Bar
Baz
") + + def test_xml_declaration_followed_by_doctype(self): + markup = ''' + + + + + +

foo

+ +''' + soup = self.soup(markup) + # Verify that we can reach the

tag; this means the tree is connected. + self.assertEqual(b"

foo

", soup.p.encode()) + + def test_reparented_markup(self): + markup = '

foo

\n

bar

' + soup = self.soup(markup) + self.assertEqual("

foo

\n

bar

", soup.body.decode()) + self.assertEqual(2, len(soup.find_all('p'))) + + + def test_reparented_markup_ends_with_whitespace(self): + markup = '

foo

\n

bar

\n' + soup = self.soup(markup) + self.assertEqual("

foo

\n

bar

\n", soup.body.decode()) + self.assertEqual(2, len(soup.find_all('p'))) + + def test_reparented_markup_containing_identical_whitespace_nodes(self): + """Verify that we keep the two whitespace nodes in this + document distinct when reparenting the adjacent tags. + """ + markup = '
' + soup = self.soup(markup) + space1, space2 = soup.find_all(string=' ') + tbody1, tbody2 = soup.find_all('tbody') + assert space1.next_element is tbody1 + assert tbody2.next_element is space2 + + def test_reparented_markup_containing_children(self): + markup = '' + soup = self.soup(markup) + noscript = soup.noscript + self.assertEqual("target", noscript.next_element) + target = soup.find(string='target') + + # The 'aftermath' string was duplicated; we want the second one. + final_aftermath = soup.find_all(string='aftermath')[-1] + + # The