This repository has been archived by the owner on Jun 10, 2024. It is now read-only.

Fix typos #977

Open · wants to merge 1 commit into base: master

4 changes: 2 additions & 2 deletions docs/Deployment-demo.pyspider.org.md
@@ -112,15 +112,15 @@ With the config, you can change the scale by `docker-compose scale phantomjs=2 p

#### load balance

-phantomjs-lb, fetcher-lb, webui-lb are automaticlly configed haproxy, allow any number of upstreams.
+phantomjs-lb, fetcher-lb, webui-lb are automatically configured haproxy, allow any number of upstreams.

#### phantomjs

phantomjs have memory leak issue, memory limit applied, and it's recommended to restart it every hour.

#### fetcher

-fetcher is implemented with aync IO, it supportes 100 concurrent connections. If the upstream queue are not choked, one fetcher should be enough.
+fetcher is implemented with aync IO, it supports 100 concurrent connections. If the upstream queue are not choked, one fetcher should be enough.
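
As a point of reference, here is a minimal sketch of how a tornado-based fetcher caps concurrency. `max_clients` is tornado's real knob and 100 mirrors the figure above, but treat this as an illustration rather than pyspider's exact setup:

```
from tornado.httpclient import AsyncHTTPClient

# Cap the shared HTTP client at 100 concurrent connections; requests
# beyond that queue internally instead of opening new sockets.
AsyncHTTPClient.configure(None, max_clients=100)
client = AsyncHTTPClient()
```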

#### processor

2 changes: 1 addition & 1 deletion docs/Frequently-Asked-Questions.md
@@ -56,4 +56,4 @@ You can have only have one scheduler, and multiple fetcher/processor/result_work

For example, the number between scheduler and fetcher indicate the queue size of scheduler to fetchers, when it's hitting 100 (default maximum queue size), fetcher might crashed, or you should considered adding more fetchers.

-The number `0+0` below fetcher indicate the queue size of new tasks and status packs between processors and schduler. You can put your mouse over the numbers to see the tips.
+The number `0+0` below fetcher indicate the queue size of new tasks and status packs between processors and scheduler. You can put your mouse over the numbers to see the tips.
2 changes: 1 addition & 1 deletion docs/Quickstart.md
@@ -18,7 +18,7 @@ to install binary packages first.

please install PhantomJS if needed: http://phantomjs.org/build.html

-note that PhantomJS will be enabled only if it is excutable in the `PATH` or in the System Environment
+note that PhantomJS will be enabled only if it is executable in the `PATH` or in the System Environment
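
A quick way to verify that condition yourself, sketched with the stdlib (`shutil.which` is illustrative here, not necessarily the check pyspider performs internally):

```
import shutil

# PhantomJS support is enabled only when the binary is discoverable on PATH
if shutil.which('phantomjs'):
    print('phantomjs found; the JavaScript fetcher can be enabled')
else:
    print('phantomjs missing; the JavaScript fetcher stays disabled')
```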

**Note:** `pyspider` command is running pyspider in `all` mode, which running components in threads or subprocesses. For production environment, please refer to [Deployment](Deployment).

4 changes: 2 additions & 2 deletions docs/tutorial/Render-with-PhantomJS.md
@@ -3,7 +3,7 @@ Level 3: Render with PhantomJS

Sometimes web page is too complex to find out the API request. It's time to meet the power of [PhantomJS].

-To use PhantomJS, you should have PhantomJS [installed](http://phantomjs.org/download.html). If you are running pyspider with `all` mode, PhantomJS is enabled if excutable in the `PATH`.
+To use PhantomJS, you should have PhantomJS [installed](http://phantomjs.org/download.html). If you are running pyspider with `all` mode, PhantomJS is enabled if executable in the `PATH`.

Make sure phantomjs is working by running it on the command line.

@@ -43,7 +43,7 @@ Running JavaScript on Page

We will try to scrape images from [http://www.pinterest.com/categories/popular/](http://www.pinterest.com/categories/popular/) in this section. Only 25 images is shown at the beginning, more images would be loaded when you scroll to the bottom of the page.

-To scrape images as many as posible we can use a [`js_script` parameter](/apis/self.crawl/#enable-javascript-fetcher-need-support-by-fetcher) to set some function wrapped JavaScript codes to simulate the scroll action:
+To scrape images as many as possible we can use a [`js_script` parameter](/apis/self.crawl/#enable-javascript-fetcher-need-support-by-fetcher) to set some function wrapped JavaScript codes to simulate the scroll action:

```
class Handler(BaseHandler):
```
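
For context, the handler in this hunk continues roughly as below. `fetch_type='js'` and `js_script` are documented `self.crawl` parameters, but the script body and `index_page` callback are a reconstruction, not the verbatim docs:

```
from pyspider.libs.base_handler import *

class Handler(BaseHandler):
    def on_start(self):
        self.crawl('http://www.pinterest.com/categories/popular/',
                   fetch_type='js',
                   js_script='''
                   function() {
                       window.scrollTo(0, document.body.scrollHeight);
                   }''',
                   callback=self.index_page)

    def index_page(self, response):
        # collect the image URLs loaded after the simulated scroll
        return [img.attr.src for img in response.doc('img').items()]
```
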
2 changes: 1 addition & 1 deletion pyspider/database/__init__.py
@@ -185,7 +185,7 @@ def _connect_sqlalchemy(parsed, dbtype,url, other_scheme):


def _connect_elasticsearch(parsed, dbtype):
-# in python 2.6 url like "http://host/?query", query will not been splitted
+# in python 2.6 url like "http://host/?query", query will not been split
if parsed.path.startswith('/?'):
index = parse_qs(parsed.path[2:])
else:
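
To see why the `parsed.path.startswith('/?')` branch exists, a small demonstration of the difference the comment describes (illustrative values):

```
from six.moves.urllib.parse import urlparse, parse_qs

# Modern Pythons split the query off into .query ...
parsed = urlparse('http://host/?index=test')
print(parsed.path, parsed.query)   # '/' 'index=test'

# ... but Python 2.6 left it inside .path as '/?index=test',
# which is why the code falls back to parse_qs(parsed.path[2:])
print(parse_qs('index=test'))      # {'index': ['test']}
```
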
2 changes: 1 addition & 1 deletion pyspider/database/basedb.py
@@ -19,7 +19,7 @@ class BaseDB:
'''
BaseDB

-dbcur should be overwirte
+dbcur should be overwrite
'''
__tablename__ = None
placeholder = '%s'
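
The contract that docstring describes, sketched for a hypothetical subclass (the names here are illustrative, not an actual pyspider backend):

```
import sqlite3

class ExampleDB(BaseDB):          # BaseDB as defined above
    __tablename__ = 'example'
    placeholder = '?'             # sqlite parameters use ? instead of %s

    def __init__(self, path=':memory:'):
        self.conn = sqlite3.connect(path)

    @property
    def dbcur(self):
        # concrete backends override dbcur to hand BaseDB a live cursor
        return self.conn.cursor()
```
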
2 changes: 1 addition & 1 deletion pyspider/fetcher/tornado_fetcher.py
@@ -265,7 +265,7 @@ def pack_tornado_request_parameters(self, url, task):
_t = track_headers.get('etag')
if _t and 'If-None-Match' not in fetch['headers']:
fetch['headers']['If-None-Match'] = _t
-# last modifed
+# last modified
if task_fetch.get('last_modified', task_fetch.get('last_modifed', True)):
last_modified = task_fetch.get('last_modified', task_fetch.get('last_modifed', True))
_t = None
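
Both the etag branch above and the last-modified branch that follows implement standard HTTP revalidation; a compact, self-contained sketch of the pattern (all values hypothetical):

```
fetch = {'headers': {}}
etag = '"abc123"'                                # from a previous response
last_modified = 'Tue, 01 Jan 2019 00:00:00 GMT'  # likewise remembered

if etag and 'If-None-Match' not in fetch['headers']:
    fetch['headers']['If-None-Match'] = etag
if last_modified and 'If-Modified-Since' not in fetch['headers']:
    fetch['headers']['If-Modified-Since'] = last_modified
# a 304 Not Modified response then means the cached copy is still valid
```
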
4 changes: 2 additions & 2 deletions pyspider/libs/base_handler.py
@@ -440,7 +440,7 @@ def _on_cronjob(self, response, task):

# When triggered, a '_on_cronjob' task is sent from scheudler with 'tick' in
# Response.save. Scheduler may at least send the trigger task every GCD of the
-# inverval of the cronjobs. The method should check the tick for each cronjob
+# interval of the cronjobs. The method should check the tick for each cronjob
# function to confirm the execute interval.
for cronjob in self._cron_jobs:
if response.save['tick'] % cronjob.tick != 0:
@@ -449,7 +449,7 @@ def _on_cronjob(self, response, task):
self._run_func(function, response, task)

def _on_get_info(self, response, task):
"""Sending runtime infomation about this script."""
"""Sending runtime information about this script."""
for each in response.save or []:
if each == 'min_tick':
self.save[each] = self._min_tick
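
The scheduling argument in that comment, in miniature (the intervals are made up; the point is that one trigger stream at the GCD serves every cronjob):

```
import math
from functools import reduce

intervals = [60, 90]                    # hypothetical cronjob intervals
trigger = reduce(math.gcd, intervals)   # scheduler ticks every 30 seconds

for tick in range(trigger, 181, trigger):
    due = [i for i in intervals if tick % i == 0]
    print(tick, due)   # each job fires exactly on its own interval
```
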
2 changes: 1 addition & 1 deletion pyspider/libs/response.py
@@ -156,7 +156,7 @@ def etree(self):
except LookupError:
# lxml would raise LookupError when encoding not supported
# try fromstring without encoding instead.
-# on windows, unicode is not availabe as encoding for lxml
+# on windows, unicode is not available as encoding for lxml
self._elements = lxml.html.fromstring(self.content)
if isinstance(self._elements, lxml.etree._ElementTree):
self._elements = self._elements.getroot()
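
A self-contained sketch of the fallback this hunk documents ('no-such-encoding' stands in for an encoding lxml cannot look up, such as 'unicode' on Windows):

```
import lxml.html

content = b'<html><body><p>hi</p></body></html>'
try:
    parser = lxml.html.HTMLParser(encoding='no-such-encoding')
    elements = lxml.html.fromstring(content, parser=parser)
except LookupError:
    # lxml signals an unsupported encoding with LookupError;
    # retry without one and let lxml detect the encoding itself
    elements = lxml.html.fromstring(content)
print(elements.tag)   # 'html'
```
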
8 changes: 4 additions & 4 deletions pyspider/message_queue/rabbitmq.py
@@ -68,11 +68,11 @@ def __init__(self, name, amqp_url='amqp://guest:guest@localhost:5672/%2F',
amqp_url: https://www.rabbitmq.com/uri-spec.html
maxsize: an integer that sets the upperbound limit on the number of
items that can be placed in the queue.
-lazy_limit: as rabbitmq is shared between multipul instance, for a strict
+lazy_limit: as rabbitmq is shared between multiple instance, for a strict
limit on the number of items in the queue. PikaQueue have to
update current queue size before every put operation. When
`lazy_limit` is enabled, PikaQueue will check queue size every
-max_size / 10 put operation for better performace.
+max_size / 10 put operation for better performance.
"""
self.name = name
self.amqp_url = amqp_url
@@ -201,11 +201,11 @@ def __init__(self, name, amqp_url='amqp://guest:guest@localhost:5672/%2F',
amqp_url: https://www.rabbitmq.com/uri-spec.html
maxsize: an integer that sets the upperbound limit on the number of
items that can be placed in the queue.
-lazy_limit: as rabbitmq is shared between multipul instance, for a strict
+lazy_limit: as rabbitmq is shared between multiple instance, for a strict
limit on the number of items in the queue. PikaQueue have to
update current queue size before every put operation. When
`lazy_limit` is enabled, PikaQueue will check queue size every
-max_size / 10 put operation for better performace.
+max_size / 10 put operation for better performance.
"""
self.name = name
self.amqp_url = amqp_url
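
The lazy-limit strategy from those docstrings, reduced to its core (a sketch; `remote_qsize` stands in for the broker round-trip, e.g. a passive `queue_declare`):

```
class LazyLimitedPutter(object):
    def __init__(self, maxsize=100):
        self.maxsize = maxsize
        self.qsize_diff = 0                         # puts since last sync
        self.qsize_diff_limit = int(maxsize * 0.1)  # re-check every maxsize/10

    def remote_qsize(self):
        return 0   # hypothetical broker call to fetch the real queue depth

    def put(self, item):
        self.qsize_diff += 1
        if self.qsize_diff > self.qsize_diff_limit:
            # only now pay for a round-trip to learn the true queue size
            if self.remote_qsize() + self.qsize_diff >= self.maxsize:
                raise RuntimeError('queue full')
            self.qsize_diff = 0
        # ... publish item to the broker here
```
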
6 changes: 3 additions & 3 deletions pyspider/processor/processor.py
@@ -21,7 +21,7 @@


class ProcessorResult(object):
"""The result and logs producted by a callback"""
"""The result and logs produced by a callback"""

def __init__(self, result=None, follows=(), messages=(),
logs=(), exception=None, extinfo=None, save=None):
@@ -45,7 +45,7 @@ def logstr(self):
"""handler the log records to formatted string"""

result = []
-formater = LogFormatter(color=False)
+formatter = LogFormatter(color=False)
for record in self.logs:
if isinstance(record, six.string_types):
result.append(pretty_unicode(record))
@@ -54,7 +54,7 @@ def logstr(self):
a, b, tb = record.exc_info
tb = hide_me(tb, globals())
record.exc_info = a, b, tb
-result.append(pretty_unicode(formater.format(record)))
+result.append(pretty_unicode(formatter.format(record)))
result.append(u'\n')
return u''.join(result)

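
The pattern `logstr()` implements, with the stdlib's `logging.Formatter` standing in for pyspider's `LogFormatter` (a hedged sketch):

```
import logging

formatter = logging.Formatter('[%(levelname)s] %(message)s')
records = [
    logging.LogRecord('demo', logging.INFO, __file__, 1,
                      'processed %d items', (3,), None),
]
print(u'\n'.join(formatter.format(r) for r in records))
```
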
4 changes: 2 additions & 2 deletions pyspider/result/result_worker.py
@@ -38,7 +38,7 @@ def on_result(self, task, result):
result=result
)
else:
-logger.warning('result UNKNOW -> %.30r' % result)
+logger.warning('result UNKNOWN -> %.30r' % result)
return

def quit(self):
@@ -83,5 +83,5 @@ def on_result(self, task, result):
'updatetime': time.time()
}))
else:
-logger.warning('result UNKNOW -> %.30r' % result)
+logger.warning('result UNKNOWN -> %.30r' % result)
return
4 changes: 2 additions & 2 deletions pyspider/scheduler/scheduler.py
@@ -245,7 +245,7 @@ def _update_project(self, project):
},
})

-# load task queue when project is running and delete task_queue when project is stoped
+# load task queue when project is running and delete task_queue when project is stopped
if project.active:
if not project.task_loaded:
self._load_tasks(project)
@@ -989,7 +989,7 @@ def on_task_failed(self, task):

def on_select_task(self, task):
'''Called when a task is selected to fetch & process'''
-# inject informations about project
+# inject information about project
logger.info('select %(project)s:%(taskid)s %(url)s', task)

project_info = self.projects.get(task['project'])
2 changes: 1 addition & 1 deletion tests/test_message_queue.py
@@ -73,7 +73,7 @@ def setUpClass(self):
self.q3 = connect_message_queue('test_queue_for_threading_test')


-#@unittest.skipIf(six.PY3, 'pika not suport python 3')
+#@unittest.skipIf(six.PY3, 'pika not support python 3')
@unittest.skipIf(os.environ.get('IGNORE_RABBITMQ') or os.environ.get('IGNORE_ALL'), 'no rabbitmq server for test.')
class TestPikaRabbitMQ(TestMessageQueue, unittest.TestCase):
