From d16a1cd3f075725b3e249c64291e69404acd8e63 Mon Sep 17 00:00:00 2001
From: Eyitayo Ogunbiyi <eyitayoogunbiyi@gmail.com>
Date: Wed, 24 Jun 2020 15:27:24 +0100
Subject: [PATCH 01/15] Initial refactor of howdoi.py into separate plugin

Co-authored-by: c.decal@campus.unimib.it <c.decal@campus.unimib.it>
---
 howdoi/howdoi.py             |  14 +-
 howdoi/plugins/BasePlugin.py | 301 +++++++++++++++++++++++++++++++++++
 howdoi/plugins/__init__.py   |   0
 3 files changed, 310 insertions(+), 5 deletions(-)
 create mode 100644 howdoi/plugins/BasePlugin.py
 create mode 100644 howdoi/plugins/__init__.py

diff --git a/howdoi/howdoi.py b/howdoi/howdoi.py
index 235a52562..adf82d4bc 100755
--- a/howdoi/howdoi.py
+++ b/howdoi/howdoi.py
@@ -30,6 +30,8 @@
 from requests.exceptions import ConnectionError
 from requests.exceptions import SSLError
 
+from howdoi.plugins import BasePlugin
+
 # Handle imports for Python 2 and 3
 if sys.version < '3':
     import codecs
@@ -365,8 +367,8 @@ def _get_answers(args):
             answer = ANSWER_HEADER.format(link, answer, STAR_HEADER)
         answer += '\n'
         answers.append({
-            'answer': answer, 
-            'link': link, 
+            'answer': answer,
+            'link': link,
             'position': current_position
         })
 
@@ -393,13 +395,13 @@ def _format_answers(res, args):
         return json.dumps(res)
 
     formatted_answers = []
-    
+
     for answer in res:
         next_ans = answer["answer"]
         if args["link"]:  # if we only want links
             next_ans = answer["link"]
         formatted_answers.append(next_ans)
-    
+
     return build_splitter().join(formatted_answers)
 
 
@@ -443,7 +445,8 @@ def howdoi(raw_query):
         return _format_answers(res, args)
 
     try:
-        res = _get_answers(args)
+        plugin = BasePlugin.BasePlugin()
+        res = plugin.search()
         if not res:
             res = {"error": "Sorry, couldn\'t find any help with that topic\n"}
         cache.set(cache_key, res)
@@ -469,6 +472,7 @@ def get_parser():
                         action='store_true')
     parser.add_argument('-e', '--engine', help='change search engine for this query only (google, bing, duckduckgo)',
                         dest='search_engine', nargs="?", default='google')
+    parser.add_argument('--plugin', help='use the base plugin', type=str, default='stackoverflow')
     return parser
 
 
diff --git a/howdoi/plugins/BasePlugin.py b/howdoi/plugins/BasePlugin.py
new file mode 100644
index 000000000..de817f952
--- /dev/null
+++ b/howdoi/plugins/BasePlugin.py
@@ -0,0 +1,301 @@
+import os
+import re
+import sys
+
+import appdirs
+
+from cachelib import FileSystemCache, NullCache
+
+from pyquery import PyQuery as pq
+
+from pygments import highlight
+from pygments.formatters.terminal import TerminalFormatter
+from pygments.lexers import get_lexer_by_name, guess_lexer
+from pygments.util import ClassNotFound
+
+
+class BlockError(RuntimeError):
+    pass
+
+
+# Handle imports for Python 2 and 3
+if sys.version < '3':
+    import codecs
+    from urllib import quote as url_quote
+    from urllib import getproxies
+    from urlparse import urlparse, parse_qs
+
+    # Handling Unicode: http://stackoverflow.com/a/6633040/305414
+    def u(x):
+        return codecs.unicode_escape_decode(x)[0]
+else:
+    from urllib.request import getproxies
+    from urllib.parse import quote as url_quote, urlparse, parse_qs
+
+    def u(x):
+        return x
+
+# rudimentary standardized 3-level log output
+
+
+def _print_err(x):
+    print("[ERROR] " + x)
+
+
+_print_ok = print  # noqa: E305
+
+
+def _print_dbg(x):
+    print("[DEBUG] " + x)  # noqa: E302
+
+
+# CACHE_EMPTY_VAL = "NULL"
+CACHE_DIR = appdirs.user_cache_dir('howdoi')
+CACHE_ENTRY_MAX = 128
+
+if os.getenv('HOWDOI_DISABLE_CACHE'):
+    cache = NullCache()  # works like an always empty cache
+else:
+    cache = FileSystemCache(CACHE_DIR, CACHE_ENTRY_MAX, default_timeout=0)
+
+ANSWER_HEADER = u('{2}  Answer from {0} {2}\n{1}')
+STAR_HEADER = u('\u2605')
+CACHE_EMPTY_VAL = "NULL"
+NO_ANSWER_MSG = '< no answer given >'
+
+if os.getenv('HOWDOI_DISABLE_SSL'):  # Set http instead of https
+    SCHEME = 'http://'
+    VERIFY_SSL_CERTIFICATE = False
+else:
+    SCHEME = 'https://'
+    VERIFY_SSL_CERTIFICATE = True
+
+BLOCK_INDICATORS = (
+    'form id="captcha-form"',
+    'This page appears when Google automatically detects requests coming from your computer '
+    'network which appear to be in violation of the <a href="//www.google.com/policies/terms/">Terms of Service'
+)
+
+BLOCKED_QUESTION_FRAGMENTS = (
+    'webcache.googleusercontent.com',
+)
+
+URL = os.getenv('HOWDOI_URL') or 'stackoverflow.com'
+
+SEARCH_URLS = {
+    'bing': SCHEME + 'www.bing.com/search?q=site:{0}%20{1}&hl=en',
+    'google': SCHEME + 'www.google.com/search?q=site:{0}%20{1}&hl=en',
+    'duckduckgo': SCHEME + 'duckduckgo.com/?q=site:{0}%20{1}&t=hj&ia=web'
+}
+
+
+def _is_blocked(page):
+    for indicator in BLOCK_INDICATORS:
+        if page.find(indicator) != -1:
+            return True
+    return False
+
+
+class BasePlugin():
+    def search(self):
+        print("Hello search")
+        pass
+
+    def _add_links_to_text(self, element):
+        hyperlinks = element.find('a')
+
+        for hyperlink in hyperlinks:
+            pquery_object = pq(hyperlink)
+            href = hyperlink.attrib['href']
+            copy = pquery_object.text()
+            if (copy == href):
+                replacement = copy
+            else:
+                replacement = "[{0}]({1})".format(copy, href)
+            pquery_object.replace_with(replacement)
+
+    def get_link_at_pos(self, links, position):
+        if not links:
+            return False
+        if len(links) >= position:
+            link = links[position - 1]
+        else:
+            link = links[-1]
+        return link
+
+    def get_text(self, element):
+        ''' return inner text in pyquery element '''
+        self._add_links_to_text(element)
+        try:
+            return element.text(squash_space=False)
+        except TypeError:
+            return element.text()
+
+    def _get_search_url(self, search_engine):
+        return SEARCH_URLS.get(search_engine, SEARCH_URLS['google'])
+
+    def _get_links(self, query):
+        search_engine = os.getenv('HOWDOI_SEARCH_ENGINE', 'google')
+        search_url = self._get_search_url(search_engine)
+
+        result = self._get_result(search_url.format(URL, url_quote(query)))
+        if _is_blocked(result):
+            _print_err('Unable to find an answer because the search engine temporarily blocked the request. '
+                       'Please wait a few minutes or select a different search engine.')
+            raise BlockError("Temporary block by search engine")
+
+        html = pq(result)
+        return self._extract_links(html, search_engine)
+
+    def _extract_links_from_bing(self, html):
+        html.remove_namespaces()
+        return [a.attrib['href'] for a in html('.b_algo')('h2')('a')]
+
+    def _extract_links_from_google(self, html):
+        return [a.attrib['href'] for a in html('.l')] or \
+            [a.attrib['href'] for a in html('.r')('a')]
+
+    def _extract_links_from_duckduckgo(self, html):
+        html.remove_namespaces()
+        links_anchors = html.find('a.result__a')
+        results = []
+        for anchor in links_anchors:
+            link = anchor.attrib['href']
+            url_obj = urlparse(link)
+            parsed_url = parse_qs(url_obj.query).get('uddg', '')
+            if parsed_url:
+                results.append(parsed_url[0])
+        return results
+
+    def _extract_links(self, html, search_engine):
+        if search_engine == 'bing':
+            return self._extract_links_from_bing(html)
+        if search_engine == 'duckduckgo':
+            return self._extract_links_from_duckduckgo(html)
+        return self._extract_links_from_google(html)
+
+    def format_output(self, code, args):
+        if not args['color']:
+            return code
+        lexer = None
+
+        # try to find a lexer using the StackOverflow tags
+        # or the query arguments
+        for keyword in args['query'].split() + args['tags']:
+            try:
+                lexer = get_lexer_by_name(keyword)
+                break
+            except ClassNotFound:
+                pass
+
+        # no lexer found above, use the guesser
+        if not lexer:
+            try:
+                lexer = guess_lexer(code)
+            except ClassNotFound:
+                return code
+
+        return highlight(code,
+                         lexer,
+                         TerminalFormatter(bg='dark'))
+
+    def _is_question(self, link):
+        for fragment in BLOCKED_QUESTION_FRAGMENTS:
+            if fragment in link:
+                return False
+        return re.search(r'questions/\d+/', link)
+
+    def _get_answer(self, args, links):
+        link = self.get_link_at_pos(links, args['pos'])
+        if not link:
+            return False
+
+        cache_key = link
+        page = cache.get(link)
+        if not page:
+            page = self._get_result(link + '?answertab=votes')
+            cache.set(cache_key, page)
+
+        html = pq(page)
+
+        first_answer = html('.answer').eq(0)
+
+        instructions = first_answer.find('pre') or first_answer.find('code')
+        args['tags'] = [t.text for t in html('.post-tag')]
+
+        if not instructions and not args['all']:
+            text = self.get_text(first_answer.find('.post-text').eq(0))
+        elif args['all']:
+            texts = []
+            for html_tag in first_answer.items('.post-text > *'):
+                current_text = self.get_text(html_tag)
+                if current_text:
+                    if html_tag[0].tag in ['pre', 'code']:
+                        texts.append(self._format_output(current_text, args))
+                    else:
+                        texts.append(current_text)
+            text = '\n'.join(texts)
+        else:
+            text = self._format_output(self.get_text(instructions.eq(0)), args)
+        if text is None:
+            text = NO_ANSWER_MSG
+        text = text.strip()
+        return text
+
+    def _get_questions(self, links):
+        return [link for link in links if self._is_question(link)]
+
+    def _get_links_with_cache(self, query):
+        cache_key = query + "-links"
+        res = cache.get(cache_key)
+        if res:
+            if res == CACHE_EMPTY_VAL:
+                res = False
+            return res
+
+        links = self._get_links(query)
+        if not links:
+            cache.set(cache_key, CACHE_EMPTY_VAL)
+
+        question_links = self._get_questions(links)
+        cache.set(cache_key, question_links or CACHE_EMPTY_VAL)
+
+        return question_links
+
+    def _get_answers(self, args):
+        """
+        @args: command-line arguments
+        returns: array of answers and their respective metadata
+                False if unable to get answers
+        """
+        question_links = self._get_links_with_cache(args['query'])
+        if not question_links:
+            return False
+
+        answers = []
+        initial_position = args['pos']
+        multiple_answers = (args['num_answers'] > 1 or args['all'])
+
+        for answer_number in range(args['num_answers']):
+            current_position = answer_number + initial_position
+            args['pos'] = current_position
+            link = self.get_link_at_pos(question_links, current_position)
+            answer = self._get_answer(args, question_links)
+            if not answer:
+                continue
+            if not args['link'] and not args['json_output'] and multiple_answers:
+                answer = ANSWER_HEADER.format(link, answer, STAR_HEADER)
+            answer += '\n'
+            answers.append({
+                'answer': answer,
+                'link': link,
+                'position': current_position
+            })
+
+        return answers
+
+    def extract(self):
+        print("Hello extract")
+        pass
+
+# Make StackOverflow plugin
diff --git a/howdoi/plugins/__init__.py b/howdoi/plugins/__init__.py
new file mode 100644
index 000000000..e69de29bb

From cf6f386810a9a581f8a500186b705fa60739f64c Mon Sep 17 00:00:00 2001
From: Cesare De Cal <hi@cesare.io>
Date: Wed, 24 Jun 2020 17:17:07 +0200
Subject: [PATCH 02/15] Deleted unused functions from howdoi.py, imported
 get_proxies in BasePlugin

---
 howdoi/howdoi.py             | 311 +----------------------------------
 howdoi/plugins/BasePlugin.py |  27 ++-
 2 files changed, 32 insertions(+), 306 deletions(-)

diff --git a/howdoi/howdoi.py b/howdoi/howdoi.py
index adf82d4bc..b23c1ee42 100755
--- a/howdoi/howdoi.py
+++ b/howdoi/howdoi.py
@@ -14,41 +14,21 @@
 import argparse
 import os
 import appdirs
-import re
-from cachelib import FileSystemCache, NullCache
 import json
 import requests
 import sys
 from . import __version__
 
-from pygments import highlight
-from pygments.lexers import guess_lexer, get_lexer_by_name
-from pygments.formatters.terminal import TerminalFormatter
-from pygments.util import ClassNotFound
+from cachelib import FileSystemCache, NullCache
 
-from pyquery import PyQuery as pq
 from requests.exceptions import ConnectionError
 from requests.exceptions import SSLError
 
 from howdoi.plugins import BasePlugin
 
-# Handle imports for Python 2 and 3
-if sys.version < '3':
-    import codecs
-    from urllib import quote as url_quote
-    from urllib import getproxies
-    from urlparse import urlparse, parse_qs
-
-    # Handling Unicode: http://stackoverflow.com/a/6633040/305414
-    def u(x):
-        return codecs.unicode_escape_decode(x)[0]
-else:
-    from urllib.request import getproxies
-    from urllib.parse import quote as url_quote, urlparse, parse_qs
-
-    def u(x):
-        return x
-
+CACHE_EMPTY_VAL = "NULL"
+CACHE_DIR = appdirs.user_cache_dir('howdoi')
+CACHE_ENTRY_MAX = 128
 
 # rudimentary standardized 3-level log output
 def _print_err(x): print("[ERROR] " + x)
@@ -57,19 +37,8 @@ def _print_err(x): print("[ERROR] " + x)
 _print_ok = print  # noqa: E305
 def _print_dbg(x): print("[DEBUG] " + x)  # noqa: E302
 
-
-if os.getenv('HOWDOI_DISABLE_SSL'):  # Set http instead of https
-    SCHEME = 'http://'
-    VERIFY_SSL_CERTIFICATE = False
-else:
-    SCHEME = 'https://'
-    VERIFY_SSL_CERTIFICATE = True
-
-
 SUPPORTED_SEARCH_ENGINES = ('google', 'bing', 'duckduckgo')
 
-URL = os.getenv('HOWDOI_URL') or 'stackoverflow.com'
-
 USER_AGENTS = ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10.7; rv:11.0) Gecko/20100101 Firefox/11.0',
                'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:22.0) Gecko/20100 101 Firefox/22.0',
                'Mozilla/5.0 (Windows NT 6.1; rv:11.0) Gecko/20100101 Firefox/11.0',
@@ -77,44 +46,17 @@ def _print_dbg(x): print("[DEBUG] " + x)  # noqa: E302
                 'Chrome/19.0.1084.46 Safari/536.5'),
                ('Mozilla/5.0 (Windows; Windows NT 6.1) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.46'
                 'Safari/536.5'), )
-SEARCH_URLS = {
-    'bing': SCHEME + 'www.bing.com/search?q=site:{0}%20{1}&hl=en',
-    'google': SCHEME + 'www.google.com/search?q=site:{0}%20{1}&hl=en',
-    'duckduckgo': SCHEME + 'duckduckgo.com/?q=site:{0}%20{1}&t=hj&ia=web'
-}
-
-BLOCK_INDICATORS = (
-    'form id="captcha-form"',
-    'This page appears when Google automatically detects requests coming from your computer '
-    'network which appear to be in violation of the <a href="//www.google.com/policies/terms/">Terms of Service'
-)
-
-BLOCKED_QUESTION_FRAGMENTS = (
-    'webcache.googleusercontent.com',
-)
-
-STAR_HEADER = u('\u2605')
-ANSWER_HEADER = u('{2}  Answer from {0} {2}\n{1}')
-NO_ANSWER_MSG = '< no answer given >'
-
-CACHE_EMPTY_VAL = "NULL"
-CACHE_DIR = appdirs.user_cache_dir('howdoi')
-CACHE_ENTRY_MAX = 128
 
 SUPPORTED_HELP_QUERIES = ['use howdoi', 'howdoi', 'run howdoi',
                           'do howdoi', 'howdoi howdoi', 'howdoi use howdoi']
 
+howdoi_session = requests.session()
+
 if os.getenv('HOWDOI_DISABLE_CACHE'):
     cache = NullCache()  # works like an always empty cache
 else:
     cache = FileSystemCache(CACHE_DIR, CACHE_ENTRY_MAX, default_timeout=0)
 
-howdoi_session = requests.session()
-
-
-class BlockError(RuntimeError):
-    pass
-
 
 def _random_int(width):
     bres = os.urandom(width)
@@ -130,251 +72,10 @@ def _random_choice(seq):
     return seq[_random_int(1) % len(seq)]
 
 
-def get_proxies():
-    proxies = getproxies()
-    filtered_proxies = {}
-    for key, value in proxies.items():
-        if key.startswith('http'):
-            if not value.startswith('http'):
-                filtered_proxies[key] = 'http://%s' % value
-            else:
-                filtered_proxies[key] = value
-    return filtered_proxies
-
-
-def _get_result(url):
-    try:
-        return howdoi_session.get(url, headers={'User-Agent': _random_choice(USER_AGENTS)},
-                                  proxies=get_proxies(),
-                                  verify=VERIFY_SSL_CERTIFICATE).text
-    except requests.exceptions.SSLError as e:
-        _print_err('Encountered an SSL Error. Try using HTTP instead of '
-                   'HTTPS by setting the environment variable "HOWDOI_DISABLE_SSL".\n')
-        raise e
-
-
-def _add_links_to_text(element):
-    hyperlinks = element.find('a')
-
-    for hyperlink in hyperlinks:
-        pquery_object = pq(hyperlink)
-        href = hyperlink.attrib['href']
-        copy = pquery_object.text()
-        if (copy == href):
-            replacement = copy
-        else:
-            replacement = "[{0}]({1})".format(copy, href)
-        pquery_object.replace_with(replacement)
-
-
-def get_text(element):
-    ''' return inner text in pyquery element '''
-    _add_links_to_text(element)
-    try:
-        return element.text(squash_space=False)
-    except TypeError:
-        return element.text()
-
-
-def _extract_links_from_bing(html):
-    html.remove_namespaces()
-    return [a.attrib['href'] for a in html('.b_algo')('h2')('a')]
-
-
-def _extract_links_from_google(html):
-    return [a.attrib['href'] for a in html('.l')] or \
-        [a.attrib['href'] for a in html('.r')('a')]
-
-
-def _extract_links_from_duckduckgo(html):
-    html.remove_namespaces()
-    links_anchors = html.find('a.result__a')
-    results = []
-    for anchor in links_anchors:
-        link = anchor.attrib['href']
-        url_obj = urlparse(link)
-        parsed_url = parse_qs(url_obj.query).get('uddg', '')
-        if parsed_url:
-            results.append(parsed_url[0])
-    return results
-
-
-def _extract_links(html, search_engine):
-    if search_engine == 'bing':
-        return _extract_links_from_bing(html)
-    if search_engine == 'duckduckgo':
-        return _extract_links_from_duckduckgo(html)
-    return _extract_links_from_google(html)
-
-
-def _get_search_url(search_engine):
-    return SEARCH_URLS.get(search_engine, SEARCH_URLS['google'])
-
-
-def _is_blocked(page):
-    for indicator in BLOCK_INDICATORS:
-        if page.find(indicator) != -1:
-            return True
-
-    return False
-
-
-def _get_links(query):
-    search_engine = os.getenv('HOWDOI_SEARCH_ENGINE', 'google')
-    search_url = _get_search_url(search_engine)
-
-    result = _get_result(search_url.format(URL, url_quote(query)))
-    if _is_blocked(result):
-        _print_err('Unable to find an answer because the search engine temporarily blocked the request. '
-                   'Please wait a few minutes or select a different search engine.')
-        raise BlockError("Temporary block by search engine")
-
-    html = pq(result)
-    return _extract_links(html, search_engine)
-
-
-def get_link_at_pos(links, position):
-    if not links:
-        return False
-
-    if len(links) >= position:
-        link = links[position - 1]
-    else:
-        link = links[-1]
-    return link
-
-
-def _format_output(code, args):
-    if not args['color']:
-        return code
-    lexer = None
-
-    # try to find a lexer using the StackOverflow tags
-    # or the query arguments
-    for keyword in args['query'].split() + args['tags']:
-        try:
-            lexer = get_lexer_by_name(keyword)
-            break
-        except ClassNotFound:
-            pass
-
-    # no lexer found above, use the guesser
-    if not lexer:
-        try:
-            lexer = guess_lexer(code)
-        except ClassNotFound:
-            return code
-
-    return highlight(code,
-                     lexer,
-                     TerminalFormatter(bg='dark'))
-
-
-def _is_question(link):
-    for fragment in BLOCKED_QUESTION_FRAGMENTS:
-        if fragment in link:
-            return False
-    return re.search(r'questions/\d+/', link)
-
-
-def _get_questions(links):
-    return [link for link in links if _is_question(link)]
-
-
-def _get_answer(args, links):
-    link = get_link_at_pos(links, args['pos'])
-    if not link:
-        return False
-
-    cache_key = link
-    page = cache.get(link)
-    if not page:
-        page = _get_result(link + '?answertab=votes')
-        cache.set(cache_key, page)
-
-    html = pq(page)
-
-    first_answer = html('.answer').eq(0)
-
-    instructions = first_answer.find('pre') or first_answer.find('code')
-    args['tags'] = [t.text for t in html('.post-tag')]
-
-    if not instructions and not args['all']:
-        text = get_text(first_answer.find('.post-text').eq(0))
-    elif args['all']:
-        texts = []
-        for html_tag in first_answer.items('.post-text > *'):
-            current_text = get_text(html_tag)
-            if current_text:
-                if html_tag[0].tag in ['pre', 'code']:
-                    texts.append(_format_output(current_text, args))
-                else:
-                    texts.append(current_text)
-        text = '\n'.join(texts)
-    else:
-        text = _format_output(get_text(instructions.eq(0)), args)
-    if text is None:
-        text = NO_ANSWER_MSG
-    text = text.strip()
-    return text
-
-
-def _get_links_with_cache(query):
-    cache_key = query + "-links"
-    res = cache.get(cache_key)
-    if res:
-        if res == CACHE_EMPTY_VAL:
-            res = False
-        return res
-
-    links = _get_links(query)
-    if not links:
-        cache.set(cache_key, CACHE_EMPTY_VAL)
-
-    question_links = _get_questions(links)
-    cache.set(cache_key, question_links or CACHE_EMPTY_VAL)
-
-    return question_links
-
-
 def build_splitter(splitter_character='=', splitter_length=80):
     return '\n' + splitter_character * splitter_length + '\n\n'
 
 
-def _get_answers(args):
-    """
-    @args: command-line arguments
-    returns: array of answers and their respective metadata
-             False if unable to get answers
-    """
-
-    question_links = _get_links_with_cache(args['query'])
-    if not question_links:
-        return False
-
-    answers = []
-    initial_position = args['pos']
-    multiple_answers = (args['num_answers'] > 1 or args['all'])
-
-    for answer_number in range(args['num_answers']):
-        current_position = answer_number + initial_position
-        args['pos'] = current_position
-        link = get_link_at_pos(question_links, current_position)
-        answer = _get_answer(args, question_links)
-        if not answer:
-            continue
-        if not args['link'] and not args['json_output'] and multiple_answers:
-            answer = ANSWER_HEADER.format(link, answer, STAR_HEADER)
-        answer += '\n'
-        answers.append({
-            'answer': answer,
-            'link': link,
-            'position': current_position
-        })
-
-    return answers
-
-
 def _clear_cache():
     global cache
     if not cache:
diff --git a/howdoi/plugins/BasePlugin.py b/howdoi/plugins/BasePlugin.py
index de817f952..c3e9ea1fe 100644
--- a/howdoi/plugins/BasePlugin.py
+++ b/howdoi/plugins/BasePlugin.py
@@ -49,7 +49,7 @@ def _print_dbg(x):
     print("[DEBUG] " + x)  # noqa: E302
 
 
-# CACHE_EMPTY_VAL = "NULL"
+CACHE_EMPTY_VAL = "NULL"
 CACHE_DIR = appdirs.user_cache_dir('howdoi')
 CACHE_ENTRY_MAX = 128
 
@@ -294,6 +294,31 @@ def _get_answers(self, args):
 
         return answers
 
+
+    def get_proxies():
+        proxies = getproxies()
+        filtered_proxies = {}
+        for key, value in proxies.items():
+            if key.startswith('http'):
+                if not value.startswith('http'):
+                    filtered_proxies[key] = 'http://%s' % value
+                else:
+                    filtered_proxies[key] = value
+        return filtered_proxies
+
+
+    def _get_result(self, url):
+        pass
+        # try:
+        #     return howdoi_session.get(url, headers={'User-Agent': _random_choice(USER_AGENTS)},
+        #                             proxies=get_proxies(),
+        #                             verify=VERIFY_SSL_CERTIFICATE).text
+        # except requests.exceptions.SSLError as e:
+        #     _print_err('Encountered an SSL Error. Try using HTTP instead of '
+        #                'HTTPS by setting the environment variable "HOWDOI_DISABLE_SSL".\n')
+        #     raise e
+
+
     def extract(self):
         print("Hello extract")
         pass

From c920b9edb2e58c3ccc229073e70d644be604a55e Mon Sep 17 00:00:00 2001
From: Cesare De Cal <hi@cesare.io>
Date: Wed, 24 Jun 2020 17:31:32 +0200
Subject: [PATCH 03/15] Created StackOverflowPlugin and moved
 StackOverflow-specific methods from BasePlugin to it

---
 howdoi/plugins/BasePlugin.py          | 82 +--------------------------
 howdoi/plugins/StackOverflowPlugin.py | 82 +++++++++++++++++++++++++++
 2 files changed, 83 insertions(+), 81 deletions(-)
 create mode 100644 howdoi/plugins/StackOverflowPlugin.py

diff --git a/howdoi/plugins/BasePlugin.py b/howdoi/plugins/BasePlugin.py
index c3e9ea1fe..574c51e03 100644
--- a/howdoi/plugins/BasePlugin.py
+++ b/howdoi/plugins/BasePlugin.py
@@ -8,11 +8,6 @@
 
 from pyquery import PyQuery as pq
 
-from pygments import highlight
-from pygments.formatters.terminal import TerminalFormatter
-from pygments.lexers import get_lexer_by_name, guess_lexer
-from pygments.util import ClassNotFound
-
 
 class BlockError(RuntimeError):
     pass
@@ -76,10 +71,6 @@ def _print_dbg(x):
     'network which appear to be in violation of the <a href="//www.google.com/policies/terms/">Terms of Service'
 )
 
-BLOCKED_QUESTION_FRAGMENTS = (
-    'webcache.googleusercontent.com',
-)
-
 URL = os.getenv('HOWDOI_URL') or 'stackoverflow.com'
 
 SEARCH_URLS = {
@@ -174,76 +165,7 @@ def _extract_links(self, html, search_engine):
             return self._extract_links_from_duckduckgo(html)
         return self._extract_links_from_google(html)
 
-    def format_output(self, code, args):
-        if not args['color']:
-            return code
-        lexer = None
-
-        # try to find a lexer using the StackOverflow tags
-        # or the query arguments
-        for keyword in args['query'].split() + args['tags']:
-            try:
-                lexer = get_lexer_by_name(keyword)
-                break
-            except ClassNotFound:
-                pass
-
-        # no lexer found above, use the guesser
-        if not lexer:
-            try:
-                lexer = guess_lexer(code)
-            except ClassNotFound:
-                return code
-
-        return highlight(code,
-                         lexer,
-                         TerminalFormatter(bg='dark'))
-
-    def _is_question(self, link):
-        for fragment in BLOCKED_QUESTION_FRAGMENTS:
-            if fragment in link:
-                return False
-        return re.search(r'questions/\d+/', link)
-
-    def _get_answer(self, args, links):
-        link = self.get_link_at_pos(links, args['pos'])
-        if not link:
-            return False
-
-        cache_key = link
-        page = cache.get(link)
-        if not page:
-            page = self._get_result(link + '?answertab=votes')
-            cache.set(cache_key, page)
-
-        html = pq(page)
-
-        first_answer = html('.answer').eq(0)
-
-        instructions = first_answer.find('pre') or first_answer.find('code')
-        args['tags'] = [t.text for t in html('.post-tag')]
-
-        if not instructions and not args['all']:
-            text = self.get_text(first_answer.find('.post-text').eq(0))
-        elif args['all']:
-            texts = []
-            for html_tag in first_answer.items('.post-text > *'):
-                current_text = self.get_text(html_tag)
-                if current_text:
-                    if html_tag[0].tag in ['pre', 'code']:
-                        texts.append(self._format_output(current_text, args))
-                    else:
-                        texts.append(current_text)
-            text = '\n'.join(texts)
-        else:
-            text = self._format_output(self.get_text(instructions.eq(0)), args)
-        if text is None:
-            text = NO_ANSWER_MSG
-        text = text.strip()
-        return text
 
-    def _get_questions(self, links):
-        return [link for link in links if self._is_question(link)]
 
     def _get_links_with_cache(self, query):
         cache_key = query + "-links"
@@ -321,6 +243,4 @@ def _get_result(self, url):
 
     def extract(self):
         print("Hello extract")
-        pass
-
-# Make StackOverflow plugin
+        pass
\ No newline at end of file
diff --git a/howdoi/plugins/StackOverflowPlugin.py b/howdoi/plugins/StackOverflowPlugin.py
new file mode 100644
index 000000000..7c86a278d
--- /dev/null
+++ b/howdoi/plugins/StackOverflowPlugin.py
@@ -0,0 +1,82 @@
+from howdoi.plugins import BasePlugin
+
+from pygments import highlight
+from pygments.formatters.terminal import TerminalFormatter
+from pygments.lexers import get_lexer_by_name, guess_lexer
+from pygments.util import ClassNotFound
+
+BLOCKED_QUESTION_FRAGMENTS = (
+    'webcache.googleusercontent.com',
+)
+
+class StackOverflowPlugin(BasePlugin.BasePlugin):
+    def format_output(self, code, args):
+        if not args['color']:
+            return code
+        lexer = None
+
+        # try to find a lexer using the StackOverflow tags
+        # or the query arguments
+        for keyword in args['query'].split() + args['tags']:
+            try:
+                lexer = get_lexer_by_name(keyword)
+                break
+            except ClassNotFound:
+                pass
+
+        # no lexer found above, use the guesser
+        if not lexer:
+            try:
+                lexer = guess_lexer(code)
+            except ClassNotFound:
+                return code
+
+        return highlight(code,
+                         lexer,
+                         TerminalFormatter(bg='dark'))
+
+    def _is_question(self, link):
+        for fragment in BLOCKED_QUESTION_FRAGMENTS:
+            if fragment in link:
+                return False
+        return re.search(r'questions/\d+/', link)
+
+    def _get_answer(self, args, links):
+        link = self.get_link_at_pos(links, args['pos'])
+        if not link:
+            return False
+
+        cache_key = link
+        page = cache.get(link)
+        if not page:
+            page = self._get_result(link + '?answertab=votes')
+            cache.set(cache_key, page)
+
+        html = pq(page)
+
+        first_answer = html('.answer').eq(0)
+
+        instructions = first_answer.find('pre') or first_answer.find('code')
+        args['tags'] = [t.text for t in html('.post-tag')]
+
+        if not instructions and not args['all']:
+            text = self.get_text(first_answer.find('.post-text').eq(0))
+        elif args['all']:
+            texts = []
+            for html_tag in first_answer.items('.post-text > *'):
+                current_text = self.get_text(html_tag)
+                if current_text:
+                    if html_tag[0].tag in ['pre', 'code']:
+                        texts.append(self._format_output(current_text, args))
+                    else:
+                        texts.append(current_text)
+            text = '\n'.join(texts)
+        else:
+            text = self._format_output(self.get_text(instructions.eq(0)), args)
+        if text is None:
+            text = NO_ANSWER_MSG
+        text = text.strip()
+        return text
+
+    def _get_questions(self, links):
+        return [link for link in links if self._is_question(link)]

From 1574aa29a6bb71364a1b2bcd76d91c6b4a8006d9 Mon Sep 17 00:00:00 2001
From: Eyitayo Ogunbiyi <eyitayoogunbiyi@gmail.com>
Date: Wed, 24 Jun 2020 16:41:03 +0100
Subject: [PATCH 04/15] refactored plugin directory

---
 howdoi/plugins/__init__.py                               | 2 ++
 howdoi/plugins/{BasePlugin.py => base.py}                | 9 ++-------
 .../plugins/{StackOverflowPlugin.py => stackoverflow.py} | 0
 3 files changed, 4 insertions(+), 7 deletions(-)
 rename howdoi/plugins/{BasePlugin.py => base.py} (99%)
 rename howdoi/plugins/{StackOverflowPlugin.py => stackoverflow.py} (100%)

diff --git a/howdoi/plugins/__init__.py b/howdoi/plugins/__init__.py
index e69de29bb..678d85c40 100644
--- a/howdoi/plugins/__init__.py
+++ b/howdoi/plugins/__init__.py
@@ -0,0 +1,2 @@
+from howdoi.plugins.base import BasePlugin
+from howdoi.plugins.stackoverflow import StackOverflowPlugin
diff --git a/howdoi/plugins/BasePlugin.py b/howdoi/plugins/base.py
similarity index 99%
rename from howdoi/plugins/BasePlugin.py
rename to howdoi/plugins/base.py
index 574c51e03..91301b3da 100644
--- a/howdoi/plugins/BasePlugin.py
+++ b/howdoi/plugins/base.py
@@ -165,8 +165,6 @@ def _extract_links(self, html, search_engine):
             return self._extract_links_from_duckduckgo(html)
         return self._extract_links_from_google(html)
 
-
-
     def _get_links_with_cache(self, query):
         cache_key = query + "-links"
         res = cache.get(cache_key)
@@ -216,8 +214,7 @@ def _get_answers(self, args):
 
         return answers
 
-
-    def get_proxies():
+    def get_proxies(self):
         proxies = getproxies()
         filtered_proxies = {}
         for key, value in proxies.items():
@@ -228,7 +225,6 @@ def get_proxies():
                     filtered_proxies[key] = value
         return filtered_proxies
 
-
     def _get_result(self, url):
         pass
         # try:
@@ -240,7 +236,6 @@ def _get_result(self, url):
         #                'HTTPS by setting the environment variable "HOWDOI_DISABLE_SSL".\n')
         #     raise e
 
-
     def extract(self):
         print("Hello extract")
-        pass
\ No newline at end of file
+        pass
diff --git a/howdoi/plugins/StackOverflowPlugin.py b/howdoi/plugins/stackoverflow.py
similarity index 100%
rename from howdoi/plugins/StackOverflowPlugin.py
rename to howdoi/plugins/stackoverflow.py

From e2b57f2d08ee6d95783c3b9e4c1415c42fdc7117 Mon Sep 17 00:00:00 2001
From: Eyitayo Ogunbiyi <eyitayoogunbiyi@gmail.com>
Date: Wed, 24 Jun 2020 17:08:44 +0100
Subject: [PATCH 05/15] cleaning up the plugins to be modular

---
 howdoi/howdoi.py                |  10 ++-
 howdoi/plugins/base.py          |  82 +++----------------------
 howdoi/plugins/stackoverflow.py | 105 ++++++++++++++++++++++++++++++--
 3 files changed, 118 insertions(+), 79 deletions(-)

diff --git a/howdoi/howdoi.py b/howdoi/howdoi.py
index b23c1ee42..32480014e 100755
--- a/howdoi/howdoi.py
+++ b/howdoi/howdoi.py
@@ -24,19 +24,22 @@
 from requests.exceptions import ConnectionError
 from requests.exceptions import SSLError
 
-from howdoi.plugins import BasePlugin
+from howdoi.plugins import StackOverflowPlugin
 
 CACHE_EMPTY_VAL = "NULL"
 CACHE_DIR = appdirs.user_cache_dir('howdoi')
 CACHE_ENTRY_MAX = 128
 
 # rudimentary standardized 3-level log output
+
+
 def _print_err(x): print("[ERROR] " + x)
 
 
 _print_ok = print  # noqa: E305
 def _print_dbg(x): print("[DEBUG] " + x)  # noqa: E302
 
+
 SUPPORTED_SEARCH_ENGINES = ('google', 'bing', 'duckduckgo')
 
 USER_AGENTS = ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10.7; rv:11.0) Gecko/20100101 Firefox/11.0',
@@ -146,8 +149,8 @@ def howdoi(raw_query):
         return _format_answers(res, args)
 
     try:
-        plugin = BasePlugin.BasePlugin()
-        res = plugin.search()
+        plugin = StackOverflowPlugin()
+        res = plugin.search(args)
         if not res:
             res = {"error": "Sorry, couldn\'t find any help with that topic\n"}
         cache.set(cache_key, res)
@@ -216,4 +219,5 @@ def command_line_runner():
 
 
 if __name__ == '__main__':
+    _clear_cache()
     command_line_runner()
diff --git a/howdoi/plugins/base.py b/howdoi/plugins/base.py
index 91301b3da..08886c027 100644
--- a/howdoi/plugins/base.py
+++ b/howdoi/plugins/base.py
@@ -80,17 +80,17 @@ def _print_dbg(x):
 }
 
 
-def _is_blocked(page):
-    for indicator in BLOCK_INDICATORS:
-        if page.find(indicator) != -1:
-            return True
-    return False
-
-
 class BasePlugin():
-    def search(self):
-        print("Hello search")
-        pass
+    def __init__(self, cache=None):
+        if cache is None:
+            cache = NullCache()
+        self.cache = cache
+
+    def _is_blocked(self, page):
+        for indicator in BLOCK_INDICATORS:
+            if page.find(indicator) != -1:
+                return True
+        return False
 
     def _add_links_to_text(self, element):
         hyperlinks = element.find('a')
@@ -125,19 +125,6 @@ def get_text(self, element):
     def _get_search_url(self, search_engine):
         return SEARCH_URLS.get(search_engine, SEARCH_URLS['google'])
 
-    def _get_links(self, query):
-        search_engine = os.getenv('HOWDOI_SEARCH_ENGINE', 'google')
-        search_url = self._get_search_url(search_engine)
-
-        result = self._get_result(search_url.format(URL, url_quote(query)))
-        if _is_blocked(result):
-            _print_err('Unable to find an answer because the search engine temporarily blocked the request. '
-                       'Please wait a few minutes or select a different search engine.')
-            raise BlockError("Temporary block by search engine")
-
-        html = pq(result)
-        return self._extract_links(html, search_engine)
-
     def _extract_links_from_bing(self, html):
         html.remove_namespaces()
         return [a.attrib['href'] for a in html('.b_algo')('h2')('a')]
@@ -165,55 +152,6 @@ def _extract_links(self, html, search_engine):
             return self._extract_links_from_duckduckgo(html)
         return self._extract_links_from_google(html)
 
-    def _get_links_with_cache(self, query):
-        cache_key = query + "-links"
-        res = cache.get(cache_key)
-        if res:
-            if res == CACHE_EMPTY_VAL:
-                res = False
-            return res
-
-        links = self._get_links(query)
-        if not links:
-            cache.set(cache_key, CACHE_EMPTY_VAL)
-
-        question_links = self._get_questions(links)
-        cache.set(cache_key, question_links or CACHE_EMPTY_VAL)
-
-        return question_links
-
-    def _get_answers(self, args):
-        """
-        @args: command-line arguments
-        returns: array of answers and their respective metadata
-                False if unable to get answers
-        """
-        question_links = self._get_links_with_cache(args['query'])
-        if not question_links:
-            return False
-
-        answers = []
-        initial_position = args['pos']
-        multiple_answers = (args['num_answers'] > 1 or args['all'])
-
-        for answer_number in range(args['num_answers']):
-            current_position = answer_number + initial_position
-            args['pos'] = current_position
-            link = self.get_link_at_pos(question_links, current_position)
-            answer = self._get_answer(args, question_links)
-            if not answer:
-                continue
-            if not args['link'] and not args['json_output'] and multiple_answers:
-                answer = ANSWER_HEADER.format(link, answer, STAR_HEADER)
-            answer += '\n'
-            answers.append({
-                'answer': answer,
-                'link': link,
-                'position': current_position
-            })
-
-        return answers
-
     def get_proxies(self):
         proxies = getproxies()
         filtered_proxies = {}
diff --git a/howdoi/plugins/stackoverflow.py b/howdoi/plugins/stackoverflow.py
index 7c86a278d..9f421c13f 100644
--- a/howdoi/plugins/stackoverflow.py
+++ b/howdoi/plugins/stackoverflow.py
@@ -1,15 +1,44 @@
 from howdoi.plugins import BasePlugin
 
+import re
+import os
+import sys
+import appdirs
+
 from pygments import highlight
 from pygments.formatters.terminal import TerminalFormatter
 from pygments.lexers import get_lexer_by_name, guess_lexer
 from pygments.util import ClassNotFound
 
+if sys.version < '3':
+    import codecs
+    from urllib import quote as url_quote
+    from urllib import getproxies
+    from urlparse import urlparse, parse_qs
+
+    # Handling Unicode: http://stackoverflow.com/a/6633040/305414
+    def u(x):
+        return codecs.unicode_escape_decode(x)[0]
+else:
+    from urllib.request import getproxies
+    from urllib.parse import quote as url_quote, urlparse, parse_qs
+
+    def u(x):
+        return x
+
 BLOCKED_QUESTION_FRAGMENTS = (
-    'webcache.googleusercontent.com',
+    'webself.cache.googleusercontent.com',
 )
 
-class StackOverflowPlugin(BasePlugin.BasePlugin):
+URL = os.getenv('HOWDOI_URL') or 'stackoverflow.com'
+
+
+CACHE_EMPTY_VAL = "NULL"
+CACHE_DIR = appdirs.user_cache_dir('howdoi')
+CACHE_ENTRY_MAX = 128
+
+
+class StackOverflowPlugin(BasePlugin):
     def format_output(self, code, args):
         if not args['color']:
             return code
@@ -35,22 +64,90 @@ def format_output(self, code, args):
                          lexer,
                          TerminalFormatter(bg='dark'))
 
+    def search(self, args):
+        return self._get_answers(args)
+
+    def _get_answers(self, args):
+        """
+        @args: command-line arguments
+        returns: array of answers and their respective metadata
+                False if unable to get answers
+        """
+        question_links = self._get_links_with_cache(args['query'])
+        if not question_links:
+            return False
+
+        answers = []
+        initial_position = args['pos']
+        multiple_answers = (args['num_answers'] > 1 or args['all'])
+
+        for answer_number in range(args['num_answers']):
+            current_position = answer_number + initial_position
+            args['pos'] = current_position
+            link = self.get_link_at_pos(question_links, current_position)
+            answer = self._get_answer(args, question_links)
+            if not answer:
+                continue
+            if not args['link'] and not args['json_output'] and multiple_answers:
+                answer = ANSWER_HEADER.format(link, answer, STAR_HEADER)
+            answer += '\n'
+            answers.append({
+                'answer': answer,
+                'link': link,
+                'position': current_position
+            })
+
+        return answers
+
+    def _get_links(self, query):
+        search_engine = os.getenv('HOWDOI_SEARCH_ENGINE', 'google')
+        search_url = self._get_search_url(search_engine)
+
+        result = self._get_result(search_url.format(URL, url_quote(query)))
+        if self._is_blocked(result):
+            _print_err('Unable to find an answer because the search engine temporarily blocked the request. '
+                       'Please wait a few minutes or select a different search engine.')
+            raise BlockError("Temporary block by search engine")
+
+        html = pq(result)
+        return self._extract_links(html, search_engine)
+
+    def _get_links_with_cache(self, query):
+        cache_key = query + "-links"
+        res = self.cache.get(cache_key)
+        if res:
+            if res == CACHE_EMPTY_VAL:
+                res = False
+            return res
+
+        links = self._get_links(query)
+        if not links:
+            self.cache.set(cache_key, CACHE_EMPTY_VAL)
+
+        question_links = self._get_questions(links)
+        self.cache.set(cache_key, question_links or CACHE_EMPTY_VAL)
+
+        return question_links
+
     def _is_question(self, link):
         for fragment in BLOCKED_QUESTION_FRAGMENTS:
             if fragment in link:
                 return False
         return re.search(r'questions/\d+/', link)
 
+    def _get_result(self, url):
+        return [{'answer': 'scala> val x = "scala is awesome"\nx: java.lang.String = scala is awesome\n\nscala> x.reverse\nres1: String = emosewa si alacs\n', 'link': 'https://stackoverflow.com/questions/7700399/scala-reverse-string', 'position': 1}]
+
     def _get_answer(self, args, links):
         link = self.get_link_at_pos(links, args['pos'])
         if not link:
             return False
 
         cache_key = link
-        page = cache.get(link)
+        page = self.cache.get(link)
         if not page:
             page = self._get_result(link + '?answertab=votes')
-            cache.set(cache_key, page)
+            self.cache.set(cache_key, page)
 
         html = pq(page)
 

From 383b252b70497cf38449902b4eebdccf058a0aa9 Mon Sep 17 00:00:00 2001
From: Eyitayo Ogunbiyi <eyitayoogunbiyi@gmail.com>
Date: Wed, 24 Jun 2020 17:25:00 +0100
Subject: [PATCH 06/15] restore functionality to initial state

---
 howdoi/plugins/base.py          | 13 -----
 howdoi/plugins/stackoverflow.py | 95 +++++++++++++++++++++++++++++++--
 2 files changed, 90 insertions(+), 18 deletions(-)

diff --git a/howdoi/plugins/base.py b/howdoi/plugins/base.py
index 08886c027..958acc248 100644
--- a/howdoi/plugins/base.py
+++ b/howdoi/plugins/base.py
@@ -9,8 +9,6 @@
 from pyquery import PyQuery as pq
 
 
-class BlockError(RuntimeError):
-    pass
 
 
 # Handle imports for Python 2 and 3
@@ -163,17 +161,6 @@ def get_proxies(self):
                     filtered_proxies[key] = value
         return filtered_proxies
 
-    def _get_result(self, url):
-        pass
-        # try:
-        #     return howdoi_session.get(url, headers={'User-Agent': _random_choice(USER_AGENTS)},
-        #                             proxies=get_proxies(),
-        #                             verify=VERIFY_SSL_CERTIFICATE).text
-        # except requests.exceptions.SSLError as e:
-        #     _print_err('Encountered an SSL Error. Try using HTTP instead of '
-        #                'HTTPS by setting the environment variable "HOWDOI_DISABLE_SSL".\n')
-        #     raise e
-
     def extract(self):
         print("Hello extract")
         pass
diff --git a/howdoi/plugins/stackoverflow.py b/howdoi/plugins/stackoverflow.py
index 9f421c13f..765a4ae54 100644
--- a/howdoi/plugins/stackoverflow.py
+++ b/howdoi/plugins/stackoverflow.py
@@ -1,14 +1,47 @@
-from howdoi.plugins import BasePlugin
-
-import re
 import os
+import re
 import sys
-import appdirs
 
+import appdirs
+import requests
 from pygments import highlight
 from pygments.formatters.terminal import TerminalFormatter
 from pygments.lexers import get_lexer_by_name, guess_lexer
 from pygments.util import ClassNotFound
+from pyquery import PyQuery as pq
+
+
+from howdoi.plugins import BasePlugin
+
+if os.getenv('HOWDOI_DISABLE_SSL'):  # Set http instead of https
+    SCHEME = 'http://'
+    VERIFY_SSL_CERTIFICATE = False
+else:
+    SCHEME = 'https://'
+    VERIFY_SSL_CERTIFICATE = True
+
+USER_AGENTS = ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10.7; rv:11.0) Gecko/20100101 Firefox/11.0',
+               'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:22.0) Gecko/20100 101 Firefox/22.0',
+               'Mozilla/5.0 (Windows NT 6.1; rv:11.0) Gecko/20100101 Firefox/11.0',
+               ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_4) AppleWebKit/536.5 (KHTML, like Gecko) '
+                'Chrome/19.0.1084.46 Safari/536.5'),
+               ('Mozilla/5.0 (Windows; Windows NT 6.1) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.46'
+                'Safari/536.5'), )
+
+
+def _random_int(width):
+    bres = os.urandom(width)
+    if sys.version < '3':
+        ires = int(bres.encode('hex'), 16)
+    else:
+        ires = int.from_bytes(bres, 'little')
+
+    return ires
+
+
+def _random_choice(seq):
+    return seq[_random_int(1) % len(seq)]
+
 
 if sys.version < '3':
     import codecs
@@ -36,6 +69,26 @@ def u(x):
 CACHE_EMPTY_VAL = "NULL"
 CACHE_DIR = appdirs.user_cache_dir('howdoi')
 CACHE_ENTRY_MAX = 128
+NO_ANSWER_MSG = '< no answer given >'
+ANSWER_HEADER = u('{2}  Answer from {0} {2}\n{1}')
+STAR_HEADER = u('\u2605')
+
+
+class BlockError(RuntimeError):
+    pass
+
+howdoi_session = requests.session()
+
+
+def _print_err(x):
+    print("[ERROR] " + x)
+
+
+_print_ok = print  # noqa: E305
+
+
+def _print_dbg(x):
+    print("[DEBUG] " + x)  # noqa: E302
 
 
 class StackOverflowPlugin(BasePlugin):
@@ -136,7 +189,14 @@ def _is_question(self, link):
         return re.search(r'questions/\d+/', link)
 
     def _get_result(self, url):
-        return [{'answer': 'scala> val x = "scala is awesome"\nx: java.lang.String = scala is awesome\n\nscala> x.reverse\nres1: String = emosewa si alacs\n', 'link': 'https://stackoverflow.com/questions/7700399/scala-reverse-string', 'position': 1}]
+        try:
+            return howdoi_session.get(url, headers={'User-Agent': _random_choice(USER_AGENTS)},
+                                      proxies=self.get_proxies(),
+                                      verify=VERIFY_SSL_CERTIFICATE).text
+        except requests.exceptions.SSLError as e:
+            _print_err('Encountered an SSL Error. Try using HTTP instead of '
+                       'HTTPS by setting the environment variable "HOWDOI_DISABLE_SSL".\n')
+            raise e
 
     def _get_answer(self, args, links):
         link = self.get_link_at_pos(links, args['pos'])
@@ -175,5 +235,30 @@ def _get_answer(self, args, links):
         text = text.strip()
         return text
 
+    def _format_output(self, code, args):
+        if not args['color']:
+            return code
+        lexer = None
+
+        # try to find a lexer using the StackOverflow tags
+        # or the query arguments
+        for keyword in args['query'].split() + args['tags']:
+            try:
+                lexer = get_lexer_by_name(keyword)
+                break
+            except ClassNotFound:
+                pass
+
+        # no lexer found above, use the guesser
+        if not lexer:
+            try:
+                lexer = guess_lexer(code)
+            except ClassNotFound:
+                return code
+
+        return highlight(code,
+                         lexer,
+                         TerminalFormatter(bg='dark'))
+
     def _get_questions(self, links):
         return [link for link in links if self._is_question(link)]

From e0e3164a664d08e4b676f7dfa012da4835bf3b9c Mon Sep 17 00:00:00 2001
From: Eyitayo Ogunbiyi <eyitayoogunbiyi@gmail.com>
Date: Wed, 24 Jun 2020 17:26:36 +0100
Subject: [PATCH 07/15] remove call to clear_cache in howdoi.py

---
 howdoi/howdoi.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/howdoi/howdoi.py b/howdoi/howdoi.py
index 32480014e..49a344a33 100755
--- a/howdoi/howdoi.py
+++ b/howdoi/howdoi.py
@@ -219,5 +219,4 @@ def command_line_runner():
 
 
 if __name__ == '__main__':
-    _clear_cache()
     command_line_runner()

From 0cafae7bae0c347ac2974201fde5fa8cd0432565 Mon Sep 17 00:00:00 2001
From: Eyitayo Ogunbiyi <eyitayoogunbiyi@gmail.com>
Date: Wed, 24 Jun 2020 17:27:55 +0100
Subject: [PATCH 08/15] ensure to use cache initialized in outer scope

---
 howdoi/howdoi.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/howdoi/howdoi.py b/howdoi/howdoi.py
index 49a344a33..a28b12f61 100755
--- a/howdoi/howdoi.py
+++ b/howdoi/howdoi.py
@@ -149,7 +149,7 @@ def howdoi(raw_query):
         return _format_answers(res, args)
 
     try:
-        plugin = StackOverflowPlugin()
+        plugin = StackOverflowPlugin(cache=cache)
         res = plugin.search(args)
         if not res:
             res = {"error": "Sorry, couldn\'t find any help with that topic\n"}

From 9121577e75f9f604f42d069311c2f961ff08d952 Mon Sep 17 00:00:00 2001
From: Cesare De Cal <hi@cesare.io>
Date: Thu, 25 Jun 2020 11:01:56 +0200
Subject: [PATCH 09/15] Delete duplicate function format_output from
 StackOverflowPlugin

---
 howdoi/plugins/stackoverflow.py | 25 -------------------------
 1 file changed, 25 deletions(-)

diff --git a/howdoi/plugins/stackoverflow.py b/howdoi/plugins/stackoverflow.py
index 765a4ae54..b6baf039f 100644
--- a/howdoi/plugins/stackoverflow.py
+++ b/howdoi/plugins/stackoverflow.py
@@ -92,31 +92,6 @@ def _print_dbg(x):
 
 
 class StackOverflowPlugin(BasePlugin):
-    def format_output(self, code, args):
-        if not args['color']:
-            return code
-        lexer = None
-
-        # try to find a lexer using the StackOverflow tags
-        # or the query arguments
-        for keyword in args['query'].split() + args['tags']:
-            try:
-                lexer = get_lexer_by_name(keyword)
-                break
-            except ClassNotFound:
-                pass
-
-        # no lexer found above, use the guesser
-        if not lexer:
-            try:
-                lexer = guess_lexer(code)
-            except ClassNotFound:
-                return code
-
-        return highlight(code,
-                         lexer,
-                         TerminalFormatter(bg='dark'))
-
     def search(self, args):
         return self._get_answers(args)
 

From aaa3013a5dd2012280c7c6850e366d4de5d6ca42 Mon Sep 17 00:00:00 2001
From: Cesare De Cal <hi@cesare.io>
Date: Thu, 25 Jun 2020 12:47:09 +0200
Subject: [PATCH 10/15] Created utils file and refactored StackOverflow to use
 less code

---
 .vscode/settings.json           |   3 +
 howdoi/howdoi.py                |  53 ++-------
 howdoi/plugins/base.py          | 132 ++++++++++++++++-----
 howdoi/plugins/stackoverflow.py | 195 ++++++--------------------------
 howdoi/utils.py                 |  40 +++++++
 5 files changed, 190 insertions(+), 233 deletions(-)
 create mode 100644 .vscode/settings.json
 create mode 100644 howdoi/utils.py

diff --git a/.vscode/settings.json b/.vscode/settings.json
new file mode 100644
index 000000000..0862d6c3d
--- /dev/null
+++ b/.vscode/settings.json
@@ -0,0 +1,3 @@
+{
+    "python.pythonPath": "/Users/cesaredecal/workspace/Environments/howdoi/bin/python"
+}
\ No newline at end of file
diff --git a/howdoi/howdoi.py b/howdoi/howdoi.py
index a28b12f61..b75f3bbf7 100755
--- a/howdoi/howdoi.py
+++ b/howdoi/howdoi.py
@@ -25,31 +25,16 @@
 from requests.exceptions import SSLError
 
 from howdoi.plugins import StackOverflowPlugin
+from howdoi.utils import _print_ok, _print_err
+
 
 CACHE_EMPTY_VAL = "NULL"
 CACHE_DIR = appdirs.user_cache_dir('howdoi')
 CACHE_ENTRY_MAX = 128
 
-# rudimentary standardized 3-level log output
-
-
-def _print_err(x): print("[ERROR] " + x)
-
-
-_print_ok = print  # noqa: E305
-def _print_dbg(x): print("[DEBUG] " + x)  # noqa: E302
-
 
 SUPPORTED_SEARCH_ENGINES = ('google', 'bing', 'duckduckgo')
 
-USER_AGENTS = ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10.7; rv:11.0) Gecko/20100101 Firefox/11.0',
-               'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:22.0) Gecko/20100 101 Firefox/22.0',
-               'Mozilla/5.0 (Windows NT 6.1; rv:11.0) Gecko/20100101 Firefox/11.0',
-               ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_4) AppleWebKit/536.5 (KHTML, like Gecko) '
-                'Chrome/19.0.1084.46 Safari/536.5'),
-               ('Mozilla/5.0 (Windows; Windows NT 6.1) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.46'
-                'Safari/536.5'), )
-
 SUPPORTED_HELP_QUERIES = ['use howdoi', 'howdoi', 'run howdoi',
                           'do howdoi', 'howdoi howdoi', 'howdoi use howdoi']
 
@@ -61,24 +46,14 @@ def _print_dbg(x): print("[DEBUG] " + x)  # noqa: E302
     cache = FileSystemCache(CACHE_DIR, CACHE_ENTRY_MAX, default_timeout=0)
 
 
-def _random_int(width):
-    bres = os.urandom(width)
-    if sys.version < '3':
-        ires = int(bres.encode('hex'), 16)
-    else:
-        ires = int.from_bytes(bres, 'little')
-
-    return ires
-
-
-def _random_choice(seq):
-    return seq[_random_int(1) % len(seq)]
-
-
 def build_splitter(splitter_character='=', splitter_length=80):
     return '\n' + splitter_character * splitter_length + '\n\n'
 
 
+def _get_cache_key(args):
+    return str(args) + __version__
+
+
 def _clear_cache():
     global cache
     if not cache:
@@ -87,10 +62,6 @@ def _clear_cache():
     return cache.clear()
 
 
-def _is_help_query(query: str):
-    return any([query.lower() == help_query for help_query in SUPPORTED_HELP_QUERIES])
-
-
 def _format_answers(res, args):
     if "error" in res:
         return res["error"]
@@ -109,6 +80,10 @@ def _format_answers(res, args):
     return build_splitter().join(formatted_answers)
 
 
+def _is_help_query(query: str):
+    return any([query.lower() == help_query for help_query in SUPPORTED_HELP_QUERIES])
+
+
 def _get_help_instructions():
     instruction_splitter = build_splitter(' ', 60)
     query = 'print hello world in python'
@@ -127,10 +102,6 @@ def _get_help_instructions():
     return instruction_splitter.join(instructions)
 
 
-def _get_cache_key(args):
-    return str(args) + __version__
-
-
 def howdoi(raw_query):
     args = raw_query
     if type(raw_query) is str:  # you can pass either a raw or a parsed query
@@ -150,7 +121,7 @@ def howdoi(raw_query):
 
     try:
         plugin = StackOverflowPlugin(cache=cache)
-        res = plugin.search(args)
+        res = plugin.get_answers(args)
         if not res:
             res = {"error": "Sorry, couldn\'t find any help with that topic\n"}
         cache.set(cache_key, res)
@@ -176,7 +147,7 @@ def get_parser():
                         action='store_true')
     parser.add_argument('-e', '--engine', help='change search engine for this query only (google, bing, duckduckgo)',
                         dest='search_engine', nargs="?", default='google')
-    parser.add_argument('--plugin', help='use the base plugin', type=str, default='stackoverflow')
+    parser.add_argument('--plugin', help='query a specific plugin (default: stackoverflow)', type=str, default='stackoverflow')
     return parser
 
 
diff --git a/howdoi/plugins/base.py b/howdoi/plugins/base.py
index 958acc248..95e80d924 100644
--- a/howdoi/plugins/base.py
+++ b/howdoi/plugins/base.py
@@ -1,14 +1,13 @@
 import os
 import re
 import sys
-
+import requests
 import appdirs
 
 from cachelib import FileSystemCache, NullCache
 
 from pyquery import PyQuery as pq
-
-
+from howdoi.utils import _print_err, _random_choice
 
 
 # Handle imports for Python 2 and 3
@@ -28,21 +27,7 @@ def u(x):
     def u(x):
         return x
 
-# rudimentary standardized 3-level log output
-
-
-def _print_err(x):
-    print("[ERROR] " + x)
-
-
-_print_ok = print  # noqa: E305
-
 
-def _print_dbg(x):
-    print("[DEBUG] " + x)  # noqa: E302
-
-
-CACHE_EMPTY_VAL = "NULL"
 CACHE_DIR = appdirs.user_cache_dir('howdoi')
 CACHE_ENTRY_MAX = 128
 
@@ -54,7 +39,6 @@ def _print_dbg(x):
 ANSWER_HEADER = u('{2}  Answer from {0} {2}\n{1}')
 STAR_HEADER = u('\u2605')
 CACHE_EMPTY_VAL = "NULL"
-NO_ANSWER_MSG = '< no answer given >'
 
 if os.getenv('HOWDOI_DISABLE_SSL'):  # Set http instead of https
     SCHEME = 'http://'
@@ -77,6 +61,20 @@ def _print_dbg(x):
     'duckduckgo': SCHEME + 'duckduckgo.com/?q=site:{0}%20{1}&t=hj&ia=web'
 }
 
+USER_AGENTS = ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10.7; rv:11.0) Gecko/20100101 Firefox/11.0',
+               'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:22.0) Gecko/20100 101 Firefox/22.0',
+               'Mozilla/5.0 (Windows NT 6.1; rv:11.0) Gecko/20100101 Firefox/11.0',
+               ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_4) AppleWebKit/536.5 (KHTML, like Gecko) '
+                'Chrome/19.0.1084.46 Safari/536.5'),
+               ('Mozilla/5.0 (Windows; Windows NT 6.1) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.46'
+                'Safari/536.5'), )
+
+
+class BlockError(RuntimeError):
+    pass
+
+howdoi_session = requests.session()
+
 
 class BasePlugin():
     def __init__(self, cache=None):
@@ -84,12 +82,51 @@ def __init__(self, cache=None):
             cache = NullCache()
         self.cache = cache
 
+
+    def get_proxies(self):
+        proxies = getproxies()
+        filtered_proxies = {}
+        for key, value in proxies.items():
+            if key.startswith('http'):
+                if not value.startswith('http'):
+                    filtered_proxies[key] = 'http://%s' % value
+                else:
+                    filtered_proxies[key] = value
+        return filtered_proxies
+
+
+    def _get_result(self, url):
+        try:
+            return howdoi_session.get(url, headers={'User-Agent': _random_choice(USER_AGENTS)},
+                                      proxies=self.get_proxies(),
+                                      verify=VERIFY_SSL_CERTIFICATE).text
+        except requests.exceptions.SSLError as e:
+            _print_err('Encountered an SSL Error. Try using HTTP instead of '
+                       'HTTPS by setting the environment variable "HOWDOI_DISABLE_SSL".\n')
+            raise e
+
+
+    def _get_links(self, query):
+        search_engine = os.getenv('HOWDOI_SEARCH_ENGINE', 'google')
+        search_url = self._get_search_url(search_engine)
+
+        result = self._get_result(search_url.format(URL, url_quote(query)))
+        if self._is_blocked(result):
+            _print_err('Unable to find an answer because the search engine temporarily blocked the request. '
+                       'Please wait a few minutes or select a different search engine.')
+            raise BlockError("Temporary block by search engine")
+
+        html = pq(result)
+        return self._extract_links(html, search_engine)
+
+
     def _is_blocked(self, page):
         for indicator in BLOCK_INDICATORS:
             if page.find(indicator) != -1:
                 return True
         return False
 
+
     def _add_links_to_text(self, element):
         hyperlinks = element.find('a')
 
@@ -103,6 +140,7 @@ def _add_links_to_text(self, element):
                 replacement = "[{0}]({1})".format(copy, href)
             pquery_object.replace_with(replacement)
 
+
     def get_link_at_pos(self, links, position):
         if not links:
             return False
@@ -112,6 +150,7 @@ def get_link_at_pos(self, links, position):
             link = links[-1]
         return link
 
+
     def get_text(self, element):
         ''' return inner text in pyquery element '''
         self._add_links_to_text(element)
@@ -120,17 +159,21 @@ def get_text(self, element):
         except TypeError:
             return element.text()
 
+
     def _get_search_url(self, search_engine):
         return SEARCH_URLS.get(search_engine, SEARCH_URLS['google'])
 
+
     def _extract_links_from_bing(self, html):
         html.remove_namespaces()
         return [a.attrib['href'] for a in html('.b_algo')('h2')('a')]
 
+
     def _extract_links_from_google(self, html):
         return [a.attrib['href'] for a in html('.l')] or \
             [a.attrib['href'] for a in html('.r')('a')]
 
+
     def _extract_links_from_duckduckgo(self, html):
         html.remove_namespaces()
         links_anchors = html.find('a.result__a')
@@ -143,6 +186,7 @@ def _extract_links_from_duckduckgo(self, html):
                 results.append(parsed_url[0])
         return results
 
+
     def _extract_links(self, html, search_engine):
         if search_engine == 'bing':
             return self._extract_links_from_bing(html)
@@ -150,17 +194,43 @@ def _extract_links(self, html, search_engine):
             return self._extract_links_from_duckduckgo(html)
         return self._extract_links_from_google(html)
 
-    def get_proxies(self):
-        proxies = getproxies()
-        filtered_proxies = {}
-        for key, value in proxies.items():
-            if key.startswith('http'):
-                if not value.startswith('http'):
-                    filtered_proxies[key] = 'http://%s' % value
-                else:
-                    filtered_proxies[key] = value
-        return filtered_proxies
 
-    def extract(self):
-        print("Hello extract")
-        pass
+    def get_answer(self, args, links):
+        raise NotImplementedError 
+
+
+    def _get_links_with_cache(self, query):
+        raise NotImplementedError
+
+
+    def get_answers(self, args):
+        """
+        @args: command-line arguments
+        returns: array of answers and their respective metadata
+                False if unable to get answers
+        """
+        question_links = self._get_links_with_cache(args['query'])
+        if not question_links:
+            return False
+
+        answers = []
+        initial_position = args['pos']
+        multiple_answers = (args['num_answers'] > 1 or args['all'])
+
+        for answer_number in range(args['num_answers']):
+            current_position = answer_number + initial_position
+            args['pos'] = current_position
+            link = self.get_link_at_pos(question_links, current_position)
+            answer = self.get_answer(args, question_links)
+            if not answer:
+                continue
+            if not args['link'] and not args['json_output'] and multiple_answers:
+                answer = ANSWER_HEADER.format(link, answer, STAR_HEADER)
+            answer += '\n'
+            answers.append({
+                'answer': answer,
+                'link': link,
+                'position': current_position
+            })
+
+        return answers
diff --git a/howdoi/plugins/stackoverflow.py b/howdoi/plugins/stackoverflow.py
index b6baf039f..5fd38521a 100644
--- a/howdoi/plugins/stackoverflow.py
+++ b/howdoi/plugins/stackoverflow.py
@@ -1,144 +1,61 @@
 import os
 import re
-import sys
 
-import appdirs
-import requests
 from pygments import highlight
 from pygments.formatters.terminal import TerminalFormatter
 from pygments.lexers import get_lexer_by_name, guess_lexer
 from pygments.util import ClassNotFound
-from pyquery import PyQuery as pq
-
 
+from pyquery import PyQuery as pq
 from howdoi.plugins import BasePlugin
 
-if os.getenv('HOWDOI_DISABLE_SSL'):  # Set http instead of https
-    SCHEME = 'http://'
-    VERIFY_SSL_CERTIFICATE = False
-else:
-    SCHEME = 'https://'
-    VERIFY_SSL_CERTIFICATE = True
-
-USER_AGENTS = ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10.7; rv:11.0) Gecko/20100101 Firefox/11.0',
-               'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:22.0) Gecko/20100 101 Firefox/22.0',
-               'Mozilla/5.0 (Windows NT 6.1; rv:11.0) Gecko/20100101 Firefox/11.0',
-               ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_4) AppleWebKit/536.5 (KHTML, like Gecko) '
-                'Chrome/19.0.1084.46 Safari/536.5'),
-               ('Mozilla/5.0 (Windows; Windows NT 6.1) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.46'
-                'Safari/536.5'), )
-
-
-def _random_int(width):
-    bres = os.urandom(width)
-    if sys.version < '3':
-        ires = int(bres.encode('hex'), 16)
-    else:
-        ires = int.from_bytes(bres, 'little')
-
-    return ires
-
-
-def _random_choice(seq):
-    return seq[_random_int(1) % len(seq)]
-
-
-if sys.version < '3':
-    import codecs
-    from urllib import quote as url_quote
-    from urllib import getproxies
-    from urlparse import urlparse, parse_qs
+URL = os.getenv('HOWDOI_URL') or 'stackoverflow.com'
 
-    # Handling Unicode: http://stackoverflow.com/a/6633040/305414
-    def u(x):
-        return codecs.unicode_escape_decode(x)[0]
-else:
-    from urllib.request import getproxies
-    from urllib.parse import quote as url_quote, urlparse, parse_qs
+CACHE_EMPTY_VAL = "NULL"
 
-    def u(x):
-        return x
+NO_ANSWER_MSG = '< no answer given >'
 
 BLOCKED_QUESTION_FRAGMENTS = (
     'webself.cache.googleusercontent.com',
 )
 
-URL = os.getenv('HOWDOI_URL') or 'stackoverflow.com'
-
-
-CACHE_EMPTY_VAL = "NULL"
-CACHE_DIR = appdirs.user_cache_dir('howdoi')
-CACHE_ENTRY_MAX = 128
-NO_ANSWER_MSG = '< no answer given >'
-ANSWER_HEADER = u('{2}  Answer from {0} {2}\n{1}')
-STAR_HEADER = u('\u2605')
-
-
-class BlockError(RuntimeError):
-    pass
-
-howdoi_session = requests.session()
-
+class StackOverflowPlugin(BasePlugin):
+    def _is_question(self, link):
+        for fragment in BLOCKED_QUESTION_FRAGMENTS:
+            if fragment in link:
+                return False
+        return re.search(r'questions/\d+/', link)
 
-def _print_err(x):
-    print("[ERROR] " + x)
 
+    def _get_questions(self, links):
+        return [link for link in links if self._is_question(link)]
 
-_print_ok = print  # noqa: E305
 
+    def _format_output(self, code, args):
+        if not args['color']:
+            return code
+        lexer = None
 
-def _print_dbg(x):
-    print("[DEBUG] " + x)  # noqa: E302
+        # try to find a lexer using the StackOverflow tags
+        # or the query arguments
+        for keyword in args['query'].split() + args['tags']:
+            try:
+                lexer = get_lexer_by_name(keyword)
+                break
+            except ClassNotFound:
+                pass
 
+        # no lexer found above, use the guesser
+        if not lexer:
+            try:
+                lexer = guess_lexer(code)
+            except ClassNotFound:
+                return code
 
-class StackOverflowPlugin(BasePlugin):
-    def search(self, args):
-        return self._get_answers(args)
-
-    def _get_answers(self, args):
-        """
-        @args: command-line arguments
-        returns: array of answers and their respective metadata
-                False if unable to get answers
-        """
-        question_links = self._get_links_with_cache(args['query'])
-        if not question_links:
-            return False
+        return highlight(code,
+                         lexer,
+                         TerminalFormatter(bg='dark'))
 
-        answers = []
-        initial_position = args['pos']
-        multiple_answers = (args['num_answers'] > 1 or args['all'])
-
-        for answer_number in range(args['num_answers']):
-            current_position = answer_number + initial_position
-            args['pos'] = current_position
-            link = self.get_link_at_pos(question_links, current_position)
-            answer = self._get_answer(args, question_links)
-            if not answer:
-                continue
-            if not args['link'] and not args['json_output'] and multiple_answers:
-                answer = ANSWER_HEADER.format(link, answer, STAR_HEADER)
-            answer += '\n'
-            answers.append({
-                'answer': answer,
-                'link': link,
-                'position': current_position
-            })
-
-        return answers
-
-    def _get_links(self, query):
-        search_engine = os.getenv('HOWDOI_SEARCH_ENGINE', 'google')
-        search_url = self._get_search_url(search_engine)
-
-        result = self._get_result(search_url.format(URL, url_quote(query)))
-        if self._is_blocked(result):
-            _print_err('Unable to find an answer because the search engine temporarily blocked the request. '
-                       'Please wait a few minutes or select a different search engine.')
-            raise BlockError("Temporary block by search engine")
-
-        html = pq(result)
-        return self._extract_links(html, search_engine)
 
     def _get_links_with_cache(self, query):
         cache_key = query + "-links"
@@ -153,27 +70,11 @@ def _get_links_with_cache(self, query):
             self.cache.set(cache_key, CACHE_EMPTY_VAL)
 
         question_links = self._get_questions(links)
-        self.cache.set(cache_key, question_links or CACHE_EMPTY_VAL)
 
         return question_links
 
-    def _is_question(self, link):
-        for fragment in BLOCKED_QUESTION_FRAGMENTS:
-            if fragment in link:
-                return False
-        return re.search(r'questions/\d+/', link)
 
-    def _get_result(self, url):
-        try:
-            return howdoi_session.get(url, headers={'User-Agent': _random_choice(USER_AGENTS)},
-                                      proxies=self.get_proxies(),
-                                      verify=VERIFY_SSL_CERTIFICATE).text
-        except requests.exceptions.SSLError as e:
-            _print_err('Encountered an SSL Error. Try using HTTP instead of '
-                       'HTTPS by setting the environment variable "HOWDOI_DISABLE_SSL".\n')
-            raise e
-
-    def _get_answer(self, args, links):
+    def get_answer(self, args, links):
         link = self.get_link_at_pos(links, args['pos'])
         if not link:
             return False
@@ -209,31 +110,3 @@ def _get_answer(self, args, links):
             text = NO_ANSWER_MSG
         text = text.strip()
         return text
-
-    def _format_output(self, code, args):
-        if not args['color']:
-            return code
-        lexer = None
-
-        # try to find a lexer using the StackOverflow tags
-        # or the query arguments
-        for keyword in args['query'].split() + args['tags']:
-            try:
-                lexer = get_lexer_by_name(keyword)
-                break
-            except ClassNotFound:
-                pass
-
-        # no lexer found above, use the guesser
-        if not lexer:
-            try:
-                lexer = guess_lexer(code)
-            except ClassNotFound:
-                return code
-
-        return highlight(code,
-                         lexer,
-                         TerminalFormatter(bg='dark'))
-
-    def _get_questions(self, links):
-        return [link for link in links if self._is_question(link)]
diff --git a/howdoi/utils.py b/howdoi/utils.py
new file mode 100644
index 000000000..2151dd13c
--- /dev/null
+++ b/howdoi/utils.py
@@ -0,0 +1,40 @@
+import os
+import sys
+
+
+if sys.version < '3':
+    import codecs
+    # Handling Unicode: http://stackoverflow.com/a/6633040/305414
+    def u(x):
+        return codecs.unicode_escape_decode(x)[0]
+else:
+    def u(x):
+        return x
+
+
+# rudimentary standardized 3-level log output
+
+
+def _print_err(x):
+    print("[ERROR] " + x)
+
+
+_print_ok = print  # noqa: E305
+
+
+def _print_dbg(x):
+    print("[DEBUG] " + x)  # noqa: E302
+
+
+def _random_int(width):  # random non-negative int built from `width` bytes of os.urandom
+    bres = os.urandom(width)
+    if sys.version_info[0] < 3:  # numeric major version; the string test sys.version < '3' is also true on '10.x'
+        ires = int(bres.encode('hex'), 16)
+    else:
+        ires = int.from_bytes(bres, 'little')
+
+    return ires
+
+
+def _random_choice(seq):
+    return seq[_random_int(1) % len(seq)]

From 40aedbf7d7cec5b09603be7a0a5ac9f898244808 Mon Sep 17 00:00:00 2001
From: Cesare De Cal <hi@cesare.io>
Date: Thu, 25 Jun 2020 12:54:40 +0200
Subject: [PATCH 11/15] Delete settings.json

---
 .vscode/settings.json | 3 ---
 1 file changed, 3 deletions(-)
 delete mode 100644 .vscode/settings.json

diff --git a/.vscode/settings.json b/.vscode/settings.json
deleted file mode 100644
index 0862d6c3d..000000000
--- a/.vscode/settings.json
+++ /dev/null
@@ -1,3 +0,0 @@
-{
-    "python.pythonPath": "/Users/cesaredecal/workspace/Environments/howdoi/bin/python"
-}
\ No newline at end of file

From 4506c35dde21ba9f1eb6d34a1a1477d09d731ffb Mon Sep 17 00:00:00 2001
From: Eyitayo Ogunbiyi <eyitayoogunbiyi@gmail.com>
Date: Thu, 25 Jun 2020 14:39:08 +0100
Subject: [PATCH 12/15] clean up howdoi file and put constants in standalone
 file

---
 .gitignore          |  4 +++-
 howdoi/constants.py | 12 ++++++++++++
 howdoi/howdoi.py    | 31 ++++++++++++-------------------
 3 files changed, 27 insertions(+), 20 deletions(-)
 create mode 100644 howdoi/constants.py

diff --git a/.gitignore b/.gitignore
index 70d7a1e1a..f647f11ba 100644
--- a/.gitignore
+++ b/.gitignore
@@ -185,4 +185,6 @@ fabric.properties
 .ropeproject
 README.html
 .idea
-HOW_TO_RELEASE.txt
\ No newline at end of file
+HOW_TO_RELEASE.txt
+
+.vscode
\ No newline at end of file
diff --git a/howdoi/constants.py b/howdoi/constants.py
new file mode 100644
index 000000000..6724e93d0
--- /dev/null
+++ b/howdoi/constants.py
@@ -0,0 +1,12 @@
+import appdirs
+
+CACHE_EMPTY_VAL = "NULL"
+
+CACHE_DIR = appdirs.user_cache_dir('howdoi')
+
+CACHE_ENTRY_MAX = 128
+
+SUPPORTED_SEARCH_ENGINES = ('google', 'bing', 'duckduckgo')
+
+SUPPORTED_HELP_QUERIES = ['use howdoi', 'howdoi', 'run howdoi',
+                          'do howdoi', 'howdoi howdoi', 'howdoi use howdoi']
diff --git a/howdoi/howdoi.py b/howdoi/howdoi.py
index b75f3bbf7..fe364f1f5 100755
--- a/howdoi/howdoi.py
+++ b/howdoi/howdoi.py
@@ -9,34 +9,26 @@
 ######################################################
 
 from __future__ import print_function
-import gc
-gc.disable()  # noqa: E402
+
 import argparse
-import os
-import appdirs
+import gc
 import json
-import requests
+import os
 import sys
-from . import __version__
 
+import requests
 from cachelib import FileSystemCache, NullCache
+from requests.exceptions import ConnectionError, SSLError
 
-from requests.exceptions import ConnectionError
-from requests.exceptions import SSLError
-
+from howdoi.constants import (CACHE_DIR, CACHE_ENTRY_MAX,
+                              SUPPORTED_HELP_QUERIES, SUPPORTED_SEARCH_ENGINES)
 from howdoi.plugins import StackOverflowPlugin
-from howdoi.utils import _print_ok, _print_err
-
-
-CACHE_EMPTY_VAL = "NULL"
-CACHE_DIR = appdirs.user_cache_dir('howdoi')
-CACHE_ENTRY_MAX = 128
+from howdoi.utils import _print_err, _print_ok
 
+from . import __version__
 
-SUPPORTED_SEARCH_ENGINES = ('google', 'bing', 'duckduckgo')
+gc.disable()  # noqa: E402
 
-SUPPORTED_HELP_QUERIES = ['use howdoi', 'howdoi', 'run howdoi',
-                          'do howdoi', 'howdoi howdoi', 'howdoi use howdoi']
 
 howdoi_session = requests.session()
 
@@ -147,7 +139,8 @@ def get_parser():
                         action='store_true')
     parser.add_argument('-e', '--engine', help='change search engine for this query only (google, bing, duckduckgo)',
                         dest='search_engine', nargs="?", default='google')
-    parser.add_argument('--plugin', help='query a specific plugin (default: stackoverflow)', type=str, default='stackoverflow')
+    parser.add_argument('--plugin', help='query a specific plugin (default: stackoverflow)',
+                        type=str, default='stackoverflow')
     return parser
 
 

From 619d3ea21993ee172bda22dbbf4a87ec594a56f9 Mon Sep 17 00:00:00 2001
From: Eyitayo Ogunbiyi <eyitayoogunbiyi@gmail.com>
Date: Thu, 25 Jun 2020 14:50:44 +0100
Subject: [PATCH 13/15] extracted constants to standalone file

---
 howdoi/constants.py             | 39 ++++++++++++++++++++++
 howdoi/plugins/base.py          | 57 ++++-----------------------------
 howdoi/plugins/stackoverflow.py |  5 +--
 3 files changed, 46 insertions(+), 55 deletions(-)

diff --git a/howdoi/constants.py b/howdoi/constants.py
index 6724e93d0..f88b0f130 100644
--- a/howdoi/constants.py
+++ b/howdoi/constants.py
@@ -1,5 +1,19 @@
+import os
+
 import appdirs
 
+
+def u(x):
+    return x
+
+
+if os.getenv('HOWDOI_DISABLE_SSL'):  # Set http instead of https
+    SCHEME = 'http://'
+    VERIFY_SSL_CERTIFICATE = False
+else:
+    SCHEME = 'https://'
+    VERIFY_SSL_CERTIFICATE = True
+
 CACHE_EMPTY_VAL = "NULL"
 
 CACHE_DIR = appdirs.user_cache_dir('howdoi')
@@ -10,3 +24,28 @@
 
 SUPPORTED_HELP_QUERIES = ['use howdoi', 'howdoi', 'run howdoi',
                           'do howdoi', 'howdoi howdoi', 'howdoi use howdoi']
+
+ANSWER_HEADER = u('{2}  Answer from {0} {2}\n{1}')
+
+STAR_HEADER = u('\u2605')
+
+
+BLOCK_INDICATORS = (
+    'form id="captcha-form"',
+    'This page appears when Google automatically detects requests coming from your computer '
+    'network which appear to be in violation of the <a href="//www.google.com/policies/terms/">Terms of Service'
+)
+
+SEARCH_URLS = {
+    'bing': SCHEME + 'www.bing.com/search?q=site:{0}%20{1}&hl=en',
+    'google': SCHEME + 'www.google.com/search?q=site:{0}%20{1}&hl=en',
+    'duckduckgo': SCHEME + 'duckduckgo.com/?q=site:{0}%20{1}&t=hj&ia=web'
+}
+
+USER_AGENTS = ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10.7; rv:11.0) Gecko/20100101 Firefox/11.0',
+               'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:22.0) Gecko/20100101 Firefox/22.0',
+               'Mozilla/5.0 (Windows NT 6.1; rv:11.0) Gecko/20100101 Firefox/11.0',
+               ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_4) AppleWebKit/536.5 (KHTML, like Gecko) '
+                'Chrome/19.0.1084.46 Safari/536.5'),
+               ('Mozilla/5.0 (Windows; Windows NT 6.1) AppleWebKit/536.5 (KHTML, like Gecko) '
+                'Chrome/19.0.1084.46 Safari/536.5'), )  # split before Chrome so the space before Safari is kept
diff --git a/howdoi/plugins/base.py b/howdoi/plugins/base.py
index 95e80d924..0fe828a44 100644
--- a/howdoi/plugins/base.py
+++ b/howdoi/plugins/base.py
@@ -1,13 +1,15 @@
 import os
-import re
 import sys
 import requests
-import appdirs
 
 from cachelib import FileSystemCache, NullCache
 
 from pyquery import PyQuery as pq
 from howdoi.utils import _print_err, _random_choice
+from howdoi.constants import (
+    VERIFY_SSL_CERTIFICATE, BLOCK_INDICATORS, STAR_HEADER,
+    ANSWER_HEADER, CACHE_ENTRY_MAX, CACHE_DIR, USER_AGENTS, SEARCH_URLS
+)
 
 
 # Handle imports for Python 2 and 3
@@ -28,51 +30,19 @@ def u(x):
         return x
 
 
-CACHE_DIR = appdirs.user_cache_dir('howdoi')
-CACHE_ENTRY_MAX = 128
-
 if os.getenv('HOWDOI_DISABLE_CACHE'):
     cache = NullCache()  # works like an always empty cache
 else:
     cache = FileSystemCache(CACHE_DIR, CACHE_ENTRY_MAX, default_timeout=0)
 
-ANSWER_HEADER = u('{2}  Answer from {0} {2}\n{1}')
-STAR_HEADER = u('\u2605')
-CACHE_EMPTY_VAL = "NULL"
-
-if os.getenv('HOWDOI_DISABLE_SSL'):  # Set http instead of https
-    SCHEME = 'http://'
-    VERIFY_SSL_CERTIFICATE = False
-else:
-    SCHEME = 'https://'
-    VERIFY_SSL_CERTIFICATE = True
-
-BLOCK_INDICATORS = (
-    'form id="captcha-form"',
-    'This page appears when Google automatically detects requests coming from your computer '
-    'network which appear to be in violation of the <a href="//www.google.com/policies/terms/">Terms of Service'
-)
 
 URL = os.getenv('HOWDOI_URL') or 'stackoverflow.com'
 
-SEARCH_URLS = {
-    'bing': SCHEME + 'www.bing.com/search?q=site:{0}%20{1}&hl=en',
-    'google': SCHEME + 'www.google.com/search?q=site:{0}%20{1}&hl=en',
-    'duckduckgo': SCHEME + 'duckduckgo.com/?q=site:{0}%20{1}&t=hj&ia=web'
-}
-
-USER_AGENTS = ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10.7; rv:11.0) Gecko/20100101 Firefox/11.0',
-               'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:22.0) Gecko/20100 101 Firefox/22.0',
-               'Mozilla/5.0 (Windows NT 6.1; rv:11.0) Gecko/20100101 Firefox/11.0',
-               ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_4) AppleWebKit/536.5 (KHTML, like Gecko) '
-                'Chrome/19.0.1084.46 Safari/536.5'),
-               ('Mozilla/5.0 (Windows; Windows NT 6.1) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.46'
-                'Safari/536.5'), )
-
 
 class BlockError(RuntimeError):
     pass
 
+
 howdoi_session = requests.session()
 
 
@@ -82,7 +52,6 @@ def __init__(self, cache=None):
             cache = NullCache()
         self.cache = cache
 
-
     def get_proxies(self):
         proxies = getproxies()
         filtered_proxies = {}
@@ -94,7 +63,6 @@ def get_proxies(self):
                     filtered_proxies[key] = value
         return filtered_proxies
 
-
     def _get_result(self, url):
         try:
             return howdoi_session.get(url, headers={'User-Agent': _random_choice(USER_AGENTS)},
@@ -105,7 +73,6 @@ def _get_result(self, url):
                        'HTTPS by setting the environment variable "HOWDOI_DISABLE_SSL".\n')
             raise e
 
-
     def _get_links(self, query):
         search_engine = os.getenv('HOWDOI_SEARCH_ENGINE', 'google')
         search_url = self._get_search_url(search_engine)
@@ -119,14 +86,12 @@ def _get_links(self, query):
         html = pq(result)
         return self._extract_links(html, search_engine)
 
-
     def _is_blocked(self, page):
         for indicator in BLOCK_INDICATORS:
             if page.find(indicator) != -1:
                 return True
         return False
 
-
     def _add_links_to_text(self, element):
         hyperlinks = element.find('a')
 
@@ -140,7 +105,6 @@ def _add_links_to_text(self, element):
                 replacement = "[{0}]({1})".format(copy, href)
             pquery_object.replace_with(replacement)
 
-
     def get_link_at_pos(self, links, position):
         if not links:
             return False
@@ -150,7 +114,6 @@ def get_link_at_pos(self, links, position):
             link = links[-1]
         return link
 
-
     def get_text(self, element):
         ''' return inner text in pyquery element '''
         self._add_links_to_text(element)
@@ -159,21 +122,17 @@ def get_text(self, element):
         except TypeError:
             return element.text()
 
-
     def _get_search_url(self, search_engine):
         return SEARCH_URLS.get(search_engine, SEARCH_URLS['google'])
 
-
     def _extract_links_from_bing(self, html):
         html.remove_namespaces()
         return [a.attrib['href'] for a in html('.b_algo')('h2')('a')]
 
-
     def _extract_links_from_google(self, html):
         return [a.attrib['href'] for a in html('.l')] or \
             [a.attrib['href'] for a in html('.r')('a')]
 
-
     def _extract_links_from_duckduckgo(self, html):
         html.remove_namespaces()
         links_anchors = html.find('a.result__a')
@@ -186,7 +145,6 @@ def _extract_links_from_duckduckgo(self, html):
                 results.append(parsed_url[0])
         return results
 
-
     def _extract_links(self, html, search_engine):
         if search_engine == 'bing':
             return self._extract_links_from_bing(html)
@@ -194,15 +152,12 @@ def _extract_links(self, html, search_engine):
             return self._extract_links_from_duckduckgo(html)
         return self._extract_links_from_google(html)
 
-
     def get_answer(self, args, links):
-        raise NotImplementedError 
-
+        raise NotImplementedError
 
     def _get_links_with_cache(self, query):
         raise NotImplementedError
 
-
     def get_answers(self, args):
         """
         @args: command-line arguments
diff --git a/howdoi/plugins/stackoverflow.py b/howdoi/plugins/stackoverflow.py
index 5fd38521a..cb11275f7 100644
--- a/howdoi/plugins/stackoverflow.py
+++ b/howdoi/plugins/stackoverflow.py
@@ -19,6 +19,7 @@
     'webself.cache.googleusercontent.com',
 )
 
+
 class StackOverflowPlugin(BasePlugin):
     def _is_question(self, link):
         for fragment in BLOCKED_QUESTION_FRAGMENTS:
@@ -26,11 +27,9 @@ def _is_question(self, link):
                 return False
         return re.search(r'questions/\d+/', link)
 
-
     def _get_questions(self, links):
         return [link for link in links if self._is_question(link)]
 
-
     def _format_output(self, code, args):
         if not args['color']:
             return code
@@ -56,7 +55,6 @@ def _format_output(self, code, args):
                          lexer,
                          TerminalFormatter(bg='dark'))
 
-
     def _get_links_with_cache(self, query):
         cache_key = query + "-links"
         res = self.cache.get(cache_key)
@@ -73,7 +71,6 @@ def _get_links_with_cache(self, query):
 
         return question_links
 
-
     def get_answer(self, args, links):
         link = self.get_link_at_pos(links, args['pos'])
         if not link:

From cd9fd572b8850987e3c3b33a1565a70cd1a08bd8 Mon Sep 17 00:00:00 2001
From: Eyitayo Ogunbiyi <eyitayoogunbiyi@gmail.com>
Date: Thu, 25 Jun 2020 14:56:19 +0100
Subject: [PATCH 14/15] reuse unicode helper u() from howdoi.utils

---
 howdoi/constants.py    | 5 +----
 howdoi/plugins/base.py | 8 --------
 2 files changed, 1 insertion(+), 12 deletions(-)

diff --git a/howdoi/constants.py b/howdoi/constants.py
index f88b0f130..cb4dc91a5 100644
--- a/howdoi/constants.py
+++ b/howdoi/constants.py
@@ -1,10 +1,7 @@
 import os
-
 import appdirs
 
-
-def u(x):
-    return x
+from howdoi.utils import u
 
 
 if os.getenv('HOWDOI_DISABLE_SSL'):  # Set http instead of https
diff --git a/howdoi/plugins/base.py b/howdoi/plugins/base.py
index 0fe828a44..4e224c747 100644
--- a/howdoi/plugins/base.py
+++ b/howdoi/plugins/base.py
@@ -14,21 +14,13 @@
 
 # Handle imports for Python 2 and 3
 if sys.version < '3':
-    import codecs
     from urllib import quote as url_quote
     from urllib import getproxies
     from urlparse import urlparse, parse_qs
-
-    # Handling Unicode: http://stackoverflow.com/a/6633040/305414
-    def u(x):
-        return codecs.unicode_escape_decode(x)[0]
 else:
     from urllib.request import getproxies
     from urllib.parse import quote as url_quote, urlparse, parse_qs
 
-    def u(x):
-        return x
-
 
 if os.getenv('HOWDOI_DISABLE_CACHE'):
     cache = NullCache()  # works like an always empty cache

From 53d0b71934c67c1501b52993be00d7f09bf86df5 Mon Sep 17 00:00:00 2001
From: Eyitayo Ogunbiyi <eyitayoogunbiyi@gmail.com>
Date: Thu, 25 Jun 2020 14:56:55 +0100
Subject: [PATCH 15/15] applying linting on utils.py

---
 howdoi/utils.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/howdoi/utils.py b/howdoi/utils.py
index 2151dd13c..a60682430 100644
--- a/howdoi/utils.py
+++ b/howdoi/utils.py
@@ -5,6 +5,7 @@
 if sys.version < '3':
     import codecs
     # Handling Unicode: http://stackoverflow.com/a/6633040/305414
+
     def u(x):
         return codecs.unicode_escape_decode(x)[0]
 else: