From 48800357693c069c2ec5139d18f8e52383450faf Mon Sep 17 00:00:00 2001
From: Alexandre Harano <email@ayharano.dev>
Date: Tue, 10 Oct 2023 18:58:55 -0300
Subject: [PATCH 01/11] =?UTF-8?q?Add=20Vit=C3=B3ria-ES=20spider?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

resolve okfn-brasil/querido-diario#750
---
 .../gazette/spiders/es/es_vitoria.py          | 236 ++++++++++++++++++
 1 file changed, 236 insertions(+)
 create mode 100644 data_collection/gazette/spiders/es/es_vitoria.py

diff --git a/data_collection/gazette/spiders/es/es_vitoria.py b/data_collection/gazette/spiders/es/es_vitoria.py
new file mode 100644
index 000000000..de73e3ec9
--- /dev/null
+++ b/data_collection/gazette/spiders/es/es_vitoria.py
@@ -0,0 +1,236 @@
+from datetime import date, datetime
+
+from scrapy import FormRequest, Request
+
+from gazette.items import Gazette
+from gazette.spiders.base import BaseGazetteSpider
+
+BASE_URL = "https://diariooficial.vitoria.es.gov.br/"
+
+
+class EsVitoriaSpider(BaseGazetteSpider):
+    name = "es_vitoria"
+    TERRITORY_ID = "3205309"
+    start_date = date(2014, 7, 21)
+
+    allowed_domains = ["diariooficial.vitoria.es.gov.br"]
+
+    # When there are too many requests, the server may return
+    # an HTTP 406 status code when trying to download a PDF file
+    #
+    # We set `custom_settings` to avoid triggering the 406 HTTP status code
+    # by spreading the downloads for this spider over time
+
+    custom_settings = {
+        "DOWNLOAD_DELAY": 0.3,  # 300 ms
+        "RANDOMIZE_DOWNLOAD_DELAY": True,
+        "RETRY_HTTP_CODES": [500, 502, 503, 504, 522, 524, 408, 429, 406],
+    }
+
+    def __init__(self, *args, **kwargs):
+        super(EsVitoriaSpider, self).__init__(*args, **kwargs)
+
+        # Period queried for gazette source is based on specific year-month
+        # Within a queried period, it has a paging mechanism that can spread multiple files of the same date
+        # We collect all the entries for the year-month period to then generate the gazette entries
+
+        # Considering the above descrition, we use a dict named `data_by_monthly_date_by_date`
+        # with its keys composed by a 2-tuple
+        #     - year
+        #     - month
+        # and its items is another nested dict composed by
+        #     - gazette_date
+        # and its items is a list of str representing the URL of the collected files for that date
+
+        # e.g.
+        # data_by_monthly_date_by_date = {
+        #     (2022, 12): {
+        #         date(2022, 12, 2): [
+        #             "https://diariooficial.vitoria.es.gov.br/ExibirArquivo.aspx"
+        #             "?qs=nnmrXIDe5L4hR81FZwDXlD95Q%2fWHOCtXgeCw%2fnRIrFMxQA7S5mwuf0RM3mOCPGtiwqKwtsQd8WTWmli6Dukj2duE%2bcjGeiOYdOhFAaD2d4lajnB7Bs8eXyta5UTj79FJ",
+        #             "https://diariooficial.vitoria.es.gov.br/ExibirArquivo.aspx"
+        #             "?qs=nnmrXIDe5L4hR81FZwDXlD95Q%2fWHOCtXgeCw%2fnRIrFMxQA7S5mwuf0RM3mOCPGtiwqKwtsQd8WTWmli6Dukj2duE%2bcjGeiOY4xkUuS2BQabum9G9l8gOaMHLbesi83TO",
+        #         ]
+        #     }
+        # }
+
+        self.data_by_monthly_date_by_date = {}
+
+    def start_requests(self):
+        url = BASE_URL
+
+        today = date.today()
+        year = today.year
+        month = today.month
+
+        yield Request(
+            url=url,
+            callback=self.initial_parse,
+            meta={"cookiejar": f"{self.name}_{year}_{month}"},
+        )
+
+    def initial_parse(self, response):
+        year_select = response.xpath("//select[contains(@id, 'ddlAno')]")
+        year_formkey = year_select.attrib["name"]
+        years_available = map(int, year_select.xpath("./option/@value").getall())
+        chosen_year = int(
+            year_select.xpath("./option[contains(@selected, 'selected')]/@value").get()
+        )
+
+        for year in years_available:
+            if year < self.start_date.year or self.end_date.year < year:
+                continue
+
+            if year == chosen_year:
+                yield from self.parse_year(response, year)
+                continue
+
+            yield FormRequest.from_response(
+                response,
+                formdata={year_formkey: str(year)},
+                callback=self.parse_year,
+                cb_kwargs={"year": year},
+                # We are isolating cookiejar per name-year-month combination
+                # to avoid interference between concurrent requests
+                # Whenever we request a past year, it sets the month to December
+                meta={"cookiejar": f"{self.name}_{year}_12"},
+            )
+
+    def parse_year(self, response, year):
+        year_select = response.xpath("//select[contains(@id, 'ddlAno')]")
+        year_formkey = year_select.attrib["name"]
+
+        month_select = response.xpath("//select[contains(@id, 'ddlMes')]")
+        month_formkey = month_select.attrib["name"]
+
+        chosen_month = int(
+            month_select.xpath("./option[contains(@selected, 'selected')]/@value").get()
+        )
+
+        first_day_of_start_date_month = date(
+            self.start_date.year, self.start_date.month, 1
+        )
+
+        for month in range(1, 13):
+            first_day_of_month = date(year, month, 1)
+            if (
+                first_day_of_month < first_day_of_start_date_month
+                or self.end_date < first_day_of_month
+            ):
+                continue
+
+            current_year_month = (year, month)
+
+            if month == chosen_month:
+                yield from self.parse_editions_list(response, current_year_month)
+                continue
+
+            formdata = {
+                "__EVENTTARGET": month_formkey,
+                "__EVENTARGUMENT": "",
+                year_formkey: str(year),
+                month_formkey: str(month),
+            }
+            yield FormRequest.from_response(
+                response,
+                formdata=formdata,
+                callback=self.parse_editions_list,
+                cb_kwargs={
+                    "current_year_month": current_year_month,
+                },
+                # We are isolating cookiejar per name-year-month combination
+                # to avoid interference between concurrent requests
+                meta={"cookiejar": f"{self.name}_{year}_{month}"},
+            )
+
+    def parse_editions_list(
+        self,
+        response,
+        current_year_month,  # (year, month)
+        current_page=1,
+    ):
+        year_select = response.xpath("//select[contains(@id, 'ddlAno')]")
+        year_formkey = year_select.attrib["name"]
+
+        month_select = response.xpath("//select[contains(@id, 'ddlMes')]")
+        month_formkey = month_select.attrib["name"]
+
+        year, month = current_year_month
+
+        for row in response.xpath(
+            "//ancestor::a[span[contains(@id, '_grdArquivos_')]]"
+        ):
+            raw_string = row.xpath("./span/text()").get()
+            date_string_from_text = raw_string.split()[-1]
+            gazette_date = self._parse_date(date_string_from_text)
+
+            if not gazette_date:
+                self.logger.warning(
+                    f"No valid date could be extracted from '{raw_string}'"
+                )
+                continue
+
+            if gazette_date > self.end_date:
+                continue
+            elif gazette_date < self.start_date:
+                return
+
+            if gazette_date.timetuple()[:2] != current_year_month:
+                self.logger.warning(
+                    f"Found {gazette_date.isoformat()} gazette while querying"
+                    f" for {current_year_month[0]}-{current_year_month[1]:02}"
+                    f" period. Skipping..."
+                )
+                continue
+
+            url = response.urljoin(row.attrib["href"])
+
+            file_urls = self.data_by_monthly_date_by_date.setdefault(
+                current_year_month, {}
+            ).setdefault(gazette_date, [])
+
+            if url not in file_urls:
+                # We use this strategy to avoid duplicates while maintaining row order
+                file_urls.append(url)
+
+        number_of_pages = len(
+            response.xpath("//ul[contains(@class, 'pagination')]/li").getall()
+        )
+
+        if current_page < number_of_pages:
+            formdata = {
+                "__EVENTARGUMENT": f"Page${current_page + 1}",
+                "__EVENTTARGET": "ctl00$conteudo$ucPesquisarDiarioOficial$grdArquivos",
+                year_formkey: str(year),
+                month_formkey: str(month),
+            }
+
+            yield FormRequest.from_response(
+                response,
+                formdata=formdata,
+                callback=self.parse_editions_list,
+                cb_kwargs={
+                    "current_year_month": current_year_month,
+                    "current_page": current_page + 1,
+                },
+                # We keep using the same cookiejar for the name_year_month combination
+                # because, if we don't, it can interfere with the paging data for
+                # a different name_year_month combination
+                meta={"cookiejar": f"{self.name}_{year}_{month}"},
+            )
+        else:
+            # After all the entries of the queried year-month period were collected,
+            # we finally yield the Gazette per date within that month
+            current_year_month_data = self.data_by_monthly_date_by_date.get(
+                current_year_month, {}
+            )
+            for gazette_date, file_urls in current_year_month_data.items():
+                yield Gazette(
+                    date=gazette_date,
+                    is_extra_edition=False,
+                    file_urls=file_urls,
+                    power="executive",
+                )
+
+    def _parse_date(self, raw_date):
+        return datetime.strptime(raw_date, "%d/%m/%Y").date()

From 1c9b41baf2299a76892f6b3b33942052598f2e30 Mon Sep 17 00:00:00 2001
From: trevineju <julianabtrevine@gmail.com>
Date: Mon, 13 Jan 2025 02:01:35 -0300
Subject: [PATCH 02/11] =?UTF-8?q?remo=C3=A7=C3=A3o=20de=20vari=C3=A1vel=20?=
 =?UTF-8?q?usada=20uma=20=C3=BAnica=20vez?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 data_collection/gazette/spiders/es/es_vitoria.py | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/data_collection/gazette/spiders/es/es_vitoria.py b/data_collection/gazette/spiders/es/es_vitoria.py
index de73e3ec9..1d20606b2 100644
--- a/data_collection/gazette/spiders/es/es_vitoria.py
+++ b/data_collection/gazette/spiders/es/es_vitoria.py
@@ -5,8 +5,6 @@
 from gazette.items import Gazette
 from gazette.spiders.base import BaseGazetteSpider
 
-BASE_URL = "https://diariooficial.vitoria.es.gov.br/"
-
 
 class EsVitoriaSpider(BaseGazetteSpider):
     name = "es_vitoria"
@@ -57,14 +55,12 @@ def __init__(self, *args, **kwargs):
         self.data_by_monthly_date_by_date = {}
 
     def start_requests(self):
-        url = BASE_URL
-
         today = date.today()
         year = today.year
         month = today.month
 
         yield Request(
-            url=url,
+            "https://diariooficial.vitoria.es.gov.br/",
             callback=self.initial_parse,
             meta={"cookiejar": f"{self.name}_{year}_{month}"},
         )
@@ -157,10 +153,8 @@ def parse_editions_list(
 
         year, month = current_year_month
 
-        for row in response.xpath(
-            "//ancestor::a[span[contains(@id, '_grdArquivos_')]]"
-        ):
-            raw_string = row.xpath("./span/text()").get()
+        for row in response.xpath("//tbody//td/a[1]"):
+            raw_string = row.css("span::text")[0].get()
             date_string_from_text = raw_string.split()[-1]
             gazette_date = self._parse_date(date_string_from_text)
 

From e424be9654d57ed670d5e83321551972dda4392b Mon Sep 17 00:00:00 2001
From: trevineju <julianabtrevine@gmail.com>
Date: Mon, 13 Jan 2025 02:04:08 -0300
Subject: [PATCH 03/11] =?UTF-8?q?remo=C3=A7=C3=A3o=20de=20=5F=5Finit=5F=5F?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../gazette/spiders/es/es_vitoria.py          | 35 +++----------------
 1 file changed, 5 insertions(+), 30 deletions(-)

diff --git a/data_collection/gazette/spiders/es/es_vitoria.py b/data_collection/gazette/spiders/es/es_vitoria.py
index 1d20606b2..0b96f4292 100644
--- a/data_collection/gazette/spiders/es/es_vitoria.py
+++ b/data_collection/gazette/spiders/es/es_vitoria.py
@@ -25,47 +25,22 @@ class EsVitoriaSpider(BaseGazetteSpider):
         "RETRY_HTTP_CODES": [500, 502, 503, 504, 522, 524, 408, 429, 406],
     }
 
-    def __init__(self, *args, **kwargs):
-        super(EsVitoriaSpider, self).__init__(*args, **kwargs)
-
-        # Period queried for gazette source is based on specific year-month
-        # Within a queried period, it has a paging mechanism that can spread multiple files of the same date
-        # We collect all the entries for the year-month period to then generate the gazette entries
-
-        # Considering the above descrition, we use a dict named `data_by_monthly_date_by_date`
-        # with its keys composed by a 2-tuple
-        #     - year
-        #     - month
-        # and its items is another nested dict composed by
-        #     - gazette_date
-        # and its items is a list of str representing the URL of the collected files for that date
-
-        # e.g.
-        # data_by_monthly_date_by_date = {
-        #     (2022, 12): {
-        #         date(2022, 12, 2): [
-        #             "https://diariooficial.vitoria.es.gov.br/ExibirArquivo.aspx"
-        #             "?qs=nnmrXIDe5L4hR81FZwDXlD95Q%2fWHOCtXgeCw%2fnRIrFMxQA7S5mwuf0RM3mOCPGtiwqKwtsQd8WTWmli6Dukj2duE%2bcjGeiOYdOhFAaD2d4lajnB7Bs8eXyta5UTj79FJ",
-        #             "https://diariooficial.vitoria.es.gov.br/ExibirArquivo.aspx"
-        #             "?qs=nnmrXIDe5L4hR81FZwDXlD95Q%2fWHOCtXgeCw%2fnRIrFMxQA7S5mwuf0RM3mOCPGtiwqKwtsQd8WTWmli6Dukj2duE%2bcjGeiOY4xkUuS2BQabum9G9l8gOaMHLbesi83TO",
-        #         ]
-        #     }
-        # }
+    data_by_monthly_date_by_date = None
 
+    def start_requests(self):
         self.data_by_monthly_date_by_date = {}
 
-    def start_requests(self):
         today = date.today()
         year = today.year
         month = today.month
 
         yield Request(
             "https://diariooficial.vitoria.es.gov.br/",
-            callback=self.initial_parse,
-            meta={"cookiejar": f"{self.name}_{year}_{month}"},
+            callback=self.make_year_request,
+            meta={"cookiejar": f"{self.name}_{year}_{month}"},  # é necessário?
         )
 
-    def initial_parse(self, response):
+    def make_year_request(self, response):
         year_select = response.xpath("//select[contains(@id, 'ddlAno')]")
         year_formkey = year_select.attrib["name"]
         years_available = map(int, year_select.xpath("./option/@value").getall())

From 707707ea75825dd9e84458d27e5af973d4d15386 Mon Sep 17 00:00:00 2001
From: trevineju <julianabtrevine@gmail.com>
Date: Mon, 13 Jan 2025 02:54:03 -0300
Subject: [PATCH 04/11] =?UTF-8?q?Atualiza=20m=C3=A9todos=20de=20requisi?=
 =?UTF-8?q?=C3=A7=C3=B5es=20intermedi=C3=A1rias=20para=20evitar=20controle?=
 =?UTF-8?q?s=20de=20data=20ao=20carregar=20informa=C3=A7=C3=B5es=20no=20co?=
 =?UTF-8?q?okiejar?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../gazette/spiders/es/es_vitoria.py          | 87 ++++++-------------
 1 file changed, 26 insertions(+), 61 deletions(-)

diff --git a/data_collection/gazette/spiders/es/es_vitoria.py b/data_collection/gazette/spiders/es/es_vitoria.py
index 0b96f4292..386f66dce 100644
--- a/data_collection/gazette/spiders/es/es_vitoria.py
+++ b/data_collection/gazette/spiders/es/es_vitoria.py
@@ -1,5 +1,6 @@
 from datetime import date, datetime
 
+from dateutil.rrule import MONTHLY, rrule, rruleset
 from scrapy import FormRequest, Request
 
 from gazette.items import Gazette
@@ -43,83 +44,47 @@ def start_requests(self):
     def make_year_request(self, response):
         year_select = response.xpath("//select[contains(@id, 'ddlAno')]")
         year_formkey = year_select.attrib["name"]
-        years_available = map(int, year_select.xpath("./option/@value").getall())
-        chosen_year = int(
-            year_select.xpath("./option[contains(@selected, 'selected')]/@value").get()
-        )
-
-        for year in years_available:
-            if year < self.start_date.year or self.end_date.year < year:
-                continue
 
-            if year == chosen_year:
-                yield from self.parse_year(response, year)
-                continue
+        monthly_dates = rruleset()
+        monthly_dates.rrule(
+            rrule(MONTHLY, dtstart=self.start_date, until=self.end_date, bymonthday=[1])
+        )
+        monthly_dates.rdate(date(self.start_date.year, self.start_date.month, 1))
 
+        for monthly_date in monthly_dates:
             yield FormRequest.from_response(
                 response,
-                formdata={year_formkey: str(year)},
-                callback=self.parse_year,
-                cb_kwargs={"year": year},
-                # We are isolating cookiejar per name-year-month combination
+                formdata={year_formkey: str(monthly_date.year)},
+                callback=self.make_month_request,
+                # We are isolating cookiejar like (year, month) combination
                 # to avoid interference between concurrent requests
-                # Whenever we request a past year, it sets the month to December
-                meta={"cookiejar": f"{self.name}_{year}_12"},
+                meta={"cookiejar": (monthly_date.year, monthly_date.month)},
             )
 
-    def parse_year(self, response, year):
+    def make_month_request(self, response):
         year_select = response.xpath("//select[contains(@id, 'ddlAno')]")
         year_formkey = year_select.attrib["name"]
 
         month_select = response.xpath("//select[contains(@id, 'ddlMes')]")
         month_formkey = month_select.attrib["name"]
 
-        chosen_month = int(
-            month_select.xpath("./option[contains(@selected, 'selected')]/@value").get()
-        )
-
-        first_day_of_start_date_month = date(
-            self.start_date.year, self.start_date.month, 1
-        )
-
-        for month in range(1, 13):
-            first_day_of_month = date(year, month, 1)
-            if (
-                first_day_of_month < first_day_of_start_date_month
-                or self.end_date < first_day_of_month
-            ):
-                continue
+        year, month = response.meta.get("cookiejar")
 
-            current_year_month = (year, month)
+        formdata = {
+            "__EVENTTARGET": month_formkey,
+            "__EVENTARGUMENT": "",
+            year_formkey: str(year),
+            month_formkey: str(month),
+        }
 
-            if month == chosen_month:
-                yield from self.parse_editions_list(response, current_year_month)
-                continue
-
-            formdata = {
-                "__EVENTTARGET": month_formkey,
-                "__EVENTARGUMENT": "",
-                year_formkey: str(year),
-                month_formkey: str(month),
-            }
-            yield FormRequest.from_response(
-                response,
-                formdata=formdata,
-                callback=self.parse_editions_list,
-                cb_kwargs={
-                    "current_year_month": current_year_month,
-                },
-                # We are isolating cookiejar per name-year-month combination
-                # to avoid interference between concurrent requests
-                meta={"cookiejar": f"{self.name}_{year}_{month}"},
-            )
+        yield FormRequest.from_response(
+            response,
+            formdata=formdata,
+            callback=self.parse_editions_list,
+            meta={"cookiejar": response.meta.get("cookiejar")},
+        )
 
-    def parse_editions_list(
-        self,
-        response,
-        current_year_month,  # (year, month)
-        current_page=1,
-    ):
+    def parse_editions_list(self, response, current_page=1):
         year_select = response.xpath("//select[contains(@id, 'ddlAno')]")
         year_formkey = year_select.attrib["name"]
 

From ed14bd665714eddd09ea3b26423892f7aa30f08b Mon Sep 17 00:00:00 2001
From: trevineju <julianabtrevine@gmail.com>
Date: Mon, 13 Jan 2025 04:06:34 -0300
Subject: [PATCH 05/11] =?UTF-8?q?Atualiza=20c=C3=B3digo=20para=20coletar?=
 =?UTF-8?q?=20par=C3=A2metros=20de=20formul=C3=A1rio=20apenas=20uma=20vez?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../gazette/spiders/es/es_vitoria.py          | 43 +++++++++----------
 1 file changed, 21 insertions(+), 22 deletions(-)

diff --git a/data_collection/gazette/spiders/es/es_vitoria.py b/data_collection/gazette/spiders/es/es_vitoria.py
index 386f66dce..278b95ce7 100644
--- a/data_collection/gazette/spiders/es/es_vitoria.py
+++ b/data_collection/gazette/spiders/es/es_vitoria.py
@@ -26,7 +26,8 @@ class EsVitoriaSpider(BaseGazetteSpider):
         "RETRY_HTTP_CODES": [500, 502, 503, 504, 522, 524, 408, 429, 406],
     }
 
-    data_by_monthly_date_by_date = None
+    FORM_PARAM_YEAR = None
+    FORM_PARAM_MONTH = None
 
     def start_requests(self):
         self.data_by_monthly_date_by_date = {}
@@ -41,9 +42,8 @@ def start_requests(self):
             meta={"cookiejar": f"{self.name}_{year}_{month}"},  # é necessário?
         )
 
-    def make_year_request(self, response):
-        year_select = response.xpath("//select[contains(@id, 'ddlAno')]")
-        year_formkey = year_select.attrib["name"]
+    def make_year_request(self, response):   
+        self.set_form_params(response)     
 
         monthly_dates = rruleset()
         monthly_dates.rrule(
@@ -52,29 +52,36 @@ def make_year_request(self, response):
         monthly_dates.rdate(date(self.start_date.year, self.start_date.month, 1))
 
         for monthly_date in monthly_dates:
+            
+            formdata={
+                self.FORM_PARAM_YEAR: str(monthly_date.year)
+            }
+
             yield FormRequest.from_response(
                 response,
-                formdata={year_formkey: str(monthly_date.year)},
+                formdata=formdata,
                 callback=self.make_month_request,
                 # We are isolating cookiejar like (year, month) combination
                 # to avoid interference between concurrent requests
                 meta={"cookiejar": (monthly_date.year, monthly_date.month)},
             )
 
-    def make_month_request(self, response):
+    def set_form_params(self, response):
         year_select = response.xpath("//select[contains(@id, 'ddlAno')]")
-        year_formkey = year_select.attrib["name"]
+        self.FORM_PARAM_YEAR = year_select.attrib["name"]
 
         month_select = response.xpath("//select[contains(@id, 'ddlMes')]")
-        month_formkey = month_select.attrib["name"]
+        self.FORM_PARAM_MONTH = month_select.attrib["name"]
+
 
+    def make_month_request(self, response):       
         year, month = response.meta.get("cookiejar")
 
         formdata = {
-            "__EVENTTARGET": month_formkey,
+            "__EVENTTARGET": self.FORM_PARAM_MONTH,
             "__EVENTARGUMENT": "",
-            year_formkey: str(year),
-            month_formkey: str(month),
+            self.FORM_PARAM_YEAR: str(year),
+            self.FORM_PARAM_MONTH: str(month),
         }
 
         yield FormRequest.from_response(
@@ -85,14 +92,6 @@ def make_month_request(self, response):
         )
 
     def parse_editions_list(self, response, current_page=1):
-        year_select = response.xpath("//select[contains(@id, 'ddlAno')]")
-        year_formkey = year_select.attrib["name"]
-
-        month_select = response.xpath("//select[contains(@id, 'ddlMes')]")
-        month_formkey = month_select.attrib["name"]
-
-        year, month = current_year_month
-
         for row in response.xpath("//tbody//td/a[1]"):
             raw_string = row.css("span::text")[0].get()
             date_string_from_text = raw_string.split()[-1]
@@ -133,10 +132,10 @@ def parse_editions_list(self, response, current_page=1):
 
         if current_page < number_of_pages:
             formdata = {
-                "__EVENTARGUMENT": f"Page${current_page + 1}",
+                "__EVENTARGUMENT": f"Page${next_page}",
                 "__EVENTTARGET": "ctl00$conteudo$ucPesquisarDiarioOficial$grdArquivos",
-                year_formkey: str(year),
-                month_formkey: str(month),
+                self.FORM_PARAM_YEAR: str(year),
+                self.FORM_PARAM_MONTH: str(month),
             }
 
             yield FormRequest.from_response(

From 17200a5c5b3332f07d2f3cdf77f9fcb50060ca4b Mon Sep 17 00:00:00 2001
From: trevineju <julianabtrevine@gmail.com>
Date: Mon, 13 Jan 2025 04:09:18 -0300
Subject: [PATCH 06/11] =?UTF-8?q?Atualiza=20l=C3=B3gica=20de=20pagina?=
 =?UTF-8?q?=C3=A7=C3=A3o?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../gazette/spiders/es/es_vitoria.py          | 81 +++++--------------
 1 file changed, 20 insertions(+), 61 deletions(-)

diff --git a/data_collection/gazette/spiders/es/es_vitoria.py b/data_collection/gazette/spiders/es/es_vitoria.py
index 278b95ce7..566e50be0 100644
--- a/data_collection/gazette/spiders/es/es_vitoria.py
+++ b/data_collection/gazette/spiders/es/es_vitoria.py
@@ -93,44 +93,25 @@ def make_month_request(self, response):
 
     def parse_editions_list(self, response, current_page=1):
         for row in response.xpath("//tbody//td/a[1]"):
-            raw_string = row.css("span::text")[0].get()
-            date_string_from_text = raw_string.split()[-1]
-            gazette_date = self._parse_date(date_string_from_text)
+            raw_date = row.css("span::text")[0].get().split()[-1]
+            gazette_date = datetime.strptime(raw_date, "%d/%m/%Y").date()
 
-            if not gazette_date:
-                self.logger.warning(
-                    f"No valid date could be extracted from '{raw_string}'"
-                )
-                continue
-
-            if gazette_date > self.end_date:
-                continue
-            elif gazette_date < self.start_date:
-                return
-
-            if gazette_date.timetuple()[:2] != current_year_month:
-                self.logger.warning(
-                    f"Found {gazette_date.isoformat()} gazette while querying"
-                    f" for {current_year_month[0]}-{current_year_month[1]:02}"
-                    f" period. Skipping..."
+            if self.start_date <= gazette_date <= self.end_date:
+                url = response.urljoin(row.css("a").attrib["href"])
+                
+                yield Gazette(
+                    date=gazette_date,
+                    edition_number="",
+                    is_extra_edition=False,
+                    file_urls=[url],
+                    power="executive",
                 )
-                continue
-
-            url = response.urljoin(row.attrib["href"])
-
-            file_urls = self.data_by_monthly_date_by_date.setdefault(
-                current_year_month, {}
-            ).setdefault(gazette_date, [])
-
-            if url not in file_urls:
-                # We use this strategy to avoid duplicates while maintaining row order
-                file_urls.append(url)
-
-        number_of_pages = len(
-            response.xpath("//ul[contains(@class, 'pagination')]/li").getall()
-        )
-
-        if current_page < number_of_pages:
+        
+        has_next_page = response.css(".pagination li")[-1].css("a::text").get() is not None        
+        if has_next_page:
+            next_page = current_page + 1
+            year, month = response.meta.get("cookiejar")
+            
             formdata = {
                 "__EVENTARGUMENT": f"Page${next_page}",
                 "__EVENTTARGET": "ctl00$conteudo$ucPesquisarDiarioOficial$grdArquivos",
@@ -142,28 +123,6 @@ def parse_editions_list(self, response, current_page=1):
                 response,
                 formdata=formdata,
                 callback=self.parse_editions_list,
-                cb_kwargs={
-                    "current_year_month": current_year_month,
-                    "current_page": current_page + 1,
-                },
-                # We keep using the same cookiejar for the name_year_month combination
-                # because, if we don't, it can interfere with the paging data for
-                # a different name_year_month combination
-                meta={"cookiejar": f"{self.name}_{year}_{month}"},
-            )
-        else:
-            # After all the entries of the queried year-month period were collected,
-            # we finally yield the Gazette per date within that month
-            current_year_month_data = self.data_by_monthly_date_by_date.get(
-                current_year_month, {}
-            )
-            for gazette_date, file_urls in current_year_month_data.items():
-                yield Gazette(
-                    date=gazette_date,
-                    is_extra_edition=False,
-                    file_urls=file_urls,
-                    power="executive",
-                )
-
-    def _parse_date(self, raw_date):
-        return datetime.strptime(raw_date, "%d/%m/%Y").date()
+                cb_kwargs={"current_page": next_page},
+                meta={"cookiejar": response.meta.get("cookiejar")},
+            )
\ No newline at end of file

From 88a25c11ff981929fa62b51f7bd3347dc88207e6 Mon Sep 17 00:00:00 2001
From: trevineju <julianabtrevine@gmail.com>
Date: Mon, 13 Jan 2025 04:34:05 -0300
Subject: [PATCH 07/11] =?UTF-8?q?Aplica=20modifica=C3=A7=C3=B5es=20do=20li?=
 =?UTF-8?q?nter?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../gazette/spiders/es/es_vitoria.py          | 41 ++++++++-----------
 1 file changed, 16 insertions(+), 25 deletions(-)

diff --git a/data_collection/gazette/spiders/es/es_vitoria.py b/data_collection/gazette/spiders/es/es_vitoria.py
index 566e50be0..ee5e70620 100644
--- a/data_collection/gazette/spiders/es/es_vitoria.py
+++ b/data_collection/gazette/spiders/es/es_vitoria.py
@@ -9,10 +9,9 @@
 
 class EsVitoriaSpider(BaseGazetteSpider):
     name = "es_vitoria"
-    TERRITORY_ID = "3205309"
-    start_date = date(2014, 7, 21)
-
+    TERRITORY_ID = "3205309"    
     allowed_domains = ["diariooficial.vitoria.es.gov.br"]
+    start_date = date(2014, 7, 21)
 
     # When there are too many requests, the server may return
     # an HTTP 406 status code when trying to download a PDF file
@@ -30,19 +29,12 @@ class EsVitoriaSpider(BaseGazetteSpider):
     FORM_PARAM_MONTH = None
 
     def start_requests(self):
-        self.data_by_monthly_date_by_date = {}
-
-        today = date.today()
-        year = today.year
-        month = today.month
-
         yield Request(
             "https://diariooficial.vitoria.es.gov.br/",
             callback=self.make_year_request,
-            meta={"cookiejar": f"{self.name}_{year}_{month}"},  # é necessário?
         )
 
-    def make_year_request(self, response):   
+    def make_year_request(self, response):
         self.set_form_params(response)     
 
         monthly_dates = rruleset()
@@ -52,10 +44,7 @@ def make_year_request(self, response):
         monthly_dates.rdate(date(self.start_date.year, self.start_date.month, 1))
 
         for monthly_date in monthly_dates:
-            
-            formdata={
-                self.FORM_PARAM_YEAR: str(monthly_date.year)
-            }
+            formdata = {self.FORM_PARAM_YEAR: str(monthly_date.year)}
 
             yield FormRequest.from_response(
                 response,
@@ -74,14 +63,14 @@ def set_form_params(self, response):
         self.FORM_PARAM_MONTH = month_select.attrib["name"]
 
 
-    def make_month_request(self, response):       
+    def make_month_request(self, response):
         year, month = response.meta.get("cookiejar")
 
         formdata = {
-            "__EVENTTARGET": self.FORM_PARAM_MONTH,
-            "__EVENTARGUMENT": "",
             self.FORM_PARAM_YEAR: str(year),
             self.FORM_PARAM_MONTH: str(month),
+            "__EVENTTARGET": self.FORM_PARAM_MONTH,
+            "__EVENTARGUMENT": "",
         }
 
         yield FormRequest.from_response(
@@ -98,7 +87,7 @@ def parse_editions_list(self, response, current_page=1):
 
             if self.start_date <= gazette_date <= self.end_date:
                 url = response.urljoin(row.css("a").attrib["href"])
-                
+
                 yield Gazette(
                     date=gazette_date,
                     edition_number="",
@@ -106,17 +95,19 @@ def parse_editions_list(self, response, current_page=1):
                     file_urls=[url],
                     power="executive",
                 )
-        
-        has_next_page = response.css(".pagination li")[-1].css("a::text").get() is not None        
+
+        has_next_page = (
+            response.css(".pagination li")[-1].css("a::text").get() is not None
+        )
         if has_next_page:
             next_page = current_page + 1
             year, month = response.meta.get("cookiejar")
-            
+
             formdata = {
-                "__EVENTARGUMENT": f"Page${next_page}",
-                "__EVENTTARGET": "ctl00$conteudo$ucPesquisarDiarioOficial$grdArquivos",
                 self.FORM_PARAM_YEAR: str(year),
                 self.FORM_PARAM_MONTH: str(month),
+                "__EVENTTARGET": self.FORM_PARAM_PAGINATION,
+                "__EVENTARGUMENT": f"Page${next_page}",
             }
 
             yield FormRequest.from_response(
@@ -125,4 +116,4 @@ def parse_editions_list(self, response, current_page=1):
                 callback=self.parse_editions_list,
                 cb_kwargs={"current_page": next_page},
                 meta={"cookiejar": response.meta.get("cookiejar")},
-            )
\ No newline at end of file
+            )

From 034395a9fef444982f4cc0f2d4192550b5bcc8c6 Mon Sep 17 00:00:00 2001
From: trevineju <julianabtrevine@gmail.com>
Date: Mon, 13 Jan 2025 04:35:11 -0300
Subject: [PATCH 08/11] =?UTF-8?q?Torna=20par=C3=A2metros=20do=20formul?=
 =?UTF-8?q?=C3=A1rio=20hardcoded?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../gazette/spiders/es/es_vitoria.py            | 17 ++++-------------
 1 file changed, 4 insertions(+), 13 deletions(-)

diff --git a/data_collection/gazette/spiders/es/es_vitoria.py b/data_collection/gazette/spiders/es/es_vitoria.py
index ee5e70620..e83111be8 100644
--- a/data_collection/gazette/spiders/es/es_vitoria.py
+++ b/data_collection/gazette/spiders/es/es_vitoria.py
@@ -9,7 +9,7 @@
 
 class EsVitoriaSpider(BaseGazetteSpider):
     name = "es_vitoria"
-    TERRITORY_ID = "3205309"    
+    TERRITORY_ID = "3205309"
     allowed_domains = ["diariooficial.vitoria.es.gov.br"]
     start_date = date(2014, 7, 21)
 
@@ -25,8 +25,9 @@ class EsVitoriaSpider(BaseGazetteSpider):
         "RETRY_HTTP_CODES": [500, 502, 503, 504, 522, 524, 408, 429, 406],
     }
 
-    FORM_PARAM_YEAR = None
-    FORM_PARAM_MONTH = None
+    FORM_PARAM_YEAR = "ctl00$conteudo$ucPesquisarDiarioOficial$ddlAno"
+    FORM_PARAM_MONTH = "ctl00$conteudo$ucPesquisarDiarioOficial$ddlMes"
+    FORM_PARAM_PAGINATION = "ctl00$conteudo$ucPesquisarDiarioOficial$grdArquivos"
 
     def start_requests(self):
         yield Request(
@@ -35,8 +36,6 @@ def start_requests(self):
         )
 
     def make_year_request(self, response):
-        self.set_form_params(response)     
-
         monthly_dates = rruleset()
         monthly_dates.rrule(
             rrule(MONTHLY, dtstart=self.start_date, until=self.end_date, bymonthday=[1])
@@ -55,14 +54,6 @@ def make_year_request(self, response):
                 meta={"cookiejar": (monthly_date.year, monthly_date.month)},
             )
 
-    def set_form_params(self, response):
-        year_select = response.xpath("//select[contains(@id, 'ddlAno')]")
-        self.FORM_PARAM_YEAR = year_select.attrib["name"]
-
-        month_select = response.xpath("//select[contains(@id, 'ddlMes')]")
-        self.FORM_PARAM_MONTH = month_select.attrib["name"]
-
-
     def make_month_request(self, response):
         year, month = response.meta.get("cookiejar")
 

From 78faf2043e0b226d1f00dc6b98be902868b54510 Mon Sep 17 00:00:00 2001
From: trevineju <julianabtrevine@gmail.com>
Date: Mon, 13 Jan 2025 05:37:42 -0300
Subject: [PATCH 09/11] =?UTF-8?q?Ajusta=20custom=5Fsettings,=20pagina?=
 =?UTF-8?q?=C3=A7=C3=A3o=20e=20convers=C3=A3o=20de=20data?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../gazette/spiders/es/es_vitoria.py          | 56 ++++++++-----------
 1 file changed, 24 insertions(+), 32 deletions(-)

diff --git a/data_collection/gazette/spiders/es/es_vitoria.py b/data_collection/gazette/spiders/es/es_vitoria.py
index e83111be8..08e9ba12c 100644
--- a/data_collection/gazette/spiders/es/es_vitoria.py
+++ b/data_collection/gazette/spiders/es/es_vitoria.py
@@ -1,4 +1,4 @@
-from datetime import date, datetime
+from datetime import date, datetime as dt
 
 from dateutil.rrule import MONTHLY, rrule, rruleset
 from scrapy import FormRequest, Request
@@ -13,14 +13,8 @@ class EsVitoriaSpider(BaseGazetteSpider):
     allowed_domains = ["diariooficial.vitoria.es.gov.br"]
     start_date = date(2014, 7, 21)
 
-    # When there are too many requests, the server may return
-    # an HTTP 406 status code when trying to download a PDF file
-    #
-    # We set `custom_settings` to avoid triggering the 406 HTTP status code
-    # by spreading the downloads for this spider over time
-
     custom_settings = {
-        "DOWNLOAD_DELAY": 0.3,  # 300 ms
+        "DOWNLOAD_DELAY": 0.3,
         "RANDOMIZE_DOWNLOAD_DELAY": True,
         "RETRY_HTTP_CODES": [500, 502, 503, 504, 522, 524, 408, 429, 406],
     }
@@ -40,7 +34,7 @@ def make_year_request(self, response):
         monthly_dates.rrule(
             rrule(MONTHLY, dtstart=self.start_date, until=self.end_date, bymonthday=[1])
         )
-        monthly_dates.rdate(date(self.start_date.year, self.start_date.month, 1))
+        monthly_dates.rdate(dt(self.start_date.year, self.start_date.month, 1))
 
         for monthly_date in monthly_dates:
             formdata = {self.FORM_PARAM_YEAR: str(monthly_date.year)}
@@ -49,7 +43,7 @@ def make_year_request(self, response):
                 response,
                 formdata=formdata,
                 callback=self.make_month_request,
-                # We are isolating cookiejar like (year, month) combination
+                # We are isolating cookiejar in (year, month) combination
                 # to avoid interference between concurrent requests
                 meta={"cookiejar": (monthly_date.year, monthly_date.month)},
             )
@@ -74,7 +68,7 @@ def make_month_request(self, response):
     def parse_editions_list(self, response, current_page=1):
         for row in response.xpath("//tbody//td/a[1]"):
             raw_date = row.css("span::text")[0].get().split()[-1]
-            gazette_date = datetime.strptime(raw_date, "%d/%m/%Y").date()
+            gazette_date = dt.strptime(raw_date, "%d/%m/%Y").date()
 
             if self.start_date <= gazette_date <= self.end_date:
                 url = response.urljoin(row.css("a").attrib["href"])
@@ -87,24 +81,22 @@ def parse_editions_list(self, response, current_page=1):
                     power="executive",
                 )
 
-        has_next_page = (
-            response.css(".pagination li")[-1].css("a::text").get() is not None
-        )
-        if has_next_page:
-            next_page = current_page + 1
-            year, month = response.meta.get("cookiejar")
-
-            formdata = {
-                self.FORM_PARAM_YEAR: str(year),
-                self.FORM_PARAM_MONTH: str(month),
-                "__EVENTTARGET": self.FORM_PARAM_PAGINATION,
-                "__EVENTARGUMENT": f"Page${next_page}",
-            }
-
-            yield FormRequest.from_response(
-                response,
-                formdata=formdata,
-                callback=self.parse_editions_list,
-                cb_kwargs={"current_page": next_page},
-                meta={"cookiejar": response.meta.get("cookiejar")},
-            )
+        if "pagination" in response.text:
+            if response.css(".pagination li")[-1].css("a::text").get():
+                next_page = current_page + 1
+                year, month = response.meta.get("cookiejar")
+
+                formdata = {
+                    self.FORM_PARAM_YEAR: str(year),
+                    self.FORM_PARAM_MONTH: str(month),
+                    "__EVENTTARGET": self.FORM_PARAM_PAGINATION,
+                    "__EVENTARGUMENT": f"Page${next_page}",
+                }
+
+                yield FormRequest.from_response(
+                    response,
+                    formdata=formdata,
+                    callback=self.parse_editions_list,
+                    cb_kwargs={"current_page": next_page},
+                    meta={"cookiejar": response.meta.get("cookiejar")},
+                )

From a14da77267c996ec284c530268d665a9fda61b87 Mon Sep 17 00:00:00 2001
From: trevineju <julianabtrevine@gmail.com>
Date: Mon, 13 Jan 2025 09:13:03 -0300
Subject: [PATCH 10/11] =?UTF-8?q?Atualiza=20l=C3=B3gica=20de=20sele=C3=A7?=
 =?UTF-8?q?=C3=A3o=20de=20datas=20para=20serem=20requisitadas?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../gazette/spiders/es/es_vitoria.py          | 57 +++++++++++--------
 1 file changed, 32 insertions(+), 25 deletions(-)

diff --git a/data_collection/gazette/spiders/es/es_vitoria.py b/data_collection/gazette/spiders/es/es_vitoria.py
index 08e9ba12c..86567e9a8 100644
--- a/data_collection/gazette/spiders/es/es_vitoria.py
+++ b/data_collection/gazette/spiders/es/es_vitoria.py
@@ -1,6 +1,6 @@
 from datetime import date, datetime as dt
 
-from dateutil.rrule import MONTHLY, rrule, rruleset
+from dateutil.rrule import MONTHLY, YEARLY, rrule, rruleset
 from scrapy import FormRequest, Request
 
 from gazette.items import Gazette
@@ -30,14 +30,12 @@ def start_requests(self):
         )
 
     def make_year_request(self, response):
-        monthly_dates = rruleset()
-        monthly_dates.rrule(
-            rrule(MONTHLY, dtstart=self.start_date, until=self.end_date, bymonthday=[1])
-        )
-        monthly_dates.rdate(dt(self.start_date.year, self.start_date.month, 1))
-
-        for monthly_date in monthly_dates:
-            formdata = {self.FORM_PARAM_YEAR: str(monthly_date.year)}
+        for yearly_date in self._dates_of_interest(YEARLY):
+            formdata = {
+                self.FORM_PARAM_YEAR: str(yearly_date.year),
+                "__EVENTTARGET": self.FORM_PARAM_YEAR,
+                "__EVENTARGUMENT": "",
+            }
 
             yield FormRequest.from_response(
                 response,
@@ -45,25 +43,28 @@ def make_year_request(self, response):
                 callback=self.make_month_request,
                 # We are isolating cookiejar in (year, month) combination
                 # to avoid interference between concurrent requests
-                meta={"cookiejar": (monthly_date.year, monthly_date.month)},
+                meta={"cookiejar": (yearly_date.year)},
             )
 
     def make_month_request(self, response):
-        year, month = response.meta.get("cookiejar")
-
-        formdata = {
-            self.FORM_PARAM_YEAR: str(year),
-            self.FORM_PARAM_MONTH: str(month),
-            "__EVENTTARGET": self.FORM_PARAM_MONTH,
-            "__EVENTARGUMENT": "",
-        }
-
-        yield FormRequest.from_response(
-            response,
-            formdata=formdata,
-            callback=self.parse_editions_list,
-            meta={"cookiejar": response.meta.get("cookiejar")},
-        )
+        year = response.meta.get("cookiejar")
+
+        for monthly_date in self._dates_of_interest(MONTHLY):
+            if dt(year, 1, 1) <= monthly_date <= dt(year, 12, 31):
+
+                formdata = {
+                    self.FORM_PARAM_YEAR: str(monthly_date.year),
+                    self.FORM_PARAM_MONTH: str(monthly_date.month),
+                    "__EVENTTARGET": self.FORM_PARAM_MONTH,
+                    "__EVENTARGUMENT": "",
+                }
+
+                yield FormRequest.from_response(
+                    response,
+                    formdata=formdata,
+                    callback=self.parse_editions_list,
+                    meta={"cookiejar": (monthly_date.year, monthly_date.month)},
+                )
 
     def parse_editions_list(self, response, current_page=1):
         for row in response.xpath("//tbody//td/a[1]"):
@@ -100,3 +101,9 @@ def parse_editions_list(self, response, current_page=1):
                     cb_kwargs={"current_page": next_page},
                     meta={"cookiejar": response.meta.get("cookiejar")},
                 )
+
+    def _dates_of_interest(self, recurrence):
+        dates = rruleset()
+        dates.rrule(rrule(recurrence, dtstart=self.start_date, until=self.end_date, bymonthday=[1]))
+        dates.rdate(dt(self.start_date.year, self.start_date.month, 1))
+        return dates
\ No newline at end of file

From e75155f1c0d4d97a7d2721b353d92d96149711a3 Mon Sep 17 00:00:00 2001
From: trevineju <julianabtrevine@gmail.com>
Date: Mon, 13 Jan 2025 09:54:38 -0300
Subject: [PATCH 11/11] Simplifica custom_settings e campos de formrequest
 sendo solicitados

---
 data_collection/gazette/spiders/es/es_vitoria.py | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/data_collection/gazette/spiders/es/es_vitoria.py b/data_collection/gazette/spiders/es/es_vitoria.py
index 86567e9a8..64fd82c85 100644
--- a/data_collection/gazette/spiders/es/es_vitoria.py
+++ b/data_collection/gazette/spiders/es/es_vitoria.py
@@ -16,7 +16,6 @@ class EsVitoriaSpider(BaseGazetteSpider):
     custom_settings = {
         "DOWNLOAD_DELAY": 0.3,
         "RANDOMIZE_DOWNLOAD_DELAY": True,
-        "RETRY_HTTP_CODES": [500, 502, 503, 504, 522, 524, 408, 429, 406],
     }
 
     FORM_PARAM_YEAR = "ctl00$conteudo$ucPesquisarDiarioOficial$ddlAno"
@@ -51,12 +50,9 @@ def make_month_request(self, response):
 
         for monthly_date in self._dates_of_interest(MONTHLY):
             if dt(year, 1, 1) <= monthly_date <= dt(year, 12, 31):
-
                 formdata = {
-                    self.FORM_PARAM_YEAR: str(monthly_date.year),
                     self.FORM_PARAM_MONTH: str(monthly_date.month),
                     "__EVENTTARGET": self.FORM_PARAM_MONTH,
-                    "__EVENTARGUMENT": "",
                 }
 
                 yield FormRequest.from_response(
@@ -85,11 +81,8 @@ def parse_editions_list(self, response, current_page=1):
         if "pagination" in response.text:
             if response.css(".pagination li")[-1].css("a::text").get():
                 next_page = current_page + 1
-                year, month = response.meta.get("cookiejar")
 
                 formdata = {
-                    self.FORM_PARAM_YEAR: str(year),
-                    self.FORM_PARAM_MONTH: str(month),
                     "__EVENTTARGET": self.FORM_PARAM_PAGINATION,
                     "__EVENTARGUMENT": f"Page${next_page}",
                 }
@@ -104,6 +97,10 @@ def parse_editions_list(self, response, current_page=1):
 
     def _dates_of_interest(self, recurrence):
         dates = rruleset()
-        dates.rrule(rrule(recurrence, dtstart=self.start_date, until=self.end_date, bymonthday=[1]))
+        dates.rrule(
+            rrule(
+                recurrence, dtstart=self.start_date, until=self.end_date, bymonthday=[1]
+            )
+        )
         dates.rdate(dt(self.start_date.year, self.start_date.month, 1))
-        return dates
\ No newline at end of file
+        return dates