Skip to content

Commit

Permalink
feat: fix get data from receita federal
Browse files Browse the repository at this point in the history
  • Loading branch information
Robso-creator committed Oct 21, 2024
1 parent aa28a42 commit 9d4b6c5
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 40 deletions.
58 changes: 29 additions & 29 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@ all:
@echo ""
@echo "########################################################################################################################"
@echo "[SESSION] Launch"
@echo "make up ......................................... docker-compose up -d"
@echo "make stop ....................................... docker-compose stop"
@echo "make down ....................................... docker-compose down"
@echo "make up ......................................... docker compose up -d"
@echo "make stop ....................................... docker compose stop"
@echo "make down ....................................... docker compose down"
@echo "make app ........................................ run container app"
@echo "make rm ......................................... remove all exited containers and all dangling volumes"
@echo ""
Expand Down Expand Up @@ -47,25 +47,25 @@ build-img:

up:
@echo "---------------------------------------"
@echo "docker-compose up -d"
@docker-compose up -d
@echo "docker compose up -d"
@docker compose up -d
@echo ""

stop:
@echo "---------------------------------------"
@echo "docker-compose stop"
@docker-compose stop
@echo "docker compose stop"
@docker compose stop
@echo ""

down:
@echo "---------------------------------------"
@echo "docker-compose down"
@docker-compose down
@echo "docker compose down"
@docker compose down
@echo ""

app: up
@echo "compose-up run app container"
@docker-compose run --rm app
@docker compose run --rm app
@echo ""

rm: down
Expand All @@ -82,97 +82,97 @@ rm: down

db-create: up
@echo "PHOENIX"
@docker-compose run app python -c "from src.db_models.utils import create_db; create_db()"
@docker compose run app python -c "from src.db_models.utils import create_db; create_db()"
@echo ""

db-create-tables: up
@echo "Creating tables"
@docker-compose run app python -c "from src.db_models.utils import create_or_drop_all_tables; create_or_drop_all_tables(cmd='create')"
@docker compose run app python -c "from src.db_models.utils import create_or_drop_all_tables; create_or_drop_all_tables(cmd='create')"
@echo ""

db-setup: up
@echo "SETUP"
@echo "sleeping 40 seconds in order to postgres start-up"
@echo "Creating db"
@docker-compose run app python -c "from src.db_models.utils import create_db, create_or_drop_all_tables; create_db();create_or_drop_all_tables(cmd='create')"
@docker compose run app python -c "from src.db_models.utils import create_db, create_or_drop_all_tables; create_db();create_or_drop_all_tables(cmd='create')"
@echo ""

db-phoenix: up
@echo "PHOENIX"
@docker-compose run app python -c "from src.db_models.utils import phoenix; phoenix()"
@docker compose run app python -c "from src.db_models.utils import phoenix; phoenix()"
@echo ""

db-enter: up
@docker exec -i postgres psql -U postgres

tests: up
@echo "compose-up run app & [PYTEST]"
@docker-compose run app python -m pytest
@docker compose run app python -m pytest
@echo ""

io-download: up
@echo "compose-up run app container & [DOWNLOAD]"
@docker-compose run app python src/io/download.py
@docker compose run app python src/io/download.py
@echo ""

io-unzip: up
@echo "compose-up run app container & [UNZIP]"
@docker-compose run app python src/io/unzip.py
@docker compose run app python src/io/unzip.py
@echo ""
@echo "[CREATE JSONS]"
@docker-compose run app python src/io/create_jsons_from_csv.py
@docker compose run app python src/io/create_jsons_from_csv.py

io-create-jsons: up
@echo "[CREATE JSONS]"
@docker-compose run app python src/io/create_jsons_from_csv.py
@docker compose run app python src/io/create_jsons_from_csv.py


io-download-and-unzip: up
@echo "compose-up run app container & [DOWNLOAD]"
@docker-compose run app python src/io/download.py
@docker compose run app python src/io/download.py
@echo ""
@echo "------------------------"
@echo "sleep for 30 seconds to take a breath"
@sleep 30
@echo ""
@echo "------------------------"
@echo "[UNZIP]"
@docker-compose run app python src/io/unzip.py
@docker compose run app python src/io/unzip.py
@echo ""
@echo "[CREATE JSONS]"
@docker-compose run app python src/io/create_jsons_from_csv.py
@docker compose run app python src/io/create_jsons_from_csv.py

engine-company: up
@echo "compose-up run app container & [ENGINE COMPANY]"
@docker-compose run app python src/engine/company.py
@docker compose run app python src/engine/company.py
@echo ""

engine-company-tax-regime: up
@echo "compose-up run app container & [ENGINE COMPANY TAX REGIME]"
@docker-compose run app python src/engine/company_tax_regime.py
@docker compose run app python src/engine/company_tax_regime.py
@echo ""

engine-company-root: up
@echo "compose-up run app container & [ENGINE COMPANY ROOT]"
@docker-compose run app python src/engine/company_root.py
@docker compose run app python src/engine/company_root.py
@echo ""

engine-company-root-simples: up
@echo "compose-up run app container & [ENGINE COMPANY ROOT SIMPLES]"
@docker-compose run app python src/engine/company_root_simples.py
@docker compose run app python src/engine/company_root_simples.py
@echo ""

engine-partners: up
@echo "compose-up run app container & [ENGINE PARTNERS]"
@docker-compose run app python src/engine/partners.py
@docker compose run app python src/engine/partners.py
@echo ""

engine-ref-date: up
@echo "compose-up run app container & [ENGINE REF DATE]"
@docker-compose run app python src/engine/ref_date.py
@docker compose run app python src/engine/ref_date.py
@echo ""

engine-main: up
@echo "compose-up run app container & engine main"
@docker-compose run app python src/engine/main.py
@docker compose run app python src/engine/main.py
@echo ""
6 changes: 4 additions & 2 deletions src/io/get_files_dict.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,9 @@ def main():
ref_date = get_last_ref_date()

# get page content
page = requests.get(CORE_URL_FILES, headers=HEADERS)
_folder_open_date = 'dados_abertos_cnpj'
CORE_URL = f'{CORE_URL_FILES}/{_folder_open_date}/{ref_date}'
page = requests.get(CORE_URL, headers=HEADERS)

# BeautifulSoup object
soup = BeautifulSoup(page.text, 'html.parser')
Expand Down Expand Up @@ -52,7 +54,7 @@ def main():

dict_core = {file_name: {'last_modified': last_modified,
'file_size_bytes': file_size_bytes,
'link_to_download': f"{CORE_URL_FILES}/{file_name}",
'link_to_download': f"{CORE_URL}/{file_name}",
'path_save_file': os.path.join(SRC_PATH, DATA_FOLDER, ref_date, file_name)}
}
if 'Socios' in file_name:
Expand Down
20 changes: 11 additions & 9 deletions src/io/get_last_ref_date.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@ def main():
:return: the reference date selected from the listing page (the value of ``ref_date``)
"""
# get page content
page = requests.get(CORE_URL_FILES, headers=HEADERS)
_folder_open_date = 'dados_abertos_cnpj'
page = requests.get(f'{CORE_URL_FILES}/{_folder_open_date}', headers=HEADERS)

# BeautifulSoup object
soup = BeautifulSoup(page.text, 'html.parser')
Expand All @@ -26,16 +27,17 @@ def main():
print('creating dict files url')
for row in rows:
if row.find_all('td'):
if row.find_all('td')[1].find('a')['href'].endswith('.zip'):
if row.find_all('td')[1].find('a')['href']:
# get last modified time and parse to date (ex: '2021-07-19')
list_last_modified_at.append(
datetime.strptime(row.find_all('td')[2].text.strip(), '%Y-%m-%d %H:%M').strftime(
'%Y-%m-%d'))

try:
list_last_modified_at.append(row.find_all('td')[1].find('a')['href'].replace('/', ''))
except ValueError as e:
print('not a date: ', e)
# get the most common on 'last_modified' from source
ref_date, occurences = Counter(list_last_modified_at).most_common(1)[0]
print(
f"ref date will be: '{ref_date}' with {occurences} out of {len(list_last_modified_at)} ({occurences / len(list_last_modified_at):.1%}) ")
list_last_modified_at.remove('CNPJ')
ref_date = max(list_last_modified_at)
print('last updated date is ', ref_date)

return ref_date


Expand Down

0 comments on commit 9d4b6c5

Please sign in to comment.