Skip to content

Commit

Permalink
v0.0.6
Browse files Browse the repository at this point in the history
  • Loading branch information
sylvainHellin committed Aug 22, 2024
1 parent 9db01c9 commit 68b587d
Show file tree
Hide file tree
Showing 4 changed files with 100 additions and 12 deletions.
8 changes: 6 additions & 2 deletions Alasco/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,16 @@
This is an alpha version for submodules client, data_fetcher, data_transformer, document_downloader and utils.
Submodules document_uploader and data_updater are placeholders for now and will be implemented in the future.
Changelog since version 0.0.5:
- changed wrong implementation of document_downloader.download_change_orders.
- updated the verbose mode of the document_downloader sub-module.
- added method to download all documents from a project: document_downloader.batch_download_documents.
Author: sylvain hellin
Version: 0.0.5
Version: 0.0.6
"""

__version__ = "0.0.5"
__version__ = "0.0.6"
__author__ = "sylvain hellin"

# Import key classes or functions
Expand Down
100 changes: 92 additions & 8 deletions Alasco/document_downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@
import pandas as pd
import re
from alasco.data_fetcher import DataFetcher
from alasco.data_transformer import DataTransformer
from datetime import date
from typing import Dict, List

class DocumentDownloader:
"""
Expand All @@ -29,6 +31,7 @@ def __init__(self, header, verbose = False, download_path: str | None = None) ->
self.verbose = verbose
self.BASE_URL = "https://api.alasco.de/v1/"
self.data_fetcher = DataFetcher(header=header, verbose=verbose)
self.data_transformer = DataTransformer(verbose=verbose)
self.today = date.today()

if download_path is None:
Expand Down Expand Up @@ -74,6 +77,10 @@ def get_contract_documents(self, contract_ids: list) -> pd.DataFrame:
>>> df_contract_documents = downloader.get_contract_documents(contract_ids)
>>> print(df_contract_documents)
"""

if self.verbose:
print(f"Getting contract documents for {len(contract_ids)} documents with ids : {contract_ids[:3]} ...")

urls = [self._prepare_url_get_contract_documents(contract_id) for contract_id in contract_ids]
collection = []
for url in urls:
Expand All @@ -99,6 +106,8 @@ def get_change_order_documents(self, change_order_ids: list) -> pd.DataFrame:
>>> print(df_change_order_documents)
"""

if self.verbose:
print(f"Getting change order documents for {len(change_order_ids)} documents with ids : {change_order_ids[:3]} ...")
urls = [self._prepare_url_get_change_order_documents(change_order_id) for change_order_id in change_order_ids]
collection = []
for url in urls:
Expand All @@ -123,6 +132,9 @@ def get_invoice_documents(self, invoice_ids: list) -> pd.DataFrame:
>>> df_invoice_documents = downloader.get_invoice_documents(invoice_ids)
>>> print(df_invoice_documents)
"""
if self.verbose:
print(f"Getting invoice documents for {len(invoice_ids)} documents with ids : {invoice_ids[:3]} ...")

urls = [self._prepare_url_get_invoice_documents(invoice_id) for invoice_id in invoice_ids]
collection = []
for url in urls:
Expand Down Expand Up @@ -260,6 +272,10 @@ def download_contracts(self, df: pd.DataFrame, document_type: str | None = "CONT
download_path = self.download_path + "/" + sub_folder
else:
download_path = self.download_path

# If verbose mode is enabled, print a message indicating the start of the downloading process
if self.verbose:
print(f"Downloading contract documents for {len(contract_names)} documents with names : {contract_names[:3]} ...")

# Download the documents using the extracted download links and generated contract names
self.download_documents(document_download_links=download_links, document_names=contract_names, download_path=download_path)
Expand Down Expand Up @@ -350,6 +366,10 @@ def download_invoices(self, df: pd.DataFrame, document_type: str | None = "INVOI
else:
download_path = self.download_path

# If verbose mode is enabled, print a message indicating the start of the downloading process
if self.verbose:
print(f"Downloading invoice documents for {len(invoice_names)} documents with names : {invoice_names[:3]} ...")

# Download the documents using the extracted download links and generated invoice names
self.download_documents(document_download_links=download_links, document_names=invoice_names, download_path=download_path)

Expand Down Expand Up @@ -392,7 +412,7 @@ def _name_change_order(self, row: pd.Series, document_type: str | None = None) -

return change_order_name

def download_change_orders(self, df: pd.DataFrame, document_type: str | None = "CHANGE-ORDER", sub_folder: str | None = "change_orders") -> None:
def download_change_orders(self, df: pd.DataFrame, document_type: str | None = "CHANGE_ORDER", sub_folder: str | None = "change_orders") -> None:
"""
Downloads change order documents based on the DataFrame, document type, and optional sub-folder.
The name of the document is generated using the _name_change_order() method.
Expand All @@ -419,9 +439,10 @@ def download_change_orders(self, df: pd.DataFrame, document_type: str | None = "
"relationships.change_order.data.id": "change_order_id",
"links.download": "download_link"
})
# Select relevant columns from the fetched contract links DataFrame
# Select relevant columns from the fetched change order links DataFrame
df_change_order_links = df_change_order_links[["change_order_document_id", "change_order_id", "download_link", "document_type", "filename"]]
# Merge the original DataFrame with the contract links DataFrame on 'change_order_id'

# Merge the original DataFrame with the change order links DataFrame on 'change_order_id'
df_merged = pd.merge(df, df_change_order_links, on="change_order_id", how="outer")

# Filter the merged DataFrame by the specified document type, if provided
Expand All @@ -431,15 +452,78 @@ def download_change_orders(self, df: pd.DataFrame, document_type: str | None = "
# Extract download links from the filtered DataFrame
download_links = df_merged["download_link"].tolist()

# Generate standardized contract names for each row in the original DataFrame
contract_names = df_merged.apply(lambda row: self._name_change_order(row=row, document_type=document_type), axis=1)
# Generate standardized change order names for each row in the merged DataFrame
change_order_names = df_merged.apply(lambda row: self._name_change_order(row=row, document_type=document_type), axis=1)

if sub_folder is not None:
download_path = self.download_path + "/" + sub_folder
else:
download_path = self.download_path

# Download the documents using the extracted download links and generated contract names
self.download_documents(document_download_links=download_links, document_names=contract_names, download_path=download_path)
# If verbose mode is enabled, print a message indicating the start of the downloading process
if self.verbose:
print(f"Downloading change order documents for {len(change_order_names)} documents with names : {change_order_names[:3]} ...")

# Download the documents using the extracted download links and generated change order names
self.download_documents(document_download_links=download_links, document_names=change_order_names, download_path=download_path)

return None

def batch_download_documents(self,
dfs: Dict[str, pd.DataFrame],
property_name: str | None = None,
project_names: List[str] | None = None,
download_path: str | None = None
) -> None:
"""
Downloads all documents (contracts, change orders, invoices) for a given property or list of projects.
Args:
dfs (Dict[str, pd.DataFrame]): Dictionary of DataFrames containing the data.
property_name (str | None): The name of the property to filter projects. Defaults to None.
project_names (List[str] | None): List of project names to download documents for. Defaults to None.
download_path (str | None): The base path where documents will be downloaded. Defaults to None.
Raises:
ValueError: If neither property_name nor project_names are provided.
"""

if property_name is None and project_names is None:
raise ValueError("Please provide either a property name or a list of project names")

df_core = self.data_transformer.consolidate_core_DataFrames(dfs=dfs)
df_contracts = df_core.copy()
df_invoices = self.data_transformer.consolidate_invoices_DataFrame(df_core=df_core, df_invoices=dfs["invoices"])
df_change_orders = self.data_transformer.consolidate_change_orders_DataFrame(df_core=df_core, df_change_orders=dfs["change_orders"])

if project_names is None:
project_names = df_core[df_core["property_name"] == property_name]["project_name"].drop_duplicates().tolist()

core_path = self.download_path[:]
download_path = download_path if download_path is not None else self.download_path[:]

for project_name in project_names:

# Create subsets of the DataFrames for each project
subset_df_contracts = df_contracts[df_contracts["project_name"] == project_name].copy()
subset_df_invoices = df_invoices[df_invoices["project_name"] == project_name].copy()
subset_df_change_orders = df_change_orders[df_change_orders["project_name"] == project_name].copy()

# Set path for downloading documents for this project
project_download_path = os.path.join(download_path, project_name)
self.download_path = project_download_path

if self.verbose:
print(f"Downloading documents for project: {project_name}")

# Download documents
self.download_contracts(df=subset_df_contracts)
self.download_invoices(df=subset_df_invoices)
self.download_change_orders(df=subset_df_change_orders)

if self.verbose:
print("Batch download completed.")

self.download_path = core_path

return None
return
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

## Introduction

This package is SDK for `python` to facilitate interaction with the Alasco API.
This package is an SDK for `python` to facilitate interaction with the Alasco API.

The official documentation from [Alasco](https://www.alasco.com/) can be found [here](https://developer.alasco.de/).

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "hatchling.build"

[project]
name = "alasco"
version = "0.0.5"
version = "0.0.6"
authors = [
{ name="Sylvain Hellin", email="[email protected]" },
]
Expand Down

0 comments on commit 68b587d

Please sign in to comment.