diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
index 4df17785..01d3d3ae 100644
--- a/.github/workflows/python-package.yml
+++ b/.github/workflows/python-package.yml
@@ -32,6 +32,10 @@ jobs:
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
+      - name: Run pytest on the tests in tests/
+        run: |
+          pytest tests/ -p no:warnings -vv
- name: Run pytest on tableau-utilities
run: |
cd tableau_utilities && pytest -v
+
diff --git a/.gitignore b/.gitignore
index bcbe97d3..e6ca5069 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,6 +4,7 @@ sheets.googleapis.com-python.json
.idea
.DS_Store
tmp_tdsx_and_config/
+development_test_files/
# Byte-compiled / optimized / DLL files
__pycache__/
diff --git a/README.md b/README.md
index c3d536e9..bc7e9f43 100644
--- a/README.md
+++ b/README.md
@@ -20,6 +20,7 @@ This extra package depends on the tableauhyperapi which is incompatible with App
#### Locally using pip
- `cd tableau-utilities`
- `pip install ./`
+- `pip install --upgrade ./` to upgrade an existing installation without uninstalling first
#### Confirm installation
- `which tableau_utilities`
diff --git a/airflow_example/dags/tableau_datasource_update/tableau_datasource_update.py b/airflow_example/dags/tableau_datasource_update/tableau_datasource_update.py
index 7d38f402..d740ae15 100644
--- a/airflow_example/dags/tableau_datasource_update/tableau_datasource_update.py
+++ b/airflow_example/dags/tableau_datasource_update/tableau_datasource_update.py
@@ -314,54 +314,54 @@ def __compare_folders(self, datasource_id, tds_folders, cfg_folders):
def execute(self, context):
""" Update Tableau datasource according to config. """
- github_conn = BaseHook.get_connection(self.github_conn_id)
- config = cfg.Config(
- githup_token=github_conn.password,
- repo_name=github_conn.extra_dejson.get('repo_name'),
- repo_branch=github_conn.extra_dejson.get('repo_branch'),
- subfolder=github_conn.extra_dejson.get('subfolder')
- )
-
- ts = get_tableau_server(self.tableau_conn_id)
- expected_conn_attrs = self.__set_connection_attributes()
-
- # Get the ID for each datasource in the config
- for ds in ts.get.datasources():
- if ds not in config.datasources:
- continue
- config.datasources[ds].id = ds.id
-
- for datasource in config.datasources:
- if not datasource.id:
- logging.error('!! Datasource not found in Tableau Online: %s / %s',
- datasource.project_name, datasource.name)
- continue
- dsid = datasource.id
+ # github_conn = BaseHook.get_connection(self.github_conn_id)
+ # config = cfg.Config(
+ # githup_token=github_conn.password,
+ # repo_name=github_conn.extra_dejson.get('repo_name'),
+ # repo_branch=github_conn.extra_dejson.get('repo_branch'),
+ # subfolder=github_conn.extra_dejson.get('subfolder')
+ # )
+ #
+ # ts = get_tableau_server(self.tableau_conn_id)
+ # expected_conn_attrs = self.__set_connection_attributes()
+ #
+ # # Get the ID for each datasource in the config
+ # for ds in ts.get.datasources():
+ # if ds not in config.datasources:
+ # continue
+ # config.datasources[ds].id = ds.id
+ #
+        # for datasource in config.datasources:
+ # if not datasource.id:
+ # logging.error('!! Datasource not found in Tableau Online: %s / %s',
+ # datasource.project_name, datasource.name)
+ # continue
+ # dsid = datasource.id
-            # Set default dict attributes for tasks, for each datasource
-            self.tasks[dsid] = {a: [] for a in UPDATE_ACTIONS}
-            self.tasks[dsid]['project'] = datasource.project_name
-            self.tasks[dsid]['datasource_name'] = datasource.name
+            # # Set default dict attributes for tasks, for each datasource
+            # self.tasks[dsid] = {a: [] for a in UPDATE_ACTIONS}
+            # self.tasks[dsid]['project'] = datasource.project_name
+            # self.tasks[dsid]['datasource_name'] = datasource.name
- if not config.in_maintenance_window and AIRFLOW_ENV not in ['STAGING', 'DEV']:
- self.tasks[dsid]['skip'] = 'Outside maintenance window'
- logging.info('(SKIP) Outside maintenance window: %s', datasource.name)
- continue
- elif datasource.name in EXCLUDED_DATASOURCES:
- self.tasks[dsid]['skip'] = 'Marked to exclude'
- logging.info('(SKIP) Marked to exclude: %s', datasource.name)
- continue
- logging.info('Checking Datasource: %s', datasource.name)
- # Download the Datasource for comparison
- dl_path = f"downloads/{dsid}/"
- os.makedirs(dl_path, exist_ok=True)
- ds_path = ts.download.datasource(dsid, file_dir=dl_path, include_extract=False)
- tds = Datasource(ds_path)
- # Cleanup downloaded file after assigning the Datasource
- shutil.rmtree(dl_path, ignore_errors=True)
- # Add connection task, if there is a difference
- self.__compare_connection(dsid, datasource.name, tds.connection, expected_conn_attrs)
- # Add folder tasks, if folders need to be added/deleted
- self.__compare_folders(dsid, tds.folders_common, datasource.folders)
- # Add Column tasks, if there are missing columns, or columns need to be updated
+ # if not config.in_maintenance_window and AIRFLOW_ENV not in ['STAGING', 'DEV']:
+ # self.tasks[dsid]['skip'] = 'Outside maintenance window'
+ # logging.info('(SKIP) Outside maintenance window: %s', datasource.name)
+ # continue
+ # elif datasource.name in EXCLUDED_DATASOURCES:
+ # self.tasks[dsid]['skip'] = 'Marked to exclude'
+ # logging.info('(SKIP) Marked to exclude: %s', datasource.name)
+ # continue
+ # logging.info('Checking Datasource: %s', datasource.name)
+ # # Download the Datasource for comparison
+ # dl_path = f"downloads/{dsid}/"
+ # os.makedirs(dl_path, exist_ok=True)
+ # ds_path = ts.download.datasource(dsid, file_dir=dl_path, include_extract=False)
+ # tds = Datasource(ds_path)
+ # # Cleanup downloaded file after assigning the Datasource
+ # shutil.rmtree(dl_path, ignore_errors=True)
+ # # Add connection task, if there is a difference
+ # self.__compare_connection(dsid, datasource.name, tds.connection, expected_conn_attrs)
+ # # Add folder tasks, if folders need to be added/deleted
+ # self.__compare_folders(dsid, tds.folders_common, datasource.folders)
+ # # Add Column tasks, if there are missing columns, or columns need to be updated
-        for column in datasource.columns:
-            # Check if the column metadata needs to be updated
-            self.__compare_column_metadata(dsid, tds, column)
+        # for column in datasource.columns:
+        #     # Check if the column metadata needs to be updated
+        #     self.__compare_column_metadata(dsid, tds, column)
diff --git a/setup.py b/setup.py
index eac07082..7380dd42 100644
--- a/setup.py
+++ b/setup.py
@@ -12,7 +12,7 @@
long_description=readme,
long_description_content_type='text/markdown',
name="tableau_utilities",
- version="2.2.11",
+ version="2.2.12",
requires_python=">=3.8",
packages=[
'tableau_utilities',
diff --git a/tableau_utilities/scripts/apply_configs.py b/tableau_utilities/scripts/apply_configs.py
new file mode 100644
index 00000000..b551989c
--- /dev/null
+++ b/tableau_utilities/scripts/apply_configs.py
@@ -0,0 +1,284 @@
+from copy import deepcopy
+import pprint
+from typing import Dict, Any, List
+from time import time
+
+
+from tableau_utilities.tableau_file.tableau_file import Datasource
+from tableau_utilities.general.cli_styling import Color, Symbol
+from tableau_utilities.general.config_column_persona import personas
+from tableau_utilities.scripts.datasource import add_metadata_records_as_columns
+from tableau_utilities.scripts.gen_config import build_configs
+from tableau_utilities.scripts.merge_config import read_file
+
+color = Color()
+symbol = Symbol()
+
+class ApplyConfigs:
+ """Applies a set of configs to a datasource. Configs prefixed with target_ will be applied to the datasource.
+ Configs prefixed with datasource_ represent the current state of the datasource before changes.
+ """
+
+ def __init__(self,
+ datasource_name: str,
+ datasource_path: str,
+ target_column_config: Dict[str, Any],
+ target_calculated_column_config: Dict[str, Any],
+ debugging_logs: bool) -> None:
+ self.datasource_name: str = datasource_name
+ self.datasource_path: str = datasource_path
+ self.target_column_config: Dict[str, Any] = target_column_config
+ self.target_calculated_column_config: Dict[str, Any] = target_calculated_column_config
+ self.debugging_logs: bool = debugging_logs
+
+
+ def select_matching_datasource_config(self, config: Dict[str, Any]) -> Dict[str, Any]:
+        """ Limits a config to the entries for this datasource
+
+        Args:
+            config: An inverted config, keyed by datasource name
+
+        Returns:
+            The config with any datasource that is not self.datasource_name removed
+
+        """
+
+ try:
+ selected_config = config[self.datasource_name]
+ return selected_config
+ except KeyError:
+ print(f'{color.fg_red}No matching datasource found in config for {self.datasource_name}{color.reset}')
+ return {}
+
+ def invert_config(self, config: Dict[str, Any]) -> Dict[str, Any]:
+ """Helper function to invert the column config and calc config.
+ Output -> {datasource: {column: info}}
+
+ Args:
+ config (dict): The config to invert.
+
+ Returns:
+ dict: The inverted config.
+ """
+
+ inverted_config = {}
+
+ for column, i in config.items():
+ for datasource in i['datasources']:
+ new_info = deepcopy(i)
+ del new_info['datasources']
+ new_info['local-name'] = datasource['local-name']
+                new_info['remote_name'] = datasource.get('sql_alias')
+                inverted_config.setdefault(datasource['name'], {}).setdefault(column, new_info)
+
+ if self.debugging_logs:
+ pp = pprint.PrettyPrinter(indent=4, width=200, depth=None, compact=False)
+ pp.pprint(inverted_config)
+
+ return inverted_config
+
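+    # For example, invert_config turns a config keyed by caption into one keyed by datasource
+    # (a minimal sketch; 'ds_a', 'COL_1', and 'COL_1_ALIAS' are illustrative names -- the same
+    # shapes are exercised in tests/test_apply_configs.py):
+    #
+    #   {'Column1': {'persona': 'string_dimension',
+    #                'datasources': [{'name': 'ds_a', 'local-name': 'COL_1', 'sql_alias': 'COL_1_ALIAS'}]}}
+    #
+    # becomes
+    #
+    #   {'ds_a': {'Column1': {'persona': 'string_dimension',
+    #                         'local-name': 'COL_1', 'remote_name': 'COL_1_ALIAS'}}}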
+
+ def prepare_configs(self, config_A: Dict[str, Any], config_B: Dict[str, Any]) -> Dict[str, Any]:
+        """ Takes 2 configs and inverts, combines, and removes irrelevant datasource information from them.
+        Columns in a main config can be in 1 or many Tableau datasources, so when managing multiple
+        datasources it's likely there are column entries that need to be removed
+
+        Args:
+            config_A: The first config to prepare, e.g. the column config
+            config_B: The second config to prepare, e.g. the calculated column config
+
+        Returns:
+            A single combined config containing only the entries for self.datasource_name
+
+        """
+
+ # invert the configs
+ config_A = self.invert_config(config_A)
+ config_B = self.invert_config(config_B)
+
+        # Keep only the config entries for the current datasource.
+        # Calculated configs from a datasource can sometimes be empty. If a config is empty, skip this step
+ if len(config_A) > 0:
+ config_A = self.select_matching_datasource_config(config_A)
+
+ if len(config_B) > 0:
+ config_B = self.select_matching_datasource_config(config_B)
+
+ # Combine configs
+ combined_config = {**config_A, **config_B}
+
+ if self.debugging_logs:
+ print(f'{color.fg_yellow}AFTER COMBINING CONFIGS{color.reset}')
+ pp = pprint.PrettyPrinter(indent=4, width=200, depth=None, compact=False)
+ pp.pprint(combined_config)
+
+ return combined_config
+
+
+ def flatten_to_list_of_fields(self, nested_dict: Dict[str, Dict[str, Any]]) -> List[Dict[str, Any]]:
+ """
+        Flattens a nested dictionary by removing one level of nesting and adding a "caption" key.
+
+ Args:
+ nested_dict (Dict[str, Dict[str, Any]]): The nested dictionary to flatten.
+
+ Returns:
+            List[Dict[str, Any]]: A list of dictionaries with "caption" as a key.
+ """
+ flattened_list = []
+ for key, value in nested_dict.items():
+ flattened_entry = {"caption": key}
+ flattened_entry.update(value)
+ flattened_list.append(flattened_entry)
+
+ if self.debugging_logs:
+ print(f'{color.fg_yellow}AFTER FLATTENING{color.reset}')
+ for field_config in flattened_list:
+ print(field_config)
+
+ return flattened_list
+
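+    # For example, flatten_to_list_of_fields lifts each caption key into the entry itself
+    # (a minimal sketch with illustrative values):
+    #   {'My Caption': {'folder': 'My Folder', 'local-name': 'MY_FIELD'}}
+    # becomes
+    #   [{'caption': 'My Caption', 'folder': 'My Folder', 'local-name': 'MY_FIELD'}]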
+ def compare_columns(self, target_config: List[Dict[str, Any]], datasource_config: List[Dict[str, Any]]) -> List[
+ Dict[str, Any]]:
+ """Compares the target config to the datasource config and generates a list of changes to make the datasource match the target config.
+
+ Args:
+ target_config (List[Dict[str, Any]]): The target configuration list of dictionaries.
+ datasource_config (List[Dict[str, Any]]): The datasource configuration list of dictionaries.
+
+ Returns:
+ List[Dict[str, Any]]: A list of dictionaries with the columns that need updating.
+ """
+        changes_to_make = []
+
+        for target_entry in target_config:
+            if not any(target_entry == datasource_entry for datasource_entry in datasource_config):
+                if self.debugging_logs:
+                    print(f'{color.fg_yellow}NEED TO MAKE CHANGE:{color.reset}{target_entry}')
+                changes_to_make.append(target_entry)
+
+        if self.debugging_logs:
+            print(f'{color.fg_yellow}AFTER CREATING CHANGE LIST{color.reset}')
+            pp = pprint.PrettyPrinter(indent=4, width=200, depth=None, compact=False)
+            pp.pprint(changes_to_make)
+            print(f'Number of changes to make: {len(changes_to_make)}')
+
+        return changes_to_make
+
+ def execute_changes(self, columns_list: List[Dict[str, Any]], datasource):
+        """ Applies the changes in the list to the datasource and saves it
+
+        Args:
+            columns_list: The list of column dictionaries to apply, from compare_columns
+            datasource: The Datasource object to apply the changes to
+
+        Returns:
+            None. The datasource is altered and saved in place
+
+        """
+
+ print(f'{color.fg_cyan}...Applying Changes to {self.datasource_name}...{color.reset}')
+
+ for each_column in columns_list:
+ if self.debugging_logs:
+ print(f'{color.fg_yellow}column:{color.reset}{each_column}')
+
+ column = datasource.columns.get(each_column['local-name'])
+
+ persona = personas.get(each_column['persona'].lower(), {})
+
+ if self.debugging_logs:
+ print(f'{color.fg_yellow}persona:{color.reset}{persona}')
+
+ column.caption = each_column['caption'] or column.caption
+ column.role = persona.get('role') or column.role
+ column.type = persona.get('role_type') or column.type
+ column.datatype = persona.get('datatype') or column.datatype
+ column.desc = each_column['description'] or column.desc
+
+ if 'calculation' in each_column:
+ column.calculation = each_column['calculation']
+
+ if self.debugging_logs:
+ print(f'{color.fg_yellow}column:{color.reset}{each_column}')
+
+ datasource.enforce_column(column, remote_name=each_column['remote_name'], folder_name=each_column['folder'])
+
+ start = time()
+ print(f'{color.fg_cyan}...Saving datasource changes...{color.reset}')
+ datasource.save()
+ print(f'{color.fg_green}{symbol.success} (Done in {round(time() - start)} sec) '
+ f'Saved datasource changes: {color.fg_yellow}{self.datasource_path}{color.reset}')
+
+
+ def apply_config_to_datasource(self):
+        """ Applies the target configs (column config and calculated column config) to the datasource.
+        If a column is in the datasource but NOT in the config, that column will be unchanged.
+
+        Returns:
+            None. The datasource is updated and saved in place
+
+        """
+
+ datasource = Datasource(self.datasource_path)
+
+ # Run column init on the datasource to make sure columns aren't hiding in Metadata records
+ datasource = add_metadata_records_as_columns(datasource, self.debugging_logs)
+ print(f'{color.fg_cyan}Ran column init {self.datasource_name}...{color.reset}')
+
+ # Build the config dictionaries from the datasource
+ datasource_column_config, datasource_calculated_column_config = build_configs(datasource, self.datasource_name)
+ print(f'{color.fg_cyan}Built dictionaries from the datasource {self.datasource_name}...{color.reset}')
+
+        # Prepare the configs by inverting, combining and removing configs for other datasources
+        target_config = self.prepare_configs(self.target_column_config, self.target_calculated_column_config)
+        print(f'{color.fg_cyan}Prepared the target configs {self.datasource_name}...{color.reset}')
+
+        datasource_config = self.prepare_configs(datasource_column_config, datasource_calculated_column_config)
+        print(f'{color.fg_cyan}Prepared the datasource configs {self.datasource_name}...{color.reset}')
+
+        # Flatten the configs to lists of field dictionaries for comparison
+        target_config = self.flatten_to_list_of_fields(target_config)
+        datasource_config = self.flatten_to_list_of_fields(datasource_config)
+
+        # Compare the configs and collect the columns that need updating
+        changes_to_make = self.compare_columns(target_config, datasource_config)
+
+ self.execute_changes(changes_to_make, datasource)
+
+def apply_configs(args):
+ # Set variables from the args
+ debugging_logs = args.debugging_logs
+ datasource_name = args.name
+ datasource_path = args.file_path
+
+ target_column_config = read_file(args.column_config)
+ target_calculated_column_config = read_file(args.calculated_column_config)
+
+ AC = ApplyConfigs(datasource_name, datasource_path, target_column_config, target_calculated_column_config, debugging_logs)
+
+ AC.apply_config_to_datasource()
diff --git a/tableau_utilities/scripts/cli.py b/tableau_utilities/scripts/cli.py
index e02651df..55c7b93c 100644
--- a/tableau_utilities/scripts/cli.py
+++ b/tableau_utilities/scripts/cli.py
@@ -15,6 +15,7 @@
from tableau_utilities.scripts.server_operate import server_operate
from tableau_utilities.scripts.datasource import datasource
from tableau_utilities.scripts.csv_config import csv_config
+from tableau_utilities.scripts.apply_configs import apply_configs
__version__ = importlib.metadata.version('tableau_utilities')
@@ -163,6 +164,9 @@
help='Deletes data from the extract based on the condition string provided. '
"""E.g. "CREATED_AT" < '1/1/2024'""")
parser_datasource.add_argument('-ci', '--column_init', action='store_true', help="Adds Columns from all Metadata Records, if they don't already exist.")
+parser_datasource.add_argument('-cf', '--clean_folders', action='store_true', help="Removes any folders that contain no columns")
parser_datasource.set_defaults(func=datasource)
# GENERATE CONFIG
@@ -200,6 +204,14 @@
'Use with --merge_with generate_merge_all')
parser_config_merge.set_defaults(func=merge_configs)
+# APPLY CONFIGS
+parser_config_apply = subparsers.add_parser(
+ 'apply_configs', help='Applies a config to a datasource. Writes over any datasource attributes to make it '
+ 'conform to the config.', formatter_class=RawTextHelpFormatter)
+parser_config_apply.add_argument('-cc', '--column_config', help='The path to the column configs file')
+parser_config_apply.add_argument('-cac', '--calculated_column_config', help='The path to the calculated field config file.')
+parser_config_apply.set_defaults(func=apply_configs)
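+# Example invocation (a sketch; assumes the shared --name and --file_path datasource args are
+# available to this subcommand, as validate_args_command_apply_configs below requires):
+#   tableau_utilities apply_configs --name "My Datasource" --file_path my_datasource.tdsx \
+#       --column_config column_config.json --calculated_column_config tableau_calc_config.json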
+
def validate_args_server_operate(args):
""" Validate that combinations of args are present """
@@ -263,6 +275,11 @@ def validate_args_command_merge_config(args):
parser.error(f'--merge_with {args.merge_with} requires --target_directory')
+def validate_args_command_apply_configs(args):
+ if args.file_path is None or args.name is None or args.column_config is None or args.calculated_column_config is None:
+ parser.error(f'{args.command} requires --name and --file_path for a datasource and --column_config and --calculated_column_config')
+
+
def validate_subpackage_hyper():
""" Checks that the hyper subpackage is installed for functions that use it """
@@ -455,6 +472,8 @@ def main():
validate_args_command_datasource(args)
if args.command == 'merge_config':
validate_args_command_merge_config(args)
+ if args.command == 'apply_configs':
+ validate_args_command_apply_configs(args)
# Set/Reset the directory
tmp_folder = args.output_dir
diff --git a/tableau_utilities/scripts/datasource.py b/tableau_utilities/scripts/datasource.py
index adc4d8ba..0e1e1f54 100644
--- a/tableau_utilities/scripts/datasource.py
+++ b/tableau_utilities/scripts/datasource.py
@@ -11,6 +11,10 @@
from tableau_utilities.tableau_server.tableau_server import TableauServer
+# Define color and symbol as globals
+color = Color()
+symbol = Symbol()
+
def create_column(name: str, persona: dict):
""" Creates the tfo column object with the minimum required fields to add a column
@@ -32,6 +36,73 @@ def create_column(name: str, persona: dict):
return column
+
+def add_metadata_records_as_columns(ds, debugging_logs=False):
+    """ Adds a column for each metadata record that only exists as connection metadata
+
+    When a Tableau extract is first created, every column is present in a metadata record, like this:
+
+        <metadata-record class='column'>
+            <remote-name>MY_COLUMN</remote-name>
+            <remote-type>131</remote-type>
+            <local-name>[MY_COLUMN]</local-name>
+            <parent-name>[Custom SQL Query]</parent-name>
+            <remote-alias>MY_COLUMN</remote-alias>
+            <ordinal>5</ordinal>
+            <local-type>integer</local-type>
+            <aggregation>Sum</aggregation>
+            <precision>38</precision>
+            <scale>0</scale>
+            <contains-null>true</contains-null>
+            <attributes>
+                <attribute datatype='string' name='DebugRemoteType'>"SQL_DECIMAL"</attribute>
+                <attribute datatype='string' name='DebugWireType'>"SQL_C_NUMERIC"</attribute>
+            </attributes>
+            <_.fcp.ObjectModelEncapsulateLegacy.true...object-id>[_62A667B34C534415B10B2075B0DC36DC]</_.fcp.ObjectModelEncapsulateLegacy.true...object-id>
+        </metadata-record>
+
+    Separately, some columns may also have a <column> element, like this:
+
+        <column datatype='integer' name='[MY_COLUMN]' role='measure' type='quantitative' />
+
+    Manipulating Tableau columns requires a <column> record, so this function adds one for any
+    metadata record that does not already have a matching column.
+
+    Args:
+        ds: A Datasource object
+        debugging_logs: True to print debugging information to the console
+
+    Returns:
+        ds: The altered datasource. You'll still need to save the datasource to apply the changes
+
+    """
+
+ # Create the list of columns to add
+ columns_to_add = [
+ m for m in ds.connection.metadata_records
+ if m.local_name not in [c.name for c in ds.columns]
+ ]
+ print(f'{color.fg_yellow}Adding missing columns from Metadata Records:{color.reset} '
+ f'{[m.local_name for m in columns_to_add]}')
+
+ # Add the columns making the best guess of the proper persona
+ for m in columns_to_add:
+ if debugging_logs:
+ print(f'{color.fg_magenta}Metadata Record -> {m.local_name}:{color.reset} {m}')
+
+ persona = get_persona_by_metadata_local_type(m.local_type)
+ persona_dict = personas.get(persona, {})
+ if debugging_logs:
+ print(f' - {color.fg_blue}Persona -> {persona}:{color.reset} {persona_dict}')
+
+ column = create_column(m.local_name, persona_dict)
+
+ if debugging_logs:
+ print(f' - {color.fg_cyan}Creating Column -> {column.name}:{color.reset} {column.dict()}')
+ ds.enforce_column(column, remote_name=m.remote_name)
+
+ return ds
+
def datasource(args, server=None):
""" Updates a Tableau Datasource locally
@@ -67,6 +138,7 @@ def datasource(args, server=None):
remote_name = args.remote_name
list_objects = args.list.title() if args.list else None
column_init = args.column_init
+ clean_folders = args.clean_folders
# Datasource Connection Args
conn_type = args.conn_type
@@ -77,9 +149,6 @@ def datasource(args, server=None):
conn_schema = args.conn_schema
conn_warehouse = args.conn_warehouse
- # Print Styling
- color = Color()
- symbol = Symbol()
# Downloads the datasource from Tableau Server if the datasource is not local
if location == 'online':
@@ -146,28 +215,7 @@ def datasource(args, server=None):
# Column Init - Add columns for any column in Metadata records but not in columns
if column_init:
- columns_to_add = [
- m for m in ds.connection.metadata_records
- if m.local_name not in [c.name for c in ds.columns]
- ]
- print(f'{color.fg_yellow}Adding missing columns from Metadata Records:{color.reset} '
- f'{[m.local_name for m in columns_to_add]}')
-
- for m in columns_to_add:
- if debugging_logs:
- print(f'{color.fg_magenta}Metadata Record -> {m.local_name}:{color.reset} {m}')
-
- persona = get_persona_by_metadata_local_type(m.local_type)
- persona_dict = personas.get(persona, {})
- if debugging_logs:
- print(f' - {color.fg_blue}Persona -> {persona}:{color.reset} {persona_dict}')
-
- column = create_column(m.local_name, persona_dict)
-
- if debugging_logs:
- print(f' - {color.fg_cyan}Creating Column -> {column.name}:{color.reset} {column.dict()}')
- ds.enforce_column(column, remote_name=m.remote_name)
-
+        ds = add_metadata_records_as_columns(ds, debugging_logs=debugging_logs)
# Add / modify a specified column
if column_name and not delete:
@@ -213,6 +261,11 @@ def datasource(args, server=None):
if delete == 'folder':
ds.folders_common.folder.delete(folder_name)
+ # Clean folders
+ if clean_folders:
+ cleaned = ds.remove_empty_folders()
+ print(f'Removed this list of folders: {color.fg_cyan}{cleaned}{color.reset}')
+
# Enforce Connection
if enforce_connection:
if debugging_logs:
@@ -231,7 +284,7 @@ def datasource(args, server=None):
ds.connection.update(connection)
# Save the datasource if an edit may have happened
- if column_name or folder_name or delete or enforce_connection or empty_extract or column_init:
+ if column_name or folder_name or delete or enforce_connection or empty_extract or column_init or clean_folders:
start = time()
print(f'{color.fg_cyan}...Saving datasource changes...{color.reset}')
ds.save()
diff --git a/tableau_utilities/scripts/gen_config.py b/tableau_utilities/scripts/gen_config.py
index ab8c3673..a8236c0f 100644
--- a/tableau_utilities/scripts/gen_config.py
+++ b/tableau_utilities/scripts/gen_config.py
@@ -10,22 +10,28 @@
from tableau_utilities.tableau_server.tableau_server import TableauServer
-def load_csv_with_definitions(file=None):
+def load_csv_with_definitions(file=None, debugging_logs=False):
""" Returns a dictionary with the definitions from a csv. The columns are expected to include column_name and description
Args:
file: The path to the .csv file with the definitions. The csv must include a column_name and description.
+        debugging_logs: Prints information to the console if True
Returns:
dictionary mapping column name to definition
+
"""
definitions_mapping = dict()
df = pd.read_csv(file)
+
df.columns = df.columns.str.lower()
definitions = df.to_dict('records')
+ if debugging_logs:
+ print(definitions)
+
# Check that the csv contains column_name and description headers
column_names = list(df.columns)
if 'column_name' not in column_names or 'description' not in column_names:
@@ -35,8 +41,10 @@ def load_csv_with_definitions(file=None):
if str(column['description']) != 'nan':
definitions_mapping[column['column_name']] = column['description']
- return definitions_mapping
+ if debugging_logs:
+ print(definitions_mapping)
+ return definitions_mapping
def choose_persona(role, role_type, datatype, caption):
""" The config relies on a persona which is a combination of role, role_type and datatype for each column.
@@ -253,6 +261,51 @@ def build_folder_mapping(folders):
return mappings
+def build_configs(datasource, datasource_name, debugging_logs=False, definitions_csv_path=None):
+    """ Builds the column config and calculated column config for a datasource
+
+    Args:
+        datasource: A Tableau utilities Datasource object
+        datasource_name: The name of the datasource
+        debugging_logs: True to print debugging logs to the console
+        definitions_csv_path: The path to a .csv with data definitions
+
+    Returns:
+        column_configs: A dictionary with the column configs
+        calculated_column_configs: A dictionary with the calculated field configs
+
+    """
+
+ # Get column information from the metadata records
+ metadata_record_config = get_metadata_record_config(
+ datasource.connection.metadata_records,
+ datasource_name,
+ debugging_logs
+ )
+
+ # Get the mapping of definitions from the csv
+ definitions_mapping = dict()
+ if definitions_csv_path is not None:
+        definitions_mapping = load_csv_with_definitions(file=definitions_csv_path, debugging_logs=debugging_logs)
+
+ # Extract the columns and folders. Build the new config
+ folder_mapping = build_folder_mapping(datasource.folders_common)
+ column_configs, calculated_column_configs = create_column_config(
+ columns=datasource.columns,
+ datasource_name=datasource_name,
+ folder_mapping=folder_mapping,
+ metadata_record_columns=metadata_record_config,
+ definitions_mapping=definitions_mapping,
+ debugging_logs=debugging_logs
+ )
+
+ # Sort configs
+ column_configs = dict(sorted(column_configs.items()))
+ calculated_column_configs = dict(sorted(calculated_column_configs.items()))
+
+ return column_configs, calculated_column_configs
+
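+# Usage (a minimal sketch; the path and datasource name are illustrative):
+#   ds = Datasource('path/to/my_datasource.tdsx')
+#   column_configs, calculated_column_configs = build_configs(ds, 'My Datasource')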
+
def generate_config(args, server: TableauServer = None):
""" Downloads a datasource and saves configs for that datasource
@@ -293,34 +346,14 @@ def generate_config(args, server: TableauServer = None):
print(f'{color.fg_yellow}BUILDING CONFIG {symbol.arrow_r} '
f'{color.fg_grey}{datasource_name} {symbol.sep} {datasource_path}{color.reset}')
- datasource = Datasource(datasource_path)
- # Get column information from the metadata records
- metadata_record_config = get_metadata_record_config(
- datasource.connection.metadata_records,
- datasource_name,
- debugging_logs
- )
-
- # Get the mapping of definitions from the csv
- definitions_mapping = dict()
- if definitions_csv_path is not None:
- definitions_mapping = load_csv_with_definitions(file=definitions_csv_path)
- # Extract the columns and folders. Build the new config
- folder_mapping = build_folder_mapping(datasource.folders_common)
- column_configs, calculated_column_configs = create_column_config(
- columns=datasource.columns,
- datasource_name=datasource_name,
- folder_mapping=folder_mapping,
- metadata_record_columns=metadata_record_config,
- definitions_mapping=definitions_mapping,
- debugging_logs=debugging_logs
- )
+ datasource = Datasource(datasource_path)
- # Sort configs
- column_configs = dict(sorted(column_configs.items()))
- calculated_column_configs = dict(sorted(calculated_column_configs.items()))
+ # Build the config dictionaries
+ column_configs, calculated_column_configs = build_configs(datasource, datasource_name, debugging_logs,
+ definitions_csv_path)
+ # Output the configs to files
datasource_name_snake = convert_to_snake_case(datasource_name)
output_file_column_config = 'column_config.json'
output_file_calculated_column_config = 'tableau_calc_config.json'
diff --git a/tableau_utilities/scripts/merge_config.py b/tableau_utilities/scripts/merge_config.py
index 1535a55f..d86d00a2 100644
--- a/tableau_utilities/scripts/merge_config.py
+++ b/tableau_utilities/scripts/merge_config.py
@@ -4,16 +4,27 @@
def read_file(file_path):
- """ Read a JSON file to a dictionary
+ """Read a JSON file to a dictionary.
Args:
- file_path (str): The path of the file to read
+ file_path (str): The path of the file to read.
+ Returns:
+ dict: The JSON content as a dictionary.
"""
- with open(file_path, "r") as infile:
- config: dict = json.load(infile)
+ try:
+ with open(file_path, "r") as infile:
+ config = json.load(infile)
+ print(f"Successfully read file: {file_path}")
+ return config
+ except FileNotFoundError:
+ print(f"File not found: {file_path}")
+ except json.JSONDecodeError:
+ print(f"Error decoding JSON from file: {file_path}")
+ except Exception as e:
+ print(f"An unexpected error occurred: {e}")
- return config
+ return {}
def write_file(file_name, config, debugging_logs=False):
@@ -45,6 +56,30 @@ def add_definitions_mapping(config, definitions_mapping):
return config
+def add_definitions_mapping_any_local_name(config, definitions_mapping):
+ """Adds definitions from a mapping to the config. Chooses the definition from the mapping if needed.
+
+ Args:
+ config (dict): A datasource config.
+ definitions_mapping (dict): A dictionary with columns as keys and their definitions as values.
+
+ Returns:
+ dict: The updated config with new descriptions.
+ """
+ if not isinstance(definitions_mapping, dict):
+ raise TypeError("definitions_mapping should be a dictionary")
+
+ for column, definition in definitions_mapping.items():
+ if len(definition) > 0:
+ column_lower = column.lower()
+ for key, value in config.items():
+ for datasource in value.get('datasources', []):
+ if datasource.get('local-name', '').lower() == column_lower:
+ config[key]['description'] = definition
+ break
+ return config
+
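+# For example (illustrative values): given the config entry
+#   {'My Caption': {'description': '', 'datasources': [{'name': 'ds_a', 'local-name': 'MY_COLUMN'}]}}
+# and the mapping {'my_column': 'A perfect description'}, the entry's description becomes
+# 'A perfect description', since local-names are compared case-insensitively.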
+
def merge_2_configs(existing_config, additional_config, debugging_logs=False):
""" Takes 2 configs and adds information from the additional_cong to the existing_config
The output of the merged config should be merged into the existing config in a PR
@@ -112,7 +147,7 @@ def sort_config(config, debugging_logs):
if debugging_logs:
print('KEY', k)
- print('CONGIG', v)
+ print('CONFIG', v)
print('DATASOURCES', v['datasources'])
sorted_datasources = sorted(v['datasources'], key=lambda d: d['name'])
@@ -159,6 +194,7 @@ def merge_configs(args, server=None):
existing_config_path = args.existing_config
additional_config_path = args.additional_config
definitions_csv_path = args.definitions_csv
+ # definitions_csv_local_name_path = args.definitions_csv_local_name
merge_with = args.merge_with
file_name = f'{args.merged_config}.json'
target_directory = args.target_directory
@@ -173,17 +209,20 @@ def merge_configs(args, server=None):
if merge_with == 'config':
read_merge_write(existing_config_path, additional_config_path, file_name, debugging_logs)
- # Merge a config with a definitions csv
+    # Merge a config with a definitions csv, matching definitions to columns by local-name
elif merge_with == 'csv':
# Read files
        existing_config = read_file(existing_config_path)
- definitions_mapping = load_csv_with_definitions(file=definitions_csv_path)
+ definitions_mapping = load_csv_with_definitions(file=definitions_csv_path, debugging_logs=debugging_logs)
# Merge
- new_config = add_definitions_mapping(existing_config, definitions_mapping)
+ new_config = add_definitions_mapping_any_local_name(existing_config, definitions_mapping)
# Sort and write the merged config
new_config = sort_config(new_config, debugging_logs)
+
write_file(file_name=file_name, config=new_config, debugging_logs=debugging_logs)
+ print(f'{color.fg_yellow}DEFINITIONS CSV {symbol.arrow_r} '
+ f'{color.fg_grey}{definitions_csv_path}{color.reset}')
print(f'{color.fg_yellow}EXISTING CONFIG {symbol.arrow_r} '
f'{color.fg_grey}{existing_config_path}{color.reset}')
print(f'{color.fg_yellow}ADDITIONAL CONFIG {symbol.arrow_r} '
diff --git a/tableau_utilities/tableau_file/tableau_file.py b/tableau_utilities/tableau_file/tableau_file.py
index 8d55d56b..63f6f560 100644
--- a/tableau_utilities/tableau_file/tableau_file.py
+++ b/tableau_utilities/tableau_file/tableau_file.py
@@ -301,6 +301,42 @@ def enforce_column(self, column, folder_name=None, remote_name=None):
if not found:
self.extract.connection.cols.append(extract_col)
+ def remove_empty_folders(self):
+        """ Removes any folder that has no columns in it
+
+        Example:
+            "Folder - 2 columns" will be unchanged, and the xml line for "Folder - Empty" will be removed:
+
+            <_.fcp.SchemaViewerObjectModel.true...folders-common>
+                <folder name='Folder - 2 columns'>
+                    <folder-item name='[COLUMN_1]' type='field' />
+                    <folder-item name='[COLUMN_2]' type='field' />
+                </folder>
+                <folder name='Folder - Empty' />
+            </_.fcp.SchemaViewerObjectModel.true...folders-common>
+
+        Returns:
+            The list of folder names that were removed
+
+        """
+
+ # Identify empty folders
+ empty_folder_list = []
+
+        for folder in self.folders_common.folder:
+            if len(folder.folder_item) == 0:
+                empty_folder_list.append(folder.name)
+
+ # Remove Empty Folders
+ for empty_folder in empty_folder_list:
+ self.folders_common.folder.delete(empty_folder)
+
+ return empty_folder_list
+
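+    # Usage (a minimal sketch; the file name is illustrative). remove_empty_folders only alters
+    # the in-memory object, so save() is still needed to persist the change:
+    #   ds = Datasource('my_datasource.tdsx')
+    #   removed = ds.remove_empty_folders()
+    #   ds.save()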
+
def save(self):
""" Save all changes made to each section of the Datasource """
parent = self._root.find('.')
diff --git a/tableau_utilities/tableau_file/tableau_file_objects.py b/tableau_utilities/tableau_file/tableau_file_objects.py
index 7935f803..11c1defd 100644
--- a/tableau_utilities/tableau_file/tableau_file_objects.py
+++ b/tableau_utilities/tableau_file/tableau_file_objects.py
@@ -708,6 +708,7 @@ class Connection(TableauFileObject):
extract_engine: bool = None
port: int = None
max_varchar_size: str = None
+ oauth_config_id: str = None
def dict(self):
output = dict()
@@ -759,6 +760,8 @@ def dict(self):
output['@port'] = str(self.port)
if self.max_varchar_size is not None:
output['@max-varchar-size'] = str(self.max_varchar_size)
+        if self.oauth_config_id is not None:
+            output['@oauth-config-id'] = str(self.oauth_config_id)
return output
diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644
index 00000000..33e282d7
--- /dev/null
+++ b/tests/conftest.py
@@ -0,0 +1,25 @@
+import pytest
+
+# Number of collected tests per test file, populated before the test loop runs
+file_test_count = {}
+
+@pytest.hookimpl(tryfirst=True)
+def pytest_sessionstart(session):
+    """ Resets the per-file test counts at the start of the session """
+    global file_test_count
+    file_test_count = {}
+
+@pytest.hookimpl(tryfirst=True)
+def pytest_runtestloop(session):
+    """ Counts the collected tests per file before the test loop runs """
+    global file_test_count
+ for item in session.items:
+ file_path = str(item.fspath)
+ if file_path not in file_test_count:
+ file_test_count[file_path] = 0
+ file_test_count[file_path] += 1
+
+@pytest.hookimpl(trylast=True)
+def pytest_terminal_summary(terminalreporter, exitstatus):
+    """ Writes a per-file test count summary at the end of the test run """
+ terminalreporter.write_sep("=", "test count summary")
+ for file_path, count in file_test_count.items():
+ terminalreporter.write_line(f"{file_path}: {count} test(s)")
+ terminalreporter.write_line(f"Total number of test files: {len(file_test_count)}")
+ terminalreporter.write_line(f"Total number of tests: {sum(file_test_count.values())}")
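+
+# Illustrative terminal output from the summary above (paths and counts are examples):
+#   ================ test count summary ================
+#   /repo/tests/test_apply_configs.py: 5 test(s)
+#   /repo/tests/test_datasource_remove_empty_folders.py: 3 test(s)
+#   Total number of test files: 2
+#   Total number of tests: 8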
diff --git a/tests/test_apply_configs.py b/tests/test_apply_configs.py
new file mode 100644
index 00000000..8981f669
--- /dev/null
+++ b/tests/test_apply_configs.py
@@ -0,0 +1,292 @@
+import pytest
+from tableau_utilities.scripts.apply_configs import ApplyConfigs
+
+
+@pytest.fixture
+def apply_configs():
+ return ApplyConfigs(datasource_name="my_datasource_1", datasource_path="", target_column_config={},
+ target_calculated_column_config={}, debugging_logs=False)
+
+
+def test_invert_config_single_datasource(apply_configs):
+ sample_config = {
+ "Column1": {
+ "description": "Description of Column1",
+ "folder": "Folder1",
+ "persona": "string_dimension",
+ "datasources": [
+ {
+ "name": "my_datasource_1",
+ "local-name": "MY_COLUMN_1",
+ "sql_alias": "MY_COLUMN_1_ALIAS"
+ }
+ ]
+ }
+ }
+
+ expected_output = {
+ "my_datasource_1": {
+ "Column1": {
+ "description": "Description of Column1",
+ "folder": "Folder1",
+ "persona": "string_dimension",
+ "local-name": "MY_COLUMN_1",
+ "remote_name": "MY_COLUMN_1_ALIAS"
+ }
+ }
+ }
+
+ result = apply_configs.invert_config(sample_config)
+ assert result == expected_output
+
+
+def test_invert_config_multiple_datasources(apply_configs):
+ sample_config = {
+ "Column2": {
+ "description": "Description of Column2",
+ "folder": "Folder2",
+ "persona": "string_dimension",
+ "datasources": [
+ {
+ "name": "my_datasource_1",
+ "local-name": "MY_COLUMN_1",
+ "sql_alias": "MY_COLUMN_1_ALIAS"
+ },
+ {
+ "name": "my_datasource_2",
+ "local-name": "MY_COLUMN_2",
+ "sql_alias": "MY_COLUMN_2_ALIAS"
+ }
+ ]
+ }
+ }
+
+ expected_output = {
+ "my_datasource_1": {
+ "Column2": {
+ "description": "Description of Column2",
+ "folder": "Folder2",
+ "persona": "string_dimension",
+ "local-name": "MY_COLUMN_1",
+ "remote_name": "MY_COLUMN_1_ALIAS"
+ }
+ },
+ "my_datasource_2": {
+ "Column2": {
+ "description": "Description of Column2",
+ "folder": "Folder2",
+ "persona": "string_dimension",
+ "local-name": "MY_COLUMN_2",
+ "remote_name": "MY_COLUMN_2_ALIAS"
+ }
+ }
+ }
+
+ result = apply_configs.invert_config(sample_config)
+ assert result == expected_output
+
+
+def test_prepare_configs(apply_configs):
+ sample_config_A = {
+ "Column1": {
+ "description": "Description of Column1",
+ "folder": "Folder1",
+ "persona": "string_dimension",
+ "datasources": [
+ {
+ "name": "my_datasource_1",
+ "local-name": "MY_COLUMN_1",
+ "sql_alias": "MY_COLUMN_1_ALIAS"
+ }
+ ]
+ },
+ "Column2": {
+ "description": "Description of Column2",
+ "folder": "Folder2",
+ "persona": "string_dimension",
+ "datasources": [
+ {
+ "name": "my_datasource_1",
+ "local-name": "MY_COLUMN_1",
+ "sql_alias": "MY_COLUMN_1_ALIAS"
+ },
+ {
+ "name": "my_datasource_2",
+ "local-name": "MY_COLUMN_2",
+ "sql_alias": "MY_COLUMN_2_ALIAS"
+ }
+ ]
+ }
+ }
+
+ sample_config_B = {
+ "# ID": {
+ "description": "Distinct Count of the ID",
+ "calculation": "COUNTD([ID])",
+ "folder": "My Data",
+ "persona": "continuous_number_measure",
+ "datasources": [
+ {
+ "name": "my_datasource_1",
+ "local-name": "MY_COLUMN_1",
+ "sql_alias": "MY_COLUMN_1_ALIAS"
+ },
+ {
+ "name": "my_datasource_2",
+ "local-name": "MY_COLUMN_2",
+ "sql_alias": "MY_COLUMN_2_ALIAS"
+ }
+ ],
+ "default_format": "n#,##0;-#,##0"
+ }
+ }
+
+ expected_output = {
+ "Column1": {
+ "description": "Description of Column1",
+ "folder": "Folder1",
+ "persona": "string_dimension",
+ "local-name": "MY_COLUMN_1",
+ "remote_name": "MY_COLUMN_1_ALIAS"
+ },
+ "Column2": {
+ "description": "Description of Column2",
+ "folder": "Folder2",
+ "persona": "string_dimension",
+ "local-name": "MY_COLUMN_1",
+ "remote_name": "MY_COLUMN_1_ALIAS"
+ },
+ "# ID": {
+ "description": "Distinct Count of the ID",
+ "calculation": "COUNTD([ID])",
+ "default_format": "n#,##0;-#,##0",
+ "folder": "My Data",
+ "persona": "continuous_number_measure",
+ "local-name": "MY_COLUMN_1",
+ "remote_name": "MY_COLUMN_1_ALIAS"
+ }
+ }
+
+ result = apply_configs.prepare_configs(sample_config_A, sample_config_B)
+ assert result == expected_output
+
+def test_flatten_to_list_of_fields(apply_configs):
+
+ sample_dict = {
+ 'My Caption 1': {
+ 'description': 'A perfect description',
+ 'folder': 'My Folder',
+ 'local-name': 'MY_FIELD_1',
+ 'persona': 'string_dimension',
+ 'remote_name': 'MY_FIELD_1'
+ },
+ 'My Caption 2': {
+ 'description': 'Another perfect description',
+ 'folder': 'My Folder',
+ 'local-name': 'MY_FIELD_2',
+ 'persona': 'string_dimension',
+ 'remote_name': 'MY_FIELD_2'
+ }
+ }
+
+ expected_output = [
+ {
+ 'caption': 'My Caption 1',
+ 'description': 'A perfect description',
+ 'folder': 'My Folder',
+ 'local-name': 'MY_FIELD_1',
+ 'persona': 'string_dimension',
+ 'remote_name': 'MY_FIELD_1'
+ },
+        {
+            'caption': 'My Caption 2',
+ 'description': 'Another perfect description',
+ 'folder': 'My Folder',
+ 'local-name': 'MY_FIELD_2',
+ 'persona': 'string_dimension',
+ 'remote_name': 'MY_FIELD_2'
+ }
+ ]
+
+ result = apply_configs.flatten_to_list_of_fields(sample_dict)
+ assert result == expected_output
+
+def test_select_matching_datasource_config(apply_configs):
+
+ sample_config = {
+ "my_datasource_1": {
+ "Column1": {
+ "description": "Description of Column1",
+ "folder": "Folder1",
+ "persona": "string_dimension",
+ "local-name": "MY_COLUMN_1",
+ "remote_name": "MY_COLUMN_1_ALIAS"
+ }
+ },
+ "my_datasource_2": {
+ "Column2": {
+ "description": "Description of Column2",
+ "folder": "Folder2",
+ "persona": "string_dimension",
+ "local-name": "MY_COLUMN_2",
+ "remote_name": "MY_COLUMN_2_ALIAS"
+ }
+ }
+ }
+
+ expected_output = {
+ "Column1": {
+ "description": "Description of Column1",
+ "folder": "Folder1",
+ "persona": "string_dimension",
+ "local-name": "MY_COLUMN_1",
+ "remote_name": "MY_COLUMN_1_ALIAS"
+ }
+ }
+ result = apply_configs.select_matching_datasource_config(sample_config)
+ assert result == expected_output
+
+
+if __name__ == '__main__':
+ pytest.main()
diff --git a/tests/test_datasource_remove_empty_folders.py b/tests/test_datasource_remove_empty_folders.py
new file mode 100644
index 00000000..bce4a203
--- /dev/null
+++ b/tests/test_datasource_remove_empty_folders.py
@@ -0,0 +1,58 @@
+import pytest
+from unittest.mock import patch
+from tableau_utilities.tableau_file.tableau_file_objects import FoldersCommon, Folder, FolderItem
+from tableau_utilities.tableau_file.tableau_file import Datasource
+
+@pytest.fixture
+def mock_datasource():
+ with patch('tableau_utilities.tableau_file.tableau_file.Datasource.__init__', lambda x, file_path: None):
+ datasource = Datasource(file_path='dummy_path')
+
+ # Create the mock data
+ mock_folders = [
+ Folder(
+ name='Folder - 2 columns',
+ tag='folder',
+ role=None,
+ folder_item=[
+ FolderItem(name='[COLUMN_1]', type='field', tag='folder-item'),
+ FolderItem(name='[COLUMN_2]', type='field', tag='folder-item')
+ ]
+ ),
+ Folder(
+ name='Folder - Empty',
+ tag='folder',
+ role=None,
+ folder_item=[]
+ ),
+ Folder(
+ name='People',
+ tag='folder',
+ role=None,
+ folder_item=[
+ FolderItem(name='[COLUMN_2+3]', type='field', tag='folder-item')
+ ]
+ )
+ ]
+
+ # Assign the mock folders to the folders_common attribute
+ folders_common = FoldersCommon(folder=mock_folders)
+ datasource.folders_common = folders_common
+
+ return datasource
+
+def test_remove_empty_folders_removed_folders(mock_datasource):
+ removed_folders = mock_datasource.remove_empty_folders()
+ assert removed_folders == ['Folder - Empty']
+
+def test_remove_empty_folders_folder_count(mock_datasource):
+ mock_datasource.remove_empty_folders()
+ assert len(mock_datasource.folders_common.folder) == 2
+
+def test_remove_empty_folders_folder_names(mock_datasource):
+ mock_datasource.remove_empty_folders()
+ folder_names = [folder.name for folder in mock_datasource.folders_common.folder]
+ assert 'Folder - Empty' not in folder_names
+
+if __name__ == '__main__':
+ pytest.main()