diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
index 4df17785..01d3d3ae 100644
--- a/.github/workflows/python-package.yml
+++ b/.github/workflows/python-package.yml
@@ -32,6 +32,10 @@ jobs:
         flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
         # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
         flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
+    - name: Run pytest on the tests in tests/
+      run: |
+        pytest tests/ -p no:warnings -vv
     - name: Run pytest on tableau-utilities
       run: |
         cd tableau_utilities && pytest -v
+
diff --git a/.gitignore b/.gitignore
index bcbe97d3..e6ca5069 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,6 +4,7 @@ sheets.googleapis.com-python.json
 .idea
 .DS_Store
 tmp_tdsx_and_config/
+development_test_files/
 
 # Byte-compiled / optimized / DLL files
 __pycache__/
diff --git a/README.md b/README.md
index c3d536e9..bc7e9f43 100644
--- a/README.md
+++ b/README.md
@@ -20,6 +20,7 @@ This extra package depends on the tableauhyperapi which is incompatible with App
 #### Locally using pip
 - `cd tableau-utilities`
 - `pip install ./`
+- `pip install --upgrade ./` to upgrade an existing installation without uninstalling first
 
 #### Confirm installation
 - `which tableau_utilities`
diff --git a/airflow_example/dags/tableau_datasource_update/tableau_datasource_update.py b/airflow_example/dags/tableau_datasource_update/tableau_datasource_update.py
index 7d38f402..d740ae15 100644
--- a/airflow_example/dags/tableau_datasource_update/tableau_datasource_update.py
+++ b/airflow_example/dags/tableau_datasource_update/tableau_datasource_update.py
@@ -314,54 +314,54 @@ def __compare_folders(self, datasource_id, tds_folders, cfg_folders):
 
     def execute(self, context):
         """ Update Tableau datasource according to config. """
         github_conn = BaseHook.get_connection(self.github_conn_id)
         config = cfg.Config(
             githup_token=github_conn.password,
             repo_name=github_conn.extra_dejson.get('repo_name'),
             repo_branch=github_conn.extra_dejson.get('repo_branch'),
             subfolder=github_conn.extra_dejson.get('subfolder')
         )
 
         ts = get_tableau_server(self.tableau_conn_id)
         expected_conn_attrs = self.__set_connection_attributes()
 
         # Get the ID for each datasource in the config
         for ds in ts.get.datasources():
             if ds not in config.datasources:
                 continue
             config.datasources[ds].id = ds.id
 
         for datasource in config.datasources:
             if not datasource.id:
                 logging.error('!! Datasource not found in Tableau Online: %s / %s',
                               datasource.project_name, datasource.name)
                 continue
             dsid = datasource.id
             # Set default dict attributes for tasks, for each datasource
             self.tasks[dsid] = {a: [] for a in UPDATE_ACTIONS}
             self.tasks[dsid]['project'] = datasource.project_name
             self.tasks[dsid]['datasource_name'] = datasource.name
             if not config.in_maintenance_window and AIRFLOW_ENV not in ['STAGING', 'DEV']:
                 self.tasks[dsid]['skip'] = 'Outside maintenance window'
                 logging.info('(SKIP) Outside maintenance window: %s', datasource.name)
                 continue
             elif datasource.name in EXCLUDED_DATASOURCES:
                 self.tasks[dsid]['skip'] = 'Marked to exclude'
                 logging.info('(SKIP) Marked to exclude: %s', datasource.name)
                 continue
             logging.info('Checking Datasource: %s', datasource.name)
             # Download the Datasource for comparison
             dl_path = f"downloads/{dsid}/"
             os.makedirs(dl_path, exist_ok=True)
             ds_path = ts.download.datasource(dsid, file_dir=dl_path, include_extract=False)
             tds = Datasource(ds_path)
             # Cleanup downloaded file after assigning the Datasource
             shutil.rmtree(dl_path, ignore_errors=True)
             # Add connection task, if there is a difference
             self.__compare_connection(dsid, datasource.name, tds.connection, expected_conn_attrs)
             # Add folder tasks, if folders need to be added/deleted
             self.__compare_folders(dsid, tds.folders_common, datasource.folders)
             # Add Column tasks, if there are missing columns, or columns need to be updated
             for column in datasource.columns:
                 # Check if the column metadata needs to be updated
                 self.__compare_column_metadata(dsid, tds, column)
diff --git a/setup.py b/setup.py
index eac07082..7380dd42 100644
--- a/setup.py
+++ b/setup.py
@@ -12,7 +12,7 @@
     long_description=readme,
     long_description_content_type='text/markdown',
     name="tableau_utilities",
-    version="2.2.11",
+    version="2.2.12",
     requires_python=">=3.8",
     packages=[
         'tableau_utilities',
diff --git a/tableau_utilities/scripts/apply_configs.py b/tableau_utilities/scripts/apply_configs.py
new file mode 100644
index 00000000..b551989c
--- /dev/null
+++ b/tableau_utilities/scripts/apply_configs.py
@@ -0,0 +1,284 @@
+from copy import deepcopy
+import pprint
+from typing import Dict, Any, List
+from time import time
+
+from tableau_utilities.tableau_file.tableau_file import Datasource
+from tableau_utilities.general.cli_styling import Color, Symbol
+from tableau_utilities.general.config_column_persona import personas
+from tableau_utilities.scripts.datasource import add_metadata_records_as_columns
+from tableau_utilities.scripts.gen_config import build_configs
+from tableau_utilities.scripts.merge_config import read_file
+
+color = Color()
+symbol = Symbol()
+
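+# A minimal usage sketch (illustrative names and paths; the apply_configs()
+# entry point at the bottom of this file is what the CLI actually calls):
+#
+#   applier = ApplyConfigs('my_datasource', 'my_datasource.tdsx',
+#                          column_config, calculated_column_config, False)
+#   applier.apply_config_to_datasource()
+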
+class ApplyConfigs:
+    """Applies a set of configs to a datasource. Configs prefixed with target_ will be applied to the datasource.
+    Configs prefixed with datasource_ represent the current state of the datasource before changes.
+    """
+
+    def __init__(self,
+                 datasource_name: str,
+                 datasource_path: str,
+                 target_column_config: Dict[str, Any],
+                 target_calculated_column_config: Dict[str, Any],
+                 debugging_logs: bool) -> None:
+        self.datasource_name: str = datasource_name
+        self.datasource_path: str = datasource_path
+        self.target_column_config: Dict[str, Any] = target_column_config
+        self.target_calculated_column_config: Dict[str, Any] = target_calculated_column_config
+        self.debugging_logs: bool = debugging_logs
+
+
+    def select_matching_datasource_config(self, config: Dict[str, Any]) -> Dict[str, Any]:
+        """ Limits a config to the entries for self.datasource_name.
+
+        Args:
+            config: A config keyed by datasource name.
+
+        Returns:
+            A config with any datasource that is not self.datasource_name removed
+
+        """
+
+        try:
+            selected_config = config[self.datasource_name]
+            return selected_config
+        except KeyError:
+            print(f'{color.fg_red}No matching datasource found in config for {self.datasource_name}{color.reset}')
+            return {}
+
+    def invert_config(self, config: Dict[str, Any]) -> Dict[str, Any]:
+        """Helper function to invert the column config and calc config.
+        Output -> {datasource: {column: info}}
+
+        Args:
+            config (dict): The config to invert.
+
+        Returns:
+            dict: The inverted config.
+        """
+
+        inverted_config = {}
+
+        for column, i in config.items():
+            for datasource in i['datasources']:
+                new_info = deepcopy(i)
+                del new_info['datasources']
+                new_info['local-name'] = datasource['local-name']
+                new_info['remote_name'] = datasource['sql_alias'] if 'sql_alias' in datasource else None
+                inverted_config.setdefault(datasource['name'], {column: new_info})
+                inverted_config[datasource['name']].setdefault(column, new_info)
+
+        if self.debugging_logs:
+            pp = pprint.PrettyPrinter(indent=4, width=200, depth=None, compact=False)
+            pp.pprint(inverted_config)
+
+        return inverted_config
+
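+    # For illustration, invert_config turns a caption-keyed config such as
+    #   {'Column1': {..., 'datasources': [{'name': 'ds1', 'local-name': 'COL_1', 'sql_alias': 'COL_1_ALIAS'}]}}
+    # into the datasource-keyed form
+    #   {'ds1': {'Column1': {..., 'local-name': 'COL_1', 'remote_name': 'COL_1_ALIAS'}}}
+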
+
+    def prepare_configs(self, config_A: Dict[str, Any], config_B: Dict[str, Any]) -> Dict[str, Any]:
+        """ Takes 2 configs to invert, combine, and remove irrelevant datasource information. Columns in a main
+        config can be in 1 or many Tableau datasources, so when managing multiple datasources it's likely that
+        some entries belong to other datasources and need to be removed.
+
+        Args:
+            config_A: The column config.
+            config_B: The calculated field config.
+
+        Returns:
+            The combined config, limited to the current datasource.
+
+        """
+
+        # invert the configs
+        config_A = self.invert_config(config_A)
+        config_B = self.invert_config(config_B)
+
+        # Get only the configs for the current datasource.
+        # Calculated configs from a datasource can sometimes be empty. If it's empty, skip this step
+        if len(config_A) > 0:
+            config_A = self.select_matching_datasource_config(config_A)
+
+        if len(config_B) > 0:
+            config_B = self.select_matching_datasource_config(config_B)
+
+        # Combine configs
+        combined_config = {**config_A, **config_B}
+
+        if self.debugging_logs:
+            print(f'{color.fg_yellow}AFTER COMBINING CONFIGS{color.reset}')
+            pp = pprint.PrettyPrinter(indent=4, width=200, depth=None, compact=False)
+            pp.pprint(combined_config)
+
+        return combined_config
+
+
+    def flatten_to_list_of_fields(self, nested_dict: Dict[str, Dict[str, Any]]) -> List[Dict[str, Any]]:
+        """
+        Flattens a nested dictionary by removing one level of nesting and adding a "caption" key.
+
+        Args:
+            nested_dict (Dict[str, Dict[str, Any]]): The nested dictionary to flatten.
+
+        Returns:
+            List[Dict[str, Any]]: A list of dictionaries with "caption" as a key.
+        """
+        flattened_list = []
+        for key, value in nested_dict.items():
+            flattened_entry = {"caption": key}
+            flattened_entry.update(value)
+            flattened_list.append(flattened_entry)
+
+        if self.debugging_logs:
+            print(f'{color.fg_yellow}AFTER FLATTENING{color.reset}')
+            for field_config in flattened_list:
+                print(field_config)
+
+        return flattened_list
+
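+    # For illustration, flatten_to_list_of_fields turns
+    #   {'My Caption': {'folder': 'My Folder', 'local-name': 'MY_FIELD'}}
+    # into
+    #   [{'caption': 'My Caption', 'folder': 'My Folder', 'local-name': 'MY_FIELD'}]
+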
+    def compare_columns(self, target_config: List[Dict[str, Any]], datasource_config: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+        """Compares the target config to the datasource config and generates a list of changes to make the datasource match the target config.
+
+        Args:
+            target_config (List[Dict[str, Any]]): The target configuration list of dictionaries.
+            datasource_config (List[Dict[str, Any]]): The datasource configuration list of dictionaries.
+
+        Returns:
+            List[Dict[str, Any]]: A list of dictionaries with the columns that need updating.
+        """
+        changes_to_make = []
+
+        for target_entry in target_config:
+            if not any(target_entry == datasource_entry for datasource_entry in datasource_config):
+                if self.debugging_logs:
+                    print(f'{color.fg_yellow}NEED TO MAKE CHANGE:{color.reset}{target_entry}')
+                changes_to_make.append(target_entry)
+
+        if self.debugging_logs:
+            print(f'{color.fg_yellow}AFTER CREATING CHANGE LIST{color.reset}')
+            pp = pprint.PrettyPrinter(indent=4, width=200, depth=None, compact=False)
+            pp.pprint(changes_to_make)
+
+        print(f'Number of changes to make: {len(changes_to_make)}')
+        return changes_to_make
+
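+    # For illustration (entries shaped like flatten_to_list_of_fields output):
+    #   target_config     = [{'caption': 'ID', 'description': 'New', 'local-name': 'ID'}]
+    #   datasource_config = [{'caption': 'ID', 'description': 'Old', 'local-name': 'ID'}]
+    # compare_columns(target_config, datasource_config) returns the target entry,
+    # because the comparison is an exact dict match and any differing attribute
+    # flags the whole field for an update.
+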
+    def execute_changes(self, columns_list: List[Dict[str, Any]], datasource):
+        """ Applies the list of changes to the datasource and saves it.
+
+        Args:
+            columns_list: The list of field config dictionaries to apply.
+            datasource: The Datasource object to update.
+
+        Returns:
+            None
+
+        """
+
+        print(f'{color.fg_cyan}...Applying Changes to {self.datasource_name}...{color.reset}')
+
+        for each_column in columns_list:
+            if self.debugging_logs:
+                print(f'{color.fg_yellow}column:{color.reset}{each_column}')
+
+            column = datasource.columns.get(each_column['local-name'])
+
+            persona = personas.get(each_column['persona'].lower(), {})
+
+            if self.debugging_logs:
+                print(f'{color.fg_yellow}persona:{color.reset}{persona}')
+
+            column.caption = each_column['caption'] or column.caption
+            column.role = persona.get('role') or column.role
+            column.type = persona.get('role_type') or column.type
+            column.datatype = persona.get('datatype') or column.datatype
+            column.desc = each_column['description'] or column.desc
+
+            if 'calculation' in each_column:
+                column.calculation = each_column['calculation']
+
+            if self.debugging_logs:
+                print(f'{color.fg_yellow}column:{color.reset}{each_column}')
+
+            datasource.enforce_column(column, remote_name=each_column['remote_name'], folder_name=each_column['folder'])
+
+        start = time()
+        print(f'{color.fg_cyan}...Saving datasource changes...{color.reset}')
+        datasource.save()
+        print(f'{color.fg_green}{symbol.success} (Done in {round(time() - start)} sec) '
+              f'Saved datasource changes: {color.fg_yellow}{self.datasource_path}{color.reset}')
+
+
+    def apply_config_to_datasource(self):
+        """ Applies the target configs (column_config and calculated_column_config) to the datasource.
+        If a column is in the datasource but NOT in the config, that column will be unchanged.
+
+        Returns:
+            None
+
+        """
+
+        datasource = Datasource(self.datasource_path)
+
+        # Run column init on the datasource to make sure columns aren't hiding in Metadata records
+        datasource = add_metadata_records_as_columns(datasource, self.debugging_logs)
+        print(f'{color.fg_cyan}Ran column init {self.datasource_name}...{color.reset}')
+
+        # Build the config dictionaries from the datasource
+        datasource_column_config, datasource_calculated_column_config = build_configs(datasource, self.datasource_name)
+        print(f'{color.fg_cyan}Built dictionaries from the datasource {self.datasource_name}...{color.reset}')
+
+        # Prepare the configs by inverting, combining and removing configs for other datasources
+        target_config = self.prepare_configs(self.target_column_config, self.target_calculated_column_config)
+        print(f'{color.fg_cyan}Prepared the target configs {self.datasource_name}...{color.reset}')
+
+        datasource_config = self.prepare_configs(datasource_column_config, datasource_calculated_column_config)
+        print(f'{color.fg_cyan}Prepared the datasource configs {self.datasource_name}...{color.reset}')
+
+        target_config = self.flatten_to_list_of_fields(target_config)
+        datasource_config = self.flatten_to_list_of_fields(datasource_config)
+
+        changes_to_make = self.compare_columns(target_config, datasource_config)
+
+        self.execute_changes(changes_to_make, datasource)
+
+
+def apply_configs(args):
+    # Set variables from the args
+    debugging_logs = args.debugging_logs
+    datasource_name = args.name
+    datasource_path = args.file_path
+
+    target_column_config = read_file(args.column_config)
+    target_calculated_column_config = read_file(args.calculated_column_config)
+
+    AC = ApplyConfigs(datasource_name, datasource_path, target_column_config, target_calculated_column_config, debugging_logs)
+
+    AC.apply_config_to_datasource()
diff --git a/tableau_utilities/scripts/cli.py b/tableau_utilities/scripts/cli.py
index e02651df..55c7b93c 100644
--- a/tableau_utilities/scripts/cli.py
+++ b/tableau_utilities/scripts/cli.py
@@ -15,6 +15,7 @@
 from tableau_utilities.scripts.server_operate import server_operate
 from tableau_utilities.scripts.datasource import datasource
 from tableau_utilities.scripts.csv_config import csv_config
+from tableau_utilities.scripts.apply_configs import apply_configs
 
 __version__ = importlib.metadata.version('tableau_utilities')
 
@@ -163,6 +164,9 @@
                                help='Deletes data from the extract based on the condition string provided. '
                                     """E.g. "CREATED_AT" < '1/1/2024'""")
 parser_datasource.add_argument('-ci', '--column_init', action='store_true',
                                help="Adds Columns from all Metadata Records, if they don't already exist.")
+parser_datasource.add_argument('-cf', '--clean_folders', action='store_true', help="Removes any empty folders without columns")
 parser_datasource.set_defaults(func=datasource)
 
 # GENERATE CONFIG
@@ -200,6 +204,14 @@
                                  'Use with --merge_with generate_merge_all')
 parser_config_merge.set_defaults(func=merge_configs)
 
+# APPLY CONFIGS
+parser_config_apply = subparsers.add_parser(
+    'apply_configs', help='Applies a config to a datasource. Overwrites datasource attributes to make the '
+                          'datasource conform to the config.', formatter_class=RawTextHelpFormatter)
+parser_config_apply.add_argument('-cc', '--column_config', help='The path to the column configs file')
+parser_config_apply.add_argument('-cac', '--calculated_column_config', help='The path to the calculated field config file.')
+parser_config_apply.set_defaults(func=apply_configs)
+
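+# Example invocation (illustrative file names; --name and --file_path are the
+# shared datasource args checked by validate_args_command_apply_configs below):
+#   tableau_utilities apply_configs --name "My Datasource" --file_path my_datasource.tdsx \
+#       --column_config column_config.json --calculated_column_config tableau_calc_config.json
+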
 
 def validate_args_server_operate(args):
     """ Validate that combinations of args are present """
 
@@ -263,6 +275,11 @@ def validate_args_command_merge_config(args):
             parser.error(f'--merge_with {args.merge_with} requires --target_directory')
 
 
+def validate_args_command_apply_configs(args):
+    if args.file_path is None or args.name is None or args.column_config is None or args.calculated_column_config is None:
+        parser.error(f'{args.command} requires --name and --file_path for a datasource and --column_config and --calculated_column_config')
+
+
 def validate_subpackage_hyper():
     """ Checks that the hyper subpackage is installed for functions that use it """
 
@@ -455,6 +472,8 @@ def main():
         validate_args_command_datasource(args)
     if args.command == 'merge_config':
         validate_args_command_merge_config(args)
+    if args.command == 'apply_configs':
+        validate_args_command_apply_configs(args)
 
     # Set/Reset the directory
     tmp_folder = args.output_dir
diff --git a/tableau_utilities/scripts/datasource.py b/tableau_utilities/scripts/datasource.py
index adc4d8ba..0e1e1f54 100644
--- a/tableau_utilities/scripts/datasource.py
+++ b/tableau_utilities/scripts/datasource.py
@@ -11,6 +11,10 @@
 from tableau_utilities.tableau_server.tableau_server import TableauServer
 
 
+# Define color and symbol as globals
+color = Color()
+symbol = Symbol()
+
 def create_column(name: str, persona: dict):
     """ Creates the tfo column object with the minimum required fields to add a column
 
@@ -32,6 +36,73 @@
     return column
 
 
+def add_metadata_records_as_columns(ds, debugging_logs=False):
+    """ Adds columns for fields that are only present in the Metadata Records.
+
+    When you create your Tableau extract the first time, all columns will be present in
+    Metadata Records like this (illustrative values):
+
+        <metadata-record class='column'>
+            <remote-name>MY_COLUMN</remote-name>
+            <remote-type>131</remote-type>
+            <local-name>[MY_COLUMN]</local-name>
+            <parent-name>[Custom SQL Query]</parent-name>
+            <remote-alias>MY_COLUMN</remote-alias>
+            <ordinal>5</ordinal>
+            <local-type>integer</local-type>
+            <aggregation>Sum</aggregation>
+            <precision>38</precision>
+            <scale>0</scale>
+            <contains-null>true</contains-null>
+            <attributes>
+                <attribute datatype='string' name='DebugRemoteType'>"SQL_DECIMAL"</attribute>
+                <attribute datatype='string' name='DebugWireType'>"SQL_C_NUMERIC"</attribute>
+            </attributes>
+            <_.fcp.ObjectModelEncapsulateLegacy.true...object-id>[_62A667B34C534415B10B2075B0DC36DC]</_.fcp.ObjectModelEncapsulateLegacy.true...object-id>
+        </metadata-record>
+
+    Separately, some columns may also have a <column> element like this:
+
+        <column datatype='integer' name='[MY_COLUMN]' role='measure' type='quantitative' />
+
+    Manipulating Tableau columns requires a <column> element.
+
+    Args:
+        ds: A Datasource object
+        debugging_logs: True to print debugging information to the console
+
+    Returns:
+        ds: An altered datasource. You'll still need to save this ds to apply the changes.
+
+    """
+
+    # Create the list of columns to add
+    columns_to_add = [
+        m for m in ds.connection.metadata_records
+        if m.local_name not in [c.name for c in ds.columns]
+    ]
+    print(f'{color.fg_yellow}Adding missing columns from Metadata Records:{color.reset} '
+          f'{[m.local_name for m in columns_to_add]}')
+
+    # Add the columns making the best guess of the proper persona
+    for m in columns_to_add:
+        if debugging_logs:
+            print(f'{color.fg_magenta}Metadata Record -> {m.local_name}:{color.reset} {m}')
+
+        persona = get_persona_by_metadata_local_type(m.local_type)
+        persona_dict = personas.get(persona, {})
+        if debugging_logs:
+            print(f' - {color.fg_blue}Persona -> {persona}:{color.reset} {persona_dict}')
+
+        column = create_column(m.local_name, persona_dict)
+
+        if debugging_logs:
+            print(f' - {color.fg_cyan}Creating Column -> {column.name}:{color.reset} {column.dict()}')
+        ds.enforce_column(column, remote_name=m.remote_name)
+
+    return ds
+
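+# A minimal usage sketch (assumes `ds` is an open Datasource object; the change
+# only edits the in-memory XML, so the datasource still needs to be saved):
+#   ds = add_metadata_records_as_columns(ds, debugging_logs=True)
+#   ds.save()
+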
 
 def datasource(args, server=None):
     """ Updates a Tableau Datasource locally
 
@@ -67,6 +138,7 @@
     remote_name = args.remote_name
     list_objects = args.list.title() if args.list else None
     column_init = args.column_init
+    clean_folders = args.clean_folders
 
     # Datasource Connection Args
     conn_type = args.conn_type
@@ -77,9 +149,6 @@
     conn_schema = args.conn_schema
     conn_warehouse = args.conn_warehouse
 
-    # Print Styling
-    color = Color()
-    symbol = Symbol()
 
     # Downloads the datasource from Tableau Server if the datasource is not local
     if location == 'online':
@@ -146,28 +215,7 @@
 
     # Column Init - Add columns for any column in Metadata records but not in columns
     if column_init:
-        columns_to_add = [
-            m for m in ds.connection.metadata_records
-            if m.local_name not in [c.name for c in ds.columns]
-        ]
-        print(f'{color.fg_yellow}Adding missing columns from Metadata Records:{color.reset} '
-              f'{[m.local_name for m in columns_to_add]}')
-
-        for m in columns_to_add:
-            if debugging_logs:
-                print(f'{color.fg_magenta}Metadata Record -> {m.local_name}:{color.reset} {m}')
-
-            persona = get_persona_by_metadata_local_type(m.local_type)
-            persona_dict = personas.get(persona, {})
-            if debugging_logs:
-                print(f' - {color.fg_blue}Persona -> {persona}:{color.reset} {persona_dict}')
-
-            column = create_column(m.local_name, persona_dict)
-
-            if debugging_logs:
-                print(f' - {color.fg_cyan}Creating Column -> {column.name}:{color.reset} {column.dict()}')
-            ds.enforce_column(column, remote_name=m.remote_name)
-
+        ds = add_metadata_records_as_columns(ds, debugging_logs)
 
     # Add / modify a specified column
     if column_name and not delete:
@@ -213,6 +261,11 @@
     if delete == 'folder':
         ds.folders_common.folder.delete(folder_name)
 
+    # Clean folders
+    if clean_folders:
+        cleaned = ds.remove_empty_folders()
+        print(f'Removed this list of folders: {color.fg_cyan}{cleaned}{color.reset}')
+
     # Enforce Connection
     if enforce_connection:
         if debugging_logs:
@@ -231,7 +284,7 @@
             ds.connection.update(connection)
 
     # Save the datasource if an edit may have happened
-    if column_name or folder_name or delete or enforce_connection or empty_extract or column_init:
+    if column_name or folder_name or delete or enforce_connection or empty_extract or column_init or clean_folders:
         start = time()
         print(f'{color.fg_cyan}...Saving datasource changes...{color.reset}')
         ds.save()
diff --git a/tableau_utilities/scripts/gen_config.py b/tableau_utilities/scripts/gen_config.py
index ab8c3673..a8236c0f 100644
--- a/tableau_utilities/scripts/gen_config.py
+++ b/tableau_utilities/scripts/gen_config.py
@@ -10,22 +10,28 @@
 from tableau_utilities.tableau_server.tableau_server import TableauServer
 
 
-def load_csv_with_definitions(file=None):
+def load_csv_with_definitions(file=None, debugging_logs=False):
     """ Returns a dictionary with the definitions from a csv. The columns are expected to include column_name and description
 
     Args:
         file: The path to the .csv file with the definitions. The csv must include a column_name and description.
+        debugging_logs: Prints information to the console if True
 
     Returns:
         dictionary mapping column name to definition
+
     """
     definitions_mapping = dict()
     df = pd.read_csv(file)
+    df.columns = df.columns.str.lower()
     definitions = df.to_dict('records')
 
+    if debugging_logs:
+        print(definitions)
+
     # Check that the csv contains column_name and description headers
     column_names = list(df.columns)
     if 'column_name' not in column_names or 'description' not in column_names:
@@ -35,8 +41,10 @@
         if str(column['description']) != 'nan':
             definitions_mapping[column['column_name']] = column['description']
 
-    return definitions_mapping
+    if debugging_logs:
+        print(definitions_mapping)
 
+    return definitions_mapping
 
 def choose_persona(role, role_type, datatype, caption):
     """ The config relies on a persona which is a combination of role, role_type and datatype for each column.
@@ -253,6 +261,51 @@
     return mappings
 
 
+def build_configs(datasource, datasource_name, debugging_logs=False, definitions_csv_path=None):
+    """ Builds the column config and calculated field config from a datasource.
+
+    Args:
+        datasource: A Tableau utilities datasource object
+        datasource_name: The name of the datasource
+        debugging_logs: True to print debugging logs to the console
+        definitions_csv_path: The path to a .csv with data definitions
+
+    Returns:
+        column_configs: A dictionary with the column configs
+        calculated_column_configs: A dictionary with the calculated field configs
+
+    """
+
+    # Get column information from the metadata records
+    metadata_record_config = get_metadata_record_config(
+        datasource.connection.metadata_records,
+        datasource_name,
+        debugging_logs
+    )
+
+    # Get the mapping of definitions from the csv
+    definitions_mapping = dict()
+    if definitions_csv_path is not None:
+        definitions_mapping = load_csv_with_definitions(file=definitions_csv_path)
+
+    # Extract the columns and folders. Build the new config
+    folder_mapping = build_folder_mapping(datasource.folders_common)
+    column_configs, calculated_column_configs = create_column_config(
+        columns=datasource.columns,
+        datasource_name=datasource_name,
+        folder_mapping=folder_mapping,
+        metadata_record_columns=metadata_record_config,
+        definitions_mapping=definitions_mapping,
+        debugging_logs=debugging_logs
+    )
+
+    # Sort configs
+    column_configs = dict(sorted(column_configs.items()))
+    calculated_column_configs = dict(sorted(calculated_column_configs.items()))
+
+    return column_configs, calculated_column_configs
+
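+# A minimal usage sketch (illustrative names, assuming a local .tdsx file):
+#   ds = Datasource('my_datasource.tdsx')
+#   column_configs, calculated_column_configs = build_configs(ds, 'my_datasource')
+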
+
 def generate_config(args, server: TableauServer = None):
     """ Downloads a datasource and saves configs for that datasource
 
@@ -293,34 +346,14 @@
     print(f'{color.fg_yellow}BUILDING CONFIG {symbol.arrow_r} '
           f'{color.fg_grey}{datasource_name} {symbol.sep} {datasource_path}{color.reset}')
-    datasource = Datasource(datasource_path)
 
-    # Get column information from the metadata records
-    metadata_record_config = get_metadata_record_config(
-        datasource.connection.metadata_records,
-        datasource_name,
-        debugging_logs
-    )
-
-    # Get the mapping of definitions from the csv
-    definitions_mapping = dict()
-    if definitions_csv_path is not None:
-        definitions_mapping = load_csv_with_definitions(file=definitions_csv_path)
 
-    # Extract the columns and folders. Build the new config
-    folder_mapping = build_folder_mapping(datasource.folders_common)
-    column_configs, calculated_column_configs = create_column_config(
-        columns=datasource.columns,
-        datasource_name=datasource_name,
-        folder_mapping=folder_mapping,
-        metadata_record_columns=metadata_record_config,
-        definitions_mapping=definitions_mapping,
-        debugging_logs=debugging_logs
-    )
+    datasource = Datasource(datasource_path)
 
-    # Sort configs
-    column_configs = dict(sorted(column_configs.items()))
-    calculated_column_configs = dict(sorted(calculated_column_configs.items()))
+    # Build the config dictionaries
+    column_configs, calculated_column_configs = build_configs(datasource, datasource_name, debugging_logs,
+                                                              definitions_csv_path)
 
+    # Output the configs to files
     datasource_name_snake = convert_to_snake_case(datasource_name)
     output_file_column_config = 'column_config.json'
     output_file_calculated_column_config = 'tableau_calc_config.json'
diff --git a/tableau_utilities/scripts/merge_config.py b/tableau_utilities/scripts/merge_config.py
index 1535a55f..d86d00a2 100644
--- a/tableau_utilities/scripts/merge_config.py
+++ b/tableau_utilities/scripts/merge_config.py
@@ -4,16 +4,27 @@
 
 
 def read_file(file_path):
-    """ Read a JSON file to a dictionary
+    """Read a JSON file to a dictionary.
 
     Args:
-        file_path (str): The path of the file to read
+        file_path (str): The path of the file to read.
 
+    Returns:
+        dict: The JSON content as a dictionary.
""" - with open(file_path, "r") as infile: - config: dict = json.load(infile) + try: + with open(file_path, "r") as infile: + config = json.load(infile) + print(f"Successfully read file: {file_path}") + return config + except FileNotFoundError: + print(f"File not found: {file_path}") + except json.JSONDecodeError: + print(f"Error decoding JSON from file: {file_path}") + except Exception as e: + print(f"An unexpected error occurred: {e}") - return config + return {} def write_file(file_name, config, debugging_logs=False): @@ -45,6 +56,30 @@ def add_definitions_mapping(config, definitions_mapping): return config +def add_definitions_mapping_any_local_name(config, definitions_mapping): + """Adds definitions from a mapping to the config. Chooses the definition from the mapping if needed. + + Args: + config (dict): A datasource config. + definitions_mapping (dict): A dictionary with columns as keys and their definitions as values. + + Returns: + dict: The updated config with new descriptions. + """ + if not isinstance(definitions_mapping, dict): + raise TypeError("definitions_mapping should be a dictionary") + + for column, definition in definitions_mapping.items(): + if len(definition) > 0: + column_lower = column.lower() + for key, value in config.items(): + for datasource in value.get('datasources', []): + if datasource.get('local-name', '').lower() == column_lower: + config[key]['description'] = definition + break + return config + + def merge_2_configs(existing_config, additional_config, debugging_logs=False): """ Takes 2 configs and adds information from the additional_cong to the existing_config The output of the merged config should be merged into the existing config in a PR @@ -112,7 +147,7 @@ def sort_config(config, debugging_logs): if debugging_logs: print('KEY', k) - print('CONGIG', v) + print('CONFIG', v) print('DATASOURCES', v['datasources']) sorted_datasources = sorted(v['datasources'], key=lambda d: d['name']) @@ -159,6 +194,7 @@ def merge_configs(args, server=None): existing_config_path = args.existing_config additional_config_path = args.additional_config definitions_csv_path = args.definitions_csv + # definitions_csv_local_name_path = args.definitions_csv_local_name merge_with = args.merge_with file_name = f'{args.merged_config}.json' target_directory = args.target_directory @@ -173,17 +209,20 @@ def merge_configs(args, server=None): if merge_with == 'config': read_merge_write(existing_config_path, additional_config_path, file_name, debugging_logs) - # Merge a config with a definitions csv + # Merge a config with a definitions csv. 
 def merge_2_configs(existing_config, additional_config, debugging_logs=False):
     """ Takes 2 configs and adds information from the additional_config to the existing_config
     The output of the merged config should be merged into the existing config in a PR
@@ -112,7 +147,7 @@
 
     if debugging_logs:
         print('KEY', k)
-        print('CONGIG', v)
+        print('CONFIG', v)
         print('DATASOURCES', v['datasources'])
 
         sorted_datasources = sorted(v['datasources'], key=lambda d: d['name'])
@@ -173,17 +209,20 @@
     if merge_with == 'config':
         read_merge_write(existing_config_path, additional_config_path, file_name, debugging_logs)
 
-    # Merge a config with a definitions csv
+    # Merge a config with a definitions csv. This matches definitions to columns
+    # by their local-name in any datasource
     elif merge_with == 'csv':
         # Read files
-        existing_config = read_file(existing_config)
-        definitions_mapping = load_csv_with_definitions(file=definitions_csv_path)
+        existing_config = read_file(existing_config_path)
+        definitions_mapping = load_csv_with_definitions(file=definitions_csv_path, debugging_logs=debugging_logs)
 
         # Merge
-        new_config = add_definitions_mapping(existing_config, definitions_mapping)
+        new_config = add_definitions_mapping_any_local_name(existing_config, definitions_mapping)
 
         # Sort and write the merged config
         new_config = sort_config(new_config, debugging_logs)
+        write_file(file_name=file_name, config=new_config, debugging_logs=debugging_logs)
 
+        print(f'{color.fg_yellow}DEFINITIONS CSV {symbol.arrow_r} '
+              f'{color.fg_grey}{definitions_csv_path}{color.reset}')
     print(f'{color.fg_yellow}EXISTING CONFIG {symbol.arrow_r} '
           f'{color.fg_grey}{existing_config_path}{color.reset}')
     print(f'{color.fg_yellow}ADDITIONAL CONFIG {symbol.arrow_r} '
diff --git a/tableau_utilities/tableau_file/tableau_file.py b/tableau_utilities/tableau_file/tableau_file.py
index 8d55d56b..63f6f560 100644
--- a/tableau_utilities/tableau_file/tableau_file.py
+++ b/tableau_utilities/tableau_file/tableau_file.py
@@ -301,6 +301,42 @@
         if not found:
             self.extract.connection.cols.append(extract_col)
 
+    def remove_empty_folders(self):
+        """ Removes any folder without a column in it
+
+        Example:
+            "Folder - 2 columns" will be unchanged and the xml for "Folder - Empty" will be removed:
+
+                <_.fcp.SchemaViewerObjectModel.true...folders-common>
+                    <folder name='Folder - 2 columns'>
+                        <folder-item name='[COLUMN_1]' type='field' />
+                        <folder-item name='[COLUMN_2]' type='field' />
+                    </folder>
+                    <folder name='Folder - Empty' />
+                </_.fcp.SchemaViewerObjectModel.true...folders-common>
+
+        Returns:
+            The list of folders that were removed
+
+        """
+
+        # Identify empty folders
+        empty_folder_list = []
+
+        for folder in self.folders_common.folder:
+            number_columns_in_folder = len(folder.folder_item)
+
+            if number_columns_in_folder == 0:
+                empty_folder_list.append(folder.name)
+
+        # Remove Empty Folders
+        for empty_folder in empty_folder_list:
+            self.folders_common.folder.delete(empty_folder)
+
+        return empty_folder_list
+
+
     def save(self):
         """ Save all changes made to each section of the Datasource """
         parent = self._root.find('.')
diff --git a/tableau_utilities/tableau_file/tableau_file_objects.py b/tableau_utilities/tableau_file/tableau_file_objects.py
index 7935f803..11c1defd 100644
--- a/tableau_utilities/tableau_file/tableau_file_objects.py
+++ b/tableau_utilities/tableau_file/tableau_file_objects.py
@@ -708,6 +708,7 @@ class Connection(TableauFileObject):
     extract_engine: bool = None
     port: int = None
     max_varchar_size: str = None
+    oauth_config_id: str = None
 
     def dict(self):
         output = dict()
@@ -759,6 +760,8 @@
             output['@port'] = str(self.port)
         if self.max_varchar_size is not None:
             output['@max-varchar-size'] = str(self.max_varchar_size)
+        if self.oauth_config_id is not None:
+            output['@oauth-config-id'] = str(self.oauth_config_id)
         return output
 
 
diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644
index 00000000..33e282d7
--- /dev/null
+++ b/tests/conftest.py
@@ -0,0 +1,25 @@
+import pytest
+
+file_test_count = {}
+
+@pytest.hookimpl(tryfirst=True)
+def pytest_sessionstart(session):
+    global file_test_count
+    file_test_count = {}
+
+@pytest.hookimpl(tryfirst=True)
+def pytest_runtestloop(session):
+    for item in session.items:
+        file_path = str(item.fspath)
+        if file_path not in file_test_count:
+            file_test_count[file_path] = 0
+        file_test_count[file_path] += 1
+
+@pytest.hookimpl(trylast=True)
+def pytest_terminal_summary(terminalreporter, exitstatus):
+    terminalreporter.write_sep("=", "test count summary")
+    for file_path, count in file_test_count.items():
+        terminalreporter.write_line(f"{file_path}: {count} test(s)")
+    terminalreporter.write_line(f"Total number of test files: {len(file_test_count)}")
+    terminalreporter.write_line(f"Total number of tests: {sum(file_test_count.values())}")
diff --git a/tests/test_apply_configs.py b/tests/test_apply_configs.py
new file mode 100644
index 00000000..8981f669
--- /dev/null
+++ b/tests/test_apply_configs.py
@@ -0,0 +1,292 @@
+import pytest
+from typing import Dict, Any
+from tableau_utilities.scripts.apply_configs import ApplyConfigs
+
+
+@pytest.fixture
+def apply_configs():
+    return ApplyConfigs(datasource_name="my_datasource_1", datasource_path="", target_column_config={},
+                        target_calculated_column_config={}, debugging_logs=False)
+
+
+def test_invert_config_single_datasource(apply_configs):
+    sample_config = {
+        "Column1": {
+            "description": "Description of Column1",
+            "folder": "Folder1",
+            "persona": "string_dimension",
+            "datasources": [
+                {
+                    "name": "my_datasource_1",
+                    "local-name": "MY_COLUMN_1",
+                    "sql_alias": "MY_COLUMN_1_ALIAS"
+                }
+            ]
+        }
+    }
+
+    expected_output = {
+        "my_datasource_1": {
+            "Column1": {
+                "description": "Description of Column1",
+                "folder": "Folder1",
+                "persona": "string_dimension",
+                "local-name": "MY_COLUMN_1",
+                "remote_name": "MY_COLUMN_1_ALIAS"
+            }
+        }
+    }
+
+    result = apply_configs.invert_config(sample_config)
+    assert result == expected_output
+
+
+def test_invert_config_multiple_datasources(apply_configs):
+    sample_config = {
+        "Column2": {
+            "description": "Description of Column2",
+            "folder": "Folder2",
+            "persona": "string_dimension",
+            "datasources": [
+                {
+                    "name": "my_datasource_1",
+                    "local-name": "MY_COLUMN_1",
+                    "sql_alias": "MY_COLUMN_1_ALIAS"
+                },
+                {
+                    "name": "my_datasource_2",
+                    "local-name": "MY_COLUMN_2",
+                    "sql_alias": "MY_COLUMN_2_ALIAS"
+                }
+            ]
+        }
+    }
+
+    expected_output = {
+        "my_datasource_1": {
+            "Column2": {
+                "description": "Description of Column2",
+                "folder": "Folder2",
+                "persona": "string_dimension",
+                "local-name": "MY_COLUMN_1",
+                "remote_name": "MY_COLUMN_1_ALIAS"
+            }
+        },
+        "my_datasource_2": {
+            "Column2": {
+                "description": "Description of Column2",
+                "folder": "Folder2",
+                "persona": "string_dimension",
+                "local-name": "MY_COLUMN_2",
+                "remote_name": "MY_COLUMN_2_ALIAS"
+            }
+        }
+    }
+
+    result = apply_configs.invert_config(sample_config)
+    assert result == expected_output
+
+
+def test_prepare_configs(apply_configs):
+    sample_config_A = {
"description": "Description of Column1", + "folder": "Folder1", + "persona": "string_dimension", + "datasources": [ + { + "name": "my_datasource_1", + "local-name": "MY_COLUMN_1", + "sql_alias": "MY_COLUMN_1_ALIAS" + } + ] + }, + "Column2": { + "description": "Description of Column2", + "folder": "Folder2", + "persona": "string_dimension", + "datasources": [ + { + "name": "my_datasource_1", + "local-name": "MY_COLUMN_1", + "sql_alias": "MY_COLUMN_1_ALIAS" + }, + { + "name": "my_datasource_2", + "local-name": "MY_COLUMN_2", + "sql_alias": "MY_COLUMN_2_ALIAS" + } + ] + } + } + + sample_config_B = { + "# ID": { + "description": "Distinct Count of the ID", + "calculation": "COUNTD([ID])", + "folder": "My Data", + "persona": "continuous_number_measure", + "datasources": [ + { + "name": "my_datasource_1", + "local-name": "MY_COLUMN_1", + "sql_alias": "MY_COLUMN_1_ALIAS" + }, + { + "name": "my_datasource_2", + "local-name": "MY_COLUMN_2", + "sql_alias": "MY_COLUMN_2_ALIAS" + } + ], + "default_format": "n#,##0;-#,##0" + } + } + + expected_output = { + "Column1": { + "description": "Description of Column1", + "folder": "Folder1", + "persona": "string_dimension", + "local-name": "MY_COLUMN_1", + "remote_name": "MY_COLUMN_1_ALIAS" + }, + "Column2": { + "description": "Description of Column2", + "folder": "Folder2", + "persona": "string_dimension", + "local-name": "MY_COLUMN_1", + "remote_name": "MY_COLUMN_1_ALIAS" + }, + "# ID": { + "description": "Distinct Count of the ID", + "calculation": "COUNTD([ID])", + "default_format": "n#,##0;-#,##0", + "folder": "My Data", + "persona": "continuous_number_measure", + "local-name": "MY_COLUMN_1", + "remote_name": "MY_COLUMN_1_ALIAS" + } + } + + result = apply_configs.prepare_configs(sample_config_A, sample_config_B) + assert result == expected_output + +def test_flatten_to_list_of_fields(apply_configs): + + sample_dict = { + 'My Caption 1': { + 'description': 'A perfect description', + 'folder': 'My Folder', + 'local-name': 'MY_FIELD_1', + 'persona': 'string_dimension', + 'remote_name': 'MY_FIELD_1' + }, + 'My Caption 2': { + 'description': 'Another perfect description', + 'folder': 'My Folder', + 'local-name': 'MY_FIELD_2', + 'persona': 'string_dimension', + 'remote_name': 'MY_FIELD_2' + } + } + + expected_output = [ + { + 'caption': 'My Caption 1', + 'description': 'A perfect description', + 'folder': 'My Folder', + 'local-name': 'MY_FIELD_1', + 'persona': 'string_dimension', + 'remote_name': 'MY_FIELD_1' + }, + {'caption': 'My Caption 2', + 'description': 'Another perfect description', + 'folder': 'My Folder', + 'local-name': 'MY_FIELD_2', + 'persona': 'string_dimension', + 'remote_name': 'MY_FIELD_2' + } + ] + + result = apply_configs.flatten_to_list_of_fields(sample_dict) + assert result == expected_output + +def test_select_matching_datasource_config(apply_configs): + + sample_config = { + "my_datasource_1": { + "Column1": { + "description": "Description of Column1", + "folder": "Folder1", + "persona": "string_dimension", + "local-name": "MY_COLUMN_1", + "remote_name": "MY_COLUMN_1_ALIAS" + } + }, + "my_datasource_2": { + "Column2": { + "description": "Description of Column2", + "folder": "Folder2", + "persona": "string_dimension", + "local-name": "MY_COLUMN_2", + "remote_name": "MY_COLUMN_2_ALIAS" + } + } + } + + expected_output = { + "Column1": { + "description": "Description of Column1", + "folder": "Folder1", + "persona": "string_dimension", + "local-name": "MY_COLUMN_1", + "remote_name": "MY_COLUMN_1_ALIAS" + } + } + result = 
+
+    result = apply_configs.select_matching_datasource_config(sample_config)
+    assert result == expected_output
+
+
+if __name__ == '__main__':
+    pytest.main()
diff --git a/tests/test_datasource_remove_empty_folders.py b/tests/test_datasource_remove_empty_folders.py
new file mode 100644
index 00000000..bce4a203
--- /dev/null
+++ b/tests/test_datasource_remove_empty_folders.py
@@ -0,0 +1,58 @@
+import pytest
+from unittest.mock import patch
+from tableau_utilities.tableau_file.tableau_file_objects import FoldersCommon, Folder, FolderItem
+from tableau_utilities.tableau_file.tableau_file import Datasource
+
+@pytest.fixture
+def mock_datasource():
+    with patch('tableau_utilities.tableau_file.tableau_file.Datasource.__init__', lambda x, file_path: None):
+        datasource = Datasource(file_path='dummy_path')
+
+    # Create the mock data
+    mock_folders = [
+        Folder(
+            name='Folder - 2 columns',
+            tag='folder',
+            role=None,
+            folder_item=[
+                FolderItem(name='[COLUMN_1]', type='field', tag='folder-item'),
+                FolderItem(name='[COLUMN_2]', type='field', tag='folder-item')
+            ]
+        ),
+        Folder(
+            name='Folder - Empty',
+            tag='folder',
+            role=None,
+            folder_item=[]
+        ),
+        Folder(
+            name='People',
+            tag='folder',
+            role=None,
+            folder_item=[
+                FolderItem(name='[COLUMN_2+3]', type='field', tag='folder-item')
+            ]
+        )
+    ]
+
+    # Assign the mock folders to the folders_common attribute
+    folders_common = FoldersCommon(folder=mock_folders)
+    datasource.folders_common = folders_common
+
+    return datasource
+
+def test_remove_empty_folders_removed_folders(mock_datasource):
+    removed_folders = mock_datasource.remove_empty_folders()
+    assert removed_folders == ['Folder - Empty']
+
+def test_remove_empty_folders_folder_count(mock_datasource):
+    mock_datasource.remove_empty_folders()
+    assert len(mock_datasource.folders_common.folder) == 2
+
+def test_remove_empty_folders_folder_names(mock_datasource):
+    mock_datasource.remove_empty_folders()
+    folder_names = [folder.name for folder in mock_datasource.folders_common.folder]
+    assert 'Folder - Empty' not in folder_names
+
+if __name__ == '__main__':
+    pytest.main()