diff --git a/.circleci/config.yml b/.circleci/config.yml index 07822b5..4dfbc55 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -13,31 +13,27 @@ jobs: command: | python3 -m venv venv . venv/bin/activate - pip install -e . + pip install cookiecutter - run: - name: "Create Starter Project - BigQuery" + name: "Create Starter Project - Snowflake" command: | - mkdir jaffle_shop_bigquery . venv/bin/activate - dbt-init --client jaffle-shop --warehouse bigquery --target-dir ./jaffle_shop_bigquery + cookiecutter --no-input gh:fishtown-analytics/dbt-init --checkout ${CIRCLE_BRANCH} name=jaffle_shop project_name=jaffle_shop_snowflake warehouse=snowflake - run: - name: "Create Starter Project - Postgres" + name: "Create Starter Project - Redshift" command: | - mkdir jaffle_shop_postgres . venv/bin/activate - dbt-init --client jaffle-shop --warehouse postgres --target-dir ./jaffle_shop_postgres + cookiecutter --no-input gh:fishtown-analytics/dbt-init --checkout ${CIRCLE_BRANCH} name=jaffle_shop project_name=jaffle_shop_redshift warehouse=redshift - run: - name: "Create Starter Project - Redshift" + name: "Create Starter Project - BigQuery" command: | - mkdir jaffle_shop_redshift . venv/bin/activate - dbt-init --client jaffle-shop --warehouse redshift --target-dir ./jaffle_shop_redshift + cookiecutter --no-input gh:fishtown-analytics/dbt-init --checkout ${CIRCLE_BRANCH} name=jaffle_shop project_name=jaffle_shop_bigquery warehouse=bigquery - run: - name: "Create Starter Project - Snowflake" + name: "Create Starter Project - Postgres" command: | - mkdir jaffle_shop_snowflake . 
venv/bin/activate - dbt-init --client jaffle-shop --warehouse snowflake --target-dir ./jaffle_shop_snowflake + cookiecutter --no-input gh:fishtown-analytics/dbt-init --checkout ${CIRCLE_BRANCH} name=jaffle_shop project_name=jaffle_shop_postgres warehouse=postgres - save_cache: key: deps1-{{ .Branch }} paths: diff --git a/README.md b/README.md index b50fc7e..d0c4c7d 100644 --- a/README.md +++ b/README.md @@ -1,15 +1,26 @@ # dbt-init -A tool to create dbt projects for consulting. +A cookiecutter template to create dbt projects for consulting. dbt-init will create a project as a subdirectory within the target directory you provide it, and populate as much of the dbt project as possible -## Installation & usage -1. Install using `pip install dbt-init` -2. To create a new client project run a command like following: +## Usage +1. Install [cookiecutter](https://github.com/cookiecutter/cookiecutter) (preferably in a virtual environment): `pip install cookiecutter` +2. Then run `cookiecutter gh:fishtown-analytics/dbt-init`. This will give you a number of prompts — hitting `enter` will use the default value indicated in the `[]` brackets. + ```bash -$ dbt-init --client jaffle-shop --warehouse snowflake --target-dir ~/clients/ +$ cookiecutter gh:fishtown-analytics/dbt-init +name [e.g. jaffle_shop]: jaffle_shop +Select warehouse: +1 - snowflake +2 - redshift +3 - bigquery +4 - postgres +Choose from 1, 2, 3, 4 [1]: 1 +client_name [jaffle-shop]: +project_name [jaffle-shop-dbt]: +profile_name [jaffle_shop]: ``` -You can also check the available arguments with `dbt-init --help` +3. `cd` into your newly created dbt project! ## Once you've created your project 1. Update `sample.profile.yml` to contain the correct profile details for your @@ -24,29 +35,11 @@ statements in the post-run hooks (defined in `dbt_project.yml`). If you're interested in helping build out the starter project, here is a list of variables you can use – a lot of them have defaults based on the client name. 
``` -{{ project.name }}: The name of the project, as defined in `dbt_project.yml`, e.g. jaffle_shop. -{{ project.warehouse }}: The warehouse that a client is using. -{{ project.client_name }}: The name of the client, e.g. jaffle-shop. -{{ project.dir_name }}: The name of the directory this project is in, e.g. jaffle-shop-dbt. -{{ project.profile_name }}: The name of the profile used by this project, e.g. jaffle_shop. -``` - -## Testing out the changes -If you're just making simple changes to the starter project, testing out the -changes is optional. If you want to improve the script, or just get familiar -with virtual environments, this is a good idea! -1. Clone this repo and `cd` into it -2. Create a new virtual environment `dbt-init-dev` and activate it. Make sure -your virtual environment uses python 3. -3. Run `pip install -r requirements-dev.txt` -4. You should now have a development version of `dbt-init` installed. Test your -changes by creating a sample project and inspecting the results (I know, we -should build real tests), e.g.: -``` -$ dbt-init --client test --target-dir ~/clrcrl/ --warehouse bigquery -New dbt project for test created at /Users/claire/clrcrl/test-dbt! 🎉 - -$ open /Users/claire/clrcrl/test-dbt +{{ cookiecutter.name }}: The name of the project, as defined in `dbt_project.yml`, e.g. jaffle_shop. +{{ cookiecutter.warehouse }}: The warehouse that a client is using. +{{ cookiecutter.client_name }}: The name of the client, e.g. jaffle-shop. +{{ cookiecutter.project_name }}: The name of the directory this project is in, e.g. jaffle-shop-dbt (this has to be called project name for cookiecutter reasons) +{{ cookiecutter.profile_name }}: The name of the profile used by this project, e.g. jaffle_shop. ``` ## To-do: diff --git a/RELEASING.md b/RELEASING.md deleted file mode 100644 index 5723fdf..0000000 --- a/RELEASING.md +++ /dev/null @@ -1,9 +0,0 @@ -## Releasing to pypi -1. In a virtual env, install package via: `python setup.py install`. 
Run a sample -`dbt-init` command to confirm that it works. -2. Clear out the `dist/` folder -3. Bump the version number in setup.py -4. Switch back to your `dbt-init-dev` virtualenv (which has the `twine` and -`wheel` packages installed). Run `python setup.py bdist_wheel` -- check that the -right starter project files were included in the `.whl` file. -5. Run `twine upload dist/*` diff --git a/cookiecutter.json b/cookiecutter.json new file mode 100644 index 0000000..e5a70f5 --- /dev/null +++ b/cookiecutter.json @@ -0,0 +1,7 @@ +{ + "name": "e.g. jaffle_shop", + "warehouse": ["snowflake", "redshift", "bigquery", "postgres"], + "client_name": "{{ cookiecutter.name.replace('_', '-') }}", + "project_name": "{{ cookiecutter.name.replace('_', '-') }}-dbt", + "profile_name": "{{ cookiecutter.name }}" +} diff --git a/core/__init__.py b/core/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/core/main.py b/core/main.py deleted file mode 100755 index e406fe7..0000000 --- a/core/main.py +++ /dev/null @@ -1,186 +0,0 @@ -#!/usr/bin/env python -from argparse import ArgumentParser, ArgumentTypeError -from pathlib import Path -import sys -import os -import re - -import jinja2 - - -STARTER_PROJECT_DIR_PATH = Path(__file__).parents[1] -STARTER_PROJECT_DIR = "starter-project" -STARTER_PROJECT_PATH = os.path.join(STARTER_PROJECT_DIR_PATH, STARTER_PROJECT_DIR) - - -def render_template(dir_path, filename, project): - """ Load the starter project and render the project details """ - environment = jinja2.Environment( - loader=jinja2.FileSystemLoader(dir_path), - undefined=jinja2.StrictUndefined, - keep_trailing_newline=True, - ) - - loaded = environment.get_template(filename) - - rendered = loaded.render(project=project) - - return rendered - - -def write_file(file_path, contents): - dir_name = os.path.dirname(file_path) - os.makedirs(dir_name, exist_ok=True) - with open(file_path, "w") as f: - f.write(contents) - - -def parse_args(args): - parser = 
ArgumentParser(description="dbt project starter") - parser._action_groups.pop() - required = parser.add_argument_group("required arguments") - optional = parser.add_argument_group("optional arguments") - - required.add_argument( - "--client", - required=True, - help="The name of the client you are creating this project for", - type=check_kebab_case, - ) - required.add_argument( - "--warehouse", - required=True, - choices=[ - "bigquery", - "bq", - "postgres", - "pg", - "redshift", - "rs", - "snowflake", - "sf", - ], - help="The warehouse your client is using", - ) - required.add_argument( - "--target-dir", - required=True, - help="The target directory name. Note that the project will be created as a subdirectory within the target directory", - type=check_file_path, - ) - optional.add_argument( - "--project-name", - help="The name of your dbt project (as defined in dbt_project.yml). Defaults to ", - type=check_snake_case, - ) - optional.add_argument( - "--project-directory", - help="The name of your dbt project directory. Defaults to -dbt", - type=check_kebab_case, - ) - optional.add_argument( - "--profile-name", - help="The name of the profile your dbt project will use. 
Defaults to ", - type=check_snake_case, - ) - - parsed = parser.parse_args(args) - return parsed - - -def handle(parsed): - """ Checks that the arguments are valid, and returns a dictionary that - describes the dbt project - """ - - client_kebab_case = parsed.client - client_snake_case = client_kebab_case.replace("-", "_") - - project = {} - - project["client_name"] = client_kebab_case - project["warehouse"] = map_warehouse(parsed.warehouse) - project["name"] = parsed.project_name or client_snake_case - project["dir_path"] = parsed.target_dir - project["dir_name"] = parsed.project_directory or "{}-dbt".format(client_kebab_case) - project["profile_name"] = parsed.profile_name or client_snake_case - - return project - - -def check_snake_case(s): - if re.match("^[a-z0-9_]*$", s) is None: - raise ArgumentTypeError( - "{} should only contain lower case letters, numbers, and underscores.".format( - s - ) - ) - return s - - -def check_kebab_case(s): - if re.match("^[a-z0-9-]*$", s) is None: - raise ArgumentTypeError( - "{} should only contain lower case letters, numbers, and hyphens.".format(s) - ) - return s - - -def check_file_path(s): - if not os.path.exists(s): - raise ArgumentTypeError("Target directory {} does not exist!".format(s)) - return s - - -def map_warehouse(s): - mapping_dict = { - "bq": "bigquery", - "rs": "redshift", - "sf": "snowflake", - "pg": "postgres", - } - - return mapping_dict.get(s, s) - - -def should_copy_file(base_name, contents): - is_empty_file = contents.strip() == "" - if base_name == ".gitkeep" or not is_empty_file: - return True - - -def create_starter_project(project): - # set the path we are targeting - client_project_path = os.path.join(project["dir_path"], project["dir_name"]) - # for each file in the starter project, copy a rendered version of the file - for subdir, dirs, files in os.walk(STARTER_PROJECT_PATH): - if os.path.basename(subdir) != "__pycache__": - for base_name in files: - rendered_template = render_template(subdir, 
base_name, project) - if should_copy_file(base_name, rendered_template): - target_dir = subdir.replace( - STARTER_PROJECT_PATH, client_project_path - ) - target_filepath = os.path.join(target_dir, base_name) - write_file(target_filepath, rendered_template) - - print( - "New dbt project for {} created at {}! 🎉".format( - project["client_name"], client_project_path - ) - ) - - -def main(args=None): - if args is None: - args = sys.argv[1:] - - parsed = parse_args(args) - - project = handle(parsed) - - create_starter_project(project) - - -if __name__ == "__main__": - main() diff --git a/requirements-dev.txt b/requirements-dev.txt deleted file mode 100644 index 13ccb99..0000000 --- a/requirements-dev.txt +++ /dev/null @@ -1,5 +0,0 @@ --e . -ipdb -twine -wheel -black diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index d6e1198..0000000 --- a/requirements.txt +++ /dev/null @@ -1 +0,0 @@ --e . diff --git a/setup.py b/setup.py deleted file mode 100644 index 5f629f8..0000000 --- a/setup.py +++ /dev/null @@ -1,50 +0,0 @@ -from setuptools import find_packages, setup -from setuptools.command.install import install -import os - -this_directory = os.path.abspath(os.path.dirname(__file__)) -with open(os.path.join(this_directory, "README.md")) as f: - long_description = f.read() - - -package_name = "dbt-init" -VERSION = "0.3.0" -description = """Create a dbt project the way Fishtown Analytics would""" - - -class VerifyVersionCommand(install): - """ - Custom command to verify that the git tag matches our version - https://circleci.com/blog/continuously-deploying-python-packages-to-pypi-with-circleci/ - """ - - description = "verify that the git tag matches our version" - - def run(self): - tag = os.getenv("CIRCLE_TAG") - - if tag != VERSION: - info = "Git tag: {0} does not match the version of this app: {1}".format( - tag, VERSION - ) - sys.exit(info) - - -setup( - name=package_name, - version=VERSION, - description=description, - 
long_description=long_description, - long_description_content_type="text/markdown", - author="Claire Carroll", - author_email="claire@fishtownanalytics.com", - url="https://github.com/fishtown-analyics/dbt-init", - packages=find_packages(), - package_data={"": ["starter-project/**/*"]}, - include_package_data=True, - test_suite="test", - entry_points={"console_scripts": ["dbt-init = core.main:main"]}, - scripts=[], - install_requires=["jinja2"], - cmdclass={"verify": VerifyVersionCommand}, -) diff --git a/starter-project/models/admin/warehouse_operation.sql b/starter-project/models/admin/warehouse_operation.sql deleted file mode 100644 index 4e4a3f4..0000000 --- a/starter-project/models/admin/warehouse_operation.sql +++ /dev/null @@ -1,17 +0,0 @@ -{% if project.warehouse == 'redshift' %} -{% raw %} -{{ - config({ - "materialized" : 'incremental', - "post-hook" : [ - after_commit("{{ vacuum( var('maintenance', false) ) }}"), - after_commit("{{ analyze( var('maintenance', false) ) }}") - ] - }) -}} - -select - current_timestamp as run_at, - {{var('maintenance', false)}} as maintenance_jobs_run -{% endraw %} -{% endif %} diff --git a/starter-project/.github/issue_template.md b/{{cookiecutter.project_name}}/.github/issue_template.md similarity index 100% rename from starter-project/.github/issue_template.md rename to {{cookiecutter.project_name}}/.github/issue_template.md diff --git a/starter-project/.github/pull_request_template.md b/{{cookiecutter.project_name}}/.github/pull_request_template.md similarity index 97% rename from starter-project/.github/pull_request_template.md rename to {{cookiecutter.project_name}}/.github/pull_request_template.md index af6dd77..6e897c8 100644 --- a/starter-project/.github/pull_request_template.md +++ b/{{cookiecutter.project_name}}/.github/pull_request_template.md @@ -53,7 +53,7 @@ addressed, and remove any items that are not relevant to this PR. - [ ] I have materialized my models appropriately. 
- [ ] I have added appropriate tests and documentation to any new models. - [ ] I have updated the README file. -{%- if project.warehouse == 'redshift' %} +{%- if cookiecutter.warehouse == 'redshift' %} - [ ] I have added sort and dist keys to models materialized as tables. - [ ] I have validated the SQL in any late-binding views. {% endif %} diff --git a/starter-project/.gitignore b/{{cookiecutter.project_name}}/.gitignore similarity index 100% rename from starter-project/.gitignore rename to {{cookiecutter.project_name}}/.gitignore diff --git a/starter-project/README.md b/{{cookiecutter.project_name}}/README.md similarity index 82% rename from starter-project/README.md rename to {{cookiecutter.project_name}}/README.md index b223881..824cb51 100644 --- a/starter-project/README.md +++ b/{{cookiecutter.project_name}}/README.md @@ -1,24 +1,24 @@ -# {{ project.dir_name }} +# {{ cookiecutter.project_name }} -dbt models for {{ project.client_name }} +dbt models for {{ cookiecutter.client_name }} ## Getting started 1. Clone this github repo 2. Install dbt following [these instructions](https://docs.getdbt.com/docs/installation) -3. Ask your database administrator for a set of {{ project.warehouse }} credentials. +3. Ask your database administrator for a set of {{ cookiecutter.warehouse }} credentials. -{% if project.warehouse == 'bigquery' %} +{% if cookiecutter.warehouse == 'bigquery' %} You'll also need to connect to BigQuery using [these instructions](https://docs.getdbt.com/docs/profile-bigquery#section-oauth-authorization). -{% elif project.warehouse in ('postgres', 'redshift') %} +{% elif cookiecutter.warehouse in ('postgres', 'redshift') %} The database administrator should run the following statements from a super user account to create your account. 
```sql create user with password '' in group transformer, reporter; ``` -{% elif project.warehouse == 'snowflake' %} +{% elif cookiecutter.warehouse == 'snowflake' %} The database administrator should run the following statements from a super user account to create your account. ```sql create user diff --git a/starter-project/analysis/.gitkeep b/{{cookiecutter.project_name}}/analysis/.gitkeep similarity index 100% rename from starter-project/analysis/.gitkeep rename to {{cookiecutter.project_name}}/analysis/.gitkeep diff --git a/starter-project/data/.gitkeep b/{{cookiecutter.project_name}}/data/.gitkeep similarity index 100% rename from starter-project/data/.gitkeep rename to {{cookiecutter.project_name}}/data/.gitkeep diff --git a/starter-project/dbt_project.yml b/{{cookiecutter.project_name}}/dbt_project.yml similarity index 75% rename from starter-project/dbt_project.yml rename to {{cookiecutter.project_name}}/dbt_project.yml index 005c55e..f2fe3c4 100644 --- a/starter-project/dbt_project.yml +++ b/{{cookiecutter.project_name}}/dbt_project.yml @@ -1,6 +1,6 @@ -name: {{ project.name }} +name: {{ cookiecutter.name }} -profile: {{ project.profile_name }} +profile: {{ cookiecutter.profile_name }} version: '1.0' @@ -20,20 +20,20 @@ clean-targets: seeds: quote_columns: false -{% if project.warehouse == 'bigquery' %} +{% if cookiecutter.warehouse == 'bigquery' %} {# Currently we don't have any best practice BQ configurations #} -{% elif project.warehouse == 'postgres' -%} +{% elif cookiecutter.warehouse == 'postgres' -%} {# Currently we don't have any best practice PG configurations #} -{% elif project.warehouse == 'redshift' -%} +{% elif cookiecutter.warehouse == 'redshift' -%} models: bind: false on-run-end: - {% raw %}"{{ grant_select_on_schemas(schemas, ['transformer','reporter']) }}"{% endraw %} -{% elif project.warehouse == 'snowflake' %} +{% elif cookiecutter.warehouse == 'snowflake' %} quoting: database: false diff --git a/starter-project/macros/.gitkeep 
b/{{cookiecutter.project_name}}/macros/.gitkeep similarity index 100% rename from starter-project/macros/.gitkeep rename to {{cookiecutter.project_name}}/macros/.gitkeep diff --git a/starter-project/macros/generate_schema_name.sql b/{{cookiecutter.project_name}}/macros/generate_schema_name.sql similarity index 100% rename from starter-project/macros/generate_schema_name.sql rename to {{cookiecutter.project_name}}/macros/generate_schema_name.sql diff --git a/starter-project/macros/grant_select_on_schemas.sql b/{{cookiecutter.project_name}}/macros/grant_select_on_schemas.sql similarity index 89% rename from starter-project/macros/grant_select_on_schemas.sql rename to {{cookiecutter.project_name}}/macros/grant_select_on_schemas.sql index 60bf3f8..0ef7fc0 100644 --- a/starter-project/macros/grant_select_on_schemas.sql +++ b/{{cookiecutter.project_name}}/macros/grant_select_on_schemas.sql @@ -1,4 +1,4 @@ -{% if project.warehouse in ('postgres', 'redshift') %} +{% if cookiecutter.warehouse in ('postgres', 'redshift') %} {% raw %} {% macro grant_select_on_schemas(schemas, groups) %} {% set groups_csv = 'group ' ~ groups | join(', group ') %} @@ -11,7 +11,7 @@ {% endmacro %} {% endraw %} -{% elif project.warehouse == 'snowflake' %} +{% elif cookiecutter.warehouse == 'snowflake' %} {% raw %} {% macro grant_select_on_schemas(schemas, role) %} diff --git a/starter-project/models/marts/.gitkeep b/{{cookiecutter.project_name}}/models/marts/.gitkeep similarity index 100% rename from starter-project/models/marts/.gitkeep rename to {{cookiecutter.project_name}}/models/marts/.gitkeep diff --git a/starter-project/models/staging/.gitkeep b/{{cookiecutter.project_name}}/models/staging/.gitkeep similarity index 100% rename from starter-project/models/staging/.gitkeep rename to {{cookiecutter.project_name}}/models/staging/.gitkeep diff --git a/starter-project/packages.yml b/{{cookiecutter.project_name}}/packages.yml similarity index 100% rename from starter-project/packages.yml rename 
to {{cookiecutter.project_name}}/packages.yml diff --git a/starter-project/sample.profiles.yml b/{{cookiecutter.project_name}}/sample.profiles.yml similarity index 60% rename from starter-project/sample.profiles.yml rename to {{cookiecutter.project_name}}/sample.profiles.yml index 40f6365..7a90133 100644 --- a/starter-project/sample.profiles.yml +++ b/{{cookiecutter.project_name}}/sample.profiles.yml @@ -1,8 +1,8 @@ -{% if project.warehouse == 'bigquery' %} -{{ project.profile_name }}: +{% if cookiecutter.warehouse == 'bigquery' %} +{{ cookiecutter.profile_name }}: outputs: dev: - type: {{ project.warehouse }} + type: {{ cookiecutter.warehouse }} threads: 8 method: oauth project: [project] @@ -10,12 +10,12 @@ timeout_seconds: 300 target: dev -{% elif project.warehouse in ('postgres', 'redshift') %} -{% set default_port = 5439 if project.warehouse == 'redshift' else 5432 %} -{{ project.profile_name }}: +{% elif cookiecutter.warehouse in ('postgres', 'redshift') %} +{% set default_port = 5439 if cookiecutter.warehouse == 'redshift' else 5432 %} +{{ cookiecutter.profile_name }}: outputs: dev: - type: {{ project.warehouse }} + type: {{ cookiecutter.warehouse }} threads: 8 host: [hostname] user: [username] @@ -25,11 +25,11 @@ schema: dbt_[username] # e.g. 
dbt_alice target: dev -{% elif project.warehouse == 'snowflake' %} -{{ project.profile_name }}: +{% elif cookiecutter.warehouse == 'snowflake' %} +{{ cookiecutter.profile_name }}: outputs: dev: - type: {{ project.warehouse }} + type: {{ cookiecutter.warehouse }} threads: 16 account: [account id] user: [username] diff --git a/starter-project/snapshots/.gitkeep b/{{cookiecutter.project_name}}/snapshots/.gitkeep similarity index 100% rename from starter-project/snapshots/.gitkeep rename to {{cookiecutter.project_name}}/snapshots/.gitkeep diff --git a/starter-project/tests/.gitkeep b/{{cookiecutter.project_name}}/tests/.gitkeep similarity index 100% rename from starter-project/tests/.gitkeep rename to {{cookiecutter.project_name}}/tests/.gitkeep