diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 8aaf0375007..1983b0201d9 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -4,14 +4,14 @@ * @dbt-labs/product-docs # Adapter & Package Development Docs -/website/docs/docs/supported-data-platforms.md @dbt-labs/product-docs @dataders -/website/docs/reference/warehouse-setups @dbt-labs/product-docs @dataders +/website/docs/docs/supported-data-platforms.md @dbt-labs/product-docs @amychen1776 +/website/docs/reference/warehouse-setups @dbt-labs/product-docs @amychen1776 # `resource-configs` contains more than just warehouse setups -/website/docs/reference/resource-configs/*-configs.md @dbt-labs/product-docs @dataders -/website/docs/guides/advanced/adapter-development @dbt-labs/product-docs @dataders @dbeatty10 +/website/docs/reference/resource-configs/*-configs.md @dbt-labs/product-docs @amychen1776 +/website/docs/guides/advanced/adapter-development @dbt-labs/product-docs @amychen1776 -/website/docs/guides/building-packages @dbt-labs/product-docs @amychen1776 @dataders @dbeatty10 -/website/docs/guides/creating-new-materializations @dbt-labs/product-docs @dataders @dbeatty10 +/website/docs/guides/building-packages @dbt-labs/product-docs @amychen1776 +/website/docs/guides/creating-new-materializations @dbt-labs/product-docs # Require approval from the Multicell team when making # changes to the public facing migration documentation. 
diff --git a/.github/workflows/vale.yml b/.github/workflows/vale.yml index 5feaaa12a20..8abc5e2f50b 100644 --- a/.github/workflows/vale.yml +++ b/.github/workflows/vale.yml @@ -29,11 +29,11 @@ jobs: python-version: '3.x' - name: Install Vale - run: pip install vale==2.27.0 # Install a stable version of Vale + run: pip install vale==3.9.1.0 # Install a stable version of Vale - name: Get changed files id: changed-files - uses: tj-actions/changed-files@v34 + uses: tj-actions/changed-files@v45 with: files: | website/**/*.md @@ -63,10 +63,9 @@ jobs: uses: errata-ai/vale-action@reviewdog with: token: ${{ secrets.GITHUB_TOKEN }} - reporter: github-check + reporter: github-pr-review files: ${{ steps.changed-files.outputs.all_changed_and_modified_files }} separator: ' ' - version: '2.27.0' # - name: Post summary comment # if: ${{ steps.changed-files.outputs.any_changed == 'true' }} diff --git a/styles/Vocab/EN/accept.txt b/styles/config/vocabularies/EN/accept.txt similarity index 94% rename from styles/Vocab/EN/accept.txt rename to styles/config/vocabularies/EN/accept.txt index e673e2ef83d..083b42f7bed 100644 --- a/styles/Vocab/EN/accept.txt +++ b/styles/config/vocabularies/EN/accept.txt @@ -65,3 +65,5 @@ N/A pseudocolumn yml values= +dbt v\d+\.\d+ +v\d+\.\d+ diff --git a/styles/Vocab/EN/reject.txt b/styles/config/vocabularies/EN/reject.txt similarity index 100% rename from styles/Vocab/EN/reject.txt rename to styles/config/vocabularies/EN/reject.txt diff --git a/styles/custom/Typos.yml b/styles/custom/Typos.yml index 456517950a9..c1ad5cbe95b 100644 --- a/styles/custom/Typos.yml +++ b/styles/custom/Typos.yml @@ -37,3 +37,5 @@ tokens: - '\w+/\w+|\w+-\w+|n/a' - 'n/a' - 'N/A' + - 'dbt v\d+\.\d+' + - 'v\d+\.\d+ ' diff --git a/website/blog/2022-04-14-add-ci-cd-to-bitbucket.md b/website/blog/2022-04-14-add-ci-cd-to-bitbucket.md index e871687d8cd..381a457d855 100644 --- a/website/blog/2022-04-14-add-ci-cd-to-bitbucket.md +++ 
b/website/blog/2022-04-14-add-ci-cd-to-bitbucket.md @@ -197,7 +197,7 @@ Reading the file over, you can see that we: In summary, anytime anything is pushed to main, we’ll ensure our production database reflects the dbt transformation, and we’ve saved the resulting artifacts to defer to. -> ❓ **What are artifacts and why should I defer to them?** dbt artifacts are metadata of the last run - what models and tests were defined, which ones ran successfully, and which failed. If a future dbt run is set to ***defer*** to this metadata, it means that it can select models and tests to run based on their state, including and especially their difference from the reference metadata. See [Artifacts](https://docs.getdbt.com/reference/artifacts/dbt-artifacts), [Selection methods: “state”](https://docs.getdbt.com/reference/node-selection/methods#the-state-method), and [Caveats to state comparison](https://docs.getdbt.com/reference/node-selection/state-comparison-caveats) for details. +> ❓ **What are artifacts and why should I defer to them?** dbt artifacts are metadata of the last run - what models and tests were defined, which ones ran successfully, and which failed. If a future dbt run is set to ***defer*** to this metadata, it means that it can select models and tests to run based on their state, including and especially their difference from the reference metadata. See [Artifacts](https://docs.getdbt.com/reference/artifacts/dbt-artifacts), [Selection methods: “state”](https://docs.getdbt.com/reference/node-selection/methods#state), and [Caveats to state comparison](https://docs.getdbt.com/reference/node-selection/state-comparison-caveats) for details. 
### Slim Continuous Integration: Retrieve the artifacts and do a state-based run diff --git a/website/dbt-versions.js b/website/dbt-versions.js index 3e59b926b80..bee90e3b9ed 100644 --- a/website/dbt-versions.js +++ b/website/dbt-versions.js @@ -20,6 +20,7 @@ exports.versions = [ }, { version: "1.9", + customDisplay: "1.9 (Compatible)", EOLDate: "2025-12-08", }, { diff --git a/website/docs/best-practices/how-we-style/5-how-we-style-our-yaml.md b/website/docs/best-practices/how-we-style/5-how-we-style-our-yaml.md index 8f817356334..e3b539e8b12 100644 --- a/website/docs/best-practices/how-we-style/5-how-we-style-our-yaml.md +++ b/website/docs/best-practices/how-we-style/5-how-we-style-our-yaml.md @@ -7,6 +7,7 @@ id: 5-how-we-style-our-yaml - 2️⃣ Indents should be two spaces - ➡️ List items should be indented +- 🔠 List items with a single entry can be a string. For example, `'select': 'other_user'`, but it's best practice to provide the argument as an explicit list. For example, `'select': ['other_user']` - 🆕 Use a new line to separate list items that are dictionaries where appropriate - 📏 Lines of YAML should be no longer than 80 characters. - 🛠️ Use the [dbt JSON schema](https://github.com/dbt-labs/dbt-jsonschema) with any compatible IDE and a YAML formatter (we recommend [Prettier](https://prettier.io/)) to validate your YAML files and format them automatically. diff --git a/website/docs/community/resources/oss-expectations.md b/website/docs/community/resources/oss-expectations.md index e6e5d959c96..7b518424e92 100644 --- a/website/docs/community/resources/oss-expectations.md +++ b/website/docs/community/resources/oss-expectations.md @@ -2,112 +2,122 @@ title: "Expectations for OSS contributors" --- -Whether it's a dbt package, a plugin, `dbt-core`, or this very documentation site, contributing to the open source code that supports the dbt ecosystem is a great way to level yourself up as a developer, and to give back to the community. 
The goal of this page is to help you understand what to expect when contributing to dbt open source software (OSS). While we can only speak for our own experience as open source maintainers, many of these guidelines apply when contributing to other open source projects, too. +Whether it's `dbt-core`, adapters, packages, or this very documentation site, contributing to the open source code that supports the dbt ecosystem is a great way to share your knowledge, level yourself up as a developer, and to give back to the community. The goal of this page is to help you understand what to expect when contributing to dbt open source software (OSS). -Have you seen things in other OSS projects that you quite like, and think we could learn from? [Open a discussion on the dbt Community Forum](https://discourse.getdbt.com), or start a conversation in the dbt Community Slack (for example: `#community-strategy`, `#dbt-core-development`, `#package-ecosystem`, `#adapter-ecosystem`). We always appreciate hearing from you! +Have you seen things in other OSS projects that you quite like, and think we could learn from? [Open a discussion on the dbt Community Forum](https://discourse.getdbt.com), or start a conversation in the [dbt Community Slack](https://www.getdbt.com/community/join-the-community) (for example: `#community-strategy`, `#dbt-core-development`, `#package-ecosystem`, `#adapter-ecosystem`). We always appreciate hearing from you! ## Principles ### Open source is participatory -Why take time out of your day to write code you don’t _have_ to? We all build dbt together. By using dbt, you’re invested in the future of the tool, and an agent in pushing forward the practice of analytics engineering. You’ve already benefited from using code contributed by community members, and documentation written by community members. Contributing to dbt OSS is your way to pay it forward, as an active participant in the thing we’re all creating together. 
+We all build dbt together -- whether you write code or contribute your ideas. By using dbt, you're invested in the future of the tool, and have an active role in pushing forward the standard of analytics engineering. You already benefit from using code and documentation contributed by community members. Contributing to the dbt community is your way to be an active participant in the thing we're all creating together. -There’s a very practical reason, too: OSS prioritizes our collective knowledge and experience over any one person’s. We don’t have experience using every database, operating system, security environment, ... We rely on the community of OSS users to hone our product capabilities and documentation to the wide variety of contexts in which it operates. In this way, dbt gets to be the handiwork of thousands, rather than a few dozen. +There's a very practical reason, too: OSS prioritizes our collective knowledge and experience over any one person's. We don't have experience using every database, operating system, security environment, ... We rely on the community of OSS users to hone our product capabilities and documentation to the wide variety of contexts in which it operates. In this way, dbt gets to be the handiwork of thousands, rather than a few dozen. -### We take seriously our role as maintainers +### We take seriously our role as maintainers of a standard -In that capacity, we cannot and will not fix every bug ourselves, or code up every feature worth doing. Instead, we’ll do our best to respond to new issues with context (including links to related issues), feedback, alternatives/workarounds, and (whenever possible) pointers to code that would aid a community contributor. If a change is so tricky or involved that the initiative rests solely with us, we’ll do our best to explain the complexity, and when / why we could foresee prioritizing it. 
Our role also includes maintenance of the backlog of issues, such as closing duplicates, proposals we don’t intend to support, or stale issues (no activity for 180 days). +As a standard, dbt must be reliable and consistent. Our first priority is ensuring the continued high quality of existing dbt capabilities before we introduce net-new capabilities. -### Initiative is everything +We also believe dbt as a framework should be extensible enough to ["make the easy things easy, and the hard things possible"](https://en.wikipedia.org/wiki/Perl#Philosophy). To that end, we _don't_ believe it's appropriate for dbt to have an out-of-the-box solution for every niche problem. Users have the flexibility to achieve many custom behaviors by defining their own macros, materializations, hooks, and more. We view it as our responsibility as maintainers to decide when something should be "possible" — via macros, packages, etc. — and when something should be "easy" — built into the dbt Core standard. -Given that we, as maintainers, will not be able to resolve every bug or flesh out every feature request, we empower you, as a community member, to initiate a change. +So when will we say "yes" to new capabilities for dbt Core? The signals we look for include: +- Upvotes on issues in our GitHub repos +- Open source dbt packages trying to close a gap +- Technical advancements in the ecosystem -- If you open the bug report, it’s more likely to be identified. -- If you open the feature request, it’s more likely to be discussed. -- If you comment on the issue, engaging with ideas and relating it to your own experience, it’s more likely to be prioritized. -- If you open a PR to fix an identified bug, it’s more likely to be fixed. -- If you contribute the code for a well-understood feature, that feature is more likely to be in the next version. -- If you review an existing PR, to confirm it solves a concrete problem for you, it’s more likely to be merged. 
+In the meantime — we'll do our best to respond to new issues with: +- Clarity about whether the proposed feature falls into the intended scope of dbt Core +- Context (including links to related issues) +- Alternatives and workarounds +- When possible, pointers to code that would aid a community contributor -Sometimes, this can feel like shouting into the void, especially if you aren’t met with an immediate response. We promise that there are dozens (if not hundreds) of folks who will read your comment, maintainers included. It all adds up to a real difference. +### Initiative is everything -# Practicalities +Given that we, as maintainers, will not be able to resolve every bug or flesh out every feature request, we empower you, as a community member, to initiate a change. -As dbt OSS is growing in popularity, and dbt Labs has been growing in size, we’re working to involve new people in the responsibilities of OSS maintenance. We really appreciate your patience as our newest maintainers are learning and developing habits. +- If you open the bug report, it's more likely to be identified. +- If you open the feature request, it's more likely to be discussed. +- If you comment on the issue, engaging with ideas and relating it to your own experience, it's more likely to be prioritized. +- If you open a PR to fix an identified bug, it's more likely to be fixed. +- If you comment on an existing PR, to confirm it solves the concrete problem for your team in practice, it's more likely to be merged. -## Discussions +Sometimes, this can feel like shouting into the void, especially if you aren't met with an immediate response. We promise that there are dozens (if not hundreds) of folks who will read your comment, including us as maintainers. It all adds up to a real difference. -Discussions are a relatively new GitHub feature, and we really like them! 
+## Practicalities -A discussion is best suited to propose a Big Idea, such as brand-new capability in dbt Core, or a new section of the product docs. Anyone can open a discussion, add a comment to an existing one, or reply in a thread. +### Discussions -What can you expect from a new Discussion? Hopefully, comments from other members of the community, who like your idea or have their own ideas for how it could be improved. The most helpful comments are ones that describe the kinds of experiences users and readers should have. Unlike an **issue**, there is no specific code change that would “resolve” a Discussion. +A discussion is best suited to propose a Big Idea, such as a brand-new capability in dbt Core or an adapter. Anyone can open a discussion, comment on an existing one, or reply in a thread. -If, over the course of a discussion, we do manage to reach consensus on a way forward, we’ll open a new issue that references the discussion for context. That issue will connect desired outcomes to specific implementation details, as well as perceived limitations and open questions. It will serve as a formal proposal and request for comment. +When you open a new discussion, you might be looking for validation from other members of the community — folks who identify with your problem statement, who like your proposed idea, and who may have their own ideas for how it could be improved. The most helpful comments propose nuances or desirable user experiences to be considered in design and refinement. Unlike an **issue**, there is no specific code change that would “resolve” a discussion. -## Issues +If, over the course of a discussion, we reach a consensus on specific elements of a proposed design, we can open new implementation issues that reference the discussion for context. Those issues will connect desired user outcomes to specific implementation details, acceptance testing, and remaining questions that need answering. 
-An issue could be a bug you’ve identified while using the product or reading the documentation. It could also be a specific idea you’ve had for how it could be better. +### Issues -### Best practices for issues +An issue could be a bug you've identified while using the product or reading the documentation. It could also be a specific idea you've had for a narrow extension of existing functionality. + +#### Best practices for issues - Issues are **not** for support / troubleshooting / debugging help. Please see [dbt support](/docs/dbt-support) for more details and suggestions on how to get help. - Always search existing issues first, to see if someone else had the same idea / found the same bug you did. -- Many repositories offer templates for creating issues, such as when reporting a bug or requesting a new feature. If available, please select the relevant template and fill it out to the best of your ability. This will help other people understand your issue and respond. +- Many dbt repositories offer templates for creating issues, such as reporting a bug or requesting a new feature. If available, please select the relevant template and fill it out to the best of your ability. This information helps us (and others) understand your issue. -### You’ve found an existing issue that interests you. What should you do? +##### You've found an existing issue that interests you. What should you do? -Comment on it! Explain that you’ve run into the same bug, or had a similar idea for a new feature. If the issue includes a detailed proposal for a change, say which parts of the proposal you find most compelling, and which parts give you pause. +Comment on it! Explain that you've run into the same bug, or had a similar idea for a new feature. If the issue includes a detailed proposal for a change, say which parts of the proposal you find most compelling, and which parts give you pause. -### You’ve opened a new issue. What can you expect to happen? 
+##### You've opened a new issue. What can you expect to happen? -In our most critical repositories (such as `dbt-core`), **our goal is to respond to new issues within 2 standard work days.** While this initial response might be quite lengthy (context, feedback, and pointers that we can offer as maintainers), more often it will be a short acknowledgement that the maintainers are aware of it and don't believe it's in urgent need of resolution. Depending on the nature of your issue, it might be well suited to an external contribution, from you or another community member. +In our most critical repositories (such as `dbt-core`), our goal is to respond to new issues as soon as possible. This initial response will often be a short acknowledgement that the maintainers are aware of the issue, signalling our perception of its urgency. Depending on the nature of your issue, it might be well suited to an external contribution, from you or another community member. -**What does “triage” mean?** In some repositories, we use a `triage` label to keep track of issues that need an initial response from a maintainer. +**What if you're opening an issue in a different repository?** We have engineering teams dedicated to active maintenance of [`dbt-core`](https://github.com/dbt-labs/dbt-core) and its component libraries ([`dbt-common`](https://github.com/dbt-labs/dbt-common) + [`dbt-adapters`](https://github.com/dbt-labs/dbt-adapters)), as well as several platform-specific adapters ([`dbt-snowflake`](https://github.com/dbt-labs/dbt-snowflake), [`dbt-bigquery`](https://github.com/dbt-labs/dbt-bigquery), [`dbt-redshift`](https://github.com/dbt-labs/dbt-redshift), [`dbt-postgres`](https://github.com/dbt-labs/dbt-postgres)). We've open-sourced a number of other software projects over the years, and the majority of them do not have the same activity or maintenance guarantees. Check to see if other recent issues have responses, or when the last commit was added to the `main` branch. 
-**What if I’m opening an issue in a different repository?** **What if I’m opening an issue in a different repository?** We have engineering teams dedicated to active maintainence of [`dbt-core`](https://github.com/dbt-labs/dbt-core) and its component libraries ([`dbt-common`](https://github.com/dbt-labs/dbt-common) + [`dbt-adapters`](https://github.com/dbt-labs/dbt-adapters)), as well as several platform-specific adapters ([`dbt-snowflake`](https://github.com/dbt-labs/dbt-snowflake), [`dbt-bigquery`](https://github.com/dbt-labs/dbt-bigquery), [`dbt-redshift`](https://github.com/dbt-labs/dbt-redshift), [`dbt-postgres`](https://github.com/dbt-labs/dbt-postgres)). We’ve open sourced a number of other software projects over the years, and the majority of them do not have the same activity or maintenance guarantees. Check to see if other recent issues have responses, or when the last commit was added to the `main` branch. +**You're not sure about the status of your issue.** If your issue is in an actively maintained repo and has a `triage` label attached, we're aware it's something that needs a response. If the issue has been triaged, but not prioritized, this could mean: +- The intended scope or user experience of a proposed feature requires further refinement from a maintainer +- We believe the required code change is too tricky for an external contributor -**If my issue is lingering...** Sorry for the delay! If your issue is in an actively maintained repo and has a `triage` label attached, we’re aware it's something that needs a response. +We'll do our best to explain the open questions or complexity, and when / why we could foresee prioritizing it. -**Automation that can help us:** In many repositories, we use a bot that marks issues as stale if they haven’t had any activity for 180 days. This helps us keep our backlog organized and up-to-date. We encourage you to comment on older open issues that you’re interested in, to keep them from being marked stale. 
You’re also always welcome to comment on closed issues to say that you’re still interested in the proposal. +**Automation that can help us:** In many repositories, we use a bot that marks issues as stale if they haven't had any activity for 180 days. This helps us keep our backlog organized and up-to-date. We encourage you to comment on older open issues that you're interested in, to keep them from being marked stale. You're also always welcome to comment on closed issues to say that you're still interested in the proposal. -### Issue labels +#### Issue labels In all likelihood, the maintainer who responds will also add a number of labels. Not all of these labels are used in every repository. -In some cases, the right resolution to an open issue might be tangential to the codebase. The right path forward might be in another codebase (we'll transfer it), a documentation update, or a change that can be made in user-space code. In other cases, the issue might describe functionality that the maintainers are unwilling or unable to incorporate into the main codebase. In these cases, a maintainer will close the issue (perhaps using a `wontfix` label) and explain why. +In some cases, the right resolution to an open issue might be tangential to the codebase. The right path forward might be in another codebase (we'll transfer it), a documentation update, or a change that you can make yourself in user-space code. In other cases, the issue might describe functionality that the maintainers are unwilling or unable to incorporate into the main codebase. In these cases, a maintainer will close the issue (perhaps using a `wontfix` label) and explain why. 
+ +Some of the most common labels are explained below: | tag | description | | ------------------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | `triage` | This is a new issue which has not yet been reviewed by a maintainer. This label is removed when a maintainer reviews and responds to the issue. | -| `bug` | This issue represents a defect or regression from the behavior that's documented, or that you reasonably expect | -| `enhancement` | This issue represents net-new functionality, including an extension of an existing capability | -| `good_first_issue` | This issue does not require deep knowledge of the codebase to implement. This issue is appropriate for a first-time contributor. | +| `bug` | This issue represents a defect or regression from the behavior that's documented | +| `enhancement` | This issue represents a narrow extension of an existing capability | +| `good_first_issue` | This issue does not require deep knowledge of the codebase to implement, and it is appropriate for a first-time contributor. | | `help_wanted` | This issue is trickier than a "good first issue." The required changes are scattered across the codebase, or more difficult to test. The maintainers are happy to help an experienced community contributor; they aren't planning to prioritize this issue themselves. | | `duplicate` | This issue is functionally identical to another open issue. The maintainers will close this issue and encourage community members to focus conversation on the other one. | | `stale` | This is an old issue which has not recently been updated. In repositories with a lot of activity, stale issues will periodically be closed. 
| | `wontfix` | This issue does not require a code change in the repository, or the maintainers are unwilling to merge a change which implements the proposed behavior. | -## Pull requests - -PRs are your surest way to make the change you want to see in dbt / packages / docs, especially when the change is straightforward. +### Pull requests -**Every PR should be associated with an issue.** Why? Before you spend a lot of time working on a contribution, we want to make sure that your proposal will be accepted. You should open an issue first, describing your desired outcome and outlining your planned change. If you've found an older issue that's already open, comment on it with an outline for your planned implementation. Exception to this rule: If you're just opening a PR for a cosmetic fix, such as a typo in documentation, an issue isn't needed. +**Every PR should be associated with an issue.** Why? Before you spend a lot of time working on a contribution, we want to make sure that your proposal will be accepted. You should open an issue first, describing your desired outcome and outlining your planned change. If you've found an older issue that's already open, comment on it with an outline for your planned implementation _before_ putting in the work to open a pull request. -**PRs must include robust testing.** Comprehensive testing within pull requests is crucial for the stability of our project. By prioritizing robust testing, we ensure the reliability of our codebase, minimize unforeseen issues, and safeguard against potential regressions. We cannot merge changes that risk the backward incompatibility of existing documented behaviors. We understand that creating thorough tests often requires significant effort, and your dedication to this process greatly contributes to the project's overall reliability. Thank you for your commitment to maintaining the integrity of our codebase and the experience of everyone using dbt! 
+**PRs must include robust testing.** Comprehensive testing within pull requests is crucial for the stability of dbt. By prioritizing robust testing, we ensure the reliability of our codebase, minimize unforeseen issues, and safeguard against potential regressions. **We cannot merge changes that risk the backward incompatibility of existing documented behaviors.** We understand that creating thorough tests often requires significant effort, and your dedication to this process greatly contributes to the project's overall reliability. Thank you for your commitment to maintaining the integrity of our codebase and the experience of everyone using dbt! -**PRs go through two review steps.** First, we aim to respond with feedback on whether we think the implementation is appropriate from a product & usability standpoint. At this point, we will close PRs that we believe fall outside the scope of dbt Core, or which might lead to an inconsistent user experience. This is an important part of our role as maintainers; we're always open to hearing disagreement. If a PR passes this first review, we will queue it up for code review, at which point we aim to test it ourselves and provide thorough feedback within the next month. +**PRs go through two review steps.** First, we aim to respond with feedback on whether we think the implementation is appropriate from a product & usability standpoint. At this point, we will close PRs that we believe fall outside the scope of dbt Core, or which might lead to an inconsistent user experience. This is an important part of our role as maintainers; we're always open to hearing disagreement. If a PR passes this first review, we will queue it up for code review, at which point we aim to test it ourselves and provide thorough feedback. 
-**We receive more PRs than we can thoroughly review, test, and merge.** Our teams have finite capacity, and our top priority is maintaining a well-scoped, high-quality framework for the tens of thousands of people who use it every week. To that end, we must prioritize overall stability and planned improvements over a long tail of niche potential features. For best results, say what in particular you’d like feedback on, and explain what would it mean to you, your team, and other community members to have the proposed change merged. Smaller PRs tackling well-scoped issues tend to be easier and faster for review. Two recent examples of community-contributed PRs: +**We receive more PRs than we can thoroughly review, test, and merge.** Our teams have finite capacity, and our top priority is maintaining a well-scoped, high-quality framework for the tens of thousands of people who use it every week. To that end, we must prioritize overall stability and planned improvements over a long tail of niche potential features. For best results, say what in particular you'd like feedback on, and explain what it would mean to you, your team, and other community members to have the proposed change merged. Smaller PRs tackling well-scoped issues tend to be easier and faster for review. Two examples of community-contributed PRs: - [(dbt-core#9347) Fix configuration of turning test warnings into failures](https://github.com/dbt-labs/dbt-core/pull/9347) - [(dbt-core#9863) Better error message when trying to select a disabled model](https://github.com/dbt-labs/dbt-core/pull/9863) -**Automation that can help us:** Many repositories have a template for pull request descriptions, which will include a checklist that must be completed before the PR can be merged. You don’t have to do all of these things to get an initial PR, but they definitely help. 
Those many include things like: +**Automation that can help us:** Many repositories have a template for pull request descriptions, which will include a checklist that must be completed before the PR can be merged. You don't have to do all of these things to open an initial PR, but any that are missing will delay our review process. Those include: -- **Tests!** When you open a PR, some tests and code checks will run. (For security reasons, some may need to be approved by a maintainer.) We will not merge any PRs with failing tests. If you’re not sure why a test is failing, please say so, and we’ll do our best to get to the bottom of it together. +- **Tests, tests, tests.** When you open a PR, some tests and code checks will run. (For security reasons, some may need to be approved by a maintainer.) We will not merge any PRs with failing tests. If you're not sure why a test is failing, please say so, and we'll do our best to get to the bottom of it together. - **Contributor License Agreement** (CLA): This ensures that we can merge your code, without worrying about unexpected implications for the copyright or license of open source dbt software. For more details, read: ["Contributor License Agreements"](../resources/contributor-license-agreements.md) - **Changelog:** In projects that include a number of changes in each release, we need a reliable way to signal what's been included. The mechanism for this will vary by repository, so keep an eye out for notes about how to update the changelog. -### Inclusion in release versions +#### Inclusion in release versions -Both bug fixes and backwards-compatible new features will be included in the [next minor release](/docs/dbt-versions/core#how-dbt-core-uses-semantic-versioning). Fixes for regressions and net-new bugs that were present in the minor version's original release will be backported to versions with [active support](/docs/dbt-versions/core). 
Other bug fixes may be backported when we have high confidence that they're narrowly scoped and won't cause unintended side effects. +Both bug fixes and backwards-compatible new features will be included in the [next minor release of dbt Core](/docs/dbt-versions/core#how-dbt-core-uses-semantic-versioning). Fixes for regressions and net-new bugs that were present in the minor version's original release will be backported to versions with [active support](/docs/dbt-versions/core). Other bug fixes may be backported when we have high confidence that they're narrowly scoped and won't cause unintended side effects. diff --git a/website/docs/docs/build/data-tests.md b/website/docs/docs/build/data-tests.md index af48e0af267..559fe468644 100644 --- a/website/docs/docs/build/data-tests.md +++ b/website/docs/docs/build/data-tests.md @@ -9,6 +9,11 @@ id: "data-tests" keywords: - test, tests, testing, dag --- + +import CopilotBeta from '/snippets/_dbt-copilot-avail.md'; + + + ## Related reference docs * [Test command](/reference/commands/test) * [Data test properties](/reference/resource-properties/data-tests) diff --git a/website/docs/docs/build/dbt-tips.md b/website/docs/docs/build/dbt-tips.md index 817468e5e9c..0cc83394b8b 100644 --- a/website/docs/docs/build/dbt-tips.md +++ b/website/docs/docs/build/dbt-tips.md @@ -40,7 +40,7 @@ Leverage these dbt packages to streamline your workflow: - Set `vars` in your `dbt_project.yml` to define global defaults for certain conditions, which you can then override using the `--vars` flag in your commands. - Use [for loops](/guides/using-jinja?step=3) in Jinja to DRY up repetitive logic, such as selecting a series of columns that all require the same transformations and naming patterns to be applied. - Instead of relying on post-hooks, use the [grants config](/reference/resource-configs/grants) to apply permission grants in the warehouse resiliently. 
-- Define [source-freshness](/docs/build/sources#snapshotting-source-data-freshness) thresholds on your sources to avoid running transformations on data that has already been processed. +- Define [source-freshness](/docs/build/sources#source-data-freshness) thresholds on your sources to avoid running transformations on data that has already been processed. - Use the `+` operator on the left of a model `dbt build --select +model_name` to run a model and all of its upstream dependencies. Use the `+` operator on the right of the model `dbt build --select model_name+` to run a model and everything downstream that depends on it. - Use `dir_name` to run all models in a package or directory. - Use the `@` operator on the left of a model in a non-state-aware CI setup to test it. This operator runs all of a selection’s parents and children, and also runs the parents of its children, which in a fresh CI schema will likely not exist yet. diff --git a/website/docs/docs/build/documentation.md b/website/docs/docs/build/documentation.md index 6f7c6c27f31..455fc9e70e0 100644 --- a/website/docs/docs/build/documentation.md +++ b/website/docs/docs/build/documentation.md @@ -7,6 +7,10 @@ id: "documentation" Good documentation for your dbt models will help downstream consumers discover and understand the datasets you curate for them. dbt provides a way to generate documentation for your dbt project and render it as a website. 
+import CopilotBeta from '/snippets/_dbt-copilot-avail.md'; + + + ## Related documentation * [Declaring properties](/reference/configs-and-properties) diff --git a/website/docs/docs/build/exposures.md b/website/docs/docs/build/exposures.md index 16dfd0e5f73..a3ac7bcb3ce 100644 --- a/website/docs/docs/build/exposures.md +++ b/website/docs/docs/build/exposures.md @@ -77,5 +77,5 @@ When we generate the [dbt Explorer site](/docs/collaborate/explore-projects), yo ## Related docs * [Exposure properties](/reference/exposure-properties) -* [`exposure:` selection method](/reference/node-selection/methods#the-exposure-method) +* [`exposure:` selection method](/reference/node-selection/methods#exposure) * [Data health tiles](/docs/collaborate/data-tile) diff --git a/website/docs/docs/build/groups.md b/website/docs/docs/build/groups.md index 890ee96901a..1be4388c246 100644 --- a/website/docs/docs/build/groups.md +++ b/website/docs/docs/build/groups.md @@ -119,4 +119,4 @@ dbt.exceptions.DbtReferenceError: Parsing Error * [Model Access](/docs/collaborate/govern/model-access#groups) * [Group configuration](/reference/resource-configs/group) -* [Group selection](/reference/node-selection/methods#the-group-method) +* [Group selection](/reference/node-selection/methods#group) diff --git a/website/docs/docs/build/incremental-microbatch.md b/website/docs/docs/build/incremental-microbatch.md index 901f59a167c..4aff8b5839c 100644 --- a/website/docs/docs/build/incremental-microbatch.md +++ b/website/docs/docs/build/incremental-microbatch.md @@ -187,7 +187,7 @@ Several configurations are relevant to microbatch models, and some are required: | [`begin`](/reference/resource-configs/begin) | The "beginning of time" for the microbatch model. This is the starting point for any initial or full-refresh builds. For example, a daily-grain microbatch model run on `2024-10-01` with `begin = '2023-10-01` will process 366 batches (it's a leap year!) plus the batch for "today." 
| N/A | Date | Required | | [`batch_size`](/reference/resource-configs/batch-size) | The granularity of your batches. Supported values are `hour`, `day`, `month`, and `year` | N/A | String | Required | | [`lookback`](/reference/resource-configs/lookback) | Process X batches prior to the latest bookmark to capture late-arriving records. | `1` | Integer | Optional | -| [`concurrent_batches`](/reference/resource-properties/concurrent_batches) | An override for whether batches run concurrently (at the same time) or sequentially (one after the other). | `None` | Boolean | Optional | +| [`concurrent_batches`](/reference/resource-properties/concurrent_batches) | Overrides dbt's auto detect for running batches concurrently (at the same time). Read more about [configuring concurrent batches](/docs/build/incremental-microbatch#configure-concurrent_batches). Setting to
* `true` runs batches concurrently (in parallel).
* `false` runs batches sequentially (one after the other). | `None` | Boolean | Optional | diff --git a/website/docs/docs/build/materializations.md b/website/docs/docs/build/materializations.md index 723acf87414..2ed30c7126a 100644 --- a/website/docs/docs/build/materializations.md +++ b/website/docs/docs/build/materializations.md @@ -18,7 +18,11 @@ You can also configure [custom materializations](/guides/create-new-materializat ## Configuring materializations -By default, dbt models are materialized as "views". Models can be configured with a different materialization by supplying the `materialized` configuration parameter as shown below. +By default, dbt models are materialized as "views". Models can be configured with a different materialization by supplying the [`materialized` configuration](/reference/resource-configs/materialized) parameter as shown in the following tabs. + + + + @@ -49,6 +53,10 @@ models: + + + + Alternatively, materializations can be configured directly inside of the model sql files. This can be useful if you are also setting [Performance Optimization] configs for specific models (for example, [Redshift specific configurations](/reference/resource-configs/redshift-configs) or [BigQuery specific configurations](/reference/resource-configs/bigquery-configs)). @@ -63,6 +71,29 @@ from ... + + + + +Materializations can also be configured in the model's `properties.yml` file. The following example shows the `table` materialization type. For a complete list of materialization types, refer to [materializations](/docs/build/materializations#materializations). 
+ + + +```yaml +version: 2 + +models: + - name: events + config: + materialized: table +``` + + + + + + + ## Materializations diff --git a/website/docs/docs/build/metrics-overview.md b/website/docs/docs/build/metrics-overview.md index 57cdd929acb..e874dced63a 100644 --- a/website/docs/docs/build/metrics-overview.md +++ b/website/docs/docs/build/metrics-overview.md @@ -271,9 +271,9 @@ metrics: measure: name: cancellations_usd # Specify the measure you are creating a proxy for. fill_nulls_with: 0 + join_to_timespine: true filter: | {{ Dimension('order__value')}} > 100 and {{Dimension('user__acquisition')}} is not null - join_to_timespine: true ``` diff --git a/website/docs/docs/build/packages.md b/website/docs/docs/build/packages.md index 9ba4ceeaff5..82ba2c3d74c 100644 --- a/website/docs/docs/build/packages.md +++ b/website/docs/docs/build/packages.md @@ -157,9 +157,57 @@ packages: Where `name: 'dbt_utils'` specifies the subfolder of `dbt_packages` that's created for the package source code to be installed within. -### Private packages +## Private packages -#### SSH Key Method (Command Line only) +### Native private packages + +dbt Cloud supports private packages from [supported](#prerequisites) Git repos leveraging an existing [configuration](/docs/cloud/git/git-configuration-in-dbt-cloud) in your environment. Previously, you had to configure a [token](#git-token-method) to retrieve packages from your private repos. + +#### Prerequisites + +To use native private packages, you must have one of the following Git providers configured in the **Integrations** section of your **Account settings**: +- [GitHub](/docs/cloud/git/connect-github) +- [Azure DevOps](/docs/cloud/git/connect-azure-devops) +- Support for GitLab is coming soon.
+ + +#### Configuration + +Use the `private` key in your `packages.yml` or `dependencies.yml` to clone package repos using your existing dbt Cloud Git integration without having to provision an access token or create a dbt Cloud environment variable: + + + +```yaml +packages: + - private: dbt-labs/awesome_repo + - package: normal packages + + [...] +``` + + + +You can pin private packages similar to regular dbt packages: + +```yaml +packages: + - private: dbt-labs/awesome_repo + revision: "0.9.5" # Pin to a tag, branch, or complete 40-character commit hash + +``` + +If you are using multiple Git integrations, disambiguate by adding the provider key: + +```yaml +packages: + - private: dbt-labs/awesome_repo + provider: "github" # GitHub and Azure are currently supported. GitLab is coming soon. + +``` + +With this method, you can retrieve private packages from an integrated Git provider without any additional steps to connect. + +### SSH key method (command line only) If you're using the Command Line, private packages can be cloned via SSH and an SSH key. When you use SSH keys to authenticate to your git remote server, you don’t need to supply your username and password each time. Read more about SSH keys, how to generate them, and how to add them to your git provider here: [Github](https://docs.github.com/en/github/authenticating-to-github/connecting-to-github-with-ssh) and [GitLab](https://docs.gitlab.com/ee/user/ssh.html). @@ -177,7 +225,14 @@ packages: If you're using dbt Cloud, the SSH key method will not work, but you can use the [HTTPS Git Token Method](https://docs.getdbt.com/docs/build/packages#git-token-method). -#### Git token method +### Git token method + +:::note + +dbt Cloud has [native support](#native-private-packages) for Git hosted private packages with GitHub and Azure DevOps (GitLab coming soon). 
If you are using a supported [integrated Git environment](/docs/cloud/git/git-configuration-in-dbt-cloud), you no longer need to configure Git tokens to retrieve private packages. + +::: + This method allows the user to clone via HTTPS by passing in a git token via an environment variable. Be careful of the expiration date of any token you use, as an expired token could cause a scheduled run to fail. Additionally, user tokens can create a challenge if the user ever loses access to a specific repo. @@ -246,7 +301,7 @@ Read more about creating a Personal Access Token [here](https://confluence.atlas -#### Configure subdirectory for packaged projects +## Configure subdirectory for packaged projects In general, dbt expects `dbt_project.yml` to be located as a top-level file in a package. If the packaged project is instead nested in a subdirectory—perhaps within a much larger mono repo—you can optionally specify the folder path as `subdirectory`. dbt will attempt a [sparse checkout](https://git-scm.com/docs/git-sparse-checkout) of just the files located within that subdirectory. Note that you must be using a recent version of `git` (`>=2.26.0`). diff --git a/website/docs/docs/build/projects.md b/website/docs/docs/build/projects.md index a65d4773ac6..4732dbe6da7 100644 --- a/website/docs/docs/build/projects.md +++ b/website/docs/docs/build/projects.md @@ -22,6 +22,8 @@ At a minimum, all a project needs is the `dbt_project.yml` project configuration | [metrics](/docs/build/build-metrics-intro) | A way for you to define metrics for your project. | | [groups](/docs/build/groups) | Groups enable collaborative node organization in restricted collections. | | [analysis](/docs/build/analyses) | A way to organize analytical SQL queries in your project such as the general ledger from your QuickBooks. 
| +| [semantic models](/docs/build/semantic-models) | Semantic models define the foundational data relationships in [MetricFlow](/docs/build/about-metricflow) and the [dbt Semantic Layer](/docs/use-dbt-semantic-layer/dbt-sl), enabling you to query metrics using a semantic graph. | +| [saved queries](/docs/build/saved-queries) | Saved queries organize reusable queries by grouping metrics, dimensions, and filters into nodes visible in the dbt DAG. | When building out the structure of your project, you should consider these impacts on your organization's workflow: diff --git a/website/docs/docs/build/semantic-models.md b/website/docs/docs/build/semantic-models.md index 5ff363dd44c..4edc6b0a422 100644 --- a/website/docs/docs/build/semantic-models.md +++ b/website/docs/docs/build/semantic-models.md @@ -9,6 +9,10 @@ tags: [Metrics, Semantic Layer] pagination_next: "docs/build/dimensions" --- +import CopilotBeta from '/snippets/_dbt-copilot-avail.md'; + + + Semantic models are the foundation for data definition in MetricFlow, which powers the dbt Semantic Layer: - Think of semantic models as nodes connected by entities in a semantic graph. diff --git a/website/docs/docs/build/sources.md b/website/docs/docs/build/sources.md index 4926601f3b2..aad1ac42c8e 100644 --- a/website/docs/docs/build/sources.md +++ b/website/docs/docs/build/sources.md @@ -130,11 +130,11 @@ You can find more details on the available properties for sources in the [refere -## Snapshotting source data freshness -With a couple of extra configs, dbt can optionally snapshot the "freshness" of the data in your source tables. This is useful for understanding if your data pipelines are in a healthy state, and is a critical component of defining SLAs for your warehouse. +## Source data freshness +With a couple of extra configs, dbt can optionally capture the "freshness" of the data in your source tables. 
This is useful for understanding if your data pipelines are in a healthy state, and is a critical component of defining SLAs for your warehouse. ### Declaring source freshness -To configure sources to snapshot freshness information, add a `freshness` block to your source and `loaded_at_field` to your table declaration: +To configure source freshness information, add a `freshness` block to your source and `loaded_at_field` to your table declaration: @@ -164,14 +164,14 @@ sources: -In the `freshness` block, one or both of `warn_after` and `error_after` can be provided. If neither is provided, then dbt will not calculate freshness snapshots for the tables in this source. +In the `freshness` block, one or both of `warn_after` and `error_after` can be provided. If neither is provided, then dbt will not calculate freshness for the tables in this source. Additionally, the `loaded_at_field` is required to calculate freshness for a table. If a `loaded_at_field` is not provided, then dbt will not calculate freshness for the table. These configs are applied hierarchically, so `freshness` and `loaded_at_field` values specified for a `source` will flow through to all of the `tables` defined in that source. This is useful when all of the tables in a source have the same `loaded_at_field`, as the config can just be specified once in the top-level source definition. 
### Checking source freshness -To snapshot freshness information for your sources, use the `dbt source freshness` command ([reference docs](/reference/commands/source)): +To obtain freshness information for your sources, use the `dbt source freshness` command ([reference docs](/reference/commands/source)): ``` $ dbt source freshness @@ -182,7 +182,7 @@ Behind the scenes, dbt uses the freshness properties to construct a `select` que ```sql select max(_etl_loaded_at) as max_loaded_at, - convert_timezone('UTC', current_timestamp()) as snapshotted_at + convert_timezone('UTC', current_timestamp()) as calculated_at from raw.jaffle_shop.orders ``` @@ -198,7 +198,7 @@ Some databases can have tables where a filter over certain columns are required, ```sql select max(_etl_loaded_at) as max_loaded_at, - convert_timezone('UTC', current_timestamp()) as snapshotted_at + convert_timezone('UTC', current_timestamp()) as calculated_at from raw.jaffle_shop.orders where _etl_loaded_at >= date_sub(current_date(), interval 1 day) ``` diff --git a/website/docs/docs/build/unit-tests.md b/website/docs/docs/build/unit-tests.md index a81fc088de7..fc4cf02b34f 100644 --- a/website/docs/docs/build/unit-tests.md +++ b/website/docs/docs/build/unit-tests.md @@ -24,6 +24,7 @@ Starting in dbt Core v1.8, we have introduced an additional type of test to dbt - We currently only support adding unit tests to models in your _current_ project. - We currently _don't_ support unit testing models that use the [`materialized view`](/docs/build/materializations#materialized-view) materialization. - We currently _don't_ support unit testing models that use recursive SQL. +- We currently _don't_ support unit testing models that use introspective queries. - If your model has multiple versions, by default the unit test will run on *all* versions of your model. Read [unit testing versioned models](/reference/resource-properties/unit-testing-versions) for more information. 
- Unit tests must be defined in a YML file in your [`models/` directory](/reference/project-configs/model-paths). - Table names must be aliased in order to unit test `join` logic. diff --git a/website/docs/docs/cloud-integrations/configure-auto-exposures.md b/website/docs/docs/cloud-integrations/configure-auto-exposures.md index 2bb09573221..9692249240a 100644 --- a/website/docs/docs/cloud-integrations/configure-auto-exposures.md +++ b/website/docs/docs/cloud-integrations/configure-auto-exposures.md @@ -6,7 +6,7 @@ description: "Import and auto-generate exposures from dashboards and understand image: /img/docs/cloud-integrations/auto-exposures/explorer-lineage2.jpg --- -# Configure auto-exposures +# Configure auto-exposures As a data team, it’s critical that you have context into the downstream use cases and users of your data products. [Auto-exposures](/docs/collaborate/auto-exposures) integrates natively with Tableau and [auto-generates downstream lineage](/docs/collaborate/auto-exposures#view-auto-exposures-in-dbt-explorer) in dbt Explorer for a richer experience. diff --git a/website/docs/docs/cloud-integrations/overview.md b/website/docs/docs/cloud-integrations/overview.md index 8334632a7f8..f5208c8d754 100644 --- a/website/docs/docs/cloud-integrations/overview.md +++ b/website/docs/docs/cloud-integrations/overview.md @@ -13,7 +13,7 @@ Many data applications integrate with dbt Cloud, enabling you to leverage the po
diff --git a/website/docs/docs/cloud/about-cloud-develop-defer.md b/website/docs/docs/cloud/about-cloud-develop-defer.md index ea059ed3e27..d1685c42cba 100644 --- a/website/docs/docs/cloud/about-cloud-develop-defer.md +++ b/website/docs/docs/cloud/about-cloud-develop-defer.md @@ -19,7 +19,7 @@ When using `--defer`, dbt Cloud will follow this order of execution for resolvin 2. If a development version doesn't exist, dbt uses the staging locations of parent relations based on metadata from the staging environment. 3. If both a development and staging version doesn't exist, dbt uses the production locations of parent relations based on metadata from the production environment. -**Note:** Passing the `--favor-state` flag will always resolve refs using production metadata, regardless of the presence of a development relation, skipping step #1. +**Note:** Passing the `--favor-state` flag will always resolve refs using staging metadata if available; otherwise, it defaults to production metadata regardless of the presence of a development relation, skipping step #1. For a clean slate, it's a good practice to drop the development schema at the start and end of your development cycle. 
diff --git a/website/docs/docs/cloud/about-cloud/about-dbt-cloud.md b/website/docs/docs/cloud/about-cloud/about-dbt-cloud.md index 08bbcb94c3b..1a7e59dd5c2 100644 --- a/website/docs/docs/cloud/about-cloud/about-dbt-cloud.md +++ b/website/docs/docs/cloud/about-cloud/about-dbt-cloud.md @@ -24,7 +24,7 @@ dbt Cloud's [flexible plans](https://www.getdbt.com/pricing/) and features make diff --git a/website/docs/docs/cloud/connect-data-platform/about-connections.md b/website/docs/docs/cloud/connect-data-platform/about-connections.md index 6497e86de89..b9b2c18aced 100644 --- a/website/docs/docs/cloud/connect-data-platform/about-connections.md +++ b/website/docs/docs/cloud/connect-data-platform/about-connections.md @@ -8,7 +8,7 @@ pagination_prev: null --- dbt Cloud can connect with a variety of data platform providers including: - [AlloyDB](/docs/cloud/connect-data-platform/connect-redshift-postgresql-alloydb) -- [Amazon Athena](/docs/cloud/connect-data-platform/connect-amazon-athena) +- [Amazon Athena](/docs/cloud/connect-data-platform/connect-amazon-athena) - [Amazon Redshift](/docs/cloud/connect-data-platform/connect-redshift-postgresql-alloydb) - [Apache Spark](/docs/cloud/connect-data-platform/connect-apache-spark) - [Azure Synapse Analytics](/docs/cloud/connect-data-platform/connect-azure-synapse-analytics) diff --git a/website/docs/docs/cloud/dbt-cloud-ide/develop-in-the-cloud.md b/website/docs/docs/cloud/dbt-cloud-ide/develop-in-the-cloud.md index c9d2cbbad30..de44de67b33 100644 --- a/website/docs/docs/cloud/dbt-cloud-ide/develop-in-the-cloud.md +++ b/website/docs/docs/cloud/dbt-cloud-ide/develop-in-the-cloud.md @@ -13,7 +13,7 @@ The dbt Cloud integrated development environment (IDE) is a single web-based int The dbt Cloud IDE offers several [keyboard shortcuts](/docs/cloud/dbt-cloud-ide/keyboard-shortcuts) and [editing features](/docs/cloud/dbt-cloud-ide/ide-user-interface#editing-features) for faster and efficient development and governance: - Syntax highlighting 
for SQL — Makes it easy to distinguish different parts of your code, reducing syntax errors and enhancing readability. -- AI copilot — Use [dbt Copilot](/docs/cloud/dbt-copilot), a powerful AI engine that can generate documentation, tests, and semantic models for your dbt SQL models. +- AI copilot — Use [dbt Copilot](/docs/cloud/dbt-copilot), a powerful AI engine that can [generate code](/docs/cloud/use-dbt-copilot#generate-and-edit-code) using natural language, and [generate documentation](/docs/build/documentation), [tests](/docs/build/data-tests), and [semantic models](/docs/build/semantic-models) for you with the click of a button. - Auto-completion — Suggests table names, arguments, and column names as you type, saving time and reducing typos. - Code [formatting and linting](/docs/cloud/dbt-cloud-ide/lint-format) — Helps standardize and fix your SQL code effortlessly. - Navigation tools — Easily move around your code, jump to specific lines, find and replace text, and navigate between project files. diff --git a/website/docs/docs/cloud/dbt-copilot.md b/website/docs/docs/cloud/dbt-copilot.md index 403df86a089..bd2573e0ff8 100644 --- a/website/docs/docs/cloud/dbt-copilot.md +++ b/website/docs/docs/cloud/dbt-copilot.md @@ -8,10 +8,12 @@ pagination_prev: null # About dbt Copilot -dbt Copilot is a powerful artificial intelligence (AI) engine that's fully integrated into your dbt Cloud experience and designed to accelerate your analytics workflows. dbt Copilot embeds AI-driven assistance across every stage of the analytics development life cycle (ADLC), empowering data practitioners to deliver data products faster, improve data quality, and enhance data accessibility. With automatic code generation, you can let the AI engine generate the [documentation](/docs/build/documentation), [tests](/docs/build/data-tests), and [semantic models](/docs/build/semantic-models) for you. 
+dbt Copilot is a powerful artificial intelligence (AI) engine that's fully integrated into your dbt Cloud experience and designed to accelerate your analytics workflows. dbt Copilot embeds AI-driven assistance across every stage of the analytics development life cycle (ADLC), empowering data practitioners to deliver data products faster, improve data quality, and enhance data accessibility. + +With automatic code generation, let dbt Copilot [generate code](/docs/cloud/use-dbt-copilot#generate-and-edit-code) using natural language, and [generate documentation](/docs/build/documentation), [tests](/docs/build/data-tests), and [semantic models](/docs/build/semantic-models) for you with the click of a button. :::tip Beta feature -dbt Copilot is designed to _help_ developers generate documentation, tests, and semantic models in dbt Cloud. It's available in beta, in the dbt Cloud IDE only. +dbt Copilot is designed to _help_ developers generate documentation, tests, and semantic models, as well as [code](/docs/cloud/use-dbt-copilot#generate-and-edit-code) using natural language, in dbt Cloud. It's available in beta, in the dbt Cloud IDE only. To use dbt Copilot, you must have an active [dbt Cloud Enterprise account](https://www.getdbt.com/pricing) and either agree to use dbt Labs' OpenAI key or provide your own Open AI API key. [Register here](https://docs.google.com/forms/d/e/1FAIpQLScPjRGyrtgfmdY919Pf3kgqI5E95xxPXz-8JoVruw-L9jVtxg/viewform) or reach out to the Account Team if you're interested in joining the private beta. 
::: diff --git a/website/docs/docs/cloud/use-dbt-copilot.md b/website/docs/docs/cloud/use-dbt-copilot.md index 30def967f96..48e5ffa6fa7 100644 --- a/website/docs/docs/cloud/use-dbt-copilot.md +++ b/website/docs/docs/cloud/use-dbt-copilot.md @@ -1,22 +1,73 @@ --- title: "Use dbt Copilot" sidebar_label: "Use dbt Copilot" -description: "Use the dbt Copilot AI engine to generate documentation, tests, and semantic models from scratch, giving you the flexibility to modify or fix generated code." +description: "Use dbt Copilot to generate documentation, tests, semantic models, and SQL code from scratch, giving you the flexibility to modify or fix generated code." --- # Use dbt Copilot -Use dbt Copilot to generate documentation, tests, and semantic models from scratch, giving you the flexibility to modify or fix generated code. To access and use this AI engine: +Use dbt Copilot to generate documentation, tests, semantic models, and code from scratch, giving you the flexibility to modify or fix generated code. -1. Navigate to the dbt Cloud IDE and select a SQL model file under the **File Explorer**. +This page explains how to use dbt Copilot to: -2. In the **Console** section (under the **File Editor**), click **dbt Copilot** to view the available AI options. +- [Generate resources](#generate-resources) — Save time by using dbt Copilot’s generation button to generate documentation, tests, and semantic model files during your development. +- [Generate and edit code](#generate-and-edit-code) — Use natural language prompts to generate SQL code from scratch or to edit an existing SQL file by using keyboard shortcuts or highlighting code. + +## Generate resources +Generate documentation, tests, and semantic model resources with the click of a button using dbt Copilot, saving you time. To access and use this AI feature: + +1. Navigate to the dbt Cloud IDE and select a SQL model file under the **File Explorer**. +2.
In the **Console** section (under the **File Editor**), click **dbt Copilot** to view the available AI options. 3. Select the available options to generate the YAML config: **Generate Documentation**, **Generate Tests**, or **Generate Semantic Model**. - To generate multiple YAML configs for the same model, click each option separately. dbt Copilot intelligently saves the YAML config in the same file. - 4. Verify the AI-generated code. You can update or fix the code as needed. - 5. Click **Save As**. You should see the file changes under the **Version control** section. + +## Generate and edit code + +dbt Copilot also allows you to generate SQL code directly within the SQL file in the dbt Cloud IDE, using natural language prompts. This means you can rewrite or add specific portions of the SQL file without needing to edit the entire file. + +This intelligent AI tool streamlines SQL development by reducing errors, scaling effortlessly with complexity, and saving valuable time. dbt Copilot's [prompt window](#use-the-prompt-window), accessible by keyboard shortcut, handles repetitive or complex SQL generation effortlessly so you can focus on high-level tasks. + +Use Copilot's prompt window for use cases like: + +- Writing advanced transformations +- Performing bulk edits efficiently +- Crafting complex patterns like regex + +### Use the prompt window + +Access dbt Copilot's AI prompt window using the keyboard shortcut Cmd+B (Mac) or Ctrl+B (Windows) to: + +#### 1. Generate SQL from scratch +- Use the keyboard shortcuts Cmd+B (Mac) or Ctrl+B (Windows) to generate SQL from scratch. +- Enter your instructions to generate SQL code tailored to your needs using natural language. +- Ask dbt Copilot to fix the code or add a specific portion of the SQL file. + + + +#### 2. Edit existing SQL code +- Highlight a section of SQL code and press Cmd+B (Mac) or Ctrl+B (Windows) to open the prompt window for editing. 
+- Use this to refine or modify specific code snippets based on your needs. +- Ask dbt Copilot to fix the code or add a specific portion of the SQL file. + +#### 3. Review changes with the diff view to quickly assess the impact of the changes before making changes +- When a suggestion is generated, Copilot displays a visual "diff" view to help you compare the proposed changes with your existing code: + - **Green**: Means new code that will be added if you accept the suggestion. + - **Red**: Highlights existing code that will be removed or replaced by the suggested changes. + +#### 4. Accept or reject suggestions +- **Accept**: If the generated SQL meets your requirements, click the **Accept** button to apply the changes directly to your `.sql` file in the IDE. +- **Reject**: If the suggestion doesn’t align with your request/prompt, click **Reject** to discard the generated SQL without making changes and start again. + +#### 5. Regenerate code +- To regenerate, press the **Escape** button on your keyboard (or click the Reject button in the popup). This will remove the generated code and put your cursor back into the prompt text area. +- Update your prompt and press **Enter** to try another generation. Press **Escape** again to close the popover entirely. + +Once you've accepted a suggestion, you can continue to use the prompt window to generate additional SQL code and commit your changes to the branch. + + + diff --git a/website/docs/docs/collaborate/auto-exposures.md b/website/docs/docs/collaborate/auto-exposures.md index 495906cee75..a333df19831 100644 --- a/website/docs/docs/collaborate/auto-exposures.md +++ b/website/docs/docs/collaborate/auto-exposures.md @@ -7,7 +7,7 @@ pagination_next: "docs/collaborate/data-tile" image: /img/docs/cloud-integrations/auto-exposures/explorer-lineage.jpg --- -# Auto-exposures +# Auto-exposures As a data team, it’s critical that you have context into the downstream use cases and users of your data products.
Auto-exposures integrate natively with Tableau (Power BI coming soon) and auto-generate downstream lineage in dbt Explorer for a richer experience. diff --git a/website/docs/docs/collaborate/data-health-signals.md b/website/docs/docs/collaborate/data-health-signals.md new file mode 100644 index 00000000000..756b43e3583 --- /dev/null +++ b/website/docs/docs/collaborate/data-health-signals.md @@ -0,0 +1,88 @@ +--- +title: "Data health signals" +sidebar_label: "Data health signals" +id: data-health-signals +description: "Learn how data health signals offer a quick, at-a-glance view of data health when browsing your resources in dbt Explorer." +image: /img/docs/collaborate/dbt-explorer/data-health-signal.jpg +--- + +# Data health signals +Data health signals offer a quick, at-a-glance view of data health when browsing your resources in dbt Explorer. They keep you informed on the status of your resource's health using the indicators **Healthy**, **Caution**, **Degraded**, or **Unknown**. + +- Supported resources are [models](/docs/build/models), [sources](/docs/build/sources), and [exposures](/docs/build/exposures). +- For accurate health data, ensure the resource is up-to-date and has had a recent job run. +- Each data health signal reflects key data health components, such as test success status, missing resource descriptions, missing tests, absence of builds in 30-day windows, [and more](#data-health-signal-criteria). + + + +## Access data health signals + +Access data health signals in the following places: +- In the [search function](/docs/collaborate/explore-projects#search-resources) or under **Models**, **Sources**, or **Exposures** in the **Resource** tab. + - For sources, the data health signal also indicates the [source freshness](/docs/deploy/source-freshness) status. +- In the **Health** column on [each resource's details page](/docs/collaborate/explore-projects#view-resource-details). Hover over or click the signal to view detailed information.
+- In the **Health** column of public models tables. +- In the [DAG lineage graph](/docs/collaborate/explore-projects#project-lineage). Click any node to open the node details panel where you can view the signal and its details. +- In [Data health tiles](/docs/collaborate/data-tile) through an embeddable iFrame and visible in your BI dashboard. + + + +## Data health signal criteria + +Each resource has a health state that is determined by a specific set of criteria. Select the following tabs to view the criteria for that resource type. + + + +The health state of a model is determined by the following criteria: + +| **Health state** | **Criteria** | +|-------------------|---------------| +| ✅ **Healthy** | All of the following must be true:

- Built successfully in the last run
- Built in the last 30 days
- Model has tests configured
- All tests passed
- All upstream [sources are fresh](/docs/build/sources#source-data-freshness) or freshness is not applicable (set to `null`)
- Has a description | +| 🟡 **Caution** | One of the following must be true:

- Not built in the last 30 days
- Tests are not configured
- Tests return warnings
- One or more upstream sources are stale:
    - Has a freshness check configured
    - Freshness check ran in the past 30 days
    - Freshness check returned a warning
- Missing a description | +| 🔴 **Degraded** | One of the following must be true:

- Model failed to build
- Model has failing tests
- One or more upstream sources are stale:
    - Freshness check hasn’t run in the past 30 days
    - Freshness check returned an error | +| ⚪ **Unknown** | - Unable to determine health of resource; no job runs have processed the resource. | + +
+ + + +The health state of a source is determined by the following criteria: + +| **Health state** | **Criteria** | +|-------------------|---------------| +| ✅ Healthy | All of the following must be true:

- Freshness check configured
- Freshness check passed
- Freshness check ran in the past 30 days
- Has a description | +| 🟡 Caution | One of the following must be true:

- Freshness check returned a warning
- Freshness check not configured
- Freshness check not run in the past 30 days
- Missing a description | +| 🔴 Degraded | - Freshness check returned an error | +| ⚪ Unknown | Unable to determine health of resource; no job runs have processed the resource. | + +
+ + + +The health state of an exposure is determined by the following criteria: + +| **Health state** | **Criteria** | +|-------------------|---------------| +| ✅ Healthy | All of the following must be true:

- Underlying sources are fresh
- Underlying models built successfully
- Underlying models’ tests passing
| +| 🟡 Caution | One of the following must be true:

- At least one underlying source’s freshness checks returned a warning
- At least one underlying model was skipped
- At least one underlying model’s tests returned a warning
| +| 🔴 Degraded | One of the following must be true:

- At least one underlying source’s freshness checks returned an error
- At least one underlying model did not build successfully
- At least one model’s tests returned an error | + +
+ + + +
diff --git a/website/docs/docs/collaborate/data-tile.md b/website/docs/docs/collaborate/data-tile.md index 1d5b26e26b7..0edd9d7c44e 100644 --- a/website/docs/docs/collaborate/data-tile.md +++ b/website/docs/docs/collaborate/data-tile.md @@ -2,24 +2,24 @@ title: "Data health tile" id: "data-tile" sidebar_label: "Data health tile" -description: "Embed data health tiles in your dashboards to distill trust signals for data consumers." +description: "Embed data health tiles in your dashboards to distill data health signals for data consumers." image: /img/docs/collaborate/dbt-explorer/data-tile-pass.jpg --- -With data health tiles, stakeholders will get an at-a-glance confirmation on whether the data they’re looking at is stale or degraded. This trust signal allows teams to immediately go back into Explorer to see more details and investigate issues. +With data health tiles, stakeholders will get an at-a-glance confirmation on whether the data they’re looking at is stale or degraded. It allows teams to immediately go back into Explorer to see more details and investigate issues. The data health tile: -- Distills trust signals for data consumers. +- Distills [data health signals](/docs/collaborate/data-health-signals) for data consumers. - Deep links you into dbt Explorer where you can further dive into upstream data issues. - Provides richer information and makes it easier to debug. - Revamps the existing, [job-based tiles](#job-based-data-health). -Data health tiles rely on [exposures](/docs/build/exposures) to surface trust signals in your dashboards. When you configure exposures in your dbt project, you are explicitly defining how specific outputs—like dashboards or reports—depend on your data models. +Data health tiles rely on [exposures](/docs/build/exposures) to surface data health signals in your dashboards. When you configure exposures in your dbt project, you are explicitly defining how specific outputs—like dashboards or reports—depend on your data models. 
- + ## Prerequisites diff --git a/website/docs/docs/collaborate/explore-projects.md b/website/docs/docs/collaborate/explore-projects.md index 3780d100932..0e4dd7963c8 100644 --- a/website/docs/docs/collaborate/explore-projects.md +++ b/website/docs/docs/collaborate/explore-projects.md @@ -3,7 +3,7 @@ title: "Discover data with dbt Explorer" sidebar_label: "Discover data with dbt Explorer" description: "Learn about dbt Explorer and how to interact with it to understand, improve, and leverage your dbt projects." image: /img/docs/collaborate/dbt-explorer/example-project-lineage-graph.png -pagination_next: "docs/collaborate/access-from-dbt-cloud" +pagination_next: "docs/collaborate/data-health-signals" pagination_prev: null --- @@ -31,6 +31,7 @@ Navigate the dbt Explorer overview page to access your project's resources and m - **Marts and public models** — View the [marts](/best-practices/how-we-structure/1-guide-overview#guide-structure-overview) and [public models](/docs/collaborate/govern/model-access#access-modifiers) in your project. You can also navigate to all public models in your account through this view. - **Model query history** — Use [model query history](/docs/collaborate/model-query-history) to track consumption queries on your models for deeper insights. - **Auto-exposures** — [Set up and view auto-exposures](/docs/collaborate/auto-exposures) to automatically expose relevant data models from Tableau to enhance visibility. +- **Data health signals** — View the [data-health-signals](/docs/collaborate/data-health-signals) for each resource to understand its health and performance. @@ -162,61 +163,8 @@ Under the the **Models** option, you can filter on model properties (access or m - - -Trust signal icons offer a quick, at-a-glance view of data health when browsing your resources in dbt Explorer. These icons keep you informed on the status of your resource's health using the indicators **Healthy**, **Caution**, **Degraded**, and **Unknown**. 
For accurate health data, ensure the resource is up-to-date and has had a recent job run. Supported resources are models, sources, and exposures. - -Each trust signal icon reflects key data health components, such as test success status, missing resource descriptions, absence of builds in 30-day windows, and more. - -To access trust signals: -- Use the search function or click on **Models**, **Sources** or **Exposures** under the **Resource** tab. -- View the icons under the **Health** column. -- Hover over or click the trust signal to see detailed information. -- For sources, the trust signal also indicates the source freshness status. - - - - - - - ### Example of keyword search -Example of results from searching on the keyword `customers` and applying the filters models, description, and code. Trust signals are visible to the right of the model name in the search results. +Example of results from searching on the keyword `customers` and applying the filters models, description, and code. [Data health signals](/docs/collaborate/data-health-signals) are visible to the right of the model name in the search results. @@ -225,6 +173,7 @@ Example of results from searching on the keyword `customers` and applying the fi From the sidebar, you can browse your project's resources, its file tree, and the database. - **Resources** tab — All resources in the project organized by type. Select any resource type in the list and all those resources in the project will display as a table in the main section of the page. For a description on the different resource types (like models, metrics, and so on), refer to [About dbt projects](/docs/build/projects). + - [Data health signals](/docs/collaborate/data-health-signals) are visible to the right of the resource name under the **Health** column. - **File Tree** tab — All resources in the project organized by the file in which they are defined. This mirrors the file tree in your dbt project repository. 
- **Database** tab — All resources in the project organized by the database and schema in which they are built. This mirrors your data platform's structure that represents the [applied state](/docs/dbt-cloud-apis/project-state) of your project. @@ -253,7 +202,7 @@ In the upper right corner of the resource details page, you can: -- Trust signal icon — Icons offering a quick, at-a-glance view of data health. These icons indicate whether a model is Healthy, Caution, Degraded, or Unknown. Hover over an icon to view detailed information about the model's health. +- **Data health signals** — [Data health signals](/docs/collaborate/data-health-signals) offer a quick, at-a-glance view of data health. These icons indicate whether a model is Healthy, Caution, Degraded, or Unknown. Hover over an icon to view detailed information about the model's health. - **Status bar** (below the page title) — Information on the last time the model ran, whether the run was successful, how the data is materialized, number of rows, and the size of the model. - **General** tab includes: - **Lineage** graph — The model’s lineage graph that you can interact with. The graph includes one upstream node and one downstream node from the model. Click the Expand icon in the graph's upper right corner to view the model in full lineage graph mode. @@ -270,6 +219,7 @@ In the upper right corner of the resource details page, you can: - **Status bar** (below the page title) — Information on the last time the exposure was updated. +- **Data health signals** — [Data health signals](/docs/collaborate/data-health-signals) offer a quick, at-a-glance view of data health. These icons indicate whether a resource is Healthy, Caution, or Degraded. Hover over an icon to view detailed information about the exposure's health. - **General** tab includes: - **Data health** — The status on data freshness and data quality. - **Status** section — The status on data freshness and data quality. 
@@ -303,6 +253,7 @@ Example of the Tests view: - **Status bar** (below the page title) — Information on the last time the source was updated and the number of tables the source uses. +- **Data health signals** — [Data health signals](/docs/collaborate/data-health-signals) offer a quick, at-a-glance view of data health. These icons indicate whether a resource is Healthy, Caution, or Degraded. Hover over an icon to view detailed information about the source's health. - **General** tab includes: - **Lineage** graph — The source’s lineage graph that you can interact with. The graph includes one upstream node and one downstream node from the source. Click the Expand icon in the graph's upper right corner to view the source in full lineage graph mode. - **Description** section — A description of the source. @@ -323,6 +274,7 @@ Example of the details view for the model `customers`:
diff --git a/website/docs/docs/community-adapters.md b/website/docs/docs/community-adapters.md index 3af4e15b32b..895e47a8fa3 100644 --- a/website/docs/docs/community-adapters.md +++ b/website/docs/docs/community-adapters.md @@ -7,7 +7,8 @@ Community adapters are adapter plugins contributed and maintained by members of | Data platforms (click to view setup guide) ||| | ------------------------------------------ | -------------------------------- | ------------------------------------- | -| [Clickhouse](/docs/core/connect-data-platform/clickhouse-setup) | [Databend Cloud](/docs/core/connect-data-platform/databend-setup) | [Doris & SelectDB](/docs/core/connect-data-platform/doris-setup) | +| [Clickhouse](/docs/core/connect-data-platform/clickhouse-setup) | [CrateDB](/docs/core/connect-data-platform/cratedb-setup) +| [Databend Cloud](/docs/core/connect-data-platform/databend-setup) | [Doris & SelectDB](/docs/core/connect-data-platform/doris-setup) | | [DuckDB](/docs/core/connect-data-platform/duckdb-setup) | [Exasol Analytics](/docs/core/connect-data-platform/exasol-setup) | [Extrica](/docs/core/connect-data-platform/extrica-setup) | | [Hive](/docs/core/connect-data-platform/hive-setup) | [IBM DB2](/docs/core/connect-data-platform/ibmdb2-setup) | [Impala](/docs/core/connect-data-platform/impala-setup) | | [Infer](/docs/core/connect-data-platform/infer-setup) | [iomete](/docs/core/connect-data-platform/iomete-setup) | [MindsDB](/docs/core/connect-data-platform/mindsdb-setup) | diff --git a/website/docs/docs/core/connect-data-platform/bigquery-setup.md b/website/docs/docs/core/connect-data-platform/bigquery-setup.md index 8b1867ef620..bfa99f21a6d 100644 --- a/website/docs/docs/core/connect-data-platform/bigquery-setup.md +++ b/website/docs/docs/core/connect-data-platform/bigquery-setup.md @@ -388,6 +388,28 @@ my-profile: execution_project: buck-stops-here-456 ``` +### Quota project + +By default, dbt will use the `quota_project_id` set within the credentials of the 
account you are using to authenticate to BigQuery. + +Optionally, you may specify `quota_project` to bill for query execution instead of the default quota project specified for the account from the environment. + +This can sometimes be required when impersonating service accounts that do not have the BigQuery API enabled within the project in which they are defined. Without overriding the quota project, it will fail to connect. + +If you choose to set a quota project, the account you use to authenticate must have the `Service Usage Consumer` role on that project. + +```yaml +my-profile: + target: dev + outputs: + dev: + type: bigquery + method: oauth + project: abc-123 + dataset: my_dataset + quota_project: my-bq-quota-project +``` + ### Running Python models on Dataproc import BigQueryDataproc from '/snippets/_bigquery-dataproc.md'; diff --git a/website/docs/docs/core/connect-data-platform/cratedb-setup.md b/website/docs/docs/core/connect-data-platform/cratedb-setup.md new file mode 100644 index 00000000000..fa1b9833e59 --- /dev/null +++ b/website/docs/docs/core/connect-data-platform/cratedb-setup.md @@ -0,0 +1,62 @@ +--- +title: "CrateDB setup" +description: "Read this guide to learn about the CrateDB data platform setup in dbt." +id: "cratedb-setup" +meta: + maintained_by: Crate.io, Inc. + authors: 'CrateDB maintainers' + github_repo: 'crate/dbt-cratedb2' + pypi_package: 'dbt-cratedb2' + min_core_version: 'v1.0.0' + cloud_support: Not Supported + min_supported_version: 'n/a' + slack_channel_name: 'Community Forum' + slack_channel_link: 'https://community.cratedb.com/' + platform_name: 'CrateDB' + config_page: '/reference/resource-configs/no-configs' +--- + +import SetUpPages from '/snippets/_setup-pages-intro.md'; + + + + +[CrateDB] is compatible with PostgreSQL, so its dbt adapter strongly depends on +dbt-postgres, documented at [PostgreSQL profile setup]. 
+ +CrateDB targets are configured exactly the same way, see also [PostgreSQL +configuration], with just a few things to consider which are special to +CrateDB. Relevant details are outlined at [using dbt with CrateDB], +which also includes up-to-date information. + + +## Profile configuration + +CrateDB targets should be set up using a configuration like this minimal sample +of settings in your [`profiles.yml`] file. + + + +```yaml +cratedb_analytics: + target: dev + outputs: + dev: + type: cratedb + host: [clustername].aks1.westeurope.azure.cratedb.net + port: 5432 + user: [username] + pass: [password] + dbname: crate # Do not change this value. CrateDB's only catalog is `crate`. + schema: doc # Define the schema name. CrateDB's default schema is `doc`. +``` + + + + + +[CrateDB]: https://cratedb.com/database +[PostgreSQL configuration]: https://docs.getdbt.com/reference/resource-configs/postgres-configs +[PostgreSQL profile setup]: https://docs.getdbt.com/docs/core/connect-data-platform/postgres-setup +[`profiles.yml`]: https://docs.getdbt.com/docs/core/connect-data-platform/profiles.yml +[using dbt with CrateDB]: https://cratedb.com/docs/guide/integrate/dbt/ diff --git a/website/docs/docs/core/connect-data-platform/dremio-setup.md b/website/docs/docs/core/connect-data-platform/dremio-setup.md index 21d0ee2956b..7ac304bba2b 100644 --- a/website/docs/docs/core/connect-data-platform/dremio-setup.md +++ b/website/docs/docs/core/connect-data-platform/dremio-setup.md @@ -3,14 +3,14 @@ title: "Dremio setup" description: "Read this guide to learn about the Dremio warehouse setup in dbt." 
meta: maintained_by: Dremio - authors: 'Dremio (formerly Fabrice Etanchaud)' + authors: 'Dremio' github_repo: 'dremio/dbt-dremio' pypi_package: 'dbt-dremio' - min_core_version: 'v1.2.0' + min_core_version: 'v1.8.0' cloud_support: Not Supported min_supported_version: 'Dremio 22.0' - slack_channel_name: 'n/a' - slack_channel_link: 'https://www.getdbt.com/community' + slack_channel_name: 'db-dremio' + slack_channel_link: 'https://getdbt.slack.com/archives/C049G61TKBK' platform_name: 'Dremio' config_page: '/reference/resource-configs/no-configs' --- @@ -36,10 +36,6 @@ Before connecting from project to Dremio Cloud, follow these prerequisite steps: * Ensure that you are using version 22.0 or later. * Ensure that Python 3.9.x or later is installed on the system that you are running dbt on. -* Enable these support keys in your Dremio cluster: - * `dremio.iceberg.enabled` - * `dremio.iceberg.ctas.enabled` - * `dremio.execution.support_unlimited_splits` See Support Keys in the Dremio documentation for the steps. * If you want to use TLS to secure the connection between dbt and Dremio Software, configure full wire encryption in your Dremio cluster. For instructions, see Configuring Wire Encryption. @@ -60,10 +56,6 @@ Next, configure the profile for your project. When you initialize a project, you create one of these three profiles. You must configure it before trying to connect to Dremio Cloud or Dremio Software. -## Profiles - -When you initialize a project, you create one of these three profiles. You must configure it before trying to connect to Dremio Cloud or Dremio Software.
- * Profile for Dremio Cloud * Profile for Dremio Software with Username/Password Authentication * Profile for Dremio Software with Authentication Through a Personal Access Token @@ -88,7 +80,7 @@ For descriptions of the configurations in these profiles, see [Configurations](# [project name]: outputs: dev: - cloud_host: https://api.dremio.cloud + cloud_host: api.dremio.cloud cloud_project_id: [project ID] object_storage_source: [name] object_storage_path: [path] @@ -149,9 +141,7 @@ For descriptions of the configurations in these profiles, see [Configurations](# -## Configurations - -### Configurations Common to Profiles for Dremio Cloud and Dremio Software +## Configurations Common to Profiles for Dremio Cloud and Dremio Software | Configuration | Required? | Default Value | Description | @@ -167,7 +157,7 @@ For descriptions of the configurations in these profiles, see [Configurations](# | Configuration | Required? | Default Value | Description | | --- | --- | --- | --- | -| `cloud_host` | Yes | `https://api.dremio.cloud` | US Control Plane: `https://api.dremio.cloud`

EU Control Plane: `https://api.eu.dremio.cloud` | +| `cloud_host` | Yes | `api.dremio.cloud` | US Control Plane: `api.dremio.cloud`

EU Control Plane: `api.eu.dremio.cloud` | | `user` | Yes | None | Email address used as a username in Dremio Cloud | | `pat` | Yes | None | The personal access token to use for authentication. See [Personal Access Tokens](https://docs.dremio.com/cloud/security/authentication/personal-access-token/) for instructions about obtaining a token. | | `cloud_project_id` | Yes | None | The ID of the Sonar project in which to run transformations. | diff --git a/website/docs/docs/core/docker-install.md b/website/docs/docs/core/docker-install.md index 5ce033c7996..fc3caa42a03 100644 --- a/website/docs/docs/core/docker-install.md +++ b/website/docs/docs/core/docker-install.md @@ -30,11 +30,23 @@ docker pull ghcr.io/dbt-labs/: ### Running a dbt Docker image in a container The `ENTRYPOINT` for dbt Docker images is the command `dbt`. You can bind-mount your project to `/usr/app` and use dbt as normal: + +``` +docker run \ +--network=host \ +--mount type=bind,source=path/to/project,target=/usr/app \ +--mount type=bind,source=path/to/profiles.yml,target=/root/.dbt/profiles.yml \ + \ +ls +``` + +Or + ``` docker run \ --network=host \ --mount type=bind,source=path/to/project,target=/usr/app \ ---mount type=bind,source=path/to/profiles.yml,target=/root/.dbt/ \ +--mount type=bind,source=path/to/profiles.yml.dbt,target=/root/.dbt/ \ \ ls ``` diff --git a/website/docs/docs/dbt-cloud-apis/discovery-use-cases-and-examples.md b/website/docs/docs/dbt-cloud-apis/discovery-use-cases-and-examples.md index e095374343f..08941c70e1a 100644 --- a/website/docs/docs/dbt-cloud-apis/discovery-use-cases-and-examples.md +++ b/website/docs/docs/dbt-cloud-apis/discovery-use-cases-and-examples.md @@ -495,7 +495,7 @@ Graph example: ### Are my data sources fresh? -Checking [source freshness](/docs/build/sources#snapshotting-source-data-freshness) allows you to ensure that sources loaded and used in your dbt project are compliant with expectations. 
The API provides the latest metadata about source loading and information about the freshness check criteria. +Checking [source freshness](/docs/build/sources#source-data-freshness) allows you to ensure that sources loaded and used in your dbt project are compliant with expectations. The API provides the latest metadata about source loading and information about the freshness check criteria. diff --git a/website/docs/docs/dbt-versions/cloud-release-tracks.md b/website/docs/docs/dbt-versions/cloud-release-tracks.md index 290078da572..89836aa13e6 100644 --- a/website/docs/docs/dbt-versions/cloud-release-tracks.md +++ b/website/docs/docs/dbt-versions/cloud-release-tracks.md @@ -14,16 +14,18 @@ By moving your environments and jobs to release tracks you can get all the funct ## Which release tracks are available? -- **"Latest"** (available to all plans, formerly called "Versionless"): Provides a continuous release of the latest functionality in dbt Cloud. Includes early access to new features of the dbt framework before they're available in open source releases of dbt Core. -- **"Compatible"** (available to Team + Enterprise): Provides a monthly release aligned with the most recent open source versions of dbt Core and adapters, plus functionality exclusively available in dbt Cloud. -- **"Extended"** (available to Enterprise): Provides a delayed release of the previous month's "Compatible" release. +| Release track | Description | Plan availability | API value | +| ------------- | ----------- | ----------------- | --------- | +| **Latest**
| Formerly called "Versionless", provides a continuous release of the latest functionality in dbt Cloud. Includes early access to new features of the dbt framework before they're available in open source releases of dbt Core. | All plans | `latest` (or `versionless`) | +| **Compatible** | Provides a monthly release aligned with the most recent open source versions of dbt Core and adapters, plus functionality exclusively available in dbt Cloud. | Team + Enterprise | `compatible` | +| **Extended** | The previous month's "Compatible" release. | Enterprise | `extended` | -The first "Compatible" release will be in December 2024, after the final release of dbt Core v1.9.0. For December 2024 only, the "Extended" release is the same as "Compatible." Starting in January 2025, "Extended" will be one month behind "Compatible." +The first "Compatible" release was on December 12, 2024, after the final release of dbt Core v1.9.0. For December 2024 only, the "Extended" release is the same as "Compatible." Starting in January 2025, "Extended" will be one month behind "Compatible." To configure an environment in the [dbt Cloud Admin API](/docs/dbt-cloud-apis/admin-cloud-api) or [Terraform](https://registry.terraform.io/providers/dbt-labs/dbtcloud/latest) to use a release track, set `dbt_version` to the release track name: -- `latest` (formerly called `versionless`; the old name is still supported) -- `compatible` (available to Team + Enterprise) -- `extended` (available to Enterprise) +- `latest` (or `versionless`, the old name is still supported) +- `compatible` +- `extended` ## Which release track should I choose? @@ -69,7 +71,7 @@ To learn more about how dbt Labs deploys stable dbt upgrades in a safe manner to If you're running dbt version 1.6 or older, please know that your version of dbt Core has reached [end-of-life (EOL)](/docs/dbt-versions/core#eol-version-support) and is no longer supported. 
We strongly recommend that you update to a newer version as soon as reasonably possible. -dbt Labs has extended the critical support period of dbt Core v1.7 for dbt Cloud Enterprise customers to January 31, 2024. At that point, we will be asking all customers to select a Release Track for receiving ongoing updates to dbt in dbt Cloud. +dbt Labs has extended the critical support period of dbt Core v1.7 for dbt Cloud Enterprise customers to March 2025. At that point, we will be encouraging all customers to select a Release Track for ongoing updates in dbt Cloud. @@ -77,9 +79,7 @@ If you're running dbt version v1.6 or older, please know that your version of db dbt Labs has extended the "Critical Support" period of dbt Core v1.7 for dbt Cloud Enterprise customers while we work through the migration with those customers to Release Tracks. In the meantime, this means that v1.7 will continue to be accessible in dbt Cloud for Enterprise customers, jobs and environments on v1.7 for those customers will not be automatically migrated to "Latest," and dbt Labs will continue to fix critical bugs and security issues. -dbt Cloud accounts on the Developer and Team plans will be migrated to the "Latest" release track after November 1, 2024. If you know that your project will not be compatible with the upgrade, for one of the reasons described here, or a different reason in your own testing, you should [contact dbt Cloud support](https://docs.getdbt.com/docs/dbt-support#dbt-cloud-support) to request an extension. - -If your account has been migrated to the "Latest" release track, and you are seeing net-new failures in your scheduled dbt jobs, you should also [contact dbt Cloud support](https://docs.getdbt.com/docs/dbt-support#dbt-cloud-support) to request an extension. +Starting in October 2024, dbt Cloud accounts on the Developer and Team plans have been migrated to release tracks from older dbt Core versions.
If your account was migrated to the "Latest" release track and you notice new failures in scheduled jobs, please [contact dbt Cloud support](https://docs.getdbt.com/docs/dbt-support#dbt-cloud-support) to report the problem or request an extension. @@ -134,8 +134,8 @@ In 2024, we've changed the way that new dbt functionality is made available for Opting into a release cadence with automated upgrades is required for accessing any new functionality that we've released in 2024, and going forward. -We continue to release new minor versions of dbt Core (OSS), including v1.9 which will be available later this year. When we do, it will be a subset of the functionality that's already available to dbt Cloud customers, and always after the functionality has been available in dbt Cloud. +We continue to release new minor versions of dbt Core (OSS). We most recently released dbt Core v1.9 on December 9, 2024. These releases always include a subset of the functionality that's already available to dbt Cloud customers, and always after the functionality has been available in dbt Cloud.
-If you have comments or concerns, we’re happy to help. If you’re an existing dbt Cloud customer, you may reach out to your account team or [contact support](/docs/dbt-support#dbt-cloud-support). \ No newline at end of file +If you have comments or concerns, we’re happy to help. If you’re an existing dbt Cloud customer, you may reach out to your account team or [contact support](/docs/dbt-support#dbt-cloud-support). diff --git a/website/docs/docs/dbt-versions/compatible-track-changelog.md b/website/docs/docs/dbt-versions/compatible-track-changelog.md index 8f31775e3f1..a8243e2ceff 100644 --- a/website/docs/docs/dbt-versions/compatible-track-changelog.md +++ b/website/docs/docs/dbt-versions/compatible-track-changelog.md @@ -4,9 +4,9 @@ sidebar_label: "Compatible Track Changelog" description: "The Compatible release track updates once per month, and it includes up-to-date open source versions as of the monthly release." --- -:::info Coming soon +:::info Preview -The "Compatible" and "Extended" release tracks will be available in Preview to eligible dbt Cloud accounts in December 2024. +The "Compatible" and "Extended" [release tracks](/docs/dbt-versions/cloud-release-tracks) are available in Preview. Access will be rolling out to dbt Cloud accounts on eligible plans during the week of December 16-20, 2024. ::: @@ -20,8 +20,42 @@ For more information, see [release tracks](/docs/dbt-versions/cloud-release-trac ## December 2024 -Planned release: December 11-13 - -This release will include functionality from `dbt-core==1.9.0` and the most recent versions of all adapters supported in dbt Cloud. 
After the Compatible release is cut, we will update with: -- exact versions of open source dbt packages -- changelog notes concerning functionality specific to dbt Cloud +Release date: December 12, 2024 + +This release includes functionality from the following versions of dbt Core OSS: +``` +dbt-core==1.9.0 + +# shared interfaces +dbt-adapters==1.10.4 +dbt-common==1.14.0 +dbt-semantic-interfaces==0.7.4 + +# adapters +dbt-athena==1.9.0 +dbt-bigquery==1.9.0 +dbt-databricks==1.9.0 +dbt-fabric==1.8.8 +dbt-postgres==1.9.0 +dbt-redshift==1.9.0 +dbt-snowflake==1.9.0 +dbt-spark==1.9.0 +dbt-synapse==1.8.2 +dbt-teradata==1.8.2 +dbt-trino==1.8.5 +``` + +Changelogs: +- [dbt-core 1.9.0](https://github.com/dbt-labs/dbt-core/blob/1.9.latest/CHANGELOG.md#dbt-core-190---december-09-2024) +- [dbt-adapters 1.10.4](https://github.com/dbt-labs/dbt-adapters/blob/main/CHANGELOG.md#dbt-adapters-1104---november-11-2024) +- [dbt-common 1.14.0](https://github.com/dbt-labs/dbt-common/blob/main/CHANGELOG.md) +- [dbt-bigquery 1.9.0](https://github.com/dbt-labs/dbt-bigquery/blob/1.9.latest/CHANGELOG.md#dbt-bigquery-190---december-09-2024) +- [dbt-databricks 1.9.0](https://github.com/databricks/dbt-databricks/blob/main/CHANGELOG.md#dbt-databricks-190-december-9-2024) +- [dbt-fabric 1.8.8](https://github.com/microsoft/dbt-fabric/blob/v1.8.latest/CHANGELOG.md) +- [dbt-postgres 1.9.0](https://github.com/dbt-labs/dbt-postgres/blob/main/CHANGELOG.md#dbt-postgres-190---december-09-2024) +- [dbt-redshift 1.9.0](https://github.com/dbt-labs/dbt-redshift/blob/1.9.latest/CHANGELOG.md#dbt-redshift-190---december-09-2024) +- [dbt-snowflake 1.9.0](https://github.com/dbt-labs/dbt-snowflake/blob/1.9.latest/CHANGELOG.md#dbt-snowflake-190---december-09-2024) +- [dbt-spark 1.9.0](https://github.com/dbt-labs/dbt-spark/blob/1.9.latest/CHANGELOG.md#dbt-spark-190---december-10-2024) +- [dbt-synapse 1.8.2](https://github.com/microsoft/dbt-synapse/blob/v1.8.latest/CHANGELOG.md) +- [dbt-teradata 
1.8.2](https://github.com/Teradata/dbt-teradata/releases/tag/v1.8.2) +- [dbt-trino 1.8.5](https://github.com/starburstdata/dbt-trino/blob/master/CHANGELOG.md#dbt-trino-185---december-11-2024) \ No newline at end of file diff --git a/website/docs/docs/dbt-versions/core-upgrade/07-upgrading-to-v1.8.md b/website/docs/docs/dbt-versions/core-upgrade/07-upgrading-to-v1.8.md index e9e45a69153..2c4370f929c 100644 --- a/website/docs/docs/dbt-versions/core-upgrade/07-upgrading-to-v1.8.md +++ b/website/docs/docs/dbt-versions/core-upgrade/07-upgrading-to-v1.8.md @@ -1,5 +1,5 @@ --- -title: "Upgrading to v1.8 (latest)" +title: "Upgrading to v1.8" id: upgrading-to-v1.8 description: New features and changes in dbt Core v1.8 displayed_sidebar: "docs" @@ -46,7 +46,7 @@ Historically, dbt's test coverage was confined to [“data” tests](/docs/build In v1.8, we're introducing native support for [unit testing](/docs/build/unit-tests). Unit tests validate your SQL modeling logic on a small set of static inputs __before__ you materialize your full model in production. They support a test-driven development approach, improving both the efficiency of developers and the reliability of code. -Starting from v1.8, when you execute the `dbt test` command, it will run both unit and data tests. Use the [`test_type`](/reference/node-selection/methods#the-test_type-method) method to run only unit or data tests: +Starting from v1.8, when you execute the `dbt test` command, it will run both unit and data tests. 
Use the [`test_type`](/reference/node-selection/methods#test_type) method to run only unit or data tests: ```shell diff --git a/website/docs/docs/dbt-versions/core-upgrade/09-upgrading-to-v1.6.md b/website/docs/docs/dbt-versions/core-upgrade/09-upgrading-to-v1.6.md index bbb2535a74c..4a210e23fc0 100644 --- a/website/docs/docs/dbt-versions/core-upgrade/09-upgrading-to-v1.6.md +++ b/website/docs/docs/dbt-versions/core-upgrade/09-upgrading-to-v1.6.md @@ -101,7 +101,7 @@ The ability for installed packages to override built-in materializations without ### Quick hits -- [`state:unmodified` and `state:old`](/reference/node-selection/methods#the-state-method) for [MECE](https://en.wikipedia.org/wiki/MECE_principle) stateful selection +- [`state:unmodified` and `state:old`](/reference/node-selection/methods#state) for [MECE](https://en.wikipedia.org/wiki/MECE_principle) stateful selection - [`invocation_args_dict`](/reference/dbt-jinja-functions/flags#invocation_args_dict) includes full `invocation_command` as string - [`dbt debug --connection`](/reference/commands/debug) to test just the data platform connection specified in a profile - [`dbt docs generate --empty-catalog`](/reference/commands/cmd-docs) to skip catalog population while generating docs diff --git a/website/docs/docs/dbt-versions/core-upgrade/11-Older versions/13-upgrading-to-v1.3.md b/website/docs/docs/dbt-versions/core-upgrade/11-Older versions/13-upgrading-to-v1.3.md index 250aa76ab26..2dd78727c65 100644 --- a/website/docs/docs/dbt-versions/core-upgrade/11-Older versions/13-upgrading-to-v1.3.md +++ b/website/docs/docs/dbt-versions/core-upgrade/11-Older versions/13-upgrading-to-v1.3.md @@ -54,5 +54,5 @@ GitHub discussion with details: [dbt-labs/dbt-core#6011](https://github.com/dbt- ### Quick hits - **["Full refresh"](/reference/resource-configs/full_refresh)** flag supports a short name, `-f`. 
-- **[The "config" selection method](/reference/node-selection/methods#the-config-method)** supports boolean and list config values, in addition to strings. +- **[The "config" selection method](/reference/node-selection/methods#config)** supports boolean and list config values, in addition to strings. - Two new dbt-Jinja context variables for accessing invocation metadata: [`invocation_args_dict`](/reference/dbt-jinja-functions/flags#invocation_args_dict) and [`dbt_metadata_envs`](/reference/dbt-jinja-functions/env_var#custom-metadata). diff --git a/website/docs/docs/dbt-versions/core-upgrade/11-Older versions/14-upgrading-to-v1.2.md b/website/docs/docs/dbt-versions/core-upgrade/11-Older versions/14-upgrading-to-v1.2.md index f2102560dfa..1b393df2f01 100644 --- a/website/docs/docs/dbt-versions/core-upgrade/11-Older versions/14-upgrading-to-v1.2.md +++ b/website/docs/docs/dbt-versions/core-upgrade/11-Older versions/14-upgrading-to-v1.2.md @@ -32,7 +32,7 @@ See GitHub discussion [dbt-labs/dbt-core#5468](https://github.com/dbt-labs/dbt-c - **[Grants](/reference/resource-configs/grants)** are natively supported in `dbt-core` for the first time. That support extends to all standard materializations, and the most popular adapters. If you already use hooks to apply simple grants, we encourage you to use built-in `grants` to configure your models, seeds, and snapshots instead. This will enable you to [DRY](https://en.wikipedia.org/wiki/Don%27t_repeat_yourself) up your duplicated or boilerplate code. - **[Metrics](/docs/build/build-metrics-intro)** now support an `expression` type (metrics-on-metrics), as well as a `metric()` function to use when referencing metrics from within models, macros, or `expression`-type metrics. 
For more information on how to use expression metrics, check out the [**`dbt_metrics` package**](https://github.com/dbt-labs/dbt_metrics) - **[dbt-Jinja functions](/reference/dbt-jinja-functions)** now include the [`itertools` Python module](/reference/dbt-jinja-functions/modules#itertools), as well as the [set](/reference/dbt-jinja-functions/set) and [zip](/reference/dbt-jinja-functions/zip) functions. -- **[Node selection](/reference/node-selection/syntax)** includes a [file selection method](/reference/node-selection/methods#the-file-method) (`-s model.sql`), and [yaml selector](/reference/node-selection/yaml-selectors) inheritance. +- **[Node selection](/reference/node-selection/syntax)** includes a [file selection method](/reference/node-selection/methods#file) (`-s model.sql`), and [yaml selector](/reference/node-selection/yaml-selectors) inheritance. - **[Global configs](/reference/global-configs/about-global-configs)** now include CLI flag and environment variable settings for [`target-path`](/reference/global-configs/json-artifacts) and [`log-path`](/reference/global-configs/logs), which can be used to override the values set in `dbt_project.yml` ### Specific adapters diff --git a/website/docs/docs/dbt-versions/core-upgrade/11-Older versions/15-upgrading-to-v1.1.md b/website/docs/docs/dbt-versions/core-upgrade/11-Older versions/15-upgrading-to-v1.1.md index 0dc3d279b87..01bbabf9d16 100644 --- a/website/docs/docs/dbt-versions/core-upgrade/11-Older versions/15-upgrading-to-v1.1.md +++ b/website/docs/docs/dbt-versions/core-upgrade/11-Older versions/15-upgrading-to-v1.1.md @@ -45,7 +45,7 @@ Expected a schema version of "https://schemas.getdbt.com/dbt/manifest/v5.json" i ### Advanced and experimental functionality -**Fresh Rebuilds.** There's a new _experimental_ selection method in town: [`source_status:fresher`](/reference/node-selection/methods#the-source_status-method). 
Much like the `state:` and `result` methods, the goal is to use dbt metadata to run your DAG more efficiently. If dbt has access to previous and current results of `dbt source freshness` (the `sources.json` artifact), dbt can compare them to determine which sources have loaded new data, and select only resources downstream of "fresher" sources. Read more in [Understanding State](/reference/node-selection/syntax#about-node-selection) and [CI/CD in dbt Cloud](/docs/deploy/continuous-integration). +**Fresh Rebuilds.** There's a new _experimental_ selection method in town: [`source_status:fresher`](/reference/node-selection/methods#source_status). Much like the `state:` and `result` methods, the goal is to use dbt metadata to run your DAG more efficiently. If dbt has access to previous and current results of `dbt source freshness` (the `sources.json` artifact), dbt can compare them to determine which sources have loaded new data, and select only resources downstream of "fresher" sources. Read more in [Understanding State](/reference/node-selection/syntax#about-node-selection) and [CI/CD in dbt Cloud](/docs/deploy/continuous-integration). 
[**dbt-Jinja functions**](/reference/dbt-jinja-functions) have a new landing page, and two new members: diff --git a/website/docs/docs/dbt-versions/core-upgrade/11-Older versions/upgrading-to-0-17-0.md b/website/docs/docs/dbt-versions/core-upgrade/11-Older versions/upgrading-to-0-17-0.md index e26a69fd1c7..6a19bdcf808 100644 --- a/website/docs/docs/dbt-versions/core-upgrade/11-Older versions/upgrading-to-0-17-0.md +++ b/website/docs/docs/dbt-versions/core-upgrade/11-Older versions/upgrading-to-0-17-0.md @@ -247,7 +247,7 @@ BigQuery: ## New and changed documentation **Core** -- [`path:` selectors](/reference/node-selection/methods#the-path-method) +- [`path:` selectors](/reference/node-selection/methods#path) - [`--fail-fast` command](/reference/commands/run#failing-fast) - `as_text` Jinja filter: removed this defunct filter - [accessing nodes in the `graph` object](/reference/dbt-jinja-functions/graph) diff --git a/website/docs/docs/dbt-versions/release-notes.md b/website/docs/docs/dbt-versions/release-notes.md index 2ca41480bc3..77f0df7d18d 100644 --- a/website/docs/docs/dbt-versions/release-notes.md +++ b/website/docs/docs/dbt-versions/release-notes.md @@ -30,6 +30,10 @@ Release notes are grouped by month for both multi-tenant and virtual private clo +tags: order_metrics ``` +- **New**: [Auto exposures](/docs/collaborate/auto-exposures) are now generally available to dbt Cloud Enterprise plans. Auto-exposures integrate natively with Tableau (Power BI coming soon) and auto-generate downstream lineage in dbt Explorer for a richer experience. +- **New**: The dbt Semantic Layer supports Sigma as a [partner integration](/docs/cloud-integrations/avail-sl-integrations), available in Preview. Refer to [Sigma](https://help.sigmacomputing.com/docs/configure-a-dbt-semantic-layer-integration) for more information. +- **New**: The dbt Semantic Layer now supports Azure Single-tenant deployments. 
Refer to [Set up the dbt Semantic Layer](/docs/use-dbt-semantic-layer/setup-sl) for more information on how to get started. +- **Fix**: Resolved intermittent issues in Single-tenant environments affecting Semantic Layer and query history. - **Fix**: [The dbt Semantic Layer](/docs/use-dbt-semantic-layer/dbt-sl) now respects the BigQuery [`execution_project` attribute](/docs/core/connect-data-platform/bigquery-setup#execution-project), including for exports. - **New**: [Model notifications](/docs/deploy/model-notifications) are now generally available in dbt Cloud. These notifications alert model owners through email about any issues encountered by models and tests as soon as they occur while running a job. - **New**: You can now use your [Azure OpenAI key](/docs/cloud/account-integrations?ai-integration=azure#ai-integrations) (available in beta) to use dbt Cloud features like [dbt Copilot](/docs/cloud/dbt-copilot) and [Ask dbt](/docs/cloud-integrations/snowflake-native-app) . Additionally, you can use your own [OpenAI API key](/docs/cloud/account-integrations?ai-integration=openai#ai-integrations) or use [dbt Labs-managed OpenAI](/docs/cloud/account-integrations?ai-integration=dbtlabs#ai-integrations) key. Refer to [AI integrations](/docs/cloud/account-integrations#ai-integrations) for more information. @@ -38,7 +42,7 @@ Release notes are grouped by month for both multi-tenant and virtual private clo ## November 2024 -- **Enhancement**: Trust signal icons in dbt Explorer are now available for Exposures, providing a quick view of data health while browsing resources. To view trust signal icons, go to dbt Explorer and click **Exposures** under the **Resource** tab. Refer to [Trust signal for resources](/docs/collaborate/explore-projects#trust-signals-for-resources) for more info. +- **Enhancement**: Data health signals in dbt Explorer are now available for Exposures, providing a quick view of data health while browsing resources. 
To view data health signal icons, go to dbt Explorer and click **Exposures** under the **Resource** tab. Refer to [Data health signals for resources](/docs/collaborate/data-health-signals) for more info. - **Bug**: Identified and fixed an error with Semantic Layer queries that take longer than 10 minutes to complete. - **Fix**: Job environment variable overrides in credentials are now respected for Exports. Previously, they were ignored. - **Behavior change**: If you use a custom microbatch macro, set a [`require_batched_execution_for_custom_microbatch_strategy` behavior flag](/reference/global-configs/behavior-changes#custom-microbatch-strategy) in your `dbt_project.yml` to enable batched execution. If you don't have a custom microbatch macro, you don't need to set this flag as dbt will handle microbatching automatically for any model using the [microbatch strategy](/docs/build/incremental-microbatch#how-microbatch-compares-to-other-incremental-strategies). @@ -102,7 +106,7 @@ Release notes are grouped by month for both multi-tenant and virtual private clo - **Enhancement**: In the "Latest" release track in dbt Cloud, snapshots defined in SQL files can now use `config` defined in `schema.yml` YAML files. This update resolves the previous limitation that required snapshot properties to be defined exclusively in `dbt_project.yml` and/or a `config()` block within the SQL file. This will also be released in dbt Core 1.9. - **New**: In the "Latest" release track in dbt Cloud, the `snapshot_meta_column_names` config allows for customizing the snapshot metadata columns. This feature allows an organization to align these automatically-generated column names with their conventions, and will be included in the upcoming dbt Core 1.9 release. - **Enhancement**: the "Latest" release track in dbt Cloud infers a model's `primary_key` based on configured data tests and/or constraints within `manifest.json`.
The inferred `primary_key` is visible in dbt Explorer and utilized by the dbt Cloud [compare changes](/docs/deploy/run-visibility#compare-tab) feature. This will also be released in dbt Core 1.9. Read about the [order dbt infers columns can be used as primary key of a model](https://github.com/dbt-labs/dbt-core/blob/7940ad5c7858ff11ef100260a372f2f06a86e71f/core/dbt/contracts/graph/nodes.py#L534-L541). -- **New:** dbt Explorer now includes trust signal icons, which is currently available as a [Preview](/docs/dbt-versions/product-lifecycles#dbt-cloud). Trust signals offer a quick, at-a-glance view of data health when browsing your dbt models in Explorer. These icons indicate whether a model is **Healthy**, **Caution**, **Degraded**, or **Unknown**. For accurate health data, ensure the resource is up-to-date and has had a recent job run. Refer to [Trust signals](/docs/collaborate/explore-projects#trust-signals-for-resources) for more information. +- **New:** dbt Explorer now includes trust signal icons, which are currently available as a [Preview](/docs/dbt-versions/product-lifecycles#dbt-cloud). Trust signals offer a quick, at-a-glance view of data health when browsing your dbt models in Explorer. These icons indicate whether a model is **Healthy**, **Caution**, **Degraded**, or **Unknown**. For accurate health data, ensure the resource is up-to-date and has had a recent job run. Refer to [Data health signals](/docs/collaborate/data-health-signals) for more information. - **New:** Auto exposures are now available in Preview in dbt Cloud. Auto-exposures helps users understand how their models are used in downstream analytics tools to inform investments and reduce incidents. It imports and auto-generates exposures based on Tableau dashboards, with user-defined curation. To learn more, refer to [Auto exposures](/docs/collaborate/auto-exposures).
diff --git a/website/docs/docs/deploy/about-ci.md b/website/docs/docs/deploy/about-ci.md index 1de9365219c..e27d2e7d08e 100644 --- a/website/docs/docs/deploy/about-ci.md +++ b/website/docs/docs/deploy/about-ci.md @@ -19,9 +19,9 @@ Refer to the guide [Get started with continuous integration tests](/guides/set-u icon="dbt-bit"/> -

\ No newline at end of file +
diff --git a/website/docs/docs/deploy/artifacts.md b/website/docs/docs/deploy/artifacts.md index cb8d3e85da0..c87bc2fbd1b 100644 --- a/website/docs/docs/deploy/artifacts.md +++ b/website/docs/docs/deploy/artifacts.md @@ -4,7 +4,7 @@ id: "artifacts" description: "Use artifacts to power your automated docs site and source freshness data." --- -When running dbt jobs, dbt Cloud generates and saves *artifacts*. You can use these artifacts, like `manifest.json`, `catalog.json`, and `sources.json` to power different aspects of dbt Cloud, namely: [dbt Explorer](/docs/collaborate/explore-projects), [dbt Docs](/docs/collaborate/build-and-view-your-docs#dbt-docs), and [source freshness reporting](/docs/build/sources#snapshotting-source-data-freshness). +When running dbt jobs, dbt Cloud generates and saves *artifacts*. You can use these artifacts, like `manifest.json`, `catalog.json`, and `sources.json` to power different aspects of dbt Cloud, namely: [dbt Explorer](/docs/collaborate/explore-projects), [dbt Docs](/docs/collaborate/build-and-view-your-docs#dbt-docs), and [source freshness reporting](/docs/build/sources#source-data-freshness). 
## Create dbt Cloud Artifacts diff --git a/website/docs/docs/deploy/ci-jobs.md b/website/docs/docs/deploy/ci-jobs.md index 0f9b6ba377a..38bfb56a728 100644 --- a/website/docs/docs/deploy/ci-jobs.md +++ b/website/docs/docs/deploy/ci-jobs.md @@ -146,7 +146,7 @@ For semantic nodes and models that aren't downstream of modified models, dbt Clo -To only validate modified semantic nodes, use the following command (with [state selection](/reference/node-selection/syntax#stateful-selection)): +To only validate modified semantic nodes, use the following command (with [state selection](/reference/node-selection/syntax#state-selection)): ```bash dbt sl validate --select state:modified+ diff --git a/website/docs/docs/deploy/deploy-jobs.md b/website/docs/docs/deploy/deploy-jobs.md index 96ec8a1932e..9a0cc3cfcfa 100644 --- a/website/docs/docs/deploy/deploy-jobs.md +++ b/website/docs/docs/deploy/deploy-jobs.md @@ -13,7 +13,7 @@ You can use deploy jobs to build production data assets. Deploy jobs make it eas - Job run details, including run timing, [model timing data](/docs/deploy/run-visibility#model-timing), and [artifacts](/docs/deploy/artifacts) - Detailed run steps with logs and their run step statuses -You can create a deploy job and configure it to run on [scheduled days and times](#schedule-days) or enter a [custom cron schedule](#cron-schedule). +You can create a deploy job and configure it to run on [scheduled days and times](#schedule-days), enter a [custom cron schedule](#cron-schedule), or [trigger the job after another job completes](#trigger-on-job-completion). ## Prerequisites @@ -115,11 +115,18 @@ Examples of cron job schedules: ### Trigger on job completion -To _chain_ deploy jobs together, enable the **Run when another job finishes** option and specify the upstream (parent) job that, when it completes, will trigger your job. You can also use the [Create Job API](/dbt-cloud/api-v2#/operations/Create%20Job) to do this. +To _chain_ deploy jobs together: +1. 
In the **Triggers** section, enable the **Run when another job finishes** option. +2. Select the project that has the deploy job you want to run after completion. +3. Specify the upstream (parent) job that, when completed, will trigger your job. + - You can also use the [Create Job API](/dbt-cloud/api-v2#/operations/Create%20Job) to do this. +4. In the **Completes on** option, select the job run status(es) that will [enqueue](/docs/deploy/job-scheduler#scheduler-queue) the deploy job. -You can set up a configuration where an upstream job triggers multiple downstream (child) jobs and jobs in other projects. You must have proper [permissions](/docs/cloud/manage-access/enterprise-permissions#project-role-permissions) to the project and job to configure the trigger. + -For jobs that are triggered to run by another job, a link to the upstream job run is available from your [job's run details](/docs/deploy/run-visibility#job-run-details). +5. You can set up a configuration where an upstream job triggers multiple downstream (child) jobs and jobs in other projects. You must have proper [permissions](/docs/cloud/manage-access/enterprise-permissions#project-role-permissions) to the project and job to configure the trigger. + +If another job triggers your job to run, you can find a link to the upstream job in the [run details section](/docs/deploy/run-visibility#job-run-details). 
## Related docs diff --git a/website/docs/docs/deploy/deployment-overview.md b/website/docs/docs/deploy/deployment-overview.md index 9382634812f..e9c25f68c08 100644 --- a/website/docs/docs/deploy/deployment-overview.md +++ b/website/docs/docs/deploy/deployment-overview.md @@ -33,7 +33,7 @@ Learn how to use dbt Cloud's features to help your team ship timely and quality diff --git a/website/docs/docs/deploy/job-scheduler.md b/website/docs/docs/deploy/job-scheduler.md index 7d45fddc3f6..c5d1886879b 100644 --- a/website/docs/docs/deploy/job-scheduler.md +++ b/website/docs/docs/deploy/job-scheduler.md @@ -11,9 +11,10 @@ The job scheduler is the backbone of running jobs in dbt Cloud, bringing power a The scheduler enables both cron-based and event-driven execution of dbt commands in the user’s data platform. Specifically, it handles: - Cron-based execution of dbt Cloud jobs that run on a predetermined cadence -- Event-driven execution of dbt Cloud CI jobs triggered by pull requests to the dbt repo +- Event-driven execution of dbt Cloud jobs that run based on the completion of another job ([trigger on job completion](/docs/deploy/deploy-jobs#trigger-on-job-completion)) +- Event-driven execution of dbt Cloud CI jobs triggered when a pull request is merged to the branch ([merge jobs](/docs/deploy/merge-jobs)) - Event-driven execution of dbt Cloud jobs triggered by API -- Event-driven execution of dbt Cloud jobs manually triggered by a user to "Run Now" +- Event-driven execution of dbt Cloud jobs manually triggered by a user to **Run now** The scheduler handles various tasks including queuing jobs, creating temporary environments to run the dbt commands required for those jobs, providing logs for debugging and remediation, and storing dbt artifacts for direct consumption/ingestion by the Discovery API. 
diff --git a/website/docs/docs/deploy/jobs.md b/website/docs/docs/deploy/jobs.md index 08d6cc585ef..1826836d602 100644 --- a/website/docs/docs/deploy/jobs.md +++ b/website/docs/docs/deploy/jobs.md @@ -4,21 +4,22 @@ sidebar_label: "About Jobs" description: "Learn about the different job types in dbt Cloud and what their differences are." tags: [scheduler] pagination_next: "docs/deploy/deploy-jobs" +hide_table_of_contents: true --- These are the available job types in dbt Cloud: -- [Deploy jobs](/docs/deploy/deploy-jobs) — To create and set up triggers for building production data assets -- [Continuous integration (CI) jobs](/docs/deploy/continuous-integration) — To create and set up triggers for checking code changes -- [Merge jobs](/docs/deploy/merge-jobs) — To create and set up triggers for merged pull requests +- [Deploy jobs](/docs/deploy/deploy-jobs) — Build production data assets. Runs on a schedule, by API, or after another job completes. +- [Continuous integration (CI) jobs](/docs/deploy/continuous-integration) — Test and validate code changes before merging. Triggered by commit to a PR or by API. +- [Merge jobs](/docs/deploy/merge-jobs) — Deploy merged changes into production. Runs after a successful PR merge or by API. -Below is a comparison table that describes the behaviors of the different job types: +The following comparison table describes the behaviors of the different job types: | | **Deploy jobs** | **CI jobs** | **Merge jobs** | | --- | --- | --- | --- | | Purpose | Builds production data assets. | Builds and tests new code before merging changes into production. | Build merged changes into production or update state for deferral. | -| Trigger types | Triggered by a schedule or by API. | Triggered by a commit to a PR or by API. | Triggered by a successful merge into the environment's branch or by API.| +| Trigger types | Triggered by a schedule, API, or the successful completion of another job. | Triggered by a commit to a PR or by API. 
| Triggered by a successful merge into the environment's branch or by API.| | Destination | Builds into a production database and schema. | Builds into a staging database and ephemeral schema, lived for the lifetime of the PR. | Builds into a production database and schema. | | Execution mode | Runs execute sequentially, so as to not have collisions on the underlying DAG. | Runs execute in parallel to promote team velocity. | Runs execute sequentially, so as to not have collisions on the underlying DAG. | | Efficiency run savings | Detects over-scheduled jobs and cancels unnecessary runs to avoid queue clog. | Cancels existing runs when a newer commit is pushed to avoid redundant work. | N/A | | State comparison | Only sometimes needs to detect state. | Almost always needs to compare state against the production environment to build on modified code and its dependents. | Almost always needs to compare state against the production environment to build on modified code and its dependents. | -| Job run duration | Limit is 24 hours. | Limit is 24 hours. | Limit is 24 hours. | \ No newline at end of file +| Job run duration | Limit is 24 hours. | Limit is 24 hours. | Limit is 24 hours. | diff --git a/website/docs/docs/deploy/model-notifications.md b/website/docs/docs/deploy/model-notifications.md index 24bbc2295c6..45ffbef0a4f 100644 --- a/website/docs/docs/deploy/model-notifications.md +++ b/website/docs/docs/deploy/model-notifications.md @@ -36,17 +36,17 @@ version: 2 groups: - name: finance - description: "Models related to the finance department" owner: # Email is required to receive model-level notifications, additional properties are also allowed. 
name: "Finance Team" + description: "Models related to the finance department" email: finance@dbtlabs.com favorite_food: donuts - name: marketing - description: "Models related to the marketing department" owner: name: "Marketing Team" + description: "Models related to the marketing department" email: marketing@dbtlabs.com favorite_food: jaffles ``` diff --git a/website/docs/docs/deploy/retry-jobs.md b/website/docs/docs/deploy/retry-jobs.md index f439351aec5..4e3ad0d429f 100644 --- a/website/docs/docs/deploy/retry-jobs.md +++ b/website/docs/docs/deploy/retry-jobs.md @@ -10,6 +10,7 @@ If your dbt job run completed with a status of **Error**, you can rerun it from - You have a [dbt Cloud account](https://www.getdbt.com/signup). - You must be using [dbt version](/docs/dbt-versions/upgrade-dbt-version-in-cloud) 1.6 or newer. +- dbt can successfully parse the project and generate a [manifest](/reference/artifacts/manifest-json). - The most recent run of the job hasn't completed successfully. The latest status of the run is **Error**. - The job command that failed in the run must be one that supports the [retry command](/reference/commands/retry). diff --git a/website/docs/docs/deploy/source-freshness.md b/website/docs/docs/deploy/source-freshness.md index ab214865925..8a83b653df1 100644 --- a/website/docs/docs/deploy/source-freshness.md +++ b/website/docs/docs/deploy/source-freshness.md @@ -4,7 +4,7 @@ id: "source-freshness" description: "Validate that data freshness meets expectations and alert if stale." --- -dbt Cloud provides a helpful interface around dbt's [source data freshness](/docs/build/sources#snapshotting-source-data-freshness) calculations. When a dbt Cloud job is configured to snapshot source data freshness, dbt Cloud will render a user interface showing you the state of the most recent snapshot.
This interface is intended to help you determine if your source data freshness is meeting the service level agreement (SLA) that you've defined for your organization. +dbt Cloud provides a helpful interface around dbt's [source data freshness](/docs/build/sources#source-data-freshness) calculations. When a dbt Cloud job is configured to snapshot source data freshness, dbt Cloud will render a user interface showing you the state of the most recent snapshot. This interface is intended to help you determine if your source data freshness is meeting the service level agreement (SLA) that you've defined for your organization. @@ -17,7 +17,7 @@ dbt Cloud provides a helpful interface around dbt's [source data freshness](/doc -To enable source freshness snapshots, firstly make sure to configure your sources to [snapshot freshness information](/docs/build/sources#snapshotting-source-data-freshness). You can add source freshness to the list of commands in the job run steps or enable the checkbox. However, you can expect different outcomes when you configure a job by selecting the **Run source freshness** checkbox compared to adding the command to the run steps. +To enable source freshness snapshots, firstly make sure to configure your sources to [snapshot freshness information](/docs/build/sources#source-data-freshness). You can add source freshness to the list of commands in the job run steps or enable the checkbox. However, you can expect different outcomes when you configure a job by selecting the **Run source freshness** checkbox compared to adding the command to the run steps. 
Review the following options and outcomes: diff --git a/website/docs/faqs/Models/insert-records.md b/website/docs/faqs/Models/insert-records.md index d522f6fbc85..5c53ff5411e 100644 --- a/website/docs/faqs/Models/insert-records.md +++ b/website/docs/faqs/Models/insert-records.md @@ -9,4 +9,4 @@ id: insert-records For those coming from an (Extract Transform Load) paradigm, there's often a desire to write transformations as `insert` and `update` statements. In comparison, dbt will wrap your `select` query in a `create table as` statement, which can feel counter-productive. * If you wish to use `insert` statements for performance reasons (i.e. to reduce data that is processed), consider [incremental models](/docs/build/incremental-models) -* If you wish to use `insert` statements since your source data is constantly changing (e.g. to create "Type 2 Slowly Changing Dimensions"), consider [snapshotting your source data](/docs/build/sources#snapshotting-source-data-freshness), and building models on top of your snaphots. +* If you wish to use `insert` statements since your source data is constantly changing (e.g. to create "Type 2 Slowly Changing Dimensions"), consider [snapshotting your source data](/docs/build/sources#source-data-freshness), and building models on top of your snapshots. diff --git a/website/docs/guides/productionize-your-dbt-databricks-project.md b/website/docs/guides/productionize-your-dbt-databricks-project.md index 1e757e9cf0a..de8dcbf7f4a 100644 --- a/website/docs/guides/productionize-your-dbt-databricks-project.md +++ b/website/docs/guides/productionize-your-dbt-databricks-project.md @@ -57,7 +57,7 @@ Let’s [create a job](/docs/deploy/deploy-jobs#create-and-schedule-jobs) in dbt - This will allow the job to inherit the catalog, schema, credentials, and environment variables defined in [Set up your dbt project with Databricks](/guides/set-up-your-databricks-dbt-project). 4.
Under **Execution Settings** - Check the **Generate docs on run** checkbox to configure the job to automatically generate project docs each time this job runs. This will ensure your documentation stays evergreen as models are added and modified. - - Select the **Run on source freshness** checkbox to configure dbt [source freshness](/docs/deploy/source-freshness) as the first step of this job. Your sources will need to be configured to [snapshot freshness information](/docs/build/sources#snapshotting-source-data-freshness) for this to drive meaningful insights. + - Select the **Run on source freshness** checkbox to configure dbt [source freshness](/docs/deploy/source-freshness) as the first step of this job. Your sources will need to be configured to [snapshot freshness information](/docs/build/sources#source-data-freshness) for this to drive meaningful insights. Add the following three **Commands:** - `dbt source freshness` diff --git a/website/docs/guides/redshift-qs.md b/website/docs/guides/redshift-qs.md index 83fafad1d12..8990c4db925 100644 --- a/website/docs/guides/redshift-qs.md +++ b/website/docs/guides/redshift-qs.md @@ -118,8 +118,7 @@ Now we are going to load our sample data into the S3 bucket that our Cloudformat id integer, user_id integer, order_date date, - status varchar(50), - _etl_loaded_at timestamp default current_timestamp + status varchar(50) ); create table stripe.payment( @@ -128,8 +127,7 @@ Now we are going to load our sample data into the S3 bucket that our Cloudformat paymentmethod varchar(50), status varchar(50), amount integer, - created date, - _batched_at timestamp default current_timestamp + created date ); ``` diff --git a/website/docs/guides/refactoring-legacy-sql.md b/website/docs/guides/refactoring-legacy-sql.md index afcfba1e4b0..0a0aa6bd128 100644 --- a/website/docs/guides/refactoring-legacy-sql.md +++ b/website/docs/guides/refactoring-legacy-sql.md @@ -71,7 +71,7 @@ This allows you to call the same table in multiple places with 
`{{ src('my_sourc We start here for several reasons: #### Source freshness reporting -Using sources unlocks the ability to run [source freshness reporting](/docs/build/sources#snapshotting-source-data-freshness) to make sure your raw data isn't stale. +Using sources unlocks the ability to run [source freshness reporting](/docs/build/sources#source-data-freshness) to make sure your raw data isn't stale. #### Easy dependency tracing If you're migrating multiple stored procedures into dbt, with sources you can see which queries depend on the same raw tables. diff --git a/website/docs/guides/set-up-ci.md b/website/docs/guides/set-up-ci.md index 3c1ece9451d..79761e88e57 100644 --- a/website/docs/guides/set-up-ci.md +++ b/website/docs/guides/set-up-ci.md @@ -50,7 +50,7 @@ Use the **Continuous Integration Job** template, and call the job **CI Check**. In the Execution Settings, your command will be preset to `dbt build --select state:modified+`. Let's break this down: - [`dbt build`](/reference/commands/build) runs all nodes (seeds, models, snapshots, tests) at once in DAG order. If something fails, nodes that depend on it will be skipped. -- The [`state:modified+` selector](/reference/node-selection/methods#the-state-method) means that only modified nodes and their children will be run ("Slim CI"). In addition to [not wasting time](https://discourse.getdbt.com/t/how-we-sped-up-our-ci-runs-by-10x-using-slim-ci/2603) building and testing nodes that weren't changed in the first place, this significantly reduces compute costs. +- The [`state:modified+` selector](/reference/node-selection/methods#state) means that only modified nodes and their children will be run ("Slim CI"). In addition to [not wasting time](https://discourse.getdbt.com/t/how-we-sped-up-our-ci-runs-by-10x-using-slim-ci/2603) building and testing nodes that weren't changed in the first place, this significantly reduces compute costs. 
To be able to find modified nodes, dbt needs to have something to compare against. dbt Cloud uses the last successful run of any job in your Production environment as its [comparison state](/reference/node-selection/syntax#about-node-selection). As long as you identified your Production environment in Step 2, you won't need to touch this. If you didn't, pick the right environment from the dropdown. @@ -344,7 +344,7 @@ Use the **Continuous Integration Job** template, and call the job **QA Check**. In the Execution Settings, your command will be preset to `dbt build --select state:modified+`. Let's break this down: - [`dbt build`](/reference/commands/build) runs all nodes (seeds, models, snapshots, tests) at once in DAG order. If something fails, nodes that depend on it will be skipped. -- The [`state:modified+` selector](/reference/node-selection/methods#the-state-method) means that only modified nodes and their children will be run ("Slim CI"). In addition to [not wasting time](https://discourse.getdbt.com/t/how-we-sped-up-our-ci-runs-by-10x-using-slim-ci/2603) building and testing nodes that weren't changed in the first place, this significantly reduces compute costs. +- The [`state:modified+` selector](/reference/node-selection/methods#state) means that only modified nodes and their children will be run ("Slim CI"). In addition to [not wasting time](https://discourse.getdbt.com/t/how-we-sped-up-our-ci-runs-by-10x-using-slim-ci/2603) building and testing nodes that weren't changed in the first place, this significantly reduces compute costs. To be able to find modified nodes, dbt needs to have something to compare against. Normally, we use the Production environment as the source of truth, but in this case there will be new code merged into `qa` long before it hits the `main` branch and Production environment. Because of this, we'll want to defer the Release environment to itself. 
diff --git a/website/docs/reference/artifacts/dbt-artifacts.md b/website/docs/reference/artifacts/dbt-artifacts.md index b8998dba261..99f57fbe5ab 100644 --- a/website/docs/reference/artifacts/dbt-artifacts.md +++ b/website/docs/reference/artifacts/dbt-artifacts.md @@ -7,7 +7,7 @@ With every invocation, dbt generates and saves one or more *artifacts*. Several - [documentation](/docs/collaborate/build-and-view-your-docs) - [state](/reference/node-selection/syntax#about-node-selection) -- [visualizing source freshness](/docs/build/sources#snapshotting-source-data-freshness) +- [visualizing source freshness](/docs/build/sources#source-data-freshness) They could also be used to: diff --git a/website/docs/reference/artifacts/sources-json.md b/website/docs/reference/artifacts/sources-json.md index 3b7f7539896..cc4aa8975aa 100644 --- a/website/docs/reference/artifacts/sources-json.md +++ b/website/docs/reference/artifacts/sources-json.md @@ -7,7 +7,7 @@ sidebar_label: "Sources" **Produced by:** [`source freshness`](/reference/commands/source) -This file contains information about [sources with freshness checks](/docs/build/sources#checking-source-freshness). Today, dbt Cloud uses this file to power its [Source Freshness visualization](/docs/build/sources#snapshotting-source-data-freshness). +This file contains information about [sources with freshness checks](/docs/build/sources#checking-source-freshness). Today, dbt Cloud uses this file to power its [Source Freshness visualization](/docs/build/sources#source-data-freshness). ### Top-level keys diff --git a/website/docs/reference/commands/source.md b/website/docs/reference/commands/source.md index f8a9e7088e3..038d9dee65a 100644 --- a/website/docs/reference/commands/source.md +++ b/website/docs/reference/commands/source.md @@ -75,4 +75,4 @@ Snapshots of source freshness can be used to understand: This command can be run manually to determine the state of your source data freshness at any time. 
It is also recommended that you run this command on a schedule, storing the results of the freshness snapshot at regular intervals. These longitudinal snapshots will make it possible to be alerted when source data freshness SLAs are violated, as well as understand the trend of freshness over time. -dbt Cloud makes it easy to snapshot source freshness on a schedule, and provides a dashboard out of the box indicating the state of freshness for all of the sources defined in your project. For more information on snapshotting freshness in dbt Cloud, check out the [docs](/docs/build/sources#snapshotting-source-data-freshness). +dbt Cloud makes it easy to snapshot source freshness on a schedule, and provides a dashboard out of the box indicating the state of freshness for all of the sources defined in your project. For more information on snapshotting freshness in dbt Cloud, check out the [docs](/docs/build/sources#source-data-freshness). diff --git a/website/docs/reference/database-permissions/snowflake-permissions.md b/website/docs/reference/database-permissions/snowflake-permissions.md index 3f474242834..1ab35e46d26 100644 --- a/website/docs/reference/database-permissions/snowflake-permissions.md +++ b/website/docs/reference/database-permissions/snowflake-permissions.md @@ -83,6 +83,7 @@ grant role reporter to user looker_user; -- or mode_user, periscope_user ``` 5. Let loader load data + Give the role unilateral permission to operate on the raw database ``` use role sysadmin; @@ -90,6 +91,7 @@ grant all on database raw to role loader; ``` 6. Let transformer transform data + The transformer role needs to be able to read raw data. If you do this before you have any data loaded, you can run: @@ -110,6 +112,7 @@ transformer also needs to be able to create in the analytics database: grant all on database analytics to role transformer; ``` 7. 
Let reporter read the transformed data + A previous version of this article recommended this be implemented through hooks in dbt, but this way lets you get away with a one-off statement. ``` grant usage on database analytics to role reporter; @@ -120,10 +123,11 @@ grant select on future views in database analytics to role reporter; Again, if you already have data in your analytics database, make sure you run: ``` grant usage on all schemas in database analytics to role reporter; -grant select on all tables in database analytics to role transformer; -grant select on all views in database analytics to role transformer; +grant select on all tables in database analytics to role reporter; +grant select on all views in database analytics to role reporter; ``` 8. Maintain + When new users are added, make sure you add them to the right role! Everything else should be inherited automatically thanks to those `future` grants. For more discussion and legacy information, refer to [this Discourse article](https://discourse.getdbt.com/t/setting-up-snowflake-the-exact-grant-statements-we-run/439). diff --git a/website/docs/reference/dbt-jinja-functions/env_var.md b/website/docs/reference/dbt-jinja-functions/env_var.md index 28feccc30e4..7e85ed9797a 100644 --- a/website/docs/reference/dbt-jinja-functions/env_var.md +++ b/website/docs/reference/dbt-jinja-functions/env_var.md @@ -91,7 +91,7 @@ $ DBT_ENV_CUSTOM_ENV_MY_FAVORITE_COLOR=indigo DBT_ENV_CUSTOM_ENV_MY_FAVORITE_NUM ``` Compiles to: ```sql --- {'MY_FAVORITE_COLOR': 'indigo', 'DBT_ENV_CUSTOM_ENV_MY_FAVORITE_NUMBER': '6'} +-- {'MY_FAVORITE_COLOR': 'indigo', 'MY_FAVORITE_NUMBER': '6'} select 1 as id ``` diff --git a/website/docs/reference/node-selection/defer.md b/website/docs/reference/node-selection/defer.md index 863494de12e..eddb1ece9d4 100644 --- a/website/docs/reference/node-selection/defer.md +++ b/website/docs/reference/node-selection/defer.md @@ -29,11 +29,12 @@ dbt test --models [...] 
--defer --state path/to/artifacts -When the `--defer` flag is provided, dbt will resolve `ref` calls differently depending on two criteria: -1. Is the referenced node included in the model selection criteria of the current run? -2. Does the referenced node exist as a database object in the current environment? +By default, dbt uses the [`target`](/reference/dbt-jinja-functions/target) namespace to resolve `ref` calls. -If the answer to both is **no**—a node is not included _and_ it does not exist as a database object in the current environment—references to it will use the other namespace instead, provided by the state manifest. +When `--defer` is enabled, dbt resolves `ref` calls using the state manifest instead, but only if: + +1. The node isn’t among the selected nodes, _and_ +2. It doesn’t exist in the database (or `--favor-state` is used). Ephemeral models are never deferred, since they serve as "passthroughs" for other `ref` calls. @@ -46,7 +47,7 @@ Deferral requires both `--defer` and `--state` to be set, either by passing flag #### Favor state -You can optionally skip the second criterion by passing the `--favor-state` flag. If passed, dbt will favor using the node defined in your `--state` namespace, even if the node exists in the current target. +When `--favor-state` is passed, dbt prioritizes node definitions from the `--state` directory. However, this doesn’t apply if the node is also part of the selected nodes. 
### Example diff --git a/website/docs/reference/node-selection/methods.md b/website/docs/reference/node-selection/methods.md index 7587a9fd2b1..600a578ef8e 100644 --- a/website/docs/reference/node-selection/methods.md +++ b/website/docs/reference/node-selection/methods.md @@ -1,5 +1,6 @@ --- -title: "Methods" +title: "Node selector methods" +sidebar: "Node selector methods" --- Selector methods return all resources that share a common property, using the @@ -22,51 +23,67 @@ dbt list --select "*.folder_name.*" dbt list --select "package:*_source" ``` -### The "tag" method -The `tag:` method is used to select models that match a specified [tag](/reference/resource-configs/tags). +### access +The `access` method selects models based on their [access](/reference/resource-configs/access) property. - ```bash -dbt run --select "tag:nightly" # run all models with the `nightly` tag +```bash +dbt list --select "access:public" # list all public models +dbt list --select "access:private" # list all private models +dbt list --select "access:protected" # list all protected models ``` +### config + +The `config` method is used to select models that match a specified [node config](/reference/configs-and-properties). -### The "source" method -The `source` method is used to select models that select from a specified [source](/docs/build/sources#using-sources). Use in conjunction with the `+` operator. ```bash -dbt run --select "source:snowplow+" # run all models that select from Snowplow sources +dbt run --select "config.materialized:incremental" # run all models that are materialized incrementally +dbt run --select "config.schema:audit" # run all models that are created in the `audit` schema +dbt run --select "config.cluster_by:geo_country" # run all models clustered by `geo_country` ``` -### The "resource_type" method -Use the `resource_type` method to select nodes of a particular type (`model`, `test`, `exposure`, and so on). 
This is similar to the `--resource-type` flag used by the [`dbt ls` command](/reference/commands/list). +While most config values are strings, you can also use the `config` method to match boolean configs, dictionary keys, and values in lists. - ```bash -dbt build --select "resource_type:exposure" # build all resources upstream of exposures -dbt list --select "resource_type:test" # list all tests in your project -dbt list --select "resource_type:source" # list all sources in your project +For example, given a model with the following configurations: + +```bash +{{ config( + materialized = 'incremental', + unique_key = ['column_a', 'column_b'], + grants = {'select': ['reporter', 'analysts']}, + meta = {"contains_pii": true}, + transient = true +) }} + +select ... ``` -### The "path" method -The `path` method is used to select models/sources defined at or under a specific path. -Model definitions are in SQL/Python files (not YAML), and source definitions are in YAML files. -While the `path` prefix is not explicitly required, it may be used to make -selectors unambiguous. + You can select using any of the following: +```bash +dbt ls -s config.materialized:incremental +dbt ls -s config.unique_key:column_a +dbt ls -s config.grants.select:reporter +dbt ls -s config.meta.contains_pii:true +dbt ls -s config.transient:true +``` - ```bash - # These two selectors are equivalent - dbt run --select "path:models/staging/github" - dbt run --select "models/staging/github" +### exposure - # These two selectors are equivalent - dbt run --select "path:models/staging/github/stg_issues.sql" - dbt run --select "models/staging/github/stg_issues.sql" - ``` +The `exposure` method is used to select parent resources of a specified [exposure](/docs/build/exposures). Use in conjunction with the `+` operator. 
-### The "file" method + ```bash +dbt run --select "+exposure:weekly_kpis" # run all models that feed into the weekly_kpis exposure +dbt test --select "+exposure:*" # test all resources upstream of all exposures +dbt ls --select "+exposure:*" --resource-type source # list all source tables upstream of all exposures +``` + +### file + The `file` method can be used to select a model by its filename, including the file extension (`.sql`). ```bash @@ -76,7 +93,7 @@ dbt run --select "some_model.sql" dbt run --select "some_model" ``` -### The "fqn" method +### fqn The `fqn` method is used to select nodes based off their "fully qualified names" (FQN) within the dbt graph. The default output of [`dbt list`](/reference/commands/list) is a listing of FQN. The default FQN format is composed of the project name, subdirectories within the path, and the file name (without extension) separated by periods. @@ -88,7 +105,26 @@ dbt run --select "fqn:some_path.some_model" dbt run --select "fqn:your_project.some_path.some_model" ``` -### The "package" method + +### group + +The `group` method is used to select models defined within a [group](/reference/resource-configs/group). + + +```bash +dbt run --select "group:finance" # run all models that belong to the finance group. +``` + +### metric + +The `metric` method is used to select parent resources of a specified [metric](/docs/build/build-metrics-intro). Use in conjunction with the `+` operator. + +```bash +dbt build --select "+metric:weekly_active_users" # build all resources upstream of weekly_active_users metric +dbt ls --select "+metric:*" --resource-type source # list all source tables upstream of all metrics +``` + +### package The `package` method is used to select models defined within the root project or an installed dbt package. While the `package:` prefix is not explicitly required, it may be used to make @@ -102,91 +138,86 @@ selectors unambiguous. 
dbt run --select "snowplow.*" ``` +### path +The `path` method is used to select models/sources defined at or under a specific path. +Model definitions are in SQL/Python files (not YAML), and source definitions are in YAML files. +While the `path` prefix is not explicitly required, it may be used to make +selectors unambiguous. -### The "config" method -The `config` method is used to select models that match a specified [node config](/reference/configs-and-properties). + ```bash + # These two selectors are equivalent + dbt run --select "path:models/staging/github" + dbt run --select "models/staging/github" + # These two selectors are equivalent + dbt run --select "path:models/staging/github/stg_issues.sql" + dbt run --select "models/staging/github/stg_issues.sql" + ``` +### resource_type +Use the `resource_type` method to select nodes of a particular type (`model`, `test`, `exposure`, and so on). This is similar to the `--resource-type` flag used by the [`dbt ls` command](/reference/commands/list). ```bash -dbt run --select "config.materialized:incremental" # run all models that are materialized incrementally -dbt run --select "config.schema:audit" # run all models that are created in the `audit` schema -dbt run --select "config.cluster_by:geo_country" # run all models clustered by `geo_country` +dbt build --select "resource_type:exposure" # build all resources upstream of exposures +dbt list --select "resource_type:test" # list all tests in your project +dbt list --select "resource_type:source" # list all sources in your project ``` -While most config values are strings, you can also use the `config` method to match boolean configs, dictionary keys, and values in lists. - -For example, given a model with the following configurations: +### result -```bash -{{ config( - materialized = 'incremental', - unique_key = ['column_a', 'column_b'], - grants = {'select': ['reporter', 'analysts']}, - meta = {"contains_pii": true}, - transient = true -) }} - -select ... 
-``` +The `result` method is related to the `state` method described below and can be used to select resources based on their result status from a prior run. Note that one of the dbt commands [`run`, `test`, `build`, `seed`] must have been performed in order to create the result on which a result selector operates. You can use `result` selectors in conjunction with the `+` operator. - You can select using any of the following: ```bash -dbt ls -s config.materialized:incremental -dbt ls -s config.unique_key:column_a -dbt ls -s config.grants.select:reporter -dbt ls -s config.meta.contains_pii:true -dbt ls -s config.transient:true +dbt run --select "result:error" --state path/to/artifacts # run all models that generated errors on the prior invocation of dbt run +dbt test --select "result:fail" --state path/to/artifacts # run all tests that failed on the prior invocation of dbt test +dbt build --select "1+result:fail" --state path/to/artifacts # run all the models associated with failed tests from the prior invocation of dbt build +dbt seed --select "result:error" --state path/to/artifacts # run all seeds that generated errors on the prior invocation of dbt seed. ``` -### The "test_type" method +### saved_query - - -The `test_type` method is used to select tests based on their type, `singular` or `generic`: +The `saved_query` method selects [saved queries](/docs/build/saved-queries). ```bash -dbt test --select "test_type:generic" # run all generic tests -dbt test --select "test_type:singular" # run all singular tests +dbt list --select "saved_query:*" # list all saved queries +dbt list --select "+saved_query:orders_saved_query" # list your saved query named "orders_saved_query" and all upstream resources ``` - +### semantic_model - +The `semantic_model` method selects [semantic models](/docs/build/semantic-models). 
-The `test_type` method is used to select tests based on their type: +```bash +dbt list --select "semantic_model:*" # list all semantic models +dbt list --select "+semantic_model:orders" # list your semantic model named "orders" and all upstream resources +``` -- [Unit tests](/docs/build/unit-tests) -- [Data tests](/docs/build/data-tests): - - [Singular](/docs/build/data-tests#singular-data-tests) - - [Generic](/docs/build/data-tests#generic-data-tests) +### source +The `source` method is used to select models that select from a specified [source](/docs/build/sources#using-sources). Use in conjunction with the `+` operator. -```bash -dbt test --select "test_type:unit" # run all unit tests -dbt test --select "test_type:data" # run all data tests -dbt test --select "test_type:generic" # run all generic data tests -dbt test --select "test_type:singular" # run all singular data tests + ```bash +dbt run --select "source:snowplow+" # run all models that select from Snowplow sources ``` - +### source_status + +Another element of job state is the `source_status` of a prior dbt invocation. After executing `dbt source freshness`, for example, dbt creates the `sources.json` artifact which contains execution times and `max_loaded_at` dates for dbt sources. You can read more about `sources.json` on the ['sources'](/reference/artifacts/sources-json) page. -### The "test_name" method +The following dbt commands produce `sources.json` artifacts whose results can be referenced in subsequent dbt invocations: +- `dbt source freshness` -The `test_name` method is used to select tests based on the name of the generic test -that defines it. For more information about how generic tests are defined, read about -[tests](/docs/build/data-tests). 
+After issuing one of the above commands, you can reference the source freshness results by adding a selector to a subsequent command as follows: - ```bash -dbt test --select "test_name:unique" # run all instances of the `unique` test -dbt test --select "test_name:equality" # run all instances of the `dbt_utils.equality` test -dbt test --select "test_name:range_min_max" # run all instances of a custom schema test defined in the local project, `range_min_max` +```bash +# You can also set the DBT_STATE environment variable instead of the --state flag. +dbt source freshness # must be run again to compare current to previous state +dbt build --select "source_status:fresher+" --state path/to/prod/artifacts ``` - -### The "state" method +### state **N.B.** State-based selection is a powerful, complex feature. Read about [known caveats and limitations](/reference/node-selection/state-comparison-caveats) to state comparison. @@ -196,14 +227,12 @@ The `state` method is used to select nodes by comparing them against a previous `state:modified`: All new nodes, plus any changes to existing nodes. - ```bash dbt test --select "state:new" --state path/to/artifacts # run all tests on new models + and new tests on old models dbt run --select "state:modified" --state path/to/artifacts # run all models that have been modified dbt ls --select "state:modified" --state path/to/artifacts # list all modified nodes (not just models) ``` - Because state comparison is complex, and everyone's project is different, dbt supports subselectors that include a subset of the full `modified` criteria: - `state:modified.body`: Changes to node body (e.g. model SQL, seed values) - `state:modified.configs`: Changes to any node configs, excluding `database`/`schema`/`alias` @@ -220,105 +249,60 @@ There are two additional `state` selectors that complement `state:new` and `stat These selectors can help you shorten run times by excluding unchanged nodes. 
Currently, no subselectors are available at this time, but that might change as use cases evolve. -### The "exposure" method - -The `exposure` method is used to select parent resources of a specified [exposure](/docs/build/exposures). Use in conjunction with the `+` operator. +### tag +The `tag:` method is used to select models that match a specified [tag](/reference/resource-configs/tags). ```bash -dbt run --select "+exposure:weekly_kpis" # run all models that feed into the weekly_kpis exposure -dbt test --select "+exposure:*" # test all resources upstream of all exposures -dbt ls --select "+exposure:*" --resource-type source # list all source tables upstream of all exposures -``` - -### The "metric" method - -The `metric` method is used to select parent resources of a specified [metric](/docs/build/build-metrics-intro). Use in conjunction with the `+` operator. - -```bash -dbt build --select "+metric:weekly_active_users" # build all resources upstream of weekly_active_users metric -dbt ls --select "+metric:*" --resource-type source # list all source tables upstream of all metrics -``` - -### The "result" method - -The `result` method is related to the `state` method described above and can be used to select resources based on their result status from a prior run. Note that one of the dbt commands [`run`, `test`, `build`, `seed`] must have been performed in order to create the result on which a result selector operates. You can use `result` selectors in conjunction with the `+` operator. 
- -```bash -dbt run --select "result:error" --state path/to/artifacts # run all models that generated errors on the prior invocation of dbt run -dbt test --select "result:fail" --state path/to/artifacts # run all tests that failed on the prior invocation of dbt test -dbt build --select "1+result:fail" --state path/to/artifacts # run all the models associated with failed tests from the prior invocation of dbt build -dbt seed --select "result:error" --state path/to/artifacts # run all seeds that generated errors on the prior invocation of dbt seed. +dbt run --select "tag:nightly" # run all models with the `nightly` tag ``` -### The "source_status" method - -Supported in v1.1 or higher. - -Another element of job state is the `source_status` of a prior dbt invocation. After executing `dbt source freshness`, for example, dbt creates the `sources.json` artifact which contains execution times and `max_loaded_at` dates for dbt sources. You can read more about `sources.json` on the ['sources'](/reference/artifacts/sources-json) page. - -The following dbt commands produce `sources.json` artifacts whose results can be referenced in subsequent dbt invocations: -- `dbt source freshness` +### test_name -After issuing one of the above commands, you can reference the source freshness results by adding a selector to a subsequent command as follows: +The `test_name` method is used to select tests based on the name of the generic test +that defines it. For more information about how generic tests are defined, read about +[tests](/docs/build/data-tests). -```bash -# You can also set the DBT_STATE environment variable instead of the --state flag. 
-dbt source freshness # must be run again to compare current to previous state -dbt build --select "source_status:fresher+" --state path/to/prod/artifacts + ```bash +dbt test --select "test_name:unique" # run all instances of the `unique` test +dbt test --select "test_name:equality" # run all instances of the `dbt_utils.equality` test +dbt test --select "test_name:range_min_max" # run all instances of a custom schema test defined in the local project, `range_min_max` ``` -### The "group" method - -The `group` method is used to select models defined within a [group](/reference/resource-configs/group). - - -```bash -dbt run --select "group:finance" # run all models that belong to the finance group. -``` +### test_type -### The "access" method + -The `access` method selects models based on their [access](/reference/resource-configs/access) property. +The `test_type` method is used to select tests based on their type, `singular` or `generic`: ```bash -dbt list --select "access:public" # list all public models -dbt list --select "access:private" # list all private models -dbt list --select "access:protected" # list all protected models +dbt test --select "test_type:generic" # run all generic tests +dbt test --select "test_type:singular" # run all singular tests ``` -### The "version" method - -The `version` method selects [versioned models](/docs/collaborate/govern/model-versions) based on their [version identifier](/reference/resource-properties/versions) and [latest version](/reference/resource-properties/latest_version). 
+ -```bash -dbt list --select "version:latest" # only 'latest' versions -dbt list --select "version:prerelease" # versions newer than the 'latest' version -dbt list --select "version:old" # versions older than the 'latest' version + -dbt list --select "version:none" # models that are *not* versioned -``` +The `test_type` method is used to select tests based on their type: -### The "semantic_model" method +- [Unit tests](/docs/build/unit-tests) +- [Data tests](/docs/build/data-tests): + - [Singular](/docs/build/data-tests#singular-data-tests) + - [Generic](/docs/build/data-tests#generic-data-tests) -The `semantic_model` method selects [semantic models](/docs/build/semantic-models). ```bash -dbt list --select "semantic_model:*" # list all semantic models -dbt list --select "+semantic_model:orders" # list your semantic model named "orders" and all upstream resources +dbt test --select "test_type:unit" # run all unit tests +dbt test --select "test_type:data" # run all data tests +dbt test --select "test_type:generic" # run all generic data tests +dbt test --select "test_type:singular" # run all singular data tests ``` -### The "saved_query" method - -The `saved_query` method selects [saved queries](/docs/build/saved-queries). - -```bash -dbt list --select "saved_query:*" # list all saved queries -dbt list --select "+saved_query:orders_saved_query" # list your saved query named "orders_saved_query" and all upstream resources -``` + -### The "unit_test" method +### unit_test Supported in v1.8 or newer. @@ -333,3 +317,15 @@ dbt list --select "+unit_test:orders_with_zero_items" # list your unit test nam ``` + +### version + +The `version` method selects [versioned models](/docs/collaborate/govern/model-versions) based on their [version identifier](/reference/resource-properties/versions) and [latest version](/reference/resource-properties/latest_version). 
+ +```bash +dbt list --select "version:latest" # only 'latest' versions +dbt list --select "version:prerelease" # versions newer than the 'latest' version +dbt list --select "version:old" # versions older than the 'latest' version + +dbt list --select "version:none" # models that are *not* versioned +``` diff --git a/website/docs/reference/node-selection/state-comparison-caveats.md b/website/docs/reference/node-selection/state-comparison-caveats.md index adaf35bd710..f83a4f37c89 100644 --- a/website/docs/reference/node-selection/state-comparison-caveats.md +++ b/website/docs/reference/node-selection/state-comparison-caveats.md @@ -4,7 +4,7 @@ title: "Caveats to state comparison" import StateModified from '/snippets/_state-modified-compare.md'; -The [`state:` selection method](/reference/node-selection/methods#the-state-method) is a powerful feature, with a lot of underlying complexity. Below are a handful of considerations when setting up automated jobs that leverage state comparison. +The [`state:` selection method](/reference/node-selection/methods#state) is a powerful feature, with a lot of underlying complexity. Below are a handful of considerations when setting up automated jobs that leverage state comparison. ### Seeds diff --git a/website/docs/reference/node-selection/syntax.md b/website/docs/reference/node-selection/syntax.md index c61ab598a88..2e53eff72df 100644 --- a/website/docs/reference/node-selection/syntax.md +++ b/website/docs/reference/node-selection/syntax.md @@ -118,19 +118,19 @@ dbt ls --select "result:+" state:modified+ --state ./ -## Stateful selection +## State selection One of the greatest underlying assumptions about dbt is that its operations should be **stateless** and **idempotent**. That is, it doesn't matter how many times a model has been run before, or if it has ever been run before. It doesn't matter if you run it once or a thousand times. Given the same raw data, you can expect the same transformed result.
A given run of dbt doesn't need to "know" about _any other_ run; it just needs to know about the code in the project and the objects in your database as they exist _right now_. -That said, dbt does store "state"—a detailed, point-in-time view of project resources (also referred to as nodes), database objects, and invocation results—in the form of its [artifacts](/docs/deploy/artifacts). If you choose, dbt can use these artifacts to inform certain operations. Crucially, the operations themselves are still stateless and idempotent: given the same manifest and the same raw data, dbt will produce the same transformed result. +That said, dbt does store "state" — a detailed, point-in-time view of project resources (also referred to as nodes), database objects, and invocation results — in the form of its [artifacts](/docs/deploy/artifacts). If you choose, dbt can use these artifacts to inform certain operations. Crucially, the operations themselves are still stateless and idempotent: given the same manifest and the same raw data, dbt will produce the same transformed result. dbt can leverage artifacts from a prior invocation as long as their file path is passed to the `--state` flag. This is a prerequisite for: -- [The `state:` selector](/reference/node-selection/methods#the-state-method), whereby dbt can identify resources that are new or modified +- [The `state` selector](/reference/node-selection/methods#state), whereby dbt can identify resources that are new or modified by comparing code in the current project against the state manifest. - [Deferring](/reference/node-selection/defer) to another environment, whereby dbt can identify upstream, unselected resources that don't exist in your current environment and instead "defer" their references to the environment provided by the state manifest. - The [`dbt clone` command](/reference/commands/clone), whereby dbt can clone nodes based on their location in the manifest provided to the `--state` flag.
-Together, the `state:` selector and deferral enable ["slim CI"](/best-practices/best-practice-workflows#run-only-modified-models-to-test-changes-slim-ci). We expect to add more features in future releases that can leverage artifacts passed to the `--state` flag. +Together, the [`state`](/reference/node-selection/methods#state) selector and deferral enable ["slim CI"](/best-practices/best-practice-workflows#run-only-modified-models-to-test-changes-slim-ci). We expect to add more features in future releases that can leverage artifacts passed to the `--state` flag. ### Establishing state diff --git a/website/docs/reference/resource-configs/alias.md b/website/docs/reference/resource-configs/alias.md index c14804ef2a7..5beaa238806 100644 --- a/website/docs/reference/resource-configs/alias.md +++ b/website/docs/reference/resource-configs/alias.md @@ -8,9 +8,11 @@ datatype: string -Specify a custom alias for a model in your `dbt_project.yml` file or config block. +Specify a custom alias for a model in your `dbt_project.yml` file, `models/properties.yml` file, or config block in a SQL file. -For example, if you have a model that calculates `sales_total` and want to give it a more user-friendly alias, you can alias it like this: +For example, if you have a model that calculates `sales_total` and want to give it a more user-friendly alias, you can alias it as shown in the following examples. 
+ +In the `dbt_project.yml` file, the following example sets a default `alias` for the `sales_total` model at the project level: @@ -22,16 +24,40 @@ models: ``` +The following specifies an `alias` as part of the `models/properties.yml` file metadata, useful for centralized configuration: + + + +```yml +version: 2 + +models: + - name: sales_total + config: + alias: sales_dashboard +``` + + +The following assigns the `alias` directly in the `models/sales_total.sql` file: + + + +```sql +{{ config( + alias="sales_dashboard" +) }} +``` + + This would return `analytics.finance.sales_dashboard` in the database, instead of the default `analytics.finance.sales_total`. +Configure a seed's alias in your `dbt_project.yml` file or a `properties.yml` file. The following examples demonstrate how to `alias` a seed named `product_categories` to `categories_data`. -Configure a seed's alias in your `dbt_project.yml` file or config block. - -For example, if you have a seed that represents `product_categories` and want to alias it as `categories_data`, you would alias like this: +In the `dbt_project.yml` file at the project level: @@ -41,6 +67,21 @@ seeds: product_categories: +alias: categories_data ``` + + +In the `seeds/properties.yml` file: + + + +```yml +version: 2 + +seeds: + - name: product_categories + config: + alias: categories_data +``` + This would return the name `analytics.finance.categories_data` in the database. @@ -55,9 +96,6 @@ seeds: +alias: country_mappings ``` - - - @@ -65,7 +103,9 @@ seeds: Configure a snapshots's alias in your `dbt_project.yml` file or config block. -For example, if you have a snapshot that is named `your_snapshot` and want to alias it as `the_best_snapshot`, you would alias like this: +The following examples demonstrate how to `alias` a snapshot named `your_snapshot` to `the_best_snapshot`.
+ +In the `dbt_project.yml` file at the project level: @@ -75,20 +115,57 @@ snapshots: your_snapshot: +alias: the_best_snapshot ``` + -This would build your snapshot to `analytics.finance.the_best_snapshot` in the database. +In the `snapshots/properties.yml` file: + + +```yml +version: 2 + +snapshots: + - name: your_snapshot + config: + alias: the_best_snapshot +``` +In `snapshots/your_snapshot.sql` file: + + + +```sql +{{ config( + alias="the_best_snapshot" +) }} +``` + + +This would build your snapshot to `analytics.finance.the_best_snapshot` in the database. + -Configure a test's alias in your `schema.yml` file or config block. +Configure a data test's alias in your `dbt_project.yml` file, `properties.yml` file, or config block in the model file. -For example, to add a unique test to the `order_id` column and give it an alias `unique_order_id_test` to identify this specific test, you would alias like this: +The following examples demonstrate how to `alias` a unique data test named `order_id` to `unique_order_id_test` to identify a specific data test. - +In the `dbt_project.yml` file at the project level: + + + +```yml +tests: + your_project: + +alias: unique_order_id_test +``` + + +In the `models/properties.yml` file: + + ```yml models: @@ -99,10 +176,22 @@ models: - unique: alias: unique_order_id_test ``` + + +In `tests/unique_order_id_test.sql` file: + + + +```sql +{{ config( + alias="unique_order_id_test", + severity="error", +``` + When using [`store_failures_as`](/reference/resource-configs/store_failures_as), this would return the name `analytics.finance.orders_order_id_unique_order_id_test` in the database. 
- + diff --git a/website/docs/reference/resource-configs/contract.md b/website/docs/reference/resource-configs/contract.md index fb25076b0d9..18266ec672f 100644 --- a/website/docs/reference/resource-configs/contract.md +++ b/website/docs/reference/resource-configs/contract.md @@ -14,6 +14,13 @@ When the `contract` configuration is enforced, dbt will ensure that your model's This is to ensure that the people querying your model downstream—both inside and outside dbt—have a predictable and consistent set of columns to use in their analyses. Even a subtle change in data type, such as from `boolean` (`true`/`false`) to `integer` (`0`/`1`), could cause queries to fail in surprising ways. +## Support + +At present, model contracts are supported for: +- SQL models (not yet Python) +- Models materialized as `table`, `view`, and `incremental` (with `on_schema_change: append_new_columns` or `on_schema_change: fail`) +- The most popular data platforms — though support and enforcement of different [constraint types](/reference/resource-properties/constraints) vary by platform + ## Data type aliasing dbt uses built-in type aliasing for the `data_type` defined in your YAML. For example, you can specify `string` in your contract, and on Postgres/Redshift, dbt will convert it to `text`. If dbt doesn't recognize the `data_type` name among its known aliases, it will pass it through as-is. This is enabled by default, but you can opt-out by setting `alias_types` to `false`. 
@@ -91,12 +98,6 @@ When you `dbt run` your model, _before_ dbt has materialized it as a table in th 20:53:45 > in macro assert_columns_equivalent (macros/materializations/models/table/columns_spec_ddl.sql) ``` -## Support - -At present, model contracts are supported for: -- SQL models (not yet Python) -- Models materialized as `table`, `view`, and `incremental` (with `on_schema_change: append_new_columns`) -- The most popular data platforms — though support and enforcement of different [constraint types](/reference/resource-properties/constraints) vary by platform ### Incremental models and `on_schema_change` diff --git a/website/docs/reference/resource-configs/grants.md b/website/docs/reference/resource-configs/grants.md index 99b61ef2413..4b0cda26f52 100644 --- a/website/docs/reference/resource-configs/grants.md +++ b/website/docs/reference/resource-configs/grants.md @@ -11,12 +11,12 @@ The grant resource configs enable you to apply permissions at build time to a sp dbt aims to use the most efficient approach when updating grants, which varies based on the adapter you're using, and whether dbt is replacing or updating an object that already exists. You can always check the debug logs for the full set of grant and revoke statements that dbt runs. -dbt encourages you to use grants as resource configs whenever possible. In versions prior to Core v1.2, you were limited to using hooks for grants. Occasionally, you still might need to write grants statements manually and run them using hooks. For example, hooks may be appropriate if you want to: +You should define grants as resource configs whenever possible, but you might occasionally need to write grants statements manually and run them using [hooks](/docs/build/hooks-operations). For example, hooks may be appropriate if you want to: -* Apply grants in a more complex or custom manner, beyond what the built-in grants capability can provide. * Apply grants on other database objects besides views and tables. 
-* Take advantage of more-advanced permission capabilities offered by your data platform, for which dbt does not (yet!) offer out-of-the-box support using resource configuration. * Create more granular row- and column-level access, use masking policies, or apply future grants. +* Take advantage of more advanced permission capabilities offered by your data platform, for which dbt does not offer out-of-the-box support using resource configuration. +* Apply grants in a more complex or custom manner, beyond what the built-in grants capability can provide. For more information on hooks, see [Hooks & operations](/docs/build/hooks-operations). @@ -154,6 +154,83 @@ Now, the model will grant select to `user_a`, `user_b`, AND `user_c`! - This use of `+`, controlling clobber vs. add merge behavior, is distinct from the use of `+` in `dbt_project.yml` (shown in the example above) for defining configs with dictionary values. For more information, see [the plus prefix](https://docs.getdbt.com/reference/resource-configs/plus-prefix). - `grants` is the first config to support a `+` prefix for controlling config merge behavior. Currently, it's the only one. If it proves useful, we may extend this capability to new and existing configs in the future. +### Conditional grants + +Like any other config, you can use Jinja to vary the grants in different contexts. For example, you might grant different permissions in prod than dev: + + + +```yml +models: + +grants: + select: "{{ ['user_a', 'user_b'] if target.name == 'prod' else ['user_c'] }}" +``` + + + +## Revoking grants + +dbt only modifies grants on a node (including revocation) when a `grants` configuration is attached to that node. For example, imagine you had originally specified the following grants in `dbt_project.yml`: + + + +```yml +models: + +grants: + select: ['user_a', 'user_b'] +``` + + + +If you delete the entire `+grants` section, dbt assumes you no longer want it to manage grants and doesn't change anything. 
To have dbt revoke all existing grants from a node, provide an empty list of grantees. + + + + + + + ```yml + models: + +grants: + select: ['user_b'] + ``` + + + + + + + + ```yml + models: + +grants: + select: [] + ``` + + + + + + + + ```yml + models: + + # this section intentionally left blank + ``` + + + + + + ## General examples You can grant each permission to a single grantee, or a set of multiple grantees. In this example, we're granting `select` on this model to just `bi_user`, so that it can be queried in our Business Intelligence (BI) tool. @@ -249,7 +326,7 @@ models:
-* Granting to / revoking from is only fully supported for Redshift users (not [groups](https://docs.aws.amazon.com/redshift/latest/dg/r_Groups.html) or [roles](https://docs.aws.amazon.com/redshift/latest/dg/r_roles-managing.html)). +* Granting to / revoking from is only fully supported for Redshift users (not [groups](https://docs.aws.amazon.com/redshift/latest/dg/r_Groups.html) or [roles](https://docs.aws.amazon.com/redshift/latest/dg/r_roles-managing.html)). See [dbt-redshift#415](https://github.com/dbt-labs/dbt-redshift/issues/415) for the corresponding issue.
diff --git a/website/docs/reference/resource-configs/no-configs.md b/website/docs/reference/resource-configs/no-configs.md index 5eec26917c8..f72b286c837 100644 --- a/website/docs/reference/resource-configs/no-configs.md +++ b/website/docs/reference/resource-configs/no-configs.md @@ -1,11 +1,12 @@ --- -title: "No specifc configurations for this Adapter" +title: "No specific configurations for this adapter" id: "no-configs" --- If you were guided to this page from a data platform setup article, it most likely means: - Setting up the profile is the only action the end-user needs to take on the data platform, or -- The subsequent actions the end-user needs to take are not currently documented +- The subsequent actions the end-user needs to take are not currently documented, or +- Relevant information is provided on the documentation pages of the data platform vendor. If you'd like to contribute to data platform-specific configuration information, refer to [Documenting a new adapter](/guides/adapter-creation) diff --git a/website/docs/reference/resource-properties/latest_version.md b/website/docs/reference/resource-properties/latest_version.md index 567ea5e7e1f..6635bd3fecb 100644 --- a/website/docs/reference/resource-properties/latest_version.md +++ b/website/docs/reference/resource-properties/latest_version.md @@ -21,7 +21,7 @@ models: The latest version of this model. The "latest" version is relevant for: 1. Resolving `ref()` calls to this model that are "unpinned" (a version is not explicitly specified) -2. Selecting model versions using the [`version:` selection method](/reference/node-selection/methods#the-version-method), based on whether a given model version is `latest`, `prerelease`, or `old` +2. Selecting model versions using the [`version:` selection method](/reference/node-selection/methods#version), based on whether a given model version is `latest`, `prerelease`, or `old` This value can be a string or a numeric (integer or float) value. 
It must be one of the [version identifiers](/reference/resource-properties/versions#v) specified in this model's list of `versions`. diff --git a/website/docs/reference/source-configs.md b/website/docs/reference/source-configs.md index 959d4c542e9..facd747f9f5 100644 --- a/website/docs/reference/source-configs.md +++ b/website/docs/reference/source-configs.md @@ -121,7 +121,7 @@ sources: Sources can be configured via a `config:` block within their `.yml` definitions, or from the `dbt_project.yml` file under the `sources:` key. This configuration is most useful for configuring sources imported from [a package](/docs/build/packages). -You can disable sources imported from a package to prevent them from rendering in the documentation, or to prevent [source freshness checks](/docs/build/sources#snapshotting-source-data-freshness) from running on source tables imported from packages. +You can disable sources imported from a package to prevent them from rendering in the documentation, or to prevent [source freshness checks](/docs/build/sources#source-data-freshness) from running on source tables imported from packages. - **Note**: To disable a source table nested in a YAML file in a subfolder, you will need to supply the subfolder(s) within the path to that YAML file, as well as the source name and the table name in the `dbt_project.yml` file.

The following example shows how to disable a source table nested in a YAML file in a subfolder: diff --git a/website/sidebars.js b/website/sidebars.js index 5600fe164f2..db97f1f25da 100644 --- a/website/sidebars.js +++ b/website/sidebars.js @@ -222,6 +222,7 @@ const sidebarSettings = { "docs/core/connect-data-platform/athena-setup", "docs/core/connect-data-platform/glue-setup", "docs/core/connect-data-platform/clickhouse-setup", + "docs/core/connect-data-platform/cratedb-setup", "docs/core/connect-data-platform/databend-setup", "docs/core/connect-data-platform/decodable-setup", "docs/core/connect-data-platform/doris-setup", @@ -522,6 +523,7 @@ const sidebarSettings = { link: { type: "doc", id: "docs/collaborate/explore-projects" }, items: [ "docs/collaborate/explore-projects", + "docs/collaborate/data-health-signals", "docs/collaborate/access-from-dbt-cloud", "docs/collaborate/column-level-lineage", "docs/collaborate/model-performance", diff --git a/website/snippets/_dbt-copilot-avail.md b/website/snippets/_dbt-copilot-avail.md new file mode 100644 index 00000000000..3d8f418ceb9 --- /dev/null +++ b/website/snippets/_dbt-copilot-avail.md @@ -0,0 +1,6 @@ +:::info dbt Copilot available in beta + +Use [dbt Copilot](/docs/cloud/dbt-copilot), available in beta, to generate {props.resource} in the dbt Cloud IDE only.

+ +To use dbt Copilot, you must have an active dbt Cloud Enterprise account and either agree to use dbt Labs' OpenAI key or provide your own OpenAI API key. Register [here](https://docs.google.com/forms/d/e/1FAIpQLScPjRGyrtgfmdY919Pf3kgqI5E95xxPXz-8JoVruw-L9jVtxg/viewform) or reach out to the Account team to join the private beta. +::: diff --git a/website/snippets/_sl-partner-links.md b/website/snippets/_sl-partner-links.md index 7d08323239b..7c5f376180b 100644 --- a/website/snippets/_sl-partner-links.md +++ b/website/snippets/_sl-partner-links.md @@ -106,6 +106,21 @@ The following tools integrate with the dbt Semantic Layer: +
+ + + + +
+ +
\ No newline at end of file + diff --git a/website/static/img/icons/sigma.svg b/website/static/img/icons/sigma.svg new file mode 100644 index 00000000000..411c2a74b46 --- /dev/null +++ b/website/static/img/icons/sigma.svg @@ -0,0 +1,185 @@ + + + + + + + + + diff --git a/website/static/img/icons/white/redshift.svg b/website/static/img/icons/white/redshift.svg deleted file mode 100644 index e57888813e3..00000000000 --- a/website/static/img/icons/white/redshift.svg +++ /dev/null @@ -1 +0,0 @@ - \ No newline at end of file diff --git a/website/static/img/icons/white/rocket.svg b/website/static/img/icons/white/rocket.svg deleted file mode 100644 index 50f06851db9..00000000000 --- a/website/static/img/icons/white/rocket.svg +++ /dev/null @@ -1 +0,0 @@ - \ No newline at end of file diff --git a/website/static/img/icons/white/sigma.svg b/website/static/img/icons/white/sigma.svg new file mode 100644 index 00000000000..411c2a74b46 --- /dev/null +++ b/website/static/img/icons/white/sigma.svg @@ -0,0 +1,185 @@ + + + + + + + + + diff --git a/website/vercel.json b/website/vercel.json index fa90697a517..b68dc053db9 100644 --- a/website/vercel.json +++ b/website/vercel.json @@ -3651,7 +3651,7 @@ }, { "key": "Content-Security-Policy", - "value": "img-src 'self' data: https:;" + "value": "img-src 'self' data: https:; frame-ancestors 'self' https://*.mutinyhq.com https://*.getdbt.com" }, { "key": "Strict-Transport-Security",