Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Task/Adding sql fluff tool #512

Open
wants to merge 8 commits into
base: dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -216,4 +216,13 @@ pip-log.txt
.mr.developer.cfg

# SSIS Configs
SSIS/
SSIS/

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
6 changes: 6 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# pre-commit configuration: runs SQLFluff's lint hook and then its fix hook.
repos:
  - repo: https://github.com/sqlfluff/sqlfluff
    rev: 3.2.3  # SQLFluff release the hooks are pinned to
    hooks:
      - id: sqlfluff-lint
      - id: sqlfluff-fix
24 changes: 24 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,2 +1,26 @@
# dwh-etl
dwapi-etl


# SQLFluff
SQLFluff is an open source, dialect-flexible and configurable SQL linter. Designed with ELT applications in mind, SQLFluff also works with Jinja templating and dbt. SQLFluff will auto-fix most linting errors, allowing you to focus your time on what matters. More documentation at: https://sqlfluff.com/

## Setting up sqlfluff
### Requirements
- Make sure you have Python 3.8 or higher installed

### Setup
- Create a Python virtual environment by running `python3.8 -m venv <name_of_environment>` (e.g. `python3.8 -m venv venv`)
- Activate the virtual environment by running `source venv/Scripts/activate` (Git Bash on Windows) or `source venv/bin/activate` (Linux/macOS)
- Install the following packages by running:
  - `pip install sqlfluff`
  - `pip install pre-commit`

- Run `pre-commit install` to set up the git hook scripts defined in the config file `.pre-commit-config.yaml`

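### Maintenance of the rules
The linting and fixing of the SQL files is controlled by the config file `Scripts/.sqlfluff`.
You can edit or add rules in this file. The available rules are listed at https://docs.sqlfluff.com/en/2.1.3/rules.html# (note that this link is for SQLFluff 2.1.3, while `.pre-commit-config.yaml` pins 3.2.3, so check the documentation for the pinned version).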

### Usage
When you run `git commit`, sqlfluff will lint and fix any `.sql` files changed in that commit.
24 changes: 24 additions & 0 deletions Scripts/.sqlfluff
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# SQLFluff configuration for the SQL scripts under Scripts/.
[sqlfluff]
dialect = tsql
# AM01 (ambiguous.distinct) is excluded from linting.
exclude_rules = AM01

[sqlfluff:indentation]
tab_space_size = 2

# Keywords are written in lower case.
[sqlfluff:rules:capitalisation.keywords]
capitalisation_policy = lower

# Unquoted identifiers (all of them, not only aliases or column aliases) are lower-cased.
[sqlfluff:rules:capitalisation.identifiers]
extended_capitalisation_policy = lower
unquoted_identifiers_policy = all


# Table aliases must use an explicit `as`.
# The option key for this rule is `aliasing`; the rule name belongs only in the section header.
[sqlfluff:rules:aliasing.table]
aliasing = explicit

# NOTE(review): the SQLFluff rules reference does not appear to list a
# `set_operator_on_new_line` option for layout.set_operators - confirm this key is
# recognised by the pinned SQLFluff version, otherwise it has no effect.
[sqlfluff:rules:layout.set_operators]
set_operator_on_new_line = ['UNION', 'UNION ALL']


# Function names are written in lower case.
[sqlfluff:rules:capitalisation.functions]
extended_capitalisation_policy = lower
81 changes: 40 additions & 41 deletions Scripts/REPORTING/load_aggregate_dsd.sql
Original file line number Diff line number Diff line change
@@ -1,42 +1,41 @@
IF OBJECT_ID(N'[REPORTING].[dbo].[AggregateDSD]', N'U') IS NOT NULL
drop TABLE [REPORTING].[dbo].[AggregateDSD]
GO
-- Drop the existing reporting.dbo.aggregatedsd user table, if there is one, so that the
-- select ... into that follows can create it afresh.
-- NOTE(review): the object_id() literal keeps the original mixed-case, bracketed name while
-- the drop target is lower-cased; they only refer to the same object under a
-- case-insensitive collation - confirm for the target server.
if object_id(N'[REPORTING].[dbo].[AggregateDSD]', N'U') is not null
drop table reporting.dbo.aggregatedsd
go

SELECT DISTINCT
MFLCode,
f.FacilityName,
County,
SubCounty,
p.PartnerName,
a.AgencyName,
Gender,
age.DATIMAgeGroup as AgeGroup,
StabilityAssessment,
DifferentiatedCare,
SUM(onMMD) as patients_onMMD,
SUM(case when onMMD = 0 then 1 else 0 end) as patients_nonMMD,
COUNT(StabilityAssessment) AS Stability,
Sum(pat.isTXCurr) As TXCurr,
cast(getdate() as date) as LoadDate
INTO [REPORTING].[dbo].[AggregateDSD]
FROM NDWH.dbo.FactART as art
LEFT JOIN NDWH.dbo.FactLatestObs as lob on lob.Patientkey = art.PatientKey
LEFT JOIN NDWH.dbo.DimAgeGroup age on age.AgeGroupKey = art.AgeGroupKey
LEFT JOIN NDWH.dbo.DimFacility f on f.FacilityKey = art.FacilityKey
LEFT JOIN NDWH.dbo.DimAgency a on a.AgencyKey = art.AgencyKey
LEFT JOIN NDWH.dbo.DimPatient pat on pat.PatientKey = art.PatientKey
LEFT JOIN NDWH.dbo.DimPartner p on p.PartnerKey = art.PartnerKey
WHERE pat.IsTXCurr = 1
GROUP BY
MFLCode,
f.FacilityName,
County,
SubCounty,
p.PartnerName,
a.AgencyName,
Gender,
age.DATIMAgeGroup,
StabilityAssessment,
DifferentiatedCare

GO
-- Build reporting.dbo.aggregatedsd from ndwh.dbo.factart: one row per combination of
-- facility, partner, agency, gender, DATIM age group, stability assessment and
-- differentiated care, restricted to rows where pat.istxcurr = 1.
-- Every non-aggregated column is in the group by, so each output row is already unique
-- and the distinct does not remove anything.
select distinct
f.mflcode,
f.facilityname,
f.county,
f.subcounty,
p.partnername,
a.agencyname,
pat.gender,
age.datimagegroup as agegroup,
art.stabilityassessment,
art.differentiatedcare,
-- Date on which this load was run.
cast(getdate() as date) as loaddate,
-- Sum of art.onmmd (presumably a 0/1 flag - TODO confirm).
sum(art.onmmd) as patients_onmmd,
-- Number of rows in the group whose onmmd is exactly 0.
sum(case when art.onmmd = 0 then 1 else 0 end) as patients_nonmmd,
-- count(column) counts only the rows where stabilityassessment is not null.
count(art.stabilityassessment) as stability,
-- The where clause keeps only istxcurr = 1, so this equals the number of rows in the group.
sum(pat.istxcurr) as txcurr
into reporting.dbo.aggregatedsd
from ndwh.dbo.factart as art
-- NOTE(review): no lob column is referenced anywhere in this statement. If factlatestobs
-- can hold more than one row per patientkey this join would inflate the sums and counts -
-- confirm it is one row per patient, or that the join is still needed.
left join ndwh.dbo.factlatestobs as lob on art.patientkey = lob.patientkey
left join ndwh.dbo.dimagegroup as age on art.agegroupkey = age.agegroupkey
left join ndwh.dbo.dimfacility as f on art.facilitykey = f.facilitykey
left join ndwh.dbo.dimagency as a on art.agencykey = a.agencykey
left join ndwh.dbo.dimpatient as pat on art.patientkey = pat.patientkey
left join ndwh.dbo.dimpartner as p on art.partnerkey = p.partnerkey
-- Filtering on a column of the left-joined pat discards factart rows that have no
-- dimpatient match, so that join behaves like an inner join.
where pat.istxcurr = 1
group by
f.mflcode,
f.facilityname,
f.county,
f.subcounty,
p.partnername,
a.agencyname,
pat.gender,
age.datimagegroup,
art.stabilityassessment,
art.differentiatedcare
go
70 changes: 34 additions & 36 deletions Scripts/REPORTING/load_all_emr_sites.sql
Original file line number Diff line number Diff line change
@@ -1,40 +1,38 @@
IF OBJECT_ID(N'REPORTING.dbo.all_EMRSites', N'U') IS NOT NULL
DROP TABLE REPORTING.dbo.all_EMRSites;
-- Drop the existing reporting.dbo.all_emrsites user table, if there is one, before it is rebuilt.
if object_id(N'REPORTING.dbo.all_EMRSites', N'U') is not null
    drop table reporting.dbo.all_emrsites;


WITH ModulesUptake AS (
SELECT
MFLCode,
FacilityName,
SubCounty,
County,
isEMRSite,
PartnerName,
AgencyName,
isCT,
modules.isHTS,
isHTSML,
isIITML,
isOTZ,
isOVC,
isPMTCT,
isPrep,
fac.Latitude,
fac.Longitude,
EMR_Status,
modules.EMR,
modules.owner,
modules.InfrastructureType,
modules.KEPH_Level,
CAST(GETDATE() AS DATE) AS LoadDate
FROM NDWH.dbo.FactModulesuptake AS modules
LEFT JOIN NDWH.dbo.DimFacility fac ON fac.FacilityKey = modules.FacilityKey
LEFT JOIN NDWH.dbo.DimPartner pat ON pat.PartnerKey = modules.Partnerkey
LEFT JOIN NDWH.dbo.DimAgency agency ON agency.AgencyKey = modules.Agencykey
-- modulesuptake: every row of ndwh.dbo.factmodulesuptake with its facility, partner and
-- agency attributes attached (left joins, so rows without a matching dimension row are kept
-- with nulls in those columns), plus the date of this load.
with modulesuptake as (
select
fac.mflcode,
fac.facilityname,
fac.subcounty,
fac.county,
fac.isemrsite,
pat.partnername,
agency.agencyname,
modules.isct,
modules.ishts,
modules.ishtsml,
modules.isiitml,
modules.isotz,
modules.isovc,
modules.ispmtct,
modules.isprep,
fac.latitude,
fac.longitude,
fac.emr_status,
modules.emr,
modules.owner,
modules.infrastructuretype,
modules.keph_level,
-- Date on which this load was run.
cast(getdate() as date) as loaddate
from ndwh.dbo.factmodulesuptake as modules
left join ndwh.dbo.dimfacility as fac on modules.facilitykey = fac.facilitykey
-- Note: in this script the alias pat refers to dimpartner, not to a patient table.
left join ndwh.dbo.dimpartner as pat on modules.partnerkey = pat.partnerkey
left join ndwh.dbo.dimagency as agency on modules.agencykey = agency.agencykey
)


SELECT *
INTO REPORTING.dbo.all_EMRSites
FROM ModulesUptake;

-- Materialise the modulesuptake CTE as the table reporting.dbo.all_emrsites.
-- The * expands to the explicit column list of the CTE.
select *
into reporting.dbo.all_emrsites
from modulesuptake;