Add SQLFluff linting through pre-commit and reformat two REPORTING load scripts.

What this patch changes:
  * .gitignore: keeps the SSIS/ entry and additionally ignores .env files and
    Python virtual-environment directories.
  * .pre-commit-config.yaml: new file that runs the sqlfluff-lint and
    sqlfluff-fix hooks from the sqlfluff repository at rev 3.2.3.
  * README.md: documents local SQLFluff / pre-commit setup, rule maintenance
    and usage.
  * Scripts/.sqlfluff: new SQLFluff configuration (tsql dialect, rule AM01
    excluded, lower-case keywords / identifiers / functions, explicit table
    aliases).
  * Scripts/REPORTING/load_aggregate_dsd.sql and
    Scripts/REPORTING/load_all_emr_sites.sql: keywords and identifiers are
    lower-cased, every column is qualified with a table alias, table aliases
    use an explicit "as", and join conditions list the driving table first.

Review notes:
  * load_aggregate_dsd.sql filters on pat.istxcurr = 1 in the WHERE clause, so
    rows without a dimpatient match are discarded even though dimpatient is
    left-joined.
  * load_aggregate_dsd.sql now selects loaddate before the aggregate columns,
    so the column order of the table created by SELECT ... INTO differs from
    the previous script.
  * Columns that were unqualified in the old scripts are now attributed to a
    specific alias (for example art.onmmd, fac.emr_status); confirm each one
    against the table definitions, which are not part of this patch.
  * NOTE(review): leading whitespace and blank context lines in the hunks
    below were laid out to match the hunk line counts; verify them against the
    working tree before applying.

diff --git a/.gitignore b/.gitignore
index 25bdc372..dbf1abf3 100644
--- a/.gitignore
+++ b/.gitignore
@@ -216,4 +216,13 @@ pip-log.txt
 .mr.developer.cfg
 
 # SSIS Configs
-SSIS/
\ No newline at end of file
+SSIS/
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
\ No newline at end of file
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 00000000..86981551
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,6 @@
+repos:
+- repo: https://github.com/sqlfluff/sqlfluff
+  rev: 3.2.3
+  hooks:
+  - id: sqlfluff-lint
+  - id: sqlfluff-fix
diff --git a/README.md b/README.md
index 8b8044a5..b2925cef 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,25 @@
 # dwh-etl
 dwapi-etl
+
+
+# SQLFluff
+SQLFluff is an open source, dialect-flexible and configurable SQL linter. Designed with ELT applications in mind, SQLFluff also works with Jinja templating and dbt. SQLFluff will auto-fix most linting errors, allowing you to focus your time on what matters. More documentation at: https://sqlfluff.com/
+
+## Setting up sqlfluff locally
+### Requirements
+- Make sure you have python 3.8 or higher
+### Setup
+- Create a python virtual environment by running python3.8 -m venv (e.g. `python3.8 -m venv venv`)
+- Activate the virtual environment by running: `source venv/Scripts/activate` (Windows, Git Bash) or `source venv/bin/activate` (Linux/macOS)
+- Install the following packages by running:
+  - pip install sqlfluff
+  - pip install pre-commit
+
+- Run `pre-commit install` to set up the git hook scripts defined in the config file `.pre-commit-config.yaml`
+
+### Maintenance of the rules
+The linting and fixing of the sql files is controlled by the config file `Scripts/.sqlfluff`
+You can edit or add new rules in this file. The various rules can be found at https://docs.sqlfluff.com/en/stable/reference/rules.html
+
+### Usage
+When you run `git commit`, sqlfluff will lint and fix any changed .sql files
\ No newline at end of file
diff --git a/Scripts/.sqlfluff b/Scripts/.sqlfluff
new file mode 100644
index 00000000..c2f8389e
--- /dev/null
+++ b/Scripts/.sqlfluff
@@ -0,0 +1,24 @@
+[sqlfluff]
+dialect = tsql
+exclude_rules = AM01
+
+[sqlfluff:indentation]
+tab_space_size = 2
+
+[sqlfluff:rules:capitalisation.keywords]
+capitalisation_policy = lower
+
+[sqlfluff:rules:capitalisation.identifiers]
+extended_capitalisation_policy = lower
+unquoted_identifiers_policy = all
+
+
+[sqlfluff:rules:aliasing.table]
+aliasing = explicit
+
+[sqlfluff:rules:layout.set_operators]
+set_operator_on_new_line = ['UNION', 'UNION ALL']
+
+
+[sqlfluff:rules:capitalisation.functions]
+extended_capitalisation_policy = lower
\ No newline at end of file
diff --git a/Scripts/REPORTING/load_aggregate_dsd.sql b/Scripts/REPORTING/load_aggregate_dsd.sql
index 512e770b..5bd6080d 100644
--- a/Scripts/REPORTING/load_aggregate_dsd.sql
+++ b/Scripts/REPORTING/load_aggregate_dsd.sql
@@ -1,42 +1,41 @@
-IF OBJECT_ID(N'[REPORTING].[dbo].[AggregateDSD]', N'U') IS NOT NULL
-    drop TABLE [REPORTING].[dbo].[AggregateDSD]
-GO
+if object_id(N'[REPORTING].[dbo].[AggregateDSD]', N'U') is not null
+  drop table reporting.dbo.aggregatedsd
+go
 
-SELECT DISTINCT
-    MFLCode,
-    f.FacilityName,
-    County,
-    SubCounty,
-    p.PartnerName,
-    a.AgencyName,
-    Gender,
-    age.DATIMAgeGroup as AgeGroup,
-    StabilityAssessment,
-    DifferentiatedCare,
-    SUM(onMMD) as patients_onMMD,
-    SUM(case when onMMD = 0 then 1 else 0 end) as patients_nonMMD,
-    COUNT(StabilityAssessment) AS Stability,
-    Sum(pat.isTXCurr) As TXCurr,
-    cast(getdate() as date) as LoadDate
-INTO [REPORTING].[dbo].[AggregateDSD]
-FROM NDWH.dbo.FactART as art
-LEFT JOIN NDWH.dbo.FactLatestObs as lob on lob.Patientkey = art.PatientKey
-LEFT JOIN NDWH.dbo.DimAgeGroup age on age.AgeGroupKey = art.AgeGroupKey
-LEFT JOIN NDWH.dbo.DimFacility f on f.FacilityKey = art.FacilityKey
-LEFT JOIN NDWH.dbo.DimAgency a on a.AgencyKey = art.AgencyKey
-LEFT JOIN NDWH.dbo.DimPatient pat on pat.PatientKey = art.PatientKey
-LEFT JOIN NDWH.dbo.DimPartner p on p.PartnerKey = art.PartnerKey
-WHERE pat.IsTXCurr = 1
-GROUP BY
-    MFLCode,
-    f.FacilityName,
-    County,
-    SubCounty,
-    p.PartnerName,
-    a.AgencyName,
-    Gender,
-    age.DATIMAgeGroup,
-    StabilityAssessment,
-    DifferentiatedCare
-
-GO
+select distinct
+  f.mflcode,
+  f.facilityname,
+  f.county,
+  f.subcounty,
+  p.partnername,
+  a.agencyname,
+  pat.gender,
+  age.datimagegroup as agegroup,
+  art.stabilityassessment,
+  art.differentiatedcare,
+  cast(getdate() as date) as loaddate,
+  sum(art.onmmd) as patients_onmmd,
+  sum(case when art.onmmd = 0 then 1 else 0 end) as patients_nonmmd,
+  count(art.stabilityassessment) as stability,
+  sum(pat.istxcurr) as txcurr
+into reporting.dbo.aggregatedsd
+from ndwh.dbo.factart as art
+left join ndwh.dbo.factlatestobs as lob on art.patientkey = lob.patientkey
+left join ndwh.dbo.dimagegroup as age on art.agegroupkey = age.agegroupkey
+left join ndwh.dbo.dimfacility as f on art.facilitykey = f.facilitykey
+left join ndwh.dbo.dimagency as a on art.agencykey = a.agencykey
+left join ndwh.dbo.dimpatient as pat on art.patientkey = pat.patientkey
+left join ndwh.dbo.dimpartner as p on art.partnerkey = p.partnerkey
+where pat.istxcurr = 1
+group by
+  f.mflcode,
+  f.facilityname,
+  f.county,
+  f.subcounty,
+  p.partnername,
+  a.agencyname,
+  pat.gender,
+  age.datimagegroup,
+  art.stabilityassessment,
+  art.differentiatedcare
+go
diff --git a/Scripts/REPORTING/load_all_emr_sites.sql b/Scripts/REPORTING/load_all_emr_sites.sql
index 0be3f0b9..4d2fd6a2 100644
--- a/Scripts/REPORTING/load_all_emr_sites.sql
+++ b/Scripts/REPORTING/load_all_emr_sites.sql
@@ -1,40 +1,38 @@
-IF OBJECT_ID(N'REPORTING.dbo.all_EMRSites', N'U') IS NOT NULL
-    DROP TABLE REPORTING.dbo.all_EMRSites;
+if object_id(N'REPORTING.dbo.all_EMRSites', N'U') is not null
+  drop table reporting.dbo.all_emrsites;
 
 
 
-WITH ModulesUptake AS (
-    SELECT
-        MFLCode,
-        FacilityName,
-        SubCounty,
-        County,
-        isEMRSite,
-        PartnerName,
-        AgencyName,
-        isCT,
-        modules.isHTS,
-        isHTSML,
-        isIITML,
-        isOTZ,
-        isOVC,
-        isPMTCT,
-        isPrep,
-        fac.Latitude,
-        fac.Longitude,
-        EMR_Status,
-        modules.EMR,
-        modules.owner,
-        modules.InfrastructureType,
-        modules.KEPH_Level,
-        CAST(GETDATE() AS DATE) AS LoadDate
-    FROM NDWH.dbo.FactModulesuptake AS modules
-    LEFT JOIN NDWH.dbo.DimFacility fac ON fac.FacilityKey = modules.FacilityKey
-    LEFT JOIN NDWH.dbo.DimPartner pat ON pat.PartnerKey = modules.Partnerkey
-    LEFT JOIN NDWH.dbo.DimAgency agency ON agency.AgencyKey = modules.Agencykey
+with modulesuptake as (
+  select
+    fac.mflcode,
+    fac.facilityname,
+    fac.subcounty,
+    fac.county,
+    fac.isemrsite,
+    pat.partnername,
+    agency.agencyname,
+    modules.isct,
+    modules.ishts,
+    modules.ishtsml,
+    modules.isiitml,
+    modules.isotz,
+    modules.isovc,
+    modules.ispmtct,
+    modules.isprep,
+    fac.latitude,
+    fac.longitude,
+    fac.emr_status,
+    modules.emr,
+    modules.owner,
+    modules.infrastructuretype,
+    modules.keph_level,
+    cast(getdate() as date) as loaddate
+  from ndwh.dbo.factmodulesuptake as modules
+  left join ndwh.dbo.dimfacility as fac on modules.facilitykey = fac.facilitykey
+  left join ndwh.dbo.dimpartner as pat on modules.partnerkey = pat.partnerkey
+  left join ndwh.dbo.dimagency as agency on modules.agencykey = agency.agencykey
 )
-
-SELECT *
-INTO REPORTING.dbo.all_EMRSites
-FROM ModulesUptake;
-
+select *
+into reporting.dbo.all_emrsites
+from modulesuptake;