From c4dc6e932b5792c8bfe8aba054665e7b5cbf4004 Mon Sep 17 00:00:00 2001 From: Mumo Ngungu Date: Mon, 14 Oct 2024 15:07:41 +0300 Subject: [PATCH 1/8] test --- .gitignore | 11 +++- .pre-commit-config.yaml | 13 ++++ Scripts/.sqlfluff | 19 ++++++ Scripts/REPORTING/load_aggregate_dsd.sql | 80 ++++++++++++------------ 4 files changed, 82 insertions(+), 41 deletions(-) create mode 100644 .pre-commit-config.yaml create mode 100644 Scripts/.sqlfluff diff --git a/.gitignore b/.gitignore index 25bdc372..dbf1abf3 100644 --- a/.gitignore +++ b/.gitignore @@ -216,4 +216,13 @@ pip-log.txt .mr.developer.cfg # SSIS Configs -SSIS/ \ No newline at end of file +SSIS/ + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ \ No newline at end of file diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 00000000..936cb0ac --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,13 @@ +repos: +- repo: https://github.com/sqlfluff/sqlfluff + rev: stable_version + hooks: + - id: sqlfluff-lint + # For dbt projects, this installs the dbt "extras". 
+ # You will need to select the relevant dbt adapter for your dialect + # (https://docs.getdbt.com/docs/available-adapters): + # additional_dependencies: ['', 'sqlfluff-templater-dbt'] + - id: sqlfluff-fix + # Arbitrary arguments to show an example + # args: [--rules, "LT02,CP02"] + # additional_dependencies: ['', 'sqlfluff-templater-dbt'] diff --git a/Scripts/.sqlfluff b/Scripts/.sqlfluff new file mode 100644 index 00000000..a734dc39 --- /dev/null +++ b/Scripts/.sqlfluff @@ -0,0 +1,19 @@ +[sqlfluff] +dialect = tsql + +[sqlfluff:indentation] +tab_space_size = 2 + +[sqlfluff:rules:capitalisation.keywords] +capitalisation_policy = lower + +[sqlfluff:rules:capitalisation.identifiers] +extended_capitalisation_policy = pascal +unquoted_identifiers_policy = all + + +[sqlfluff:rules:aliasing.table] +aliasing.table = explicit + +[sqlfluff:rules:layout.set_operators] +set_operator_on_new_line = ['UNION', 'UNION ALL'] \ No newline at end of file diff --git a/Scripts/REPORTING/load_aggregate_dsd.sql b/Scripts/REPORTING/load_aggregate_dsd.sql index 512e770b..c9bc0249 100644 --- a/Scripts/REPORTING/load_aggregate_dsd.sql +++ b/Scripts/REPORTING/load_aggregate_dsd.sql @@ -1,42 +1,42 @@ -IF OBJECT_ID(N'[REPORTING].[dbo].[AggregateDSD]', N'U') IS NOT NULL - drop TABLE [REPORTING].[dbo].[AggregateDSD] -GO +if OBJECT_ID(N'[REPORTING].[dbo].[AggregateDSD]', N'U') is not null + drop table Reporting.Dbo.Aggregatedsd +go -SELECT DISTINCT - MFLCode, - f.FacilityName, - County, - SubCounty, - p.PartnerName, - a.AgencyName, - Gender, - age.DATIMAgeGroup as AgeGroup, - StabilityAssessment, - DifferentiatedCare, - SUM(onMMD) as patients_onMMD, - SUM(case when onMMD = 0 then 1 else 0 end) as patients_nonMMD, - COUNT(StabilityAssessment) AS Stability, - Sum(pat.isTXCurr) As TXCurr, - cast(getdate() as date) as LoadDate -INTO [REPORTING].[dbo].[AggregateDSD] -FROM NDWH.dbo.FactART as art -LEFT JOIN NDWH.dbo.FactLatestObs as lob on lob.Patientkey = art.PatientKey -LEFT JOIN NDWH.dbo.DimAgeGroup 
age on age.AgeGroupKey = art.AgeGroupKey -LEFT JOIN NDWH.dbo.DimFacility f on f.FacilityKey = art.FacilityKey -LEFT JOIN NDWH.dbo.DimAgency a on a.AgencyKey = art.AgencyKey -LEFT JOIN NDWH.dbo.DimPatient pat on pat.PatientKey = art.PatientKey -LEFT JOIN NDWH.dbo.DimPartner p on p.PartnerKey = art.PartnerKey -WHERE pat.IsTXCurr = 1 -GROUP BY - MFLCode, - f.FacilityName, - County, - SubCounty, - p.PartnerName, - a.AgencyName, - Gender, - age.DATIMAgeGroup, - StabilityAssessment, - DifferentiatedCare +select distinct + Mflcode, + F.Facilityname, + County, + Subcounty, + P.Partnername, + A.Agencyname, + Gender, + Age.Datimagegroup as Agegroup, + Stabilityassessment, + Differentiatedcare, + cast(GETDATE() as date) as Loaddate, + SUM(Onmmd) as Patients_Onmmd, + SUM(case when Onmmd = 0 then 1 else 0 end) as Patients_Nonmmd, + COUNT(Stabilityassessment) as Stability, + SUM(Pat.Istxcurr) as Txcurr +into Reporting.Dbo.Aggregatedsd +from Ndwh.Dbo.Factart as Art +left join Ndwh.Dbo.Factlatestobs as Lob on Art.Patientkey = Lob.Patientkey +left join Ndwh.Dbo.Dimagegroup as Age on Art.Agegroupkey = Age.Agegroupkey +left join Ndwh.Dbo.Dimfacility as F on Art.Facilitykey = F.Facilitykey +left join Ndwh.Dbo.Dimagency as A on Art.Agencykey = A.Agencykey +left join Ndwh.Dbo.Dimpatient as Pat on Art.Patientkey = Pat.Patientkey +left join Ndwh.Dbo.Dimpartner as P on Art.Partnerkey = P.Partnerkey +where Pat.Istxcurr = 1 +group by + Mflcode, + F.Facilityname, + County, + Subcounty, + P.Partnername, + A.Agencyname, + Gender, + Age.Datimagegroup, + Stabilityassessment, + Differentiatedcare -GO +go From 66be4f3136ce84749f46acd4ff034c8090ea1340 Mon Sep 17 00:00:00 2001 From: Mumo Ngungu Date: Mon, 14 Oct 2024 15:55:31 +0300 Subject: [PATCH 2/8] test sqlfluff --- .pre-commit-config.yaml | 2 +- README.md | 13 +++++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 936cb0ac..dc24511a 100644 --- a/.pre-commit-config.yaml 
+++ b/.pre-commit-config.yaml @@ -1,6 +1,6 @@ repos: - repo: https://github.com/sqlfluff/sqlfluff - rev: stable_version + rev: 3.2.3 hooks: - id: sqlfluff-lint # For dbt projects, this installs the dbt "extras". diff --git a/README.md b/README.md index 8b8044a5..f2c6ec07 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,15 @@ # dwh-etl dwapi-etl + +# Setting up sqlfluff + ## Requirements + - Make sure you have python 3.8 or higher + +## Setup + - Create a virtual python virtual environment by runing python3.8 -m venv (e.g. `python3.8 -m venv venv`) + - Activate virtual environment by running: source venv/Scripts/activate + - Install the following packages by running: + - pip install sqlfluff + - pip install pre-commit + + - Run `pre-commit install` to to set up the git hook scripts in the config file `.pre-commit-config.yaml \ No newline at end of file From 7663ff8ddcb678b14db4e99be81fd4534d0b0f3d Mon Sep 17 00:00:00 2001 From: Mumo Ngungu Date: Tue, 15 Oct 2024 10:15:59 +0300 Subject: [PATCH 3/8] test lint --- .pre-commit-config.yaml | 7 ------- README.md | 29 ++++++++++++++++++++--------- Scripts/.sqlfluff | 8 ++++++-- 3 files changed, 26 insertions(+), 18 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index dc24511a..86981551 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -3,11 +3,4 @@ repos: rev: 3.2.3 hooks: - id: sqlfluff-lint - # For dbt projects, this installs the dbt "extras". 
- # You will need to select the relevant dbt adapter for your dialect - # (https://docs.getdbt.com/docs/available-adapters): - # additional_dependencies: ['', 'sqlfluff-templater-dbt'] - id: sqlfluff-fix - # Arbitrary arguments to show an example - # args: [--rules, "LT02,CP02"] - # additional_dependencies: ['', 'sqlfluff-templater-dbt'] diff --git a/README.md b/README.md index f2c6ec07..169b908a 100644 --- a/README.md +++ b/README.md @@ -1,15 +1,26 @@ # dwh-etl dwapi-etl -# Setting up sqlfluff + +# SQLFluff +SQLFluff is an open source, dialect-flexible and configurable SQL linter. Designed with ELT applications in mind, SQLFluff also works with Jinja templating and dbt. SQLFluff will auto-fix most linting errors, allowing you to focus your time on what matters. More documentation at: https://sqlfluff.com/ + +## Setting up sqlfluff ## Requirements - - Make sure you have python 3.8 or higher +- Make sure you have python 3.8 or higher + +### Setup +- Create a virtual python virtual environment by runing python3.8 -m venv (e.g. `python3.8 -m venv venv`) +- Activate virtual environment by running: source venv/Scripts/activate +- Install the following packages by running: + - pip install sqlfluff + - pip install pre-commit + +- Run `pre-commit install` to set up the git hook scripts defined in the config file `.pre-commit-config.yaml` -## Setup - - Create a virtual python virtual environment by runing python3.8 -m venv (e.g. `python3.8 -m venv venv`) - - Activate virtual environment by running: source venv/Scripts/activate - - Install the following packages by running: - - pip install sqlfluff - - pip install pre-commit +### Maintenance of the rules +The linting and fixing of the sql files is controlled by the config file `Scripts/.sqlfluff` +You can edit or add new rules in this file.
The various rules can be found at https://docs.sqlfluff.com/en/2.1.3/rules.html# - - Run `pre-commit install` to to set up the git hook scripts in the config file `.pre-commit-config.yaml \ No newline at end of file +### Usage +When you run `git commit`, sqlfluff will lint and auto-fix any changed .sql files \ No newline at end of file diff --git a/Scripts/.sqlfluff b/Scripts/.sqlfluff index a734dc39..0f44dd4e 100644 --- a/Scripts/.sqlfluff +++ b/Scripts/.sqlfluff @@ -8,7 +8,7 @@ tab_space_size = 2 capitalisation_policy = lower [sqlfluff:rules:capitalisation.identifiers] -extended_capitalisation_policy = pascal +extended_capitalisation_policy = lower unquoted_identifiers_policy = all @@ -16,4 +16,8 @@ unquoted_identifiers_policy = all aliasing.table = explicit [sqlfluff:rules:layout.set_operators] -set_operator_on_new_line = ['UNION', 'UNION ALL'] \ No newline at end of file +set_operator_on_new_line = ['UNION', 'UNION ALL'] + + +[sqlfluff:rules:capitalisation.functions] +capitalisation_policy = lower \ No newline at end of file From a714725348185228b73abefd404e38b65bcf9ddf Mon Sep 17 00:00:00 2001 From: Mumo Ngungu Date: Tue, 15 Oct 2024 10:50:16 +0300 Subject: [PATCH 4/8] test lint --- Scripts/.sqlfluff | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Scripts/.sqlfluff b/Scripts/.sqlfluff index 0f44dd4e..9c421cc7 100644 --- a/Scripts/.sqlfluff +++ b/Scripts/.sqlfluff @@ -1,6 +1,9 @@ [sqlfluff] dialect = tsql +[sqlfluff] +exclude_rules = AM01 + [sqlfluff:indentation] tab_space_size = 2 @@ -20,4 +23,4 @@ set_operator_on_new_line = ['UNION', 'UNION ALL'] [sqlfluff:rules:capitalisation.functions] -capitalisation_policy = lower \ No newline at end of file +extended_capitalisation_policy = lower \ No newline at end of file From 49d5c7f948653e395d55b412be6c8c4bd7a0907d Mon Sep 17 00:00:00 2001 From: Mumo Ngungu Date: Tue, 15 Oct 2024 10:51:23 +0300 Subject: [PATCH 5/8] test lint --- Scripts/.sqlfluff | 4 +--- 1 file changed, 1
insertion(+), 3 deletions(-) diff --git a/Scripts/.sqlfluff b/Scripts/.sqlfluff index 9c421cc7..c2f8389e 100644 --- a/Scripts/.sqlfluff +++ b/Scripts/.sqlfluff @@ -1,8 +1,6 @@ [sqlfluff] dialect = tsql - -[sqlfluff] -exclude_rules = AM01 +exclude_rules = AM01 [sqlfluff:indentation] tab_space_size = 2 From 8006550e137c611de2c831ecd347ddd8fead67a0 Mon Sep 17 00:00:00 2001 From: Mumo Ngungu Date: Tue, 15 Oct 2024 10:51:34 +0300 Subject: [PATCH 6/8] test lint --- Scripts/REPORTING/load_aggregate_dsd.sql | 73 ++++++++++++------------ Scripts/REPORTING/load_all_emr_sites.sql | 70 +++++++++++------------ 2 files changed, 70 insertions(+), 73 deletions(-) diff --git a/Scripts/REPORTING/load_aggregate_dsd.sql b/Scripts/REPORTING/load_aggregate_dsd.sql index c9bc0249..5bd6080d 100644 --- a/Scripts/REPORTING/load_aggregate_dsd.sql +++ b/Scripts/REPORTING/load_aggregate_dsd.sql @@ -1,42 +1,41 @@ -if OBJECT_ID(N'[REPORTING].[dbo].[AggregateDSD]', N'U') is not null - drop table Reporting.Dbo.Aggregatedsd +if object_id(N'[REPORTING].[dbo].[AggregateDSD]', N'U') is not null + drop table reporting.dbo.aggregatedsd go select distinct - Mflcode, - F.Facilityname, - County, - Subcounty, - P.Partnername, - A.Agencyname, - Gender, - Age.Datimagegroup as Agegroup, - Stabilityassessment, - Differentiatedcare, - cast(GETDATE() as date) as Loaddate, - SUM(Onmmd) as Patients_Onmmd, - SUM(case when Onmmd = 0 then 1 else 0 end) as Patients_Nonmmd, - COUNT(Stabilityassessment) as Stability, - SUM(Pat.Istxcurr) as Txcurr -into Reporting.Dbo.Aggregatedsd -from Ndwh.Dbo.Factart as Art -left join Ndwh.Dbo.Factlatestobs as Lob on Art.Patientkey = Lob.Patientkey -left join Ndwh.Dbo.Dimagegroup as Age on Art.Agegroupkey = Age.Agegroupkey -left join Ndwh.Dbo.Dimfacility as F on Art.Facilitykey = F.Facilitykey -left join Ndwh.Dbo.Dimagency as A on Art.Agencykey = A.Agencykey -left join Ndwh.Dbo.Dimpatient as Pat on Art.Patientkey = Pat.Patientkey -left join Ndwh.Dbo.Dimpartner as P on Art.Partnerkey = 
P.Partnerkey -where Pat.Istxcurr = 1 + f.mflcode, + f.facilityname, + f.county, + f.subcounty, + p.partnername, + a.agencyname, + pat.gender, + age.datimagegroup as agegroup, + art.stabilityassessment, + art.differentiatedcare, + cast(getdate() as date) as loaddate, + sum(art.onmmd) as patients_onmmd, + sum(case when art.onmmd = 0 then 1 else 0 end) as patients_nonmmd, + count(art.stabilityassessment) as stability, + sum(pat.istxcurr) as txcurr +into reporting.dbo.aggregatedsd +from ndwh.dbo.factart as art +left join ndwh.dbo.factlatestobs as lob on art.patientkey = lob.patientkey +left join ndwh.dbo.dimagegroup as age on art.agegroupkey = age.agegroupkey +left join ndwh.dbo.dimfacility as f on art.facilitykey = f.facilitykey +left join ndwh.dbo.dimagency as a on art.agencykey = a.agencykey +left join ndwh.dbo.dimpatient as pat on art.patientkey = pat.patientkey +left join ndwh.dbo.dimpartner as p on art.partnerkey = p.partnerkey +where pat.istxcurr = 1 group by - Mflcode, - F.Facilityname, - County, - Subcounty, - P.Partnername, - A.Agencyname, - Gender, - Age.Datimagegroup, - Stabilityassessment, - Differentiatedcare - + f.mflcode, + f.facilityname, + f.county, + f.subcounty, + p.partnername, + a.agencyname, + pat.gender, + age.datimagegroup, + art.stabilityassessment, + art.differentiatedcare go diff --git a/Scripts/REPORTING/load_all_emr_sites.sql b/Scripts/REPORTING/load_all_emr_sites.sql index 0be3f0b9..4d2fd6a2 100644 --- a/Scripts/REPORTING/load_all_emr_sites.sql +++ b/Scripts/REPORTING/load_all_emr_sites.sql @@ -1,40 +1,38 @@ -IF OBJECT_ID(N'REPORTING.dbo.all_EMRSites', N'U') IS NOT NULL - DROP TABLE REPORTING.dbo.all_EMRSites; +if object_id(N'REPORTING.dbo.all_EMRSites', N'U') is not NULL + drop table reporting.dbo.all_emrsites; -WITH ModulesUptake AS ( - SELECT - MFLCode, - FacilityName, - SubCounty, - County, - isEMRSite, - PartnerName, - AgencyName, - isCT, - modules.isHTS, - isHTSML, - isIITML, - isOTZ, - isOVC, - isPMTCT, - isPrep, - fac.Latitude, - 
fac.Longitude, - EMR_Status, - modules.EMR, - modules.owner, - modules.InfrastructureType, - modules.KEPH_Level, - CAST(GETDATE() AS DATE) AS LoadDate - FROM NDWH.dbo.FactModulesuptake AS modules - LEFT JOIN NDWH.dbo.DimFacility fac ON fac.FacilityKey = modules.FacilityKey - LEFT JOIN NDWH.dbo.DimPartner pat ON pat.PartnerKey = modules.Partnerkey - LEFT JOIN NDWH.dbo.DimAgency agency ON agency.AgencyKey = modules.Agencykey +with modulesuptake as ( + select + fac.mflcode, + fac.facilityname, + fac.subcounty, + fac.county, + fac.isemrsite, + pat.partnername, + agency.agencyname, + modules.isct, + modules.ishts, + modules.ishtsml, + modules.isiitml, + modules.isotz, + modules.isovc, + modules.ispmtct, + modules.isprep, + fac.latitude, + fac.longitude, + fac.emr_status, + modules.emr, + modules.owner, + modules.infrastructuretype, + modules.keph_level, + cast(getdate() as date) as loaddate + from ndwh.dbo.factmodulesuptake as modules + left join ndwh.dbo.dimfacility as fac on modules.facilitykey = fac.facilitykey + left join ndwh.dbo.dimpartner as pat on modules.partnerkey = pat.partnerkey + left join ndwh.dbo.dimagency as agency on modules.agencykey = agency.agencykey ) - -SELECT * -INTO REPORTING.dbo.all_EMRSites -FROM ModulesUptake; - +select * +into reporting.dbo.all_emrsites +from modulesuptake; From eebf324129eab03db252863ecce22dadd9d2b76a Mon Sep 17 00:00:00 2001 From: Mumo Ngungu Date: Wed, 16 Oct 2024 09:09:09 +0300 Subject: [PATCH 7/8] fixing typo on the readme --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 169b908a..6a41e1c5 100644 --- a/README.md +++ b/README.md @@ -5,12 +5,12 @@ dwapi-etl # SQLFluff SQLFluff is an open source, dialect-flexible and configurable SQL linter. Designed with ELT applications in mind, SQLFluff also works with Jinja templating and dbt. SQLFluff will auto-fix most linting errors, allowing you to focus your time on what matters. 
More documentation at: https://sqlfluff.com/ -## Setting up sqlfluff +## Setting up sqlfluff locally ## Requirements - Make sure you have python 3.8 or higher ### Setup -- Create a virtual python virtual environment by runing python3.8 -m venv (e.g. `python3.8 -m venv venv`) +- Create a python virtual environment by running python3.8 -m venv (e.g. `python3.8 -m venv venv`) - Activate virtual environment by running: source venv/Scripts/activate - Install the following packages by running: - pip install sqlfluff From 90633dda5c726c68d8ef2b79a2c4ec065e88c5be Mon Sep 17 00:00:00 2001 From: Mumo Ngungu Date: Wed, 16 Oct 2024 09:11:03 +0300 Subject: [PATCH 8/8] fixing typo on the readme --- README.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/README.md b/README.md index 6a41e1c5..b2925cef 100644 --- a/README.md +++ b/README.md @@ -6,9 +6,8 @@ dwapi-etl SQLFluff is an open source, dialect-flexible and configurable SQL linter. Designed with ELT applications in mind, SQLFluff also works with Jinja templating and dbt. SQLFluff will auto-fix most linting errors, allowing you to focus your time on what matters. More documentation at: https://sqlfluff.com/ ## Setting up sqlfluff locally - ## Requirements +### Requirements - Make sure you have python 3.8 or higher - ### Setup - Create a python virtual environment by running python3.8 -m venv (e.g. `python3.8 -m venv venv`) - Activate virtual environment by running: source venv/Scripts/activate