Skip to content

Commit fc85a2e

Browse files
authored
Support multischema for sanitize script (#42)
1 parent 9b93ac0 commit fc85a2e

File tree

1 file changed

+47
-38
lines changed

1 file changed

+47
-38
lines changed

sanitizer/sql/sanitize.sql

+47-38
Original file line numberDiff line numberDiff line change
@@ -7,79 +7,79 @@ create schema sanitizing;
77
-- - sanitize_value
88
-- - sanitize_nullable
99
-- - sanitize_jsonb
-- Pseudonymises every e-mail address stored in column _c of table _s._t.
--
-- The column is treated as a comma-separated list of addresses. Each entry
-- containing an '@' is rewritten as
--     <last 8 hex chars of md5(entry)> || <'@' and everything after it> || 't'
-- and the rewritten entries are joined back with ',' in their original order.
-- Entries without an '@' are dropped. If nothing survives, the fallback
-- address below is stored instead. Rows whose column is NULL are left alone.
--
-- Parameters:
--   _t  table name (unquoted; quoted via format %I)
--   _c  column name
--   _s  schema name
--
-- The rewrite is done with a correlated sub-query on the row itself, so the
-- table does not need an "id" column and duplicate ids cannot merge rows.
create or replace function sanitizing.sanitize_email(_t varchar, _c varchar, _s varchar)
    returns void as
$fn$
declare
    affected bigint; -- ROW_COUNT is reported as bigint
begin
    execute format(
        $sql$
        update %3$I.%1$I as tgt
        set %2$I = coalesce(
            (
                select string_agg(
                           right(md5(part.addr), 8) || substring(part.addr, '@.+$') || 't',
                           ','
                           order by part.pos -- keep the original list order
                       )
                from unnest(regexp_split_to_array(tgt.%2$I, ','))
                         with ordinality as part(addr, pos)
            ),
            -- NOTE(review): the original fallback address was redacted by
            -- e-mail obfuscation in the page this file was recovered from;
            -- this reserved-domain placeholder must be confirmed.
            'sanitized@example.com'
        )
        where tgt.%2$I is not null
        $sql$,
        _t, _c, _s);
    get diagnostics affected = row_count;
    -- include the schema: the same table name may exist in several schemas
    raise notice '%.%.% is sanitized: % rows affected', _s, _t, _c, affected;
end
$fn$
language plpgsql;
3636

-- Overwrites every non-null value of column _c in table _s._t with the
-- constant phone number '93000000'. NULL values stay NULL.
--
-- Parameters:
--   _t  table name (quoted via format %I)
--   _c  column name
--   _s  schema name
create or replace function sanitizing.sanitize_phone(_t varchar, _c varchar, _s varchar)
    returns void as
$$
declare
    affected bigint; -- ROW_COUNT is reported as bigint
begin
    execute format(
        'update %3$I.%1$I set %2$I = ''93000000'' where %2$I is not null;',
        _t, _c, _s);
    get diagnostics affected = row_count;
    -- include the schema: the same table name may exist in several schemas
    raise notice '%.%.% is sanitized: % rows affected', _s, _t, _c, affected;
end
$$
language plpgsql;
4949

-- Replaces every non-null value of column _c in table _s._t with the last
-- 12 hex characters of the MD5 digest of that value.
--
-- Parameters:
--   _t  table name (quoted via format %I)
--   _c  column name
--   _s  schema name
create or replace function sanitizing.sanitize_value(_t varchar, _c varchar, _s varchar)
    returns void as
$$
declare
    affected bigint; -- ROW_COUNT is reported as bigint
begin
    -- md5(NULL) is NULL, so NULL rows would be rewritten with the same value;
    -- skipping them avoids pointless writes and keeps the row count honest.
    execute format(
        'update %3$I.%1$I set %2$I = right(md5(%2$I), 12) where %2$I is not null;',
        _t, _c, _s);
    get diagnostics affected = row_count;
    -- include the schema: the same table name may exist in several schemas
    raise notice '%.%.% is sanitized: % rows affected', _s, _t, _c, affected;
end
$$
language plpgsql;
6262

-- Sets column _c of table _s._t to NULL in every row that currently holds a
-- value.
--
-- Parameters:
--   _t  table name (quoted via format %I)
--   _c  column name
--   _s  schema name
create or replace function sanitizing.sanitize_nullable(_t varchar, _c varchar, _s varchar)
    returns void as
$$
declare
    affected bigint; -- ROW_COUNT is reported as bigint
begin
    -- rows that are already NULL need no write and should not be counted
    execute format(
        'update %1$I.%2$I set %3$I = null where %3$I is not null;',
        _s, _t, _c);
    get diagnostics affected = row_count;
    -- include the schema: the same table name may exist in several schemas
    raise notice '%.%.% is sanitized: % rows affected', _s, _t, _c, affected;
end
$$
language plpgsql;
7575

76-
create or replace function sanitizing.sanitize_as_empty_jsonb(_t varchar, _c varchar)
76+
create or replace function sanitizing.sanitize_as_empty_jsonb(_t varchar, _c varchar, _s varchar)
7777
returns void as
7878
$$
7979
declare
8080
affected numeric;
8181
begin
82-
execute format('update %I set %I = ''{}''::jsonb;', _t, _c);
82+
execute format('update %I.%I set %I = ''{}''::jsonb;', _s, _t, _c);
8383
get diagnostics affected = row_count;
8484
raise notice '%.% is sanitized: % rows affected', _t, _c, affected;
8585
end
@@ -117,13 +117,13 @@ end
117117
$$
118118
language plpgsql;
119119

120-
create or replace function sanitizing.sanitize_jsonb(_t varchar, _c varchar)
120+
create or replace function sanitizing.sanitize_jsonb(_t varchar, _c varchar, _s varchar)
121121
returns void as
122122
$$
123123
declare
124124
affected numeric;
125125
begin
126-
execute format('update %I set %I = sanitizing.get_sanitized_jsonb(%I);', _t, _c, _c);
126+
execute format('update %I.%I set %I = sanitizing.get_sanitized_jsonb(%I);', _s, _t, _c, _c);
127127
get diagnostics affected = row_count;
128128
raise notice '%.% is sanitized: % rows affected', _t, _c, affected;
129129
end
@@ -145,18 +145,22 @@ from pg_catalog.pg_statio_all_tables as st
145145
-- truncate tables that are not needed for development (logs, emails, etc.) before sanitizing;
146146
-- a table must have 'TRUNCATE_ON_SANITIZE' comment
-- Truncates every ordinary table, in any schema, whose table comment is
-- exactly 'TRUNCATE_ON_SANITIZE'.
do $$
declare
    tbl record;
begin
    for tbl in
        select n.nspname, c.relname
        from pg_catalog.pg_class as c
        inner join pg_catalog.pg_namespace as n
            on n.oid = c.relnamespace
        inner join pg_catalog.pg_description as d
            on d.objoid = c.oid
            -- pg_description is shared by all catalogs and also stores column
            -- comments; restrict the match to comments on the relation itself
            and d.classoid = 'pg_catalog.pg_class'::regclass
            and d.objsubid = 0
        where c.relkind = 'r'
          and d.description = 'TRUNCATE_ON_SANITIZE'
    loop
        -- cascade to drop formDataImages etc
        execute format('truncate table %I.%I cascade', tbl.nspname, tbl.relname);
    end loop;
end
$$;
156159

157160
-- call sanitize_email on every column containing SANITIZE_AS_EMAIL in the comment
158-
select sanitizing.sanitize_email(res.table_name :: varchar, res.column_name :: varchar)
161+
select sanitizing.sanitize_email(res.table_name :: varchar, res.column_name :: varchar, res.table_schema :: varchar)
159162
from (select
163+
c.table_schema,
160164
c.table_name,
161165
c.column_name
162166
from pg_catalog.pg_statio_all_tables as st
@@ -166,8 +170,9 @@ from (select
166170
where pgd.description ilike '%SANITIZE_AS_EMAIL%') as res;
167171

168172
-- call sanitize_value on every column containing SANITIZE_AS_VALUE in the comment
169-
select sanitizing.sanitize_value(res.table_name :: varchar, res.column_name :: varchar)
173+
select sanitizing.sanitize_value(res.table_name :: varchar, res.column_name :: varchar, res.table_schema :: varchar)
170174
from (select
175+
c.table_schema,
171176
c.table_name,
172177
c.column_name
173178
from pg_catalog.pg_statio_all_tables as st
@@ -177,8 +182,9 @@ from (select
177182
where pgd.description ilike '%SANITIZE_AS_VALUE%') as res;
178183

179184
-- call sanitize_phone on every column containing SANITIZE_AS_PHONE in the comment
180-
select sanitizing.sanitize_phone(res.table_name :: varchar, res.column_name :: varchar)
185+
select sanitizing.sanitize_phone(res.table_name :: varchar, res.column_name :: varchar, res.table_schema :: varchar)
181186
from (select
187+
c.table_schema,
182188
c.table_name,
183189
c.column_name
184190
from pg_catalog.pg_statio_all_tables as st
@@ -188,8 +194,9 @@ from (select
188194
where pgd.description ilike '%SANITIZE_AS_PHONE%') as res;
189195

190196
-- call sanitize_nullable on every column containing SANITIZE_AS_NULLABLE in the comment
191-
select sanitizing.sanitize_nullable(res.table_name :: varchar, res.column_name :: varchar)
197+
select sanitizing.sanitize_nullable(res.table_name :: varchar, res.column_name :: varchar, res.table_schema :: varchar)
192198
from (select
199+
c.table_schema,
193200
c.table_name,
194201
c.column_name
195202
from pg_catalog.pg_statio_all_tables as st
@@ -199,8 +206,9 @@ from (select
199206
where pgd.description ilike '%SANITIZE_AS_NULLABLE%') as res;
200207

201208
-- call sanitize_jsonb on every column containing SANITIZE_AS_JSONB in the comment
202-
select sanitizing.sanitize_jsonb(res.table_name :: varchar, res.column_name :: varchar)
209+
select sanitizing.sanitize_jsonb(res.table_name :: varchar, res.column_name :: varchar, res.table_schema :: varchar)
203210
from (select
211+
c.table_schema,
204212
c.table_name,
205213
c.column_name
206214
from pg_catalog.pg_statio_all_tables as st
@@ -210,8 +218,9 @@ from (select
210218
where pgd.description ilike '%SANITIZE_AS_JSONB%') as res;
211219

212220
-- call sanitize_as_empty_jsonb on every column containing SANITIZE_AS_EMPTY_JSONB in the comment
213-
select sanitizing.sanitize_as_empty_jsonb(res.table_name :: varchar, res.column_name :: varchar)
221+
select sanitizing.sanitize_as_empty_jsonb(res.table_name :: varchar, res.column_name :: varchar, res.table_schema :: varchar)
214222
from (select
223+
c.table_schema,
215224
c.table_name,
216225
c.column_name
217226
from pg_catalog.pg_statio_all_tables as st

0 commit comments

Comments
 (0)