-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathschema.sql
168 lines (142 loc) · 6.34 KB
/
schema.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
-- schema.sql
-- (c) 2020-2023 McGill University
-- authors: Romaine Gregoire (original SQLite version); David Lougheed
-- CONSENTS -------------------------------------------------------------------
create table if not exists term_consents
(
-- Core columns related to consents
id serial primary key,
issuer text not null,
sub text not null, -- subject - user ID within the issuer
version integer not null check (version > 0),
consent bool not null, -- has the user consented to this version of the terms
-- Extra profile information for identification
idp text not null default '', -- Blank until we get CILogon going
idp_name text not null default '', -- Blank until we get CILogon going
email text not null default '', -- Blank until we get CILogon going
-- Extra profile information, unstructured
extra jsonb not null default '{}'::jsonb,
UNIQUE (issuer, sub)
);
create table if not exists term_consents_ip
(
id serial primary key,
ip_addr text not null,
version integer not null check (version > 0),
consent bool not null, -- has the user consented to this version of the terms
ts timestamp not null default (now() at time zone 'utc'),
UNIQUE (ip_addr)
);
-------------------------------------------------------------------------------
-- ASSAYS ---------------------------------------------------------------------
create table if not exists assays
(
"id" smallserial primary key,
"name" text not null unique
);
insert into assays (name)
values ('RNA-seq'), ('ATAC-seq'), ('H3K4me1'), ('H3K4me3'), ('H3K27ac'), ('H3K27me3')
on conflict on constraint assays_name_key do nothing;
-------------------------------------------------------------------------------
-- GENES AND FEATURES ---------------------------------------------------------
create table if not exists genes
(
"id" serial primary key, -- use an integer rather than a gene name for space
"name_norm" varchar(31) not null unique,
"name" varchar(31) not null unique
);
create table if not exists features
(
-- Even though it's duplicating information, we use an int as the pk to
-- save on storage.
"id" serial primary key, -- use an integer rather than a gene name for space TODO: small serial?
"nat_id" varchar(50) not null unique,
"chrom" varchar(31) not null,
"start" integer not null,
"end" integer not null,
"strand" varchar(1),
"assay" smallint not null, -- put assay next to strand to keep tuple smaller
"gene" integer,
-- optional:
-- pre-computed, (presumably batch-corrected etc.) array of treatment-arrays of points for the peak to render in
-- the box plot, instead of pulling live from the bigWigs. treatments and samples MUST be in alphabetical order of
-- their IDs, e.g., [Flu: [<value for AF01>, ..., <value for EU01>], NI: [...]]
-- if the array is NULL, points SHOULD be pulled from bigWigs instead
"points" real[] default null,
foreign key ("assay") references assays ("id"),
foreign key ("gene") references genes ("id")
);
-------------------------------------------------------------------------------
-- PEAKS, SNPS, RELATED CACHED STUFF ------------------------------------------
create table if not exists snps
(
"id" serial primary key,
"nat_id" varchar(32) unique,
"chrom" varchar(31) not null,
"position" integer not null -- 32 bit - enough to contain
);
create table if not exists peaks
(
"id" serial primary key,
"snp" integer not null,
"feature" integer not null,
-- values as array of treatments (in alphabetical order), e.g., [<p_Flu>, <p_NI>]
"values" real[] not null,
foreign key ("snp") references snps ("id"),
foreign key ("feature") references features ("id")
);
create index if not exists peaks_snp_idx
on peaks("snp");
create index if not exists peaks_feature_idx
on peaks("feature");
create table if not exists features_by_snp
(
"id" serial primary key,
"snp" integer not null unique,
"minValueMin" real not null,
"mostSignificantFeatureID" integer not null, -- TODO: rename
"nFeatures" integer not null,
foreign key ("snp") references snps ("id"),
foreign key ("mostSignificantFeatureID") references peaks ("id")
);
create table if not exists features_by_gene
(
"id" serial primary key,
"gene" integer not null unique,
"minValueMin" real not null,
"mostSignificantFeatureID" integer not null, -- TODO: rename
"nFeatures" integer not null,
foreign key ("gene") references genes ("id"),
foreign key ("mostSignificantFeatureID") references peaks ("id")
);
-- create table if not exists features_by_position
-- (
-- id serial primary key,
-- chrom text not null,
-- position integer not null,
-- minValueMin double precision not null,
-- mostSignificantFeatureID integer not null,
-- nFeatures integer not null,
--
-- unique (chrom, position),
-- foreign key (mostSignificantFeatureID) references peaks (id)
-- );
-------------------------------------------------------------------------------
-- BINNED MOST SIGNIFICANT PEAKS (for Manhattan plots) ------------------------
create table if not exists binned_most_significant_peaks
(
"id" serial primary key,
"chrom" varchar(31) not null,
"pos_bin" integer not null, -- binned by SNP position
"peak" integer, -- nullable -> if null, no significant peak
foreign key ("peak") references peaks ("id")
);
-------------------------------------------------------------------------------
-- SESSIONS -------------------------------------------------------------------
create table if not exists sessions
(
"hash" varchar(32) primary key,
"samples" text not null,
"peak" text not null
);
-------------------------------------------------------------------------------