-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsamples.mjs
79 lines (59 loc) · 2.21 KB
/
samples.mjs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
// noinspection JSUnusedGlobalSymbols
/*
* samples.mjs
*/
import Tabix from "@gmod/tabix";
import VCF from "@gmod/vcf";
import config from "../config.js";
import envConfig from "../envConfig.js";
import {
GENOTYPE_STATE_HET,
GENOTYPE_STATE_HOM,
GENOTYPE_STATE_REF,
} from "../helpers/genome.mjs";
const { TabixIndexedFile } = Tabix;

// Handle on the tabix-indexed genotype VCF; its path is read from envConfig.GENOTYPE_VCF_PATH.
const VCF_TABIX_FILE = new TabixIndexedFile({ path: envConfig.GENOTYPE_VCF_PATH });

// Parser built from the VCF's own header. Because of the top-level await, importing this
// module waits for the header read to finish (and fails if that read rejects).
const vcfParser = new VCF.default({ header: await VCF_TABIX_FILE.getHeader() });

// Predicate used by normalizeSamplesMap to pick the variant line to report.
// It can be overridden via config.samples.vcfFindFn (note the config key says "Find" while the
// local name says "Filter"). The default accepts only lines whose REF and every ALT allele are
// exactly one character long.
// ALT is checked to be an array first: a line without a usable ALT list (e.g. a "." ALT column
// that the parser does not turn into an array) is skipped instead of throwing a TypeError.
const vcfFilterFn = config.samples?.vcfFindFn
  ?? ((line) => line.REF.length === 1
    && Array.isArray(line.ALT)
    && line.ALT.every((a) => a.length === 1));

// Optional contig-name rewrite applied by queryMap before querying the VCF
// (config.samples.vcfChrTransform); defaults to the identity function.
const vcfChrTransform = config.samples?.vcfChrTransform ?? ((chr) => chr);
// Default export: an object exposing queryMap only.
export default { queryMap };
/**
 * Queries the genotype VCF for a region and resolves with the normalized per-sample map.
 *
 * The contig name is passed through vcfChrTransform, and both coordinates are converted with
 * `toString()` + base-10 `parseInt` before being handed to vcfQuery. The resulting lines are
 * then fed to normalizeSamplesMap.
 *
 * @param chrom - Contig name as known to the caller.
 * @param start - Region start (1-based, closed, per the coordinate note in vcfQuery).
 * @param [end=start] - Region end; defaults to `start`, i.e. a single position.
 * @returns {Promise} Resolves with whatever normalizeSamplesMap returns for the queried lines.
 */
export function queryMap(chrom, start, end = start) {
  const contig = vcfChrTransform(chrom);
  const startPos = parseInt(start.toString(), 10);
  const endPos = parseInt(end.toString(), 10);

  return vcfQuery(contig, startPos, endPos)
    .then((lines) => normalizeSamplesMap(lines));
}
/**
 * Reads the lines of the indexed VCF that fall in a region and parses each one with vcfParser.
 *
 * @param contig - Contig name, passed to the tabix lookup unchanged.
 * @param start - Region start, 1-based and inclusive.
 * @param end - Region end, 1-based and inclusive.
 * @returns {Promise<Array>} Parsed lines, in the order the tabix callback delivered them.
 */
export async function vcfQuery(contig, start, end) {
  const parsed = [];
  const collect = (rawLine) => parsed.push(vcfParser.parseLine(rawLine));

  // Callers give 1-based closed coordinates, whereas tabix JS wants 0-based half-open ones:
  // shifting only the start down by one converts between the two.
  await VCF_TABIX_FILE.getLines(contig, start - 1, end, collect);

  console.info(`queried vcf: ${contig}:${start}-${end}; got ${parsed.length} lines`);
  return parsed;
}
/**
 * Turns parsed VCF lines into a description of one variant plus a genotype entry per sample.
 *
 * The first line accepted by vcfFilterFn is used; all other lines are ignored.
 *
 * @param {Array|undefined|null} lines - Parsed VCF lines (as produced by vcfQuery).
 * @returns {object|undefined} `undefined` (after logging an error) when `lines` is nullish or no
 *   line passes the filter. Otherwise an object
 *   `{ chrom, start, end, ref, alts, samples }` where `end` is `POS + REF.length` and
 *   `samples` maps each sample ID to `{ value, type, variant }`:
 *   `value` is the raw GT string, `type` is one of GENOTYPE_STATE_REF / _HET / _HOM, and
 *   `variant` is the shared `{ chrom, start, end, ref, alts }` object.
 *   Samples are omitted when their genotype cannot be classified: no GT string, a call that is
 *   not diploid, or a call with a missing (".") allele.
 */
export function normalizeSamplesMap(lines) {
  // `lines` may be nullish; the error message below already anticipates that, so the lookup
  // itself must not throw before we get there.
  const variant = lines?.find(vcfFilterFn);
  if (!variant) {
    console.error(`could not find any variants (got ${lines?.length ?? 'undefined'} lines)`);
    return undefined;
  }

  const variantData = {
    chrom: variant.CHROM,
    start: variant.POS,
    end: variant.POS + variant.REF.length,
    ref: variant.REF,
    alts: variant.ALT,
  };
  const res = { ...variantData, samples: {} };

  for (const [sampleID, record] of Object.entries(variant.SAMPLES)) {
    // Only the first GT entry is used; skip samples that carry no GT string at all.
    const value = record?.GT?.[0];
    if (typeof value !== "string") continue;

    // Alleles are separated by "|" (phased) or "/" (unphased).
    const alleles = value.split(/[|/]/);
    if (alleles.length !== 2) continue; // TODO: only works for diploid

    // "." marks a missing allele; without this check "./." would compare equal on both sides
    // and be reported as homozygous, and "./1" as heterozygous.
    if (alleles.includes(".")) continue;

    const [first, second] = alleles;
    let type;
    if (first === "0" && second === "0") {
      type = GENOTYPE_STATE_REF;
    } else if (first !== second) {
      type = GENOTYPE_STATE_HET;
    } else {
      type = GENOTYPE_STATE_HOM;
    }

    res.samples[sampleID] = { value, type, variant: variantData };
  }

  return res;
}