This repository has been archived by the owner on Oct 23, 2024. It is now read-only.

Full coverage for gcs util

varney committed Dec 22, 2023
1 parent 46d8422 commit 493f033
Showing 4 changed files with 230 additions and 6 deletions.
6 changes: 3 additions & 3 deletions .c8rc
@@ -3,8 +3,8 @@
     "text",
     "html"
   ],
-  "statements": "70",
+  "statements": "75",
   "branches": "100",
-  "functions": "62",
-  "lines": "70"
+  "functions": "74",
+  "lines": "75"
 }
6 changes: 6 additions & 0 deletions config/test.json
@@ -11,6 +11,12 @@
       "projectId": "some-project-1234"
     }
   },
+  "gcsDataLake": {
+    "bucket": "test-data-lake-bucket",
+    "credentials": {
+      "projectId": "some-other-project-5678"
+    }
+  },
   "ses": {
     "accessKeyId": "REPLACED_BY_ENV",
     "secretAccessKey": "REPLACED_BY_ENV"
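Note: the new "gcsDataLake" block mirrors the existing "gcs" block, so the GCS utilities can be pointed at a second bucket by config key (the tests below call lakeUri(..., "gcsDataLake")). A minimal sketch of how such a key is presumably resolved via exp-config; the bucketConfig helper and the default "gcs" key are illustrative assumptions, not the library's actual API:

import config from "exp-config";

// Hypothetical helper: look up a GCS config block by key, falling back to the main "gcs" block.
// The real lib/utils/gcs.js may structure this differently.
function bucketConfig(configKey = "gcs") {
  const { bucket, credentials } = config[configKey];
  return { bucket, credentials };
}

// With config/test.json above, this resolves to "test-data-lake-bucket".
const { bucket } = bucketConfig("gcsDataLake");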
2 changes: 1 addition & 1 deletion lib/utils/gcs.js
@@ -98,7 +98,7 @@ function lakeUri(
     type,
     version ?? "",
     `year=${year}/month=${month}/day=${day}`,
-    `${fileName || type}${fileExt || ""}${compress ? ".gz" : ""}`,
+    `${fileName || type}${fileExt}${compress ? ".gz" : ""}`,
   ]
     .filter(Boolean)
     .join("/");
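Note: the changed line sits in the path-assembly part of lakeUri. A rough sketch of that assembly, assuming fileExt defaults to ".json" in the signature (inferred from the "default filename and extension" test below, not shown in this diff); under that assumption the removed || "" fallback is an unreachable branch, which matters with the 100% branch threshold in .c8rc:

import config from "exp-config";

// Illustrative sketch only; the real lakeUri's signature and prefix handling may differ.
function lakeUriSketch({ system, type, version, date, fileName, fileExt = ".json", compress = false }, configKey = "gcsDataLake") {
  const year = date.getUTCFullYear();
  const month = String(date.getUTCMonth() + 1).padStart(2, "0");
  const day = String(date.getUTCDate()).padStart(2, "0");
  const key = [
    "red/brand/all", // fixed prefix taken from the expected dataLakePath in the tests; possibly parameterized in the real code
    system,
    type,
    version ?? "",
    `year=${year}/month=${month}/day=${day}`,
    `${fileName || type}${fileExt}${compress ? ".gz" : ""}`,
  ]
    .filter(Boolean) // drops the empty segment when no version is given
    .join("/");
  return `gs://${config[configKey].bucket}/${key}`;
}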
222 changes: 220 additions & 2 deletions test/unit/utils/gcs-test.js
@@ -1,4 +1,6 @@
 import config from "exp-config";
+import { fakeGcs } from "@bonniernews/lu-test";
+import stream from "stream";
 
 import {
   createReadStream,
@@ -13,10 +15,226 @@ import {
   toPath,
 } from "../../../lib/utils/gcs.js";
 
-describe("gcs", () => {
-  it("should credentials object", () => {
+const { Readable, Writable, promises: { pipeline } } = stream;
+
+const fileDate = "2020-01-01";
+const pathDate = "year=2020/month=01/day=01";
+const filename = "some-file.json";
+
+const gcsBucket = config.gcs.bucket;
+const gcsDirectory = `brand/some-namespace/some-area/some-classification/some-system/some-data/v1/${pathDate}`;
+
+const filePath = `gs://${gcsBucket}/${gcsDirectory}/${filename}`;
+const fileKey = `${gcsDirectory}/${filename}`;
+const fileContent = "test content";
+
+const dataLakePath = `gs://${config.gcsDataLake.bucket}/red/brand/all/greenfield/some-data/v1/${pathDate}/${filename}`;
+
+describe("create read stream from GCS file", () => {
+  before(fakeGcs.reset);
+  let readStream;
+  it("should create a read stream from GCS", () => {
+    fakeGcs.mockFile(filePath, { content: fileContent });
+    readStream = createReadStream(filePath);
+    readStream.should.be.an.instanceOf(Readable);
+  });
+  it("should return the expected content", async () => {
+    const fileData = [];
+    await pipeline(readStream, async function* (iterable) {
+      for await (const row of iterable) {
+        if (!row) continue;
+        fileData.push(row);
+      }
+      yield;
+    });
+    fileData.join("\n").should.eql(fileContent);
+  });
+});
+
+describe("create write stream to GCS file", () => {
+  before(fakeGcs.reset);
+  let writeStream;
+  it("should create a write stream to GCS", () => {
+    fakeGcs.mockFile(filePath);
+    writeStream = createWriteStream(filePath);
+    writeStream.should.be.an.instanceOf(Writable);
+  });
+  it("should accept the content", async () => {
+    await pipeline(async function* () {
+      yield fileContent;
+    }, writeStream);
+    fakeGcs.written(filePath).should.eql(fileContent);
+  });
+});
+
+describe("check if GCS file exists", () => {
+  beforeEach(fakeGcs.reset);
it("should return that a non-existant file isn't there", async () => {
+    fakeGcs.mockFile(filePath);
+    const fileExists = await exists(filePath);
+    fileExists.should.eql(false);
+  });
+});
+
+describe("get GCS credentials", () => {
+  it("should get the credentials object from config", () => {
     const credentials = getCredentials();
     const expected = JSON.parse(JSON.stringify(config.gcs.credentials));
     credentials.should.eql(expected);
   });
 });
+
+describe("convert date to lake date", () => {
+  it("should return a date in a format we can use in a GCS path", () => {
+    const lakeDate = toLakeDate(fileDate);
+    lakeDate.should.eql(pathDate);
+  });
+});
+
+describe("get data lake path from parameters", () => {
+  it("should return a data lake GCS path based on input parameters", () => {
+    const gcsPath = lakeUri({
+      system: "greenfield",
+      type: "some-data",
+      compress: false,
+      fileExt: ".json",
+      fileName: "some-file",
+      version: "v1",
+      date: new Date(fileDate),
+    }, "gcsDataLake");
+    gcsPath.should.eql(dataLakePath);
+  });
+  it("should return a data lake GCS path without version", () => {
+    const gcsPath = lakeUri({
+      system: "greenfield",
+      type: "some-data",
+      compress: false,
+      fileExt: ".json",
+      fileName: "some-file",
+      date: new Date(fileDate),
+    }, "gcsDataLake");
+    gcsPath.should.eql(dataLakePath.replace("/v1", ""));
+  });
+  it("should return a data lake GCS path for a gzipped file", () => {
+    const gcsPath = lakeUri({
+      system: "greenfield",
+      type: "some-data",
+      compress: true,
+      fileExt: ".json",
+      fileName: "some-file",
+      version: "v1",
+      date: new Date(fileDate),
+    }, "gcsDataLake");
+    gcsPath.should.eql(`${dataLakePath}.gz`);
+  });
+  it("should return a data lake GCS path with default filename and extension", () => {
+    const gcsPath = lakeUri({
+      system: "greenfield",
+      type: "some-data",
+      compress: false,
+      version: "v1",
+      date: new Date(fileDate),
+    }, "gcsDataLake");
+    gcsPath.should.eql(dataLakePath.replace("some-file", "some-data"));
+  });
+});
+
+describe("parse a GCS file path", () => {
+  it("should return the bucket and key from a GCS path", () => {
+    const gcsPath = parseUri(filePath);
+    gcsPath.should.eql({ Bucket: gcsBucket, Key: fileKey });
+  });
+  it("should throw an error with an invalid bucket", () => {
+    try {
+      parseUri(filePath.replace(gcsBucket, "some-other-bucket"));
+    } catch (error) {
+      error.message.should.eql("Invalid gcs bucket some-other-bucket");
+    }
+  });
+});
+
+describe("list GCS path", () => {
+  beforeEach(fakeGcs.reset);
+  it("should return a list of files", async () => {
+    fakeGcs.mockFile(filePath, { content: fileContent });
+    const files = await list(`gs://${gcsBucket}/${gcsDirectory}/${filename}`);
+    files.should.eql(filePath);
+  });
+  it("should return an empty list when no files found", async () => {
+    fakeGcs.mockFile(filePath);
+    const files = await list(`gs://${gcsBucket}/${gcsDirectory}/${filename}`);
+    files.length.should.eql(0);
+  });
+});
+
+describe("get GCS file metadata based on file extension", () => {
+  beforeEach(fakeGcs.reset);
+  it("should return json for json files", async () => {
+    fakeGcs.mockFile(filePath, { content: fileContent });
+    const fileMetadata = await metadata(filePath);
+    fileMetadata.should.eql({ contentType: "application/json" });
+  });
it("should return return json lines for jsonl files", async () => {
+    const testFilePath = `${filePath}.jsonl`;
+    fakeGcs.mockFile(testFilePath, { content: fileContent });
+    const fileMetadata = await metadata(testFilePath);
+    fileMetadata.should.eql({ contentType: "application/x-ndjson" });
+  });
it("should return return csv for csv files", async () => {
+    const testFilePath = `${filePath}.csv`;
+    fakeGcs.mockFile(testFilePath, { content: fileContent });
+    const fileMetadata = await metadata(testFilePath);
+    fileMetadata.should.eql({ contentType: "text/csv" });
+  });
it("should return return default type for unknown files", async () => {
+    const testFilePath = `${filePath}.log`;
+    fakeGcs.mockFile(testFilePath, { content: fileContent });
+    const fileMetadata = await metadata(testFilePath);
+    fileMetadata.should.eql({ contentType: "text/plain" });
+  });
+  it("should return content encoding for gz files", async () => {
+    const gzipFilePath = `${filePath}.gz`;
+    fakeGcs.mockFile(gzipFilePath, { content: fileContent });
+    const fileMetadata = await metadata(gzipFilePath);
+    fileMetadata.should.eql({ contentEncoding: "gzip", contentType: "application/json" });
+  });
+});
+
+describe("generate Greenfield GCS path from parameters", () => {
+  it("should return a GCS path based on input parameters", () => {
+    const gcsPath = toPath("some-namespace",
+      fileDate,
+      gcsBucket,
+      "some-area",
+      "some-classification",
+      "some-system",
+      "some-data",
+      "v1",
+      filename
+    );
+    gcsPath.should.eql(filePath);
+  });
+  it("should return a GCS path with sub-directories", () => {
+    const gcsPath = toPath("some-namespace",
+      fileDate,
+      gcsBucket,
+      "some-area",
+      "some-classification",
+      "some-system",
+      "some-data",
+      "v1",
+      filename,
+      "sub-directory-1",
+      "sub-directory-2",
+      "sub-directory-3"
+    );
+    gcsPath.should.eql(filePath.replace(filename, `sub-directory-1/sub-directory-2/sub-directory-3/${filename}`));
+  });
+  it("should throw an error with an invalid bucket", () => {
+    try {
+      toPath("some-namespace", fileDate, "some-other-bucket");
+    } catch (error) {
+      error.message.should.eql("Invalid gcs bucket some-other-bucket");
+    }
+  });
+});
