Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor code and simplify file processing #48

Closed
wants to merge 20 commits into from
Closed
Show file tree
Hide file tree
Changes from 16 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 16 additions & 6 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
"pretest": "npm run build && node ./dist/src/bin/index.js ./__mocks__/content",
"test": "node --experimental-vm-modules node_modules/jest/bin/jest.js",
"build": "tsc --p tsconfig.lib.json",
"dev": "tsc --p tsconfig.lib.json --watch",
"changeset": "changeset",
"prepublishOnly": "npm run build",
"release": "changeset publish"
Expand Down Expand Up @@ -55,6 +56,7 @@
"@changesets/changelog-github": "^0.4.8",
"@changesets/cli": "^2.26.1",
"@types/jest": "^29.5.1",
"@types/node": "^20.8.7",
"@typescript-eslint/eslint-plugin": "^5.59.5",
"@typescript-eslint/parser": "^5.59.5",
"eslint": "^8.40.0",
Expand Down
File renamed without changes.
116 changes: 116 additions & 0 deletions src/lib/DbQueryManager.ts
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd recommend avoiding refactoring the DB here - let's just focus on process.ts as discussed in the issue.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd leave the refactoring done so far in this PR. It makes the code much easier to reason about.

Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
import { Knex } from "knex";
import { MddbFile, MddbTag, MddbLink } from "./schema.js";
import { FilesQuery } from "./types/DbQueryTypes.js";

export class DbQueryManager {
private db: Knex;

constructor(db: Knex) {
this.db = db;
}

async getFileById(id: string): Promise<MddbFile | null> {
const file = await this.db.from("files").where("_id", id).first();
return new MddbFile(file);
}

async getFileByUrl(url: string): Promise<MddbFile | null> {
const file = await this.db
.from("files")
.where("url_path", encodeURI(url))
.first();
return new MddbFile(file);
}

async getFiles(query?: FilesQuery): Promise<MddbFile[]> {
const { filetypes, tags, extensions, folder, frontmatter } = query || {};

const files = await this.db
// TODO join only if tags are specified ?
.leftJoin("file_tags", "files._id", "file_tags.file")
.where((builder) => {
// TODO temporary solution before we have a proper way to filter files by and assign file types
if (folder) {
builder.whereLike("url_path", `${folder}/%`);
}

if (tags) {
builder.whereIn("tag", tags);
}

if (extensions) {
builder.whereIn("extension", extensions);
}

if (filetypes) {
builder.whereIn("filetype", filetypes);
}

if (frontmatter) {
Object.entries(frontmatter).forEach(([key, value]) => {
if (typeof value === "string" || typeof value === "number") {
builder.whereRaw(`json_extract(metadata, '$.${key}') = ?`, [
value,
]);
} else if (typeof value === "boolean") {
if (value) {
builder.whereRaw(`json_extract(metadata, '$.${key}') = ?`, [
true,
]);
} else {
builder.where(function () {
this.whereRaw(`json_extract(metadata, '$.${key}') = ?`, [
false,
]).orWhereRaw(`json_extract(metadata, '$.${key}') IS NULL`);
});
}
}

// To check if the provided value exists in an array inside the JSON
else {
builder.whereRaw(`json_extract(metadata, '$.${key}') LIKE ?`, [
`%${value}%`,
]);
}
});
}
})
.select("files.*")
.from("files")
.groupBy("_id");

return files.map((file) => new MddbFile(file));
}

async getTags(): Promise<MddbTag[]> {
const tags = await this.db("tags").select();
return tags.map((tag) => new MddbTag(tag));
}

async getLinks(query?: {
fileId: string;
linkType?: "normal" | "embed";
direction?: "forward" | "backward";
}): Promise<MddbLink[]> {
const { fileId, direction = "forward", linkType } = query || {};
const joinKey = direction === "forward" ? "from" : "to";
const where = {
[joinKey]: fileId,
};
if (linkType) {
where["link_type"] = linkType;
}
const dbLinks = await this.db
.select("links.*")
.from("links")
.rightJoin("files", `links.${joinKey}`, "=", "files._id")
.where(where);

const links = dbLinks.map((link) => new MddbLink(link));
return links;
}

_destroyDb() {
this.db.destroy();
}
}
79 changes: 79 additions & 0 deletions src/lib/indexFolderToObjects.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
import { getUniqueValues, recursiveWalkDir } from "../utils/index.js";
import type { WikiLink } from "../utils/index.js";
import { extractFileSchemeFromObject } from "../utils/extractFileSchemeFromObject.js";
import { readLocalMarkdownFileToObject } from "./readLocalMarkdownFileToObject.js";
import type { File, FileTag, Link } from "./types/schemaTypes.js";

export function indexFolderToObjects(
folderPath: string,
pathToUrlResolver: (filePath: string) => string,
ignorePatterns?: RegExp[]
) {
const filePathsToIndex = recursiveWalkDir(folderPath);
const files: File[] = [];
const tags: string[] = [];
const fileTags: FileTag[] = [];
const links: Link[] = [];
const filteredFilePathsToIndex = filePathsToIndex.filter((filePath) =>
shouldIncludeFile(filePath, ignorePatterns)
);

for (const filePath of filteredFilePathsToIndex) {
const fileObject = readLocalMarkdownFileToObject(
folderPath,
filePath,
filePathsToIndex,
pathToUrlResolver
);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This looks a bit awkward imo... Both the naming, the arguments it takes and unnecesarily adds a layer of abstraction. I wonder if we could get rid of the readLocalMarkdownFileToObject whatsoever. This could just be:

Suggested change
const fileObject = readLocalMarkdownFileToObject(
folderPath,
filePath,
filePathsToIndex,
pathToUrlResolver
);
const id = generateFileIdFromPath(filePath);
const extension = getFileExtensionFromPath(filePath);
if (MddbFile.supportedExtensions.includes(extension)) {
return { id, extension, filePath };
}
const pathRelativeToFolder = path.relative(folderPath, filePath);
const urlPath = pathToUrlResolver(pathRelativeToFolder);
const data = parseMarkdownFile(filePath)
...

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I agree.
This could benefit from a slight refactor...


const file = extractFileSchemeFromObject(fileObject);
Copy link
Member

@olayway olayway Nov 15, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd suggest "marshalling" (or molding the object into the ready-to-insert-to-db structure) is done by relevant Mddb* classes (in this case MddbFile). So I'd just add the fileObject to the list and let MddbFile do the rest. And if you'd really want to do this here, creating a function for it is not necessary, as you could just destructure what's needed.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm ready to proceed, but I have a question about the structure of the data in the JSON files. Do you think it should mirror the format of the database? If they should align, we might want to keep things as they are. However, if variations are acceptable, I am more than willing to implement the modifications based on your recommended alternative structure.

files.push(file);

tags.push(...fileObject.tags);

const fileTagsToInsert = fileObject.tags.map((tag) => ({
tag: tag,
file: fileObject._id,
}));
fileTags.push(...fileTagsToInsert);

const linksToInsert: Link[] = processWikiLinks(
fileObject.links,
fileObject._id,
filePathsToIndex
);
links.push(...linksToInsert);
}

const uniqueTags = getUniqueValues(tags);
const TagsToInsert = uniqueTags.map((tag) => ({ name: tag }));
return {
files: files,
tags: TagsToInsert,
fileTags: fileTags,
links: links,
};
}

function processWikiLinks(
links: WikiLink[],
fileId: string,
filePathsToIndex: string[]
): Link[] {
return links
.map((link) => ({
from: fileId,
to: filePathsToIndex.find((file) => file === link.linkSrc)!,

Check warning on line 66 in src/lib/indexFolderToObjects.ts

View workflow job for this annotation

GitHub Actions / Lint & format check

Forbidden non-null assertion
link_type: link.linkType,
}))
.filter((link) => link.to !== undefined);
}

function shouldIncludeFile(
filePath: string,
ignorePatterns?: RegExp[]
): boolean {
return !(
ignorePatterns && ignorePatterns.some((pattern) => pattern.test(filePath))
);
}
3 changes: 2 additions & 1 deletion src/lib/markdowndb.spec.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
// import knex from "knex";
import { MarkdownDB } from "./markdowndb";
import { File, MddbFile, Table } from "./schema";
import { MddbFile } from "./schema";
import { recursiveWalkDir } from "../utils";
import { File, Table } from "./types/schemaTypes";

/**
* @jest-environment node
Expand Down Expand Up @@ -173,14 +174,14 @@
test("can find file by url path", async () => {
const dbFile = await mddb.getFileByUrl("blog/blog2");
expect(dbFile).not.toBeNull();
expect(dbFile!.url_path).toBe("blog/blog2");

Check warning on line 177 in src/lib/markdowndb.spec.ts

View workflow job for this annotation

GitHub Actions / Lint & format check

Forbidden non-null assertion
});

test("can find file by id", async () => {
const dbFile = await mddb.getFileByUrl("blog/blog2");
const dbFileById = await mddb.getFileById(dbFile!._id);

Check warning on line 182 in src/lib/markdowndb.spec.ts

View workflow job for this annotation

GitHub Actions / Lint & format check

Forbidden non-null assertion
expect(dbFileById).not.toBeNull();
expect(dbFileById!.url_path).toBe("blog/blog2");

Check warning on line 184 in src/lib/markdowndb.spec.ts

View workflow job for this annotation

GitHub Actions / Lint & format check

Forbidden non-null assertion
});
});

Expand Down Expand Up @@ -208,10 +209,10 @@
const toFile = await mddb.getFileByUrl("blog0");

const forwardLinks = await mddb.getLinks({
fileId: fromFile!._id,

Check warning on line 212 in src/lib/markdowndb.spec.ts

View workflow job for this annotation

GitHub Actions / Lint & format check

Forbidden non-null assertion
});
expect(forwardLinks.length).toBe(1);
expect(forwardLinks[0].to).toBe(toFile!._id);

Check warning on line 215 in src/lib/markdowndb.spec.ts

View workflow job for this annotation

GitHub Actions / Lint & format check

Forbidden non-null assertion
});

test("can get all backward links of a file", async () => {
Expand All @@ -220,13 +221,13 @@
const fromFile2 = await mddb.getFileByUrl("blog/blog1");

const backwardLinks = await mddb.getLinks({
fileId: toFile!._id,

Check warning on line 224 in src/lib/markdowndb.spec.ts

View workflow job for this annotation

GitHub Actions / Lint & format check

Forbidden non-null assertion
direction: "backward",
});
const backwardLinksFileIds = backwardLinks.map((l) => l.from);
expect(backwardLinksFileIds).toHaveLength(2);
expect(backwardLinksFileIds).toContain(fromFile1!._id);

Check warning on line 229 in src/lib/markdowndb.spec.ts

View workflow job for this annotation

GitHub Actions / Lint & format check

Forbidden non-null assertion
expect(backwardLinksFileIds).toContain(fromFile2!._id);

Check warning on line 230 in src/lib/markdowndb.spec.ts

View workflow job for this annotation

GitHub Actions / Lint & format check

Forbidden non-null assertion
});
});

Expand Down Expand Up @@ -268,7 +269,7 @@
},
];
// TODO fix types
expect(() => MddbFile.batchInsert(mddb as any, files)).toThrow();

Check warning on line 272 in src/lib/markdowndb.spec.ts

View workflow job for this annotation

GitHub Actions / Lint & format check

Unexpected any. Specify a different type
});
});
});
Expand Down
Loading
Loading