Skip to content

Commit

Permalink
get loaded loaders
Browse files Browse the repository at this point in the history
  • Loading branch information
adhityan committed May 30, 2024
1 parent 0ca2330 commit 4951680
Show file tree
Hide file tree
Showing 20 changed files with 3,893 additions and 1,843 deletions.
5,516 changes: 3,748 additions & 1,768 deletions package-lock.json

Large diffs are not rendered by default.

8 changes: 4 additions & 4 deletions package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@llm-tools/embedjs",
"version": "0.0.80",
"version": "0.0.81",
"description": "A NodeJS RAG framework to easily work with LLMs and custom datasets",
"main": "dist/index.js",
"types": "dist/index.d.ts",
Expand Down Expand Up @@ -59,7 +59,7 @@
"@langchain/anthropic": "^0.1.21",
"@langchain/cohere": "^0.0.10",
"@langchain/community": "^0.2.4",
"@langchain/core": "^0.2.2",
"@langchain/core": "^0.2.4",
"@langchain/google-vertexai": "^0.0.17",
"@langchain/mistralai": "^0.0.22",
"@langchain/openai": "^0.0.34",
Expand Down Expand Up @@ -90,7 +90,7 @@
"@typescript-eslint/eslint-plugin": "^7.11.0",
"@typescript-eslint/parser": "^7.11.0",
"chromadb": "^1.8.1",
"cohere-ai": "^7.10.1",
"cohere-ai": "^7.10.2",
"eslint": "^8.57.0",
"eslint-config-prettier": "^9.1.0",
"eslint-plugin-prettier": "^5.1.3",
Expand All @@ -109,7 +109,7 @@
"@pinecone-database/pinecone": "^2.2.1",
"@qdrant/js-client-rest": "^1.9.0",
"chromadb": "^1.8.1",
"cohere-ai": "^7.10.1",
"cohere-ai": "^7.10.2",
"hnswlib-node": "^3.0.0",
"ioredis": "^5.4.1",
"lmdb": "^3.0.11",
Expand Down
2 changes: 1 addition & 1 deletion src/core/dynamic-loader-selector.ts
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ export class DynamicLoader {
return new UrlLoader({ url: loader });
} else if (fs.existsSync(path.resolve(loader))) {
DynamicLoader.debug('Loader is a valid path on local filesystem!');
return new LocalPathLoader({ path: loader });
return new LocalPathLoader({ path: path.resolve(loader) });
} else if (isValidJson(loader)) {
DynamicLoader.debug('Loader is a valid JSON!');
return new JsonLoader({ object: JSON.parse(loader) });
Expand Down
100 changes: 52 additions & 48 deletions src/core/rag-application.ts
Original file line number Diff line number Diff line change
Expand Up @@ -56,15 +56,15 @@ export class RAGApplication {
return RAGEmbedding.getEmbedding().embedDocuments(texts);
}

/**
* The function `getChunkUniqueId` generates a unique identifier by combining a loader unique ID and
* an increment ID.
* @param {string} loaderUniqueId - A unique identifier for the loader.
* @param {number} incrementId - The `incrementId` parameter is a number that represents the
* increment value used to generate a unique chunk identifier.
* @returns The function `getChunkUniqueId` returns a string that combines the `loaderUniqueId` and
* `incrementId`.
*/
/**
* The function `getChunkUniqueId` generates a unique identifier by combining a loader unique ID and
* an increment ID.
* @param {string} loaderUniqueId - A unique identifier for the loader.
* @param {number} incrementId - The `incrementId` parameter is a number that represents the
* increment value used to generate a unique chunk identifier.
* @returns The function `getChunkUniqueId` returns a string that combines the `loaderUniqueId` and
* `incrementId`.
*/
private getChunkUniqueId(loaderUniqueId: string, incrementId: number) {
return `${loaderUniqueId}_${incrementId}`;
}
Expand Down Expand Up @@ -94,31 +94,31 @@ export class RAGApplication {
this.debug('Initialized pre-loaders');
}

/**
* The function `addLoader` asynchronously initalizes a loader using the provided parameters and adds
* it to the system.
* @param {LoaderParam} loaderParam - The `loaderParam` parameter is a string, object or instance of BaseLoader
* that contains the necessary information to create a loader.
* @returns The function `addLoader` returns an object with the following properties:
* - `entriesAdded`: Number of new entries added during the loader operation
* - `uniqueId`: Unique identifier of the loader
* - `loaderType`: Name of the loader's constructor class
*/
/**
* The function `addLoader` asynchronously initalizes a loader using the provided parameters and adds
* it to the system.
* @param {LoaderParam} loaderParam - The `loaderParam` parameter is a string, object or instance of BaseLoader
* that contains the necessary information to create a loader.
* @returns The function `addLoader` returns an object with the following properties:
* - `entriesAdded`: Number of new entries added during the loader operation
* - `uniqueId`: Unique identifier of the loader
* - `loaderType`: Name of the loader's constructor class
*/
public async addLoader(loaderParam: LoaderParam): Promise<AddLoaderReturn> {
const loader = await DynamicLoader.createLoader(loaderParam);
return this._addLoader(loader);
}
/**
* The function `_addLoader` asynchronously adds a loader, processes its chunks, and handles
* incremental loading if supported by the loader.
* @param {BaseLoader} loader - The `loader` parameter in the `_addLoader` method is an instance of the
* `BaseLoader` class.
* @returns The function `_addLoader` returns an object with the following properties:
* - `entriesAdded`: Number of new entries added during the loader operation
* - `uniqueId`: Unique identifier of the loader
* - `loaderType`: Name of the loader's constructor class
*/

/**
* The function `_addLoader` asynchronously adds a loader, processes its chunks, and handles
* incremental loading if supported by the loader.
* @param {BaseLoader} loader - The `loader` parameter in the `_addLoader` method is an instance of the
* `BaseLoader` class.
* @returns The function `_addLoader` returns an object with the following properties:
* - `entriesAdded`: Number of new entries added during the loader operation
* - `uniqueId`: Unique identifier of the loader
* - `loaderType`: Name of the loader's constructor class
*/
private async _addLoader(loader: BaseLoader): Promise<AddLoaderReturn> {
const uniqueId = loader.getUniqueId();
this.debug('Add loader called for', uniqueId);
Expand Down Expand Up @@ -153,19 +153,23 @@ export class RAGApplication {
return { entriesAdded: newInserts, uniqueId, loaderType: loader.constructor.name };
}

/**
* The `incrementalLoader` function asynchronously processes incremental chunks for a loader.
* @param {string} uniqueId - The `uniqueId` parameter is a string that serves as an identifier for
* the loader.
* @param incrementalGenerator - The `incrementalGenerator` parameter is an asynchronous generator
* function that yields `LoaderChunk` objects. It is used to incrementally load chunks of data for a specific loader
*/
/**
* The `incrementalLoader` function asynchronously processes incremental chunks for a loader.
* @param {string} uniqueId - The `uniqueId` parameter is a string that serves as an identifier for
* the loader.
* @param incrementalGenerator - The `incrementalGenerator` parameter is an asynchronous generator
* function that yields `LoaderChunk` objects. It is used to incrementally load chunks of data for a specific loader
*/
private async incrementalLoader(uniqueId: string, incrementalGenerator: AsyncGenerator<LoaderChunk, void, void>) {
this.debug(`incrementalChunkAvailable for loader`, uniqueId);
const { newInserts } = await this.batchLoadChunks(uniqueId, incrementalGenerator);
this.debug(`${newInserts} new incrementalChunks processed`, uniqueId);
}

public async getLoaders() {
return BaseLoader.getLoadersList();
}

/**
* The function `deleteLoader` deletes embeddings from a loader after confirming the action.
* @param {string} uniqueLoaderId - The `uniqueLoaderId` parameter is a string that represents the
Expand Down Expand Up @@ -288,17 +292,17 @@ export class RAGApplication {
return true;
}

/**
* The function `getEmbeddings` retrieves embeddings for a query, performs similarity search,
* filters and sorts the results based on relevance score, and returns a subset of the top results.
* @param {string} cleanQuery - The `cleanQuery` parameter is a string that represents the query
* input after it has been cleaned or processed to remove any unnecessary characters, symbols, or
* noise. This clean query is then used to generate embeddings for similarity search.
* @returns The `getEmbeddings` function returns a filtered and sorted array of search results based
* on the similarity score of the query embedded in the cleanQuery string. The results are filtered
* based on a relevance cutoff value, sorted in descending order of score, and then sliced to return
* only the number of results specified by the `searchResultCount` property.
*/
/**
* The function `getEmbeddings` retrieves embeddings for a query, performs similarity search,
* filters and sorts the results based on relevance score, and returns a subset of the top results.
* @param {string} cleanQuery - The `cleanQuery` parameter is a string that represents the query
* input after it has been cleaned or processed to remove any unnecessary characters, symbols, or
* noise. This clean query is then used to generate embeddings for similarity search.
* @returns The `getEmbeddings` function returns a filtered and sorted array of search results based
* on the similarity score of the query embedded in the cleanQuery string. The results are filtered
* based on a relevance cutoff value, sorted in descending order of score, and then sliced to return
* only the number of results specified by the `searchResultCount` property.
*/
public async getEmbeddings(cleanQuery: string) {
const queryEmbedded = await RAGEmbedding.getEmbedding().embedQuery(cleanQuery);
const unfilteredResultSet = await this.vectorDb.similaritySearch(queryEmbedded, this.searchResultCount + 10);
Expand Down
6 changes: 6 additions & 0 deletions src/global/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -42,3 +42,9 @@ export type ConversationHistory = {
message: string;
sender: 'HUMAN' | 'AI' | 'SYSTEM';
};

export type LoaderList = {
type: string;
uniqueId: string;
loaderMetadata: Record<string, unknown>;
}[];
66 changes: 59 additions & 7 deletions src/interfaces/base-loader.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,26 +2,76 @@ import md5 from 'md5';
import createDebugMessages from 'debug';
import { EventEmitter } from 'node:events';

import { LoaderChunk, UnfilteredLoaderChunk } from '../global/types.js';
import { LoaderChunk, LoaderList, UnfilteredLoaderChunk } from '../global/types.js';
import { BaseCache } from './base-cache.js';

export abstract class BaseLoader<
T extends Record<string, string | number | boolean> = Record<string, string | number | boolean>,
M extends Record<string, unknown> = Record<string, null>,
> extends EventEmitter {
private static cache?: BaseCache;
private static cache?: Pick<
BaseCache,
'loaderCustomDelete' | 'loaderCustomGet' | 'loaderCustomHas' | 'loaderCustomSet'
>;
private static readonly LOADERS_LIST_CACHE_KEY = 'LOADERS_LIST_CACHE_KEY';

public static setCache(cache?: BaseCache) {
BaseLoader.cache = cache;
}

private static async recordLoaderInCache(
loaderName: string,
uniqueId: string,
loaderMetadata: Record<string, unknown>,
) {
if (!BaseLoader.cache) return;

if (await BaseLoader.cache.loaderCustomHas(BaseLoader.LOADERS_LIST_CACHE_KEY)) {
const current = await BaseLoader.cache.loaderCustomGet<{ list: LoaderList }>(
BaseLoader.LOADERS_LIST_CACHE_KEY,
);

current.list.push({
type: loaderName,
uniqueId,
loaderMetadata,
});

current.list = [...new Map(current.list.map((item) => [item.uniqueId, item])).values()];
BaseLoader.cache.loaderCustomSet(BaseLoader.LOADERS_LIST_CACHE_KEY, current);
} else {
BaseLoader.cache.loaderCustomSet<{ list: LoaderList }>(BaseLoader.LOADERS_LIST_CACHE_KEY, {
list: [
{
type: loaderName,
uniqueId,
loaderMetadata,
},
],
});
}
}

public static async getLoadersList() {
if (!BaseLoader.cache) return null;

if (await BaseLoader.cache.loaderCustomHas(BaseLoader.LOADERS_LIST_CACHE_KEY)) {
const current = await BaseLoader.cache.loaderCustomGet<{ list: LoaderList }>(
BaseLoader.LOADERS_LIST_CACHE_KEY,
);

return current.list;
} else return <LoaderList>[];
}

protected readonly uniqueId: string;
private readonly _canIncrementallyLoad: boolean;
protected readonly chunkOverlap: number;
protected readonly chunkSize: number;

constructor(
uniqueId: string,
loaderMetadata: Record<string, unknown>,
chunkSize: number = 5,
chunkOverlap: number = 0,
canIncrementallyLoad: boolean = false,
Expand All @@ -32,6 +82,8 @@ export abstract class BaseLoader<
this._canIncrementallyLoad = canIncrementallyLoad;
this.chunkOverlap = chunkOverlap;
this.chunkSize = chunkSize;

BaseLoader.recordLoaderInCache(this.constructor.name, uniqueId, loaderMetadata);
createDebugMessages('embedjs:loader:BaseLoader')(`New loader class initalized with key ${uniqueId}`);
}

Expand All @@ -41,7 +93,7 @@ export abstract class BaseLoader<
return this._canIncrementallyLoad;
}

getUniqueId(): string {
public getUniqueId(): string {
return this.uniqueId;
}

Expand Down Expand Up @@ -73,10 +125,10 @@ export abstract class BaseLoader<
this.emit('incrementalChunkAvailable', incrementalGenerator);
}

/**
* This TypeScript function asynchronously processes chunks of data, cleans up the content,
* calculates a content hash, and yields the modified chunks.
*/
/**
* This TypeScript function asynchronously processes chunks of data, cleans up the content,
* calculates a content hash, and yields the modified chunks.
*/
public async *getChunks(): AsyncGenerator<LoaderChunk<T>, void, void> {
const chunks = await this.getUnfilteredChunks();

Expand Down
2 changes: 1 addition & 1 deletion src/loaders/confluence-loader.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ export class ConfluenceLoader extends BaseLoader<{ type: 'ConfluenceLoader' }> {
chunkSize?: number;
chunkOverlap?: number;
}) {
super(`ConfluenceLoader_${md5(spaceNames.join(','))}`, chunkSize ?? 2000, chunkOverlap ?? 200);
super(`ConfluenceLoader_${md5(spaceNames.join(','))}`, { spaceNames }, chunkSize ?? 2000, chunkOverlap ?? 200);

this.spaceNames = spaceNames;
this.confluenceBaseUrl = confluenceBaseUrl ?? process.env.CONFLUENCE_BASE_URL;
Expand Down
2 changes: 1 addition & 1 deletion src/loaders/docx-loader.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ export class DocxLoader extends BaseLoader<{ type: 'DocxLoader' }> {
chunkSize?: number;
chunkOverlap?: number;
}) {
super(`DocxLoader_${md5(filePathOrUrl)}`, chunkSize ?? 1000, chunkOverlap ?? 0);
super(`DocxLoader_${md5(filePathOrUrl)}`, { filePathOrUrl }, chunkSize ?? 1000, chunkOverlap ?? 0);

this.filePathOrUrl = filePathOrUrl;
this.isUrl = isValidURL(filePathOrUrl) ? true : false;
Expand Down
2 changes: 1 addition & 1 deletion src/loaders/excel-loader.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ export class ExcelLoader extends BaseLoader<{ type: 'ExcelLoader' }> {
chunkSize?: number;
chunkOverlap?: number;
}) {
super(`ExcelLoader_${md5(filePathOrUrl)}`, chunkSize ?? 1000, chunkOverlap ?? 0);
super(`ExcelLoader_${md5(filePathOrUrl)}`, { filePathOrUrl }, chunkSize ?? 1000, chunkOverlap ?? 0);

this.filePathOrUrl = filePathOrUrl;
this.isUrl = isValidURL(filePathOrUrl) ? true : false;
Expand Down
4 changes: 3 additions & 1 deletion src/loaders/json-loader.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@ export class JsonLoader extends BaseLoader<{ type: 'JsonLoader' }> {
object: Record<string, unknown> | Record<string, unknown>[];
pickKeysForEmbedding?: string[];
}) {
super(`JsonLoader_${md5(cleanString(JSON.stringify(object)))}`);
super(`JsonLoader_${md5(cleanString(JSON.stringify(object)))}`, {
object: truncateCenterString(JSON.stringify(object), 50),
});

this.pickKeysForEmbedding = pickKeysForEmbedding;
this.object = object;
Expand Down
4 changes: 2 additions & 2 deletions src/loaders/local-path-loader.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ export class LocalPathLoader extends BaseLoader<{ type: 'LocalPathLoader' }> {
private readonly path: string;

constructor({ path }: { path: string }) {
super(`LocalPathLoader_${md5(path)}`);
super(`LocalPathLoader_${md5(path)}`, { path });
this.path = path;
}

Expand All @@ -24,7 +24,7 @@ export class LocalPathLoader extends BaseLoader<{ type: 'LocalPathLoader' }> {
metadata: {
...result.metadata,
type: <'LocalPathLoader'>'LocalPathLoader',
originalPath: path.resolve(this.path),
originalPath: this.path,
},
};
}
Expand Down
2 changes: 1 addition & 1 deletion src/loaders/pdf-loader.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ export class PdfLoader extends BaseLoader<{ type: 'PdfLoader' }> {
chunkSize?: number;
chunkOverlap?: number;
}) {
super(`PdfLoader_${md5(filePathOrUrl)}`, chunkSize ?? 1000, chunkOverlap ?? 0);
super(`PdfLoader_${md5(filePathOrUrl)}`, { filePathOrUrl }, chunkSize ?? 1000, chunkOverlap ?? 0);

this.filePathOrUrl = filePathOrUrl;
this.isUrl = isValidURL(filePathOrUrl) ? true : false;
Expand Down
2 changes: 1 addition & 1 deletion src/loaders/ppt-loader.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ export class PptLoader extends BaseLoader<{ type: 'PptLoader' }> {
chunkSize?: number;
chunkOverlap?: number;
}) {
super(`PptLoader_${md5(filePathOrUrl)}`, chunkSize ?? 1000, chunkOverlap ?? 0);
super(`PptLoader_${md5(filePathOrUrl)}`, { filePathOrUrl }, chunkSize ?? 1000, chunkOverlap ?? 0);

this.filePathOrUrl = filePathOrUrl;
this.isUrl = isValidURL(filePathOrUrl) ? true : false;
Expand Down
2 changes: 1 addition & 1 deletion src/loaders/sitemap-loader.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ export class SitemapLoader extends BaseLoader<{ type: 'SitemapLoader' }> {
private readonly url: string;

constructor({ url, chunkSize, chunkOverlap }: { url: string; chunkSize?: number; chunkOverlap?: number }) {
super(`SitemapLoader_${md5(url)}`, chunkSize ?? 2000, chunkOverlap);
super(`SitemapLoader_${md5(url)}`, { url }, chunkSize ?? 2000, chunkOverlap);
this.url = url;
}

Expand Down
2 changes: 1 addition & 1 deletion src/loaders/text-loader.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ export class TextLoader extends BaseLoader<{ type: 'TextLoader' }> {
private readonly text: string;

constructor({ text, chunkSize, chunkOverlap }: { text: string; chunkSize?: number; chunkOverlap?: number }) {
super(`TextLoader_${md5(text)}`, chunkSize ?? 300, chunkOverlap ?? 0);
super(`TextLoader_${md5(text)}`, { text: truncateCenterString(text, 50) }, chunkSize ?? 300, chunkOverlap ?? 0);
this.text = text;
}

Expand Down
Loading

0 comments on commit 4951680

Please sign in to comment.