Skip to content

Commit

Permalink
LocalFileExtractor (#519)
Browse files Browse the repository at this point in the history
* Moved changes to new branch

Moved changes from old PR that is now out of sync with main (#478) to a fresh PR, based on latest main commit.

Co-authored-by: OmarFourati <[email protected]>

* feat: ✨ Correctly read file mime type from extensions

* test: ✅ Added tests for property assignment of LocalFileExtractor

* docs: 📝 Updated documentation of LocalFileExtractor to call out forbidden path traversal

---------

Co-authored-by: OmarFourati <[email protected]>
  • Loading branch information
rhazn and OmarFourati authored Feb 7, 2024
1 parent 968af93 commit 3874ff1
Show file tree
Hide file tree
Showing 14 changed files with 410 additions and 0 deletions.
2 changes: 2 additions & 0 deletions libs/extensions/std/exec/src/extension.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import { ArchiveInterpreterExecutor } from './archive-interpreter-executor';
import { FilePickerExecutor } from './file-picker-executor';
import { GtfsRTInterpreterExecutor } from './gtfs-rt-interpreter-executor';
import { HttpExtractorExecutor } from './http-extractor-executor';
import { LocalFileExtractorExecutor } from './local-file-extractor-executor';
import { TextFileInterpreterExecutor } from './text-file-interpreter-executor';
import { TextLineDeleterExecutor } from './text-line-deleter-executor';
import { TextRangeSelectorExecutor } from './text-range-selector-executor';
Expand All @@ -33,6 +34,7 @@ export class StdExecExtension implements JayveeExecExtension {
ArchiveInterpreterExecutor,
FilePickerExecutor,
GtfsRTInterpreterExecutor,
LocalFileExtractorExecutor,
];
}
}
131 changes: 131 additions & 0 deletions libs/extensions/std/exec/src/local-file-extractor-executor.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
// SPDX-FileCopyrightText: 2023 Friedrich-Alexander-Universitat Erlangen-Nurnberg
//
// SPDX-License-Identifier: AGPL-3.0-only

import * as path from 'path';

import * as R from '@jvalue/jayvee-execution';
import { getTestExecutionContext } from '@jvalue/jayvee-execution/test';
import {
BlockDefinition,
IOType,
createJayveeServices,
} from '@jvalue/jayvee-language-server';
import {
expectNoParserAndLexerErrors,
loadTestExtensions,
parseHelper,
readJvTestAssetHelper,
} from '@jvalue/jayvee-language-server/test';
import { AstNode, AstNodeLocator, LangiumDocument } from 'langium';
import { NodeFileSystem } from 'langium/node';
import * as nock from 'nock';

import { LocalFileExtractorExecutor } from './local-file-extractor-executor';

/**
 * Executor-level tests for LocalFileExtractorExecutor.
 *
 * Each test loads a `.jv` asset, parses it, and runs the executor directly on
 * the second block of the first pipeline ('pipelines@0/blocks@1').
 */
describe('Validation of LocalFileExtractorExecutor', () => {
  // Message asserted by both path traversal tests.
  const pathTraversalMessage = `File path cannot include "..". Path traversal is restricted.`;

  // Both are assigned once in beforeAll, after the Jayvee services were created.
  let locator: AstNodeLocator;
  let parse: (input: string) => Promise<LangiumDocument<AstNode>>;

  const readJvTestAsset = readJvTestAssetHelper(
    __dirname,
    '../test/assets/local-file-extractor-executor/',
  );

  /**
   * Parses the given Jayvee source, asserts that lexing/parsing produced no
   * errors, and executes a fresh LocalFileExtractorExecutor on the block
   * located at 'pipelines@0/blocks@1'.
   */
  async function parseAndExecuteExecutor(
    input: string,
  ): Promise<R.Result<R.BinaryFile>> {
    const parsedDocument = await parse(input);
    expectNoParserAndLexerErrors(parsedDocument);

    const blockUnderTest = locator.getAstNode<BlockDefinition>(
      parsedDocument.parseResult.value,
      'pipelines@0/blocks@1',
    ) as BlockDefinition;

    const executor = new LocalFileExtractorExecutor();
    const executionContext = getTestExecutionContext(locator, parsedDocument, [
      blockUnderTest,
    ]);
    return executor.doExecute(R.NONE, executionContext);
  }

  beforeAll(async () => {
    // Create language services and register the test block types
    const { Jayvee: services } = createJayveeServices(NodeFileSystem);
    const testExtensionPath = path.resolve(
      __dirname,
      '../test/test-extension/TestBlockTypes.jv',
    );
    await loadTestExtensions(services, [testExtensionPath]);

    locator = services.workspace.AstNodeLocator;
    // Parse function for Jayvee (without validation)
    parse = parseHelper(services);
  });

  // No test in this suite registers a nock interceptor; these hooks only
  // (re)activate nock, clear its interceptors before each test and restore it
  // afterwards.
  beforeEach(() => {
    const nockIsInactive = !nock.isActive();
    if (nockIsInactive) {
      nock.activate();
    }
    nock.cleanAll();
  });

  afterEach(() => {
    nock.restore();
  });

  it('should diagnose no error on valid local file path', async () => {
    const outcome = await parseAndExecuteExecutor(
      readJvTestAsset('valid-local-file.jv'),
    );

    expect(R.isErr(outcome)).toEqual(false);
    if (R.isOk(outcome)) {
      const expectedFileProperties = {
        name: 'local-file-test.csv',
        extension: 'csv',
        ioType: IOType.FILE,
        mimeType: R.MimeType.TEXT_CSV,
      };
      expect(outcome.right).toEqual(
        expect.objectContaining(expectedFileProperties),
      );
    }
  });

  it('should diagnose error on file not found', async () => {
    const outcome = await parseAndExecuteExecutor(
      readJvTestAsset('invalid-file-not-found.jv'),
    );

    expect(R.isErr(outcome)).toEqual(true);
    if (R.isErr(outcome)) {
      expect(outcome.left.message).toEqual(
        `File './does-not-exist.csv' not found.`,
      );
    }
  });

  it('should diagnose error on path traversal at the start of the path', async () => {
    const outcome = await parseAndExecuteExecutor(
      readJvTestAsset('invalid-path-traversal-at-start.jv'),
    );

    expect(R.isErr(outcome)).toEqual(true);
    if (R.isErr(outcome)) {
      expect(outcome.left.message).toEqual(pathTraversalMessage);
    }
  });

  it('should diagnose error on path traversal in the path', async () => {
    const outcome = await parseAndExecuteExecutor(
      readJvTestAsset('invalid-path-traversal-in-path.jv'),
    );

    expect(R.isErr(outcome)).toEqual(true);
    if (R.isErr(outcome)) {
      expect(outcome.left.message).toEqual(pathTraversalMessage);
    }
  });
});
82 changes: 82 additions & 0 deletions libs/extensions/std/exec/src/local-file-extractor-executor.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
// SPDX-FileCopyrightText: 2023 Friedrich-Alexander-Universitat Erlangen-Nurnberg
//
// SPDX-License-Identifier: AGPL-3.0-only

import * as fs from 'fs/promises';
import * as path from 'path';

import * as R from '@jvalue/jayvee-execution';
import {
AbstractBlockExecutor,
BinaryFile,
BlockExecutorClass,
ExecutionContext,
FileExtension,
MimeType,
None,
implementsStatic,
inferFileExtensionFromFileExtensionString,
inferMimeTypeFromFileExtensionString,
} from '@jvalue/jayvee-execution';
import { IOType, PrimitiveValuetypes } from '@jvalue/jayvee-language-server';

/**
 * Executor for the `LocalFileExtractor` block type.
 *
 * Takes no input (`IOType.NONE`), reads the file referenced by the block's
 * `filePath` property from the local file system and emits it as a
 * `BinaryFile` (`IOType.FILE`).
 */
@implementsStatic<BlockExecutorClass>()
export class LocalFileExtractorExecutor extends AbstractBlockExecutor<
  IOType.NONE,
  IOType.FILE
> {
  public static readonly type = 'LocalFileExtractor';

  constructor() {
    super(IOType.NONE, IOType.FILE);
  }

  /**
   * Reads the file at the block's `filePath` and wraps it in a `BinaryFile`.
   *
   * @param input unused; this block has no input.
   * @param context execution context providing the `filePath` property, the
   * current AST node (for diagnostics) and the logger.
   * @returns `R.ok` with the file on success. `R.err` attached to the
   * `filePath` property when the path contains `..`, when the file does not
   * exist, or when it cannot be read for another reason.
   */
  async doExecute(
    input: None,
    context: ExecutionContext,
  ): Promise<R.Result<BinaryFile>> {
    const filePath = context.getPropertyValue(
      'filePath',
      PrimitiveValuetypes.Text,
    );

    // Path traversal is rejected before touching the file system.
    if (filePath.includes('..')) {
      return R.err({
        message: 'File path cannot include "..". Path traversal is restricted.',
        diagnostic: { node: context.getCurrentNode(), property: 'filePath' },
      });
    }

    // Only the read itself is guarded, so that unrelated failures further
    // down are not misreported as a file system problem.
    let rawData: Buffer;
    try {
      rawData = await fs.readFile(filePath);
    } catch (error: unknown) {
      const isMissingFile =
        error instanceof Error &&
        (error as NodeJS.ErrnoException).code === 'ENOENT';
      const reason = error instanceof Error ? error.message : String(error);

      // Keep the established message for a missing file; surface the actual
      // cause (permissions, path is a directory, ...) for everything else.
      return R.err({
        message: isMissingFile
          ? `File '${filePath}' not found.`
          : `File '${filePath}' could not be read: ${reason}`,
        diagnostic: { node: context.getCurrentNode(), property: 'filePath' },
      });
    }

    // Infer FileName and FileExtension from filePath
    const fileName = path.basename(filePath);
    const extName = path.extname(fileName);
    const fileExtension =
      inferFileExtensionFromFileExtensionString(extName) ??
      FileExtension.NONE;

    // Infer Mimetype from FileExtension, if not inferrable, then default to application/octet-stream
    const mimeType: MimeType =
      inferMimeTypeFromFileExtensionString(fileExtension) ??
      MimeType.APPLICATION_OCTET_STREAM;

    // A Node Buffer can be a view onto a larger ArrayBuffer. Copy exactly the
    // bytes that belong to this file instead of handing out the whole backing
    // store.
    const content = rawData.buffer.slice(
      rawData.byteOffset,
      rawData.byteOffset + rawData.byteLength,
    ) as ArrayBuffer;

    // Create file and return file
    const file = new BinaryFile(fileName, fileExtension, mimeType, content);

    context.logger.logDebug(`Successfully extracted file ${filePath}`);
    return R.ok(file);
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
// SPDX-FileCopyrightText: 2023 Friedrich-Alexander-Universitat Erlangen-Nurnberg
//
// SPDX-License-Identifier: AGPL-3.0-only

// Test fixture: three chained blocks; TestBlock (second in declaration order)
// is the LocalFileExtractor being exercised.
pipeline TestPipeline {

block TestExtractor oftype TestFileExtractor {
}

// Relative path without ".."; NOTE(review): presumably no file exists at this
// location so that reading it fails - confirm.
block TestBlock oftype LocalFileExtractor {
filePath: './does-not-exist.csv';
}

block TestLoader oftype TestSheetLoader {
}

TestExtractor -> TestBlock -> TestLoader;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
// SPDX-FileCopyrightText: 2023 Friedrich-Alexander-Universitat Erlangen-Nurnberg
//
// SPDX-License-Identifier: AGPL-3.0-only

// Test fixture: three chained blocks; TestBlock (second in declaration order)
// is the LocalFileExtractor being exercised.
pipeline TestPipeline {

block TestExtractor oftype TestFileExtractor {
}

// The path begins with a ".." segment (path traversal at the start).
block TestBlock oftype LocalFileExtractor {
filePath: '../non-existent-file.csv';
}

block TestLoader oftype TestSheetLoader {
}

TestExtractor -> TestBlock -> TestLoader;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
// SPDX-FileCopyrightText: 2023 Friedrich-Alexander-Universitat Erlangen-Nurnberg
//
// SPDX-License-Identifier: AGPL-3.0-only

// Test fixture: three chained blocks; TestBlock (second in declaration order)
// is the LocalFileExtractor being exercised.
pipeline TestPipeline {

block TestExtractor oftype TestFileExtractor {
}

// The ".." segment appears after the leading "./" (path traversal inside the path).
block TestBlock oftype LocalFileExtractor {
filePath: './../non-existent-file.csv';
}

block TestLoader oftype TestSheetLoader {
}

TestExtractor -> TestBlock -> TestLoader;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
HeaderExample1,HeaderExample2,HeaderExample3,HeaderExample4
Example1,Example2,Example3,Example4
Example1,Example2,Example3,Example4
Example1,Example2,Example3,Example4
Example1,Example2,Example3,Example4
Example1,Example2,Example3,Example4
Example1,Example2,Example3,Example4
Example1,Example2,Example3,Example4
Example1,Example2,Example3,Example4
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
SPDX-FileCopyrightText: 2023 Friedrich-Alexander-Universitat Erlangen-Nurnberg

SPDX-License-Identifier: AGPL-3.0-only
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
// SPDX-FileCopyrightText: 2023 Friedrich-Alexander-Universitat Erlangen-Nurnberg
//
// SPDX-License-Identifier: AGPL-3.0-only

// Test fixture: three chained blocks; TestBlock (second in declaration order)
// is the LocalFileExtractor being exercised.
pipeline TestPipeline {

block TestExtractor oftype TestFileExtractor {
}

// Relative path without ".." pointing at the CSV test asset.
// NOTE(review): presumably resolved against the repository root when the
// tests run - confirm the working directory.
block TestBlock oftype LocalFileExtractor {
filePath: './libs/extensions/std/exec/test/assets/local-file-extractor-executor/local-file-test.csv';
}

block TestLoader oftype TestSheetLoader {
}

TestExtractor -> TestBlock -> TestLoader;
}
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,33 @@ describe('Validation of blocktype specific properties', () => {
});
});

// Property-assignment validation of the LocalFileExtractor block type: a valid
// filePath must produce no diagnostic, an invalid one exactly one error.
describe('LocalFileExtractor blocktype', () => {
  const assetDirectory =
    'property-assignment/blocktype-specific/local-file-extractor/';

  it('should diagnose no error on valid filePath parameter value', async () => {
    const validSource = readJvTestAsset(
      `${assetDirectory}valid-valid-filepath-param.jv`,
    );

    await parseAndValidatePropertyAssignment(validSource);

    expect(validationAcceptorMock).toHaveBeenCalledTimes(0);
  });

  it('should diagnose error on invalid filePath parameter value', async () => {
    const invalidSource = readJvTestAsset(
      `${assetDirectory}invalid-invalid-filepath-param.jv`,
    );

    await parseAndValidatePropertyAssignment(invalidSource);

    // Exactly one diagnostic, and it is the path traversal error.
    const expectedMessage =
      'File path cannot include "..". Path traversal is restricted.';
    expect(validationAcceptorMock).toHaveBeenCalledTimes(1);
    expect(validationAcceptorMock).toHaveBeenCalledWith(
      'error',
      expectedMessage,
      expect.any(Object),
    );
  });
});

describe('RowDeleter blocktype', () => {
it('should diagnose error on deleting partial row', async () => {
const text = readJvTestAsset(
Expand Down
Loading

0 comments on commit 3874ff1

Please sign in to comment.