Skip to content

Commit 7a03239

Browse files
authored
feat(lib): Column value mapping capability (#64)
* feat(lib): Column value mapping capability
* test(lib): Tests for applyMappings
* chore(config): Ignore shrinkwrap.yaml
* fix(lib): Fix mappings implementation

Co-authored-by: Glitch (isair-tensorflow-load-csv) <none>
1 parent 1ccb7df commit 7a03239

6 files changed

+78
-0
lines changed

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -24,3 +24,4 @@ pnpm-debug.log*
2424
dist
2525
docs
2626
coverage
27+
shrinkwrap.yaml

README.md

+4
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,10 @@ const {
5555
} = loadCsv('./data.csv', {
5656
featureColumns: ['lat', 'lng', 'height'],
5757
labelColumns: ['temperature'],
58+
mappings: {
59+
height: (ft) => ft * 0.3048, // feet to meters
60+
temperature: (f) => (f - 32) / 1.8, // fahrenheit to celsius
61+
}, // Map values based on which column they are in before they are loaded into tensors.
5862
shuffle: true, // Pass true to shuffle with a fixed seed, or a string to use it as a seed for the shuffling.
5963
splitTest: true, // Splits your data in half. You can also provide a certain row count for the test data.
6064
prependOnes: true, // Prepends a column of 1s to your features and testFeatures tensors, useful for linear regression.

src/applyMappings.ts

+22
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
import { CsvTable, CsvReadOptions } from './loadCsv.models';
2+
3+
/**
 * Runs every data cell of a table through the transformer registered for its column.
 *
 * The first row of `table` is treated as the header row: its cells are the column
 * names used to look up transformers in `mappings`. The header row itself is
 * returned untouched, and so are cells in columns without a transformer.
 *
 * @param table - Rows of cells, with the header row at index 0.
 * @param mappings - Transformer functions keyed by column name.
 * @returns A new table with mapped data rows. When `table` has fewer than two rows
 * (empty, or header only) there is nothing to map and `table` itself is returned.
 */
const applyMappings = (
  table: CsvTable,
  mappings: NonNullable<CsvReadOptions['mappings']>
) => {
  if (table.length < 2) {
    return table;
  }
  // Resolve the transformer for each column once, instead of once per cell.
  const mappingsByIndex = table[0].map((columnName) => {
    // Only honour the caller's own keys. A plain `mappings[columnName]` lookup
    // also finds inherited members, so a column called "toString" or
    // "constructor" would have its values clobbered by Object.prototype functions.
    if (!Object.prototype.hasOwnProperty.call(mappings, columnName)) {
      return undefined;
    }
    const mapping = mappings[columnName];
    return typeof mapping === 'function' ? mapping : undefined;
  });
  return table.map((row, rowIndex) => {
    if (rowIndex === 0) {
      // Header row: column names are never transformed.
      return row;
    }
    return row.map((value, columnIndex) => {
      const mapping = mappingsByIndex[columnIndex];
      return mapping ? mapping(value) : value;
    });
  });
};
21+
22+
export default applyMappings;

src/loadCsv.models.ts

+7
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,13 @@ export interface CsvReadOptions {
77
* Names of the columns to be included in the labels and testLabels tensors.
88
*/
99
labelColumns: string[];
10+
/**
11+
* Used for transforming values of entire columns. Key is column label, value is transformer function. Each value belonging to
12+
* that column will be put through the transformer function and be overwritten with the return value of it.
13+
*/
14+
mappings?: {
15+
[columnName: string]: (value: string | number) => string | number;
16+
};
1017
/**
1118
* If true, shuffles all rows with a fixed seed, meaning that shuffling the same data will always result in the same shuffled data.
1219
*

src/loadCsv.ts

+6
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,15 @@ import { shuffle } from 'shuffle-seed';
66
import { CsvReadOptions, CsvTable } from './loadCsv.models';
77
import filterColumns from './filterColumns';
88
import splitTestData from './splitTestData';
9+
import applyMappings from './applyMappings';
910

1011
const defaultShuffleSeed = 'mncv9340ur';
1112

1213
const loadCsv = (filename: string, options: CsvReadOptions) => {
1314
const {
1415
featureColumns,
1516
labelColumns,
17+
mappings = {},
1618
shuffle: shouldShuffle = false,
1719
splitTest = false,
1820
prependOnes = false,
@@ -43,6 +45,10 @@ const loadCsv = (filename: string, options: CsvReadOptions) => {
4345
tables.labels.shift();
4446
tables.features.shift();
4547

48+
for (const key of Object.keys(tables)) {
49+
tables[key] = applyMappings(tables[key], mappings);
50+
}
51+
4652
if (shouldShuffle) {
4753
const seed =
4854
typeof shouldShuffle === 'string' ? shouldShuffle : defaultShuffleSeed;

tests/applyMappings.test.ts

+38
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
/* eslint-disable @typescript-eslint/ban-ts-comment */
2+
import { CsvReadOptions } from '../src/loadCsv.models';
3+
import applyMappings from '../src/applyMappings';
4+
5+
// Fixture table: a header row of column names followed by two rows of numeric values.
const data = [
6+
['lat', 'lng', 'height', 'temperature'],
7+
[0.234, 1.47, 849.7, 64.4],
8+
[-293.2, 103.34, 715.2, 73.4],
9+
];
10+
11+
// Fixture transformers for two of the four columns; 'lat' and 'lng' have no mapping.
// Values are coerced with Number() because a cell may be a string or a number.
const mappings: NonNullable<CsvReadOptions['mappings']> = {
12+
height: (ft) => Number(ft) * 0.3048, // feet to meters
13+
temperature: (f) => (Number(f) - 32) / 1.8, // fahrenheit to celsius
14+
};
15+
16+
// Checks that the 'height' and 'temperature' columns are transformed by their mappings
// while the header row and the unmapped 'lat'/'lng' columns come back unchanged.
// NOTE(review): the trailing 3 passed to toBeDeepCloseTo is presumably the number of
// decimal digits compared — confirm against the matcher's documentation.
test('Applying mappings works correctly', () => {
17+
const mappedData = applyMappings(data, mappings);
18+
// @ts-ignore
19+
expect(mappedData).toBeDeepCloseTo(
20+
[
21+
['lat', 'lng', 'height', 'temperature'],
22+
[0.234, 1.47, 258.98856, 18],
23+
[-293.2, 103.34, 217.99296, 23],
24+
],
25+
3
26+
);
27+
});
28+
29+
// A table that contains only the header row has no data cells to map; the result
// is expected to match the input table.
test('Applying mappings does not break with a table with just headers', () => {
30+
const tableOnlyHeaders = [['lat', 'lng', 'height', 'temperature']];
31+
const mappedData = applyMappings(tableOnlyHeaders, mappings);
32+
expect(mappedData).toMatchObject(tableOnlyHeaders);
33+
});
34+
35+
// An empty table (no header row at all) must not throw; the result is expected to
// match an empty array.
test('Applying mappings does not break with an empty table', () => {
36+
const mappedData = applyMappings([], mappings);
37+
expect(mappedData).toMatchObject([]);
38+
});

0 commit comments

Comments
 (0)