Skip to content

Commit 36f4f0e

Browse files
authored
feat(lib): Replace shuffling lib with own implementation (#174)
1 parent 0508ccb commit 36f4f0e

7 files changed

+91
-32
lines changed

README.md

+5-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88

99
A library that aims to completely remove the overhead of creating tensors from CSV files, allowing you to dive right into the fun parts of your ML project.
1010

11-
- Lightweight.
11+
- [Lightweight](https://bundlephobia.com/result?p=tensorflow-load-csv).
1212
- Fast.
1313
- Flexible.
1414
- TypeScript compatible.
@@ -21,18 +21,21 @@ You can find the docs [here](https://barissencan.com/tensorflow-load-csv/).
2121
## Installation
2222

2323
NPM:
24+
2425
```sh
2526
npm install tensorflow-load-csv
2627
```
2728

2829
Yarn:
30+
2931
```sh
3032
yarn add tensorflow-load-csv
3133
```
3234

3335
## Usage
3436

3537
Simple usage:
38+
3639
```js
3740
import loadCsv from 'tensorflow-load-csv';
3841

@@ -46,6 +49,7 @@ labels.print();
4649
```
4750

4851
Advanced usage:
52+
4953
```js
5054
import loadCsv from 'tensorflow-load-csv';
5155

package-lock.json

+4-21
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

-3
Original file line numberDiff line numberDiff line change
@@ -71,8 +71,5 @@
7171
},
7272
"peerDependencies": {
7373
"@tensorflow/tfjs": "^2.0.1"
74-
},
75-
"dependencies": {
76-
"shuffle-seed": "^1.1.6"
7774
}
7875
}

src/loadCsv.ts

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
import fs from 'fs';
22

33
import * as tf from '@tensorflow/tfjs';
4-
import { shuffle } from 'shuffle-seed';
54

65
import { CsvReadOptions, CsvTable } from './loadCsv.models';
76
import filterColumns from './filterColumns';
87
import splitTestData from './splitTestData';
98
import applyMappings from './applyMappings';
9+
import shuffle from './shuffle';
1010

1111
const defaultShuffleSeed = 'mncv9340ur';
1212

src/shuffle.ts

+51
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
/**
 * Creates a mulberry32 pseudo-random number generator.
 *
 * The generator state is kept as a signed 32-bit integer, so only the low
 * 32 bits of the (truncated) seed are significant. Non-finite seeds such as
 * `NaN` behave like a seed of `0`.
 *
 * @param a - Seed value.
 * @returns A function that yields the next number in the range [0, 1) on
 *   every call. The sequence is fully determined by the seed.
 */
const mulberry32 = (a: number) => {
  // Normalise once up front so that very large seeds (e.g. 53-bit hashes)
  // cannot lose low bits to floating-point rounding when the increment is
  // added, and so that NaN cannot poison the state.
  let state = a | 0;

  return () => {
    // Wrap the counter back into int32 on every step instead of letting it
    // grow as an unbounded float.
    state = (state + 0x6d2b79f5) | 0;

    let t = state;
    t = Math.imul(t ^ (t >>> 15), t | 1);
    t ^= t + Math.imul(t ^ (t >>> 7), t | 61);

    // Reinterpret as unsigned 32-bit and scale by 2^32 into [0, 1).
    return ((t ^ (t >>> 14)) >>> 0) / 4294967296;
  };
};
7+
8+
/**
 * Hashes a string into a non-negative integer of at most 53 bits (cyrb53).
 *
 * @param str - Text to hash; it is consumed one UTF-16 code unit at a time.
 * @param seed - Optional value mixed into both internal accumulators.
 * @returns The upper 21 bits taken from the second accumulator combined with
 *   the 32 bits of the first accumulator.
 */
const cyrb53 = (str: string, seed = 0) => {
  const TWO_POW_32 = 4294967296;
  const LOW_21_BITS = 2097151;

  let accA = 0xdeadbeef ^ seed;
  let accB = 0x41c6ce57 ^ seed;

  for (let index = 0; index < str.length; index += 1) {
    const code = str.charCodeAt(index);
    accA = Math.imul(accA ^ code, 2654435761);
    accB = Math.imul(accB ^ code, 1597334677);
  }

  // Final avalanche. The second accumulator is mixed with the already
  // finalised first accumulator, so the order of these two steps matters.
  const finalA =
    Math.imul(accA ^ (accA >>> 16), 2246822507) ^
    Math.imul(accB ^ (accB >>> 13), 3266489909);
  const finalB =
    Math.imul(accB ^ (accB >>> 16), 2246822507) ^
    Math.imul(finalA ^ (finalA >>> 13), 3266489909);

  const highBits = finalB & LOW_21_BITS;
  const lowBits = finalA >>> 0;
  return highBits * TWO_POW_32 + lowBits;
};
24+
25+
/**
 * Returns a shuffled copy of `array` using a seeded Fisher–Yates shuffle.
 *
 * The input array is never modified. The same array length and seed always
 * produce the same ordering.
 *
 * @param array - Items to shuffle.
 * @param seed - Number used directly as the generator seed, or a string
 *   that is first hashed to a number with `cyrb53`. Defaults to `0`.
 * @returns A new array holding the same items in shuffled order.
 */
function shuffle<T>(array: readonly T[], seed: number | string = 0): T[] {
  const numericSeed = typeof seed === 'string' ? cyrb53(seed) : seed;
  const random = mulberry32(numericSeed);

  // Work on a copy so the caller's array is left untouched.
  const output = array.slice();

  // Walk the unshuffled prefix from the back: pick a random slot inside it
  // and swap that item into the last unshuffled position.
  let remaining = output.length;
  while (remaining > 0) {
    const pick = Math.floor(random() * remaining);
    remaining -= 1;

    const held = output[remaining];
    output[remaining] = output[pick];
    output[pick] = held;
  }

  return output;
}
50+
51+
export default shuffle;

tests/loadCsv.test.ts

+6-6
Original file line numberDiff line numberDiff line change
@@ -37,18 +37,18 @@ test('Shuffling should work and preserve feature - label pairs', () => {
3737
// @ts-ignore
3838
expect(features.arraySync()).toBeDeepCloseTo(
3939
[
40-
[102, -164],
4140
[5, 40.34],
4241
[0.234, 1.47],
4342
[-93.2, 103.34],
43+
[102, -164],
4444
],
4545
3
4646
);
4747
expect(labels.arraySync()).toMatchObject([
48-
['Landotzka'],
4948
['Landistan'],
5049
['SomeCountria'],
5150
['SomeOtherCountria'],
51+
['Landotzka'],
5252
]);
5353
});
5454

@@ -61,18 +61,18 @@ test('Shuffling with a custom seed should work', () => {
6161
// @ts-ignore
6262
expect(features.arraySync()).toBeDeepCloseTo(
6363
[
64-
[5, 40.34],
64+
[-93.2, 103.34],
6565
[102, -164],
66+
[5, 40.34],
6667
[0.234, 1.47],
67-
[-93.2, 103.34],
6868
],
6969
3
7070
);
7171
expect(labels.arraySync()).toMatchObject([
72-
['Landistan'],
72+
['SomeOtherCountria'],
7373
['Landotzka'],
74+
['Landistan'],
7475
['SomeCountria'],
75-
['SomeOtherCountria'],
7676
]);
7777
});
7878

tests/shuffle.test.ts

+24
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
import shuffle from '../src/shuffle';
2+
3+
// Shared fixture. Tests must treat it as read-only so they stay independent.
const data = [1, 2, 3, 4];

test('Shuffling without a seed should change order', () => {
  expect(shuffle(data)).toEqual([4, 3, 1, 2]);
});

test('Shuffling should not modify the original array', () => {
  const snapshot = [...data];

  const shuffled = shuffle(data);

  // The result must be a different ordering...
  expect(shuffled).not.toEqual(snapshot);
  // ...while the input itself keeps its original contents.
  expect(data).toEqual(snapshot);
});

test('Shuffling with a number seed should change order', () => {
  expect(shuffle(data, 7)).toEqual([3, 2, 4, 1]);
});

test('Shuffling with a string seed should change order', () => {
  expect(shuffle(data, 'hello')).toEqual([2, 4, 3, 1]);
});

test('Shuffling with different seeds should produce different results', () => {
  const results = [shuffle(data, 7), shuffle(data, 'hello')];
  // Compare the two actual results; an out-of-range index would compare
  // against `undefined` and make this assertion pass vacuously.
  expect(results[0]).not.toEqual(results[1]);
});

0 commit comments

Comments
 (0)