File tree 5 files changed +23
-5
lines changed 5 files changed +23
-5
lines changed Original file line number Diff line number Diff line change @@ -4,6 +4,19 @@ All notable changes to this project will be documented in this file.
4
4
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
5
5
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
6
6
7
+ ## [0.8.0] - 2019-03-20
8
+ ### Added
9
+ - [Tokenization] Added NGramTokenizer (#350)
10
+ - editorconfig file (#355)
11
+ ### Fixed
12
+ - [Dataset] FilesDataset now reads each sample without wrapping it in an additional array (#363)
13
+ - [Tokenization] fixed error with numeric token values (#363)
14
+ ### Changed
15
+ - [Math] improved performance with pow and sqrt replacement (#350)
16
+ - [Math] reduced duplicated code in distance metrics (#348)
17
+ - update phpunit to 7.5.1 (#335)
18
+ - code style fixes (#334)
19
+
7
20
## [0.7.0] - 2018-11-07
8
21
### Added
9
22
- [Clustering] added KMeans associative clustering (#262)
Original file line number Diff line number Diff line change @@ -29,7 +29,7 @@ private function scanDir(string $dir): void
29
29
$ target = basename ($ dir );
30
30
31
31
foreach (array_filter (glob ($ dir .DIRECTORY_SEPARATOR .'* ' ), 'is_file ' ) as $ file ) {
32
- $ this ->samples [] = [ file_get_contents ($ file )] ;
32
+ $ this ->samples [] = file_get_contents ($ file );
33
33
$ this ->targets [] = $ target ;
34
34
}
35
35
}
Original file line number Diff line number Diff line change @@ -157,7 +157,7 @@ private function getBeyondMinimumIndexes(int $samplesCount): array
157
157
$ indexes = [];
158
158
foreach ($ this ->frequencies as $ token => $ frequency ) {
159
159
if (($ frequency / $ samplesCount ) < $ this ->minDF ) {
160
- $ indexes [] = $ this ->getTokenIndex ($ token );
160
+ $ indexes [] = $ this ->getTokenIndex (( string ) $ token );
161
161
}
162
162
}
163
163
Original file line number Diff line number Diff line change @@ -29,13 +29,13 @@ public function testLoadFilesDatasetWithBBCData(): void
29
29
self ::assertEquals ($ targets , array_values (array_unique ($ dataset ->getTargets ())));
30
30
31
31
$ firstSample = file_get_contents ($ rootPath .'/business/001.txt ' );
32
- self ::assertEquals ($ firstSample , $ dataset ->getSamples ()[0 ][ 0 ] );
32
+ self ::assertEquals ($ firstSample , $ dataset ->getSamples ()[0 ]);
33
33
34
34
$ firstTarget = 'business ' ;
35
35
self ::assertEquals ($ firstTarget , $ dataset ->getTargets ()[0 ]);
36
36
37
37
$ lastSample = file_get_contents ($ rootPath .'/tech/010.txt ' );
38
- self ::assertEquals ($ lastSample , $ dataset ->getSamples ()[49 ][ 0 ] );
38
+ self ::assertEquals ($ lastSample , $ dataset ->getSamples ()[49 ]);
39
39
40
40
$ lastTarget = 'tech ' ;
41
41
self ::assertEquals ($ lastTarget , $ dataset ->getTargets ()[49 ]);
Original file line number Diff line number Diff line change @@ -84,7 +84,7 @@ public function testTransformationWithMinimumDocumentTokenCountFrequency(): void
84
84
{
85
85
// word at least in half samples
86
86
$ samples = [
87
- 'Lorem ipsum dolor sit amet ' ,
87
+ 'Lorem ipsum dolor sit amet 1550 ' ,
88
88
'Lorem ipsum sit amet ' ,
89
89
'ipsum sit amet ' ,
90
90
'ipsum sit amet ' ,
@@ -96,6 +96,7 @@ public function testTransformationWithMinimumDocumentTokenCountFrequency(): void
96
96
2 => 'dolor ' ,
97
97
3 => 'sit ' ,
98
98
4 => 'amet ' ,
99
+ 5 => 1550 ,
99
100
];
100
101
101
102
$ tokensCounts = [
@@ -105,27 +106,31 @@ public function testTransformationWithMinimumDocumentTokenCountFrequency(): void
105
106
2 => 0 ,
106
107
3 => 1 ,
107
108
4 => 1 ,
109
+ 5 => 0 ,
108
110
],
109
111
[
110
112
0 => 1 ,
111
113
1 => 1 ,
112
114
2 => 0 ,
113
115
3 => 1 ,
114
116
4 => 1 ,
117
+ 5 => 0 ,
115
118
],
116
119
[
117
120
0 => 0 ,
118
121
1 => 1 ,
119
122
2 => 0 ,
120
123
3 => 1 ,
121
124
4 => 1 ,
125
+ 5 => 0 ,
122
126
],
123
127
[
124
128
0 => 0 ,
125
129
1 => 1 ,
126
130
2 => 0 ,
127
131
3 => 1 ,
128
132
4 => 1 ,
133
+ 5 => 0 ,
129
134
],
130
135
];
131
136
You can’t perform that action at this time.
0 commit comments