Skip to content

Commit ed4029c

Browse files
committed
update stale links and standardize formatting
1 parent fef2b3a commit ed4029c

File tree

7 files changed

+141
-137
lines changed

7 files changed

+141
-137
lines changed

LICENSE

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
Copyright © 2017 Christopher G. Jennings
1+
Copyright © 2020 Christopher G. Jennings
22

33
Permission is hereby granted, free of charge, to any person obtaining a copy
44
of this software and associated documentation files (the "Software"), to deal

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,15 +4,15 @@
44

55
String search algorithms find occurrences of a pattern string in a text, like the search feature of a text editor. The FJS (Franek-Jennings-Smyth) algorithm is the fastest known string search algorithm under a wide variety of conditions. It combines the linear-time worst case guarantee of the well-known KMP (Knuth-Morris-Pratt) algorithm with the fast average-case performance of the BMS (Boyer-Moore-Sunday) algorithm.
66

7-
[More information, including an interactive visualization.](https://cgjennings.ca/articles/fjs.html)
7+
[More information, including an interactive visualization.](https://cgjennings.ca/articles/fjs/)
88

99
## The sample code
1010

1111
Sample implementations are currently provided in C and Java. Both implementations find *all* matches of the pattern string in the text, rather than simply finding the first or last.
1212

1313
### c/
1414

15-
The C implementation is meant as a starting point that you can customize to suit your specific needs. Note that it is also based on 8-bit characters. For wider characters you might want to adapt the simple hash strategy demonstrated by the Java code to improve performance on short texts. Another option is to process the string as 8-bit characters and ignore spurious matches. For example, cast pointers to 16-bit character strings to byte array pointers and then ignore "matches" that start at an odd offset.
15+
The C implementation is meant as a starting point that you can customize to suit your specific needs. Note that it is also based on 8-bit characters. For wider characters you might want to adapt the simple hash strategy demonstrated by the Java code to improve performance on short texts. Another option is to process the string as 8-bit characters and ignore spurious matches. For example, cast pointers to 16-bit character strings to byte array pointers and then ignore "matches" that start on odd offsets.
1616

1717
### java/
1818

c/fjs.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
44
FJS is a very fast algorithm for finding every occurrence
55
of a string p of length m in a string x of length n.
6-
For details see <https://cgjennings.ca/articles/fjs.html>.
6+
For details see <https://cgjennings.ca/articles/fjs/>.
77
88
Christopher G. Jennings.
99
See LICENSE.md for license details (MIT license).

java/ca/cgjennings/algo/BruteForceStringSearcher.java

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
/* See LICENSE.md for license details (MIT license). */
12
package ca.cgjennings.algo;
23

34
import java.util.stream.IntStream;
@@ -11,23 +12,22 @@
1112
public final class BruteForceStringSearcher implements StringSearcher {
1213

1314
/**
14-
* Creates a new {@code StringSearcher} that uses brute force to find
15-
* matches.
15+
* Creates a new {@code StringSearcher} that uses brute force to find matches.
1616
*/
1717
public BruteForceStringSearcher() {
1818
}
1919

2020
@Override
21-
@SuppressWarnings( "empty-statement" )
22-
public IntStream findAll( CharSequence p, CharSequence x ) {
21+
@SuppressWarnings("empty-statement")
22+
public IntStream findAll(CharSequence p, CharSequence x) {
2323
final int m = p.length(), n = x.length();
2424

2525
final IntStream.Builder stream = IntStream.builder();
2626
int i, j;
2727

28-
for( j=0; j <= n-m; ++j ) {
29-
for( i=0; i < m && p.charAt(i) == x.charAt(i+j); ++i );
30-
if( i >= p.length() ) {
28+
for (j = 0; j <= n - m; ++j) {
29+
for (i = 0; i < m && p.charAt(i) == x.charAt(i + j); ++i);
30+
if (i >= p.length()) {
3131
stream.accept(j);
3232
}
3333
}

java/ca/cgjennings/algo/FJSStringSearcher.java

Lines changed: 39 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
/* See LICENSE.md for license details (MIT license). */
12
package ca.cgjennings.algo;
23

34
import java.util.Arrays;
@@ -12,6 +13,8 @@
1213
* implementation (very rare in practice).
1314
*
1415
* @author Christopher G. Jennings
16+
* @see <a href="https://cgjennings.ca/articles/fjs/">The FJS string matching
17+
* algorithm</a>
1518
*/
1619
public final class FJSStringSearcher implements StringSearcher {
1720

@@ -20,7 +23,7 @@ public final class FJSStringSearcher implements StringSearcher {
2023
*/
2124
public FJSStringSearcher() {
2225
// reused since it does not depend on pattern size
23-
delta = new int[ ALPHABET_HASH_SIZE ];
26+
delta = new int[ALPHABET_HASH_SIZE];
2427
}
2528

2629
// The hash size must be a power of 2; typical texts may not see a speedup
@@ -30,74 +33,73 @@ public FJSStringSearcher() {
3033
private final int[] delta;
3134

3235
@Override
33-
public IntStream findAll( CharSequence p, CharSequence x ) {
36+
public IntStream findAll(CharSequence p, CharSequence x) {
3437
final int n = x.length();
3538
final int m = p.length();
3639

37-
if( m == 0 ) {
38-
return IntStream.rangeClosed( 0, n );
40+
if (m == 0) {
41+
return IntStream.rangeClosed(0, n);
3942
}
40-
if( m > n ) {
43+
if (m > n) {
4144
return IntStream.empty();
4245
}
4346

44-
final int beta[] = makeBeta( p );
45-
@SuppressWarnings( "LocalVariableHidesMemberVariable" )
46-
final int delta[] = makeDelta( p );
47+
final int beta[] = makeBeta(p);
48+
@SuppressWarnings("LocalVariableHidesMemberVariable")
49+
final int delta[] = makeDelta(p);
4750
final IntStream.Builder stream = IntStream.builder();
4851

49-
int mp = m-1, np = n-1, i = 0, ip = i+mp, j = 0;
52+
int mp = m - 1, np = n - 1, i = 0, ip = i + mp, j = 0;
5053

51-
outer:
52-
while( ip < np ) {
53-
if( j <= 0 ) {
54-
while( p.charAt(mp) != x.charAt(ip) ) {
55-
ip += delta[x.charAt(ip+1) & HASH_MASK];
56-
if( ip >= np ) {
54+
outer: while (ip < np) {
55+
if (j <= 0) {
56+
while (p.charAt(mp) != x.charAt(ip)) {
57+
ip += delta[x.charAt(ip + 1) & HASH_MASK];
58+
if (ip >= np) {
5759
break outer;
5860
}
5961
}
6062
j = 0;
6163
i = ip - mp;
62-
while( (j < mp) && (x.charAt(i) == p.charAt(j)) ) {
64+
while ((j < mp) && (x.charAt(i) == p.charAt(j))) {
6365
++i;
6466
++j;
6567
}
66-
if( j == mp ) {
67-
stream.accept( i-mp );
68+
if (j == mp) {
69+
stream.accept(i - mp);
6870
++i;
6971
++j;
7072
}
71-
if( j <= 0 ) {
73+
if (j <= 0) {
7274
++i;
7375
} else {
7476
j = beta[j];
7577
}
7678
} else {
77-
while( (j < m) && (x.charAt(i) == p.charAt(j)) ) {
79+
while ((j < m) && (x.charAt(i) == p.charAt(j))) {
7880
++i;
7981
++j;
8082
}
81-
if( j == m ) {
82-
stream.accept( i-m );
83+
if (j == m) {
84+
stream.accept(i - m);
8385
}
8486
j = beta[j];
8587
}
8688
ip = i + mp - j;
8789
}
8890

8991
// check final alignment p[0..m-1] == x[n-m..n-1]
90-
if( ip == np ) {
91-
if( j < 0 ) {
92+
if (ip == np) {
93+
if (j < 0) {
9294
j = 0;
9395
}
9496
i = n - m + j;
95-
while( j < m && x.charAt(i) == p.charAt(j) ) {
97+
while (j < m && x.charAt(i) == p.charAt(j)) {
9698
++i;
9799
++j;
98100
}
99-
if( j == m ) {
100-
stream.accept( n-m );
101+
if (j == m) {
102+
stream.accept(n - m);
101103
}
102104
}
103105

@@ -109,17 +111,17 @@ public IntStream findAll( CharSequence p, CharSequence x ) {
109111
*
110112
* @param pattern the search pattern
111113
*/
112-
private int[] makeDelta( CharSequence pattern ) {
114+
private int[] makeDelta(CharSequence pattern) {
113115
final int m = pattern.length();
114-
@SuppressWarnings( "LocalVariableHidesMemberVariable" )
116+
@SuppressWarnings("LocalVariableHidesMemberVariable")
115117
final int[] delta = this.delta;
116118

117-
Arrays.fill( delta, m + 1 );
118-
for( int i=0; i < m; ++i ) {
119+
Arrays.fill(delta, m + 1);
120+
for (int i = 0; i < m; ++i) {
119121
final char ch = pattern.charAt(i);
120122
final int slot = ch & HASH_MASK;
121123
final int jump = m - i;
122-
if( jump < delta[slot] ) {
124+
if (jump < delta[slot]) {
123125
delta[slot] = jump;
124126
}
125127
}
@@ -132,19 +134,19 @@ private int[] makeDelta( CharSequence pattern ) {
132134
* @param pattern the search pattern
133135
* @return a new β′ array based on the borders of the pattern
134136
*/
135-
private int[] makeBeta( CharSequence pattern ) {
137+
private int[] makeBeta(CharSequence pattern) {
136138
final int m = pattern.length();
137-
final int[] beta = new int[ m + 1 ];
139+
final int[] beta = new int[m + 1];
138140
int i = 0, j = beta[0] = -1;
139141

140-
while( i < m ) {
141-
while( (j > -1) && (pattern.charAt(i) != pattern.charAt(j)) ) {
142+
while (i < m) {
143+
while ((j > -1) && (pattern.charAt(i) != pattern.charAt(j))) {
142144
j = beta[j];
143145
}
144146

145147
++i;
146148
++j;
147-
if( (i < m) && (pattern.charAt(i) == pattern.charAt(j)) ) {
149+
if ((i < m) && (pattern.charAt(i) == pattern.charAt(j))) {
148150
beta[i] = beta[j];
149151
} else {
150152
beta[i] = j;

java/ca/cgjennings/algo/StringSearcher.java

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
/* See LICENSE.md for license details (MIT license). */
12
package ca.cgjennings.algo;
23

34
import java.util.stream.IntStream;
@@ -19,13 +20,13 @@
1920
*/
2021
public interface StringSearcher {
2122
/**
22-
* Finds all matches of the pattern within the text. Each entry in the
23-
* returned {@code IntStream} is the index of one match. If the pattern does
24-
* not occur in the text, an empty stream is returned.
23+
* Finds all matches of the pattern within the text. Each entry in the returned
24+
* {@code IntStream} is the index of one match. If the pattern does not occur in
25+
* the text, an empty stream is returned.
2526
*
2627
* @param pattern the pattern to search for
27-
* @param text the text to search within
28+
* @param text the text to search within
2829
* @return a stream of the indices at which matches was found
2930
*/
30-
IntStream findAll( CharSequence pattern, CharSequence text );
31+
IntStream findAll(CharSequence pattern, CharSequence text);
3132
}

0 commit comments

Comments
 (0)