Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added Boyer-Moore string deletion algorithm as a fix for issue #286 #662

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,10 @@ Note that this project **does not** adhere to [Semantic Versioning](https://semv
## [Unreleased]

### Added

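- Added a new class `IEEEcleanup` that removes unwanted character sequences from titles fetched from IEEE.
- Added a new test case `testEmDashCleanUp` to `IEEETest`.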
### Changed

- The IEEE fetcher now runs the cleanup method on the title before setting the title field.
### Fixed

### Removed
Expand Down
4 changes: 2 additions & 2 deletions src/main/java/org/jabref/logic/importer/fetcher/IEEE.java
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ public IEEE(ImportFormatPreferences importFormatPreferences, ImporterPreferences
*/
private static BibEntry parseJsonResponse(JSONObject jsonEntry, Character keywordSeparator) {
BibEntry entry = new BibEntry();

IEEEcleanup ieeEcleanup = new IEEEcleanup();
switch (jsonEntry.optString("content_type")) {
case "Books" -> entry.setType(StandardEntryType.Book);
case "Conferences" -> entry.setType(StandardEntryType.InProceedings);
Expand Down Expand Up @@ -129,7 +129,7 @@ private static BibEntry parseJsonResponse(JSONObject jsonEntry, Character keywor
entry.setField(StandardField.EVENTTITLEADDON, jsonEntry.optString("conference_location"));
entry.setField(StandardField.EVENTDATE, jsonEntry.optString("conference_dates"));
entry.setField(StandardField.PUBLISHER, jsonEntry.optString("publisher"));
entry.setField(StandardField.TITLE, jsonEntry.optString("title"));
entry.setField(StandardField.TITLE, ieeEcleanup.clean(jsonEntry.optString("title"),"{&}{#}x2014$\\mathsemicolon$"));
entry.setField(StandardField.VOLUME, jsonEntry.optString("volume"));

return entry;
Expand Down
54 changes: 54 additions & 0 deletions src/main/java/org/jabref/logic/importer/fetcher/IEEEcleanup.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
package org.jabref.logic.importer.fetcher;

/**
 * Removes unwanted literal character sequences from strings returned by the IEEE fetcher
 * (for example a mis-encoded em dash embedded in a title).
 */
public class IEEEcleanup {

    /**
     * Deletes every occurrence of {@code t} from {@code str}.
     * <p>
     * The text is scanned once from left to right and matches do not overlap; text that only
     * becomes a match after a neighbouring deletion is left untouched. Matching is literal
     * (no regular expressions) and works for any character, not only ASCII/Latin-1.
     *
     * @param str the text to clean; must not be {@code null}
     * @param t   the literal sequence to remove; must not be {@code null}. An empty sequence
     *            removes nothing.
     * @return {@code str} with all occurrences of {@code t} removed
     * @throws NullPointerException if {@code str} or {@code t} is {@code null}
     */
    public String clean(String str, String t) {
        // An empty pattern matches everywhere but removes nothing; return early so the
        // intent is explicit (the hand-rolled search this replaces looped forever here).
        if (t.isEmpty() || str.isEmpty()) {
            return str;
        }

        // String.replace performs a literal, single-pass, non-overlapping replacement, which is
        // exactly "delete all occurrences". It replaces the previous bad-character search, which
        // compared against the unmodified input while deleting from a separate buffer (so later
        // matches were missed) and indexed a 256-entry table with arbitrary chars (so any
        // character above U+00FF raised ArrayIndexOutOfBoundsException).
        return str.replace(t, "");
    }
}
10 changes: 10 additions & 0 deletions src/test/java/org/jabref/logic/importer/fetcher/IEEETest.java
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,16 @@ void searchByQuotedQueryFindsEntry() throws Exception {
assertEquals(Collections.singletonList(IGOR_NEWCOMERS), fetchedEntries);
}

/**
 * Checks that the mis-encoded em dash sequence is stripped from the title of the entry
 * fetched for DOI 10.1109/PERCOMW.2015.7133989.
 */
@Test
void testEmDashCleanUp() throws Exception {
    List<BibEntry> entries = fetcher.performSearch("10.1109/PERCOMW.2015.7133989");
    // Fail explicitly when the title is missing: guarding the assertion with isPresent()
    // would let the test pass without checking anything.
    assertEquals("Towards situation-aware adaptive workflows: SitOPT A general purpose situation-aware workflow management system",
            entries.get(0).getTitle().orElseThrow(() -> new AssertionError("fetched entry has no title")));
}


@Override
public SearchBasedFetcher getFetcher() {
return fetcher;
Expand Down