Skip to content

Commit

Permalink
WAT extractor: add attributes of the <html> element as metadata
Browse files (browse the repository at this point in the history)
- make tests run also on JDK 8
  • Loading branch information
sebastian-nagel committed Dec 5, 2024
1 parent 581b43a commit 8627773
Showing 1 changed file with 9 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -241,16 +241,17 @@ private void checkLinks(Resource resource, String[][] expectedLinks) {
}
}

/**
 * Test helper: verifies that the first entry of the "Head" -> "Metas" array
 * in the resource's metadata contains the expected key/value pairs.
 *
 * NOTE: this span is a rendered diff without +/- markers, so the removed and
 * the added variant of the signature, the loop header and the equality
 * assertion appear one after the other. The removed variant takes the
 * expectations as a Map; the added variant takes them as a flat varargs
 * array of alternating key, value, key, value, ...
 *
 * @param resource       the extracted resource; must be non-null and an
 *                       instance of HTMLResource
 * @param langAttributes expected key/value pairs (Map in the removed variant,
 *                       alternating key/value strings in the added variant)
 * @throws JSONException declared by the signature; presumably raised by the
 *                       JSON accessors when "Head"/"Metas" or a key is
 *                       missing - TODO confirm against the JSON library used
 */
// Removed variant of the signature (Map-based expectations).
private void checkExtractHtmlLangAttribute(Resource resource, Map<String, String> langAttributes)
// Added variant of the signature (varargs key/value pairs).
private void checkExtractHtmlLangAttribute(Resource resource, String... langAttributes)
throws JSONException {
assertNotNull(resource);
assertTrue("Wrong instance type of Resource: " + resource.getClass(), resource instanceof HTMLResource);
JSONArray metas = resource.getMetaData().getJSONObject("Head").getJSONArray("Metas");
assertNotNull(metas);
// Only the first element of "Metas" is inspected.
JSONObject meta = metas.getJSONObject(0);
// Removed variant: iterate over the keys of the expectation map.
for (String key : langAttributes.keySet()) {
// Added variant: step through the array two at a time; index i is the key,
// index i+1 is its expected value.
// NOTE(review): an odd number of varargs would make langAttributes[i+1]
// run past the end of the array on the last iteration.
for (int i = 0; i < langAttributes.length; i += 2) {
String key = langAttributes[i];
assertNotNull(meta.get(key));
// Removed variant: expected value looked up in the map.
assertEquals(meta.get(key), langAttributes.get(key));
// Added variant: expected value is the array element following the key.
// NOTE(review): if this is JUnit's assertEquals(expected, actual), the
// arguments are swapped here (actual first) - verify; it only affects the
// wording of the failure message.
assertEquals(meta.get(key), langAttributes[i+1]);
}
}

Expand Down Expand Up @@ -433,11 +434,11 @@ public void testHtmlLanguageAttributeExtraction() throws ResourceParseException,
ResourceProducer producer = ProducerUtils.getProducer(getClass().getResource(testFileName).getPath());
ResourceFactoryMapper mapper = new ExtractingResourceFactoryMapper();
ExtractingResourceProducer extractor = new ExtractingResourceProducer(producer, mapper);
checkExtractHtmlLangAttribute(extractor.getNext(), Map.of("name", "HTML@/lang", "content", "en"));
checkExtractHtmlLangAttribute(extractor.getNext(), Map.of("name", "HTML@/lang", "content", "zh-CN"));
checkExtractHtmlLangAttribute(extractor.getNext(), Map.of("name", "HTML@/lang", "content", "cs-cz"));
checkExtractHtmlLangAttribute(extractor.getNext(), Map.of("name", "HTML@/lang", "content", "en"));
checkExtractHtmlLangAttribute(extractor.getNext(), Map.of("name", "HTML@/xml:lang", "content", "es-MX"));
checkExtractHtmlLangAttribute(extractor.getNext(), "name", "HTML@/lang", "content", "en");
checkExtractHtmlLangAttribute(extractor.getNext(), "name", "HTML@/lang", "content", "zh-CN");
checkExtractHtmlLangAttribute(extractor.getNext(), "name", "HTML@/lang", "content", "cs-cz");
checkExtractHtmlLangAttribute(extractor.getNext(), "name", "HTML@/lang", "content", "en");
checkExtractHtmlLangAttribute(extractor.getNext(), "name", "HTML@/xml:lang", "content", "es-MX");
}

public void testHtmlParserEntityDecoding() {
Expand Down

0 comments on commit 8627773

Please sign in to comment.