From 60832142f65b3028094e687d79939d93458dbc56 Mon Sep 17 00:00:00 2001 From: Emanuela Epure <67077116+emanuelaepure10@users.noreply.github.com> Date: Fri, 7 Jul 2023 14:35:15 +0200 Subject: [PATCH] feat/ING-3911 (#1033) * Update test and writer From the test class the whenReadWithBufferedReader_thenCorrect() and testCPGFileCreation() have been removed and a new method testCpgFileAgainstRelatedShapefile() is called from all the test methods. The writer has been updated, taking the charset from the ShapefileDataStore of the related shapefile or adding the default charset in case the above one is null. feat: extend shapefile writer to create .cpg file Create .cpg file/files when one/more .shp files are created. The .cpg file contains only the encoding used to write the shapefile. Create test for creating .cpg file * feat: extend shapefile writer to create .cpg file Create .cpg file/files when one/more .shp files are created. The .cpg file contains only the encoding used to write the shapefile. Create test for checking .cpg file content against related .shp file * Set the charset on the writer as well, as is done in the reader Update test containing text with special characters Decode the encoded text containing special characters --- .../io/shp/ShapefileInstanceWriterTest.groovy | 70 ++++++++++++++----- .../hale/io/shp/ShapefileConstants.java | 6 ++ .../shp/writer/ShapefileInstanceWriter.java | 34 +++++++++ 3 files changed, 91 insertions(+), 19 deletions(-) diff --git a/io/plugins/eu.esdihumboldt.hale.io.shp.test/src/eu/esdihumboldt/hale/io/shp/ShapefileInstanceWriterTest.groovy b/io/plugins/eu.esdihumboldt.hale.io.shp.test/src/eu/esdihumboldt/hale/io/shp/ShapefileInstanceWriterTest.groovy index 9cfc8b9a8d..a3d1b3dd84 100644 --- a/io/plugins/eu.esdihumboldt.hale.io.shp.test/src/eu/esdihumboldt/hale/io/shp/ShapefileInstanceWriterTest.groovy +++ b/io/plugins/eu.esdihumboldt.hale.io.shp.test/src/eu/esdihumboldt/hale/io/shp/ShapefileInstanceWriterTest.groovy @@ -25,6 +25,7 @@ 
import java.time.Instant import java.time.LocalDate import java.util.function.Consumer +import org.geotools.data.shapefile.ShapefileDataStore import org.junit.Test import org.locationtech.jts.geom.Coordinate import org.locationtech.jts.geom.Geometry @@ -100,7 +101,7 @@ class ShapefileInstanceWriterTest { String filenameOnly = Paths.get(location).getFileName().toString(); filenameOnly = filenameOnly.substring(0, filenameOnly.lastIndexOf(".")); - String filename = filePath + "/" + filenameOnly + "_" + additionalName + ".shp"; + String filename = filePath + "/" + filenameOnly + "_" + additionalName + ShapefileConstants.SHP_EXTENSION; file = new File(filename) Schema schema = loadSchema(file) @@ -114,10 +115,11 @@ class ShapefileInstanceWriterTest { assertTrue(report.isSuccess()) assertTrue(report.getErrors().isEmpty()) + testCpgFileAgainstRelatedShapefile(file) + return reader.getInstances(); } - /** * Write an instance collection to a Shapefile. */ @@ -183,12 +185,26 @@ class ShapefileInstanceWriterTest { try { println "Temporary file is $tmpFile" writeInstances(tmpFile.toFile(), schema, instances, configurator) + testCpgFileAgainstRelatedShapefile(tmpFile.toFile()) + handler.accept(tmpFile.toFile()) } finally { tmpDir.deleteDir() } } + private static testCpgFileAgainstRelatedShapefile(File shpFile) { + ShapefileInstanceWriter shapefileInstanceWriter = new ShapefileInstanceWriter() + String cpgFilePath = shpFile.getAbsolutePath().replace(ShapefileConstants.SHP_EXTENSION, ShapefileConstants.CPG_EXTENSION) + + def cpgFile = new File(cpgFilePath) + if (cpgFile.exists()) { + assertTrue(shapefileInstanceWriter.getCharset().toString().equals(cpgFile.text)) + } else { + println("File not found.") + } + } + //@CompileStatic static void withNewShapefileWithReporterErrors(Schema schema, InstanceCollection instances, Consumer handler, Consumer configurator = null) { @@ -198,6 +214,8 @@ class ShapefileInstanceWriterTest { try { println "Temporary file is $tmpFile" 
writeInstancesWithReporterErrors(tmpFile.toFile(), schema, instances, configurator) + testCpgFileAgainstRelatedShapefile(tmpFile.toFile()) + handler.accept(tmpFile.toFile()) } finally { tmpDir.deleteDir() @@ -240,6 +258,7 @@ class ShapefileInstanceWriterTest { num++ } } + // 593 instances were loaded assertEquals(593, num) } @@ -276,6 +295,7 @@ class ShapefileInstanceWriterTest { // load instances again and test def loaded = loadInstances(file) + ShapefileDataStore shapeFileDataStore = new ShapefileDataStore(file.toURL()) int num = 0 loaded.iterator().withCloseable { @@ -293,8 +313,10 @@ class ShapefileInstanceWriterTest { def jts = the_geom.geometry assert jts instanceof Point def name = inst.p.name.value() - assert name - switch (name) { + + String decodedName = new String(name.getBytes(shapeFileDataStore.getCharset())); + assert decodedName + switch (decodedName) { case 'Darmstadt': assert inst.p.population.value() == 158254 break @@ -302,7 +324,7 @@ class ShapefileInstanceWriterTest { assert inst.p.population.value() == 1471508 break default: - throw new IllegalStateException("Unexpected type $typeName") + throw new IllegalStateException("Unexpected type $decodedName") } } } @@ -349,6 +371,7 @@ class ShapefileInstanceWriterTest { // load instances again and test def loaded = loadInstances(file) + ShapefileDataStore shapeFileDataStore = new ShapefileDataStore(file.toURL()) int num = 0 loaded.iterator().withCloseable { @@ -368,8 +391,9 @@ class ShapefileInstanceWriterTest { assert jts instanceof MultiPolygon def name = inst.p.name.value() - assert name - switch (name) { + String decodedName = new String(name.getBytes(shapeFileDataStore.getCharset())); + assert decodedName + switch (decodedName) { case 'Darmstadt': assert inst.p.population.value() == 158254 break @@ -377,7 +401,7 @@ class ShapefileInstanceWriterTest { assert inst.p.population.value() == 1471508 break default: - throw new IllegalStateException("Unexpected type $typeName") + throw new 
IllegalStateException("Unexpected type $decodedName") } } } @@ -422,6 +446,7 @@ class ShapefileInstanceWriterTest { int num = 0 for (geom in geomNames) { def loaded = loadInstances(file, geom) + ShapefileDataStore shapeFileDataStore = new ShapefileDataStore(file.toURL()) loaded.iterator().withCloseable { while (it.hasNext()) { @@ -439,8 +464,9 @@ class ShapefileInstanceWriterTest { def jts = the_geom.geometry assert jts def name = inst.p.name.value() - assert name - switch (name) { + String decodedName = new String(name.getBytes(shapeFileDataStore.getCharset())); + assert decodedName + switch (decodedName) { case 'Darmstadt': assert inst.p.population.value() == 158254 break @@ -448,7 +474,7 @@ class ShapefileInstanceWriterTest { assert inst.p.population.value() == 1471508 break default: - throw new IllegalStateException("Unexpected type $typeName") + throw new IllegalStateException("Unexpected type $decodedName") } } } @@ -1158,6 +1184,7 @@ class ShapefileInstanceWriterTest { withNewShapefile(schema, instances) { file -> // load instances again and test def loaded = loadInstances(file) + ShapefileDataStore shapeFileDataStore = new ShapefileDataStore(file.toURL()) int num = 0 loaded.iterator().withCloseable { @@ -1178,8 +1205,9 @@ class ShapefileInstanceWriterTest { def jts = geom.geometry assert jts instanceof Point def name = inst.p.name.value() - assert name - switch (name) { + String decodedName = new String(name.getBytes(shapeFileDataStore.getCharset())); + assert decodedName + switch (decodedName) { case 'Darmstadt': assert inst.p.population.value() == 158254 break @@ -1187,7 +1215,7 @@ class ShapefileInstanceWriterTest { assert inst.p.population.value() == 1471508 break default: - throw new IllegalStateException("Unexpected type $typeName") + throw new IllegalStateException("Unexpected type $decodedName") } } } @@ -1339,6 +1367,7 @@ class ShapefileInstanceWriterTest { // load instances again and test def loaded = loadInstances(file) + ShapefileDataStore 
shapeFileDataStore = new ShapefileDataStore(file.toURL()) int num = 0 loaded.iterator().withCloseable { @@ -1358,8 +1387,9 @@ class ShapefileInstanceWriterTest { assert jts instanceof MultiPolygon def name = inst.p.name.value() - assert name - switch (name) { + String decodedName = new String(name.getBytes(shapeFileDataStore.getCharset())); + assert decodedName + switch (decodedName) { case 'Darmstadt': assert inst.p.po12.value() == 158254 break @@ -1367,7 +1397,7 @@ class ShapefileInstanceWriterTest { assert inst.p.po12.value() == 1471508 break default: - throw new IllegalStateException("Unexpected type $typeName") + throw new IllegalStateException("Unexpected type $decodedName") } } } @@ -1413,6 +1443,7 @@ class ShapefileInstanceWriterTest { // load instances again and test def loaded = loadInstances(file) + ShapefileDataStore shapeFileDataStore = new ShapefileDataStore(file.toURL()) int num = 0 loaded.iterator().withCloseable { @@ -1432,8 +1463,9 @@ class ShapefileInstanceWriterTest { assert jts instanceof MultiPolygon def name = inst.p.name.value() - assert name - switch (name) { + String decodedName = new String(name.getBytes(shapeFileDataStore.getCharset())); + assert decodedName + switch (decodedName) { case 'München': assert inst.p.po12.value() == 1471508 break diff --git a/io/plugins/eu.esdihumboldt.hale.io.shp/src/eu/esdihumboldt/hale/io/shp/ShapefileConstants.java b/io/plugins/eu.esdihumboldt.hale.io.shp/src/eu/esdihumboldt/hale/io/shp/ShapefileConstants.java index 18f7701281..bffeb96ab1 100644 --- a/io/plugins/eu.esdihumboldt.hale.io.shp/src/eu/esdihumboldt/hale/io/shp/ShapefileConstants.java +++ b/io/plugins/eu.esdihumboldt.hale.io.shp/src/eu/esdihumboldt/hale/io/shp/ShapefileConstants.java @@ -81,6 +81,12 @@ public interface ShapefileConstants { * Constant for the shape file extension. */ public static final String SHP_EXTENSION = ".shp"; + + /** + * Constant for the CPG file extension. 
+ */ + public static final String CPG_EXTENSION = ".cpg"; + /** * Constant for underscore. */ diff --git a/io/plugins/eu.esdihumboldt.hale.io.shp/src/eu/esdihumboldt/hale/io/shp/writer/ShapefileInstanceWriter.java b/io/plugins/eu.esdihumboldt.hale.io.shp/src/eu/esdihumboldt/hale/io/shp/writer/ShapefileInstanceWriter.java index ad6e86c954..e9297b5c9f 100644 --- a/io/plugins/eu.esdihumboldt.hale.io.shp/src/eu/esdihumboldt/hale/io/shp/writer/ShapefileInstanceWriter.java +++ b/io/plugins/eu.esdihumboldt.hale.io.shp/src/eu/esdihumboldt/hale/io/shp/writer/ShapefileInstanceWriter.java @@ -16,6 +16,7 @@ package eu.esdihumboldt.hale.io.shp.writer; import java.io.File; +import java.io.FileWriter; import java.io.IOException; import java.io.Serializable; import java.net.URI; @@ -122,6 +123,11 @@ protected IOReport execute(ProgressIndicator progress, IOReporter reporter) setTarget(new MultiLocationOutputSupplier(uris)); } + for (String f : filesWritten) { + String cpgFileName = filePath + "/" + f + ShapefileConstants.CPG_EXTENSION; + writeCodePageFile(cpgFileName); + } + reporter.setSuccess(true); } catch (Exception e) { reporter.error(new IOMessageImpl(e.getMessage(), e)); @@ -455,6 +461,7 @@ private Map> createSchema(URI location, ShapefileDataStore newDataStore; newDataStore = (ShapefileDataStore) dataStoreFactory.createNewDataStore(params); + newDataStore.setCharset(getCharset()); newDataStore.createSchema(geometryEntry.getValue()); schemaDataStoreMap .computeIfAbsent(schemaEntry.getKey(), @@ -724,4 +731,31 @@ private List writeToFile( return filesWritten; } + /** + * Write the CPG file that accompanies a written Shapefile. + * + * @param cpgFilePath Path of the file to be written; it receives only the name of + * the writer's charset, or of the default charset if none is set + * @throws IOException if the CPG file cannot be opened, written or closed. + */ + public void writeCodePageFile(String cpgFilePath) throws IOException { + File cpgFile = new File(cpgFilePath); + FileWriter fileWriter = new FileWriter(cpgFile); + + try { + fileWriter.write(getCharset() != null ? 
getCharset().toString() + : getDefaultCharset().toString()); + } catch (IOException e) { + throw new IOException("An error occurred while writing the CPG file: " + cpgFilePath + + " " + e.getMessage()); + } finally { + try { + fileWriter.close(); + } catch (IOException e) { + throw new IOException("An error occurred while trying to close the CPG file: " + + cpgFilePath + " " + e.getMessage()); + } + } + } + }