From 0ffd9ee40117f38c6dd4e56322e45a0512258948 Mon Sep 17 00:00:00 2001 From: Bart Maertens Date: Thu, 16 May 2024 10:46:28 +0200 Subject: [PATCH] additional metadata injection sample. #2237 --- .../src/main/samples/files/person-info-1.csv | 6 + .../src/main/samples/files/person-info-2.csv | 6 + .../process-files-with-mdi-template.hpl | 203 ++++++ .../process-files-with-mdi.hpl | 620 ++++++++++++++++++ 4 files changed, 835 insertions(+) create mode 100644 plugins/transforms/metainject/src/main/samples/files/person-info-1.csv create mode 100644 plugins/transforms/metainject/src/main/samples/files/person-info-2.csv create mode 100644 plugins/transforms/metainject/src/main/samples/metadata-injection/process-files-with-mdi-template.hpl create mode 100644 plugins/transforms/metainject/src/main/samples/metadata-injection/process-files-with-mdi.hpl diff --git a/plugins/transforms/metainject/src/main/samples/files/person-info-1.csv b/plugins/transforms/metainject/src/main/samples/files/person-info-1.csv new file mode 100644 index 00000000000..64e141c3ed6 --- /dev/null +++ b/plugins/transforms/metainject/src/main/samples/files/person-info-1.csv @@ -0,0 +1,6 @@ +First name,Last name,Address,Email,Country,Zip +Ralph,Padilla,"571-1430 Vulputate, Rd.",auctor.ullamcorper@icloud.edu,Chile,35528 +Colton,Cobb,3010 Ligula. Av.,eu.ligula.aenean@google.org,Austria,9351-5888 +Orlando,Sheppard,622-8030 Litora St.,egestas.a.dui@google.edu,Russian Federation,134388 +Fiona,Fleming,Ap #913-8067 Sociis Rd.,lacus.ut@icloud.net,Austria,28748 +Tatum,Chen,"P.O. 
Box 390, 3345 Eleifend Av.",et.pede.nunc@hotmail.org,Austria,28555 diff --git a/plugins/transforms/metainject/src/main/samples/files/person-info-2.csv b/plugins/transforms/metainject/src/main/samples/files/person-info-2.csv new file mode 100644 index 00000000000..e3c19659f53 --- /dev/null +++ b/plugins/transforms/metainject/src/main/samples/files/person-info-2.csv @@ -0,0 +1,6 @@ +Street Address 1,Country Name,Postal/Zip,Name Last,Name First,Email Address,Day Of Birth +Ap #253-2158 Iaculis Av.,South Africa,54760,Barry,Elton,nulla.semper.tellus@yahoo.couk,1993-08-13 +Ap #143-6408 Id Rd.,Netherlands,54655,Hodge,Gray,hendrerit.consectetuer.cursus@outlook.com,1984-06-10 +9730 Mi St.,Philippines,423468,Hicks,Zenaida,sit.amet@aol.net,1972-04-12 +Ap #335-4745 Eu Street,Nigeria,6011,Vargas,Pamela,nunc.ullamcorper.velit@yahoo.edu,2001-07-26 +Ap #669-7751 Placerat Avenue,Costa Rica,5424,Jennings,Rhoda,et.magnis@google.edu,1989-01-22 diff --git a/plugins/transforms/metainject/src/main/samples/metadata-injection/process-files-with-mdi-template.hpl b/plugins/transforms/metainject/src/main/samples/metadata-injection/process-files-with-mdi-template.hpl new file mode 100644 index 00000000000..14148e4a95e --- /dev/null +++ b/plugins/transforms/metainject/src/main/samples/metadata-injection/process-files-with-mdi-template.hpl @@ -0,0 +1,203 @@ + + + + + template-file-input + Y + + + + Normal + + + N + 1000 + 100 + - + 2024/05/07 09:54:50.069 + - + 2024/05/07 09:54:50.069 + + + + + + Text file input + Select values + Y + + + Select values + Text file output + Y + + + + Select values + SelectValues + + Y + + 1 + + none + + + + N + + + + 464 + 176 + + + + Text file input + TextFileInput2 + + Y + + 1 + + none + + + N + N + + + ; + " + N + +
Y
+ 1 +
N
+ 1 + N + 1 + N + 80 + 0 + Y + N + + N + N + + mixed + + + Y + + CSV + None + + + + + + 0 + N + N + + + N + + + + + + warning + + error + + line + Y + en_US + + + + + + + + + + + 256 + 176 + +
+ + Text file output + TextFileOutput + + Y + + 1 + + none + + + + ; + " + N + N +
Y
+
N
+ DOS + None + + + N + + Y + + file + N + Y + txt + N + N + N + N + N + N + + Y + N + N + + + + + + + 672 + 176 + +
+ + + +
diff --git a/plugins/transforms/metainject/src/main/samples/metadata-injection/process-files-with-mdi.hpl b/plugins/transforms/metainject/src/main/samples/metadata-injection/process-files-with-mdi.hpl new file mode 100644 index 00000000000..ee73976b8b6 --- /dev/null +++ b/plugins/transforms/metainject/src/main/samples/metadata-injection/process-files-with-mdi.hpl @@ -0,0 +1,620 @@ + + + + process-files-with-mdi + Y + + + + Normal + 0 + + + PRM_TYPE + 1 + + + + N + 1000 + 100 + - + 2024/05/07 09:56:06.810 + - + 2024/05/07 09:56:06.810 + + + + 210 + 136 + 15 + 250 + 231 + 200 + N + 250 + 231 + 200 + N + .AppleSystemUIFont + 13 + 170 + 118 + 52 + This sample pipeline reads person information from 2 files with different layouts. +The data is written to a unified file format through metadata injection. + +To run this sample, run this pipeline with the default PRM_TYPE value of 1, then run it again with 2 as the parameter value. +This will read ${PROJECT_HOME}/files/person-info-1.csv and ${PROJECT_HOME}/files/person-info-2.csv respectively. + +The unified output file is written to ${PROJECT_HOME}/output/unified-person-data.csv. 
+ +To learn more about metadata injection: https://hop.apache.org/manual/latest/pipeline/metadata-injection.html + + 749 + + + + + Get file names + File Metadata + Y + + + File Metadata + ETL metadata injection + Y + + + File Metadata + keep filename + Y + + + keep filename + sort unique filename + Y + + + sort unique filename + ETL metadata injection + Y + + + File Metadata + rename fields + Y + + + rename fields + keep fields + Y + + + keep fields + field position + Y + + + field position + sort fields + Y + + + sort fields + ETL metadata injection + Y + + + + ETL metadata injection + MetaInject + + Y + + 1 + + none + + + ${PROJECT_HOME}/metadata-injection/process-files-with-mdi-template.hpl + local + + + + Y + N + + + + + Text file input + FIELD_PRECISION + Y + File Metadata + precision + + + Text file input + SEPARATOR + N + + , + + + Text file input + FIELD_FORMAT + Y + File Metadata + mask + + + Text file input + FIELD_GROUP + Y + File Metadata + grouping_symbol + + + Text file output + OUTPUT_TRIM + Y + + both + + + Text file input + FIELD_DECIMAL + Y + File Metadata + decimal_symbol + + + Text file input + FIELD_LENGTH + Y + File Metadata + length + + + Text file input + ENCLOSURE + N + + " + + + Text file output + FILENAME + N + + ${PROJECT_HOME}/output/unified-person-data + + + Select values + FIELD_NAME + Y + sort fields + name + + + Text file input + ENCODING + N + + UTF-8 + + + Select values + FIELD_RENAME + Y + sort fields + name_new + + + Text file input + FIELD_TYPE + Y + File Metadata + type_1 + + + Text file output + OUTPUT_FIELDNAME + Y + sort fields + name_new + + + Text file output + EXTENSION + N + + csv + + + Text file input + FILE_TYPE + N + + CSV + + + Text file output + OUTPUT_TYPE + Y + sort fields + type_1 + + + Text file input + FILE_FORMAT + N + + mixed + + + Text file input + HEADER_PRESENT + N + + Y + + + Text file output + SEPARATOR + N + + , + + + Text file input + FILENAME + Y + sort unique filename + filename + + + Text file 
output + ENCLOSURE + N + + " + + + Text file output + APPEND + N + + Y + + + Text file input + FIELD_NAME + Y + File Metadata + name + + + Text file input + NR_HEADER_LINES + N + + 1 + + + + + 928 + 352 + + + + File Metadata + FileMetadataPlugin + + N + + 1 + + none + + + ISO-8859-1 + + + + + ; + + + , + + + " + + + ' + + filename + Y + 10000 + + + 288 + 352 + + + + Get file names + GetFileNames + + Y + + 1 + + none + + + N + N + + N + person-info-${PRM_TYPE}.csv + N + ${PROJECT_HOME}/files + + N + + all_files + + Y + 0 + N + N + + + 128 + 352 + + + + field position + ValueMapper + + Y + + 1 + + none + + + name_new + + + first_name + 1 + + + last_name + 2 + + + primary_email + 3 + + + address_street + 4 + + + address_zip + 5 + + + address_country + 6 + + + field_position + + + 720 + 464 + + + + keep fields + FilterRows + + Y + + 1 + + none + + + + + + + IN LIST + name_new + N + - + + N + -1 + constant + -1 + first_name;last_name;address_street;address_zip;address_country;primary_email + String + + + + + + 560 + 464 + + + + keep filename + SelectValues + + Y + + 1 + + none + + + + + filename + + N + + + + 400 + 256 + + + + rename fields + ValueMapper + + Y + + 1 + + none + + + name + + + First name + first_name + + + Last name + last_name + + + Name First + first_name + + + Name Last + last_name + + + Address + address_street + + + Street Address 1 + address_street + + + Country + address_country + + + Country Name + address_country + + + Zip + address_zip + + + Postal/Zip + address_zip + + + Email + primary_email + + + Email Address + primary_email + + + Day Of Birth + dob + + + name_new + + + 400 + 464 + + + + sort fields + SortRows + + Y + + 1 + + none + + + ${java.io.tmpdir} + out + 1000000 + + N + + N + + + field_position + Y + N + N + 0 + N + + + + + 848 + 464 + + + + sort unique filename + SortRows + + Y + + 1 + + none + + + ${java.io.tmpdir} + out + 1000000 + + N + + Y + + + + + 848 + 256 + + + + + +