diff --git a/.cache/plugin/social/assets/images/social/file/apache_avro/apache_hudi.png b/.cache/plugin/social/assets/images/social/file/apache_avro/apache_hudi.png new file mode 100644 index 0000000..33c3e1e Binary files /dev/null and b/.cache/plugin/social/assets/images/social/file/apache_avro/apache_hudi.png differ diff --git a/.cache/plugin/social/assets/images/social/file/apache_avro/apache_iceberg.png b/.cache/plugin/social/assets/images/social/file/apache_avro/apache_iceberg.png new file mode 100644 index 0000000..1b62fc1 Binary files /dev/null and b/.cache/plugin/social/assets/images/social/file/apache_avro/apache_iceberg.png differ diff --git a/.cache/plugin/social/assets/images/social/file/apache_avro/apache_orc.png b/.cache/plugin/social/assets/images/social/file/apache_avro/apache_orc.png new file mode 100644 index 0000000..75d8a14 Binary files /dev/null and b/.cache/plugin/social/assets/images/social/file/apache_avro/apache_orc.png differ diff --git a/.cache/plugin/social/assets/images/social/file/apache_avro/apache_parquet.png b/.cache/plugin/social/assets/images/social/file/apache_avro/apache_parquet.png new file mode 100644 index 0000000..9773c8c Binary files /dev/null and b/.cache/plugin/social/assets/images/social/file/apache_avro/apache_parquet.png differ diff --git a/.cache/plugin/social/assets/images/social/file/apache_avro/csv.png b/.cache/plugin/social/assets/images/social/file/apache_avro/csv.png new file mode 100644 index 0000000..e3226f9 Binary files /dev/null and b/.cache/plugin/social/assets/images/social/file/apache_avro/csv.png differ diff --git a/.cache/plugin/social/assets/images/social/file/apache_avro/delta_lake.png b/.cache/plugin/social/assets/images/social/file/apache_avro/delta_lake.png new file mode 100644 index 0000000..f57576a Binary files /dev/null and b/.cache/plugin/social/assets/images/social/file/apache_avro/delta_lake.png differ diff --git a/.cache/plugin/social/assets/images/social/file/apache_hudi/apache_avro.png 
b/.cache/plugin/social/assets/images/social/file/apache_hudi/apache_avro.png new file mode 100644 index 0000000..7939499 Binary files /dev/null and b/.cache/plugin/social/assets/images/social/file/apache_hudi/apache_avro.png differ diff --git a/.cache/plugin/social/assets/images/social/file/apache_hudi/apache_iceberg.png b/.cache/plugin/social/assets/images/social/file/apache_hudi/apache_iceberg.png new file mode 100644 index 0000000..3414966 Binary files /dev/null and b/.cache/plugin/social/assets/images/social/file/apache_hudi/apache_iceberg.png differ diff --git a/.cache/plugin/social/assets/images/social/file/apache_hudi/apache_orc.png b/.cache/plugin/social/assets/images/social/file/apache_hudi/apache_orc.png new file mode 100644 index 0000000..cc884b6 Binary files /dev/null and b/.cache/plugin/social/assets/images/social/file/apache_hudi/apache_orc.png differ diff --git a/.cache/plugin/social/assets/images/social/file/apache_hudi/apache_parquet.png b/.cache/plugin/social/assets/images/social/file/apache_hudi/apache_parquet.png new file mode 100644 index 0000000..c36bb1b Binary files /dev/null and b/.cache/plugin/social/assets/images/social/file/apache_hudi/apache_parquet.png differ diff --git a/.cache/plugin/social/assets/images/social/file/apache_hudi/csv.png b/.cache/plugin/social/assets/images/social/file/apache_hudi/csv.png new file mode 100644 index 0000000..98e0e88 Binary files /dev/null and b/.cache/plugin/social/assets/images/social/file/apache_hudi/csv.png differ diff --git a/.cache/plugin/social/assets/images/social/file/apache_hudi/delta_lake.png b/.cache/plugin/social/assets/images/social/file/apache_hudi/delta_lake.png new file mode 100644 index 0000000..a1b0955 Binary files /dev/null and b/.cache/plugin/social/assets/images/social/file/apache_hudi/delta_lake.png differ diff --git a/.cache/plugin/social/assets/images/social/file/apache_iceberg/apache_avro.png b/.cache/plugin/social/assets/images/social/file/apache_iceberg/apache_avro.png new file 
mode 100644 index 0000000..c40f093 Binary files /dev/null and b/.cache/plugin/social/assets/images/social/file/apache_iceberg/apache_avro.png differ diff --git a/.cache/plugin/social/assets/images/social/file/apache_iceberg/apache_hudi.png b/.cache/plugin/social/assets/images/social/file/apache_iceberg/apache_hudi.png new file mode 100644 index 0000000..ff5318e Binary files /dev/null and b/.cache/plugin/social/assets/images/social/file/apache_iceberg/apache_hudi.png differ diff --git a/.cache/plugin/social/assets/images/social/file/apache_iceberg/apache_orc.png b/.cache/plugin/social/assets/images/social/file/apache_iceberg/apache_orc.png new file mode 100644 index 0000000..0492293 Binary files /dev/null and b/.cache/plugin/social/assets/images/social/file/apache_iceberg/apache_orc.png differ diff --git a/.cache/plugin/social/assets/images/social/file/apache_iceberg/apache_parquet.png b/.cache/plugin/social/assets/images/social/file/apache_iceberg/apache_parquet.png new file mode 100644 index 0000000..9517168 Binary files /dev/null and b/.cache/plugin/social/assets/images/social/file/apache_iceberg/apache_parquet.png differ diff --git a/.cache/plugin/social/assets/images/social/file/apache_iceberg/csv.png b/.cache/plugin/social/assets/images/social/file/apache_iceberg/csv.png new file mode 100644 index 0000000..72aa261 Binary files /dev/null and b/.cache/plugin/social/assets/images/social/file/apache_iceberg/csv.png differ diff --git a/.cache/plugin/social/assets/images/social/file/apache_iceberg/delta_lake.png b/.cache/plugin/social/assets/images/social/file/apache_iceberg/delta_lake.png new file mode 100644 index 0000000..8adf4fc Binary files /dev/null and b/.cache/plugin/social/assets/images/social/file/apache_iceberg/delta_lake.png differ diff --git a/.cache/plugin/social/assets/images/social/file/apache_orc/apache_avro.png b/.cache/plugin/social/assets/images/social/file/apache_orc/apache_avro.png new file mode 100644 index 0000000..ad37e0a Binary files 
/dev/null and b/.cache/plugin/social/assets/images/social/file/apache_orc/apache_avro.png differ diff --git a/.cache/plugin/social/assets/images/social/file/apache_orc/apache_hudi.png b/.cache/plugin/social/assets/images/social/file/apache_orc/apache_hudi.png new file mode 100644 index 0000000..4de808e Binary files /dev/null and b/.cache/plugin/social/assets/images/social/file/apache_orc/apache_hudi.png differ diff --git a/.cache/plugin/social/assets/images/social/file/apache_orc/apache_iceberg.png b/.cache/plugin/social/assets/images/social/file/apache_orc/apache_iceberg.png new file mode 100644 index 0000000..9836eeb Binary files /dev/null and b/.cache/plugin/social/assets/images/social/file/apache_orc/apache_iceberg.png differ diff --git a/.cache/plugin/social/assets/images/social/file/apache_orc/apache_parquet.png b/.cache/plugin/social/assets/images/social/file/apache_orc/apache_parquet.png new file mode 100644 index 0000000..b7313d2 Binary files /dev/null and b/.cache/plugin/social/assets/images/social/file/apache_orc/apache_parquet.png differ diff --git a/.cache/plugin/social/assets/images/social/file/apache_orc/csv.png b/.cache/plugin/social/assets/images/social/file/apache_orc/csv.png new file mode 100644 index 0000000..168c3f4 Binary files /dev/null and b/.cache/plugin/social/assets/images/social/file/apache_orc/csv.png differ diff --git a/.cache/plugin/social/assets/images/social/file/apache_orc/delta_lake.png b/.cache/plugin/social/assets/images/social/file/apache_orc/delta_lake.png new file mode 100644 index 0000000..62154f5 Binary files /dev/null and b/.cache/plugin/social/assets/images/social/file/apache_orc/delta_lake.png differ diff --git a/.cache/plugin/social/assets/images/social/file/apache_parquet/apache_avro.png b/.cache/plugin/social/assets/images/social/file/apache_parquet/apache_avro.png new file mode 100644 index 0000000..1040054 Binary files /dev/null and b/.cache/plugin/social/assets/images/social/file/apache_parquet/apache_avro.png 
differ diff --git a/.cache/plugin/social/assets/images/social/file/apache_parquet/apache_hudi.png b/.cache/plugin/social/assets/images/social/file/apache_parquet/apache_hudi.png new file mode 100644 index 0000000..6ad4937 Binary files /dev/null and b/.cache/plugin/social/assets/images/social/file/apache_parquet/apache_hudi.png differ diff --git a/.cache/plugin/social/assets/images/social/file/apache_parquet/apache_iceberg.png b/.cache/plugin/social/assets/images/social/file/apache_parquet/apache_iceberg.png new file mode 100644 index 0000000..b2113ad Binary files /dev/null and b/.cache/plugin/social/assets/images/social/file/apache_parquet/apache_iceberg.png differ diff --git a/.cache/plugin/social/assets/images/social/file/apache_parquet/apache_orc.png b/.cache/plugin/social/assets/images/social/file/apache_parquet/apache_orc.png new file mode 100644 index 0000000..146f9f3 Binary files /dev/null and b/.cache/plugin/social/assets/images/social/file/apache_parquet/apache_orc.png differ diff --git a/.cache/plugin/social/assets/images/social/file/apache_parquet/csv.png b/.cache/plugin/social/assets/images/social/file/apache_parquet/csv.png new file mode 100644 index 0000000..7700278 Binary files /dev/null and b/.cache/plugin/social/assets/images/social/file/apache_parquet/csv.png differ diff --git a/.cache/plugin/social/assets/images/social/file/apache_parquet/delta_lake.png b/.cache/plugin/social/assets/images/social/file/apache_parquet/delta_lake.png new file mode 100644 index 0000000..29fae6e Binary files /dev/null and b/.cache/plugin/social/assets/images/social/file/apache_parquet/delta_lake.png differ diff --git a/.cache/plugin/social/assets/images/social/file/csv/apache_avro.png b/.cache/plugin/social/assets/images/social/file/csv/apache_avro.png new file mode 100644 index 0000000..c8118d7 Binary files /dev/null and b/.cache/plugin/social/assets/images/social/file/csv/apache_avro.png differ diff --git 
a/.cache/plugin/social/assets/images/social/file/csv/apache_hudi.png b/.cache/plugin/social/assets/images/social/file/csv/apache_hudi.png new file mode 100644 index 0000000..be5751c Binary files /dev/null and b/.cache/plugin/social/assets/images/social/file/csv/apache_hudi.png differ diff --git a/.cache/plugin/social/assets/images/social/file/csv/apache_iceberg.png b/.cache/plugin/social/assets/images/social/file/csv/apache_iceberg.png new file mode 100644 index 0000000..1fb0d49 Binary files /dev/null and b/.cache/plugin/social/assets/images/social/file/csv/apache_iceberg.png differ diff --git a/.cache/plugin/social/assets/images/social/file/csv/apache_orc.png b/.cache/plugin/social/assets/images/social/file/csv/apache_orc.png new file mode 100644 index 0000000..e08051f Binary files /dev/null and b/.cache/plugin/social/assets/images/social/file/csv/apache_orc.png differ diff --git a/.cache/plugin/social/assets/images/social/file/csv/apache_parquet.png b/.cache/plugin/social/assets/images/social/file/csv/apache_parquet.png new file mode 100644 index 0000000..5a57461 Binary files /dev/null and b/.cache/plugin/social/assets/images/social/file/csv/apache_parquet.png differ diff --git a/.cache/plugin/social/assets/images/social/file/csv/delta_lake.png b/.cache/plugin/social/assets/images/social/file/csv/delta_lake.png new file mode 100644 index 0000000..9dbbc4b Binary files /dev/null and b/.cache/plugin/social/assets/images/social/file/csv/delta_lake.png differ diff --git a/.cache/plugin/social/assets/images/social/file/delta_lake/apache_avro.png b/.cache/plugin/social/assets/images/social/file/delta_lake/apache_avro.png new file mode 100644 index 0000000..4dd861f Binary files /dev/null and b/.cache/plugin/social/assets/images/social/file/delta_lake/apache_avro.png differ diff --git a/.cache/plugin/social/assets/images/social/file/delta_lake/apache_hudi.png b/.cache/plugin/social/assets/images/social/file/delta_lake/apache_hudi.png new file mode 100644 index 
0000000..503506f Binary files /dev/null and b/.cache/plugin/social/assets/images/social/file/delta_lake/apache_hudi.png differ diff --git a/.cache/plugin/social/assets/images/social/file/delta_lake/apache_iceberg.png b/.cache/plugin/social/assets/images/social/file/delta_lake/apache_iceberg.png new file mode 100644 index 0000000..c614303 Binary files /dev/null and b/.cache/plugin/social/assets/images/social/file/delta_lake/apache_iceberg.png differ diff --git a/.cache/plugin/social/assets/images/social/file/delta_lake/apache_orc.png b/.cache/plugin/social/assets/images/social/file/delta_lake/apache_orc.png new file mode 100644 index 0000000..966d829 Binary files /dev/null and b/.cache/plugin/social/assets/images/social/file/delta_lake/apache_orc.png differ diff --git a/.cache/plugin/social/assets/images/social/file/delta_lake/apache_parquet.png b/.cache/plugin/social/assets/images/social/file/delta_lake/apache_parquet.png new file mode 100644 index 0000000..04ed40d Binary files /dev/null and b/.cache/plugin/social/assets/images/social/file/delta_lake/apache_parquet.png differ diff --git a/.cache/plugin/social/assets/images/social/file/delta_lake/csv.png b/.cache/plugin/social/assets/images/social/file/delta_lake/csv.png new file mode 100644 index 0000000..25f82f5 Binary files /dev/null and b/.cache/plugin/social/assets/images/social/file/delta_lake/csv.png differ diff --git a/.cache/plugin/social/assets/images/social/job_orchestration/apache_airflow/dagster.png b/.cache/plugin/social/assets/images/social/job_orchestration/apache_airflow/dagster.png new file mode 100644 index 0000000..283da72 Binary files /dev/null and b/.cache/plugin/social/assets/images/social/job_orchestration/apache_airflow/dagster.png differ diff --git a/.cache/plugin/social/assets/images/social/job_orchestration/apache_airflow/mage.png b/.cache/plugin/social/assets/images/social/job_orchestration/apache_airflow/mage.png new file mode 100644 index 0000000..b8bcc53 Binary files /dev/null and 
b/.cache/plugin/social/assets/images/social/job_orchestration/apache_airflow/mage.png differ diff --git a/.cache/plugin/social/assets/images/social/job_orchestration/apache_airflow/prefect.png b/.cache/plugin/social/assets/images/social/job_orchestration/apache_airflow/prefect.png new file mode 100644 index 0000000..cd62d86 Binary files /dev/null and b/.cache/plugin/social/assets/images/social/job_orchestration/apache_airflow/prefect.png differ diff --git a/.cache/plugin/social/assets/images/social/job_orchestration/dagster/apache_airflow.png b/.cache/plugin/social/assets/images/social/job_orchestration/dagster/apache_airflow.png new file mode 100644 index 0000000..969dc40 Binary files /dev/null and b/.cache/plugin/social/assets/images/social/job_orchestration/dagster/apache_airflow.png differ diff --git a/.cache/plugin/social/assets/images/social/job_orchestration/dagster/mage.png b/.cache/plugin/social/assets/images/social/job_orchestration/dagster/mage.png new file mode 100644 index 0000000..6119920 Binary files /dev/null and b/.cache/plugin/social/assets/images/social/job_orchestration/dagster/mage.png differ diff --git a/.cache/plugin/social/assets/images/social/job_orchestration/dagster/prefect.png b/.cache/plugin/social/assets/images/social/job_orchestration/dagster/prefect.png new file mode 100644 index 0000000..7b1832b Binary files /dev/null and b/.cache/plugin/social/assets/images/social/job_orchestration/dagster/prefect.png differ diff --git a/.cache/plugin/social/assets/images/social/job_orchestration/mage/apache_airflow.png b/.cache/plugin/social/assets/images/social/job_orchestration/mage/apache_airflow.png new file mode 100644 index 0000000..a32d84e Binary files /dev/null and b/.cache/plugin/social/assets/images/social/job_orchestration/mage/apache_airflow.png differ diff --git a/.cache/plugin/social/assets/images/social/job_orchestration/mage/dagster.png b/.cache/plugin/social/assets/images/social/job_orchestration/mage/dagster.png new file mode 
100644 index 0000000..dcae96a Binary files /dev/null and b/.cache/plugin/social/assets/images/social/job_orchestration/mage/dagster.png differ diff --git a/.cache/plugin/social/assets/images/social/job_orchestration/mage/prefect.png b/.cache/plugin/social/assets/images/social/job_orchestration/mage/prefect.png new file mode 100644 index 0000000..478d169 Binary files /dev/null and b/.cache/plugin/social/assets/images/social/job_orchestration/mage/prefect.png differ diff --git a/.cache/plugin/social/assets/images/social/job_orchestration/prefect/apache_airflow.png b/.cache/plugin/social/assets/images/social/job_orchestration/prefect/apache_airflow.png new file mode 100644 index 0000000..efda902 Binary files /dev/null and b/.cache/plugin/social/assets/images/social/job_orchestration/prefect/apache_airflow.png differ diff --git a/.cache/plugin/social/assets/images/social/job_orchestration/prefect/dagster.png b/.cache/plugin/social/assets/images/social/job_orchestration/prefect/dagster.png new file mode 100644 index 0000000..2e0d59e Binary files /dev/null and b/.cache/plugin/social/assets/images/social/job_orchestration/prefect/dagster.png differ diff --git a/.cache/plugin/social/assets/images/social/job_orchestration/prefect/mage.png b/.cache/plugin/social/assets/images/social/job_orchestration/prefect/mage.png new file mode 100644 index 0000000..68065f6 Binary files /dev/null and b/.cache/plugin/social/assets/images/social/job_orchestration/prefect/mage.png differ diff --git a/.cache/plugin/social/manifest.json b/.cache/plugin/social/manifest.json index 47ffbfd..bacc514 100644 --- a/.cache/plugin/social/manifest.json +++ b/.cache/plugin/social/manifest.json @@ -2,5 +2,59 @@ "assets/images/social/index.png": "6f6b3add99756489361e006c0d2b9d5df173e51f", "assets/images/social/database/index.png": "8ee6be82cdcfdfff26456f23cf60be94de7debe8", "assets/images/social/file/index.png": "4e4a5927aa4ec4db27f94f53488933563c4e1480", - 
"assets/images/social/job_orchestration/index.png": "3116dff397b6c723639a3323379b83a3e547c6a8" + "assets/images/social/job_orchestration/index.png": "3116dff397b6c723639a3323379b83a3e547c6a8", + "assets/images/social/file/apache_avro/delta_lake.png": "427d66a5a4639458b3b831057f9a37065f556883", + "assets/images/social/file/apache_avro/apache_hudi.png": "cabba053648ba0bf4958f83601ed8ab7b66acf97", + "assets/images/social/file/apache_avro/apache_parquet.png": "d73b2c1870faf7f44cf9624ce5901a9a8d260a21", + "assets/images/social/file/apache_avro/apache_orc.png": "276930327a4bf690a7fde80eb46a871f1a968183", + "assets/images/social/file/apache_hudi/apache_avro.png": "0c9f1c669aa68e503c8688073d92afa3593cb432", + "assets/images/social/file/apache_avro/csv.png": "56a7b1a365c53fe7fb77a3dfe47a3d3c6fab2b7f", + "assets/images/social/file/apache_hudi/apache_iceberg.png": "9dedd53ac01f7cedd3ce57ba3ff9a4835b64dfde", + "assets/images/social/file/apache_hudi/csv.png": "f72543a1b8404e5428af61b0936fcf62452f426e", + "assets/images/social/file/apache_hudi/apache_parquet.png": "0d1be4ed23ee6ac765cb5f8daa66dcbf0ed39a43", + "assets/images/social/file/apache_hudi/delta_lake.png": "33a3430b5ce2791aafa7f86437af307633fe8f29", + "assets/images/social/file/apache_hudi/apache_orc.png": "a4820d434512952adc3c8cb18ada567fda918d20", + "assets/images/social/file/apache_iceberg/apache_avro.png": "3e03525abd3cca31a90941fbfc5b78cbcabc8614", + "assets/images/social/file/apache_iceberg/delta_lake.png": "1ccc6d4b6e31dc2cc75e28fe067172f9bc6b519f", + "assets/images/social/file/apache_iceberg/csv.png": "49a2f56b645fd74d2a8648afe94503332aa2827a", + "assets/images/social/file/apache_iceberg/apache_orc.png": "cd1fee6760ca971432b3e74c3196c5990c589dde", + "assets/images/social/file/apache_iceberg/apache_hudi.png": "a1fded2f19ebaea1c0e866687f70cdc60fbb370b", + "assets/images/social/file/apache_orc/apache_avro.png": "ac7be2402c16542962a38130d91f1b320f2079e1", + 
"assets/images/social/file/apache_iceberg/apache_parquet.png": "60288bd975fd7beb9cb89d6c21cff7330bb46efd", + "assets/images/social/file/apache_orc/apache_hudi.png": "e1d8ca7d66079db7a529345847522faee3e10dd4", + "assets/images/social/file/apache_orc/apache_parquet.png": "83b3e82df3d6b3c1336892cfadbf669430891568", + "assets/images/social/file/apache_orc/apache_iceberg.png": "1bd32650d514cb42dc8ed2a1477d6ab30ae1a5d4", + "assets/images/social/file/apache_orc/csv.png": "b5485c33a3cf8e0556158f0e0aa902584579f689", + "assets/images/social/file/apache_parquet/apache_avro.png": "0361ce4f3b7d3ebb909c0acd550660a679e7ceca", + "assets/images/social/file/apache_orc/delta_lake.png": "24d6a09f0ad9d508934a499ac2634bfaa6e48918", + "assets/images/social/file/apache_parquet/apache_hudi.png": "fc52722e3fe82486418a199478f50f892261dd0a", + "assets/images/social/file/apache_parquet/apache_orc.png": "dc25a4ce88e652fab0753c6207bf05744d060358", + "assets/images/social/file/apache_parquet/apache_iceberg.png": "fc83a3aa24ef3e6f08c58b2b321289cbc766d721", + "assets/images/social/file/apache_parquet/csv.png": "9c6c2048273227f7b35767dac79c2da86360d155", + "assets/images/social/file/csv/apache_avro.png": "9ff1d97ecfcad5d7a576a2c7a8365e8ad45ac342", + "assets/images/social/file/apache_parquet/delta_lake.png": "568a564dbe44352d7ec4f8b9a49968aed9ad1061", + "assets/images/social/file/csv/apache_hudi.png": "7ed128913330e3bf31a41a83ce91cb64a3d4ae64", + "assets/images/social/file/csv/apache_iceberg.png": "d30b4e98f78f28e9b14b64d95b171b0c46d306de", + "assets/images/social/file/csv/apache_orc.png": "b781e50566ee0a16c14d70005eb3fd9ab90c14a6", + "assets/images/social/file/csv/delta_lake.png": "c6e2fe1052f67f77c6967413137ec17324f2b9f8", + "assets/images/social/file/delta_lake/apache_avro.png": "bb918698cb55d24028921e1b32426c6d878699f4", + "assets/images/social/file/csv/apache_parquet.png": "af4eedacac5494f882d26a22aa4e3e1a22374a06", + "assets/images/social/file/delta_lake/apache_hudi.png": 
"e114a58bcb9fb80ae0e512dae72842cdf5f25edb", + "assets/images/social/file/delta_lake/apache_iceberg.png": "ddaf87de3152fd90f79c4d73be5bc09816c2fbb6", + "assets/images/social/file/delta_lake/csv.png": "fafff08c4c1d868abeaefa016cf5771986c1948c", + "assets/images/social/file/delta_lake/apache_orc.png": "a7468941d9f17420669bf872ec2d35620351f84c", + "assets/images/social/file/delta_lake/apache_parquet.png": "9f522dffbad9252092eefa75e1dac985ce084859", + "assets/images/social/job_orchestration/apache_airflow/mage.png": "bffce30fc7f0b8d60aff6f78805e879fe27b8407", + "assets/images/social/job_orchestration/apache_airflow/dagster.png": "44f87a006f73ca7a2a7a885a10f7d5bece6266f2", + "assets/images/social/job_orchestration/apache_airflow/prefect.png": "7014808abfd542cbf7d6a355b5f8fa45bbb964d8", + "assets/images/social/job_orchestration/dagster/mage.png": "7149d11fa3a7be835057644875b9857b1e21a2a8", + "assets/images/social/job_orchestration/dagster/prefect.png": "2f209e18725ce61922c4ec961c53d71c36a9f364", + "assets/images/social/job_orchestration/dagster/apache_airflow.png": "5e002c9e6741ff086547e2d8a236f208cb9e503a", + "assets/images/social/job_orchestration/mage/apache_airflow.png": "e09935472191bde1e0a9d6358f546665516860b1", + "assets/images/social/job_orchestration/mage/dagster.png": "305e81ba0739b4b5ad99c69e1daf416d3a62e260", + "assets/images/social/job_orchestration/mage/prefect.png": "86c42e2183b1422e2bdedc1fb90650aebbefc19c", + "assets/images/social/job_orchestration/prefect/apache_airflow.png": "f43fcc5bb0104e40e216aa86454e80579b66d7f7", + "assets/images/social/job_orchestration/prefect/mage.png": "04f7e8aec3dd2bcdf11490b3af5b645e9b12f43e", + "assets/images/social/job_orchestration/prefect/dagster.png": "1192837fffdecc9b08a4183db11ceae8fd9a0072", + "assets/images/social/file/apache_avro/apache_iceberg.png": "84cfbff4049b1f515de79f20c563614c070cdde0" } \ No newline at end of file diff --git a/build.gradle b/build.gradle index 2275700..4b43bdb 100644 --- a/build.gradle +++ 
b/build.gradle @@ -26,21 +26,24 @@ tasks.register("generate") { a.name <=> b.name } - List techTypes = List.of("file") LoaderOptions loaderOptions = new LoaderOptions() Yaml yaml = new Yaml(loaderOptions) var workingDir = "${rootProject.projectDir.absolutePath}" freemarker.template.Configuration cfg = new freemarker.template.Configuration(freemarker.template.Configuration.VERSION_2_3_32) cfg.setDirectoryForTemplateLoading(new File("$workingDir/tech/templates")) cfg.setDefaultEncoding("UTF-8") - var template = cfg.getTemplate("template-index.ftl") + var baseTechTemplate = cfg.getTemplate("template-index.ftl") + //update mkdocs file by getting the 'nav:' section and writing the updated site structure + var baseNavSection = "nav:\n - Home: 'index.md'\n" new File("$workingDir/tech").eachFileRecurse(groovy.io.FileType.DIRECTORIES) { dir -> if (dir.name != "templates") { var tech = dir.name + var cleanTechName = tech.replace("_", " ").capitalize() var baseDir = new File("tech/$tech") var root = new HashMap() var innerTechTypes = new ArrayList() + baseNavSection += " - $cleanTechName:\n - '$tech/index.md'\n" baseDir.traverse(type: FILES, sort: sortByName) { file -> var parsedYaml = yaml.load(file.getText()) @@ -50,26 +53,56 @@ tasks.register("generate") { } root.put("high_level_tech_name", tech) root.put("tech_types", innerTechTypes) - var outFile = new File("$workingDir/docs/$tech/index.md") - if (!outFile.parentFile.exists()) { - outFile.parentFile.mkdirs() + + // main file with all tech comparisons + var baseTechOutFile = new File("$workingDir/docs/$tech/index.md") + if (!baseTechOutFile.parentFile.exists()) { + baseTechOutFile.parentFile.mkdirs() } - if (!outFile.exists()) { - outFile.createNewFile() + if (!baseTechOutFile.exists()) { + baseTechOutFile.createNewFile() } - var out = new FileWriter(outFile) - template.process(root, out) + var out = new FileWriter(baseTechOutFile) + baseTechTemplate.process(root, out) out.close() - var mkdocsFile = new 
File("$workingDir/mkdocs.yml") - if (!mkdocsFile.text.contains("'$tech/index.md'")) { - var cleanTechName = tech.replace("_", " ").capitalize() - var updatedMkdocs = mkdocsFile.text.concat("\n - $cleanTechName: '$tech/index.md'") - var mkdocsOut = new FileWriter(mkdocsFile) - mkdocsOut.write(updatedMkdocs) - mkdocsOut.close() - } + // sub files with comparison with each pair + innerTechTypes.forEach(techType -> { + baseNavSection += " - $techType:\n" + innerTechTypes.forEach(techType2 -> { + var techTypeName = techType.toLowerCase().replaceAll(" ", "_") + var techType2Name = techType2.toLowerCase().replaceAll(" ", "_") + if (techTypeName != techType2Name) { + baseNavSection += " - $techType2: '$tech/$techTypeName/${techType2Name}.md'\n" + + var compareRoot = new HashMap() + compareRoot.put("compare_two_tech", true) + compareRoot.put("high_level_tech_name", tech) + compareRoot.put("tech_types", [techType, techType2]) + compareRoot.put(techType, root[techType]) + compareRoot.put(techType2, root[techType2]) + + var compareTwoOutFile = new File("$workingDir/docs/$tech/$techTypeName/${techType2Name}.md") + if (!compareTwoOutFile.parentFile.exists()) { + compareTwoOutFile.parentFile.mkdirs() + } + if (!compareTwoOutFile.exists()) { + compareTwoOutFile.createNewFile() + } + var compareOut = new FileWriter(compareTwoOutFile) + baseTechTemplate.process(compareRoot, compareOut) + compareOut.close() + } + }) + }) } } + + //update mkdocs file nav section with baseNavSection that will have updated site structure + var mkdocsFile = new File("$workingDir/mkdocs.yml") + var updatedMkdocs = mkdocsFile.text.replaceAll(/(?ms)nav:.*/, baseNavSection) + var mkdocsOut = new FileWriter(mkdocsFile) + mkdocsOut.write(updatedMkdocs) + mkdocsOut.close() } } \ No newline at end of file diff --git a/docs/file/apache_avro/apache_hudi.md b/docs/file/apache_avro/apache_hudi.md new file mode 100644 index 0000000..b1797bc --- /dev/null +++ b/docs/file/apache_avro/apache_hudi.md @@ -0,0 +1,188 @@ 
+--- +title: "Compare File technologies/tools" +description: "Compare File technologies/tools by features. Includes Apache Avro vs Apache Hudi." +--- +# File + +[![Apache Avro logo](../../assets/logo/avro.png){: style="height:30px;width:30px" .lg align-left } Apache Avro](){ .md-button .toggle-vis data-column="1" } +[![Apache Hudi logo](../../assets/logo/hudi.png){: style="height:30px;width:30px" .lg align-left } Apache Hudi](){ .md-button .toggle-vis data-column="2" } + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AttributeApache AvroApache Hudi
NameApache AvroApache Hudi
DescriptionApache Avro is the leading serialization format for record data, and first choice for streaming data pipelines.Apache Hudi is a transactional data lake platform that brings database and data warehouse capabilities to the data lake. Utilises data stored in either parquet or orc.
LicenseApache license 2.0Apache license 2.0
Source codehttps://github.com/apache/avrohttps://github.com/apache/hudi
Websitehttps://avro.apache.org/https://hudi.apache.org/
Year created20092016
CompanyApacheUber
Language supportjava, c++, c#, c, python, javascript, perl, ruby, php, rust
Use casesStream processing, Analytics, Efficient data exchangeIncremental data processing, Data upserts, Change Data Capture (CDC), ACID transactions
Is human readable +
no
+
+
no
+
Orientationrowcolumn or row
Has type system +
yes
+
+
yes
+
Has nested structure support +
yes
+
+
yes
+
Has native compression +
yes
+
+
yes
+
Has encoding support +
yes
+
+
yes
+
Has constraint support +
no
+
+
yes
+
Has acid support +
no
+
+
yes
+
Has metadata +
yes
+
+
yes
+
Has encryption support +
no
+
+
maybe
+
Data processing framework support + Apache Flink, + Apache Gobblin, + Apache NiFi, + Apache Pig, + Apache Spark, + + Apache Spark, + Apache Flink, +
Analytics query support + Apache Impala, + Apache Druid, + Apache Hive, + Apache Pinot, + AWS Athena, + BigQuery, + Clickhouse, + Firebolt, + + Apache Hive, + Apache Impala, + AWS Athena, + BigQuery, + Clickhouse, + Presto, + Trino, +
diff --git a/docs/file/apache_avro/apache_iceberg.md b/docs/file/apache_avro/apache_iceberg.md new file mode 100644 index 0000000..8a52f3a --- /dev/null +++ b/docs/file/apache_avro/apache_iceberg.md @@ -0,0 +1,194 @@ +--- +title: "Compare File technologies/tools" +description: "Compare File technologies/tools by features. Includes Apache Avro vs Apache Iceberg." +--- +# File + +[![Apache Avro logo](../../assets/logo/avro.png){: style="height:30px;width:30px" .lg align-left } Apache Avro](){ .md-button .toggle-vis data-column="1" } +[![Apache Iceberg logo](../../assets/logo/iceberg.png){: style="height:30px;width:30px" .lg align-left } Apache Iceberg](){ .md-button .toggle-vis data-column="2" } + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AttributeApache AvroApache Iceberg
NameApache AvroApache Iceberg
DescriptionApache Avro is the leading serialization format for record data, and first choice for streaming data pipelines.Iceberg is a high-performance format for huge analytic tables. Utilises data stored in either parquet, avro, or orc.
LicenseApache license 2.0Apache license 2.0
Source codehttps://github.com/apache/avrohttps://github.com/apache/iceberg
Websitehttps://avro.apache.org/https://iceberg.apache.org/
Year created20092017
CompanyApacheNetflix
Language supportjava, c++, c#, c, python, javascript, perl, ruby, php, rust
Use casesStream processing, Analytics, Efficient data exchangeWrite once read many, Analytics, Efficient storage, ACID transactions
Is human readable +
no
+
+
no
+
Orientationrowcolumn or row
Has type system +
yes
+
+
yes
+
Has nested structure support +
yes
+
+
yes
+
Has native compression +
yes
+
+
yes
+
Has encoding support +
yes
+
+
yes
+
Has constraint support +
no
+
+
no
+
Has acid support +
no
+
+
yes
+
Has metadata +
yes
+
+
yes
+
Has encryption support +
no
+
+
maybe
+
Data processing framework support + Apache Flink, + Apache Gobblin, + Apache NiFi, + Apache Pig, + Apache Spark, + + Apache Drill, + Apache Flink, + Apache Gobblin, + Apache Pig, + Apache Spark, +
Analytics query support + Apache Impala, + Apache Druid, + Apache Hive, + Apache Pinot, + AWS Athena, + BigQuery, + Clickhouse, + Firebolt, + + Apache Impala, + Apache Druid, + Apache Hive, + AWS Athena, + BigQuery, + Clickhouse, + Dremio, + DuckDB, + Presto, + Trino, +
diff --git a/docs/file/apache_avro/apache_orc.md b/docs/file/apache_avro/apache_orc.md new file mode 100644 index 0000000..9791fa8 --- /dev/null +++ b/docs/file/apache_avro/apache_orc.md @@ -0,0 +1,195 @@ +--- +title: "Compare File technologies/tools" +description: "Compare File technologies/tools by features. Includes Apache Avro vs Apache ORC." +--- +# File + +[![Apache Avro logo](../../assets/logo/avro.png){: style="height:30px;width:30px" .lg align-left } Apache Avro](){ .md-button .toggle-vis data-column="1" } +[![Apache ORC logo](../../assets/logo/orc.png){: style="height:30px;width:30px" .lg align-left } Apache ORC](){ .md-button .toggle-vis data-column="2" } + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AttributeApache AvroApache ORC
NameApache AvroApache ORC
DescriptionApache Avro is the leading serialization format for record data, and first choice for streaming data pipelines.ORC is a self-describing type-aware columnar file format designed for Hadoop workloads.
LicenseApache license 2.0Apache license 2.0
Source codehttps://github.com/apache/avrohttps://github.com/apache/orc
Websitehttps://avro.apache.org/https://orc.apache.org/
Year created20092013
CompanyApacheHortonworks, Facebook
Language supportjava, c++, c#, c, python, javascript, perl, ruby, php, rustjava, scala, c++, python
Use casesStream processing, Analytics, Efficient data exchangeWrite once read many, Analytics, Efficient storage, ACID transactions
Is human readable +
no
+
+
no
+
Orientationrowcolumn
Has type system +
yes
+
+
yes
+
Has nested structure support +
yes
+
+
yes
+
Has native compression +
yes
+
+
yes
+
Has encoding support +
yes
+
+
yes
+
Has constraint support +
no
+
+
no
+
Has acid support +
no
+
+
no
+
Has metadata +
yes
+
+
yes
+
Has encryption support +
no
+
+
yes
+
Data processing framework support + Apache Flink, + Apache Gobblin, + Apache NiFi, + Apache Pig, + Apache Spark, + + Apache Flink, + Apache Gobblin, + Apache Hadoop, + Apache NiFi, + Apache Pig, + Apache Spark, +
Analytics query support + Apache Impala, + Apache Druid, + Apache Hive, + Apache Pinot, + AWS Athena, + BigQuery, + Clickhouse, + Firebolt, + + Apache Impala, + Apache Druid, + Apache Hive, + Apache Pinot, + AWS Athena, + BigQuery, + Clickhouse, + Firebolt, + Presto, + Trino, +
diff --git a/docs/file/apache_avro/apache_parquet.md b/docs/file/apache_avro/apache_parquet.md new file mode 100644 index 0000000..cda43ff --- /dev/null +++ b/docs/file/apache_avro/apache_parquet.md @@ -0,0 +1,194 @@ +--- +title: "Compare File technologies/tools" +description: "Compare File technologies/tools by features. Includes Apache Avro vs Apache Parquet." +--- +# File + +[![Apache Avro logo](../../assets/logo/avro.png){: style="height:30px;width:30px" .lg align-left } Apache Avro](){ .md-button .toggle-vis data-column="1" } +[![Apache Parquet logo](../../assets/logo/parquet.svg){: style="height:30px;width:30px" .lg align-left } Apache Parquet](){ .md-button .toggle-vis data-column="2" } + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AttributeApache AvroApache Parquet
NameApache AvroApache Parquet
DescriptionApache Avro is the leading serialization format for record data, and first choice for streaming data pipelines.Apache Parquet is an open source, column-oriented data file format designed for efficient data storage and retrieval.
LicenseApache license 2.0Apache license 2.0
Source codehttps://github.com/apache/avrohttps://github.com/apache/parquet-format
Websitehttps://avro.apache.org/https://parquet.apache.org/
Year created20092013
CompanyApacheTwitter, Cloudera
Language supportjava, c++, c#, c, python, javascript, perl, ruby, php, rustjava, scala, c++, python, r, php
Use casesStream processing, Analytics, Efficient data exchangeWrite once read many, Analytics, Efficient storage, Column based queries
Is human readable +
no
+
+
no
+
Orientationrowcolumn
Has type system +
yes
+
+
yes
+
Has nested structure support +
yes
+
+
yes
+
Has native compression +
yes
+
+
yes
+
Has encoding support +
yes
+
+
yes
+
Has constraint support +
no
+
+
no
+
Has acid support +
no
+
+
no
+
Has metadata +
yes
+
+
yes
+
Has encryption support +
no
+
+
yes
+
Data processing framework support + Apache Flink, + Apache Gobblin, + Apache NiFi, + Apache Pig, + Apache Spark, + + Apache Beam, + Apache Drill, + Apache Flink, + Apache Spark, +
Analytics query support + Apache Impala, + Apache Druid, + Apache Hive, + Apache Pinot, + AWS Athena, + BigQuery, + Clickhouse, + Firebolt, + + Apache Hive, + Apache Impala, + Apache Druid, + Apache Pinot, + AWS Athena, + Azure Synapse, + BigQuery, + Clickhouse, + Dremio, + DuckDB, + Firebolt, +
diff --git a/docs/file/apache_avro/csv.md b/docs/file/apache_avro/csv.md new file mode 100644 index 0000000..56ad26e --- /dev/null +++ b/docs/file/apache_avro/csv.md @@ -0,0 +1,197 @@ +--- +title: "Compare File technologies/tools" +description: "Compare File technologies/tools by features. Includes Apache Avro vs CSV." +--- +# File + +[![Apache Avro logo](../../assets/logo/avro.png){: style="height:30px;width:30px" .lg align-left } Apache Avro](){ .md-button .toggle-vis data-column="1" } +[![CSV logo](../../assets/logo/csv.png){: style="height:30px;width:30px" .lg align-left } CSV](){ .md-button .toggle-vis data-column="2" } + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AttributeApache AvroCSV
NameApache AvroCSV
DescriptionApache Avro is the leading serialization format for record data, and first choice for streaming data pipelines.Comma-Separated Values (CSV) is a text file format that uses commas to separate values in plain text.
LicenseApache license 2.0N/A
Source codehttps://github.com/apache/avro
Websitehttps://avro.apache.org/https://www.rfc-editor.org/rfc/rfc4180.html
Year created2009N/A
CompanyApache
Language supportjava, c++, c#, c, python, javascript, perl, ruby, php, rustjava, scala, c++, python, r, php, go
Use casesStream processing, Analytics, Efficient data exchange
Is human readable +
no
+
+
yes
+
Orientationrowrow
Has type system +
yes
+
+
no
+
Has nested structure support +
yes
+
+
no
+
Has native compression +
yes
+
+
no
+
Has encoding support +
yes
+
+
no
+
Has constraint support +
no
+
+
no
+
Has acid support +
no
+
+
no
+
Has metadata +
yes
+
+
no
+
Has encryption support +
no
+
+
no
+
Data processing framework support + Apache Flink, + Apache Gobblin, + Apache NiFi, + Apache Pig, + Apache Spark, + + Apache Beam, + Apache Drill, + Apache Flink, + Apache Gobblin, + Apache Hive, + Apache NiFi, + Apache Pig, + Apache Spark, +
Analytics query support + Apache Impala, + Apache Druid, + Apache Hive, + Apache Pinot, + AWS Athena, + BigQuery, + Clickhouse, + Firebolt, + + Apache Impala, + Apache Druid, + Apache Pinot, + AWS Athena, + Azure Synapse, + BigQuery, + Clickhouse, + Dremio, + DuckDB, + Firebolt, +
diff --git a/docs/file/apache_avro/delta_lake.md b/docs/file/apache_avro/delta_lake.md new file mode 100644 index 0000000..2ea1fcf --- /dev/null +++ b/docs/file/apache_avro/delta_lake.md @@ -0,0 +1,190 @@ +--- +title: "Compare File technologies/tools" +description: "Compare File technologies/tools by features. Includes Apache Avro vs Delta Lake." +--- +# File + +[![Apache Avro logo](../../assets/logo/avro.png){: style="height:30px;width:30px" .lg align-left } Apache Avro](){ .md-button .toggle-vis data-column="1" } +[![Delta Lake logo](../../assets/logo/delta_lake.png){: style="height:30px;width:30px" .lg align-left } Delta Lake](){ .md-button .toggle-vis data-column="2" } + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AttributeApache AvroDelta Lake
NameApache AvroDelta Lake
DescriptionApache Avro is the leading serialization format for record data, and first choice for streaming data pipelines.Delta Lake is an open-source storage framework that enables building a Lakehouse architecture.
LicenseApache license 2.0Apache license 2.0
Source codehttps://github.com/apache/avrohttps://github.com/delta-io/delta
Websitehttps://avro.apache.org/https://delta.io/
Year created20092019
CompanyApacheDatabricks
Language supportjava, c++, c#, c, python, javascript, perl, ruby, php, rustscala, java, python, rust
Use casesStream processing, Analytics, Efficient data exchangeWrite once read many, Analytics, Efficient storage, ACID transactions
Is human readable +
no
+
+
no
+
Orientationrowcolumn
Has type system +
yes
+
+
yes
+
Has nested structure support +
yes
+
+
yes
+
Has native compression +
yes
+
+
yes
+
Has encoding support +
yes
+
+
yes
+
Has constraint support +
no
+
+
yes
+
Has acid support +
no
+
+
yes
+
Has metadata +
yes
+
+
yes
+
Has encryption support +
no
+
+
maybe
+
Data processing framework support + Apache Flink, + Apache Gobblin, + Apache NiFi, + Apache Pig, + Apache Spark, + + Apache Drill, + Apache Flink, + Apache Spark, +
Analytics query support + Apache Impala, + Apache Druid, + Apache Hive, + Apache Pinot, + AWS Athena, + BigQuery, + Clickhouse, + Firebolt, + + Apache Hive, + AWS Athena, + Azure Synapse, + BigQuery, + Clickhouse, + Dremio, + Presto, + Trino, +
diff --git a/docs/file/apache_hudi/apache_avro.md b/docs/file/apache_hudi/apache_avro.md new file mode 100644 index 0000000..f9d38e0 --- /dev/null +++ b/docs/file/apache_hudi/apache_avro.md @@ -0,0 +1,188 @@ +--- +title: "Compare File technologies/tools" +description: "Compare File technologies/tools by features. Includes Apache Hudi vs Apache Avro." +--- +# File + +[![Apache Hudi logo](../../assets/logo/hudi.png){: style="height:30px;width:30px" .lg align-left } Apache Hudi](){ .md-button .toggle-vis data-column="1" } +[![Apache Avro logo](../../assets/logo/avro.png){: style="height:30px;width:30px" .lg align-left } Apache Avro](){ .md-button .toggle-vis data-column="2" } + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AttributeApache HudiApache Avro
NameApache HudiApache Avro
DescriptionApache Hudi is a transactional data lake platform that brings database and data warehouse capabilities to the data lake. Utilises data stored in either parquet or orc.Apache Avro is the leading serialization format for record data, and first choice for streaming data pipelines.
Source codehttps://github.com/apache/hudihttps://github.com/apache/avro
Websitehttps://hudi.apache.org/https://avro.apache.org/
LicenseApache license 2.0Apache license 2.0
Year created20162009
CompanyUberApache
Use casesIncremental data processing, Data upserts, Change Data Capture (CDC), ACID transactionsStream processing, Analytics, Efficient data exchange
Language supportjava, c++, c#, c, python, javascript, perl, ruby, php, rust
Is human readable +
no
+
+
no
+
Orientationcolumn or rowrow
Has type system +
yes
+
+
yes
+
Has nested structure support +
yes
+
+
yes
+
Has native compression +
yes
+
+
yes
+
Has encoding support +
yes
+
+
yes
+
Has constraint support +
yes
+
+
no
+
Has acid support +
yes
+
+
no
+
Has metadata +
yes
+
+
yes
+
Has encryption support +
maybe
+
+
no
+
Data processing framework support + Apache Spark, + Apache Flink, + + Apache Flink, + Apache Gobblin, + Apache NiFi, + Apache Pig, + Apache Spark, +
Analytics query support + Apache Hive, + Apache Impala, + AWS Athena, + BigQuery, + Clickhouse, + Presto, + Trino, + + Apache Impala, + Apache Druid, + Apache Hive, + Apache Pinot, + AWS Athena, + BigQuery, + Clickhouse, + Firebolt, +
diff --git a/docs/file/apache_hudi/apache_iceberg.md b/docs/file/apache_hudi/apache_iceberg.md new file mode 100644 index 0000000..904ec2e --- /dev/null +++ b/docs/file/apache_hudi/apache_iceberg.md @@ -0,0 +1,190 @@ +--- +title: "Compare File technologies/tools" +description: "Compare File technologies/tools by features. Includes Apache Hudi vs Apache Iceberg." +--- +# File + +[![Apache Hudi logo](../../assets/logo/hudi.png){: style="height:30px;width:30px" .lg align-left } Apache Hudi](){ .md-button .toggle-vis data-column="1" } +[![Apache Iceberg logo](../../assets/logo/iceberg.png){: style="height:30px;width:30px" .lg align-left } Apache Iceberg](){ .md-button .toggle-vis data-column="2" } + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AttributeApache HudiApache Iceberg
NameApache HudiApache Iceberg
DescriptionApache Hudi is a transactional data lake platform that brings database and data warehouse capabilities to the data lake. Utilises data stored in either parquet or orc.Iceberg is a high-performance format for huge analytic tables. Utilises data stored in either parquet, avro, or orc.
Source codehttps://github.com/apache/hudihttps://github.com/apache/iceberg
Websitehttps://hudi.apache.org/https://iceberg.apache.org/
LicenseApache license 2.0Apache license 2.0
Year created20162017
CompanyUberNetflix
Use casesIncremental data processing, Data upserts, Change Data Capture (CDC), ACID transactionsWrite once read many, Analytics, Efficient storage, ACID transactions
Language support
Is human readable +
no
+
+
no
+
Orientationcolumn or rowcolumn or row
Has type system +
yes
+
+
yes
+
Has nested structure support +
yes
+
+
yes
+
Has native compression +
yes
+
+
yes
+
Has encoding support +
yes
+
+
yes
+
Has constraint support +
yes
+
+
no
+
Has acid support +
yes
+
+
yes
+
Has metadata +
yes
+
+
yes
+
Has encryption support +
maybe
+
+
maybe
+
Data processing framework support + Apache Spark, + Apache Flink, + + Apache Drill, + Apache Flink, + Apache Gobblin, + Apache Pig, + Apache Spark, +
Analytics query support + Apache Hive, + Apache Impala, + AWS Athena, + BigQuery, + Clickhouse, + Presto, + Trino, + + Apache Impala, + Apache Druid, + Apache Hive, + AWS Athena, + BigQuery, + Clickhouse, + Dremio, + DuckDB, + Presto, + Trino, +
diff --git a/docs/file/apache_hudi/apache_orc.md b/docs/file/apache_hudi/apache_orc.md new file mode 100644 index 0000000..d1198b7 --- /dev/null +++ b/docs/file/apache_hudi/apache_orc.md @@ -0,0 +1,191 @@ +--- +title: "Compare File technologies/tools" +description: "Compare File technologies/tools by features. Includes Apache Hudi vs Apache ORC." +--- +# File + +[![Apache Hudi logo](../../assets/logo/hudi.png){: style="height:30px;width:30px" .lg align-left } Apache Hudi](){ .md-button .toggle-vis data-column="1" } +[![Apache ORC logo](../../assets/logo/orc.png){: style="height:30px;width:30px" .lg align-left } Apache ORC](){ .md-button .toggle-vis data-column="2" } + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AttributeApache HudiApache ORC
NameApache HudiApache ORC
DescriptionApache Hudi is a transactional data lake platform that brings database and data warehouse capabilities to the data lake. Utilises data stored in either parquet or orc.ORC is a self-describing type-aware columnar file format designed for Hadoop workloads.
Source codehttps://github.com/apache/hudihttps://github.com/apache/orc
Websitehttps://hudi.apache.org/https://orc.apache.org/
LicenseApache license 2.0Apache license 2.0
Year created20162013
CompanyUberHortonworks, Facebook
Use casesIncremental data processing, Data upserts, Change Data Capture (CDC), ACID transactionsWrite once read many, Analytics, Efficient storage, ACID transactions
Language supportjava, scala, c++, python
Is human readable +
no
+
+
no
+
Orientationcolumn or rowcolumn
Has type system +
yes
+
+
yes
+
Has nested structure support +
yes
+
+
yes
+
Has native compression +
yes
+
+
yes
+
Has encoding support +
yes
+
+
yes
+
Has constraint support +
yes
+
+
no
+
Has acid support +
yes
+
+
no
+
Has metadata +
yes
+
+
yes
+
Has encryption support +
maybe
+
+
yes
+
Data processing framework support + Apache Spark, + Apache Flink, + + Apache Flink, + Apache Gobblin, + Apache Hadoop, + Apache NiFi, + Apache Pig, + Apache Spark, +
Analytics query support + Apache Hive, + Apache Impala, + AWS Athena, + BigQuery, + Clickhouse, + Presto, + Trino, + + Apache Impala, + Apache Druid, + Apache Hive, + Apache Pinot, + AWS Athena, + BigQuery, + Clickhouse, + Firebolt, + Presto, + Trino, +
diff --git a/docs/file/apache_hudi/apache_parquet.md b/docs/file/apache_hudi/apache_parquet.md new file mode 100644 index 0000000..6610d38 --- /dev/null +++ b/docs/file/apache_hudi/apache_parquet.md @@ -0,0 +1,190 @@ +--- +title: "Compare File technologies/tools" +description: "Compare File technologies/tools by features. Includes Apache Hudi vs Apache Parquet." +--- +# File + +[![Apache Hudi logo](../../assets/logo/hudi.png){: style="height:30px;width:30px" .lg align-left } Apache Hudi](){ .md-button .toggle-vis data-column="1" } +[![Apache Parquet logo](../../assets/logo/parquet.svg){: style="height:30px;width:30px" .lg align-left } Apache Parquet](){ .md-button .toggle-vis data-column="2" } + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AttributeApache HudiApache Parquet
NameApache HudiApache Parquet
DescriptionApache Hudi is a transactional data lake platform that brings database and data warehouse capabilities to the data lake. Utilises data stored in either parquet or orc.Apache Parquet is an open source, column-oriented data file format designed for efficient data storage and retrieval.
Source codehttps://github.com/apache/hudihttps://github.com/apache/parquet-format
Websitehttps://hudi.apache.org/https://parquet.apache.org/
LicenseApache license 2.0Apache license 2.0
Year created20162013
CompanyUberTwitter, Cloudera
Use casesIncremental data processing, Data upserts, Change Data Capture (CDC), ACID transactionsWrite once read many, Analytics, Efficient storage, Column based queries
Language supportjava, scala, c++, python, r, php
Is human readable +
no
+
+
no
+
Orientationcolumn or rowcolumn
Has type system +
yes
+
+
yes
+
Has nested structure support +
yes
+
+
yes
+
Has native compression +
yes
+
+
yes
+
Has encoding support +
yes
+
+
yes
+
Has constraint support +
yes
+
+
no
+
Has acid support +
yes
+
+
no
+
Has metadata +
yes
+
+
yes
+
Has encryption support +
maybe
+
+
yes
+
Data processing framework support + Apache Spark, + Apache Flink, + + Apache Beam, + Apache Drill, + Apache Flink, + Apache Spark, +
Analytics query support + Apache Hive, + Apache Impala, + AWS Athena, + BigQuery, + Clickhouse, + Presto, + Trino, + + Apache Hive, + Apache Impala, + Apache Druid, + Apache Pinot, + AWS Athena, + Azure Synapse, + BigQuery, + Clickhouse, + Dremio, + DuckDB, + Firebolt, +
diff --git a/docs/file/apache_hudi/csv.md b/docs/file/apache_hudi/csv.md new file mode 100644 index 0000000..ef49028 --- /dev/null +++ b/docs/file/apache_hudi/csv.md @@ -0,0 +1,193 @@ +--- +title: "Compare File technologies/tools" +description: "Compare File technologies/tools by features. Includes Apache Hudi vs CSV." +--- +# File + +[![Apache Hudi logo](../../assets/logo/hudi.png){: style="height:30px;width:30px" .lg align-left } Apache Hudi](){ .md-button .toggle-vis data-column="1" } +[![CSV logo](../../assets/logo/csv.png){: style="height:30px;width:30px" .lg align-left } CSV](){ .md-button .toggle-vis data-column="2" } + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AttributeApache HudiCSV
NameApache HudiCSV
DescriptionApache Hudi is a transactional data lake platform that brings database and data warehouse capabilities to the data lake. Utilises data stored in either parquet or orc.Comma-Separated Values (CSV) is a text file format that uses commas to separate values in plain text.
Source codehttps://github.com/apache/hudi
Websitehttps://hudi.apache.org/https://www.rfc-editor.org/rfc/rfc4180.html
LicenseApache license 2.0N/A
Year created2016N/A
CompanyUber
Use casesIncremental data processing, Data upserts, Change Data Capture (CDC), ACID transactions
Language supportjava, scala, c++, python, r, php, go
Is human readable +
no
+
+
yes
+
Orientationcolumn or rowrow
Has type system +
yes
+
+
no
+
Has nested structure support +
yes
+
+
no
+
Has native compression +
yes
+
+
no
+
Has encoding support +
yes
+
+
no
+
Has constraint support +
yes
+
+
no
+
Has acid support +
yes
+
+
no
+
Has metadata +
yes
+
+
no
+
Has encryption support +
maybe
+
+
no
+
Data processing framework support + Apache Spark, + Apache Flink, + + Apache Beam, + Apache Drill, + Apache Flink, + Apache Gobblin, + Apache Hive, + Apache NiFi, + Apache Pig, + Apache Spark, +
Analytics query support + Apache Hive, + Apache Impala, + AWS Athena, + BigQuery, + Clickhouse, + Presto, + Trino, + + Apache Impala, + Apache Druid, + Apache Pinot, + AWS Athena, + Azure Synapse, + BigQuery, + Clickhouse, + Dremio, + DuckDB, + Firebolt, +
diff --git a/docs/file/apache_hudi/delta_lake.md b/docs/file/apache_hudi/delta_lake.md new file mode 100644 index 0000000..fb5b735 --- /dev/null +++ b/docs/file/apache_hudi/delta_lake.md @@ -0,0 +1,186 @@ +--- +title: "Compare File technologies/tools" +description: "Compare File technologies/tools by features. Includes Apache Hudi vs Delta Lake." +--- +# File + +[![Apache Hudi logo](../../assets/logo/hudi.png){: style="height:30px;width:30px" .lg align-left } Apache Hudi](){ .md-button .toggle-vis data-column="1" } +[![Delta Lake logo](../../assets/logo/delta_lake.png){: style="height:30px;width:30px" .lg align-left } Delta Lake](){ .md-button .toggle-vis data-column="2" } + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AttributeApache HudiDelta Lake
NameApache HudiDelta Lake
DescriptionApache Hudi is a transactional data lake platform that brings database and data warehouse capabilities to the data lake. Utilises data stored in either parquet or orc.Delta Lake is an open-source storage framework that enables building a Lakehouse architecture.
Source codehttps://github.com/apache/hudihttps://github.com/delta-io/delta
Websitehttps://hudi.apache.org/https://delta.io/
LicenseApache license 2.0Apache license 2.0
Year created20162019
CompanyUberDatabricks
Use casesIncremental data processing, Data upserts, Change Data Capture (CDC), ACID transactionsWrite once read many, Analytics, Efficient storage, ACID transactions
Language supportscala, java, python, rust
Is human readable +
no
+
+
no
+
Orientationcolumn or rowcolumn
Has type system +
yes
+
+
yes
+
Has nested structure support +
yes
+
+
yes
+
Has native compression +
yes
+
+
yes
+
Has encoding support +
yes
+
+
yes
+
Has constraint support +
yes
+
+
yes
+
Has acid support +
yes
+
+
yes
+
Has metadata +
yes
+
+
yes
+
Has encryption support +
maybe
+
+
maybe
+
Data processing framework support + Apache Spark, + Apache Flink, + + Apache Drill, + Apache Flink, + Apache Spark, +
Analytics query support + Apache Hive, + Apache Impala, + AWS Athena, + BigQuery, + Clickhouse, + Presto, + Trino, + + Apache Hive, + AWS Athena, + Azure Synapse, + BigQuery, + Clickhouse, + Dremio, + Presto, + Trino, +
diff --git a/docs/file/apache_iceberg/apache_avro.md b/docs/file/apache_iceberg/apache_avro.md new file mode 100644 index 0000000..65f69b9 --- /dev/null +++ b/docs/file/apache_iceberg/apache_avro.md @@ -0,0 +1,194 @@ +--- +title: "Compare File technologies/tools" +description: "Compare File technologies/tools by features. Includes Apache Iceberg vs Apache Avro." +--- +# File + +[![Apache Iceberg logo](../../assets/logo/iceberg.png){: style="height:30px;width:30px" .lg align-left } Apache Iceberg](){ .md-button .toggle-vis data-column="1" } +[![Apache Avro logo](../../assets/logo/avro.png){: style="height:30px;width:30px" .lg align-left } Apache Avro](){ .md-button .toggle-vis data-column="2" } + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AttributeApache IcebergApache Avro
NameApache IcebergApache Avro
DescriptionIceberg is a high-performance format for huge analytic tables. Utilises data stored in either parquet, avro, or orc.Apache Avro is the leading serialization format for record data, and first choice for streaming data pipelines.
LicenseApache license 2.0Apache license 2.0
Source codehttps://github.com/apache/iceberghttps://github.com/apache/avro
Websitehttps://iceberg.apache.org/https://avro.apache.org/
Year created20172009
CompanyNetflixApache
Language supportjava, c++, c#, c, python, javascript, perl, ruby, php, rust
Use casesWrite once read many, Analytics, Efficient storage, ACID transactionsStream processing, Analytics, Efficient data exchange
Is human readable +
no
+
+
no
+
Orientationcolumn or rowrow
Has type system +
yes
+
+
yes
+
Has nested structure support +
yes
+
+
yes
+
Has native compression +
yes
+
+
yes
+
Has encoding support +
yes
+
+
yes
+
Has constraint support +
no
+
+
no
+
Has acid support +
yes
+
+
no
+
Has metadata +
yes
+
+
yes
+
Has encryption support +
maybe
+
+
no
+
Data processing framework support + Apache Drill, + Apache Flink, + Apache Gobblin, + Apache Pig, + Apache Spark, + + Apache Flink, + Apache Gobblin, + Apache NiFi, + Apache Pig, + Apache Spark, +
Analytics query support + Apache Impala, + Apache Druid, + Apache Hive, + AWS Athena, + BigQuery, + Clickhouse, + Dremio, + DuckDB, + Presto, + Trino, + + Apache Impala, + Apache Druid, + Apache Hive, + Apache Pinot, + AWS Athena, + BigQuery, + Clickhouse, + Firebolt, +
diff --git a/docs/file/apache_iceberg/apache_hudi.md b/docs/file/apache_iceberg/apache_hudi.md new file mode 100644 index 0000000..cc9b23b --- /dev/null +++ b/docs/file/apache_iceberg/apache_hudi.md @@ -0,0 +1,190 @@ +--- +title: "Compare File technologies/tools" +description: "Compare File technologies/tools by features. Includes Apache Iceberg vs Apache Hudi." +--- +# File + +[![Apache Iceberg logo](../../assets/logo/iceberg.png){: style="height:30px;width:30px" .lg align-left } Apache Iceberg](){ .md-button .toggle-vis data-column="1" } +[![Apache Hudi logo](../../assets/logo/hudi.png){: style="height:30px;width:30px" .lg align-left } Apache Hudi](){ .md-button .toggle-vis data-column="2" } + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AttributeApache IcebergApache Hudi
NameApache IcebergApache Hudi
DescriptionIceberg is a high-performance format for huge analytic tables. Utilises data stored in either parquet, avro, or orc.Apache Hudi is a transactional data lake platform that brings database and data warehouse capabilities to the data lake. Utilises data stored in either parquet or orc.
LicenseApache license 2.0Apache license 2.0
Source codehttps://github.com/apache/iceberghttps://github.com/apache/hudi
Websitehttps://iceberg.apache.org/https://hudi.apache.org/
Year created20172016
CompanyNetflixUber
Language support
Use casesWrite once read many, Analytics, Efficient storage, ACID transactionsIncremental data processing, Data upserts, Change Data Capture (CDC), ACID transactions
Is human readable +
no
+
+
no
+
Orientationcolumn or rowcolumn or row
Has type system +
yes
+
+
yes
+
Has nested structure support +
yes
+
+
yes
+
Has native compression +
yes
+
+
yes
+
Has encoding support +
yes
+
+
yes
+
Has constraint support +
no
+
+
yes
+
Has acid support +
yes
+
+
yes
+
Has metadata +
yes
+
+
yes
+
Has encryption support +
maybe
+
+
maybe
+
Data processing framework support + Apache Drill, + Apache Flink, + Apache Gobblin, + Apache Pig, + Apache Spark, + + Apache Spark, + Apache Flink, +
Analytics query support + Apache Impala, + Apache Druid, + Apache Hive, + AWS Athena, + BigQuery, + Clickhouse, + Dremio, + DuckDB, + Presto, + Trino, + + Apache Hive, + Apache Impala, + AWS Athena, + BigQuery, + Clickhouse, + Presto, + Trino, +
diff --git a/docs/file/apache_iceberg/apache_orc.md b/docs/file/apache_iceberg/apache_orc.md new file mode 100644 index 0000000..e60ea48 --- /dev/null +++ b/docs/file/apache_iceberg/apache_orc.md @@ -0,0 +1,197 @@ +--- +title: "Compare File technologies/tools" +description: "Compare File technologies/tools by features. Includes Apache Iceberg vs Apache ORC." +--- +# File + +[![Apache Iceberg logo](../../assets/logo/iceberg.png){: style="height:30px;width:30px" .lg align-left } Apache Iceberg](){ .md-button .toggle-vis data-column="1" } +[![Apache ORC logo](../../assets/logo/orc.png){: style="height:30px;width:30px" .lg align-left } Apache ORC](){ .md-button .toggle-vis data-column="2" } + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AttributeApache IcebergApache ORC
NameApache IcebergApache ORC
DescriptionIceberg is a high-performance format for huge analytic tables. Utilises data stored in either parquet, avro, or orc.ORC is a self-describing type-aware columnar file format designed for Hadoop workloads.
LicenseApache license 2.0Apache license 2.0
Source codehttps://github.com/apache/iceberghttps://github.com/apache/orc
Websitehttps://iceberg.apache.org/https://orc.apache.org/
Year created20172013
CompanyNetflixHortonworks, Facebook
Language supportjava, scala, c++, python
Use casesWrite once read many, Analytics, Efficient storage, ACID transactionsWrite once read many, Analytics, Efficient storage, ACID transactions
Is human readable +
no
+
+
no
+
Orientationcolumn or rowcolumn
Has type system +
yes
+
+
yes
+
Has nested structure support +
yes
+
+
yes
+
Has native compression +
yes
+
+
yes
+
Has encoding support +
yes
+
+
yes
+
Has constraint support +
no
+
+
no
+
Has acid support +
yes
+
+
no
+
Has metadata +
yes
+
+
yes
+
Has encryption support +
maybe
+
+
yes
+
Data processing framework support + Apache Drill, + Apache Flink, + Apache Gobblin, + Apache Pig, + Apache Spark, + + Apache Flink, + Apache Gobblin, + Apache Hadoop, + Apache NiFi, + Apache Pig, + Apache Spark, +
Analytics query support + Apache Impala, + Apache Druid, + Apache Hive, + AWS Athena, + BigQuery, + Clickhouse, + Dremio, + DuckDB, + Presto, + Trino, + + Apache Impala, + Apache Druid, + Apache Hive, + Apache Pinot, + AWS Athena, + BigQuery, + Clickhouse, + Firebolt, + Presto, + Trino, +
diff --git a/docs/file/apache_iceberg/apache_parquet.md b/docs/file/apache_iceberg/apache_parquet.md new file mode 100644 index 0000000..b52e247 --- /dev/null +++ b/docs/file/apache_iceberg/apache_parquet.md @@ -0,0 +1,196 @@ +--- +title: "Compare File technologies/tools" +description: "Compare File technologies/tools by features. Includes Apache Iceberg vs Apache Parquet." +--- +# File + +[![Apache Iceberg logo](../../assets/logo/iceberg.png){: style="height:30px;width:30px" .lg align-left } Apache Iceberg](){ .md-button .toggle-vis data-column="1" } +[![Apache Parquet logo](../../assets/logo/parquet.svg){: style="height:30px;width:30px" .lg align-left } Apache Parquet](){ .md-button .toggle-vis data-column="2" } + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AttributeApache IcebergApache Parquet
NameApache IcebergApache Parquet
DescriptionIceberg is a high-performance format for huge analytic tables. Utilises data stored in either parquet, avro, or orc.Apache Parquet is an open source, column-oriented data file format designed for efficient data storage and retrieval.
LicenseApache license 2.0Apache license 2.0
Source codehttps://github.com/apache/iceberghttps://github.com/apache/parquet-format
Websitehttps://iceberg.apache.org/https://parquet.apache.org/
Year created20172013
CompanyNetflixTwitter, Cloudera
Language supportjava, scala, c++, python, r, php
Use casesWrite once read many, Analytics, Efficient storage, ACID transactionsWrite once read many, Analytics, Efficient storage, Column based queries
Is human readable +
no
+
+
no
+
Orientationcolumn or rowcolumn
Has type system +
yes
+
+
yes
+
Has nested structure support +
yes
+
+
yes
+
Has native compression +
yes
+
+
yes
+
Has encoding support +
yes
+
+
yes
+
Has constraint support +
no
+
+
no
+
Has acid support +
yes
+
+
no
+
Has metadata +
yes
+
+
yes
+
Has encryption support +
maybe
+
+
yes
+
Data processing framework support + Apache Drill, + Apache Flink, + Apache Gobblin, + Apache Pig, + Apache Spark, + + Apache Beam, + Apache Drill, + Apache Flink, + Apache Spark, +
Analytics query support + Apache Impala, + Apache Druid, + Apache Hive, + AWS Athena, + BigQuery, + Clickhouse, + Dremio, + DuckDB, + Presto, + Trino, + + Apache Hive, + Apache Impala, + Apache Druid, + Apache Pinot, + AWS Athena, + Azure Synapse, + BigQuery, + Clickhouse, + Dremio, + DuckDB, + Firebolt, +
diff --git a/docs/file/apache_iceberg/csv.md b/docs/file/apache_iceberg/csv.md new file mode 100644 index 0000000..7d2813a --- /dev/null +++ b/docs/file/apache_iceberg/csv.md @@ -0,0 +1,199 @@ +--- +title: "Compare File technologies/tools" +description: "Compare File technologies/tools by features. Includes Apache Iceberg vs CSV." +--- +# File + +[![Apache Iceberg logo](../../assets/logo/iceberg.png){: style="height:30px;width:30px" .lg align-left } Apache Iceberg](){ .md-button .toggle-vis data-column="1" } +[![CSV logo](../../assets/logo/csv.png){: style="height:30px;width:30px" .lg align-left } CSV](){ .md-button .toggle-vis data-column="2" } + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AttributeApache IcebergCSV
NameApache IcebergCSV
DescriptionIceberg is a high-performance format for huge analytic tables. Utilises data stored in either parquet, avro, or orc.Comma-Separated Values (CSV) is a text file format that uses commas to separate values in plain text.
LicenseApache license 2.0N/A
Source codehttps://github.com/apache/iceberg
Websitehttps://iceberg.apache.org/https://www.rfc-editor.org/rfc/rfc4180.html
Year created2017N/A
CompanyNetflix
Language supportjava, scala, c++, python, r, php, go
Use casesWrite once read many, Analytics, Efficient storage, ACID transactions
Is human readable +
no
+
+
yes
+
Orientationcolumn or rowrow
Has type system +
yes
+
+
no
+
Has nested structure support +
yes
+
+
no
+
Has native compression +
yes
+
+
no
+
Has encoding support +
yes
+
+
no
+
Has constraint support +
no
+
+
no
+
Has acid support +
yes
+
+
no
+
Has metadata +
yes
+
+
no
+
Has encryption support +
maybe
+
+
no
+
Data processing framework support + Apache Drill, + Apache Flink, + Apache Gobblin, + Apache Pig, + Apache Spark, + + Apache Beam, + Apache Drill, + Apache Flink, + Apache Gobblin, + Apache Hive, + Apache NiFi, + Apache Pig, + Apache Spark, +
Analytics query support + Apache Impala, + Apache Druid, + Apache Hive, + AWS Athena, + BigQuery, + Clickhouse, + Dremio, + DuckDB, + Presto, + Trino, + + Apache Impala, + Apache Druid, + Apache Pinot, + AWS Athena, + Azure Synapse, + BigQuery, + Clickhouse, + Dremio, + DuckDB, + Firebolt, +
diff --git a/docs/file/apache_iceberg/delta_lake.md b/docs/file/apache_iceberg/delta_lake.md new file mode 100644 index 0000000..7bf907f --- /dev/null +++ b/docs/file/apache_iceberg/delta_lake.md @@ -0,0 +1,192 @@ +--- +title: "Compare File technologies/tools" +description: "Compare File technologies/tools by features. Includes Apache Iceberg vs Delta Lake." +--- +# File + +[![Apache Iceberg logo](../../assets/logo/iceberg.png){: style="height:30px;width:30px" .lg align-left } Apache Iceberg](){ .md-button .toggle-vis data-column="1" } +[![Delta Lake logo](../../assets/logo/delta_lake.png){: style="height:30px;width:30px" .lg align-left } Delta Lake](){ .md-button .toggle-vis data-column="2" } + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AttributeApache IcebergDelta Lake
NameApache IcebergDelta Lake
DescriptionIceberg is a high-performance format for huge analytic tables. Utilises data stored in either parquet, avro, or orc.Delta Lake is an open-source storage framework that enables building a Lakehouse architecture.
LicenseApache license 2.0Apache license 2.0
Source codehttps://github.com/apache/iceberghttps://github.com/delta-io/delta
Websitehttps://iceberg.apache.org/https://delta.io/
Year created20172019
CompanyNetflixDatabricks
Language supportscala, java, python, rust
Use casesWrite once read many, Analytics, Efficient storage, ACID transactionsWrite once read many, Analytics, Efficient storage, ACID transactions
Is human readable +
no
+
+
no
+
Orientationcolumn or rowcolumn
Has type system +
yes
+
+
yes
+
Has nested structure support +
yes
+
+
yes
+
Has native compression +
yes
+
+
yes
+
Has encoding support +
yes
+
+
yes
+
Has constraint support +
no
+
+
yes
+
Has acid support +
yes
+
+
yes
+
Has metadata +
yes
+
+
yes
+
Has encryption support +
maybe
+
+
maybe
+
Data processing framework support + Apache Drill, + Apache Flink, + Apache Gobblin, + Apache Pig, + Apache Spark, + + Apache Drill, + Apache Flink, + Apache Spark, +
Analytics query support + Apache Impala, + Apache Druid, + Apache Hive, + AWS Athena, + BigQuery, + Clickhouse, + Dremio, + DuckDB, + Presto, + Trino, + + Apache Hive, + AWS Athena, + Azure Synapse, + BigQuery, + Clickhouse, + Dremio, + Presto, + Trino, +
diff --git a/docs/file/apache_orc/apache_avro.md b/docs/file/apache_orc/apache_avro.md new file mode 100644 index 0000000..feb6f54 --- /dev/null +++ b/docs/file/apache_orc/apache_avro.md @@ -0,0 +1,195 @@ +--- +title: "Compare File technologies/tools" +description: "Compare File technologies/tools by features. Includes Apache ORC vs Apache Avro." +--- +# File + +[![Apache ORC logo](../../assets/logo/orc.png){: style="height:30px;width:30px" .lg align-left } Apache ORC](){ .md-button .toggle-vis data-column="1" } +[![Apache Avro logo](../../assets/logo/avro.png){: style="height:30px;width:30px" .lg align-left } Apache Avro](){ .md-button .toggle-vis data-column="2" } + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AttributeApache ORCApache Avro
NameApache ORCApache Avro
DescriptionORC is a self-describing type-aware columnar file format designed for Hadoop workloads.Apache Avro is the leading serialization format for record data, and first choice for streaming data pipelines.
LicenseApache license 2.0Apache license 2.0
Source codehttps://github.com/apache/orchttps://github.com/apache/avro
Websitehttps://orc.apache.org/https://avro.apache.org/
Year created20132009
CompanyHortonworks, FacebookApache
Language supportjava, scala, c++, pythonjava, c++, c#, c, python, javascript, perl, ruby, php, rust
Use casesWrite once read many, Analytics, Efficient storage, ACID transactionsStream processing, Analytics, Efficient data exchange
Is human readable +
no
+
+
no
+
Orientationcolumnrow
Has type system +
yes
+
+
yes
+
Has nested structure support +
yes
+
+
yes
+
Has native compression +
yes
+
+
yes
+
Has encoding support +
yes
+
+
yes
+
Has constraint support +
no
+
+
no
+
Has acid support +
no
+
+
no
+
Has metadata +
yes
+
+
yes
+
Has encryption support +
yes
+
+
no
+
Data processing framework support + Apache Flink, + Apache Gobblin, + Apache Hadoop, + Apache NiFi, + Apache Pig, + Apache Spark, + + Apache Flink, + Apache Gobblin, + Apache NiFi, + Apache Pig, + Apache Spark, +
Analytics query support + Apache Impala, + Apache Druid, + Apache Hive, + Apache Pinot, + AWS Athena, + BigQuery, + Clickhouse, + Firebolt, + Presto, + Trino, + + Apache Impala, + Apache Druid, + Apache Hive, + Apache Pinot, + AWS Athena, + BigQuery, + Clickhouse, + Firebolt, +
diff --git a/docs/file/apache_orc/apache_hudi.md b/docs/file/apache_orc/apache_hudi.md new file mode 100644 index 0000000..5b47d0c --- /dev/null +++ b/docs/file/apache_orc/apache_hudi.md @@ -0,0 +1,191 @@ +--- +title: "Compare File technologies/tools" +description: "Compare File technologies/tools by features. Includes Apache ORC vs Apache Hudi." +--- +# File + +[![Apache ORC logo](../../assets/logo/orc.png){: style="height:30px;width:30px" .lg align-left } Apache ORC](){ .md-button .toggle-vis data-column="1" } +[![Apache Hudi logo](../../assets/logo/hudi.png){: style="height:30px;width:30px" .lg align-left } Apache Hudi](){ .md-button .toggle-vis data-column="2" } + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AttributeApache ORCApache Hudi
NameApache ORCApache Hudi
DescriptionORC is a self-describing type-aware columnar file format designed for Hadoop workloads.Apache Hudi is a transactional data lake platform that brings database and data warehouse capabilities to the data lake. Utilises data stored in either parquet or orc.
LicenseApache license 2.0Apache license 2.0
Source codehttps://github.com/apache/orchttps://github.com/apache/hudi
Websitehttps://orc.apache.org/https://hudi.apache.org/
Year created20132016
CompanyHortonworks, FacebookUber
Language supportjava, scala, c++, python
Use casesWrite once read many, Analytics, Efficient storage, ACID transactionsIncremental data processing, Data upserts, Change Data Capture (CDC), ACID transactions
Is human readable +
no
+
+
no
+
Orientationcolumncolumn or row
Has type system +
yes
+
+
yes
+
Has nested structure support +
yes
+
+
yes
+
Has native compression +
yes
+
+
yes
+
Has encoding support +
yes
+
+
yes
+
Has constraint support +
no
+
+
yes
+
Has acid support +
no
+
+
yes
+
Has metadata +
yes
+
+
yes
+
Has encryption support +
yes
+
+
maybe
+
Data processing framework support + Apache Flink, + Apache Gobblin, + Apache Hadoop, + Apache NiFi, + Apache Pig, + Apache Spark, + + Apache Spark, + Apache Flink, +
Analytics query support + Apache Impala, + Apache Druid, + Apache Hive, + Apache Pinot, + AWS Athena, + BigQuery, + Clickhouse, + Firebolt, + Presto, + Trino, + + Apache Hive, + Apache Impala, + AWS Athena, + BigQuery, + Clickhouse, + Presto, + Trino, +
diff --git a/docs/file/apache_orc/apache_iceberg.md b/docs/file/apache_orc/apache_iceberg.md new file mode 100644 index 0000000..5103b0b --- /dev/null +++ b/docs/file/apache_orc/apache_iceberg.md @@ -0,0 +1,197 @@ +--- +title: "Compare File technologies/tools" +description: "Compare File technologies/tools by features. Includes Apache ORC vs Apache Iceberg." +--- +# File + +[![Apache ORC logo](../../assets/logo/orc.png){: style="height:30px;width:30px" .lg align-left } Apache ORC](){ .md-button .toggle-vis data-column="1" } +[![Apache Iceberg logo](../../assets/logo/iceberg.png){: style="height:30px;width:30px" .lg align-left } Apache Iceberg](){ .md-button .toggle-vis data-column="2" } + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AttributeApache ORCApache Iceberg
NameApache ORCApache Iceberg
DescriptionORC is a self-describing type-aware columnar file format designed for Hadoop workloads.Iceberg is a high-performance format for huge analytic tables. Utilises data stored in either parquet, avro, or orc.
LicenseApache license 2.0Apache license 2.0
Source codehttps://github.com/apache/orchttps://github.com/apache/iceberg
Websitehttps://orc.apache.org/https://iceberg.apache.org/
Year created20132017
CompanyHortonworks, FacebookNetflix
Language supportjava, scala, c++, python
Use casesWrite once read many, Analytics, Efficient storage, ACID transactionsWrite once read many, Analytics, Efficient storage, ACID transactions
Is human readable +
no
+
+
no
+
Orientationcolumncolumn or row
Has type system +
yes
+
+
yes
+
Has nested structure support +
yes
+
+
yes
+
Has native compression +
yes
+
+
yes
+
Has encoding support +
yes
+
+
yes
+
Has constraint support +
no
+
+
no
+
Has acid support +
no
+
+
yes
+
Has metadata +
yes
+
+
yes
+
Has encryption support +
yes
+
+
maybe
+
Data processing framework support + Apache Flink, + Apache Gobblin, + Apache Hadoop, + Apache NiFi, + Apache Pig, + Apache Spark, + + Apache Drill, + Apache Flink, + Apache Gobblin, + Apache Pig, + Apache Spark, +
Analytics query support + Apache Impala, + Apache Druid, + Apache Hive, + Apache Pinot, + AWS Athena, + BigQuery, + Clickhouse, + Firebolt, + Presto, + Trino, + + Apache Impala, + Apache Druid, + Apache Hive, + AWS Athena, + BigQuery, + Clickhouse, + Dremio, + DuckDB, + Presto, + Trino, +
diff --git a/docs/file/apache_orc/apache_parquet.md b/docs/file/apache_orc/apache_parquet.md new file mode 100644 index 0000000..5f19322 --- /dev/null +++ b/docs/file/apache_orc/apache_parquet.md @@ -0,0 +1,197 @@ +--- +title: "Compare File technologies/tools" +description: "Compare File technologies/tools by features. Includes Apache ORC vs Apache Parquet." +--- +# File + +[![Apache ORC logo](../../assets/logo/orc.png){: style="height:30px;width:30px" .lg align-left } Apache ORC](){ .md-button .toggle-vis data-column="1" } +[![Apache Parquet logo](../../assets/logo/parquet.svg){: style="height:30px;width:30px" .lg align-left } Apache Parquet](){ .md-button .toggle-vis data-column="2" } + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AttributeApache ORCApache Parquet
NameApache ORCApache Parquet
DescriptionORC is a self-describing type-aware columnar file format designed for Hadoop workloads.Apache Parquet is an open source, column-oriented data file format designed for efficient data storage and retrieval.
LicenseApache license 2.0Apache license 2.0
Source codehttps://github.com/apache/orchttps://github.com/apache/parquet-format
Websitehttps://orc.apache.org/https://parquet.apache.org/
Year created20132013
CompanyHortonworks, FacebookTwitter, Cloudera
Language supportjava, scala, c++, pythonjava, scala, c++, python, r, php
Use casesWrite once read many, Analytics, Efficient storage, ACID transactionsWrite once read many, Analytics, Efficient storage, Column based queries
Is human readable +
no
+
+
no
+
Orientationcolumncolumn
Has type system +
yes
+
+
yes
+
Has nested structure support +
yes
+
+
yes
+
Has native compression +
yes
+
+
yes
+
Has encoding support +
yes
+
+
yes
+
Has constraint support +
no
+
+
no
+
Has acid support +
no
+
+
no
+
Has metadata +
yes
+
+
yes
+
Has encryption support +
yes
+
+
yes
+
Data processing framework support + Apache Flink, + Apache Gobblin, + Apache Hadoop, + Apache NiFi, + Apache Pig, + Apache Spark, + + Apache Beam, + Apache Drill, + Apache Flink, + Apache Spark, +
Analytics query support + Apache Impala, + Apache Druid, + Apache Hive, + Apache Pinot, + AWS Athena, + BigQuery, + Clickhouse, + Firebolt, + Presto, + Trino, + + Apache Hive, + Apache Impala, + Apache Druid, + Apache Pinot, + AWS Athena, + Azure Synapse, + BigQuery, + Clickhouse, + Dremio, + DuckDB, + Firebolt, +
diff --git a/docs/file/apache_orc/csv.md b/docs/file/apache_orc/csv.md new file mode 100644 index 0000000..1401c60 --- /dev/null +++ b/docs/file/apache_orc/csv.md @@ -0,0 +1,200 @@ +--- +title: "Compare File technologies/tools" +description: "Compare File technologies/tools by features. Includes Apache ORC vs CSV." +--- +# File + +[![Apache ORC logo](../../assets/logo/orc.png){: style="height:30px;width:30px" .lg align-left } Apache ORC](){ .md-button .toggle-vis data-column="1" } +[![CSV logo](../../assets/logo/csv.png){: style="height:30px;width:30px" .lg align-left } CSV](){ .md-button .toggle-vis data-column="2" } + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AttributeApache ORCCSV
NameApache ORCCSV
DescriptionORC is a self-describing type-aware columnar file format designed for Hadoop workloads.Comma-Separated Values (CSV) is a text file format that uses commas to separate values in plain text.
LicenseApache license 2.0N/A
Source codehttps://github.com/apache/orc
Websitehttps://orc.apache.org/https://www.rfc-editor.org/rfc/rfc4180.html
Year created2013N/A
CompanyHortonworks, Facebook
Language supportjava, scala, c++, pythonjava, scala, c++, python, r, php, go
Use casesWrite once read many, Analytics, Efficient storage, ACID transactions
Is human readable +
no
+
+
yes
+
Orientationcolumnrow
Has type system +
yes
+
+
no
+
Has nested structure support +
yes
+
+
no
+
Has native compression +
yes
+
+
no
+
Has encoding support +
yes
+
+
no
+
Has constraint support +
no
+
+
no
+
Has acid support +
no
+
+
no
+
Has metadata +
yes
+
+
no
+
Has encryption support +
yes
+
+
no
+
Data processing framework support + Apache Flink, + Apache Gobblin, + Apache Hadoop, + Apache NiFi, + Apache Pig, + Apache Spark, + + Apache Beam, + Apache Drill, + Apache Flink, + Apache Gobblin, + Apache Hive, + Apache NiFi, + Apache Pig, + Apache Spark, +
Analytics query support + Apache Impala, + Apache Druid, + Apache Hive, + Apache Pinot, + AWS Athena, + BigQuery, + Clickhouse, + Firebolt, + Presto, + Trino, + + Apache Impala, + Apache Druid, + Apache Pinot, + AWS Athena, + Azure Synapse, + BigQuery, + Clickhouse, + Dremio, + DuckDB, + Firebolt, +
diff --git a/docs/file/apache_orc/delta_lake.md b/docs/file/apache_orc/delta_lake.md new file mode 100644 index 0000000..fc172bc --- /dev/null +++ b/docs/file/apache_orc/delta_lake.md @@ -0,0 +1,193 @@ +--- +title: "Compare File technologies/tools" +description: "Compare File technologies/tools by features. Includes Apache ORC vs Delta Lake." +--- +# File + +[![Apache ORC logo](../../assets/logo/orc.png){: style="height:30px;width:30px" .lg align-left } Apache ORC](){ .md-button .toggle-vis data-column="1" } +[![Delta Lake logo](../../assets/logo/delta_lake.png){: style="height:30px;width:30px" .lg align-left } Delta Lake](){ .md-button .toggle-vis data-column="2" } + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AttributeApache ORCDelta Lake
NameApache ORCDelta Lake
DescriptionORC is a self-describing type-aware columnar file format designed for Hadoop workloads.Delta Lake is an open-source storage framework that enables building a Lakehouse architecture.
LicenseApache license 2.0Apache license 2.0
Source codehttps://github.com/apache/orchttps://github.com/delta-io/delta
Websitehttps://orc.apache.org/https://delta.io/
Year created20132019
CompanyHortonworks, FacebookDatabricks
Language supportjava, scala, c++, pythonscala, java, python, rust
Use casesWrite once read many, Analytics, Efficient storage, ACID transactionsWrite once read many, Analytics, Efficient storage, ACID transactions
Is human readable +
no
+
+
no
+
Orientationcolumncolumn
Has type system +
yes
+
+
yes
+
Has nested structure support +
yes
+
+
yes
+
Has native compression +
yes
+
+
yes
+
Has encoding support +
yes
+
+
yes
+
Has constraint support +
no
+
+
yes
+
Has acid support +
no
+
+
yes
+
Has metadata +
yes
+
+
yes
+
Has encryption support +
yes
+
+
maybe
+
Data processing framework support + Apache Flink, + Apache Gobblin, + Apache Hadoop, + Apache NiFi, + Apache Pig, + Apache Spark, + + Apache Drill, + Apache Flink, + Apache Spark, +
Analytics query support + Apache Impala, + Apache Druid, + Apache Hive, + Apache Pinot, + AWS Athena, + BigQuery, + Clickhouse, + Firebolt, + Presto, + Trino, + + Apache Hive, + AWS Athena, + Azure Synapse, + BigQuery, + Clickhouse, + Dremio, + Presto, + Trino, +
diff --git a/docs/file/apache_parquet/apache_avro.md b/docs/file/apache_parquet/apache_avro.md new file mode 100644 index 0000000..d501d23 --- /dev/null +++ b/docs/file/apache_parquet/apache_avro.md @@ -0,0 +1,194 @@ +--- +title: "Compare File technologies/tools" +description: "Compare File technologies/tools by features. Includes Apache Parquet vs Apache Avro." +--- +# File + +[![Apache Parquet logo](../../assets/logo/parquet.svg){: style="height:30px;width:30px" .lg align-left } Apache Parquet](){ .md-button .toggle-vis data-column="1" } +[![Apache Avro logo](../../assets/logo/avro.png){: style="height:30px;width:30px" .lg align-left } Apache Avro](){ .md-button .toggle-vis data-column="2" } + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AttributeApache ParquetApache Avro
NameApache ParquetApache Avro
DescriptionApache Parquet is an open source, column-oriented data file format designed for efficient data storage and retrieval.Apache Avro is the leading serialization format for record data, and first choice for streaming data pipelines.
LicenseApache license 2.0Apache license 2.0
Source codehttps://github.com/apache/parquet-formathttps://github.com/apache/avro
Websitehttps://parquet.apache.org/https://avro.apache.org/
Year created20132009
CompanyTwitter, ClouderaApache
Language supportjava, scala, c++, python, r, phpjava, c++, c#, c, python, javascript, perl, ruby, php, rust
Use casesWrite once read many, Analytics, Efficient storage, Column based queriesStream processing, Analytics, Efficient data exchange
Is human readable +
no
+
+
no
+
Orientationcolumnrow
Has type system +
yes
+
+
yes
+
Has nested structure support +
yes
+
+
yes
+
Has native compression +
yes
+
+
yes
+
Has encoding support +
yes
+
+
yes
+
Has constraint support +
no
+
+
no
+
Has acid support +
no
+
+
no
+
Has metadata +
yes
+
+
yes
+
Has encryption support +
yes
+
+
no
+
Data processing framework support + Apache Beam, + Apache Drill, + Apache Flink, + Apache Spark, + + Apache Flink, + Apache Gobblin, + Apache NiFi, + Apache Pig, + Apache Spark, +
Analytics query support + Apache Hive, + Apache Impala, + Apache Druid, + Apache Pinot, + AWS Athena, + Azure Synapse, + BigQuery, + Clickhouse, + Dremio, + DuckDB, + Firebolt, + + Apache Impala, + Apache Druid, + Apache Hive, + Apache Pinot, + AWS Athena, + BigQuery, + Clickhouse, + Firebolt, +
diff --git a/docs/file/apache_parquet/apache_hudi.md b/docs/file/apache_parquet/apache_hudi.md new file mode 100644 index 0000000..2ea6741 --- /dev/null +++ b/docs/file/apache_parquet/apache_hudi.md @@ -0,0 +1,190 @@ +--- +title: "Compare File technologies/tools" +description: "Compare File technologies/tools by features. Includes Apache Parquet vs Apache Hudi." +--- +# File + +[![Apache Parquet logo](../../assets/logo/parquet.svg){: style="height:30px;width:30px" .lg align-left } Apache Parquet](){ .md-button .toggle-vis data-column="1" } +[![Apache Hudi logo](../../assets/logo/hudi.png){: style="height:30px;width:30px" .lg align-left } Apache Hudi](){ .md-button .toggle-vis data-column="2" } + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AttributeApache ParquetApache Hudi
NameApache ParquetApache Hudi
DescriptionApache Parquet is an open source, column-oriented data file format designed for efficient data storage and retrieval.Apache Hudi is a transactional data lake platform that brings database and data warehouse capabilities to the data lake. Utilises data stored in either parquet or orc.
LicenseApache license 2.0Apache license 2.0
Source codehttps://github.com/apache/parquet-formathttps://github.com/apache/hudi
Websitehttps://parquet.apache.org/https://hudi.apache.org/
Year created20132016
CompanyTwitter, ClouderaUber
Language supportjava, scala, c++, python, r, php
Use casesWrite once read many, Analytics, Efficient storage, Column based queriesIncremental data processing, Data upserts, Change Data Capture (CDC), ACID transactions
Is human readable +
no
+
+
no
+
Orientationcolumncolumn or row
Has type system +
yes
+
+
yes
+
Has nested structure support +
yes
+
+
yes
+
Has native compression +
yes
+
+
yes
+
Has encoding support +
yes
+
+
yes
+
Has constraint support +
no
+
+
yes
+
Has acid support +
no
+
+
yes
+
Has metadata +
yes
+
+
yes
+
Has encryption support +
yes
+
+
maybe
+
Data processing framework support + Apache Beam, + Apache Drill, + Apache Flink, + Apache Spark, + + Apache Spark, + Apache Flink, +
Analytics query support + Apache Hive, + Apache Impala, + Apache Druid, + Apache Pinot, + AWS Athena, + Azure Synapse, + BigQuery, + Clickhouse, + Dremio, + DuckDB, + Firebolt, + + Apache Hive, + Apache Impala, + AWS Athena, + BigQuery, + Clickhouse, + Presto, + Trino, +
diff --git a/docs/file/apache_parquet/apache_iceberg.md b/docs/file/apache_parquet/apache_iceberg.md new file mode 100644 index 0000000..bc0b3ef --- /dev/null +++ b/docs/file/apache_parquet/apache_iceberg.md @@ -0,0 +1,196 @@ +--- +title: "Compare File technologies/tools" +description: "Compare File technologies/tools by features. Includes Apache Parquet vs Apache Iceberg." +--- +# File + +[![Apache Parquet logo](../../assets/logo/parquet.svg){: style="height:30px;width:30px" .lg align-left } Apache Parquet](){ .md-button .toggle-vis data-column="1" } +[![Apache Iceberg logo](../../assets/logo/iceberg.png){: style="height:30px;width:30px" .lg align-left } Apache Iceberg](){ .md-button .toggle-vis data-column="2" } + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AttributeApache ParquetApache Iceberg
NameApache ParquetApache Iceberg
DescriptionApache Parquet is an open source, column-oriented data file format designed for efficient data storage and retrieval.Iceberg is a high-performance format for huge analytic tables. Utilises data stored in either parquet, avro, or orc.
LicenseApache license 2.0Apache license 2.0
Source codehttps://github.com/apache/parquet-formathttps://github.com/apache/iceberg
Websitehttps://parquet.apache.org/https://iceberg.apache.org/
Year created20132017
CompanyTwitter, ClouderaNetflix
Language supportjava, scala, c++, python, r, php
Use casesWrite once read many, Analytics, Efficient storage, Column based queriesWrite once read many, Analytics, Efficient storage, ACID transactions
Is human readable +
no
+
+
no
+
Orientationcolumncolumn or row
Has type system +
yes
+
+
yes
+
Has nested structure support +
yes
+
+
yes
+
Has native compression +
yes
+
+
yes
+
Has encoding support +
yes
+
+
yes
+
Has constraint support +
no
+
+
no
+
Has acid support +
no
+
+
yes
+
Has metadata +
yes
+
+
yes
+
Has encryption support +
yes
+
+
maybe
+
Data processing framework support + Apache Beam, + Apache Drill, + Apache Flink, + Apache Spark, + + Apache Drill, + Apache Flink, + Apache Gobblin, + Apache Pig, + Apache Spark, +
Analytics query support + Apache Hive, + Apache Impala, + Apache Druid, + Apache Pinot, + AWS Athena, + Azure Synapse, + BigQuery, + Clickhouse, + Dremio, + DuckDB, + Firebolt, + + Apache Impala, + Apache Druid, + Apache Hive, + AWS Athena, + BigQuery, + Clickhouse, + Dremio, + DuckDB, + Presto, + Trino, +
diff --git a/docs/file/apache_parquet/apache_orc.md b/docs/file/apache_parquet/apache_orc.md new file mode 100644 index 0000000..a595cc1 --- /dev/null +++ b/docs/file/apache_parquet/apache_orc.md @@ -0,0 +1,197 @@ +--- +title: "Compare File technologies/tools" +description: "Compare File technologies/tools by features. Includes Apache Parquet vs Apache ORC." +--- +# File + +[![Apache Parquet logo](../../assets/logo/parquet.svg){: style="height:30px;width:30px" .lg align-left } Apache Parquet](){ .md-button .toggle-vis data-column="1" } +[![Apache ORC logo](../../assets/logo/orc.png){: style="height:30px;width:30px" .lg align-left } Apache ORC](){ .md-button .toggle-vis data-column="2" } + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AttributeApache ParquetApache ORC
NameApache ParquetApache ORC
DescriptionApache Parquet is an open source, column-oriented data file format designed for efficient data storage and retrieval.ORC is a self-describing type-aware columnar file format designed for Hadoop workloads.
LicenseApache license 2.0Apache license 2.0
Source codehttps://github.com/apache/parquet-formathttps://github.com/apache/orc
Websitehttps://parquet.apache.org/https://orc.apache.org/
Year created20132013
CompanyTwitter, ClouderaHortonworks, Facebook
Language supportjava, scala, c++, python, r, phpjava, scala, c++, python
Use casesWrite once read many, Analytics, Efficient storage, Column based queriesWrite once read many, Analytics, Efficient storage, ACID transactions
Is human readable +
no
+
+
no
+
Orientationcolumncolumn
Has type system +
yes
+
+
yes
+
Has nested structure support +
yes
+
+
yes
+
Has native compression +
yes
+
+
yes
+
Has encoding support +
yes
+
+
yes
+
Has constraint support +
no
+
+
no
+
Has acid support +
no
+
+
no
+
Has metadata +
yes
+
+
yes
+
Has encryption support +
yes
+
+
yes
+
Data processing framework support + Apache Beam, + Apache Drill, + Apache Flink, + Apache Spark, + + Apache Flink, + Apache Gobblin, + Apache Hadoop, + Apache NiFi, + Apache Pig, + Apache Spark, +
Analytics query support + Apache Hive, + Apache Impala, + Apache Druid, + Apache Pinot, + AWS Athena, + Azure Synapse, + BigQuery, + Clickhouse, + Dremio, + DuckDB, + Firebolt, + + Apache Impala, + Apache Druid, + Apache Hive, + Apache Pinot, + AWS Athena, + BigQuery, + Clickhouse, + Firebolt, + Presto, + Trino, +
diff --git a/docs/file/apache_parquet/csv.md b/docs/file/apache_parquet/csv.md new file mode 100644 index 0000000..bf48387 --- /dev/null +++ b/docs/file/apache_parquet/csv.md @@ -0,0 +1,199 @@ +--- +title: "Compare File technologies/tools" +description: "Compare File technologies/tools by features. Includes Apache Parquet vs CSV." +--- +# File + +[![Apache Parquet logo](../../assets/logo/parquet.svg){: style="height:30px;width:30px" .lg align-left } Apache Parquet](){ .md-button .toggle-vis data-column="1" } +[![CSV logo](../../assets/logo/csv.png){: style="height:30px;width:30px" .lg align-left } CSV](){ .md-button .toggle-vis data-column="2" } + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AttributeApache ParquetCSV
NameApache ParquetCSV
DescriptionApache Parquet is an open source, column-oriented data file format designed for efficient data storage and retrieval.Comma-Separated Values (CSV) is a text file format that uses commas to separate values in plain text.
LicenseApache license 2.0N/A
Source codehttps://github.com/apache/parquet-format
Websitehttps://parquet.apache.org/https://www.rfc-editor.org/rfc/rfc4180.html
Year created2013N/A
CompanyTwitter, Cloudera
Language supportjava, scala, c++, python, r, phpjava, scala, c++, python, r, php, go
Use casesWrite once read many, Analytics, Efficient storage, Column based queries
Is human readable +
no
+
+
yes
+
Orientationcolumnrow
Has type system +
yes
+
+
no
+
Has nested structure support +
yes
+
+
no
+
Has native compression +
yes
+
+
no
+
Has encoding support +
yes
+
+
no
+
Has constraint support +
no
+
+
no
+
Has acid support +
no
+
+
no
+
Has metadata +
yes
+
+
no
+
Has encryption support +
yes
+
+
no
+
Data processing framework support + Apache Beam, + Apache Drill, + Apache Flink, + Apache Spark, + + Apache Beam, + Apache Drill, + Apache Flink, + Apache Gobblin, + Apache Hive, + Apache NiFi, + Apache Pig, + Apache Spark, +
Analytics query support + Apache Hive, + Apache Impala, + Apache Druid, + Apache Pinot, + AWS Athena, + Azure Synapse, + BigQuery, + Clickhouse, + Dremio, + DuckDB, + Firebolt, + + Apache Impala, + Apache Druid, + Apache Pinot, + AWS Athena, + Azure Synapse, + BigQuery, + Clickhouse, + Dremio, + DuckDB, + Firebolt, +
diff --git a/docs/file/apache_parquet/delta_lake.md b/docs/file/apache_parquet/delta_lake.md new file mode 100644 index 0000000..6c277da --- /dev/null +++ b/docs/file/apache_parquet/delta_lake.md @@ -0,0 +1,192 @@ +--- +title: "Compare File technologies/tools" +description: "Compare File technologies/tools by features. Includes Apache Parquet vs Delta Lake." +--- +# File + +[![Apache Parquet logo](../../assets/logo/parquet.svg){: style="height:30px;width:30px" .lg align-left } Apache Parquet](){ .md-button .toggle-vis data-column="1" } +[![Delta Lake logo](../../assets/logo/delta_lake.png){: style="height:30px;width:30px" .lg align-left } Delta Lake](){ .md-button .toggle-vis data-column="2" } + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AttributeApache ParquetDelta Lake
NameApache ParquetDelta Lake
DescriptionApache Parquet is an open source, column-oriented data file format designed for efficient data storage and retrieval.Delta Lake is an open-source storage framework that enables building a Lakehouse architecture.
LicenseApache license 2.0Apache license 2.0
Source codehttps://github.com/apache/parquet-formathttps://github.com/delta-io/delta
Websitehttps://parquet.apache.org/https://delta.io/
Year created20132019
CompanyTwitter, ClouderaDatabricks
Language supportjava, scala, c++, python, r, phpscala, java, python, rust
Use casesWrite once read many, Analytics, Efficient storage, Column based queriesWrite once read many, Analytics, Efficient storage, ACID transactions
Is human readable +
no
+
+
no
+
Orientationcolumncolumn
Has type system +
yes
+
+
yes
+
Has nested structure support +
yes
+
+
yes
+
Has native compression +
yes
+
+
yes
+
Has encoding support +
yes
+
+
yes
+
Has constraint support +
no
+
+
yes
+
Has acid support +
no
+
+
yes
+
Has metadata +
yes
+
+
yes
+
Has encryption support +
yes
+
+
maybe
+
Data processing framework support + Apache Beam, + Apache Drill, + Apache Flink, + Apache Spark, + + Apache Drill, + Apache Flink, + Apache Spark, +
Analytics query support + Apache Hive, + Apache Impala, + Apache Druid, + Apache Pinot, + AWS Athena, + Azure Synapse, + BigQuery, + Clickhouse, + Dremio, + DuckDB, + Firebolt, + + Apache Hive, + AWS Athena, + Azure Synapse, + BigQuery, + Clickhouse, + Dremio, + Presto, + Trino, +
diff --git a/docs/file/csv/apache_avro.md b/docs/file/csv/apache_avro.md new file mode 100644 index 0000000..a09c1fe --- /dev/null +++ b/docs/file/csv/apache_avro.md @@ -0,0 +1,197 @@ +--- +title: "Compare File technologies/tools" +description: "Compare File technologies/tools by features. Includes CSV vs Apache Avro." +--- +# File + +[![CSV logo](../../assets/logo/csv.png){: style="height:30px;width:30px" .lg align-left } CSV](){ .md-button .toggle-vis data-column="1" } +[![Apache Avro logo](../../assets/logo/avro.png){: style="height:30px;width:30px" .lg align-left } Apache Avro](){ .md-button .toggle-vis data-column="2" } + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AttributeCSVApache Avro
NameCSVApache Avro
DescriptionComma-Separated Values (CSV) is a text file format that uses commas to separate values in plain text.Apache Avro is the leading serialization format for record data, and first choice for streaming data pipelines.
Source codehttps://github.com/apache/avro
Websitehttps://www.rfc-editor.org/rfc/rfc4180.htmlhttps://avro.apache.org/
Language supportjava, scala, c++, python, r, php, gojava, c++, c#, c, python, javascript, perl, ruby, php, rust
LicenseN/AApache license 2.0
Year createdN/A2009
CompanyApache
Use casesStream processing, Analytics, Efficient data exchange
Is human readable +
yes
+
+
no
+
Orientationrowrow
Has type system +
no
+
+
yes
+
Has nested structure support +
no
+
+
yes
+
Has native compression +
no
+
+
yes
+
Has encoding support +
no
+
+
yes
+
Has constraint support +
no
+
+
no
+
Has acid support +
no
+
+
no
+
Has metadata +
no
+
+
yes
+
Has encryption support +
no
+
+
no
+
Data processing framework support + Apache Beam, + Apache Drill, + Apache Flink, + Apache Gobblin, + Apache Hive, + Apache NiFi, + Apache Pig, + Apache Spark, + + Apache Flink, + Apache Gobblin, + Apache NiFi, + Apache Pig, + Apache Spark, +
Analytics query support + Apache Impala, + Apache Druid, + Apache Pinot, + AWS Athena, + Azure Synapse, + BigQuery, + Clickhouse, + Dremio, + DuckDB, + Firebolt, + + Apache Impala, + Apache Druid, + Apache Hive, + Apache Pinot, + AWS Athena, + BigQuery, + Clickhouse, + Firebolt, +
diff --git a/docs/file/csv/apache_hudi.md b/docs/file/csv/apache_hudi.md new file mode 100644 index 0000000..12a1839 --- /dev/null +++ b/docs/file/csv/apache_hudi.md @@ -0,0 +1,193 @@ +--- +title: "Compare File technologies/tools" +description: "Compare File technologies/tools by features. Includes CSV vs Apache Hudi." +--- +# File + +[![CSV logo](../../assets/logo/csv.png){: style="height:30px;width:30px" .lg align-left } CSV](){ .md-button .toggle-vis data-column="1" } +[![Apache Hudi logo](../../assets/logo/hudi.png){: style="height:30px;width:30px" .lg align-left } Apache Hudi](){ .md-button .toggle-vis data-column="2" } + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AttributeCSVApache Hudi
NameCSVApache Hudi
DescriptionComma-Separated Values (CSV) is a text file format that uses commas to separate values in plain text.Apache Hudi is a transactional data lake platform that brings database and data warehouse capabilities to the data lake. Utilises data stored in either parquet or orc.
Source codehttps://github.com/apache/hudi
Websitehttps://www.rfc-editor.org/rfc/rfc4180.htmlhttps://hudi.apache.org/
Language supportjava, scala, c++, python, r, php, go
LicenseN/AApache license 2.0
Year createdN/A2016
CompanyUber
Use casesIncremental data processing, Data upserts, Change Data Capture (CDC), ACID transactions
Is human readable +
yes
+
+
no
+
Orientationrowcolumn or row
Has type system +
no
+
+
yes
+
Has nested structure support +
no
+
+
yes
+
Has native compression +
no
+
+
yes
+
Has encoding support +
no
+
+
yes
+
Has constraint support +
no
+
+
yes
+
Has acid support +
no
+
+
yes
+
Has metadata +
no
+
+
yes
+
Has encryption support +
no
+
+
maybe
+
Data processing framework support + Apache Beam, + Apache Drill, + Apache Flink, + Apache Gobblin, + Apache Hive, + Apache NiFi, + Apache Pig, + Apache Spark, + + Apache Spark, + Apache Flink, +
Analytics query support + Apache Impala, + Apache Druid, + Apache Pinot, + AWS Athena, + Azure Synapse, + BigQuery, + Clickhouse, + Dremio, + DuckDB, + Firebolt, + + Apache Hive, + Apache Impala, + AWS Athena, + BigQuery, + Clickhouse, + Presto, + Trino, +
diff --git a/docs/file/csv/apache_iceberg.md b/docs/file/csv/apache_iceberg.md new file mode 100644 index 0000000..1ff5340 --- /dev/null +++ b/docs/file/csv/apache_iceberg.md @@ -0,0 +1,199 @@ +--- +title: "Compare File technologies/tools" +description: "Compare File technologies/tools by features. Includes CSV vs Apache Iceberg." +--- +# File + +[![CSV logo](../../assets/logo/csv.png){: style="height:30px;width:30px" .lg align-left } CSV](){ .md-button .toggle-vis data-column="1" } +[![Apache Iceberg logo](../../assets/logo/iceberg.png){: style="height:30px;width:30px" .lg align-left } Apache Iceberg](){ .md-button .toggle-vis data-column="2" } + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AttributeCSVApache Iceberg
NameCSVApache Iceberg
DescriptionComma-Separated Values (CSV) is a text file format that uses commas to separate values in plain text.Iceberg is a high-performance format for huge analytic tables. Utilises data stored in either parquet, avro, or orc.
Source codehttps://github.com/apache/iceberg
Websitehttps://www.rfc-editor.org/rfc/rfc4180.htmlhttps://iceberg.apache.org/
Language supportjava, scala, c++, python, r, php, go
LicenseN/AApache license 2.0
Year createdN/A2017
CompanyNetflix
Use casesWrite once read many, Analytics, Efficient storage, ACID transactions
Is human readable +
yes
+
+
no
+
Orientationrowcolumn or row
Has type system +
no
+
+
yes
+
Has nested structure support +
no
+
+
yes
+
Has native compression +
no
+
+
yes
+
Has encoding support +
no
+
+
yes
+
Has constraint support +
no
+
+
no
+
Has acid support +
no
+
+
yes
+
Has metadata +
no
+
+
yes
+
Has encryption support +
no
+
+
maybe
+
Data processing framework support + Apache Beam, + Apache Drill, + Apache Flink, + Apache Gobblin, + Apache Hive, + Apache NiFi, + Apache Pig, + Apache Spark, + + Apache Drill, + Apache Flink, + Apache Gobblin, + Apache Pig, + Apache Spark, +
Analytics query support + Apache Impala, + Apache Druid, + Apache Pinot, + AWS Athena, + Azure Synapse, + BigQuery, + Clickhouse, + Dremio, + DuckDB, + Firebolt, + + Apache Impala, + Apache Druid, + Apache Hive, + AWS Athena, + BigQuery, + Clickhouse, + Dremio, + DuckDB, + Presto, + Trino, +
diff --git a/docs/file/csv/apache_orc.md b/docs/file/csv/apache_orc.md new file mode 100644 index 0000000..06e7732 --- /dev/null +++ b/docs/file/csv/apache_orc.md @@ -0,0 +1,200 @@ +--- +title: "Compare File technologies/tools" +description: "Compare File technologies/tools by features. Includes CSV vs Apache ORC." +--- +# File + +[![CSV logo](../../assets/logo/csv.png){: style="height:30px;width:30px" .lg align-left } CSV](){ .md-button .toggle-vis data-column="1" } +[![Apache ORC logo](../../assets/logo/orc.png){: style="height:30px;width:30px" .lg align-left } Apache ORC](){ .md-button .toggle-vis data-column="2" } + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AttributeCSVApache ORC
NameCSVApache ORC
DescriptionComma-Separated Values (CSV) is a text file format that uses commas to separate values in plain text.ORC is a self-describing type-aware columnar file format designed for Hadoop workloads.
Source codehttps://github.com/apache/orc
Websitehttps://www.rfc-editor.org/rfc/rfc4180.htmlhttps://orc.apache.org/
Language supportjava, scala, c++, python, r, php, gojava, scala, c++, python
LicenseN/AApache license 2.0
Year createdN/A2013
CompanyHortonworks, Facebook
Use casesWrite once read many, Analytics, Efficient storage, ACID transactions
Is human readable +
yes
+
+
no
+
Orientationrowcolumn
Has type system +
no
+
+
yes
+
Has nested structure support +
no
+
+
yes
+
Has native compression +
no
+
+
yes
+
Has encoding support +
no
+
+
yes
+
Has constraint support +
no
+
+
no
+
Has acid support +
no
+
+
no
+
Has metadata +
no
+
+
yes
+
Has encryption support +
no
+
+
yes
+
Data processing framework support + Apache Beam, + Apache Drill, + Apache Flink, + Apache Gobblin, + Apache Hive, + Apache NiFi, + Apache Pig, + Apache Spark, + + Apache Flink, + Apache Gobblin, + Apache Hadoop, + Apache NiFi, + Apache Pig, + Apache Spark, +
Analytics query support + Apache Impala, + Apache Druid, + Apache Pinot, + AWS Athena, + Azure Synapse, + BigQuery, + Clickhouse, + Dremio, + DuckDB, + Firebolt, + + Apache Impala, + Apache Druid, + Apache Hive, + Apache Pinot, + AWS Athena, + BigQuery, + Clickhouse, + Firebolt, + Presto, + Trino, +
diff --git a/docs/file/csv/apache_parquet.md b/docs/file/csv/apache_parquet.md new file mode 100644 index 0000000..2d4e06f --- /dev/null +++ b/docs/file/csv/apache_parquet.md @@ -0,0 +1,199 @@ +--- +title: "Compare File technologies/tools" +description: "Compare File technologies/tools by features. Includes CSV vs Apache Parquet." +--- +# File + +[![CSV logo](../../assets/logo/csv.png){: style="height:30px;width:30px" .lg align-left } CSV](){ .md-button .toggle-vis data-column="1" } +[![Apache Parquet logo](../../assets/logo/parquet.svg){: style="height:30px;width:30px" .lg align-left } Apache Parquet](){ .md-button .toggle-vis data-column="2" } + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AttributeCSVApache Parquet
NameCSVApache Parquet
DescriptionComma-Separated Values (CSV) is a text file format that uses commas to separate values in plain text.Apache Parquet is an open source, column-oriented data file format designed for efficient data storage and retrieval.
Source codehttps://github.com/apache/parquet-format
Websitehttps://www.rfc-editor.org/rfc/rfc4180.htmlhttps://parquet.apache.org/
Language supportjava, scala, c++, python, r, php, gojava, scala, c++, python, r, php
LicenseN/AApache license 2.0
Year createdN/A2013
CompanyTwitter, Cloudera
Use casesWrite once read many, Analytics, Efficient storage, Column based queries
Is human readable +
yes
+
+
no
+
Orientationrowcolumn
Has type system +
no
+
+
yes
+
Has nested structure support +
no
+
+
yes
+
Has native compression +
no
+
+
yes
+
Has encoding support +
no
+
+
yes
+
Has constraint support +
no
+
+
no
+
Has acid support +
no
+
+
no
+
Has metadata +
no
+
+
yes
+
Has encryption support +
no
+
+
yes
+
Data processing framework support + Apache Beam, + Apache Drill, + Apache Flink, + Apache Gobblin, + Apache Hive, + Apache NiFi, + Apache Pig, + Apache Spark, + + Apache Beam, + Apache Drill, + Apache Flink, + Apache Spark, +
Analytics query support + Apache Impala, + Apache Druid, + Apache Pinot, + AWS Athena, + Azure Synapse, + BigQuery, + Clickhouse, + Dremio, + DuckDB, + Firebolt, + + Apache Hive, + Apache Impala, + Apache Druid, + Apache Pinot, + AWS Athena, + Azure Synapse, + BigQuery, + Clickhouse, + Dremio, + DuckDB, + Firebolt, +
diff --git a/docs/file/csv/delta_lake.md b/docs/file/csv/delta_lake.md new file mode 100644 index 0000000..94a607d --- /dev/null +++ b/docs/file/csv/delta_lake.md @@ -0,0 +1,195 @@ +--- +title: "Compare File technologies/tools" +description: "Compare File technologies/tools by features. Includes CSV vs Delta Lake." +--- +# File + +[![CSV logo](../../assets/logo/csv.png){: style="height:30px;width:30px" .lg align-left } CSV](){ .md-button .toggle-vis data-column="1" } +[![Delta Lake logo](../../assets/logo/delta_lake.png){: style="height:30px;width:30px" .lg align-left } Delta Lake](){ .md-button .toggle-vis data-column="2" } + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AttributeCSVDelta Lake
NameCSVDelta Lake
DescriptionComma-Separated Values (CSV) is a text file format that uses commas to separate values in plain text.Delta Lake is an open-source storage framework that enables building a Lakehouse architecture.
Source codehttps://github.com/delta-io/delta
Websitehttps://www.rfc-editor.org/rfc/rfc4180.htmlhttps://delta.io/
Language supportjava, scala, c++, python, r, php, goscala, java, python, rust
LicenseN/AApache license 2.0
Year createdN/A2019
CompanyDatabricks
Use casesWrite once read many, Analytics, Efficient storage, ACID transactions
Is human readable +
yes
+
+
no
+
Orientationrowcolumn
Has type system +
no
+
+
yes
+
Has nested structure support +
no
+
+
yes
+
Has native compression +
no
+
+
yes
+
Has encoding support +
no
+
+
yes
+
Has constraint support +
no
+
+
yes
+
Has acid support +
no
+
+
yes
+
Has metadata +
no
+
+
yes
+
Has encryption support +
no
+
+
maybe
+
Data processing framework support + Apache Beam, + Apache Drill, + Apache Flink, + Apache Gobblin, + Apache Hive, + Apache NiFi, + Apache Pig, + Apache Spark, + + Apache Drill, + Apache Flink, + Apache Spark, +
Analytics query support + Apache Impala, + Apache Druid, + Apache Pinot, + AWS Athena, + Azure Synapse, + BigQuery, + Clickhouse, + Dremio, + DuckDB, + Firebolt, + + Apache Hive, + AWS Athena, + Azure Synapse, + BigQuery, + Clickhouse, + Dremio, + Presto, + Trino, +
diff --git a/docs/file/delta_lake/apache_avro.md b/docs/file/delta_lake/apache_avro.md new file mode 100644 index 0000000..2f8894c --- /dev/null +++ b/docs/file/delta_lake/apache_avro.md @@ -0,0 +1,190 @@ +--- +title: "Compare File technologies/tools" +description: "Compare File technologies/tools by features. Includes Delta Lake vs Apache Avro." +--- +# File + +[![Delta Lake logo](../../assets/logo/delta_lake.png){: style="height:30px;width:30px" .lg align-left } Delta Lake](){ .md-button .toggle-vis data-column="1" } +[![Apache Avro logo](../../assets/logo/avro.png){: style="height:30px;width:30px" .lg align-left } Apache Avro](){ .md-button .toggle-vis data-column="2" } + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AttributeDelta LakeApache Avro
NameDelta LakeApache Avro
DescriptionDelta Lake is an open-source storage framework that enables building a Lakehouse architecture.Apache Avro is the leading serialization format for record data, and first choice for streaming data pipelines.
LicenseApache license 2.0Apache license 2.0
Source codehttps://github.com/delta-io/deltahttps://github.com/apache/avro
Websitehttps://delta.io/https://avro.apache.org/
Year created20192009
CompanyDatabricksApache
Language supportscala, java, python, rustjava, c++, c#, c, python, javascript, perl, ruby, php, rust
Use casesWrite once read many, Analytics, Efficient storage, ACID transactionsStream processing, Analytics, Efficient data exchange
Is human readable +
no
+
+
no
+
Orientationcolumnrow
Has type system +
yes
+
+
yes
+
Has nested structure support +
yes
+
+
yes
+
Has native compression +
yes
+
+
yes
+
Has encoding support +
yes
+
+
yes
+
Has constraint support +
yes
+
+
no
+
Has acid support +
yes
+
+
no
+
Has metadata +
yes
+
+
yes
+
Has encryption support +
maybe
+
+
no
+
Data processing framework support + Apache Drill, + Apache Flink, + Apache Spark, + + Apache Flink, + Apache Gobblin, + Apache NiFi, + Apache Pig, + Apache Spark, +
Analytics query support + Apache Hive, + AWS Athena, + Azure Synapse, + BigQuery, + Clickhouse, + Dremio, + Presto, + Trino, + + Apache Impala, + Apache Druid, + Apache Hive, + Apache Pinot, + AWS Athena, + BigQuery, + Clickhouse, + Firebolt, +
diff --git a/docs/file/delta_lake/apache_hudi.md b/docs/file/delta_lake/apache_hudi.md new file mode 100644 index 0000000..f7fe156 --- /dev/null +++ b/docs/file/delta_lake/apache_hudi.md @@ -0,0 +1,186 @@ +--- +title: "Compare File technologies/tools" +description: "Compare File technologies/tools by features. Includes Delta Lake vs Apache Hudi." +--- +# File + +[![Delta Lake logo](../../assets/logo/delta_lake.png){: style="height:30px;width:30px" .lg align-left } Delta Lake](){ .md-button .toggle-vis data-column="1" } +[![Apache Hudi logo](../../assets/logo/hudi.png){: style="height:30px;width:30px" .lg align-left } Apache Hudi](){ .md-button .toggle-vis data-column="2" } + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AttributeDelta LakeApache Hudi
NameDelta LakeApache Hudi
DescriptionDelta Lake is an open-source storage framework that enables building a Lakehouse architecture.Apache Hudi is a transactional data lake platform that brings database and data warehouse capabilities to the data lake. Utilises data stored in either parquet or orc.
LicenseApache license 2.0Apache license 2.0
Source codehttps://github.com/delta-io/deltahttps://github.com/apache/hudi
Websitehttps://delta.io/https://hudi.apache.org/
Year created20192016
CompanyDatabricksUber
Language supportscala, java, python, rust
Use casesWrite once read many, Analytics, Efficient storage, ACID transactionsIncremental data processing, Data upserts, Change Data Capture (CDC), ACID transactions
Is human readable +
no
+
+
no
+
Orientationcolumncolumn or row
Has type system +
yes
+
+
yes
+
Has nested structure support +
yes
+
+
yes
+
Has native compression +
yes
+
+
yes
+
Has encoding support +
yes
+
+
yes
+
Has constraint support +
yes
+
+
yes
+
Has acid support +
yes
+
+
yes
+
Has metadata +
yes
+
+
yes
+
Has encryption support +
maybe
+
+
maybe
+
Data processing framework support + Apache Drill, + Apache Flink, + Apache Spark, + + Apache Spark, + Apache Flink, +
Analytics query support + Apache Hive, + AWS Athena, + Azure Synapse, + BigQuery, + Clickhouse, + Dremio, + Presto, + Trino, + + Apache Hive, + Apache Impala, + AWS Athena, + BigQuery, + Clickhouse, + Presto, + Trino, +
diff --git a/docs/file/delta_lake/apache_iceberg.md b/docs/file/delta_lake/apache_iceberg.md new file mode 100644 index 0000000..61b512a --- /dev/null +++ b/docs/file/delta_lake/apache_iceberg.md @@ -0,0 +1,192 @@ +--- +title: "Compare File technologies/tools" +description: "Compare File technologies/tools by features. Includes Delta Lake vs Apache Iceberg." +--- +# File + +[![Delta Lake logo](../../assets/logo/delta_lake.png){: style="height:30px;width:30px" .lg align-left } Delta Lake](){ .md-button .toggle-vis data-column="1" } +[![Apache Iceberg logo](../../assets/logo/iceberg.png){: style="height:30px;width:30px" .lg align-left } Apache Iceberg](){ .md-button .toggle-vis data-column="2" } + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AttributeDelta LakeApache Iceberg
NameDelta LakeApache Iceberg
DescriptionDelta Lake is an open-source storage framework that enables building a Lakehouse architecture.Iceberg is a high-performance format for huge analytic tables. Utilises data stored in either parquet, avro, or orc.
LicenseApache license 2.0Apache license 2.0
Source codehttps://github.com/delta-io/deltahttps://github.com/apache/iceberg
Websitehttps://delta.io/https://iceberg.apache.org/
Year created20192017
CompanyDatabricksNetflix
Language supportscala, java, python, rust
Use casesWrite once read many, Analytics, Efficient storage, ACID transactionsWrite once read many, Analytics, Efficient storage, ACID transactions
Is human readable +
no
+
+
no
+
Orientationcolumncolumn or row
Has type system +
yes
+
+
yes
+
Has nested structure support +
yes
+
+
yes
+
Has native compression +
yes
+
+
yes
+
Has encoding support +
yes
+
+
yes
+
Has constraint support +
yes
+
+
no
+
Has acid support +
yes
+
+
yes
+
Has metadata +
yes
+
+
yes
+
Has encryption support +
maybe
+
+
maybe
+
Data processing framework support + Apache Drill, + Apache Flink, + Apache Spark, + + Apache Drill, + Apache Flink, + Apache Gobblin, + Apache Pig, + Apache Spark, +
Analytics query support + Apache Hive, + AWS Athena, + Azure Synapse, + BigQuery, + Clickhouse, + Dremio, + Presto, + Trino, + + Apache Impala, + Apache Druid, + Apache Hive, + AWS Athena, + BigQuery, + Clickhouse, + Dremio, + DuckDB, + Presto, + Trino, +
diff --git a/docs/file/delta_lake/apache_orc.md b/docs/file/delta_lake/apache_orc.md new file mode 100644 index 0000000..31f0ef1 --- /dev/null +++ b/docs/file/delta_lake/apache_orc.md @@ -0,0 +1,193 @@ +--- +title: "Compare File technologies/tools" +description: "Compare File technologies/tools by features. Includes Delta Lake vs Apache ORC." +--- +# File + +[![Delta Lake logo](../../assets/logo/delta_lake.png){: style="height:30px;width:30px" .lg align-left } Delta Lake](){ .md-button .toggle-vis data-column="1" } +[![Apache ORC logo](../../assets/logo/orc.png){: style="height:30px;width:30px" .lg align-left } Apache ORC](){ .md-button .toggle-vis data-column="2" } + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AttributeDelta LakeApache ORC
NameDelta LakeApache ORC
DescriptionDelta Lake is an open-source storage framework that enables building a Lakehouse architecture.ORC is a self-describing type-aware columnar file format designed for Hadoop workloads.
LicenseApache license 2.0Apache license 2.0
Source codehttps://github.com/delta-io/deltahttps://github.com/apache/orc
Websitehttps://delta.io/https://orc.apache.org/
Year created20192013
CompanyDatabricksHortonworks, Facebook
Language supportscala, java, python, rustjava, scala, c++, python
Use casesWrite once read many, Analytics, Efficient storage, ACID transactionsWrite once read many, Analytics, Efficient storage, ACID transactions
Is human readable +
no
+
+
no
+
Orientationcolumncolumn
Has type system +
yes
+
+
yes
+
Has nested structure support +
yes
+
+
yes
+
Has native compression +
yes
+
+
yes
+
Has encoding support +
yes
+
+
yes
+
Has constraint support +
yes
+
+
no
+
Has acid support +
yes
+
+
no
+
Has metadata +
yes
+
+
yes
+
Has encryption support +
maybe
+
+
yes
+
Data processing framework support + Apache Drill, + Apache Flink, + Apache Spark, + + Apache Flink, + Apache Gobblin, + Apache Hadoop, + Apache NiFi, + Apache Pig, + Apache Spark, +
Analytics query support + Apache Hive, + AWS Athena, + Azure Synapse, + BigQuery, + Clickhouse, + Dremio, + Presto, + Trino, + + Apache Impala, + Apache Druid, + Apache Hive, + Apache Pinot, + AWS Athena, + BigQuery, + Clickhouse, + Firebolt, + Presto, + Trino, +
diff --git a/docs/file/delta_lake/apache_parquet.md b/docs/file/delta_lake/apache_parquet.md new file mode 100644 index 0000000..4c7edc4 --- /dev/null +++ b/docs/file/delta_lake/apache_parquet.md @@ -0,0 +1,192 @@ +--- +title: "Compare File technologies/tools" +description: "Compare File technologies/tools by features. Includes Delta Lake vs Apache Parquet." +--- +# File + +[![Delta Lake logo](../../assets/logo/delta_lake.png){: style="height:30px;width:30px" .lg align-left } Delta Lake](){ .md-button .toggle-vis data-column="1" } +[![Apache Parquet logo](../../assets/logo/parquet.svg){: style="height:30px;width:30px" .lg align-left } Apache Parquet](){ .md-button .toggle-vis data-column="2" } + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AttributeDelta LakeApache Parquet
NameDelta LakeApache Parquet
DescriptionDelta Lake is an open-source storage framework that enables building a Lakehouse architecture.Apache Parquet is an open source, column-oriented data file format designed for efficient data storage and retrieval.
LicenseApache license 2.0Apache license 2.0
Source codehttps://github.com/delta-io/deltahttps://github.com/apache/parquet-format
Websitehttps://delta.io/https://parquet.apache.org/
Year created20192013
CompanyDatabricksTwitter, Cloudera
Language supportscala, java, python, rustjava, scala, c++, python, r, php
Use casesWrite once read many, Analytics, Efficient storage, ACID transactionsWrite once read many, Analytics, Efficient storage, Column based queries
Is human readable +
no
+
+
no
+
Orientationcolumncolumn
Has type system +
yes
+
+
yes
+
Has nested structure support +
yes
+
+
yes
+
Has native compression +
yes
+
+
yes
+
Has encoding support +
yes
+
+
yes
+
Has constraint support +
yes
+
+
no
+
Has acid support +
yes
+
+
no
+
Has metadata +
yes
+
+
yes
+
Has encryption support +
maybe
+
+
yes
+
Data processing framework support + Apache Drill, + Apache Flink, + Apache Spark, + + Apache Beam, + Apache Drill, + Apache Flink, + Apache Spark, +
Analytics query support + Apache Hive, + AWS Athena, + Azure Synapse, + BigQuery, + Clickhouse, + Dremio, + Presto, + Trino, + + Apache Hive, + Apache Impala, + Apache Druid, + Apache Pinot, + AWS Athena, + Azure Synapse, + BigQuery, + Clickhouse, + Dremio, + DuckDB, + Firebolt, +
diff --git a/docs/file/delta_lake/csv.md b/docs/file/delta_lake/csv.md new file mode 100644 index 0000000..3de4a72 --- /dev/null +++ b/docs/file/delta_lake/csv.md @@ -0,0 +1,195 @@ +--- +title: "Compare File technologies/tools" +description: "Compare File technologies/tools by features. Includes Delta Lake vs CSV." +--- +# File + +[![Delta Lake logo](../../assets/logo/delta_lake.png){: style="height:30px;width:30px" .lg align-left } Delta Lake](){ .md-button .toggle-vis data-column="1" } +[![CSV logo](../../assets/logo/csv.png){: style="height:30px;width:30px" .lg align-left } CSV](){ .md-button .toggle-vis data-column="2" } + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AttributeDelta LakeCSV
NameDelta LakeCSV
DescriptionDelta Lake is an open-source storage framework that enables building a Lakehouse architecture.Comma-Separated Values (CSV) is a text file format that uses commas to separate values in plain text.
LicenseApache license 2.0N/A
Source codehttps://github.com/delta-io/delta
Websitehttps://delta.io/https://www.rfc-editor.org/rfc/rfc4180.html
Year created20190
CompanyDatabricks
Language supportscala, java, python, rustjava, scala, c++, python, r, php, go
Use casesWrite once read many, Analytics, Efficient storage, ACID transactions
Is human readable +
no
+
+
yes
+
Orientationcolumnrow
Has type system +
yes
+
+
no
+
Has nested structure support +
yes
+
+
no
+
Has native compression +
yes
+
+
no
+
Has encoding support +
yes
+
+
no
+
Has constraint support +
yes
+
+
no
+
Has acid support +
yes
+
+
no
+
Has metadata +
yes
+
+
no
+
Has encryption support +
maybe
+
+
no
+
Data processing framework support + Apache Drill, + Apache Flink, + Apache Spark, + + Apache Beam, + Apache Drill, + Apache Flink, + Apache Gobblin, + Apache Hive, + Apache NiFi, + Apache Pig, + Apache Spark, +
Analytics query support + Apache Hive, + AWS Athena, + Azure Synapse, + BigQuery, + Clickhouse, + Dremio, + Presto, + Trino, + + Apache Impala, + Apache Druid, + Apache Pinot, + AWS Athena, + Azure Synapse, + BigQuery, + Clickhouse, + Dremio, + DuckDB, + Firebolt, +
diff --git a/docs/job_orchestration/apache_airflow/dagster.md b/docs/job_orchestration/apache_airflow/dagster.md new file mode 100644 index 0000000..ad9a9c2 --- /dev/null +++ b/docs/job_orchestration/apache_airflow/dagster.md @@ -0,0 +1,147 @@ +--- +title: "Compare Job orchestration technologies/tools" +description: "Compare Job orchestration technologies/tools by features. Includes Apache Airflow vs Dagster." +--- +# Job orchestration + +[![Apache Airflow logo](../../assets/logo/airflow.png){: style="height:30px;width:30px" .lg align-left } Apache Airflow](){ .md-button .toggle-vis data-column="1" } +[![Dagster logo](../../assets/logo/dagster.png){: style="height:30px;width:30px" .lg align-left } Dagster](){ .md-button .toggle-vis data-column="2" } + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AttributeApache AirflowDagster
NameApache AirflowDagster
DescriptionApache Airflow is a platform to programmatically author, schedule, and monitor workflows.A data orchestrator for machine learning, analytics, and ETL.
LicenseApache license 2.0Apache license 2.0
Source codehttps://github.com/apache/airflowhttps://github.com/dagster-io/dagster
Websitehttps://airflow.apache.org/https://dagster.io/
Year created20142019
CompanyAirbnb, ApacheDagster
Language supportpythonpython
Use casesWorkflow schedulingData Orchestration, Machine Learning Pipelines
Has cron schedule support +
yes
+
+
yes
+
Has event based trigger support +
yes
+
+
yes
+
Has api support +
yes
+
+
yes
+
Has access controls +
yes
+
+
yes
+
Has workflow versioning +
no
+
+
yes
+
Has workflow configuration support +
yes
+
+
yes
+
Has audit logs +
yes
+
+
yes
+
Has cost tracking +
no
+
+
no
+
Has data source connection support +
yes
+
+
no
+
diff --git a/docs/job_orchestration/apache_airflow/mage.md b/docs/job_orchestration/apache_airflow/mage.md new file mode 100644 index 0000000..cb34bfd --- /dev/null +++ b/docs/job_orchestration/apache_airflow/mage.md @@ -0,0 +1,147 @@ +--- +title: "Compare Job orchestration technologies/tools" +description: "Compare Job orchestration technologies/tools by features. Includes Apache Airflow vs Mage." +--- +# Job orchestration + +[![Apache Airflow logo](../../assets/logo/airflow.png){: style="height:30px;width:30px" .lg align-left } Apache Airflow](){ .md-button .toggle-vis data-column="1" } +[![Mage logo](../../assets/logo/mage.svg){: style="height:30px;width:30px" .lg align-left } Mage](){ .md-button .toggle-vis data-column="2" } + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AttributeApache AirflowMage
NameApache AirflowMage
DescriptionApache Airflow is a platform to programmatically author, schedule, and monitor workflows.A lightweight and flexible job orchestration tool.
LicenseApache license 2.0Apache license 2.0
Source codehttps://github.com/apache/airflowhttps://github.com/mage-ai/mage-ai
Websitehttps://airflow.apache.org/https://www.mage.ai/
Year created20142019
CompanyAirbnb, ApacheMage
Language supportpythonpython
Use casesWorkflow schedulingTask Automation, Workflow Management
Has cron schedule support +
yes
+
+
yes
+
Has event based trigger support +
yes
+
+
no
+
Has api support +
yes
+
+
no
+
Has access controls +
yes
+
+
no
+
Has workflow versioning +
no
+
+
no
+
Has workflow configuration support +
yes
+
+
yes
+
Has audit logs +
yes
+
+
no
+
Has cost tracking +
no
+
+
no
+
Has data source connection support +
yes
+
+
no
+
diff --git a/docs/job_orchestration/apache_airflow/prefect.md b/docs/job_orchestration/apache_airflow/prefect.md new file mode 100644 index 0000000..deaacb0 --- /dev/null +++ b/docs/job_orchestration/apache_airflow/prefect.md @@ -0,0 +1,147 @@ +--- +title: "Compare Job orchestration technologies/tools" +description: "Compare Job orchestration technologies/tools by features. Includes Apache Airflow vs Prefect." +--- +# Job orchestration + +[![Apache Airflow logo](../../assets/logo/airflow.png){: style="height:30px;width:30px" .lg align-left } Apache Airflow](){ .md-button .toggle-vis data-column="1" } +[![Prefect logo](../../assets/logo/prefect.svg){: style="height:30px;width:30px" .lg align-left } Prefect](){ .md-button .toggle-vis data-column="2" } + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AttributeApache AirflowPrefect
NameApache AirflowPrefect
DescriptionApache Airflow is a platform to programmatically author, schedule, and monitor workflows.A modern data workflow management system.
LicenseApache license 2.0MIT
Source codehttps://github.com/apache/airflowhttps://github.com/PrefectHQ/prefect
Websitehttps://airflow.apache.org/https://www.prefect.io/
Year created20142018
CompanyAirbnb, ApachePrefect
Language supportpythonpython
Use casesWorkflow schedulingData Workflow Management, ETL
Has cron schedule support +
yes
+
+
yes
+
Has event based trigger support +
yes
+
+
yes
+
Has api support +
yes
+
+
yes
+
Has access controls +
yes
+
+
yes
+
Has workflow versioning +
no
+
+
yes
+
Has workflow configuration support +
yes
+
+
yes
+
Has audit logs +
yes
+
+
yes
+
Has cost tracking +
no
+
+
no
+
Has data source connection support +
yes
+
+
yes
+
diff --git a/docs/job_orchestration/dagster/apache_airflow.md b/docs/job_orchestration/dagster/apache_airflow.md new file mode 100644 index 0000000..7c5c6d9 --- /dev/null +++ b/docs/job_orchestration/dagster/apache_airflow.md @@ -0,0 +1,147 @@ +--- +title: "Compare Job orchestration technologies/tools" +description: "Compare Job orchestration technologies/tools by features. Includes Dagster vs Apache Airflow." +--- +# Job orchestration + +[![Dagster logo](../../assets/logo/dagster.png){: style="height:30px;width:30px" .lg align-left } Dagster](){ .md-button .toggle-vis data-column="1" } +[![Apache Airflow logo](../../assets/logo/airflow.png){: style="height:30px;width:30px" .lg align-left } Apache Airflow](){ .md-button .toggle-vis data-column="2" } + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AttributeDagsterApache Airflow
NameDagsterApache Airflow
DescriptionA data orchestrator for machine learning, analytics, and ETL.Apache Airflow is a platform to programmatically author, schedule, and monitor workflows.
Source codehttps://github.com/dagster-io/dagsterhttps://github.com/apache/airflow
Websitehttps://dagster.io/https://airflow.apache.org/
LicenseApache license 2.0Apache license 2.0
Year created20192014
CompanyDagsterAirbnb, Apache
Use casesData Orchestration, Machine Learning PipelinesWorkflow scheduling
Language supportpythonpython
Has cron schedule support +
yes
+
+
yes
+
Has event based trigger support +
yes
+
+
yes
+
Has api support +
yes
+
+
yes
+
Has access controls +
yes
+
+
yes
+
Has workflow versioning +
yes
+
+
no
+
Has workflow configuration support +
yes
+
+
yes
+
Has audit logs +
yes
+
+
yes
+
Has cost tracking +
no
+
+
no
+
Has data source connection support +
no
+
+
yes
+
diff --git a/docs/job_orchestration/dagster/mage.md b/docs/job_orchestration/dagster/mage.md new file mode 100644 index 0000000..ab6a2c9 --- /dev/null +++ b/docs/job_orchestration/dagster/mage.md @@ -0,0 +1,147 @@ +--- +title: "Compare Job orchestration technologies/tools" +description: "Compare Job orchestration technologies/tools by features. Includes Dagster vs Mage." +--- +# Job orchestration + +[![Dagster logo](../../assets/logo/dagster.png){: style="height:30px;width:30px" .lg align-left } Dagster](){ .md-button .toggle-vis data-column="1" } +[![Mage logo](../../assets/logo/mage.svg){: style="height:30px;width:30px" .lg align-left } Mage](){ .md-button .toggle-vis data-column="2" } + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AttributeDagsterMage
NameDagsterMage
DescriptionA data orchestrator for machine learning, analytics, and ETL.A lightweight and flexible job orchestration tool.
Source codehttps://github.com/dagster-io/dagsterhttps://github.com/mage-ai/mage-ai
Websitehttps://dagster.io/https://www.mage.ai/
LicenseApache license 2.0Apache license 2.0
Year created20192019
CompanyDagsterMage
Use casesData Orchestration, Machine Learning PipelinesTask Automation, Workflow Management
Language supportpythonpython
Has cron schedule support +
yes
+
+
yes
+
Has event based trigger support +
yes
+
+
no
+
Has api support +
yes
+
+
no
+
Has access controls +
yes
+
+
no
+
Has workflow versioning +
yes
+
+
no
+
Has workflow configuration support +
yes
+
+
yes
+
Has audit logs +
yes
+
+
no
+
Has cost tracking +
no
+
+
no
+
Has data source connection support +
no
+
+
no
+
diff --git a/docs/job_orchestration/dagster/prefect.md b/docs/job_orchestration/dagster/prefect.md new file mode 100644 index 0000000..1a968c4 --- /dev/null +++ b/docs/job_orchestration/dagster/prefect.md @@ -0,0 +1,147 @@ +--- +title: "Compare Job orchestration technologies/tools" +description: "Compare Job orchestration technologies/tools by features. Includes Dagster vs Prefect." +--- +# Job orchestration + +[![Dagster logo](../../assets/logo/dagster.png){: style="height:30px;width:30px" .lg align-left } Dagster](){ .md-button .toggle-vis data-column="1" } +[![Prefect logo](../../assets/logo/prefect.svg){: style="height:30px;width:30px" .lg align-left } Prefect](){ .md-button .toggle-vis data-column="2" } + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AttributeDagsterPrefect
NameDagsterPrefect
DescriptionA data orchestrator for machine learning, analytics, and ETL.A modern data workflow management system.
Source codehttps://github.com/dagster-io/dagsterhttps://github.com/PrefectHQ/prefect
Websitehttps://dagster.io/https://www.prefect.io/
LicenseApache license 2.0MIT
Year created20192018
CompanyDagsterPrefect
Use casesData Orchestration, Machine Learning PipelinesData Workflow Management, ETL
Language supportpythonpython
Has cron schedule support +
yes
+
+
yes
+
Has event based trigger support +
yes
+
+
yes
+
Has api support +
yes
+
+
yes
+
Has access controls +
yes
+
+
yes
+
Has workflow versioning +
yes
+
+
yes
+
Has workflow configuration support +
yes
+
+
yes
+
Has audit logs +
yes
+
+
yes
+
Has cost tracking +
no
+
+
no
+
Has data source connection support +
no
+
+
yes
+
diff --git a/docs/job_orchestration/mage/apache_airflow.md b/docs/job_orchestration/mage/apache_airflow.md new file mode 100644 index 0000000..17bd9df --- /dev/null +++ b/docs/job_orchestration/mage/apache_airflow.md @@ -0,0 +1,147 @@ +--- +title: "Compare Job orchestration technologies/tools" +description: "Compare Job orchestration technologies/tools by features. Includes Mage vs Apache Airflow." +--- +# Job orchestration + +[![Mage logo](../../assets/logo/mage.svg){: style="height:30px;width:30px" .lg align-left } Mage](){ .md-button .toggle-vis data-column="1" } +[![Apache Airflow logo](../../assets/logo/airflow.png){: style="height:30px;width:30px" .lg align-left } Apache Airflow](){ .md-button .toggle-vis data-column="2" } + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AttributeMageApache Airflow
NameMageApache Airflow
DescriptionA lightweight and flexible job orchestration tool.Apache Airflow is a platform to programmatically author, schedule, and monitor workflows.
Source codehttps://github.com/mage-ai/mage-aihttps://github.com/apache/airflow
Websitehttps://www.mage.ai/https://airflow.apache.org/
LicenseApache license 2.0Apache license 2.0
Year created20192014
CompanyMageAirbnb, Apache
Use casesTask Automation, Workflow ManagementWorkflow scheduling
Language supportpythonpython
Has cron schedule support +
yes
+
+
yes
+
Has event based trigger support +
no
+
+
yes
+
Has api support +
no
+
+
yes
+
Has access controls +
no
+
+
yes
+
Has workflow versioning +
no
+
+
no
+
Has workflow configuration support +
yes
+
+
yes
+
Has audit logs +
no
+
+
yes
+
Has cost tracking +
no
+
+
no
+
Has data source connection support +
no
+
+
yes
+
diff --git a/docs/job_orchestration/mage/dagster.md b/docs/job_orchestration/mage/dagster.md new file mode 100644 index 0000000..5b5796d --- /dev/null +++ b/docs/job_orchestration/mage/dagster.md @@ -0,0 +1,147 @@ +--- +title: "Compare Job orchestration technologies/tools" +description: "Compare Job orchestration technologies/tools by features. Includes Mage vs Dagster." +--- +# Job orchestration + +[![Mage logo](../../assets/logo/mage.svg){: style="height:30px;width:30px" .lg align-left } Mage](){ .md-button .toggle-vis data-column="1" } +[![Dagster logo](../../assets/logo/dagster.png){: style="height:30px;width:30px" .lg align-left } Dagster](){ .md-button .toggle-vis data-column="2" } + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AttributeMageDagster
NameMageDagster
DescriptionA lightweight and flexible job orchestration tool.A data orchestrator for machine learning, analytics, and ETL.
Source codehttps://github.com/mage-ai/mage-aihttps://github.com/dagster-io/dagster
Websitehttps://www.mage.ai/https://dagster.io/
LicenseApache license 2.0Apache license 2.0
Year created20192019
CompanyMageDagster
Use casesTask Automation, Workflow ManagementData Orchestration, Machine Learning Pipelines
Language supportpythonpython
Has cron schedule support +
yes
+
+
yes
+
Has event based trigger support +
no
+
+
yes
+
Has api support +
no
+
+
yes
+
Has access controls +
no
+
+
yes
+
Has workflow versioning +
no
+
+
yes
+
Has workflow configuration support +
yes
+
+
yes
+
Has audit logs +
no
+
+
yes
+
Has cost tracking +
no
+
+
no
+
Has data source connection support +
no
+
+
no
+
diff --git a/docs/job_orchestration/mage/prefect.md b/docs/job_orchestration/mage/prefect.md new file mode 100644 index 0000000..bd447b2 --- /dev/null +++ b/docs/job_orchestration/mage/prefect.md @@ -0,0 +1,147 @@ +--- +title: "Compare Job orchestration technologies/tools" +description: "Compare Job orchestration technologies/tools by features. Includes Mage vs Prefect." +--- +# Job orchestration + +[![Mage logo](../../assets/logo/mage.svg){: style="height:30px;width:30px" .lg align-left } Mage](){ .md-button .toggle-vis data-column="1" } +[![Prefect logo](../../assets/logo/prefect.svg){: style="height:30px;width:30px" .lg align-left } Prefect](){ .md-button .toggle-vis data-column="2" } + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AttributeMagePrefect
NameMagePrefect
DescriptionA lightweight and flexible job orchestration tool.A modern data workflow management system.
Source codehttps://github.com/mage-ai/mage-aihttps://github.com/PrefectHQ/prefect
Websitehttps://www.mage.ai/https://www.prefect.io/
LicenseApache license 2.0MIT
Year created20192018
CompanyMagePrefect
Use casesTask Automation, Workflow ManagementData Workflow Management, ETL
Language supportpythonpython
Has cron schedule support +
yes
+
+
yes
+
Has event based trigger support +
no
+
+
yes
+
Has api support +
no
+
+
yes
+
Has access controls +
no
+
+
yes
+
Has workflow versioning +
no
+
+
yes
+
Has workflow configuration support +
yes
+
+
yes
+
Has audit logs +
no
+
+
yes
+
Has cost tracking +
no
+
+
no
+
Has data source connection support +
no
+
+
yes
+
diff --git a/docs/job_orchestration/prefect/apache_airflow.md b/docs/job_orchestration/prefect/apache_airflow.md new file mode 100644 index 0000000..06b188d --- /dev/null +++ b/docs/job_orchestration/prefect/apache_airflow.md @@ -0,0 +1,147 @@ +--- +title: "Compare Job orchestration technologies/tools" +description: "Compare Job orchestration technologies/tools by features. Includes Prefect vs Apache Airflow." +--- +# Job orchestration + +[![Prefect logo](../../assets/logo/prefect.svg){: style="height:30px;width:30px" .lg align-left } Prefect](){ .md-button .toggle-vis data-column="1" } +[![Apache Airflow logo](../../assets/logo/airflow.png){: style="height:30px;width:30px" .lg align-left } Apache Airflow](){ .md-button .toggle-vis data-column="2" } + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AttributePrefectApache Airflow
NamePrefectApache Airflow
DescriptionA modern data workflow management system.Apache Airflow is a platform to programmatically author, schedule, and monitor workflows.
Source codehttps://github.com/PrefectHQ/prefecthttps://github.com/apache/airflow
Websitehttps://www.prefect.io/https://airflow.apache.org/
LicenseMITApache license 2.0
Year created20182014
CompanyPrefectAirbnb, Apache
Use casesData Workflow Management, ETLWorkflow scheduling
Language supportpythonpython
Has cron schedule support +
yes
+
+
yes
+
Has event based trigger support +
yes
+
+
yes
+
Has api support +
yes
+
+
yes
+
Has access controls +
yes
+
+
yes
+
Has workflow versioning +
yes
+
+
no
+
Has workflow configuration support +
yes
+
+
yes
+
Has audit logs +
yes
+
+
yes
+
Has cost tracking +
no
+
+
no
+
Has data source connection support +
yes
+
+
yes
+
diff --git a/docs/job_orchestration/prefect/dagster.md b/docs/job_orchestration/prefect/dagster.md new file mode 100644 index 0000000..39c804c --- /dev/null +++ b/docs/job_orchestration/prefect/dagster.md @@ -0,0 +1,147 @@ +--- +title: "Compare Job orchestration technologies/tools" +description: "Compare Job orchestration technologies/tools by features. Includes Prefect vs Dagster." +--- +# Job orchestration + +[![Prefect logo](../../assets/logo/prefect.svg){: style="height:30px;width:30px" .lg align-left } Prefect](){ .md-button .toggle-vis data-column="1" } +[![Dagster logo](../../assets/logo/dagster.png){: style="height:30px;width:30px" .lg align-left } Dagster](){ .md-button .toggle-vis data-column="2" } + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AttributePrefectDagster
NamePrefectDagster
DescriptionA modern data workflow management system.A data orchestrator for machine learning, analytics, and ETL.
Source codehttps://github.com/PrefectHQ/prefecthttps://github.com/dagster-io/dagster
Websitehttps://www.prefect.io/https://dagster.io/
LicenseMITApache license 2.0
Year created20182019
CompanyPrefectDagster
Use casesData Workflow Management, ETLData Orchestration, Machine Learning Pipelines
Language supportpythonpython
Has cron schedule support +
yes
+
+
yes
+
Has event based trigger support +
yes
+
+
yes
+
Has api support +
yes
+
+
yes
+
Has access controls +
yes
+
+
yes
+
Has workflow versioning +
yes
+
+
yes
+
Has workflow configuration support +
yes
+
+
yes
+
Has audit logs +
yes
+
+
yes
+
Has cost tracking +
no
+
+
no
+
Has data source connection support +
yes
+
+
no
+
diff --git a/docs/job_orchestration/prefect/mage.md b/docs/job_orchestration/prefect/mage.md new file mode 100644 index 0000000..4a8f3da --- /dev/null +++ b/docs/job_orchestration/prefect/mage.md @@ -0,0 +1,147 @@ +--- +title: "Compare Job orchestration technologies/tools" +description: "Compare Job orchestration technologies/tools by features. Includes Prefect vs Mage." +--- +# Job orchestration + +[![Prefect logo](../../assets/logo/prefect.svg){: style="height:30px;width:30px" .lg align-left } Prefect](){ .md-button .toggle-vis data-column="1" } +[![Mage logo](../../assets/logo/mage.svg){: style="height:30px;width:30px" .lg align-left } Mage](){ .md-button .toggle-vis data-column="2" } + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AttributePrefectMage
NamePrefectMage
DescriptionA modern data workflow management system.A lightweight and flexible job orchestration tool.
Source codehttps://github.com/PrefectHQ/prefecthttps://github.com/mage-ai/mage-ai
Websitehttps://www.prefect.io/https://www.mage.ai/
LicenseMITApache license 2.0
Year created20182019
CompanyPrefectMage
Use casesData Workflow Management, ETLTask Automation, Workflow Management
Language supportpythonpython
Has cron schedule support +
yes
+
+
yes
+
Has event based trigger support +
yes
+
+
no
+
Has api support +
yes
+
+
no
+
Has access controls +
yes
+
+
no
+
Has workflow versioning +
yes
+
+
no
+
Has workflow configuration support +
yes
+
+
yes
+
Has audit logs +
yes
+
+
no
+
Has cost tracking +
no
+
+
no
+
Has data source connection support +
yes
+
+
no
+
diff --git a/mkdocs.yml b/mkdocs.yml index 8230815..af903dd 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -94,5 +94,72 @@ markdown_extensions: nav: - Home: 'index.md' - - File: 'file/index.md' - - Job_orchestration: 'job_orchestration/index.md' \ No newline at end of file + - File: + - 'file/index.md' + - Apache Avro: + - Apache Hudi: 'file/apache_avro/apache_hudi.md' + - Apache Iceberg: 'file/apache_avro/apache_iceberg.md' + - Apache ORC: 'file/apache_avro/apache_orc.md' + - Apache Parquet: 'file/apache_avro/apache_parquet.md' + - CSV: 'file/apache_avro/csv.md' + - Delta Lake: 'file/apache_avro/delta_lake.md' + - Apache Hudi: + - Apache Avro: 'file/apache_hudi/apache_avro.md' + - Apache Iceberg: 'file/apache_hudi/apache_iceberg.md' + - Apache ORC: 'file/apache_hudi/apache_orc.md' + - Apache Parquet: 'file/apache_hudi/apache_parquet.md' + - CSV: 'file/apache_hudi/csv.md' + - Delta Lake: 'file/apache_hudi/delta_lake.md' + - Apache Iceberg: + - Apache Avro: 'file/apache_iceberg/apache_avro.md' + - Apache Hudi: 'file/apache_iceberg/apache_hudi.md' + - Apache ORC: 'file/apache_iceberg/apache_orc.md' + - Apache Parquet: 'file/apache_iceberg/apache_parquet.md' + - CSV: 'file/apache_iceberg/csv.md' + - Delta Lake: 'file/apache_iceberg/delta_lake.md' + - Apache ORC: + - Apache Avro: 'file/apache_orc/apache_avro.md' + - Apache Hudi: 'file/apache_orc/apache_hudi.md' + - Apache Iceberg: 'file/apache_orc/apache_iceberg.md' + - Apache Parquet: 'file/apache_orc/apache_parquet.md' + - CSV: 'file/apache_orc/csv.md' + - Delta Lake: 'file/apache_orc/delta_lake.md' + - Apache Parquet: + - Apache Avro: 'file/apache_parquet/apache_avro.md' + - Apache Hudi: 'file/apache_parquet/apache_hudi.md' + - Apache Iceberg: 'file/apache_parquet/apache_iceberg.md' + - Apache ORC: 'file/apache_parquet/apache_orc.md' + - CSV: 'file/apache_parquet/csv.md' + - Delta Lake: 'file/apache_parquet/delta_lake.md' + - CSV: + - Apache Avro: 'file/csv/apache_avro.md' + - Apache Hudi: 'file/csv/apache_hudi.md' + - 
Apache Iceberg: 'file/csv/apache_iceberg.md' + - Apache ORC: 'file/csv/apache_orc.md' + - Apache Parquet: 'file/csv/apache_parquet.md' + - Delta Lake: 'file/csv/delta_lake.md' + - Delta Lake: + - Apache Avro: 'file/delta_lake/apache_avro.md' + - Apache Hudi: 'file/delta_lake/apache_hudi.md' + - Apache Iceberg: 'file/delta_lake/apache_iceberg.md' + - Apache ORC: 'file/delta_lake/apache_orc.md' + - Apache Parquet: 'file/delta_lake/apache_parquet.md' + - CSV: 'file/delta_lake/csv.md' + - Job orchestration: + - 'job_orchestration/index.md' + - Apache Airflow: + - Dagster: 'job_orchestration/apache_airflow/dagster.md' + - Mage: 'job_orchestration/apache_airflow/mage.md' + - Prefect: 'job_orchestration/apache_airflow/prefect.md' + - Dagster: + - Apache Airflow: 'job_orchestration/dagster/apache_airflow.md' + - Mage: 'job_orchestration/dagster/mage.md' + - Prefect: 'job_orchestration/dagster/prefect.md' + - Mage: + - Apache Airflow: 'job_orchestration/mage/apache_airflow.md' + - Dagster: 'job_orchestration/mage/dagster.md' + - Prefect: 'job_orchestration/mage/prefect.md' + - Prefect: + - Apache Airflow: 'job_orchestration/prefect/apache_airflow.md' + - Dagster: 'job_orchestration/prefect/dagster.md' + - Mage: 'job_orchestration/prefect/mage.md' diff --git a/site/404.html b/site/404.html index 94af738..23fee46 100644 --- a/site/404.html +++ b/site/404.html @@ -213,16 +213,17 @@ -
  • - - - + +
  • + + File - -
  • + + + @@ -230,16 +231,17 @@ -
  • - - - + +
  • + + - Job_orchestration + Job orchestration - -
  • + + + @@ -320,6 +322,55 @@ + + + + + + + + +
  • + + + + + + + + + + + +
  • + + + + + + + + + + + + + + + + +
  • + + + + + + + + + + + +
  • + + diff --git a/site/assets/images/social/file/apache_avro/apache_hudi.png b/site/assets/images/social/file/apache_avro/apache_hudi.png new file mode 100644 index 0000000..33c3e1e Binary files /dev/null and b/site/assets/images/social/file/apache_avro/apache_hudi.png differ diff --git a/site/assets/images/social/file/apache_avro/apache_iceberg.png b/site/assets/images/social/file/apache_avro/apache_iceberg.png new file mode 100644 index 0000000..1b62fc1 Binary files /dev/null and b/site/assets/images/social/file/apache_avro/apache_iceberg.png differ diff --git a/site/assets/images/social/file/apache_avro/apache_orc.png b/site/assets/images/social/file/apache_avro/apache_orc.png new file mode 100644 index 0000000..75d8a14 Binary files /dev/null and b/site/assets/images/social/file/apache_avro/apache_orc.png differ diff --git a/site/assets/images/social/file/apache_avro/apache_parquet.png b/site/assets/images/social/file/apache_avro/apache_parquet.png new file mode 100644 index 0000000..9773c8c Binary files /dev/null and b/site/assets/images/social/file/apache_avro/apache_parquet.png differ diff --git a/site/assets/images/social/file/apache_avro/csv.png b/site/assets/images/social/file/apache_avro/csv.png new file mode 100644 index 0000000..e3226f9 Binary files /dev/null and b/site/assets/images/social/file/apache_avro/csv.png differ diff --git a/site/assets/images/social/file/apache_avro/delta_lake.png b/site/assets/images/social/file/apache_avro/delta_lake.png new file mode 100644 index 0000000..f57576a Binary files /dev/null and b/site/assets/images/social/file/apache_avro/delta_lake.png differ diff --git a/site/assets/images/social/file/apache_hudi/apache_avro.png b/site/assets/images/social/file/apache_hudi/apache_avro.png new file mode 100644 index 0000000..7939499 Binary files /dev/null and b/site/assets/images/social/file/apache_hudi/apache_avro.png differ diff --git a/site/assets/images/social/file/apache_hudi/apache_iceberg.png 
b/site/assets/images/social/file/apache_hudi/apache_iceberg.png new file mode 100644 index 0000000..3414966 Binary files /dev/null and b/site/assets/images/social/file/apache_hudi/apache_iceberg.png differ diff --git a/site/assets/images/social/file/apache_hudi/apache_orc.png b/site/assets/images/social/file/apache_hudi/apache_orc.png new file mode 100644 index 0000000..cc884b6 Binary files /dev/null and b/site/assets/images/social/file/apache_hudi/apache_orc.png differ diff --git a/site/assets/images/social/file/apache_hudi/apache_parquet.png b/site/assets/images/social/file/apache_hudi/apache_parquet.png new file mode 100644 index 0000000..c36bb1b Binary files /dev/null and b/site/assets/images/social/file/apache_hudi/apache_parquet.png differ diff --git a/site/assets/images/social/file/apache_hudi/csv.png b/site/assets/images/social/file/apache_hudi/csv.png new file mode 100644 index 0000000..98e0e88 Binary files /dev/null and b/site/assets/images/social/file/apache_hudi/csv.png differ diff --git a/site/assets/images/social/file/apache_hudi/delta_lake.png b/site/assets/images/social/file/apache_hudi/delta_lake.png new file mode 100644 index 0000000..a1b0955 Binary files /dev/null and b/site/assets/images/social/file/apache_hudi/delta_lake.png differ diff --git a/site/assets/images/social/file/apache_iceberg/apache_avro.png b/site/assets/images/social/file/apache_iceberg/apache_avro.png new file mode 100644 index 0000000..c40f093 Binary files /dev/null and b/site/assets/images/social/file/apache_iceberg/apache_avro.png differ diff --git a/site/assets/images/social/file/apache_iceberg/apache_hudi.png b/site/assets/images/social/file/apache_iceberg/apache_hudi.png new file mode 100644 index 0000000..ff5318e Binary files /dev/null and b/site/assets/images/social/file/apache_iceberg/apache_hudi.png differ diff --git a/site/assets/images/social/file/apache_iceberg/apache_orc.png b/site/assets/images/social/file/apache_iceberg/apache_orc.png new file mode 100644 index 
0000000..0492293 Binary files /dev/null and b/site/assets/images/social/file/apache_iceberg/apache_orc.png differ diff --git a/site/assets/images/social/file/apache_iceberg/apache_parquet.png b/site/assets/images/social/file/apache_iceberg/apache_parquet.png new file mode 100644 index 0000000..9517168 Binary files /dev/null and b/site/assets/images/social/file/apache_iceberg/apache_parquet.png differ diff --git a/site/assets/images/social/file/apache_iceberg/csv.png b/site/assets/images/social/file/apache_iceberg/csv.png new file mode 100644 index 0000000..72aa261 Binary files /dev/null and b/site/assets/images/social/file/apache_iceberg/csv.png differ diff --git a/site/assets/images/social/file/apache_iceberg/delta_lake.png b/site/assets/images/social/file/apache_iceberg/delta_lake.png new file mode 100644 index 0000000..8adf4fc Binary files /dev/null and b/site/assets/images/social/file/apache_iceberg/delta_lake.png differ diff --git a/site/assets/images/social/file/apache_orc/apache_avro.png b/site/assets/images/social/file/apache_orc/apache_avro.png new file mode 100644 index 0000000..ad37e0a Binary files /dev/null and b/site/assets/images/social/file/apache_orc/apache_avro.png differ diff --git a/site/assets/images/social/file/apache_orc/apache_hudi.png b/site/assets/images/social/file/apache_orc/apache_hudi.png new file mode 100644 index 0000000..4de808e Binary files /dev/null and b/site/assets/images/social/file/apache_orc/apache_hudi.png differ diff --git a/site/assets/images/social/file/apache_orc/apache_iceberg.png b/site/assets/images/social/file/apache_orc/apache_iceberg.png new file mode 100644 index 0000000..9836eeb Binary files /dev/null and b/site/assets/images/social/file/apache_orc/apache_iceberg.png differ diff --git a/site/assets/images/social/file/apache_orc/apache_parquet.png b/site/assets/images/social/file/apache_orc/apache_parquet.png new file mode 100644 index 0000000..b7313d2 Binary files /dev/null and 
b/site/assets/images/social/file/apache_orc/apache_parquet.png differ diff --git a/site/assets/images/social/file/apache_orc/csv.png b/site/assets/images/social/file/apache_orc/csv.png new file mode 100644 index 0000000..168c3f4 Binary files /dev/null and b/site/assets/images/social/file/apache_orc/csv.png differ diff --git a/site/assets/images/social/file/apache_orc/delta_lake.png b/site/assets/images/social/file/apache_orc/delta_lake.png new file mode 100644 index 0000000..62154f5 Binary files /dev/null and b/site/assets/images/social/file/apache_orc/delta_lake.png differ diff --git a/site/assets/images/social/file/apache_parquet/apache_avro.png b/site/assets/images/social/file/apache_parquet/apache_avro.png new file mode 100644 index 0000000..1040054 Binary files /dev/null and b/site/assets/images/social/file/apache_parquet/apache_avro.png differ diff --git a/site/assets/images/social/file/apache_parquet/apache_hudi.png b/site/assets/images/social/file/apache_parquet/apache_hudi.png new file mode 100644 index 0000000..6ad4937 Binary files /dev/null and b/site/assets/images/social/file/apache_parquet/apache_hudi.png differ diff --git a/site/assets/images/social/file/apache_parquet/apache_iceberg.png b/site/assets/images/social/file/apache_parquet/apache_iceberg.png new file mode 100644 index 0000000..b2113ad Binary files /dev/null and b/site/assets/images/social/file/apache_parquet/apache_iceberg.png differ diff --git a/site/assets/images/social/file/apache_parquet/apache_orc.png b/site/assets/images/social/file/apache_parquet/apache_orc.png new file mode 100644 index 0000000..146f9f3 Binary files /dev/null and b/site/assets/images/social/file/apache_parquet/apache_orc.png differ diff --git a/site/assets/images/social/file/apache_parquet/csv.png b/site/assets/images/social/file/apache_parquet/csv.png new file mode 100644 index 0000000..7700278 Binary files /dev/null and b/site/assets/images/social/file/apache_parquet/csv.png differ diff --git 
a/site/assets/images/social/file/apache_parquet/delta_lake.png b/site/assets/images/social/file/apache_parquet/delta_lake.png new file mode 100644 index 0000000..29fae6e Binary files /dev/null and b/site/assets/images/social/file/apache_parquet/delta_lake.png differ diff --git a/site/assets/images/social/file/csv/apache_avro.png b/site/assets/images/social/file/csv/apache_avro.png new file mode 100644 index 0000000..c8118d7 Binary files /dev/null and b/site/assets/images/social/file/csv/apache_avro.png differ diff --git a/site/assets/images/social/file/csv/apache_hudi.png b/site/assets/images/social/file/csv/apache_hudi.png new file mode 100644 index 0000000..be5751c Binary files /dev/null and b/site/assets/images/social/file/csv/apache_hudi.png differ diff --git a/site/assets/images/social/file/csv/apache_iceberg.png b/site/assets/images/social/file/csv/apache_iceberg.png new file mode 100644 index 0000000..1fb0d49 Binary files /dev/null and b/site/assets/images/social/file/csv/apache_iceberg.png differ diff --git a/site/assets/images/social/file/csv/apache_orc.png b/site/assets/images/social/file/csv/apache_orc.png new file mode 100644 index 0000000..e08051f Binary files /dev/null and b/site/assets/images/social/file/csv/apache_orc.png differ diff --git a/site/assets/images/social/file/csv/apache_parquet.png b/site/assets/images/social/file/csv/apache_parquet.png new file mode 100644 index 0000000..5a57461 Binary files /dev/null and b/site/assets/images/social/file/csv/apache_parquet.png differ diff --git a/site/assets/images/social/file/csv/delta_lake.png b/site/assets/images/social/file/csv/delta_lake.png new file mode 100644 index 0000000..9dbbc4b Binary files /dev/null and b/site/assets/images/social/file/csv/delta_lake.png differ diff --git a/site/assets/images/social/file/delta_lake/apache_avro.png b/site/assets/images/social/file/delta_lake/apache_avro.png new file mode 100644 index 0000000..4dd861f Binary files /dev/null and 
b/site/assets/images/social/file/delta_lake/apache_avro.png differ diff --git a/site/assets/images/social/file/delta_lake/apache_hudi.png b/site/assets/images/social/file/delta_lake/apache_hudi.png new file mode 100644 index 0000000..503506f Binary files /dev/null and b/site/assets/images/social/file/delta_lake/apache_hudi.png differ diff --git a/site/assets/images/social/file/delta_lake/apache_iceberg.png b/site/assets/images/social/file/delta_lake/apache_iceberg.png new file mode 100644 index 0000000..c614303 Binary files /dev/null and b/site/assets/images/social/file/delta_lake/apache_iceberg.png differ diff --git a/site/assets/images/social/file/delta_lake/apache_orc.png b/site/assets/images/social/file/delta_lake/apache_orc.png new file mode 100644 index 0000000..966d829 Binary files /dev/null and b/site/assets/images/social/file/delta_lake/apache_orc.png differ diff --git a/site/assets/images/social/file/delta_lake/apache_parquet.png b/site/assets/images/social/file/delta_lake/apache_parquet.png new file mode 100644 index 0000000..04ed40d Binary files /dev/null and b/site/assets/images/social/file/delta_lake/apache_parquet.png differ diff --git a/site/assets/images/social/file/delta_lake/csv.png b/site/assets/images/social/file/delta_lake/csv.png new file mode 100644 index 0000000..25f82f5 Binary files /dev/null and b/site/assets/images/social/file/delta_lake/csv.png differ diff --git a/site/assets/images/social/job_orchestration/apache_airflow/dagster.png b/site/assets/images/social/job_orchestration/apache_airflow/dagster.png new file mode 100644 index 0000000..283da72 Binary files /dev/null and b/site/assets/images/social/job_orchestration/apache_airflow/dagster.png differ diff --git a/site/assets/images/social/job_orchestration/apache_airflow/mage.png b/site/assets/images/social/job_orchestration/apache_airflow/mage.png new file mode 100644 index 0000000..b8bcc53 Binary files /dev/null and 
b/site/assets/images/social/job_orchestration/apache_airflow/mage.png differ diff --git a/site/assets/images/social/job_orchestration/apache_airflow/prefect.png b/site/assets/images/social/job_orchestration/apache_airflow/prefect.png new file mode 100644 index 0000000..cd62d86 Binary files /dev/null and b/site/assets/images/social/job_orchestration/apache_airflow/prefect.png differ diff --git a/site/assets/images/social/job_orchestration/dagster/apache_airflow.png b/site/assets/images/social/job_orchestration/dagster/apache_airflow.png new file mode 100644 index 0000000..969dc40 Binary files /dev/null and b/site/assets/images/social/job_orchestration/dagster/apache_airflow.png differ diff --git a/site/assets/images/social/job_orchestration/dagster/mage.png b/site/assets/images/social/job_orchestration/dagster/mage.png new file mode 100644 index 0000000..6119920 Binary files /dev/null and b/site/assets/images/social/job_orchestration/dagster/mage.png differ diff --git a/site/assets/images/social/job_orchestration/dagster/prefect.png b/site/assets/images/social/job_orchestration/dagster/prefect.png new file mode 100644 index 0000000..7b1832b Binary files /dev/null and b/site/assets/images/social/job_orchestration/dagster/prefect.png differ diff --git a/site/assets/images/social/job_orchestration/mage/apache_airflow.png b/site/assets/images/social/job_orchestration/mage/apache_airflow.png new file mode 100644 index 0000000..a32d84e Binary files /dev/null and b/site/assets/images/social/job_orchestration/mage/apache_airflow.png differ diff --git a/site/assets/images/social/job_orchestration/mage/dagster.png b/site/assets/images/social/job_orchestration/mage/dagster.png new file mode 100644 index 0000000..dcae96a Binary files /dev/null and b/site/assets/images/social/job_orchestration/mage/dagster.png differ diff --git a/site/assets/images/social/job_orchestration/mage/prefect.png b/site/assets/images/social/job_orchestration/mage/prefect.png new file mode 100644 index 
0000000..478d169 Binary files /dev/null and b/site/assets/images/social/job_orchestration/mage/prefect.png differ diff --git a/site/assets/images/social/job_orchestration/prefect/apache_airflow.png b/site/assets/images/social/job_orchestration/prefect/apache_airflow.png new file mode 100644 index 0000000..efda902 Binary files /dev/null and b/site/assets/images/social/job_orchestration/prefect/apache_airflow.png differ diff --git a/site/assets/images/social/job_orchestration/prefect/dagster.png b/site/assets/images/social/job_orchestration/prefect/dagster.png new file mode 100644 index 0000000..2e0d59e Binary files /dev/null and b/site/assets/images/social/job_orchestration/prefect/dagster.png differ diff --git a/site/assets/images/social/job_orchestration/prefect/mage.png b/site/assets/images/social/job_orchestration/prefect/mage.png new file mode 100644 index 0000000..68065f6 Binary files /dev/null and b/site/assets/images/social/job_orchestration/prefect/mage.png differ diff --git a/site/database/index.html b/site/database/index.html index 99486e1..35b3666 100644 --- a/site/database/index.html +++ b/site/database/index.html @@ -231,16 +231,17 @@ -
  • - - - + +
  • + + File - -
  • + + + @@ -248,16 +249,17 @@ -
  • - - - + +
  • + + - Job_orchestration + Job orchestration - -
  • + + + @@ -338,6 +340,55 @@ + + + + + + + + +
  • + + + + + + + + + + + +
  • + + + + + + + + + + + + + + + + +
  • + + + + + + + + + + + +
  • + + diff --git a/site/file/apache_avro/apache_hudi/index.html b/site/file/apache_avro/apache_hudi/index.html new file mode 100644 index 0000000..5dd638a --- /dev/null +++ b/site/file/apache_avro/apache_hudi/index.html @@ -0,0 +1,2912 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Compare File technologies/tools - Tech Diff + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + Skip to content + + +
    +
    + +
    + + + + +
    + + +
    + +
    + + + + + + + + + +
    +
    + + + +
    +
    +
    + + + + + + + + + +
    +
    +
    + + + + +
    + +
    + + + + + + + +

    File

    +

    Apache Avro logo Apache Avro +Apache Hudi logo Apache Hudi

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    AttributeApache AvroApache Hudi
    NameApache AvroApache Hudi
    DescriptionApache Avro is the leading serialization format for record data, and first choice for streaming data pipelines.Apache Hudi is a transactional data lake platform that brings database and data warehouse capabilities to the data lake. Utilises data stored in either parquet or orc.
    LicenseApache license 2.0Apache license 2.0
    Source codehttps://github.com/apache/avrohttps://github.com/apache/hudi
    Websitehttps://avro.apache.org/https://hudi.apache.org/
    Year created20092016
    CompanyApacheUber
    Language supportjava, c++, c#, c, python, javascript, perl, ruby, php, rust
    Use casesStream processing, Analytics, Efficient data exchangeIncremental data processing, Data upserts, Change Data Capture (CDC), ACID transactions
    Is human readable +
    no
    +
    +
    no
    +
    Orientationrowcolumn or row
    Has type system +
    yes
    +
    +
    yes
    +
    Has nested structure support +
    yes
    +
    +
    yes
    +
    Has native compression +
    yes
    +
    +
    yes
    +
    Has encoding support +
    yes
    +
    +
    yes
    +
    Has constraint support +
    no
    +
    +
    yes
    +
    Has acid support +
    no
    +
    +
    yes
    +
    Has metadata +
    yes
    +
    +
    yes
    +
    Has encryption support +
    no
    +
    +
    maybe
    +
    Data processing framework support + Apache Flink, + Apache Gobblin, + Apache NiFi, + Apache Pig, + Apache Spark, + + Apache Spark, + Apache Flink, +
    Analytics query support + Apache Impala, + Apache Druid, + Apache Hive, + Apache Pinot, + AWS Athena, + BigQuery, + Clickhouse, + Firebolt, + + Apache Hive, + Apache Impala, + AWS Athena, + BigQuery, + Clickhouse, + Presto, + Trino, +
    + + + + + + + + + + + + +
    +
    + + + +
    + + + +
    + + + +
    +
    +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/site/file/apache_avro/apache_iceberg/index.html b/site/file/apache_avro/apache_iceberg/index.html new file mode 100644 index 0000000..da62a9e --- /dev/null +++ b/site/file/apache_avro/apache_iceberg/index.html @@ -0,0 +1,2918 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Compare File technologies/tools - Tech Diff + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + Skip to content + + +
    +
    + +
    + + + + +
    + + +
    + +
    + + + + + + + + + +
    +
    + + + +
    +
    +
    + + + + + + + + + +
    +
    +
    + + + + +
    + +
    + + + + + + + +

    File

    +

    Apache Avro logo Apache Avro +Apache Iceberg logo Apache Iceberg

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    AttributeApache AvroApache Iceberg
    NameApache AvroApache Iceberg
    DescriptionApache Avro is the leading serialization format for record data, and first choice for streaming data pipelines.Iceberg is a high-performance format for huge analytic tables. Utilises data stored in either parquet, avro, or orc.
    LicenseApache license 2.0Apache license 2.0
    Source codehttps://github.com/apache/avrohttps://github.com/apache/iceberg
    Websitehttps://avro.apache.org/https://iceberg.apache.org/
    Year created20092017
    CompanyApacheNetflix
    Language supportjava, c++, c#, c, python, javascript, perl, ruby, php, rust
    Use casesStream processing, Analytics, Efficient data exchangeWrite once read many, Analytics, Efficient storage, ACID transactions
    Is human readable +
    no
    +
    +
    no
    +
    Orientationrowcolumn or row
    Has type system +
    yes
    +
    +
    yes
    +
    Has nested structure support +
    yes
    +
    +
    yes
    +
    Has native compression +
    yes
    +
    +
    yes
    +
    Has encoding support +
    yes
    +
    +
    yes
    +
    Has constraint support +
    no
    +
    +
    no
    +
    Has acid support +
    no
    +
    +
    yes
    +
    Has metadata +
    yes
    +
    +
    yes
    +
    Has encryption support +
    no
    +
    +
    maybe
    +
    Data processing framework support + Apache Flink, + Apache Gobblin, + Apache NiFi, + Apache Pig, + Apache Spark, + + Apache Drill, + Apache Flink, + Apache Gobblin, + Apache Pig, + Apache Spark, +
    Analytics query support + Apache Impala, + Apache Druid, + Apache Hive, + Apache Pinot, + AWS Athena, + BigQuery, + Clickhouse, + Firebolt, + + Apache Impala, + Apache Druid, + Apache Hive, + AWS Athena, + BigQuery, + Clickhouse, + Dremio, + DuckDB, + Presto, + Trino, +
    + + + + + + + + + + + + +
    +
    + + + +
    + + + +
    + + + +
    +
    +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/site/file/apache_avro/apache_orc/index.html b/site/file/apache_avro/apache_orc/index.html new file mode 100644 index 0000000..314a57e --- /dev/null +++ b/site/file/apache_avro/apache_orc/index.html @@ -0,0 +1,2919 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Compare File technologies/tools - Tech Diff + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + Skip to content + + +
    +
    + +
    + + + + +
    + + +
    + +
    + + + + + + + + + +
    +
    + + + +
    +
    +
    + + + + + + + + + +
    +
    +
    + + + + +
    + +
    + + + + + + + +

    File

    +

    Apache Avro logo Apache Avro +Apache ORC logo Apache ORC

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    AttributeApache AvroApache ORC
    NameApache AvroApache ORC
    DescriptionApache Avro is the leading serialization format for record data, and first choice for streaming data pipelines.ORC is a self-describing type-aware columnar file format designed for Hadoop workloads.
    LicenseApache license 2.0Apache license 2.0
    Source codehttps://github.com/apache/avrohttps://github.com/apache/orc
    Websitehttps://avro.apache.org/https://orc.apache.org/
    Year created20092013
    CompanyApacheHortonworks, Facebook
    Language supportjava, c++, c#, c, python, javascript, perl, ruby, php, rustjava, scala, c++, python
    Use casesStream processing, Analytics, Efficient data exchangeWrite once read many, Analytics, Efficient storage, ACID transactions
    Is human readable +
    no
    +
    +
    no
    +
    Orientationrowrow
    Has type system +
    yes
    +
    +
    yes
    +
    Has nested structure support +
    yes
    +
    +
    yes
    +
    Has native compression +
    yes
    +
    +
    yes
    +
    Has encoding support +
    yes
    +
    +
    yes
    +
    Has constraint support +
    no
    +
    +
    no
    +
    Has acid support +
    no
    +
    +
    no
    +
    Has metadata +
    yes
    +
    +
    yes
    +
    Has encryption support +
    no
    +
    +
    yes
    +
    Data processing framework support + Apache Flink, + Apache Gobblin, + Apache NiFi, + Apache Pig, + Apache Spark, + + Apache Flink, + Apache Gobblin, + Apache Hadoop, + Apache NiFi, + Apache Pig, + Apache Spark, +
    Analytics query support + Apache Impala, + Apache Druid, + Apache Hive, + Apache Pinot, + AWS Athena, + BigQuery, + Clickhouse, + Firebolt, + + Apache Impala, + Apache Druid, + Apache Hive, + Apache Pinot, + AWS Athena, + BigQuery, + Clickhouse, + Firebolt, + Presto, + Trino, +
    + + + + + + + + + + + + +
    +
    + + + +
    + + + +
    + + + +
    +
    +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/site/file/apache_avro/apache_parquet/index.html b/site/file/apache_avro/apache_parquet/index.html new file mode 100644 index 0000000..a0d9b5f --- /dev/null +++ b/site/file/apache_avro/apache_parquet/index.html @@ -0,0 +1,2918 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Compare File technologies/tools - Tech Diff + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + Skip to content + + +
    +
    + +
    + + + + +
    + + +
    + +
    + + + + + + + + + +
    +
    + + + +
    +
    +
    + + + + + + + + + +
    +
    +
    + + + + +
    + +
    + + + + + + + +

    File

    +

    Apache Avro logo Apache Avro +Apache Parquet logo Apache Parquet

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    AttributeApache AvroApache Parquet
    NameApache AvroApache Parquet
    DescriptionApache Avro is the leading serialization format for record data, and first choice for streaming data pipelines.Apache Parquet is an open source, column-oriented data file format designed for efficient data storage and retrieval.
    LicenseApache license 2.0Apache license 2.0
    Source codehttps://github.com/apache/avrohttps://github.com/apache/parquet-format
    Websitehttps://avro.apache.org/https://parquet.apache.org/
    Year created20092013
    CompanyApacheTwitter, Cloudera
    Language supportjava, c++, c#, c, python, javascript, perl, ruby, php, rustjava, scala, c++, python, r, php
    Use casesStream processing, Analytics, Efficient data exchangeWrite once read many, Analytics, Efficient storage, Column based queries
    Is human readable +
    no
    +
    +
    no
    +
    Orientationrowcolumn
    Has type system +
    yes
    +
    +
    yes
    +
    Has nested structure support +
    yes
    +
    +
    yes
    +
    Has native compression +
    yes
    +
    +
    yes
    +
    Has encoding support +
    yes
    +
    +
    yes
    +
    Has constraint support +
    no
    +
    +
    no
    +
    Has acid support +
    no
    +
    +
    no
    +
    Has metadata +
    yes
    +
    +
    yes
    +
    Has encryption support +
    no
    +
    +
    yes
    +
    Data processing framework support + Apache Flink, + Apache Gobblin, + Apache NiFi, + Apache Pig, + Apache Spark, + + Apache Beam, + Apache Drill, + Apache Flink, + Apache Spark, +
    Analytics query support + Apache Impala, + Apache Druid, + Apache Hive, + Apache Pinot, + AWS Athena, + BigQuery, + Clickhouse, + Firebolt, + + Apache Hive, + Apache Impala, + Apache Druid, + Apache Pinot, + AWS Athena, + Azure Synapse, + BigQuery, + Clickhouse, + Dremio, + DuckDB, + Firebolt, +
    + + + + + + + + + + + + +
    +
    + + + +
    + + + +
    + + + +
    +
    +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/site/file/apache_avro/csv/index.html b/site/file/apache_avro/csv/index.html new file mode 100644 index 0000000..40d06a3 --- /dev/null +++ b/site/file/apache_avro/csv/index.html @@ -0,0 +1,2921 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Compare File technologies/tools - Tech Diff + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + Skip to content + + +
    +
    + +
    + + + + +
    + + +
    + +
    + + + + + + + + + +
    +
    + + + +
    +
    +
    + + + + + + + + + +
    +
    +
    + + + + +
    + +
    + + + + + + + +

    File

    +

    Apache Avro logo Apache Avro +CSV logo CSV

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    AttributeApache AvroCSV
    NameApache AvroCSV
    DescriptionApache Avro is the leading serialization format for record data, and first choice for streaming data pipelines.Comma-Separated Values (CSV) is a text file format that uses commas to separate values in plain text.
    LicenseApache license 2.0N/A
    Source codehttps://github.com/apache/avro
    Websitehttps://avro.apache.org/https://www.rfc-editor.org/rfc/rfc4180.html
    Year created20090
    CompanyApache
    Language supportjava, c++, c#, c, python, javascript, perl, ruby, php, rustjava, scala, c++, python, r, php, go
    Use casesStream processing, Analytics, Efficient data exchange
    Is human readable +
    no
    +
    +
    yes
    +
    Orientationrowrow
    Has type system +
    yes
    +
    +
    no
    +
    Has nested structure support +
    yes
    +
    +
    no
    +
    Has native compression +
    yes
    +
    +
    no
    +
    Has encoding support +
    yes
    +
    +
    no
    +
    Has constraint support +
    no
    +
    +
    no
    +
    Has acid support +
    no
    +
    +
    no
    +
    Has metadata +
    yes
    +
    +
    no
    +
    Has encryption support +
    no
    +
    +
    no
    +
    Data processing framework support + Apache Flink, + Apache Gobblin, + Apache NiFi, + Apache Pig, + Apache Spark, + + Apache Beam, + Apache Drill, + Apache Flink, + Apache Gobblin, + Apache Hive, + Apache NiFi, + Apache Pig, + Apache Spark, +
    Analytics query support + Apache Impala, + Apache Druid, + Apache Hive, + Apache Pinot, + AWS Athena, + BigQuery, + Clickhouse, + Firebolt, + + Apache Impala, + Apache Druid, + Apache Pinot, + AWS Athena, + Azure Synapse, + BigQuery, + Clickhouse, + Dremio, + DuckDB, + Firebolt, +
    + + + + + + + + + + + + +
    +
    + + + +
    + + + +
    + + + +
    +
    +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/site/file/apache_avro/delta_lake/index.html b/site/file/apache_avro/delta_lake/index.html new file mode 100644 index 0000000..8b9959e --- /dev/null +++ b/site/file/apache_avro/delta_lake/index.html @@ -0,0 +1,2914 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Compare File technologies/tools - Tech Diff + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + Skip to content + + +
    +
    + +
    + + + + +
    + + +
    + +
    + + + + + + + + + +
    +
    + + + +
    +
    +
    + + + + + + + + + +
    +
    +
    + + + + +
    + +
    + + + + + + + +

    File

    +

    Apache Avro logo Apache Avro +Delta Lake logo Delta Lake

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    AttributeApache AvroDelta Lake
    NameApache AvroDelta Lake
    DescriptionApache Avro is the leading serialization format for record data, and first choice for streaming data pipelines.Delta Lake is an open-source storage framework that enables building a Lakehouse architecture.
    LicenseApache license 2.0Apache license 2.0
    Source codehttps://github.com/apache/avrohttps://github.com/delta-io/delta
    Websitehttps://avro.apache.org/https://delta.io/
    Year created20092019
    CompanyApacheDatabricks
    Language supportjava, c++, c#, c, python, javascript, perl, ruby, php, rustscala, java, python, rust
    Use casesStream processing, Analytics, Efficient data exchangeWrite once read many, Analytics, Efficient storage, ACID transactions
    Is human readable +
    no
    +
    +
    no
    +
    Orientationrowcolumn
    Has type system +
    yes
    +
    +
    yes
    +
    Has nested structure support +
    yes
    +
    +
    yes
    +
    Has native compression +
    yes
    +
    +
    yes
    +
    Has encoding support +
    yes
    +
    +
    yes
    +
    Has constraint support +
    no
    +
    +
    yes
    +
    Has acid support +
    no
    +
    +
    yes
    +
    Has metadata +
    yes
    +
    +
    yes
    +
    Has encryption support +
    no
    +
    +
    maybe
    +
    Data processing framework support + Apache Flink, + Apache Gobblin, + Apache NiFi, + Apache Pig, + Apache Spark, + + Apache Drill, + Apache Flink, + Apache Spark, +
    Analytics query support + Apache Impala, + Apache Druid, + Apache Hive, + Apache Pinot, + AWS Athena, + BigQuery, + Clickhouse, + Firebolt, + + Apache Hive, + AWS Athena, + Azure Synapse, + BigQuery, + Clickhouse, + Dremio, + Presto, + Trino, +
    + + + + + + + + + + + + +
    +
    + + + +
    + + + +
    + + + +
    +
    +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/site/file/apache_hudi/apache_avro/index.html b/site/file/apache_hudi/apache_avro/index.html new file mode 100644 index 0000000..271df57 --- /dev/null +++ b/site/file/apache_hudi/apache_avro/index.html @@ -0,0 +1,2912 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Compare File technologies/tools - Tech Diff + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + Skip to content + + +
    +
    + +
    + + + + +
    + + +
    + +
    + + + + + + + + + +
    +
    + + + +
    +
    +
    + + + + + + + + + +
    +
    +
    + + + + +
    + +
    + + + + + + + +

    File

    +

    Apache Hudi logo Apache Hudi +Apache Avro logo Apache Avro

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    AttributeApache HudiApache Avro
    NameApache HudiApache Avro
    DescriptionApache Hudi is a transactional data lake platform that brings database and data warehouse capabilities to the data lake. Utilises data stored in either parquet or orc.Apache Avro is the leading serialization format for record data, and first choice for streaming data pipelines.
    Source codehttps://github.com/apache/hudihttps://github.com/apache/avro
    Websitehttps://hudi.apache.org/https://avro.apache.org/
    LicenseApache license 2.0Apache license 2.0
    Year created20162009
    CompanyUberApache
    Use casesIncremental data processing, Data upserts, Change Data Capture (CDC), ACID transactionsStream processing, Analytics, Efficient data exchange
    Language supportjava, c++, c#, c, python, javascript, perl, ruby, php, rust
    Is human readable +
    no
    +
    +
    no
    +
    Orientationcolumn or rowrow
    Has type system +
    yes
    +
    +
    yes
    +
    Has nested structure support +
    yes
    +
    +
    yes
    +
    Has native compression +
    yes
    +
    +
    yes
    +
    Has encoding support +
    yes
    +
    +
    yes
    +
    Has constraint support +
    yes
    +
    +
    no
    +
    Has acid support +
    yes
    +
    +
    no
    +
    Has metadata +
    yes
    +
    +
    yes
    +
    Has encryption support +
    maybe
    +
    +
    no
    +
    Data processing framework support + Apache Spark, + Apache Flink, + + Apache Flink, + Apache Gobblin, + Apache NiFi, + Apache Pig, + Apache Spark, +
    Analytics query support + Apache Hive, + Apache Impala, + AWS Athena, + BigQuery, + Clickhouse, + Presto, + Trino, + + Apache Impala, + Apache Druid, + Apache Hive, + Apache Pinot, + AWS Athena, + BigQuery, + Clickhouse, + Firebolt, +
    + + + + + + + + + + + + +
    +
    + + + +
    + + + +
    + + + +
    +
    +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/site/file/apache_hudi/apache_iceberg/index.html b/site/file/apache_hudi/apache_iceberg/index.html new file mode 100644 index 0000000..7adfd43 --- /dev/null +++ b/site/file/apache_hudi/apache_iceberg/index.html @@ -0,0 +1,2914 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Compare File technologies/tools - Tech Diff + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + Skip to content + + +
    +
    + +
    + + + + +
    + + +
    + +
    + + + + + + + + + +
    +
    + + + +
    +
    +
    + + + + + + + + + +
    +
    +
    + + + + +
    + +
    + + + + + + + +

    File

    +

    Apache Hudi logo Apache Hudi +Apache Iceberg logo Apache Iceberg

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    AttributeApache HudiApache Iceberg
    NameApache HudiApache Iceberg
    DescriptionApache Hudi is a transactional data lake platform that brings database and data warehouse capabilities to the data lake. Utilises data stored in either parquet or orc.Iceberg is a high-performance format for huge analytic tables. Utilises data stored in either parquet, avro, or orc.
    Source codehttps://github.com/apache/hudihttps://github.com/apache/iceberg
    Websitehttps://hudi.apache.org/https://iceberg.apache.org/
    LicenseApache license 2.0Apache license 2.0
    Year created20162017
    CompanyUberNetflix
    Use casesIncremental data processing, Data upserts, Change Data Capture (CDC), ACID transactionsWrite once read many, Analytics, Efficient storage, ACID transactions
    Language support
    Is human readable +
    no
    +
    +
    no
    +
    Orientationcolumn or rowcolumn or row
    Has type system +
    yes
    +
    +
    yes
    +
    Has nested structure support +
    yes
    +
    +
    yes
    +
    Has native compression +
    yes
    +
    +
    yes
    +
    Has encoding support +
    yes
    +
    +
    yes
    +
    Has constraint support +
    yes
    +
    +
    no
    +
    Has acid support +
    yes
    +
    +
    yes
    +
    Has metadata +
    yes
    +
    +
    yes
    +
    Has encryption support +
    maybe
    +
    +
    maybe
    +
    Data processing framework support + Apache Spark, + Apache Flink, + + Apache Drill, + Apache Flink, + Apache Gobblin, + Apache Pig, + Apache Spark, +
    Analytics query support + Apache Hive, + Apache Impala, + AWS Athena, + BigQuery, + Clickhouse, + Presto, + Trino, + + Apache Impala, + Apache Druid, + Apache Hive, + AWS Athena, + BigQuery, + Clickhouse, + Dremio, + DuckDB, + Presto, + Trino, +
    + + + + + + + + + + + + +
    +
    + + + +
    + + + +
    + + + +
    +
    +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/site/file/apache_hudi/apache_orc/index.html b/site/file/apache_hudi/apache_orc/index.html new file mode 100644 index 0000000..46a8c5e --- /dev/null +++ b/site/file/apache_hudi/apache_orc/index.html @@ -0,0 +1,2915 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Compare File technologies/tools - Tech Diff + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + Skip to content + + +
    +
    + +
    + + + + +
    + + +
    + +
    + + + + + + + + + +
    +
    + + + +
    +
    +
    + + + + + + + + + +
    +
    +
    + + + + +
    + +
    + + + + + + + +

    File

    +

    Apache Hudi logo Apache Hudi +Apache ORC logo Apache ORC

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    AttributeApache HudiApache ORC
    NameApache HudiApache ORC
    DescriptionApache Hudi is a transactional data lake platform that brings database and data warehouse capabilities to the data lake. Utilises data stored in either parquet or orc.ORC is a self-describing type-aware columnar file format designed for Hadoop workloads.
    Source codehttps://github.com/apache/hudihttps://github.com/apache/orc
    Websitehttps://hudi.apache.org/https://orc.apache.org/
    LicenseApache license 2.0Apache license 2.0
    Year created20162013
    CompanyUberHortonworks, Facebook
    Use casesIncremental data processing, Data upserts, Change Data Capture (CDC), ACID transactionsWrite once read many, Analytics, Efficient storage, ACID transactions
    Language supportjava, scala, c++, python
    Is human readable +
    no
    +
    +
    no
    +
    Orientationcolumn or rowrow
    Has type system +
    yes
    +
    +
    yes
    +
    Has nested structure support +
    yes
    +
    +
    yes
    +
    Has native compression +
    yes
    +
    +
    yes
    +
    Has encoding support +
    yes
    +
    +
    yes
    +
    Has constraint support +
    yes
    +
    +
    no
    +
    Has acid support +
    yes
    +
    +
    no
    +
    Has metadata +
    yes
    +
    +
    yes
    +
    Has encryption support +
    maybe
    +
    +
    yes
    +
    Data processing framework support + Apache Spark, + Apache Flink, + + Apache Flink, + Apache Gobblin, + Apache Hadoop, + Apache NiFi, + Apache Pig, + Apache Spark, +
    Analytics query support + Apache Hive, + Apache Impala, + AWS Athena, + BigQuery, + Clickhouse, + Presto, + Trino, + + Apache Impala, + Apache Druid, + Apache Hive, + Apache Pinot, + AWS Athena, + BigQuery, + Clickhouse, + Firebolt, + Presto, + Trino, +
    + + + + + + + + + + + + +
    +
    + + + +
    + + + +
    + + + +
    +
    +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/site/file/apache_hudi/apache_parquet/index.html b/site/file/apache_hudi/apache_parquet/index.html new file mode 100644 index 0000000..6820971 --- /dev/null +++ b/site/file/apache_hudi/apache_parquet/index.html @@ -0,0 +1,2914 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Compare File technologies/tools - Tech Diff + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + Skip to content + + +
    +
    + +
    + + + + +
    + + +
    + +
    + + + + + + + + + +
    +
    + + + +
    +
    +
    + + + + + + + + + +
    +
    +
    + + + + +
    + +
    + + + + + + + +

    File

    +

    Apache Hudi logo Apache Hudi +Apache Parquet logo Apache Parquet

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    AttributeApache HudiApache Parquet
    NameApache HudiApache Parquet
    DescriptionApache Hudi is a transactional data lake platform that brings database and data warehouse capabilities to the data lake. Utilises data stored in either parquet or orc.Apache Parquet is an open source, column-oriented data file format designed for efficient data storage and retrieval.
    Source codehttps://github.com/apache/hudihttps://github.com/apache/parquet-format
    Websitehttps://hudi.apache.org/https://parquet.apache.org/
    LicenseApache license 2.0Apache license 2.0
    Year created20162013
    CompanyUberTwitter, Cloudera
    Use casesIncremental data processing, Data upserts, Change Data Capture (CDC), ACID transactionsWrite once read many, Analytics, Efficient storage, Column based queries
    Language supportjava, scala, c++, python, r, php
    Is human readable +
    no
    +
    +
    no
    +
    Orientationcolumn or rowcolumn
    Has type system +
    yes
    +
    +
    yes
    +
    Has nested structure support +
    yes
    +
    +
    yes
    +
    Has native compression +
    yes
    +
    +
    yes
    +
    Has encoding support +
    yes
    +
    +
    yes
    +
    Has constraint support +
    yes
    +
    +
    no
    +
    Has acid support +
    yes
    +
    +
    no
    +
    Has metadata +
    yes
    +
    +
    yes
    +
    Has encryption support +
    maybe
    +
    +
    yes
    +
    Data processing framework support + Apache Spark, + Apache Flink, + + Apache Beam, + Apache Drill, + Apache Flink, + Apache Spark, +
    Analytics query support + Apache Hive, + Apache Impala, + AWS Athena, + BigQuery, + Clickhouse, + Presto, + Trino, + + Apache Hive, + Apache Impala, + Apache Druid, + Apache Pinot, + AWS Athena, + Azure Synapse, + BigQuery, + Clickhouse, + Dremio, + DuckDB, + Firebolt, +
    + + + + + + + + + + + + +
    +
    + + + +
    + + + +
    + + + +
    +
    +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/site/file/apache_hudi/csv/index.html b/site/file/apache_hudi/csv/index.html new file mode 100644 index 0000000..5787d7a --- /dev/null +++ b/site/file/apache_hudi/csv/index.html @@ -0,0 +1,2917 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Compare File technologies/tools - Tech Diff + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + Skip to content + + +
    +
    + +
    + + + + +
    + + +
    + +
    + + + + + + + + + +
    +
    + + + +
    +
    +
    + + + + + + + + + +
    +
    +
    + + + + +
    + +
    + + + + + + + +

    File

    +

    Apache Hudi logo Apache Hudi +CSV logo CSV

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    AttributeApache HudiCSV
    NameApache HudiCSV
    DescriptionApache Hudi is a transactional data lake platform that brings database and data warehouse capabilities to the data lake. Utilises data stored in either parquet or orc.Comma-Separated Values (CSV) is a text file format that uses commas to separate values in plain text.
    Source codehttps://github.com/apache/hudi
    Websitehttps://hudi.apache.org/https://www.rfc-editor.org/rfc/rfc4180.html
    LicenseApache license 2.0N/A
    Year created20160
    CompanyUber
    Use casesIncremental data processing, Data upserts, Change Data Capture (CDC), ACID transactions
    Language supportjava, scala, c++, python, r, php, go
    Is human readable +
    no
    +
    +
    yes
    +
    Orientationcolumn or rowrow
    Has type system +
    yes
    +
    +
    no
    +
    Has nested structure support +
    yes
    +
    +
    no
    +
    Has native compression +
    yes
    +
    +
    no
    +
    Has encoding support +
    yes
    +
    +
    no
    +
    Has constraint support +
    yes
    +
    +
    no
    +
    Has acid support +
    yes
    +
    +
    no
    +
    Has metadata +
    yes
    +
    +
    no
    +
    Has encryption support +
    maybe
    +
    +
    no
    +
    Data processing framework support + Apache Spark, + Apache Flink, + + Apache Beam, + Apache Drill, + Apache Flink, + Apache Gobblin, + Apache Hive, + Apache NiFi, + Apache Pig, + Apache Spark, +
    Analytics query support + Apache Hive, + Apache Impala, + AWS Athena, + BigQuery, + Clickhouse, + Presto, + Trino, + + Apache Impala, + Apache Druid, + Apache Pinot, + AWS Athena, + Azure Synapse, + BigQuery, + Clickhouse, + Dremio, + DuckDB, + Firebolt, +
    + + + + + + + + + + + + +
    +
    + + + +
    + + + +
    + + + +
    +
    +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/site/file/apache_hudi/delta_lake/index.html b/site/file/apache_hudi/delta_lake/index.html new file mode 100644 index 0000000..e98275a --- /dev/null +++ b/site/file/apache_hudi/delta_lake/index.html @@ -0,0 +1,2910 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Compare File technologies/tools - Tech Diff + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + Skip to content + + +
    +
    + +
    + + + + +
    + + +
    + +
    + + + + + + + + + +
    +
    + + + +
    +
    +
    + + + + + + + + + +
    +
    +
    + + + + +
    + +
    + + + + + + + +

    File

    +

    Apache Hudi logo Apache Hudi +Delta Lake logo Delta Lake

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    AttributeApache HudiDelta Lake
    NameApache HudiDelta Lake
    DescriptionApache Hudi is a transactional data lake platform that brings database and data warehouse capabilities to the data lake. Utilises data stored in either parquet or orc.Delta Lake is an open-source storage framework that enables building a Lakehouse architecture.
    Source codehttps://github.com/apache/hudihttps://github.com/delta-io/delta
    Websitehttps://hudi.apache.org/https://delta.io/
    LicenseApache license 2.0Apache license 2.0
    Year created20162019
    CompanyUberDatabricks
    Use casesIncremental data processing, Data upserts, Change Data Capture (CDC), ACID transactionsWrite once read many, Analytics, Efficient storage, ACID transactions
    Language supportscala, java, python, rust
    Is human readable +
    no
    +
    +
    no
    +
    Orientationcolumn or rowcolumn
    Has type system +
    yes
    +
    +
    yes
    +
    Has nested structure support +
    yes
    +
    +
    yes
    +
    Has native compression +
    yes
    +
    +
    yes
    +
    Has encoding support +
    yes
    +
    +
    yes
    +
    Has constraint support +
    yes
    +
    +
    yes
    +
    Has acid support +
    yes
    +
    +
    yes
    +
    Has metadata +
    yes
    +
    +
    yes
    +
    Has encryption support +
    maybe
    +
    +
    maybe
    +
    Data processing framework support + Apache Spark, + Apache Flink, + + Apache Drill, + Apache Flink, + Apache Spark, +
    Analytics query support + Apache Hive, + Apache Impala, + AWS Athena, + BigQuery, + Clickhouse, + Presto, + Trino, + + Apache Hive, + AWS Athena, + Azure Synapse, + BigQuery, + Clickhouse, + Dremio, + Presto, + Trino, +
    + + + + + + + + + + + + +
    +
    + + + +
    + + + +
    + + + +
    +
    +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/site/file/apache_iceberg/apache_avro/index.html b/site/file/apache_iceberg/apache_avro/index.html new file mode 100644 index 0000000..4a329fb --- /dev/null +++ b/site/file/apache_iceberg/apache_avro/index.html @@ -0,0 +1,2918 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Compare File technologies/tools - Tech Diff + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + Skip to content + + +
    +
    + +
    + + + + +
    + + +
    + +
    + + + + + + + + + +
    +
    + + + +
    +
    +
    + + + + + + + + + +
    +
    +
    + + + + +
    + +
    + + + + + + + +

    File

    +

    Apache Iceberg logo Apache Iceberg +Apache Avro logo Apache Avro

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    AttributeApache IcebergApache Avro
    NameApache IcebergApache Avro
    DescriptionIceberg is a high-performance format for huge analytic tables. Utilises data stored in either parquet, avro, or orc.Apache Avro is the leading serialization format for record data, and first choice for streaming data pipelines.
    LicenseApache license 2.0Apache license 2.0
    Source codehttps://github.com/apache/iceberghttps://github.com/apache/avro
    Websitehttps://iceberg.apache.org/https://avro.apache.org/
    Year created20172009
    CompanyNetflixApache
    Language supportjava, c++, c#, c, python, javascript, perl, ruby, php, rust
    Use casesWrite once read many, Analytics, Efficient storage, ACID transactionsStream processing, Analytics, Efficient data exchange
    Is human readable +
    no
    +
    +
    no
    +
    Orientationcolumn or rowrow
    Has type system +
    yes
    +
    +
    yes
    +
    Has nested structure support +
    yes
    +
    +
    yes
    +
    Has native compression +
    yes
    +
    +
    yes
    +
    Has encoding support +
    yes
    +
    +
    yes
    +
    Has constraint support +
    no
    +
    +
    no
    +
    Has acid support +
    yes
    +
    +
    no
    +
    Has metadata +
    yes
    +
    +
    yes
    +
    Has encryption support +
    maybe
    +
    +
    no
    +
    Data processing framework support + Apache Drill, + Apache Flink, + Apache Gobblin, + Apache Pig, + Apache Spark, + + Apache Flink, + Apache Gobblin, + Apache NiFi, + Apache Pig, + Apache Spark, +
    Analytics query support + Apache Impala, + Apache Druid, + Apache Hive, + AWS Athena, + BigQuery, + Clickhouse, + Dremio, + DuckDB, + Presto, + Trino, + + Apache Impala, + Apache Druid, + Apache Hive, + Apache Pinot, + AWS Athena, + BigQuery, + Clickhouse, + Firebolt, +
    + + + + + + + + + + + + +
    +
    + + + +
    + + + +
    + + + +
    +
    +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/site/file/apache_iceberg/apache_hudi/index.html b/site/file/apache_iceberg/apache_hudi/index.html new file mode 100644 index 0000000..caf0220 --- /dev/null +++ b/site/file/apache_iceberg/apache_hudi/index.html @@ -0,0 +1,2914 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Compare File technologies/tools - Tech Diff + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + Skip to content + + +
    +
    + +
    + + + + +
    + + +
    + +
    + + + + + + + + + +
    +
    + + + +
    +
    +
    + + + + + + + + + +
    +
    +
    + + + + +
    + +
    + + + + + + + +

    File

    +

    Apache Iceberg logo Apache Iceberg +Apache Hudi logo Apache Hudi

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    AttributeApache IcebergApache Hudi
    NameApache IcebergApache Hudi
    DescriptionIceberg is a high-performance format for huge analytic tables. Utilises data stored in either parquet, avro, or orc.Apache Hudi is a transactional data lake platform that brings database and data warehouse capabilities to the data lake. Utilises data stored in either parquet or orc.
    LicenseApache license 2.0Apache license 2.0
    Source codehttps://github.com/apache/iceberghttps://github.com/apache/hudi
    Websitehttps://iceberg.apache.org/https://hudi.apache.org/
    Year created20172016
    CompanyNetflixUber
    Language support
    Use casesWrite once read many, Analytics, Efficient storage, ACID transactionsIncremental data processing, Data upserts, Change Data Capture (CDC), ACID transactions
    Is human readable +
    no
    +
    +
    no
    +
    Orientationcolumn or rowcolumn or row
    Has type system +
    yes
    +
    +
    yes
    +
    Has nested structure support +
    yes
    +
    +
    yes
    +
    Has native compression +
    yes
    +
    +
    yes
    +
    Has encoding support +
    yes
    +
    +
    yes
    +
    Has constraint support +
    no
    +
    +
    yes
    +
    Has acid support +
    yes
    +
    +
    yes
    +
    Has metadata +
    yes
    +
    +
    yes
    +
    Has encryption support +
    maybe
    +
    +
    maybe
    +
    Data processing framework support + Apache Drill, + Apache Flink, + Apache Gobblin, + Apache Pig, + Apache Spark, + + Apache Spark, + Apache Flink, +
    Analytics query support + Apache Impala, + Apache Druid, + Apache Hive, + AWS Athena, + BigQuery, + Clickhouse, + Dremio, + DuckDB, + Presto, + Trino, + + Apache Hive, + Apache Impala, + AWS Athena, + BigQuery, + Clickhouse, + Presto, + Trino, +
    + + + + + + + + + + + + +
    +
    + + + +
    + + + +
    + + + +
    +
    +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/site/file/apache_iceberg/apache_orc/index.html b/site/file/apache_iceberg/apache_orc/index.html new file mode 100644 index 0000000..0f7772d --- /dev/null +++ b/site/file/apache_iceberg/apache_orc/index.html @@ -0,0 +1,2921 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Compare File technologies/tools - Tech Diff + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + Skip to content + + +
    +
    + +
    + + + + +
    + + +
    + +
    + + + + + + + + + +
    +
    + + + +
    +
    +
    + + + + + + + + + +
    +
    +
    + + + + +
    + +
    + + + + + + + +

    File

    +

    Apache Iceberg logo Apache Iceberg +Apache ORC logo Apache ORC

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    AttributeApache IcebergApache ORC
    NameApache IcebergApache ORC
    DescriptionIceberg is a high-performance format for huge analytic tables. Utilises data stored in either parquet, avro, or orc.ORC is a self-describing type-aware columnar file format designed for Hadoop workloads.
    LicenseApache license 2.0Apache license 2.0
    Source codehttps://github.com/apache/iceberghttps://github.com/apache/orc
    Websitehttps://iceberg.apache.org/https://orc.apache.org/
    Year created20172013
    CompanyNetflixHortonworks, Facebook
    Language supportjava, scala, c++, python
    Use casesWrite once read many, Analytics, Efficient storage, ACID transactionsWrite once read many, Analytics, Efficient storage, ACID transactions
    Is human readable +
    no
    +
    +
    no
    +
    Orientationcolumn or rowrow
    Has type system +
    yes
    +
    +
    yes
    +
    Has nested structure support +
    yes
    +
    +
    yes
    +
    Has native compression +
    yes
    +
    +
    yes
    +
    Has encoding support +
    yes
    +
    +
    yes
    +
    Has constraint support +
    no
    +
    +
    no
    +
    Has acid support +
    yes
    +
    +
    no
    +
    Has metadata +
    yes
    +
    +
    yes
    +
    Has encryption support +
    maybe
    +
    +
    yes
    +
    Data processing framework support + Apache Drill, + Apache Flink, + Apache Gobblin, + Apache Pig, + Apache Spark, + + Apache Flink, + Apache Gobblin, + Apache Hadoop, + Apache NiFi, + Apache Pig, + Apache Spark, +
    Analytics query support + Apache Impala, + Apache Druid, + Apache Hive, + AWS Athena, + BigQuery, + Clickhouse, + Dremio, + DuckDB, + Presto, + Trino, + + Apache Impala, + Apache Druid, + Apache Hive, + Apache Pinot, + AWS Athena, + BigQuery, + Clickhouse, + Firebolt, + Presto, + Trino, +
    + + + + + + + + + + + + +
    +
    + + + +
    + + + +
    + + + +
    +
    +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/site/file/apache_iceberg/apache_parquet/index.html b/site/file/apache_iceberg/apache_parquet/index.html new file mode 100644 index 0000000..d49863f --- /dev/null +++ b/site/file/apache_iceberg/apache_parquet/index.html @@ -0,0 +1,2920 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Compare File technologies/tools - Tech Diff + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + Skip to content + + +
    +
    + +
    + + + + +
    + + +
    + +
    + + + + + + + + + +
    +
    + + + +
    +
    +
    + + + + + + + + + +
    +
    +
    + + + + +
    + +
    + + + + + + + +

    File

    +

    Apache Iceberg logo Apache Iceberg +Apache Parquet logo Apache Parquet

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    AttributeApache IcebergApache Parquet
    NameApache IcebergApache Parquet
    DescriptionIceberg is a high-performance format for huge analytic tables. Utilises data stored in either parquet, avro, or orc.Apache Parquet is an open source, column-oriented data file format designed for efficient data storage and retrieval.
    LicenseApache license 2.0Apache license 2.0
    Source codehttps://github.com/apache/iceberghttps://github.com/apache/parquet-format
    Websitehttps://iceberg.apache.org/https://parquet.apache.org/
    Year created20172013
    CompanyNetflixTwitter, Cloudera
    Language supportjava, scala, c++, python, r, php
    Use casesWrite once read many, Analytics, Efficient storage, ACID transactionsWrite once read many, Analytics, Efficient storage, Column based queries
    Is human readable +
    no
    +
    +
    no
    +
    Orientationcolumn or rowcolumn
    Has type system +
    yes
    +
    +
    yes
    +
    Has nested structure support +
    yes
    +
    +
    yes
    +
    Has native compression +
    yes
    +
    +
    yes
    +
    Has encoding support +
    yes
    +
    +
    yes
    +
    Has constraint support +
    no
    +
    +
    no
    +
    Has acid support +
    yes
    +
    +
    no
    +
    Has metadata +
    yes
    +
    +
    yes
    +
    Has encryption support +
    maybe
    +
    +
    yes
    +
    Data processing framework support + Apache Drill, + Apache Flink, + Apache Gobblin, + Apache Pig, + Apache Spark, + + Apache Beam, + Apache Drill, + Apache Flink, + Apache Spark, +
    Analytics query support + Apache Impala, + Apache Druid, + Apache Hive, + AWS Athena, + BigQuery, + Clickhouse, + Dremio, + DuckDB, + Presto, + Trino, + + Apache Hive, + Apache Impala, + Apache Druid, + Apache Pinot, + AWS Athena, + Azure Synapse, + BigQuery, + Clickhouse, + Dremio, + DuckDB, + Firebolt, +
    + + + + + + + + + + + + +
    +
    + + + +
    + + + +
    + + + +
    +
    +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/site/file/apache_iceberg/csv/index.html b/site/file/apache_iceberg/csv/index.html new file mode 100644 index 0000000..a9835b2 --- /dev/null +++ b/site/file/apache_iceberg/csv/index.html @@ -0,0 +1,2923 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Compare File technologies/tools - Tech Diff + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + Skip to content + + +
    +
    + +
    + + + + +
    + + +
    + +
    + + + + + + + + + +
    +
    + + + +
    +
    +
    + + + + + + + + + +
    +
    +
    + + + + +
    + +
    + + + + + + + +

    File

    +

    Apache Iceberg logo Apache Iceberg +CSV logo CSV

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    AttributeApache IcebergCSV
    NameApache IcebergCSV
    DescriptionIceberg is a high-performance format for huge analytic tables. Utilises data stored in either parquet, avro, or orc.Comma-Separated Values (CSV) is a text file format that uses commas to separate values in plain text.
    LicenseApache license 2.0N/A
    Source codehttps://github.com/apache/iceberg
    Websitehttps://iceberg.apache.org/https://www.rfc-editor.org/rfc/rfc4180.html
    Year created20170
    CompanyNetflix
    Language supportjava, scala, c++, python, r, php, go
    Use casesWrite once read many, Analytics, Efficient storage, ACID transactions
    Is human readable +
    no
    +
    +
    yes
    +
    Orientationcolumn or rowrow
    Has type system +
    yes
    +
    +
    no
    +
    Has nested structure support +
    yes
    +
    +
    no
    +
    Has native compression +
    yes
    +
    +
    no
    +
    Has encoding support +
    yes
    +
    +
    no
    +
    Has constraint support +
    no
    +
    +
    no
    +
    Has acid support +
    yes
    +
    +
    no
    +
    Has metadata +
    yes
    +
    +
    no
    +
    Has encryption support +
    maybe
    +
    +
    no
    +
    Data processing framework support + Apache Drill, + Apache Flink, + Apache Gobblin, + Apache Pig, + Apache Spark, + + Apache Beam, + Apache Drill, + Apache Flink, + Apache Gobblin, + Apache Hive, + Apache NiFi, + Apache Pig, + Apache Spark, +
    Analytics query support + Apache Impala, + Apache Druid, + Apache Hive, + AWS Athena, + BigQuery, + Clickhouse, + Dremio, + DuckDB, + Presto, + Trino, + + Apache Impala, + Apache Druid, + Apache Pinot, + AWS Athena, + Azure Synapse, + BigQuery, + Clickhouse, + Dremio, + DuckDB, + Firebolt, +
    + + + + + + + + + + + + +
    +
    + + + +
    + + + +
    + + + +
    +
    +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/site/file/apache_iceberg/delta_lake/index.html b/site/file/apache_iceberg/delta_lake/index.html new file mode 100644 index 0000000..30ab01f --- /dev/null +++ b/site/file/apache_iceberg/delta_lake/index.html @@ -0,0 +1,2916 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Compare File technologies/tools - Tech Diff + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + Skip to content + + +
    +
    + +
    + + + + +
    + + +
    + +
    + + + + + + + + + +
    +
    + + + +
    +
    +
    + + + + + + + + + +
    +
    +
    + + + + +
    + +
    + + + + + + + +

    File

    +

    Apache Iceberg logo Apache Iceberg +Delta Lake logo Delta Lake

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    AttributeApache IcebergDelta Lake
    NameApache IcebergDelta Lake
    DescriptionIceberg is a high-performance format for huge analytic tables. Utilises data stored in either parquet, avro, or orc.Delta Lake is an open-source storage framework that enables building a Lakehouse architecture.
    LicenseApache license 2.0Apache license 2.0
    Source codehttps://github.com/apache/iceberghttps://github.com/delta-io/delta
    Websitehttps://iceberg.apache.org/https://delta.io/
    Year created20172019
    CompanyNetflixDatabricks
    Language supportscala, java, python, rust
    Use casesWrite once read many, Analytics, Efficient storage, ACID transactionsWrite once read many, Analytics, Efficient storage, ACID transactions
    Is human readable +
    no
    +
    +
    no
    +
    Orientationcolumn or rowcolumn
    Has type system +
    yes
    +
    +
    yes
    +
    Has nested structure support +
    yes
    +
    +
    yes
    +
    Has native compression +
    yes
    +
    +
    yes
    +
    Has encoding support +
    yes
    +
    +
    yes
    +
    Has constraint support +
    no
    +
    +
    yes
    +
    Has acid support +
    yes
    +
    +
    yes
    +
    Has metadata +
    yes
    +
    +
    yes
    +
    Has encryption support +
    maybe
    +
    +
    maybe
    +
    Data processing framework support + Apache Drill, + Apache Flink, + Apache Gobblin, + Apache Pig, + Apache Spark, + + Apache Drill, + Apache Flink, + Apache Spark, +
    Analytics query support + Apache Impala, + Apache Druid, + Apache Hive, + AWS Athena, + BigQuery, + Clickhouse, + Dremio, + DuckDB, + Presto, + Trino, + + Apache Hive, + AWS Athena, + Azure Synapse, + BigQuery, + Clickhouse, + Dremio, + Presto, + Trino, +
    + + + + + + + + + + + + +
    +
    + + + +
    + + + +
    + + + +
    +
    +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/site/file/apache_orc/apache_avro/index.html b/site/file/apache_orc/apache_avro/index.html new file mode 100644 index 0000000..14f0c50 --- /dev/null +++ b/site/file/apache_orc/apache_avro/index.html @@ -0,0 +1,2919 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Compare File technologies/tools - Tech Diff + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + Skip to content + + +
    +
    + +
    + + + + +
    + + +
    + +
    + + + + + + + + + +
    +
    + + + +
    +
    +
    + + + + + + + + + +
    +
    +
    + + + + +
    + +
    + + + + + + + +

    File

    +

    Apache ORC logo Apache ORC +Apache Avro logo Apache Avro

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    AttributeApache ORCApache Avro
    NameApache ORCApache Avro
    DescriptionORC is a self-describing type-aware columnar file format designed for Hadoop workloads.Apache Avro is the leading serialization format for record data, and first choice for streaming data pipelines.
    LicenseApache license 2.0Apache license 2.0
    Source codehttps://github.com/apache/orchttps://github.com/apache/avro
    Websitehttps://orc.apache.org/https://avro.apache.org/
    Year created20132009
    CompanyHortonworks, FacebookApache
    Language supportjava, scala, c++, pythonjava, c++, c#, c, python, javascript, perl, ruby, php, rust
    Use casesWrite once read many, Analytics, Efficient storage, ACID transactionsStream processing, Analytics, Efficient data exchange
    Is human readable +
    no
    +
    +
    no
    +
    Orientationrowrow
    Has type system +
    yes
    +
    +
    yes
    +
    Has nested structure support +
    yes
    +
    +
    yes
    +
    Has native compression +
    yes
    +
    +
    yes
    +
    Has encoding support +
    yes
    +
    +
    yes
    +
    Has constraint support +
    no
    +
    +
    no
    +
    Has acid support +
    no
    +
    +
    no
    +
    Has metadata +
    yes
    +
    +
    yes
    +
    Has encryption support +
    yes
    +
    +
    no
    +
    Data processing framework support + Apache Flink, + Apache Gobblin, + Apache Hadoop, + Apache NiFi, + Apache Pig, + Apache Spark, + + Apache Flink, + Apache Gobblin, + Apache NiFi, + Apache Pig, + Apache Spark, +
    Analytics query support + Apache Impala, + Apache Druid, + Apache Hive, + Apache Pinot, + AWS Athena, + BigQuery, + Clickhouse, + Firebolt, + Presto, + Trino, + + Apache Impala, + Apache Druid, + Apache Hive, + Apache Pinot, + AWS Athena, + BigQuery, + Clickhouse, + Firebolt, +
    + + + + + + + + + + + + +
    +
    + + + +
    + + + +
    + + + +
    +
    +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/site/file/apache_orc/apache_hudi/index.html b/site/file/apache_orc/apache_hudi/index.html new file mode 100644 index 0000000..16d3fba --- /dev/null +++ b/site/file/apache_orc/apache_hudi/index.html @@ -0,0 +1,2915 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Compare File technologies/tools - Tech Diff + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + Skip to content + + +
    +
    + +
    + + + + +
    + + +
    + +
    + + + + + + + + + +
    +
    + + + +
    +
    +
    + + + + + + + + + +
    +
    +
    + + + + +
    + +
    + + + + + + + +

    File

    +

    Apache ORC logo Apache ORC +Apache Hudi logo Apache Hudi

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    AttributeApache ORCApache Hudi
    NameApache ORCApache Hudi
    DescriptionORC is a self-describing type-aware columnar file format designed for Hadoop workloads.Apache Hudi is a transactional data lake platform that brings database and data warehouse capabilities to the data lake. Utilises data stored in either parquet or orc.
    LicenseApache license 2.0Apache license 2.0
    Source codehttps://github.com/apache/orchttps://github.com/apache/hudi
    Websitehttps://orc.apache.org/https://hudi.apache.org/
    Year created20132016
    CompanyHortonworks, FacebookUber
    Language supportjava, scala, c++, python
    Use casesWrite once read many, Analytics, Efficient storage, ACID transactionsIncremental data processing, Data upserts, Change Data Capture (CDC), ACID transactions
    Is human readable +
    no
    +
    +
    no
    +
    Orientationrowcolumn or row
    Has type system +
    yes
    +
    +
    yes
    +
    Has nested structure support +
    yes
    +
    +
    yes
    +
    Has native compression +
    yes
    +
    +
    yes
    +
    Has encoding support +
    yes
    +
    +
    yes
    +
    Has constraint support +
    no
    +
    +
    yes
    +
    Has acid support +
    no
    +
    +
    yes
    +
    Has metadata +
    yes
    +
    +
    yes
    +
    Has encryption support +
    yes
    +
    +
    maybe
    +
    Data processing framework support + Apache Flink, + Apache Gobblin, + Apache Hadoop, + Apache NiFi, + Apache Pig, + Apache Spark, + + Apache Spark, + Apache Flink, +
    Analytics query support + Apache Impala, + Apache Druid, + Apache Hive, + Apache Pinot, + AWS Athena, + BigQuery, + Clickhouse, + Firebolt, + Presto, + Trino, + + Apache Hive, + Apache Impala, + AWS Athena, + BigQuery, + Clickhouse, + Presto, + Trino, +
    + + + + + + + + + + + + +
    +
    + + + +
    + + + +
    + + + +
    +
    +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/site/file/apache_orc/apache_iceberg/index.html b/site/file/apache_orc/apache_iceberg/index.html new file mode 100644 index 0000000..ee7498f --- /dev/null +++ b/site/file/apache_orc/apache_iceberg/index.html @@ -0,0 +1,2921 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Compare File technologies/tools - Tech Diff + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + Skip to content + + +
    +
    + +
    + + + + +
    + + +
    + +
    + + + + + + + + + +
    +
    + + + +
    +
    +
    + + + + + + + + + +
    +
    +
    + + + + +
    + +
    + + + + + + + +

    File

    +

    Apache ORC logo Apache ORC +Apache Iceberg logo Apache Iceberg

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    AttributeApache ORCApache Iceberg
    NameApache ORCApache Iceberg
    DescriptionORC is a self-describing type-aware columnar file format designed for Hadoop workloads.Iceberg is a high-performance format for huge analytic tables. Utilises data stored in either parquet, avro, or orc.
    LicenseApache license 2.0Apache license 2.0
    Source codehttps://github.com/apache/orchttps://github.com/apache/iceberg
    Websitehttps://orc.apache.org/https://iceberg.apache.org/
    Year created20132017
    CompanyHortonworks, FacebookNetflix
    Language supportjava, scala, c++, python
    Use casesWrite once read many, Analytics, Efficient storage, ACID transactionsWrite once read many, Analytics, Efficient storage, ACID transactions
    Is human readable +
    no
    +
    +
    no
    +
    Orientationrowcolumn or row
    Has type system +
    yes
    +
    +
    yes
    +
    Has nested structure support +
    yes
    +
    +
    yes
    +
    Has native compression +
    yes
    +
    +
    yes
    +
    Has encoding support +
    yes
    +
    +
    yes
    +
    Has constraint support +
    no
    +
    +
    no
    +
    Has acid support +
    no
    +
    +
    yes
    +
    Has metadata +
    yes
    +
    +
    yes
    +
    Has encryption support +
    yes
    +
    +
    maybe
    +
    Data processing framework support + Apache Flink, + Apache Gobblin, + Apache Hadoop, + Apache NiFi, + Apache Pig, + Apache Spark, + + Apache Drill, + Apache Flink, + Apache Gobblin, + Apache Pig, + Apache Spark, +
    Analytics query support + Apache Impala, + Apache Druid, + Apache Hive, + Apache Pinot, + AWS Athena, + BigQuery, + Clickhouse, + Firebolt, + Presto, + Trino, + + Apache Impala, + Apache Druid, + Apache Hive, + AWS Athena, + BigQuery, + Clickhouse, + Dremio, + DuckDB, + Presto, + Trino, +
    + + + + + + + + + + + + +
    +
    + + + +
    + + + +
    + + + +
    +
    +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/site/file/apache_orc/apache_parquet/index.html b/site/file/apache_orc/apache_parquet/index.html new file mode 100644 index 0000000..b19fa0e --- /dev/null +++ b/site/file/apache_orc/apache_parquet/index.html @@ -0,0 +1,2921 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Compare File technologies/tools - Tech Diff + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + Skip to content + + +
    +
    + +
    + + + + +
    + + +
    + +
    + + + + + + + + + +
    +
    + + + +
    +
    +
    + + + + + + + + + +
    +
    +
    + + + + +
    + +
    + + + + + + + +

    File

    +

    Apache ORC logo Apache ORC +Apache Parquet logo Apache Parquet

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    AttributeApache ORCApache Parquet
    NameApache ORCApache Parquet
    DescriptionORC is a self-describing type-aware columnar file format designed for Hadoop workloads.Apache Parquet is an open source, column-oriented data file format designed for efficient data storage and retrieval.
    LicenseApache license 2.0Apache license 2.0
    Source codehttps://github.com/apache/orchttps://github.com/apache/parquet-format
    Websitehttps://orc.apache.org/https://parquet.apache.org/
    Year created20132013
    CompanyHortonworks, FacebookTwitter, Cloudera
    Language supportjava, scala, c++, pythonjava, scala, c++, python, r, php
    Use casesWrite once read many, Analytics, Efficient storage, ACID transactionsWrite once read many, Analytics, Efficient storage, Column based queries
    Is human readable +
    no
    +
    +
    no
    +
    Orientationrowcolumn
    Has type system +
    yes
    +
    +
    yes
    +
    Has nested structure support +
    yes
    +
    +
    yes
    +
    Has native compression +
    yes
    +
    +
    yes
    +
    Has encoding support +
    yes
    +
    +
    yes
    +
    Has constraint support +
    no
    +
    +
    no
    +
    Has acid support +
    no
    +
    +
    no
    +
    Has metadata +
    yes
    +
    +
    yes
    +
    Has encryption support +
    yes
    +
    +
    yes
    +
    Data processing framework support + Apache Flink, + Apache Gobblin, + Apache Hadoop, + Apache NiFi, + Apache Pig, + Apache Spark, + + Apache Beam, + Apache Drill, + Apache Flink, + Apache Spark, +
    Analytics query support + Apache Impala, + Apache Druid, + Apache Hive, + Apache Pinot, + AWS Athena, + BigQuery, + Clickhouse, + Firebolt, + Presto, + Trino, + + Apache Hive, + Apache Impala, + Apache Druid, + Apache Pinot, + AWS Athena, + Azure Synapse, + BigQuery, + Clickhouse, + Dremio, + DuckDB, + Firebolt, +
    + + + + + + + + + + + + +
    +
    + + + +
    + + + +
    + + + +
    +
    +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/site/file/apache_orc/csv/index.html b/site/file/apache_orc/csv/index.html new file mode 100644 index 0000000..c5f30fb --- /dev/null +++ b/site/file/apache_orc/csv/index.html @@ -0,0 +1,2924 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Compare File technologies/tools - Tech Diff + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + Skip to content + + +
    +
    + +
    + + + + +
    + + +
    + +
    + + + + + + + + + +
    +
    + + + +
    +
    +
    + + + + + + + + + +
    +
    +
    + + + + +
    + +
    + + + + + + + +

    File

    +

    Apache ORC logo Apache ORC +CSV logo CSV

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    AttributeApache ORCCSV
    NameApache ORCCSV
    DescriptionORC is a self-describing type-aware columnar file format designed for Hadoop workloads.Comma-Separated Values (CSV) is a text file format that uses commas to separate values in plain text.
    LicenseApache license 2.0N/A
    Source codehttps://github.com/apache/orc
    Websitehttps://orc.apache.org/https://www.rfc-editor.org/rfc/rfc4180.html
    Year created20130
    CompanyHortonworks, Facebook
    Language supportjava, scala, c++, pythonjava, scala, c++, python, r, php, go
    Use casesWrite once read many, Analytics, Efficient storage, ACID transactions
    Is human readable +
    no
    +
    +
    yes
    +
    Orientationrowrow
    Has type system +
    yes
    +
    +
    no
    +
    Has nested structure support +
    yes
    +
    +
    no
    +
    Has native compression +
    yes
    +
    +
    no
    +
    Has encoding support +
    yes
    +
    +
    no
    +
    Has constraint support +
    no
    +
    +
    no
    +
    Has acid support +
    no
    +
    +
    no
    +
    Has metadata +
    yes
    +
    +
    no
    +
    Has encryption support +
    yes
    +
    +
    no
    +
    Data processing framework support + Apache Flink, + Apache Gobblin, + Apache Hadoop, + Apache NiFi, + Apache Pig, + Apache Spark, + + Apache Beam, + Apache Drill, + Apache Flink, + Apache Gobblin, + Apache Hive, + Apache NiFi, + Apache Pig, + Apache Spark, +
    Analytics query support + Apache Impala, + Apache Druid, + Apache Hive, + Apache Pinot, + AWS Athena, + BigQuery, + Clickhouse, + Firebolt, + Presto, + Trino, + + Apache Impala, + Apache Druid, + Apache Pinot, + AWS Athena, + Azure Synapse, + BigQuery, + Clickhouse, + Dremio, + DuckDB, + Firebolt, +
    + + + + + + + + + + + + +
    +
    + + + +
    + + + +
    + + + +
    +
    +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/site/file/apache_orc/delta_lake/index.html b/site/file/apache_orc/delta_lake/index.html new file mode 100644 index 0000000..b3521c7 --- /dev/null +++ b/site/file/apache_orc/delta_lake/index.html @@ -0,0 +1,2917 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Compare File technologies/tools - Tech Diff + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + Skip to content + + +
    +
    + +
    + + + + +
    + + +
    + +
    + + + + + + + + + +
    +
    + + + +
    +
    +
    + + + + + + + + + +
    +
    +
    + + + + +
    + +
    + + + + + + + +

    File

    +

    Apache ORC logo Apache ORC +Delta Lake logo Delta Lake

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    AttributeApache ORCDelta Lake
    NameApache ORCDelta Lake
    DescriptionORC is a self-describing type-aware columnar file format designed for Hadoop workloads.Delta Lake is an open-source storage framework that enables building a Lakehouse architecture.
    LicenseApache license 2.0Apache license 2.0
    Source codehttps://github.com/apache/orchttps://github.com/delta-io/delta
    Websitehttps://orc.apache.org/https://delta.io/
    Year created20132019
    CompanyHortonworks, FacebookDatabricks
    Language supportjava, scala, c++, pythonscala, java, python, rust
    Use casesWrite once read many, Analytics, Efficient storage, ACID transactionsWrite once read many, Analytics, Efficient storage, ACID transactions
    Is human readable +
    no
    +
    +
    no
    +
    Orientationrowcolumn
    Has type system +
    yes
    +
    +
    yes
    +
    Has nested structure support +
    yes
    +
    +
    yes
    +
    Has native compression +
    yes
    +
    +
    yes
    +
    Has encoding support +
    yes
    +
    +
    yes
    +
    Has constraint support +
    no
    +
    +
    yes
    +
    Has acid support +
    no
    +
    +
    yes
    +
    Has metadata +
    yes
    +
    +
    yes
    +
    Has encryption support +
    yes
    +
    +
    maybe
    +
    Data processing framework support + Apache Flink, + Apache Gobblin, + Apache Hadoop, + Apache NiFi, + Apache Pig, + Apache Spark, + + Apache Drill, + Apache Flink, + Apache Spark, +
    Analytics query support + Apache Impala, + Apache Druid, + Apache Hive, + Apache Pinot, + AWS Athena, + BigQuery, + Clickhouse, + Firebolt, + Presto, + Trino, + + Apache Hive, + AWS Athena, + Azure Synapse, + BigQuery, + Clickhouse, + Dremio, + Presto, + Trino, +
    + + + + + + + + + + + + +
    +
    + + + +
    + + + +
    + + + +
    +
    +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/site/file/apache_parquet/apache_avro/index.html b/site/file/apache_parquet/apache_avro/index.html new file mode 100644 index 0000000..7f3807e --- /dev/null +++ b/site/file/apache_parquet/apache_avro/index.html @@ -0,0 +1,2918 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Compare File technologies/tools - Tech Diff + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + Skip to content + + +
    +
    + +
    + + + + +
    + + +
    + +
    + + + + + + + + + +
    +
    + + + +
    +
    +
    + + + + + + + + + +
    +
    +
    + + + + +
    + +
    + + + + + + + +

    File

    +

    Apache Parquet logo Apache Parquet +Apache Avro logo Apache Avro

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    AttributeApache ParquetApache Avro
    NameApache ParquetApache Avro
    DescriptionApache Parquet is an open source, column-oriented data file format designed for efficient data storage and retrieval.Apache Avro is the leading serialization format for record data, and first choice for streaming data pipelines.
    LicenseApache license 2.0Apache license 2.0
    Source codehttps://github.com/apache/parquet-formathttps://github.com/apache/avro
    Websitehttps://parquet.apache.org/https://avro.apache.org/
    Year created20132009
    CompanyTwitter, ClouderaApache
    Language supportjava, scala, c++, python, r, phpjava, c++, c#, c, python, javascript, perl, ruby, php, rust
    Use casesWrite once read many, Analytics, Efficient storage, Column based queriesStream processing, Analytics, Efficient data exchange
    Is human readable +
    no
    +
    +
    no
    +
    Orientationcolumnrow
    Has type system +
    yes
    +
    +
    yes
    +
    Has nested structure support +
    yes
    +
    +
    yes
    +
    Has native compression +
    yes
    +
    +
    yes
    +
    Has encoding support +
    yes
    +
    +
    yes
    +
    Has constraint support +
    no
    +
    +
    no
    +
    Has acid support +
    no
    +
    +
    no
    +
    Has metadata +
    yes
    +
    +
    yes
    +
    Has encryption support +
    yes
    +
    +
    no
    +
    Data processing framework support + Apache Beam, + Apache Drill, + Apache Flink, + Apache Spark, + + Apache Flink, + Apache Gobblin, + Apache NiFi, + Apache Pig, + Apache Spark, +
    Analytics query support + Apache Hive, + Apache Impala, + Apache Druid, + Apache Pinot, + AWS Athena, + Azure Synapse, + BigQuery, + Clickhouse, + Dremio, + DuckDB, + Firebolt, + + Apache Impala, + Apache Druid, + Apache Hive, + Apache Pinot, + AWS Athena, + BigQuery, + Clickhouse, + Firebolt, +
    + + + + + + + + + + + + +
    +
    + + + +
    + + + +
    + + + +
    +
    +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/site/file/apache_parquet/apache_hudi/index.html b/site/file/apache_parquet/apache_hudi/index.html new file mode 100644 index 0000000..29ab42c --- /dev/null +++ b/site/file/apache_parquet/apache_hudi/index.html @@ -0,0 +1,2914 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Compare File technologies/tools - Tech Diff + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + Skip to content + + +
    +
    + +
    + + + + +
    + + +
    + +
    + + + + + + + + + +
    +
    + + + +
    +
    +
    + + + + + + + + + +
    +
    +
    + + + + +
    + +
    + + + + + + + +

    File

    +

    Apache Parquet logo Apache Parquet +Apache Hudi logo Apache Hudi

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    AttributeApache ParquetApache Hudi
    NameApache ParquetApache Hudi
    DescriptionApache Parquet is an open source, column-oriented data file format designed for efficient data storage and retrieval.Apache Hudi is a transactional data lake platform that brings database and data warehouse capabilities to the data lake. Utilises data stored in either parquet or orc.
    LicenseApache license 2.0Apache license 2.0
    Source codehttps://github.com/apache/parquet-formathttps://github.com/apache/hudi
    Websitehttps://parquet.apache.org/https://hudi.apache.org/
    Year created20132016
    CompanyTwitter, ClouderaUber
    Language supportjava, scala, c++, python, r, php
    Use casesWrite once read many, Analytics, Efficient storage, Column based queriesIncremental data processing, Data upserts, Change Data Capture (CDC), ACID transactions
    Is human readable +
    no
    +
    +
    no
    +
    Orientationcolumncolumn or row
    Has type system +
    yes
    +
    +
    yes
    +
    Has nested structure support +
    yes
    +
    +
    yes
    +
    Has native compression +
    yes
    +
    +
    yes
    +
    Has encoding support +
    yes
    +
    +
    yes
    +
    Has constraint support +
    no
    +
    +
    yes
    +
    Has acid support +
    no
    +
    +
    yes
    +
    Has metadata +
    yes
    +
    +
    yes
    +
    Has encryption support +
    yes
    +
    +
    maybe
    +
    Data processing framework support + Apache Beam, + Apache Drill, + Apache Flink, + Apache Spark, + + Apache Spark, + Apache Flink, +
    Analytics query support + Apache Hive, + Apache Impala, + Apache Druid, + Apache Pinot, + AWS Athena, + Azure Synapse, + BigQuery, + Clickhouse, + Dremio, + DuckDB, + Firebolt, + + Apache Hive, + Apache Impala, + AWS Athena, + BigQuery, + Clickhouse, + Presto, + Trino, +
    + + + + + + + + + + + + +
    +
    + + + +
    + + + +
    + + + +
    +
    +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/site/file/apache_parquet/apache_iceberg/index.html b/site/file/apache_parquet/apache_iceberg/index.html new file mode 100644 index 0000000..69c3a7c --- /dev/null +++ b/site/file/apache_parquet/apache_iceberg/index.html @@ -0,0 +1,2920 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Compare File technologies/tools - Tech Diff + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + Skip to content + + +
    +
    + +
    + + + + +
    + + +
    + +
    + + + + + + + + + +
    +
    + + + +
    +
    +
    + + + + + + + + + +
    +
    +
    + + + + +
    + +
    + + + + + + + +

    File

    +

    Apache Parquet logo Apache Parquet +Apache Iceberg logo Apache Iceberg

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    AttributeApache ParquetApache Iceberg
    NameApache ParquetApache Iceberg
    DescriptionApache Parquet is an open source, column-oriented data file format designed for efficient data storage and retrieval.Iceberg is a high-performance format for huge analytic tables. Utilises data stored in either parquet, avro, or orc.
    LicenseApache license 2.0Apache license 2.0
    Source codehttps://github.com/apache/parquet-formathttps://github.com/apache/iceberg
    Websitehttps://parquet.apache.org/https://iceberg.apache.org/
    Year created20132017
    CompanyTwitter, ClouderaNetflix
    Language supportjava, scala, c++, python, r, php
    Use casesWrite once read many, Analytics, Efficient storage, Column based queriesWrite once read many, Analytics, Efficient storage, ACID transactions
    Is human readable +
    no
    +
    +
    no
    +
    Orientationcolumncolumn or row
    Has type system +
    yes
    +
    +
    yes
    +
    Has nested structure support +
    yes
    +
    +
    yes
    +
    Has native compression +
    yes
    +
    +
    yes
    +
    Has encoding support +
    yes
    +
    +
    yes
    +
    Has constraint support +
    no
    +
    +
    no
    +
    Has acid support +
    no
    +
    +
    yes
    +
    Has metadata +
    yes
    +
    +
    yes
    +
    Has encryption support +
    yes
    +
    +
    maybe
    +
    Data processing framework support + Apache Beam, + Apache Drill, + Apache Flink, + Apache Spark, + + Apache Drill, + Apache Flink, + Apache Gobblin, + Apache Pig, + Apache Spark, +
    Analytics query support + Apache Hive, + Apache Impala, + Apache Druid, + Apache Pinot, + AWS Athena, + Azure Synapse, + BigQuery, + Clickhouse, + Dremio, + DuckDB, + Firebolt, + + Apache Impala, + Apache Druid, + Apache Hive, + AWS Athena, + BigQuery, + Clickhouse, + Dremio, + DuckDB, + Presto, + Trino, +
    + + + + + + + + + + + + +
    +
    + + + +
    + + + +
    + + + +
    +
    +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/site/file/apache_parquet/apache_orc/index.html b/site/file/apache_parquet/apache_orc/index.html new file mode 100644 index 0000000..549b562 --- /dev/null +++ b/site/file/apache_parquet/apache_orc/index.html @@ -0,0 +1,2921 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Compare File technologies/tools - Tech Diff + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + Skip to content + + +
    +
    + +
    + + + + +
    + + +
    + +
    + + + + + + + + + +
    +
    + + + +
    +
    +
    + + + + + + + + + +
    +
    +
    + + + + +
    + +
    + + + + + + + +

    File

    +

    Apache Parquet logo Apache Parquet +Apache ORC logo Apache ORC

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    AttributeApache ParquetApache ORC
    NameApache ParquetApache ORC
    DescriptionApache Parquet is an open source, column-oriented data file format designed for efficient data storage and retrieval.ORC is a self-describing type-aware columnar file format designed for Hadoop workloads.
    LicenseApache license 2.0Apache license 2.0
    Source codehttps://github.com/apache/parquet-formathttps://github.com/apache/orc
    Websitehttps://parquet.apache.org/https://orc.apache.org/
    Year created20132013
    CompanyTwitter, ClouderaHortonworks, Facebook
    Language supportjava, scala, c++, python, r, phpjava, scala, c++, python
    Use casesWrite once read many, Analytics, Efficient storage, Column based queriesWrite once read many, Analytics, Efficient storage, ACID transactions
    Is human readable +
    no
    +
    +
    no
    +
    Orientationcolumnrow
    Has type system +
    yes
    +
    +
    yes
    +
    Has nested structure support +
    yes
    +
    +
    yes
    +
    Has native compression +
    yes
    +
    +
    yes
    +
    Has encoding support +
    yes
    +
    +
    yes
    +
    Has constraint support +
    no
    +
    +
    no
    +
    Has acid support +
    no
    +
    +
    no
    +
    Has metadata +
    yes
    +
    +
    yes
    +
    Has encryption support +
    yes
    +
    +
    yes
    +
    Data processing framework support + Apache Beam, + Apache Drill, + Apache Flink, + Apache Spark, + + Apache Flink, + Apache Gobblin, + Apache Hadoop, + Apache NiFi, + Apache Pig, + Apache Spark, +
    Analytics query support + Apache Hive, + Apache Impala, + Apache Druid, + Apache Pinot, + AWS Athena, + Azure Synapse, + BigQuery, + Clickhouse, + Dremio, + DuckDB, + Firebolt, + + Apache Impala, + Apache Druid, + Apache Hive, + Apache Pinot, + AWS Athena, + BigQuery, + Clickhouse, + Firebolt, + Presto, + Trino, +
    + + + + + + + + + + + + +
    +
    + + + +
    + + + +
    + + + +
    +
    +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/site/file/apache_parquet/csv/index.html b/site/file/apache_parquet/csv/index.html new file mode 100644 index 0000000..bd446c5 --- /dev/null +++ b/site/file/apache_parquet/csv/index.html @@ -0,0 +1,2923 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Compare File technologies/tools - Tech Diff + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + Skip to content + + +
    +
    + +
    + + + + +
    + + +
    + +
    + + + + + + + + + +
    +
    + + + +
    +
    +
    + + + + + + + + + +
    +
    +
    + + + + +
    + +
    + + + + + + + +

    File

    +

    Apache Parquet logo Apache Parquet +CSV logo CSV

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    AttributeApache ParquetCSV
    NameApache ParquetCSV
    DescriptionApache Parquet is an open source, column-oriented data file format designed for efficient data storage and retrieval.Comma-Separated Values (CSV) is a text file format that uses commas to separate values in plain text.
    LicenseApache license 2.0N/A
    Source codehttps://github.com/apache/parquet-format
    Websitehttps://parquet.apache.org/https://www.rfc-editor.org/rfc/rfc4180.html
    Year created20130
    CompanyTwitter, Cloudera
    Language supportjava, scala, c++, python, r, phpjava, scala, c++, python, r, php, go
    Use casesWrite once read many, Analytics, Efficient storage, Column based queries
    Is human readable +
    no
    +
    +
    yes
    +
    Orientationcolumnrow
    Has type system +
    yes
    +
    +
    no
    +
    Has nested structure support +
    yes
    +
    +
    no
    +
    Has native compression +
    yes
    +
    +
    no
    +
    Has encoding support +
    yes
    +
    +
    no
    +
    Has constraint support +
    no
    +
    +
    no
    +
    Has acid support +
    no
    +
    +
    no
    +
    Has metadata +
    yes
    +
    +
    no
    +
    Has encryption support +
    yes
    +
    +
    no
    +
    Data processing framework support + Apache Beam, + Apache Drill, + Apache Flink, + Apache Spark, + + Apache Beam, + Apache Drill, + Apache Flink, + Apache Gobblin, + Apache Hive, + Apache NiFi, + Apache Pig, + Apache Spark, +
    Analytics query support + Apache Hive, + Apache Impala, + Apache Druid, + Apache Pinot, + AWS Athena, + Azure Synapse, + BigQuery, + Clickhouse, + Dremio, + DuckDB, + Firebolt, + + Apache Impala, + Apache Druid, + Apache Pinot, + AWS Athena, + Azure Synapse, + BigQuery, + Clickhouse, + Dremio, + DuckDB, + Firebolt, +
    + + + + + + + + + + + + +
    +
    + + + +
    + + + +
    + + + +
    +
    +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/site/file/apache_parquet/delta_lake/index.html b/site/file/apache_parquet/delta_lake/index.html new file mode 100644 index 0000000..47ff87a --- /dev/null +++ b/site/file/apache_parquet/delta_lake/index.html @@ -0,0 +1,2916 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Compare File technologies/tools - Tech Diff + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + Skip to content + + +
    +
    + +
    + + + + +
    + + +
    + +
    + + + + + + + + + +
    +
    + + + +
    +
    +
    + + + + + + + + + +
    +
    +
    + + + + +
    + +
    + + + + + + + +

    File

    +

    Apache Parquet logo Apache Parquet +Delta Lake logo Delta Lake

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    AttributeApache ParquetDelta Lake
    NameApache ParquetDelta Lake
    DescriptionApache Parquet is an open source, column-oriented data file format designed for efficient data storage and retrieval.Delta Lake is an open-source storage framework that enables building a Lakehouse architecture.
    LicenseApache license 2.0Apache license 2.0
    Source codehttps://github.com/apache/parquet-formathttps://github.com/delta-io/delta
    Websitehttps://parquet.apache.org/https://delta.io/
    Year created20132019
    CompanyTwitter, ClouderaDatabricks
    Language supportjava, scala, c++, python, r, phpscala, java, python, rust
    Use casesWrite once read many, Analytics, Efficient storage, Column based queriesWrite once read many, Analytics, Efficient storage, ACID transactions
    Is human readable +
    no
    +
    +
    no
    +
    Orientationcolumncolumn
    Has type system +
    yes
    +
    +
    yes
    +
    Has nested structure support +
    yes
    +
    +
    yes
    +
    Has native compression +
    yes
    +
    +
    yes
    +
    Has encoding support +
    yes
    +
    +
    yes
    +
    Has constraint support +
    no
    +
    +
    yes
    +
    Has acid support +
    no
    +
    +
    yes
    +
    Has metadata +
    yes
    +
    +
    yes
    +
    Has encryption support +
    yes
    +
    +
    maybe
    +
    Data processing framework support + Apache Beam, + Apache Drill, + Apache Flink, + Apache Spark, + + Apache Drill, + Apache Flink, + Apache Spark, +
    Analytics query support + Apache Hive, + Apache Impala, + Apache Druid, + Apache Pinot, + AWS Athena, + Azure Synapse, + BigQuery, + Clickhouse, + Dremio, + DuckDB, + Firebolt, + + Apache Hive, + AWS Athena, + Azure Synapse, + BigQuery, + Clickhouse, + Dremio, + Presto, + Trino, +
    + + + + + + + + + + + + +
    +
    + + + +
    + + + +
    + + + +
    +
    +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/site/file/csv/apache_avro/index.html b/site/file/csv/apache_avro/index.html new file mode 100644 index 0000000..da7ce54 --- /dev/null +++ b/site/file/csv/apache_avro/index.html @@ -0,0 +1,2921 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Compare File technologies/tools - Tech Diff + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + Skip to content + + +
    +
    + +
    + + + + +
    + + +
    + +
    + + + + + + + + + +
    +
    + + + +
    +
    +
    + + + + + + + + + +
    +
    +
    + + + + +
    + +
    + + + + + + + +

    File

    +

    CSV logo CSV +Apache Avro logo Apache Avro

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    AttributeCSVApache Avro
    NameCSVApache Avro
    DescriptionComma-Separated Values (CSV) is a text file format that uses commas to separate values in plain text.Apache Avro is the leading serialization format for record data, and first choice for streaming data pipelines.
    Source codehttps://github.com/apache/avro
    Websitehttps://www.rfc-editor.org/rfc/rfc4180.htmlhttps://avro.apache.org/
    Language supportjava, scala, c++, python, r, php, gojava, c++, c#, c, python, javascript, perl, ruby, php, rust
    LicenseN/AApache license 2.0
    Year created02009
    CompanyApache
    Use casesStream processing, Analytics, Efficient data exchange
    Is human readable +
    yes
    +
    +
    no
    +
    Orientationrowrow
    Has type system +
    no
    +
    +
    yes
    +
    Has nested structure support +
    no
    +
    +
    yes
    +
    Has native compression +
    no
    +
    +
    yes
    +
    Has encoding support +
    no
    +
    +
    yes
    +
    Has constraint support +
    no
    +
    +
    no
    +
    Has acid support +
    no
    +
    +
    no
    +
    Has metadata +
    no
    +
    +
    yes
    +
    Has encryption support +
    no
    +
    +
    no
    +
    Data processing framework support + Apache Beam, + Apache Drill, + Apache Flink, + Apache Gobblin, + Apache Hive, + Apache NiFi, + Apache Pig, + Apache Spark, + + Apache Flink, + Apache Gobblin, + Apache NiFi, + Apache Pig, + Apache Spark, +
    Analytics query support + Apache Impala, + Apache Druid, + Apache Pinot, + AWS Athena, + Azure Synapse, + BigQuery, + Clickhouse, + Dremio, + DuckDB, + Firebolt, + + Apache Impala, + Apache Druid, + Apache Hive, + Apache Pinot, + AWS Athena, + BigQuery, + Clickhouse, + Firebolt, +
    + + + + + + + + + + + + +
    +
    + + + +
    + + + +
    + + + +
    +
    +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/site/file/csv/apache_hudi/index.html b/site/file/csv/apache_hudi/index.html new file mode 100644 index 0000000..5c341d6 --- /dev/null +++ b/site/file/csv/apache_hudi/index.html @@ -0,0 +1,2917 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Compare File technologies/tools - Tech Diff + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + Skip to content + + +
    +
    + +
    + + + + +
    + + +
    + +
    + + + + + + + + + +
    +
    + + + +
    +
    +
    + + + + + + + + + +
    +
    +
    + + + + +
    + +
    + + + + + + + +

    File

    +

    CSV logo CSV +Apache Hudi logo Apache Hudi

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    AttributeCSVApache Hudi
    NameCSVApache Hudi
    DescriptionComma-Separated Values (CSV) is a text file format that uses commas to separate values in plain text.Apache Hudi is a transactional data lake platform that brings database and data warehouse capabilities to the data lake. Utilises data stored in either parquet or orc.
    Source codehttps://github.com/apache/hudi
    Websitehttps://www.rfc-editor.org/rfc/rfc4180.htmlhttps://hudi.apache.org/
    Language supportjava, scala, c++, python, r, php, go
    LicenseN/AApache license 2.0
    Year created02016
    CompanyUber
    Use casesIncremental data processing, Data upserts, Change Data Capture (CDC), ACID transactions
    Is human readable +
    yes
    +
    +
    no
    +
    Orientationrowcolumn or row
    Has type system +
    no
    +
    +
    yes
    +
    Has nested structure support +
    no
    +
    +
    yes
    +
    Has native compression +
    no
    +
    +
    yes
    +
    Has encoding support +
    no
    +
    +
    yes
    +
    Has constraint support +
    no
    +
    +
    yes
    +
    Has acid support +
    no
    +
    +
    yes
    +
    Has metadata +
    no
    +
    +
    yes
    +
    Has encryption support +
    no
    +
    +
    maybe
    +
    Data processing framework support + Apache Beam, + Apache Drill, + Apache Flink, + Apache Gobblin, + Apache Hive, + Apache NiFi, + Apache Pig, + Apache Spark, + + Apache Spark, + Apache Flink, +
    Analytics query support + Apache Impala, + Apache Druid, + Apache Pinot, + AWS Athena, + Azure Synapse, + BigQuery, + Clickhouse, + Dremio, + DuckDB, + Firebolt, + + Apache Hive, + Apache Impala, + AWS Athena, + BigQuery, + Clickhouse, + Presto, + Trino, +
    + + + + + + + + + + + + +
    +
    + + + +
    + + + +
    + + + +
    +
    +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/site/file/csv/apache_iceberg/index.html b/site/file/csv/apache_iceberg/index.html new file mode 100644 index 0000000..f0f8da5 --- /dev/null +++ b/site/file/csv/apache_iceberg/index.html @@ -0,0 +1,2923 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Compare File technologies/tools - Tech Diff + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + Skip to content + + +
    +
    + +
    + + + + +
    + + +
    + +
    + + + + + + + + + +
    +
    + + + +
    +
    +
    + + + + + + + + + +
    +
    +
    + + + + +
    + +
    + + + + + + + +

    File

    +

    CSV logo CSV +Apache Iceberg logo Apache Iceberg

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    AttributeCSVApache Iceberg
    NameCSVApache Iceberg
    DescriptionComma-Separated Values (CSV) is a text file format that uses commas to separate values in plain text.Iceberg is a high-performance format for huge analytic tables. Utilises data stored in either parquet, avro, or orc.
    Source codehttps://github.com/apache/iceberg
    Websitehttps://www.rfc-editor.org/rfc/rfc4180.htmlhttps://iceberg.apache.org/
    Language supportjava, scala, c++, python, r, php, go
    LicenseN/AApache license 2.0
    Year created02017
    CompanyNetflix
    Use casesWrite once read many, Analytics, Efficient storage, ACID transactions
    Is human readable +
    yes
    +
    +
    no
    +
    Orientationrowcolumn or row
    Has type system +
    no
    +
    +
    yes
    +
    Has nested structure support +
    no
    +
    +
    yes
    +
    Has native compression +
    no
    +
    +
    yes
    +
    Has encoding support +
    no
    +
    +
    yes
    +
    Has constraint support +
    no
    +
    +
    no
    +
    Has acid support +
    no
    +
    +
    yes
    +
    Has metadata +
    no
    +
    +
    yes
    +
    Has encryption support +
    no
    +
    +
    maybe
    +
    Data processing framework support + Apache Beam, + Apache Drill, + Apache Flink, + Apache Gobblin, + Apache Hive, + Apache NiFi, + Apache Pig, + Apache Spark, + + Apache Drill, + Apache Flink, + Apache Gobblin, + Apache Pig, + Apache Spark, +
    Analytics query support + Apache Impala, + Apache Druid, + Apache Pinot, + AWS Athena, + Azure Synapse, + BigQuery, + Clickhouse, + Dremio, + DuckDB, + Firebolt, + + Apache Impala, + Apache Druid, + Apache Hive, + AWS Athena, + BigQuery, + Clickhouse, + Dremio, + DuckDB, + Presto, + Trino, +
    + + + + + + + + + + + + +
    +
    + + + +
    + + + +
    + + + +
    +
    +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/site/file/csv/apache_orc/index.html b/site/file/csv/apache_orc/index.html new file mode 100644 index 0000000..6c29d52 --- /dev/null +++ b/site/file/csv/apache_orc/index.html @@ -0,0 +1,2924 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Compare File technologies/tools - Tech Diff + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + Skip to content + + +
    +
    + +
    + + + + +
    + + +
    + +
    + + + + + + + + + +
    +
    + + + +
    +
    +
    + + + + + + + + + +
    +
    +
    + + + + +
    + +
    + + + + + + + +

    File

    +

    CSV logo CSV +Apache ORC logo Apache ORC

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    AttributeCSVApache ORC
    NameCSVApache ORC
    DescriptionComma-Separated Values (CSV) is a text file format that uses commas to separate values in plain text.ORC is a self-describing type-aware columnar file format designed for Hadoop workloads.
    Source codehttps://github.com/apache/orc
    Websitehttps://www.rfc-editor.org/rfc/rfc4180.htmlhttps://orc.apache.org/
    Language supportjava, scala, c++, python, r, php, gojava, scala, c++, python
    LicenseN/AApache license 2.0
    Year created02013
    CompanyHortonworks, Facebook
    Use casesWrite once read many, Analytics, Efficient storage, ACID transactions
    Is human readable +
    yes
    +
    +
    no
    +
    Orientationrowrow
    Has type system +
    no
    +
    +
    yes
    +
    Has nested structure support +
    no
    +
    +
    yes
    +
    Has native compression +
    no
    +
    +
    yes
    +
    Has encoding support +
    no
    +
    +
    yes
    +
    Has constraint support +
    no
    +
    +
    no
    +
    Has acid support +
    no
    +
    +
    no
    +
    Has metadata +
    no
    +
    +
    yes
    +
    Has encryption support +
    no
    +
    +
    yes
    +
    Data processing framework support + Apache Beam, + Apache Drill, + Apache Flink, + Apache Gobblin, + Apache Hive, + Apache NiFi, + Apache Pig, + Apache Spark, + + Apache Flink, + Apache Gobblin, + Apache Hadoop, + Apache NiFi, + Apache Pig, + Apache Spark, +
    Analytics query support + Apache Impala, + Apache Druid, + Apache Pinot, + AWS Athena, + Azure Synapse, + BigQuery, + Clickhouse, + Dremio, + DuckDB, + Firebolt, + + Apache Impala, + Apache Druid, + Apache Hive, + Apache Pinot, + AWS Athena, + BigQuery, + Clickhouse, + Firebolt, + Presto, + Trino, +
    + + + + + + + + + + + + +
    +
    + + + +
    + + + +
    + + + +
    +
    +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/site/file/csv/apache_parquet/index.html b/site/file/csv/apache_parquet/index.html new file mode 100644 index 0000000..b7e8cd0 --- /dev/null +++ b/site/file/csv/apache_parquet/index.html @@ -0,0 +1,2923 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Compare File technologies/tools - Tech Diff + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + Skip to content + + +
    +
    + +
    + + + + +
    + + +
    + +
    + + + + + + + + + +
    +
    + + + +
    +
    +
    + + + + + + + + + +
    +
    +
    + + + + +
    + +
    + + + + + + + +

    File

    +

    CSV logo CSV +Apache Parquet logo Apache Parquet

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    AttributeCSVApache Parquet
    NameCSVApache Parquet
    DescriptionComma-Separated Values (CSV) is a text file format that uses commas to separate values in plain text.Apache Parquet is an open source, column-oriented data file format designed for efficient data storage and retrieval.
    Source codehttps://github.com/apache/parquet-format
    Websitehttps://www.rfc-editor.org/rfc/rfc4180.htmlhttps://parquet.apache.org/
    Language supportjava, scala, c++, python, r, php, gojava, scala, c++, python, r, php
    LicenseN/AApache license 2.0
    Year created02013
    CompanyTwitter, Cloudera
    Use casesWrite once read many, Analytics, Efficient storage, Column based queries
    Is human readable +
    yes
    +
    +
    no
    +
    Orientationrowcolumn
    Has type system +
    no
    +
    +
    yes
    +
    Has nested structure support +
    no
    +
    +
    yes
    +
    Has native compression +
    no
    +
    +
    yes
    +
    Has encoding support +
    no
    +
    +
    yes
    +
    Has constraint support +
    no
    +
    +
    no
    +
    Has acid support +
    no
    +
    +
    no
    +
    Has metadata +
    no
    +
    +
    yes
    +
    Has encryption support +
    no
    +
    +
    yes
    +
    Data processing framework support + Apache Beam, + Apache Drill, + Apache Flink, + Apache Gobblin, + Apache Hive, + Apache NiFi, + Apache Pig, + Apache Spark, + + Apache Beam, + Apache Drill, + Apache Flink, + Apache Spark, +
    Analytics query support + Apache Impala, + Apache Druid, + Apache Pinot, + AWS Athena, + Azure Synapse, + BigQuery, + Clickhouse, + Dremio, + DuckDB, + Firebolt, + + Apache Hive, + Apache Impala, + Apache Druid, + Apache Pinot, + AWS Athena, + Azure Synapse, + BigQuery, + Clickhouse, + Dremio, + DuckDB, + Firebolt, +
    + + + + + + + + + + + + +
    +
    + + + +
    + + + +
    + + + +
    +
    +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/site/file/csv/delta_lake/index.html b/site/file/csv/delta_lake/index.html new file mode 100644 index 0000000..7c294ed --- /dev/null +++ b/site/file/csv/delta_lake/index.html @@ -0,0 +1,2919 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Compare File technologies/tools - Tech Diff + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + Skip to content + + +
    +
    + +
    + + + + +
    + + +
    + +
    + + + + + + + + + +
    +
    + + + +
    +
    +
    + + + + + + + + + +
    +
    +
    + + + + +
    + +
    + + + + + + + +

    File

    +

    CSV logo CSV +Delta Lake logo Delta Lake

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    AttributeCSVDelta Lake
    NameCSVDelta Lake
    DescriptionComma-Separated Values (CSV) is a text file format that uses commas to separate values in plain text.Delta Lake is an open-source storage framework that enables building a Lakehouse architecture.
    Source codehttps://github.com/delta-io/delta
    Websitehttps://www.rfc-editor.org/rfc/rfc4180.htmlhttps://delta.io/
    Language supportjava, scala, c++, python, r, php, goscala, java, python, rust
    LicenseN/AApache license 2.0
    Year created02019
    CompanyDatabricks
    Use casesWrite once read many, Analytics, Efficient storage, ACID transactions
    Is human readable +
    yes
    +
    +
    no
    +
    Orientationrowcolumn
    Has type system +
    no
    +
    +
    yes
    +
    Has nested structure support +
    no
    +
    +
    yes
    +
    Has native compression +
    no
    +
    +
    yes
    +
    Has encoding support +
    no
    +
    +
    yes
    +
    Has constraint support +
    no
    +
    +
    yes
    +
    Has acid support +
    no
    +
    +
    yes
    +
    Has metadata +
    no
    +
    +
    yes
    +
    Has encryption support +
    no
    +
    +
    maybe
    +
    Data processing framework support + Apache Beam, + Apache Drill, + Apache Flink, + Apache Gobblin, + Apache Hive, + Apache NiFi, + Apache Pig, + Apache Spark, + + Apache Drill, + Apache Flink, + Apache Spark, +
    Analytics query support + Apache Impala, + Apache Druid, + Apache Pinot, + AWS Athena, + Azure Synapse, + BigQuery, + Clickhouse, + Dremio, + DuckDB, + Firebolt, + + Apache Hive, + AWS Athena, + Azure Synapse, + BigQuery, + Clickhouse, + Dremio, + Presto, + Trino, +
    + + + + + + + + + + + + +
    +
    + + + +
    + + + +
    + + + +
    +
    +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/site/file/delta_lake/apache_avro/index.html b/site/file/delta_lake/apache_avro/index.html new file mode 100644 index 0000000..434ced4 --- /dev/null +++ b/site/file/delta_lake/apache_avro/index.html @@ -0,0 +1,2914 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Compare File technologies/tools - Tech Diff + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + Skip to content + + +
    +
    + +
    + + + + +
    + + +
    + +
    + + + + + + + + + +
    +
    + + + +
    +
    +
    + + + + + + + + + +
    +
    +
    + + + + +
    + +
    + + + + + + + +

    File

    +

    Delta Lake logo Delta Lake +Apache Avro logo Apache Avro

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    AttributeDelta LakeApache Avro
    NameDelta LakeApache Avro
    DescriptionDelta Lake is an open-source storage framework that enables building a Lakehouse architecture.Apache Avro is the leading serialization format for record data, and first choice for streaming data pipelines.
    LicenseApache license 2.0Apache license 2.0
    Source codehttps://github.com/delta-io/deltahttps://github.com/apache/avro
    Websitehttps://delta.io/https://avro.apache.org/
    Year created20192009
    CompanyDatabricksApache
    Language supportscala, java, python, rustjava, c++, c#, c, python, javascript, perl, ruby, php, rust
    Use casesWrite once read many, Analytics, Efficient storage, ACID transactionsStream processing, Analytics, Efficient data exchange
    Is human readable +
    no
    +
    +
    no
    +
    Orientationcolumnrow
    Has type system +
    yes
    +
    +
    yes
    +
    Has nested structure support +
    yes
    +
    +
    yes
    +
    Has native compression +
    yes
    +
    +
    yes
    +
    Has encoding support +
    yes
    +
    +
    yes
    +
    Has constraint support +
    yes
    +
    +
    no
    +
    Has acid support +
    yes
    +
    +
    no
    +
    Has metadata +
    yes
    +
    +
    yes
    +
    Has encryption support +
    maybe
    +
    +
    no
    +
    Data processing framework support + Apache Drill, + Apache Flink, + Apache Spark, + + Apache Flink, + Apache Gobblin, + Apache NiFi, + Apache Pig, + Apache Spark, +
    Analytics query support + Apache Hive, + AWS Athena, + Azure Synapse, + BigQuery, + Clickhouse, + Dremio, + Presto, + Trino, + + Apache Impala, + Apache Druid, + Apache Hive, + Apache Pinot, + AWS Athena, + BigQuery, + Clickhouse, + Firebolt, +
    + + + + + + + + + + + + +
    +
    + + + +
    + + + +
    + + + +
    +
    +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/site/file/delta_lake/apache_hudi/index.html b/site/file/delta_lake/apache_hudi/index.html new file mode 100644 index 0000000..2e1ecfb --- /dev/null +++ b/site/file/delta_lake/apache_hudi/index.html @@ -0,0 +1,2910 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Compare File technologies/tools - Tech Diff + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + Skip to content + + +
    +
    + +
    + + + + +
    + + +
    + +
    + + + + + + + + + +
    +
    + + + +
    +
    +
    + + + + + + + + + +
    +
    +
    + + + + +
    + +
    + + + + + + + +

    File

    +

    Delta Lake logo Delta Lake +Apache Hudi logo Apache Hudi

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    AttributeDelta LakeApache Hudi
    NameDelta LakeApache Hudi
    DescriptionDelta Lake is an open-source storage framework that enables building a Lakehouse architecture.Apache Hudi is a transactional data lake platform that brings database and data warehouse capabilities to the data lake. Utilises data stored in either parquet or orc.
    LicenseApache license 2.0Apache license 2.0
    Source codehttps://github.com/delta-io/deltahttps://github.com/apache/hudi
    Websitehttps://delta.io/https://hudi.apache.org/
    Year created20192016
    CompanyDatabricksUber
    Language supportscala, java, python, rust
    Use casesWrite once read many, Analytics, Efficient storage, ACID transactionsIncremental data processing, Data upserts, Change Data Capture (CDC), ACID transactions
    Is human readable +
    no
    +
    +
    no
    +
    Orientationcolumncolumn or row
    Has type system +
    yes
    +
    +
    yes
    +
    Has nested structure support +
    yes
    +
    +
    yes
    +
    Has native compression +
    yes
    +
    +
    yes
    +
    Has encoding support +
    yes
    +
    +
    yes
    +
    Has constraint support +
    yes
    +
    +
    yes
    +
    Has acid support +
    yes
    +
    +
    yes
    +
    Has metadata +
    yes
    +
    +
    yes
    +
    Has encryption support +
    maybe
    +
    +
    maybe
    +
    Data processing framework support + Apache Drill, + Apache Flink, + Apache Spark, + + Apache Spark, + Apache Flink, +
    Analytics query support + Apache Hive, + AWS Athena, + Azure Synapse, + BigQuery, + Clickhouse, + Dremio, + Presto, + Trino, + + Apache Hive, + Apache Impala, + AWS Athena, + BigQuery, + Clickhouse, + Presto, + Trino, +
    + + + + + + + + + + + + +
    +
    + + + +
    + + + +
    + + + +
    +
    +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/site/file/delta_lake/apache_iceberg/index.html b/site/file/delta_lake/apache_iceberg/index.html new file mode 100644 index 0000000..721dcf9 --- /dev/null +++ b/site/file/delta_lake/apache_iceberg/index.html @@ -0,0 +1,2916 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Compare File technologies/tools - Tech Diff + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + Skip to content + + +
    +
    + +
    + + + + +
    + + +
    + +
    + + + + + + + + + +
    +
    + + + +
    +
    +
    + + + + + + + + + +
    +
    +
    + + + + +
    + +
    + + + + + + + +

    File

    +

    Delta Lake logo Delta Lake +Apache Iceberg logo Apache Iceberg

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    AttributeDelta LakeApache Iceberg
    NameDelta LakeApache Iceberg
    DescriptionDelta Lake is an open-source storage framework that enables building a Lakehouse architecture.Iceberg is a high-performance format for huge analytic tables. Utilises data stored in either parquet, avro, or orc.
    LicenseApache license 2.0Apache license 2.0
    Source codehttps://github.com/delta-io/deltahttps://github.com/apache/iceberg
    Websitehttps://delta.io/https://iceberg.apache.org/
    Year created20192017
    CompanyDatabricksNetflix
    Language supportscala, java, python, rust
    Use casesWrite once read many, Analytics, Efficient storage, ACID transactionsWrite once read many, Analytics, Efficient storage, ACID transactions
    Is human readable +
    no
    +
    +
    no
    +
    Orientationcolumncolumn or row
    Has type system +
    yes
    +
    +
    yes
    +
    Has nested structure support +
    yes
    +
    +
    yes
    +
    Has native compression +
    yes
    +
    +
    yes
    +
    Has encoding support +
    yes
    +
    +
    yes
    +
    Has constraint support +
    yes
    +
    +
    no
    +
    Has acid support +
    yes
    +
    +
    yes
    +
    Has metadata +
    yes
    +
    +
    yes
    +
    Has encryption support +
    maybe
    +
    +
    maybe
    +
    Data processing framework support + Apache Drill, + Apache Flink, + Apache Spark, + + Apache Drill, + Apache Flink, + Apache Gobblin, + Apache Pig, + Apache Spark, +
    Analytics query support + Apache Hive, + AWS Athena, + Azure Synapse, + BigQuery, + Clickhouse, + Dremio, + Presto, + Trino, + + Apache Impala, + Apache Druid, + Apache Hive, + AWS Athena, + BigQuery, + Clickhouse, + Dremio, + DuckDB, + Presto, + Trino, +
    + + + + + + + + + + + + +
    +
    + + + +
    + + + +
    + + + +
    +
    +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/site/file/delta_lake/apache_orc/index.html b/site/file/delta_lake/apache_orc/index.html new file mode 100644 index 0000000..a146ac2 --- /dev/null +++ b/site/file/delta_lake/apache_orc/index.html @@ -0,0 +1,2917 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Compare File technologies/tools - Tech Diff + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + Skip to content + + +
    +
    + +
    + + + + +
    + + +
    + +
    + + + + + + + + + +
    +
    + + + +
    +
    +
    + + + + + + + + + +
    +
    +
    + + + + +
    + +
    + + + + + + + +

    File

    +

    Delta Lake logo Delta Lake +Apache ORC logo Apache ORC

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    AttributeDelta LakeApache ORC
    NameDelta LakeApache ORC
    DescriptionDelta Lake is an open-source storage framework that enables building a Lakehouse architecture.ORC is a self-describing type-aware columnar file format designed for Hadoop workloads.
    LicenseApache license 2.0Apache license 2.0
    Source codehttps://github.com/delta-io/deltahttps://github.com/apache/orc
    Websitehttps://delta.io/https://orc.apache.org/
    Year created20192013
    CompanyDatabricksHortonworks, Facebook
    Language supportscala, java, python, rustjava, scala, c++, python
    Use casesWrite once read many, Analytics, Efficient storage, ACID transactionsWrite once read many, Analytics, Efficient storage, ACID transactions
    Is human readable +
    no
    +
    +
    no
    +
    Orientationcolumnrow
    Has type system +
    yes
    +
    +
    yes
    +
    Has nested structure support +
    yes
    +
    +
    yes
    +
    Has native compression +
    yes
    +
    +
    yes
    +
    Has encoding support +
    yes
    +
    +
    yes
    +
    Has constraint support +
    yes
    +
    +
    no
    +
    Has acid support +
    yes
    +
    +
    no
    +
    Has metadata +
    yes
    +
    +
    yes
    +
    Has encryption support +
    maybe
    +
    +
    yes
    +
    Data processing framework support + Apache Drill, + Apache Flink, + Apache Spark, + + Apache Flink, + Apache Gobblin, + Apache Hadoop, + Apache NiFi, + Apache Pig, + Apache Spark, +
    Analytics query support + Apache Hive, + AWS Athena, + Azure Synapse, + BigQuery, + Clickhouse, + Dremio, + Presto, + Trino, + + Apache Impala, + Apache Druid, + Apache Hive, + Apache Pinot, + AWS Athena, + BigQuery, + Clickhouse, + Firebolt, + Presto, + Trino, +
    + + + + + + + + + + + + +
    +
    + + + +
    + + + +
    + + + +
    +
    +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/site/file/delta_lake/apache_parquet/index.html b/site/file/delta_lake/apache_parquet/index.html new file mode 100644 index 0000000..1afc562 --- /dev/null +++ b/site/file/delta_lake/apache_parquet/index.html @@ -0,0 +1,2916 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Compare File technologies/tools - Tech Diff + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + Skip to content + + +
    +
    + +
    + + + + +
    + + +
    + +
    + + + + + + + + + +
    +
    + + + +
    +
    +
    + + + + + + + + + +
    +
    +
    + + + + +
    + +
    + + + + + + + +

    File

    +

    Delta Lake logo Delta Lake +Apache Parquet logo Apache Parquet

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    AttributeDelta LakeApache Parquet
    NameDelta LakeApache Parquet
    DescriptionDelta Lake is an open-source storage framework that enables building a Lakehouse architecture.Apache Parquet is an open source, column-oriented data file format designed for efficient data storage and retrieval.
    LicenseApache license 2.0Apache license 2.0
    Source codehttps://github.com/delta-io/deltahttps://github.com/apache/parquet-format
    Websitehttps://delta.io/https://parquet.apache.org/
    Year created20192013
    CompanyDatabricksTwitter, Cloudera
    Language supportscala, java, python, rustjava, scala, c++, python, r, php
    Use casesWrite once read many, Analytics, Efficient storage, ACID transactionsWrite once read many, Analytics, Efficient storage, Column based queries
    Is human readable +
    no
    +
    +
    no
    +
    Orientationcolumncolumn
    Has type system +
    yes
    +
    +
    yes
    +
    Has nested structure support +
    yes
    +
    +
    yes
    +
    Has native compression +
    yes
    +
    +
    yes
    +
    Has encoding support +
    yes
    +
    +
    yes
    +
    Has constraint support +
    yes
    +
    +
    no
    +
    Has acid support +
    yes
    +
    +
    no
    +
    Has metadata +
    yes
    +
    +
    yes
    +
    Has encryption support +
    maybe
    +
    +
    yes
    +
    Data processing framework support + Apache Drill, + Apache Flink, + Apache Spark, + + Apache Beam, + Apache Drill, + Apache Flink, + Apache Spark, +
    Analytics query support + Apache Hive, + AWS Athena, + Azure Synapse, + BigQuery, + Clickhouse, + Dremio, + Presto, + Trino, + + Apache Hive, + Apache Impala, + Apache Druid, + Apache Pinot, + AWS Athena, + Azure Synapse, + BigQuery, + Clickhouse, + Dremio, + DuckDB, + Firebolt, +
    + + + + + + + + + + + + +
    +
    + + + +
    + + + +
    + + + +
    +
    +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/site/file/delta_lake/csv/index.html b/site/file/delta_lake/csv/index.html new file mode 100644 index 0000000..9d1d6b6 --- /dev/null +++ b/site/file/delta_lake/csv/index.html @@ -0,0 +1,2919 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Compare File technologies/tools - Tech Diff + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + Skip to content + + +
    +
    + +
    + + + + +
    + + +
    + +
    + + + + + + + + + +
    +
    + + + +
    +
    +
    + + + + + + + + + +
    +
    +
    + + + + +
    + +
    + + + + + + + +

    File

    +

    Delta Lake logo Delta Lake +CSV logo CSV

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    AttributeDelta LakeCSV
    NameDelta LakeCSV
    DescriptionDelta Lake is an open-source storage framework that enables building a Lakehouse architecture.Comma-Separated Values (CSV) is a text file format that uses commas to separate values in plain text.
    LicenseApache license 2.0N/A
    Source codehttps://github.com/delta-io/delta
    Websitehttps://delta.io/https://www.rfc-editor.org/rfc/rfc4180.html
    Year created20190
    CompanyDatabricks
    Language supportscala, java, python, rustjava, scala, c++, python, r, php, go
    Use casesWrite once read many, Analytics, Efficient storage, ACID transactions
    Is human readable +
    no
    +
    +
    yes
    +
    Orientationcolumnrow
    Has type system +
    yes
    +
    +
    no
    +
    Has nested structure support +
    yes
    +
    +
    no
    +
    Has native compression +
    yes
    +
    +
    no
    +
    Has encoding support +
    yes
    +
    +
    no
    +
    Has constraint support +
    yes
    +
    +
    no
    +
    Has acid support +
    yes
    +
    +
    no
    +
    Has metadata +
    yes
    +
    +
    no
    +
    Has encryption support +
    maybe
    +
    +
    no
    +
    Data processing framework support + Apache Drill, + Apache Flink, + Apache Spark, + + Apache Beam, + Apache Drill, + Apache Flink, + Apache Gobblin, + Apache Hive, + Apache NiFi, + Apache Pig, + Apache Spark, +
    Analytics query support + Apache Hive, + AWS Athena, + Azure Synapse, + BigQuery, + Clickhouse, + Dremio, + Presto, + Trino, + + Apache Impala, + Apache Druid, + Apache Pinot, + AWS Athena, + Azure Synapse, + BigQuery, + Clickhouse, + Dremio, + DuckDB, + Firebolt, +
    + + + + + + + + + + + + +
    +
    + + + +
    + + + +
    + + + +
    +
    +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/site/file/index.html b/site/file/index.html index 0a0ab65..a7d52e9 100644 --- a/site/file/index.html +++ b/site/file/index.html @@ -16,7 +16,7 @@ - + @@ -241,16 +241,17 @@ -
  • - - - + +
  • + + File - -
  • + + + @@ -258,16 +259,17 @@ -
  • - - - + +
  • + + - Job_orchestration + Job orchestration - -
  • + + + @@ -352,6 +354,57 @@ + + + + + + + + +
  • + + + + + + + + + + + +
  • + + + + + + + + + + + + + + + + +
  • + + + + + + + + + + + +
  • + + @@ -950,13 +3099,13 @@

    File

    - +