Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

adds support for id_column in tables for auto_publish #790

Merged
merged 26 commits into from
Aug 13, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
61a4a03
adds support for id_column in tables
Binabh Jul 30, 2023
d4f27e5
fix rust suggestions
Binabh Jul 30, 2023
164299c
remove unnecessary call to map
Binabh Jul 30, 2023
4f00bdc
add support for multiple id columns
Binabh Jul 30, 2023
8c5e465
adds info for id column not found
Binabh Jul 31, 2023
e1d66e1
convert match to if let
Binabh Jul 31, 2023
db355f4
change logic to check available columns and types
Binabh Aug 1, 2023
5aae45a
collapse if else block
Binabh Aug 1, 2023
6d1c4af
rename id_formatter to source_id_formatter with backwards compatibility
Binabh Aug 1, 2023
d04d860
minor cleanup suggestions
nyurik Aug 2, 2023
204ae01
Merge pull request #1 from nyurik/column-id-fixes
Binabh Aug 2, 2023
60172ff
Merge branch 'main' into specify-column-id-auto-publish
nyurik Aug 3, 2023
ddbf520
minor changes to the id/typ check
nyurik Aug 3, 2023
37c8826
Merge branch 'main' into specify-column-id-auto-publish
nyurik Aug 3, 2023
b33a069
Merge branch 'main' into specify-column-id-auto-publish
nyurik Aug 3, 2023
233095e
Merge branch 'main' into specify-column-id-auto-publish
nyurik Aug 3, 2023
450a6c2
Partial implementation of the id_column detection
nyurik Aug 3, 2023
529a16a
Merge pull request #2 from nyurik/specify-column-id-auto-publish
Binabh Aug 3, 2023
7491218
Merge branch 'main' into specify-column-id-auto-publish
nyurik Aug 3, 2023
f0d9e60
rename id_column to id_columns
Binabh Aug 12, 2023
f112a6a
make logging consistent
Binabh Aug 12, 2023
d2dd32d
remove duplicate id_column value in properties
Binabh Aug 12, 2023
3a149b6
remove column from properties if already in id_column
Binabh Aug 12, 2023
7b83b3f
add test for bigint feature id
Binabh Aug 12, 2023
4c1ce7b
Merge branch 'main' into specify-column-id-auto-publish
nyurik Aug 13, 2023
3be8f4c
minor test cleanup
nyurik Aug 13, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions docs/src/config-file.md
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,10 @@ postgres:
source_id_format: 'table.{schema}.{table}.{column}'
# Add more schemas to the ones listed above
from_schemas: my_other_schema
# A table column to use as the feature ID
# If a table has no column with this name, `id_column` will not be set for that table.
# If a list of strings is given, the first found column will be treated as a feature ID.
id_columns: feature_id
functions:
# Optionally set how source ID should be generated based on the function's name and schema
source_id_format: '{schema}.{function}'
Expand Down
4 changes: 4 additions & 0 deletions justfile
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,10 @@ debug-page *ARGS: start
psql *ARGS:
psql {{ ARGS }} {{ DATABASE_URL }}

# Run PSQL utility against the test database
pg_dump *ARGS:
pg_dump {{ ARGS }} {{ DATABASE_URL }}

# Perform cargo clean to delete all build files
clean: clean-test stop
cargo clean
Expand Down
6 changes: 6 additions & 0 deletions martin/src/pg/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,12 @@ pub struct PgCfgPublishType {
#[serde(skip_serializing_if = "Option::is_none")]
#[serde(alias = "id_format")]
pub source_id_format: Option<String>,
/// A table column to use as the feature ID
/// If a table has no column with this name, `id_column` will not be set for that table.
/// If a list of strings is given, the first found column will be treated as a feature ID.
#[serde(skip_serializing_if = "Option::is_none")]
#[serde(alias = "id_column")]
pub id_columns: Option<OneOrMany<String>>,
}

impl PgConfig {
Expand Down
59 changes: 58 additions & 1 deletion martin/src/pg/configurator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ use crate::pg::pool::PgPool;
use crate::pg::table_source::{
calc_srid, merge_table_info, query_available_tables, table_to_query,
};
use crate::pg::utils::{find_info, normalize_key, InfoMap};
use crate::pg::utils::{find_info, find_kv_ignore_case, normalize_key, InfoMap};
use crate::pg::PgError::InvalidTableExtent;
use crate::pg::Result;
use crate::source::Sources;
Expand All @@ -27,6 +27,7 @@ pub type SqlTableInfoMapMapMap = InfoMap<InfoMap<InfoMap<TableInfo>>>;
pub struct PgBuilderAuto {
source_id_format: String,
schemas: Option<HashSet<String>>,
id_columns: Option<Vec<String>>,
}

#[derive(Debug)]
Expand Down Expand Up @@ -122,6 +123,7 @@ impl PgBuilder {
let id2 = self.resolve_id(&source_id, &db_inf);
let Some(srid) = calc_srid(&db_inf.format_id(), &id2, db_inf.srid, 0, self.default_srid) else { continue };
db_inf.srid = srid;
update_id_column(&id2, &mut db_inf, auto_tables);
info!("Discovered source {id2} from {}", summary(&db_inf));
pending.push(table_to_query(
id2,
Expand Down Expand Up @@ -232,12 +234,57 @@ impl PgBuilder {
}
}

/// Try to find any ID column in a list of table columns (properties) that match one of the given `id_column` values.
/// If found, modify `id_column` value on the table info.
fn update_id_column(id: &str, inf: &mut TableInfo, auto_tables: &PgBuilderAuto) {
let Some(props) = inf.properties.as_mut() else { return };
let Some(try_columns) = &auto_tables.id_columns else { return };

for key in try_columns.iter() {
let (column, typ) = if let Some(typ) = props.get(key) {
(key, typ)
} else {
match find_kv_ignore_case(props, key) {
Ok(Some(result)) => {
info!("For source {id}, id_column '{key}' was not found, but found '{result}' instead.");
(result, props.get(result).unwrap())
}
Ok(None) => continue,
Err(multiple) => {
error!("Unable to configure source {id} because id_column '{key}' has no exact match or more than one potential matches: {}", multiple.join(", "));
continue;
}
}
};
// ID column can be any integer type as defined in
// https://github.com/postgis/postgis/blob/559c95d85564fb74fa9e3b7eafb74851810610da/postgis/mvt.c#L387C4-L387C66
if typ != "int4" && typ != "int8" && typ != "int2" {
Binabh marked this conversation as resolved.
Show resolved Hide resolved
warn!("Unable to use column `{key}` in table {}.{} as a tile feature ID because it has a non-integer type `{typ}`.", inf.schema, inf.table);
continue;
}

inf.id_column = Some(column.to_string());
let mut final_props = props.clone();
Binabh marked this conversation as resolved.
Show resolved Hide resolved
final_props.remove(column);
inf.properties = Some(final_props);
return;
}

info!(
"No ID column found for table {}.{} - searched for an integer column named {}.",
inf.schema,
inf.table,
try_columns.join(", ")
);
}

fn new_auto_publish(config: &PgConfig, is_function: bool) -> Option<PgBuilderAuto> {
let default_id_fmt = |is_func| (if is_func { "{function}" } else { "{table}" }).to_string();
let default = |schemas| {
Some(PgBuilderAuto {
source_id_format: default_id_fmt(is_function),
schemas,
id_columns: None,
})
};

Expand All @@ -252,6 +299,14 @@ fn new_auto_publish(config: &PgConfig, is_function: bool) -> Option<PgBuilderAut
.cloned()
.unwrap_or_else(|| default_id_fmt(is_function)),
schemas: merge_opt_hs(&a.from_schemas, &item.from_schemas),
id_columns: item.id_columns.as_ref().and_then(|ids| {
if is_function {
error!("Configuration parameter auto_publish.functions.id_columns is not supported");
None
} else {
Some(ids.iter().cloned().collect())
}
}),
}),
BoolOrObject::Bool(true) => default(merge_opt_hs(&a.from_schemas, &None)),
BoolOrObject::Bool(false) => None,
Expand Down Expand Up @@ -287,6 +342,7 @@ fn summary(info: &TableInfo) -> String {
Some(true) => "view",
_ => "table",
};
// TODO: add column_id to the summary if it is set
format!(
"{relkind} {}.{} with {} column ({}, SRID={})",
info.schema,
Expand Down Expand Up @@ -331,6 +387,7 @@ mod tests {
Some(PgBuilderAuto {
source_id_format: source_id_format.to_string(),
schemas: schemas.map(|s| s.iter().map(|s| (*s).to_string()).collect()),
id_columns: None,
})
}

Expand Down
1 change: 1 addition & 0 deletions tests/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ postgres:
auto_publish:
tables:
from_schemas: autodetect
id_columns: [feat_id, big_feat_id]

# Associative arrays of table sources
tables:
Expand Down
5 changes: 5 additions & 0 deletions tests/expected/auto/catalog_auto.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,11 @@
"content_type": "application/x-protobuf",
"description": "autodetect.auto_table.geom"
},
{
"id": "bigint_table",
"content_type": "application/x-protobuf",
"description": "autodetect.bigint_table.geom"
},
{
"id": "function_Mixed_Name",
"content_type": "application/x-protobuf",
Expand Down
5 changes: 5 additions & 0 deletions tests/expected/configured/catalog_cfg.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,11 @@
"content_type": "application/x-protobuf",
"description": "autodetect.auto_table.geom"
},
{
"id": "bigint_table",
"content_type": "application/x-protobuf",
"description": "autodetect.bigint_table.geom"
},
{
"id": "function_zxy_query",
"content_type": "application/x-protobuf",
Expand Down
12 changes: 12 additions & 0 deletions tests/expected/generated_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,18 @@ postgres:
properties:
feat_id: int4
gid: int4
bigint_table:
schema: autodetect
table: bigint_table
srid: 4326
geometry_column: geom
extent: 4096
buffer: 64
clip_geom: true
geometry_type: POINT
properties:
big_feat_id: int8
id: int4
points1:
schema: public
table: points1
Expand Down
22 changes: 21 additions & 1 deletion tests/expected/given_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@ postgres:
auto_publish:
tables:
from_schemas: autodetect
id_columns:
- feat_id
- big_feat_id
tables:
MixPoints:
schema: MixedCase
Expand All @@ -28,6 +31,7 @@ postgres:
table: auto_table
srid: 4326
geometry_column: geom
id_column: feat_id
Binabh marked this conversation as resolved.
Show resolved Hide resolved
bounds:
- -166.87107126230424
- -53.44747249115674
Expand All @@ -38,8 +42,24 @@ postgres:
clip_geom: true
geometry_type: POINT
properties:
feat_id: int4
gid: int4
bigint_table:
schema: autodetect
table: bigint_table
srid: 4326
geometry_column: geom
id_column: big_feat_id
bounds:
- -174.89475564568033
- -77.2579745396886
- 174.72753224514435
- 73.80785950599903
extent: 4096
buffer: 64
clip_geom: true
geometry_type: POINT
properties:
id: int4
points1:
layer_id: abc
schema: public
Expand Down
57 changes: 57 additions & 0 deletions tests/fixtures/tables/autodetect.sql
Original file line number Diff line number Diff line change
Expand Up @@ -55,3 +55,60 @@ values (1, 71951, '0101000020E6100000211700C9E6DA6140F510E7C8F4DA2740'),

CREATE INDEX ON autodetect.auto_table USING GIST (geom);
CLUSTER auto_table_geom_idx ON autodetect.auto_table;


CREATE TABLE autodetect.bigint_table
(
id SERIAL PRIMARY KEY,
big_feat_id BIGINT,
geom GEOMETRY(POINT, 4326)
);

-- INSERT INTO autodetect.bigint_table
-- SELECT generate_series(1, 3) as id,
-- (random() * 100000)::bigint as big_feat_id,
-- (ST_DUMP(ST_GENERATEPOINTS(st_transform(st_tileenvelope(18, 235085, 122323), 4326), 3))).geom;
-- INSERT INTO autodetect.bigint_table
-- SELECT generate_series(4, 30) as id,
-- (random() * 100000)::bigint as big_feat_id,
-- (ST_DUMP(ST_GENERATEPOINTS(st_transform(st_tileenvelope(0, 0, 0), 4326), 27))).geom;
--
-- To dump the data above, uncomment code the above, comment the INSERT code bellow, and run:
-- just restart
-- just pg_dump --data-only --inserts --rows-per-insert=100 --table=autodetect.bigint_table

INSERT INTO autodetect.bigint_table
VALUES (1, 89698, '0101000020E6100000D4F7A0E7E2DA61402E2E980CB9DA2740'),
(2, 97180, '0101000020E61000007A523359E5DA614005BA7E9C85DA2740'),
(3, 91401, '0101000020E6100000267D6637ECDA61404174148C04DB2740'),
(4, 19522, '0101000020E610000038BE427924075C40562A5373C3172B40'),
(5, 49248, '0101000020E610000056ACFE9B1B195040B046F6B4A23D4440'),
(6, 75734, '0101000020E6100000788397D6A1DC65C08EB80652655A3840'),
(7, 71026, '0101000020E6100000EC1697F79472644072CAA4A7825053C0'),
(8, 97852, '0101000020E6100000C6F5B3F147D7654069A3BA7DDAE75140'),
(9, 69521, '0101000020E6100000FC6AF6D55BBB56409A2D1063813A50C0'),
(10, 87053, '0101000020E6100000ED99CCB0CCE559C08127412EE0294840'),
(11, 12902, '0101000020E61000006217B1200EEB61C0244EB0D2B3343CC0'),
(12, 39932, '0101000020E61000004441096983A141405B2358D3501951C0'),
(13, 46528, '0101000020E61000000440A3130F2F22C0DA24320DBB884940'),
(14, 44983, '0101000020E6100000757C081A6AAF4C40F638AE6D76235240'),
(15, 67146, '0101000020E61000007362E3AC00ED5AC05E54ABF8767342C0'),
(16, 81358, '0101000020E61000002DFD3B5895995E40C2719E2453BC2E40'),
(17, 62090, '0101000020E610000088F832477C074D40D0BCE1674EF0EE3F'),
(18, 89445, '0101000020E6100000F842E68F1C3862C046498B626D844B40'),
(19, 26213, '0101000020E61000005CC3556056316440A8AF47A7B12750C0'),
(20, 32077, '0101000020E61000005A111ACFC82E6040D4815BF8B3735240'),
(21, 61633, '0101000020E6100000106F7D3FA780514078C38538196A31C0'),
(22, 78250, '0101000020E6100000C096DB4E2ECC4CC0BAD71FB916584C40'),
(23, 16594, '0101000020E6100000562AF86EEEAE5BC09253DCA354D24FC0'),
(24, 59198, '0101000020E61000002C1AA37C3C953C40EED3DEACD3904EC0'),
(25, 48619, '0101000020E61000008490A3B7F1984140EEAFFCB7C8C54A40'),
(26, 38695, '0101000020E61000005F575A5F50CD50C08E617B39FBDE3DC0'),
(27, 34060, '0101000020E6100000E8D20BA94C905D40A413D67AEBD73E40'),
(28, 36744, '0101000020E61000007E97BBCB8F2C54401C352F1857913A40'),
(29, 34349, '0101000020E6100000F25A591EB3AA5FC0809D251CD1694540'),
(30, 89548, '0101000020E610000052F29460E5F96140CAA28F5DB33C51C0');


CREATE INDEX ON autodetect.bigint_table USING GIST (geom);
CLUSTER bigint_table_geom_idx ON autodetect.bigint_table;
Loading