Skip to content

Commit

Permalink
Merge pull request #166 from keboola/vb-CT-1526-add-snowflake-vector-…
Browse files Browse the repository at this point in the history
…datatype

CT-1526 add snowflake vector datatype
  • Loading branch information
vojtabiberle authored Aug 7, 2024
2 parents 1648bfa + d70f4a8 commit c91e470
Show file tree
Hide file tree
Showing 11 changed files with 301 additions and 78 deletions.
2 changes: 1 addition & 1 deletion packages/php-datatypes/src/Definition/Exasol.php
Original file line number Diff line number Diff line change
Expand Up @@ -328,7 +328,7 @@ private function validateLength(string $type, $length = null): void
}

if (preg_match('/(?<val>[1-9]+\d*)\s*(?<unit>BYTE|BIT)/i', (string) $length, $matched)) {
$val = $matched['val'];
$val = (int) $matched['val'];
$unit = strtoupper($matched['unit']);

$limits = [
Expand Down
20 changes: 20 additions & 0 deletions packages/php-datatypes/src/Definition/Snowflake.php
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ class Snowflake extends Common
public const TYPE_ARRAY = 'ARRAY';
public const TYPE_GEOGRAPHY = 'GEOGRAPHY';
public const TYPE_GEOMETRY = 'GEOMETRY';
public const TYPE_VECTOR = 'VECTOR';
public const TYPES = [
self::TYPE_NUMBER,
self::TYPE_DEC,
Expand Down Expand Up @@ -103,6 +104,7 @@ class Snowflake extends Common
self::TYPE_ARRAY,
self::TYPE_GEOGRAPHY,
self::TYPE_GEOMETRY,
self::TYPE_VECTOR,
];
public const MAX_VARCHAR_LENGTH = 16777216;
public const MAX_VARBINARY_LENGTH = 8388608;
Expand Down Expand Up @@ -350,6 +352,24 @@ private function validateLength(string $type, $length = null): void
break;
}
break;
case self::TYPE_VECTOR:
$valid = false;
if ($length === null || !is_string($length)) {
break;
}
/** matches:
* TYPE - INT|FLOAT - case insensitive
* ,
* horizontal whitespace (anything but line breaks), zero or more times
* DIM - 1 to 4 digits (range 1-4096 is checked below)
*/
if (preg_match('/^(?<TYPE>INT|FLOAT),[^\S\r\n]*(?<DIM>[\d]{1,4})$/i', $length, $matches)) {
$dimension = (int) $matches['DIM'];
if ($dimension > 0 && $dimension <= 4096) {
$valid = true;
}
}
break;
default:
if (!is_null($length) && $length !== '') {
$valid = false;
Expand Down
25 changes: 23 additions & 2 deletions packages/php-datatypes/tests/SnowflakeDatatypeTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,7 @@ public function testInvalidBinaryLengths($length): void
public function testBasetypes(): void
{
foreach (Snowflake::TYPES as $type) {
$basetype = (new Snowflake($type))->getBasetype();
$basetype = (new Snowflake($type, $this->getTypeDefaultOptions($type)))->getBasetype();
switch ($type) {
case 'INT':
case 'INTEGER':
Expand Down Expand Up @@ -484,7 +484,8 @@ public function testArrayFromLength(string $type, ?string $length, array $expect
*/
public function testBackendBasetypeFromAlias(string $type, string $expectedType): void
{
$definition = new Snowflake($type);

$definition = new Snowflake($type, $this->getTypeDefaultOptions($type));
$this->assertSame($expectedType, $definition->getBackendBasetype());
}

Expand Down Expand Up @@ -569,4 +570,24 @@ public function provideTestGetTypeFromAlias(): Generator
];
}
}

/**
 * Returns the constructor options needed to instantiate the given Snowflake type in tests.
 *
 * Types that can be built without any options get an empty array.
 *
 * @return array{
 *     length?:string|null|array,
 *     nullable?:bool,
 *     default?:string|null
 * }
 */
private function getTypeDefaultOptions(string $type): array
{
    // VECTOR has no meaningful default length, so an explicit one is supplied.
    if ($type === Snowflake::TYPE_VECTOR) {
        return ['length' => 'INT,3'];
    }

    return [];
}
}
8 changes: 5 additions & 3 deletions packages/php-db-import-export/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,9 @@

### Docker

Prepare `.env` (copy of `.env.dist`) and set up AWS keys which has access to `keboola-drivers` bucket in order to build this image.
Prepare `.env` (copy of `.env.dist`) and set up AWS keys which have access to the `keboola-drivers` bucket in order to build this image. Also add this user to the group `ci-php-import-export-lib`, which will allow you to work with the newly created bucket for tests.

The user can be created in `Dev - Main legacy`, which also contains the groups for `keboola-drivers` and `ci-php-import-export-lib`.

If you don't have access to `keboola-drivers` you have to change Dockerfile.
- Comment out first stage which downloads Teradata driver and tools and supply own downloaded from Teradata site:
Expand Down Expand Up @@ -54,7 +56,7 @@ ABS_CONTAINER_NAME=containerName
- Create service account in [IAM](https://console.cloud.google.com/iam-admin/serviceaccounts)
- In bucket permissions grant service account admin access to bucket
- Create new service account key
- Convert key to string `awk -v RS= '{$1=$1}1' <key_file>.json >> .env`
- Convert key to string `awk -v RS= '{$1=$1}1' <key_file>.json >> .env` (or `cat file.json | jq -c | jq -R`)
- Set content on last line of .env as variable `GCS_CREDENTIALS`

- Upload test fixtures to GCS `docker compose run --rm dev composer loadGcs-bigquery` or `docker compose run --rm dev composer loadGcs-snowflake` (depending on backend)
Expand Down Expand Up @@ -84,7 +86,7 @@ CREATE STORAGE INTEGRATION "KEBOOLA_DB_IMPORT_EXPORT"
STORAGE_ALLOWED_LOCATIONS = ('gcs://<your gcs bucket>/');
-- set integration name to env GCS_INTEGRATION_NAME in .env file
-- get service account id `STORAGE_GCP_SERVICE_ACCOUNT`
DESC STORAGE INTEGRATION "CI_PHP_IE_LIB";
DESC STORAGE INTEGRATION "KEBOOLA_DB_IMPORT_EXPORT";
-- continue according to the manual ^ in the Snowflake documentation: assign roles for data loading and unloading
```

Expand Down
6 changes: 4 additions & 2 deletions packages/php-db-import-export/provisioning/src/Delete.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,14 @@

namespace Keboola\Provisioning;

use Symfony\Component\Console\Attribute\AsCommand;
use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Input\InputOption;
use Symfony\Component\Console\Output\OutputInterface;

#[AsCommand(
name: 'app:delete:synapse',
)]
final class Delete extends BaseCmd
{
private const OPTION_AZURE_RESOURCE_GROUP = 'resourceGroup';
Expand All @@ -17,8 +21,6 @@ final class Delete extends BaseCmd
private const OPTION_SYNAPSE_SERVICE_PRINCIPAL_OBJECT_ID = 'synapseServicePrincipalObjectId';
private const OPTION_SYNAPSE_SQL_SERVER_NAME = 'synapseSqlServerName';

protected static string $defaultName = 'app:delete:synapse';

protected function configure(): void
{
$this
Expand Down
6 changes: 4 additions & 2 deletions packages/php-db-import-export/provisioning/src/Deploy.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,19 @@

namespace Keboola\Provisioning;

use Symfony\Component\Console\Attribute\AsCommand;
use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Input\InputOption;
use Symfony\Component\Console\Output\OutputInterface;

#[AsCommand(
name: 'app:deploy:synapse',
)]
final class Deploy extends BaseCmd
{
private const OPTION_AZURE_RESOURCE_GROUP = 'resourceGroup';
private const OPTION_SERVER_NAME = 'serverName';

protected static string $defaultName = 'app:deploy:synapse';

protected function configure(): void
{
$this
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ class SqlBuilder
Snowflake::TYPE_VARIANT,
Snowflake::TYPE_OBJECT,
Snowflake::TYPE_ARRAY,
Snowflake::TYPE_VECTOR,
];
public const SRC_ALIAS = 'src';

Expand Down Expand Up @@ -210,6 +211,24 @@ public function getInsertAllIntoTargetTableCommand(
);
continue;
}
if ($type === Snowflake::TYPE_ARRAY) {
$columnsSetSql[] = sprintf(
'CAST(PARSE_JSON(%s) AS %s) AS %s',
SnowflakeQuote::quoteSingleIdentifier($sourceColumn->getColumnName()),
$destinationColumn->getColumnDefinition()->getSQLDefinition(),
SnowflakeQuote::quoteSingleIdentifier($destinationColumn->getColumnName()),
);
continue;
}
if ($type === Snowflake::TYPE_VECTOR) {
$columnsSetSql[] = sprintf(
'CAST(PARSE_JSON(%s) AS ARRAY)::%s AS %s',
SnowflakeQuote::quoteSingleIdentifier($sourceColumn->getColumnName()),
$destinationColumn->getColumnDefinition()->getSQLDefinition(),
SnowflakeQuote::quoteSingleIdentifier($destinationColumn->getColumnName()),
);
continue;
}
$columnsSetSql[] = sprintf(
'CAST(%s AS %s) AS %s',
SnowflakeQuote::quoteSingleIdentifier($sourceColumn->getColumnName()),
Expand Down Expand Up @@ -311,6 +330,24 @@ public function getUpdateWithPkCommand(
);
continue;
}
if ($type === Snowflake::TYPE_ARRAY) {
$columnsSet[] = sprintf(
'%s = CAST(PARSE_JSON("src".%s) AS %s)',
SnowflakeQuote::quoteSingleIdentifier($destinationColumn->getColumnName()),
SnowflakeQuote::quoteSingleIdentifier($sourceColumn->getColumnName()),
$destinationColumn->getColumnDefinition()->getSQLDefinition(),
);
continue;
}
if ($type === Snowflake::TYPE_VECTOR) {
$columnsSet[] = sprintf(
'%s = CAST(PARSE_JSON("src".%s) AS ARRAY)::%s',
SnowflakeQuote::quoteSingleIdentifier($destinationColumn->getColumnName()),
SnowflakeQuote::quoteSingleIdentifier($sourceColumn->getColumnName()),
$destinationColumn->getColumnDefinition()->getSQLDefinition(),
);
continue;
}
$columnsSet[] = sprintf(
'%s = CAST("src".%s AS %s)',
SnowflakeQuote::quoteSingleIdentifier($destinationColumn->getColumnName()),
Expand Down
Loading

0 comments on commit c91e470

Please sign in to comment.