Skip to content

Commit

Permalink
feat: add more configurability to feature_group.proto (#5656)
Browse files Browse the repository at this point in the history
* feat: returns usage metadata for context caching

PiperOrigin-RevId: 670830441

Source-Link: googleapis/googleapis@95a1490

Source-Link: googleapis/googleapis-gen@7bffbbc
Copy-Tag: eyJwIjoicGFja2FnZXMvZ29vZ2xlLWNsb3VkLWFpcGxhdGZvcm0vLk93bEJvdC55YW1sIiwiaCI6IjdiZmZiYmM2ODdhNzc2OGNkMTZkNjk2MzUzMzJjOGY4MTQwNDgxYTkifQ==

* feat: add FLEX_START to Scheduling.strategy

PiperOrigin-RevId: 670984417

Source-Link: googleapis/googleapis@fa23e30

Source-Link: googleapis/googleapis-gen@474efb3
Copy-Tag: eyJwIjoicGFja2FnZXMvZ29vZ2xlLWNsb3VkLWFpcGxhdGZvcm0vLk93bEJvdC55YW1sIiwiaCI6IjQ3NGVmYjNjMjY5ZmMxMDQ0NTdkYjJkM2RiNGUxMWQ3YzdjMGEwNjMifQ==

* 🦉 Updates from OwlBot post-processor

See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md

* feat: add FLEX_START to Scheduling.strategy

PiperOrigin-RevId: 670985191

Source-Link: googleapis/googleapis@cb6264a

Source-Link: googleapis/googleapis-gen@9d3a1d7
Copy-Tag: eyJwIjoicGFja2FnZXMvZ29vZ2xlLWNsb3VkLWFpcGxhdGZvcm0vLk93bEJvdC55YW1sIiwiaCI6IjlkM2ExZDc4ZjVlZmY0ZWQxNjM0NGJlNGFlMWE3OTE0NWU5MDA3YmMifQ==

* feat: add more configurability to feature_group.proto
feat: add ragSource to feature_view.proto
feat: add sync watermark to feature_view_sync.proto
docs: fix typo in feature_online_store_admin_service.proto

PiperOrigin-RevId: 671111091

Source-Link: googleapis/googleapis@cb6a8ee

Source-Link: googleapis/googleapis-gen@f38a35f
Copy-Tag: eyJwIjoicGFja2FnZXMvZ29vZ2xlLWNsb3VkLWFpcGxhdGZvcm0vLk93bEJvdC55YW1sIiwiaCI6ImYzOGEzNWZmYzgzN2E5ZmRkZmM1OTI4OTAxNjYzZTlkMzJlZWVjMzQifQ==

* feat: add more configurability to feature_group.proto
feat: add ragSource to feature_view.proto
feat: add sync watermark to feature_view_sync.proto
docs: fix typo in feature_online_store_admin_service.proto

PiperOrigin-RevId: 671113897

Source-Link: googleapis/googleapis@68f6c7f

Source-Link: googleapis/googleapis-gen@03aeb16
Copy-Tag: eyJwIjoicGFja2FnZXMvZ29vZ2xlLWNsb3VkLWFpcGxhdGZvcm0vLk93bEJvdC55YW1sIiwiaCI6IjAzYWViMTY5ZTQyN2RmMDI1ZjU1NzYwM2RhZDhkZGNmZjU5YTQ2MmIifQ==

* 🦉 Updates from OwlBot post-processor

See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md

---------

Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
Co-authored-by: sofisl <[email protected]>
  • Loading branch information
3 people authored Sep 5, 2024
1 parent f4072a0 commit 9760c20
Show file tree
Hide file tree
Showing 34 changed files with 2,099 additions and 18 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -366,17 +366,20 @@ message Scheduling {
// Strategy will default to STANDARD.
STRATEGY_UNSPECIFIED = 0;

// Regular on-demand provisioning strategy.
// Deprecated. Regular on-demand provisioning strategy.
ON_DEMAND = 1 [deprecated = true];

// Low cost by making potential use of spot resources.
// Deprecated. Low cost by making potential use of spot resources.
LOW_COST = 2 [deprecated = true];

// Standard provisioning strategy uses regular on-demand resources.
STANDARD = 3;

// Spot provisioning strategy uses spot resources.
SPOT = 4;

// Flex Start strategy uses DWS to queue for resources.
FLEX_START = 6;
}

// The maximum job running time. The default is 7 days.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,12 +60,28 @@ message FeatureGroup {
repeated string entity_id_columns = 2
[(google.api.field_behavior) = OPTIONAL];

// Optional. Set if the data source is not a time-series.
bool static_data_source = 3 [(google.api.field_behavior) = OPTIONAL];

// Optional. If the source is a time-series source, this can be set to
// control how downstream sources (ex:
// [FeatureView][google.cloud.aiplatform.v1.FeatureView] ) will treat
// time-series sources. If not set, will treat the source as a time-series
// source with `feature_timestamp` as timestamp column and no scan boundary.
TimeSeries time_series = 4 [(google.api.field_behavior) = OPTIONAL];

// Optional. If set, all feature values will be fetched
// from a single row per unique entityId including nulls.
// If not set, will collapse all rows for each unique entityId into a single
// row with any non-null values if present; if no non-null values are
// present, will sync null.
// ex: If source has schema
// `(entity_id, feature_timestamp, f0, f1)` and the following rows:
// `(e1, 2020-01-01T10:00:00.123Z, 10, 15)`
// `(e1, 2020-02-01T10:00:00.123Z, 20, null)`
// If dense is set, `(e1, 20, null)` is synced to online stores. If dense is
// not set, `(e1, 20, 15)` is synced to online stores.
bool dense = 5 [(google.api.field_behavior) = OPTIONAL];
}

oneof source {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -543,7 +543,7 @@ message SyncFeatureViewRequest {
];
}

// Respose message for
// Response message for
// [FeatureOnlineStoreAdminService.SyncFeatureView][google.cloud.aiplatform.v1.FeatureOnlineStoreAdminService.SyncFeatureView].
message SyncFeatureViewResponse {
// Format:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,25 @@ message FeatureView {
optional int64 project_number = 2 [(google.api.field_behavior) = OPTIONAL];
}

// A Vertex RAG source for features that need to be synced to Online
// Store. Carries the BigQuery table/view to materialize (`uri`) and,
// optionally, the id of the RAG corpus that corresponds to the FeatureView
// (`rag_corpus_id`).
message VertexRagSource {
// Required. The BigQuery view/table URI that will be materialized on each
// manual sync trigger. The table/view is expected to have the following
// columns and types at least:
// - `corpus_id` (STRING, NULLABLE/REQUIRED)
// - `file_id` (STRING, NULLABLE/REQUIRED)
// - `chunk_id` (STRING, NULLABLE/REQUIRED)
// - `chunk_data_type` (STRING, NULLABLE/REQUIRED)
// - `chunk_data` (STRING, NULLABLE/REQUIRED)
// - `embeddings` (FLOAT, REPEATED)
// - `file_original_uri` (STRING, NULLABLE/REQUIRED)
string uri = 1 [(google.api.field_behavior) = REQUIRED];

// Optional. The RAG corpus id corresponding to this FeatureView.
// NOTE(review): the `corpus_id` column listed for `uri` is STRING while this
// field is int64 -- confirm how the two values are expected to correspond.
int64 rag_corpus_id = 2 [(google.api.field_behavior) = OPTIONAL];
}

oneof source {
// Optional. Configures how data is supposed to be extracted from a BigQuery
// source to be loaded onto the FeatureOnlineStore.
Expand All @@ -167,6 +186,10 @@ message FeatureView {
// need to be loaded onto the FeatureOnlineStore.
FeatureRegistrySource feature_registry_source = 9
[(google.api.field_behavior) = OPTIONAL];

// Optional. The Vertex RAG Source that the FeatureView is linked to.
VertexRagSource vertex_rag_source = 18
[(google.api.field_behavior) = OPTIONAL];
}

// Identifier. Name of the FeatureView. Format:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,10 @@ message FeatureViewSync {

// Output only. BigQuery slot milliseconds consumed for the sync job.
int64 total_slot = 2 [(google.api.field_behavior) = OUTPUT_ONLY];

// Lower bound of the system time watermark for the sync job. This is only
// set for continuously syncing feature views.
google.protobuf.Timestamp system_watermark_time = 5;
}

// Identifier. Name of the FeatureViewSync. Format:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,24 @@ message CachedContent {
singular: "cachedContent"
};

// Metadata on the usage of the cached content: an overall token total plus
// per-modality amounts (text, image, video, audio).
message UsageMetadata {
// Total number of tokens that the cached content consumes.
int32 total_token_count = 1;

// Number of text characters.
int32 text_count = 2;

// Number of images.
int32 image_count = 3;

// Duration of video in seconds. Expressed as a whole number of seconds
// (int32), so sub-second precision is not representable.
int32 video_duration_seconds = 4;

// Duration of audio in seconds. Expressed as a whole number of seconds
// (int32), so sub-second precision is not representable.
int32 audio_duration_seconds = 5;
}

// Expiration time of the cached content.
oneof expiration {
// Timestamp of when this resource is considered expired.
Expand Down Expand Up @@ -112,4 +130,7 @@ message CachedContent {
// Output only. When the cache entry was last updated in UTC time.
google.protobuf.Timestamp update_time = 8
[(google.api.field_behavior) = OUTPUT_ONLY];

// Output only. Metadata on the usage of the cached content.
UsageMetadata usage_metadata = 12 [(google.api.field_behavior) = OUTPUT_ONLY];
}
Original file line number Diff line number Diff line change
Expand Up @@ -366,17 +366,20 @@ message Scheduling {
// Strategy will default to STANDARD.
STRATEGY_UNSPECIFIED = 0;

// Regular on-demand provisioning strategy.
// Deprecated. Regular on-demand provisioning strategy.
ON_DEMAND = 1 [deprecated = true];

// Low cost by making potential use of spot resources.
// Deprecated. Low cost by making potential use of spot resources.
LOW_COST = 2 [deprecated = true];

// Standard provisioning strategy uses regular on-demand resources.
STANDARD = 3;

// Spot provisioning strategy uses spot resources.
SPOT = 4;

// Flex Start strategy uses DWS to queue for resources.
FLEX_START = 6;
}

// The maximum job running time. The default is 7 days.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,12 +60,28 @@ message FeatureGroup {
repeated string entity_id_columns = 2
[(google.api.field_behavior) = OPTIONAL];

// Optional. Set if the data source is not a time-series.
bool static_data_source = 3 [(google.api.field_behavior) = OPTIONAL];

// Optional. If the source is a time-series source, this can be set to
// control how downstream sources (ex:
// [FeatureView][google.cloud.aiplatform.v1beta1.FeatureView] ) will treat
// time-series sources. If not set, will treat the source as a time-series
// source with `feature_timestamp` as timestamp column and no scan boundary.
TimeSeries time_series = 4 [(google.api.field_behavior) = OPTIONAL];

// Optional. If set, all feature values will be fetched
// from a single row per unique entityId including nulls.
// If not set, will collapse all rows for each unique entityId into a single
// row with any non-null values if present; if no non-null values are
// present, will sync null.
// ex: If source has schema
// `(entity_id, feature_timestamp, f0, f1)` and the following rows:
// `(e1, 2020-01-01T10:00:00.123Z, 10, 15)`
// `(e1, 2020-02-01T10:00:00.123Z, 20, null)`
// If dense is set, `(e1, 20, null)` is synced to online stores. If dense is
// not set, `(e1, 20, 15)` is synced to online stores.
bool dense = 5 [(google.api.field_behavior) = OPTIONAL];
}

oneof source {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -543,7 +543,7 @@ message SyncFeatureViewRequest {
];
}

// Respose message for
// Response message for
// [FeatureOnlineStoreAdminService.SyncFeatureView][google.cloud.aiplatform.v1beta1.FeatureOnlineStoreAdminService.SyncFeatureView].
message SyncFeatureViewResponse {
// Format:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,25 @@ message FeatureView {
optional int64 project_number = 2 [(google.api.field_behavior) = OPTIONAL];
}

// A Vertex RAG source for features that need to be synced to Online
// Store. Carries the BigQuery table/view to materialize (`uri`) and,
// optionally, the id of the RAG corpus that corresponds to the FeatureView
// (`rag_corpus_id`).
message VertexRagSource {
// Required. The BigQuery view/table URI that will be materialized on each
// manual sync trigger. The table/view is expected to have the following
// columns and types at least:
// - `corpus_id` (STRING, NULLABLE/REQUIRED)
// - `file_id` (STRING, NULLABLE/REQUIRED)
// - `chunk_id` (STRING, NULLABLE/REQUIRED)
// - `chunk_data_type` (STRING, NULLABLE/REQUIRED)
// - `chunk_data` (STRING, NULLABLE/REQUIRED)
// - `embeddings` (FLOAT, REPEATED)
// - `file_original_uri` (STRING, NULLABLE/REQUIRED)
string uri = 1 [(google.api.field_behavior) = REQUIRED];

// Optional. The RAG corpus id corresponding to this FeatureView.
// NOTE(review): the `corpus_id` column listed for `uri` is STRING while this
// field is int64 -- confirm how the two values are expected to correspond.
int64 rag_corpus_id = 2 [(google.api.field_behavior) = OPTIONAL];
}

// Service agent type used during data sync.
enum ServiceAgentType {
// By default, the project-level Vertex AI Service Agent is enabled.
Expand All @@ -263,6 +282,10 @@ message FeatureView {
// need to be loaded onto the FeatureOnlineStore.
FeatureRegistrySource feature_registry_source = 9
[(google.api.field_behavior) = OPTIONAL];

// Optional. The Vertex RAG Source that the FeatureView is linked to.
VertexRagSource vertex_rag_source = 18
[(google.api.field_behavior) = OPTIONAL];
}

// Identifier. Name of the FeatureView. Format:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,10 @@ message FeatureViewSync {

// Output only. BigQuery slot milliseconds consumed for the sync job.
int64 total_slot = 2 [(google.api.field_behavior) = OUTPUT_ONLY];

// Lower bound of the system time watermark for the sync job. This is only
// set for continuously syncing feature views.
google.protobuf.Timestamp system_watermark_time = 5;
}

// Identifier. Name of the FeatureViewSync. Format:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -826,6 +826,11 @@ message GenerateContentResponse {
int32 candidates_token_count = 2;

int32 total_token_count = 3;

// Output only. Number of tokens in the cached part in the input (the cached
// content).
int32 cached_content_token_count = 5
[(google.api.field_behavior) = OUTPUT_ONLY];
}

// Output only. Generated candidates.
Expand Down
Loading

0 comments on commit 9760c20

Please sign in to comment.