feat: [dataproc] add support for new Dataproc features (#5666)
* feat: add support for new Dataproc features
1. Add Flink job support to the Jobs API
2. Add `unreachable` output field to the ListJobs API

PiperOrigin-RevId: 672705294

Source-Link: googleapis/googleapis@32bc036

Source-Link: googleapis/googleapis-gen@46e7728
Copy-Tag: eyJwIjoicGFja2FnZXMvZ29vZ2xlLWNsb3VkLWRhdGFwcm9jLy5Pd2xCb3QueWFtbCIsImgiOiI0NmU3NzI4Yzk5MDhkOTc5M2ViY2UxMDYxYjBkMWM2YzRiYWQ5MjViIn0=

* 🦉 Updates from OwlBot post-processor

See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md

* 🦉 Updates from OwlBot post-processor

See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md

* feat: add support for new Dataproc features
1. Add Flink and Trino job support to the Workflow Templates API
2. Add `unreachable` output field to the ListWorkflowTemplates API
3. Add `unreachable` output field to the ListBatches API
4. Add KMS key input to the CreateCluster API
5. Add FLINK metric source to the Dataproc metric sources

PiperOrigin-RevId: 673000575

Source-Link: googleapis/googleapis@02f62c8

Source-Link: googleapis/googleapis-gen@7726f47
Copy-Tag: eyJwIjoicGFja2FnZXMvZ29vZ2xlLWNsb3VkLWRhdGFwcm9jLy5Pd2xCb3QueWFtbCIsImgiOiI3NzI2ZjQ3OGFjMTlkOTFlOTE0ZWQzYWU1NDZjZjI0NDUzZDAwMGI1In0=

* 🦉 Updates from OwlBot post-processor

See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md

---------

Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
Co-authored-by: sofisl <[email protected]>
3 people authored Sep 12, 2024
1 parent 5a26698 commit a53df0d
Showing 7 changed files with 1,497 additions and 14 deletions.
@@ -170,6 +170,11 @@ message ListBatchesResponse {
// A token, which can be sent as `page_token` to retrieve the next page.
// If this field is omitted, there are no subsequent pages.
string next_page_token = 2;

// Output only. List of Batches that could not be included in the response.
// Attempting to get one of these resources may indicate why it was not
// included in the list response.
repeated string unreachable = 3 [(google.api.field_behavior) = OUTPUT_ONLY];
}

// A request to delete a batch workload.
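The `unreachable` field added to ListBatchesResponse (batches.proto) names batches that could not be included in a page. Below is a minimal sketch of reading it with this package's Node.js client, assuming the usual google-gax behavior that the raw response is the third tuple element when auto-pagination is disabled; the project and region values are placeholders:

```ts
import {v1} from '@google-cloud/dataproc';

const projectId = 'my-project';   // placeholder
const region = 'us-central1';     // placeholder

async function listBatchesWithUnreachable(): Promise<void> {
  const client = new v1.BatchControllerClient({
    apiEndpoint: `${region}-dataproc.googleapis.com`,
  });

  // With auto-pagination disabled, the raw ListBatchesResponse (which
  // carries the new `unreachable` field) is returned as the third element.
  const [batches, , response] = await client.listBatches(
    {parent: `projects/${projectId}/locations/${region}`},
    {autoPaginate: false}
  );

  console.log(`Listed ${batches.length} batches`);
  for (const name of response?.unreachable ?? []) {
    // Each entry names a batch that could not be included in the list;
    // fetching it individually may reveal why.
    console.log(`Unreachable batch: ${name}`);
  }
}
```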
@@ -382,6 +382,34 @@ message EncryptionConfig {
// Optional. The Cloud KMS key name to use for PD disk encryption for all
// instances in the cluster.
string gce_pd_kms_key_name = 1 [(google.api.field_behavior) = OPTIONAL];

// Optional. The Cloud KMS key resource name to use for cluster persistent
// disk and job argument encryption. See [Use CMEK with cluster data]
// (https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/customer-managed-encryption#use_cmek_with_cluster_data)
// for more information.
//
// When this key resource name is provided, the following job arguments of
// the following job types submitted to the cluster are encrypted using CMEK:
//
// * [FlinkJob
// args](https://cloud.google.com/dataproc/docs/reference/rest/v1/FlinkJob)
// * [HadoopJob
// args](https://cloud.google.com/dataproc/docs/reference/rest/v1/HadoopJob)
// * [SparkJob
// args](https://cloud.google.com/dataproc/docs/reference/rest/v1/SparkJob)
// * [SparkRJob
// args](https://cloud.google.com/dataproc/docs/reference/rest/v1/SparkRJob)
// * [PySparkJob
// args](https://cloud.google.com/dataproc/docs/reference/rest/v1/PySparkJob)
// * [SparkSqlJob](https://cloud.google.com/dataproc/docs/reference/rest/v1/SparkSqlJob)
// scriptVariables and queryList.queries
// * [HiveJob](https://cloud.google.com/dataproc/docs/reference/rest/v1/HiveJob)
// scriptVariables and queryList.queries
// * [PigJob](https://cloud.google.com/dataproc/docs/reference/rest/v1/PigJob)
// scriptVariables and queryList.queries
// * [PrestoJob](https://cloud.google.com/dataproc/docs/reference/rest/v1/PrestoJob)
// scriptVariables and queryList.queries
string kms_key = 2 [(google.api.field_behavior) = OPTIONAL];
}

// Common config settings for resources of Compute Engine cluster
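The new `kms_key` field joins the existing `gce_pd_kms_key_name` in the cluster EncryptionConfig (clusters.proto). A hedged sketch of setting it through the Node.js ClusterControllerClient; the project, region, cluster name, and KMS key resource name are placeholder values:

```ts
import {v1} from '@google-cloud/dataproc';

const projectId = 'my-project';   // placeholder
const region = 'us-central1';     // placeholder
const kmsKey =
  'projects/my-project/locations/us-central1/keyRings/my-ring/cryptoKeys/my-key'; // placeholder

async function createClusterWithCmek(): Promise<void> {
  const client = new v1.ClusterControllerClient({
    apiEndpoint: `${region}-dataproc.googleapis.com`,
  });

  const [operation] = await client.createCluster({
    projectId,
    region,
    cluster: {
      clusterName: 'cmek-cluster',   // placeholder
      config: {
        // New field: encrypts persistent disks and the supported job
        // arguments listed in the proto comment above.
        encryptionConfig: {kmsKey},
      },
    },
  });
  const [cluster] = await operation.promise();
  console.log(`Created ${cluster.clusterName}`);
}
```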
@@ -1211,6 +1239,9 @@ message DataprocMetricConfig {

// hivemetastore metric source
HIVEMETASTORE = 7;

// flink metric source
FLINK = 8;
}

// A Dataproc custom metric.
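The FLINK value extends the MetricSource enum in DataprocMetricConfig (clusters.proto). A hedged fragment showing how it could be enabled, assuming the generated client exposes the config as `dataprocMetricConfig` and accepts the enum by name:

```ts
// ClusterConfig fragment enabling the new FLINK metric source; field and
// enum names follow the proto above. Pass this object as `cluster.config`
// in a CreateCluster request like the sketch preceding it.
const configWithFlinkMetrics = {
  dataprocMetricConfig: {
    metrics: [{metricSource: 'FLINK'}],
  },
};
```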
@@ -515,6 +515,47 @@ message TrinoJob {
LoggingConfig logging_config = 7 [(google.api.field_behavior) = OPTIONAL];
}

// A Dataproc job for running Apache Flink applications on YARN.
message FlinkJob {
// Required. The specification of the main method to call to drive the job.
// Specify either the jar file that contains the main class or the main class
// name. To pass both a main jar and a main class in the jar, add the jar to
// [jarFileUris][google.cloud.dataproc.v1.FlinkJob.jar_file_uris], and then
// specify the main class name in
// [mainClass][google.cloud.dataproc.v1.FlinkJob.main_class].
oneof driver {
// The HCFS URI of the jar file that contains the main class.
string main_jar_file_uri = 1;

// The name of the driver's main class. The jar file that contains the class
// must be in the default CLASSPATH or specified in
// [jarFileUris][google.cloud.dataproc.v1.FlinkJob.jar_file_uris].
string main_class = 2;
}

// Optional. The arguments to pass to the driver. Do not include arguments,
// such as `--conf`, that can be set as job properties, since a collision
// might occur that causes an incorrect job submission.
repeated string args = 3 [(google.api.field_behavior) = OPTIONAL];

// Optional. HCFS URIs of jar files to add to the CLASSPATHs of the
// Flink driver and tasks.
repeated string jar_file_uris = 4 [(google.api.field_behavior) = OPTIONAL];

// Optional. HCFS URI of the savepoint, which contains the last saved progress
// for starting the current job.
string savepoint_uri = 9 [(google.api.field_behavior) = OPTIONAL];

// Optional. A mapping of property names to values, used to configure Flink.
// Properties that conflict with values set by the Dataproc API might be
// overwritten. Can include properties set in
// /etc/flink/conf/flink-defaults.conf and classes in user code.
map<string, string> properties = 7 [(google.api.field_behavior) = OPTIONAL];

// Optional. The runtime log config for job execution.
LoggingConfig logging_config = 8 [(google.api.field_behavior) = OPTIONAL];
}

// Dataproc job config.
message JobPlacement {
// Required. The name of the cluster where the job will be submitted.
@@ -722,6 +763,9 @@ message Job {

// Optional. Job is a Trino job.
TrinoJob trino_job = 28 [(google.api.field_behavior) = OPTIONAL];

// Optional. Job is a Flink job.
FlinkJob flink_job = 29 [(google.api.field_behavior) = OPTIONAL];
}

// Output only. The job status. Additional application-specific
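Taken together, the FlinkJob message and the new `flink_job` field on Job (jobs.proto) let Flink applications be submitted like any other Dataproc job type. A sketch using JobControllerClient.submitJob; the cluster name, main class, jar URI, arguments, and property are illustrative placeholders:

```ts
import {v1} from '@google-cloud/dataproc';

const projectId = 'my-project';     // placeholder
const region = 'us-central1';       // placeholder
const clusterName = 'my-cluster';   // placeholder

async function submitFlinkJob(): Promise<void> {
  const client = new v1.JobControllerClient({
    apiEndpoint: `${region}-dataproc.googleapis.com`,
  });

  const [job] = await client.submitJob({
    projectId,
    region,
    job: {
      placement: {clusterName},
      // New FlinkJob message: main class resolved from a jar added to the
      // Flink driver and task CLASSPATHs.
      flinkJob: {
        mainClass: 'org.apache.flink.examples.java.wordcount.WordCount', // placeholder
        jarFileUris: ['gs://my-bucket/wordcount.jar'],                   // placeholder
        args: ['--input', 'gs://my-bucket/input.txt'],                   // placeholder
        properties: {'parallelism.default': '2'},                        // placeholder
      },
    },
  });
  console.log(`Submitted job ${job.reference?.jobId}`);
}
```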
@@ -964,6 +1008,12 @@ message ListJobsResponse {
// to fetch. To fetch additional results, provide this value as the
// `page_token` in a subsequent <code>ListJobsRequest</code>.
string next_page_token = 2 [(google.api.field_behavior) = OPTIONAL];

// Output only. List of jobs with
// [kms_key][google.cloud.dataproc.v1.EncryptionConfig.kms_key]-encrypted
// parameters that could not be decrypted. A response to a `jobs.get` request
// may indicate the reason for the decryption failure for a specific job.
repeated string unreachable = 3 [(google.api.field_behavior) = OUTPUT_ONLY];
}

// A request to cancel a job.
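Reading the new `unreachable` field on ListJobsResponse works the same way as for batches above, under the same google-gax pagination assumption; this fragment reuses the `projectId` and `region` placeholders:

```ts
const jobClient = new v1.JobControllerClient({
  apiEndpoint: `${region}-dataproc.googleapis.com`,
});

// Auto-pagination off so the raw ListJobsResponse is available.
const [jobs, , listResponse] = await jobClient.listJobs(
  {projectId, region},
  {autoPaginate: false}
);
console.log(`Listed ${jobs.length} jobs`);
for (const jobId of listResponse?.unreachable ?? []) {
  // Per the proto comment above, a jobs.get call on this ID may indicate
  // why its kms_key-encrypted parameters could not be decrypted.
  console.log(`Job with undecryptable parameters: ${jobId}`);
}
```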
@@ -198,6 +198,39 @@ message WorkflowTemplate {
history: ORIGINALLY_SINGLE_PATTERN
};

// Encryption settings for encrypting workflow template job arguments.
message EncryptionConfig {
// Optional. The Cloud KMS key name to use for encrypting
// workflow template job arguments.
//
// When this key is provided, the following workflow template
// [job arguments]
// (https://cloud.google.com/dataproc/docs/concepts/workflows/use-workflows#adding_jobs_to_a_template),
// if present, are
// [CMEK
// encrypted](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/customer-managed-encryption#use_cmek_with_workflow_template_data):
//
// * [FlinkJob
// args](https://cloud.google.com/dataproc/docs/reference/rest/v1/FlinkJob)
// * [HadoopJob
// args](https://cloud.google.com/dataproc/docs/reference/rest/v1/HadoopJob)
// * [SparkJob
// args](https://cloud.google.com/dataproc/docs/reference/rest/v1/SparkJob)
// * [SparkRJob
// args](https://cloud.google.com/dataproc/docs/reference/rest/v1/SparkRJob)
// * [PySparkJob
// args](https://cloud.google.com/dataproc/docs/reference/rest/v1/PySparkJob)
// * [SparkSqlJob](https://cloud.google.com/dataproc/docs/reference/rest/v1/SparkSqlJob)
// scriptVariables and queryList.queries
// * [HiveJob](https://cloud.google.com/dataproc/docs/reference/rest/v1/HiveJob)
// scriptVariables and queryList.queries
// * [PigJob](https://cloud.google.com/dataproc/docs/reference/rest/v1/PigJob)
// scriptVariables and queryList.queries
// * [PrestoJob](https://cloud.google.com/dataproc/docs/reference/rest/v1/PrestoJob)
// scriptVariables and queryList.queries
string kms_key = 1 [(google.api.field_behavior) = OPTIONAL];
}

string id = 2 [(google.api.field_behavior) = REQUIRED];

// Output only. The resource name of the workflow template, as described
@@ -270,6 +303,11 @@ message WorkflowTemplate {
// the cluster is deleted.
google.protobuf.Duration dag_timeout = 10
[(google.api.field_behavior) = OPTIONAL];

// Optional. Encryption settings for encrypting workflow template job
// arguments.
EncryptionConfig encryption_config = 11
[(google.api.field_behavior) = OPTIONAL];
}

// Specifies workflow execution target.
@@ -371,6 +409,12 @@ message OrderedJob {

// Optional. Job is a Presto job.
PrestoJob presto_job = 12 [(google.api.field_behavior) = OPTIONAL];

// Optional. Job is a Trino job.
TrinoJob trino_job = 13 [(google.api.field_behavior) = OPTIONAL];

// Optional. Job is a Flink job.
FlinkJob flink_job = 14 [(google.api.field_behavior) = OPTIONAL];
}

// Optional. The labels to associate with this job.
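The workflow template changes mirror the cluster and job changes: an EncryptionConfig with a `kms_key`, plus Trino and Flink variants in the OrderedJob oneof (workflow_templates.proto). A hedged sketch that creates a template using both; every name, label, URI, and key below is a placeholder:

```ts
import {v1} from '@google-cloud/dataproc';

const projectId = 'my-project';   // placeholder
const region = 'us-central1';     // placeholder
const kmsKey =
  'projects/my-project/locations/us-central1/keyRings/my-ring/cryptoKeys/my-key'; // placeholder

async function createFlinkWorkflowTemplate(): Promise<void> {
  const client = new v1.WorkflowTemplateServiceClient({
    apiEndpoint: `${region}-dataproc.googleapis.com`,
  });

  const [template] = await client.createWorkflowTemplate({
    parent: `projects/${projectId}/regions/${region}`,
    template: {
      id: 'flink-wordcount',   // placeholder
      placement: {
        // Run on an existing cluster selected by label (placeholder label).
        clusterSelector: {
          clusterLabels: {'goog-dataproc-cluster-name': 'my-cluster'},
        },
      },
      // New field: CMEK-encrypts the supported job arguments in this template.
      encryptionConfig: {kmsKey},
      jobs: [
        {
          stepId: 'flink-step',
          // New OrderedJob variant added in this commit.
          flinkJob: {
            mainClass: 'org.apache.flink.examples.java.wordcount.WordCount', // placeholder
            jarFileUris: ['gs://my-bucket/wordcount.jar'],                   // placeholder
          },
        },
      ],
    },
  });
  console.log(`Created template ${template.name}`);
}
```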
@@ -806,6 +850,11 @@ message ListWorkflowTemplatesResponse {
// results to fetch. To fetch additional results, provide this value as the
// page_token in a subsequent <code>ListWorkflowTemplatesRequest</code>.
string next_page_token = 2 [(google.api.field_behavior) = OUTPUT_ONLY];

// Output only. List of workflow templates that could not be included in the
// response. Attempting to get one of these resources may indicate why it was
// not included in the list response.
repeated string unreachable = 3 [(google.api.field_behavior) = OUTPUT_ONLY];
}

// A request to delete a workflow template.
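Finally, ListWorkflowTemplatesResponse gains the same `unreachable` field; this short fragment reuses the client and placeholders from the template sketch above:

```ts
const [templates, , listResponse] = await client.listWorkflowTemplates(
  {parent: `projects/${projectId}/regions/${region}`},
  {autoPaginate: false}
);
console.log(`Listed ${templates.length} templates`);
console.log('Unreachable templates:', listResponse?.unreachable ?? []);
```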
