diff --git a/pom.xml b/pom.xml index 9c38677d62..b36cc88048 100644 --- a/pom.xml +++ b/pom.xml @@ -206,6 +206,11 @@ oss-sonatype https://oss.sonatype.org/content/repositories/releases/ + + + huawei + https://repo.huaweicloud.com/repository/maven/huaweicloudsdk/ + @@ -283,7 +288,7 @@ true javadocs - engine-java-docs + taier-java-docs diff --git a/sql/1.5/1.5_increment.sql b/sql/1.5/1.5_increment.sql new file mode 100644 index 0000000000..1069aa8006 --- /dev/null +++ b/sql/1.5/1.5_increment.sql @@ -0,0 +1,95 @@ +DELETE FROM console_component_config WHERE component_id = -109; +INSERT INTO console_component_config (cluster_id, component_id, component_type_code, type, required, `key`, value, `values`, dependencyKey, dependencyValue, `desc`, gmt_create, gmt_modified, is_deleted) VALUES (-2, -109, 1, 'CHECKBOX', 1, 'deploymode', '["perjob"]', null, '', '', null, now(), now(), 0); +INSERT INTO console_component_config (cluster_id, component_id, component_type_code, type, required, `key`, value, `values`, dependencyKey, dependencyValue, `desc`, gmt_create, gmt_modified, is_deleted) VALUES (-2, -109, 1, 'GROUP', 1, 'perjob', 'perjob', null, 'deploymode', 'perjob', null, now(), now(), 0); +INSERT INTO console_component_config (cluster_id, component_id, component_type_code, type, required, `key`, value, `values`, dependencyKey, dependencyValue, `desc`, gmt_create, gmt_modified, is_deleted) VALUES (-2, -109, 1, 'INPUT', 0, 'addColumnSupport', 'true', null, 'deploymode$perjob', null, null, now(), now(), 0); +INSERT INTO console_component_config (cluster_id, component_id, component_type_code, type, required, `key`, value, `values`, dependencyKey, dependencyValue, `desc`, gmt_create, gmt_modified, is_deleted) VALUES (-2, -109, 1, 'INPUT', 1, 'spark.cores.max', '1', null, 'deploymode$perjob', null, null, now(), now(), 0); +INSERT INTO console_component_config (cluster_id, component_id, component_type_code, type, required, `key`, value, `values`, dependencyKey, dependencyValue, `desc`, gmt_create, gmt_modified, is_deleted) VALUES (-2, -109, 1, 'INPUT', 0, 'spark.driver.extraJavaOptions', '-Dfile.encoding=utf-8', null, 'deploymode$perjob', null, null, now(), now(), 0); +INSERT INTO console_component_config (cluster_id, component_id, component_type_code, type, required, `key`, value, `values`, dependencyKey, dependencyValue, `desc`, gmt_create, gmt_modified, is_deleted) VALUES (-2, -109, 1, 'INPUT', 0, 'spark.eventLog.compress', 'true', null, 'deploymode$perjob', null, null, now(), now(), 0); +INSERT INTO console_component_config (cluster_id, component_id, component_type_code, type, required, `key`, value, `values`, dependencyKey, dependencyValue, `desc`, gmt_create, gmt_modified, is_deleted) VALUES (-2, -109, 1, 'INPUT', 0, 'spark.eventLog.dir', 'hdfs:///tmp/spark-yarn-logs', null, 'deploymode$perjob', null, null, now(), now(), 0); +INSERT INTO console_component_config (cluster_id, component_id, component_type_code, type, required, `key`, value, `values`, dependencyKey, dependencyValue, `desc`, gmt_create, gmt_modified, is_deleted) VALUES (-2, -109, 1, 'INPUT', 0, 'spark.eventLog.enabled', 'true', null, 'deploymode$perjob', null, null, now(), now(), 0); +INSERT INTO console_component_config (cluster_id, component_id, component_type_code, type, required, `key`, value, `values`, dependencyKey, dependencyValue, `desc`, gmt_create, gmt_modified, is_deleted) VALUES (-2, -109, 1, 'INPUT', 1, 'spark.executor.cores', '1', null, 'deploymode$perjob', null, null, now(), now(), 0); +INSERT INTO console_component_config 
(cluster_id, component_id, component_type_code, type, required, `key`, value, `values`, dependencyKey, dependencyValue, `desc`, gmt_create, gmt_modified, is_deleted) VALUES (-2, -109, 1, 'INPUT', 0, 'spark.executor.extraJavaOptions', '-Dfile.encoding=utf-8', null, 'deploymode$perjob', null, null, now(), now(), 0); +INSERT INTO console_component_config (cluster_id, component_id, component_type_code, type, required, `key`, value, `values`, dependencyKey, dependencyValue, `desc`, gmt_create, gmt_modified, is_deleted) VALUES (-2, -109, 1, 'INPUT', 1, 'spark.executor.heartbeatInterval', '10s', null, 'deploymode$perjob', null, null, now(), now(), 0); +INSERT INTO console_component_config (cluster_id, component_id, component_type_code, type, required, `key`, value, `values`, dependencyKey, dependencyValue, `desc`, gmt_create, gmt_modified, is_deleted) VALUES (-2, -109, 1, 'INPUT', 1, 'spark.executor.instances', '1', null, 'deploymode$perjob', null, null, now(), now(), 0); +INSERT INTO console_component_config (cluster_id, component_id, component_type_code, type, required, `key`, value, `values`, dependencyKey, dependencyValue, `desc`, gmt_create, gmt_modified, is_deleted) VALUES (-2, -109, 1, 'INPUT', 1, 'spark.executor.memory', '512m', null, 'deploymode$perjob', null, null, now(), now(), 0); + +INSERT INTO console_component_config (cluster_id, component_id, component_type_code, type, required, `key`, value, `values`, dependencyKey, dependencyValue, `desc`, gmt_create, gmt_modified, is_deleted) VALUES (-2, -109, 1, 'INPUT', 1, 'spark.sql.adaptive.advisoryPartitionSizeInBytes', '64MB', null, 'deploymode$perjob', null, null, now(), now(), 0); +INSERT INTO console_component_config (cluster_id, component_id, component_type_code, type, required, `key`, value, `values`, dependencyKey, dependencyValue, `desc`, gmt_create, gmt_modified, is_deleted) VALUES (-2, -109, 1, 'INPUT', 1, 'spark.sql.adaptive.coalescePartitions.minPartitionSize', '1MB', null, 'deploymode$perjob', null, null, now(), now(), 0); +INSERT INTO console_component_config (cluster_id, component_id, component_type_code, type, required, `key`, value, `values`, dependencyKey, dependencyValue, `desc`, gmt_create, gmt_modified, is_deleted) VALUES (-2, -109, 1, 'INPUT', 1, 'spark.sql.adaptive.coalescePartitions.initialPartitionNum', '200', null, 'deploymode$perjob', null, null, now(), now(), 0); +INSERT INTO console_component_config (cluster_id, component_id, component_type_code, type, required, `key`, value, `values`, dependencyKey, dependencyValue, `desc`, gmt_create, gmt_modified, is_deleted) VALUES (-2, -109, 1, 'INPUT', 1, 'spark.sql.adaptive.skewJoin.skewedPartitionThresholdInBytes', '256MB', null, 'deploymode$perjob', null, null, now(), now(), 0); +INSERT INTO console_component_config (cluster_id, component_id, component_type_code, type, required, `key`, value, `values`, dependencyKey, dependencyValue, `desc`, gmt_create, gmt_modified, is_deleted) VALUES (-2, -109, 1, 'INPUT', 1, 'spark.sql.adaptive.skewJoin.skewedPartitionFactor', '5', null, 'deploymode$perjob', null, null, now(), now(), 0); + +INSERT INTO console_component_config (cluster_id, component_id, component_type_code, type, required, `key`, value, `values`, dependencyKey, dependencyValue, `desc`, gmt_create, gmt_modified, is_deleted) VALUES (-2, -109, 1, 'INPUT', 1, 'spark.network.timeout', '600s', null, 'deploymode$perjob', null, null, now(), now(), 0); +INSERT INTO console_component_config (cluster_id, component_id, component_type_code, type, required, `key`, value, 
`values`, dependencyKey, dependencyValue, `desc`, gmt_create, gmt_modified, is_deleted) VALUES (-2, -109, 1, 'INPUT', 1, 'spark.rpc.askTimeout', '600s', null, 'deploymode$perjob', null, null, now(), now(), 0); +INSERT INTO console_component_config (cluster_id, component_id, component_type_code, type, required, `key`, value, `values`, dependencyKey, dependencyValue, `desc`, gmt_create, gmt_modified, is_deleted) VALUES (-2, -109, 1, 'INPUT', 1, 'spark.speculation', 'true', null, 'deploymode$perjob', null, null, now(), now(), 0); +INSERT INTO console_component_config (cluster_id, component_id, component_type_code, type, required, `key`, value, `values`, dependencyKey, dependencyValue, `desc`, gmt_create, gmt_modified, is_deleted) VALUES (-2, -109, 1, 'INPUT', 1, 'spark.submit.deployMode', 'cluster', null, 'deploymode$perjob', null, null, now(), now(), 0); +INSERT INTO console_component_config (cluster_id, component_id, component_type_code, type, required, `key`, value, `values`, dependencyKey, dependencyValue, `desc`, gmt_create, gmt_modified, is_deleted) VALUES (-2, -109, 1, 'INPUT', 0, 'spark.yarn.appMasterEnv.PYSPARK_DRIVER_PYTHON', '/data/miniconda2/bin/python3', null, 'deploymode$perjob', null, null, now(), now(), 0); +INSERT INTO console_component_config (cluster_id, component_id, component_type_code, type, required, `key`, value, `values`, dependencyKey, dependencyValue, `desc`, gmt_create, gmt_modified, is_deleted) VALUES (-2, -109, 1, 'INPUT', 0, 'spark.yarn.appMasterEnv.PYSPARK_PYTHON', '/data/miniconda2/bin/python3', null, 'deploymode$perjob', null, null, now(), now(), 0); +INSERT INTO console_component_config (cluster_id, component_id, component_type_code, type, required, `key`, value, `values`, dependencyKey, dependencyValue, `desc`, gmt_create, gmt_modified, is_deleted) VALUES (-2, -109, 1, 'INPUT', 1, 'spark.yarn.maxAppAttempts', '1', null, 'deploymode$perjob', null, null, now(), now(), 0); +INSERT INTO console_component_config (cluster_id, component_id, component_type_code, type, required, `key`, value, `values`, dependencyKey, dependencyValue, `desc`, gmt_create, gmt_modified, is_deleted) VALUES (-2, -109, 1, 'INPUT', 1, 'sparkPythonExtLibPath', 'hdfs:///dtInsight/pythons/pyspark.zip,hdfs:///dtInsight/pythons/py4j-0.10.7-src.zip', null, 'deploymode$perjob', null, null, now(), now(), 0); +INSERT INTO console_component_config (cluster_id, component_id, component_type_code, type, required, `key`, value, `values`, dependencyKey, dependencyValue, `desc`, gmt_create, gmt_modified, is_deleted) VALUES (-2, -109, 1, 'INPUT', 1, 'sparkSqlProxyPath', 'hdfs:///dtInsight/spark/spark-sql-proxy.jar', null, 'deploymode$perjob', null, null, now(), now(), 0); +INSERT INTO console_component_config (cluster_id, component_id, component_type_code, type, required, `key`, value, `values`, dependencyKey, dependencyValue, `desc`, gmt_create, gmt_modified, is_deleted) VALUES (-2, -109, 1, 'INPUT', 1, 'sparkYarnArchive', 'hdfs:///dtInsight/sparkjars/jars', null, 'deploymode$perjob', null, null, now(), now(), 0); +INSERT INTO console_component_config (cluster_id, component_id, component_type_code, type, required, `key`, value, `values`, dependencyKey, dependencyValue, `desc`, gmt_create, gmt_modified, is_deleted) VALUES (-2, -109, 1, 'INPUT', 0, 'yarnAccepterTaskNumber', '3', null, 'deploymode$perjob', null, null, now(), now(), 0); + +DELETE FROM dict WHERE dict_code = 'typename_mapping' AND dict_name IN ('yarn2-hdfs2-spark320','yarn3-hdfs3-spark320'); + +INSERT INTO dict (dict_code, dict_name, 
dict_value, dict_desc, type, sort, data_type, depend_name, is_default, gmt_create, gmt_modified, is_deleted) VALUES ('typename_mapping', 'yarn2-hdfs2-spark320', '-109', null, 6, 0, 'LONG', '', 0, now(),now(), 0); +INSERT INTO dict (dict_code, dict_name, dict_value, dict_desc, type, sort, data_type, depend_name, is_default, gmt_create, gmt_modified, is_deleted) VALUES ('typename_mapping', 'yarn3-hdfs3-spark320', '-109', null, 6, 0, 'LONG', '', 0, now(),now(), 0); + +DELETE FROM dict WHERE dict_code = 'component_model_config' AND depend_name = 'YARN'; +INSERT INTO dict (dict_code, dict_name, dict_value, dict_desc, type, sort, data_type, depend_name, is_default, gmt_create, gmt_modified, is_deleted) VALUES ('component_model_config', 'Apache Hadoop 2.x', '{"HDFS": {"HDFS": "yarn2-hdfs2-hadoop2", "FLINK": [{"112": "yarn2-hdfs2-flink112"}], "SPARK": [{"320": "yarn2-hdfs2-spark320"}, {"210": "yarn2-hdfs2-spark210"}], "SCRIPT": "yarn2-hdfs2-script"}, "YARN": "yarn2"}', null, 14, 1, 'STRING', 'YARN', 0, now(),now(), 0); +INSERT INTO dict (dict_code, dict_name, dict_value, dict_desc, type, sort, data_type, depend_name, is_default, gmt_create, gmt_modified, is_deleted) VALUES ('component_model_config', 'Apache Hadoop 3.x', '{"HDFS": {"HDFS": "yarn3-hdfs3-hadoop3", "FLINK": [{"112": "yarn3-hdfs3-flink112"}], "SPARK": [{"320": "yarn3-hdfs3-spark320"}, {"210": "yarn3-hdfs3-spark210"}], "SCRIPT": "yarn3-hdfs3-script"}, "YARN": "yarn3"}', null, 14, 1, 'STRING', 'YARN', 0, now(),now(), 0); +INSERT INTO dict (dict_code, dict_name, dict_value, dict_desc, type, sort, data_type, depend_name, is_default, gmt_create, gmt_modified, is_deleted) VALUES ('component_model_config', 'CDH 5.x', '{"HDFS": {"HDFS": "yarn2-hdfs2-hadoop2", "FLINK": [{"112": "yarn2-hdfs2-flink112"}], "SPARK": [{"320": "yarn2-hdfs2-spark320"}, {"210": "yarn2-hdfs2-spark210"}], "SCRIPT": "yarn2-hdfs2-script"}, "YARN": "yarn2"}', null, 14, 1, 'STRING', 'YARN', 0, now(),now(), 0); +INSERT INTO dict (dict_code, dict_name, dict_value, dict_desc, type, sort, data_type, depend_name, is_default, gmt_create, gmt_modified, is_deleted) VALUES ('component_model_config', 'CDH 6.0.x', '{"HDFS": {"HDFS": "yarn3-hdfs3-hadoop3", "FLINK": [{"112": "yarn3-hdfs3-flink112"}], "SPARK": [{"320": "yarn3-hdfs3-spark320"}, {"210": "yarn3-hdfs3-spark210"}], "SCRIPT": "yarn3-hdfs3-script"}, "YARN": "yarn3"}', null, 14, 1, 'STRING', 'YARN', 0, now(),now(), 0); +INSERT INTO dict (dict_code, dict_name, dict_value, dict_desc, type, sort, data_type, depend_name, is_default, gmt_create, gmt_modified, is_deleted) VALUES ('component_model_config', 'CDH 6.1.x', '{"HDFS": {"HDFS": "yarn3-hdfs3-hadoop3", "FLINK": [{"112": "yarn3-hdfs3-flink112"}], "SPARK": [{"320": "yarn3-hdfs3-spark320"}, {"210": "yarn3-hdfs3-spark210"}], "SCRIPT": "yarn3-hdfs3-script"}, "YARN": "yarn3"}', null, 14, 1, 'STRING', 'YARN', 0, now(),now(), 0); +INSERT INTO dict (dict_code, dict_name, dict_value, dict_desc, type, sort, data_type, depend_name, is_default, gmt_create, gmt_modified, is_deleted) VALUES ('component_model_config', 'CDH 6.2.x', '{"HDFS": {"HDFS": "yarn3-hdfs3-hadoop3", "FLINK": [{"112": "yarn3-hdfs3-flink112"}], "SPARK": [{"320": "yarn3-hdfs3-spark320"}, {"210": "yarn3-hdfs3-spark210"}], "SCRIPT": "yarn3-hdfs3-script"}, "YARN": "yarn3"}', null, 14, 1, 'STRING', 'YARN', 0, now(),now(), 0); +INSERT INTO dict (dict_code, dict_name, dict_value, dict_desc, type, sort, data_type, depend_name, is_default, gmt_create, gmt_modified, is_deleted) VALUES ('component_model_config', 'CDP 7.x', '{"HDFS": 
{"HDFS": "yarn3-hdfs3-hadoop3", "FLINK": [{"112": "yarn3-hdfs3-flink112"}], "SPARK": [{"320": "yarn3-hdfs3-spark320"}, {"210": "yarn3-hdfs3-spark210"}], "SCRIPT": "yarn3-hdfs3-script"}, "YARN": "yarn3"}', null, 14, 1, 'STRING', 'YARN', 0, now(),now(), 0); +INSERT INTO dict (dict_code, dict_name, dict_value, dict_desc, type, sort, data_type, depend_name, is_default, gmt_create, gmt_modified, is_deleted) VALUES ('component_model_config', 'HDP 2.6.x', '{"HDFS": {"HDFS": "yarn2-hdfs2-hadoop2", "FLINK": [{"112": "yarn2-hdfs2-flink112"}], "SPARK": [{"320": "yarn2-hdfs2-spark320"}, {"210": "yarn2-hdfs2-spark210"}], "SCRIPT": "yarn2-hdfs2-script"}, "YARN": "yarn2"}', null, 14, 1, 'STRING', 'YARN', 0, now(),now(), 0); +INSERT INTO dict (dict_code, dict_name, dict_value, dict_desc, type, sort, data_type, depend_name, is_default, gmt_create, gmt_modified, is_deleted) VALUES ('component_model_config', 'HDP 3.0.x', '{"HDFS": {"HDFS": "yarn3-hdfs3-hadoop3", "FLINK": [{"112": "yarn3-hdfs3-flink112"}], "SPARK": [{"320": "yarn3-hdfs3-spark320"}, {"210": "yarn3-hdfs3-spark210"}], "SCRIPT": "yarn3-hdfs3-script"}, "YARN": "yarn3"}', null, 14, 1, 'STRING', 'YARN', 0, now(),now(), 0); +INSERT INTO dict (dict_code, dict_name, dict_value, dict_desc, type, sort, data_type, depend_name, is_default, gmt_create, gmt_modified, is_deleted) VALUES ('component_model_config', 'HDP 3.x', '{"HDFS": {"HDFS": "yarn3-hdfs3-hadoop3", "FLINK": [{"112": "yarn3-hdfs3-flink112"}], "SPARK": [{"320": "yarn3-hdfs3-spark320"}, {"210": "yarn3-hdfs3-spark210"}], "SCRIPT": "yarn3-hdfs3-script"}, "YARN": "yarn3"}', null, 14, 1, 'STRING', 'YARN', 0, now(),now(), 0); +INSERT INTO dict (dict_code, dict_name, dict_value, dict_desc, type, sort, data_type, depend_name, is_default, gmt_create, gmt_modified, is_deleted) VALUES ('component_model_config', 'TDH 5.2.x', '{"HDFS": {"HDFS": "yarn2-hdfs2-hadoop2", "FLINK": [{"112": "yarn2-hdfs2-flink112"}], "SPARK": [{"320": "yarn2-hdfs2-spark320"}, {"210": "yarn2-hdfs2-spark210"}], "SCRIPT": "yarn2-hdfs2-script"}, "YARN": "yarn2"}', null, 14, 1, 'STRING', 'YARN', 0, now(),now(), 0); +INSERT INTO dict (dict_code, dict_name, dict_value, dict_desc, type, sort, data_type, depend_name, is_default, gmt_create, gmt_modified, is_deleted) VALUES ('component_model_config', 'TDH 6.x', '{"HDFS": {"HDFS": "yarn2-hdfs2-hadoop2", "FLINK": [{"112": "yarn2-hdfs2-flink112"}], "SPARK": [{"320": "yarn2-hdfs2-spark320"}, {"210": "yarn2-hdfs2-spark210"}], "SCRIPT": "yarn2-hdfs2-script"}, "YARN": "yarn2"}', null, 14, 1, 'STRING', 'YARN', 0, now(),now(), 0); +INSERT INTO dict (dict_code, dict_name, dict_value, dict_desc, type, sort, data_type, depend_name, is_default, gmt_create, gmt_modified, is_deleted) VALUES ('component_model_config', 'TDH 7.x', '{"HDFS": {"HDFS": "yarn2-hdfs2-hadoop2", "FLINK": [{"112": "yarn2-hdfs2-flink112"}], "SPARK": [{"320": "yarn2-hdfs2-spark320"}, {"210": "yarn2-hdfs2-spark210"}], "SCRIPT": "yarn2-hdfs2-script"}, "YARN": "yarn2"}', null, 14, 1, 'STRING', 'YARN', 0, now(),now(), 0); + + + +INSERT INTO dict (dict_code, dict_name, dict_value, dict_desc, type, sort, data_type, depend_name, is_default, gmt_create, gmt_modified, is_deleted) VALUES ('spark_version', '3.2', '320', null, 2, 1, 'INTEGER', '', 1, now(),now(), 0); + +UPDATE dict set dict_value = '{"actions": ["SAVE_TASK", "RUN_TASK", "STOP_TASK", "SUBMIT_TASK", "OPERATOR_TASK"], "barItem": ["task", "dependency", "task_params", "env_params"], "formField": ["datasource","queue","componentVersion"], "renderKind": "editor","dataTypeCodes":["45"]}' 
+WHERE dict_code = 0 AND dict_name = 'SparkSQL'; + +DELETE FROM task_param_template WHERE task_name = 'SPARK_SQL' AND task_version = '3.2'; + +INSERT INTO task_param_template (task_type, task_name, task_version, params, gmt_create, gmt_modified, is_deleted) VALUES (0, 'SPARK_SQL', '3.2', '## Driver程序使用的CPU核数,默认为1 +# spark.driver.cores=1 + +## Driver程序使用内存大小,默认1g +# spark.driver.memory=1g + +## 对Spark每个action结果集大小的限制,最少是1M,若设为0则不限制大小。 +## 若Job结果超过限制则会异常退出,若结果集限制过大也可能造成OOM问题,默认1g +# spark.driver.maxResultSize=1g + +## 启动的executor的数量,默认为1 +# spark.executor.instances=1 + +## 每个executor使用的CPU核数,默认为1 +# spark.executor.cores=1 + +## 每个executor内存大小,默认1g +# spark.executor.memory=1g + +## spark 日志级别可选ALL, DEBUG, ERROR, FATAL, INFO, OFF, TRACE, WARN +# logLevel = INFO + +## spark中所有网络交互的最大超时时间 +# spark.network.timeout=120s + +## executor的OffHeap内存,和spark.executor.memory配置使用 +# spark.yarn.executor.memoryOverhead= + +## 设置spark sql shuffle分区数,默认200 +# spark.sql.shuffle.partitions=200 + +## 开启spark推测行为,默认false +# spark.speculation=false', now(), now(), 0); \ No newline at end of file diff --git a/sql/init.sql b/sql/init.sql index bff138a20a..f66ac9f675 100644 --- a/sql/init.sql +++ b/sql/init.sql @@ -1541,6 +1541,8 @@ INSERT INTO `dict` VALUES (5, 'spark_thrift_version', '2.x', '2.x', NULL, 3, 2, INSERT INTO `dict` VALUES (7, 'hadoop_config', 'HDP 3.1.x', '-200', '', 5, 0, 'LONG', 'SPARK', 0, '2021-02-05 11:53:21', '2021-02-05 11:53:21', 0); INSERT INTO `dict` VALUES (9, 'typename_mapping', 'yarn3-hdfs3-spark210', '-108', NULL, 6, 0, 'LONG', '', 0, '2021-03-04 17:50:23', '2021-03-04 17:50:23', 0); INSERT INTO `dict` VALUES (11, 'typename_mapping', 'yarn2-hdfs2-spark210', '-108', NULL, 6, 0, 'LONG', '', 0, '2021-03-04 17:50:24', '2021-03-04 17:50:24', 0); +INSERT INTO `dict` VALUES (11, 'typename_mapping', 'yarn2-hdfs2-spark320', '-109', NULL, 6, 0, 'LONG', '', 0, '2021-03-04 17:50:24', '2021-03-04 17:50:24', 0); +INSERT INTO `dict` VALUES (11, 'typename_mapping', 'yarn3-hdfs3-spark320', '-109', NULL, 6, 0, 'LONG', '', 0, '2021-03-04 17:50:24', '2021-03-04 17:50:24', 0); INSERT INTO `dict` VALUES (13, 'typename_mapping', 'dummy', '-101', NULL, 6, 0, 'LONG', '', 0, '2021-03-04 17:50:24', '2021-03-04 17:50:24', 0); INSERT INTO `dict` VALUES (15, 'typename_mapping', 'hive', '-117', NULL, 6, 0, 'LONG', '', 0, '2021-03-04 17:50:24', '2021-03-04 17:50:24', 0); INSERT INTO `dict` VALUES (17, 'typename_mapping', 'hive2', '-117', NULL, 6, 0, 'LONG', '', 0, '2021-03-04 17:50:24', '2021-03-04 17:50:24', 0); @@ -1570,19 +1572,19 @@ INSERT INTO `dict` VALUES (85, 'ResourceManager', 'ResourceManager', '3', '资 INSERT INTO `dict` VALUES (87, 'TaskManager', 'TaskManager', '1', '任务管理', 32, 1, 'STRING', '', 1, '2022-02-11 10:42:19', '2022-02-11 10:42:19', 0); INSERT INTO `dict` VALUES (89, 'CustomFunction', 'CustomFunction', '6', '自定义函数', 33, 4, 'STRING', '', 1, '2022-02-11 10:42:57', '2022-02-11 10:42:57', 0); INSERT INTO `dict` VALUES (91, 'SystemFunction', 'SystemFunction', '6', '系统函数', 33, 2, 'STRING', '', 1, '2022-02-11 10:42:57', '2022-02-11 10:42:57', 0); -INSERT INTO `dict` VALUES (95, 'component_model_config', 'Apache Hadoop 2.x', '{\"HDFS\": {\"HDFS\": \"yarn2-hdfs2-hadoop2\", \"FLINK\": [{\"112\": \"yarn2-hdfs2-flink112\"}], \"SPARK\": [{\"210\": \"yarn2-hdfs2-spark210\", \"240\": \"yarn2-hdfs2-spark240\"}], \"SCRIPT\": \"yarn2-hdfs2-script\"}, \"YARN\": \"yarn2\"}', NULL, 14, 1, 'STRING', 'YARN', 0, '2021-12-28 11:01:55', '2021-12-28 11:01:55', 0); -INSERT INTO `dict` VALUES (97, 'component_model_config', 
'Apache Hadoop 3.x', '{\"HDFS\": {\"HDFS\": \"yarn3-hdfs3-hadoop3\", \"FLINK\": [{\"112\": \"yarn3-hdfs3-flink112\"}], \"SPARK\": [{\"210\": \"yarn3-hdfs3-spark210\", \"240\": \"yarn3-hdfs3-spark240\"}], \"SCRIPT\": \"yarn3-hdfs3-script\"}, \"YARN\": \"yarn3\"}', NULL, 14, 1, 'STRING', 'YARN', 0, '2021-12-28 11:03:45', '2021-12-28 11:03:45', 0); -INSERT INTO `dict` VALUES (99, 'component_model_config', 'HDP 3.0.x', '{\"HDFS\": {\"HDFS\": \"yarn3-hdfs3-hadoop3\", \"FLINK\": [{\"112\": \"yarn3-hdfs3-flink112\"}], \"SPARK\": [{\"210\": \"yarn3-hdfs3-spark210\", \"240\": \"yarn3-hdfs3-spark240\"}], \"SCRIPT\": \"yarn3-hdfs3-script\"}, \"YARN\": \"yarn3\"}', NULL, 14, 1, 'STRING', 'YARN', 0, '2021-12-28 11:04:23', '2021-12-28 11:04:23', 0); -INSERT INTO `dict` VALUES (101, 'component_model_config', 'CDH 6.0.x', '{\"HDFS\": {\"HDFS\": \"yarn3-hdfs3-hadoop3\", \"FLINK\": [{\"1.8\": \"yarn3-hdfs3-flink180\"}, {\"1.10\": \"yarn3-hdfs3-flink110\"}, {\"112\": \"yarn3-hdfs3-flink112\"}], \"SPARK\": [{\"210\": \"yarn3-hdfs3-spark210\", \"240\": \"yarn3-hdfs3-spark240\"}], \"SCRIPT\": \"yarn3-hdfs3-script\"}, \"YARN\": \"yarn3\"}', NULL, 14, 1, 'STRING', 'YARN', 0, '2021-12-28 11:04:40', '2021-12-28 11:04:40', 0); -INSERT INTO `dict` VALUES (103, 'component_model_config', 'CDH 6.1.x', '{\"HDFS\": {\"HDFS\": \"yarn3-hdfs3-hadoop3\", \"FLINK\": [{\"112\": \"yarn3-hdfs3-flink112\"}], \"SPARK\": [{\"210\": \"yarn3-hdfs3-spark210\", \"240\": \"yarn3-hdfs3-spark240\"}], \"SCRIPT\": \"yarn3-hdfs3-script\"}, \"YARN\": \"yarn3\"}', NULL, 14, 1, 'STRING', 'YARN', 0, '2021-12-28 11:04:55', '2021-12-28 11:04:55', 0); -INSERT INTO `dict` VALUES (105, 'component_model_config', 'CDH 6.2.x', '{\"HDFS\": {\"HDFS\": \"yarn3-hdfs3-hadoop3\", \"TONY\": \"yarn3-hdfs3-tony\", \"FLINK\": [{\"1.8\": \"yarn3-hdfs3-flink180\"}, {\"1.10\": \"yarn3-hdfs3-flink110\"}, {\"112\": \"yarn3-hdfs3-flink112\"}], \"SPARK\": [{\"210\": \"yarn3-hdfs3-spark210\", \"240(CDH 6.2)\": \"yarn3-hdfs3-spark240cdh620\"}], \"LEARNING\": \"yarn3-hdfs3-learning\", \"SCRIPT\": \"yarn3-hdfs3-script\"}, \"YARN\": \"yarn3\"}', NULL, 14, 1, 'STRING', 'YARN', 0, '2021-12-28 11:05:06', '2021-12-28 11:05:06', 0); -INSERT INTO `dict` VALUES (107, 'component_model_config', 'HDP 2.6.x', '{\"HDFS\": {\"HDFS\": \"yarn2-hdfs2-hadoop2\", \"FLINK\": [{\"112\": \"yarn2-hdfs2-flink112\"}], \"SPARK\": [{\"210\": \"yarn2-hdfs2-spark210\", \"240\": \"yarn2-hdfs2-spark240\"}], \"SCRIPT\": \"yarn2-hdfs2-script\"}, \"YARN\": \"yarn2\"}', NULL, 14, 1, 'STRING', 'YARN', 0, '2021-12-28 11:06:38', '2021-12-28 11:06:38', 0); -INSERT INTO `dict` VALUES (109, 'component_model_config', 'CDH 5.x', '{\"HDFS\": {\"HDFS\": \"yarn2-hdfs2-hadoop2\", \"FLINK\": [{\"112\": \"yarn2-hdfs2-flink112\"}], \"SPARK\": [{\"210\": \"yarn2-hdfs2-spark210\", \"240\": \"yarn2-hdfs2-spark240\"}], \"SCRIPT\": \"yarn2-hdfs2-script\"}, \"YARN\": \"yarn2\"}', NULL, 14, 1, 'STRING', 'YARN', 0, '2021-12-28 11:07:19', '2021-12-28 11:07:19', 0); -INSERT INTO `dict` VALUES (111, 'component_model_config', 'HDP 3.x', '{\"HDFS\": {\"HDFS\": \"yarn3-hdfs3-hadoop3\", \"FLINK\": [{\"112\": \"yarn3-hdfs3-flink112\"}], \"SPARK\": [{\"210\": \"yarn3-hdfs3-spark210\", \"240\": \"yarn3-hdfs3-spark240\"}], \"SCRIPT\": \"yarn3-hdfs3-script\"}, \"YARN\": \"yarn3\"}', NULL, 14, 1, 'STRING', 'YARN', 0, '2021-12-28 11:43:05', '2021-12-28 11:43:05', 0); -INSERT INTO `dict` VALUES (113, 'component_model_config', 'TDH 5.2.x', '{\"HDFS\": {\"HDFS\": \"yarn2-hdfs2-hadoop2\", \"FLINK\": [{\"112\": \"yarn2-hdfs2-flink112\"}], \"SPARK\": 
[{\"210\": \"yarn2-hdfs2-spark210\", \"240\": \"yarn2-hdfs2-spark240\"}], \"SCRIPT\": \"yarn2-hdfs2-script\"}, \"YARN\": \"yarn2\"}', NULL, 14, 1, 'STRING', 'YARN', 0, '2021-12-28 11:44:33', '2021-12-28 11:44:33', 0); -INSERT INTO `dict` VALUES (115, 'component_model_config', 'TDH 6.x', '{\"HDFS\": {\"HDFS\": \"yarn2-hdfs2-hadoop2\", \"FLINK\": [{\"112\": \"yarn2-hdfs2-flink112\"}], \"SPARK\": [{\"210\": \"yarn2-hdfs2-spark210\", \"240\": \"yarn2-hdfs2-spark240\"}], \"SCRIPT\": \"yarn2-hdfs2-script\"}, \"YARN\": \"yarn2\"}', NULL, 14, 1, 'STRING', 'YARN', 0, '2021-12-28 11:44:43', '2021-12-28 11:44:43', 0); -INSERT INTO `dict` VALUES (117, 'component_model_config', 'TDH 7.x', '{\"HDFS\": {\"HDFS\": \"yarn2-hdfs2-hadoop2\", \"FLINK\": [{\"112\": \"yarn2-hdfs2-flink112\"}], \"SPARK\": [{\"210\": \"yarn2-hdfs2-spark210\", \"240\": \"yarn2-hdfs2-spark240\"}], \"SCRIPT\": \"yarn2-hdfs2-script\"}, \"YARN\": \"yarn2\"}', NULL, 14, 1, 'STRING', 'YARN', 0, '2021-12-28 11:45:02', '2021-12-28 11:45:02', 0); -INSERT INTO `dict` VALUES (119, 'component_model_config', 'CDP 7.x', '{\"HDFS\": {\"HDFS\": \"yarn3-hdfs3-hadoop3\", \"FLINK\": [{\"112\": \"yarn3-hdfs3-flink112\"}], \"SPARK\": [{\"210\": \"yarn3-hdfs3-spark210\", \"240\": \"yarn3-hdfs3-spark240\"}], \"SCRIPT\": \"yarn3-hdfs3-script\"}, \"YARN\": \"yarn3\"}', NULL, 14, 1, 'STRING', 'YARN', 0, '2021-12-28 11:45:02', '2021-12-28 11:45:02', 0); +INSERT INTO `dict` VALUES (95,'component_model_config', 'Apache Hadoop 2.x', '{"HDFS": {"HDFS": "yarn2-hdfs2-hadoop2", "FLINK": [{"112": "yarn2-hdfs2-flink112"}], "SPARK": [{"320": "yarn2-hdfs2-spark320"}, {"210": "yarn2-hdfs2-spark210"], "SCRIPT": "yarn2-hdfs2-script"}, "YARN": "yarn2"}', null, 14, 1, 'STRING', 'YARN', 0, '2021-12-28 11:01:55', '2021-12-28 11:01:55', 0); +INSERT INTO `dict` VALUES (97,'component_model_config', 'Apache Hadoop 3.x', '{"HDFS": {"HDFS": "yarn3-hdfs3-hadoop3", "FLINK": [{"112": "yarn3-hdfs3-flink112"}], "SPARK": [{"320": "yarn3-hdfs3-spark320"}, {"210": "yarn3-hdfs3-spark210"}], "SCRIPT": "yarn3-hdfs3-script"}, "YARN": "yarn3"}', null, 14, 1, 'STRING', 'YARN', 0, '2021-12-28 11:03:45', '2021-12-28 11:03:45', 0); +INSERT INTO `dict` VALUES (99,'component_model_config', 'HDP 3.0.x', '{"HDFS": {"HDFS": "yarn3-hdfs3-hadoop3", "FLINK": [{"112": "yarn3-hdfs3-flink112"}], "SPARK": [{"320": "yarn3-hdfs3-spark320"}, {"210": "yarn3-hdfs3-spark210"}], "SCRIPT": "yarn3-hdfs3-script"}, "YARN": "yarn3"}', null, 14, 1, 'STRING', 'YARN', 0, '2021-12-28 11:04:23', '2021-12-28 11:04:23', 0); +INSERT INTO `dict` VALUES (101,'component_model_config', 'CDH 6.0.x', '{"HDFS": {"HDFS": "yarn3-hdfs3-hadoop3", "FLINK": [{"112": "yarn3-hdfs3-flink112"}], "SPARK": [{"320": "yarn3-hdfs3-spark320"}, {"210": "yarn3-hdfs3-spark210"}], "SCRIPT": "yarn3-hdfs3-script"}, "YARN": "yarn3"}', null, 14, 1, 'STRING', 'YARN', 0, '2021-12-28 11:04:40', '2021-12-28 11:04:40', 0); +INSERT INTO `dict` VALUES (103,'component_model_config', 'CDH 6.1.x', '{"HDFS": {"HDFS": "yarn3-hdfs3-hadoop3", "FLINK": [{"112": "yarn3-hdfs3-flink112"}], "SPARK": [{"320": "yarn3-hdfs3-spark320"}, {"210": "yarn3-hdfs3-spark210"}], "SCRIPT": "yarn3-hdfs3-script"}, "YARN": "yarn3"}', null, 14, 1, 'STRING', 'YARN', 0, '2021-12-28 11:04:55', '2021-12-28 11:04:55', 0); +INSERT INTO `dict` VALUES (105,'component_model_config', 'CDH 6.2.x', '{"HDFS": {"HDFS": "yarn3-hdfs3-hadoop3", "FLINK": [{"112": "yarn3-hdfs3-flink112"}], "SPARK": [{"320": "yarn3-hdfs3-spark320"}, {"210": "yarn3-hdfs3-spark210"], "SCRIPT": "yarn3-hdfs3-script"}, "YARN": 
"yarn3"}', null, 14, 1, 'STRING', 'YARN', 0, '2021-12-28 11:05:06', '2021-12-28 11:05:06', 0); +INSERT INTO `dict` VALUES (107,'component_model_config', 'HDP 2.6.x', '{"HDFS": {"HDFS": "yarn2-hdfs2-hadoop2", "FLINK": [{"112": "yarn2-hdfs2-flink112"}], "SPARK": [{"320": "yarn2-hdfs2-spark320"}, {"210": "yarn2-hdfs2-spark210"}], "SCRIPT": "yarn2-hdfs2-script"}, "YARN": "yarn2"}', null, 14, 1, 'STRING', 'YARN', 0, '2021-12-28 11:06:38', '2021-12-28 11:06:38', 0); +INSERT INTO `dict` VALUES (109,'component_model_config', 'CDH 5.x', '{"HDFS": {"HDFS": "yarn2-hdfs2-hadoop2", "FLINK": [{"112": "yarn2-hdfs2-flink112"}], "SPARK": [{"320": "yarn2-hdfs2-spark320"}, {"210": "yarn2-hdfs2-spark210"}], "SCRIPT": "yarn2-hdfs2-script"}, "YARN": "yarn2"}', null, 14, 1, 'STRING', 'YARN', 0, '2021-12-28 11:07:19', '2021-12-28 11:07:19', 0); +INSERT INTO `dict` VALUES (111,'component_model_config', 'HDP 3.x', '{"HDFS": {"HDFS": "yarn3-hdfs3-hadoop3", "FLINK": [{"112": "yarn3-hdfs3-flink112"}], "SPARK": [{"320": "yarn3-hdfs3-spark320"}, {"210": "yarn3-hdfs3-spark210"}], "SCRIPT": "yarn3-hdfs3-script"}, "YARN": "yarn3"}', null, 14, 1, 'STRING', 'YARN', 0, '2021-12-28 11:43:05', '2021-12-28 11:43:05', 0); +INSERT INTO `dict` VALUES (113,'component_model_config', 'TDH 5.2.x', '{"HDFS": {"HDFS": "yarn2-hdfs2-hadoop2", "FLINK": [{"112": "yarn2-hdfs2-flink112"}], "SPARK": [{"320": "yarn2-hdfs2-spark320"}, {"210": "yarn2-hdfs2-spark210"}], "SCRIPT": "yarn2-hdfs2-script"}, "YARN": "yarn2"}', null, 14, 1, 'STRING', 'YARN', 0, '2021-12-28 11:44:33', '2021-12-28 11:44:33', 0); +INSERT INTO `dict` VALUES (115,'component_model_config', 'TDH 6.x', '{"HDFS": {"HDFS": "yarn2-hdfs2-hadoop2", "FLINK": [{"112": "yarn2-hdfs2-flink112"}], "SPARK": [{"320": "yarn2-hdfs2-spark320"}, {"210": "yarn2-hdfs2-spark210"}], "SCRIPT": "yarn2-hdfs2-script"}, "YARN": "yarn2"}', null, 14, 1, 'STRING', 'YARN', 0, '2021-12-28 11:44:43', '2021-12-28 11:44:43', 0); +INSERT INTO `dict` VALUES (117,'component_model_config', 'TDH 7.x', '{"HDFS": {"HDFS": "yarn2-hdfs2-hadoop2", "FLINK": [{"112": "yarn2-hdfs2-flink112"}], "SPARK": [{"320": "yarn2-hdfs2-spark320"}, {"210": "yarn2-hdfs2-spark210"}], "SCRIPT": "yarn2-hdfs2-script"}, "YARN": "yarn2"}', null, 14, 1, 'STRING', 'YARN', 0, '2021-12-28 11:45:02', '2021-12-28 11:45:02', 0); +INSERT INTO `dict` VALUES (119,'component_model_config', 'CDP 7.x', '{"HDFS": {"HDFS": "yarn3-hdfs3-hadoop3", "FLINK": [{"112": "yarn3-hdfs3-flink112"}], "SPARK": [{"320": "yarn3-hdfs3-spark320"}, {"210": "yarn3-hdfs3-spark210"}], "SCRIPT": "yarn3-hdfs3-script"}, "YARN": "yarn3"}', null, 14, 1, 'STRING', 'YARN', 0, '2021-12-28 11:45:02', '2021-12-28 11:45:02', 0); INSERT INTO `dict` VALUES (121, 'typename_mapping', 'yarn2-hdfs2-flink112', '-115', NULL, 6, 0, 'LONG', '', 0, '2021-05-18 11:29:00', '2021-05-18 11:29:00', 0); INSERT INTO `dict` VALUES (123, 'typename_mapping', 'yarn3-hdfs3-flink112', '-115', NULL, 6, 0, 'LONG', '', 0, '2021-05-18 11:29:00', '2021-05-18 11:29:00', 0); INSERT INTO `dict` VALUES (125, 'hive_version', '1.x', '1.x', NULL, 4, 1, 'STRING', '', 0, '2022-05-03 22:20:53', '2022-05-03 22:20:53', 0); @@ -1675,7 +1677,7 @@ INSERT INTO `dict` VALUES (299, 'typename_mapping', 'flink112-standalone', '-120 INSERT INTO `dict` VALUES (301, 'flink_version', '1.12-on-yarn', '112', NULL, 1, 2, 'INTEGER', '', 0, '2022-05-03 22:13:12', '2022-05-03 22:13:12', 0); INSERT INTO `dict` VALUES (303, 'flink_version', '1.12-standalone', '112', NULL, 1, 2, 'INTEGER', '', 0, '2022-09-20 14:57:48', '2022-09-20 14:57:48', 0); INSERT 
INTO `dict` VALUES (305, '-1', '虚节点', '{\"actions\": [\"SAVE_TASK\", \"SUBMIT_TASK\", \"OPERATOR_TASK\"], \"barItem\":[ \"dependency\"],\"formField\": [], \"renderKind\": \"virtual\"}', NULL, 30, -1, 'STRING', '', 1, '2022-02-11 10:28:45', '2022-02-11 10:28:45', 0); -INSERT INTO `dict` VALUES (307, '0', 'SparkSQL', '{\"actions\": [\"SAVE_TASK\", \"RUN_TASK\", \"STOP_TASK\", \"SUBMIT_TASK\", \"OPERATOR_TASK\"], \"barItem\": [\"task\", \"dependency\", \"task_params\", \"env_params\"], \"formField\": [\"datasource\",\"queue\"], \"renderKind\": \"editor\",\"dataTypeCodes\":[\"45\"]}', NULL, 30, 0, 'STRING', '', 1, '2022-02-11 10:28:45', '2022-02-11 10:28:45', 0); +INSERT INTO `dict` VALUES (307, '0', 'SparkSQL', '{\"actions\": [\"SAVE_TASK\", \"RUN_TASK\", \"STOP_TASK\", \"SUBMIT_TASK\", \"OPERATOR_TASK\"], \"barItem\": [\"task\", \"dependency\", \"task_params\", \"env_params\"], \"formField\": [\"datasource\",\"queue\",\"componentVersion\"], \"renderKind\": \"editor\",\"dataTypeCodes\":[\"45\"]}', NULL, 30, 0, 'STRING', '', 1, '2022-02-11 10:28:45', '2022-02-11 10:28:45', 0); INSERT INTO `dict` VALUES (309, '1', 'Spark', '{\"actions\": [\"SAVE_TASK\", \"SUBMIT_TASK\", \"OPERATOR_TASK\"], \"formField\": [\"resourceIdList\", \"mainClass\", \"exeArgs\", \"componentVersion\"],\"barItem\":[ \"dependency\",\"env_params\",\"task_params\"], \"renderKind\": \"spark\"}', NULL, 30, 0, 'STRING', '', 0, '2022-09-03 07:27:25', '2022-09-03 07:27:25', 0); INSERT INTO `dict` VALUES (311, '2', 'SYNC', '{\"actions\": [\"SAVE_TASK\", \"RUN_TASK\", \"STOP_TASK\", \"SUBMIT_TASK\", \"OPERATOR_TASK\"], \"barItem\": [\"task\", \"dependency\", \"task_config\", \"task_params\", \"env_params\"], \"formField\": [\"createModel\", \"syncModel\"], \"renderKind\": \"dataSync\", \"renderCondition\": {\"key\": \"createModel\", \"value\": 0, \"renderKind\": \"editor\"}, \"actionsCondition\": {\"key\": \"createModel\", \"value\": 0, \"actions\": [\"CONVERT_TASK\", \"SAVE_TASK\", \"RUN_TASK\", \"STOP_TASK\", \"SUBMIT_TASK\", \"OPERATOR_TASK\"]}}', NULL, 30, 2, 'STRING', '', 1, '2022-02-11 10:28:45', '2022-02-11 10:28:45', 0); INSERT INTO `dict` VALUES (313, '5', 'FlinkSQL', '{\"actions\": [\"GRAMMAR_TASK\", \"SAVE_TASK\", \"OPERATOR_TASK\"], \"barItem\": [\"task\", \"env_params\"], \"formField\": [\"componentVersion\"], \"renderKind\": \"editor\", \"actionsCondition\": {\"key\": \"createModel\", \"value\": 0, \"actions\": [\"CONVERT_TASK\", \"FORMAT_TASK\", \"GRAMMAR_TASK\", \"SAVE_TASK\", \"OPERATOR_TASK\"]}, \"barItemCondition\": {\"key\": \"createModel\", \"value\": 0, \"barItem\": [\"task\", \"flinksql_source\", \"flinksql_result\", \"flinksql_dimension\", \"env_params\"]}}', NULL, 30, 5, 'STRING', '', 0, '2022-09-03 07:25:04', '2022-09-03 07:25:04', 0); @@ -2264,7 +2266,7 @@ VALUES (3, 1, 'SPARK', '2.1', '2021-11-18 10:36:13', '2021-11-18 10:36:13', 0); INSERT INTO `task_param_template` VALUES (5, 2, 'SYNC', '1.12', - '## 任务运行方式:\n## per_job:单独为任务创建flink yarn session,适用于低频率,大数据量同步\n## session:多个任务共用一个flink yarn session,适用于高频率、小数据量同步,默认per_job\n## standalone:多个任务共用一个flink standalone\n## flinkTaskRunMode=per_job\n## per_job模式下jobManager配置的内存大小,默认1024(单位M)\n## jobmanager.memory.mb=1024\n## per_job模式下taskManager配置的内存大小,默认1024(单位M)\n## taskmanager.memory.mb=1024\n## per_job模式下每个taskManager 对应 slot的数量\n## slots=1\n## checkpoint保存时间间隔\n## flink.checkpoint.interval=300000\n## 任务优先级, 范围:1-1000\n## job.priority=10', + '## 任务运行方式:\n## per_job:单独为任务创建flink yarn session,适用于低频率,大数据量同步\n## session:多个任务共用一个flink yarn 
session,适用于高频率、小数据量同步,默认session\n## standalone:多个任务共用一个flink standalone\n## flinkTaskRunMode=per_job\n## per_job模式下jobManager配置的内存大小,默认1024(单位M)\n## jobmanager.memory.mb=1024\n## per_job模式下taskManager配置的内存大小,默认1024(单位M)\n## taskmanager.memory.mb=1024\n## per_job模式下每个taskManager 对应 slot的数量\n## slots=1\n## checkpoint保存时间间隔\n## flink.checkpoint.interval=300000\n## 任务优先级, 范围:1-1000\n## job.priority=10', '2021-11-18 10:37:24', '2021-11-18 10:37:24', 0); INSERT INTO `task_param_template` VALUES (7, 5, 'FlinkSQL', '1.12', @@ -2472,4 +2474,71 @@ insert into console_component_config (cluster_id, component_id, component_type_c values (-2, -233, 8, 'INPUT', 1, 'execute.dir', '/tmp/dir', null, null, null, null, now(), now(), 0), (-2, -233, 8, 'INPUT', 1, 'DataX.python.path', 'python3', null, null, null, null, now(), now(), 0); + +DELETE FROM console_component_config WHERE component_id = -109; +INSERT INTO console_component_config (cluster_id, component_id, component_type_code, type, required, `key`, value, `values`, dependencyKey, dependencyValue, `desc`, gmt_create, gmt_modified, is_deleted) VALUES (-2, -109, 1, 'CHECKBOX', 1, 'deploymode', '["perjob"]', null, '', '', null, now(), now(), 0); +INSERT INTO console_component_config (cluster_id, component_id, component_type_code, type, required, `key`, value, `values`, dependencyKey, dependencyValue, `desc`, gmt_create, gmt_modified, is_deleted) VALUES (-2, -109, 1, 'GROUP', 1, 'perjob', 'perjob', null, 'deploymode', 'perjob', null, now(), now(), 0); +INSERT INTO console_component_config (cluster_id, component_id, component_type_code, type, required, `key`, value, `values`, dependencyKey, dependencyValue, `desc`, gmt_create, gmt_modified, is_deleted) VALUES (-2, -109, 1, 'INPUT', 0, 'addColumnSupport', 'true', null, 'deploymode$perjob', null, null, now(), now(), 0); +INSERT INTO console_component_config (cluster_id, component_id, component_type_code, type, required, `key`, value, `values`, dependencyKey, dependencyValue, `desc`, gmt_create, gmt_modified, is_deleted) VALUES (-2, -109, 1, 'INPUT', 1, 'spark.cores.max', '1', null, 'deploymode$perjob', null, null, now(), now(), 0); +INSERT INTO console_component_config (cluster_id, component_id, component_type_code, type, required, `key`, value, `values`, dependencyKey, dependencyValue, `desc`, gmt_create, gmt_modified, is_deleted) VALUES (-2, -109, 1, 'INPUT', 0, 'spark.driver.extraJavaOptions', '-Dfile.encoding=utf-8', null, 'deploymode$perjob', null, null, now(), now(), 0); +INSERT INTO console_component_config (cluster_id, component_id, component_type_code, type, required, `key`, value, `values`, dependencyKey, dependencyValue, `desc`, gmt_create, gmt_modified, is_deleted) VALUES (-2, -109, 1, 'INPUT', 0, 'spark.eventLog.compress', 'true', null, 'deploymode$perjob', null, null, now(), now(), 0); +INSERT INTO console_component_config (cluster_id, component_id, component_type_code, type, required, `key`, value, `values`, dependencyKey, dependencyValue, `desc`, gmt_create, gmt_modified, is_deleted) VALUES (-2, -109, 1, 'INPUT', 0, 'spark.eventLog.dir', 'hdfs:///tmp/spark-yarn-logs', null, 'deploymode$perjob', null, null, now(), now(), 0); +INSERT INTO console_component_config (cluster_id, component_id, component_type_code, type, required, `key`, value, `values`, dependencyKey, dependencyValue, `desc`, gmt_create, gmt_modified, is_deleted) VALUES (-2, -109, 1, 'INPUT', 0, 'spark.eventLog.enabled', 'true', null, 'deploymode$perjob', null, null, now(), now(), 0); +INSERT INTO console_component_config 
(cluster_id, component_id, component_type_code, type, required, `key`, value, `values`, dependencyKey, dependencyValue, `desc`, gmt_create, gmt_modified, is_deleted) VALUES (-2, -109, 1, 'INPUT', 1, 'spark.executor.cores', '1', null, 'deploymode$perjob', null, null, now(), now(), 0); +INSERT INTO console_component_config (cluster_id, component_id, component_type_code, type, required, `key`, value, `values`, dependencyKey, dependencyValue, `desc`, gmt_create, gmt_modified, is_deleted) VALUES (-2, -109, 1, 'INPUT', 0, 'spark.executor.extraJavaOptions', '-Dfile.encoding=utf-8', null, 'deploymode$perjob', null, null, now(), now(), 0); +INSERT INTO console_component_config (cluster_id, component_id, component_type_code, type, required, `key`, value, `values`, dependencyKey, dependencyValue, `desc`, gmt_create, gmt_modified, is_deleted) VALUES (-2, -109, 1, 'INPUT', 1, 'spark.executor.heartbeatInterval', '10s', null, 'deploymode$perjob', null, null, now(), now(), 0); +INSERT INTO console_component_config (cluster_id, component_id, component_type_code, type, required, `key`, value, `values`, dependencyKey, dependencyValue, `desc`, gmt_create, gmt_modified, is_deleted) VALUES (-2, -109, 1, 'INPUT', 1, 'spark.executor.instances', '1', null, 'deploymode$perjob', null, null, now(), now(), 0); +INSERT INTO console_component_config (cluster_id, component_id, component_type_code, type, required, `key`, value, `values`, dependencyKey, dependencyValue, `desc`, gmt_create, gmt_modified, is_deleted) VALUES (-2, -109, 1, 'INPUT', 1, 'spark.executor.memory', '512m', null, 'deploymode$perjob', null, null, now(), now(), 0); + +INSERT INTO console_component_config (cluster_id, component_id, component_type_code, type, required, `key`, value, `values`, dependencyKey, dependencyValue, `desc`, gmt_create, gmt_modified, is_deleted) VALUES (-2, -109, 1, 'INPUT', 1, 'spark.sql.adaptive.advisoryPartitionSizeInBytes', '64MB', null, 'deploymode$perjob', null, null, now(), now(), 0); +INSERT INTO console_component_config (cluster_id, component_id, component_type_code, type, required, `key`, value, `values`, dependencyKey, dependencyValue, `desc`, gmt_create, gmt_modified, is_deleted) VALUES (-2, -109, 1, 'INPUT', 1, 'spark.sql.adaptive.coalescePartitions.minPartitionSize', '1MB', null, 'deploymode$perjob', null, null, now(), now(), 0); +INSERT INTO console_component_config (cluster_id, component_id, component_type_code, type, required, `key`, value, `values`, dependencyKey, dependencyValue, `desc`, gmt_create, gmt_modified, is_deleted) VALUES (-2, -109, 1, 'INPUT', 1, 'spark.sql.adaptive.coalescePartitions.initialPartitionNum', '200', null, 'deploymode$perjob', null, null, now(), now(), 0); +INSERT INTO console_component_config (cluster_id, component_id, component_type_code, type, required, `key`, value, `values`, dependencyKey, dependencyValue, `desc`, gmt_create, gmt_modified, is_deleted) VALUES (-2, -109, 1, 'INPUT', 1, 'spark.sql.adaptive.skewJoin.skewedPartitionThresholdInBytes', '256MB', null, 'deploymode$perjob', null, null, now(), now(), 0); +INSERT INTO console_component_config (cluster_id, component_id, component_type_code, type, required, `key`, value, `values`, dependencyKey, dependencyValue, `desc`, gmt_create, gmt_modified, is_deleted) VALUES (-2, -109, 1, 'INPUT', 1, 'spark.sql.adaptive.skewJoin.skewedPartitionFactor', '5', null, 'deploymode$perjob', null, null, now(), now(), 0); + +INSERT INTO console_component_config (cluster_id, component_id, component_type_code, type, required, `key`, value, `values`, 
dependencyKey, dependencyValue, `desc`, gmt_create, gmt_modified, is_deleted) VALUES (-2, -109, 1, 'INPUT', 1, 'spark.network.timeout', '600s', null, 'deploymode$perjob', null, null, now(), now(), 0); +INSERT INTO console_component_config (cluster_id, component_id, component_type_code, type, required, `key`, value, `values`, dependencyKey, dependencyValue, `desc`, gmt_create, gmt_modified, is_deleted) VALUES (-2, -109, 1, 'INPUT', 1, 'spark.rpc.askTimeout', '600s', null, 'deploymode$perjob', null, null, now(), now(), 0); +INSERT INTO console_component_config (cluster_id, component_id, component_type_code, type, required, `key`, value, `values`, dependencyKey, dependencyValue, `desc`, gmt_create, gmt_modified, is_deleted) VALUES (-2, -109, 1, 'INPUT', 1, 'spark.speculation', 'true', null, 'deploymode$perjob', null, null, now(), now(), 0); +INSERT INTO console_component_config (cluster_id, component_id, component_type_code, type, required, `key`, value, `values`, dependencyKey, dependencyValue, `desc`, gmt_create, gmt_modified, is_deleted) VALUES (-2, -109, 1, 'INPUT', 1, 'spark.submit.deployMode', 'cluster', null, 'deploymode$perjob', null, null, now(), now(), 0); +INSERT INTO console_component_config (cluster_id, component_id, component_type_code, type, required, `key`, value, `values`, dependencyKey, dependencyValue, `desc`, gmt_create, gmt_modified, is_deleted) VALUES (-2, -109, 1, 'INPUT', 0, 'spark.yarn.appMasterEnv.PYSPARK_DRIVER_PYTHON', '/data/miniconda2/bin/python3', null, 'deploymode$perjob', null, null, now(), now(), 0); +INSERT INTO console_component_config (cluster_id, component_id, component_type_code, type, required, `key`, value, `values`, dependencyKey, dependencyValue, `desc`, gmt_create, gmt_modified, is_deleted) VALUES (-2, -109, 1, 'INPUT', 0, 'spark.yarn.appMasterEnv.PYSPARK_PYTHON', '/data/miniconda2/bin/python3', null, 'deploymode$perjob', null, null, now(), now(), 0); +INSERT INTO console_component_config (cluster_id, component_id, component_type_code, type, required, `key`, value, `values`, dependencyKey, dependencyValue, `desc`, gmt_create, gmt_modified, is_deleted) VALUES (-2, -109, 1, 'INPUT', 1, 'spark.yarn.maxAppAttempts', '1', null, 'deploymode$perjob', null, null, now(), now(), 0); +INSERT INTO console_component_config (cluster_id, component_id, component_type_code, type, required, `key`, value, `values`, dependencyKey, dependencyValue, `desc`, gmt_create, gmt_modified, is_deleted) VALUES (-2, -109, 1, 'INPUT', 1, 'sparkPythonExtLibPath', 'hdfs:///dtInsight/pythons/pyspark.zip,hdfs:///dtInsight/pythons/py4j-0.10.7-src.zip', null, 'deploymode$perjob', null, null, now(), now(), 0); +INSERT INTO console_component_config (cluster_id, component_id, component_type_code, type, required, `key`, value, `values`, dependencyKey, dependencyValue, `desc`, gmt_create, gmt_modified, is_deleted) VALUES (-2, -109, 1, 'INPUT', 1, 'sparkSqlProxyPath', 'hdfs:///dtInsight/spark/spark-sql-proxy.jar', null, 'deploymode$perjob', null, null, now(), now(), 0); +INSERT INTO console_component_config (cluster_id, component_id, component_type_code, type, required, `key`, value, `values`, dependencyKey, dependencyValue, `desc`, gmt_create, gmt_modified, is_deleted) VALUES (-2, -109, 1, 'INPUT', 1, 'sparkYarnArchive', 'hdfs:///dtInsight/sparkjars/jars', null, 'deploymode$perjob', null, null, now(), now(), 0); +INSERT INTO console_component_config (cluster_id, component_id, component_type_code, type, required, `key`, value, `values`, dependencyKey, dependencyValue, `desc`, gmt_create, 
gmt_modified, is_deleted) VALUES (-2, -109, 1, 'INPUT', 0, 'yarnAccepterTaskNumber', '3', null, 'deploymode$perjob', null, null, now(), now(), 0); + +INSERT INTO task_param_template (task_type, task_name, task_version, params, gmt_create, gmt_modified, is_deleted) VALUES (0, 'SPARK_SQL', '3.2', '## Driver程序使用的CPU核数,默认为1 +# spark.driver.cores=1 + +## Driver程序使用内存大小,默认1g +# spark.driver.memory=1g + +## 对Spark每个action结果集大小的限制,最少是1M,若设为0则不限制大小。 +## 若Job结果超过限制则会异常退出,若结果集限制过大也可能造成OOM问题,默认1g +# spark.driver.maxResultSize=1g + +## 启动的executor的数量,默认为1 +# spark.executor.instances=1 + +## 每个executor使用的CPU核数,默认为1 +# spark.executor.cores=1 + +## 每个executor内存大小,默认1g +# spark.executor.memory=1g + +## spark 日志级别可选ALL, DEBUG, ERROR, FATAL, INFO, OFF, TRACE, WARN +# logLevel = INFO + +## spark中所有网络交互的最大超时时间 +# spark.network.timeout=120s + +## executor的OffHeap内存,和spark.executor.memory配置使用 +# spark.yarn.executor.memoryOverhead= + +## 设置spark sql shuffle分区数,默认200 +# spark.sql.shuffle.partitions=200 + +## 开启spark推测行为,默认false +# spark.speculation=false', now(), now(), 0); COMMIT; \ No newline at end of file diff --git a/taier-common/pom.xml b/taier-common/pom.xml index ba0ffeafb5..c8b40ee5c7 100644 --- a/taier-common/pom.xml +++ b/taier-common/pom.xml @@ -222,7 +222,7 @@ 3.0.1 ../javadocs - engine-api-client + taier-api-client -Xdoclint:none diff --git a/taier-data-develop/pom.xml b/taier-data-develop/pom.xml index b8ea022b24..5b09d9cfe2 100644 --- a/taier-data-develop/pom.xml +++ b/taier-data-develop/pom.xml @@ -131,7 +131,7 @@ org.apache.maven.plugins maven-shade-plugin - 2.4.3 + 3.2.4 org.springframework.boot diff --git a/taier-data-develop/src/main/java/com/dtstack/taier/develop/datasource/convert/utils/AbstractAssertUtils.java b/taier-data-develop/src/main/java/com/dtstack/taier/develop/datasource/convert/utils/AbstractAssertUtils.java deleted file mode 100644 index d50eaab141..0000000000 --- a/taier-data-develop/src/main/java/com/dtstack/taier/develop/datasource/convert/utils/AbstractAssertUtils.java +++ /dev/null @@ -1,123 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package com.dtstack.taier.develop.datasource.convert.utils; - -import com.dtstack.taier.common.exception.DtCenterDefException; -import com.dtstack.taier.common.exception.ExceptionEnums; -import org.apache.commons.collections.CollectionUtils; -import org.apache.commons.collections.MapUtils; -import org.apache.commons.lang3.StringUtils; - -import java.util.Collection; -import java.util.Map; - -/** - * 断言工具类 - * - * @author :wangchuan - * date:Created in 下午2:16 2021/7/5 - * company: www.dtstack.com - */ -public abstract class AbstractAssertUtils { - - public static void isTrue(boolean expression, String message) { - if (!expression) { - throw new DtCenterDefException(message); - } - } - - public static void isOverLength(String content, Integer limit, String message) { - if (StringUtils.isNotBlank(content) && content.length() > limit) { - throw new DtCenterDefException(message); - } - } - - public static void isTrue(boolean expression, ExceptionEnums exceptionEnums) { - if (!expression) { - throw new DtCenterDefException(exceptionEnums); - } - } - - public static void notNull(Object obj, String message) { - if (obj == null) { - throw new DtCenterDefException(message); - } - } - - public static void isNull(Object obj, String message) { - if (obj != null) { - throw new DtCenterDefException(message); - } - } - - public static void isNull(Object obj, ExceptionEnums exceptionEnums) { - if (obj != null) { - throw new DtCenterDefException(exceptionEnums); - } - } - - public static void notBlank(String obj, ExceptionEnums exceptionEnums) { - if (StringUtils.isBlank(obj)) { - throw new DtCenterDefException(exceptionEnums); - } - } - - public static void notBlank(String obj, String message) { - if (StringUtils.isBlank(obj)) { - throw new DtCenterDefException(message); - } - } - - public static void isFalse(boolean expression, String message) { - if (expression) { - throw new DtCenterDefException(message); - } - } - - public static void isFalse(boolean expression, ExceptionEnums exceptionEnums) { - if (expression) { - throw new DtCenterDefException(exceptionEnums); - } - } - - public static void notNull(Object obj, ExceptionEnums exceptionEnums) { - if (obj == null) { - throw new DtCenterDefException(exceptionEnums); - } - } - - public static void notNull(Collection collection, String message) { - if (CollectionUtils.isEmpty(collection)) { - throw new DtCenterDefException(message); - } - } - - public static void notNull(Collection collection, ExceptionEnums exceptionEnums) { - if (CollectionUtils.isEmpty(collection)) { - throw new DtCenterDefException(exceptionEnums); - } - } - - public static void notEmpty(Map collection, String message) { - if (MapUtils.isEmpty(collection)) { - throw new DtCenterDefException(message); - } - } - -} diff --git a/taier-data-develop/src/main/java/com/dtstack/taier/develop/service/develop/impl/DevelopSelectSqlService.java b/taier-data-develop/src/main/java/com/dtstack/taier/develop/service/develop/impl/DevelopSelectSqlService.java index a2d0fb7fdd..5174eaf24b 100644 --- a/taier-data-develop/src/main/java/com/dtstack/taier/develop/service/develop/impl/DevelopSelectSqlService.java +++ b/taier-data-develop/src/main/java/com/dtstack/taier/develop/service/develop/impl/DevelopSelectSqlService.java @@ -150,6 +150,7 @@ public String sendSqlTask(String sql, String taskParams, String jobId, Task task paramActionExt.setTenantId(task.getTenantId()); paramActionExt.setQueueName(task.getQueueName()); paramActionExt.setDatasourceId(task.getDatasourceId()); + 
paramActionExt.setComponentVersion(task.getComponentVersion()); actionService.start(paramActionExt); return jobId; } diff --git a/taier-data-develop/src/main/java/com/dtstack/taier/develop/service/develop/saver/AbstractTaskSaver.java b/taier-data-develop/src/main/java/com/dtstack/taier/develop/service/develop/saver/AbstractTaskSaver.java index 28dd84508b..d4191d6a49 100644 --- a/taier-data-develop/src/main/java/com/dtstack/taier/develop/service/develop/saver/AbstractTaskSaver.java +++ b/taier-data-develop/src/main/java/com/dtstack/taier/develop/service/develop/saver/AbstractTaskSaver.java @@ -21,6 +21,7 @@ import com.alibaba.fastjson.JSONObject; import com.baomidou.mybatisplus.core.toolkit.Wrappers; import com.dtstack.taier.common.enums.EComponentType; +import com.dtstack.taier.common.enums.EComputeType; import com.dtstack.taier.common.enums.EScheduleJobType; import com.dtstack.taier.common.enums.EScheduleStatus; import com.dtstack.taier.common.enums.ESubmitStatus; @@ -183,6 +184,9 @@ public TaskVO updateTaskInfo(TaskResourceParam taskResourceParam) { Task task = developTaskService.getOne(Wrappers.lambdaQuery(Task.class) .eq(Task::getName, taskVO.getName()) .eq(Task::getTenantId, taskVO.getTenantId())); + if(EComputeType.BATCH.getType() == taskVO.getComputeType()){ + taskVO.setJobId(null); + } if (taskVO.getId() != null && taskVO.getId() > 0) { //update diff --git a/taier-data-develop/src/main/java/com/dtstack/taier/develop/service/develop/saver/DefaultTaskSaver.java b/taier-data-develop/src/main/java/com/dtstack/taier/develop/service/develop/saver/DefaultTaskSaver.java index 1cc8d216f7..0e1c3646e0 100644 --- a/taier-data-develop/src/main/java/com/dtstack/taier/develop/service/develop/saver/DefaultTaskSaver.java +++ b/taier-data-develop/src/main/java/com/dtstack/taier/develop/service/develop/saver/DefaultTaskSaver.java @@ -26,6 +26,7 @@ import com.dtstack.taier.develop.dto.devlop.TaskVO; import com.dtstack.taier.develop.service.develop.impl.DevelopTaskTaskService; import com.dtstack.taier.develop.service.user.UserService; +import com.dtstack.taier.pluginapi.enums.ComputeType; import com.dtstack.taier.pluginapi.enums.EJobType; import org.apache.commons.lang.BooleanUtils; import org.apache.commons.lang.StringUtils; @@ -50,13 +51,13 @@ public class DefaultTaskSaver extends AbstractTaskSaver { @Autowired private UserService userService; - @Autowired - private DevelopTaskTaskService developTaskTaskService; @Override public TaskResourceParam beforeProcessing(TaskResourceParam taskResourceParam) { // sql 任务必须选择数据源 EScheduleJobType scheduleJobType = EScheduleJobType.getByTaskType(taskResourceParam.getTaskType()); + taskResourceParam.setTaskParams(taskResourceParam.getTaskParams() == null ? 
taskTemplateService.getTaskTemplate(taskResourceParam.getTaskType(), taskResourceParam.getComponentVersion()).getParams() : taskResourceParam.getTaskParams()); + taskResourceParam.setComputeType(ComputeType.BATCH.getType()); if (EComputeType.BATCH.getType() == scheduleJobType.getComputeType().getType() && EJobType.SQL.getType() == scheduleJobType.getEngineJobType()) { if (null == taskResourceParam.getDatasourceId()) { throw new TaierDefineException(ErrorCode.DATA_SOURCE_NOT_SET); diff --git a/taier-datasource/pom.xml b/taier-datasource/pom.xml index cc177fe870..b69794e572 100644 --- a/taier-datasource/pom.xml +++ b/taier-datasource/pom.xml @@ -89,6 +89,24 @@ + + + + central + https://repo1.maven.org/maven2/ + + + + oss-sonatype + https://oss.sonatype.org/content/repositories/releases/ + + + + huawei + https://repo.huaweicloud.com/repository/maven/huaweicloudsdk/ + + + diff --git a/taier-datasource/taier-datasource-api/src/main/java/com/dtstack/taier/datasource/api/base/ClientCache.java b/taier-datasource/taier-datasource-api/src/main/java/com/dtstack/taier/datasource/api/base/ClientCache.java index f2a9b13cd3..697ef0b626 100644 --- a/taier-datasource/taier-datasource-api/src/main/java/com/dtstack/taier/datasource/api/base/ClientCache.java +++ b/taier-datasource/taier-datasource-api/src/main/java/com/dtstack/taier/datasource/api/base/ClientCache.java @@ -25,11 +25,15 @@ import com.dtstack.taier.datasource.api.client.IRestful; import com.dtstack.taier.datasource.api.client.ITable; import com.dtstack.taier.datasource.api.client.IYarn; +import com.dtstack.taier.datasource.api.config.Configuration; +import com.dtstack.taier.datasource.api.context.ClientEnvironment; import com.dtstack.taier.datasource.api.exception.InitializeException; +import com.dtstack.taier.datasource.api.manager.ManagerFactory; import com.dtstack.taier.datasource.api.manager.list.ClientManager; import com.dtstack.taier.datasource.api.source.DataSourceType; import lombok.extern.slf4j.Slf4j; +import java.util.HashMap; import java.util.Objects; /** @@ -78,6 +82,18 @@ public static IClient getClient(Integer dataSourceType) { return getClientByType(IClient.class, dataSourceType); } + public static void main(String[] args) { + Configuration configuration = new Configuration(new HashMap<>()); + ClientEnvironment clientEnvironment = new ClientEnvironment(configuration); + clientEnvironment.start(); + ClientCache.setEnv(clientEnvironment.getManagerFactory().getManager(ClientManager.class)); + ClientManager clientManager = new ClientManager(); + clientManager.setManagerFactory(new ManagerFactory()); + setEnv(clientManager); + IClient client = getClient(DataSourceType.KAFKA.getVal()); + System.out.println(client); + } + /** * 获取 HDFS 文件客户端 * diff --git a/taier-datasource/taier-datasource-plugin/taier-datasource-plugin-aws_s3/pom.xml b/taier-datasource/taier-datasource-plugin/taier-datasource-plugin-aws_s3/pom.xml index bdf5d6497f..71ecb46ba1 100644 --- a/taier-datasource/taier-datasource-plugin/taier-datasource-plugin-aws_s3/pom.xml +++ b/taier-datasource/taier-datasource-plugin/taier-datasource-plugin-aws_s3/pom.xml @@ -51,7 +51,7 @@ org.apache.maven.plugins maven-shade-plugin - 3.2.1 + 3.2.4 package diff --git a/taier-datasource/taier-datasource-plugin/taier-datasource-plugin-csp_s3/pom.xml b/taier-datasource/taier-datasource-plugin/taier-datasource-plugin-csp_s3/pom.xml index 68b777557f..8b5b54d92a 100644 --- a/taier-datasource/taier-datasource-plugin/taier-datasource-plugin-csp_s3/pom.xml +++ 
b/taier-datasource/taier-datasource-plugin/taier-datasource-plugin-csp_s3/pom.xml @@ -50,7 +50,7 @@ org.apache.maven.plugins maven-shade-plugin - 3.2.1 + 3.2.4 package diff --git a/taier-datasource/taier-datasource-plugin/taier-datasource-plugin-hive1/pom.xml b/taier-datasource/taier-datasource-plugin/taier-datasource-plugin-hive1/pom.xml index b3e49611d8..49ed6ec582 100644 --- a/taier-datasource/taier-datasource-plugin/taier-datasource-plugin-hive1/pom.xml +++ b/taier-datasource/taier-datasource-plugin/taier-datasource-plugin-hive1/pom.xml @@ -22,10 +22,10 @@ 1.0.5 1.8.3 1.1.2.6 - 1.1.1 + 1.2.1 2.7.3 1.6.3 - 1.1.1 + 1.2.1 @@ -424,6 +424,11 @@ + + org.apache.httpcomponents + httpclient + 4.5.3 + diff --git a/taier-datasource/taier-datasource-plugin/taier-datasource-plugin-hive1/src/main/java/org/apache/hive/jdbc/HiveConnection.java b/taier-datasource/taier-datasource-plugin/taier-datasource-plugin-hive1/src/main/java/org/apache/hive/jdbc/HiveConnection.java index 8ce1ed9bc5..04f72b58e4 100644 --- a/taier-datasource/taier-datasource-plugin/taier-datasource-plugin-hive1/src/main/java/org/apache/hive/jdbc/HiveConnection.java +++ b/taier-datasource/taier-datasource-plugin/taier-datasource-plugin-hive1/src/main/java/org/apache/hive/jdbc/HiveConnection.java @@ -18,43 +18,10 @@ package org.apache.hive.jdbc; -import java.io.FileInputStream; -import java.io.IOException; -import java.security.KeyStore; -import java.sql.Array; -import java.sql.Blob; -import java.sql.CallableStatement; -import java.sql.Clob; -import java.sql.Connection; -import java.sql.DatabaseMetaData; -import java.sql.DriverManager; -import java.sql.NClob; -import java.sql.PreparedStatement; -import java.sql.ResultSet; -import java.sql.SQLClientInfoException; -import java.sql.SQLException; -import java.sql.SQLWarning; -import java.sql.SQLXML; -import java.sql.Savepoint; -import java.sql.Statement; -import java.sql.Struct; -import java.util.HashMap; -import java.util.LinkedList; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; -import java.util.Properties; -import java.util.concurrent.Executor; -import java.util.concurrent.TimeUnit; - -import javax.security.sasl.Sasl; -import javax.security.sasl.SaslException; - import org.apache.commons.lang3.BooleanUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.shims.ShimLoader; import org.apache.hive.jdbc.Utils.JdbcConnectionParams; import org.apache.hive.service.auth.HiveAuthFactory; import org.apache.hive.service.auth.KerberosSaslHelper; @@ -74,15 +41,60 @@ import org.apache.hive.service.cli.thrift.TRenewDelegationTokenResp; import org.apache.hive.service.cli.thrift.TSessionHandle; import org.apache.http.HttpRequestInterceptor; -import org.apache.http.conn.scheme.Scheme; +import org.apache.http.HttpResponse; +import org.apache.http.client.CookieStore; +import org.apache.http.client.ServiceUnavailableRetryStrategy; +import org.apache.http.config.Registry; +import org.apache.http.config.RegistryBuilder; +import org.apache.http.conn.socket.ConnectionSocketFactory; import org.apache.http.conn.ssl.SSLSocketFactory; -import org.apache.http.impl.client.DefaultHttpClient; +import org.apache.http.impl.client.BasicCookieStore; +import org.apache.http.impl.client.CloseableHttpClient; +import org.apache.http.impl.client.HttpClientBuilder; +import org.apache.http.impl.client.HttpClients; +import org.apache.http.impl.conn.BasicHttpClientConnectionManager; +import 
org.apache.http.protocol.HttpContext; import org.apache.thrift.TException; import org.apache.thrift.protocol.TBinaryProtocol; import org.apache.thrift.transport.THttpClient; import org.apache.thrift.transport.TTransport; import org.apache.thrift.transport.TTransportException; +import javax.net.ssl.KeyManagerFactory; +import javax.net.ssl.SSLContext; +import javax.net.ssl.TrustManagerFactory; +import javax.security.sasl.Sasl; +import javax.security.sasl.SaslException; +import java.io.FileInputStream; +import java.io.IOException; +import java.security.KeyStore; +import java.security.SecureRandom; +import java.sql.Array; +import java.sql.Blob; +import java.sql.CallableStatement; +import java.sql.Clob; +import java.sql.Connection; +import java.sql.DatabaseMetaData; +import java.sql.DriverManager; +import java.sql.NClob; +import java.sql.PreparedStatement; +import java.sql.ResultSet; +import java.sql.SQLClientInfoException; +import java.sql.SQLException; +import java.sql.SQLWarning; +import java.sql.SQLXML; +import java.sql.Savepoint; +import java.sql.Statement; +import java.sql.Struct; +import java.util.HashMap; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Properties; +import java.util.concurrent.Executor; +import java.util.concurrent.TimeUnit; + /** * HiveConnection. * @@ -185,6 +197,7 @@ public HiveConnection(String uri, Properties info) throws SQLException { supportedProtocols.add(TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V5); supportedProtocols.add(TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V6); supportedProtocols.add(TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V7); + supportedProtocols.add(TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V8); // open client session openSession(); @@ -224,6 +237,8 @@ private void openTransport() throws SQLException { port = connParams.getPort(); LOG.info("Will retry opening client transport"); } else { + LOG.info("Transport Used for JDBC connection: " + + sessConfMap.get(JdbcConnectionParams.TRANSPORT_MODE)); throw new SQLException("Could not open client transport with JDBC Uri: " + jdbcUriString + ": " + e.getMessage(), " 08S01", e); } @@ -247,7 +262,7 @@ private String getServerHttpUrl(boolean useSsl) { } private TTransport createHttpTransport() throws SQLException, TTransportException { - DefaultHttpClient httpClient; + CloseableHttpClient httpClient; boolean useSsl = isSslConnection(); // Create an http client from the configs httpClient = getHttpClient(useSsl); @@ -264,6 +279,9 @@ private TTransport createHttpTransport() throws SQLException, TTransportExceptio } } catch (TException e) { + LOG.info("JDBC Connection Parameters used : useSSL = " + useSsl + " , httpPath = " + + sessConfMap.get(JdbcConnectionParams.HTTP_PATH) + " Authentication type = " + + sessConfMap.get(JdbcConnectionParams.AUTH_TYPE)); String msg = "Could not create http connection to " + jdbcUriString + ". 
" + e.getMessage(); throw new TTransportException(msg, e); @@ -271,37 +289,92 @@ private TTransport createHttpTransport() throws SQLException, TTransportExceptio return transport; } - private DefaultHttpClient getHttpClient(Boolean useSsl) throws SQLException { - DefaultHttpClient httpClient = new DefaultHttpClient(); + private CloseableHttpClient getHttpClient(Boolean useSsl) throws SQLException { + boolean isCookieEnabled = sessConfMap.get(JdbcConnectionParams.COOKIE_AUTH) == null || + (!JdbcConnectionParams.COOKIE_AUTH_FALSE.equalsIgnoreCase( + sessConfMap.get(JdbcConnectionParams.COOKIE_AUTH))); + String cookieName = sessConfMap.get(JdbcConnectionParams.COOKIE_NAME) == null ? + JdbcConnectionParams.DEFAULT_COOKIE_NAMES_HS2 : + sessConfMap.get(JdbcConnectionParams.COOKIE_NAME); + CookieStore cookieStore = isCookieEnabled ? new BasicCookieStore() : null; + HttpClientBuilder httpClientBuilder; // Request interceptor for any request pre-processing logic HttpRequestInterceptor requestInterceptor; - // If Kerberos + Map additionalHttpHeaders = new HashMap(); + + // Retrieve the additional HttpHeaders + for (Map.Entry entry : sessConfMap.entrySet()) { + String key = entry.getKey(); + + if (key.startsWith(JdbcConnectionParams.HTTP_HEADER_PREFIX)) { + additionalHttpHeaders.put(key.substring(JdbcConnectionParams.HTTP_HEADER_PREFIX.length()), + entry.getValue()); + } + } + // Configure http client for kerberos/password based authentication if (isKerberosAuthMode()) { /** * Add an interceptor which sets the appropriate header in the request. * It does the kerberos authentication and get the final service ticket, * for sending to the server before every request. * In https mode, the entire information is encrypted - * TODO: Optimize this with a mix of kerberos + using cookie. */ requestInterceptor = new HttpKerberosRequestInterceptor(sessConfMap.get(JdbcConnectionParams.AUTH_PRINCIPAL), - host, getServerHttpUrl(useSsl), assumeSubject); + host, getServerHttpUrl(useSsl), assumeSubject, cookieStore, cookieName, useSsl, + additionalHttpHeaders); } else { /** * Add an interceptor to pass username/password in the header. * In https mode, the entire information is encrypted */ - requestInterceptor = new HttpBasicAuthInterceptor(getUserName(), getPassword()); + requestInterceptor = new HttpBasicAuthInterceptor(getUserName(), getPassword(), + cookieStore, cookieName, useSsl, + additionalHttpHeaders); } - // Configure httpClient for SSL + // Configure http client for cookie based authentication + if (isCookieEnabled) { + // Create a http client with a retry mechanism when the server returns a status code of 401. 
+ httpClientBuilder = + HttpClients.custom().setServiceUnavailableRetryStrategy( + new ServiceUnavailableRetryStrategy() { + + @Override + public boolean retryRequest( + final HttpResponse response, + final int executionCount, + final HttpContext context) { + int statusCode = response.getStatusLine().getStatusCode(); + boolean ret = statusCode == 401 && executionCount <= 1; + + // Set the context attribute to true which will be interpreted by the request interceptor + if (ret) { + context.setAttribute(Utils.HIVE_SERVER2_RETRY_KEY, Utils.HIVE_SERVER2_RETRY_TRUE); + } + return ret; + } + + @Override + public long getRetryInterval() { + // Immediate retry + return 0; + } + }); + } else { + httpClientBuilder = HttpClientBuilder.create(); + } + // Add the request interceptor to the client builder + httpClientBuilder.addInterceptorFirst(requestInterceptor); + // Configure http client for SSL if (useSsl) { + String useTwoWaySSL = sessConfMap.get(JdbcConnectionParams.USE_TWO_WAY_SSL); String sslTrustStorePath = sessConfMap.get(JdbcConnectionParams.SSL_TRUST_STORE); String sslTrustStorePassword = sessConfMap.get( JdbcConnectionParams.SSL_TRUST_STORE_PASSWORD); KeyStore sslTrustStore; SSLSocketFactory socketFactory; + /** * The code within the try block throws: * 1. SSLInitializationException @@ -315,11 +388,13 @@ private DefaultHttpClient getHttpClient(Boolean useSsl) throws SQLException { * and throw a SQLException. */ try { - if (sslTrustStorePath == null || sslTrustStorePath.isEmpty()) { + if (useTwoWaySSL != null && + useTwoWaySSL.equalsIgnoreCase(JdbcConnectionParams.TRUE)) { + socketFactory = getTwoWaySSLSocketFactory(); + } else if (sslTrustStorePath == null || sslTrustStorePath.isEmpty()) { // Create a default socket factory based on standard JSSE trust material socketFactory = SSLSocketFactory.getSocketFactory(); - } - else { + } else { // Pick trust store config from the given path sslTrustStore = KeyStore.getInstance(JdbcConnectionParams.SSL_TRUST_STORE_TYPE); sslTrustStore.load(new FileInputStream(sslTrustStorePath), @@ -327,8 +402,13 @@ private DefaultHttpClient getHttpClient(Boolean useSsl) throws SQLException { socketFactory = new SSLSocketFactory(sslTrustStore); } socketFactory.setHostnameVerifier(SSLSocketFactory.ALLOW_ALL_HOSTNAME_VERIFIER); - Scheme sslScheme = new Scheme("https", 443, socketFactory); - httpClient.getConnectionManager().getSchemeRegistry().register(sslScheme); + + final Registry registry = + RegistryBuilder.create() + .register("https", socketFactory) + .build(); + + httpClientBuilder.setConnectionManager(new BasicHttpClientConnectionManager(registry)); } catch (Exception e) { String msg = "Could not create an https connection to " + @@ -336,8 +416,7 @@ private DefaultHttpClient getHttpClient(Boolean useSsl) throws SQLException { throw new SQLException(msg, " 08S01", e); } } - httpClient.addRequestInterceptor(requestInterceptor); - return httpClient; + return httpClientBuilder.build(); } /** @@ -360,17 +439,20 @@ private TTransport createBinaryTransport() throws SQLException, TTransportExcept // If Kerberos Map saslProps = new HashMap(); SaslQOP saslQOP = SaslQOP.AUTH; - if (sessConfMap.containsKey(JdbcConnectionParams.AUTH_PRINCIPAL)) { - if (sessConfMap.containsKey(JdbcConnectionParams.AUTH_QOP)) { - try { - saslQOP = SaslQOP.fromString(sessConfMap.get(JdbcConnectionParams.AUTH_QOP)); - } catch (IllegalArgumentException e) { - throw new SQLException("Invalid " + JdbcConnectionParams.AUTH_QOP + - " parameter. 
" + e.getMessage(), "42000", e); - } + if (sessConfMap.containsKey(JdbcConnectionParams.AUTH_QOP)) { + try { + saslQOP = SaslQOP.fromString(sessConfMap.get(JdbcConnectionParams.AUTH_QOP)); + } catch (IllegalArgumentException e) { + throw new SQLException("Invalid " + JdbcConnectionParams.AUTH_QOP + + " parameter. " + e.getMessage(), "42000", e); } saslProps.put(Sasl.QOP, saslQOP.toString()); - saslProps.put(Sasl.SERVER_AUTH, "true"); + } else { + // If the client did not specify qop then just negotiate the one supported by server + saslProps.put(Sasl.QOP, "auth-conf,auth-int,auth"); + } + saslProps.put(Sasl.SERVER_AUTH, "true"); + if (sessConfMap.containsKey(JdbcConnectionParams.AUTH_PRINCIPAL)) { transport = KerberosSaslHelper.getKerberosTransport( sessConfMap.get(JdbcConnectionParams.AUTH_PRINCIPAL), host, HiveAuthFactory.getSocketTransport(host, port, loginTimeout), saslProps, @@ -393,7 +475,9 @@ private TTransport createBinaryTransport() throws SQLException, TTransportExcept if (isSslConnection()) { // get SSL socket String sslTrustStore = sessConfMap.get(JdbcConnectionParams.SSL_TRUST_STORE); - String sslTrustStorePassword = sessConfMap.get(JdbcConnectionParams.SSL_TRUST_STORE_PASSWORD); + String sslTrustStorePassword = sessConfMap.get( + JdbcConnectionParams.SSL_TRUST_STORE_PASSWORD); + if (sslTrustStore == null || sslTrustStore.isEmpty()) { transport = HiveAuthFactory.getSSLSocket(host, port, loginTimeout); } else { @@ -419,6 +503,49 @@ private TTransport createBinaryTransport() throws SQLException, TTransportExcept return transport; } + SSLSocketFactory getTwoWaySSLSocketFactory() throws SQLException { + SSLSocketFactory socketFactory = null; + + try { + KeyManagerFactory keyManagerFactory = KeyManagerFactory.getInstance( + JdbcConnectionParams.SUNX509_ALGORITHM_STRING, + JdbcConnectionParams.SUNJSSE_ALGORITHM_STRING); + String keyStorePath = sessConfMap.get(JdbcConnectionParams.SSL_KEY_STORE); + String keyStorePassword = sessConfMap.get(JdbcConnectionParams.SSL_KEY_STORE_PASSWORD); + KeyStore sslKeyStore = KeyStore.getInstance(JdbcConnectionParams.SSL_KEY_STORE_TYPE); + + if (keyStorePath == null || keyStorePath.isEmpty()) { + throw new IllegalArgumentException(JdbcConnectionParams.SSL_KEY_STORE + + " Not configured for 2 way SSL connection, keyStorePath param is empty"); + } + sslKeyStore.load(new FileInputStream(keyStorePath), + keyStorePassword.toCharArray()); + keyManagerFactory.init(sslKeyStore, keyStorePassword.toCharArray()); + + TrustManagerFactory trustManagerFactory = TrustManagerFactory.getInstance( + JdbcConnectionParams.SUNX509_ALGORITHM_STRING); + String trustStorePath = sessConfMap.get(JdbcConnectionParams.SSL_TRUST_STORE); + String trustStorePassword = sessConfMap.get( + JdbcConnectionParams.SSL_TRUST_STORE_PASSWORD); + KeyStore sslTrustStore = KeyStore.getInstance(JdbcConnectionParams.SSL_TRUST_STORE_TYPE); + + if (trustStorePath == null || trustStorePath.isEmpty()) { + throw new IllegalArgumentException(JdbcConnectionParams.SSL_TRUST_STORE + + " Not configured for 2 way SSL connection"); + } + sslTrustStore.load(new FileInputStream(trustStorePath), + trustStorePassword.toCharArray()); + trustManagerFactory.init(sslTrustStore); + SSLContext context = SSLContext.getInstance("TLS"); + context.init(keyManagerFactory.getKeyManagers(), + trustManagerFactory.getTrustManagers(), new SecureRandom()); + socketFactory = new SSLSocketFactory(context); + } catch (Exception e) { + throw new SQLException("Error while initializing 2 way ssl socket factory ", e); + } + return 
socketFactory; + } + // Lookup the delegation token. First in the connection URL, then Configuration private String getClientDelegationToken(Map jdbcConnConf) throws SQLException { diff --git a/taier-ui/src/components/scaffolds/create.tsx b/taier-ui/src/components/scaffolds/create.tsx index e06b2d8aa8..4f8baf32d3 100644 --- a/taier-ui/src/components/scaffolds/create.tsx +++ b/taier-ui/src/components/scaffolds/create.tsx @@ -70,7 +70,7 @@ const ComponentVersion = ({ onChange }: ICreateFormProps) => { const [versions, setVersions] = useState<{ label: string; value: string }[]>([]); useEffect(() => { - if (taskType) { + if (taskType !== undefined) { api.getComponentVersionByTaskType<{ componentVersion: string; default: boolean; componentName: string }[]>({ taskType, }).then((res) => { diff --git a/taier-worker/taier-worker-api/src/main/java/com/dtstack/taier/pluginapi/constrant/ConfigConstant.java b/taier-worker/taier-worker-api/src/main/java/com/dtstack/taier/pluginapi/constrant/ConfigConstant.java index ff1cf596b9..94262f4c0b 100644 --- a/taier-worker/taier-worker-api/src/main/java/com/dtstack/taier/pluginapi/constrant/ConfigConstant.java +++ b/taier-worker/taier-worker-api/src/main/java/com/dtstack/taier/pluginapi/constrant/ConfigConstant.java @@ -113,4 +113,11 @@ public class ConfigConstant { public static final String DATAX_PYTHON_BIN = "DataX.python.path"; + + public static final String SPARK_KERBEROS_REMOTE_KRB5 = "spark.kerberos.remotekrb5"; + + public static final String SPARK_KERBEROS_REMOTE_KEYTAB = "spark.kerberos.remotekeytab"; + + public static final String SPARK_HADOOP_CONF_REMOTE_DIR = "spark.hadoopconf.remotedir"; + } diff --git a/taier-worker/taier-worker-plugin/base/src/main/java/com/dtstack/taier/base/util/KerberosUtils.java b/taier-worker/taier-worker-plugin/base/src/main/java/com/dtstack/taier/base/util/KerberosUtils.java index 84ed2708c2..75b3350844 100644 --- a/taier-worker/taier-worker-plugin/base/src/main/java/com/dtstack/taier/base/util/KerberosUtils.java +++ b/taier-worker/taier-worker-plugin/base/src/main/java/com/dtstack/taier/base/util/KerberosUtils.java @@ -432,7 +432,7 @@ public static synchronized String[] getKerberosFile(BaseConfig config, String lo public static String getKeytabPath(BaseConfig config) { String fileName = config.getPrincipalFile(); String remoteDir = config.getRemoteDir(); - String localDir = USER_DIR + remoteDir; + String localDir = ConfigConstant.LOCAL_KEYTAB_DIR_PARENT + remoteDir; File path = new File(localDir); if (!path.exists()) { diff --git a/taier-worker/taier-worker-plugin/dummy/pom.xml b/taier-worker/taier-worker-plugin/dummy/pom.xml index a57df43ea8..be017c65da 100644 --- a/taier-worker/taier-worker-plugin/dummy/pom.xml +++ b/taier-worker/taier-worker-plugin/dummy/pom.xml @@ -41,7 +41,7 @@ org.apache.maven.plugins maven-shade-plugin - 3.0.0 + 3.2.4 package diff --git a/taier-worker/taier-worker-plugin/flink/flink112-standalone/pom.xml b/taier-worker/taier-worker-plugin/flink/flink112-standalone/pom.xml index cb5a1ced18..d6ec9bd158 100644 --- a/taier-worker/taier-worker-plugin/flink/flink112-standalone/pom.xml +++ b/taier-worker/taier-worker-plugin/flink/flink112-standalone/pom.xml @@ -258,7 +258,7 @@ org.apache.maven.plugins maven-shade-plugin - 3.0.0 + 3.2.4 package diff --git a/taier-worker/taier-worker-plugin/flink/pom.xml b/taier-worker/taier-worker-plugin/flink/pom.xml index e584e7afd0..73f2947352 100644 --- a/taier-worker/taier-worker-plugin/flink/pom.xml +++ b/taier-worker/taier-worker-plugin/flink/pom.xml @@ -18,8 +18,8 
@@ common yarn-hdfs-flink112-core - yarn3-hdfs3-flink112 yarn2-hdfs2-flink112 + yarn3-hdfs3-flink112 flink112-standalone flink-base diff --git a/taier-worker/taier-worker-plugin/flink/yarn2-hdfs2-flink112/pom.xml b/taier-worker/taier-worker-plugin/flink/yarn2-hdfs2-flink112/pom.xml index 3b92b61e34..bafad536b9 100644 --- a/taier-worker/taier-worker-plugin/flink/yarn2-hdfs2-flink112/pom.xml +++ b/taier-worker/taier-worker-plugin/flink/yarn2-hdfs2-flink112/pom.xml @@ -247,6 +247,25 @@ + + + + central + https://repo1.maven.org/maven2/ + + + + oss-sonatype + https://oss.sonatype.org/content/repositories/releases/ + + + + huawei + https://repo.huaweicloud.com/repository/maven/huaweicloudsdk/ + + + + @@ -259,7 +278,7 @@ org.apache.maven.plugins maven-shade-plugin - 3.0.0 + 3.2.4 package diff --git a/taier-worker/taier-worker-plugin/flink/yarn3-hdfs3-flink112/pom.xml b/taier-worker/taier-worker-plugin/flink/yarn3-hdfs3-flink112/pom.xml index 14fb1fa55a..e9c63167b6 100644 --- a/taier-worker/taier-worker-plugin/flink/yarn3-hdfs3-flink112/pom.xml +++ b/taier-worker/taier-worker-plugin/flink/yarn3-hdfs3-flink112/pom.xml @@ -252,6 +252,23 @@ + + + + central + https://repo1.maven.org/maven2/ + + + + oss-sonatype + https://oss.sonatype.org/content/repositories/releases/ + + + + huawei + https://repo.huaweicloud.com/repository/maven/huaweicloudsdk/ + + diff --git a/taier-worker/taier-worker-plugin/hadoop/yarn2-hdfs2-hadoop2/pom.xml b/taier-worker/taier-worker-plugin/hadoop/yarn2-hdfs2-hadoop2/pom.xml index db5d0755d7..9ca12bf265 100644 --- a/taier-worker/taier-worker-plugin/hadoop/yarn2-hdfs2-hadoop2/pom.xml +++ b/taier-worker/taier-worker-plugin/hadoop/yarn2-hdfs2-hadoop2/pom.xml @@ -75,7 +75,7 @@ org.apache.maven.plugins maven-shade-plugin - 3.0.0 + 3.2.4 package diff --git a/taier-worker/taier-worker-plugin/hadoop/yarn3-hdfs3-hadoop3/pom.xml b/taier-worker/taier-worker-plugin/hadoop/yarn3-hdfs3-hadoop3/pom.xml index cfa93dbd7a..1b166bc1c3 100644 --- a/taier-worker/taier-worker-plugin/hadoop/yarn3-hdfs3-hadoop3/pom.xml +++ b/taier-worker/taier-worker-plugin/hadoop/yarn3-hdfs3-hadoop3/pom.xml @@ -75,7 +75,7 @@ org.apache.maven.plugins maven-shade-plugin - 3.0.0 + 3.2.4 package diff --git a/taier-worker/taier-worker-plugin/pom.xml b/taier-worker/taier-worker-plugin/pom.xml index 5c1a7d75c8..52ee72ce9d 100644 --- a/taier-worker/taier-worker-plugin/pom.xml +++ b/taier-worker/taier-worker-plugin/pom.xml @@ -1,8 +1,14 @@ + + + taier-worker + com.dtstack.taier + 1.0.0 + ../pom.xml + 4.0.0 - com.dtstack.taier taier-worker-plugin 1.0.0 pom @@ -164,7 +170,7 @@ true javadocs - engine-java-docs + taier-java-docs diff --git a/taier-worker/taier-worker-plugin/script/yarn2-hdfs2-script/script-client/pom.xml b/taier-worker/taier-worker-plugin/script/yarn2-hdfs2-script/script-client/pom.xml index 3bd0127e06..7f318280f7 100644 --- a/taier-worker/taier-worker-plugin/script/yarn2-hdfs2-script/script-client/pom.xml +++ b/taier-worker/taier-worker-plugin/script/yarn2-hdfs2-script/script-client/pom.xml @@ -31,12 +31,6 @@ 1.0.0 - - - - - - org.apache.hadoop diff --git a/taier-worker/taier-worker-plugin/script/yarn2-hdfs2-script/script-core/pom.xml b/taier-worker/taier-worker-plugin/script/yarn2-hdfs2-script/script-core/pom.xml index 1eb0046d8e..363e6d129d 100644 --- a/taier-worker/taier-worker-plugin/script/yarn2-hdfs2-script/script-core/pom.xml +++ b/taier-worker/taier-worker-plugin/script/yarn2-hdfs2-script/script-core/pom.xml @@ -126,7 +126,7 @@ org.apache.maven.plugins maven-shade-plugin - 3.0.0 + 3.2.4 package diff --git 
a/taier-worker/taier-worker-plugin/script/yarn3-hdfs3-script/script-client/pom.xml b/taier-worker/taier-worker-plugin/script/yarn3-hdfs3-script/script-client/pom.xml index c941f86e64..70ed2bb81f 100644 --- a/taier-worker/taier-worker-plugin/script/yarn3-hdfs3-script/script-client/pom.xml +++ b/taier-worker/taier-worker-plugin/script/yarn3-hdfs3-script/script-client/pom.xml @@ -31,11 +31,6 @@ 1.0.0 - - - - - org.apache.hadoop @@ -101,7 +96,7 @@ org.apache.maven.plugins maven-shade-plugin - 3.0.0 + 3.2.4 package diff --git a/taier-worker/taier-worker-plugin/script/yarn3-hdfs3-script/script-core/pom.xml b/taier-worker/taier-worker-plugin/script/yarn3-hdfs3-script/script-core/pom.xml index 816a8e9763..0f125da118 100644 --- a/taier-worker/taier-worker-plugin/script/yarn3-hdfs3-script/script-core/pom.xml +++ b/taier-worker/taier-worker-plugin/script/yarn3-hdfs3-script/script-core/pom.xml @@ -125,7 +125,7 @@ org.apache.maven.plugins maven-shade-plugin - 3.0.0 + 3.2.4 package diff --git a/taier-worker/taier-worker-plugin/spark/pom.xml b/taier-worker/taier-worker-plugin/spark/pom.xml index 2e4f7ff13b..4264aa56fe 100644 --- a/taier-worker/taier-worker-plugin/spark/pom.xml +++ b/taier-worker/taier-worker-plugin/spark/pom.xml @@ -15,12 +15,119 @@ pom + + UTF-8 + 2.1.3 + 2.11.8 + + - yarn2-hdfs2-spark210-core + spark-sql-proxy + spark-yarn-client-core + yarn-hdfs-spark210-core + yarn-hdfs-spark320-core yarn2-hdfs2-spark210 yarn3-hdfs3-spark210 + yarn2-hdfs2-spark320 + yarn3-hdfs3-spark320 + + + + com.dtstack.taier + taier-worker-plugin.base + 1.0.0 + + + + com.dtstack.taier + taier-worker-plugin.spark.spark-sql-proxy + 1.0.0 + + + + org.apache.hadoop + hadoop-common + ${hadoop2.version} + provided + + + + com.google.guava + guava + 14.0.1 + + + + org.scala-lang + scala-library + ${scala.version} + + + + + org.apache.spark + spark-hive_2.11 + ${spark.version} + + + log4j + apache-log4j-extras + + + + + + org.apache.spark + spark-core_2.11 + ${spark.version} + + + + org.apache.spark + spark-yarn_2.11 + ${spark.version} + + + + org.apache.hadoop + hadoop-common + ${hadoop2.version} + + + + org.apache.hadoop + hadoop-hdfs + ${hadoop2.version} + + + + org.apache.hadoop + hadoop-client + ${hadoop2.version} + + + + org.apache.hadoop + hadoop-yarn-client + ${hadoop2.version} + + + + xalan + xalan + 2.7.1 + + + + xerces + xercesImpl + 2.9.1 + + + + @@ -30,5 +137,132 @@ src/main/resources/ + + + + + + net.alchim31.maven + scala-maven-plugin + 3.2.1 + + + scala-compile-first + + add-source + compile + + process-resources + + + scala-test-compile + + testCompile + + process-test-resources + + + + + + org.apache.maven.plugins + maven-compiler-plugin + 2.0.2 + + + + compile + + compile + + + + + + org.apache.maven.plugins + maven-shade-plugin + 3.2.4 + + + + shade + + package + + + false + true + ${project.basedir}/target/dependency-reduced-pom.xml + true + + + + + + + + META-INF/MANIFEST.MF + MANIFEST.MF + + + META-INF/services/org.apache.hadoop.security.SecurityInfo + + + META-INF/services/org.apache.hadoop.fs.FileSystem + + + + + + org.slf4j:slf4j-log4j12 + log4j:log4j + org.slf4j:slf4j-api + netty-all:io.netty + + + + + *:* + + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + + + + + + + + + + org.apache.maven.plugins + maven-antrun-plugin + 1.2 + + + copy-resources + + run + + + package + + + + + + + + + + + + + + + - \ No newline at end of file + diff --git a/taier-worker/taier-worker-plugin/spark/yarn2-hdfs2-spark210/spark-sql-proxy/pom.xml b/taier-worker/taier-worker-plugin/spark/spark-sql-proxy/pom.xml 
similarity index 56% rename from taier-worker/taier-worker-plugin/spark/yarn2-hdfs2-spark210/spark-sql-proxy/pom.xml rename to taier-worker/taier-worker-plugin/spark/spark-sql-proxy/pom.xml index f53665a02c..45f53b7ef7 100644 --- a/taier-worker/taier-worker-plugin/spark/yarn2-hdfs2-spark210/spark-sql-proxy/pom.xml +++ b/taier-worker/taier-worker-plugin/spark/spark-sql-proxy/pom.xml @@ -3,44 +3,40 @@ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> - taier-worker-plugin.spark.yarn2-hdfs2-spark210 + taier-worker-plugin.spark com.dtstack.taier 1.0.0 ../pom.xml 4.0.0 - taier-worker-plugin.spark.yarn2-hdfs2-spark210.spark-sql-proxy - taier-worker-plugin.spark.yarn2-hdfs2-spark210.spark-sql-proxy + taier-worker-plugin.spark.spark-sql-proxy + taier-worker-plugin.spark.spark-sql-proxy + + jar spark-sql-proxy - yarn2-hdfs2-spark210 - 1.5.0-SNAPSHOT + spark-sql-proxy + UTF-8 + 2.1.3 + 2.11.8 + 3.2.2 - jar com.dtstack.taier - taier-worker-plugin.spark.yarn2-hdfs2-spark210-core.spark-sql-proxy-core + taier-worker-plugin.base 1.0.0 - - - commons-lang3 - org.apache.commons - - - org.apache.spark spark-sql_2.11 ${spark.version} provided - org.apache.spark spark-hive-thriftserver_2.11 @@ -60,64 +56,46 @@ - + + + net.alchim31.maven + scala-maven-plugin + + + + org.apache.maven.plugins + maven-compiler-plugin + + org.apache.maven.plugins maven-shade-plugin - 3.0.0 - - - package - - shade - - - false - - - org.slf4j - - - - - *:* - - META-INF/*.SF - META-INF/*.DSA - META-INF/*.RSA - - - - - - + org.apache.maven.plugins maven-antrun-plugin 1.2 copy-resources - - package run + + package - - - + @@ -126,4 +104,4 @@ - \ No newline at end of file + diff --git a/taier-worker/taier-worker-plugin/spark/yarn2-hdfs2-spark210-core/spark-sql-proxy-core/src/main/java/com/dtstack/taier/sql/main/SqlProxy.java b/taier-worker/taier-worker-plugin/spark/spark-sql-proxy/src/main/java/com/dtstack/taier/sql/main/SqlProxy.java similarity index 98% rename from taier-worker/taier-worker-plugin/spark/yarn2-hdfs2-spark210-core/spark-sql-proxy-core/src/main/java/com/dtstack/taier/sql/main/SqlProxy.java rename to taier-worker/taier-worker-plugin/spark/spark-sql-proxy/src/main/java/com/dtstack/taier/sql/main/SqlProxy.java index f014d870a5..cee4e0ff1f 100644 --- a/taier-worker/taier-worker-plugin/spark/yarn2-hdfs2-spark210-core/spark-sql-proxy-core/src/main/java/com/dtstack/taier/sql/main/SqlProxy.java +++ b/taier-worker/taier-worker-plugin/spark/spark-sql-proxy/src/main/java/com/dtstack/taier/sql/main/SqlProxy.java @@ -78,8 +78,8 @@ public void runJob(String submitSql, String appName, String logLevel, SparkConf //屏蔽引号内的 分号 Splitter splitter = new Splitter(';'); List sqlArray = splitter.splitEscaped(unzipSql); - for(String sql : sqlArray){ - if(sql == null || sql.trim().length() == 0){ + for (String sql : sqlArray) { + if (sql == null || sql.trim().length() == 0) { continue; } logger.info("processed sql statement {}", sql); diff --git a/taier-worker/taier-worker-plugin/spark/yarn2-hdfs2-spark210-core/spark-sql-proxy-core/src/main/java/com/dtstack/taier/sql/main/util/ZipUtil.java b/taier-worker/taier-worker-plugin/spark/spark-sql-proxy/src/main/java/com/dtstack/taier/sql/main/util/ZipUtil.java similarity index 100% rename from taier-worker/taier-worker-plugin/spark/yarn2-hdfs2-spark210-core/spark-sql-proxy-core/src/main/java/com/dtstack/taier/sql/main/util/ZipUtil.java rename to 
taier-worker/taier-worker-plugin/spark/spark-sql-proxy/src/main/java/com/dtstack/taier/sql/main/util/ZipUtil.java diff --git a/taier-worker/taier-worker-plugin/spark/spark-yarn-client-core/pom.xml b/taier-worker/taier-worker-plugin/spark/spark-yarn-client-core/pom.xml new file mode 100644 index 0000000000..65568b88df --- /dev/null +++ b/taier-worker/taier-worker-plugin/spark/spark-yarn-client-core/pom.xml @@ -0,0 +1,117 @@ + + + + taier-worker-plugin.spark + com.dtstack.taier + 1.0.0 + ../pom.xml + + 4.0.0 + + taier-worker-plugin.spark.spark-yarn-client-core + taier-worker-plugin.spark.spark-yarn-client-core + jar + + + + com.dtstack.taier + taier-worker-plugin.base + + + + com.google.guava + guava + provided + + + + + org.eclipse.jetty + jetty-server + 9.3.19.v20170502 + + + + + org.apache.spark + spark-hive_2.11 + provided + + + + org.apache.spark + spark-core_2.11 + provided + + + org.apache.hadoop + hadoop-confHdfsPath + + + org.apache.hadoop + hadoop-common + + + org.apache.hadoop + hadoop-client + + + + + + org.apache.spark + spark-yarn_2.11 + provided + + + org.apache.hadoop + hadoop-yarn-common + + + org.apache.hadoop + hadoop-yarn-api + + + + hadooop-yarn-server-web-proxy + org.apache.hadoop + + + + + + + org.apache.hadoop + hadoop-client + provided + + + + org.apache.hadoop + hadoop-yarn-client + provided + + + + org.apache.hadoop + hadoop-hdfs + provided + + + + + + + net.alchim31.maven + scala-maven-plugin + + + + org.apache.maven.plugins + maven-compiler-plugin + + + + diff --git a/taier-worker/taier-worker-plugin/spark/spark-yarn-client-core/src/main/java/com/dtstack/taier/sparkyarn/sparkext/ClientExt.java b/taier-worker/taier-worker-plugin/spark/spark-yarn-client-core/src/main/java/com/dtstack/taier/sparkyarn/sparkext/ClientExt.java new file mode 100644 index 0000000000..f28ad31404 --- /dev/null +++ b/taier-worker/taier-worker-plugin/spark/spark-yarn-client-core/src/main/java/com/dtstack/taier/sparkyarn/sparkext/ClientExt.java @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.dtstack.taier.sparkyarn.sparkext; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.yarn.client.api.YarnClient; +import org.apache.spark.SparkConf; +import org.apache.spark.deploy.yarn.ClientArguments; +import org.apache.spark.deploy.yarn.DtClient; + +/** + * Extends the Spark YARN client ---> adjusts how the configuration is packaged before submission + * Date: 2018/5/9 + * Company: www.dtstack.com + * + * @author xuchao + */ + +public class ClientExt extends DtClient { + + public ClientExt(ClientArguments args, Configuration hadoopConf, SparkConf sparkConf, YarnClient yarnClient) { + super(args, hadoopConf, sparkConf, yarnClient); + } +} \ No newline at end of file diff --git a/taier-worker/taier-worker-plugin/spark/yarn2-hdfs2-spark210-core/spark-yarn-client-core/src/main/java/com/dtstack/taier/sparkyarn/sparkext/ClientExtFactory.java b/taier-worker/taier-worker-plugin/spark/spark-yarn-client-core/src/main/java/com/dtstack/taier/sparkyarn/sparkext/ClientExtFactory.java similarity index 77% rename from taier-worker/taier-worker-plugin/spark/yarn2-hdfs2-spark210-core/spark-yarn-client-core/src/main/java/com/dtstack/taier/sparkyarn/sparkext/ClientExtFactory.java rename to taier-worker/taier-worker-plugin/spark/spark-yarn-client-core/src/main/java/com/dtstack/taier/sparkyarn/sparkext/ClientExtFactory.java index 1a77d6e29f..602ea0698d 100644 --- a/taier-worker/taier-worker-plugin/spark/yarn2-hdfs2-spark210-core/spark-yarn-client-core/src/main/java/com/dtstack/taier/sparkyarn/sparkext/ClientExtFactory.java +++ b/taier-worker/taier-worker-plugin/spark/spark-yarn-client-core/src/main/java/com/dtstack/taier/sparkyarn/sparkext/ClientExtFactory.java @@ -18,8 +18,8 @@ package com.dtstack.taier.sparkyarn.sparkext; -import com.dtstack.taier.base.filesystem.FilesystemManager; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.yarn.client.api.YarnClient; import org.apache.spark.SparkConf; import org.apache.spark.deploy.yarn.ClientArguments; @@ -28,16 +28,17 @@ * Reason: * Date: 2019/1/21 * Company: www.dtstack.com + * * @author xuchao */ public class ClientExtFactory { - public static ClientExt getClientExt(FilesystemManager filesystemManager, - ClientArguments args, + public static ClientExt getClientExt(ClientArguments args, Configuration hadoopConf, - SparkConf sparkConf){ + SparkConf sparkConf, + YarnClient yarnClient) { - return new ClientExt(filesystemManager, args, hadoopConf, sparkConf); + return new ClientExt(args, hadoopConf, sparkConf, yarnClient); } } diff --git a/taier-worker/taier-worker-plugin/spark/yarn2-hdfs2-spark210-core/spark-yarn-client-core/src/main/java/com/dtstack/taier/sparkyarn/sparkyarn/SparkJobLog.java b/taier-worker/taier-worker-plugin/spark/spark-yarn-client-core/src/main/java/com/dtstack/taier/sparkyarn/sparkyarn/SparkJobLog.java similarity index 100% rename from taier-worker/taier-worker-plugin/spark/yarn2-hdfs2-spark210-core/spark-yarn-client-core/src/main/java/com/dtstack/taier/sparkyarn/sparkyarn/SparkJobLog.java rename to taier-worker/taier-worker-plugin/spark/spark-yarn-client-core/src/main/java/com/dtstack/taier/sparkyarn/sparkyarn/SparkJobLog.java diff --git a/taier-worker/taier-worker-plugin/spark/yarn2-hdfs2-spark210-core/spark-yarn-client-core/src/main/java/com/dtstack/taier/sparkyarn/sparkyarn/SparkYarnClient.java b/taier-worker/taier-worker-plugin/spark/spark-yarn-client-core/src/main/java/com/dtstack/taier/sparkyarn/sparkyarn/SparkYarnClient.java similarity index 95% rename from 
taier-worker/taier-worker-plugin/spark/yarn2-hdfs2-spark210-core/spark-yarn-client-core/src/main/java/com/dtstack/taier/sparkyarn/sparkyarn/SparkYarnClient.java rename to taier-worker/taier-worker-plugin/spark/spark-yarn-client-core/src/main/java/com/dtstack/taier/sparkyarn/sparkyarn/SparkYarnClient.java index e360f3ff62..a5cf6cd181 100644 --- a/taier-worker/taier-worker-plugin/spark/yarn2-hdfs2-spark210-core/spark-yarn-client-core/src/main/java/com/dtstack/taier/sparkyarn/sparkyarn/SparkYarnClient.java +++ b/taier-worker/taier-worker-plugin/spark/spark-yarn-client-core/src/main/java/com/dtstack/taier/sparkyarn/sparkyarn/SparkYarnClient.java @@ -45,6 +45,7 @@ import com.dtstack.taier.sparkyarn.sparkext.ClientExt; import com.dtstack.taier.sparkyarn.sparkext.ClientExtFactory; import com.dtstack.taier.sparkyarn.sparkyarn.constant.AppEnvConstant; +import com.dtstack.taier.sparkyarn.sparkyarn.constant.SparkConstants; import com.dtstack.taier.sparkyarn.sparkyarn.file.SparkResourceUploader; import com.dtstack.taier.sparkyarn.sparkyarn.parser.AddJarOperator; import com.dtstack.taier.sparkyarn.sparkyarn.util.HadoopConf; @@ -67,7 +68,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.File; import java.io.IOException; import java.net.URLDecoder; import java.net.URLEncoder; @@ -94,8 +94,6 @@ public class SparkYarnClient extends AbstractClient { private static final String SPARK_YARN_MODE = "SPARK_YARN_MODE"; - private static final String IS_CARBON_SPARK_KEY = "isCarbondata"; - private static final String SESSION_CONF_KEY_PREFIX = "session."; private static final String KEY_DEFAULT_FILE_FORMAT = "hive.default.fileformat"; @@ -110,16 +108,12 @@ public class SparkYarnClient extends AbstractClient { private static final String KEY_PRE_STR = "spark."; - private static final String SPARK_JAVA_OPTS_KEY = "SPARK_JAVA_OPTS"; - private static final String PYTHON_RUNNER_CLASS = "org.apache.spark.deploy.PythonRunner"; private static final String PYTHON_RUNNER_DEPENDENCY_RES_KEY = "extRefResource"; private static final String CLUSTER_INFO_WS_FORMAT = "%s/ws/v1/cluster"; - private static final String USER_DIR = System.getProperty("user.dir"); - /** * 如果请求 CLUSTER_INFO_WS_FORMAT 返回信息包含该特征则表示是alive */ @@ -139,10 +133,6 @@ public class SparkYarnClient extends AbstractClient { private ThreadPoolExecutor threadPoolExecutor; - private static String userDir = System.getProperty("user.dir"); - - private static final String SPARK_CONF_DIR = "sparkconf"; - public static final String SPARK_LOG4J_FILE_NAME = "log4j-spark.properties"; public static final String SPARK_LOCAL_LOG4J_KEY = "spark_local_log4j_key"; @@ -249,15 +239,13 @@ private JobResult submitJobWithJar(JobClient jobClient) { ClientArguments clientArguments = new ClientArguments(argList.toArray(new String[argList.size()])); SparkConf sparkConf = buildBasicSparkConf(jobClient); sparkConf.setAppName(appName); - setSparkLog4jLocalFilePath(sparkConf, jobClient); fillExtSparkConf(sparkConf, jobClient.getConfProperties()); - setSparkLog4jConfiguration(sparkConf); + setSparkLog4j(jobClient,sparkConf); ApplicationId appId = null; try { - ClientExt clientExt = ClientExtFactory.getClientExt(filesystemManager, clientArguments, yarnConf, sparkConf); - clientExt.setSparkYarnConfig(sparkYarnConfig); + ClientExt clientExt = ClientExtFactory.getClientExt(clientArguments, yarnConf, sparkConf, yarnClient); String proxyUserName = sparkYarnConfig.getDtProxyUserName(); if (StringUtils.isNotBlank(proxyUserName)) { logger.info("jobId {} ugi proxyUser is {}", 
jobClient.getJobId(), proxyUserName); @@ -351,15 +339,12 @@ private JobResult submitPythonJob(JobClient jobClient) { sparkConf.set("spark.submit.pyFiles", pythonExtPath); sparkConf.setAppName(appName); - setSparkLog4jLocalFilePath(sparkConf, jobClient); fillExtSparkConf(sparkConf, jobClient.getConfProperties()); setSparkLog4jConfiguration(sparkConf); try { ClientArguments clientArguments = new ClientArguments(argList.toArray(new String[argList.size()])); - ClientExt clientExt = new ClientExt(filesystemManager, clientArguments, yarnConf, sparkConf); - clientExt.setSparkYarnConfig(sparkYarnConfig); - + ClientExt clientExt = new ClientExt(clientArguments, yarnConf, sparkConf, yarnClient); String proxyUserName = sparkYarnConfig.getDtProxyUserName(); if (StringUtils.isNotBlank(proxyUserName)) { logger.info("ugi proxyUser is {}", proxyUserName); @@ -459,15 +444,14 @@ private JobResult submitSparkSqlJobForBatch(JobClient jobClient) { ClientArguments clientArguments = new ClientArguments(argList.toArray(new String[argList.size()])); SparkConf sparkConf = buildBasicSparkConf(jobClient); sparkConf.setAppName(jobClient.getJobName()); - setSparkLog4jLocalFilePath(sparkConf, jobClient); + setSparkLog4j(jobClient, sparkConf); fillExtSparkConf(sparkConf, confProp); setSparkLog4jConfiguration(sparkConf); ApplicationId appId = null; try { - ClientExt clientExt = ClientExtFactory.getClientExt(filesystemManager, clientArguments, yarnConf, sparkConf); - clientExt.setSparkYarnConfig(sparkYarnConfig); + ClientExt clientExt = ClientExtFactory.getClientExt(clientArguments, yarnConf, sparkConf, yarnClient); String proxyUserName = sparkYarnConfig.getDtProxyUserName(); if (StringUtils.isNotBlank(proxyUserName)) { logger.info("ugi proxyUser is {}", proxyUserName); @@ -502,15 +486,6 @@ private Map getSparkSessionConf(Properties confProp) { return map; } - private void setSparkLog4jLocalFilePath(SparkConf sparkConf, JobClient jobClient) { - Properties confProp = jobClient.getConfProperties(); - String logLevel = MathUtil.getString(confProp.get(LOG_LEVEL_KEY), "info"); - String path = userDir + File.separator + SPARK_CONF_DIR + File.separator + logLevel.toLowerCase() + File.separator + SPARK_LOG4J_FILE_NAME; - File file = new File(path); - if (file.exists()) { - sparkConf.set(SPARK_LOCAL_LOG4J_KEY, path); - } - } private SparkConf buildBasicSparkConf(JobClient jobClient) { @@ -899,4 +874,37 @@ private YarnClient buildYarnClient() { } + + private void setSparkLog4j(JobClient jobClient, SparkConf sparkConf) { + Properties confProp = jobClient.getConfProperties(); + String logLevel = MathUtil.getString(confProp.get(SparkConstants.LOG_LEVEL_KEY), "info"); + sparkConf.set( + "spark.log4j.content", + StringUtils.replace(SparkConstants.SPARK_LOG4J_CONTENT, "INFO", logLevel)); + String log4jContent = SparkConstants.SPARK_JAVA_OPTIONS_LOG4J_CONTENT; + setSparkExtraJavaOptions(log4jContent, sparkConf); + } + + private void setSparkExtraJavaOptions(String options, SparkConf sparkConf) { + String driverExtraJavaOptions = + sparkConf.get(SparkConstants.SPARK_DRIVER_EXTRA_JAVA_OPTIONS, ""); + if (StringUtils.isBlank(driverExtraJavaOptions)) { + sparkConf.set(SparkConstants.SPARK_DRIVER_EXTRA_JAVA_OPTIONS, options); + } else { + sparkConf.set( + SparkConstants.SPARK_DRIVER_EXTRA_JAVA_OPTIONS, + driverExtraJavaOptions + " " + options); + } + String executorExtraJavaOptions = + sparkConf.get(SparkConstants.SPARK_EXECUTOR_EXTRA_JAVA_OPTIONS, ""); + if (StringUtils.isBlank(executorExtraJavaOptions)) { + 
sparkConf.set(SparkConstants.SPARK_EXECUTOR_EXTRA_JAVA_OPTIONS, options); + } else { + sparkConf.set( + SparkConstants.SPARK_EXECUTOR_EXTRA_JAVA_OPTIONS, + executorExtraJavaOptions + " " + options); + } + } + + } diff --git a/taier-worker/taier-worker-plugin/spark/yarn2-hdfs2-spark210-core/spark-yarn-client-core/src/main/java/com/dtstack/taier/sparkyarn/sparkyarn/SparkYarnConfig.java b/taier-worker/taier-worker-plugin/spark/spark-yarn-client-core/src/main/java/com/dtstack/taier/sparkyarn/sparkyarn/SparkYarnConfig.java similarity index 100% rename from taier-worker/taier-worker-plugin/spark/yarn2-hdfs2-spark210-core/spark-yarn-client-core/src/main/java/com/dtstack/taier/sparkyarn/sparkyarn/SparkYarnConfig.java rename to taier-worker/taier-worker-plugin/spark/spark-yarn-client-core/src/main/java/com/dtstack/taier/sparkyarn/sparkyarn/SparkYarnConfig.java diff --git a/taier-worker/taier-worker-plugin/spark/yarn2-hdfs2-spark210-core/spark-yarn-client-core/src/main/java/com/dtstack/taier/sparkyarn/sparkyarn/SparkYarnResourceInfo.java b/taier-worker/taier-worker-plugin/spark/spark-yarn-client-core/src/main/java/com/dtstack/taier/sparkyarn/sparkyarn/SparkYarnResourceInfo.java similarity index 100% rename from taier-worker/taier-worker-plugin/spark/yarn2-hdfs2-spark210-core/spark-yarn-client-core/src/main/java/com/dtstack/taier/sparkyarn/sparkyarn/SparkYarnResourceInfo.java rename to taier-worker/taier-worker-plugin/spark/spark-yarn-client-core/src/main/java/com/dtstack/taier/sparkyarn/sparkyarn/SparkYarnResourceInfo.java diff --git a/taier-worker/taier-worker-plugin/spark/yarn2-hdfs2-spark210-core/spark-yarn-client-core/src/main/java/com/dtstack/taier/sparkyarn/sparkyarn/constant/AppEnvConstant.java b/taier-worker/taier-worker-plugin/spark/spark-yarn-client-core/src/main/java/com/dtstack/taier/sparkyarn/sparkyarn/constant/AppEnvConstant.java similarity index 100% rename from taier-worker/taier-worker-plugin/spark/yarn2-hdfs2-spark210-core/spark-yarn-client-core/src/main/java/com/dtstack/taier/sparkyarn/sparkyarn/constant/AppEnvConstant.java rename to taier-worker/taier-worker-plugin/spark/spark-yarn-client-core/src/main/java/com/dtstack/taier/sparkyarn/sparkyarn/constant/AppEnvConstant.java diff --git a/taier-worker/taier-worker-plugin/spark/spark-yarn-client-core/src/main/java/com/dtstack/taier/sparkyarn/sparkyarn/constant/SparkConstants.java b/taier-worker/taier-worker-plugin/spark/spark-yarn-client-core/src/main/java/com/dtstack/taier/sparkyarn/sparkyarn/constant/SparkConstants.java new file mode 100644 index 0000000000..dc9128af8e --- /dev/null +++ b/taier-worker/taier-worker-plugin/spark/spark-yarn-client-core/src/main/java/com/dtstack/taier/sparkyarn/sparkyarn/constant/SparkConstants.java @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.dtstack.taier.sparkyarn.sparkyarn.constant; + +import java.util.ArrayList; +import java.util.Arrays; + +public class SparkConstants { + public static final String HADOOP_CONF = "__hadoop_conf__"; + + public static final String HIVE_SITE = "/hive-site.xml"; + + public static final String CORE_SITE = "/core-site.xml"; + + public static final String YARN_SITE = "/yarn-site.xml"; + + public static final ArrayList FILTER_PARAM = + new ArrayList<>( + Arrays.asList( + "fs.hdfs.impl.disable.cache", + "fs.file.impl.disable.cache", + "hive.execution.engine")); + + public static final String SPARK_LOG4J_CONTENT = + "log4j.rootLogger=INFO,Client\n" + + "log4j.logger.Client=INFO,Client\n" + + "log4j.additivity.Client = false\n" + + "log4j.appender.console.target=System.err\n" + + "log4j.appender.Client=org.apache.log4j.ConsoleAppender\n" + + "log4j.appender.Client.layout=org.apache.log4j.PatternLayout\n" + + "log4j.appender.Client.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss,SSS} %-5p %-60c %x - %m%n"; + + public static final String SPARK_JAVA_OPTIONS_LOG4J_CONTENT = + "-Dlog4j.configuration=./__spark_conf__/log4j.properties"; + + + public static final String SPARK_DRIVER_EXTRA_JAVA_OPTIONS = "spark.driver.extraJavaOptions"; + + public static final String SPARK_EXECUTOR_EXTRA_JAVA_OPTIONS = + "spark.executor.extraJavaOptions"; + + public static final String LOG_LEVEL_KEY = "logLevel"; +} \ No newline at end of file diff --git a/taier-worker/taier-worker-plugin/spark/yarn2-hdfs2-spark210-core/spark-yarn-client-core/src/main/java/com/dtstack/taier/sparkyarn/sparkyarn/enums/ExceptionInfoConstrant.java b/taier-worker/taier-worker-plugin/spark/spark-yarn-client-core/src/main/java/com/dtstack/taier/sparkyarn/sparkyarn/enums/ExceptionInfoConstrant.java similarity index 100% rename from taier-worker/taier-worker-plugin/spark/yarn2-hdfs2-spark210-core/spark-yarn-client-core/src/main/java/com/dtstack/taier/sparkyarn/sparkyarn/enums/ExceptionInfoConstrant.java rename to taier-worker/taier-worker-plugin/spark/spark-yarn-client-core/src/main/java/com/dtstack/taier/sparkyarn/sparkyarn/enums/ExceptionInfoConstrant.java diff --git a/taier-worker/taier-worker-plugin/spark/yarn2-hdfs2-spark210-core/spark-yarn-client-core/src/main/java/com/dtstack/taier/sparkyarn/sparkyarn/enums/Status.java b/taier-worker/taier-worker-plugin/spark/spark-yarn-client-core/src/main/java/com/dtstack/taier/sparkyarn/sparkyarn/enums/Status.java similarity index 100% rename from taier-worker/taier-worker-plugin/spark/yarn2-hdfs2-spark210-core/spark-yarn-client-core/src/main/java/com/dtstack/taier/sparkyarn/sparkyarn/enums/Status.java rename to taier-worker/taier-worker-plugin/spark/spark-yarn-client-core/src/main/java/com/dtstack/taier/sparkyarn/sparkyarn/enums/Status.java diff --git a/taier-worker/taier-worker-plugin/spark/yarn2-hdfs2-spark210-core/spark-yarn-client-core/src/main/java/com/dtstack/taier/sparkyarn/sparkyarn/file/ResourceCleaner.java b/taier-worker/taier-worker-plugin/spark/spark-yarn-client-core/src/main/java/com/dtstack/taier/sparkyarn/sparkyarn/file/ResourceCleaner.java similarity index 100% rename from taier-worker/taier-worker-plugin/spark/yarn2-hdfs2-spark210-core/spark-yarn-client-core/src/main/java/com/dtstack/taier/sparkyarn/sparkyarn/file/ResourceCleaner.java rename to 
taier-worker/taier-worker-plugin/spark/spark-yarn-client-core/src/main/java/com/dtstack/taier/sparkyarn/sparkyarn/file/ResourceCleaner.java diff --git a/taier-worker/taier-worker-plugin/spark/spark-yarn-client-core/src/main/java/com/dtstack/taier/sparkyarn/sparkyarn/file/SparkResourceUploader.java b/taier-worker/taier-worker-plugin/spark/spark-yarn-client-core/src/main/java/com/dtstack/taier/sparkyarn/sparkyarn/file/SparkResourceUploader.java new file mode 100644 index 0000000000..d020d00b8c --- /dev/null +++ b/taier-worker/taier-worker-plugin/spark/spark-yarn-client-core/src/main/java/com/dtstack/taier/sparkyarn/sparkyarn/file/SparkResourceUploader.java @@ -0,0 +1,286 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.dtstack.taier.sparkyarn.sparkyarn.file; + +import com.dtstack.taier.base.filesystem.FilesystemManager; +import com.dtstack.taier.base.util.KerberosUtils; +import com.dtstack.taier.pluginapi.constrant.ConfigConstant; +import com.dtstack.taier.pluginapi.exception.ExceptionUtil; +import com.dtstack.taier.pluginapi.exception.PluginDefineException; +import com.dtstack.taier.sparkyarn.sparkyarn.SparkYarnConfig; +import com.dtstack.taier.sparkyarn.sparkyarn.SparkYarnResourceInfo; +import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.lang.reflect.Method; +import java.net.InetAddress; +import java.util.HashMap; +import java.util.Iterator; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Properties; + +import static com.dtstack.taier.sparkyarn.sparkyarn.constant.SparkConstants.*; + +public class SparkResourceUploader { + + private static final Logger logger = LoggerFactory.getLogger(SparkResourceUploader.class); + + public static final String SP = File.separator; + + // default hdfs resource cleaner rate + public static final String SPARK_DEFAULT_CLEAR_RESOURCED_RATE = "30"; + + private final YarnConfiguration yarnConf; + + private final Properties sparkExtProp; + + private final SparkYarnConfig sparkYarnConfig; + + + + private final FilesystemManager filesystemManager; + + public SparkResourceUploader( + YarnConfiguration yarnConf, + SparkYarnConfig sparkYarnConfig, + Properties sparkExtProp, + FilesystemManager filesystemManager) { + this.yarnConf = yarnConf; + this.sparkExtProp = sparkExtProp; + this.sparkYarnConfig = sparkYarnConfig; + this.filesystemManager = filesystemManager; + } + + public void uploadSparkResource() { + Object sparkResourcesDirProp = 
sparkExtProp.get(SparkYarnResourceInfo.SPARK_RESOURCES_DIR); + if (sparkResourcesDirProp == null || StringUtils.isBlank(sparkResourcesDirProp.toString())) { + sparkResourcesDirProp = SparkYarnResourceInfo.DEFAULT_SPARK_RESOURCES_DIR; + } + final String sparkResourcesDir = sparkResourcesDirProp.toString(); + String md5sum = sparkYarnConfig.getMd5sum(); + String sparkClearResourceRate = + sparkExtProp + .getOrDefault( + SparkYarnResourceInfo.SPARK_CLEAR_RESOURCED_RATE, + SPARK_DEFAULT_CLEAR_RESOURCED_RATE) + .toString(); + try { + KerberosUtils.login( + sparkYarnConfig, + () -> { + try { + FileSystem fileSystem = FileSystem.get(yarnConf); + String hostName = InetAddress.getLocalHost().getHostName(); + String sparkResourcesDirHostName = + sparkResourcesDir + SparkResourceUploader.SP + hostName; + String sparkResourcesDirMd5sum = + sparkResourcesDir + + SparkResourceUploader.SP + + hostName + + SparkResourceUploader.SP + + md5sum; + ResourceCleaner.start( + fileSystem, + sparkResourcesDirHostName, + sparkResourcesDirMd5sum, + sparkClearResourceRate); + uploadHadoopConf(fileSystem, sparkResourcesDirMd5sum); + uploadSparkSqlProxy(fileSystem, sparkResourcesDirMd5sum); + uploadKerberosConf(fileSystem, sparkResourcesDirMd5sum); + + + } catch (IOException e) { + throw new PluginDefineException("upload hadoop conf", e); + } + return null; + }, + yarnConf); + } catch (Exception e) { + throw new PluginDefineException("upload hadoop conf", e); + } + } + + private void uploadSparkSqlProxy(FileSystem fileSystem, String sparkResourcesDirMd5sum) { + try { + Path localPath = new Path(getSqlProxyJarPath()); + logger.info("local path {}", localPath); + String sparkSqlProxyPath = sparkResourcesDirMd5sum + "/spark-sql-proxy.jar"; + Path remotePath = new Path(sparkSqlProxyPath); + fileSystem.copyFromLocalFile(localPath, remotePath); + sparkYarnConfig.setSparkSqlProxyPath(sparkSqlProxyPath); + } catch (IOException e) { + throw new PluginDefineException("upload spark sql proxy failed", e); + } + } + + private String getSqlProxyJarPath() { + String path = this.getClass().getProtectionDomain().getCodeSource().getLocation().getPath(); + + File pluginDir = new File(path).getParentFile().getParentFile(); + File[] sqlProxyDir = + pluginDir.listFiles( + (dir, name) -> + dir.isDirectory() + && name.toLowerCase().startsWith("spark-sql-proxy")); + if (sqlProxyDir != null && sqlProxyDir.length == 1) { + File[] sqlProxyJars = sqlProxyDir[0].listFiles(); + if (sqlProxyJars != null && sqlProxyJars.length == 1) { + String sqlProxyJar = sqlProxyJars[0].getName(); + if (sqlProxyJar.toLowerCase().startsWith("spark-sql-proxy") && sqlProxyJar.toLowerCase().endsWith(".jar")) { + return sqlProxyJars[0].getAbsolutePath(); + } + } + } + throw new PluginDefineException( + "Can not find spark sql proxy jar in path: " + pluginDir); + } + + + public void uploadKerberosConf(FileSystem fileSystem, String sparkResourcesDirMd5sum) { + if (sparkYarnConfig.isOpenKerberos()) { + try { + String keytab = KerberosUtils.getKeytabPath(sparkYarnConfig); + String krb5 = new File(keytab).getParent() + File.separator + ConfigConstant.KRB5_CONF; + String remoteKeytab = + sparkResourcesDirMd5sum + File.separator + new File(keytab).getName(); + String remoteKrb5 = + sparkResourcesDirMd5sum + File.separator + new File(krb5).getName(); + + fileSystem.copyFromLocalFile(new Path(keytab), new Path(remoteKeytab)); + fileSystem.copyFromLocalFile(new Path(krb5), new Path(remoteKrb5)); + sparkExtProp.setProperty(ConfigConstant.SPARK_KERBEROS_REMOTE_KEYTAB, 
remoteKeytab); + sparkExtProp.setProperty(ConfigConstant.SPARK_KERBEROS_REMOTE_KRB5, remoteKrb5); + } catch (IOException e) { + throw new PluginDefineException("upload kerberos conf failed", e); + } + } + } + + + public void uploadHadoopConf(FileSystem fileSystem, String sparkResourcesDirMd5sum) { + try { + Class clazz = Class.forName("org.apache.hadoop.conf.Configuration"); + Method method = clazz.getDeclaredMethod("getOverlay"); + method.setAccessible(true); + Properties yarnConfProp = (Properties) method.invoke(yarnConf); + Map yarnConfMap = new HashMap<>(); + for (Map.Entry yarnConfEntry : yarnConfProp.entrySet()) { + if (FILTER_PARAM.contains((String) yarnConfEntry.getKey())) { + continue; + } + yarnConfMap.put((String) yarnConfEntry.getKey(), yarnConfEntry.getValue()); + } + String coreSiteContent = getCoreSiteContent(yarnConfMap); + File tmpHadoopConfFileDir = + new File( + String.format( + "%s/%s/%s/%s", + System.getProperty("user.dir"), + "tmp", + "spark", + "local_hadoop_conf")); + if (!tmpHadoopConfFileDir.exists()) { + tmpHadoopConfFileDir.mkdirs(); + } + File tmpHadoopConfFile = + File.createTempFile( + sparkYarnConfig.getMd5sum() + "core-site.xml", + null, + tmpHadoopConfFileDir); + try (FileWriter fwrt = new FileWriter(tmpHadoopConfFile)) { + fwrt.write(coreSiteContent); + fwrt.flush(); + } catch (Exception e) { + logger.error("Write yarnConf error " + ExceptionUtil.getErrorMessage(e)); + tmpHadoopConfFile.delete(); + throw new PluginDefineException(e); + } + + String sparkHadoopConfDir = + sparkResourcesDirMd5sum + File.separator + HADOOP_CONF; + String hiveSite = sparkHadoopConfDir + HIVE_SITE; + String coreSite = sparkHadoopConfDir + CORE_SITE; + String yarnSite = sparkHadoopConfDir + YARN_SITE; + Path remoteHiveSitePath = new Path(hiveSite); + logger.info("Upload hive-site.xml to remote path {}", remoteHiveSitePath); + fileSystem.copyFromLocalFile(new Path(tmpHadoopConfFile.getPath()), remoteHiveSitePath); + fileSystem.setPermission(remoteHiveSitePath, new FsPermission((short) 0777)); + + Path remoteCoreSitePath = new Path(coreSite); + logger.info("Upload core-site.xml to remote path {}", remoteCoreSitePath); + fileSystem.copyFromLocalFile(new Path(tmpHadoopConfFile.getPath()), remoteCoreSitePath); + fileSystem.setPermission(remoteCoreSitePath, new FsPermission((short) 0777)); + + // upload yarn-site.xml + Path remoteYarnSitePath = new Path(yarnSite); + logger.info("Upload yarn-site.xml to remote path {}", remoteYarnSitePath); + fileSystem.copyFromLocalFile(new Path(tmpHadoopConfFile.getPath()), remoteYarnSitePath); + fileSystem.setPermission(remoteYarnSitePath, new FsPermission((short) 0777)); + + sparkExtProp.setProperty( + ConfigConstant.SPARK_HADOOP_CONF_REMOTE_DIR, sparkHadoopConfDir); + tmpHadoopConfFile.delete(); + + } catch (Exception e) { + throw new PluginDefineException("upload hadoop conf failed", e); + } + } + + private String getCoreSiteContent(Map hadoopConfMap) { + StringBuilder hadoopConfContent = new StringBuilder(); + hadoopConfContent + .append("") + .append(System.lineSeparator()); + hadoopConfContent + .append("") + .append(System.lineSeparator()); + hadoopConfContent.append("").append(System.lineSeparator()); + Iterator> it = hadoopConfMap.entrySet().iterator(); + while (it.hasNext()) { + Map.Entry e = it.next(); + String name = e.getKey(); + // xml文件校验&需要转换为xml文件可识别字符 + String value = e.getValue().toString().replaceAll("&", "&"); + hadoopConfContent.append(" ").append(System.lineSeparator()); + hadoopConfContent + .append(" ") + .append(name) + 
.append("") + .append(System.lineSeparator()); + hadoopConfContent + .append(" ") + .append(value) + .append("") + .append(System.lineSeparator()); + hadoopConfContent.append(" ").append(System.lineSeparator()); + } + hadoopConfContent.append("").append(System.lineSeparator()); + + return hadoopConfContent.toString(); + } +} diff --git a/taier-worker/taier-worker-plugin/spark/yarn2-hdfs2-spark210-core/spark-yarn-client-core/src/main/java/com/dtstack/taier/sparkyarn/sparkyarn/parser/AddJarOperator.java b/taier-worker/taier-worker-plugin/spark/spark-yarn-client-core/src/main/java/com/dtstack/taier/sparkyarn/sparkyarn/parser/AddJarOperator.java similarity index 100% rename from taier-worker/taier-worker-plugin/spark/yarn2-hdfs2-spark210-core/spark-yarn-client-core/src/main/java/com/dtstack/taier/sparkyarn/sparkyarn/parser/AddJarOperator.java rename to taier-worker/taier-worker-plugin/spark/spark-yarn-client-core/src/main/java/com/dtstack/taier/sparkyarn/sparkyarn/parser/AddJarOperator.java diff --git a/taier-worker/taier-worker-plugin/spark/yarn2-hdfs2-spark210-core/spark-yarn-client-core/src/main/java/com/dtstack/taier/sparkyarn/sparkyarn/util/HadoopConf.java b/taier-worker/taier-worker-plugin/spark/spark-yarn-client-core/src/main/java/com/dtstack/taier/sparkyarn/sparkyarn/util/HadoopConf.java similarity index 100% rename from taier-worker/taier-worker-plugin/spark/yarn2-hdfs2-spark210-core/spark-yarn-client-core/src/main/java/com/dtstack/taier/sparkyarn/sparkyarn/util/HadoopConf.java rename to taier-worker/taier-worker-plugin/spark/spark-yarn-client-core/src/main/java/com/dtstack/taier/sparkyarn/sparkyarn/util/HadoopConf.java diff --git a/taier-worker/taier-worker-plugin/spark/yarn2-hdfs2-spark210-core/spark-yarn-client-core/src/main/java/org/apache/hadoop/security/UserGroupInformation.java b/taier-worker/taier-worker-plugin/spark/spark-yarn-client-core/src/main/java/org/apache/hadoop/security/UserGroupInformation.java similarity index 100% rename from taier-worker/taier-worker-plugin/spark/yarn2-hdfs2-spark210-core/spark-yarn-client-core/src/main/java/org/apache/hadoop/security/UserGroupInformation.java rename to taier-worker/taier-worker-plugin/spark/spark-yarn-client-core/src/main/java/org/apache/hadoop/security/UserGroupInformation.java diff --git a/taier-worker/taier-worker-plugin/spark/yarn2-hdfs2-spark210-core/spark-yarn-client-core/src/main/scala/org/apache/spark/deploy/rest/DtRestSubmissionClient.scala b/taier-worker/taier-worker-plugin/spark/spark-yarn-client-core/src/main/scala/org/apache/spark/deploy/rest/DtRestSubmissionClient.scala similarity index 100% rename from taier-worker/taier-worker-plugin/spark/yarn2-hdfs2-spark210-core/spark-yarn-client-core/src/main/scala/org/apache/spark/deploy/rest/DtRestSubmissionClient.scala rename to taier-worker/taier-worker-plugin/spark/spark-yarn-client-core/src/main/scala/org/apache/spark/deploy/rest/DtRestSubmissionClient.scala diff --git a/taier-worker/taier-worker-plugin/spark/yarn2-hdfs2-spark210-core/spark-yarn-client-core/src/main/scala/org/apache/spark/deploy/yarn/DtClient.scala b/taier-worker/taier-worker-plugin/spark/spark-yarn-client-core/src/main/scala/org/apache/spark/deploy/yarn/DtClient.scala similarity index 95% rename from taier-worker/taier-worker-plugin/spark/yarn2-hdfs2-spark210-core/spark-yarn-client-core/src/main/scala/org/apache/spark/deploy/yarn/DtClient.scala rename to taier-worker/taier-worker-plugin/spark/spark-yarn-client-core/src/main/scala/org/apache/spark/deploy/yarn/DtClient.scala index 7e226f920f..66b43c0c9c 
100644 --- a/taier-worker/taier-worker-plugin/spark/yarn2-hdfs2-spark210-core/spark-yarn-client-core/src/main/scala/org/apache/spark/deploy/yarn/DtClient.scala +++ b/taier-worker/taier-worker-plugin/spark/spark-yarn-client-core/src/main/scala/org/apache/spark/deploy/yarn/DtClient.scala @@ -17,14 +17,8 @@ package org.apache.spark.deploy.yarn -import com.dtstack.taier.sparkyarn.sparkyarn.SparkYarnClient - -import java.io.{File, FileOutputStream, IOException, OutputStreamWriter} -import java.net.{InetAddress, URI, UnknownHostException} -import java.nio.ByteBuffer -import java.nio.charset.StandardCharsets -import java.util.zip.{ZipEntry, ZipOutputStream} -import java.util.{Properties, UUID} +import com.dtstack.taier.pluginapi.constrant.ConfigConstant +import com.dtstack.taier.sparkyarn.sparkyarn.constant.SparkConstants import com.google.common.base.Objects import com.google.common.io.Files import org.apache.hadoop.conf.Configuration @@ -43,7 +37,7 @@ import org.apache.hadoop.yarn.client.api.{YarnClient, YarnClientApplication} import org.apache.hadoop.yarn.conf.YarnConfiguration import org.apache.hadoop.yarn.exceptions.ApplicationNotFoundException import org.apache.hadoop.yarn.util.Records -import org.apache.spark.deploy.SparkHadoopUtil +import org.apache.spark.deploy.yarn.DtConfig._ import org.apache.spark.deploy.yarn.config._ import org.apache.spark.deploy.yarn.security.ConfigurableCredentialManager import org.apache.spark.internal.Logging @@ -52,6 +46,12 @@ import org.apache.spark.launcher.{LauncherBackend, SparkAppHandle, YarnCommandBu import org.apache.spark.util.{CallerContext, Utils} import org.apache.spark.{SecurityManager, SparkConf, SparkException} +import java.io.{File, FileOutputStream, IOException, OutputStreamWriter} +import java.net.{InetAddress, URI, UnknownHostException} +import java.nio.ByteBuffer +import java.nio.charset.{Charset, StandardCharsets} +import java.util.zip.{ZipEntry, ZipOutputStream} +import java.util.{Properties, UUID} import scala.collection.JavaConverters._ import scala.collection.mutable.{ArrayBuffer, HashMap, HashSet, ListBuffer, Map} import scala.util.control.NonFatal @@ -60,15 +60,12 @@ import scala.util.{Failure, Success, Try} private[spark] class DtClient( val args: ClientArguments, val hadoopConf: Configuration, - val sparkConf: SparkConf) + val sparkConf: SparkConf, + val yarnClient: YarnClient) extends Logging { import DtClient._ - def this(clientArgs: ClientArguments, spConf: SparkConf) = - this(clientArgs, SparkHadoopUtil.get.newConfiguration(spConf), spConf) - - private val yarnClient = YarnClient.createYarnClient private val yarnConf = new YarnConfiguration(hadoopConf) val MEMORY_OVERHEAD_FACTOR = 0.10 @@ -104,6 +101,8 @@ private[spark] class DtClient( private var principal: String = null private var keytab: String = null private var credentials: Credentials = null + private var krb5: String = null + private var krb5FileName = KRB5FILENAME private var amKeytabFileName: String = null private val launcherBackend = new LauncherBackend() { @@ -146,14 +145,13 @@ private[spark] class DtClient( * available in the alpha API. */ def submitApplication(priority : Int = 0): ApplicationId = { + logInfo("submit action") var appId: ApplicationId = null try { launcherBackend.connect() // Setup the credentials before doing anything else, // so we have don't have issues at any point. 
setupCredentials() - yarnClient.init(yarnConf) - yarnClient.start() logInfo("Requesting a new application from cluster with %d NodeManagers" .format(yarnClient.getYarnClusterMetrics.getNumNodeManagers)) @@ -400,6 +398,27 @@ private[spark] class DtClient( UserGroupInformation.loginUserFromKeytab(userPrincipal, userKeytabPath) } + def setupCredentials(): Unit = { + loginFromKeytab = sparkConf.contains(PRINCIPAL.key) + if (loginFromKeytab) { + principal = sparkConf.get(PRINCIPAL).get + keytab = sparkConf.get(KEYTAB).orNull + krb5 = sparkConf.get(KRB5_CONF).orNull + + require(keytab != null, "Keytab must be specified when principal is specified.") + logInfo("Attempting to login to the Kerberos" + + s" using principal: $principal and keytab: $keytab") + val f = new File(keytab) + // Generate a file name that can be used for the keytab file, that does not conflict + // with any user file. + amKeytabFileName = f.getName + "-" + UUID.randomUUID().toString + sparkConf.set(PRINCIPAL.key, principal) + } + // Defensive copy of the credentials + + credentials = new Credentials(UserGroupInformation.getCurrentUser.getCredentials) + } + /** * Upload any resources to the distributed cache if needed. If a resource is intended to be * consumed locally, set up the appropriate config for downstream code to handle it properly. @@ -520,22 +539,42 @@ private[spark] class DtClient( } } - val log4jLocalPath = sparkConf.get(SparkYarnClient.SPARK_LOCAL_LOG4J_KEY, "") - if (!"".equals(log4jLocalPath)) { - distribute(log4jLocalPath, destName = Some(SparkYarnClient.SPARK_LOG4J_FILE_NAME)) + + val hadoopConfPath: String = sparkConf.get(ConfigConstant.SPARK_HADOOP_CONF_REMOTE_DIR).toString + distribute(hadoopConfPath, destName = Some(SparkConstants.HADOOP_CONF)) + + sparkConf.getOption(DtConfig.SPARK_UDFS_TO_DISTRIBUTE.key) match { + case Some(v) => v.split(",").foreach( + lib => { + val libPath = lib.split("/").last + distribute(lib, destName = Some(libPath), appMasterOnly = false) + } + ) + + case None => } // If we passed in a keytab, make sure we copy the keytab to the staging directory on // HDFS, and setup the relevant environment vars, so the AM can login again. + // 开启kerberos使用hdfs上的keytab路径 if (loginFromKeytab) { logInfo("To enable the AM to login from keytab, credentials are being copied over to the AM" + " via the YARN Secure Distributed Cache.") - val (_, localizedPath) = distribute(keytab, + val remoteKeytab: String = sparkConf.get(ConfigConstant.SPARK_KERBEROS_REMOTE_KEYTAB) + val (_, localizedPath) = distribute(remoteKeytab, destName = Some(amKeytabFileName), appMasterOnly = true) require(localizedPath != null, "Keytab file already distributed.") } + if (loginFromKeytab) { + logInfo("To enable the AM to login from krb5.conf, credentials are being copied over to the AM" + + " via the YARN Secure Distributed Cache.") + val (_, localizedPath) = distribute(krb5, + destName = Some(krb5FileName)) + require(localizedPath != null, "krb5.conf file already distributed.") + } + /** * Add Spark to the cache. There are two settings that control what files to add to the cache: * - if a Spark archive is defined, use the archive. 
The archive is expected to contain @@ -680,10 +719,16 @@ private[spark] class DtClient( val remoteFs = FileSystem.get(remoteConfArchivePath.toUri(), hadoopConf) sparkConf.set(CACHED_CONF_ARCHIVE, remoteConfArchivePath.toString()) - val localConfArchive = new Path(createConfArchive().toURI()) - copyFileToRemote(destDir, localConfArchive, replication, symlinkCache, force = true, - destName = Some(LOCALIZED_CONF_ARCHIVE)) - + val localConfArcFile = createConfArchive() + val localConfArchive = new Path(localConfArcFile.toURI()) + try { + copyFileToRemote(destDir, localConfArchive, replication, symlinkCache, force = true, + destName = Some(LOCALIZED_CONF_ARCHIVE)) + }finally { + if(!localConfArcFile.delete) { + logError("failed to delete file " + localConfArcFile.getAbsolutePath) + } + } // Manually add the config archive to the cache manager so that the AM is launched with // the proper files set up. distCacheMgr.addResource( @@ -698,98 +743,6 @@ private[spark] class DtClient( localResources } - def loadConfFromLocal(hadoopConfFiles: HashMap[String, File]): Unit ={ - Seq("HADOOP_CONF_DIR", "YARN_CONF_DIR").foreach { envKey => - sys.env.get(envKey).foreach { path => - val dir = new File(path) - if (dir.isDirectory()) { - val files = dir.listFiles() - if (files == null) { - logWarning("Failed to list files under directory " + dir) - } else { - files.foreach { file => - if (file.isFile && !hadoopConfFiles.contains(file.getName())) { - hadoopConfFiles(file.getName()) = file - } - } - } - } - } - } - } - - def loadHadoopConf(hadoopConfFiles: HashMap[String, File]): Unit ={ - loadConfFromLocal(hadoopConfFiles) - } - - /** - * Create an archive with the config files for distribution. - * - * These will be used by AM and executors. The files are zipped and added to the job as an - * archive, so that YARN will explode it when distributing to AM and executors. This directory - * is then added to the classpath of AM and executor process, just to make sure that everybody - * is using the same default config. - * - * This follows the order of precedence set by the startup scripts, in which HADOOP_CONF_DIR - * shows up in the classpath before YARN_CONF_DIR. - * - * Currently this makes a shallow copy of the conf directory. If there are cases where a - * Hadoop config directory contains subdirectories, this code will have to be fixed. - * - * The archive also contains some Spark configuration. Namely, it saves the contents of - * SparkConf in a file to be loaded by the AM process. - */ - def createConfArchive(): File = { - val hadoopConfFiles = new HashMap[String, File]() - - // Uploading $SPARK_CONF_DIR/log4j.properties file to the distributed cache to make sure that - // the executors will use the latest configurations instead of the default values. This is - // required when user changes log4j.properties directly to set the log configurations. If - // configuration file is provided through --files then executors will be taking configurations - // from --files instead of $SPARK_CONF_DIR/log4j.properties. - - // Also uploading metrics.properties to distributed cache if exists in classpath. - // If user specify this file using --files then executors will use the one - // from --files instead. 
- for { prop <- Seq("log4j.properties", "metrics.properties") - url <- Option(Utils.getContextOrSparkClassLoader.getResource(prop)) - if url.getProtocol == "file" } { - hadoopConfFiles(prop) = new File(url.getPath) - } - - loadHadoopConf(hadoopConfFiles) - - val confArchive = File.createTempFile(LOCALIZED_CONF_DIR, ".zip", - new File(Utils.getLocalDir(sparkConf))) - val confStream = new ZipOutputStream(new FileOutputStream(confArchive)) - - try { - confStream.setLevel(0) - hadoopConfFiles.foreach { case (name, file) => - if (file.canRead()) { - confStream.putNextEntry(new ZipEntry(name)) - Files.copy(file, confStream) - confStream.closeEntry() - } - } - - // Save Spark configuration to a file in the archive. - val props = new Properties() - sparkConf.getAll.foreach { case (k, v) => props.setProperty(k, v) } - // Override spark.yarn.key to point to the location in distributed cache which will be used - // by AM. - Option(amKeytabFileName).foreach { k => props.setProperty(KEYTAB.key, k) } - confStream.putNextEntry(new ZipEntry(SPARK_CONF_FILE)) - val writer = new OutputStreamWriter(confStream, StandardCharsets.UTF_8) - props.store(writer, "Spark configuration.") - writer.flush() - confStream.closeEntry() - } finally { - confStream.close() - } - confArchive - } - /** * Set up the environment for launching our ApplicationMaster container. */ @@ -808,9 +761,6 @@ private[spark] class DtClient( logInfo(s"Credentials file set to: $credentialsFile") } - // env required by python process - sparkConf.getExecutorEnv.foreach(user_env => (env(user_env._1) = user_env._2)) - // Pick up any environment variables for the AM provided through spark.yarn.appMasterEnv.* val amEnvPrefix = "spark.yarn.appMasterEnv." sparkConf.getAll @@ -1072,24 +1022,52 @@ private[spark] class DtClient( amContainer } - def setupCredentials(): Unit = { - loginFromKeytab = sparkConf.contains(PRINCIPAL.key) - if (loginFromKeytab) { - principal = sparkConf.get(PRINCIPAL).get - keytab = sparkConf.get(KEYTAB).orNull + /** + * Create an archive with the config files for distribution. + * + * These will be used by AM and executors. The files are zipped and added to the job as an + * archive, so that YARN will explode it when distributing to AM and executors. This directory + * is then added to the classpath of AM and executor process, just to make sure that everybody + * is using the same default config. + * + * This follows the order of precedence set by the startup scripts, in which HADOOP_CONF_DIR + * shows up in the classpath before YARN_CONF_DIR. + * + * Currently this makes a shallow copy of the conf directory. If there are cases where a + * Hadoop config directory contains subdirectories, this code will have to be fixed. + * + * The archive also contains some Spark configuration. Namely, it saves the contents of + * SparkConf in a file to be loaded by the AM process. + */ + def createConfArchive(): File = { - require(keytab != null, "Keytab must be specified when principal is specified.") - logInfo("Attempting to login to the Kerberos" + - s" using principal: $principal and keytab: $keytab") - val f = new File(keytab) - // Generate a file name that can be used for the keytab file, that does not conflict - // with any user file. 
- amKeytabFileName = f.getName + "-" + UUID.randomUUID().toString - sparkConf.set(PRINCIPAL.key, principal) - } - // Defensive copy of the credentials + val confArchive = File.createTempFile(LOCALIZED_CONF_DIR, ".zip", + new File(Utils.getLocalDir(sparkConf))) + val confStream = new ZipOutputStream(new FileOutputStream(confArchive)) - credentials = new Credentials(UserGroupInformation.getCurrentUser.getCredentials) + try { + confStream.setLevel(0) + + confStream.putNextEntry(new ZipEntry("log4j.properties")) + val sparkLog4jContent: String = sparkConf.get("spark.log4j.content") + confStream.write(sparkLog4jContent.getBytes(Charset.forName("UTF-8"))) + confStream.closeEntry() + + // Save Spark configuration to a file in the archive. + val props = new Properties() + sparkConf.getAll.foreach { case (k, v) => props.setProperty(k, v) } + // Override spark.yarn.key to point to the location in distributed cache which will be used + // by AM. + Option(amKeytabFileName).foreach { k => props.setProperty(KEYTAB.key, k) } + confStream.putNextEntry(new ZipEntry(SPARK_CONF_FILE)) + val writer = new OutputStreamWriter(confStream, StandardCharsets.UTF_8) + props.store(writer, "Spark configuration.") + writer.flush() + confStream.closeEntry() + } finally { + confStream.close() + } + confArchive } /** @@ -1250,23 +1228,8 @@ private[spark] class DtClient( private object DtClient extends Logging { - def main(argStrings: Array[String]) { - if (!sys.props.contains("SPARK_SUBMIT")) { - logWarning("WARNING: This client is deprecated and will be removed in a " + - "future version of Spark. Use ./bin/spark-submit with \"--master yarn\"") - } - - // Set an env variable indicating we are running in YARN mode. - // Note that any env variable with the SPARK_ prefix gets propagated to all (remote) processes - System.setProperty("SPARK_YARN_MODE", "true") - val sparkConf = new SparkConf - // SparkSubmit would use yarn cache to distribute files & jars in yarn mode, - // so remove them from sparkConf here for yarn mode. - sparkConf.remove("spark.jars") - sparkConf.remove("spark.files") - val args = new ClientArguments(argStrings) - new DtClient(args, sparkConf).run() - } + // Subdirectory in the conf directory containing Hadoop config files. + val LOCALIZED_HADOOP_CONF_DIR = "__hadoop_conf__" // Alias for the user jar val APP_JAR_NAME: String = "__app__.jar" @@ -1289,6 +1252,24 @@ private object DtClient extends Logging { // Distribution-defined classpath to add to processes val ENV_DIST_CLASSPATH = "SPARK_DIST_CLASSPATH" + def main(argStrings: Array[String]) { + if (!sys.props.contains("SPARK_SUBMIT")) { + logWarning("WARNING: This client is deprecated and will be removed in a " + + "future version of Spark. Use ./bin/spark-submit with \"--master yarn\"") + } + + // Set an env variable indicating we are running in YARN mode. + // Note that any env variable with the SPARK_ prefix gets propagated to all (remote) processes + System.setProperty("SPARK_YARN_MODE", "true") + val sparkConf = new SparkConf + // SparkSubmit would use yarn cache to distribute files & jars in yarn mode, + // so remove them from sparkConf here for yarn mode. + sparkConf.remove("spark.jars") + sparkConf.remove("spark.files") + val args = new ClientArguments(argStrings) +// new DtClient(args, sparkConf, yarnClient = yarnClient).run() + } + // Subdirectory where the user's Spark and Hadoop config files will be placed. 
val LOCALIZED_CONF_DIR = "__spark_conf__" @@ -1392,6 +1373,9 @@ private object DtClient extends Logging { addClasspathEntry(getClusterPath(sparkConf, cp), env) } + addClasspathEntry( + buildPath(Environment.PWD.$$(), LOCALIZED_HADOOP_CONF_DIR), env) + addClasspathEntry(YarnSparkHadoopUtil.expandEnvironment(Environment.PWD), env) addClasspathEntry( diff --git a/taier-worker/taier-worker-plugin/spark/spark-yarn-client-core/src/main/scala/org/apache/spark/deploy/yarn/DtConfig.scala b/taier-worker/taier-worker-plugin/spark/spark-yarn-client-core/src/main/scala/org/apache/spark/deploy/yarn/DtConfig.scala new file mode 100644 index 0000000000..c207a4131c --- /dev/null +++ b/taier-worker/taier-worker-plugin/spark/spark-yarn-client-core/src/main/scala/org/apache/spark/deploy/yarn/DtConfig.scala @@ -0,0 +1,14 @@ +package org.apache.spark.deploy.yarn + +import org.apache.spark.internal.config.ConfigBuilder + +object DtConfig { + val SPARK_UDFS_TO_DISTRIBUTE = ConfigBuilder("spark.udfs.jars") + .stringConf + .createOptional + private[spark] val KRB5_CONF = ConfigBuilder("spark.kerberos.remotekrb5") + .doc("Location of user's krb5.conf.") + .stringConf.createOptional + private[spark] var KRB5FILENAME: String = "krb5.conf" + +} diff --git a/taier-worker/taier-worker-plugin/spark/yarn-hdfs-spark210-core/pom.xml b/taier-worker/taier-worker-plugin/spark/yarn-hdfs-spark210-core/pom.xml new file mode 100644 index 0000000000..ccd2dd6ad7 --- /dev/null +++ b/taier-worker/taier-worker-plugin/spark/yarn-hdfs-spark210-core/pom.xml @@ -0,0 +1,45 @@ + + + + taier-worker-plugin.spark + com.dtstack.taier + 1.0.0 + ../pom.xml + + 4.0.0 + + taier-worker-plugin.spark.yarn-hdfs-spark210-core + jar + + + UTF-8 + 2.1.3 + 2.11.8 + 3.2.2 + + + + + + com.dtstack.taier + taier-worker-plugin.spark.spark-yarn-client-core + 1.0.0 + + + + + org.apache.spark + spark-hive_2.11 + provided + + + + + org.apache.hadoop + hadoop-common + provided + + + \ No newline at end of file diff --git a/taier-worker/taier-worker-plugin/spark/yarn-hdfs-spark320-core/pom.xml b/taier-worker/taier-worker-plugin/spark/yarn-hdfs-spark320-core/pom.xml new file mode 100644 index 0000000000..58a6ef2c0e --- /dev/null +++ b/taier-worker/taier-worker-plugin/spark/yarn-hdfs-spark320-core/pom.xml @@ -0,0 +1,125 @@ + + + + taier-worker-plugin.spark + com.dtstack.taier + 1.0.0 + ../pom.xml + + 4.0.0 + + taier-worker-plugin.spark.yarn-hdfs-spark320-core + jar + + + UTF-8 + 3.2.2 + 2.12.8 + 3.2.2 + + + + + taier-worker-plugin.spark.yarn-hdfs-spark210-core + com.dtstack.taier + 1.0.0 + + + + com.google.guava + guava + 14.0.1 + + + + + org.eclipse.jetty + jetty-server + 9.3.19.v20170502 + + + + org.scala-lang + scala-library + ${scala.version} + + + + + org.apache.spark + spark-hive_2.12 + ${spark.version} + provided + + + log4j + apache-log4j-extras + + + + + + org.apache.spark + spark-core_2.12 + ${spark.version} + provided + + + org.apache.hadoop + hadoop-confHdfsPath + + + org.apache.hadoop + hadoop-common + + + org.apache.hadoop + hadoop-client + + + + + + org.jsoup + jsoup + 1.10.3 + + + + org.apache.spark + spark-yarn_2.12 + ${spark.version} + provided + + + org.apache.hadoop + hadoop-yarn-common + + + org.apache.hadoop + hadoop-yarn-api + + + hadooop-yarn-server-web-proxy + org.apache.hadoop + + + + + + + + + net.alchim31.maven + scala-maven-plugin + + + + org.apache.maven.plugins + maven-compiler-plugin + + + + \ No newline at end of file diff --git 
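Note: DtConfig.scala above introduces spark.udfs.jars and spark.kerberos.remotekrb5, and the reworked DtClient now reads spark.hadoopconf.remotedir and spark.kerberos.remotekeytab (both set by SparkResourceUploader) and expects an already started YarnClient from its caller instead of creating one itself. A rough, hypothetical sketch of that caller-side wiring; the variable names, paths and surrounding submit flow are illustrative assumptions, not the plugin's actual code:

import org.apache.hadoop.yarn.client.api.YarnClient;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.spark.SparkConf;

// Hypothetical wiring sketch: shows the configuration keys the new DtClient
// constructor and DtConfig expect to be present before submitApplication().
public final class DtClientWiringSketch {

    public static void main(String[] args) {
        YarnConfiguration yarnConf = new YarnConfiguration();

        // The caller now owns the YarnClient lifecycle; DtClient no longer
        // calls init()/start() inside submitApplication().
        YarnClient yarnClient = YarnClient.createYarnClient();
        yarnClient.init(yarnConf);
        yarnClient.start();

        SparkConf sparkConf = new SparkConf()
                // remote dirs/files uploaded by SparkResourceUploader (placeholder paths)
                .set("spark.hadoopconf.remotedir", "hdfs:///tmp/spark/md5dir/__hadoop_conf__")
                .set("spark.kerberos.remotekeytab", "hdfs:///tmp/spark/md5dir/principal.keytab")
                .set("spark.kerberos.remotekrb5", "hdfs:///tmp/spark/md5dir/krb5.conf")
                // extra UDF jars distributed through the YARN cache (DtConfig.SPARK_UDFS_TO_DISTRIBUTE)
                .set("spark.udfs.jars", "hdfs:///tmp/udfs/a.jar,hdfs:///tmp/udfs/b.jar")
                // log4j content zipped into __spark_conf__ by the new createConfArchive()
                .set("spark.log4j.content", "log4j.rootLogger=INFO,Client");

        // The Scala side then builds
        //   new DtClient(clientArgs, yarnConf, sparkConf, yarnClient).submitApplication(0)
        // as shown in DtClient.scala above.
    }
}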
a/taier-worker/taier-worker-plugin/spark/yarn-hdfs-spark320-core/src/main/java/com/dtstack/taier/spark/common/constant/ConfigConstant.java b/taier-worker/taier-worker-plugin/spark/yarn-hdfs-spark320-core/src/main/java/com/dtstack/taier/spark/common/constant/ConfigConstant.java new file mode 100644 index 0000000000..2e33b814ab --- /dev/null +++ b/taier-worker/taier-worker-plugin/spark/yarn-hdfs-spark320-core/src/main/java/com/dtstack/taier/spark/common/constant/ConfigConstant.java @@ -0,0 +1,57 @@ +package com.dtstack.taier.spark.common.constant; + +public class ConfigConstant { + + public static final String HIVE_METASTORE_URIS = "hive.metastore.uris"; + + public static final String KEY_DEFAULT_FILE_FORMAT = "hive.default.fileformat"; + + public static final String SPARK_KERBEROS_REMOTE_KRB5 = "spark.kerberos.remotekrb5"; + + public static final String SPARK_KERBEROS_REMOTE_KEYTAB = "spark.kerberos.remotekeytab"; + + public static final String SPARK_HADOOP_CONF_REMOTE_DIR = "spark.hadoopconf.remotedir"; + + public static final String SPARK_CLEAR_RESOURCED_RATE = "spark.clear.resource.rate"; + + public static final String SPARK_RESOURCES_DIR = "spark.resources.dir"; + + public static final String SPARK_DRIVER_EXTRA_JAVA_OPTIONS = "spark.driver.extraJavaOptions"; + + public static final String SPARK_EXECUTOR_EXTRA_JAVA_OPTIONS = + "spark.executor.extraJavaOptions"; + + public static final String DRIVER_CORE_KEY = "driver.cores"; + + public static final String DRIVER_MEM_KEY = "driver.memory"; + + public static final String DRIVER_MEM_OVERHEAD_KEY = "yarn.driver.memoryOverhead"; + + public static final String EXECUTOR_INSTANCES_KEY = "executor.instances"; + + public static final String EXECUTOR_MEM_KEY = "executor.memory"; + + public static final String EXECUTOR_CORES_KEY = "executor.cores"; + + public static final String EXECUTOR_MEM_OVERHEAD_KEY = "yarn.executor.memoryOverhead"; + + public static final String SPARK_RANGER_CONF_REMOTE_DIR = "spark.ranger.conf.remote.dir"; + + public static final String SPARK_RANGER_ENABLED = "spark.ranger.enabled"; + + public static final String SPARK_SQL_EXTENSIONS = "spark.sql.extensions"; + + public static final String HTTP_AUTHENTICATION_TOKEN_KEY = "http.authentication.token"; + + public static final String SPARK_EVENT_LOG_DIR = "spark.eventLog.dir"; + + public static final String SPARK_LOCAL_SPARK_HOME = "spark.local.spark.home"; + + public static final String SPARK_PROMETHEUS_SINK_JAR_PATH = "spark.prometheus.sink.jar.path"; + + public static final String SPARK_YARN_ARCHIVE = "spark.yarn.archive"; + + public static final String SPARK_PYTHON_EXT_LIB_PATH = "spark.python.extLib.path"; + + private ConfigConstant() {} +} diff --git a/taier-worker/taier-worker-plugin/spark/yarn-hdfs-spark320-core/src/main/java/com/dtstack/taier/spark/common/constant/SparkConstants.java b/taier-worker/taier-worker-plugin/spark/yarn-hdfs-spark320-core/src/main/java/com/dtstack/taier/spark/common/constant/SparkConstants.java new file mode 100644 index 0000000000..ca28b1e2f8 --- /dev/null +++ b/taier-worker/taier-worker-plugin/spark/yarn-hdfs-spark320-core/src/main/java/com/dtstack/taier/spark/common/constant/SparkConstants.java @@ -0,0 +1,145 @@ +package com.dtstack.taier.spark.common.constant; + +import java.io.File; +import java.util.ArrayList; +import java.util.Arrays; + +public class SparkConstants { + + public static final String SPARK = "spark"; + + public static final String SPLIT = "_"; + + public static final String SP = File.separator; + + public static final String 
USER_DIR = System.getProperty("user.dir"); + + public static final String TMP_DIR = USER_DIR + SP + "tmp"; + + public static final String GRAMMAR = "grammar"; + + public static final String TYPE_NAME_KEY = "typeName"; + + public static final String MODEL_PARAM = "modelParam"; + + public static final String APP_ENV = "--app-env"; + + public static final String HDFS_PREFIX = "hdfs://"; + + public static final String HTTP_PREFIX = "http://"; + + public static final String KEY_DEFAULT_FILE_FORMAT = "hive.default.fileformat"; + + public static final String DEFAULT_FILE_FORMAT = "orc"; + + public static final String LOG_LEVEL_KEY = "logLevel"; + + public static final String HADOOP_CONF = "__hadoop_conf__"; + + public static final String HIVE_SITE = "/hive-site.xml"; + + public static final String CORE_SITE = "/core-site.xml"; + + public static final String YARN_SITE = "/yarn-site.xml"; + + public static final String UDF_JAR = "udf_jar"; + + public static final String IS_CARBON_SPARK_KEY = "isCarbondata"; + + public static final String SESSION_CONF_KEY_PREFIX = "session."; + + public static final String SPARK_CONFIG_PREFIX = "spark."; + + public static final String PYTHON_RUNNER_CLASS = "org.apache.spark.deploy.PythonRunner"; + + public static final String PYTHON_RUNNER_DEPENDENCY_RES_KEY = "extRefResource"; + + public static final String SPARK_LOCAL_LOG4J_KEY = "spark_local_log4j_key"; + + public static final String SPARK_CONF_DIR = "sparkconf"; + + public static final String SPARK_LOG4J_FILE_NAME = "log4j-spark.properties"; + + public static final int DEFAULT_CORES = 1; + + public static final int DEFAULT_INSTANCES = 1; + + public static final int DEFAULT_MEM = 512; + + public static final int DEFAULT_MEM_OVERHEAD = 384; + + public static final String DEFAULT_SPARK_YARN_ARCHIVE = "%s/sparkjars/jars"; + + public static final String DEFAULT_SPARK_SQL_PROXY_JAR_PATH = + "%s/user/spark/spark-sql-proxy.jar"; + + public static final String DEFAULT_SPARK_PYTHON_EXTLIBPATH = + "%s/pythons/pyspark.zip,/pythons/py4j-0.10.4-src.zip"; + + public static final String DEFAULT_SPARK_SQL_PROXY_MAIN_CLASS = + "com.dtstack.engine.spark.sql.SqlProxy"; + + public static final String DEFAULT_CARBON_SQL_PROXY_MAIN_CLASS = + "com.dtstack.engine.spark.sql.CarbondataSqlProxy"; + + public static final String HIVE_CONF_NAME = "hive-site.xml"; + + public static final String CARBON_HIVE_CONF_NAME = "carbon-hive-site.xml"; + + public static final String DEFAULT_APP_NAME = "spark_default_name"; + + public static final String SQL_KEY = "sql"; + + public static final String APP_NAME_KEY = "appName"; + + public static final String SPARK_SESSION_CONF_KEY = "sparkSessionConf"; + + // ------------------------------------------------------------------------ + // Kerberos Configs + // ------------------------------------------------------------------------ + public static final String SPARK_JAVAOPTIONS = "-Djava.security.krb5.conf=./krb5.conf"; + + public static final String SPARK_JAVA_OPTIONS_KRB5CONF = + "-Djava.security.krb5.conf=./krb5.conf"; + + public static final String SPARK_JAVA_OPTIONS_LOG4J_CONTENT = + "-Dlog4j.configuration=./__spark_conf__/log4j.properties"; + + // 默认hdfs resource文件清除频率 + public static final String SPARK_DEFAULT_CLEAR_RESOURCED_RATE = "30"; + + public static final String SPARK_LOCAL_TMP = "spark_local_tmp"; + + public static final String SPARK_LOG4J_CONTENT = + "log4j.rootLogger=INFO,Client\n" + + "log4j.logger.Client=INFO,Client\n" + + "log4j.additivity.Client = false\n" + + 
"log4j.appender.console.target=System.err\n" + + "log4j.appender.Client=org.apache.log4j.ConsoleAppender\n" + + "log4j.appender.Client.layout=org.apache.log4j.PatternLayout\n" + + "log4j.appender.Client.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss,SSS} %-5p %-60c %x - %m%n"; + + public static final String LOCAL_KEYTAB_DIR_PARENT = USER_DIR + "/kerberos/keytab"; + + public static final String RANGER_SECURITY = "/ranger-spark-security.xml"; + + public static final String RANGER_AUDIT = "/ranger-spark-audit.xml"; + + public static final String XML_SUFFIX = ".xml"; + + public static final String RANGER = "ranger"; + + public static final String TMP_RANGER_FILE_PATH = USER_DIR + "/tmp/tmpRangerConf"; + + public static final String DEFAULT_SPARK_PROMETHEUS_SINK_JAR_PATH = + "/opt/dtstack/DTSpark2.4/spark_pkg/jars/spark-prometheus-sink-2.4.8-dt.jar"; + + public static final ArrayList FILTER_PARAM = + new ArrayList<>( + Arrays.asList( + "fs.hdfs.impl.disable.cache", + "fs.file.impl.disable.cache", + "hive.execution.engine")); + + private SparkConstants() {} +} diff --git a/taier-worker/taier-worker-plugin/spark/yarn3-hdfs3-spark210/spark-yarn-client/src/main/java/org/apache/hadoop/security/UserGroupInformation.java b/taier-worker/taier-worker-plugin/spark/yarn-hdfs-spark320-core/src/main/java/org/apache/hadoop/security/UserGroupInformation.java similarity index 54% rename from taier-worker/taier-worker-plugin/spark/yarn3-hdfs3-spark210/spark-yarn-client/src/main/java/org/apache/hadoop/security/UserGroupInformation.java rename to taier-worker/taier-worker-plugin/spark/yarn-hdfs-spark320-core/src/main/java/org/apache/hadoop/security/UserGroupInformation.java index cc34f4bc5f..ac08ef4bd8 100644 --- a/taier-worker/taier-worker-plugin/spark/yarn3-hdfs3-spark210/spark-yarn-client/src/main/java/org/apache/hadoop/security/UserGroupInformation.java +++ b/taier-worker/taier-worker-plugin/spark/yarn-hdfs-spark320-core/src/main/java/org/apache/hadoop/security/UserGroupInformation.java @@ -18,18 +18,15 @@ package org.apache.hadoop.security; import com.google.common.annotations.VisibleForTesting; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.retry.RetryPolicies; +import org.apache.hadoop.io.retry.RetryPolicy; import org.apache.hadoop.metrics2.annotation.Metric; import org.apache.hadoop.metrics2.annotation.Metrics; -import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; -import org.apache.hadoop.metrics2.lib.MetricsRegistry; -import org.apache.hadoop.metrics2.lib.MutableQuantiles; -import org.apache.hadoop.metrics2.lib.MutableRate; +import org.apache.hadoop.metrics2.lib.*; import org.apache.hadoop.security.SaslRpcServer.AuthMethod; import org.apache.hadoop.security.authentication.util.KerberosUtil; import org.apache.hadoop.security.token.Token; @@ -37,38 +34,32 @@ import org.apache.hadoop.util.Shell; import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.util.Time; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import javax.security.auth.DestroyFailedException; import javax.security.auth.Subject; import javax.security.auth.callback.CallbackHandler; import javax.security.auth.kerberos.KerberosPrincipal; import javax.security.auth.kerberos.KerberosTicket; -import javax.security.auth.kerberos.KeyTab; import 
javax.security.auth.login.AppConfigurationEntry; import javax.security.auth.login.AppConfigurationEntry.LoginModuleControlFlag; +import javax.security.auth.login.Configuration.Parameters; import javax.security.auth.login.LoginContext; import javax.security.auth.login.LoginException; import javax.security.auth.spi.LoginModule; import java.io.File; +import java.io.FileNotFoundException; import java.io.IOException; import java.lang.reflect.UndeclaredThrowableException; -import java.security.AccessControlContext; -import java.security.AccessController; -import java.security.Principal; -import java.security.PrivilegedAction; -import java.security.PrivilegedActionException; -import java.security.PrivilegedExceptionAction; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.Collections; -import java.util.HashMap; -import java.util.Iterator; -import java.util.LinkedHashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; +import java.security.*; +import java.util.*; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicReference; import static org.apache.hadoop.fs.CommonConfigurationKeys.HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS; +import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.*; +import static org.apache.hadoop.security.UGIExceptionMessages.*; import static org.apache.hadoop.util.PlatformName.IBM_JAVA; /** @@ -77,19 +68,64 @@ * user's username and groups. It supports both the Windows, Unix and Kerberos * login modules. */ -@InterfaceAudience.LimitedPrivate({"HDFS", "MapReduce", "HBase", "Hive", "Oozie"}) +@InterfaceAudience.Public @InterfaceStability.Evolving public class UserGroupInformation { - private static final Log LOG = LogFactory.getLog(UserGroupInformation.class); + /**Environment variable pointing to the token cache file*/ + public static final String HADOOP_TOKEN_FILE_LOCATION = + "HADOOP_TOKEN_FILE_LOCATION"; + /** * Percentage of the ticket window to use before we renew ticket. */ private static final float TICKET_RENEW_WINDOW = 0.80f; private static boolean shouldRenewImmediatelyForTests = false; - private static ThreadLocal> threadLocal = new ThreadLocal<>(); - static final String HADOOP_USER_NAME = "HADOOP_USER_NAME"; static final String HADOOP_PROXY_USER = "HADOOP_PROXY_USER"; + @VisibleForTesting + static final Logger LOG = LoggerFactory.getLogger( + UserGroupInformation.class); + /** + * Information about the logged in user. + */ + private static final AtomicReference loginUserRef = + new AtomicReference<>(); + private static final boolean windows = + System.getProperty("os.name").startsWith("Windows"); + private static final boolean is64Bit = + System.getProperty("os.arch").contains("64") || + System.getProperty("os.arch").contains("s390x"); + + /** Metrics to track UGI activity */ + static UgiMetrics metrics = UgiMetrics.create(); + /** The auth method to use */ + private static AuthenticationMethod authenticationMethod; + /** Server-side groups fetching service */ + private static Groups groups; + private static final boolean aix = System.getProperty("os.name").equals("AIX"); + /** Min time (in seconds) before relogin for Kerberos */ + private static long kerberosMinSecondsBeforeRelogin; + /** The configuration to use */ + + private static Configuration conf; + + /** + * Create a UserGroupInformation for the given subject. + * This does not change the subject or acquire new credentials. 
+ * + * The creator of subject is responsible for renewing credentials. + * @param subject the user's subject + */ + UserGroupInformation(Subject subject) { + this.subject = subject; + // do not access ANY private credentials since they are mutable + // during a relogin. no principal locking necessary since + // relogin/logout does not remove User principal. + this.user = subject.getPrincipals(User.class).iterator().next(); + if (user == null || user.getName() == null) { + throw new IllegalStateException("Subject does not contain a valid User"); + } + } /** * For the purposes of unit tests, we want to test login @@ -98,191 +134,54 @@ public class UserGroupInformation { * @param immediate true if we should login without waiting for ticket window */ @VisibleForTesting - static void setShouldRenewImmediatelyForTests(boolean immediate) { + public static void setShouldRenewImmediatelyForTests(boolean immediate) { shouldRenewImmediatelyForTests = immediate; } - public static void setThreadLocalData(String key, String val){ - Map dataMap = threadLocal.get(); - if(dataMap == null){ - dataMap = new HashMap<>(); - } - - dataMap.put(key, val); - threadLocal.set(dataMap); - } - - /** - * UgiMetrics maintains UGI activity statistics - * and publishes them through the metrics interfaces. + /** + * Reattach the class's metrics to a new metric system. */ - @Metrics(about="User and group related metrics", context="ugi") - static class UgiMetrics { - final MetricsRegistry registry = new MetricsRegistry("UgiMetrics"); - - @Metric("Rate of successful kerberos logins and latency (milliseconds)") - MutableRate loginSuccess; - @Metric("Rate of failed kerberos logins and latency (milliseconds)") - MutableRate loginFailure; - @Metric("GetGroups") MutableRate getGroups; - MutableQuantiles[] getGroupsQuantiles; - - static UgiMetrics create() { - return DefaultMetricsSystem.instance().register(new UgiMetrics()); - } - - void addGetGroups(long latency) { - getGroups.add(latency); - if (getGroupsQuantiles != null) { - for (MutableQuantiles q : getGroupsQuantiles) { - q.add(latency); - } - } - } + public static void reattachMetrics() { + UgiMetrics.reattach(); } - + /** - * A login module that looks at the Kerberos, Unix, or Windows principal and - * adds the corresponding UserName. + * Set the static configuration for UGI. + * In particular, set the security authentication mechanism and the + * group look up service. + * @param conf the configuration to use */ - @InterfaceAudience.Private - public static class HadoopLoginModule implements LoginModule { - private Subject subject; - - @Override - public boolean abort() throws LoginException { - return true; - } - - private T getCanonicalUser(Class cls) { - for(T user: subject.getPrincipals(cls)) { - return user; - } - return null; - } - - @Override - public boolean commit() throws LoginException { - if (LOG.isDebugEnabled()) { - LOG.debug("hadoop login commit"); - } - // if we already have a user, we are done. 
- if (!subject.getPrincipals(User.class).isEmpty()) { - if (LOG.isDebugEnabled()) { - LOG.debug("using existing subject:"+subject.getPrincipals()); - } - return true; - } - Principal user = null; - // if we are using kerberos, try it out - if (isAuthenticationMethodEnabled(AuthenticationMethod.KERBEROS)) { - user = getCanonicalUser(KerberosPrincipal.class); - if (LOG.isDebugEnabled()) { - LOG.debug("using kerberos user:"+user); - } - } - //If we don't have a kerberos user and security is disabled, check - //if user is specified in the environment or properties - if (!isSecurityEnabled() && (user == null)) { - - String envUser = null; - Map data = threadLocal.get(); - if(data != null){ - envUser = data.get(HADOOP_USER_NAME); - }else{ - envUser = System.getenv(HADOOP_USER_NAME); - - } - - if (envUser == null) { - envUser = System.getProperty(HADOOP_USER_NAME); - } - user = envUser == null ? null : new User(envUser); - } - // use the OS user - if (user == null) { - user = getCanonicalUser(OS_PRINCIPAL_CLASS); - if (LOG.isDebugEnabled()) { - LOG.debug("using local user:"+user); - } - } - // if we found the user, add our principal - if (user != null) { - if (LOG.isDebugEnabled()) { - LOG.debug("Using user: \"" + user + "\" with name " + user.getName()); - } - - User userEntry = null; - try { - userEntry = new User(user.getName()); - } catch (Exception e) { - throw (LoginException)(new LoginException(e.toString()).initCause(e)); - } - if (LOG.isDebugEnabled()) { - LOG.debug("User entry: \"" + userEntry.toString() + "\"" ); - } - - subject.getPrincipals().add(userEntry); - return true; - } - LOG.error("Can't find user in " + subject); - throw new LoginException("Can't find user name"); - } - - @Override - public void initialize(Subject subject, CallbackHandler callbackHandler, - Map sharedState, Map options) { - this.subject = subject; - } - - @Override - public boolean login() throws LoginException { - if (LOG.isDebugEnabled()) { - LOG.debug("hadoop login"); - } - return true; - } - - @Override - public boolean logout() throws LoginException { - if (LOG.isDebugEnabled()) { - LOG.debug("hadoop logout"); - } - return true; - } + @InterfaceAudience.Public + @InterfaceStability.Evolving + public static void setConfiguration(Configuration conf) { + initialize(conf, true); } - /** Metrics to track UGI activity */ - static UgiMetrics metrics = UgiMetrics.create(); - /** The auth method to use */ - private static AuthenticationMethod authenticationMethod; - /** Server-side groups fetching service */ - private static Groups groups; - /** The configuration to use */ - private static Configuration conf; + public static boolean isInitialized() { + return conf != null; + } - - /** Leave 10 minutes between relogin attempts. */ - private static final long MIN_TIME_BEFORE_RELOGIN = 10 * 60 * 1000L; - - /**Environment variable pointing to the token cache file*/ - public static final String HADOOP_TOKEN_FILE_LOCATION = - "HADOOP_TOKEN_FILE_LOCATION"; - - /** + /** * A method to initialize the fields that depend on a configuration. * Must be called before useKerberos or groups is used. 
*/ private static void ensureInitialized() { - if (conf == null) { + if (!isInitialized()) { synchronized(UserGroupInformation.class) { - if (conf == null) { // someone might have beat us + if (!isInitialized()) { // someone might have beat us initialize(new Configuration(), false); } } } } + @InterfaceAudience.Private + @InterfaceStability.Evolving + private static boolean isAuthenticationMethodEnabled(AuthenticationMethod method) { + ensureInitialized(); + return (authenticationMethod == method); + } + /** * Initialize UGI and related classes. * @param conf the configuration to use @@ -295,9 +194,19 @@ private static synchronized void initialize(Configuration conf, HadoopKerberosName.setConfiguration(conf); } catch (IOException ioe) { throw new RuntimeException( - "Problem with Kerberos auth_to_local name configuration", ioe); + "Problem with Kerberos auth_to_local name configuration", ioe); } } + try { + kerberosMinSecondsBeforeRelogin = 1000L * conf.getLong( + HADOOP_KERBEROS_MIN_SECONDS_BEFORE_RELOGIN, + HADOOP_KERBEROS_MIN_SECONDS_BEFORE_RELOGIN_DEFAULT); + } + catch(NumberFormatException nfe) { + throw new IllegalArgumentException("Invalid attribute value for " + + HADOOP_KERBEROS_MIN_SECONDS_BEFORE_RELOGIN + " of " + + conf.get(HADOOP_KERBEROS_MIN_SECONDS_BEFORE_RELOGIN)); + } // If we haven't set up testing groups, use the configuration to find it if (!(groups instanceof TestingGroups)) { groups = Groups.getUserToGroupsMappingService(conf); @@ -311,90 +220,57 @@ private static synchronized void initialize(Configuration conf, MutableQuantiles[] getGroupsQuantiles = new MutableQuantiles[length]; for (int i = 0; i < length; i++) { getGroupsQuantiles[i] = metrics.registry.newQuantiles( - "getGroups" + intervals[i] + "s", - "Get groups", "ops", "latency", intervals[i]); + "getGroups" + intervals[i] + "s", + "Get groups", "ops", "latency", intervals[i]); } metrics.getGroupsQuantiles = getGroupsQuantiles; } } } - /** - * Set the static configuration for UGI. - * In particular, set the security authentication mechanism and the - * group look up service. - * @param conf the configuration to use - */ - @InterfaceAudience.Public - @InterfaceStability.Evolving - public static void setConfiguration(Configuration conf) { - initialize(conf, true); - } - + private final Subject subject; + // All non-static fields must be read-only caches that come from the subject. + private final User user; + + private static String OS_LOGIN_MODULE_NAME; + private static Class OS_PRINCIPAL_CLASS; + @InterfaceAudience.Private @VisibleForTesting - static void reset() { + public static void reset() { authenticationMethod = null; conf = null; groups = null; + kerberosMinSecondsBeforeRelogin = 0; setLoginUser(null); HadoopKerberosName.setRules(null); } - + /** * Determine if UserGroupInformation is using Kerberos to determine * user identities or is relying on simple authentication - * + * * @return true if UGI is working in a secure environment */ public static boolean isSecurityEnabled() { return !isAuthenticationMethodEnabled(AuthenticationMethod.SIMPLE); } - - @InterfaceAudience.Private - @InterfaceStability.Evolving - private static boolean isAuthenticationMethodEnabled(AuthenticationMethod method) { - ensureInitialized(); - return (authenticationMethod == method); - } - - /** - * Information about the logged in user. 
- */ - private static ThreadLocal userThreadLocal = new ThreadLocal<>(); - private static String keytabPrincipal = null; - private static String keytabFile = null; - - private final Subject subject; - // All non-static fields must be read-only caches that come from the subject. - private final User user; - private final boolean isKeytab; - private final boolean isKrbTkt; - - private static String OS_LOGIN_MODULE_NAME; - private static Class OS_PRINCIPAL_CLASS; - - private static final boolean windows = - System.getProperty("os.name").startsWith("Windows"); - private static final boolean is64Bit = - System.getProperty("os.arch").contains("64"); - private static final boolean aix = "AIX".equals(System.getProperty("os.name")); /* Return the OS login module class name */ private static String getOSLoginModuleName() { if (IBM_JAVA) { if (windows) { return is64Bit ? "com.ibm.security.auth.module.Win64LoginModule" - : "com.ibm.security.auth.module.NTLoginModule"; + : "com.ibm.security.auth.module.NTLoginModule"; } else if (aix) { return is64Bit ? "com.ibm.security.auth.module.AIX64LoginModule" - : "com.ibm.security.auth.module.AIXLoginModule"; + : "com.ibm.security.auth.module.AIXLoginModule"; } else { return "com.ibm.security.auth.module.LinuxLoginModule"; } } else { return windows ? "com.sun.security.auth.module.NTLoginModule" - : "com.sun.security.auth.module.UnixLoginModule"; + : "com.sun.security.auth.module.UnixLoginModule"; } } @@ -418,7 +294,7 @@ private static Class getOsPrincipalClass() { } } else { principalClass = windows ? "com.sun.security.auth.NTUserPrincipal" - : "com.sun.security.auth.UnixPrincipal"; + : "com.sun.security.auth.UnixPrincipal"; } return (Class) cl.loadClass(principalClass); } catch (ClassNotFoundException e) { @@ -426,233 +302,42 @@ private static Class getOsPrincipalClass() { } return null; } + + private static HadoopLoginContext + newLoginContext(String appName, Subject subject, + HadoopConfiguration loginConf) + throws LoginException { + // Temporarily switch the thread's ContextClassLoader to match this + // class's classloader, so that we can properly load HadoopLoginModule + // from the JAAS libraries. + Thread t = Thread.currentThread(); + ClassLoader oldCCL = t.getContextClassLoader(); + t.setContextClassLoader(HadoopLoginModule.class.getClassLoader()); + try { + return new HadoopLoginContext(appName, subject, loginConf); + } finally { + t.setContextClassLoader(oldCCL); + } + } static { OS_LOGIN_MODULE_NAME = getOSLoginModuleName(); OS_PRINCIPAL_CLASS = getOsPrincipalClass(); } - private static class RealUser implements Principal { - private final UserGroupInformation realUser; - - RealUser(UserGroupInformation realUser) { - this.realUser = realUser; - } - - @Override - public String getName() { - return realUser.getUserName(); - } - - public UserGroupInformation getRealUser() { - return realUser; - } - - @Override - public boolean equals(Object o) { - if (this == o) { - return true; - } else if (o == null || getClass() != o.getClass()) { - return false; - } else { - return realUser.equals(((RealUser) o).realUser); - } - } - - @Override - public int hashCode() { - return realUser.hashCode(); - } - - @Override - public String toString() { - return realUser.toString(); - } - } - - /** - * A JAAS configuration that defines the login modules that we want - * to use for login. 
- */ - private static class HadoopConfiguration - extends javax.security.auth.login.Configuration { - private static final String SIMPLE_CONFIG_NAME = "hadoop-simple"; - private static final String USER_KERBEROS_CONFIG_NAME = - "hadoop-user-kerberos"; - private static final String KEYTAB_KERBEROS_CONFIG_NAME = - "hadoop-keytab-kerberos"; - - private static final Map BASIC_JAAS_OPTIONS = - new HashMap(); - static { - String jaasEnvVar = System.getenv("HADOOP_JAAS_DEBUG"); - if (jaasEnvVar != null && "true".equalsIgnoreCase(jaasEnvVar)) { - BASIC_JAAS_OPTIONS.put("debug", "true"); - } - } - - private static final AppConfigurationEntry OS_SPECIFIC_LOGIN = - new AppConfigurationEntry(OS_LOGIN_MODULE_NAME, - LoginModuleControlFlag.REQUIRED, - BASIC_JAAS_OPTIONS); - private static final AppConfigurationEntry HADOOP_LOGIN = - new AppConfigurationEntry(HadoopLoginModule.class.getName(), - LoginModuleControlFlag.REQUIRED, - BASIC_JAAS_OPTIONS); - private static final Map USER_KERBEROS_OPTIONS = - new HashMap(); - static { - if (IBM_JAVA) { - USER_KERBEROS_OPTIONS.put("useDefaultCcache", "true"); - } else { - USER_KERBEROS_OPTIONS.put("doNotPrompt", "true"); - USER_KERBEROS_OPTIONS.put("useTicketCache", "true"); - } - String ticketCache = System.getenv("KRB5CCNAME"); - if (ticketCache != null) { - if (IBM_JAVA) { - // The first value searched when "useDefaultCcache" is used. - System.setProperty("KRB5CCNAME", ticketCache); - } else { - USER_KERBEROS_OPTIONS.put("ticketCache", ticketCache); - } - } - USER_KERBEROS_OPTIONS.put("renewTGT", "true"); - USER_KERBEROS_OPTIONS.putAll(BASIC_JAAS_OPTIONS); - } - private static final AppConfigurationEntry USER_KERBEROS_LOGIN = - new AppConfigurationEntry(KerberosUtil.getKrb5LoginModuleName(), - LoginModuleControlFlag.OPTIONAL, - USER_KERBEROS_OPTIONS); - private static final Map KEYTAB_KERBEROS_OPTIONS = - new HashMap(); - static { - if (IBM_JAVA) { - KEYTAB_KERBEROS_OPTIONS.put("credsType", "both"); - } else { - KEYTAB_KERBEROS_OPTIONS.put("doNotPrompt", "true"); - KEYTAB_KERBEROS_OPTIONS.put("useKeyTab", "true"); - KEYTAB_KERBEROS_OPTIONS.put("storeKey", "true"); - } - KEYTAB_KERBEROS_OPTIONS.put("refreshKrb5Config", "true"); - KEYTAB_KERBEROS_OPTIONS.putAll(BASIC_JAAS_OPTIONS); - } - private static final AppConfigurationEntry KEYTAB_KERBEROS_LOGIN = - new AppConfigurationEntry(KerberosUtil.getKrb5LoginModuleName(), - LoginModuleControlFlag.REQUIRED, - KEYTAB_KERBEROS_OPTIONS); - - private static final AppConfigurationEntry[] SIMPLE_CONF = - new AppConfigurationEntry[]{OS_SPECIFIC_LOGIN, HADOOP_LOGIN}; - - private static final AppConfigurationEntry[] USER_KERBEROS_CONF = - new AppConfigurationEntry[]{OS_SPECIFIC_LOGIN, USER_KERBEROS_LOGIN, - HADOOP_LOGIN}; - - private static final AppConfigurationEntry[] KEYTAB_KERBEROS_CONF = - new AppConfigurationEntry[]{KEYTAB_KERBEROS_LOGIN, HADOOP_LOGIN}; - - @Override - public AppConfigurationEntry[] getAppConfigurationEntry(String appName) { - if (SIMPLE_CONFIG_NAME.equals(appName)) { - return SIMPLE_CONF; - } else if (USER_KERBEROS_CONFIG_NAME.equals(appName)) { - return USER_KERBEROS_CONF; - } else if (KEYTAB_KERBEROS_CONFIG_NAME.equals(appName)) { - if (IBM_JAVA) { - KEYTAB_KERBEROS_OPTIONS.put("useKeytab", - prependFileAuthority(keytabFile)); - } else { - KEYTAB_KERBEROS_OPTIONS.put("keyTab", keytabFile); - } - KEYTAB_KERBEROS_OPTIONS.put("principal", keytabPrincipal); - return KEYTAB_KERBEROS_CONF; - } - return null; - } - } - - private static String prependFileAuthority(String keytabPath) { - return 
keytabPath.startsWith("file://") ? keytabPath - : "file://" + keytabPath; - } - - /** - * Represents a javax.security configuration that is created at runtime. - */ - private static class DynamicConfiguration - extends javax.security.auth.login.Configuration { - private AppConfigurationEntry[] ace; - - DynamicConfiguration(AppConfigurationEntry[] ace) { - this.ace = ace; - } - - @Override - public AppConfigurationEntry[] getAppConfigurationEntry(String appName) { - return ace; - } - } - - private static LoginContext - newLoginContext(String appName, Subject subject, - javax.security.auth.login.Configuration loginConf) - throws LoginException { - // Temporarily switch the thread's ContextClassLoader to match this - // class's classloader, so that we can properly load HadoopLoginModule - // from the JAAS libraries. - Thread t = Thread.currentThread(); - ClassLoader oldCCL = t.getContextClassLoader(); - t.setContextClassLoader(HadoopLoginModule.class.getClassLoader()); - try { - return new LoginContext(appName, subject, null, loginConf); - } finally { - t.setContextClassLoader(oldCCL); - } - } - - private LoginContext getLogin() { - return user.getLogin(); - } - - private void setLogin(LoginContext login) { - user.setLogin(login); - } - - /** - * Create a UserGroupInformation for the given subject. - * This does not change the subject or acquire new credentials. - * @param subject the user's subject - */ - UserGroupInformation(Subject subject) { - this.subject = subject; - this.user = subject.getPrincipals(User.class).iterator().next(); - this.isKeytab = !subject.getPrivateCredentials(KeyTab.class).isEmpty(); - this.isKrbTkt = !subject.getPrivateCredentials(KerberosTicket.class).isEmpty(); - } - - /** - * checks if logged in using kerberos - * @return true if the subject logged via keytab or has a Kerberos TGT - */ - public boolean hasKerberosCredentials() { - return isKeytab || isKrbTkt; - } - - /** - * Return the current user, including any doAs in the current stack. - * @return the current user - * @throws IOException if login fails - */ - @InterfaceAudience.Public - @InterfaceStability.Evolving - public synchronized - static UserGroupInformation getCurrentUser() throws IOException { - AccessControlContext context = AccessController.getContext(); - Subject subject = Subject.getSubject(context); - if (subject == null || subject.getPrincipals(User.class).isEmpty()) { - return getLoginUser(); - } else { - return new UserGroupInformation(subject); + /** + * Return the current user, including any doAs in the current stack. + * @return the current user + * @throws IOException if login fails + */ + @InterfaceAudience.Public + @InterfaceStability.Evolving + public static UserGroupInformation getCurrentUser() throws IOException { + AccessControlContext context = AccessController.getContext(); + Subject subject = Subject.getSubject(context); + if (subject == null || subject.getPrincipals(User.class).isEmpty()) { + return getLoginUser(); + } else { + return new UserGroupInformation(subject); } } @@ -664,16 +349,16 @@ static UserGroupInformation getCurrentUser() throws IOException { * @param user The user name, or NULL if none is specified. 
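newLoginContext, both in the removed code and in its new HadoopLoginContext form, temporarily swaps the thread's context classloader so JAAS can resolve the custom login module. A generic sketch of that swap-and-restore pattern, with the null callback handler mirroring the removed constructor call:

```java
import javax.security.auth.Subject;
import javax.security.auth.login.Configuration;
import javax.security.auth.login.LoginContext;
import javax.security.auth.login.LoginException;

// Generic sketch of the classloader swap used by newLoginContext above:
// point the thread's context classloader at the loader that can see the
// custom login module, then restore the previous loader in finally.
final class LoginContextFactory {
  static LoginContext create(String appName, Subject subject,
                             Configuration conf, Class<?> loginModuleClass)
      throws LoginException {
    Thread t = Thread.currentThread();
    ClassLoader previous = t.getContextClassLoader();
    t.setContextClassLoader(loginModuleClass.getClassLoader());
    try {
      return new LoginContext(appName, subject, null /* callback handler */, conf);
    } finally {
      t.setContextClassLoader(previous);
    }
  }
}
```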
* * @return The most appropriate UserGroupInformation - */ + */ public static UserGroupInformation getBestUGI( - String ticketCachePath, String user) throws IOException { + String ticketCachePath, String user) throws IOException { if (ticketCachePath != null) { return getUGIFromTicketCache(ticketCachePath, user); } else if (user == null) { return getCurrentUser(); } else { return createRemoteUser(user); - } + } } /** @@ -681,145 +366,115 @@ public static UserGroupInformation getBestUGI( * * @param user The principal name to load from the ticket * cache + * @param ticketCache the path to the ticket cache file * * @throws IOException if the kerberos login fails */ @InterfaceAudience.Public @InterfaceStability.Evolving public static UserGroupInformation getUGIFromTicketCache( - String ticketCache, String user) throws IOException { + String ticketCache, String user) throws IOException { if (!isAuthenticationMethodEnabled(AuthenticationMethod.KERBEROS)) { return getBestUGI(null, user); } - try { - Map krbOptions = new HashMap(); - if (IBM_JAVA) { - krbOptions.put("useDefaultCcache", "true"); - // The first value searched when "useDefaultCcache" is used. - System.setProperty("KRB5CCNAME", ticketCache); - } else { - krbOptions.put("doNotPrompt", "true"); - krbOptions.put("useTicketCache", "true"); - krbOptions.put("useKeyTab", "false"); - krbOptions.put("ticketCache", ticketCache); - } - krbOptions.put("renewTGT", "false"); - krbOptions.putAll(HadoopConfiguration.BASIC_JAAS_OPTIONS); - AppConfigurationEntry ace = new AppConfigurationEntry( - KerberosUtil.getKrb5LoginModuleName(), - LoginModuleControlFlag.REQUIRED, - krbOptions); - DynamicConfiguration dynConf = - new DynamicConfiguration(new AppConfigurationEntry[]{ ace }); - LoginContext login = newLoginContext( - HadoopConfiguration.USER_KERBEROS_CONFIG_NAME, null, dynConf); - login.login(); + LoginParams params = new LoginParams(); + params.put(LoginParam.PRINCIPAL, user); + params.put(LoginParam.CCACHE, ticketCache); + return doSubjectLogin(null, params); + } - Subject loginSubject = login.getSubject(); - Set loginPrincipals = loginSubject.getPrincipals(); - if (loginPrincipals.isEmpty()) { - throw new RuntimeException("No login principals found!"); - } - if (loginPrincipals.size() != 1) { - LOG.warn("found more than one principal in the ticket cache file " + - ticketCache); - } - User ugiUser = new User(loginPrincipals.iterator().next().getName(), - AuthenticationMethod.KERBEROS, login); - loginSubject.getPrincipals().add(ugiUser); - UserGroupInformation ugi = new UserGroupInformation(loginSubject); - ugi.setLogin(login); - ugi.setAuthenticationMethod(AuthenticationMethod.KERBEROS); - return ugi; - } catch (LoginException le) { - throw new IOException("failure to login using ticket cache file " + - ticketCache, le); - } + private void setLogin(LoginContext login) { + user.setLogin(login); } - /** + /** * Create a UserGroupInformation from a Subject with Kerberos principal. * - * @throws IOException if the kerberos login fails + * @param subject The KerberosPrincipal to use in UGI. + * The creator of subject is responsible for + * renewing credentials. 
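For context, a hedged usage sketch of the selection order that getBestUGI documents: prefer an explicit ticket cache, then a named remote user, otherwise the current user. The import assumes this patched class keeps Hadoop's original org.apache.hadoop.security package; the KRB5CCNAME lookup is only one plausible way to obtain a cache path.

```java
import java.io.IOException;
import org.apache.hadoop.security.UserGroupInformation;

// Illustrative caller for getBestUGI (package name assumed): a non-null
// ticket cache wins, then an explicit user name, else the current user.
public class BestUgiExample {
  public static void main(String[] args) throws IOException {
    String ticketCachePath = System.getenv("KRB5CCNAME"); // may be null
    String userName = args.length > 0 ? args[0] : null;   // may be null
    UserGroupInformation ugi =
        UserGroupInformation.getBestUGI(ticketCachePath, userName);
    System.out.println("Resolved UGI: " + ugi);
  }
}
```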
+ * + * @throws IOException + * @throws KerberosAuthException if the kerberos login fails */ public static UserGroupInformation getUGIFromSubject(Subject subject) - throws IOException { + throws IOException { if (subject == null) { - throw new IOException("Subject must not be null"); + throw new KerberosAuthException(SUBJECT_MUST_NOT_BE_NULL); } if (subject.getPrincipals(KerberosPrincipal.class).isEmpty()) { - throw new IOException("Provided Subject must contain a KerberosPrincipal"); + throw new KerberosAuthException(SUBJECT_MUST_CONTAIN_PRINCIPAL); } - KerberosPrincipal principal = - subject.getPrincipals(KerberosPrincipal.class).iterator().next(); - - User ugiUser = new User(principal.getName(), - AuthenticationMethod.KERBEROS, null); - subject.getPrincipals().add(ugiUser); - UserGroupInformation ugi = new UserGroupInformation(subject); - ugi.setLogin(null); - ugi.setAuthenticationMethod(AuthenticationMethod.KERBEROS); - return ugi; + // null params indicate external subject login. no login context will + // be attached. + return doSubjectLogin(subject, null); } /** - * Get the currently logged in user. + * Get the currently logged in user. If no explicit login has occurred, + * the user will automatically be logged in with either kerberos credentials + * if available, or as the local OS user, based on security settings. * @return the logged in user * @throws IOException if login fails */ @InterfaceAudience.Public @InterfaceStability.Evolving - public synchronized - static UserGroupInformation getLoginUser() throws IOException { - UserGroupInformation loginUser = userThreadLocal.get(); + public static UserGroupInformation getLoginUser() throws IOException { + UserGroupInformation loginUser = loginUserRef.get(); + // a potential race condition exists only for the initial creation of + // the login user. there's no need to penalize all subsequent calls + // with sychronization overhead so optimistically create a login user + // and discard if we lose the race. if (loginUser == null) { - loginUserFromSubject(null); - loginUser = userThreadLocal.get(); + UserGroupInformation newLoginUser = createLoginUser(null); + do { + // it's extremely unlikely that the login user will be non-null + // (lost CAS race), but be nulled before the subsequent get, but loop + // for correctness. + if (loginUserRef.compareAndSet(null, newLoginUser)) { + loginUser = newLoginUser; + // only spawn renewal if this login user is the winner. + loginUser.spawnAutoRenewalThreadForUserCreds(false); + } else { + loginUser = loginUserRef.get(); + } + } while (loginUser == null); } return loginUser; } - /** - * remove the login method that is followed by a space from the username - * e.g. "jack (auth:SIMPLE)" -> "jack" - * - * @param userName - * @return userName without login method - */ - public static String trimLoginMethod(String userName) { - int spaceIndex = userName.indexOf(' '); - if (spaceIndex >= 0) { - userName = userName.substring(0, spaceIndex); - } - return userName; + @InterfaceAudience.Private + @InterfaceStability.Unstable + @VisibleForTesting + public static void setLoginUser(UserGroupInformation ugi) { + // if this is to become stable, should probably logout the currently + // logged in ugi if it's different + loginUserRef.set(ugi); } /** * Log in a user using the given subject - * @parma subject the subject to use when logging in a user, or null to + * @param subject the subject to use when logging in a user, or null to * create a new subject. 
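The new getLoginUser above replaces the synchronized ThreadLocal lookup with an optimistic create-then-compareAndSet on an AtomicReference, so only the CAS winner spawns the renewal thread. A standalone, JDK-only sketch of that pattern with a generic value type:

```java
import java.util.concurrent.atomic.AtomicReference;
import java.util.function.Consumer;
import java.util.function.Supplier;

// Standalone sketch of the lock-free lazy initialization used by the new
// getLoginUser(): build a candidate optimistically, publish it with CAS,
// and let racing losers adopt the winner's instance.
final class LazySingleton<T> {
  private final AtomicReference<T> ref = new AtomicReference<>();

  T get(Supplier<T> factory, Consumer<T> onlyWinnerRuns) {
    T value = ref.get();
    if (value == null) {
      T candidate = factory.get();
      do {
        if (ref.compareAndSet(null, candidate)) {
          value = candidate;
          onlyWinnerRuns.accept(value);   // e.g. spawn the TGT renewal thread
        } else {
          value = ref.get();              // lost the race; adopt the winner
        }
      } while (value == null);            // guards a racing reset to null
    }
    return value;
  }
}
```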
+ * + * If subject is not null, the creator of subject is responsible for renewing + * credentials. + * * @throws IOException if login fails */ @InterfaceAudience.Public @InterfaceStability.Evolving - public synchronized - static void loginUserFromSubject(Subject subject) throws IOException { - ensureInitialized(); - UserGroupInformation loginUser; + public static void loginUserFromSubject(Subject subject) throws IOException { + setLoginUser(createLoginUser(subject)); + } + + private static + UserGroupInformation createLoginUser(Subject subject) throws IOException { + UserGroupInformation realUser = doSubjectLogin(subject, null); + UserGroupInformation loginUser = null; try { - if (subject == null) { - subject = new Subject(); - } - LoginContext login = - newLoginContext(authenticationMethod.getLoginAppName(), - subject, new HadoopConfiguration()); - login.login(); - UserGroupInformation realUser = new UserGroupInformation(subject); - realUser.setLogin(login); - realUser.setAuthenticationMethod(authenticationMethod); - realUser = new UserGroupInformation(login.getSubject()); // If the HADOOP_PROXY_USER environment variable or property // is specified, create a proxy user as the logged in user. String proxyUser = System.getenv(HADOOP_PROXY_USER); @@ -827,416 +482,142 @@ static void loginUserFromSubject(Subject subject) throws IOException { proxyUser = System.getProperty(HADOOP_PROXY_USER); } loginUser = proxyUser == null ? realUser : createProxyUser(proxyUser, realUser); - userThreadLocal.set(loginUser); + + String tokenFileLocation = System.getProperty(HADOOP_TOKEN_FILES); + if (tokenFileLocation == null) { + tokenFileLocation = conf.get(HADOOP_TOKEN_FILES); + } + if (tokenFileLocation != null) { + for (String tokenFileName: + StringUtils.getTrimmedStrings(tokenFileLocation)) { + if (tokenFileName.length() > 0) { + File tokenFile = new File(tokenFileName); + if (tokenFile.exists() && tokenFile.isFile()) { + Credentials cred = Credentials.readTokenStorageFile( + tokenFile, conf); + loginUser.addCredentials(cred); + } else { + LOG.info("tokenFile("+tokenFileName+") does not exist"); + } + } + } + } String fileLocation = System.getenv(HADOOP_TOKEN_FILE_LOCATION); if (fileLocation != null) { // Load the token storage file and put all of the tokens into the // user. Don't use the FileSystem API for reading since it has a lock // cycle (HADOOP-9212). 
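createLoginUser above also loads delegation tokens from the locations listed in HADOOP_TOKEN_FILES. A simplified, JDK-only sketch of the split-trim-and-check loop; the real code uses Hadoop's StringUtils.getTrimmedStrings and Credentials.readTokenStorageFile, which are omitted here.

```java
import java.io.File;
import java.util.ArrayList;
import java.util.List;

// Simplified sketch of the token-file scan in createLoginUser(): split the
// configured comma-separated locations, trim each entry, and keep only
// paths that point at existing regular files. Actually reading the tokens
// (Credentials.readTokenStorageFile in the real code) is not shown.
final class TokenFileScanner {
  static List<File> existingTokenFiles(String tokenFileLocation) {
    List<File> result = new ArrayList<>();
    if (tokenFileLocation == null) {
      return result;
    }
    for (String raw : tokenFileLocation.split(",")) {
      String name = raw.trim();
      if (name.isEmpty()) {
        continue;
      }
      File f = new File(name);
      if (f.exists() && f.isFile()) {
        result.add(f);
      } else {
        System.out.println("tokenFile(" + name + ") does not exist");
      }
    }
    return result;
  }
}
```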
+ File source = new File(fileLocation); + LOG.debug("Reading credentials from location set in {}: {}", + HADOOP_TOKEN_FILE_LOCATION, + source.getCanonicalPath()); + if (!source.isFile()) { + throw new FileNotFoundException("Source file " + + source.getCanonicalPath() + " from " + + HADOOP_TOKEN_FILE_LOCATION + + " not found"); + } Credentials cred = Credentials.readTokenStorageFile( - new File(fileLocation), conf); + source, conf); + LOG.debug("Loaded {} tokens", cred.numberOfTokens()); loginUser.addCredentials(cred); } - loginUser.spawnAutoRenewalThreadForUserCreds(); - } catch (LoginException le) { - LOG.debug("failure to login", le); - throw new IOException("failure to login", le); + } catch (IOException ioe) { + LOG.debug("failure to load login credentials", ioe); + throw ioe; } if (LOG.isDebugEnabled()) { LOG.debug("UGI loginUser:"+loginUser); - } + } + return loginUser; } - @InterfaceAudience.Private - @InterfaceStability.Unstable - @VisibleForTesting - public synchronized static void setLoginUser(UserGroupInformation ugi) { - // if this is to become stable, should probably logout the currently - // logged in ugi if it's different - //loginUser = ugi; - userThreadLocal.set(ugi); - } - /** - * Is this user logged in from a keytab file? - * @return true if the credentials are from a keytab file. - */ - public boolean isFromKeytab() { - return isKeytab; - } - - /** - * Get the Kerberos TGT - * @return the user's TGT or null if none was found + * Get time for next login retry. This will allow the thread to retry with + * exponential back-off, until tgt endtime. + * Last retry is {@link #kerberosMinSecondsBeforeRelogin} before endtime. + * + * @param tgtEndTime EndTime of the tgt. + * @param now Current time. + * @param rp The retry policy. + * @return Time for next login retry. 
*/ - private synchronized KerberosTicket getTGT() { - Set tickets = subject - .getPrivateCredentials(KerberosTicket.class); - for (KerberosTicket ticket : tickets) { - if (SecurityUtil.isOriginalTGT(ticket)) { - if (LOG.isDebugEnabled()) { - LOG.debug("Found tgt " + ticket); - } - return ticket; - } - } - return null; - } - - private long getRefreshTime(KerberosTicket tgt) { - long start = tgt.getStartTime().getTime(); - long end = tgt.getEndTime().getTime(); - return start + (long) ((end - start) * TICKET_RENEW_WINDOW); + @VisibleForTesting + static long getNextTgtRenewalTime(final long tgtEndTime, final long now, + final RetryPolicy rp) throws Exception { + final long lastRetryTime = tgtEndTime - kerberosMinSecondsBeforeRelogin; + final RetryPolicy.RetryAction ra = rp.shouldRetry(null, + metrics.renewalFailures.value(), 0, false); + return Math.min(lastRetryTime, now + ra.delayMillis); } - /**Spawn a thread to do periodic renewals of kerberos credentials*/ - private void spawnAutoRenewalThreadForUserCreds() { - if (isSecurityEnabled()) { - //spawn thread only if we have kerb credentials - if (user.getAuthenticationMethod() == AuthenticationMethod.KERBEROS && - !isKeytab) { - Thread t = new Thread(new Runnable() { - - @Override - public void run() { - String cmd = conf.get("hadoop.kerberos.kinit.command", - "kinit"); - KerberosTicket tgt = getTGT(); - if (tgt == null) { - return; - } - long nextRefresh = getRefreshTime(tgt); - while (true) { - try { - long now = Time.now(); - if(LOG.isDebugEnabled()) { - LOG.debug("Current time is " + now); - LOG.debug("Next refresh is " + nextRefresh); - } - if (now < nextRefresh) { - Thread.sleep(nextRefresh - now); - } - Shell.execCommand(cmd, "-R"); - if(LOG.isDebugEnabled()) { - LOG.debug("renewed ticket"); - } - reloginFromTicketCache(); - tgt = getTGT(); - if (tgt == null) { - LOG.warn("No TGT after renewal. Aborting renew thread for " + - getUserName()); - return; - } - nextRefresh = Math.max(getRefreshTime(tgt), - now + MIN_TIME_BEFORE_RELOGIN); - } catch (InterruptedException ie) { - LOG.warn("Terminating renewal thread"); - return; - } catch (IOException ie) { - LOG.warn("Exception encountered while running the" + - " renewal command. Aborting renew thread. " + ie); - return; - } - } - } - }); - t.setDaemon(true); - t.setName("TGT Renewer for " + getUserName()); - t.start(); - } - } - } /** * Log a user in from a keytab file. Loads a user identity from a keytab * file and logs them in. They become the currently logged-in user. * @param user the principal name to load from the keytab * @param path the path to the keytab file - * @throws IOException if the keytab file can't be read + * @throws IOException + * @throws KerberosAuthException if it's a kerberos login exception. 
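getRefreshTime and the new getNextTgtRenewalTime together schedule renewals: refresh at a fixed fraction of the ticket lifetime, and on failures back off, but never past the end time minus the minimum relogin interval. A JDK-only sketch under the assumptions that the window fraction is 0.80 (as in upstream Hadoop) and that a simple capped exponential backoff stands in for the RetryPolicy used by the real code:

```java
// Sketch of the renewal scheduling above. TICKET_RENEW_WINDOW is assumed to
// be 0.80; the exponential backoff is illustrative, the real code delegates
// the delay computation to a RetryPolicy.
final class TgtRenewalSchedule {
  static final float TICKET_RENEW_WINDOW = 0.80f;

  // Standard refresh point: 80% of the way through the ticket lifetime.
  static long refreshTime(long startMillis, long endMillis) {
    return startMillis + (long) ((endMillis - startMillis) * TICKET_RENEW_WINDOW);
  }

  // Retry time after a failure: exponential backoff in seconds, capped so
  // the last attempt happens minMillisBeforeRelogin before the TGT expires.
  static long nextRetryTime(long tgtEndMillis, long nowMillis,
                            long minMillisBeforeRelogin, int failureCount) {
    long lastRetryTime = tgtEndMillis - minMillisBeforeRelogin;
    long backoffMillis = (1L << Math.min(failureCount, 20)) * 1000L;
    return Math.min(lastRetryTime, nowMillis + backoffMillis);
  }
}
```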
*/ @InterfaceAudience.Public @InterfaceStability.Evolving - public synchronized + public static void loginUserFromKeytab(String user, String path - ) throws IOException { - if (!isSecurityEnabled()) { + ) throws IOException { + if (!isSecurityEnabled()) return; - } - keytabFile = path; - keytabPrincipal = user; - Subject subject = new Subject(); - LoginContext login; - long start = 0; - try { - login = newLoginContext(HadoopConfiguration.KEYTAB_KERBEROS_CONFIG_NAME, - subject, new HadoopConfiguration()); - start = Time.now(); - login.login(); - metrics.loginSuccess.add(Time.now() - start); - UserGroupInformation loginUser = new UserGroupInformation(subject); - loginUser.setLogin(login); - loginUser.setAuthenticationMethod(AuthenticationMethod.KERBEROS); - userThreadLocal.set(loginUser); - } catch (LoginException le) { - if (start > 0) { - metrics.loginFailure.add(Time.now() - start); - } - throw new IOException("Login failure for " + user + " from keytab " + - path+ ": " + le, le); - } - LOG.info("Login successful for user " + keytabPrincipal - + " using keytab file " + keytabFile); + setLoginUser(loginUserFromKeytabAndReturnUGI(user, path)); + LOG.info("Login successful for user " + user + + " using keytab file " + path); } /** - * Log the current user out who previously logged in using keytab. - * This method assumes that the user logged in by calling - * {@link #loginUserFromKeytab(String, String)}. + * remove the login method that is followed by a space from the username + * e.g. "jack (auth:SIMPLE)" -> "jack" * - * @throws IOException if a failure occurred in logout, or if the user did - * not log in by invoking loginUserFromKeyTab() before. + * @param userName + * @return userName without login method */ - @InterfaceAudience.Public - @InterfaceStability.Evolving - public void logoutUserFromKeytab() throws IOException { - if (!isSecurityEnabled() || - user.getAuthenticationMethod() != AuthenticationMethod.KERBEROS) { - return; - } - LoginContext login = getLogin(); - if (login == null || keytabFile == null) { - throw new IOException("loginUserFromKeytab must be done first"); + public static String trimLoginMethod(String userName) { + int spaceIndex = userName.indexOf(' '); + if (spaceIndex >= 0) { + userName = userName.substring(0, spaceIndex); } + return userName; + } - try { - if (LOG.isDebugEnabled()) { - LOG.debug("Initiating logout for " + getUserName()); - } - synchronized (UserGroupInformation.class) { - login.logout(); - } - } catch (LoginException le) { - throw new IOException("Logout failure for " + user + " from keytab " + - keytabFile, le); - } + /** + * Log a user in from a keytab file. Loads a user identity from a keytab + * file and login them in. This new user does not affect the currently + * logged-in user. + * @param user the principal name to load from the keytab + * @param path the path to the keytab file + * @throws IOException if the keytab file can't be read + */ + public + static UserGroupInformation loginUserFromKeytabAndReturnUGI(String user, + String path + ) throws IOException { + if (!isSecurityEnabled()) + return UserGroupInformation.getCurrentUser(); - LOG.info("Logout successful for user " + keytabPrincipal - + " using keytab file " + keytabFile); + LoginParams params = new LoginParams(); + params.put(LoginParam.PRINCIPAL, user); + params.put(LoginParam.KEYTAB, path); + return doSubjectLogin(null, params); } - - /** - * Re-login a user from keytab if TGT is expired or is close to expiry. 
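A short usage sketch of the two keytab entry points touched above: loginUserFromKeytab installs the process-wide login user, while loginUserFromKeytabAndReturnUGI returns an independent UGI. The principal names and keytab paths are placeholders, and the package import assumes Hadoop's original layout.

```java
import java.io.IOException;
import org.apache.hadoop.security.UserGroupInformation;

// Illustrative keytab logins (principals and paths are placeholders).
public class KeytabLoginExample {
  public static void main(String[] args) throws IOException {
    // Becomes the process-wide login user.
    UserGroupInformation.loginUserFromKeytab(
        "service/host@EXAMPLE.COM", "/etc/security/keytabs/service.keytab");

    // Independent UGI; does not replace the login user.
    UserGroupInformation isolated =
        UserGroupInformation.loginUserFromKeytabAndReturnUGI(
            "other/host@EXAMPLE.COM", "/etc/security/keytabs/other.keytab");

    System.out.println("login user: " + UserGroupInformation.getLoginUser());
    System.out.println("isolated  : " + isolated);
  }
}
```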
- * - * @throws IOException - */ - public synchronized void checkTGTAndReloginFromKeytab() throws IOException { - if (!isSecurityEnabled() - || user.getAuthenticationMethod() != AuthenticationMethod.KERBEROS - || !isKeytab) { - return; - } - KerberosTicket tgt = getTGT(); - if (tgt != null && !shouldRenewImmediatelyForTests && - Time.now() < getRefreshTime(tgt)) { - return; - } - reloginFromKeytab(); - } - - /** - * Re-Login a user in from a keytab file. Loads a user identity from a keytab - * file and logs them in. They become the currently logged-in user. This - * method assumes that {@link #loginUserFromKeytab(String, String)} had - * happened already. - * The Subject field of this UserGroupInformation object is updated to have - * the new credentials. - * @throws IOException on a failure - */ - @InterfaceAudience.Public - @InterfaceStability.Evolving - public synchronized void reloginFromKeytab() - throws IOException { - if (!isSecurityEnabled() || - user.getAuthenticationMethod() != AuthenticationMethod.KERBEROS || - !isKeytab) { - return; - } - - long now = Time.now(); - if (!shouldRenewImmediatelyForTests && !hasSufficientTimeElapsed(now)) { - return; - } - - KerberosTicket tgt = getTGT(); - //Return if TGT is valid and is not going to expire soon. - if (tgt != null && !shouldRenewImmediatelyForTests && - now < getRefreshTime(tgt)) { - return; - } - - LoginContext login = getLogin(); - if (login == null || keytabFile == null) { - throw new IOException("loginUserFromKeyTab must be done first"); - } - - long start = 0; - // register most recent relogin attempt - user.setLastLogin(now); - try { - if (LOG.isDebugEnabled()) { - LOG.debug("Initiating logout for " + getUserName()); - } - synchronized (UserGroupInformation.class) { - // clear up the kerberos state. But the tokens are not cleared! As per - // the Java kerberos login module code, only the kerberos credentials - // are cleared - login.logout(); - // login and also update the subject field of this instance to - // have the new credentials (pass it to the LoginContext constructor) - login = newLoginContext( - HadoopConfiguration.KEYTAB_KERBEROS_CONFIG_NAME, getSubject(), - new HadoopConfiguration()); - if (LOG.isDebugEnabled()) { - LOG.debug("Initiating re-login for " + keytabPrincipal); - } - start = Time.now(); - login.login(); - metrics.loginSuccess.add(Time.now() - start); - setLogin(login); - } - } catch (LoginException le) { - if (start > 0) { - metrics.loginFailure.add(Time.now() - start); - } - throw new IOException("Login failure for " + keytabPrincipal + - " from keytab " + keytabFile, le); - } - } - - /** - * Re-Login a user in from the ticket cache. This - * method assumes that login had happened already. - * The Subject field of this UserGroupInformation object is updated to have - * the new credentials. - * @throws IOException on a failure - */ - @InterfaceAudience.Public - @InterfaceStability.Evolving - public synchronized void reloginFromTicketCache() - throws IOException { - if (!isSecurityEnabled() || - user.getAuthenticationMethod() != AuthenticationMethod.KERBEROS || - !isKrbTkt) { - return; - } - LoginContext login = getLogin(); - if (login == null) { - throw new IOException("login must be done first"); - } - long now = Time.now(); - if (!hasSufficientTimeElapsed(now)) { - return; - } - // register most recent relogin attempt - user.setLastLogin(now); - try { - if (LOG.isDebugEnabled()) { - LOG.debug("Initiating logout for " + getUserName()); - } - //clear up the kerberos state. 
But the tokens are not cleared! As per - //the Java kerberos login module code, only the kerberos credentials - //are cleared - login.logout(); - //login and also update the subject field of this instance to - //have the new credentials (pass it to the LoginContext constructor) - login = - newLoginContext(HadoopConfiguration.USER_KERBEROS_CONFIG_NAME, - getSubject(), new HadoopConfiguration()); - if (LOG.isDebugEnabled()) { - LOG.debug("Initiating re-login for " + getUserName()); - } - login.login(); - setLogin(login); - } catch (LoginException le) { - throw new IOException("Login failure for " + getUserName(), le); - } - } - - - /** - * Log a user in from a keytab file. Loads a user identity from a keytab - * file and login them in. This new user does not affect the currently - * logged-in user. - * @param user the principal name to load from the keytab - * @param path the path to the keytab file - * @throws IOException if the keytab file can't be read - */ - public synchronized - static UserGroupInformation loginUserFromKeytabAndReturnUGI(String user, - String path - ) throws IOException { - if (!isSecurityEnabled()) { - return UserGroupInformation.getCurrentUser(); - } - String oldKeytabFile = null; - String oldKeytabPrincipal = null; - - long start = 0; - try { - oldKeytabFile = keytabFile; - oldKeytabPrincipal = keytabPrincipal; - keytabFile = path; - keytabPrincipal = user; - Subject subject = new Subject(); - - LoginContext login = newLoginContext( - HadoopConfiguration.KEYTAB_KERBEROS_CONFIG_NAME, subject, - new HadoopConfiguration()); - - start = Time.now(); - login.login(); - metrics.loginSuccess.add(Time.now() - start); - UserGroupInformation newLoginUser = new UserGroupInformation(subject); - newLoginUser.setLogin(login); - newLoginUser.setAuthenticationMethod(AuthenticationMethod.KERBEROS); - - return newLoginUser; - } catch (LoginException le) { - if (start > 0) { - metrics.loginFailure.add(Time.now() - start); - } - throw new IOException("Login failure for " + user + " from keytab " + - path, le); - } finally { - if(oldKeytabFile != null) { - keytabFile = oldKeytabFile; - } - if(oldKeytabPrincipal != null) { - keytabPrincipal = oldKeytabPrincipal; - } - } - } - - private boolean hasSufficientTimeElapsed(long now) { - if (now - user.getLastLogin() < MIN_TIME_BEFORE_RELOGIN ) { - LOG.warn("Not attempting to re-login since the last re-login was " + - "attempted less than " + (MIN_TIME_BEFORE_RELOGIN/1000) + " seconds"+ - " before."); - return false; - } - return true; - } - + /** * Did the login happen via keytab * @return true or false */ @InterfaceAudience.Public @InterfaceStability.Evolving - public synchronized static boolean isLoginKeytabBased() throws IOException { - return getLoginUser().isKeytab; + public static boolean isLoginKeytabBased() throws IOException { + return getLoginUser().isFromKeytab(); } /** @@ -1244,91 +625,9 @@ public synchronized static boolean isLoginKeytabBased() throws IOException { * @return true or false */ public static boolean isLoginTicketBased() throws IOException { - return getLoginUser().isKrbTkt; - } - - /** - * Create a user from a login name. It is intended to be used for remote - * users in RPC, since it won't have any credentials. - * @param user the full user principal name, must not be empty or null - * @return the UserGroupInformation for the remote user. 
- */ - @InterfaceAudience.Public - @InterfaceStability.Evolving - public static UserGroupInformation createRemoteUser(String user) { - return createRemoteUser(user, AuthMethod.SIMPLE); - } - - /** - * Create a user from a login name. It is intended to be used for remote - * users in RPC, since it won't have any credentials. - * @param user the full user principal name, must not be empty or null - * @return the UserGroupInformation for the remote user. - */ - @InterfaceAudience.Public - @InterfaceStability.Evolving - public static UserGroupInformation createRemoteUser(String user, AuthMethod authMethod) { - if (user == null || user.isEmpty()) { - throw new IllegalArgumentException("Null user"); - } - Subject subject = new Subject(); - subject.getPrincipals().add(new User(user)); - UserGroupInformation result = new UserGroupInformation(subject); - result.setAuthenticationMethod(authMethod); - return result; + return getLoginUser().isFromTicket(); } - /** - * existing types of authentications' methods - */ - @InterfaceAudience.Public - @InterfaceStability.Evolving - public static enum AuthenticationMethod { - // currently we support only one auth per method, but eventually a - // subtype is needed to differentiate, ex. if digest is token or ldap - SIMPLE(AuthMethod.SIMPLE, - HadoopConfiguration.SIMPLE_CONFIG_NAME), - KERBEROS(AuthMethod.KERBEROS, - HadoopConfiguration.USER_KERBEROS_CONFIG_NAME), - TOKEN(AuthMethod.TOKEN), - CERTIFICATE(null), - KERBEROS_SSL(null), - PROXY(null); - - private final AuthMethod authMethod; - private final String loginAppName; - - private AuthenticationMethod(AuthMethod authMethod) { - this(authMethod, null); - } - private AuthenticationMethod(AuthMethod authMethod, String loginAppName) { - this.authMethod = authMethod; - this.loginAppName = loginAppName; - } - - public AuthMethod getAuthMethod() { - return authMethod; - } - - String getLoginAppName() { - if (loginAppName == null) { - throw new UnsupportedOperationException( - this + " login authentication is not supported"); - } - return loginAppName; - } - - public static AuthenticationMethod valueOf(AuthMethod authMethod) { - for (AuthenticationMethod value : values()) { - if (value.getAuthMethod() == authMethod) { - return value; - } - } - throw new IllegalArgumentException( - "no authentication method for " + authMethod); - } - }; - /** * Create a proxy user using username of the effective user and the ugi of the * real user. @@ -1348,83 +647,14 @@ public static UserGroupInformation createProxyUser(String user, } Subject subject = new Subject(); Set principals = subject.getPrincipals(); - principals.add(new User(user)); + principals.add(new User(user, AuthenticationMethod.PROXY, null)); principals.add(new RealUser(realUser)); - UserGroupInformation result =new UserGroupInformation(subject); - result.setAuthenticationMethod(AuthenticationMethod.PROXY); - return result; - } - - /** - * get RealUser (vs. EffectiveUser) - * @return realUser running over proxy user - */ - @InterfaceAudience.Public - @InterfaceStability.Evolving - public UserGroupInformation getRealUser() { - for (RealUser p: subject.getPrincipals(RealUser.class)) { - return p.getRealUser(); - } - return null; - } - - - - /** - * This class is used for storing the groups for testing. It stores a local - * map that has the translation of usernames to groups. 
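The remote/proxy user machinery changed in this hunk is typically used as sketched below: a credential-less effective user wrapped by a proxy UGI whose real user holds the login credentials, with work run under doAs. User names are placeholders, the package import is assumed, and on a real cluster impersonation must additionally be permitted by the server-side proxyuser configuration.

```java
import java.io.IOException;
import java.security.PrivilegedExceptionAction;
import org.apache.hadoop.security.UserGroupInformation;

// Illustrative proxy-user flow (user name "alice" is a placeholder).
public class ProxyUserExample {
  public static void main(String[] args) throws IOException, InterruptedException {
    UserGroupInformation realUser = UserGroupInformation.getLoginUser();
    UserGroupInformation proxyUgi =
        UserGroupInformation.createProxyUser("alice", realUser);

    String who = proxyUgi.doAs(new PrivilegedExceptionAction<String>() {
      @Override
      public String run() throws IOException {
        // Inside run(), getCurrentUser() reflects the proxied user.
        return UserGroupInformation.getCurrentUser().toString();
      }
    });
    System.out.println("Ran as: " + who);
  }
}
```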
- */ - private static class TestingGroups extends Groups { - private final Map> userToGroupsMapping = - new HashMap>(); - private Groups underlyingImplementation; - - private TestingGroups(Groups underlyingImplementation) { - super(new Configuration()); - this.underlyingImplementation = underlyingImplementation; - } - - @Override - public List getGroups(String user) throws IOException { - List result = userToGroupsMapping.get(user); - - if (result == null) { - result = underlyingImplementation.getGroups(user); - } - - return result; - } - - private void setUserGroups(String user, String[] groups) { - userToGroupsMapping.put(user, Arrays.asList(groups)); - } - } - - /** - * Create a UGI for testing HDFS and MapReduce - * @param user the full user principal name - * @param userGroups the names of the groups that the user belongs to - * @return a fake user for running unit tests - */ - @InterfaceAudience.Public - @InterfaceStability.Evolving - public static UserGroupInformation createUserForTesting(String user, - String[] userGroups) { - ensureInitialized(); - UserGroupInformation ugi = createRemoteUser(user); - // make sure that the testing object is setup - if (!(groups instanceof TestingGroups)) { - groups = new TestingGroups(groups); - } - // add the user groups - ((TestingGroups) groups).setUserGroups(ugi.getShortUserName(), userGroups); - return ugi; + return new UserGroupInformation(subject); } - /** * Create a proxy user UGI for testing HDFS and MapReduce - * + * * @param user * the full user principal name for effective user * @param realUser @@ -1445,350 +675,1388 @@ public static UserGroupInformation createProxyUserForTesting(String user, ((TestingGroups) groups).setUserGroups(ugi.getShortUserName(), userGroups); return ugi; } - + /** - * Get the user's login name. - * @return the user's name up to the first '/' or '@'. + * Returns the authentication method of a ugi. If the authentication method is + * PROXY, returns the authentication method of the real user. + * + * @param ugi + * @return AuthenticationMethod */ - public String getShortUserName() { - for (User p: subject.getPrincipals(User.class)) { - return p.getShortName(); + public static AuthenticationMethod getRealAuthenticationMethod( + UserGroupInformation ugi) { + AuthenticationMethod authMethod = ugi.getAuthenticationMethod(); + if (authMethod == AuthenticationMethod.PROXY) { + authMethod = ugi.getRealUser().getAuthenticationMethod(); } - return null; - } - - public String getPrimaryGroupName() throws IOException { - String[] groups = getGroupNames(); - if (groups.length == 0) { - throw new IOException("There is no primary group for UGI " + this); - } - return groups[0]; + return authMethod; } /** - * Get the user's full principal name. - * @return the user's full principal name. + * Log current UGI and token information into specified log. + * @param ugi - UGI + * @throws IOException */ - @InterfaceAudience.Public - @InterfaceStability.Evolving - public String getUserName() { - return user.getName(); + @InterfaceAudience.LimitedPrivate({"HDFS", "KMS"}) + @InterfaceStability.Unstable + public static void logUserInfo(Logger log, String caption, + UserGroupInformation ugi) throws IOException { + if (log.isDebugEnabled()) { + log.debug(caption + " UGI: " + ugi); + for (Token token : ugi.getTokens()) { + log.debug("+token:" + token); + } + } } /** - * Add a TokenIdentifier to this UGI. The TokenIdentifier has typically been - * authenticated by the RPC layer as belonging to the user represented by this - * UGI. 
- * - * @param tokenId - * tokenIdentifier to be added - * @return true on successful add of new tokenIdentifier + * Log all (current, real, login) UGI and token info into specified log. + * @param ugi - UGI + * @throws IOException */ - public synchronized boolean addTokenIdentifier(TokenIdentifier tokenId) { - return subject.getPublicCredentials().add(tokenId); + @InterfaceAudience.LimitedPrivate({"HDFS", "KMS"}) + @InterfaceStability.Unstable + public static void logAllUserInfo(Logger log, UserGroupInformation ugi) throws + IOException { + if (log.isDebugEnabled()) { + logUserInfo(log, "Current", ugi.getCurrentUser()); + if (ugi.getRealUser() != null) { + logUserInfo(log, "Real", ugi.getRealUser()); + } + logUserInfo(log, "Login", ugi.getLoginUser()); + } } - /** - * Get the set of TokenIdentifiers belonging to this UGI - * - * @return the set of TokenIdentifiers belonging to this UGI - */ - public synchronized Set getTokenIdentifiers() { - return subject.getPublicCredentials(TokenIdentifier.class); - } - - /** - * Add a token to this UGI - * - * @param token Token to be added - * @return true on successful add of new token - */ - public boolean addToken(Token token) { - return (token != null) ? addToken(token.getService(), token) : false; + private long getRefreshTime(KerberosTicket tgt) { + long start = tgt.getStartTime().getTime(); + long end = tgt.getEndTime().getTime(); + return start + (long) ((end - start) * TICKET_RENEW_WINDOW); } /** - * Add a named token to this UGI - * - * @param alias Name of the token - * @param token Token to be added - * @return true on successful add of new token - */ - public boolean addToken(Text alias, Token token) { - synchronized (subject) { - getCredentialsInternal().addToken(alias, token); - return true; - } - } - - /** - * Obtain the collection of tokens associated with this user. - * - * @return an unmodifiable collection of tokens associated with user + * Log all (current, real, login) UGI and token info into UGI debug log. + * @param ugi - UGI + * @throws IOException */ - public Collection> getTokens() { - synchronized (subject) { - return Collections.unmodifiableCollection( - new ArrayList>(getCredentialsInternal().getAllTokens())); - } + public static void logAllUserInfo(UserGroupInformation ugi) throws + IOException { + logAllUserInfo(LOG, ugi); } /** - * Obtain the tokens in credentials form associated with this user. - * - * @return Credentials of tokens associated with this user + * Login a subject with the given parameters. If the subject is null, + * the login context used to create the subject will be attached. + * @param subject to login, null for new subject. + * @param params for login, null for externally managed ugi. + * @return UserGroupInformation for subject + * @throws IOException */ - public Credentials getCredentials() { - synchronized (subject) { - Credentials creds = new Credentials(getCredentialsInternal()); - Iterator> iter = creds.getAllTokens().iterator(); - while (iter.hasNext()) { - if (iter.next().isPrivate()) { - iter.remove(); - } + private static UserGroupInformation doSubjectLogin( + Subject subject, LoginParams params) throws IOException { + ensureInitialized(); + // initial default login. 
+ if (subject == null && params == null) { + params = LoginParams.getDefaults(); + } + HadoopConfiguration loginConf = new HadoopConfiguration(params); + try { + HadoopLoginContext login = newLoginContext( + authenticationMethod.getLoginAppName(), subject, loginConf); + login.login(); + UserGroupInformation ugi = new UserGroupInformation(login.getSubject()); + // attach login context for relogin unless this was a pre-existing + // subject. + if (subject == null) { + params.put(LoginParam.PRINCIPAL, ugi.getUserName()); + ugi.setLogin(login); } - return creds; + return ugi; + } catch (LoginException le) { + KerberosAuthException kae = + new KerberosAuthException(FAILURE_TO_LOGIN, le); + if (params != null) { + kae.setPrincipal(params.get(LoginParam.PRINCIPAL)); + kae.setKeytabFile(params.get(LoginParam.KEYTAB)); + kae.setTicketCacheFile(params.get(LoginParam.CCACHE)); + } + throw kae; } } /** - * Add the given Credentials to this user. - * @param credentials of tokens and secrets + * A test method to print out the current user's UGI. + * @param args if there are two arguments, read the user from the keytab + * and print it out. + * @throws Exception */ - public void addCredentials(Credentials credentials) { - synchronized (subject) { - getCredentialsInternal().addAll(credentials); + public static void main(String [] args) throws Exception { + System.out.println("Getting UGI for current user"); + UserGroupInformation ugi = getCurrentUser(); + ugi.print(); + System.out.println("UGI: " + ugi); + System.out.println("Auth method " + ugi.user.getAuthenticationMethod()); + System.out.println("Keytab " + ugi.isFromKeytab()); + System.out.println("============================================================"); + + if (args.length == 2) { + System.out.println("Getting UGI from keytab...."); + loginUserFromKeytab(args[0], args[1]); + getCurrentUser().print(); + System.out.println("Keytab: " + ugi); + UserGroupInformation loginUgi = getLoginUser(); + System.out.println("Auth method " + loginUgi.getAuthenticationMethod()); + System.out.println("Keytab " + loginUgi.isFromKeytab()); } } - private synchronized Credentials getCredentialsInternal() { - final Credentials credentials; - final Set credentialsSet = - subject.getPrivateCredentials(Credentials.class); - if (!credentialsSet.isEmpty()){ - credentials = credentialsSet.iterator().next(); - } else { - credentials = new Credentials(); - subject.getPrivateCredentials().add(credentials); - } - return credentials; + // return the LoginContext only if it's managed by the ugi. externally + // managed login contexts will be ignored. + private HadoopLoginContext getLogin() { + LoginContext login = user.getLogin(); + return (login instanceof HadoopLoginContext) + ? (HadoopLoginContext)login : null; } /** - * Get the group names for this user. - * @return the list of users with the primary group first. If the command - * fails, it returns an empty list. 
+ * checks if logged in using kerberos + * @return true if the subject logged via keytab or has a Kerberos TGT */ - public synchronized String[] getGroupNames() { - ensureInitialized(); - try { - Set result = new LinkedHashSet - (groups.getGroups(getShortUserName())); - return result.toArray(new String[result.size()]); - } catch (IOException ie) { - if (LOG.isDebugEnabled()) { - LOG.debug("Failed to get groups for user " + getShortUserName() - + " by " + ie); - LOG.trace("TRACE", ie); - } - return StringUtils.emptyStringArray; - } + public boolean hasKerberosCredentials() { + return user.getAuthenticationMethod() == AuthenticationMethod.KERBEROS; } - - /** - * Return the username. - */ - @Override - public String toString() { - StringBuilder sb = new StringBuilder(getUserName()); - sb.append(" (auth:"+getAuthenticationMethod()+")"); - if (getRealUser() != null) { - sb.append(" via ").append(getRealUser().toString()); - } - return sb.toString(); + + private String getKeytab() { + HadoopLoginContext login = getLogin(); + return (login != null) + ? login.getConfiguration().getParameters().get(LoginParam.KEYTAB) + : null; } /** - * Sets the authentication method in the subject - * - * @param authMethod + * Is the ugi managed by the UGI or an external subject? + * @return true if managed by UGI. */ - public synchronized - void setAuthenticationMethod(AuthenticationMethod authMethod) { - user.setAuthenticationMethod(authMethod); + private boolean isHadoopLogin() { + // checks if the private hadoop login context is managing the ugi. + return getLogin() != null; } /** - * Sets the authentication method in the subject - * - * @param authMethod + * Is this user logged in from a keytab file managed by the UGI? + * @return true if the credentials are from a keytab file. */ - public void setAuthenticationMethod(AuthMethod authMethod) { - user.setAuthenticationMethod(AuthenticationMethod.valueOf(authMethod)); + public boolean isFromKeytab() { + // can't simply check if keytab is present since a relogin failure will + // have removed the keytab from priv creds. instead, check login params. + return hasKerberosCredentials() && isHadoopLogin() && getKeytab() != null; } /** - * Get the authentication method from the subject - * - * @return AuthenticationMethod in the subject, null if not present. + * Is this user logged in from a ticket (but no keytab) managed by the UGI? + * @return true if the credentials are from a ticket cache. */ - public synchronized AuthenticationMethod getAuthenticationMethod() { - return user.getAuthenticationMethod(); + private boolean isFromTicket() { + return hasKerberosCredentials() && isHadoopLogin() && getKeytab() == null; } /** - * Get the authentication method from the real user's subject. If there - * is no real user, return the given user's authentication method. - * - * @return AuthenticationMethod in the subject, null if not present. 
+ * Get the Kerberos TGT + * @return the user's TGT or null if none was found */ - public synchronized AuthenticationMethod getRealAuthenticationMethod() { - UserGroupInformation ugi = getRealUser(); - if (ugi == null) { - ugi = this; + private KerberosTicket getTGT() { + Set tickets = subject + .getPrivateCredentials(KerberosTicket.class); + for (KerberosTicket ticket : tickets) { + if (SecurityUtil.isOriginalTGT(ticket)) { + return ticket; + } } - return ugi.getAuthenticationMethod(); + return null; + } + + @InterfaceAudience.Private + @InterfaceStability.Unstable + public boolean shouldRelogin() { + return hasKerberosCredentials() && isHadoopLogin(); } + @InterfaceAudience.Private + @InterfaceStability.Unstable + @VisibleForTesting /** - * Returns the authentication method of a ugi. If the authentication method is - * PROXY, returns the authentication method of the real user. - * - * @param ugi - * @return AuthenticationMethod + * Spawn a thread to do periodic renewals of kerberos credentials from + * a ticket cache. NEVER directly call this method. + * @param force - used by tests to forcibly spawn thread */ - public static AuthenticationMethod getRealAuthenticationMethod( - UserGroupInformation ugi) { - AuthenticationMethod authMethod = ugi.getAuthenticationMethod(); - if (authMethod == AuthenticationMethod.PROXY) { - authMethod = ugi.getRealUser().getAuthenticationMethod(); + void spawnAutoRenewalThreadForUserCreds(boolean force) { + if (!force && (!shouldRelogin() || isFromKeytab())) { + return; } - return authMethod; + + //spawn thread only if we have kerb credentials + KerberosTicket tgt = getTGT(); + if (tgt == null) { + return; + } + String cmd = conf.get("hadoop.kerberos.kinit.command", "kinit"); + long nextRefresh = getRefreshTime(tgt); + Thread t = + new Thread(new AutoRenewalForUserCredsRunnable(tgt, cmd, nextRefresh)); + t.setDaemon(true); + t.setName("TGT Renewer for " + getUserName()); + t.start(); } /** - * Compare the subjects to see if they are equal to each other. + * Log the current user out who previously logged in using keytab. + * This method assumes that the user logged in by calling + * {@link #loginUserFromKeytab(String, String)}. + * + * @throws IOException + * @throws KerberosAuthException if a failure occurred in logout, + * or if the user did not log in by invoking loginUserFromKeyTab() before. */ - @Override - public boolean equals(Object o) { - if (o == this) { - return true; - } else if (o == null || getClass() != o.getClass()) { - return false; - } else { - return subject == ((UserGroupInformation) o).subject; + @InterfaceAudience.Public + @InterfaceStability.Evolving + public void logoutUserFromKeytab() throws IOException { + if (!hasKerberosCredentials()) { + return; + } + HadoopLoginContext login = getLogin(); + String keytabFile = getKeytab(); + if (login == null || keytabFile == null) { + throw new KerberosAuthException(MUST_FIRST_LOGIN_FROM_KEYTAB); + } + + try { + if (LOG.isDebugEnabled()) { + LOG.debug("Initiating logout for " + getUserName()); + } + // hadoop login context internally locks credentials. + login.logout(); + } catch (LoginException le) { + KerberosAuthException kae = new KerberosAuthException(LOGOUT_FAILURE, le); + kae.setUser(user.toString()); + kae.setKeytabFile(keytabFile); + throw kae; } + + LOG.info("Logout successful for user " + getUserName() + + " using keytab file " + keytabFile); } /** - * Return the hash of the subject. + * Re-login a user from keytab if TGT is expired or is close to expiry. 
+ * + * @throws IOException + * @throws KerberosAuthException if it's a kerberos login exception. */ - @Override - public int hashCode() { - return System.identityHashCode(subject); + public void checkTGTAndReloginFromKeytab() throws IOException { + reloginFromKeytab(true); } - /** - * Get the underlying subject from this ugi. - * @return the subject that represents this user. - */ - protected Subject getSubject() { - return subject; + // if the first kerberos ticket is not TGT, then remove and destroy it since + // the kerberos library of jdk always use the first kerberos ticket as TGT. + // See HADOOP-13433 for more details. + @VisibleForTesting + void fixKerberosTicketOrder() { + Set creds = getSubject().getPrivateCredentials(); + synchronized (creds) { + for (Iterator iter = creds.iterator(); iter.hasNext();) { + Object cred = iter.next(); + if (cred instanceof KerberosTicket) { + KerberosTicket ticket = (KerberosTicket) cred; + if (ticket.isDestroyed() || ticket.getServer() == null) { + LOG.warn("Ticket is already destroyed, remove it."); + iter.remove(); + } else if (!ticket.getServer().getName().startsWith("krbtgt")) { + LOG.warn( + "The first kerberos ticket is not TGT" + + "(the server principal is {}), remove and destroy it.", + ticket.getServer()); + iter.remove(); + try { + ticket.destroy(); + } catch (DestroyFailedException e) { + LOG.warn("destroy ticket failed", e); + } + } else { + return; + } + } + } + } + LOG.warn("Warning, no kerberos ticket found while attempting to renew ticket"); } /** - * Run the given action as the user. - * @param the return type of the run method - * @param action the method to execute - * @return the value from the run method + * Re-Login a user in from a keytab file. Loads a user identity from a keytab + * file and logs them in. They become the currently logged-in user. This + * method assumes that {@link #loginUserFromKeytab(String, String)} had + * happened already. + * The Subject field of this UserGroupInformation object is updated to have + * the new credentials. + * @throws IOException + * @throws KerberosAuthException on a failure */ @InterfaceAudience.Public @InterfaceStability.Evolving - public T doAs(PrivilegedAction action) { - logPrivilegedAction(subject, action); - return Subject.doAs(subject, action); + public void reloginFromKeytab() throws IOException { + reloginFromKeytab(false); } - - /** - * Run the given action as the user, potentially throwing an exception. - * @param the return type of the run method - * @param action the method to execute - * @return the value from the run method - * @throws IOException if the action throws an IOException - * @throws Error if the action throws an Error - * @throws RuntimeException if the action throws a RuntimeException - * @throws InterruptedException if the action throws an InterruptedException - * @throws UndeclaredThrowableException if the action throws something else + + private void reloginFromKeytab(boolean checkTGT) throws IOException { + if (!shouldRelogin() || !isFromKeytab()) { + return; + } + HadoopLoginContext login = getLogin(); + if (login == null) { + throw new KerberosAuthException(MUST_FIRST_LOGIN_FROM_KEYTAB); + } + if (checkTGT) { + KerberosTicket tgt = getTGT(); + if (tgt != null && !shouldRenewImmediatelyForTests && + Time.now() < getRefreshTime(tgt)) { + return; + } + } + relogin(login); + } + + /** + * Create a user from a login name. It is intended to be used for remote + * users in RPC, since it won't have any credentials. 
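fixKerberosTicketOrder above walks the subject's private credentials and drops destroyed tickets and any leading ticket whose server principal is not krbtgt. A small sketch of the underlying "is this a usable TGT" check; Hadoop's SecurityUtil.isOriginalTGT also validates the realm, which this simplified predicate does not.

```java
import javax.security.auth.kerberos.KerberosTicket;

// Sketch of the TGT check behind fixKerberosTicketOrder(): a usable TGT is
// not destroyed, has a server principal, and that principal starts with
// "krbtgt". (Simplified; the real check also compares realms.)
final class TgtPredicate {
  static boolean looksLikeTgt(KerberosTicket ticket) {
    return ticket != null
        && !ticket.isDestroyed()
        && ticket.getServer() != null
        && ticket.getServer().getName().startsWith("krbtgt");
  }
}
```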
+ * @param user the full user principal name, must not be empty or null + * @return the UserGroupInformation for the remote user. */ @InterfaceAudience.Public @InterfaceStability.Evolving - public T doAs(PrivilegedExceptionAction action - ) throws IOException, InterruptedException { + public static UserGroupInformation createRemoteUser(String user) { + return createRemoteUser(user, AuthMethod.SIMPLE); + } + + /** + * Create a user from a login name. It is intended to be used for remote + * users in RPC, since it won't have any credentials. + * @param user the full user principal name, must not be empty or null + * @return the UserGroupInformation for the remote user. + */ + @InterfaceAudience.Public + @InterfaceStability.Evolving + public static UserGroupInformation createRemoteUser(String user, AuthMethod authMethod) { + if (user == null || user.isEmpty()) { + throw new IllegalArgumentException("Null user"); + } + Subject subject = new Subject(); + subject.getPrincipals().add(new User(user)); + UserGroupInformation result = new UserGroupInformation(subject); + result.setAuthenticationMethod(authMethod); + return result; + } + + /** + * Re-Login a user in from the ticket cache. This + * method assumes that login had happened already. + * The Subject field of this UserGroupInformation object is updated to have + * the new credentials. + * @throws IOException + * @throws KerberosAuthException on a failure + */ + @InterfaceAudience.Public + @InterfaceStability.Evolving + public void reloginFromTicketCache() throws IOException { + if (!shouldRelogin() || !isFromTicket()) { + return; + } + HadoopLoginContext login = getLogin(); + if (login == null) { + throw new KerberosAuthException(MUST_FIRST_LOGIN); + } + relogin(login); + }; + + private void relogin(HadoopLoginContext login) throws IOException { + // ensure the relogin is atomic to avoid leaving credentials in an + // inconsistent state. prevents other ugi instances, SASL, and SPNEGO + // from accessing or altering credentials during the relogin. + synchronized(login.getSubjectLock()) { + // another racing thread may have beat us to the relogin. + if (login == getLogin()) { + unprotectedRelogin(login); + } + } + } + + /** + * get RealUser (vs. EffectiveUser) + * @return realUser running over proxy user + */ + @InterfaceAudience.Public + @InterfaceStability.Evolving + public UserGroupInformation getRealUser() { + for (RealUser p: subject.getPrincipals(RealUser.class)) { + return p.getRealUser(); + } + return null; + } + + private void unprotectedRelogin(HadoopLoginContext login) throws IOException { + assert Thread.holdsLock(login.getSubjectLock()); + long now = Time.now(); + if (!hasSufficientTimeElapsed(now)) { + return; + } + // register most recent relogin attempt + user.setLastLogin(now); try { - logPrivilegedAction(subject, action); - return Subject.doAs(subject, action); - } catch (PrivilegedActionException pae) { - Throwable cause = pae.getCause(); if (LOG.isDebugEnabled()) { - LOG.debug("PrivilegedActionException as:" + this + " cause:" + cause); + LOG.debug("Initiating logout for " + getUserName()); } - if (cause instanceof IOException) { - throw (IOException) cause; - } else if (cause instanceof Error) { - throw (Error) cause; - } else if (cause instanceof RuntimeException) { - throw (RuntimeException) cause; - } else if (cause instanceof InterruptedException) { - throw (InterruptedException) cause; - } else { - throw new UndeclaredThrowableException(cause); + //clear up the kerberos state. But the tokens are not cleared! 
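relogin above serializes on the login's subject lock and re-checks that the captured login is still current before doing the logout/login cycle. A generic, JDK-only sketch of that double-checked guard; the field and method names here are illustrative, not the HadoopLoginContext API.

```java
import java.io.IOException;

// Generic sketch of the double-checked relogin guard: take a shared lock,
// then re-verify the captured login is still the current one before running
// the expensive logout/login cycle. Names are illustrative.
final class ReloginGuard<L> {
  private final Object lock = new Object();
  private volatile L currentLogin;

  interface Relogin<L> {
    void run(L login) throws IOException;
  }

  void reloginIfCurrent(L captured, Relogin<L> action) throws IOException {
    synchronized (lock) {
      // Another thread may already have replaced the login; skip if so.
      if (captured == currentLogin) {
        action.run(captured);
      }
    }
  }

  void setCurrentLogin(L login) {
    this.currentLogin = login;
  }
}
```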
As per + //the Java kerberos login module code, only the kerberos credentials + //are cleared + login.logout(); + //login and also update the subject field of this instance to + //have the new credentials (pass it to the LoginContext constructor) + login = newLoginContext( + login.getAppName(), login.getSubject(), login.getConfiguration()); + if (LOG.isDebugEnabled()) { + LOG.debug("Initiating re-login for " + getUserName()); } + login.login(); + // this should be unnecessary. originally added due to improper locking + // of the subject during relogin. + fixKerberosTicketOrder(); + setLogin(login); + } catch (LoginException le) { + KerberosAuthException kae = new KerberosAuthException(LOGIN_FAILURE, le); + kae.setUser(getUserName()); + throw kae; } } - private void logPrivilegedAction(Subject subject, Object action) { - if (LOG.isDebugEnabled()) { - // would be nice if action included a descriptive toString() - String where = new Throwable().getStackTrace()[2].toString(); - LOG.debug("PrivilegedAction as:"+this+" from:"+where); + /** + * Create a UGI for testing HDFS and MapReduce + * @param user the full user principal name + * @param userGroups the names of the groups that the user belongs to + * @return a fake user for running unit tests + */ + @InterfaceAudience.Public + @InterfaceStability.Evolving + public static UserGroupInformation createUserForTesting(String user, + String[] userGroups) { + ensureInitialized(); + UserGroupInformation ugi = createRemoteUser(user); + // make sure that the testing object is setup + if (!(groups instanceof TestingGroups)) { + groups = new TestingGroups(groups); + } + // add the user groups + ((TestingGroups) groups).setUserGroups(ugi.getShortUserName(), userGroups); + return ugi; + } + + private boolean hasSufficientTimeElapsed(long now) { + if (!shouldRenewImmediatelyForTests && + now - user.getLastLogin() < kerberosMinSecondsBeforeRelogin ) { + LOG.warn("Not attempting to re-login since the last re-login was " + + "attempted less than " + (kerberosMinSecondsBeforeRelogin/1000) + + " seconds before. Last Login=" + user.getLastLogin()); + return false; } + return true; } - private void print() throws IOException { - System.out.println("User: " + getUserName()); - System.out.print("Group Ids: "); - System.out.println(); - String[] groups = getGroupNames(); - System.out.print("Groups: "); - for(int i=0; i < groups.length; i++) { - System.out.print(groups[i] + " "); + /** + * Get the user's login name. + * @return the user's name up to the first '/' or '@'. + */ + public String getShortUserName() { + return user.getShortName(); + } + + public String getPrimaryGroupName() throws IOException { + List groups = getGroups(); + if (groups.isEmpty()) { + throw new IOException("There is no primary group for UGI " + this); } - System.out.println(); + return groups.get(0); } /** - * A test method to print out the current user's UGI. - * @param args if there are two arguments, read the user from the keytab - * and print it out. - * @throws Exception + * Get the user's full principal name. + * @return the user's full principal name. 
*/ - public static void main(String [] args) throws Exception { - System.out.println("Getting UGI for current user"); - UserGroupInformation ugi = getCurrentUser(); - ugi.print(); - System.out.println("UGI: " + ugi); - System.out.println("Auth method " + ugi.user.getAuthenticationMethod()); - System.out.println("Keytab " + ugi.isKeytab); - System.out.println("============================================================"); - - if (args.length == 2) { - System.out.println("Getting UGI from keytab...."); - loginUserFromKeytab(args[0], args[1]); - getCurrentUser().print(); - System.out.println("Keytab: " + ugi); -// System.out.println("Auth method " + loginUser.user.getAuthenticationMethod()); -// System.out.println("Keytab " + loginUser.isKeytab); + @InterfaceAudience.Public + @InterfaceStability.Evolving + public String getUserName() { + return user.getName(); + } + + /** + * Add a TokenIdentifier to this UGI. The TokenIdentifier has typically been + * authenticated by the RPC layer as belonging to the user represented by this + * UGI. + * + * @param tokenId + * tokenIdentifier to be added + * @return true on successful add of new tokenIdentifier + */ + public synchronized boolean addTokenIdentifier(TokenIdentifier tokenId) { + return subject.getPublicCredentials().add(tokenId); + } + + /** + * Get the set of TokenIdentifiers belonging to this UGI + * + * @return the set of TokenIdentifiers belonging to this UGI + */ + public synchronized Set getTokenIdentifiers() { + return subject.getPublicCredentials(TokenIdentifier.class); + } + + /** + * Add a token to this UGI + * + * @param token Token to be added + * @return true on successful add of new token + */ + public boolean addToken(Token token) { + return (token != null) ? addToken(token.getService(), token) : false; + } + + /** + * Add a named token to this UGI + * + * @param alias Name of the token + * @param token Token to be added + * @return true on successful add of new token + */ + public boolean addToken(Text alias, Token token) { + synchronized (subject) { + getCredentialsInternal().addToken(alias, token); + return true; + } + } + + /** + * Obtain the collection of tokens associated with this user. + * + * @return an unmodifiable collection of tokens associated with user + */ + public Collection> getTokens() { + synchronized (subject) { + return Collections.unmodifiableCollection( + new ArrayList>(getCredentialsInternal().getAllTokens())); + } + } + + /** + * Obtain the tokens in credentials form associated with this user. + * + * @return Credentials of tokens associated with this user + */ + public Credentials getCredentials() { + synchronized (subject) { + Credentials creds = new Credentials(getCredentialsInternal()); + Iterator> iter = creds.getAllTokens().iterator(); + while (iter.hasNext()) { + if (iter.next().isPrivate()) { + iter.remove(); + } + } + return creds; } } + /** + * Add the given Credentials to this user. 
+ * @param credentials of tokens and secrets + */ + public void addCredentials(Credentials credentials) { + synchronized (subject) { + getCredentialsInternal().addAll(credentials); + } + } + + private synchronized Credentials getCredentialsInternal() { + final Credentials credentials; + final Set credentialsSet = + subject.getPrivateCredentials(Credentials.class); + if (!credentialsSet.isEmpty()){ + credentials = credentialsSet.iterator().next(); + } else { + credentials = new Credentials(); + subject.getPrivateCredentials().add(credentials); + } + return credentials; + } + + /** + * Get the group names for this user. {@link #getGroups()} is less + * expensive alternative when checking for a contained element. + * @return the list of users with the primary group first. If the command + * fails, it returns an empty list. + */ + public String[] getGroupNames() { + List groups = getGroups(); + return groups.toArray(new String[groups.size()]); + } + + /** + * Get the group names for this user. + * @return the list of users with the primary group first. If the command + * fails, it returns an empty list. + */ + public List getGroups() { + ensureInitialized(); + try { + return groups.getGroups(getShortUserName()); + } catch (IOException ie) { + if (LOG.isDebugEnabled()) { + LOG.debug("Failed to get groups for user " + getShortUserName() + + " by " + ie); + LOG.trace("TRACE", ie); + } + return Collections.emptyList(); + } + } + + /** + * Return the username. + */ + @Override + public String toString() { + StringBuilder sb = new StringBuilder(getUserName()); + sb.append(" (auth:"+getAuthenticationMethod()+")"); + if (getRealUser() != null) { + sb.append(" via ").append(getRealUser().toString()); + } + return sb.toString(); + } + + /** + * Get the authentication method from the subject + * + * @return AuthenticationMethod in the subject, null if not present. + */ + public synchronized AuthenticationMethod getAuthenticationMethod() { + return user.getAuthenticationMethod(); + } + + /** + * Sets the authentication method in the subject + * + * @param authMethod + */ + public synchronized + void setAuthenticationMethod(AuthenticationMethod authMethod) { + user.setAuthenticationMethod(authMethod); + } + + /** + * Sets the authentication method in the subject + * + * @param authMethod + */ + public void setAuthenticationMethod(AuthMethod authMethod) { + user.setAuthenticationMethod(AuthenticationMethod.valueOf(authMethod)); + } + + /** + * Get the authentication method from the real user's subject. If there + * is no real user, return the given user's authentication method. + * + * @return AuthenticationMethod in the subject, null if not present. + */ + public synchronized AuthenticationMethod getRealAuthenticationMethod() { + UserGroupInformation ugi = getRealUser(); + if (ugi == null) { + ugi = this; + } + return ugi.getAuthenticationMethod(); + } + + /** + * Run the given action as the user, potentially throwing an exception. 
+ * @param the return type of the run method + * @param action the method to execute + * @return the value from the run method + * @throws IOException if the action throws an IOException + * @throws Error if the action throws an Error + * @throws RuntimeException if the action throws a RuntimeException + * @throws InterruptedException if the action throws an InterruptedException + * @throws UndeclaredThrowableException if the action throws something else + */ + @InterfaceAudience.Public + @InterfaceStability.Evolving + public T doAs(PrivilegedExceptionAction action + ) throws IOException, InterruptedException { + try { + logPrivilegedAction(subject, action); + return Subject.doAs(subject, action); + } catch (PrivilegedActionException pae) { + Throwable cause = pae.getCause(); + if (LOG.isDebugEnabled()) { + LOG.debug("PrivilegedActionException as:" + this + " cause:" + cause); + } + if (cause == null) { + throw new RuntimeException("PrivilegedActionException with no " + + "underlying cause. UGI [" + this + "]" +": " + pae, pae); + } else if (cause instanceof IOException) { + throw (IOException) cause; + } else if (cause instanceof Error) { + throw (Error) cause; + } else if (cause instanceof RuntimeException) { + throw (RuntimeException) cause; + } else if (cause instanceof InterruptedException) { + throw (InterruptedException) cause; + } else { + throw new UndeclaredThrowableException(cause); + } + } + } + + /** + * Compare the subjects to see if they are equal to each other. + */ + @Override + public boolean equals(Object o) { + if (o == this) { + return true; + } else if (o == null || getClass() != o.getClass()) { + return false; + } else { + return subject == ((UserGroupInformation) o).subject; + } + } + + /** + * Return the hash of the subject. + */ + @Override + public int hashCode() { + return System.identityHashCode(subject); + } + + /** + * Get the underlying subject from this ugi. + * @return the subject that represents this user. + */ + protected Subject getSubject() { + return subject; + } + + /** + * Run the given action as the user. + * @param the return type of the run method + * @param action the method to execute + * @return the value from the run method + */ + @InterfaceAudience.Public + @InterfaceStability.Evolving + public T doAs(PrivilegedAction action) { + logPrivilegedAction(subject, action); + return Subject.doAs(subject, action); + } + + private void print() throws IOException { + System.out.println("User: " + getUserName()); + System.out.print("Group Ids: "); + System.out.println(); + String[] groups = getGroupNames(); + System.out.print("Groups: "); + for(int i=0; i < groups.length; i++) { + System.out.print(groups[i] + " "); + } + System.out.println(); + } + + private void logPrivilegedAction(Subject subject, Object action) { + if (LOG.isDebugEnabled()) { + // would be nice if action included a descriptive toString() + String where = new Throwable().getStackTrace()[2].toString(); + LOG.debug("PrivilegedAction as:"+this+" from:"+where); + } + } + +/** + * existing types of authentications' methods + */ + @InterfaceAudience.Public + @InterfaceStability.Evolving + public enum AuthenticationMethod { + // currently we support only one auth per method, but eventually a + // subtype is needed to differentiate, ex. 
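doAs is the workhorse above: the unwrapping of PrivilegedActionException is what lets callers catch a plain IOException instead of a wrapped one. A typical usage sketch, running an HDFS call as a remote user; the user name and path are placeholders:

    import java.security.PrivilegedExceptionAction;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.security.UserGroupInformation;

    public class DoAsSketch {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        UserGroupInformation remoteUser = UserGroupInformation.createRemoteUser("alice");

        // Everything inside run() sees "alice" as the current user; an IOException
        // thrown here surfaces directly thanks to the cause-unwrapping above.
        boolean exists = remoteUser.doAs((PrivilegedExceptionAction<Boolean>) () -> {
          FileSystem fs = FileSystem.get(conf);
          return fs.exists(new Path("/tmp"));
        });
        System.out.println(exists);
      }
    }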
if digest is token or ldap + SIMPLE(AuthMethod.SIMPLE, + HadoopConfiguration.SIMPLE_CONFIG_NAME), + KERBEROS(AuthMethod.KERBEROS, + HadoopConfiguration.KERBEROS_CONFIG_NAME), + TOKEN(AuthMethod.TOKEN), + CERTIFICATE(null), + KERBEROS_SSL(null), + PROXY(null); + + private final AuthMethod authMethod; + private final String loginAppName; + + private AuthenticationMethod(AuthMethod authMethod) { + this(authMethod, null); + } + private AuthenticationMethod(AuthMethod authMethod, String loginAppName) { + this.authMethod = authMethod; + this.loginAppName = loginAppName; + } + + public AuthMethod getAuthMethod() { + return authMethod; + } + + public static AuthenticationMethod valueOf(AuthMethod authMethod) { + for (AuthenticationMethod value : values()) { + if (value.getAuthMethod() == authMethod) { + return value; + } + } + throw new IllegalArgumentException( + "no authentication method for " + authMethod); + } + + String getLoginAppName() { + if (loginAppName == null) { + throw new UnsupportedOperationException( + this + " login authentication is not supported"); + } + return loginAppName; + } + } + + // parameters associated with kerberos logins. may be extended to support + // additional authentication methods. + enum LoginParam { + PRINCIPAL, + KEYTAB, + CCACHE, + } + + /** + * UgiMetrics maintains UGI activity statistics + * and publishes them through the metrics interfaces. + */ + @Metrics(about="User and group related metrics", context="ugi") + static class UgiMetrics { + final MetricsRegistry registry = new MetricsRegistry("UgiMetrics"); + + @Metric("Rate of successful kerberos logins and latency (milliseconds)") + MutableRate loginSuccess; + @Metric("Rate of failed kerberos logins and latency (milliseconds)") + MutableRate loginFailure; + @Metric("GetGroups") MutableRate getGroups; + MutableQuantiles[] getGroupsQuantiles; + @Metric("Renewal failures since startup") + private MutableGaugeLong renewalFailuresTotal; + @Metric("Renewal failures since last successful login") + private MutableGaugeInt renewalFailures; + + static UgiMetrics create() { + return DefaultMetricsSystem.instance().register(new UgiMetrics()); + } + + static void reattach() { + metrics = UgiMetrics.create(); + } + + void addGetGroups(long latency) { + getGroups.add(latency); + if (getGroupsQuantiles != null) { + for (MutableQuantiles q : getGroupsQuantiles) { + q.add(latency); + } + } + } + + MutableGaugeInt getRenewalFailures() { + return renewalFailures; + } + } + + /** + * A login module that looks at the Kerberos, Unix, or Windows principal and + * adds the corresponding UserName. + */ + @InterfaceAudience.Private + public static class HadoopLoginModule implements LoginModule { + private Subject subject; + + @Override + public boolean abort() throws LoginException { + return true; + } + + private T getCanonicalUser(Class cls) { + for(T user: subject.getPrincipals(cls)) { + return user; + } + return null; + } + + @Override + public boolean commit() throws LoginException { + if (LOG.isDebugEnabled()) { + LOG.debug("hadoop login commit"); + } + // if we already have a user, we are done. 
+ if (!subject.getPrincipals(User.class).isEmpty()) { + if (LOG.isDebugEnabled()) { + LOG.debug("using existing subject:"+subject.getPrincipals()); + } + return true; + } + Principal user = getCanonicalUser(KerberosPrincipal.class); + if (user != null) { + if (LOG.isDebugEnabled()) { + LOG.debug("using kerberos user:"+user); + } + } + //If we don't have a kerberos user and security is disabled, check + //if user is specified in the environment or properties + if (!isSecurityEnabled() && (user == null)) { + String envUser = System.getenv(HADOOP_USER_NAME); + if (envUser == null) { + envUser = System.getProperty(HADOOP_USER_NAME); + } + user = envUser == null ? null : new User(envUser); + } + // use the OS user + if (user == null) { + user = getCanonicalUser(OS_PRINCIPAL_CLASS); + if (LOG.isDebugEnabled()) { + LOG.debug("using local user:"+user); + } + } + // if we found the user, add our principal + if (user != null) { + if (LOG.isDebugEnabled()) { + LOG.debug("Using user: \"" + user + "\" with name " + user.getName()); + } + + User userEntry = null; + try { + // LoginContext will be attached later unless it's an external + // subject. + AuthenticationMethod authMethod = (user instanceof KerberosPrincipal) + ? AuthenticationMethod.KERBEROS : AuthenticationMethod.SIMPLE; + userEntry = new User(user.getName(), authMethod, null); + } catch (Exception e) { + throw (LoginException)(new LoginException(e.toString()).initCause(e)); + } + if (LOG.isDebugEnabled()) { + LOG.debug("User entry: \"" + userEntry.toString() + "\"" ); + } + + subject.getPrincipals().add(userEntry); + return true; + } + LOG.error("Can't find user in " + subject); + throw new LoginException("Can't find user name"); + } + + @Override + public void initialize(Subject subject, CallbackHandler callbackHandler, + Map sharedState, Map options) { + this.subject = subject; + } + + @Override + public boolean login() throws LoginException { + if (LOG.isDebugEnabled()) { + LOG.debug("hadoop login"); + } + return true; + } + + @Override + public boolean logout() throws LoginException { + if (LOG.isDebugEnabled()) { + LOG.debug("hadoop logout"); + } + return true; + } + } + + private static class RealUser implements Principal { + private final UserGroupInformation realUser; + + RealUser(UserGroupInformation realUser) { + this.realUser = realUser; + } + + @Override + public String getName() { + return realUser.getUserName(); + } + + public UserGroupInformation getRealUser() { + return realUser; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } else if (o == null || getClass() != o.getClass()) { + return false; + } else { + return realUser.equals(((RealUser) o).realUser); + } + } + + @Override + public int hashCode() { + return realUser.hashCode(); + } + + @Override + public String toString() { + return realUser.toString(); + } + } + + /** + * This class is used for storing the groups for testing. It stores a local + * map that has the translation of usernames to groups. 
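The commit() fallback above is what makes the HADOOP_USER_NAME environment variable (or system property) win when security is disabled and no Kerberos principal is present. A small sketch; note the property must be set before the first UGI login happens in the JVM, and the value "taier" is just an example:

    import org.apache.hadoop.security.UserGroupInformation;

    public class HadoopUserNameSketch {
      public static void main(String[] args) throws Exception {
        // Only consulted when security is off and no Kerberos principal exists.
        System.setProperty("HADOOP_USER_NAME", "taier");

        UserGroupInformation ugi = UserGroupInformation.getCurrentUser();
        System.out.println(ugi.getUserName());              // taier
        System.out.println(ugi.getAuthenticationMethod());  // SIMPLE
      }
    }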
+ */ + private static class TestingGroups extends Groups { + private final Map> userToGroupsMapping = + new HashMap>(); + private Groups underlyingImplementation; + + private TestingGroups(Groups underlyingImplementation) { + super(new Configuration()); + this.underlyingImplementation = underlyingImplementation; + } + + @Override + public List getGroups(String user) throws IOException { + List result = userToGroupsMapping.get(user); + + if (result == null) { + result = underlyingImplementation.getGroups(user); + } + + return result; + } + + private void setUserGroups(String user, String[] groups) { + userToGroupsMapping.put(user, Arrays.asList(groups)); + } + } + + // explicitly private to prevent external tampering. + private static class LoginParams extends EnumMap + implements Parameters { + LoginParams() { + super(LoginParam.class); + } + + static LoginParams getDefaults() { + LoginParams params = new LoginParams(); + params.put(LoginParam.PRINCIPAL, System.getenv("KRB5PRINCIPAL")); + params.put(LoginParam.KEYTAB, System.getenv("KRB5KEYTAB")); + params.put(LoginParam.CCACHE, System.getenv("KRB5CCNAME")); + return params; + } + + // do not add null values, nor allow existing values to be overriden. + @Override + public String put(LoginParam param, String val) { + boolean add = val != null && !containsKey(param); + return add ? super.put(param, val) : null; + } + } + + // wrapper to allow access to fields necessary to recreate the same login + // context for relogin. explicitly private to prevent external tampering. + private static class HadoopLoginContext extends LoginContext { + private final String appName; + private final HadoopConfiguration conf; + + HadoopLoginContext(String appName, Subject subject, + HadoopConfiguration conf) throws LoginException { + super(appName, subject, null, conf); + this.appName = appName; + this.conf = conf; + } + + String getAppName() { + return appName; + } + + HadoopConfiguration getConfiguration() { + return conf; + } + + // the locking model for logins cannot rely on ugi instance synchronization + // since a subject will be referenced by multiple ugi instances. + Object getSubjectLock() { + Subject subject = getSubject(); + // if subject is null, the login context will create the subject + // so just lock on this context. + return (subject == null) ? this : subject.getPrivateCredentials(); + } + + @Override + public void login() throws LoginException { + synchronized(getSubjectLock()) { + MutableRate metric = metrics.loginFailure; + long start = Time.monotonicNow(); + try { + super.login(); + metric = metrics.loginSuccess; + } finally { + metric.add(Time.monotonicNow() - start); + } + } + } + + @Override + public void logout() throws LoginException { + synchronized(getSubjectLock()) { + if (this.getSubject() != null + && !this.getSubject().getPrivateCredentials().isEmpty()) { + super.logout(); + } + } + } + } + + /** + * A JAAS configuration that defines the login modules that we want + * to use for login. 
+ */ + @InterfaceAudience.Private + @InterfaceStability.Unstable + private static class HadoopConfiguration + extends javax.security.auth.login.Configuration { + static final String KRB5_LOGIN_MODULE = + KerberosUtil.getKrb5LoginModuleName(); + static final String SIMPLE_CONFIG_NAME = "hadoop-simple"; + static final String KERBEROS_CONFIG_NAME = "hadoop-kerberos"; + + private static final Map BASIC_JAAS_OPTIONS = + new HashMap(); + static final AppConfigurationEntry OS_SPECIFIC_LOGIN = + new AppConfigurationEntry( + OS_LOGIN_MODULE_NAME, + LoginModuleControlFlag.REQUIRED, + BASIC_JAAS_OPTIONS); + static final AppConfigurationEntry HADOOP_LOGIN = + new AppConfigurationEntry( + HadoopLoginModule.class.getName(), + LoginModuleControlFlag.REQUIRED, + BASIC_JAAS_OPTIONS); + + static { + if ("true".equalsIgnoreCase(System.getenv("HADOOP_JAAS_DEBUG"))) { + BASIC_JAAS_OPTIONS.put("debug", "true"); + } + } + + private final LoginParams params; + + HadoopConfiguration(LoginParams params) { + this.params = params; + } + + private static String prependFileAuthority(String keytabPath) { + return keytabPath.startsWith("file://") + ? keytabPath + : "file://" + keytabPath; + } + + @Override + public LoginParams getParameters() { + return params; + } + + @Override + public AppConfigurationEntry[] getAppConfigurationEntry(String appName) { + ArrayList entries = new ArrayList<>(); + // login of external subject passes no params. technically only + // existing credentials should be used but other components expect + // the login to succeed with local user fallback if no principal. + if (params == null || appName.equals(SIMPLE_CONFIG_NAME)) { + entries.add(OS_SPECIFIC_LOGIN); + } else if (appName.equals(KERBEROS_CONFIG_NAME)) { + // existing semantics are the initial default login allows local user + // fallback. this is not allowed when a principal explicitly + // specified or during a relogin. + if (!params.containsKey(LoginParam.PRINCIPAL)) { + entries.add(OS_SPECIFIC_LOGIN); + } + entries.add(getKerberosEntry()); + } + entries.add(HADOOP_LOGIN); + return entries.toArray(new AppConfigurationEntry[0]); + } + + private AppConfigurationEntry getKerberosEntry() { + final Map options = new HashMap<>(BASIC_JAAS_OPTIONS); + LoginModuleControlFlag controlFlag = LoginModuleControlFlag.OPTIONAL; + // kerberos login is mandatory if principal is specified. principal + // will not be set for initial default login, but will always be set + // for relogins. + final String principal = params.get(LoginParam.PRINCIPAL); + if (principal != null) { + options.put("principal", principal); + controlFlag = LoginModuleControlFlag.REQUIRED; + } + + // use keytab if given else fallback to ticket cache. 
+ if (IBM_JAVA) { + if (params.containsKey(LoginParam.KEYTAB)) { + final String keytab = params.get(LoginParam.KEYTAB); + if (keytab != null) { + options.put("useKeytab", prependFileAuthority(keytab)); + } else { + options.put("useDefaultKeytab", "true"); + } + options.put("credsType", "both"); + } else { + String ticketCache = params.get(LoginParam.CCACHE); + if (ticketCache != null) { + options.put("useCcache", prependFileAuthority(ticketCache)); + } else { + options.put("useDefaultCcache", "true"); + } + options.put("renewTGT", "true"); + } + } else { + if (params.containsKey(LoginParam.KEYTAB)) { + options.put("useKeyTab", "true"); + final String keytab = params.get(LoginParam.KEYTAB); + if (keytab != null) { + options.put("keyTab", keytab); + } + options.put("storeKey", "true"); + } else { + options.put("useTicketCache", "true"); + String ticketCache = params.get(LoginParam.CCACHE); + if (ticketCache != null) { + options.put("ticketCache", ticketCache); + } + options.put("renewTGT", "true"); + } + options.put("doNotPrompt", "true"); + } + options.put("refreshKrb5Config", "true"); + + return new AppConfigurationEntry( + KRB5_LOGIN_MODULE, controlFlag, options); + } + } + + @VisibleForTesting + class AutoRenewalForUserCredsRunnable implements Runnable { + private KerberosTicket tgt; + private RetryPolicy rp; + private String kinitCmd; + private long nextRefresh; + private boolean runRenewalLoop = true; + + AutoRenewalForUserCredsRunnable(KerberosTicket tgt, String kinitCmd, + long nextRefresh){ + this.tgt = tgt; + this.kinitCmd = kinitCmd; + this.nextRefresh = nextRefresh; + this.rp = null; + } + + public void setRunRenewalLoop(boolean runRenewalLoop) { + this.runRenewalLoop = runRenewalLoop; + } + + @Override + public void run() { + do { + try { + long now = Time.now(); + if (LOG.isDebugEnabled()) { + LOG.debug("Current time is " + now); + LOG.debug("Next refresh is " + nextRefresh); + } + if (now < nextRefresh) { + Thread.sleep(nextRefresh - now); + } + String output = Shell.execCommand(kinitCmd, "-R"); + if (LOG.isDebugEnabled()) { + LOG.debug("Renewed ticket. kinit output: {}", output); + } + reloginFromTicketCache(); + tgt = getTGT(); + if (tgt == null) { + LOG.warn("No TGT after renewal. Aborting renew thread for " + + getUserName()); + return; + } + nextRefresh = Math.max(getRefreshTime(tgt), + now + kerberosMinSecondsBeforeRelogin); + metrics.renewalFailures.set(0); + rp = null; + } catch (InterruptedException ie) { + LOG.warn("Terminating renewal thread"); + return; + } catch (IOException ie) { + metrics.renewalFailuresTotal.incr(); + final long now = Time.now(); + + if (tgt.isDestroyed()) { + LOG.error("TGT is destroyed. Aborting renew thread for {}.", + getUserName()); + return; + } + + long tgtEndTime; + // As described in HADOOP-15593 we need to handle the case when + // tgt.getEndTime() throws NPE because of JDK issue JDK-8147772 + // NPE is only possible if this issue is not fixed in the JDK + // currently used + try { + tgtEndTime = tgt.getEndTime().getTime(); + } catch (NullPointerException npe) { + LOG.error("NPE thrown while getting KerberosTicket endTime. " + + "Aborting renew thread for {}.", getUserName()); + return; + } + + LOG.warn("Exception encountered while running the renewal " + + "command for {}. (TGT end time:{}, renewalFailures: {}," + + "renewalFailuresTotal: {})", getUserName(), tgtEndTime, + metrics.renewalFailures.value(), + metrics.renewalFailuresTotal.value(), ie); + if (rp == null) { + // Use a dummy maxRetries to create the policy. 
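For reference, the non-IBM keytab branch above is equivalent to hand-building a Krb5LoginModule entry with the same options. A hedged sketch against the plain JAAS API; the principal, keytab path and the OpenJDK module class name are assumptions, not values taken from the diff:

    import java.util.HashMap;
    import java.util.Map;
    import javax.security.auth.login.AppConfigurationEntry;
    import javax.security.auth.login.AppConfigurationEntry.LoginModuleControlFlag;

    public class KerberosJaasSketch {
      static AppConfigurationEntry keytabEntry() {
        Map<String, String> options = new HashMap<>();
        options.put("principal", "taier/host@EXAMPLE.COM");          // placeholder
        options.put("useKeyTab", "true");
        options.put("keyTab", "/etc/security/keytabs/taier.keytab"); // placeholder
        options.put("storeKey", "true");
        options.put("doNotPrompt", "true");
        options.put("refreshKrb5Config", "true");
        return new AppConfigurationEntry(
            "com.sun.security.auth.module.Krb5LoginModule",
            LoginModuleControlFlag.REQUIRED, options);
      }
    }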
The policy will + // only be used to get next retry time with exponential back-off. + // The final retry time will be later limited within the + // tgt endTime in getNextTgtRenewalTime. + rp = RetryPolicies.exponentialBackoffRetry(Long.SIZE - 2, + kerberosMinSecondsBeforeRelogin, TimeUnit.MILLISECONDS); + } + try { + nextRefresh = getNextTgtRenewalTime(tgtEndTime, now, rp); + } catch (Exception e) { + LOG.error("Exception when calculating next tgt renewal time", e); + return; + } + metrics.renewalFailures.incr(); + // retry until close enough to tgt endTime. + if (now > nextRefresh) { + LOG.error("TGT is expired. Aborting renew thread for {}.", + getUserName()); + return; + } + } + } while (runRenewalLoop); + } + } } diff --git a/taier-worker/taier-worker-plugin/spark/yarn-hdfs-spark320-core/src/main/scala/org/apache/spark/deploy/yarn/DtClient.scala b/taier-worker/taier-worker-plugin/spark/yarn-hdfs-spark320-core/src/main/scala/org/apache/spark/deploy/yarn/DtClient.scala new file mode 100644 index 0000000000..f0e8367a01 --- /dev/null +++ b/taier-worker/taier-worker-plugin/spark/yarn-hdfs-spark320-core/src/main/scala/org/apache/spark/deploy/yarn/DtClient.scala @@ -0,0 +1,1646 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.deploy.yarn + +import com.dtstack.taier.spark.common.constant.ConfigConstant +import com.dtstack.taier.sparkyarn.sparkyarn.constant.SparkConstants +import com.google.common.base.Objects +import com.google.common.io.Files +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs._ +import org.apache.hadoop.fs.permission.FsPermission +import org.apache.hadoop.io.Text +import org.apache.hadoop.mapreduce.MRJobConfig +import org.apache.hadoop.security.UserGroupInformation +import org.apache.hadoop.util.StringUtils +import org.apache.hadoop.yarn.api.ApplicationConstants.Environment +import org.apache.hadoop.yarn.api._ +import org.apache.hadoop.yarn.api.protocolrecords._ +import org.apache.hadoop.yarn.api.records._ +import org.apache.hadoop.yarn.client.api.{YarnClient, YarnClientApplication} +import org.apache.hadoop.yarn.conf.YarnConfiguration +import org.apache.hadoop.yarn.exceptions.ApplicationNotFoundException +import org.apache.hadoop.yarn.security.AMRMTokenIdentifier +import org.apache.hadoop.yarn.util.Records +import org.apache.spark.api.python.PythonUtils +import org.apache.spark.deploy.security.HadoopDelegationTokenManager +import org.apache.spark.deploy.yarn.DtConfig.{KRB5FILENAME, KRB5_CONF} +import org.apache.spark.deploy.yarn.ResourceRequestHelper._ +import org.apache.spark.deploy.yarn.config._ +import org.apache.spark.deploy.{SparkApplication, SparkHadoopUtil} +import org.apache.spark.internal.Logging +import org.apache.spark.internal.config.Python._ +import org.apache.spark.internal.config._ +import org.apache.spark.launcher.{LauncherBackend, SparkAppHandle, YarnCommandBuilderUtils} +import org.apache.spark.resource.ResourceProfile +import org.apache.spark.util.{CallerContext, Utils, YarnContainerInfoHelper} +import org.apache.spark.{SecurityManager, SparkConf, SparkException} + +import java.io.{FileSystem => _, _} +import java.net.{InetAddress, URI, UnknownHostException} +import java.nio.ByteBuffer +import java.nio.charset.{Charset, StandardCharsets} +import java.util.zip.{ZipEntry, ZipOutputStream} +import java.util.{Locale, Properties, UUID} +import scala.collection.JavaConverters._ +import scala.collection.immutable.{Map => IMap} +import scala.collection.mutable.{ArrayBuffer, HashMap, HashSet, ListBuffer, Map} +import scala.util.control.NonFatal + +private[spark] class DtClient( + val args: ClientArguments, + val hadoopConfstr: Configuration, + val sparkConf: SparkConf, + val yarnClient: YarnClient) + extends Logging { + + import DtClient._ + import YarnSparkHadoopUtil._ + + // Executor offHeap memory in MiB. 
+ protected val executorOffHeapMemory = Utils.executorOffHeapMemorySizeAsMb(sparkConf) +// private val hadoopConf = new YarnConfiguration(SparkHadoopUtil.newConfiguration(sparkConf)) + private val hadoopConf = new YarnConfiguration(hadoopConfstr) + private val isClusterMode = sparkConf.get(SUBMIT_DEPLOY_MODE) == "cluster" + private val isClientUnmanagedAMEnabled = sparkConf.get(YARN_UNMANAGED_AM) && !isClusterMode + // AM related configurations + private val amMemory = if (isClusterMode) { + sparkConf.get(DRIVER_MEMORY).toInt + } else { + sparkConf.get(AM_MEMORY).toInt + } + private val amMemoryOverhead = { + val amMemoryOverheadEntry = if (isClusterMode) DRIVER_MEMORY_OVERHEAD else AM_MEMORY_OVERHEAD + sparkConf.get(amMemoryOverheadEntry).getOrElse( + math.max((MEMORY_OVERHEAD_FACTOR * amMemory).toLong, + ResourceProfile.MEMORY_OVERHEAD_MIN_MIB)).toInt + } + private val amCores = if (isClusterMode) { + sparkConf.get(DRIVER_CORES) + } else { + sparkConf.get(AM_CORES) + } + // Executor related configurations + private val executorMemory = sparkConf.get(EXECUTOR_MEMORY) + private val executorMemoryOverhead = sparkConf.get(EXECUTOR_MEMORY_OVERHEAD).getOrElse( + math.max((MEMORY_OVERHEAD_FACTOR * executorMemory).toLong, + ResourceProfile.MEMORY_OVERHEAD_MIN_MIB)).toInt + private val isPython = sparkConf.get(IS_PYTHON_APP) + private val pysparkWorkerMemory: Int = if (isPython) { + sparkConf.get(PYSPARK_EXECUTOR_MEMORY).map(_.toInt).getOrElse(0) + } else { + 0 + } + private val distCacheMgr = new ClientDistributedCacheManager() + private val cachedResourcesConf = new SparkConf(false) + private val krb5 = sparkConf.get(KRB5_CONF).orNull + private val krb5FileName = KRB5FILENAME + private val keytab = sparkConf.get(KEYTAB).orNull + private val amKeytabFileName: Option[String] = if (keytab != null && isClusterMode) { + val principal = sparkConf.get(PRINCIPAL).orNull + require((principal == null) == (keytab == null), + "Both principal and keytab must be defined, or neither.") + logInfo(s"Kerberos credentials: principal = $principal, keytab = $keytab") + // Generate a file name that can be used for the keytab file, that does not conflict + // with any user file. + Some(new File(keytab).getName() + "-" + UUID.randomUUID().toString) + } else { + None + } + private val launcherBackend = new LauncherBackend() { + override protected def conf: SparkConf = sparkConf + + override def onStopRequest(): Unit = { + if (isClusterMode && appId != null) { + yarnClient.killApplication(appId) + } else { + setState(SparkAppHandle.State.KILLED) + stop() + } + } + } + private val fireAndForget = isClusterMode && !sparkConf.get(WAIT_FOR_APP_COMPLETION) + + require(keytab == null || !Utils.isLocalUri(keytab), "Keytab should reference a local file.") + private var appMaster: ApplicationMaster = _ + private var stagingDirPath: Path = _ + private var appId: ApplicationId = null + + def stop(): Unit = { + if (appMaster != null) { + appMaster.stopUnmanaged(stagingDirPath) + } + launcherBackend.close() + yarnClient.stop() + } + + /** + * Submit an application to the ResourceManager. + * If set spark.yarn.submit.waitAppCompletion to true, it will stay alive + * reporting the application's status until the application has exited for any reason. + * Otherwise, the client process will exit after submission. + * If the application finishes with a failed, killed, or undefined status, + * throw an appropriate SparkException. 
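The overhead fields above follow Spark's usual fallback: overhead = max(factor * memory, minimum) when no explicit *.memoryOverhead is configured. A tiny arithmetic sketch, assuming the customary values of 0.10 for MEMORY_OVERHEAD_FACTOR and 384 MiB for ResourceProfile.MEMORY_OVERHEAD_MIN_MIB (the constants referenced above, whose values are not shown in this diff):

    public class MemoryOverheadSketch {
      // Mirrors the fallback used above when no explicit overhead is configured.
      static long defaultOverheadMib(long memoryMib) {
        return Math.max((long) (0.10 * memoryMib), 384L);
      }

      public static void main(String[] args) {
        System.out.println(defaultOverheadMib(1024));  // 384 (10% is below the minimum)
        System.out.println(defaultOverheadMib(8192));  // 819
      }
    }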
+ */ + def run(): Unit = { + this.appId = submitApplication() + if (!launcherBackend.isConnected() && fireAndForget) { + val report = getApplicationReport(appId) + val state = report.getYarnApplicationState + logInfo(s"Application report for $appId (state: $state)") + logInfo(formatReportDetails(report, getDriverLogsLink(report))) + if (state == YarnApplicationState.FAILED || state == YarnApplicationState.KILLED) { + throw new SparkException(s"Application $appId finished with status: $state") + } + } else { + val YarnAppReport(appState, finalState, diags) = monitorApplication(appId) + if (appState == YarnApplicationState.FAILED || finalState == FinalApplicationStatus.FAILED) { + diags.foreach { err => + logError(s"Application diagnostics message: $err") + } + throw new SparkException(s"Application $appId finished with failed status") + } + if (appState == YarnApplicationState.KILLED || finalState == FinalApplicationStatus.KILLED) { + throw new SparkException(s"Application $appId is killed") + } + if (finalState == FinalApplicationStatus.UNDEFINED) { + throw new SparkException(s"The final status of application $appId is undefined") + } + } + } + + /** + * Submit an application running our ApplicationMaster to the ResourceManager. + * + * The stable Yarn API provides a convenience method (YarnClient#createApplication) for + * creating applications and setting up the application submission context. This was not + * available in the alpha API. + */ + def submitApplication(priority : Int = 0): ApplicationId = { + logInfo("submit action") + ResourceRequestHelper.validateResources(sparkConf) + + var appId: ApplicationId = null + try { + launcherBackend.connect() + + logInfo("Requesting a new application from cluster with %d NodeManagers" + .format(yarnClient.getYarnClusterMetrics.getNumNodeManagers)) + + // Get a new application from our RM + val newApp = yarnClient.createApplication() + val newAppResponse = newApp.getNewApplicationResponse() + appId = newAppResponse.getApplicationId() + + // The app staging dir based on the STAGING_DIR configuration if configured + // otherwise based on the users home directory. + // scalastyle:off FileSystemGet + val appStagingBaseDir = sparkConf.get(STAGING_DIR) + .map { new Path(_, UserGroupInformation.getCurrentUser.getShortUserName) } + .getOrElse(FileSystem.get(hadoopConf).getHomeDirectory()) + stagingDirPath = new Path(appStagingBaseDir, getAppStagingDir(appId)) + // scalastyle:on FileSystemGet + + new CallerContext("CLIENT", sparkConf.get(APP_CALLER_CONTEXT), + Option(appId.toString)).setCurrentContext() + + // Verify whether the cluster has enough resources for our AM + verifyClusterResources(newAppResponse) + + // Set up the appropriate contexts to launch our AM + val containerContext = createContainerLaunchContext(newAppResponse) + val appContext = createApplicationSubmissionContext(newApp, containerContext) + + // Finally, submit and monitor the application + logInfo(s"Submitting application $appId to ResourceManager") + yarnClient.submitApplication(appContext) + launcherBackend.setAppId(appId.toString) + reportLauncherState(SparkAppHandle.State.SUBMITTED) + + appId + } catch { + case e: Throwable => + if (stagingDirPath != null) { + cleanupStagingDir() + } + throw e + } + } + + /** + * Set up the context for submitting our ApplicationMaster. + * This uses the YarnClientApplication not available in the Yarn alpha API. 
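run() above decides success or failure from the YARN application report. A hedged Java sketch of the same status check against a stock YarnClient; the application id string is a placeholder, where DtClient would use the id returned by submitApplication():

    import org.apache.hadoop.yarn.api.records.ApplicationId;
    import org.apache.hadoop.yarn.api.records.ApplicationReport;
    import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
    import org.apache.hadoop.yarn.api.records.YarnApplicationState;
    import org.apache.hadoop.yarn.client.api.YarnClient;
    import org.apache.hadoop.yarn.conf.YarnConfiguration;

    public class AppReportSketch {
      public static void main(String[] args) throws Exception {
        YarnClient yarnClient = YarnClient.createYarnClient();
        yarnClient.init(new YarnConfiguration());
        yarnClient.start();

        // Placeholder id for illustration only.
        ApplicationId appId = ApplicationId.fromString("application_0000000000000_0001");
        ApplicationReport report = yarnClient.getApplicationReport(appId);

        YarnApplicationState state = report.getYarnApplicationState();
        FinalApplicationStatus finalStatus = report.getFinalApplicationStatus();
        if (state == YarnApplicationState.FAILED
            || finalStatus == FinalApplicationStatus.FAILED) {
          throw new RuntimeException("Application " + appId + " failed: "
              + report.getDiagnostics());
        }
        yarnClient.stop();
      }
    }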
+ */ + def createApplicationSubmissionContext( + newApp: YarnClientApplication, + containerContext: ContainerLaunchContext): ApplicationSubmissionContext = { + + val componentName = if (isClusterMode) { + config.YARN_DRIVER_RESOURCE_TYPES_PREFIX + } else { + config.YARN_AM_RESOURCE_TYPES_PREFIX + } + val yarnAMResources = getYarnResourcesAndAmounts(sparkConf, componentName) + val amResources = yarnAMResources ++ + getYarnResourcesFromSparkResources(SPARK_DRIVER_PREFIX, sparkConf) + logDebug(s"AM resources: $amResources") + val appContext = newApp.getApplicationSubmissionContext + appContext.setApplicationName(sparkConf.get("spark.app.name", "Spark")) + appContext.setQueue(sparkConf.get(QUEUE_NAME)) + appContext.setAMContainerSpec(containerContext) + appContext.setApplicationType(sparkConf.get(APPLICATION_TYPE)) + + sparkConf.get(APPLICATION_TAGS).foreach { tags => + appContext.setApplicationTags(new java.util.HashSet[String](tags.asJava)) + } + sparkConf.get(MAX_APP_ATTEMPTS) match { + case Some(v) => appContext.setMaxAppAttempts(v) + case None => logDebug(s"${MAX_APP_ATTEMPTS.key} is not set. " + + "Cluster's default value will be used.") + } + + sparkConf.get(AM_ATTEMPT_FAILURE_VALIDITY_INTERVAL_MS).foreach { interval => + appContext.setAttemptFailuresValidityInterval(interval) + } + + val capability = Records.newRecord(classOf[Resource]) + capability.setMemory(amMemory + amMemoryOverhead) + capability.setVirtualCores(amCores) + if (amResources.nonEmpty) { + ResourceRequestHelper.setResourceRequests(amResources, capability) + } + logDebug(s"Created resource capability for AM request: $capability") + + sparkConf.get(AM_NODE_LABEL_EXPRESSION) match { + case Some(expr) => + val amRequest = Records.newRecord(classOf[ResourceRequest]) + amRequest.setResourceName(ResourceRequest.ANY) + amRequest.setPriority(Priority.newInstance(0)) + amRequest.setCapability(capability) + amRequest.setNumContainers(1) + amRequest.setNodeLabelExpression(expr) + appContext.setAMContainerResourceRequest(amRequest) + case None => + appContext.setResource(capability) + } + + sparkConf.get(ROLLED_LOG_INCLUDE_PATTERN).foreach { includePattern => + try { + val logAggregationContext = Records.newRecord(classOf[LogAggregationContext]) + logAggregationContext.setRolledLogsIncludePattern(includePattern) + sparkConf.get(ROLLED_LOG_EXCLUDE_PATTERN).foreach { excludePattern => + logAggregationContext.setRolledLogsExcludePattern(excludePattern) + } + appContext.setLogAggregationContext(logAggregationContext) + } catch { + case NonFatal(e) => + logWarning(s"Ignoring ${ROLLED_LOG_INCLUDE_PATTERN.key} because the version of YARN " + + "does not support it", e) + } + } + appContext.setUnmanagedAM(isClientUnmanagedAMEnabled) + + sparkConf.get(APPLICATION_PRIORITY).foreach { appPriority => + appContext.setPriority(Priority.newInstance(appPriority)) + } + appContext + } + + /** + * Fail fast if we have requested more resources per container than is available in the cluster. 
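Most of the submission-context fields above are driven by plain spark.yarn.* settings rather than code changes. A hedged configuration sketch covering queue, tags, max attempts and priority; the values are examples only:

    import org.apache.spark.SparkConf;

    public class YarnSubmissionConfSketch {
      public static void main(String[] args) {
        SparkConf conf = new SparkConf()
            .set("spark.app.name", "taier-example")
            .set("spark.yarn.queue", "root.default")   // appContext.setQueue
            .set("spark.yarn.tags", "taier,batch")     // appContext.setApplicationTags
            .set("spark.yarn.maxAppAttempts", "2")     // appContext.setMaxAppAttempts
            .set("spark.yarn.priority", "10");         // appContext.setPriority
        System.out.println(conf.toDebugString());
      }
    }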
+ */ + private def verifyClusterResources(newAppResponse: GetNewApplicationResponse): Unit = { + val maxMem = newAppResponse.getMaximumResourceCapability().getMemory() + logInfo("Verifying our application has not requested more than the maximum " + + s"memory capability of the cluster ($maxMem MB per container)") + val executorMem = + executorMemory + executorOffHeapMemory + executorMemoryOverhead + pysparkWorkerMemory + if (executorMem > maxMem) { + throw new IllegalArgumentException(s"Required executor memory ($executorMemory MB), " + + s"offHeap memory ($executorOffHeapMemory) MB, overhead ($executorMemoryOverhead MB), " + + s"and PySpark memory ($pysparkWorkerMemory MB) is above the max threshold ($maxMem MB) " + + "of this cluster! Please check the values of 'yarn.scheduler.maximum-allocation-mb' " + + "and/or 'yarn.nodemanager.resource.memory-mb'.") + } + val amMem = amMemory + amMemoryOverhead + if (amMem > maxMem) { + throw new IllegalArgumentException(s"Required AM memory ($amMemory" + + s"+$amMemoryOverhead MB) is above the max threshold ($maxMem MB) of this cluster! " + + "Please check the values of 'yarn.scheduler.maximum-allocation-mb' and/or " + + "'yarn.nodemanager.resource.memory-mb'.") + } + logInfo("Will allocate AM container, with %d MB memory including %d MB overhead".format( + amMem, + amMemoryOverhead)) + + // We could add checks to make sure the entire cluster has enough resources but that involves + // getting all the node reports and computing ourselves. + } + + /** + * Set up a ContainerLaunchContext to launch our ApplicationMaster container. + * This sets up the launch environment, java options, and the command for launching the AM. + */ + private def createContainerLaunchContext(newAppResponse: GetNewApplicationResponse) + : ContainerLaunchContext = { + logInfo("Setting up container launch context for our AM") + val appId = newAppResponse.getApplicationId + val pySparkArchives = + if (sparkConf.get(IS_PYTHON_APP)) { + findPySparkArchives() + } else { + Nil + } + + val launchEnv = setupLaunchEnv(stagingDirPath, pySparkArchives) + val localResources = prepareLocalResources(stagingDirPath, pySparkArchives) + + val amContainer = Records.newRecord(classOf[ContainerLaunchContext]) + amContainer.setLocalResources(localResources.asJava) + amContainer.setEnvironment(launchEnv.asJava) + + val javaOpts = ListBuffer[String]() + + // Set the environment variable through a command prefix + // to append to the existing value of the variable + var prefixEnv: Option[String] = None + + // Add Xmx for AM memory + javaOpts += "-Xmx" + amMemory + "m" + + val tmpDir = new Path(Environment.PWD.$$(), YarnConfiguration.DEFAULT_CONTAINER_TEMP_DIR) + javaOpts += "-Djava.io.tmpdir=" + tmpDir + + // TODO: Remove once cpuset version is pushed out. + // The context is, default gc for server class machines ends up using all cores to do gc - + // hence if there are multiple containers in same node, Spark GC affects all other containers' + // performance (which can be that of other Spark containers) + // Instead of using this, rely on cpusets by YARN to enforce "proper" Spark behavior in + // multi-tenant environments. Not sure how default Java GC behaves if it is limited to subset + // of cores on a node. 
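verifyClusterResources above boils down to one comparison per container: the sum of heap, off-heap, overhead and PySpark memory must fit inside the maximum YARN allocation. A small sketch of the same arithmetic with illustrative numbers:

    public class ClusterResourceCheckSketch {
      public static void main(String[] args) {
        long maxAllocMib = 8192;   // yarn.scheduler.maximum-allocation-mb
        long executorMemory = 4096, offHeap = 0, overhead = 410, pyspark = 0;

        long executorMem = executorMemory + offHeap + overhead + pyspark;
        if (executorMem > maxAllocMib) {
          throw new IllegalArgumentException(
              "Required executor memory " + executorMem
                  + " MB is above the max threshold " + maxAllocMib + " MB");
        }
        System.out.println("executor container request: " + executorMem + " MB");
      }
    }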
+ val useConcurrentAndIncrementalGC = launchEnv.get("SPARK_USE_CONC_INCR_GC").exists(_.toBoolean) + if (useConcurrentAndIncrementalGC) { + // In our expts, using (default) throughput collector has severe perf ramifications in + // multi-tenant machines + javaOpts += "-XX:+UseConcMarkSweepGC" + javaOpts += "-XX:MaxTenuringThreshold=31" + javaOpts += "-XX:SurvivorRatio=8" + javaOpts += "-XX:+CMSIncrementalMode" + javaOpts += "-XX:+CMSIncrementalPacing" + javaOpts += "-XX:CMSIncrementalDutyCycleMin=0" + javaOpts += "-XX:CMSIncrementalDutyCycle=10" + } + + // Include driver-specific java options if we are launching a driver + if (isClusterMode) { + sparkConf.get(DRIVER_JAVA_OPTIONS).foreach { opts => + javaOpts ++= Utils.splitCommandString(opts) + .map(Utils.substituteAppId(_, appId.toString)) + .map(YarnSparkHadoopUtil.escapeForShell) + } + val libraryPaths = Seq(sparkConf.get(DRIVER_LIBRARY_PATH), + sys.props.get("spark.driver.libraryPath")).flatten + if (libraryPaths.nonEmpty) { + prefixEnv = Some(createLibraryPathPrefix(libraryPaths.mkString(File.pathSeparator), + sparkConf)) + } + if (sparkConf.get(AM_JAVA_OPTIONS).isDefined) { + logWarning(s"${AM_JAVA_OPTIONS.key} will not take effect in cluster mode") + } + } else { + // Validate and include yarn am specific java options in yarn-client mode. + sparkConf.get(AM_JAVA_OPTIONS).foreach { opts => + if (opts.contains("-Dspark")) { + val msg = s"${AM_JAVA_OPTIONS.key} is not allowed to set Spark options (was '$opts')." + throw new SparkException(msg) + } + if (opts.contains("-Xmx")) { + val msg = s"${AM_JAVA_OPTIONS.key} is not allowed to specify max heap memory settings " + + s"(was '$opts'). Use spark.yarn.am.memory instead." + throw new SparkException(msg) + } + javaOpts ++= Utils.splitCommandString(opts) + .map(Utils.substituteAppId(_, appId.toString)) + .map(YarnSparkHadoopUtil.escapeForShell) + } + sparkConf.get(AM_LIBRARY_PATH).foreach { paths => + prefixEnv = Some(createLibraryPathPrefix(paths, sparkConf)) + } + } + + // For log4j configuration to reference + javaOpts += ("-Dspark.yarn.app.container.log.dir=" + ApplicationConstants.LOG_DIR_EXPANSION_VAR) + + val userClass = + if (isClusterMode) { + Seq("--class", YarnSparkHadoopUtil.escapeForShell(args.userClass)) + } else { + Nil + } + val userJar = + if (args.userJar != null) { + Seq("--jar", args.userJar) + } else { + Nil + } + val primaryPyFile = + if (isClusterMode && args.primaryPyFile != null) { + Seq("--primary-py-file", new Path(args.primaryPyFile).getName()) + } else { + Nil + } + val primaryRFile = + if (args.primaryRFile != null) { + Seq("--primary-r-file", args.primaryRFile) + } else { + Nil + } + val amClass = + if (isClusterMode) { + Utils.classForName("org.apache.spark.deploy.yarn.ApplicationMaster").getName + } else { + Utils.classForName("org.apache.spark.deploy.yarn.ExecutorLauncher").getName + } + if (args.primaryRFile != null && + (args.primaryRFile.endsWith(".R") || args.primaryRFile.endsWith(".r"))) { + args.userArgs = ArrayBuffer(args.primaryRFile) ++ args.userArgs + } + val userArgs = args.userArgs.flatMap { arg => + Seq("--arg", YarnSparkHadoopUtil.escapeForShell(arg)) + } + val amArgs = + Seq(amClass) ++ userClass ++ userJar ++ primaryPyFile ++ primaryRFile ++ userArgs ++ + Seq("--properties-file", + buildPath(Environment.PWD.$$(), LOCALIZED_CONF_DIR, SPARK_CONF_FILE)) ++ + Seq("--dist-cache-conf", + buildPath(Environment.PWD.$$(), LOCALIZED_CONF_DIR, DIST_CACHE_CONF_FILE)) + + // Command for the ApplicationMaster + val commands = prefixEnv ++ + 
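The validation above means spark.yarn.am.extraJavaOptions may not carry -Dspark.* or -Xmx flags in client mode (heap size belongs in spark.yarn.am.memory), while in cluster mode only the driver options are honoured. A hedged configuration sketch with example values:

    import org.apache.spark.SparkConf;

    public class AmJavaOptionsSketch {
      public static void main(String[] args) {
        SparkConf conf = new SparkConf()
            // Cluster mode: these reach the AM/driver JVM via the driver options.
            .set("spark.driver.extraJavaOptions", "-Dfile.encoding=UTF-8")
            // Client mode only; would be rejected if it contained -Xmx or -Dspark.*
            .set("spark.yarn.am.extraJavaOptions", "-XX:+UseG1GC")
            .set("spark.yarn.am.memory", "1g");
        System.out.println(conf.get("spark.yarn.am.memory"));
      }
    }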
Seq(Environment.JAVA_HOME.$$() + "/bin/java", "-server") ++ + javaOpts ++ amArgs ++ + Seq( + "1>", ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/stdout", + "2>", ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/stderr") + + // TODO: it would be nicer to just make sure there are no null commands here + val printableCommands = commands.map(s => if (s == null) "null" else s).toList + amContainer.setCommands(printableCommands.asJava) + + logDebug("===============================================================================") + logDebug("YARN AM launch context:") + logDebug(s" user class: ${Option(args.userClass).getOrElse("N/A")}") + logDebug(" env:") + if (log.isDebugEnabled) { + Utils.redact(sparkConf, launchEnv.toSeq).foreach { case (k, v) => + logDebug(s" $k -> $v") + } + } + logDebug(" resources:") + localResources.foreach { case (k, v) => logDebug(s" $k -> $v")} + logDebug(" command:") + logDebug(s" ${printableCommands.mkString(" ")}") + logDebug("===============================================================================") + + // send the acl settings into YARN to control who has access via YARN interfaces + val securityManager = new SecurityManager(sparkConf) + amContainer.setApplicationACLs( + YarnSparkHadoopUtil.getApplicationAclsForYarn(securityManager).asJava) + setupSecurityToken(amContainer) + amContainer + } + + /** + * Set up security tokens for launching our ApplicationMaster container. + * + * In client mode, a set of credentials has been obtained by the scheduler, so they are copied + * and sent to the AM. In cluster mode, new credentials are obtained and then sent to the AM, + * along with whatever credentials the current user already has. + */ + private def setupSecurityToken(amContainer: ContainerLaunchContext): Unit = { + val currentUser = UserGroupInformation.getCurrentUser() + val credentials = currentUser.getCredentials() + + if (isClusterMode) { + val credentialManager = new HadoopDelegationTokenManager(sparkConf, hadoopConf, null) + credentialManager.obtainDelegationTokens(credentials) + } + + val serializedCreds = SparkHadoopUtil.get.serialize(credentials) + amContainer.setTokens(ByteBuffer.wrap(serializedCreds)) + } + + /** + * Upload any resources to the distributed cache if needed. If a resource is intended to be + * consumed locally, set up the appropriate config for downstream code to handle it properly. + * This is used for setting up a container launch context for our ApplicationMaster. + * Exposed for testing. + */ + def prepareLocalResources( + destDir: Path, + pySparkArchives: Seq[String]): HashMap[String, LocalResource] = { + logInfo("Preparing resources for our AM container") + // Upload Spark and the application JAR to the remote file system if necessary, + // and add them as local resources to the application master. + if (sparkConf.get("security").equalsIgnoreCase("true")){ + initSecurity() + } + + val fs = destDir.getFileSystem(hadoopConf) + + // Used to keep track of URIs added to the distributed cache. If the same URI is added + // multiple times, YARN will fail to launch containers for the app with an internal + // error. + val distributedUris = new HashSet[String] + // Used to keep track of URIs(files) added to the distribute cache have the same name. If + // same name but different path files are added multiple time, YARN will fail to launch + // containers for the app with an internal error. 
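setupSecurityToken above serializes the current user's Credentials into the AM container launch context. The standard YARN idiom it corresponds to looks roughly like this; it is a sketch only, since DtClient delegates the serialization to SparkHadoopUtil instead of writing the bytes itself:

    import java.nio.ByteBuffer;
    import org.apache.hadoop.io.DataOutputBuffer;
    import org.apache.hadoop.security.Credentials;
    import org.apache.hadoop.security.UserGroupInformation;
    import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
    import org.apache.hadoop.yarn.util.Records;

    public class AmTokensSketch {
      static void attachTokens(ContainerLaunchContext amContainer) throws Exception {
        Credentials credentials = UserGroupInformation.getCurrentUser().getCredentials();
        DataOutputBuffer dob = new DataOutputBuffer();
        credentials.writeTokenStorageToStream(dob);
        amContainer.setTokens(ByteBuffer.wrap(dob.getData(), 0, dob.getLength()));
      }

      public static void main(String[] args) throws Exception {
        attachTokens(Records.newRecord(ContainerLaunchContext.class));
      }
    }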
+ val distributedNames = new HashSet[String] + + val replication = sparkConf.get(STAGING_FILE_REPLICATION).map(_.toShort) + .getOrElse(fs.getDefaultReplication(destDir)) + val localResources = HashMap[String, LocalResource]() + FileSystem.mkdirs(fs, destDir, new FsPermission(STAGING_DIR_PERMISSION)) + + val statCache: Map[URI, FileStatus] = HashMap[URI, FileStatus]() + val symlinkCache: Map[URI, Path] = HashMap[URI, Path]() + + def addDistributedUri(uri: URI): Boolean = { + val uriStr = uri.toString() + val fileName = new File(uri.getPath).getName + if (distributedUris.contains(uriStr)) { + logWarning(s"Same path resource $uri added multiple times to distributed cache.") + false + } else if (distributedNames.contains(fileName)) { + logWarning(s"Same name resource $uri added multiple times to distributed cache") + false + } else { + distributedUris += uriStr + distributedNames += fileName + true + } + } + + /* + * Distribute a file to the cluster. + * + * If the file's path is a "local:" URI, it's actually not distributed. Other files are copied + * to HDFS (if not already there) and added to the application's distributed cache. + * + * @param path URI of the file to distribute. + * @param resType Type of resource being distributed. + * @param destName Name of the file in the distributed cache. + * @param targetDir Subdirectory where to place the file. + * @param appMasterOnly Whether to distribute only to the AM. + * @return A 2-tuple. First item is whether the file is a "local:" URI. Second item is the + * localized path for non-local paths, or the input `path` for local paths. + * The localized path will be null if the URI has already been added to the cache. + */ + def distribute( + path: String, + resType: LocalResourceType = LocalResourceType.FILE, + destName: Option[String] = None, + targetDir: Option[String] = None, + appMasterOnly: Boolean = false): (Boolean, String) = { + val trimmedPath = path.trim() + val localURI = Utils.resolveURI(trimmedPath) + if (localURI.getScheme != Utils.LOCAL_SCHEME) { + if (addDistributedUri(localURI)) { + val localPath = getQualifiedLocalPath(localURI, hadoopConf) + val linkname = targetDir.map(_ + "/").getOrElse("") + + destName.orElse(Option(localURI.getFragment())).getOrElse(localPath.getName()) + val destPath = copyFileToRemote(destDir, localPath, replication, symlinkCache) + val destFs = FileSystem.get(destPath.toUri(), hadoopConf) + distCacheMgr.addResource( + destFs, hadoopConf, destPath, localResources, resType, linkname, statCache, + appMasterOnly = appMasterOnly) + (false, linkname) + } else { + (false, null) + } + } else { + (true, trimmedPath) + } + } + + val hadoopconfpath: String = sparkConf.get(ConfigConstant.SPARK_HADOOP_CONF_REMOTE_DIR) + distribute(hadoopconfpath, destName = Some(SparkConstants.HADOOP_CONF)) + + sparkConf.getOption(DtConfig.SPARK_UDFS_TO_DISTRIBUTE.key) match { + case Some(v) => v.split(",").foreach( + lib => { + val libPath = lib.split("/").last + distribute(lib, destName = Some(libPath), appMasterOnly = false) + } + ) + + case None => + } + + // If we passed in a keytab, make sure we copy the keytab to the staging directory on + // HDFS, and setup the relevant environment vars, so the AM can login again. 
+ amKeytabFileName.foreach { kt => + logInfo("To enable the AM to login from keytab, credentials are being copied over to the AM" + + " via the YARN Secure Distributed Cache.") + val remoteKeytab: String = sparkConf.get(ConfigConstant.SPARK_KERBEROS_REMOTE_KEYTAB) + val (_, localizedPath) = distribute(remoteKeytab, destName = Some(kt), appMasterOnly = true) + require(localizedPath != null, "Keytab file already distributed.") + } + + amKeytabFileName.foreach { kt => + logInfo("To enable the AM to login from keytab, credentials are being copied over to the AM" + + " via the YARN Secure Distributed Cache.") + val (_, localizedPath) = distribute(krb5, + destName = Some(krb5FileName)) + require(localizedPath != null, "Keytab file already distributed.") + } + + // If we passed in a ivySettings file, make sure we copy the file to the distributed cache + // in cluster mode so that the driver can access it + val ivySettings = sparkConf.getOption("spark.jars.ivySettings") + val ivySettingsLocalizedPath: Option[String] = ivySettings match { + case Some(ivySettingsPath) if isClusterMode => + val uri = new URI(ivySettingsPath) + Option(uri.getScheme).getOrElse("file") match { + case "file" => + val ivySettingsFile = new File(uri.getPath) + require(ivySettingsFile.exists(), s"Ivy settings file $ivySettingsFile not found") + require(ivySettingsFile.isFile(), s"Ivy settings file $ivySettingsFile is not a" + + "normal file") + // Generate a file name that can be used for the ivySettings file, that does not + // conflict with any user file. + val localizedFileName = Some(ivySettingsFile.getName() + "-" + + UUID.randomUUID().toString) + val (_, localizedPath) = distribute(ivySettingsPath, destName = localizedFileName) + require(localizedPath != null, "IvySettings file already distributed.") + Some(localizedPath) + case scheme => + throw new IllegalArgumentException(s"Scheme $scheme not supported in " + + "spark.jars.ivySettings") + } + case _ => None + } + + /** + * Add Spark to the cache. There are two settings that control what files to add to the cache: + * - if a Spark archive is defined, use the archive. The archive is expected to contain + * jar files at its root directory. + * - if a list of jars is provided, filter the non-local ones, resolve globs, and + * add the found files to the cache. + * + * Note that the archive cannot be a "local" URI. If none of the above settings are found, + * then upload all files found in $SPARK_HOME/jars. + */ + val sparkArchive = sparkConf.get(SPARK_ARCHIVE) + if (sparkArchive.isDefined) { + val archive = sparkArchive.get + require(!Utils.isLocalUri(archive), s"${SPARK_ARCHIVE.key} cannot be a local URI.") + distribute(Utils.resolveURI(archive).toString, + resType = LocalResourceType.ARCHIVE, + destName = Some(LOCALIZED_LIB_DIR)) + } else { + sparkConf.get(SPARK_JARS) match { + case Some(jars) => + // Break the list of jars to upload, and resolve globs. 
+ val localJars = new ArrayBuffer[String]() + jars.foreach { jar => + if (!Utils.isLocalUri(jar)) { + val path = getQualifiedLocalPath(Utils.resolveURI(jar), hadoopConf) + val pathFs = FileSystem.get(path.toUri(), hadoopConf) + val fss = pathFs.globStatus(path) + if (fss == null) { + throw new FileNotFoundException(s"Path ${path.toString} does not exist") + } + fss.filter(_.isFile()).foreach { entry => + val uri = entry.getPath().toUri() + statCache.update(uri, entry) + distribute(uri.toString(), targetDir = Some(LOCALIZED_LIB_DIR)) + } + } else { + localJars += jar + } + } + + // Propagate the local URIs to the containers using the configuration. + sparkConf.set(SPARK_JARS, localJars.toSeq) + + case None => + // No configuration, so fall back to uploading local jar files. + logWarning(s"Neither ${SPARK_JARS.key} nor ${SPARK_ARCHIVE.key} is set, falling back " + + "to uploading libraries under SPARK_HOME.") + val jarsDir = new File(YarnCommandBuilderUtils.findJarsDir( + sparkConf.getenv("SPARK_HOME"))) + val jarsArchive = File.createTempFile(LOCALIZED_LIB_DIR, ".zip", + new File(Utils.getLocalDir(sparkConf))) + val jarsStream = new ZipOutputStream(new FileOutputStream(jarsArchive)) + + try { + jarsStream.setLevel(0) + jarsDir.listFiles().foreach { f => + if (f.isFile && f.getName.toLowerCase(Locale.ROOT).endsWith(".jar") && f.canRead) { + jarsStream.putNextEntry(new ZipEntry(f.getName)) + Files.copy(f, jarsStream) + jarsStream.closeEntry() + } + } + } finally { + jarsStream.close() + } + + distribute(jarsArchive.toURI.getPath, + resType = LocalResourceType.ARCHIVE, + destName = Some(LOCALIZED_LIB_DIR)) + jarsArchive.delete() + } + } + + /** + * Copy user jar to the distributed cache if their scheme is not "local". + * Otherwise, set the corresponding key in our SparkConf to handle it downstream. + */ + Option(args.userJar).filter(_.trim.nonEmpty).foreach { jar => + val (isLocal, localizedPath) = distribute(jar, destName = Some(APP_JAR_NAME)) + if (isLocal) { + require(localizedPath != null, s"Path $jar already distributed") + // If the resource is intended for local use only, handle this downstream + // by setting the appropriate property + sparkConf.set(APP_JAR, localizedPath) + } + } + + /** + * Do the same for any additional resources passed in through ClientArguments. + * Each resource category is represented by a 3-tuple of: + * (1) comma separated list of resources in this category, + * (2) resource type, and + * (3) whether to add these resources to the classpath + */ + val cachedSecondaryJarLinks = ListBuffer.empty[String] + List( + (sparkConf.get(JARS_TO_DISTRIBUTE), LocalResourceType.FILE, true), + (sparkConf.get(FILES_TO_DISTRIBUTE), LocalResourceType.FILE, false), + (sparkConf.get(ARCHIVES_TO_DISTRIBUTE), LocalResourceType.ARCHIVE, false) + ).foreach { case (flist, resType, addToClasspath) => + flist.foreach { file => + val (_, localizedPath) = distribute(file, resType = resType) + // If addToClassPath, we ignore adding jar multiple times to distributed cache. 
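The jar-resolution branch above is controlled by two settings: spark.yarn.archive wins over spark.yarn.jars, and if neither is set everything under $SPARK_HOME/jars is zipped and uploaded. Additional resources ride along via the spark.yarn.dist.* lists. A hedged configuration sketch; the HDFS paths are placeholders:

    import org.apache.spark.SparkConf;

    public class DistCacheConfSketch {
      public static void main(String[] args) {
        SparkConf conf = new SparkConf()
            // Preferred: one pre-built archive of Spark jars already on HDFS.
            .set("spark.yarn.archive", "hdfs:///taier/spark/spark-libs.zip")
            // Extra resources for the distributed cache.
            .set("spark.yarn.dist.jars", "hdfs:///taier/udf/example-udf.jar")
            .set("spark.yarn.dist.files", "hdfs:///taier/conf/extra.properties");
        System.out.println(conf.toDebugString());
      }
    }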
+ if (addToClasspath) { + if (localizedPath != null) { + cachedSecondaryJarLinks += localizedPath + } + } else { + if (localizedPath == null) { + throw new IllegalArgumentException(s"Attempt to add ($file) multiple times" + + " to the distributed cache.") + } + } + } + } + if (cachedSecondaryJarLinks.nonEmpty) { + sparkConf.set(SECONDARY_JARS, cachedSecondaryJarLinks.toSeq) + } + + if (isClusterMode && args.primaryPyFile != null) { + distribute(args.primaryPyFile, appMasterOnly = true) + } + + pySparkArchives.foreach { f => + val uri = Utils.resolveURI(f) + if (uri.getScheme != Utils.LOCAL_SCHEME) { + distribute(f) + } + } + + // The python files list needs to be treated especially. All files that are not an + // archive need to be placed in a subdirectory that will be added to PYTHONPATH. + if (sparkConf.contains("spark.submit.pyFiles")) { + sparkConf.get("spark.submit.pyFiles").split(",").toSeq.foreach { f => + val targetDir = if (f.endsWith(".py")) Some(LOCALIZED_PYTHON_DIR) else None + distribute(f, targetDir = targetDir) + } + } + + // Update the configuration with all the distributed files, minus the conf archive. The + // conf archive will be handled by the AM differently so that we avoid having to send + // this configuration by other means. See SPARK-14602 for one reason of why this is needed. + distCacheMgr.updateConfiguration(cachedResourcesConf) + + // Upload the conf archive to HDFS manually, and record its location in the configuration. + // This will allow the AM to know where the conf archive is in HDFS, so that it can be + // distributed to the containers. + // + // This code forces the archive to be copied, so that unit tests pass (since in that case both + // file systems are the same and the archive wouldn't normally be copied). In most (all?) + // deployments, the archive would be copied anyway, since it's a temp file in the local file + // system. + val remoteConfArchivePath = new Path(destDir, LOCALIZED_CONF_ARCHIVE) + val remoteFs = FileSystem.get(remoteConfArchivePath.toUri(), hadoopConf) + cachedResourcesConf.set(CACHED_CONF_ARCHIVE, remoteConfArchivePath.toString()) + + val confsToOverride = Map.empty[String, String] + // If propagating the keytab to the AM, override the keytab name with the name of the + // distributed file. + amKeytabFileName.foreach { kt => confsToOverride.put("spark.yarn.keytab", kt) } + amKeytabFileName.foreach { kt => confsToOverride.put("spark.kerberos.keytab", kt) } + + // If propagating the ivySettings file to the distributed cache, override the ivySettings + // file name with the name of the distributed file. + ivySettingsLocalizedPath.foreach { path => + confsToOverride.put("spark.jars.ivySettings", path) + } + + val localConfArchive = new Path(createConfArchive(confsToOverride).toURI()) + copyFileToRemote(destDir, localConfArchive, replication, symlinkCache, force = true, + destName = Some(LOCALIZED_CONF_ARCHIVE)) + + // Manually add the config archive to the cache manager so that the AM is launched with + // the proper files set up. + distCacheMgr.addResource( + remoteFs, hadoopConf, remoteConfArchivePath, localResources, LocalResourceType.ARCHIVE, + LOCALIZED_CONF_DIR, statCache, appMasterOnly = false) + + localResources + } + + /** + * Copy the given file to a remote file system (e.g. HDFS) if needed. + * The file is only copied if the source and destination file systems are different or the source + * scheme is "file". This is used for preparing resources for launching the ApplicationMaster + * container. Exposed for testing. 
+ */ + private[yarn] def copyFileToRemote( + destDir: Path, + srcPath: Path, + replication: Short, + symlinkCache: Map[URI, Path], + force: Boolean = false, + destName: Option[String] = None): Path = { + val destFs = destDir.getFileSystem(hadoopConf) + val srcFs = srcPath.getFileSystem(hadoopConf) + var destPath = srcPath + if (force || !compareFs(srcFs, destFs) || "file".equals(srcFs.getScheme)) { + destPath = new Path(destDir, destName.getOrElse(srcPath.getName())) + logInfo(s"Uploading resource $srcPath -> $destPath") + try { + FileUtil.copy(srcFs, srcPath, destFs, destPath, false, hadoopConf) + } catch { + // HADOOP-16878 changes the behavior to throw exceptions when src equals to dest + case e: PathOperationException + if srcFs.makeQualified(srcPath).equals(destFs.makeQualified(destPath)) => + } + destFs.setReplication(destPath, replication) + destFs.setPermission(destPath, new FsPermission(APP_FILE_PERMISSION)) + } else { + logInfo(s"Source and destination file systems are the same. Not copying $srcPath") + } + // Resolve any symlinks in the URI path so using a "current" symlink to point to a specific + // version shows the specific version in the distributed cache configuration + val qualifiedDestPath = destFs.makeQualified(destPath) + val qualifiedDestDir = qualifiedDestPath.getParent + val resolvedDestDir = symlinkCache.getOrElseUpdate(qualifiedDestDir.toUri(), { + val fc = FileContext.getFileContext(qualifiedDestDir.toUri(), hadoopConf) + fc.resolvePath(qualifiedDestDir) + }) + new Path(resolvedDestDir, qualifiedDestPath.getName()) + } + + def initSecurity():Unit = { + val userPrincipal = sparkConf.get("spark.yarn.principal") + val userKeytabPath = sparkConf.get("spark.yarn.keytab") + UserGroupInformation.setConfiguration(hadoopConf) + UserGroupInformation.loginUserFromKeytab(userPrincipal, userKeytabPath) + } + + /** + * Create an archive with the config files for distribution. + * + * These will be used by AM and executors. The files are zipped and added to the job as an + * archive, so that YARN will explode it when distributing to AM and executors. This directory + * is then added to the classpath of AM and executor process, just to make sure that everybody + * is using the same default config. + * + * This follows the order of precedence set by the startup scripts, in which HADOOP_CONF_DIR + * shows up in the classpath before YARN_CONF_DIR. + * + * Currently this makes a shallow copy of the conf directory. If there are cases where a + * Hadoop config directory contains subdirectories, this code will have to be fixed. + * + * The archive also contains some Spark configuration. Namely, it saves the contents of + * SparkConf in a file to be loaded by the AM process. + */ + private def createConfArchive(confsToOverride: Map[String, String]): File = { + + val confArchive = File.createTempFile(LOCALIZED_CONF_DIR, ".zip", + new File(Utils.getLocalDir(sparkConf))) + val confStream = new ZipOutputStream(new FileOutputStream(confArchive)) + + logDebug(s"Creating an archive with the config files for distribution at $confArchive.") + try { + confStream.setLevel(0) + + confStream.putNextEntry(new ZipEntry("log4j.properties")) + val sparkLog4jContent: String = sparkConf.get("spark.log4j.content") + confStream.write(sparkLog4jContent.getBytes(Charset.forName("UTF-8"))) + confStream.closeEntry() + + + // Save Spark configuration to a file in the archive. 
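Before the Spark configuration itself is saved below, note that the spark.log4j.content key read just above is not a stock Spark setting: this client expects it to carry the complete log4j.properties text that ends up in the __spark_conf__ archive. A minimal sketch of populating it — the logger layout and object name are purely illustrative:

    import org.apache.spark.SparkConf

    object Log4jContentSketch {
      def main(args: Array[String]): Unit = {
        val log4jContent =
          """log4j.rootLogger=INFO, console
            |log4j.appender.console=org.apache.log4j.ConsoleAppender
            |log4j.appender.console.layout=org.apache.log4j.PatternLayout
            |log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n
            |""".stripMargin

        // createConfArchive() writes this string verbatim into the log4j.properties entry.
        val conf = new SparkConf().set("spark.log4j.content", log4jContent)
        println(conf.get("spark.log4j.content"))
      }
    }
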
+ val props = confToProperties(sparkConf) + confsToOverride.foreach { case (k, v) => props.setProperty(k, v)} + writePropertiesToArchive(props, SPARK_CONF_FILE, confStream) + + // Write the distributed cache config to the archive. + writePropertiesToArchive(confToProperties(cachedResourcesConf), DIST_CACHE_CONF_FILE, + confStream) + } finally { + confStream.close() + } + confArchive + } + + /** + * Set up the environment for launching our ApplicationMaster container. + */ + private def setupLaunchEnv( + stagingDirPath: Path, + pySparkArchives: Seq[String]): HashMap[String, String] = { + logInfo("Setting up the launch environment for our AM container") + val env = new HashMap[String, String]() + populateClasspath(args, hadoopConf, sparkConf, env, sparkConf.get(DRIVER_CLASS_PATH)) + env("SPARK_YARN_STAGING_DIR") = stagingDirPath.toString + env("SPARK_USER") = UserGroupInformation.getCurrentUser().getShortUserName() + + // Pick up any environment variables for the AM provided through spark.yarn.appMasterEnv.* + val amEnvPrefix = "spark.yarn.appMasterEnv." + sparkConf.getAll + .filter { case (k, v) => k.startsWith(amEnvPrefix) } + .map { case (k, v) => (k.substring(amEnvPrefix.length), v) } + .foreach { case (k, v) => YarnSparkHadoopUtil.addPathToEnvironment(env, k, v) } + + // If pyFiles contains any .py files, we need to add LOCALIZED_PYTHON_DIR to the PYTHONPATH + // of the container processes too. Add all non-.py files directly to PYTHONPATH. + // + // NOTE: the code currently does not handle .py files defined with a "local:" scheme. + val pythonPath = new ListBuffer[String]() + if (sparkConf.contains("spark.submit.pyFiles")) { + val (pyFiles, pyArchives) = sparkConf.get("spark.submit.pyFiles").split(",").toSeq.partition(_.endsWith(".py")) + if (pyFiles.nonEmpty) { + pythonPath += buildPath(Environment.PWD.$$(), LOCALIZED_PYTHON_DIR) + } + (pySparkArchives ++ pyArchives).foreach { path => + val uri = Utils.resolveURI(path) + if (uri.getScheme != Utils.LOCAL_SCHEME) { + pythonPath += buildPath(Environment.PWD.$$(), new Path(uri).getName()) + } else { + pythonPath += uri.getPath() + } + } + } + + // Finally, update the Spark config to propagate PYTHONPATH to the AM and executors. 
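As an aside, the spark.yarn.appMasterEnv.* prefix handled a few lines up is the standard way to pass environment variables to the AM container. A standalone sketch of how the prefix is stripped — the variable values and object name are hypothetical:

    import org.apache.spark.SparkConf

    object AmEnvSketch {
      def main(args: Array[String]): Unit = {
        val conf = new SparkConf()
          .set("spark.yarn.appMasterEnv.JAVA_HOME", "/usr/java/default")   // hypothetical value
          .set("spark.yarn.appMasterEnv.HADOOP_USER_NAME", "taier")        // hypothetical value

        val amEnvPrefix = "spark.yarn.appMasterEnv."
        val amEnv = conf.getAll
          .filter { case (k, _) => k.startsWith(amEnvPrefix) }
          .map { case (k, v) => k.substring(amEnvPrefix.length) -> v }
          .toMap

        // e.g. Map(JAVA_HOME -> /usr/java/default, HADOOP_USER_NAME -> taier)
        println(amEnv)
      }
    }
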
+ if (pythonPath.nonEmpty) { + val pythonPathList = (sys.env.get("PYTHONPATH") ++ pythonPath) + env("PYTHONPATH") = (env.get("PYTHONPATH") ++ pythonPathList) + .mkString(ApplicationConstants.CLASS_PATH_SEPARATOR) + val pythonPathExecutorEnv = (sparkConf.getExecutorEnv.toMap.get("PYTHONPATH") ++ + pythonPathList).mkString(ApplicationConstants.CLASS_PATH_SEPARATOR) + sparkConf.setExecutorEnv("PYTHONPATH", pythonPathExecutorEnv) + } + + if (isClusterMode) { + // propagate PYSPARK_DRIVER_PYTHON and PYSPARK_PYTHON to driver in cluster mode + Seq("PYSPARK_DRIVER_PYTHON", "PYSPARK_PYTHON").foreach { envname => + if (!env.contains(envname)) { + sys.env.get(envname).foreach(env(envname) = _) + } + } + sys.env.get("PYTHONHASHSEED").foreach(env.put("PYTHONHASHSEED", _)) + } + + Seq(ENV_DIST_CLASSPATH, SPARK_TESTING).foreach { envVar => + sys.env.get(envVar).foreach(value => env(envVar) = value) + } + + env + } + + private def findPySparkArchives(): Seq[String] = { + sys.env.get("PYSPARK_ARCHIVES_PATH") + .map(_.split(",").toSeq) + .getOrElse { + val pyLibPath = Seq(sys.env("SPARK_HOME"), "python", "lib").mkString(File.separator) + val pyArchivesFile = new File(pyLibPath, "pyspark.zip") + require(pyArchivesFile.exists(), + s"$pyArchivesFile not found; cannot run pyspark application in YARN mode.") + val py4jFile = new File(pyLibPath, PythonUtils.PY4J_ZIP_NAME) + require(py4jFile.exists(), + s"$py4jFile not found; cannot run pyspark application in YARN mode.") + Seq(pyArchivesFile.getAbsolutePath(), py4jFile.getAbsolutePath()) + } + } + + /** + * Report the state of an application until it has exited, either successfully or + * due to some failure, then return a pair of the yarn application state (FINISHED, FAILED, + * KILLED, or RUNNING) and the final application state (UNDEFINED, SUCCEEDED, FAILED, + * or KILLED). + * + * @param appId ID of the application to monitor. + * @param returnOnRunning Whether to also return the application state when it is RUNNING. + * @param logApplicationReport Whether to log details of the application report every iteration. + * @param interval How often to poll the YARN RM for application status (in ms). + * @return A pair of the yarn application state and the final application state. + */ + def monitorApplication( + appId: ApplicationId, + returnOnRunning: Boolean = false, + logApplicationReport: Boolean = true, + interval: Long = sparkConf.get(REPORT_INTERVAL)): YarnAppReport = { + var lastState: YarnApplicationState = null + while (true) { + Thread.sleep(interval) + val report: ApplicationReport = + try { + getApplicationReport(appId) + } catch { + case e: ApplicationNotFoundException => + logError(s"Application $appId not found.") + cleanupStagingDir() + return YarnAppReport(YarnApplicationState.KILLED, FinalApplicationStatus.KILLED, None) + case NonFatal(e) if !e.isInstanceOf[InterruptedIOException] => + val msg = s"Failed to contact YARN for application $appId." 
+ logError(msg, e) + // Don't necessarily clean up staging dir because status is unknown + return YarnAppReport(YarnApplicationState.FAILED, FinalApplicationStatus.FAILED, + Some(msg)) + } + val state = report.getYarnApplicationState + + if (logApplicationReport) { + logInfo(s"Application report for $appId (state: $state)") + + // If DEBUG is enabled, log report details every iteration + // Otherwise, log them every time the application changes state + if (log.isDebugEnabled) { + logDebug(formatReportDetails(report, getDriverLogsLink(report))) + } else if (lastState != state) { + logInfo(formatReportDetails(report, getDriverLogsLink(report))) + } + } + + if (lastState != state) { + state match { + case YarnApplicationState.RUNNING => + reportLauncherState(SparkAppHandle.State.RUNNING) + case YarnApplicationState.FINISHED => + report.getFinalApplicationStatus match { + case FinalApplicationStatus.FAILED => + reportLauncherState(SparkAppHandle.State.FAILED) + case FinalApplicationStatus.KILLED => + reportLauncherState(SparkAppHandle.State.KILLED) + case _ => + reportLauncherState(SparkAppHandle.State.FINISHED) + } + case YarnApplicationState.FAILED => + reportLauncherState(SparkAppHandle.State.FAILED) + case YarnApplicationState.KILLED => + reportLauncherState(SparkAppHandle.State.KILLED) + case _ => + } + } + + if (state == YarnApplicationState.FINISHED || + state == YarnApplicationState.FAILED || + state == YarnApplicationState.KILLED) { + cleanupStagingDir() + return createAppReport(report) + } + + if (returnOnRunning && state == YarnApplicationState.RUNNING) { + return createAppReport(report) + } + if (state == YarnApplicationState.ACCEPTED && isClientUnmanagedAMEnabled && + appMaster == null && report.getAMRMToken != null) { + appMaster = startApplicationMasterService(report) + } + lastState = state + } + + // Never reached, but keeps compiler happy + throw new SparkException("While loop is depleted! This should never happen...") + } + + def reportLauncherState(state: SparkAppHandle.State): Unit = { + launcherBackend.setState(state) + } + + /** + * Cleanup application staging directory. + */ + private def cleanupStagingDir(): Unit = { + if (sparkConf.get(PRESERVE_STAGING_FILES)) { + return + } + + def cleanupStagingDirInternal(): Unit = { + try { + val fs = stagingDirPath.getFileSystem(hadoopConf) + if (fs.delete(stagingDirPath, true)) { + logInfo(s"Deleted staging directory $stagingDirPath") + } + } catch { + case ioe: IOException => + logWarning("Failed to cleanup staging dir " + stagingDirPath, ioe) + } + } + + cleanupStagingDirInternal() + } + + /** Get the application report from the ResourceManager for an application we have submitted. 
*/ + def getApplicationReport(appId: ApplicationId): ApplicationReport = + yarnClient.getApplicationReport(appId) + + private def startApplicationMasterService(report: ApplicationReport): ApplicationMaster = { + // Add AMRMToken to establish connection between RM and AM + val token = report.getAMRMToken + val amRMToken: org.apache.hadoop.security.token.Token[AMRMTokenIdentifier] = + new org.apache.hadoop.security.token.Token[AMRMTokenIdentifier]( + token.getIdentifier().array(), token.getPassword().array, + new Text(token.getKind()), new Text(token.getService())) + val currentUGI = UserGroupInformation.getCurrentUser + currentUGI.addToken(amRMToken) + + // Start Application Service in a separate thread and continue with application monitoring + val appMaster = new ApplicationMaster( + new ApplicationMasterArguments(Array.empty), sparkConf, hadoopConf) + val amService = new Thread("Unmanaged Application Master Service") { + override def run(): Unit = { + appMaster.runUnmanaged(null, report.getCurrentApplicationAttemptId, + stagingDirPath, cachedResourcesConf) + } + } + amService.setDaemon(true) + amService.start() + appMaster + } + + /** + * Format an application report and optionally, links to driver logs, in a human-friendly manner. + * + * @param report The application report from YARN. + * @param driverLogsLinks A map of driver log files and their links. Keys are the file names + * (e.g. `stdout`), and values are the links. If empty, nothing will be + * printed. + * @return Human-readable version of the input data. + */ + private def formatReportDetails(report: ApplicationReport, + driverLogsLinks: IMap[String, String]): String = { + val details = Seq[(String, String)]( + ("client token", getClientToken(report)), + ("diagnostics", report.getDiagnostics), + ("ApplicationMaster host", report.getHost), + ("ApplicationMaster RPC port", report.getRpcPort.toString), + ("queue", report.getQueue), + ("start time", report.getStartTime.toString), + ("final status", report.getFinalApplicationStatus.toString), + ("tracking URL", report.getTrackingUrl), + ("user", report.getUser) + ) ++ driverLogsLinks.map { case (fname, link) => (s"Driver Logs ($fname)", link) } + + // Use more loggable format if value is null or empty + details.map { case (k, v) => + val newValue = Option(v).filter(_.nonEmpty).getOrElse("N/A") + s"\n\t $k: $newValue" + }.mkString("") + } + + /** + * Return the security token used by this client to communicate with the ApplicationMaster. + * If no security is enabled, the token returned by the report is null. + */ + private def getClientToken(report: ApplicationReport): String = + Option(report.getClientToAMToken).map(_.toString).getOrElse("") + + /** + * Fetch links to the logs of the driver for the given application report. This requires + * query the ResourceManager via RPC. Returns an empty map if the links could not be fetched. + * If this feature is disabled via [[CLIENT_INCLUDE_DRIVER_LOGS_LINK]], or if the application + * report indicates that the driver container isn't currently running, an empty map is + * returned immediately. 
+ */ + private def getDriverLogsLink(appReport: ApplicationReport): IMap[String, String] = { + if (!sparkConf.get(CLIENT_INCLUDE_DRIVER_LOGS_LINK) + || appReport.getYarnApplicationState != YarnApplicationState.RUNNING) { + return IMap.empty + } + try { + Option(appReport.getCurrentApplicationAttemptId) + .flatMap(attemptId => Option(yarnClient.getApplicationAttemptReport(attemptId))) + .flatMap(attemptReport => Option(attemptReport.getAMContainerId)) + .flatMap(amContainerId => Option(yarnClient.getContainerReport(amContainerId))) + .flatMap(containerReport => Option(containerReport.getLogUrl)) + .map(YarnContainerInfoHelper.getLogUrlsFromBaseUrl) + .getOrElse(IMap.empty) + } catch { + case e: Exception => + logWarning(s"Unable to get driver log links for $appId: $e") + // Include the full stack trace only at DEBUG level to reduce verbosity + logDebug(s"Unable to get driver log links for $appId", e) + IMap.empty + } + } + +} + +private object DtClient extends Logging { + + // Alias for the user jar + val APP_JAR_NAME: String = "__app__.jar" + + // Staging directory for any temporary jars or files + val SPARK_STAGING: String = ".sparkStaging" + + + // Staging directory is private! -> rwx-------- + val STAGING_DIR_PERMISSION: FsPermission = + FsPermission.createImmutable(Integer.parseInt("700", 8).toShort) + + // App files are world-wide readable and owner writable -> rw-r--r-- + val APP_FILE_PERMISSION: FsPermission = + FsPermission.createImmutable(Integer.parseInt("644", 8).toShort) + + // Distribution-defined classpath to add to processes + val ENV_DIST_CLASSPATH = "SPARK_DIST_CLASSPATH" + + // Subdirectory where the user's Spark and Hadoop config files will be placed. + val LOCALIZED_CONF_DIR = "__spark_conf__" + + // Subdirectory in the conf directory containing Hadoop config files. + val LOCALIZED_HADOOP_CONF_DIR = "__hadoop_conf__" + + // File containing the conf archive in the AM. See prepareLocalResources(). + val LOCALIZED_CONF_ARCHIVE = LOCALIZED_CONF_DIR + ".zip" + + // Name of the file in the conf archive containing Spark configuration. + val SPARK_CONF_FILE = "__spark_conf__.properties" + + // Name of the file in the conf archive containing the distributed cache info. + val DIST_CACHE_CONF_FILE = "__spark_dist_cache__.properties" + + // Subdirectory where the user's python files (not archives) will be placed. + val LOCALIZED_PYTHON_DIR = "__pyfiles__" + + // Subdirectory where Spark libraries will be placed. + val LOCALIZED_LIB_DIR = "__spark_libs__" + + val SPARK_TESTING = "SPARK_TESTING" + + /** + * Returns a list of URIs representing the user classpath. + * + * @param conf Spark configuration. + */ + def getUserClasspath(conf: SparkConf): Array[URI] = { + val mainUri = getMainJarUri(conf.get(APP_JAR)) + val secondaryUris = getSecondaryJarUris(conf.get(SECONDARY_JARS)) + (mainUri ++ secondaryUris).toArray + } + + private def getMainJarUri(mainJar: Option[String]): Option[URI] = { + mainJar.flatMap { path => + val uri = Utils.resolveURI(path) + if (uri.getScheme == Utils.LOCAL_SCHEME) Some(uri) else None + }.orElse(Some(new URI(APP_JAR_NAME))) + } + + private def getSecondaryJarUris(secondaryJars: Option[Seq[String]]): Seq[URI] = { + secondaryJars.getOrElse(Nil).map(new URI(_)) + } + + /** + * Whether to consider jars provided by the user to have precedence over the Spark jars when + * loading user classes. 
+ */ + def isUserClassPathFirst(conf: SparkConf, isDriver: Boolean): Boolean = { + if (isDriver) { + conf.get(DRIVER_USER_CLASS_PATH_FIRST) + } else { + conf.get(EXECUTOR_USER_CLASS_PATH_FIRST) + } + } + + def createAppReport(report: ApplicationReport): YarnAppReport = { + val diags = report.getDiagnostics() + val diagsOpt = if (diags != null && diags.nonEmpty) Some(diags) else None + YarnAppReport(report.getYarnApplicationState(), report.getFinalApplicationStatus(), diagsOpt) + } + + /** + * Create a properly quoted and escaped library path string to be added as a prefix to the command + * executed by YARN. This is different from normal quoting / escaping due to YARN executing the + * command through "bash -c". + */ + def createLibraryPathPrefix(libpath: String, conf: SparkConf): String = { + val cmdPrefix = if (Utils.isWindows) { + Utils.libraryPathEnvPrefix(Seq(libpath)) + } else { + val envName = Utils.libraryPathEnvName + // For quotes, escape both the quote and the escape character when encoding in the command + // string. + val quoted = libpath.replace("\"", "\\\\\\\"") + envName + "=\\\"" + quoted + File.pathSeparator + "$" + envName + "\\\"" + } + getClusterPath(conf, cmdPrefix) + } + + /** + * Returns the path to be sent to the NM for a path that is valid on the gateway. + * + * This method uses two configuration values: + * + * - spark.yarn.config.gatewayPath: a string that identifies a portion of the input path that may + * only be valid in the gateway node. + * - spark.yarn.config.replacementPath: a string with which to replace the gateway path. This may + * contain, for example, env variable references, which will be expanded by the NMs when + * starting containers. + * + * If either config is not available, the input path is returned. + */ + def getClusterPath(conf: SparkConf, path: String): String = { + val localPath = conf.get(GATEWAY_ROOT_PATH) + val clusterPath = conf.get(REPLACEMENT_ROOT_PATH) + if (localPath != null && clusterPath != null) { + path.replace(localPath, clusterPath) + } else { + path + } + } + + def confToProperties(conf: SparkConf): Properties = { + val props = new Properties() + conf.getAll.foreach { case (k, v) => + props.setProperty(k, v) + } + props + } + + def writePropertiesToArchive(props: Properties, name: String, out: ZipOutputStream): Unit = { + out.putNextEntry(new ZipEntry(name)) + val writer = new OutputStreamWriter(out, StandardCharsets.UTF_8) + props.store(writer, "Spark configuration.") + writer.flush() + out.closeEntry() + } + + /** + * Return whether the two file systems are the same. + */ + protected def compareFs(srcFs: FileSystem, destFs: FileSystem): Boolean = { + val srcUri = srcFs.getUri() + val dstUri = destFs.getUri() + + compareUri(srcUri, dstUri) + } + + /** + * Return whether two URI represent file system are the same + */ + private[spark] def compareUri(srcUri: URI, dstUri: URI): Boolean = { + + if (srcUri.getScheme() == null || srcUri.getScheme() != dstUri.getScheme()) { + return false + } + + val srcAuthority = srcUri.getAuthority() + val dstAuthority = dstUri.getAuthority() + if (srcAuthority != null && !srcAuthority.equalsIgnoreCase(dstAuthority)) { + return false + } + + var srcHost = srcUri.getHost() + var dstHost = dstUri.getHost() + + // In HA or when using viewfs, the host part of the URI may not actually be a host, but the + // name of the HDFS namespace. Those names won't resolve, so avoid even trying if they + // match. 
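A simplified standalone illustration of what compareUri() treats as the "same" file system — the real check below additionally resolves host names and compares ports; the host names and the helper are made up:

    import java.net.URI

    object CompareUriSketch {
      def main(args: Array[String]): Unit = {
        val stagingDir = new URI("hdfs://nameservice1/user/taier/.sparkStaging/app_1")
        val sameFs     = new URI("hdfs://nameservice1/taier/libs/udf.jar")
        val localJar   = new URI("file:///opt/taier/libs/udf.jar")

        // Rough stand-in for compareUri(): same scheme plus same authority.
        def roughlySameFs(a: URI, b: URI): Boolean =
          a.getScheme == b.getScheme &&
            Option(a.getAuthority).getOrElse("").equalsIgnoreCase(Option(b.getAuthority).getOrElse(""))

        println(roughlySameFs(stagingDir, sameFs))   // true  -> copyFileToRemote() can skip the upload
        println(roughlySameFs(stagingDir, localJar)) // false -> the jar is copied to the staging dir
      }
    }
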
+ if (srcHost != null && dstHost != null && srcHost != dstHost) { + try { + srcHost = InetAddress.getByName(srcHost).getCanonicalHostName() + dstHost = InetAddress.getByName(dstHost).getCanonicalHostName() + } catch { + case e: UnknownHostException => + return false + } + } + + Objects.equal(srcHost, dstHost) && srcUri.getPort() == dstUri.getPort() + + } + + /** + * Return the path to the given application's staging directory. + */ + private def getAppStagingDir(appId: ApplicationId): String = { + buildPath(SPARK_STAGING, appId.toString()) + } + + /** + * Joins all the path components using Path.SEPARATOR. + */ + def buildPath(components: String*): String = { + components.mkString(Path.SEPARATOR) + } + + /** + * Populate the classpath entry in the given environment map. + * + * User jars are generally not added to the JVM's system classpath; those are handled by the AM + * and executor backend. When the deprecated `spark.yarn.user.classpath.first` is used, user jars + * are included in the system classpath, though. The extra class path and other uploaded files are + * always made available through the system class path. + * + * @param args Client arguments (when starting the AM) or null (when starting executors). + */ + private[yarn] def populateClasspath( + args: ClientArguments, + conf: Configuration, + sparkConf: SparkConf, + env: HashMap[String, String], + extraClassPath: Option[String] = None): Unit = { + extraClassPath.foreach { cp => + addClasspathEntry(getClusterPath(sparkConf, cp), env) + } + + // Add the localized Hadoop config at the end of the classpath, in case it contains other + // files (such as configuration files for different services) that are not part of the + // YARN cluster's config. + + addClasspathEntry( + buildPath(Environment.PWD.$$(), LOCALIZED_HADOOP_CONF_DIR), env) + + addClasspathEntry(Environment.PWD.$$(), env) + + addClasspathEntry(Environment.PWD.$$() + Path.SEPARATOR + LOCALIZED_CONF_DIR, env) + + if (sparkConf.get(USER_CLASS_PATH_FIRST)) { + // in order to properly add the app jar when user classpath is first + // we have to do the mainJar separate in order to send the right thing + // into addFileToClasspath + val mainJar = + if (args != null) { + getMainJarUri(Option(args.userJar)) + } else { + getMainJarUri(sparkConf.get(APP_JAR)) + } + mainJar.foreach(addFileToClasspath(sparkConf, conf, _, APP_JAR_NAME, env)) + + val secondaryJars = + if (args != null) { + getSecondaryJarUris(Option(sparkConf.get(JARS_TO_DISTRIBUTE))) + } else { + getSecondaryJarUris(sparkConf.get(SECONDARY_JARS)) + } + secondaryJars.foreach { x => + addFileToClasspath(sparkConf, conf, x, null, env) + } + } + + // Add the Spark jars to the classpath, depending on how they were distributed. + addClasspathEntry(buildPath(Environment.PWD.$$(), LOCALIZED_LIB_DIR, "*"), env) + if (sparkConf.get(SPARK_ARCHIVE).isEmpty) { + sparkConf.get(SPARK_JARS).foreach { jars => + jars.filter(Utils.isLocalUri).foreach { jar => + val uri = new URI(jar) + addClasspathEntry(getClusterPath(sparkConf, uri.getPath()), env) + } + } + } + + if (sparkConf.get(POPULATE_HADOOP_CLASSPATH)) { + populateHadoopClasspath(conf, env) + } + + sys.env.get(ENV_DIST_CLASSPATH).foreach { cp => + addClasspathEntry(getClusterPath(sparkConf, cp), env) + } + } + + /** + * Populate the classpath entry in the given environment map with any application + * classpath specified through the Hadoop and Yarn configurations. 
+ */ + private[yarn] def populateHadoopClasspath(conf: Configuration, env: HashMap[String, String]) + : Unit = { + val classPathElementsToAdd = getYarnAppClasspath(conf) ++ getMRAppClasspath(conf) + classPathElementsToAdd.foreach { c => + YarnSparkHadoopUtil.addPathToEnvironment(env, Environment.CLASSPATH.name, c.trim) + } + } + + private def getYarnAppClasspath(conf: Configuration): Seq[String] = + Option(conf.getStrings(YarnConfiguration.YARN_APPLICATION_CLASSPATH)) match { + case Some(s) => s.toSeq + case None => getDefaultYarnApplicationClasspath + } + + private[yarn] def getDefaultYarnApplicationClasspath: Seq[String] = + YarnConfiguration.DEFAULT_YARN_APPLICATION_CLASSPATH.toSeq + + private def getMRAppClasspath(conf: Configuration): Seq[String] = + Option(conf.getStrings("mapreduce.application.classpath")) match { + case Some(s) => s.toSeq + case None => getDefaultMRApplicationClasspath + } + + private[yarn] def getDefaultMRApplicationClasspath: Seq[String] = + StringUtils.getStrings(MRJobConfig.DEFAULT_MAPREDUCE_APPLICATION_CLASSPATH).toSeq + + /** + * Adds the given path to the classpath, handling "local:" URIs correctly. + * + * If an alternate name for the file is given, and it's not a "local:" file, the alternate + * name will be added to the classpath (relative to the job's work directory). + * + * If not a "local:" file and no alternate name, the linkName will be added to the classpath. + * + * @param conf Spark configuration. + * @param hadoopConf Hadoop configuration. + * @param uri URI to add to classpath (optional). + * @param fileName Alternate name for the file (optional). + * @param env Map holding the environment variables. + */ + private def addFileToClasspath( + conf: SparkConf, + hadoopConf: Configuration, + uri: URI, + fileName: String, + env: HashMap[String, String]): Unit = { + if (uri != null && uri.getScheme == Utils.LOCAL_SCHEME) { + addClasspathEntry(getClusterPath(conf, uri.getPath), env) + } else if (fileName != null) { + addClasspathEntry(buildPath(Environment.PWD.$$(), fileName), env) + } else if (uri != null) { + val localPath = getQualifiedLocalPath(uri, hadoopConf) + val linkName = Option(uri.getFragment()).getOrElse(localPath.getName()) + addClasspathEntry(buildPath(Environment.PWD.$$(), linkName), env) + } + } + + /** + * Given a local URI, resolve it and return a qualified local path that corresponds to the URI. + * This is used for preparing local resources to be included in the container launch context. + */ + private def getQualifiedLocalPath(localURI: URI, hadoopConf: Configuration): Path = { + val qualifiedURI = + if (localURI.getScheme == null) { + // If not specified, assume this is in the local filesystem to keep the behavior + // consistent with that of Hadoop + new URI(FileSystem.getLocal(hadoopConf).makeQualified(new Path(localURI)).toString) + } else { + localURI + } + new Path(qualifiedURI) + } + + /** + * Add the given path to the classpath entry of the given environment map. + * If the classpath is already set, this appends the new path to the existing classpath. + */ + private def addClasspathEntry(path: String, env: HashMap[String, String]): Unit = + YarnSparkHadoopUtil.addPathToEnvironment(env, Environment.CLASSPATH.name, path) +} + +private[spark] class YarnClusterApplication extends SparkApplication { + + override def start(args: Array[String], conf: SparkConf): Unit = { + // SparkSubmit would use yarn cache to distribute files & jars in yarn mode, + // so remove them from sparkConf here for yarn mode. 
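One more aside: the classpath helpers above (addFileToClasspath / getQualifiedLocalPath) special-case the "local:" scheme, which marks files already present on every node. A standalone sketch of the distinction — the paths and object name are hypothetical:

    import java.net.URI

    object LocalSchemeSketch {
      def main(args: Array[String]): Unit = {
        val preDeployed  = new URI("local:/opt/spark/extra/udf.jar") // assumed to exist on every node
        val toDistribute = new URI("hdfs:///taier/libs/udf.jar")     // shipped via the distributed cache

        // "local:" URIs are kept as plain node-local paths on the classpath ...
        println(s"${preDeployed.getScheme} -> ${preDeployed.getPath}")
        // ... while anything else is referenced by its localized name in the container's work dir.
        println(s"${toDistribute.getScheme} -> ${new java.io.File(toDistribute.getPath).getName}")
      }
    }
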
+ conf.remove(JARS) + conf.remove(FILES) + conf.remove(ARCHIVES) + + new Client(new ClientArguments(args), conf, null).run() + } + +} + +private[spark] case class YarnAppReport( + appState: YarnApplicationState, + finalState: FinalApplicationStatus, + diagnostics: Option[String]) diff --git a/taier-worker/taier-worker-plugin/spark/yarn2-hdfs2-spark210-core/pom.xml b/taier-worker/taier-worker-plugin/spark/yarn2-hdfs2-spark210-core/pom.xml deleted file mode 100644 index 168e5456fa..0000000000 --- a/taier-worker/taier-worker-plugin/spark/yarn2-hdfs2-spark210-core/pom.xml +++ /dev/null @@ -1,27 +0,0 @@ - - - - taier-worker-plugin.spark - com.dtstack.taier - 1.0.0 - .. - - 4.0.0 - - taier-worker-plugin.spark.yarn2-hdfs2-spark210-core - pom - - - UTF-8 - 2.1.3 - 2.11.8 - 3.2.2 - - - - spark-sql-proxy-core - spark-yarn-client-core - - \ No newline at end of file diff --git a/taier-worker/taier-worker-plugin/spark/yarn2-hdfs2-spark210-core/spark-sql-proxy-core/pom.xml b/taier-worker/taier-worker-plugin/spark/yarn2-hdfs2-spark210-core/spark-sql-proxy-core/pom.xml deleted file mode 100644 index 2f4bc41b8a..0000000000 --- a/taier-worker/taier-worker-plugin/spark/yarn2-hdfs2-spark210-core/spark-sql-proxy-core/pom.xml +++ /dev/null @@ -1,66 +0,0 @@ - - - - taier-worker-plugin.spark.yarn2-hdfs2-spark210-core - com.dtstack.taier - 1.0.0 - ../pom.xml - - 4.0.0 - - taier-worker-plugin.spark.yarn2-hdfs2-spark210-core.spark-sql-proxy-core - taier-worker-plugin.spark.yarn2-hdfs2-spark210-core.spark-sql-proxy-core - - jar - - - 1.5.0-SNAPSHOT - - - - - - com.dtstack.taier - taier-worker-plugin.base - 1.0.0 - - - - org.apache.commons - commons-lang3 - 3.9 - - - - org.apache.spark - spark-sql_2.11 - ${spark.version} - provided - true - - - org.apache.spark - spark-hive-thriftserver_2.11 - ${spark.version} - provided - true - - - - org.apache.spark - spark-hive_2.11 - ${spark.version} - - - org.apache.hive - hive-exec - - - provided - true - - - - \ No newline at end of file diff --git a/taier-worker/taier-worker-plugin/spark/yarn2-hdfs2-spark210-core/spark-yarn-client-core/pom.xml b/taier-worker/taier-worker-plugin/spark/yarn2-hdfs2-spark210-core/spark-yarn-client-core/pom.xml deleted file mode 100644 index cc3246d224..0000000000 --- a/taier-worker/taier-worker-plugin/spark/yarn2-hdfs2-spark210-core/spark-yarn-client-core/pom.xml +++ /dev/null @@ -1,204 +0,0 @@ - - - - taier-worker-plugin.spark.yarn2-hdfs2-spark210-core - com.dtstack.taier - 1.0.0 - ../pom.xml - - 4.0.0 - - taier-worker-plugin.spark.yarn2-hdfs2-spark210-core.spark-yarn-client-core - taier-worker-plugin.spark.yarn2-hdfs2-spark210-core.spark-yarn-client-core - jar - - - - - com.dtstack.taier - taier-worker-plugin.base - 1.0.0 - - - - org.scala-lang - scala-library - ${scala.version} - - - - - xalan - xalan - 2.7.1 - - - - - - org.apache.spark - spark-hive_2.11 - ${spark.version} - provided - true - - - - org.apache.spark - spark-core_2.11 - ${spark.version} - - - hadoop-confHdfsPath - org.apache.hadoop - - - hadoop-common - org.apache.hadoop - - - hadoop-client - org.apache.hadoop - - - provided - true - - - - org.jsoup - jsoup - 1.10.3 - - - - org.apache.spark - spark-yarn_2.11 - ${spark.version} - - - hadoop-yarn-common - org.apache.hadoop - - - hadoop-yarn-api - org.apache.hadoop - - - - org.apache.hadoop - hadooop-yarn-server-web-proxy - - - provided - true - - - - - - org.apache.hadoop - hadoop-hdfs - ${hadoop2.version} - provided - true - - - - org.apache.hadoop - hadoop-common - ${hadoop2.version} - provided - true - - - - org.apache.hadoop 
- hadoop-client - ${hadoop2.version} - provided - true - - - - org.apache.hadoop - hadoop-yarn-common - ${hadoop2.version} - provided - true - - - - org.apache.hadoop - hadoop-yarn-api - ${hadoop2.version} - provided - true - - - - org.apache.hadoop - hadoop-yarn-client - ${hadoop2.version} - provided - true - - - - - - - - net.alchim31.maven - scala-maven-plugin - 3.2.1 - - - org.apache.maven.plugins - maven-compiler-plugin - 2.0.2 - - - - - - - net.alchim31.maven - scala-maven-plugin - - - scala-compile-first - process-resources - - add-source - compile - - - - scala-test-compile - process-test-resources - - testCompile - - - - - - - org.apache.maven.plugins - maven-compiler-plugin - - - compile - - compile - - - - - - - - \ No newline at end of file diff --git a/taier-worker/taier-worker-plugin/spark/yarn2-hdfs2-spark210-core/spark-yarn-client-core/src/main/java/com/dtstack/taier/sparkyarn/sparkext/ClientExt.java b/taier-worker/taier-worker-plugin/spark/yarn2-hdfs2-spark210-core/spark-yarn-client-core/src/main/java/com/dtstack/taier/sparkyarn/sparkext/ClientExt.java deleted file mode 100644 index ecb25ee498..0000000000 --- a/taier-worker/taier-worker-plugin/spark/yarn2-hdfs2-spark210-core/spark-yarn-client-core/src/main/java/com/dtstack/taier/sparkyarn/sparkext/ClientExt.java +++ /dev/null @@ -1,136 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package com.dtstack.taier.sparkyarn.sparkext; - -import com.dtstack.taier.base.filesystem.FilesystemManager; -import com.dtstack.taier.pluginapi.exception.PluginDefineException; -import com.dtstack.taier.sparkyarn.sparkyarn.SparkYarnConfig; -import com.google.common.base.Strings; -import com.google.common.io.Files; -import org.apache.hadoop.conf.Configuration; -import org.apache.spark.SparkConf; -import org.apache.spark.deploy.yarn.ClientArguments; -import org.apache.spark.deploy.yarn.DtClient; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.File; -import java.io.IOException; - -/** - * 修改Saprk yarn client ---> 修改提交之前的配置包打包 - * Date: 2018/5/9 - * Company: www.dtstack.com - * - * @author xuchao - */ - -public class ClientExt extends DtClient { - - private static final Logger LOG = LoggerFactory.getLogger(ClientExt.class); - - /** - * 是否从本地的环境变量加载 - */ - private boolean isLocal = true; - - private SparkYarnConfig sparkYarnConfig; - - private static String userDir = System.getProperty("user.dir"); - - private static String tmpHadoopFilePath = userDir + "/tmpHadoopConf"; - - public static String XML_SUFFIX = ".xml"; - public static String CONF_SUFFIX = ".conf"; - - private SparkConf sparkConf; - - private Configuration hadoopConf; - - private FilesystemManager filesystemManager; - - public ClientExt(FilesystemManager filesystemManager, ClientArguments args, Configuration hadoopConf, SparkConf sparkConf) { - super(args, hadoopConf, sparkConf); - this.filesystemManager = filesystemManager; - this.sparkConf = sparkConf; - this.hadoopConf = hadoopConf; - } - - public void setSparkYarnConfig(SparkYarnConfig sparkYarnConfig) { - this.sparkYarnConfig = sparkYarnConfig; - } - - @Override - public void loadHadoopConf(scala.collection.mutable.HashMap hadoopConfFiles) { - if (!Strings.isNullOrEmpty(sparkYarnConfig.getConfHdfsPath())) { - isLocal = false; - } - - if (isLocal) { - loadConfFromLocal(hadoopConfFiles); - } else { - String confDirName = this.creatDirIfPresent(); - this.loadConfFromDir(hadoopConfFiles, confDirName); - } - - } - - private String creatDirIfPresent() { - String confMd5Sum = sparkYarnConfig.getMd5sum(); - String confFileDirName = String.format("%s/%s", tmpHadoopFilePath, confMd5Sum); - String remotePath = sparkYarnConfig.getConfHdfsPath(); - File dirFile = new File(confFileDirName); - - try { - Files.createParentDirs(dirFile); - } catch (IOException e) { - throw new PluginDefineException(String.format("can not create dir '%s' on engine", dirFile.getParent())); - } - - if (dirFile.exists()) { - File[] files = dirFile.listFiles(); - if (files != null && files.length > 0) { - return confFileDirName; - } - } else { - if (!dirFile.mkdir()) { - throw new PluginDefineException(String.format("can not create dir '%s' on engine", confFileDirName)); - } - } - - - boolean downLoadSuccess = filesystemManager.downloadDir(remotePath, confFileDirName); - LOG.info("downloadDir remotePath:{} confFileDirName:{} status is: {} ", remotePath, confFileDirName, downLoadSuccess); - if (!downLoadSuccess) { - throw new PluginDefineException("yarn-site.xml/hdfs-site.xml/hive-site.xml configuration download failed"); - } - - return confFileDirName; - } - - public void loadConfFromDir(scala.collection.mutable.HashMap hadoopConfFiles, String confDirName) { - File confDir = new File(confDirName); - File[] files = confDir.listFiles((dir, name) -> name.endsWith(XML_SUFFIX) || name.endsWith(CONF_SUFFIX)); - for (File file : files) { - String fileName = file.getName(); - 
hadoopConfFiles.put(fileName, file); - } - } - -} diff --git a/taier-worker/taier-worker-plugin/spark/yarn2-hdfs2-spark210-core/spark-yarn-client-core/src/main/java/com/dtstack/taier/sparkyarn/sparkyarn/file/SparkResourceUploader.java b/taier-worker/taier-worker-plugin/spark/yarn2-hdfs2-spark210-core/spark-yarn-client-core/src/main/java/com/dtstack/taier/sparkyarn/sparkyarn/file/SparkResourceUploader.java deleted file mode 100644 index deb9654f3f..0000000000 --- a/taier-worker/taier-worker-plugin/spark/yarn2-hdfs2-spark210-core/spark-yarn-client-core/src/main/java/com/dtstack/taier/sparkyarn/sparkyarn/file/SparkResourceUploader.java +++ /dev/null @@ -1,143 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package com.dtstack.taier.sparkyarn.sparkyarn.file; - -import com.dtstack.taier.base.filesystem.FilesystemManager; -import com.dtstack.taier.base.util.KerberosUtils; -import com.dtstack.taier.pluginapi.exception.PluginDefineException; -import com.dtstack.taier.sparkyarn.sparkyarn.SparkYarnConfig; -import com.dtstack.taier.sparkyarn.sparkyarn.SparkYarnResourceInfo; -import org.apache.commons.lang3.StringUtils; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.yarn.conf.YarnConfiguration; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.File; -import java.io.IOException; -import java.net.InetAddress; -import java.util.Properties; - -public class SparkResourceUploader { - - private static final Logger logger = LoggerFactory.getLogger(SparkResourceUploader.class); - - public static final String SP = File.separator; - - // default hdfs resource cleaner rate - public static final String SPARK_DEFAULT_CLEAR_RESOURCED_RATE = "30"; - - private final YarnConfiguration yarnConf; - - private final Properties sparkExtProp; - - private final SparkYarnConfig sparkYarnConfig; - - private final FilesystemManager filesystemManager; - - public SparkResourceUploader( - YarnConfiguration yarnConf, - SparkYarnConfig sparkYarnConfig, - Properties sparkExtProp, - FilesystemManager filesystemManager) { - this.yarnConf = yarnConf; - this.sparkExtProp = sparkExtProp; - this.sparkYarnConfig = sparkYarnConfig; - this.filesystemManager = filesystemManager; - } - - public void uploadSparkResource() { - Object sparkResourcesDirProp = sparkExtProp.get(SparkYarnResourceInfo.SPARK_RESOURCES_DIR); - if (sparkResourcesDirProp == null || StringUtils.isBlank(sparkResourcesDirProp.toString())) { - sparkResourcesDirProp = SparkYarnResourceInfo.DEFAULT_SPARK_RESOURCES_DIR; - } - final String sparkResourcesDir = sparkResourcesDirProp.toString(); - String md5sum = sparkYarnConfig.getMd5sum(); - String sparkClearResourceRate = - sparkExtProp - .getOrDefault( - 
SparkYarnResourceInfo.SPARK_CLEAR_RESOURCED_RATE, - SPARK_DEFAULT_CLEAR_RESOURCED_RATE) - .toString(); - try { - KerberosUtils.login( - sparkYarnConfig, - () -> { - try { - FileSystem fileSystem = FileSystem.get(yarnConf); - String hostName = InetAddress.getLocalHost().getHostName(); - String sparkResourcesDirHostName = - sparkResourcesDir + SparkResourceUploader.SP + hostName; - String sparkResourcesDirMd5sum = - sparkResourcesDir - + SparkResourceUploader.SP - + hostName - + SparkResourceUploader.SP - + md5sum; - ResourceCleaner.start( - fileSystem, - sparkResourcesDirHostName, - sparkResourcesDirMd5sum, - sparkClearResourceRate); - uploadSparkSqlProxy(fileSystem, sparkResourcesDirMd5sum); - - } catch (IOException e) { - throw new PluginDefineException("upload hadoop conf", e); - } - return null; - }, - yarnConf); - } catch (Exception e) { - throw new PluginDefineException("upload hadoop conf", e); - } - } - - private void uploadSparkSqlProxy(FileSystem fileSystem, String sparkResourcesDirMd5sum) { - try { - Path localPath = new Path(getSqlProxyJarPath()); - logger.info("local path {}", localPath); - String sparkSqlProxyPath = sparkResourcesDirMd5sum + "/spark-sql-proxy.jar"; - Path remotePath = new Path(sparkSqlProxyPath); - fileSystem.copyFromLocalFile(localPath, remotePath); - sparkYarnConfig.setSparkSqlProxyPath(sparkSqlProxyPath); - } catch (IOException e) { - throw new PluginDefineException("upload spark sql proxy failed", e); - } - } - - private String getSqlProxyJarPath() { - String path = this.getClass().getProtectionDomain().getCodeSource().getLocation().getPath(); - - File pluginDir = new File(path).getParentFile(); - File[] sqlProxyJars = - pluginDir.listFiles( - (dir, name) -> - dir.isDirectory() - && name.toLowerCase().startsWith("spark-sql-proxy")); - if (sqlProxyJars != null && sqlProxyJars.length == 1) { - String sqlProxyJar = sqlProxyJars[0].getName(); - if (sqlProxyJar.toLowerCase().startsWith("spark-sql-proxy") && sqlProxyJar.toLowerCase().endsWith(".jar")) { - return sqlProxyJars[0].getAbsolutePath(); - } - } - throw new PluginDefineException( - "Can not find spark sql proxy jar in path: " + pluginDir); - } -} diff --git a/taier-worker/taier-worker-plugin/spark/yarn2-hdfs2-spark210/pom.xml b/taier-worker/taier-worker-plugin/spark/yarn2-hdfs2-spark210/pom.xml index efe732fefd..65b18b240b 100644 --- a/taier-worker/taier-worker-plugin/spark/yarn2-hdfs2-spark210/pom.xml +++ b/taier-worker/taier-worker-plugin/spark/yarn2-hdfs2-spark210/pom.xml @@ -12,19 +12,135 @@ taier-worker-plugin.spark.yarn2-hdfs2-spark210 taier-worker-plugin.spark.yarn2-hdfs2-spark210 - pom + jar + spark-yarn-client + yarn2-hdfs2-spark210 UTF-8 2.1.3 2.11.8 3.2.2 - - spark-yarn-client - spark-sql-proxy - + + + com.dtstack.taier + taier-worker-plugin.spark.yarn-hdfs-spark210-core + 1.0.0 + + + + org.apache.spark + spark-hive_2.11 + - \ No newline at end of file + + org.apache.spark + spark-core_2.11 + ${spark.version} + + + org.apache.hadoop + hadoop-confHdfsPath + + + org.apache.hadoop + hadoop-common + + + org.apache.hadoop + hadoop-client + + + + + + org.apache.spark + spark-yarn_2.11 + ${spark.version} + + + org.apache.hadoop + hadoop-yarn-common + + + org.apache.hadoop + hadoop-yarn-api + + + org.apache.hadoop + hadoop-client + + + org.apache.hadoop + hadoop-yarn-client + + + + + + + org.apache.hadoop + hadoop-client + + + xml-apis + xml-apis + + + + + + org.apache.hadoop + hadoop-yarn-client + + + + xalan + xalan + 2.7.1 + + + xml-apis + xml-apis + + + + + + + + + + src/main/java + + + 
src/main/resources + + + + + + net.alchim31.maven + scala-maven-plugin + + + + org.apache.maven.plugins + maven-compiler-plugin + + + + org.apache.maven.plugins + maven-shade-plugin + + + + org.apache.maven.plugins + maven-antrun-plugin + + + + diff --git a/taier-worker/taier-worker-plugin/spark/yarn2-hdfs2-spark210/spark-yarn-client/pom.xml b/taier-worker/taier-worker-plugin/spark/yarn2-hdfs2-spark210/spark-yarn-client/pom.xml deleted file mode 100644 index 1270a11c1f..0000000000 --- a/taier-worker/taier-worker-plugin/spark/yarn2-hdfs2-spark210/spark-yarn-client/pom.xml +++ /dev/null @@ -1,223 +0,0 @@ - - - - taier-worker-plugin.spark.yarn2-hdfs2-spark210 - com.dtstack.taier - 1.0.0 - ../pom.xml - - 4.0.0 - - taier-worker-plugin.spark.yarn2-hdfs2-spark210.spark-yarn-client - taier-worker-plugin.spark.yarn2-hdfs2-spark210.spark-yarn-client - - - spark-yarn-client - yarn2-hdfs2-spark210 - - - jar - - - - com.dtstack.taier - taier-worker-plugin.spark.yarn2-hdfs2-spark210-core.spark-yarn-client-core - 1.0.0 - - - - org.apache.spark - spark-hive_2.11 - ${spark.version} - - - - org.apache.spark - spark-core_2.11 - ${spark.version} - - - hadoop-confHdfsPath - org.apache.hadoop - - - hadoop-common - org.apache.hadoop - - - hadoop-client - org.apache.hadoop - - - - - - org.apache.spark - spark-yarn_2.11 - ${spark.version} - - - hadoop-yarn-common - org.apache.hadoop - - - hadoop-yarn-api - org.apache.hadoop - - - - org.apache.hadoop - hadooop-yarn-server-web-proxy - - - - - - - - org.apache.hadoop - hadoop-hdfs - ${hadoop2.version} - - - - org.apache.hadoop - hadoop-common - ${hadoop2.version} - - - - org.apache.hadoop - hadoop-client - ${hadoop2.version} - - - - org.apache.hadoop - hadoop-yarn-common - ${hadoop2.version} - - - - org.apache.hadoop - hadoop-yarn-api - ${hadoop2.version} - - - - org.apache.hadoop - hadoop-yarn-client - ${hadoop2.version} - - - - - - - - src/main/java - - - src/main/resources - - - - - - org.apache.maven.plugins - maven-shade-plugin - 3.0.0 - - - package - - shade - - - - false - true - ${project.basedir}/target/dependency-reduced-pom.xml - - true - - - - - - - - META-INF/MANIFEST.MF - MANIFEST.MF - - - META-INF/services/org.apache.hadoop.security.SecurityInfo - - - META-INF/services/org.apache.hadoop.fs.FileSystem - - - - - - org.slf4j:slf4j-log4j12 - log4j:log4j - org.slf4j:slf4j-api - netty-all:io.netty - - - - - *:* - - META-INF/*.SF - META-INF/*.DSA - META-INF/*.RSA - - - - - - - - - - maven-antrun-plugin - 1.2 - - - copy-resources - - package - - run - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/taier-worker/taier-worker-plugin/spark/yarn2-hdfs2-spark210/spark-yarn-client/src/main/java/org/apache/hadoop/security/UserGroupInformation.java b/taier-worker/taier-worker-plugin/spark/yarn2-hdfs2-spark210/spark-yarn-client/src/main/java/org/apache/hadoop/security/UserGroupInformation.java deleted file mode 100644 index 3ece3a0847..0000000000 --- a/taier-worker/taier-worker-plugin/spark/yarn2-hdfs2-spark210/spark-yarn-client/src/main/java/org/apache/hadoop/security/UserGroupInformation.java +++ /dev/null @@ -1,1794 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.security; - -import com.google.common.annotations.VisibleForTesting; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.classification.InterfaceStability; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.io.Text; -import org.apache.hadoop.metrics2.annotation.Metric; -import org.apache.hadoop.metrics2.annotation.Metrics; -import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; -import org.apache.hadoop.metrics2.lib.MetricsRegistry; -import org.apache.hadoop.metrics2.lib.MutableQuantiles; -import org.apache.hadoop.metrics2.lib.MutableRate; -import org.apache.hadoop.security.SaslRpcServer.AuthMethod; -import org.apache.hadoop.security.authentication.util.KerberosUtil; -import org.apache.hadoop.security.token.Token; -import org.apache.hadoop.security.token.TokenIdentifier; -import org.apache.hadoop.util.Shell; -import org.apache.hadoop.util.StringUtils; -import org.apache.hadoop.util.Time; - -import javax.security.auth.Subject; -import javax.security.auth.callback.CallbackHandler; -import javax.security.auth.kerberos.KerberosPrincipal; -import javax.security.auth.kerberos.KerberosTicket; -import javax.security.auth.kerberos.KeyTab; -import javax.security.auth.login.AppConfigurationEntry; -import javax.security.auth.login.AppConfigurationEntry.LoginModuleControlFlag; -import javax.security.auth.login.LoginContext; -import javax.security.auth.login.LoginException; -import javax.security.auth.spi.LoginModule; -import java.io.File; -import java.io.IOException; -import java.lang.reflect.UndeclaredThrowableException; -import java.security.AccessControlContext; -import java.security.AccessController; -import java.security.Principal; -import java.security.PrivilegedAction; -import java.security.PrivilegedActionException; -import java.security.PrivilegedExceptionAction; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.Collections; -import java.util.HashMap; -import java.util.Iterator; -import java.util.LinkedHashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; - -import static org.apache.hadoop.fs.CommonConfigurationKeys.HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS; -import static org.apache.hadoop.util.PlatformName.IBM_JAVA; - -/** - * User and group information for Hadoop. - * This class wraps around a JAAS Subject and provides methods to determine the - * user's username and groups. It supports both the Windows, Unix and Kerberos - * login modules. - */ -@InterfaceAudience.LimitedPrivate({"HDFS", "MapReduce", "HBase", "Hive", "Oozie"}) -@InterfaceStability.Evolving -public class UserGroupInformation { - private static final Log LOG = LogFactory.getLog(UserGroupInformation.class); - /** - * Percentage of the ticket window to use before we renew ticket. 
- */ - private static final float TICKET_RENEW_WINDOW = 0.80f; - private static boolean shouldRenewImmediatelyForTests = false; - private static ThreadLocal> threadLocal = new ThreadLocal<>(); - - static final String HADOOP_USER_NAME = "HADOOP_USER_NAME"; - static final String HADOOP_PROXY_USER = "HADOOP_PROXY_USER"; - - /** - * For the purposes of unit tests, we want to test login - * from keytab and don't want to wait until the renew - * window (controlled by TICKET_RENEW_WINDOW). - * @param immediate true if we should login without waiting for ticket window - */ - @VisibleForTesting - static void setShouldRenewImmediatelyForTests(boolean immediate) { - shouldRenewImmediatelyForTests = immediate; - } - - public static void setThreadLocalData(String key, String val){ - Map dataMap = threadLocal.get(); - if(dataMap == null){ - dataMap = new HashMap<>(); - } - - dataMap.put(key, val); - threadLocal.set(dataMap); - } - - /** - * UgiMetrics maintains UGI activity statistics - * and publishes them through the metrics interfaces. - */ - @Metrics(about="User and group related metrics", context="ugi") - static class UgiMetrics { - final MetricsRegistry registry = new MetricsRegistry("UgiMetrics"); - - @Metric("Rate of successful kerberos logins and latency (milliseconds)") - MutableRate loginSuccess; - @Metric("Rate of failed kerberos logins and latency (milliseconds)") - MutableRate loginFailure; - @Metric("GetGroups") MutableRate getGroups; - MutableQuantiles[] getGroupsQuantiles; - - static UgiMetrics create() { - return DefaultMetricsSystem.instance().register(new UgiMetrics()); - } - - void addGetGroups(long latency) { - getGroups.add(latency); - if (getGroupsQuantiles != null) { - for (MutableQuantiles q : getGroupsQuantiles) { - q.add(latency); - } - } - } - } - - /** - * A login module that looks at the Kerberos, Unix, or Windows principal and - * adds the corresponding UserName. - */ - @InterfaceAudience.Private - public static class HadoopLoginModule implements LoginModule { - private Subject subject; - - @Override - public boolean abort() throws LoginException { - return true; - } - - private T getCanonicalUser(Class cls) { - for(T user: subject.getPrincipals(cls)) { - return user; - } - return null; - } - - @Override - public boolean commit() throws LoginException { - if (LOG.isDebugEnabled()) { - LOG.debug("hadoop login commit"); - } - // if we already have a user, we are done. - if (!subject.getPrincipals(User.class).isEmpty()) { - if (LOG.isDebugEnabled()) { - LOG.debug("using existing subject:"+subject.getPrincipals()); - } - return true; - } - Principal user = null; - // if we are using kerberos, try it out - if (isAuthenticationMethodEnabled(AuthenticationMethod.KERBEROS)) { - user = getCanonicalUser(KerberosPrincipal.class); - if (LOG.isDebugEnabled()) { - LOG.debug("using kerberos user:"+user); - } - } - //If we don't have a kerberos user and security is disabled, check - //if user is specified in the environment or properties - if (!isSecurityEnabled() && (user == null)) { - - String envUser = null; - Map data = threadLocal.get(); - if(data != null){ - envUser = data.get(HADOOP_USER_NAME); - }else{ - envUser = System.getenv(HADOOP_USER_NAME); - - } - - if (envUser == null) { - envUser = System.getProperty(HADOOP_USER_NAME); - } - user = envUser == null ? 
null : new User(envUser); - } - // use the OS user - if (user == null) { - user = getCanonicalUser(OS_PRINCIPAL_CLASS); - if (LOG.isDebugEnabled()) { - LOG.debug("using local user:"+user); - } - } - // if we found the user, add our principal - if (user != null) { - if (LOG.isDebugEnabled()) { - LOG.debug("Using user: \"" + user + "\" with name " + user.getName()); - } - - User userEntry = null; - try { - userEntry = new User(user.getName()); - } catch (Exception e) { - throw (LoginException)(new LoginException(e.toString()).initCause(e)); - } - if (LOG.isDebugEnabled()) { - LOG.debug("User entry: \"" + userEntry.toString() + "\"" ); - } - - subject.getPrincipals().add(userEntry); - return true; - } - LOG.error("Can't find user in " + subject); - throw new LoginException("Can't find user name"); - } - - @Override - public void initialize(Subject subject, CallbackHandler callbackHandler, - Map sharedState, Map options) { - this.subject = subject; - } - - @Override - public boolean login() throws LoginException { - if (LOG.isDebugEnabled()) { - LOG.debug("hadoop login"); - } - return true; - } - - @Override - public boolean logout() throws LoginException { - if (LOG.isDebugEnabled()) { - LOG.debug("hadoop logout"); - } - return true; - } - } - - /** Metrics to track UGI activity */ - static UgiMetrics metrics = UgiMetrics.create(); - /** The auth method to use */ - private static AuthenticationMethod authenticationMethod; - /** Server-side groups fetching service */ - private static Groups groups; - /** The configuration to use */ - private static Configuration conf; - - - /** Leave 10 minutes between relogin attempts. */ - private static final long MIN_TIME_BEFORE_RELOGIN = 10 * 60 * 1000L; - - /**Environment variable pointing to the token cache file*/ - public static final String HADOOP_TOKEN_FILE_LOCATION = - "HADOOP_TOKEN_FILE_LOCATION"; - - /** - * A method to initialize the fields that depend on a configuration. - * Must be called before useKerberos or groups is used. - */ - private static void ensureInitialized() { - if (conf == null) { - synchronized(UserGroupInformation.class) { - if (conf == null) { // someone might have beat us - initialize(new Configuration(), false); - } - } - } - } - - /** - * Initialize UGI and related classes. - * @param conf the configuration to use - */ - private static synchronized void initialize(Configuration conf, - boolean overrideNameRules) { - authenticationMethod = SecurityUtil.getAuthenticationMethod(conf); - if (overrideNameRules || !HadoopKerberosName.hasRulesBeenSet()) { - try { - HadoopKerberosName.setConfiguration(conf); - } catch (IOException ioe) { - throw new RuntimeException( - "Problem with Kerberos auth_to_local name configuration", ioe); - } - } - // If we haven't set up testing groups, use the configuration to find it - if (!(groups instanceof TestingGroups)) { - groups = Groups.getUserToGroupsMappingService(conf); - } - UserGroupInformation.conf = conf; - - if (metrics.getGroupsQuantiles == null) { - int[] intervals = conf.getInts(HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS); - if (intervals != null && intervals.length > 0) { - final int length = intervals.length; - MutableQuantiles[] getGroupsQuantiles = new MutableQuantiles[length]; - for (int i = 0; i < length; i++) { - getGroupsQuantiles[i] = metrics.registry.newQuantiles( - "getGroups" + intervals[i] + "s", - "Get groups", "ops", "latency", intervals[i]); - } - metrics.getGroupsQuantiles = getGroupsQuantiles; - } - } - } - - /** - * Set the static configuration for UGI. 
- * In particular, set the security authentication mechanism and the - * group look up service. - * @param conf the configuration to use - */ - @InterfaceAudience.Public - @InterfaceStability.Evolving - public static void setConfiguration(Configuration conf) { - initialize(conf, true); - } - - @InterfaceAudience.Private - @VisibleForTesting - static void reset() { - authenticationMethod = null; - conf = null; - groups = null; - setLoginUser(null); - HadoopKerberosName.setRules(null); - } - - /** - * Determine if UserGroupInformation is using Kerberos to determine - * user identities or is relying on simple authentication - * - * @return true if UGI is working in a secure environment - */ - public static boolean isSecurityEnabled() { - return !isAuthenticationMethodEnabled(AuthenticationMethod.SIMPLE); - } - - @InterfaceAudience.Private - @InterfaceStability.Evolving - private static boolean isAuthenticationMethodEnabled(AuthenticationMethod method) { - ensureInitialized(); - return (authenticationMethod == method); - } - - /** - * Information about the logged in user. - */ - private static ThreadLocal userThreadLocal = new ThreadLocal<>(); - private static String keytabPrincipal = null; - private static String keytabFile = null; - - private final Subject subject; - // All non-static fields must be read-only caches that come from the subject. - private final User user; - private final boolean isKeytab; - private final boolean isKrbTkt; - - private static String OS_LOGIN_MODULE_NAME; - private static Class OS_PRINCIPAL_CLASS; - - private static final boolean windows = - System.getProperty("os.name").startsWith("Windows"); - private static final boolean is64Bit = - System.getProperty("os.arch").contains("64"); - private static final boolean aix = "AIX".equals(System.getProperty("os.name")); - - /* Return the OS login module class name */ - private static String getOSLoginModuleName() { - if (IBM_JAVA) { - if (windows) { - return is64Bit ? "com.ibm.security.auth.module.Win64LoginModule" - : "com.ibm.security.auth.module.NTLoginModule"; - } else if (aix) { - return is64Bit ? "com.ibm.security.auth.module.AIX64LoginModule" - : "com.ibm.security.auth.module.AIXLoginModule"; - } else { - return "com.ibm.security.auth.module.LinuxLoginModule"; - } - } else { - return windows ? "com.sun.security.auth.module.NTLoginModule" - : "com.sun.security.auth.module.UnixLoginModule"; - } - } - - /* Return the OS principal class */ - @SuppressWarnings("unchecked") - private static Class getOsPrincipalClass() { - ClassLoader cl = ClassLoader.getSystemClassLoader(); - try { - String principalClass = null; - if (IBM_JAVA) { - if (is64Bit) { - principalClass = "com.ibm.security.auth.UsernamePrincipal"; - } else { - if (windows) { - principalClass = "com.ibm.security.auth.NTUserPrincipal"; - } else if (aix) { - principalClass = "com.ibm.security.auth.AIXPrincipal"; - } else { - principalClass = "com.ibm.security.auth.LinuxPrincipal"; - } - } - } else { - principalClass = windows ? 
"com.sun.security.auth.NTUserPrincipal" - : "com.sun.security.auth.UnixPrincipal"; - } - return (Class) cl.loadClass(principalClass); - } catch (ClassNotFoundException e) { - LOG.error("Unable to find JAAS classes:" + e.getMessage()); - } - return null; - } - static { - OS_LOGIN_MODULE_NAME = getOSLoginModuleName(); - OS_PRINCIPAL_CLASS = getOsPrincipalClass(); - } - - private static class RealUser implements Principal { - private final UserGroupInformation realUser; - - RealUser(UserGroupInformation realUser) { - this.realUser = realUser; - } - - @Override - public String getName() { - return realUser.getUserName(); - } - - public UserGroupInformation getRealUser() { - return realUser; - } - - @Override - public boolean equals(Object o) { - if (this == o) { - return true; - } else if (o == null || getClass() != o.getClass()) { - return false; - } else { - return realUser.equals(((RealUser) o).realUser); - } - } - - @Override - public int hashCode() { - return realUser.hashCode(); - } - - @Override - public String toString() { - return realUser.toString(); - } - } - - /** - * A JAAS configuration that defines the login modules that we want - * to use for login. - */ - private static class HadoopConfiguration - extends javax.security.auth.login.Configuration { - private static final String SIMPLE_CONFIG_NAME = "hadoop-simple"; - private static final String USER_KERBEROS_CONFIG_NAME = - "hadoop-user-kerberos"; - private static final String KEYTAB_KERBEROS_CONFIG_NAME = - "hadoop-keytab-kerberos"; - - private static final Map BASIC_JAAS_OPTIONS = - new HashMap(); - static { - String jaasEnvVar = System.getenv("HADOOP_JAAS_DEBUG"); - if (jaasEnvVar != null && "true".equalsIgnoreCase(jaasEnvVar)) { - BASIC_JAAS_OPTIONS.put("debug", "true"); - } - } - - private static final AppConfigurationEntry OS_SPECIFIC_LOGIN = - new AppConfigurationEntry(OS_LOGIN_MODULE_NAME, - LoginModuleControlFlag.REQUIRED, - BASIC_JAAS_OPTIONS); - private static final AppConfigurationEntry HADOOP_LOGIN = - new AppConfigurationEntry(HadoopLoginModule.class.getName(), - LoginModuleControlFlag.REQUIRED, - BASIC_JAAS_OPTIONS); - private static final Map USER_KERBEROS_OPTIONS = - new HashMap(); - static { - if (IBM_JAVA) { - USER_KERBEROS_OPTIONS.put("useDefaultCcache", "true"); - } else { - USER_KERBEROS_OPTIONS.put("doNotPrompt", "true"); - USER_KERBEROS_OPTIONS.put("useTicketCache", "true"); - } - String ticketCache = System.getenv("KRB5CCNAME"); - if (ticketCache != null) { - if (IBM_JAVA) { - // The first value searched when "useDefaultCcache" is used. 
- System.setProperty("KRB5CCNAME", ticketCache); - } else { - USER_KERBEROS_OPTIONS.put("ticketCache", ticketCache); - } - } - USER_KERBEROS_OPTIONS.put("renewTGT", "true"); - USER_KERBEROS_OPTIONS.putAll(BASIC_JAAS_OPTIONS); - } - private static final AppConfigurationEntry USER_KERBEROS_LOGIN = - new AppConfigurationEntry(KerberosUtil.getKrb5LoginModuleName(), - LoginModuleControlFlag.OPTIONAL, - USER_KERBEROS_OPTIONS); - private static final Map KEYTAB_KERBEROS_OPTIONS = - new HashMap(); - static { - if (IBM_JAVA) { - KEYTAB_KERBEROS_OPTIONS.put("credsType", "both"); - } else { - KEYTAB_KERBEROS_OPTIONS.put("doNotPrompt", "true"); - KEYTAB_KERBEROS_OPTIONS.put("useKeyTab", "true"); - KEYTAB_KERBEROS_OPTIONS.put("storeKey", "true"); - } - KEYTAB_KERBEROS_OPTIONS.put("refreshKrb5Config", "true"); - KEYTAB_KERBEROS_OPTIONS.putAll(BASIC_JAAS_OPTIONS); - } - private static final AppConfigurationEntry KEYTAB_KERBEROS_LOGIN = - new AppConfigurationEntry(KerberosUtil.getKrb5LoginModuleName(), - LoginModuleControlFlag.REQUIRED, - KEYTAB_KERBEROS_OPTIONS); - - private static final AppConfigurationEntry[] SIMPLE_CONF = - new AppConfigurationEntry[]{OS_SPECIFIC_LOGIN, HADOOP_LOGIN}; - - private static final AppConfigurationEntry[] USER_KERBEROS_CONF = - new AppConfigurationEntry[]{OS_SPECIFIC_LOGIN, USER_KERBEROS_LOGIN, - HADOOP_LOGIN}; - - private static final AppConfigurationEntry[] KEYTAB_KERBEROS_CONF = - new AppConfigurationEntry[]{KEYTAB_KERBEROS_LOGIN, HADOOP_LOGIN}; - - @Override - public AppConfigurationEntry[] getAppConfigurationEntry(String appName) { - if (SIMPLE_CONFIG_NAME.equals(appName)) { - return SIMPLE_CONF; - } else if (USER_KERBEROS_CONFIG_NAME.equals(appName)) { - return USER_KERBEROS_CONF; - } else if (KEYTAB_KERBEROS_CONFIG_NAME.equals(appName)) { - if (IBM_JAVA) { - KEYTAB_KERBEROS_OPTIONS.put("useKeytab", - prependFileAuthority(keytabFile)); - } else { - KEYTAB_KERBEROS_OPTIONS.put("keyTab", keytabFile); - } - KEYTAB_KERBEROS_OPTIONS.put("principal", keytabPrincipal); - return KEYTAB_KERBEROS_CONF; - } - return null; - } - } - - private static String prependFileAuthority(String keytabPath) { - return keytabPath.startsWith("file://") ? keytabPath - : "file://" + keytabPath; - } - - /** - * Represents a javax.security configuration that is created at runtime. - */ - private static class DynamicConfiguration - extends javax.security.auth.login.Configuration { - private AppConfigurationEntry[] ace; - - DynamicConfiguration(AppConfigurationEntry[] ace) { - this.ace = ace; - } - - @Override - public AppConfigurationEntry[] getAppConfigurationEntry(String appName) { - return ace; - } - } - - private static LoginContext - newLoginContext(String appName, Subject subject, - javax.security.auth.login.Configuration loginConf) - throws LoginException { - // Temporarily switch the thread's ContextClassLoader to match this - // class's classloader, so that we can properly load HadoopLoginModule - // from the JAAS libraries. - Thread t = Thread.currentThread(); - ClassLoader oldCCL = t.getContextClassLoader(); - t.setContextClassLoader(HadoopLoginModule.class.getClassLoader()); - try { - return new LoginContext(appName, subject, null, loginConf); - } finally { - t.setContextClassLoader(oldCCL); - } - } - - private LoginContext getLogin() { - return user.getLogin(); - } - - private void setLogin(LoginContext login) { - user.setLogin(login); - } - - /** - * Create a UserGroupInformation for the given subject. - * This does not change the subject or acquire new credentials. 
- * @param subject the user's subject - */ - UserGroupInformation(Subject subject) { - this.subject = subject; - this.user = subject.getPrincipals(User.class).iterator().next(); - this.isKeytab = !subject.getPrivateCredentials(KeyTab.class).isEmpty(); - this.isKrbTkt = !subject.getPrivateCredentials(KerberosTicket.class).isEmpty(); - } - - /** - * checks if logged in using kerberos - * @return true if the subject logged via keytab or has a Kerberos TGT - */ - public boolean hasKerberosCredentials() { - return isKeytab || isKrbTkt; - } - - /** - * Return the current user, including any doAs in the current stack. - * @return the current user - * @throws IOException if login fails - */ - @InterfaceAudience.Public - @InterfaceStability.Evolving - public synchronized - static UserGroupInformation getCurrentUser() throws IOException { - AccessControlContext context = AccessController.getContext(); - Subject subject = Subject.getSubject(context); - if (subject == null || subject.getPrincipals(User.class).isEmpty()) { - return getLoginUser(); - } else { - return new UserGroupInformation(subject); - } - } - - /** - * Find the most appropriate UserGroupInformation to use - * - * @param ticketCachePath The Kerberos ticket cache path, or NULL - * if none is specfied - * @param user The user name, or NULL if none is specified. - * - * @return The most appropriate UserGroupInformation - */ - public static UserGroupInformation getBestUGI( - String ticketCachePath, String user) throws IOException { - if (ticketCachePath != null) { - return getUGIFromTicketCache(ticketCachePath, user); - } else if (user == null) { - return getCurrentUser(); - } else { - return createRemoteUser(user); - } - } - - /** - * Create a UserGroupInformation from a Kerberos ticket cache. - * - * @param user The principal name to load from the ticket - * cache - * - * @throws IOException if the kerberos login fails - */ - @InterfaceAudience.Public - @InterfaceStability.Evolving - public static UserGroupInformation getUGIFromTicketCache( - String ticketCache, String user) throws IOException { - if (!isAuthenticationMethodEnabled(AuthenticationMethod.KERBEROS)) { - return getBestUGI(null, user); - } - try { - Map krbOptions = new HashMap(); - if (IBM_JAVA) { - krbOptions.put("useDefaultCcache", "true"); - // The first value searched when "useDefaultCcache" is used. 
- System.setProperty("KRB5CCNAME", ticketCache); - } else { - krbOptions.put("doNotPrompt", "true"); - krbOptions.put("useTicketCache", "true"); - krbOptions.put("useKeyTab", "false"); - krbOptions.put("ticketCache", ticketCache); - } - krbOptions.put("renewTGT", "false"); - krbOptions.putAll(HadoopConfiguration.BASIC_JAAS_OPTIONS); - AppConfigurationEntry ace = new AppConfigurationEntry( - KerberosUtil.getKrb5LoginModuleName(), - LoginModuleControlFlag.REQUIRED, - krbOptions); - DynamicConfiguration dynConf = - new DynamicConfiguration(new AppConfigurationEntry[]{ ace }); - LoginContext login = newLoginContext( - HadoopConfiguration.USER_KERBEROS_CONFIG_NAME, null, dynConf); - login.login(); - - Subject loginSubject = login.getSubject(); - Set loginPrincipals = loginSubject.getPrincipals(); - if (loginPrincipals.isEmpty()) { - throw new RuntimeException("No login principals found!"); - } - if (loginPrincipals.size() != 1) { - LOG.warn("found more than one principal in the ticket cache file " + - ticketCache); - } - User ugiUser = new User(loginPrincipals.iterator().next().getName(), - AuthenticationMethod.KERBEROS, login); - loginSubject.getPrincipals().add(ugiUser); - UserGroupInformation ugi = new UserGroupInformation(loginSubject); - ugi.setLogin(login); - ugi.setAuthenticationMethod(AuthenticationMethod.KERBEROS); - return ugi; - } catch (LoginException le) { - throw new IOException("failure to login using ticket cache file " + - ticketCache, le); - } - } - - /** - * Create a UserGroupInformation from a Subject with Kerberos principal. - * - * @throws IOException if the kerberos login fails - */ - public static UserGroupInformation getUGIFromSubject(Subject subject) - throws IOException { - if (subject == null) { - throw new IOException("Subject must not be null"); - } - - if (subject.getPrincipals(KerberosPrincipal.class).isEmpty()) { - throw new IOException("Provided Subject must contain a KerberosPrincipal"); - } - - KerberosPrincipal principal = - subject.getPrincipals(KerberosPrincipal.class).iterator().next(); - - User ugiUser = new User(principal.getName(), - AuthenticationMethod.KERBEROS, null); - subject.getPrincipals().add(ugiUser); - UserGroupInformation ugi = new UserGroupInformation(subject); - ugi.setLogin(null); - ugi.setAuthenticationMethod(AuthenticationMethod.KERBEROS); - return ugi; - } - - /** - * Get the currently logged in user. - * @return the logged in user - * @throws IOException if login fails - */ - @InterfaceAudience.Public - @InterfaceStability.Evolving - public synchronized - static UserGroupInformation getLoginUser() throws IOException { - UserGroupInformation loginUser = userThreadLocal.get(); - if (loginUser == null) { - loginUserFromSubject(null); - loginUser = userThreadLocal.get(); - } - return loginUser; - } - - /** - * remove the login method that is followed by a space from the username - * e.g. "jack (auth:SIMPLE)" -> "jack" - * - * @param userName - * @return userName without login method - */ - public static String trimLoginMethod(String userName) { - int spaceIndex = userName.indexOf(' '); - if (spaceIndex >= 0) { - userName = userName.substring(0, spaceIndex); - } - return userName; - } - - /** - * Log in a user using the given subject - * @parma subject the subject to use when logging in a user, or null to - * create a new subject. 
- * @throws IOException if login fails - */ - @InterfaceAudience.Public - @InterfaceStability.Evolving - public synchronized - static void loginUserFromSubject(Subject subject) throws IOException { - ensureInitialized(); - UserGroupInformation loginUser; - try { - if (subject == null) { - subject = new Subject(); - } - LoginContext login = - newLoginContext(authenticationMethod.getLoginAppName(), - subject, new HadoopConfiguration()); - login.login(); - UserGroupInformation realUser = new UserGroupInformation(subject); - realUser.setLogin(login); - realUser.setAuthenticationMethod(authenticationMethod); - realUser = new UserGroupInformation(login.getSubject()); - // If the HADOOP_PROXY_USER environment variable or property - // is specified, create a proxy user as the logged in user. - String proxyUser = System.getenv(HADOOP_PROXY_USER); - if (proxyUser == null) { - proxyUser = System.getProperty(HADOOP_PROXY_USER); - } - loginUser = proxyUser == null ? realUser : createProxyUser(proxyUser, realUser); - userThreadLocal.set(loginUser); - - String fileLocation = System.getenv(HADOOP_TOKEN_FILE_LOCATION); - if (fileLocation != null) { - // Load the token storage file and put all of the tokens into the - // user. Don't use the FileSystem API for reading since it has a lock - // cycle (HADOOP-9212). - Credentials cred = Credentials.readTokenStorageFile( - new File(fileLocation), conf); - loginUser.addCredentials(cred); - } - loginUser.spawnAutoRenewalThreadForUserCreds(); - } catch (LoginException le) { - LOG.debug("failure to login", le); - throw new IOException("failure to login", le); - } - if (LOG.isDebugEnabled()) { - LOG.debug("UGI loginUser:"+loginUser); - } - } - - @InterfaceAudience.Private - @InterfaceStability.Unstable - @VisibleForTesting - public synchronized static void setLoginUser(UserGroupInformation ugi) { - // if this is to become stable, should probably logout the currently - // logged in ugi if it's different - //loginUser = ugi; - userThreadLocal.set(ugi); - } - - /** - * Is this user logged in from a keytab file? - * @return true if the credentials are from a keytab file. 
- */ - public boolean isFromKeytab() { - return isKeytab; - } - - /** - * Get the Kerberos TGT - * @return the user's TGT or null if none was found - */ - private synchronized KerberosTicket getTGT() { - Set tickets = subject - .getPrivateCredentials(KerberosTicket.class); - for (KerberosTicket ticket : tickets) { - if (SecurityUtil.isOriginalTGT(ticket)) { - if (LOG.isDebugEnabled()) { - LOG.debug("Found tgt " + ticket); - } - return ticket; - } - } - return null; - } - - private long getRefreshTime(KerberosTicket tgt) { - long start = tgt.getStartTime().getTime(); - long end = tgt.getEndTime().getTime(); - return start + (long) ((end - start) * TICKET_RENEW_WINDOW); - } - - /**Spawn a thread to do periodic renewals of kerberos credentials*/ - private void spawnAutoRenewalThreadForUserCreds() { - if (isSecurityEnabled()) { - //spawn thread only if we have kerb credentials - if (user.getAuthenticationMethod() == AuthenticationMethod.KERBEROS && - !isKeytab) { - Thread t = new Thread(new Runnable() { - - @Override - public void run() { - String cmd = conf.get("hadoop.kerberos.kinit.command", - "kinit"); - KerberosTicket tgt = getTGT(); - if (tgt == null) { - return; - } - long nextRefresh = getRefreshTime(tgt); - while (true) { - try { - long now = Time.now(); - if(LOG.isDebugEnabled()) { - LOG.debug("Current time is " + now); - LOG.debug("Next refresh is " + nextRefresh); - } - if (now < nextRefresh) { - Thread.sleep(nextRefresh - now); - } - Shell.execCommand(cmd, "-R"); - if(LOG.isDebugEnabled()) { - LOG.debug("renewed ticket"); - } - reloginFromTicketCache(); - tgt = getTGT(); - if (tgt == null) { - LOG.warn("No TGT after renewal. Aborting renew thread for " + - getUserName()); - return; - } - nextRefresh = Math.max(getRefreshTime(tgt), - now + MIN_TIME_BEFORE_RELOGIN); - } catch (InterruptedException ie) { - LOG.warn("Terminating renewal thread"); - return; - } catch (IOException ie) { - LOG.warn("Exception encountered while running the" + - " renewal command. Aborting renew thread. " + ie); - return; - } - } - } - }); - t.setDaemon(true); - t.setName("TGT Renewer for " + getUserName()); - t.start(); - } - } - } - /** - * Log a user in from a keytab file. Loads a user identity from a keytab - * file and logs them in. They become the currently logged-in user. 
- * @param user the principal name to load from the keytab - * @param path the path to the keytab file - * @throws IOException if the keytab file can't be read - */ - @InterfaceAudience.Public - @InterfaceStability.Evolving - public synchronized - static void loginUserFromKeytab(String user, - String path - ) throws IOException { - if (!isSecurityEnabled()) { - return; - } - - keytabFile = path; - keytabPrincipal = user; - Subject subject = new Subject(); - LoginContext login; - long start = 0; - try { - login = newLoginContext(HadoopConfiguration.KEYTAB_KERBEROS_CONFIG_NAME, - subject, new HadoopConfiguration()); - start = Time.now(); - login.login(); - metrics.loginSuccess.add(Time.now() - start); - UserGroupInformation loginUser = new UserGroupInformation(subject); - loginUser.setLogin(login); - loginUser.setAuthenticationMethod(AuthenticationMethod.KERBEROS); - userThreadLocal.set(loginUser); - } catch (LoginException le) { - if (start > 0) { - metrics.loginFailure.add(Time.now() - start); - } - throw new IOException("Login failure for " + user + " from keytab " + - path+ ": " + le, le); - } - LOG.info("Login successful for user " + keytabPrincipal - + " using keytab file " + keytabFile); - } - - /** - * Log the current user out who previously logged in using keytab. - * This method assumes that the user logged in by calling - * {@link #loginUserFromKeytab(String, String)}. - * - * @throws IOException if a failure occurred in logout, or if the user did - * not log in by invoking loginUserFromKeyTab() before. - */ - @InterfaceAudience.Public - @InterfaceStability.Evolving - public void logoutUserFromKeytab() throws IOException { - if (!isSecurityEnabled() || - user.getAuthenticationMethod() != AuthenticationMethod.KERBEROS) { - return; - } - LoginContext login = getLogin(); - if (login == null || keytabFile == null) { - throw new IOException("loginUserFromKeytab must be done first"); - } - - try { - if (LOG.isDebugEnabled()) { - LOG.debug("Initiating logout for " + getUserName()); - } - synchronized (UserGroupInformation.class) { - login.logout(); - } - } catch (LoginException le) { - throw new IOException("Logout failure for " + user + " from keytab " + - keytabFile, le); - } - - LOG.info("Logout successful for user " + keytabPrincipal - + " using keytab file " + keytabFile); - } - - /** - * Re-login a user from keytab if TGT is expired or is close to expiry. - * - * @throws IOException - */ - public synchronized void checkTGTAndReloginFromKeytab() throws IOException { - if (!isSecurityEnabled() - || user.getAuthenticationMethod() != AuthenticationMethod.KERBEROS - || !isKeytab) { - return; - } - KerberosTicket tgt = getTGT(); - if (tgt != null && !shouldRenewImmediatelyForTests && - Time.now() < getRefreshTime(tgt)) { - return; - } - reloginFromKeytab(); - } - - /** - * Re-Login a user in from a keytab file. Loads a user identity from a keytab - * file and logs them in. They become the currently logged-in user. This - * method assumes that {@link #loginUserFromKeytab(String, String)} had - * happened already. - * The Subject field of this UserGroupInformation object is updated to have - * the new credentials. 
- * @throws IOException on a failure - */ - @InterfaceAudience.Public - @InterfaceStability.Evolving - public synchronized void reloginFromKeytab() - throws IOException { - if (!isSecurityEnabled() || - user.getAuthenticationMethod() != AuthenticationMethod.KERBEROS || - !isKeytab) { - return; - } - - long now = Time.now(); - if (!shouldRenewImmediatelyForTests && !hasSufficientTimeElapsed(now)) { - return; - } - - KerberosTicket tgt = getTGT(); - //Return if TGT is valid and is not going to expire soon. - if (tgt != null && !shouldRenewImmediatelyForTests && - now < getRefreshTime(tgt)) { - return; - } - - LoginContext login = getLogin(); - if (login == null || keytabFile == null) { - throw new IOException("loginUserFromKeyTab must be done first"); - } - - long start = 0; - // register most recent relogin attempt - user.setLastLogin(now); - try { - if (LOG.isDebugEnabled()) { - LOG.debug("Initiating logout for " + getUserName()); - } - synchronized (UserGroupInformation.class) { - // clear up the kerberos state. But the tokens are not cleared! As per - // the Java kerberos login module code, only the kerberos credentials - // are cleared - login.logout(); - // login and also update the subject field of this instance to - // have the new credentials (pass it to the LoginContext constructor) - login = newLoginContext( - HadoopConfiguration.KEYTAB_KERBEROS_CONFIG_NAME, getSubject(), - new HadoopConfiguration()); - if (LOG.isDebugEnabled()) { - LOG.debug("Initiating re-login for " + keytabPrincipal); - } - start = Time.now(); - login.login(); - metrics.loginSuccess.add(Time.now() - start); - setLogin(login); - } - } catch (LoginException le) { - if (start > 0) { - metrics.loginFailure.add(Time.now() - start); - } - throw new IOException("Login failure for " + keytabPrincipal + - " from keytab " + keytabFile, le); - } - } - - /** - * Re-Login a user in from the ticket cache. This - * method assumes that login had happened already. - * The Subject field of this UserGroupInformation object is updated to have - * the new credentials. - * @throws IOException on a failure - */ - @InterfaceAudience.Public - @InterfaceStability.Evolving - public synchronized void reloginFromTicketCache() - throws IOException { - if (!isSecurityEnabled() || - user.getAuthenticationMethod() != AuthenticationMethod.KERBEROS || - !isKrbTkt) { - return; - } - LoginContext login = getLogin(); - if (login == null) { - throw new IOException("login must be done first"); - } - long now = Time.now(); - if (!hasSufficientTimeElapsed(now)) { - return; - } - // register most recent relogin attempt - user.setLastLogin(now); - try { - if (LOG.isDebugEnabled()) { - LOG.debug("Initiating logout for " + getUserName()); - } - //clear up the kerberos state. But the tokens are not cleared! As per - //the Java kerberos login module code, only the kerberos credentials - //are cleared - login.logout(); - //login and also update the subject field of this instance to - //have the new credentials (pass it to the LoginContext constructor) - login = - newLoginContext(HadoopConfiguration.USER_KERBEROS_CONFIG_NAME, - getSubject(), new HadoopConfiguration()); - if (LOG.isDebugEnabled()) { - LOG.debug("Initiating re-login for " + getUserName()); - } - login.login(); - setLogin(login); - } catch (LoginException le) { - throw new IOException("Login failure for " + getUserName(), le); - } - } - - - /** - * Log a user in from a keytab file. Loads a user identity from a keytab - * file and login them in. 
This new user does not affect the currently - * logged-in user. - * @param user the principal name to load from the keytab - * @param path the path to the keytab file - * @throws IOException if the keytab file can't be read - */ - public synchronized - static UserGroupInformation loginUserFromKeytabAndReturnUGI(String user, - String path - ) throws IOException { - if (!isSecurityEnabled()) { - return UserGroupInformation.getCurrentUser(); - } - String oldKeytabFile = null; - String oldKeytabPrincipal = null; - - long start = 0; - try { - oldKeytabFile = keytabFile; - oldKeytabPrincipal = keytabPrincipal; - keytabFile = path; - keytabPrincipal = user; - Subject subject = new Subject(); - - LoginContext login = newLoginContext( - HadoopConfiguration.KEYTAB_KERBEROS_CONFIG_NAME, subject, - new HadoopConfiguration()); - - start = Time.now(); - login.login(); - metrics.loginSuccess.add(Time.now() - start); - UserGroupInformation newLoginUser = new UserGroupInformation(subject); - newLoginUser.setLogin(login); - newLoginUser.setAuthenticationMethod(AuthenticationMethod.KERBEROS); - - return newLoginUser; - } catch (LoginException le) { - if (start > 0) { - metrics.loginFailure.add(Time.now() - start); - } - throw new IOException("Login failure for " + user + " from keytab " + - path, le); - } finally { - if(oldKeytabFile != null) { - keytabFile = oldKeytabFile; - } - if(oldKeytabPrincipal != null) { - keytabPrincipal = oldKeytabPrincipal; - } - } - } - - private boolean hasSufficientTimeElapsed(long now) { - if (now - user.getLastLogin() < MIN_TIME_BEFORE_RELOGIN ) { - LOG.warn("Not attempting to re-login since the last re-login was " + - "attempted less than " + (MIN_TIME_BEFORE_RELOGIN/1000) + " seconds"+ - " before."); - return false; - } - return true; - } - - /** - * Did the login happen via keytab - * @return true or false - */ - @InterfaceAudience.Public - @InterfaceStability.Evolving - public synchronized static boolean isLoginKeytabBased() throws IOException { - return getLoginUser().isKeytab; - } - - /** - * Did the login happen via ticket cache - * @return true or false - */ - public static boolean isLoginTicketBased() throws IOException { - return getLoginUser().isKrbTkt; - } - - /** - * Create a user from a login name. It is intended to be used for remote - * users in RPC, since it won't have any credentials. - * @param user the full user principal name, must not be empty or null - * @return the UserGroupInformation for the remote user. - */ - @InterfaceAudience.Public - @InterfaceStability.Evolving - public static UserGroupInformation createRemoteUser(String user) { - return createRemoteUser(user, AuthMethod.SIMPLE); - } - - /** - * Create a user from a login name. It is intended to be used for remote - * users in RPC, since it won't have any credentials. - * @param user the full user principal name, must not be empty or null - * @return the UserGroupInformation for the remote user. 
- */ - @InterfaceAudience.Public - @InterfaceStability.Evolving - public static UserGroupInformation createRemoteUser(String user, AuthMethod authMethod) { - if (user == null || user.isEmpty()) { - throw new IllegalArgumentException("Null user"); - } - Subject subject = new Subject(); - subject.getPrincipals().add(new User(user)); - UserGroupInformation result = new UserGroupInformation(subject); - result.setAuthenticationMethod(authMethod); - return result; - } - - /** - * existing types of authentications' methods - */ - @InterfaceAudience.Public - @InterfaceStability.Evolving - public static enum AuthenticationMethod { - // currently we support only one auth per method, but eventually a - // subtype is needed to differentiate, ex. if digest is token or ldap - SIMPLE(AuthMethod.SIMPLE, - HadoopConfiguration.SIMPLE_CONFIG_NAME), - KERBEROS(AuthMethod.KERBEROS, - HadoopConfiguration.USER_KERBEROS_CONFIG_NAME), - TOKEN(AuthMethod.TOKEN), - CERTIFICATE(null), - KERBEROS_SSL(null), - PROXY(null); - - private final AuthMethod authMethod; - private final String loginAppName; - - private AuthenticationMethod(AuthMethod authMethod) { - this(authMethod, null); - } - private AuthenticationMethod(AuthMethod authMethod, String loginAppName) { - this.authMethod = authMethod; - this.loginAppName = loginAppName; - } - - public AuthMethod getAuthMethod() { - return authMethod; - } - - String getLoginAppName() { - if (loginAppName == null) { - throw new UnsupportedOperationException( - this + " login authentication is not supported"); - } - return loginAppName; - } - - public static AuthenticationMethod valueOf(AuthMethod authMethod) { - for (AuthenticationMethod value : values()) { - if (value.getAuthMethod() == authMethod) { - return value; - } - } - throw new IllegalArgumentException( - "no authentication method for " + authMethod); - } - }; - - /** - * Create a proxy user using username of the effective user and the ugi of the - * real user. - * @param user - * @param realUser - * @return proxyUser ugi - */ - @InterfaceAudience.Public - @InterfaceStability.Evolving - public static UserGroupInformation createProxyUser(String user, - UserGroupInformation realUser) { - if (user == null || user.isEmpty()) { - throw new IllegalArgumentException("Null user"); - } - if (realUser == null) { - throw new IllegalArgumentException("Null real user"); - } - Subject subject = new Subject(); - Set principals = subject.getPrincipals(); - principals.add(new User(user)); - principals.add(new RealUser(realUser)); - UserGroupInformation result =new UserGroupInformation(subject); - result.setAuthenticationMethod(AuthenticationMethod.PROXY); - return result; - } - - /** - * get RealUser (vs. EffectiveUser) - * @return realUser running over proxy user - */ - @InterfaceAudience.Public - @InterfaceStability.Evolving - public UserGroupInformation getRealUser() { - for (RealUser p: subject.getPrincipals(RealUser.class)) { - return p.getRealUser(); - } - return null; - } - - - - /** - * This class is used for storing the groups for testing. It stores a local - * map that has the translation of usernames to groups. 
- */ - private static class TestingGroups extends Groups { - private final Map> userToGroupsMapping = - new HashMap>(); - private Groups underlyingImplementation; - - private TestingGroups(Groups underlyingImplementation) { - super(new Configuration()); - this.underlyingImplementation = underlyingImplementation; - } - - @Override - public List getGroups(String user) throws IOException { - List result = userToGroupsMapping.get(user); - - if (result == null) { - result = underlyingImplementation.getGroups(user); - } - - return result; - } - - private void setUserGroups(String user, String[] groups) { - userToGroupsMapping.put(user, Arrays.asList(groups)); - } - } - - /** - * Create a UGI for testing HDFS and MapReduce - * @param user the full user principal name - * @param userGroups the names of the groups that the user belongs to - * @return a fake user for running unit tests - */ - @InterfaceAudience.Public - @InterfaceStability.Evolving - public static UserGroupInformation createUserForTesting(String user, - String[] userGroups) { - ensureInitialized(); - UserGroupInformation ugi = createRemoteUser(user); - // make sure that the testing object is setup - if (!(groups instanceof TestingGroups)) { - groups = new TestingGroups(groups); - } - // add the user groups - ((TestingGroups) groups).setUserGroups(ugi.getShortUserName(), userGroups); - return ugi; - } - - - /** - * Create a proxy user UGI for testing HDFS and MapReduce - * - * @param user - * the full user principal name for effective user - * @param realUser - * UGI of the real user - * @param userGroups - * the names of the groups that the user belongs to - * @return a fake user for running unit tests - */ - public static UserGroupInformation createProxyUserForTesting(String user, - UserGroupInformation realUser, String[] userGroups) { - ensureInitialized(); - UserGroupInformation ugi = createProxyUser(user, realUser); - // make sure that the testing object is setup - if (!(groups instanceof TestingGroups)) { - groups = new TestingGroups(groups); - } - // add the user groups - ((TestingGroups) groups).setUserGroups(ugi.getShortUserName(), userGroups); - return ugi; - } - - /** - * Get the user's login name. - * @return the user's name up to the first '/' or '@'. - */ - public String getShortUserName() { - for (User p: subject.getPrincipals(User.class)) { - return p.getShortName(); - } - return null; - } - - public String getPrimaryGroupName() throws IOException { - String[] groups = getGroupNames(); - if (groups.length == 0) { - throw new IOException("There is no primary group for UGI " + this); - } - return groups[0]; - } - - /** - * Get the user's full principal name. - * @return the user's full principal name. - */ - @InterfaceAudience.Public - @InterfaceStability.Evolving - public String getUserName() { - return user.getName(); - } - - /** - * Add a TokenIdentifier to this UGI. The TokenIdentifier has typically been - * authenticated by the RPC layer as belonging to the user represented by this - * UGI. 
- * - * @param tokenId - * tokenIdentifier to be added - * @return true on successful add of new tokenIdentifier - */ - public synchronized boolean addTokenIdentifier(TokenIdentifier tokenId) { - return subject.getPublicCredentials().add(tokenId); - } - - /** - * Get the set of TokenIdentifiers belonging to this UGI - * - * @return the set of TokenIdentifiers belonging to this UGI - */ - public synchronized Set getTokenIdentifiers() { - return subject.getPublicCredentials(TokenIdentifier.class); - } - - /** - * Add a token to this UGI - * - * @param token Token to be added - * @return true on successful add of new token - */ - public boolean addToken(Token token) { - return (token != null) ? addToken(token.getService(), token) : false; - } - - /** - * Add a named token to this UGI - * - * @param alias Name of the token - * @param token Token to be added - * @return true on successful add of new token - */ - public boolean addToken(Text alias, Token token) { - synchronized (subject) { - getCredentialsInternal().addToken(alias, token); - return true; - } - } - - /** - * Obtain the collection of tokens associated with this user. - * - * @return an unmodifiable collection of tokens associated with user - */ - public Collection> getTokens() { - synchronized (subject) { - return Collections.unmodifiableCollection( - new ArrayList>(getCredentialsInternal().getAllTokens())); - } - } - - /** - * Obtain the tokens in credentials form associated with this user. - * - * @return Credentials of tokens associated with this user - */ - public Credentials getCredentials() { - synchronized (subject) { - Credentials creds = new Credentials(getCredentialsInternal()); - Iterator> iter = creds.getAllTokens().iterator(); - while (iter.hasNext()) { - if (iter.next() instanceof Token.PrivateToken) { - iter.remove(); - } - } - return creds; - } - } - - /** - * Add the given Credentials to this user. - * @param credentials of tokens and secrets - */ - public void addCredentials(Credentials credentials) { - synchronized (subject) { - getCredentialsInternal().addAll(credentials); - } - } - - private synchronized Credentials getCredentialsInternal() { - final Credentials credentials; - final Set credentialsSet = - subject.getPrivateCredentials(Credentials.class); - if (!credentialsSet.isEmpty()){ - credentials = credentialsSet.iterator().next(); - } else { - credentials = new Credentials(); - subject.getPrivateCredentials().add(credentials); - } - return credentials; - } - - /** - * Get the group names for this user. - * @return the list of users with the primary group first. If the command - * fails, it returns an empty list. - */ - public synchronized String[] getGroupNames() { - ensureInitialized(); - try { - Set result = new LinkedHashSet - (groups.getGroups(getShortUserName())); - return result.toArray(new String[result.size()]); - } catch (IOException ie) { - if (LOG.isDebugEnabled()) { - LOG.debug("Failed to get groups for user " + getShortUserName() - + " by " + ie); - LOG.trace("TRACE", ie); - } - return StringUtils.emptyStringArray; - } - } - - /** - * Return the username. 
- */ - @Override - public String toString() { - StringBuilder sb = new StringBuilder(getUserName()); - sb.append(" (auth:"+getAuthenticationMethod()+")"); - if (getRealUser() != null) { - sb.append(" via ").append(getRealUser().toString()); - } - return sb.toString(); - } - - /** - * Sets the authentication method in the subject - * - * @param authMethod - */ - public synchronized - void setAuthenticationMethod(AuthenticationMethod authMethod) { - user.setAuthenticationMethod(authMethod); - } - - /** - * Sets the authentication method in the subject - * - * @param authMethod - */ - public void setAuthenticationMethod(AuthMethod authMethod) { - user.setAuthenticationMethod(AuthenticationMethod.valueOf(authMethod)); - } - - /** - * Get the authentication method from the subject - * - * @return AuthenticationMethod in the subject, null if not present. - */ - public synchronized AuthenticationMethod getAuthenticationMethod() { - return user.getAuthenticationMethod(); - } - - /** - * Get the authentication method from the real user's subject. If there - * is no real user, return the given user's authentication method. - * - * @return AuthenticationMethod in the subject, null if not present. - */ - public synchronized AuthenticationMethod getRealAuthenticationMethod() { - UserGroupInformation ugi = getRealUser(); - if (ugi == null) { - ugi = this; - } - return ugi.getAuthenticationMethod(); - } - - /** - * Returns the authentication method of a ugi. If the authentication method is - * PROXY, returns the authentication method of the real user. - * - * @param ugi - * @return AuthenticationMethod - */ - public static AuthenticationMethod getRealAuthenticationMethod( - UserGroupInformation ugi) { - AuthenticationMethod authMethod = ugi.getAuthenticationMethod(); - if (authMethod == AuthenticationMethod.PROXY) { - authMethod = ugi.getRealUser().getAuthenticationMethod(); - } - return authMethod; - } - - /** - * Compare the subjects to see if they are equal to each other. - */ - @Override - public boolean equals(Object o) { - if (o == this) { - return true; - } else if (o == null || getClass() != o.getClass()) { - return false; - } else { - return subject == ((UserGroupInformation) o).subject; - } - } - - /** - * Return the hash of the subject. - */ - @Override - public int hashCode() { - return System.identityHashCode(subject); - } - - /** - * Get the underlying subject from this ugi. - * @return the subject that represents this user. - */ - protected Subject getSubject() { - return subject; - } - - /** - * Run the given action as the user. - * @param the return type of the run method - * @param action the method to execute - * @return the value from the run method - */ - @InterfaceAudience.Public - @InterfaceStability.Evolving - public T doAs(PrivilegedAction action) { - logPrivilegedAction(subject, action); - return Subject.doAs(subject, action); - } - - /** - * Run the given action as the user, potentially throwing an exception. 
- * @param the return type of the run method - * @param action the method to execute - * @return the value from the run method - * @throws IOException if the action throws an IOException - * @throws Error if the action throws an Error - * @throws RuntimeException if the action throws a RuntimeException - * @throws InterruptedException if the action throws an InterruptedException - * @throws UndeclaredThrowableException if the action throws something else - */ - @InterfaceAudience.Public - @InterfaceStability.Evolving - public T doAs(PrivilegedExceptionAction action - ) throws IOException, InterruptedException { - try { - logPrivilegedAction(subject, action); - return Subject.doAs(subject, action); - } catch (PrivilegedActionException pae) { - Throwable cause = pae.getCause(); - if (LOG.isDebugEnabled()) { - LOG.debug("PrivilegedActionException as:" + this + " cause:" + cause); - } - if (cause instanceof IOException) { - throw (IOException) cause; - } else if (cause instanceof Error) { - throw (Error) cause; - } else if (cause instanceof RuntimeException) { - throw (RuntimeException) cause; - } else if (cause instanceof InterruptedException) { - throw (InterruptedException) cause; - } else { - throw new UndeclaredThrowableException(cause); - } - } - } - - private void logPrivilegedAction(Subject subject, Object action) { - if (LOG.isDebugEnabled()) { - // would be nice if action included a descriptive toString() - String where = new Throwable().getStackTrace()[2].toString(); - LOG.debug("PrivilegedAction as:"+this+" from:"+where); - } - } - - private void print() throws IOException { - System.out.println("User: " + getUserName()); - System.out.print("Group Ids: "); - System.out.println(); - String[] groups = getGroupNames(); - System.out.print("Groups: "); - for(int i=0; i < groups.length; i++) { - System.out.print(groups[i] + " "); - } - System.out.println(); - } - - /** - * A test method to print out the current user's UGI. - * @param args if there are two arguments, read the user from the keytab - * and print it out. 
- * @throws Exception - */ - public static void main(String [] args) throws Exception { - System.out.println("Getting UGI for current user"); - UserGroupInformation ugi = getCurrentUser(); - ugi.print(); - System.out.println("UGI: " + ugi); - System.out.println("Auth method " + ugi.user.getAuthenticationMethod()); - System.out.println("Keytab " + ugi.isKeytab); - System.out.println("============================================================"); - - if (args.length == 2) { - System.out.println("Getting UGI from keytab...."); - loginUserFromKeytab(args[0], args[1]); - getCurrentUser().print(); - System.out.println("Keytab: " + ugi); -// System.out.println("Auth method " + loginUser.user.getAuthenticationMethod()); -// System.out.println("Keytab " + loginUser.isKeytab); - } - } - -} diff --git a/taier-worker/taier-worker-plugin/spark/yarn2-hdfs2-spark210/spark-yarn-client/src/main/resources/META-INF/services/com.dtstack.taier.pluginapi.client.IClient b/taier-worker/taier-worker-plugin/spark/yarn2-hdfs2-spark210/src/main/resources/META-INF/services/com.dtstack.taier.pluginapi.client.IClient similarity index 100% rename from taier-worker/taier-worker-plugin/spark/yarn2-hdfs2-spark210/spark-yarn-client/src/main/resources/META-INF/services/com.dtstack.taier.pluginapi.client.IClient rename to taier-worker/taier-worker-plugin/spark/yarn2-hdfs2-spark210/src/main/resources/META-INF/services/com.dtstack.taier.pluginapi.client.IClient diff --git a/taier-worker/taier-worker-plugin/spark/yarn2-hdfs2-spark320/pom.xml b/taier-worker/taier-worker-plugin/spark/yarn2-hdfs2-spark320/pom.xml new file mode 100644 index 0000000000..2a5d5ac716 --- /dev/null +++ b/taier-worker/taier-worker-plugin/spark/yarn2-hdfs2-spark320/pom.xml @@ -0,0 +1,160 @@ + + + 4.0.0 + 1.0.0 + + + taier-worker-plugin.spark + com.dtstack.taier + 1.0.0 + ../pom.xml + + + taier-worker-plugin.spark.yarn2-hdfs2-spark320 + taier-worker-plugin.spark.yarn2-hdfs2-spark210 + jar + + + spark-yarn-client + yarn2-hdfs2-spark320 + UTF-8 + 3.2.2 + 2.12.8 + 3.2.2 + + + + + com.dtstack.taier + taier-worker-plugin.spark.yarn-hdfs-spark320-core + 1.0.0 + + + + + org.apache.spark + spark-hive_2.12 + ${spark.version} + + + log4j + apache-log4j-extras + + + + + + org.apache.spark + spark-core_2.12 + ${spark.version} + + + org.apache.hadoop + hadoop-confHdfsPath + + + org.apache.hadoop + hadoop-common + + + org.apache.hadoop + hadoop-client + + + + + + org.apache.spark + spark-yarn_2.12 + ${spark.version} + + + org.apache.hadoop + hadoop-yarn-common + + + org.apache.hadoop + hadoop-yarn-api + + + + hadooop-yarn-server-web-proxy + org.apache.hadoop + + + + + + + org.apache.hadoop + hadoop-hdfs + ${hadoop2.version} + + + xerces + xercesImpl + + + xml-apis + xml-apis + + + + + + org.apache.hadoop + hadoop-common + ${hadoop2.version} + + + + + org.apache.hadoop + hadoop-client + ${hadoop2.version} + + + + org.apache.hadoop + hadoop-yarn-common + ${hadoop2.version} + + + + org.apache.hadoop + hadoop-yarn-api + ${hadoop2.version} + + + + org.apache.hadoop + hadoop-yarn-client + ${hadoop2.version} + + + + + + + src/main/java + + + src/main/resources + + + + + + org.apache.maven.plugins + maven-shade-plugin + + + + org.apache.maven.plugins + maven-antrun-plugin + + + + diff --git a/taier-worker/taier-worker-plugin/spark/yarn3-hdfs3-spark210/spark-yarn-client/src/main/resources/META-INF/services/com.dtstack.taier.pluginapi.client.IClient 
b/taier-worker/taier-worker-plugin/spark/yarn2-hdfs2-spark320/src/main/resources/META-INF/services/com.dtstack.taier.pluginapi.client.IClient similarity index 100% rename from taier-worker/taier-worker-plugin/spark/yarn3-hdfs3-spark210/spark-yarn-client/src/main/resources/META-INF/services/com.dtstack.taier.pluginapi.client.IClient rename to taier-worker/taier-worker-plugin/spark/yarn2-hdfs2-spark320/src/main/resources/META-INF/services/com.dtstack.taier.pluginapi.client.IClient diff --git a/taier-worker/taier-worker-plugin/spark/yarn3-hdfs3-spark210/pom.xml b/taier-worker/taier-worker-plugin/spark/yarn3-hdfs3-spark210/pom.xml index 54e980ed47..496fd14e28 100644 --- a/taier-worker/taier-worker-plugin/spark/yarn3-hdfs3-spark210/pom.xml +++ b/taier-worker/taier-worker-plugin/spark/yarn3-hdfs3-spark210/pom.xml @@ -13,19 +13,166 @@ taier-worker-plugin.spark.yarn3-hdfs3-spark210 taier-worker-plugin.spark.yarn3-hdfs3-spark210 - pom + jar + spark-yarn-client + yarn3-hdfs3-spark210 UTF-8 2.1.3 2.11.7 - - spark-yarn-client - spark-sql-proxy - + + + com.dtstack.taier + taier-worker-plugin.spark.yarn-hdfs-spark210-core + 1.0.0 + + + + org.apache.spark + spark-hive_2.11 + ${spark.version} + + + log4j + apache-log4j-extras + + + + + org.apache.spark + spark-core_2.11 + ${spark.version} + + + org.apache.hadoop + hadoop-confHdfsPath + + + org.apache.hadoop + hadoop-common + + + org.apache.hadoop + hadoop-client + + + - \ No newline at end of file + + org.apache.spark + spark-yarn_2.11 + ${spark.version} + + + org.apache.hadoop + hadoop-yarn-common + + + org.apache.hadoop + hadoop-yarn-api + + + + hadooop-yarn-server-web-proxy + org.apache.hadoop + + + + + + + org.apache.hadoop + hadoop-hdfs + ${hadoop3.version} + + + xml-apis + xml-apis + + + + + + org.apache.hadoop + hadoop-common + ${hadoop3.version} + + + + org.apache.hadoop + hadoop-client + ${hadoop3.version} + + + + org.apache.hadoop + hadoop-yarn-common + ${hadoop3.version} + + + + org.apache.hadoop + hadoop-yarn-api + ${hadoop3.version} + + + + org.apache.hadoop + hadoop-yarn-client + ${hadoop3.version} + + + + xerces + xercesImpl + 2.9.1 + + + xml-apis + xml-apis + + + + + + + + + + src/main/java + + + src/main/resources + + + + + + net.alchim31.maven + scala-maven-plugin + + + + org.apache.maven.plugins + maven-compiler-plugin + + + + org.apache.maven.plugins + maven-shade-plugin + + + + org.apache.maven.plugins + maven-antrun-plugin + + + + + + diff --git a/taier-worker/taier-worker-plugin/spark/yarn3-hdfs3-spark210/spark-sql-proxy/pom.xml b/taier-worker/taier-worker-plugin/spark/yarn3-hdfs3-spark210/spark-sql-proxy/pom.xml deleted file mode 100644 index 1a866b4156..0000000000 --- a/taier-worker/taier-worker-plugin/spark/yarn3-hdfs3-spark210/spark-sql-proxy/pom.xml +++ /dev/null @@ -1,130 +0,0 @@ - - - - taier-worker-plugin.spark.yarn3-hdfs3-spark210 - com.dtstack.taier - 1.0.0 - ../pom.xml - - 4.0.0 - - taier-worker-plugin.spark.yarn3-hdfs3-spark210.spark-sql-proxy - taier-worker-plugin.spark.yarn3-hdfs3-spark210.spark-sql-proxy - - - spark-sql-proxy - yarn3-hdfs3-spark210 - 1.5.0-SNAPSHOT - - - jar - - - - com.dtstack.taier - taier-worker-plugin.spark.yarn2-hdfs2-spark210-core.spark-sql-proxy-core - 1.0.0 - - - commons-lang3 - org.apache.commons - - - - - - org.apache.spark - spark-sql_2.11 - ${spark.version} - provided - - - - org.apache.spark - spark-hive-thriftserver_2.11 - ${spark.version} - provided - - - - org.apache.spark - spark-hive_2.11 - ${spark.version} - provided - - - org.apache.hive - hive-exec - - - - - - - - - - 
org.apache.maven.plugins - maven-shade-plugin - 3.0.0 - - - package - - shade - - - false - - - org.slf4j - - - - - *:* - - META-INF/*.SF - META-INF/*.DSA - META-INF/*.RSA - - - - - - - - - - maven-antrun-plugin - 1.2 - - - copy-resources - - package - - run - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/taier-worker/taier-worker-plugin/spark/yarn3-hdfs3-spark210/spark-yarn-client/pom.xml b/taier-worker/taier-worker-plugin/spark/yarn3-hdfs3-spark210/spark-yarn-client/pom.xml deleted file mode 100644 index 19f5635b40..0000000000 --- a/taier-worker/taier-worker-plugin/spark/yarn3-hdfs3-spark210/spark-yarn-client/pom.xml +++ /dev/null @@ -1,215 +0,0 @@ - - - - taier-worker-plugin.spark.yarn3-hdfs3-spark210 - com.dtstack.taier - 1.0.0 - ../pom.xml - - 4.0.0 - - taier-worker-plugin.spark.yarn3-hdfs3-spark210.spark-yarn-client - taier-worker-plugin.spark.yarn3-hdfs3-spark210.spark-yarn-client - - - spark-yarn-client - yarn3-hdfs3-spark210 - - - jar - - - - com.dtstack.taier - taier-worker-plugin.spark.yarn2-hdfs2-spark210-core.spark-yarn-client-core - 1.0.0 - - - - - org.apache.spark - spark-hive_2.11 - ${spark.version} - - - - org.apache.spark - spark-core_2.11 - ${spark.version} - - - hadoop-confHdfsPath - org.apache.hadoop - - - hadoop-common - org.apache.hadoop - - - hadoop-client - org.apache.hadoop - - - - - - org.apache.spark - spark-yarn_2.11 - ${spark.version} - - - hadoop-yarn-common - org.apache.hadoop - - - hadoop-yarn-api - org.apache.hadoop - - - - org.apache.hadoop - hadooop-yarn-server-web-proxy - - - - - - - - org.apache.hadoop - hadoop-hdfs - ${hadoop3.version} - - - - org.apache.hadoop - hadoop-common - ${hadoop3.version} - - - - org.apache.hadoop - hadoop-client - ${hadoop3.version} - - - - org.apache.hadoop - hadoop-yarn-common - ${hadoop3.version} - - - - org.apache.hadoop - hadoop-yarn-api - ${hadoop3.version} - - - - org.apache.hadoop - hadoop-yarn-client - ${hadoop3.version} - - - - - - - - src/main/java - - - src/main/resources - - - - - - org.apache.maven.plugins - maven-shade-plugin - 3.0.0 - - - package - - shade - - - - false - true - ${project.basedir}/target/dependency-reduced-pom.xml - - true - - - - - - - - META-INF/MANIFEST.MF - MANIFEST.MF - - - - - - org.slf4j:slf4j-log4j12 - log4j:log4j - org.slf4j:slf4j-api - netty-all:io.netty - - - - - *:* - - META-INF/*.SF - META-INF/*.DSA - META-INF/*.RSA - - - - - - - - - - maven-antrun-plugin - 1.2 - - - copy-resources - - package - - run - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/taier-worker/taier-worker-plugin/spark/yarn3-hdfs3-spark210/src/main/resources/META-INF/services/com.dtstack.taier.pluginapi.client.IClient b/taier-worker/taier-worker-plugin/spark/yarn3-hdfs3-spark210/src/main/resources/META-INF/services/com.dtstack.taier.pluginapi.client.IClient new file mode 100644 index 0000000000..fb1cae8c55 --- /dev/null +++ b/taier-worker/taier-worker-plugin/spark/yarn3-hdfs3-spark210/src/main/resources/META-INF/services/com.dtstack.taier.pluginapi.client.IClient @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +com.dtstack.taier.sparkyarn.sparkyarn.SparkYarnClient diff --git a/taier-worker/taier-worker-plugin/spark/yarn3-hdfs3-spark320/pom.xml b/taier-worker/taier-worker-plugin/spark/yarn3-hdfs3-spark320/pom.xml new file mode 100644 index 0000000000..365f06b45b --- /dev/null +++ b/taier-worker/taier-worker-plugin/spark/yarn3-hdfs3-spark320/pom.xml @@ -0,0 +1,161 @@ + + + 4.0.0 + 1.0.0 + + + taier-worker-plugin.spark + com.dtstack.taier + 1.0.0 + ../pom.xml + + + taier-worker-plugin.spark.yarn3-hdfs3-spark320 + jar + + + spark-yarn-client + yarn3-hdfs3-spark320 + UTF-8 + 3.2.2 + 2.12.8 + 3.2.2 + + + + + com.dtstack.taier + taier-worker-plugin.spark.yarn-hdfs-spark320-core + 1.0.0 + + + + + + org.apache.spark + spark-hive_2.12 + ${spark.version} + + + log4j + apache-log4j-extras + + + + + + org.apache.spark + spark-core_2.12 + ${spark.version} + + + org.apache.hadoop + hadoop-confHdfsPath + + + org.apache.hadoop + hadoop-common + + + org.apache.hadoop + hadoop-client + + + + + + org.apache.spark + spark-yarn_2.12 + ${spark.version} + + + org.apache.hadoop + hadoop-yarn-common + + + org.apache.hadoop + hadoop-yarn-api + + + + hadooop-yarn-server-web-proxy + org.apache.hadoop + + + + + + + org.apache.hadoop + hadoop-hdfs + ${hadoop3.version} + + + xerces + xercesImpl + + + xml-apis + xml-apis + + + + + + org.apache.hadoop + hadoop-common + ${hadoop3.version} + + + + + org.apache.hadoop + hadoop-client + ${hadoop3.version} + + + + org.apache.hadoop + hadoop-yarn-common + ${hadoop3.version} + + + + org.apache.hadoop + hadoop-yarn-api + ${hadoop3.version} + + + + org.apache.hadoop + hadoop-yarn-client + ${hadoop3.version} + + + + + + + + src/main/java + + + src/main/resources + + + + + + org.apache.maven.plugins + maven-shade-plugin + + + + org.apache.maven.plugins + maven-antrun-plugin + + + + diff --git a/taier-worker/taier-worker-plugin/spark/yarn3-hdfs3-spark320/src/main/resources/META-INF/services/com.dtstack.taier.pluginapi.client.IClient b/taier-worker/taier-worker-plugin/spark/yarn3-hdfs3-spark320/src/main/resources/META-INF/services/com.dtstack.taier.pluginapi.client.IClient new file mode 100644 index 0000000000..fb1cae8c55 --- /dev/null +++ b/taier-worker/taier-worker-plugin/spark/yarn3-hdfs3-spark320/src/main/resources/META-INF/services/com.dtstack.taier.pluginapi.client.IClient @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +com.dtstack.taier.sparkyarn.sparkyarn.SparkYarnClient diff --git a/taier-worker/taier-worker-server/pom.xml b/taier-worker/taier-worker-server/pom.xml index 3a541037a7..f66b197562 100644 --- a/taier-worker/taier-worker-server/pom.xml +++ b/taier-worker/taier-worker-server/pom.xml @@ -53,7 +53,7 @@ org.apache.maven.plugins maven-shade-plugin - 2.4.3 + 3.2.4 package diff --git a/website/docs/functions/component/spark.md b/website/docs/functions/component/spark.md index fa42bf85dd..94f5d68d62 100644 --- a/website/docs/functions/component/spark.md +++ b/website/docs/functions/component/spark.md @@ -49,7 +49,8 @@ Spark在自定义参数中添加Spark官方参数来调整任务提交参数信 :::caution **sparkSqlProxyPath**是Spark SQL任务运行的jar,需要将pluginLibs/yarn2-hdfs2-spark210/spark-sql-proxy.jar 手动上传到HDFS对应的目录 **sparkYarnArchive**是Spark SQL程序运行时加载的外部包,需要将spark目录下的jar包上传到对应HDFS目录 -> 我们选择的是[spark2.1.3b](https://archive.apache.org/dist/spark/spark-2.1.3/spark-2.1.3-bin-hadoop2.7.tgz) +> 我们选择的是[spark2.1.3](https://archive.apache.org/dist/spark/spark-2.1.3/spark-2.1.3-bin-hadoop2.7.tgz) +[spark3.2.0](https://archive.apache.org/dist/spark/spark-3.2.0/spark-3.2.0-bin-hadoop2.7.tgz) TDH、CDH等Hadoop集群 需要根据具体环境实际调整 ::: diff --git a/website/docs/functions/task/sync.md b/website/docs/functions/task/sync.md index 8ebfcc8fc8..7d0d4af160 100644 --- a/website/docs/functions/task/sync.md +++ b/website/docs/functions/task/sync.md @@ -99,7 +99,7 @@ Oracle通过用户名来标识Schema,如果需要同步其他Schema下的数 :::caution -Batcworks仅支持关系型数据库的普通数据类型,暂时不支持blob、clob、地理空间等特殊类型的数据读/写 +数据同步仅支持关系型数据库的普通数据类型,暂时不支持blob、clob、地理空间等特殊类型的数据读/写 ::: @@ -223,32 +223,32 @@ Batcworks仅支持关系型数据库的普通数据类型,暂时不支持blob | 数据库 | 源(读取) | 目标(写入) | | -------------- | ------------------------------------------------------------ | ------------------------------------------------------------ | -| MySQL | [doc](https://github.com/DTStack/flinkx/blob/master/docs/connectors/mysql/mysql-source.md) | [doc](https://github.com/DTStack/flinkx/blob/master/docs/connectors/mysql/mysql-sink.md) | +| MySQL | [doc](https://github.com/DTStack/chunjun/blob/master/docs/docs_zh/ChunJun连接器/mysql/mysql-source.md) | [doc](https://github.com/DTStack/chunjun/blob/master/docs/docs_zh/ChunJun连接器/mysql/mysql-sink.md) | | TiDB | 参考MySQL | 参考MySQL | -| Oracle | [doc](https://github.com/DTStack/flinkx/blob/master/docs/connectors/oracle/oracle-source.md) | [doc](https://github.com/DTStack/flinkx/blob/master/docs/connectors/oracle/oracle-sink.md) | -| SqlServer | [doc](https://github.com/DTStack/flinkx/blob/master/docs/connectors/sqlserver/sqlserver-source.md) | [doc](https://github.com/DTStack/flinkx/blob/master/docs/connectors/sqlserver/sqlserver-sink.md) | -| PostgreSQL | [doc](https://github.com/DTStack/flinkx/blob/master/docs/connectors/postgres/postgres-source.md) | [doc](https://github.com/DTStack/flinkx/blob/master/docs/connectors/postgres/postgres-sink.md) | -| DB2 | [doc](https://github.com/DTStack/flinkx/blob/master/docs/connectors/db2/db2-source.md) | [doc](https://github.com/DTStack/flinkx/blob/master/docs/connectors/db2/db2-sink.md) | -| ClickHouse | [doc](https://github.com/DTStack/flinkx/blob/master/docs/connectors/clickhouse/clickhouse-source.md) | [doc](https://github.com/DTStack/flinkx/blob/master/docs/connectors/clickhouse/clickhouse-sink.md) | -| Greenplum | [doc](https://github.com/DTStack/flinkx/blob/master/docs/connectors/greenplum/greenplum-source.md) | [doc](https://github.com/DTStack/flinkx/blob/master/docs/connectors/greenplum/greenplum-sink.md) | -| KingBase | 
[doc](https://github.com/DTStack/flinkx/blob/master/docs/connectors/kingbase/kingbase-source.md) | [doc](https://github.com/DTStack/flinkx/blob/master/docs/connectors/kingbase/kingbase-sink.md) | -| MongoDB | [doc](https://github.com/DTStack/flinkx/blob/master/docs/connectors/mongodb/mongodb-source.md) | [doc](https://github.com/DTStack/flinkx/blob/master/docs/connectors/mongodb/mongodb-sink.md) | -| SAP HANA | [doc](https://github.com/DTStack/flinkx/blob/master/docs/connectors/saphana/saphana-source.md) | [doc](https://github.com/DTStack/flinkx/blob/master/docs/connectors/saphana/saphana-sink.md) | -| ElasticSearch7 | [doc](https://github.com/DTStack/flinkx/blob/master/docs/connectors/elasticsearch7/es7-source.md) | [doc](https://github.com/DTStack/flinkx/blob/master/docs/connectors/elasticsearch7/es7-sink.md) | -| FTP | [doc](https://github.com/DTStack/flinkx/blob/master/docs/connectors/ftp/ftp-source.md) | [doc](https://github.com/DTStack/flinkx/blob/master/docs/connectors/ftp/ftp-sink.md) | -| HDFS | [doc](https://github.com/DTStack/flinkx/blob/master/docs/connectors/hdfs/hdfs-source.md) | [doc](https://github.com/DTStack/flinkx/blob/master/docs/connectors/hdfs/hdfs-sink.md) | -| Stream | [doc](https://github.com/DTStack/flinkx/blob/master/docs/connectors/stream/stream-source.md) | [doc](https://github.com/DTStack/flinkx/blob/master/docs/connectors/stream/stream-sink.md) | -| Redis | 不支持 | [doc](https://github.com/DTStack/flinkx/blob/master/docs/connectors/redis/redis-sink.md) | -| Hive | 参考HDFS | [doc](https://github.com/DTStack/flinkx/blob/master/docs/connectors/hive/hive-sink.md) | -| Solr | [doc](https://github.com/DTStack/flinkx/blob/master/docs/connectors/solr/solr-source.md) | [doc](https://github.com/DTStack/flinkx/blob/master/docs/connectors/solr/solr-sink.md) | -| File | [doc](https://github.com/DTStack/flinkx/blob/master/docs/connectors/file/file-source.md) | 不支持 | +| Oracle | [doc](https://github.com/DTStack/chunjun/blob/master/docs/docs_zh/ChunJun连接器/oracle/oracle-source.md) | [doc](https://github.com/DTStack/chunjun/blob/master/docs/docs_zh/ChunJun连接器/oracle/oracle-sink.md) | +| SqlServer | [doc](https://github.com/DTStack/chunjun/blob/master/docs/docs_zh/ChunJun连接器/sqlserver/sqlserver-source.md) | [doc](https://github.com/DTStack/chunjun/blob/master/docs/docs_zh/ChunJun连接器/sqlserver/sqlserver-sink.md) | +| PostgreSQL | [doc](https://github.com/DTStack/chunjun/blob/master/docs/docs_zh/ChunJun连接器/postgres/postgres-source.md) | [doc](https://github.com/DTStack/chunjun/blob/master/docs/docs_zh/ChunJun连接器/postgres/postgres-sink.md) | +| DB2 | [doc](https://github.com/DTStack/chunjun/blob/master/docs/docs_zh/ChunJun连接器/db2/db2-source.md) | [doc](https://github.com/DTStack/chunjun/blob/master/docs/docs_zh/ChunJun连接器/db2/db2-sink.md) | +| ClickHouse | [doc](https://github.com/DTStack/chunjun/blob/master/docs/docs_zh/ChunJun连接器/clickhouse/clickhouse-source.md) | [doc](https://github.com/DTStack/chunjun/blob/master/docs/docs_zh/ChunJun连接器/clickhouse/clickhouse-sink.md) | +| Greenplum | [doc](https://github.com/DTStack/chunjun/blob/master/docs/docs_zh/ChunJun连接器/greenplum/greenplum-source.md) | [doc](https://github.com/DTStack/chunjun/blob/master/docs/docs_zh/ChunJun连接器/greenplum/greenplum-sink.md) | +| KingBase | [doc](https://github.com/DTStack/chunjun/blob/master/docs/docs_zh/ChunJun连接器/kingbase/kingbase-source.md) | [doc](https://github.com/DTStack/chunjun/blob/master/docs/docs_zh/ChunJun连接器/kingbase/kingbase-sink.md) | +| MongoDB | 
[doc](https://github.com/DTStack/chunjun/blob/master/docs/docs_zh/ChunJun连接器/mongodb/mongodb-source.md) | [doc](https://github.com/DTStack/chunjun/blob/master/docs/docs_zh/ChunJun连接器/mongodb/mongodb-sink.md) | +| SAP HANA | [doc](https://github.com/DTStack/chunjun/blob/master/docs/docs_zh/ChunJun连接器/saphana/saphana-source.md) | [doc](https://github.com/DTStack/chunjun/blob/master/docs/docs_zh/ChunJun连接器/saphana/saphana-sink.md) | +| ElasticSearch7 | [doc](https://github.com/DTStack/chunjun/blob/master/docs/docs_zh/ChunJun连接器/elasticsearch7/es7-source.md) | [doc](https://github.com/DTStack/chunjun/blob/master/docs/docs_zh/ChunJun连接器/elasticsearch7/es7-sink.md) | +| FTP | [doc](https://github.com/DTStack/chunjun/blob/master/docs/docs_zh/ChunJun连接器/ftp/ftp-source.md) | [doc](https://github.com/DTStack/chunjun/blob/master/docs/docs_zh/ChunJun连接器/ftp/ftp-sink.md) | +| HDFS | [doc](https://github.com/DTStack/chunjun/blob/master/docs/docs_zh/ChunJun连接器/hdfs/hdfs-source.md) | [doc](https://github.com/DTStack/chunjun/blob/master/docs/docs_zh/ChunJun连接器/hdfs/hdfs-sink.md) | +| Stream | [doc](https://github.com/DTStack/chunjun/blob/master/docs/docs_zh/ChunJun连接器/stream/stream-source.md) | [doc](https://github.com/DTStack/chunjun/blob/master/docs/docs_zh/ChunJun连接器/stream/stream-sink.md) | +| Redis | 不支持 | [doc](https://github.com/DTStack/chunjun/blob/master/docs/docs_zh/ChunJun连接器/redis/redis-sink.md) | +| Hive | 参考HDFS | [doc](https://github.com/DTStack/chunjun/blob/master/docs/docs_zh/ChunJun连接器/hive/hive-sink.md) | +| Solr | [doc](https://github.com/DTStack/chunjun/blob/master/docs/docs_zh/ChunJun连接器/solr/solr-source.md) | [doc](https://github.com/DTStack/chunjun/blob/master/docs/docs_zh/ChunJun连接器/solr/solr-sink.md) | +| File | [doc](https://github.com/DTStack/chunjun/blob/master/docs/docs_zh/ChunJun连接器/file/file-source.md) | 不支持 | ### 同步任务参数 在同步任务的「环境参数」中,运行方式(`flinkTaskRunMode`)参数较为重要,任务运行方式有2种: - per_job:单独为任务创建flink yarn session,任务运行的资源有保障,提高任务运行稳定性 -- new:多个任务共用一个flink yarn session,默认new,适合小数据量同步,节约集群资源 +- session:多个任务共用一个flink yarn session,默认session,适合小数据量同步,节约集群资源 设置方式,在任务的「环境参数」中,修改/添加此参数 `## flinkTaskRunMode=new`,其中 ##标识为注释状态,用户需要取消注释才能生效 @@ -260,7 +260,7 @@ Batcworks仅支持关系型数据库的普通数据类型,暂时不支持blob ```properties ## 任务运行方式: ## per_job:单独为任务创建flink yarn session,适用于低频率,大数据量同步 -## session:多个任务共用一个flink yarn session,适用于高频率、小数据量同步,默认per_job +## session:多个任务共用一个flink yarn session,适用于高频率、小数据量同步,默认session ## flinkTaskRunMode=per_job ## per_job模式下jobManager配置的内存大小,默认1024(单位M) ## jobmanager.memory.mb=1024 diff --git a/website/docs/quickstart/deploy/docker.md b/website/docs/quickstart/deploy/docker.md index 3bf7891bb9..27219cc38a 100644 --- a/website/docs/quickstart/deploy/docker.md +++ b/website/docs/quickstart/deploy/docker.md @@ -61,6 +61,6 @@ $ docker-compose up -d ::: :::caution -由于docker镜像大小问题,Chunjun、Flink插件包相关文件并未打包到容器内,有使用Flink相关功能,需要下载Chunjun、Flink插件包。自行挂载相关目录,并在[Flink组件](./functions/component/flink.md) +由于docker镜像大小问题,Chunjun、Flink插件包相关文件并未打包到容器内,有使用Flink相关功能,需要下载Chunjun、Flink插件包。自行挂载相关目录,并在[Flink组件](./functions/component/flink-on-yarn.md) 上配置对应目录 ::: \ No newline at end of file diff --git a/website/docs/quickstart/rely.md b/website/docs/quickstart/rely.md index c9f6e0a073..3415206e1f 100644 --- a/website/docs/quickstart/rely.md +++ b/website/docs/quickstart/rely.md @@ -39,8 +39,11 @@ Flink组件的flinkLibDir配置依赖Flink相关的jar包,建议参考文档 - [Spark](https://spark.apache.org/) - Spark SQL 任务运行依赖 - 版本:`spark2.1.3` - - 地址: https://archive.apache.org/dist/spark/spark-2.1.3/spark-2.1.3-bin-hadoop2.7.tgz + - 地址: 
https://archive.apache.org/dist/spark/spark-2.1.3/spark-2.1.3-bin-hadoop2.7.tgz + - 版本:`spark3.2.0` + - 地址: https://archive.apache.org/dist/spark/spark-3.2.0/spark-3.2.0-bin-hadoop2.7.tgz :::tip -Spark组件的sparkYarnArchive配置依赖spark相关的jar包,建议参考文档目录进行下载配置 +Spark组件的sparkYarnArchive配置依赖spark相关的jar包,建议参考文档目录进行下载配置 +Hadoop集群版本为3及以上时,请下载Hadoop3相关版本的Spark包 :::
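As a companion to the dependency notes above, the sketch below shows one way to stage the downloaded Spark distribution on HDFS for the `sparkYarnArchive` setting, plus the `spark-sql-proxy.jar` upload described in the Spark component docs. It is a minimal sketch, assuming an edge node with an HDFS client: the target paths (`/taier/sparkjars/spark320`, `/taier/pluginLibs/yarn3-hdfs3-spark320`) and the `yarn3-hdfs3-spark320` plugin directory name are illustrative placeholders extrapolated from the `yarn2-hdfs2-spark210` example, not paths mandated by Taier — use whatever directories your `sparkYarnArchive` and `sparkSqlProxyPath` values actually point to.

```bash
# Minimal sketch; all HDFS paths below are placeholders.

# 1. Fetch and unpack the Spark 3.2.0 distribution listed above
#    (for a Hadoop 3 cluster, pick the matching Hadoop 3 build instead, per the tip above).
wget https://archive.apache.org/dist/spark/spark-3.2.0/spark-3.2.0-bin-hadoop2.7.tgz
tar -zxf spark-3.2.0-bin-hadoop2.7.tgz

# 2. Upload the jars from the distribution's jars/ directory to the HDFS
#    directory configured as sparkYarnArchive.
hdfs dfs -mkdir -p /taier/sparkjars/spark320
hdfs dfs -put spark-3.2.0-bin-hadoop2.7/jars/* /taier/sparkjars/spark320/

# 3. Upload the spark-sql-proxy jar built by the worker plugin to the HDFS
#    path configured as sparkSqlProxyPath (plugin directory name is a placeholder,
#    patterned after the yarn2-hdfs2-spark210 example in the docs).
hdfs dfs -mkdir -p /taier/pluginLibs/yarn3-hdfs3-spark320
hdfs dfs -put pluginLibs/yarn3-hdfs3-spark320/spark-sql-proxy.jar /taier/pluginLibs/yarn3-hdfs3-spark320/
```

The exact jar layout Taier expects for each Spark plugin should be confirmed against the Spark component documentation referenced in this patch; the commands above only illustrate the download-and-upload steps those docs describe.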