Skip to content

Commit

Permalink
fix cluster threshold logs (#337)
Browse files Browse the repository at this point in the history
* replace linkis_cg_lm_across_cluster_rule with linkis_ps_configuration_across_cluster_rule

* fix cross cluster demo

* add cluster resource threshold

* set cluster resource threshold value

* update threshold name

* add cluster threshold log
  • Loading branch information
lemonjuicelove authored Nov 8, 2023
1 parent 9659533 commit 7e831b8
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 11 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,11 @@ object AMConfiguration {

val ACROSS_CLUSTER_MEMORY_PERCENTAGE_THRESHOLD = "MemoryPercentageThreshold"

val ACROSS_CLUSTER_TOTAL_MEMORY_PERCENTAGE_THRESHOLD: Double = CommonVars("linkis.yarn.across.cluster.memory.threshold", 0.8).getValue

val ACROSS_CLUSTER_TOTAL_CPU_PERCENTAGE_THRESHOLD: Double = CommonVars("linkis.yarn.across.cluster.cpu.threshold", 0.8).getValue
val ACROSS_CLUSTER_TOTAL_MEMORY_PERCENTAGE_THRESHOLD: Double =
CommonVars("linkis.yarn.across.cluster.memory.threshold", 0.8).getValue

val ACROSS_CLUSTER_TOTAL_CPU_PERCENTAGE_THRESHOLD: Double =
CommonVars("linkis.yarn.across.cluster.cpu.threshold", 0.8).getValue

val ECM_ADMIN_OPERATIONS = CommonVars("wds.linkis.governance.admin.operations", "")

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -105,11 +105,18 @@ class DriverAndYarnReqResourceService(
val (clusterMaxCapacity, clusterUsedCapacity) =
(clusterYarnResource.getMaxResource, clusterYarnResource.getUsedResource)

val clusterCPUPercentageThreshold =
AMConfiguration.ACROSS_CLUSTER_TOTAL_CPU_PERCENTAGE_THRESHOLD
val clusterMemoryPercentageThreshold =
AMConfiguration.ACROSS_CLUSTER_TOTAL_MEMORY_PERCENTAGE_THRESHOLD

// Log the per-rule thresholds together with the cluster-wide percentage thresholds
// that are handed to the cross-cluster rule check. Every segment ends with ", " so
// adjacent key/value pairs stay separated in the emitted line.
logger.info(
  s"user: $user, creator: $creator task enter cross cluster resource judgment, " +
    s"CPUThreshold: $CPUThreshold, MemoryThreshold: $MemoryThreshold, " +
    s"CPUPercentageThreshold: $CPUPercentageThreshold, MemoryPercentageThreshold: $MemoryPercentageThreshold, " +
    s"clusterCPUPercentageThreshold: $clusterCPUPercentageThreshold, clusterMemoryPercentageThreshold: $clusterMemoryPercentageThreshold"
)

try {
AcrossClusterRulesJudgeUtils.acrossClusterRuleCheck(
queueLeftResource.asInstanceOf[YarnResource],
Expand All @@ -120,7 +127,9 @@ class DriverAndYarnReqResourceService(
CPUThreshold.toInt,
MemoryThreshold.toInt,
CPUPercentageThreshold.toDouble,
MemoryPercentageThreshold.toDouble
MemoryPercentageThreshold.toDouble,
clusterCPUPercentageThreshold,
clusterMemoryPercentageThreshold
)
} catch {
case ex: Exception =>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,9 @@ object AcrossClusterRulesJudgeUtils extends Logging {
leftCPUThreshold: Int,
leftMemoryThreshold: Int,
CPUPercentageThreshold: Double,
MemoryPercentageThreshold: Double
MemoryPercentageThreshold: Double,
clusterCPUPercentageThreshold: Double,
clusterMemoryPercentageThreshold: Double
): Unit = {
if (
leftResource != null && usedResource != null && maxResource != null && clusterMaxCapacity != null && clusterUsedCapacity != null
Expand All @@ -44,13 +46,9 @@ object AcrossClusterRulesJudgeUtils extends Logging {
.asInstanceOf[Double] / clusterMaxCapacity.queueCores.asInstanceOf[Double]
val clusterUsedMemoryPercentage = clusterUsedCapacity.queueMemory
.asInstanceOf[Double] / clusterMaxCapacity.queueMemory.asInstanceOf[Double]
val clusterCPUPercentageThreshold =
AMConfiguration.ACROSS_CLUSTER_TOTAL_CPU_PERCENTAGE_THRESHOLD
val clusterMemoryPercentageThreshold =
AMConfiguration.ACROSS_CLUSTER_TOTAL_MEMORY_PERCENTAGE_THRESHOLD

if (
clusterUsedCPUPercentage > clusterCPUPercentageThreshold && clusterUsedMemoryPercentage > clusterMemoryPercentageThreshold
clusterUsedCPUPercentage > clusterCPUPercentageThreshold || clusterUsedMemoryPercentage > clusterMemoryPercentageThreshold
) {
throw new RMWarnException(
RMErrorCode.ACROSS_CLUSTER_RULE_FAILED.getErrorCode,
Expand Down

0 comments on commit 7e831b8

Please sign in to comment.