Skip to content

Commit

Permalink
Merge branch 'Azure:master' into master
Browse files Browse the repository at this point in the history
  • Loading branch information
wolfgang-desalvador authored Jan 20, 2023
2 parents 9b22d8b + 9803b8d commit 2b459de
Show file tree
Hide file tree
Showing 7 changed files with 44 additions and 17 deletions.
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,12 @@ in CycleCloud by specifying the PBSPro OSS version.

Note: When using the cluster that is shipped with CycleCloud, the autoscaler and default queues are already installed.

First, download the installer pkg from GitHub. For example, you can download the [2.0.16 release here](https://github.com/Azure/cyclecloud-pbspro/releases/download/2.0.16/cyclecloud-pbspro-pkg-2.0.16.tar.gz)
First, download the installer pkg from GitHub. For example, you can download the [2.0.17 release here](https://github.com/Azure/cyclecloud-pbspro/releases/download/2.0.17/cyclecloud-pbspro-pkg-2.0.17.tar.gz)

```bash
# Prerequisite: Python 3.6 or newer must be installed and available on the PATH
wget https://github.com/Azure/cyclecloud-pbspro/releases/download/2.0.16/cyclecloud-pbspro-pkg-2.0.16.tar.gz
tar xzf cyclecloud-pbspro-pkg-2.0.16.tar.gz
wget https://github.com/Azure/cyclecloud-pbspro/releases/download/2.0.17/cyclecloud-pbspro-pkg-2.0.17.tar.gz
tar xzf cyclecloud-pbspro-pkg-2.0.17.tar.gz
cd cyclecloud-pbspro
# Optional, but recommended. Adds relevant resources and enables strict placement
./initialize_pbs.sh
Expand Down
5 changes: 1 addition & 4 deletions package.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from subprocess import check_call
from typing import Dict, List, Optional

SCALELIB_VERSION = "0.2.11"
SCALELIB_VERSION = "0.2.13"
CYCLECLOUD_API_VERSION = "8.1.0"


Expand Down Expand Up @@ -140,9 +140,6 @@ def _add(name: str, path: Optional[str] = None, mode: Optional[int] = None) -> N
assert False

for fil in os.listdir(build_dir):
if fil.startswith("certifi-20"):
print("WARNING: Ignoring duplicate certifi {}".format(fil))
continue
path = os.path.join(build_dir, fil)
_add("packages/" + fil, path)

Expand Down
4 changes: 2 additions & 2 deletions pbspro/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from setuptools.command.test import Command
from setuptools.command.test import test as TestCommand # noqa: N812

__version__ = "2.0.16"
__version__ = "2.0.17"
CWD = os.path.dirname(os.path.abspath(__file__))


Expand Down Expand Up @@ -127,7 +127,7 @@ def run(self) -> None:
"../notices",
]
},
install_requires=["typing_extensions==3.7.4.3", "certifi==2020.12.5", "requests==2.24.0", "zipp==3.6"],
install_requires=["typing_extensions==3.7.4.3", "certifi==2022.12.07", "requests==2.24.0", "zipp==3.6"],
tests_require=["pytest==3.2.3"],
cmdclass={"test": PyTest, "format": Formatter, "types": TypeChecking},
url="http://www.cyclecomputing.com",
Expand Down
38 changes: 34 additions & 4 deletions pbspro/src/pbspro/driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,29 @@ def ungrouped(node: Node) -> str:
return str(not bool(node.placement_group)).lower()

node_mgr.add_default_resource({}, "ungrouped", ungrouped)
pbsnodes_response = self.pbscmd.pbsnodes_parsed("-a")
by_hostname = partition(
pbsnodes_response, lambda x: x.get("name")
)

for node in node_mgr.get_nodes():
# close out any failed nodes up front
if node.state == "Failed":
node.closed = True

if not node.hostname:
continue

# nodes whose pbsnodes record has a ccnodeid and whose comment starts with
# "cyclecloud keep offline" are assigned keep_offline and closed off from
# further assignment
pbsnodes_record = by_hostname.get(node.hostname)

if pbsnodes_record and pbsnodes_record[0].get("resources_available.ccnodeid"):
comment = pbsnodes_record[0].get("comment", "")
if comment.startswith("cyclecloud keep offline"):
node.assign("keep_offline")
node.closed = True
continue

def validate_nodes(
self, scheduler_nodes: List[SchedulerNode], cc_nodes: List[Node]
Expand Down Expand Up @@ -266,6 +289,7 @@ def add_nodes_to_cluster(self, nodes: List[Node]) -> List[Node]:
if node.delayed_node_id.node_id in ignored_node_ids:
node.metadata["pbs_state"] = "removed!"
continue

if not node.hostname:
continue

Expand All @@ -275,6 +299,10 @@ def add_nodes_to_cluster(self, nodes: List[Node]) -> List[Node]:
if node.state == "Failed":
continue

# special handling of "keep_offline" created during preprocess_node_mgr
if "keep_offline" in node.assignments:
continue

node_id = node.delayed_node_id.node_id

if not node_id:
Expand Down Expand Up @@ -316,9 +344,6 @@ def add_nodes_to_cluster(self, nodes: List[Node]) -> List[Node]:
ndicts = self.pbscmd.qmgr_parsed("list", "node", node.hostname)
if ndicts and ndicts[0].get("resources_available.ccnodeid"):
comment = ndicts[0].get("comment", "")
if comment.startswith("cyclecloud keep offline"):
node.assign("keep_offline")
continue

if "offline" in ndicts[0].get("state", "") and (
comment.startswith("cyclecloud offline")
Expand Down Expand Up @@ -458,9 +483,14 @@ def _handle_draining(
try:
self.pbscmd.pbsnodes(node.hostname)
except CalledProcessError as e:
if "Error: Unknown node" in str(e):

if "Error: Unknown node" in str(e.stderr):
ret.append(node)
continue
else:
logging.warning(
f"Unexpected failure while running 'pbsnodes {node.hostname}' - {e.stderr}"
)
try:
self.pbscmd.pbsnodes(
"-o", node.hostname, "-C", "cyclecloud offline"
Expand Down
4 changes: 2 additions & 2 deletions project.ini
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@
name = pbspro
label = OpenPBS
type = scheduler
version = 2.0.16
version = 2.0.17
autoupgrade = true

[blobs]
Files = cyclecloud-pbspro-pkg-2.0.16.tar.gz, hwloc-libs-1.11.9-3.el8.x86_64.rpm, pbspro-execution-18.1.4-0.x86_64.rpm, pbspro-server-18.1.4-0.x86_64.rpm, pbspro-client-18.1.4-0.x86_64.rpm, openpbs-client-20.0.1-0.x86_64.rpm, openpbs-server-20.0.1-0.x86_64.rpm, openpbs-execution-20.0.1-0.x86_64.rpm
Files = cyclecloud-pbspro-pkg-2.0.17.tar.gz, hwloc-libs-1.11.9-3.el8.x86_64.rpm, pbspro-execution-18.1.4-0.x86_64.rpm, pbspro-server-18.1.4-0.x86_64.rpm, pbspro-client-18.1.4-0.x86_64.rpm, openpbs-client-20.0.1-0.x86_64.rpm, openpbs-server-20.0.1-0.x86_64.rpm, openpbs-execution-20.0.1-0.x86_64.rpm

[spec server]
run_list = role[pbspro_server_role]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
#
#

default[:pbspro][:autoscale_version] = "2.0.16"
default[:pbspro][:autoscale_version] = "2.0.17"
default[:pbspro][:autoscale_installer] = "cyclecloud-pbspro-pkg-#{node[:pbspro][:autoscale_version]}.tar.gz"
default[:pbspro][:version] = "20.0.1-0"
default[:pbspro][:slots] = nil
Expand Down
2 changes: 1 addition & 1 deletion specs/default/chef/site-cookbooks/pbspro/metadata.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
license "MIT"
description "Installs/Configures Open PBS Pro"
long_description IO.read(File.join(File.dirname(__FILE__), 'README.md'))
version "2.0.16"
version "2.0.17"
depends "tandem"
%w{ cganglia cshared cuser cyclecloud }.each {|c| depends c}

0 comments on commit 2b459de

Please sign in to comment.