Skip to content

Commit

Permalink
WIP: allocate_nodes
Browse files Browse the repository at this point in the history
  • Loading branch information
tcjennings committed Jan 21, 2025
1 parent 61bb25b commit 27367cf
Show file tree
Hide file tree
Showing 6 changed files with 57 additions and 3 deletions.
4 changes: 4 additions & 0 deletions applications/cm-service/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,10 @@ Campaign Management for Rubin Data Release Production
| worker.butler.dbUrl | string | `nil` | URL of a Butler Registry Database |
| worker.butler.dbUsername | string | `nil` | Name of a user to use with the Butler Registry Database |
| worker.butler.mountPath | string | `nil` | If specified, location for Butler config file to be injected into worker containers |
| worker.htcondor.allocateNodes.contentsCondorInfo | string | `nil` | If specified, contents of the condor-info.py file used by allocate_nodes |
| worker.htcondor.allocateNodes.contentsExecConfig | string | `nil` | If specified, contents of the execConfig.py file used by allocate_nodes |
| worker.htcondor.allocateNodes.contentsSlurmConfig | string | `nil` | If specified, contents of the slurmConfig.py file used by allocate_nodes |
| worker.htcondor.allocateNodes.mountPath | string | `nil` | If specified, location for ctrl_exec config files for use with allocate_nodes |
| worker.htcondor.config.contents | string | `nil` | If specified, contents of htcondor config file to be injected into worker containers |
| worker.htcondor.config.mountPath | string | `nil` | If specified, location for htcondor config file to be injected into worker containers |
| worker.htcondor.fsRemoteDir.storage | string | `"1Gi"` | Minimum storage requested in the condor remote area PVC |
Expand Down
1 change: 1 addition & 0 deletions applications/cm-service/templates/configmap.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,4 @@ data:
HTCONDOR__condor_submit_bin: /opt/htcondor/bin/condor_submit
HTCONDOR__condor_q_bin: /opt/htcondor/bin/condor_q
HTCONDOR__alias_path: /sdf/group/rubin/{{ .Values.config.outputVolume.subPath }}
CTRL_PLATFORM_S3DF_DIR: /home/lsstsvc1/.config/ctrl_exec
8 changes: 8 additions & 0 deletions applications/cm-service/templates/worker-configmap.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,11 @@ data:
username: {{ .dbUsername }}
password: ${env:DAF_BUTLER_PASSWORD}
{{- end }}
{{- /*
  ctrl_exec configuration files used by allocate_nodes, stored as ConfigMap
  keys. Each value is emitted as a literal block scalar. `nindent` only accepts
  a string, so an unset (null) value is coerced to "" and renders as an empty
  entry instead of aborting the chart render.
*/}}
{{- with .Values.worker.htcondor.allocateNodes }}
  contentsExecConfig: |
    {{- .contentsExecConfig | default "" | nindent 4 }}
  contentsSlurmConfig: |
    {{- .contentsSlurmConfig | default "" | nindent 4 }}
  contentsCondorInfo: |
    {{- .contentsCondorInfo | default "" | nindent 4 }}
{{- end }}
19 changes: 16 additions & 3 deletions applications/cm-service/templates/worker-deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -77,9 +77,12 @@ spec:
{{- /*
  Mount for the condor remote area. `subPath` is written exactly once, and only
  when a value is configured, so the mapping never carries a duplicate or
  empty `subPath` key.
*/}}
{{- with .Values.worker.htcondor.fsRemoteDir }}
- mountPath: {{ .mountPath }}
  name: "condor-remote-volume"
  {{- with .subPath }}
  subPath: {{ . }}
  {{- end }}
{{- end }}
{{- /*
  Read-only mount of the ctrl_exec config files used by allocate_nodes.
  The mount name must match the volume declared under `volumes:`
  ("ctrl-exec-config-volume"). The mount is emitted only when a mountPath is
  configured, because a mount without a path is not a valid container spec.
*/}}
{{- with .Values.worker.htcondor.allocateNodes }}
{{- with .mountPath }}
- mountPath: {{ . | quote }}
  name: "ctrl-exec-config-volume"
  readOnly: true
{{- end }}
{{- end }}
volumes:
- name: output-volume
Expand All @@ -94,6 +97,16 @@ spec:
items:
- key: config
path: condor-config
# ctrl_exec config files for allocate_nodes, projected from the worker ConfigMap.
- name: ctrl-exec-config-volume
  configMap:
    name: {{ $.Chart.Name }}-config-worker
    # The keys below are rendered into the ConfigMap only when
    # worker.htcondor.allocateNodes is set; marking the source optional keeps
    # volume setup from failing in environments where they are absent.
    optional: true
    items:
      - key: contentsExecConfig
        path: execConfig.py
      - key: contentsSlurmConfig
        path: slurmConfig.py
      - key: contentsCondorInfo
        path: condor-info.py
- name: condor-remote-volume
persistentVolumeClaim:
claimName: {{ $.Chart.Name }}-htcondor
Expand Down
15 changes: 15 additions & 0 deletions applications/cm-service/values-usdf-cm-dev.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -36,3 +36,18 @@ worker:
storageClassName: "sdf-group-rubin"
subPath: "services/htcondor/shared"
mountPath: "/sdf/group/rubin/services/htcondor/shared"
allocateNodes:
  # Contents of condor-info.py
  contentsCondorInfo: |
    config.platform["s3df"].user.name="lsstsvc1"
    config.platform["s3df"].user.home="/sdf/home/l/lsstsvc1"
  # Contents of execConfig.py
  contentsExecConfig: |
    config.platform.nodeSetRequired = False
    config.platform.localScratch = "$USER_SCRATCH/condor_scratch"
    config.platform.fileSystemDomain = "slac.stanford.edu"
    config.platform.scheduler = "slurm"
  # Contents of slurmConfig.py
  contentsSlurmConfig: |
    config.platform.queue = "$QUEUE"
    config.platform.scratchDirectory = "$USER_SCRATCH/condor_scratch"
    config.platform.loginHostName = "sdfiana012.sdf.slac.stanford.edu"
    config.platform.utilityPath = "/usr/sbin/"
  # Directory inside the worker container where the files above are mounted
  mountPath: /home/lsstsvc1/.config/ctrl_exec/etc/config
13 changes: 13 additions & 0 deletions applications/cm-service/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,19 @@ worker:
# -- If specified, sub-path within bound PV to be mounted as condor remote area
subPath: null

# Defaults for the ctrl_exec configuration used by allocate_nodes; every
# setting is unset (null) unless an environment values file overrides it.
allocateNodes:
  # -- If specified, contents of the condor-info.py file used by allocate_nodes
  contentsCondorInfo: null

  # -- If specified, contents of the execConfig.py file used by allocate_nodes
  contentsExecConfig: null

  # -- If specified, contents of the slurmConfig.py file used by allocate_nodes
  contentsSlurmConfig: null

  # -- If specified, location for ctrl_exec config files for use with allocate_nodes
  mountPath: null

# The following will be set by parameters injected by Argo CD and should not
# be set in the individual environment values files.
global:
Expand Down

0 comments on commit 27367cf

Please sign in to comment.