---
# A unique identifier for the head node and workers of this cluster.
cluster_name: default

# The maximum number of worker nodes to launch in addition to the head
# node.
max_workers: 2

# The autoscaler will scale up the cluster faster with higher upscaling speed.
# E.g., if the task requires adding more nodes then autoscaler will gradually
# scale up the cluster in chunks of upscaling_speed*currently_running_nodes.
# This number should be > 0.
upscaling_speed: 1.0

# This executes all commands on all nodes in the docker container,
# and opens all the necessary ports to support the Ray cluster.
# Empty string means disabled.
docker:
  image: "orfs-autotuner:latest"
  container_name: "ray_container"
  # If true, pulls latest version of image. Otherwise, `docker run` will only pull the image
  # if no cached version is present.
  pull_before_run: false

# If a node is idle for this many minutes, it will be removed.
idle_timeout_minutes: 5

# Cloud-provider specific configuration.
provider:
  type: gcp
  region: us-west1
  availability_zone: us-west1-a
  project_id: foss-fpga-tools-ext-openroad

# How Ray will authenticate with newly launched nodes.
auth:
  ssh_user: ubuntu

available_node_types:
  ray_head_default:
    resources: {"CPU": 2}
    node_config:
      machineType: n1-standard-2
      disks:
        - boot: true
          autoDelete: true
          type: PERSISTENT
          initializeParams:
            diskSizeGb: 50
            sourceImage: projects/deeplearning-platform-release/global/images/common-cpu-v20240922
  ray_worker_small:
    # The minimum number of worker nodes of this type to launch.
    # This number should be >= 0.
    min_workers: 1
    # The maximum number of worker nodes of this type to launch.
    # This takes precedence over min_workers.
    max_workers: 2
    # The resources provided by this node type.
    resources: {"CPU": 2}
    node_config:
      machineType: n1-standard-2
      disks:
        - boot: true
          autoDelete: true
          type: PERSISTENT
          initializeParams:
            diskSizeGb: 50
            sourceImage: projects/deeplearning-platform-release/global/images/common-cpu-v20240922
      # scheduling:
      #   - preemptible: true
      # Uncomment this to launch workers with the Service Account of the Head Node
      # serviceAccounts:
      #   - email: ray-autoscaler-sa-v1@<project_id>.iam.gserviceaccount.com
      #     scopes:
      #       - https://www.googleapis.com/auth/cloud-platform

# Specify the node type of the head node (as configured above).
head_node_type: ray_head_default

# Files or directories to copy to the head and worker nodes. The format is a
# dictionary from REMOTE_PATH: LOCAL_PATH, e.g.
file_mounts: {
  # "/path1/on/remote/machine": "/path1/on/local/machine",
  # "/path2/on/remote/machine": "/path2/on/local/machine",
}

# Files or directories to copy from the head node to the worker nodes. The format is a
# list of paths. The same path on the head node will be copied to the worker node.
# This behavior is a subset of the file_mounts behavior. In the vast majority of cases
# you should just use file_mounts. Only use this if you know what you're doing!
cluster_synced_files: []

# Whether changes to directories in file_mounts or cluster_synced_files in the head node
# should sync to the worker node continuously
file_mounts_sync_continuously: false

# Patterns for files to exclude when running rsync up or rsync down
rsync_exclude:
  - "**/.git"
  - "**/.git/**"

# Pattern files to use for filtering out files when running rsync up or rsync down. The file is searched for
# in the source directory and recursively through all subdirectories. For example, if .gitignore is provided
# as a value, the behavior will match git's behavior for finding and using .gitignore files.
rsync_filter:
  - ".gitignore"

initialization_commands:
  - curl -fsSL https://get.docker.com -o get-docker.sh
  - sudo sh get-docker.sh
  - sudo usermod -aG docker $USER
  - sudo systemctl restart docker -f

# List of shell commands to run to set up nodes.
setup_commands: []
# Note: if you're developing Ray, you probably want to create a Docker image that
# has your Ray repo pre-cloned. Then, you can replace the pip installs
# below with a git checkout <your_sha> (and possibly a recompile).
# To run the nightly version of ray (as opposed to the latest), either use a rayproject docker image
# that has the "nightly" (e.g. "rayproject/ray-ml:nightly-gpu") or uncomment the following line:
# - pip install -U "ray[default] @ https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-3.0.0.dev0-cp37-cp37m-manylinux2014_x86_64.whl"

# Custom commands that will be run on the head node after common setup.
head_setup_commands:
  - pip install google-api-python-client==1.7.8

# Custom commands that will be run on worker nodes after common setup.
worker_setup_commands: []

# Command to start ray on the head node. You don't need to change this.
head_start_ray_commands:
  - ray stop
  - >-
    ray start
    --head
    --port=6379
    --object-manager-port=8076
    --autoscaling-config=~/ray_bootstrap_config.yaml

# Command to start ray on worker nodes. You don't need to change this.
worker_start_ray_commands:
  - ray stop
  - >-
    ray start
    --address=$RAY_HEAD_IP:6379
    --object-manager-port=8076