Skip to content

Commit

Permalink
Merge pull request #6 from gfieni/master
Browse files Browse the repository at this point in the history
Add flag for resource selection with OAR properties
  • Loading branch information
rouvoy authored Oct 7, 2016
2 parents f41e627 + aabb5c7 commit f90cb4b
Show file tree
Hide file tree
Showing 3 changed files with 154 additions and 91 deletions.
56 changes: 45 additions & 11 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,19 @@ A Docker Machine driver for the Grid5000 testbed infrastructure. It will provisi

You need a Grid5000 account to use this driver. See [this page](https://www.grid5000.fr/mediawiki/index.php/Grid5000:Get_an_account) to create an account.

## Installation
## Installation from GitHub releases
Binary releases are available for Linux, MacOS and Windows on the [releases page](https://github.com/Spirals-Team/docker-machine-driver-g5k/releases).
You can use the usual commands to install or upgrade the driver :

```bash
# download the binary for your OS
sudo curl -L -o /usr/local/bin/docker-machine-driver-g5k "<link to release>"

# grant execution rights on the driver to everyone:
sudo chmod +x /usr/local/bin/docker-machine-driver-g5k
```

## Installation from sources
*This procedure was tested on Ubuntu 16.04 and MacOS.*

To use the Go tools, you need to set your [GOPATH](https://golang.org/doc/code.html#GOPATH) environment variable.
Expand All @@ -35,15 +47,26 @@ Please follow the instructions on the [Grid5000 Wiki](https://www.grid5000.fr/me
### Options
The driver needs a few options to create a machine. Here is a list of options:

| Option | Description | Default value | Required |
|--------------------------|---------------------------------------|-----------------------|------------|
| `--g5k-username` | Your Grid5000 account username | | Yes |
| `--g5k-password` | Your Grid5000 account password | | Yes |
| `--g5k-site` | Site to reserve the resources on | | Yes |
| `--g5k-walltime` | Timelife of the machine | "1:00:00" | No |
| `--g5k-ssh-private-key` | Path of your ssh private key | "~/.ssh/id_rsa" | No |
| `--g5k-ssh-public-key` | Path of your ssh public key | "< private-key >.pub" | No |
| `--g5k-image` | Name of the image to deploy | "jessie-x64-min" | No |
| Option | Description | Default value | Required |
|------------------------------|---------------------------------------------------------|-----------------------|------------|
| `--g5k-username` | Your Grid5000 account username | | Yes |
| `--g5k-password` | Your Grid5000 account password | | Yes |
| `--g5k-site` | Site to reserve the resources on | | Yes |
| `--g5k-walltime` | Timelife of the machine | "1:00:00" | No |
| `--g5k-ssh-private-key` | Path of your ssh private key | "~/.ssh/id_rsa" | No |
| `--g5k-ssh-public-key` | Path of your ssh public key | "< private-key >.pub" | No |
| `--g5k-image` | Name of the image to deploy | "jessie-x64-min" | No |
| `--g5k-resource-properties` | Resource selection with OAR properties (SQL format) | | No |

#### Resource properties
You can use [OAR properties](http://oar.imag.fr/docs/2.5/user/usecases.html#using-properties) to only select a node that matches your hardware requirements.
If you give incorrect properties or no resource matches your request, you will get this error :

```bash
Error with pre-create check: "[G5K_api] request failed: 400 Bad Request."
```

More information about the usage of OAR properties is available on the [Grid5000 Wiki](https://www.grid5000.fr/mediawiki/index.php/Advanced_OAR#Other_examples_using_properties).

### Example
An example of node provisioning :
Expand All @@ -53,7 +76,18 @@ docker-machine create -d g5k \
--g5k-username user \
--g5k-password ******** \
--g5k-site lille \
--g5k-walltime 2:45:00 \
--g5k-ssh-private-key ~/.ssh/g5k-key \
test-node
```

An example with resource properties (a node in cluster 'chimint' with more than 8 GB of RAM and at least 4 CPU cores):

```bash
docker-machine create -d g5k \
--g5k-username user \
--g5k-password ******** \
--g5k-site lille \
--g5k-ssh-private-key ~/.ssh/g5k-key \
--g5k-resource-properties "cluster = 'chimint' and memnode > 8192 and cpucore >= 4" \
test-node
```
163 changes: 92 additions & 71 deletions api/jobs.go
Original file line number Diff line number Diff line change
@@ -1,108 +1,129 @@
package api

import (
"encoding/json"
"fmt"
"time"
"encoding/json"
"fmt"
"time"
)

// JobRequest is the JSON payload that SubmitJob marshals and posts to the
// site's jobs URL in order to request a new job.
type JobRequest struct {
// Resources is the resource request string; SubmitJob fills it with
// "nodes=1,walltime=<walltime>".
Resources string `json:"resources"`
// Command is the command submitted with the job; SubmitJob uses
// "sleep <seconds>", where seconds is the walltime converted to seconds.
Command string `json:"command"`
// Properties carries the OAR resource-selection properties given by the
// user. Because of omitempty, the key is left out of the JSON when the
// string is empty.
Properties string `json:"properties,omitempty"`
// Types lists the job types; SubmitJob always sends ["deploy"].
Types []string `json:"types"`
}

type Job struct {
Uid int `json:"uid"`
State string `json:"state"`
Timelife int `json:"walltime"`
Types []string `json:"types"`
StartTime int `json:"started_at"`
Links []Link `json:"links"`
Nodes []string `json:"assigned_nodes"`
Uid int `json:"uid"`
State string `json:"state"`
Timelife int `json:"walltime"`
Types []string `json:"types"`
StartTime int `json:"started_at"`
Links []Link `json:"links"`
Nodes []string `json:"assigned_nodes"`
}

// convertDuration takes a string in "hh:mm:ss" format and converts it to seconds
func convertDuration(t string) (int, error) {
var h, m, s int
var h, m, s int

if _, err := fmt.Sscanf(t, "%d:%d:%d", &h, &m, &s); err != nil {
return 0, err
}
if _, err := fmt.Sscanf(t, "%d:%d:%d", &h, &m, &s); err != nil {
return 0, err
}

return (h * 3600) + (m * 60) + s, nil
return (h * 3600) + (m * 60) + s, nil
}

// Submit a job on G5K. Returns the job ID.
func (a *Api) SubmitJob(walltime string) (int, error) {
urlSubmit := fmt.Sprintf("%s/sites/%s/jobs", G5kApiFrontend, a.Site)
seconds, err := convertDuration(walltime)
if err != nil {
return 0, err
}
params := fmt.Sprintf(`{"resources": "nodes=1,walltime=%s", "command": "sleep %v", "types": ["deploy"]}`, walltime, seconds)
var job Job
var resp []byte

if resp, err = a.post(urlSubmit, params); err != nil {
return 0, err
} else {
err = json.Unmarshal(resp, &job)
return job.Uid, err
}
func (a *Api) SubmitJob(walltime, resourceProperties string) (int, error) {
urlSubmit := fmt.Sprintf("%s/sites/%s/jobs", G5kApiFrontend, a.Site)

seconds, err := convertDuration(walltime)
if err != nil {
return 0, err
}

// create a new Job request (1 node)
params, err := json.Marshal(JobRequest{
Resources: fmt.Sprintf("nodes=1,walltime=%s", walltime),
Command: fmt.Sprintf("sleep %v", seconds),
Properties: resourceProperties,
Types: []string{"deploy"},
})

if err != nil {
return 0, err
}

var job Job
var resp []byte

if resp, err = a.post(urlSubmit, string(params)); err != nil {
return 0, err
} else {
err = json.Unmarshal(resp, &job)
return job.Uid, err
}
}

// Refresh job's state
func (a *Api) GetJob(jobId int) (*Job, error) {
job := new(Job)
url := fmt.Sprintf("%s/sites/%s/jobs/%v", G5kApiFrontend, a.Site, jobId)

if resp, err := a.get(url); err != nil {
return &Job{}, err
} else {
err = json.Unmarshal(resp, &job)
return job, err
}
job := new(Job)
url := fmt.Sprintf("%s/sites/%s/jobs/%v", G5kApiFrontend, a.Site, jobId)

if resp, err := a.get(url); err != nil {
return &Job{}, err
} else {
err = json.Unmarshal(resp, &job)
return job, err
}
}

// Returns the job's current state
func (a *Api) GetJobState(jobId int) (string, error) {
if job, err := a.GetJob(jobId); err != nil {
return "", err
} else if a.jobIsOver(job) {
return "terminated", nil
} else {
return job.State, nil
}
if job, err := a.GetJob(jobId); err != nil {
return "", err
} else if a.jobIsOver(job) {
return "terminated", nil
} else {
return job.State, nil
}
}

// Returns true if the job expired, false otherwise
func (a *Api) jobIsOver(job *Job) bool {
currentTime := time.Now().Unix()
startTime := int64(job.StartTime)
timelife := int64(job.Timelife)
currentTime := time.Now().Unix()
startTime := int64(job.StartTime)
timelife := int64(job.Timelife)

return (currentTime - startTime) >= timelife
return (currentTime - startTime) >= timelife
}

// Free the nodes allocated to the jobs
func (a *Api) KillJob(jobId int) error {
url := fmt.Sprintf("%s/sites/%s/jobs/%v", G5kApiFrontend, a.Site, jobId)
url := fmt.Sprintf("%s/sites/%s/jobs/%v", G5kApiFrontend, a.Site, jobId)

_, err := a.del(url)
_, err := a.del(url)

return err
return err
}

func (a *Api) waitJobIsReady(job *Job) bool {
var err error
tmp_job := new(Job)

for job.State == "waiting" || job.State == "tolaunch" || job.State == "launching" {
if tmp_job, err = a.GetJob(job.Uid); err != nil {
return false
}
*job = *tmp_job
time.Sleep(3*time.Second)
}

// If the launching failed
if job.State != "running" {
return false
} else {
return true
}
var err error
tmp_job := new(Job)

for job.State == "waiting" || job.State == "tolaunch" || job.State == "launching" {
if tmp_job, err = a.GetJob(job.Uid); err != nil {
return false
}
*job = *tmp_job
time.Sleep(3 * time.Second)
}

// If the launching failed
if job.State != "running" {
return false
} else {
return true
}
}
26 changes: 17 additions & 9 deletions driver/driver.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,15 @@ type Driver struct {
*drivers.BaseDriver
*api.Api

JobID int
G5kUsername string
G5kPassword string
G5kSite string
g5kWalltime string
g5kSSHPrivateKeyPath string
g5kSSHPublicKeyPath string
g5kImage string
JobID int
G5kUsername string
G5kPassword string
G5kSite string
g5kWalltime string
g5kSSHPrivateKeyPath string
g5kSSHPublicKeyPath string
g5kImage string
g5kResourceProperties string
}

// NewDriver creates and returns a new instance of the driver
Expand Down Expand Up @@ -94,6 +95,12 @@ func (d *Driver) GetCreateFlags() []mcnflag.Flag {
Usage: "Name of the image to deploy",
Value: "jessie-x64-min",
},

mcnflag.StringFlag{
Name: "g5k-resource-properties",
Usage: "Resource selection with OAR properties (SQL format)",
Value: "",
},
}
}

Expand All @@ -113,6 +120,7 @@ func (d *Driver) SetConfigFromFlags(opts drivers.DriverOptions) error {
}

d.g5kImage = opts.String("g5k-image")
d.g5kResourceProperties = opts.String("g5k-resource-properties")

// Docker Swarm
d.BaseDriver.SetSwarmConfigFromFlags(opts)
Expand Down Expand Up @@ -208,7 +216,7 @@ func (d *Driver) PreCreateCheck() (err error) {
client := d.getAPI()

log.Info("Submitting job...")
if d.JobID, err = client.SubmitJob(d.g5kWalltime); err != nil {
if d.JobID, err = client.SubmitJob(d.g5kWalltime, d.g5kResourceProperties); err != nil {
return err
}
log.Info("Nodes allocated and ready")
Expand Down

0 comments on commit f90cb4b

Please sign in to comment.