Skip to content

Latest commit

 

History

History
255 lines (223 loc) · 23.4 KB

README.md

File metadata and controls

255 lines (223 loc) · 23.4 KB

Build AMI (AWS and Azure example)

# AWS-specific parameters
export AWS_ACCESS_KEY_ID=
export AWS_SECRET_ACCESS_KEY=

# Azure-specific parameters
export CP_AZURE_AUTH_LOCATION=
export CP_AZURE_RESOURCE_GROUP=

# Docker-specific parameters
export CP_DOCKER_DIST_USER=                                         # Optional, if non-default (lifescience/cloud-pipeline) dockerhub images will be used
export CP_DOCKER_DIST_PASS=                                         # Optional, if non-default (lifescience/cloud-pipeline) dockerhub images will be used
export CP_API_DIST_URL=                                             # Specify API distribution tarball URI. If not set - latest version will be used from https://s3.amazonaws.com/cloud-pipeline-oss-builds/builds/latest/develop/cloud-pipeline.latest.tgz

bash build.sh -aws eu-central-1,us-east-1 \                         # List of regions to build VM images in AWS. -im shall be set to "rebuild" to build images from scratch
              -az westeurope,centralus \                            # Same as -aws, but Azure environment
              -im ${PATH_TO_VM_IMAGES_MANIFEST} \                   # OR a path to a prebuilt VM images manifest. If both are not set - default manifest will be used (https://s3.amazonaws.com/cloud-pipeline-oss-builds/manifests/cloud-images-manifest.txt)
              -p ../workflows/pipe-templates/__SYSTEM/data_loader \ # Path to any packages that shall be included into the pipectl distr
              -p ../e2e/prerequisites \                             # E.g.: system data transfer pipeline or a list of users to register by default
              -p ../workflow/pipe-demo \                            # Path to the demo pipelines directory. If it is specified - pipelines will be registered, as defined in the corresponding spec.json
              -t \                                                  # Whether to include test docker images
              -v 0.15                                               # Cloud Pipeline distribution version (used to tag docker images)

Run pipectl

install

~/.pipe/pipectl install \
                # Docker distribution credentials
                -env CP_DOCKER_DIST_USER= \
                -env CP_DOCKER_DIST_PASS= \
                
                # Cluster SSH and network access
                -env CP_CLUSTER_SSH_PUB= \                          # Path to the SSH public key when deploying to Azure and GCP. For AWS use CP_PREF_CLUSTER_SSH_KEY_NAME
                -env CP_CLUSTER_SSH_KEY= \                          # Path to the SSH private key - required when deploying to all Clouds
                -env CP_PREF_CLUSTER_SSH_KEY_NAME= \                # Name of the SSH public key in AWS. Used only for AWS deployment, for Azure and GCP - use CP_CLUSTER_SSH_PUB
                -env CP_PREF_CLUSTER_INSTANCE_SECURITY_GROUPS= \    # 
                -env CP_PREF_CLUSTER_INSTANCE_IMAGE \               # Which VM image to use as a default for CPU-only workloads (if a VM manifest  for a current cloud provider exists - this is optional)
                -env CP_PREF_CLUSTER_INSTANCE_IMAGE_GPU \           # Which VM image to use as a default for GPU workloads (if a VM manifest for a current cloud provider exists - this is optional)
                -env CP_PREF_CLUSTER_INSTANCE_IMAGE_WIN \           # Which VM image to use as a default for Windows workloads (if a VM manifest for a current cloud provider exists - this is optional)

                # Cloud Provider credentials
                ## Common
                -env CP_CLOUD_CREDENTIALS_FILE= \                   # Cloud credentials can be specified as a file for any cloud provider (the only available option for GCP)
                ## AWS
                -env CP_AWS_ACCESS_KEY_ID= \                        # For AWS key id can be specified via environment variables
                -env CP_AWS_SECRET_ACCESS_KEY= \                    # For AWS key secret can be specified via environment variables
                -env CP_AWS_KMS_ARN= \
                -env CP_PREF_STORAGE_TEMP_CREDENTIALS_ROLE= \
                -env CP_DOCKER_STORAGE_ROOT_DIR= \                  # Root directory within a $CP_DOCKER_STORAGE_CONTAINER, used to store images blobs. If not set - "cloud-pipeline-${CP_DEPLOYMENT_ID}" will be used
                ## Azure
                -env CP_AZURE_PROFILE_FILE= \                       # Azure profile file from cli authentication
                -env CP_AZURE_ACCESS_TOKEN_FILE= \                  # Azure access token file from cli authentication
                -env CP_AZURE_STORAGE_ACCOUNT= \                    # Default storage account name, that will be used to manage BLOB/FS storages and persist docker images (if CP_DOCKER_STORAGE_TYPE=obj)
                -env CP_AZURE_STORAGE_KEY= \                        # Key for the default storage account (CP_AZURE_STORAGE_ACCOUNT)
                -env CP_AZURE_DEFAULT_RESOURCE_GROUP= \             # Which Azure resource group will be used by default
                -env CP_AZURE_OFFER_DURABLE_ID=\                    # 
                -env CP_AZURE_SUBSCRIPTION_ID=\                     # 

                # Core API
                -env CP_API_SRV_SAML_ID_TRAIL= \                    # SAML partner ID will be constructed as {CP_API_SRV_EXTERNAL_HOST}:{CP_API_SRV_EXTERNAL_PORT} and this parameter added in the end (default: /pipeline/)
                -env CP_API_SRV_SAML_AUTO_USER_CREATE= \            # Whether to register all users that have passed SAML authentication. Such users will be granted basic "ROLE_USER" permissions. The following values are available: AUTO (creates a new user if not exists), EXPLICIT (requires users pre-registration, performed by any admin), EXPLICIT_GROUP (requires specific groups pre-registration. If user's SAML groups have no intersections with registered groups the authentication will fail)
                -env CP_API_SRV_SAML_ALLOW_ANONYMOUS_USER= \        # Allows anonymous user access. Works in conjunction with EXPLICIT_GROUP strategy set via CP_API_SRV_SAML_AUTO_USER_CREATE.
                -env CP_API_SRV_SAML_USER_ATTRIBUTES= \             # Sets a list of the attributes, that will be parsed from the IdP SAML Response object and added to the user's profile. The value shall be comma-delimited list of "User_Attribute=IdP_Attribute" pairs (default: Email=email,FirstName=firstName,LastName=lastName,Name=firstName)
                -env CP_API_SRV_IDP_CERT_PATH= \                    # Allows to set the path to the directory containing IdP's signing certificate (idp-public-cert.pem). If not set - $CP_IDP_CERT_DIR will be used. This is useful if the IdP provides different signing certificate for different services
                -env CP_API_SRV_ANONYMOUS_URLS= \                   # Sets a list of urls that can be accessed by anonymous users if anonymous user access is enabled via CP_API_SRV_SAML_ALLOW_ANONYMOUS_USER.
                -env CP_PREF_CLUSTER_CADVISOR_DISABLE_PROXY= \      # Disables the proxy settings when API communicates to the cAdvisor service within worker nodes (Default: true)

                # GitLab
                -env CP_GITLAB_SSO_TARGET_URL= \                    # Sets idp_sso_target_url value of the gitlab.rb, if not defined - it will be constructed as "https://${CP_IDP_EXTERNAL_HOST}:${CP_IDP_EXTERNAL_PORT}${CP_GITLAB_SSO_TARGET_URL_TRAIL}"
                -env CP_GITLAB_SLO_TARGET_URL= \                    # Sets idp_slo_target_url value of the gitlab.rb, if not defined - it will be constructed as "https://${CP_IDP_EXTERNAL_HOST}:${CP_IDP_EXTERNAL_PORT}${CP_GITLAB_SLO_TARGET_URL_TRAIL}"
                -env CP_GITLAB_SSO_TARGET_URL_TRAIL= \              # Allows to add a trailing part to the idp_sso_target_url (default: "/saml/sso")
                -env CP_GITLAB_SLO_TARGET_URL_TRAIL= \              # Allows to add a trailing part to the idp_slo_target_url (default: "/saml/sso")
                -env CP_GITLAB_IDP_CERT_PATH= \                     # Allows to set the path to the directory containing IdP's signing certificate (idp-public-cert.pem). If not set - $CP_IDP_CERT_DIR will be used. This is useful if the IdP provides different signing certificate for different services
                -env CP_GITLAB_EXTERNAL_URL= \                      # Allows to specify a custom value for the gitlab's "external_url". This is used as a base URL for the repositories clone URLs. This value does not affect the gitlab's listen port. It will listen on $CP_GITLAB_INTERNAL_PORT (Default: https://${CP_GITLAB_INTERNAL_HOST}:${CP_GITLAB_INTERNAL_PORT})

                # SMTP notifications parameters
                -env CP_NOTIFIER_SMTP_SERVER_HOST= \
                -env CP_NOTIFIER_SMTP_SERVER_PORT= \
                -env CP_NOTIFIER_SMTP_FROM= \
                -env CP_NOTIFIER_SMTP_USER= \
                -env CP_NOTIFIER_SMTP_PASS= \

                # Docker registry
                -env CP_DOCKER_STORAGE_TYPE= \                      # Specify "obj" to use object storage backend for the docker registry (S3/Azure storage/GCS), otherwise - local filesystem will be used
                -env CP_DOCKER_STORAGE_CONTAINER="" \               # If CP_DOCKER_STORAGE_TYPE is set to "obj" - specify name of the object storage (bucket/container name)

                # Default administrator
                -env CP_DEFAULT_ADMIN_NAME= \
                -env CP_DEFAULT_ADMIN_PASS= \
                -env CP_DEFAULT_ADMIN_EMAIL= \
                
                # VM Monitor
                -env CP_VM_MONITOR_HOUR_INTERVAL= \                 # Specify interval in hours between VM Monitor checks. Value 1 (default) means that VM Monitor will check VMs each hour       
                -env CP_VM_MONITOR_INSTANCE_TAG_NAME= \             # VM Monitor will check status only of nodes labeled by this tag and value CP_VM_MONITOR_INSTANCE_TAG_VALUE 
                -env CP_VM_MONITOR_INSTANCE_TAG_VALUE= \            # VM Monitor will check status only of nodes labeled by tag CP_VM_MONITOR_INSTANCE_TAG_NAME and this value 
                -env CP_VM_MONITOR_TO_USER= \                       # Username that shall be notified when VM Monitor detects invalid VM state
                -env CP_VM_MONITOR_CC_USERS= \                      # Usernames that shall be additionally notified (cc) when VM Monitor detects invalid VM state

                # Share Service
                -env CP_SHARE_SRV_SAML_ID_TRAIL= \                  # SAML partner ID for Share Service will be constructed as {CP_SHARE_SRV_EXTERNAL_HOST}:{CP_SHARE_SRV_EXTERNAL_PORT} with this parameter appended at the end
                -env CP_SHARE_SRV_SAMPLE_ROLE_CLAIMS= \             # SAML claims that shall be used as ROLEs while parsing user info received from IDP
                -env CP_SHARE_SRV_IDP_CERT_PATH= \                    # Allows to set the path to the directory containing IdP's signing certificate (idp-public-cert.pem). If not set - $CP_IDP_CERT_DIR will be used. This is useful if the IdP provides different signing certificate for different services

                # EDGE Service
                -env CP_EDGE_WEB_CLIENT_MAX_SIZE= \                 # Sets the maximum file (request) size to be uploaded via the EDGE service, to remove the limit - set it to 0 (default: 500M)
                -env CP_EDGE_MAX_SSH_CONNECTIONS= \                 # Sets maximum number of the SSH connections to a single run (default: 25)

                # Storage Lifecycle Service
                -env CP_PREF_STORAGE_LIFECYCLE_SERVICE_AWS_ACCOUNT= \              # AWS account where lifecycle actions will be performed
                -env CP_PREF_STORAGE_LIFECYCLE_SERVICE_S3_ROLE_ARN= \              # AWS IAM Role that will be assigned by batch operation jobs when storage lifecycle actions are executed
                -env CP_PREF_STORAGE_LIFECYCLE_SERVICE_REPORT_BUCKET_PREFIX= \     # Prefix inside CP_PREF_STORAGE_SYSTEM_STORAGE_NAME bucket where all failed batch operations will store their results

                # Pipectl options
                -m|--install-kube-master \                          # Install kubernetes master
                -d|--docker \                                       # Limit images to be pushed during deployment
                -id|--deployment-id \                               # Specify unique ID of the deployment. It will be used to name cloud entities (e.g. path within a docker registry object container). If not set - random 10-char string will be generated
                -s|--service \                                      # Limit services to be installed (e.g. cp-idp, cp-api-srv, etc.)
                --keep-kubedm-proxies \                             # Allow (http/https/no)_proxy settings to be included into the kube-api manifest by kubeadm. If option is not set - variables will be dropped before the kubeadm init command and then restored

                # Templates customization
                -env CP_PREF_TEMPLATES_DIRECTORY_EXT \              # If defined, shall point to a directory with pipelines templates, which override the defaults (cloud-pipeline/workflows/pipe-templates)
                -env CP_PREF_TEMPLATES_FOLDER_DIRECTORY_EXT \        # If defined, shall point to a directory with folders templates (e.g. "Project" template), which override the defaults (cloud-pipeline/deploy/docker/cp-api-srv/folder-templates)
                -env CP_PREF_TEMPLATES_ERROR_PAGES_DIRECTORY_EXT \  # If defined, shall point to a directory with error pages templates, which override the defaults (cloud-pipeline/deploy/docker/cp-api-srv/error-pages)
                -env CP_ERROR_REDIRECT_URL \                        # Allows to specify a custom value for the Cloud Pipeline main page redirection url to use in the error pages placeholders (default: https://$CP_API_SRV_EXTERNAL_HOST:$CP_API_SRV_EXTERNAL_PORT/pipeline/)
                -env CP_ERROR_PLATFORM_NAME \                       # Allows to specify a custom value for the deployment name to use in the error pages placeholders (default: $CP_PREF_UI_PIPELINE_DEPLOYMENT_NAME from the install-config)
                -env CP_ERROR_SUPPORT_EMAIL \                       # Allows to specify a custom value for the admins' support email to use in the error pages placeholders (default: $CP_DEFAULT_ADMIN_EMAIL from the install-config)

                # Misc
                -env CP_PREF_STORAGE_SYSTEM_STORAGE_NAME= \         # Name of the object storage, that is used to store system-level data (e.g. issues attachments)
                -env CP_CLOUD_REGION_FILE_STORAGE_HOSTS= \
                -env CP_CUSTOM_USERS_SPEC= \                        # Specify json file with the users to be registered during installation (if pipectl was built using -p /e2e/prerequisites/users.json - test users will be created)
                -env CP_COMMON_SSL_SELF_SIGNED= \                   # Use self-signed or CA signed certificates for SSL (true or false, default: false)
                -env CP_KUBE_MASTER_DOCKER_PATH= \                  # Allows to override a location of the folder where docker stores its data. This is useful when docker generates too much I/O to the OS Disk and shall be pointed to another device mounted to a more custom location. If not defined - docker defaults are used.
                -env CP_KUBE_MASTER_ETCD_HOST_PATH= \               # Allows to override a location of the folder where etcd stores wal/data dirs. This is useful when etcd runs into I/O latency issues, which lead to the kube control plane failures, and shall be pointed to another device mounted to a more custom location. If not defined - /var/lib/etcd path will be used
                -env CP_KUBE_MIN_DNS_REPLICAS= \                    # Allows to configure a minimal number of DNS replicas for the cluster (default: 1). DNS will be autoscaled based on the size of a cluster (1 new replica for each 128 cores or 5 nodes)
                -env CP_KUBE_SERVICES_TYPE= \                       # Allows to select a preferred services mode type: "node-port" or "external-ip" (default: "node-port")

Examples

AWS - install all services

~/.pipe/pipectl   install \
            -env CP_AWS_KMS_ARN="arn:aws:kms:{region}:{account_id}:key/{key_id}" \
            -env CP_AWS_ACCESS_KEY_ID=ABCDEFGHIJKLMNOPQRST \
            -env CP_AWS_SECRET_ACCESS_KEY=abcdefghijklmnopqstuvwxyz1234567890ABCDE \
            -env CP_PREF_CLUSTER_SSH_KEY_NAME={deploykey_name} \
            -env CP_PREF_CLUSTER_INSTANCE_SECURITY_GROUPS="sg-123456789,sg-qwertyui" \
            -env CP_PREF_STORAGE_TEMP_CREDENTIALS_ROLE="arn:aws:iam::{account_id}:role/{role_name}" \
            -env CP_NOTIFIER_SMTP_SERVER_HOST="smtp.server.name" \
            -env CP_NOTIFIER_SMTP_SERVER_PORT={smtp_port} \
            -env CP_NOTIFIER_SMTP_FROM="{smtp_from_email}" \
            -env CP_NOTIFIER_SMTP_USER="{smtp_user}" \
            -env CP_NOTIFIER_SMTP_PASS="{smtp_password}" \
            -env CP_DEFAULT_ADMIN_EMAIL="{admin_email}" \
            -env CP_CLUSTER_SSH_KEY=/path/to/deploykey_name.pem \
            -env CP_PREF_STORAGE_SYSTEM_STORAGE_NAME=pipeline-system \
            -env CP_CLOUD_REGION_FILE_STORAGE_HOSTS="fs-123456789.efs.{region}.amazonaws.com" \
            -env CP_DOCKER_STORAGE_TYPE="obj" \
            -env CP_DOCKER_STORAGE_CONTAINER="{s3_bucket_name}" \
            -env CP_DEPLOYMENT_ID="my_cloud_pipeline" \
            -m

Azure - install all services

~/.pipe/pipectl   install \
            -env CP_CLUSTER_SSH_KEY=/path/to/deploykey_name.pem \
            -env CP_CLUSTER_SSH_PUB=/path/to/deploypub_name.pem \
            -env CP_CLOUD_CREDENTIALS_FILE=/path/to/az/credentials \
            -env CP_AZURE_STORAGE_ACCOUNT={storage_account} \
            -env CP_AZURE_STORAGE_KEY=ABCDEFGHI \
            -env CP_AZURE_DEFAULT_RESOURCE_GROUP={resource_group_name} \
            -env CP_AZURE_OFFER_DURABLE_ID=MS-AAA-0000A \
            -env CP_AZURE_SUBSCRIPTION_ID=12345678-1234-1234-1234-12345678 \
            -env CP_PREF_CLUSTER_INSTANCE_SECURITY_GROUPS="sg-123456789,sg-qwertyui" \
            -env CP_NOTIFIER_SMTP_SERVER_HOST="smtp.server.name" \
            -env CP_NOTIFIER_SMTP_SERVER_PORT={smtp_port} \
            -env CP_NOTIFIER_SMTP_FROM="{smtp_from_email}" \
            -env CP_NOTIFIER_SMTP_USER="{smtp_user}" \
            -env CP_NOTIFIER_SMTP_PASS="{smtp_password}" \
            -env CP_DEFAULT_ADMIN_EMAIL="{admin_email}" \
            -env CP_PREF_STORAGE_SYSTEM_STORAGE_NAME=pipeline-system \
            -env CP_CLOUD_REGION_FILE_STORAGE_HOSTS="{fs_name}.file.core.windows.net/{share_name}" \
            -env CP_DOCKER_STORAGE_TYPE="obj" \
            -env CP_DOCKER_STORAGE_CONTAINER="{blob_container_name}" \
            -env CP_DEPLOYMENT_ID="my_cloud_pipeline" \
            -env CP_KUBE_MASTER_DOCKER_PATH="/docker/drive/" \
            -env CP_KUBE_MASTER_ETCD_HOST_PATH="/etcd/drive/" \
            -m

sync

This command syncs configuration between two Cloud Pipeline deployments.

Environment variables for controlling execution flow:

export CP_SYNC_USERS_METADATA_SKIP_KEYS= # describes keys, that shall be skipped during metadata sync in users sync routine (Optional, if not specified all metadata will be transferred without any filtering)
export CP_SYNC_TOOLS_METADATA_SKIP_KEYS= # the same, as described above, but for tools sync routine
export CP_SYNC_TOOLS_TRANSFER_POOL_SIZE= # number of threads to be used for tools transfer.
                                         # Note, that it boosts the sync speed, but results in higher storage volume
                                         # consumption, as all versions of a tool are pulled at first to 
                                         # a local docker and pushed after to remote registry
~/.pipe/pipectl   sync \
# Pipectl sync options
                --users \                       # enables users/groups synchronization
                --tools \                       # enables tools synchronization
                --source-url {url_a} \          # URL of `source` environment's API host
                --source-token {token_a} \      # JWT token to access `source` environment
                --target-url {url_b} \          # URL of `destination` environment's API host
                --target-token {token_b} \      # JWT token to access `destination` environment
                --docker-cmd {path_to_docker}   # specifies the command, that will be used instead of simple `docker` 
                                                # during tools synchronization
            

scrap

This command scraps an existing Cloud-Pipeline deployment and stores its configuration and settings in the output directory. It will scrap: Cloud-Pipeline system-properties, users, settings from the kubernetes config-map, the list of the docker tools, the list of installed services and the version of the Cloud-Pipeline.

~/.pipe/pipectl scrap \
# Pipectl scrap options
                -o   |--output-dir "<path_to_directory>" \                     # Path to the directory where configuration will be saved.
                -cpa |--cp-api-address "<https://cloud-pipeline-url.com>" \    # URL for Cloud-Pipeline REST API,
                -cpt |--cp-api-token "<token>" \                               # JWT token to use for authentication in Cloud-Pipeline.
                -nk  |--node-ssh-key "<path_to_key>" \                         # SSH key for connecting to the Cloud-Pipeline node. 
                -nu  |--node-user "<username>" \                               # (Optional) Username for SSH connections to the Cloud-Pipeline node.
                -na  |--node-address "<ip-address>"\                           # (Optional) Address (IP or DNS) for SSH to the Cloud-Pipeline node. 
                -sc  |--stored-configuration "<list_to_scrap>" \               # (Optional) Scrap only selected parts of the configuration. Possible values: system_preferences,users,tools
                -kn  |--kube-namespace "<default>" \                           # (Optional) Namespace name where configmap and services are located.
                -kc  |--kube-configmap "<configmap_name>" \                    # (Optional) Name of the kubernetes configmap to save data from.
                -f   |--force \                                                # Write to the directory even if it is not empty.
                -h   |--help                                                   # Prints help message.

deploy point-in-time configuration -pc

This flag deploys Cloud-Pipeline using a previously scrapped configuration. By default, it will scan the directory for configuration files and restore any configuration that was previously scrapped.

~/.pipe/pipectl install \
                -pc  "<path_to_directory>" \                                # Path to the directory with point-in-time configuration.
                -pcm "<list_of_modules>" \                                  # (Optional) List of the settings that will be imported to the new deployment. Possible values: services,system_preference,users,configmap,tools
                                                                            # If not set, all settings from the point-in-time configuration directory will be imported.
                -env CP_POINT_IN_TIME_CONFIGURATION_PREFERENCES_FILTEROUT \ # (Optional) Defines which preference values should be filtered out from point-in-time configuration, before apply it during install
                -env CP_POINT_IN_TIME_CONFIGURATION_CONFIGMAP_FILTEROUT     # (Optional) Defines which configMap values should be filtered out from point-in-time configuration, before apply it during install