diff --git a/admin/acceptance.py b/admin/acceptance.py index 84f2d52787..0c99249334 100644 --- a/admin/acceptance.py +++ b/admin/acceptance.py @@ -48,6 +48,7 @@ ) from flocker.provision._install import ( ManagedNode, + deconfigure_kubernetes, task_pull_docker_images, uninstall_flocker, install_flocker, @@ -305,7 +306,7 @@ def __init__(self, node_addresses, package_source, distribution, self.cert_path = cert_path self.logging_config = logging_config - def _upgrade_flocker(self, reactor, nodes, package_source): + def _upgrade_flocker(self, dispatcher, nodes, package_source): """ Put the version of Flocker indicated by ``package_source`` onto all of the given nodes. @@ -324,8 +325,6 @@ def _upgrade_flocker(self, reactor, nodes, package_source): :return: A ``Deferred`` that fires when the software has been upgraded. """ - dispatcher = make_dispatcher(reactor) - uninstalling = perform(dispatcher, uninstall_flocker(nodes)) uninstalling.addErrback(write_failure, logger=None) @@ -335,6 +334,7 @@ def install(ignored): install_flocker(nodes, package_source), ) installing = uninstalling.addCallback(install) + return installing def ensure_keys(self, reactor): @@ -349,13 +349,21 @@ def start_cluster(self, reactor): Don't start any nodes. Give back the addresses of the configured, already-started nodes. """ + dispatcher = make_dispatcher(reactor) if self.package_source is not None: upgrading = self._upgrade_flocker( - reactor, self._nodes, self.package_source + dispatcher, self._nodes, self.package_source ) else: upgrading = succeed(None) + deconfiguring_kubernetes = upgrading.addCallback( + lambda _ignored: perform( + dispatcher, + deconfigure_kubernetes(self._nodes), + ) + ) + def configure(ignored): return configured_cluster_for_nodes( reactor, @@ -375,7 +383,7 @@ def configure(ignored): provider="managed", logging_config=self.logging_config, ) - configuring = upgrading.addCallback(configure) + configuring = deconfiguring_kubernetes.addCallback(configure) return configuring def stop_cluster(self, reactor): @@ -1455,6 +1463,7 @@ def main(reactor, args, base_path, top_level): reached_finally = False def cluster_cleanup(): + return if not reached_finally: print "interrupted..." print "stopping cluster" diff --git a/build.yaml b/build.yaml index 6dddd6fa55..1082d32e69 100644 --- a/build.yaml +++ b/build.yaml @@ -1091,31 +1091,6 @@ job_type: timeout: 45 directories_to_delete: *run_acceptance_directories_to_delete - run_acceptance_loopback_on_AWS_Ubuntu_Trusty_for: - on_nodes_with_labels: 'aws-ubuntu-xenial-T2Medium' - with_modules: *run_full_acceptance_modules - with_steps: - - { type: 'shell', - cli: [ *hashbang, *add_shell_functions, - *cleanup, *setup_venv, *setup_flocker_modules, - *check_version, - 'export DISTRIBUTION_NAME=ubuntu-14.04', - *build_sdist, *build_package, - *build_repo_metadata, - *setup_authentication, - 'export ACCEPTANCE_TEST_MODULE=${MODULE}', - 'export ACCEPTANCE_TEST_PROVIDER=aws', - *run_acceptance_loopback_tests, - *convert_results_to_junit, - *clean_packages, - *exit_with_return_code_from_test ] - } - clean_repo: true - archive_artifacts: *acceptance_tests_artifacts_ubuntu_special_case - publish_test_results: true - timeout: 45 - directories_to_delete: *run_acceptance_directories_to_delete - run_acceptance_loopback_on_AWS_Ubuntu_Xenial_for: on_nodes_with_labels: 'aws-ubuntu-xenial-T2Medium' with_modules: *run_full_acceptance_modules @@ -1340,36 +1315,6 @@ job_type: directories_to_delete: [] notify_slack: '#nightly-builds' - run_acceptance_on_AWS_Ubuntu_Trusty_with_EBS: - at: '0 6 * * *' - # flocker.provision is responsible for creating the test nodes on - # so we can actually run run-acceptance-tests from GCE - on_nodes_with_labels: 'gce-ubuntu16' - with_steps: - - { type: 'shell', - cli: [ *hashbang, *add_shell_functions, - *cleanup, *setup_venv, *setup_flocker_modules, - *check_version, - 'export DISTRIBUTION_NAME=ubuntu-14.04', - *build_sdist, *build_package, - *build_repo_metadata, - *setup_authentication, - 'export ACCEPTANCE_TEST_MODULE=flocker.acceptance', - 'export ACCEPTANCE_TEST_PROVIDER=aws', - *run_acceptance_tests, - *convert_results_to_junit, - *clean_packages, - *exit_with_return_code_from_test ] - } - clean_repo: true - archive_artifacts: *acceptance_tests_artifacts_ubuntu_special_case - publish_test_results: true - # Similar to the reasoning for run_acceptance_on_AWS_CentOS_7_with_EBS - # but slightly shorter since Ubuntu runs the tests faster. - timeout: 90 - directories_to_delete: [] - notify_slack: '#nightly-builds' - run_acceptance_on_AWS_Ubuntu_Xenial_with_EBS: at: '0 6 * * *' # flocker.provision is responsible for creating the test nodes on @@ -1429,35 +1374,6 @@ job_type: directories_to_delete: [] notify_slack: '#nightly-builds' - run_acceptance_on_GCE_Ubuntu_Trusty_with_GCE: - at: '0 6 * * *' - # flocker.provision is responsible for creating the test nodes on - # so we can actually run run-acceptance-tests from GCE - on_nodes_with_labels: 'gce-ubuntu16' - with_steps: - - { type: 'shell', - cli: [ *hashbang, *add_shell_functions, - *cleanup, *setup_venv, *setup_flocker_modules, - *check_version, - 'export DISTRIBUTION_NAME=ubuntu-14.04', - *build_sdist, *build_package, - *build_repo_metadata, - *setup_authentication, - 'export ACCEPTANCE_TEST_MODULE=flocker.acceptance', - 'export ACCEPTANCE_TEST_PROVIDER=gce', - *run_acceptance_tests, - *convert_results_to_junit, - *clean_packages, - *exit_with_return_code_from_test ] - } - clean_repo: true - archive_artifacts: *acceptance_tests_artifacts_ubuntu_special_case - publish_test_results: true - # Reasoning as for run_acceptance_on_AWS_Ubuntu_Trusty_with_EBS - timeout: 90 - directories_to_delete: [] - notify_slack: '#nightly-builds' - run_acceptance_on_GCE_Ubuntu_Xenial_with_GCE: at: '0 6 * * *' # flocker.provision is responsible for creating the test nodes on @@ -1516,35 +1432,6 @@ job_type: directories_to_delete: [] notify_slack: '#nightly-builds' - run_acceptance_on_Rackspace_Ubuntu_Trusty_with_Cinder: - at: '0 6 * * *' - # flocker.provision is responsible for creating the test nodes on - # so we can actually run run-acceptance-tests from GCE - on_nodes_with_labels: 'gce-ubuntu16' - with_steps: - - { type: 'shell', - cli: [ *hashbang, *add_shell_functions, - *cleanup, *setup_venv, *setup_flocker_modules, - *check_version, - 'export DISTRIBUTION_NAME=ubuntu-14.04', - *build_sdist, *build_package, - *build_repo_metadata, - *setup_authentication, - 'export ACCEPTANCE_TEST_MODULE=flocker.acceptance', - 'export ACCEPTANCE_TEST_PROVIDER=rackspace', - *run_acceptance_tests, - *convert_results_to_junit, - *clean_packages, - *exit_with_return_code_from_test ] - } - clean_repo: true - archive_artifacts: *acceptance_tests_artifacts_ubuntu_special_case - publish_test_results: true - # Reasoning as for run_acceptance_on_AWS_Ubuntu_Trusty_with_EBS - timeout: 90 - directories_to_delete: [] - notify_slack: '#nightly-builds' - run_acceptance_on_Rackspace_Ubuntu_Xenial_with_Cinder: at: '0 6 * * *' # flocker.provision is responsible for creating the test nodes on diff --git a/flocker/acceptance/endtoend/test_kubernetesplugin.py b/flocker/acceptance/endtoend/test_kubernetesplugin.py new file mode 100644 index 0000000000..5536c2b75e --- /dev/null +++ b/flocker/acceptance/endtoend/test_kubernetesplugin.py @@ -0,0 +1,518 @@ +# Copyright ClusterHQ Inc. See LICENSE file for details. + +""" +Tests for the Flocker Kubernetes plugin. +""" +import os +import json +import yaml +from pyrsistent import PClass, field +from twisted.internet import reactor +from eliot import start_action +from eliot.twisted import DeferredContext +from ...testtools import AsyncTestCase, async_runner, random_name +from ..testtools import ( + require_cluster, ACCEPTANCE_TEST_TIMEOUT, check_and_decode_json, + create_dataset, +) + +from ...ca._validation import treq_with_ca +from twisted.web.http import ( + CREATED as HTTP_CREATED, + OK as HTTP_OK +) +from twisted.python.filepath import FilePath +FLOCKER_ROOT = FilePath(__file__).parent().parent().parent().parent() + +# Cached output of: +# curl ... https://kubernetes:6443/apis/extensions/v1beta1 +KUBERNETES_API_GROUPS = json.loads(""" +{"api": { + "kind": "APIResourceList", + "groupVersion": "v1", + "resources": [ + { + "name": "bindings", + "namespaced": true, + "kind": "Binding" + }, + { + "name": "componentstatuses", + "namespaced": false, + "kind": "ComponentStatus" + }, + { + "name": "configmaps", + "namespaced": true, + "kind": "ConfigMap" + }, + { + "name": "endpoints", + "namespaced": true, + "kind": "Endpoints" + }, + { + "name": "events", + "namespaced": true, + "kind": "Event" + }, + { + "name": "limitranges", + "namespaced": true, + "kind": "LimitRange" + }, + { + "name": "namespaces", + "namespaced": false, + "kind": "Namespace" + }, + { + "name": "namespaces/finalize", + "namespaced": false, + "kind": "Namespace" + }, + { + "name": "namespaces/status", + "namespaced": false, + "kind": "Namespace" + }, + { + "name": "nodes", + "namespaced": false, + "kind": "Node" + }, + { + "name": "nodes/proxy", + "namespaced": false, + "kind": "Node" + }, + { + "name": "nodes/status", + "namespaced": false, + "kind": "Node" + }, + { + "name": "persistentvolumeclaims", + "namespaced": true, + "kind": "PersistentVolumeClaim" + }, + { + "name": "persistentvolumeclaims/status", + "namespaced": true, + "kind": "PersistentVolumeClaim" + }, + { + "name": "persistentvolumes", + "namespaced": false, + "kind": "PersistentVolume" + }, + { + "name": "persistentvolumes/status", + "namespaced": false, + "kind": "PersistentVolume" + }, + { + "name": "pods", + "namespaced": true, + "kind": "Pod" + }, + { + "name": "pods/attach", + "namespaced": true, + "kind": "Pod" + }, + { + "name": "pods/binding", + "namespaced": true, + "kind": "Binding" + }, + { + "name": "pods/eviction", + "namespaced": true, + "kind": "Eviction" + }, + { + "name": "pods/exec", + "namespaced": true, + "kind": "Pod" + }, + { + "name": "pods/log", + "namespaced": true, + "kind": "Pod" + }, + { + "name": "pods/portforward", + "namespaced": true, + "kind": "Pod" + }, + { + "name": "pods/proxy", + "namespaced": true, + "kind": "Pod" + }, + { + "name": "pods/status", + "namespaced": true, + "kind": "Pod" + }, + { + "name": "podtemplates", + "namespaced": true, + "kind": "PodTemplate" + }, + { + "name": "replicationcontrollers", + "namespaced": true, + "kind": "ReplicationController" + }, + { + "name": "replicationcontrollers/scale", + "namespaced": true, + "kind": "Scale" + }, + { + "name": "replicationcontrollers/status", + "namespaced": true, + "kind": "ReplicationController" + }, + { + "name": "resourcequotas", + "namespaced": true, + "kind": "ResourceQuota" + }, + { + "name": "resourcequotas/status", + "namespaced": true, + "kind": "ResourceQuota" + }, + { + "name": "secrets", + "namespaced": true, + "kind": "Secret" + }, + { + "name": "serviceaccounts", + "namespaced": true, + "kind": "ServiceAccount" + }, + { + "name": "services", + "namespaced": true, + "kind": "Service" + }, + { + "name": "services/proxy", + "namespaced": true, + "kind": "Service" + }, + { + "name": "services/status", + "namespaced": true, + "kind": "Service" + } + ] +}, +"apis": { + "kind": "APIResourceList", + "groupVersion": "extensions/v1beta1", + "resources": [ + { + "name": "daemonsets", + "namespaced": true, + "kind": "DaemonSet" + }, + { + "name": "daemonsets/status", + "namespaced": true, + "kind": "DaemonSet" + }, + { + "name": "deployments", + "namespaced": true, + "kind": "Deployment" + }, + { + "name": "deployments/rollback", + "namespaced": true, + "kind": "DeploymentRollback" + }, + { + "name": "deployments/scale", + "namespaced": true, + "kind": "Scale" + }, + { + "name": "deployments/status", + "namespaced": true, + "kind": "Deployment" + }, + { + "name": "horizontalpodautoscalers", + "namespaced": true, + "kind": "HorizontalPodAutoscaler" + }, + { + "name": "horizontalpodautoscalers/status", + "namespaced": true, + "kind": "HorizontalPodAutoscaler" + }, + { + "name": "ingresses", + "namespaced": true, + "kind": "Ingress" + }, + { + "name": "ingresses/status", + "namespaced": true, + "kind": "Ingress" + }, + { + "name": "jobs", + "namespaced": true, + "kind": "Job" + }, + { + "name": "jobs/status", + "namespaced": true, + "kind": "Job" + }, + { + "name": "networkpolicies", + "namespaced": true, + "kind": "NetworkPolicy" + }, + { + "name": "replicasets", + "namespaced": true, + "kind": "ReplicaSet" + }, + { + "name": "replicasets/scale", + "namespaced": true, + "kind": "Scale" + }, + { + "name": "replicasets/status", + "namespaced": true, + "kind": "ReplicaSet" + }, + { + "name": "replicationcontrollers", + "namespaced": true, + "kind": "ReplicationControllerDummy" + }, + { + "name": "replicationcontrollers/scale", + "namespaced": true, + "kind": "Scale" + }, + { + "name": "thirdpartyresources", + "namespaced": false, + "kind": "ThirdPartyResource" + } + ] +}} +""") + + +FLOCKER_POD_TEMPLATE = """\ +apiVersion: v1 +kind: Pod +metadata: + name: flocker-web +spec: + containers: + - name: web + image: nginx + ports: + - name: web + containerPort: 80 + volumeMounts: + # name must match the volume name below + - name: www-root + mountPath: "/usr/share/nginx/html" + volumes: + - name: www-root + flocker: + datasetName: {flocker_volume_name} +""" + + +class KubernetesClient(PClass): + client = field() + baseurl = field() + token = field() + + def namespace_create(self, name): + namespace = { + "apiVersion": "v1", + "kind": "Namespace", + "metadata": { + "name": name, + } + } + d = self.client.post( + self.baseurl + b"/api/v1/namespaces", + json.dumps(namespace), + headers={ + b"content-type": b"application/json", + b"Authorization": b"Bearer {}".format(self.token), + }, + ) + d.addCallback(check_and_decode_json, HTTP_CREATED) + return d + + def namespace_delete(self, name): + d = self.client.delete( + self.baseurl + b"/api/v1/namespaces/" + name, + headers={ + b"content-type": b"application/json", + b"Authorization": b"Bearer {}".format(self.token), + }, + ) + d.addCallback(check_and_decode_json, HTTP_OK) + return d + + def _endpoint_url_for_resource(self, namespace, resource): + resource_group_version = resource["apiVersion"] + resource_kind = resource["kind"] + + # Lookup resource list + for first_url_segment, group_info in KUBERNETES_API_GROUPS.items(): + if group_info["groupVersion"] == resource_group_version: + break + else: + raise Exception( + "resource_group_version not recognized", + resource_group_version + ) + # Lookup the "kind" + for resource_meta in group_info["resources"]: + if resource_meta["kind"] == resource_kind: + break + else: + raise Exception( + "resource_kind not recognized", + resource_kind + ) + + return "/".join([ + self.baseurl, + first_url_segment, + resource_group_version, + "namespaces", + namespace, + resource_meta["name"] + ]) + + def create_resource(self, namespace, resource): + url = self._endpoint_url_for_resource(namespace, resource) + action = start_action( + action_type=u"create_resource", + namespace=namespace, + resource=resource, + url=url, + ) + + with action.context(): + d = self.client.post( + url, + json.dumps(resource), + headers={ + b"content-type": b"application/json", + b"Authorization": b"Bearer {}".format(self.token), + }, + ) + d = DeferredContext(d) + d.addCallback(check_and_decode_json, HTTP_CREATED) + d.addActionFinish() + return d.result + + +def kubernetes_client(reactor, api_address, api_port, token): + return KubernetesClient( + client=treq_with_ca( + reactor, + ca_path=FLOCKER_ROOT.descendant([".kube", "config", "ca.pem"]), + expected_common_name=u"kubernetes", + ), + baseurl=b"https://%s:%s" % (api_address, api_port), + token=token, + ) + + +def kubernetes_namespace_for_test(test, client): + """ + Create a unique Kubernetes namespace in which to create Kubernetes test + resources. The namespace will be deleted when the test completes. And + Kubernetes *should* then garbage collect all the resources in that + namespace. + XXX: Although it doesn't always seem to work: + https://github.com/kubernetes/kubernetes/issues/36891 + """ + # Namespace must be a DNS label and at most 63 characters + namespace_name = random_name(test) + namespace_name = namespace_name[-63:] + namespace_name = "-".join(filter(None, namespace_name.split("_"))) + namespace_name = namespace_name.lower() + + d = client.namespace_create(name=namespace_name) + + def delete_namespace(): + return client.namespace_delete(namespace_name) + + def setup_cleanup(ignored_result): + return + test.addCleanup(delete_namespace) + + d.addCallback(setup_cleanup) + d.addCallback(lambda _: namespace_name) + return d + + +class KubernetesPluginTests(AsyncTestCase): + """ + Tests for the Kubernetes plugin. + """ + run_tests_with = async_runner(timeout=ACCEPTANCE_TEST_TIMEOUT) + + @require_cluster(1) + def test_create_pod(self, cluster): + """ + A pod with a Flocker volume can be created. + """ + flocker_volume_name = random_name(self) + client = kubernetes_client( + reactor, + api_address=cluster.control_node.public_address, + api_port=6443, + token=os.environ["FLOCKER_ACCEPTANCE_KUBERNETES_TOKEN"] + ) + + d = kubernetes_namespace_for_test(self, client) + + def create_flocker_volume(namespace): + d = create_dataset( + test_case=self, + cluster=cluster, + metadata=dict( + name=flocker_volume_name + ) + ) + d.addCallback(lambda _ignored: namespace) + return d + d.addCallback(create_flocker_volume) + + def create_deployment(namespace): + return client.create_resource( + namespace, + yaml.safe_load( + FLOCKER_POD_TEMPLATE.format( + flocker_volume_name=flocker_volume_name + ) + ) + ) + d.addCallback(create_deployment) + return d diff --git a/flocker/acceptance/testtools.py b/flocker/acceptance/testtools.py index b353ad224b..e644a79063 100644 --- a/flocker/acceptance/testtools.py +++ b/flocker/acceptance/testtools.py @@ -874,7 +874,15 @@ def cleanup_all_containers(_): # they're left over from previous test; they might e.g. # have a volume bind-mounted, preventing its destruction. for container in client.containers(): - client.remove_container(container["Id"], force=True) + # Don't attempt to remove containers related to + # orchestration frameworks + protected_container = False + label_keys = container["Labels"].keys() + for key in label_keys: + if key.startswith("io.kubernetes."): + protected_container = True + if not protected_container: + client.remove_container(container["Id"], force=True) def cleanup_flocker_containers(_): cleaning_containers = api_clean_state( diff --git a/flocker/ca/_validation.py b/flocker/ca/_validation.py index 40f322edc8..3089eea57f 100644 --- a/flocker/ca/_validation.py +++ b/flocker/ca/_validation.py @@ -138,3 +138,38 @@ def treq_with_authentication(reactor, ca_path, user_cert_path, user_key_path): policy = ControlServicePolicy( ca_certificate=ca, client_credential=user_credential.credential) return HTTPClient(Agent(reactor, contextFactory=policy)) + + +@implementer(IPolicyForHTTPS) +class KubernetesPolicy(PClass): + """ + HTTPS TLS policy for validating the Kubernetes master identity. + + :ivar FlockerCredential client_credential: Client's certificate and + private key pair. + """ + ca_certificate = field(mandatory=True) + expected_common_name = field(mandatory=True, type=unicode) + + def creatorForNetloc(self, hostname, port): + return optionsForClientTLS( + self.expected_common_name, + trustRoot=self.ca_certificate, + ) + + +def treq_with_ca(reactor, ca_path, expected_common_name): + """ + Create a ``treq``-API object that trusts a custom certificate authority. + + :param reactor: The reactor to use. + :param FilePath ca_path: Absolute path to the public cluster certificate. + + :return: ``treq`` compatible object. + """ + ca = Certificate.loadPEM(ca_path.getContent()) + policy = KubernetesPolicy( + ca_certificate=ca, + expected_common_name=expected_common_name, + ) + return HTTPClient(Agent(reactor, contextFactory=policy)) diff --git a/flocker/provision/_install.py b/flocker/provision/_install.py index 18be8bfbf9..b3f435fc0f 100644 --- a/flocker/provision/_install.py +++ b/flocker/provision/_install.py @@ -13,6 +13,7 @@ from effect.retry import retry from time import time import yaml +from hashlib import sha256 from zope.interface import implementer @@ -132,6 +133,17 @@ def is_systemd_distribution(distribution): distribution == "ubuntu-16.04" ) +_distribution_to_package_format = { + "centos-7": "rpm", + "rhel-7.2": "rpm", + "ubuntu-16.04": "deb", + "ubuntu-14.04": "deb", +} + + +def package_format_for_distribution(distribution): + return _distribution_to_package_format[distribution] + def _from_args(sudo): """ @@ -817,7 +829,7 @@ def task_install_api_certificates(api_cert, api_key): ]) -def task_enable_docker(distribution): +def task_configure_docker(distribution): """ Configure docker. @@ -860,7 +872,6 @@ def task_enable_docker(distribution): ExecStart= ExecStart=/usr/bin/dockerd {} {} """.format(unixsock_opt, docker_tls_options))), - run_from_args(["systemctl", "enable", "docker.service"]), ]) elif is_ubuntu(distribution): return sequence([ @@ -873,6 +884,38 @@ def task_enable_docker(distribution): raise DistributionNotSupported(distribution=distribution) +def task_start_docker(distribution): + """ + Enable and (re)start the Docker daemon. + """ + if is_systemd_distribution(distribution): + commands = [ + run_from_args(["systemctl", "enable", "docker.service"]), + run_from_args(['systemctl', START, 'docker']), + ] + elif is_ubuntu(distribution): + commands = [ + run_from_args(['service', 'docker', 'restart']), + ] + return sequence(commands) + + +def task_start_kubelet(distribution): + """ + Enable and (re)start the Kubernetes Kubelet. + """ + if is_systemd_distribution(distribution): + commands = [ + run_from_args(['systemctl', 'enable', 'kubelet']), + run_from_args(['systemctl', START, 'kubelet']), + ] + elif is_ubuntu(distribution): + commands = [ + run_from_args(['service', 'kubelet', 'restart']), + ] + return sequence(commands) + + def open_firewalld(service): """ Open firewalld port for a service. @@ -962,16 +1005,57 @@ def task_enable_docker_plugin(distribution): return sequence([ run_from_args(['systemctl', 'enable', 'flocker-docker-plugin']), run_from_args(['systemctl', START, 'flocker-docker-plugin']), - run_from_args(['systemctl', START, 'docker']), ]) elif is_ubuntu(distribution): return sequence([ run_from_args(['service', 'flocker-docker-plugin', 'restart']), - run_from_args(['service', 'docker', 'restart']), ]) else: raise DistributionNotSupported(distribution=distribution) +# A systemd configuration snippet that compliments the systemd service +# configuration installed by kubeadm +KUBELET_FLOCKER_PLUGIN_SYSTEMD_CONFIG = """ +[Service] +EnvironmentFile=/etc/flocker/env +""" + +# A file containing the location of Flocker control service and certificates to +# allow the Kubernetes plugin to authenticate with the Flocker REST API. +ETC_FLOCKER_ENV_TEMPLATE = """ +FLOCKER_CONTROL_SERVICE_HOST={control_service_host} +FLOCKER_CONTROL_SERVICE_PORT=4523 +FLOCKER_CONTROL_SERVICE_CA_FILE=/etc/flocker/cluster.crt +FLOCKER_CONTROL_SERVICE_CLIENT_KEY_FILE=/etc/flocker/plugin.key +FLOCKER_CONTROL_SERVICE_CLIENT_CERT_FILE=/etc/flocker/plugin.crt +""" + + +def task_enable_kubernetes_plugin(flocker_control_service_host): + """ + Enable the Flocker Kubernetes plugin. + By adding a systemd configuration snippet that makes FLOCKER configuration + environment variables available to the kubelet. + + :param bytes flocker_control_service_host: The address or hostname of the + Flocker control service. + """ + return sequence([ + put( + content=ETC_FLOCKER_ENV_TEMPLATE.format( + control_service_host=flocker_control_service_host + ), + path=b"/etc/flocker/env", + ), + put( + content=KUBELET_FLOCKER_PLUGIN_SYSTEMD_CONFIG, + path=( + b"/etc/systemd/system/kubelet.service.d/20-flocker-plugin.conf" + ) + ), + run_from_args(['systemctl', 'restart', 'kubelet']), + ]) + def task_open_control_firewall(distribution): """ @@ -1377,6 +1461,223 @@ def task_install_docker(distribution): timeout=5.0 * 60.0, ) +# Used for signing yum and apt repo metadata +# pub 2048R/A7317B0F 2015-04-03 [expires: 2018-04-02] +# uid Google Cloud Packages Automatic Signing Key +GOOGLE_CLOUD_PACKAGES_KEY_AUTOMATIC = """ +-----BEGIN PGP PUBLIC KEY BLOCK----- +Version: GnuPG v1 + +mQENBFUd6rIBCAD6mhKRHDn3UrCeLDp7U5IE7AhhrOCPpqGF7mfTemZYHf/5Jdjx +cOxoSFlK7zwmFr3lVqJ+tJ9L1wd1K6P7RrtaNwCiZyeNPf/Y86AJ5NJwBe0VD0xH +TXzPNTqRSByVYtdN94NoltXUYFAAPZYQls0x0nUD1hLMlOlC2HdTPrD1PMCnYq/N +uL/Vk8sWrcUt4DIS+0RDQ8tKKe5PSV0+PnmaJvdF5CKawhh0qGTklS2MXTyKFoqj +XgYDfY2EodI9ogT/LGr9Lm/+u4OFPvmN9VN6UG+s0DgJjWvpbmuHL/ZIRwMEn/tp +uneaLTO7h1dCrXC849PiJ8wSkGzBnuJQUbXnABEBAAG0QEdvb2dsZSBDbG91ZCBQ +YWNrYWdlcyBBdXRvbWF0aWMgU2lnbmluZyBLZXkgPGdjLXRlYW1AZ29vZ2xlLmNv +bT6JAT4EEwECACgFAlUd6rICGy8FCQWjmoAGCwkIBwMCBhUIAgkKCwQWAgMBAh4B +AheAAAoJEDdGwginMXsPcLcIAKi2yNhJMbu4zWQ2tM/rJFovazcY28MF2rDWGOnc +9giHXOH0/BoMBcd8rw0lgjmOosBdM2JT0HWZIxC/Gdt7NSRA0WOlJe04u82/o3OH +WDgTdm9MS42noSP0mvNzNALBbQnlZHU0kvt3sV1YsnrxljoIuvxKWLLwren/GVsh +FLPwONjw3f9Fan6GWxJyn/dkX3OSUGaduzcygw51vksBQiUZLCD2Tlxyr9NvkZYT +qiaWW78L6regvATsLc9L/dQUiSMQZIK6NglmHE+cuSaoK0H4ruNKeTiQUw/EGFaL +ecay6Qy/s3Hk7K0QLd+gl0hZ1w1VzIeXLo2BRlqnjOYFX4A= +=HVTm +-----END PGP PUBLIC KEY BLOCK----- +""" + +# Used for signing RPM packages. +# pub 2048R/3E1BA8D5 2015-06-24 +# uid Google Cloud Packages RPM Signing Key +GOOGLE_CLOUD_PACKAGES_KEY_RPM = """ +-----BEGIN PGP PUBLIC KEY BLOCK----- +Version: GnuPG v1 + +mQENBFWKtqgBCADmKQWYQF9YoPxLEQZ5XA6DFVg9ZHG4HIuehsSJETMPQ+W9K5c5 +Us5assCZBjG/k5i62SmWb09eHtWsbbEgexURBWJ7IxA8kM3kpTo7bx+LqySDsSC3 +/8JRkiyibVV0dDNv/EzRQsGDxmk5Xl8SbQJ/C2ECSUT2ok225f079m2VJsUGHG+5 +RpyHHgoMaRNedYP8ksYBPSD6sA3Xqpsh/0cF4sm8QtmsxkBmCCIjBa0B0LybDtdX +XIq5kPJsIrC2zvERIPm1ez/9FyGmZKEFnBGeFC45z5U//pHdB1z03dYKGrKdDpID +17kNbC5wl24k/IeYyTY9IutMXvuNbVSXaVtRABEBAAG0Okdvb2dsZSBDbG91ZCBQ +YWNrYWdlcyBSUE0gU2lnbmluZyBLZXkgPGdjLXRlYW1AZ29vZ2xlLmNvbT6JATgE +EwECACIFAlWKtqgCGy8GCwkIBwMCBhUIAgkKCwQWAgMBAh4BAheAAAoJEPCcOUw+ +G6jV+QwH/0wRH+XovIwLGfkg6kYLEvNPvOIYNQWnrT6zZ+XcV47WkJ+i5SR+QpUI +udMSWVf4nkv+XVHruxydafRIeocaXY0E8EuIHGBSB2KR3HxG6JbgUiWlCVRNt4Qd +6udC6Ep7maKEIpO40M8UHRuKrp4iLGIhPm3ELGO6uc8rks8qOBMH4ozU+3PB9a0b +GnPBEsZdOBI1phyftLyyuEvG8PeUYD+uzSx8jp9xbMg66gQRMP9XGzcCkD+b8w1o +7v3J3juKKpgvx5Lqwvwv2ywqn/Wr5d5OBCHEw8KtU/tfxycz/oo6XUIshgEbS/+P +6yKDuYhRp6qxrYXjmAszIT25cftb4d4= +=/PbX +-----END PGP PUBLIC KEY BLOCK----- +""" + +KUBERNETES_REPO_YUM = """ +[kubernetes] +name=Kubernetes +baseurl=http://yum.kubernetes.io/repos/kubernetes-el7-x86_64 +enabled=1 +gpgcheck=1 +repo_gpgcheck=1 +gpgkey=https://packages.cloud.google.com/yum/doc/yum-key.gpg + https://packages.cloud.google.com/yum/doc/rpm-package-key.gpg +""" + +KUBERNETES_REPO_PATH_YUM = "/etc/yum.repos.d/kubernetes.repo" + + +def kubeadm_token_from_cluster(cluster): + """ + Generate a stable ``kubeadmin --token`` parameter for the supplied + ``cluster``. + + See: https://github.com/kubernetes/kubernetes/blob/master/cmd/kubeadm/app/util/tokens.go # noqa + + :param Cluster cluster: The cluster supplied by the acceptance test runner. + :returns: A "6.16" byte string in the format expected by + kubeadm --token. + """ + # + hash_bytes = sha256( + cluster.certificates.cluster.certificate.getContent() + ).hexdigest().encode('ascii') + token = hash_bytes[:6] + b"." + hash_bytes[-16:] + return token + + +def task_install_kubernetes_apt(): + # The de-armored key must have a .gpg file extension + key_path = b"/etc/apt/trusted.gpg.d/google_cloud_packages_automatic.gpg" + return sequence([ + # Upload the public key rather than downloading from the kubernetes + # servers every time. + put(GOOGLE_CLOUD_PACKAGES_KEY_AUTOMATIC, key_path + b".asc"), + # Install the key Kubernetes key + run( + command=b"apt-key --keyring {} add {}.asc".format( + key_path, key_path + ) + ), + # Install Kubernetes repository + run(command=b"apt-get update"), + run(command=( + b"apt-get install -y " + b"apt-transport-https software-properties-common" + )), + run(command=( + b"add-apt-repository -y " + b'"deb http://apt.kubernetes.io/ ' + b'kubernetes-$(lsb_release --codename --short) main"' + )), + run(command=b"apt-get update"), + # Install Kubernetes packages + run(command=( + b"apt-get install -y " + b"kubelet kubeadm kubectl kubernetes-cni" + )) + ]) + + +def task_install_kubernetes_yum(): + key_paths = [ + (GOOGLE_CLOUD_PACKAGES_KEY_AUTOMATIC, + b"/etc/pki/rpm-gpg/google_cloud_packages_automatic"), + (GOOGLE_CLOUD_PACKAGES_KEY_RPM, + b"/etc/pki/rpm-gpg/google_cloud_packages_rpm"), + ] + key_operations = [] + for key_content, key_path in key_paths: + key_operations += [ + put(key_content, key_path), + run(b"rpmkeys --import " + key_path) + ] + return sequence( + # Upload and import YUM and RPM signing keys + key_operations + [ + # Upload the repo file + put( + KUBERNETES_REPO_YUM, + KUBERNETES_REPO_PATH_YUM + ), + # Install Kubernetes packages + # XXX The ebtables dependency isn't declared by the kubelet + # package. See https://github.com/kubernetes/release/pull/197 + run(command=( + b"yum install -y " + b"ebtables kubelet kubeadm kubectl kubernetes-cni" + )), + ] + ) + +_task_install_kubernetes_variants = { + 'deb': task_install_kubernetes_apt, + 'rpm': task_install_kubernetes_yum, +} + + +def task_install_kubernetes(distribution): + """ + Install Kubernetes packages. + + :param unicode distribution: The name of the target OS distribution. + :returns: an ``Effect`` for installing Kubernetes packages from the + Kubernetes repository. + """ + package_format = package_format_for_distribution(distribution) + return _task_install_kubernetes_variants[package_format]() + + +# XXX Maybe copy the entire configuration here to avoid failures due to flaky +# downloads. +KUBERNETES_ADDON_WEAVE = ( + b"https://raw.githubusercontent.com" + b"/weaveworks/weave-kube/v1.7.2/weave-daemonset.yaml" +) + + +def task_configure_kubernetes_master(distribution, token): + """ + Configure a Kubernetes master and allow that node to also run pods. + + :param unicode distribution: The name of the target OS distribution. + :param bytes token: A ``kubeadm`` token. + :returns: an ``Effect`` for configuring the Kubernetes master node. + """ + return sequence([ + run( + command=b"kubeadm init --token {}".format(token) + ), + # Allow pods to be scheduled to the master node too. + run(command=b"kubectl taint nodes --all dedicated-"), + # Install a network addon. The weave daemonset will be started on the + # nodes as they join the cluster in ``task_configure_kubernetes_node``. + run( + command=b"kubectl apply --filename " + KUBERNETES_ADDON_WEAVE + ), + ]) + + +def task_configure_kubernetes_node(distribution, token, master_ip): + """ + Configure a Kubernetes worker node and join it to the master configured in + ``task_configure_kubernetes_master``. + + XXX Skip preflight checks until + https://github.com/kubernetes/kubernetes/issues/36301 is resolved. + + :param unicode distribution: The name of the target OS distribution. + :param bytes token: A ``kubeadm`` token. + :returns: an ``Effect`` for running ``kubeadm --join``. + """ + return sequence([ + run( + command=( + b"kubeadm join --skip-preflight-checks --token " + + token + b" " + + master_ip + ) + ), + ]) + def task_install_flocker( distribution=None, @@ -1488,13 +1789,15 @@ def provision(distribution, package_source, variants): if Variants.DOCKER_HEAD in variants: commands.append(task_enable_docker_head_repository(distribution)) commands.append(task_install_docker(distribution)) + commands.append(task_install_kubernetes(distribution)) commands.append( task_install_flocker( package_source=package_source, distribution=distribution)) commands.append( task_install_docker_plugin( package_source=package_source, distribution=distribution)) - commands.append(task_enable_docker(distribution)) + commands.append(task_start_docker(distribution)) + commands.append(task_start_kubelet(distribution)) return sequence(commands) @@ -1545,6 +1848,49 @@ def install_flocker(nodes, package_source): ) +def deconfigure_kubernetes(nodes): + """ + See: http://kubernetes.io/docs/getting-started-guides/kubeadm/#cleanup + + :param nodes: An iterable of ``Node`` instances on which to de-configure + Kubernetes. + + :return: An ``Effect`` which removes all Kubernetes pods / containers and + configuration. + """ + return sequence([ + _run_on_all_nodes( + nodes, + task=lambda node: sequence([ + run(command=b"systemctl stop kubelet"), + run( + command=( + b"docker ps --all --quiet | " + b"xargs --no-run-if-empty docker rm --force" + ) + ), + run( + command=( + b"find /var/lib/kubelet " + b"| xargs --no-run-if-empty " + b" --max-args 1 " + b" findmnt --noheadings --types tmpfs " + b" --output TARGET --target " + b"| uniq | xargs --no-run-if-empty umount" + ) + ), + run( + command=( + b"rm -rf " + b"/etc/kubernetes /var/lib/kubelet /var/lib/etcd" + ) + ), + run(command=b"systemctl start kubelet"), + ]), + ), + ]) + + def configure_cluster( cluster, dataset_backend_configuration, provider, logging_config=None ): @@ -1724,6 +2070,10 @@ def configure_control_node( cluster.control_node.distribution ) ), + task_configure_kubernetes_master( + distribution=cluster.control_node.distribution, + token=kubeadm_token_from_cluster(cluster), + ), ]), ) @@ -1751,36 +2101,55 @@ def configure_node( if provider == "managed": setup_action = 'restart' - return run_remotely( - username='root', - address=node.address, - commands=sequence([ - task_install_node_certificates( - cluster.certificates.cluster.certificate, - certnkey.certificate, - certnkey.key), - task_install_api_certificates( - cluster.certificates.user.certificate, - cluster.certificates.user.key), - task_enable_docker(node.distribution), - if_firewall_available( - node.distribution, - open_firewall_for_docker_api(node.distribution), - ), - task_configure_flocker_agent( - control_node=cluster.control_node.address, - dataset_backend=cluster.dataset_backend, - dataset_backend_configuration=( - dataset_backend_configuration - ), - logging_config=logging_config, + commands = [ + task_install_node_certificates( + cluster.certificates.cluster.certificate, + certnkey.certificate, + certnkey.key), + task_install_api_certificates( + cluster.certificates.user.certificate, + cluster.certificates.user.key), + task_configure_docker(node.distribution), + if_firewall_available( + node.distribution, + open_firewall_for_docker_api(node.distribution), + ), + task_configure_flocker_agent( + control_node=cluster.control_node.address, + dataset_backend=cluster.dataset_backend, + dataset_backend_configuration=( + dataset_backend_configuration ), - task_enable_docker_plugin(node.distribution), - task_enable_flocker_agent( + logging_config=logging_config, + ), + task_enable_docker_plugin(node.distribution), + task_enable_flocker_agent( + distribution=node.distribution, + action=setup_action, + ), + task_enable_kubernetes_plugin(cluster.control_node.public_address), + ] + + if node is not cluster.control_node: + commands = [ + task_configure_kubernetes_node( distribution=node.distribution, - action=setup_action, + token=kubeadm_token_from_cluster(cluster), + master_ip=cluster.control_node.address, ), - ]), + ] + + commands.extend([ + # Restart docker after pushing the Flocker certificates and the Docker + # configuration modifications which make it use Flocker certificates + # for listening on a TLS / TCP port. + task_start_docker(node.distribution) + ]) + + return run_remotely( + username='root', + address=node.address, + commands=sequence(commands), ) diff --git a/flocker/provision/_tasks.py b/flocker/provision/_tasks.py index da729324a7..a8aab35884 100644 --- a/flocker/provision/_tasks.py +++ b/flocker/provision/_tasks.py @@ -6,7 +6,8 @@ from ._install import ( task_create_flocker_pool_file, - task_enable_docker, + task_configure_docker, + task_start_docker, task_install_flocker, task_install_ssh_key, task_cli_pkg_install, @@ -22,7 +23,8 @@ __all__ = [ 'task_create_flocker_pool_file', - 'task_enable_docker', + 'task_configure_docker', + 'task_start_docker', 'task_install_flocker', 'task_install_ssh_key', 'task_cli_pkg_install',