-
Notifications
You must be signed in to change notification settings - Fork 29
/
juju-unstick-upgrade
executable file
·177 lines (134 loc) · 5.12 KB
/
juju-unstick-upgrade
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
#!/usr/bin/env python
#
# Copyright 2015 Menno Smits <[email protected]>
#
# juju-unstick-upgrade is free software: you can redistribute it
# and/or modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation, either version 3 of
# the License, or (at your option) any later version.
#
# juju-unstick-upgrade is distributed in the hope that it will be
# useful, but WITHOUT ANY WARRANTY; without even the implied warranty
# of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# For a full copy of the GNU General Public License, see
# <http://www.gnu.org/licenses/>.
import argparse
import os
import re
import subprocess
import sys
# Tools directory used when fixing state servers reached over ssh.
DEFAULT_TOOLS_DIR = '/var/lib/juju/tools'

# Shape of an acceptable target version, e.g. "1.23.0" or "1.25-alpha1.2".
VALID_VERSION = re.compile(
    r"^\d{1,9}\.\d{1,9}(\.|-(\w+))\d{1,9}(\.\d{1,9})?$"
)

# One-line summary printed when the script is run with "--description".
SHORT_DESCRIPTION = (
    "Allow an upgrade to continue if the state server "
    "machine agents are not able to restart"
)

# Full help text handed to argparse as the program description.
LONG_DESCRIPTION = """\
Allow a Juju upgrade to continue if the state server machine agents
are not able to restart. This is a workaround for problems in Juju
1.23, 1.24.0 and 1.24.1.
To use, start the upgrade as normal with "juju upgrade-juju". When the
state servers appear to be stuck run "juju unstick-upgrade", supplying
the version being upgraded to.
"""
def process_args():
    """Parse the command line and return the resulting argparse namespace.

    If the only argument is ``--description`` the short description is
    printed and the process exits with status 0. Otherwise a single
    positional ``target_version`` argument is required; it is validated
    by ``check_version``.
    """
    if sys.argv[1:] == ['--description']:
        # A parenthesised single-argument print behaves the same whether
        # print is a statement or a function.
        print(SHORT_DESCRIPTION)
        sys.exit(0)

    parser = argparse.ArgumentParser(description=LONG_DESCRIPTION)
    parser.add_argument(
        'target_version',
        type=check_version,
        help='the Juju version being upgraded to',
    )
    return parser.parse_args()
def check_version(version):
    """Validate a version string for use as an argparse ``type`` callback.

    Returns *version* unchanged when it matches ``VALID_VERSION``;
    otherwise raises ``argparse.ArgumentTypeError``.
    """
    if VALID_VERSION.match(version) is None:
        raise argparse.ArgumentTypeError(
            "invalid version. valid examples: 1.23.0, 1.25-alpha1.2")
    return version
def get_state_server_addrs():
    """Return the state server addresses reported by ``juju api-info``.

    Runs ``juju api-info state-servers`` and keeps the part of each
    output line before the first ``:`` (presumably stripping a port
    from ``host:port`` -- confirm against the command's output format).

    Blank lines are ignored, so empty command output yields an empty
    list rather than ``['']``; callers can then detect "no addresses"
    with a plain truth test.

    Raises ``subprocess.CalledProcessError`` if the command exits
    non-zero, and ``OSError`` if it cannot be executed.
    """
    # universal_newlines=True makes the output text (not bytes) so the
    # string operations below work the same on every interpreter.
    output = subprocess.check_output(
        ['juju', 'api-info', 'state-servers'],
        universal_newlines=True,
    )
    return [
        line.split(':')[0]
        for line in output.splitlines()
        if line.strip()
    ]
def fix_local_environment(target_version):
    """Apply the fix to a local provider environment.

    The local provider is handled differently from other environments:
    there is only one state server and no ssh is involved, so the fix
    script is fed straight to a local ``sudo /bin/bash`` for
    ``machine-0``, using the tools directory under the Juju home for
    the current environment.
    """
    env_tools_dir = os.path.join(get_juju_home(), get_env_name(), "tools")
    local_shell = ["sudo", "/bin/bash"]
    run_fix(local_shell, "machine-0", target_version, env_tools_dir)
def fix_environment(version, addrs):
    """Apply the fix to each distinct machine agent reachable via *addrs*.

    For every address the machine agent is detected over ssh. Addresses
    with no agent, or with more than one, are skipped, as are agents
    that were already fixed through an earlier address. Progress is
    reported on stdout.
    """
    fixed = set()
    for addr in addrs:
        print("\nchecking " + addr)
        found = detect_agent(addr)
        count = len(found)
        if count == 0:
            print("no machine agent found on {0}, skipping".format(addr))
            continue
        if count > 1:
            print("multiple machine agents found on {0}, skipping".format(addr))
            continue

        agent = found[0]
        print("{0} is {1}".format(addr, agent))
        if agent in fixed:
            # Another address already led to this same agent.
            print("{0} has already been fixed, skipping".format(agent))
            continue

        remote_shell = make_ssh_args(addr, 'sudo /bin/bash')
        run_fix(remote_shell, agent, version, DEFAULT_TOOLS_DIR)
        print("{0} fixed".format(agent))
        fixed.add(agent)
def detect_agent(addr):
    """Return the machine agent names found in the tools directory on *addr*.

    Lists ``machine-*`` entries under ``DEFAULT_TOOLS_DIR`` over ssh and
    returns the final path component of each. Detection is best-effort:
    if the ssh/ls command exits non-zero or cannot be started, an empty
    list is returned. Other exceptions (for example
    ``KeyboardInterrupt``) are deliberately allowed to propagate.
    """
    listing_cmd = 'ls -1d {0}/machine-*'.format(DEFAULT_TOOLS_DIR)
    try:
        # universal_newlines=True yields text rather than bytes so the
        # path splitting below works the same on every interpreter.
        output = subprocess.check_output(
            make_ssh_args(addr, listing_cmd),
            universal_newlines=True,
        )
    except (subprocess.CalledProcessError, OSError):
        return []
    return [
        line.rsplit('/', 1)[-1]
        for line in output.splitlines()
        if line.strip()
    ]
def make_ssh_args(addr, cmd):
    """Build the argv list that runs *cmd* on *addr* over ssh as ``ubuntu``.

    The connection uses a 20s connect timeout and disables strict host
    key checking.
    """
    ssh_options = [
        '-o', 'ConnectTimeout 20s',
        '-o', 'StrictHostKeyChecking no',
    ]
    destination = 'ubuntu@' + addr
    return ['ssh'] + ssh_options + [destination, cmd]
def run_fix(host_cmd, machine_tag, version, tools_dir):
    """Run the fix script through *host_cmd*, aborting on failure.

    *host_cmd* is an argv list for a process that reads a shell script
    on stdin. The generated fix script is piped to it; stdout and
    stderr are captured together. If the process exits non-zero the
    captured output is reported via ``fatal``.
    """
    fix_script = create_fix_script(machine_tag, version, tools_dir)
    proc = subprocess.Popen(
        host_cmd,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
    )
    # stderr is merged into stdout, so only the first element is useful.
    combined_output = proc.communicate(input=fix_script)[0]
    if proc.returncode != 0:
        fatal("fix failed with:\n{0}".format(combined_output))
def _sh_single_quote(value):
    """Return *value* single-quoted so the shell treats it as one literal word."""
    return "'" + value.replace("'", "'\\''") + "'"


def create_fix_script(machine_tag, version, tools_dir):
    """Return the bash script that repoints an agent's tools and restarts it.

    The script changes to *tools_dir*, checks that exactly one entry
    matches ``<version>-*-*``, replaces the *machine_tag* symlink so it
    points at that entry, then kills ``jujud``. Each failing step
    prints a message and exits with status 1.

    *tools_dir* and *machine_tag* are shell-quoted so values containing
    spaces or other special characters stay a single word. *version* is
    interpolated unquoted because it is part of a glob pattern; callers
    in this file pass a value already validated by ``check_version``.
    """
    # --no-dereference matters when the existing machine_tag entry is a
    # symlink to a directory: without it ln would follow the link and
    # create the new link *inside* that directory instead of replacing
    # the link itself.
    return """
function die() {{
    echo "$1"
    exit 1
}}
cd {tools_dir} || die "couldn't change to tools directory"
if [ `ls -1d {version}-*-* | wc -l` -ne 1 ]; then
    die "error locating tools"
fi
ln --symbolic --force --no-dereference {version}-*-* {machine_tag} || die "symlink switch failed"
pkill jujud || die "could not kill machine agent"
""".format(
        machine_tag=_sh_single_quote(machine_tag),
        version=version,
        tools_dir=_sh_single_quote(tools_dir),
    )
def get_env_name():
    """Return the output of ``juju switch`` with surrounding whitespace removed."""
    switch_output = subprocess.check_output(['juju', 'switch'])
    return switch_output.strip()
def is_local_provider(addrs):
    """Return True when one of *addrs* is exactly ``'localhost'``."""
    return 'localhost' in addrs
def get_juju_home():
    """Return ``$JUJU_HOME`` if it is set, otherwise the expanded ``~/.juju``."""
    configured = os.environ.get("JUJU_HOME")
    if configured is None:
        return os.path.expanduser("~/.juju")
    return configured
def fatal(message):
    """Write *message* and a newline to stderr, then exit with status 1."""
    sys.stderr.write(message + "\n")
    raise SystemExit(1)
def main():
    """Script entry point: parse arguments, find state servers, apply the fix.

    Exits via ``fatal`` when no state server addresses are available.
    Environments whose address list contains ``localhost`` are treated
    as local provider environments; all others are fixed over ssh.
    """
    args = process_args()
    addrs = get_state_server_addrs()
    if not addrs:
        fatal("unable to locate state server addresses")

    # NOTE(review): the first address is appended again, so it is
    # visited a second time at the end of the list. The reason is not
    # evident from this file -- confirm whether this is intentional.
    addrs.append(addrs[0])

    target = args.target_version
    if is_local_provider(addrs):
        print("fixing local environment upgrade")
        fix_local_environment(target)
    else:
        fix_environment(target, addrs)
    print("\nfix complete")
# Run the tool only when executed as a script, not when imported.
if __name__ == "__main__":
    main()