Skip to content

Commit 8039f18

Browse files
Staggered upgrade procedure
1 parent 56a7e85 commit 8039f18

File tree

10 files changed

+441
-0
lines changed

10 files changed

+441
-0
lines changed

doc/source/operations/upgrading-openstack.rst

+65
Original file line numberDiff line numberDiff line change
@@ -1063,6 +1063,12 @@ This will block the upgrade, but may be overridden by setting
10631063
``etc/kayobe/kolla/globals.yml`` or
10641064
``etc/kayobe/environments/<env>/kolla/globals.yml``.
10651065

1066+
Depending on the networking architecture of your cloud, the steps used
1067+
to upgrade the containerised services will differ.
1068+
1069+
OVN
1070+
^^^
1071+
10661072
To upgrade the containerised control plane services:
10671073

10681074
.. code-block:: console
@@ -1076,6 +1082,65 @@ scope of the upgrade:
10761082
10771083
kayobe overcloud service upgrade --tags config --kolla-tags keystone
10781084
1085+
OVS (w/ Dedicated network nodes)
1086+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
1087+
1088+
You should first stop the Octavia health manager to prevent alerts during
1089+
the service upgrade.
1090+
1091+
.. code-block:: console
1092+
1093+
kayobe overcloud host command run --command "docker stop octavia_health_manager" --limit controllers --become
1094+
1095+
Upgrade the control plane services:
1096+
1097+
.. code-block:: console
1098+
1099+
kayobe overcloud service upgrade --kolla-limit controllers
1100+
1101+
To ensure L3 reliability during the upgrade, we will need to manually drain
1102+
the network nodes of all agents, and upgrade the nodes sequentially.
1103+
1104+
Kolla credentials will need to be activated before running the neutron-namespace-drain
1105+
role.
1106+
1107+
.. code-block:: console
1108+
1109+
source $KOLLA_CONFIG_PATH/public-openrc.sh
1110+
1111+
You should substitute <network0> with the first network node to be drained. To set
1112+
the node for maintenance and begin draining the agents:
1113+
1114+
.. code-block:: console
1115+
1116+
kayobe playbook run $KAYOBE_CONFIG_PATH/ansible/neutron-l3-drain.yml -e neutron_drain_host=<network0> -e maintenance=true -e drain_dhcp_agents=true
1117+
1118+
You can monitor the L3/DHCP agents being drained from the node by running:
1119+
1120+
.. code-block:: console
1121+
1122+
ssh -t <network0> watch ip netns ls
1123+
1124+
Once all agents have been drained, you can upgrade the containerised services
1125+
on the network node.
1126+
1127+
.. code-block:: console
1128+
1129+
kayobe overcloud service upgrade --kolla-limit <network0>
1130+
1131+
Following the service upgrade, the agents can be restored on the node by disabling maintenance:
1132+
1133+
.. code-block:: console
1134+
1135+
kayobe playbook run $KAYOBE_CONFIG_PATH/ansible/neutron-l3-drain.yml -e neutron_drain_host=<network0> -e maintenance=false -e drain_dhcp_agents=true
1136+
1137+
The above steps should be repeated for the remaining network nodes. Once all network nodes have been upgraded,
1138+
the remaining containerised services can be upgraded:
1139+
1140+
.. code-block:: console
1141+
1142+
kayobe overcloud service upgrade --kolla-tags common,nova,prometheus,openvswitch,neutron --skip-prechecks -kl controllers,compute --limit controllers,compute
1143+
10791144
Updating the Octavia Amphora Image
10801145
----------------------------------
10811146

+23
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
---
# Runs the neutron-namespace-drain role from localhost and, when maintenance
# is requested, prints a reminder of how to restore the host afterwards.
#
# Variables read by this play (override with -e on the command line):
#   maintenance        - when true, the "Print Info" reminder is shown.
#   drain_ctrl1/2/3    - any true value causes the role to be run.
#   neutron_drain_host - the role is also run whenever this is defined.
- name: Drain neutron of l3 agents and dhcp agents
  hosts: localhost
  # Facts are gathered for localhost; the Python version facts are used to
  # build neutron_drain_venv_python.
  gather_facts: true
  tags:
    - neutron-l3-drain
  vars:
    maintenance: false
    drain_ctrl1: false
    drain_ctrl2: false
    drain_ctrl3: false
  tasks:
    - name: Run the neutron-namespace-drain role
      import_role:
        name: neutron-namespace-drain
        tasks_from: main.yml
      when: drain_ctrl1 | bool or drain_ctrl2 | bool or drain_ctrl3 | bool or neutron_drain_host is defined

    # Reminder for the operator; only shown when maintenance was requested.
    - name: "Print Info"
      debug:
        msg:
          - "{{ neutron_drain_host }} is ready for maintenance"
          - "rerun this playbook with -e maintenance=false to re-add"
          - "routers"
      when: maintenance | bool
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
---
# Variable defaults consumed by the neutron drain tasks.
# NOTE(review): presumably the neutron-namespace-drain role's defaults file;
# the file name is not visible here.

# Shorthand switches that pick a host by position in the 'controllers'
# inventory group (first, second, third).
drain_ctrl1: false
drain_ctrl2: false
drain_ctrl3: false

# NOTE(review): the tasks that consume this flag are not visible here;
# presumably it also drains/restores DHCP agents when true.
drain_dhcp_agents: false

# Host whose agents are operated on. Resolves to the controller selected by
# the first true drain_ctrlN flag, or to an empty string when none is set.
neutron_drain_host: "{% if drain_ctrl1 | bool %}{{ groups['controllers'][0] }}{% elif drain_ctrl2 | bool %}{{ groups['controllers'][1] }}{% elif drain_ctrl3 | bool %}{{ groups['controllers'][2] }}{% endif %}"

# Virtualenv that provides the bin/openstack client used by the tasks.
neutron_drain_venv: '{{ virtualenv_path }}/openstack'

# Interpreter name built from gathered facts, in the form pythonX.Y.
neutron_drain_venv_python: "{{ 'python' ~ ansible_facts.python.version.major ~ '.' ~ ansible_facts.python.version.minor }}"
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
---
# Re-enables the DHCP agent found on neutron_drain_host and then adds that
# agent to every network returned by "openstack network list".
# Every openstack command is delegated to neutron_drain_host and run with
# the openstack_auth_env environment.

- name: Query source SRC_DHCP_ID
  delegate_to: "{{ neutron_drain_host }}"
  vars:
    ansible_host: "{{ hostvars[neutron_drain_host].ansible_host }}"
  environment: "{{ openstack_auth_env }}"
  command: >
    {{ neutron_drain_venv }}/bin/openstack network agent list
    --host {{ neutron_drain_host }} --agent-type dhcp
    -f value -c ID
  register: SRC_DHCP_ID

# Keep the agent ID as a fact so later tasks can refer to it directly.
- name: Set fact containing SRC_DHCP_ID
  set_fact:
    DHCP_SRC_ID: "{{ SRC_DHCP_ID.stdout }}"

- name: Enable DHCP agent
  delegate_to: "{{ neutron_drain_host }}"
  vars:
    ansible_host: "{{ hostvars[neutron_drain_host].ansible_host }}"
  environment: "{{ openstack_auth_env }}"
  command: >
    {{ neutron_drain_venv }}/bin/openstack network agent set
    "{{ DHCP_SRC_ID }}" --enable

- name: Get Network IDs
  delegate_to: "{{ neutron_drain_host }}"
  vars:
    ansible_host: "{{ hostvars[neutron_drain_host].ansible_host }}"
  environment: "{{ openstack_auth_env }}"
  command: >
    {{ neutron_drain_venv }}/bin/openstack network list
    -f value -c ID
  register: network_ids

# One network ID per output line.
- name: Set Network IDs
  set_fact:
    NETWORK_IDS: "{{ network_ids.stdout_lines }}"

# One command per network, pausing 10 seconds between iterations.
- name: Add DHCP agent
  delegate_to: "{{ neutron_drain_host }}"
  vars:
    ansible_host: "{{ hostvars[neutron_drain_host].ansible_host }}"
  environment: "{{ openstack_auth_env }}"
  command: >
    {{ neutron_drain_venv }}/bin/openstack network agent add network
    {{ DHCP_SRC_ID }} {{ item }} --dhcp
  loop: "{{ NETWORK_IDS }}"
  loop_control:
    pause: 10
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
---
# For each router in ROUTER_IDS, adds one further L3 agent that is neither
# the agent identified by L3_SRC_ID nor already hosting the router.
# ROUTER_IDS and L3_SRC_ID are not set in this file.
# NOTE(review): presumably they come from the task file that includes this
# one - confirm against the caller.

- name: Query L3_IDs
  delegate_to: "{{ neutron_drain_host }}"
  vars:
    ansible_host: "{{ hostvars[neutron_drain_host].ansible_host }}"
  environment: "{{ openstack_auth_env }}"
  command: >
    {{ neutron_drain_venv }}/bin/openstack network agent list
    --agent-type l3 -f value -c ID
  register: L3_ID

# All L3 agent IDs, one per output line.
- name: Set fact containing SRC_L3_IDs
  set_fact:
    L3_IDS: "{{ L3_ID.stdout_lines }}"

# Errors are ignored (failed_when: false) so a failing lookup for one router
# does not abort the loop.
- name: Get agents for each router
  delegate_to: "{{ neutron_drain_host }}"
  vars:
    ansible_host: "{{ hostvars[neutron_drain_host].ansible_host }}"
  environment: "{{ openstack_auth_env }}"
  command: >
    {{ neutron_drain_venv }}/bin/openstack network agent list
    --router {{ router_id }} --agent-type l3 -f value -c ID
  register: ROUTER_L3_IDS
  failed_when: false
  loop: "{{ ROUTER_IDS }}"
  loop_control:
    loop_var: router_id

# Each result item carries the router_id it was produced for and the agents
# currently hosting that router (stdout_lines).
- name: Add agent to router
  delegate_to: "{{ neutron_drain_host }}"
  vars:
    ansible_host: "{{ hostvars[neutron_drain_host].ansible_host }}"
    # Candidate agents: every L3 agent except the source agent and the
    # agents that already host this router.
    L3_ADD_DIFF: "{{ L3_IDS | difference([L3_SRC_ID]) | difference(item.stdout_lines) }}"
    # The first candidate is the one that gets added.
    L3_ADD: "{{ L3_ADD_DIFF | first }}"
  environment: "{{ openstack_auth_env }}"
  command: >
    {{ neutron_drain_venv }}/bin/openstack network agent add router
    --l3 {{ L3_ADD }} {{ item.router_id }}
  when: L3_ADD_DIFF | length > 0
  loop: "{{ ROUTER_L3_IDS.results }}"
  loop_control:
    label: "{{ item.router_id }}"
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
---
# Drains the DHCP agent running on neutron_drain_host:
#   1. look up the agent ID and the networks it currently hosts,
#   2. remove each of those networks from the agent,
#   3. poll until the agent reports no networks,
#   4. disable the agent.
# Every openstack command is delegated to neutron_drain_host and run with
# the openstack_auth_env environment.

- name: Query source SRC_DHCP_ID
  command: >
    {{ neutron_drain_venv }}/bin/openstack
    network agent list --host {{ neutron_drain_host }}
    --agent-type dhcp -f value -c ID
  register: SRC_DHCP_ID
  environment: "{{ openstack_auth_env }}"
  delegate_to: "{{ neutron_drain_host }}"
  vars:
    ansible_host: "{{ hostvars[neutron_drain_host].ansible_host }}"

- name: Set fact containing SRC_DHCP_ID
  set_fact:
    DHCP_SRC_ID: "{{ SRC_DHCP_ID.stdout }}"

- name: Get DHCP agent network IDs
  command: >
    {{ neutron_drain_venv }}/bin/openstack
    network list --agent {{ DHCP_SRC_ID }} -f value -c ID
  register: dhcp_agent_ids
  environment: "{{ openstack_auth_env }}"
  delegate_to: "{{ neutron_drain_host }}"
  vars:
    ansible_host: "{{ hostvars[neutron_drain_host].ansible_host }}"

- name: Set DHCP agent network IDs
  set_fact:
    DHCP_AGENT_IDS: "{{ dhcp_agent_ids.stdout_lines }}"

- name: Remove DHCP agent
  command: >
    {{ neutron_drain_venv }}/bin/openstack
    network agent remove network {{ DHCP_SRC_ID }} {{ item }} --dhcp
  environment: "{{ openstack_auth_env }}"
  delegate_to: "{{ neutron_drain_host }}"
  vars:
    ansible_host: "{{ hostvars[neutron_drain_host].ansible_host }}"
  loop: "{{ DHCP_AGENT_IDS }}"

# The previous block/rescue counter never re-ran the check: rescue finished
# successfully on the first failure, so the play carried on without waiting.
# until/retries makes the task itself poll, up to 20 times, 5 seconds apart.
- name: Wait for no more dhcp agents to be attached to the host
  block:
    - name: Verify dhcp agents exist
      command: >
        {{ neutron_drain_venv }}/bin/openstack
        network list --agent {{ DHCP_SRC_ID }} -f value -c ID
      environment: "{{ openstack_auth_env }}"
      delegate_to: "{{ neutron_drain_host }}"
      vars:
        ansible_host: "{{ hostvars[neutron_drain_host].ansible_host }}"
      register: agent_status
      # Done once the command succeeds and lists no networks for the agent.
      until: agent_status is succeeded and agent_status.stdout | length == 0
      retries: 20
      delay: 5
  rescue:
    # Reached only when every retry was used up; stop the play here.
    - name: Fail if DHCP agent still attached
      fail:
        msg: |
          Maximum retries waiting for DHCP agents to be detached reached
          (agent "{{ DHCP_SRC_ID }}")

- name: Disable DHCP agent
  command: >
    {{ neutron_drain_venv }}/bin/openstack
    network agent set "{{ DHCP_SRC_ID }}" --disable
  environment: "{{ openstack_auth_env }}"
  delegate_to: "{{ neutron_drain_host }}"
  vars:
    ansible_host: "{{ hostvars[neutron_drain_host].ansible_host }}"
  when: agent_status.stdout | length == 0
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
---
# Drains the L3 agent running on neutron_drain_host:
#   1. look up the agent ID and the routers it currently hosts,
#   2. add another L3 agent to those routers (add-new-l3.yml),
#   3. remove each router from this agent,
#   4. poll until the agent reports no routers,
#   5. disable the agent.
# Every openstack command is delegated to neutron_drain_host and run with
# the openstack_auth_env environment.

- name: Query source SRC_L3_ID
  command: >
    {{ neutron_drain_venv }}/bin/openstack
    network agent list --host {{ neutron_drain_host }}
    --agent-type l3 -f value -c ID
  register: SRC_L3_ID
  environment: "{{ openstack_auth_env }}"
  delegate_to: "{{ neutron_drain_host }}"
  vars:
    ansible_host: "{{ hostvars[neutron_drain_host].ansible_host }}"

- name: Set fact containing SRC_L3_ID
  set_fact:
    L3_SRC_ID: "{{ SRC_L3_ID.stdout }}"

- name: Get Router IDs
  command: >
    {{ neutron_drain_venv }}/bin/openstack
    router list --agent {{ L3_SRC_ID }} -f value -c ID
  register: router_ids
  environment: "{{ openstack_auth_env }}"
  delegate_to: "{{ neutron_drain_host }}"
  vars:
    ansible_host: "{{ hostvars[neutron_drain_host].ansible_host }}"

- name: Set Router IDs
  set_fact:
    ROUTER_IDS: "{{ router_ids.stdout_lines }}"

# Skipped when the agent hosts no routers.
- name: Add agents to router
  include_tasks: add-new-l3.yml
  when: ROUTER_IDS | length > 0

- name: Remove router
  command: >
    {{ neutron_drain_venv }}/bin/openstack
    network agent remove router {{ L3_SRC_ID }} {{ item }} --l3
  environment: "{{ openstack_auth_env }}"
  delegate_to: "{{ neutron_drain_host }}"
  vars:
    ansible_host: "{{ hostvars[neutron_drain_host].ansible_host }}"
  loop: "{{ ROUTER_IDS }}"

# The previous block/rescue counter never re-ran the check: rescue finished
# successfully on the first failure, so the play carried on without waiting.
# until/retries makes the task itself poll, up to 20 times, 5 seconds apart.
- name: Wait for no more routers to be attached to the host
  block:
    - name: Verify routers exist
      command: >
        {{ neutron_drain_venv }}/bin/openstack router list --agent {{ L3_SRC_ID }} -f value -c ID
      environment: "{{ openstack_auth_env }}"
      delegate_to: "{{ neutron_drain_host }}"
      register: agent_status
      vars:
        ansible_host: "{{ hostvars[neutron_drain_host].ansible_host }}"
      # Done once the command succeeds and lists no routers for the agent.
      until: agent_status is succeeded and agent_status.stdout | length == 0
      retries: 20
      delay: 5
  rescue:
    # Reached only when every retry was used up; stop the play here.
    - name: Fail if routers still attached
      fail:
        msg: |
          Maximum retries waiting for routers to be detached reached
          (agent "{{ L3_SRC_ID }}")

- name: Disable L3 agent
  command: >
    {{ neutron_drain_venv }}/bin/openstack
    network agent set "{{ L3_SRC_ID }}" --disable
  environment: "{{ openstack_auth_env }}"
  delegate_to: "{{ neutron_drain_host }}"
  when: agent_status.stdout | length == 0
  vars:
    ansible_host: "{{ hostvars[neutron_drain_host].ansible_host }}"

0 commit comments

Comments
 (0)