Merge "Add general logging conf"
diff --git a/README.rst b/README.rst
index 2c12678..4cc9fc1 100644
--- a/README.rst
+++ b/README.rst
@@ -1400,6 +1400,65 @@
             level: 'DEBUG'
     ......
 
+Upgrades
+========
+
+Each OpenStack formula provides a set of phases (logical blocks) that help to
+build flexible upgrade orchestration logic for particular components. The
+phases and their descriptions are listed in the table below:
+
++-------------------------------+------------------------------------------------------+
+| State                         | Description                                          |
++===============================+======================================================+
+| <app>.upgrade.service_running | Ensure that all services for the given application   |
+|                               | are enabled for autostart and running                |
++-------------------------------+------------------------------------------------------+
+| <app>.upgrade.service_stopped | Ensure that all services for the given application   |
+|                               | are disabled for autostart and dead                  |
++-------------------------------+------------------------------------------------------+
+| <app>.upgrade.pkg_latest      | Ensure that packages used by the given application   |
+|                               | are installed to the latest available version.       |
+|                               | This will not upgrade data plane packages such as    |
+|                               | qemu and openvswitch, as the minimal version         |
+|                               | required by OpenStack services is usually quite old. |
+|                               | Data plane packages should be upgraded separately    |
+|                               | with `apt-get upgrade` or `apt-get dist-upgrade`.    |
+|                               | Applying this state will not autostart services.     |
++-------------------------------+------------------------------------------------------+
+| <app>.upgrade.render_config   | Ensure the configuration is rendered for the actual  |
+|                               | version.                                             |
++-------------------------------+------------------------------------------------------+
+| <app>.upgrade.pre             | We assume this state is applied on all nodes in the  |
+|                               | cloud before running the upgrade.                    |
+|                               | Only non-destructive actions will be applied during  |
+|                               | this phase, e.g. built-in service checks such as     |
+|                               | keystone-manage doctor and nova-status upgrade check.|
++-------------------------------+------------------------------------------------------+
+| <app>.upgrade.upgrade.pre     | Mostly applicable to data plane nodes. During this   |
+|                               | phase resources will be gracefully removed from the  |
+|                               | current node if allowed. Services of the upgraded    |
+|                               | application will be set to the admin-disabled state  |
+|                               | to make sure the node does not participate in        |
+|                               | resource scheduling. For example, on gtw nodes this  |
+|                               | will set all agents to the admin-disabled state and  |
+|                               | move all routers to other agents.                    |
++-------------------------------+------------------------------------------------------+
+| <app>.upgrade.upgrade         | This state upgrades the application on a particular  |
+|                               | target: stop services, render configuration,         |
+|                               | install new packages, run offline dbsync (for ctl),  |
+|                               | and start services. The data plane should not be     |
+|                               | affected, only the OpenStack Python services.        |
++-------------------------------+------------------------------------------------------+
+| <app>.upgrade.upgrade.post    | Add services back to scheduling.                     |
++-------------------------------+------------------------------------------------------+
+| <app>.upgrade.post            | This phase should be launched only when the upgrade  |
+|                               | of the cloud is completed.                           |
++-------------------------------+------------------------------------------------------+
+| <app>.upgrade.verify          | Here we run basic health checks (API CRUD            |
+|                               | operations, verify that there are no dead network    |
+|                               | agents or compute services).                         |
++-------------------------------+------------------------------------------------------+
+
+
 Documentation and Bugs
 ======================
 
diff --git a/_modules/neutronv2/__init__.py b/_modules/neutronv2/__init__.py
index 3626669..85db2b7 100644
--- a/_modules/neutronv2/__init__.py
+++ b/_modules/neutronv2/__init__.py
@@ -60,6 +60,7 @@
 router_interface_add = routers.router_interface_add
 router_interface_remove = routers.router_interface_remove
 
+wait_for_network_services = agents.wait_for_network_services
 
 __all__ = (
     'network_get_details', 'network_update', 'network_delete', 'network_list',
diff --git a/_modules/neutronv2/agents.py b/_modules/neutronv2/agents.py
index 15703d2..b8f35eb 100644
--- a/_modules/neutronv2/agents.py
+++ b/_modules/neutronv2/agents.py
@@ -1,7 +1,14 @@
+import logging
+import time
+from salt.exceptions import CommandExecutionError
+
 from neutronv2.common import send
 from neutronv2.arg_converter import get_by_name_or_uuid_multiple
+from neutronv2.lists import agent_list
 
 
+log = logging.getLogger(__name__)
+
 try:
     from urllib.parse import urlencode
 except ImportError:
@@ -87,3 +94,40 @@
 def dhcp_agent_by_network_list(network_id, **kwargs):
     url = '/networks/{}/dhcp-agents'.format(network_id)
     return url, {}
+
+
+def wait_for_network_services(cloud_name, host_id=None,
+                              admin_up_only=True,
+                              retries=18, timeout=10):
+    """
+    Ensure services on the specified host are alive, otherwise fail with an exception.
+
+    :param host_id:              host name to wait or None (to check for all hosts)
+    :param cloud_name:           name of cloud from os client config
+    :param admin_up_only:        do not check for admin disabled agents
+    :param timeout:              number of seconds to wait before retries
+    :param retries:              number of retries
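+
+    CLI Example (assuming an ``admin_identity`` cloud is defined in
+    /etc/openstack/clouds.yml):
+
+    .. code-block:: bash
+
+        salt-call neutronv2.wait_for_network_services cloud_name=admin_identity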
+    """
+
+    kwargs = {'alive': False}
+
+    if admin_up_only:
+        kwargs['admin_state_up'] = True
+
+    if host_id is not None:
+        kwargs['host'] = host_id
+
+    res = None
+    for _ in range(retries):
+        try:
+            agents = agent_list(cloud_name=cloud_name, **kwargs)['agents']
+            res = len(agents)
+        except Exception as e:
+            msg = "Failed to get agent list: {0}".format(e)
+            log.error(msg)
+            raise CommandExecutionError(msg)
+
+        if res == 0:
+            return "All services are up"
+        time.sleep(timeout)
+    raise CommandExecutionError("Some agents are still down: {}".format(agents))
diff --git a/_states/neutronv2.py b/_states/neutronv2.py
index 0c00c22..0ab8248 100644
--- a/_states/neutronv2.py
+++ b/_states/neutronv2.py
@@ -1,4 +1,5 @@
 import logging
+import random
 
 log = logging.getLogger(__name__)
 
@@ -190,13 +191,60 @@
     return _succeeded('update', name, 'agent', changes)
 
 
+def l3_resources_moved(name, cloud_name, target=None):
+    """
+    Ensure L3 resources are moved to the target/other nodes.
+    Move non-HA (legacy and DVR) routers.
+
+    :param name: agent host to remove routers from
+    :param target: target host to move routers to
+    :param cloud_name: name of cloud from os client config
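+
+    A sketch of usage from an SLS file (the hostname is illustrative):
+
+    .. code-block:: yaml
+
+        migrate_non_ha_l3:
+          neutronv2.l3_resources_moved:
+            - name: gtw01.mydomain.local
+            - cloud_name: admin_identity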
+    """
+
+    all_agents = _neutronv2_call(
+        'agent_list', agent_type='L3 agent', cloud_name=cloud_name)['agents']
+
+    current_agents = [x['id'] for x in all_agents if x['host'] == name]
+    if not current_agents:
+        log.error("No L3 agent found on host {0}.".format(name))
+        return _failed('resources_moved', name, 'L3 agent')
+    current_agent_id = current_agents[0]
+
+    if target is not None:
+        target_agents = [x['id'] for x in all_agents if x['host'] == target]
+    else:
+        target_agents = [x['id'] for x in all_agents
+                         if x['host'] != name and x['alive'] and x['admin_state_up']]
+
+    if not target_agents:
+        log.error("No candidate agents to move routers.")
+        return _failed('resources_moved', name, 'L3 agent')
+
+    routers_on_agent = _neutronv2_call(
+        'l3_agent_router_list', current_agent_id, cloud_name=cloud_name)['routers']
+
+    routers_on_agent = [x for x in routers_on_agent if not x['ha']]
+
+    try:
+        for router in routers_on_agent:
+            _neutronv2_call(
+                'l3_agent_router_remove', router_id=router['id'],
+                agent_id=current_agent_id, cloud_name=cloud_name)
+            _neutronv2_call(
+                'l3_agent_router_schedule', router_id=router['id'],
+                agent_id=random.choice(target_agents),
+                cloud_name=cloud_name)
+    except Exception as e:
+        log.exception("Failed to move router from {0}: {1}".format(name, e))
+        return _failed('resources_moved', name, 'L3 agent')
+
+    return _succeeded('resources_moved', name, 'L3 agent')
+
+
 def _succeeded(op, name, resource, changes=None):
     msg_map = {
         'create': '{0} {1} created',
         'delete': '{0} {1} removed',
         'update': '{0} {1} updated',
         'no_changes': '{0} {1} is in desired state',
-        'absent': '{0} {1} not present'
+        'absent': '{0} {1} not present',
+        'resources_moved': '{1} resources were moved from {0}',
     }
     changes_dict = {
         'name': name,
@@ -212,7 +260,8 @@
         'create': '{0} {1} failed to create',
         'delete': '{0} {1} failed to delete',
         'update': '{0} {1} failed to update',
-        'find': '{0} {1} found multiple {0}'
+        'find': '{0} {1} found multiple {0}',
+        'resources_moved': 'failed to move {1} from {0}',
     }
     changes_dict = {
         'name': name,
diff --git a/neutron/map.jinja b/neutron/map.jinja
index 13b4bd6..69c7144 100644
--- a/neutron/map.jinja
+++ b/neutron/map.jinja
@@ -9,7 +9,7 @@
 {% set compute = salt['grains.filter_by']({
     'BaseDefaults': default_params,
     'Debian': {
-        'pkgs': ['neutron-openvswitch-agent', 'openvswitch-switch', 'python-pycadf'],
+        'pkgs': ['neutron-openvswitch-agent', 'python-pycadf'],
         'pkgs_ovn': ['ovn-common', 'ovn-host'],
         'pkgs_bagpipe': ['python-networking-bagpipe'],
         'services': ['neutron-openvswitch-agent'],
@@ -30,7 +30,7 @@
         },
     },
     'RedHat': {
-        'pkgs': ['openstack-neutron-openvswitch', 'openvswitch', 'python-pycadf'],
+        'pkgs': ['openstack-neutron-openvswitch', 'python-pycadf'],
         'pkgs_ovn': ['openvswitch-ovn'],
         'pkgs_bagpipe': ['python-networking-bagpipe'],
         'services': ['neutron-openvswitch-agent'],
@@ -53,7 +53,7 @@
 }, merge=pillar.neutron.get('compute', {}), base='BaseDefaults') %}
 
 {%- set opendaylight_enabled = pillar.neutron.gateway is defined and pillar.neutron.gateway.opendaylight is defined %}
-{%- set pkgs_list = ['neutron-dhcp-agent', 'openvswitch-common', 'neutron-metadata-agent'] %}
+{%- set pkgs_list = ['neutron-dhcp-agent', 'neutron-metadata-agent'] %}
 {%- set services_list = ['neutron-metadata-agent', 'neutron-dhcp-agent'] %}
 {%- if not opendaylight_enabled %}
 {%- do pkgs_list.extend(['neutron-openvswitch-agent', 'neutron-l3-agent']) %}
@@ -187,3 +187,13 @@
         'agents_failed_major_threshold': 0.6,
     },
 }, grain='os_family', merge=salt['pillar.get']('neutron:monitoring')) %}
+
+{% set upgrade = salt['grains.filter_by']({
+    'default': {
+        'resource_migration': {
+            'l3': {
+                'enabled': True,
+            },
+        },
+    },
+}, grain='os_family', merge=pillar.get('neutron', {}).get('upgrade', {})) %}
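+
+{#- The defaults above can be overridden from pillar, e.g. to skip l3 router
+    migration during upgrade set neutron:upgrade:resource_migration:l3:enabled
+    to false. #}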
diff --git a/neutron/upgrade/pre/init.sls b/neutron/upgrade/pre/init.sls
index 55e654d..17e830f 100644
--- a/neutron/upgrade/pre/init.sls
+++ b/neutron/upgrade/pre/init.sls
@@ -1,17 +1,10 @@
 {%- from "neutron/map.jinja" import server,gateway with context %}
 
-include:
- - neutron.upgrade.verify.api
-
 neutron_pre:
   test.show_notification:
     - text: "Running neutron.upgrade.pre"
 
-{%- if gateway.get('enabled') %}
-{# Get os client config from mine #}
-
 {%- set os_content = salt['mine.get']('I@keystone:client:os_client_config:enabled:true', 'keystone_os_client_config', 'compound').values()[0] %}
-
 keystone_os_client_config:
   file.managed:
     - name: /etc/openstack/clouds.yml
@@ -20,5 +13,3 @@
     - user: 'root'
     - group: 'root'
     - makedirs: True
-
-{%- endif %}
diff --git a/neutron/upgrade/upgrade/pre.sls b/neutron/upgrade/upgrade/pre.sls
index 9a4a105..615f98a 100644
--- a/neutron/upgrade/upgrade/pre.sls
+++ b/neutron/upgrade/upgrade/pre.sls
@@ -1,4 +1,4 @@
-{%- from "neutron/map.jinja" import server,gateway with context %}
+{%- from "neutron/map.jinja" import upgrade,server,gateway with context %}
 
 {%- if gateway.get('enabled') %}
 {% set host_id = salt['network.get_hostname']() %}
@@ -8,4 +8,11 @@
     - name: {{ host_id }}
     - cloud_name: admin_identity
 
+{%- if upgrade.get('resource_migration', {}).get('l3', {}).get('enabled') %}
+migrate_non_ha_l3:
+  neutronv2.l3_resources_moved:
+    - name: {{ host_id }}
+    - cloud_name: admin_identity
+{%- endif %}
+
 {%- endif %}
diff --git a/neutron/upgrade/verify/api.sls b/neutron/upgrade/verify/_api.sls
similarity index 100%
rename from neutron/upgrade/verify/api.sls
rename to neutron/upgrade/verify/_api.sls
diff --git a/neutron/upgrade/verify/_service.sls b/neutron/upgrade/verify/_service.sls
new file mode 100644
index 0000000..4ff3239
--- /dev/null
+++ b/neutron/upgrade/verify/_service.sls
@@ -0,0 +1,17 @@
+{%- from "neutron/map.jinja" import server,gateway,compute with context %}
+
+neutron_task_upgrade_verify_service:
+  test.show_notification:
+    - text: "Running neutron.upgrade.verify.service"
+
+{%- if gateway.get('enabled') or compute.get('enabled') %}
+  {% set host_id = salt['network.get_hostname']() %}
+{%- endif %}
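+{#- When host_id is not set, wait_for_network_services checks agents on all
+    hosts (see the module docstring). #}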
+
+wait_for_neutron_agents:
+  module.run:
+    - name: neutronv2.wait_for_network_services
+    {%- if host_id is defined %}
+    - host_id: {{ host_id }}
+    {%- endif %}
+    - cloud_name: admin_identity
diff --git a/neutron/upgrade/verify/init.sls b/neutron/upgrade/verify/init.sls
new file mode 100644
index 0000000..5a4f325
--- /dev/null
+++ b/neutron/upgrade/verify/init.sls
@@ -0,0 +1,3 @@
+include:
+ - neutron.upgrade.verify._api
+ - neutron.upgrade.verify._service