Merge pull request #9 from akutz/feature/default-ip

Default IP addresses, linting, requirements
diff --git a/.pylintrc b/.pylintrc
new file mode 100644
index 0000000..aedec0b
--- /dev/null
+++ b/.pylintrc
@@ -0,0 +1,2 @@
+[MASTER]
+extension-pkg-whitelist=netifaces
diff --git a/DataSourceVMwareGuestInfo.py b/DataSourceVMwareGuestInfo.py
index 3a3b884..d4ae0c2 100644
--- a/DataSourceVMwareGuestInfo.py
+++ b/DataSourceVMwareGuestInfo.py
@@ -18,17 +18,21 @@
 A cloud init datasource for VMware GuestInfo.
 '''
 
-import collections
 import base64
-import zlib
-import json
+import collections
 from distutils.spawn import find_executable
+import json
+import socket
+import zlib
 
 from cloudinit import log as logging
 from cloudinit import sources
 from cloudinit import util
 from cloudinit import safeyaml
 
+from deepmerge import always_merger
+import netifaces
+
 LOG = logging.getLogger(__name__)
 NOVAL = "No value found"
 VMTOOLSD = find_executable("vmtoolsd")
@@ -123,20 +127,14 @@
         brought up the OS at this point.
         """
 
-        # Set the hostname.
-        hostname = self.metadata.get('local-hostname')
-        if hostname:
-            self.distro.set_hostname(hostname)
-            LOG.info("set hostname %s", hostname)
-
-        # Update the metadata with the actual host name and actual network
-        # interface information.
+        # Get information about the host.
         host_info = get_host_info()
         LOG.info("got host-info: %s", host_info)
-        hostname = host_info.get('local-hostname', hostname)
-        self.metadata['local-hostname'] = hostname
-        interfaces = host_info['network']['interfaces']
-        self.metadata['network']['interfaces'] = interfaces
+
+        # Ensure the metadata gets updated with information about the
+        # host, including the network interfaces, default IP addresses,
+        # etc.
+        self.metadata = always_merger.merge(self.metadata, host_info)
 
         # Persist the instance data for versions of cloud-init that support
         # doing so. This occurs here rather than in the get_data call in
@@ -298,36 +296,115 @@
     return [DataSourceVMwareGuestInfo]
 
 
+def get_default_ip_addrs():
+    '''
+    Returns a tuple (ipv4, ipv6) with the default IPv4 and IPv6 addresses,
+    based on the device(s) used for the default route. Either element may be
+    None if that family has no default route, or if the device used by the
+    default route has more than one address of that family (a warning is
+    logged in that case).
+    '''
+    gateways = netifaces.gateways()
+    if 'default' not in gateways:
+        return None, None
+
+    default_gw = gateways['default']
+    if netifaces.AF_INET not in default_gw and netifaces.AF_INET6 not in default_gw:
+        return None, None
+
+    ipv4 = None
+    ipv6 = None
+
+    gw4 = default_gw.get(netifaces.AF_INET)
+    if gw4:
+        _, dev4 = gw4
+        addr4_fams = netifaces.ifaddresses(dev4)
+        if addr4_fams:
+            af_inet4 = addr4_fams.get(netifaces.AF_INET)
+            if af_inet4:
+                if len(af_inet4) > 1:
+                    LOG.warning(
+                        "device %s has more than one ipv4 address: %s", dev4, af_inet4)
+                elif 'addr' in af_inet4[0]:
+                    ipv4 = af_inet4[0]['addr']
+
+    # Try to get the default IPv6 address by first seeing if there is a default
+    # IPv6 route.
+    gw6 = default_gw.get(netifaces.AF_INET6)
+    if gw6:
+        _, dev6 = gw6
+        addr6_fams = netifaces.ifaddresses(dev6)
+        if addr6_fams:
+            af_inet6 = addr6_fams.get(netifaces.AF_INET6)
+            if af_inet6:
+                if len(af_inet6) > 1:
+                    LOG.warning(
+                        "device %s has more than one ipv6 address: %s", dev6, af_inet6)
+                elif 'addr' in af_inet6[0]:
+                    ipv6 = af_inet6[0]['addr']
+
+    # If there is a default IPv4 address but not IPv6, then see if there is a
+    # single IPv6 address associated with the same device associated with the
+    # default IPv4 address. (ipv4 being set implies addr4_fams/dev4 are bound.)
+    if ipv4 and not ipv6:
+        af_inet6 = addr4_fams.get(netifaces.AF_INET6)
+        if af_inet6:
+            if len(af_inet6) > 1:
+                LOG.warning(
+                    "device %s has more than one ipv6 address: %s", dev4, af_inet6)
+            elif 'addr' in af_inet6[0]:
+                ipv6 = af_inet6[0]['addr']
+
+    # If there is a default IPv6 address but not IPv4, then see if there is a
+    # single IPv4 address on the same device. The IPv4 family constant in
+    # netifaces is AF_INET; the module defines no AF_INET4 attribute.
+    if not ipv4 and ipv6:
+        af_inet4 = addr6_fams.get(netifaces.AF_INET)
+        if af_inet4:
+            if len(af_inet4) > 1:
+                LOG.warning(
+                    "device %s has more than one ipv4 address: %s", dev6, af_inet4)
+            elif 'addr' in af_inet4[0]:
+                ipv4 = af_inet4[0]['addr']
+
+    return ipv4, ipv6
+
+
 def get_host_info():
     '''
     Returns host information such as the host name and network interfaces.
     '''
-    import netifaces
-    import socket
 
     host_info = {
         'network': {
             'interfaces': {
                 'by-mac': collections.OrderedDict(),
-                'by-ip4': collections.OrderedDict(),
-                'by-ip6': collections.OrderedDict(),
+                'by-ipv4': collections.OrderedDict(),
+                'by-ipv6': collections.OrderedDict(),
             },
         },
     }
 
     hostname = socket.getfqdn()
     if hostname:
+        host_info['hostname'] = hostname
         host_info['local-hostname'] = hostname
 
+    default_ipv4, default_ipv6 = get_default_ip_addrs()
+    if default_ipv4:
+        host_info['local-ipv4'] = default_ipv4
+    if default_ipv6:
+        host_info['local-ipv6'] = default_ipv6
+
     by_mac = host_info['network']['interfaces']['by-mac']
-    by_ip4 = host_info['network']['interfaces']['by-ip4']
-    by_ip6 = host_info['network']['interfaces']['by-ip6']
+    by_ipv4 = host_info['network']['interfaces']['by-ipv4']
+    by_ipv6 = host_info['network']['interfaces']['by-ipv6']
 
     ifaces = netifaces.interfaces()
     for dev_name in ifaces:
         addr_fams = netifaces.ifaddresses(dev_name)
         af_link = addr_fams.get(netifaces.AF_LINK)
-        af_inet = addr_fams.get(netifaces.AF_INET)
+        af_inet4 = addr_fams.get(netifaces.AF_INET)
         af_inet6 = addr_fams.get(netifaces.AF_INET6)
 
         mac = None
@@ -338,37 +415,51 @@
         if mac == "00:00:00:00:00:00":
             continue
 
-        if mac and (af_inet or af_inet6):
+        if mac and (af_inet4 or af_inet6):
             key = mac
             val = {}
-            if af_inet:
-                val["ip4"] = af_inet
+            if af_inet4:
+                val["ipv4"] = af_inet4
             if af_inet6:
-                val["ip6"] = af_inet6
+                val["ipv6"] = af_inet6
             by_mac[key] = val
 
-        if af_inet:
-            for ip_info in af_inet:
+        if af_inet4:
+            for ip_info in af_inet4:
                 key = ip_info['addr']
+                if key == '127.0.0.1':
+                    continue
                 val = ip_info.copy()
                 del val['addr']
                 if mac:
                     val['mac'] = mac
-                by_ip4[key] = val
+                by_ipv4[key] = val
 
         if af_inet6:
             for ip_info in af_inet6:
                 key = ip_info['addr']
+                if key == '::1':
+                    continue
                 val = ip_info.copy()
                 del val['addr']
                 if mac:
                     val['mac'] = mac
-                by_ip6[key] = val
+                by_ipv6[key] = val
 
     return host_info
 
 
+def main():
+    '''
+    Script entry point: prints default metadata merged with host info as JSON.
+    '''
+    defaults = {'network': {'config': {'dhcp': True}}}
+    # The host information is merged on top of the default metadata.
+    merged = always_merger.merge(defaults, get_host_info())
+    print(util.json_dumps(merged))
+
+
 if __name__ == "__main__":
-    print util.json_dumps(get_host_info())
+    main()
 
 # vi: ts=4 expandtab
diff --git a/install.sh b/install.sh
index 89c4bca..bf25191 100755
--- a/install.sh
+++ b/install.sh
@@ -1,43 +1,113 @@
 #!/bin/sh
 
+# Exit as soon as there is an unexpected error.
+set -e
+
 #
 # usage: install.sh
 #        curl -sSL https://raw.githubusercontent.com/vmware/cloud-init-vmware-guestinfo/master/install.sh | sh -
 #
 
-if ! command -v curl >/dev/null 2>&1; then
-  echo "curl is required" 1>&2
-  exit 1
-fi
-
-# The script to lookup the path to the cloud-init's datasource directory, "sources".
-PY_SCRIPT='import os; from cloudinit import sources; print(os.path.dirname(sources.__file__));'
-
-# Get the path to the cloud-init installation's datasource directory.
-CLOUD_INIT_SOURCES=$(python -c ''"${PY_SCRIPT}"'' 2>/dev/null || \
-  python3 -c ''"${PY_SCRIPT}"'' 2>/dev/null) ||
-  { exit_code="${?}"; echo "failed to find python runtime" 1>&2; exit "${exit_code}"; }
-
-# If no "sources" directory was located then it's likely cloud-init is not installed.
-[ -z "${CLOUD_INIT_SOURCES}" ] && echo "cloud-init not found" 1>&2 && exit 1
-
 # The repository from which to fetch the cloud-init datasource and config files.
 REPO_SLUG="${REPO_SLUG:-https://raw.githubusercontent.com/vmware/cloud-init-vmware-guestinfo}"
 
 # The git reference to use. This can be a branch or tag name as well as a commit ID.
 GIT_REF="${GIT_REF:-master}"
 
+if ! command -v curl >/dev/null 2>&1; then
+  echo "curl is required" 1>&2
+  exit 1
+fi
+
+if ! command -v python >/dev/null 2>&1 && \
+   ! command -v python3 >/dev/null 2>&1; then
+  echo "python 2 or 3 is required" 1>&2
+  exit 1
+fi
+
+# PYTHON_VERSION may be 2 or 3 and indicates which version of Python
+# is used by cloud-init. This variable is not set until PY_MOD_CLOUD_INIT
+# is resolved.
+PYTHON_VERSION=
+get_py_mod_dir() {  # prints the directory of python module $1, or an empty line if it cannot be imported
+  _script='import os; import '"${1-}"'; print(os.path.dirname('"${1-}"'.__file__));'
+  case "${PYTHON_VERSION}" in
+  2)
+    python -c ''"${_script}"'' 2>/dev/null || echo ""
+    ;;
+  3)
+    python3 -c ''"${_script}"'' 2>/dev/null || echo ""
+    ;;
+  *)  # PYTHON_VERSION is neither 2 nor 3: try python3 first, then python
+    { python3 -c ''"${_script}"'' || python -c ''"${_script}"'' || echo ""; } 2>/dev/null
+    ;;
+  esac
+}
+
+# PY_MOD_CLOUD_INIT is set to the "cloudinit" directory in either
+# the Python2 or Python3 lib directory. This is also used to determine
+# which version of Python is responsible for running cloud-init.
+PY_MOD_CLOUD_INIT="$(get_py_mod_dir cloudinit)"
+if [ -z "${PY_MOD_CLOUD_INIT}" ]; then
+  echo "cloudinit is required" 1>&2
+  exit 1
+fi
+if echo "${PY_MOD_CLOUD_INIT}" | grep -q python2; then
+  PYTHON_VERSION=2
+else
+  PYTHON_VERSION=3
+fi
+echo "using python ${PYTHON_VERSION}"
+
+# The python modules deepmerge and netifaces are required. If they are
+# already installed, an assumption is made they are the correct versions.
+# Otherwise an attempt is made to install them with pip.
+if [ -z "$(get_py_mod_dir deepmerge)" ] || [ -z "$(get_py_mod_dir netifaces)" ]; then
+  echo "installing requirements"
+  if [ -z "$(get_py_mod_dir pip)" ]; then
+    echo "pip is required" 1>&2
+    exit 1
+  fi
+  _requirements="requirements.txt"
+  if [ ! -f "${_requirements}" ]; then
+    _requirements="$(mktemp)"
+    curl -sSL -o "${_requirements}" "${REPO_SLUG}/${GIT_REF}/requirements.txt"
+  fi
+  case "${PYTHON_VERSION}" in
+  2)
+    python -m pip install -r "${_requirements}"
+    ;;
+  3)
+    python3 -m pip install -r "${_requirements}"
+    ;;
+  esac
+fi
+
 # Download the cloud init datasource into the cloud-init's "sources" directory.
-curl -sSL -o "${CLOUD_INIT_SOURCES}/DataSourceVMwareGuestInfo.py" \
+echo "installing datasource"
+curl -sSL -o "${PY_MOD_CLOUD_INIT}/sources/DataSourceVMwareGuestInfo.py" \
   "${REPO_SLUG}/${GIT_REF}/DataSourceVMwareGuestInfo.py"
 
+# Make sure that the datasource can execute without error on this host.
+echo "validating datasource"
+case "${PYTHON_VERSION}" in
+2)
+  python "${PY_MOD_CLOUD_INIT}/sources/DataSourceVMwareGuestInfo.py" 1>/dev/null
+  ;;
+3)
+  python3 "${PY_MOD_CLOUD_INIT}/sources/DataSourceVMwareGuestInfo.py" 1>/dev/null
+  ;;
+esac
+
 # Add the configuration file that tells cloud-init what datasource to use.
+echo "installing config"
 mkdir -p /etc/cloud/cloud.cfg.d
 curl -sSL -o /etc/cloud/cloud.cfg.d/99-DataSourceVMwareGuestInfo.cfg \
   "${REPO_SLUG}/${GIT_REF}/99-DataSourceVMwareGuestInfo.cfg"
 
 # Download program used by ds-identify to determine whether or not the
 # VMwareGuestInfo datasource is useable.
+echo "installing dscheck"
 curl -sSL -o "/usr/bin/dscheck_VMwareGuestInfo" \
   "${REPO_SLUG}/${GIT_REF}/dscheck_VMwareGuestInfo.sh"
 chmod 0755 "/usr/bin/dscheck_VMwareGuestInfo"
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..d726390
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,2 @@
+deepmerge >= 0.0.5
+netifaces >= 0.10.9
\ No newline at end of file