Refactored data source to cloud-init interface

This patch heavily refactors the data source so it follows the
cloud-init datasource interface standard. The hostname, network
configuration, and instance ID are all set via the "guestinfo.metadata"
property. Please see README.md for more information.
diff --git a/DataSourceVmxGuestinfo.py b/DataSourceVmxGuestinfo.py
index c5d7998..60b68dd 100644
--- a/DataSourceVmxGuestinfo.py
+++ b/DataSourceVmxGuestinfo.py
@@ -2,24 +2,54 @@
 #
 # Copyright (C) 2017 VMware Inc.
 #
-# Author: Anish Swaminathan <anishs@vmware.com>
+# Authors: Anish Swaminathan <anishs@vmware.com>
+#          Andrew Kutz <akutz@vmware.com>
 #
 import os
 import base64
+import zlib
+import json
 
 from cloudinit import log as logging
 from cloudinit import sources
 from cloudinit import util
+from cloudinit import safeyaml
 
 from distutils.spawn import find_executable
 
 LOG = logging.getLogger(__name__)
 
+# Used with CentOS 7 which installs cloud-init 0.7.9. The URL for
+# the DataSource class is http://bit.ly/cloudinit-datasource-0-7-9.
+#
+# Setting the hostname:
+#     The hostname is set by way of the metadata key "local-hostname".
+#
+# Setting the instance ID:
+#     The instance ID may be set by way of the metadata key "instance-id".
+#     However, if this value is absent then the instance ID is
+#     read from the file /sys/class/dmi/id/product_uuid.
+#
+# Configuring the network:
+#     The network is configured by setting the metadata key "network"
+#     with a value consistent with Network Config Versions 1 or 2,
+#     depending on the Linux distro's version of cloud-init:
+#
+#         Network Config Version 1 - http://bit.ly/cloudinit-net-conf-v1
+#         Network Config Version 2 - http://bit.ly/cloudinit-net-conf-v2
+#
+#     For example, CentOS 7's official cloud-init package is version
+#     0.7.9 and does not support Network Config Version 2. However,
+#     this datasource still supports supplying Network Config Version 2
+#     data as long as the Linux distro's cloud-init package is new
+#     enough to parse the data.
+#
+#     The metadata key "network.encoding" may be used to indicate the
+#     format of the metadata key "network". Valid encodings are base64
+#     and gzip+base64.
 class DataSourceVmxGuestinfo(sources.DataSource):
     def __init__(self, sys_cfg, distro, paths, ud_proc=None):
         sources.DataSource.__init__(self, sys_cfg, distro, paths, ud_proc)
-        self.metadata = {}
-        self.userdata_raw = ''
         self.vmtoolsd = find_executable("vmtoolsd")
         if not self.vmtoolsd:
             LOG.error("Failed to find vmtoolsd")
@@ -28,118 +58,89 @@
         if not self.vmtoolsd:
             LOG.error("vmtoolsd is required to fetch guestinfo value")
             return False
-        hostname = self._get_guestinfo_value('hostname')
-        if hostname:
-            self.distro.set_hostname(hostname)
-        ud = self._get_guestinfo_value('userdata')
-        if ud:
-            LOG.debug("Decoding base64 format guestinfo.userdata")
-            self.userdata_raw = base64.b64decode(ud)
-        found = True
-        dev_index = 0
-        network_settings = ''
-        while found:
-            key_begin = 'interface.' + str(dev_index)
-            key_iname = key_begin + '.name'
-            interface_name = self._get_guestinfo_value(key_iname)
-            if interface_name:
-                network_settings += 'auto ' + interface_name + '\n'
-                network_settings += 'iface ' + interface_name
-                key_proto = key_begin + '.dhcp'
-                dhcp_enabled = self._get_guestinfo_value(key_proto)
-                key_address = key_begin + '.address'
-                address = self._get_guestinfo_value(key_address)
-                bootproto = 'dhcp'
-                if dhcp_enabled:
-                    if dhcp_enabled == 'yes':
-                        network_settings += ' dhcp\n'
-                    elif dhcp_enabled == 'no':
-                        network_settings += ' static\n'
-                        bootproto = 'static'
-                    else:
-                        LOG.warning("Invalid value for yes/no parameter for %s, setting to dhcp", key_proto)
-                elif address:
-                    bootproto = 'static'
-                    dhcp_enabled == 'no'
-                    network_settings += ' static\n'
-                else:
-                    dhcp_enabled == 'yes'
-                    network_settings += ' dhcp\n'
-                    LOG.debug("Setting network bootproto to dhcp by default")
-                key_mac = key_begin + '.mac'
-                mac = self._get_guestinfo_value(key_mac)
-                if address:
-                    network_settings += 'address ' + address + '\n'
-                if mac:
-                    network_settings += 'hwaddress ' + mac + '\n'
-                key_netmask = key_begin + '.netmask'
-                netmask = self._get_guestinfo_value(key_netmask)
-                if netmask:
-                    network_settings += 'netmask ' + netmask + '\n'
-                key_dnsserver = 'dns.servers'
-                dnsserver = self._get_guestinfo_value(key_dnsserver)
-                if dnsserver:
-                    network_settings += 'dns-nameservers '
-                    dnsserver = dnsserver.split(',')
-                    for d in dnsserver:
-                        network_settings += d + ' '
-                    network_settings += '\n'
-                key_dnsdomain = 'dns.domains'
-                dnsdomain = self._get_guestinfo_value(key_dnsdomain)
-                if dnsdomain:
-                    network_settings += 'dns-search '
-                    dnsdomain = dnsdomain.split(',')
-                    for d in dnsdomain:
-                        network_settings += d + ' '
-                    network_settings += '\n'
-                route_index = 0
-                default_destination_set = False
-                while True:
-                    key_route = key_begin + '.route.' + str(route_index)
-                    route = self._get_guestinfo_value(key_route)
-                    if route:
-                        network_settings += "routes.%s " % (route_index)
-                        route = route.split(',')
-                        if len(route) > 2:
-                            LOG.debug("Route information for %s route in %s device incorrect - ", 
-                                                "expected 2 values", route_index, dev_index)
-                            continue
-                        elif len(route) == 2:
-                            network_settings += route[0] + ' ' + route[1] + '\n'# Gateway Destination
-                        else: #length = 1
-                            if not default_destination_set:
-                                network_settings += route[0] + ' 0.0.0.0/0' + '\n'
-                                default_destination_set = True
-                            else:
-                                LOG.debug("Default destination set previously, not setting route %s", route_index) 
-                    else:
-                        break
-                    route_index += 1
-            else:
-                found = False
-            dev_index += 1
-        self.distro.apply_network(network_settings, False)
+
+        # Get the JSON metadata. Can be plain-text, base64, or gzip+base64.
+        metadata = self._get_encoded_guestinfo_data('metadata')
+        if metadata:
+            self.metadata = json.loads(metadata)
+
+        # Get the YAML userdata. Can be plain-text, base64, or gzip+base64.
+        self.userdata_raw = self._get_encoded_guestinfo_data('userdata')
+
+        # Get the YAML vendordata. Can be plain-text, base64, or gzip+base64.
+        self.vendordata_raw = self._get_encoded_guestinfo_data('vendordata')
+
         return True
 
-    def _get_guestinfo_value(self, key):
-        LOG.debug("Getting guestinfo value for key %s", key)
-        value = ''
-        try:
-            (value, _err) = util.subp([self.vmtoolsd, "--cmd", "info-get guestinfo." + key])
-            if _err:
-                LOG.error("Failed to get guestinfo value for key %s", key)
-        except util.ProcessExecutionError as error:
-            util.logexc(LOG,"Failed to get guestinfo value for key %s: %s", key, error)
-        except Exception:
-            util.logexc(LOG,"Unexpected error while trying to get guestinfo value for key %s", key)
-        return value.rstrip()
+    @property
+    def network_config(self):
+        # Pull the network configuration out of the metadata.
+        if self.metadata and 'network' in self.metadata:
+            data = self._get_encoded_metadata('network')
+            if data:
+                # Load the YAML-formatted network data into an object
+                # and return it.
+                net_config = safeyaml.load(data)
+                LOG.debug("Loaded network config: %s", net_config)
+                return net_config
+        return None
 
     def get_instance_id(self):
+        # Pull the instance ID out of the metadata if present. Otherwise
+        # read the file /sys/class/dmi/id/product_uuid for the instance ID.
+        if self.metadata and 'instance-id' in self.metadata:
+            return self.metadata['instance-id']
         with open('/sys/class/dmi/id/product_uuid', 'r') as id_file:
             return str(id_file.read()).rstrip()
 
+    def _get_encoded_guestinfo_data(self, key):
+        data = self._get_guestinfo_value(key)
+        if not data:
+            return None
+        enc_type = self._get_guestinfo_value(key + '.encoding')
+        return self._get_encoded_data('guestinfo.' + key, enc_type, data)
+
+    def _get_encoded_metadata(self, key):
+        if not self.metadata or not key in self.metadata:
+            return None
+        data = self.metadata[key]
+        enc_type = self.metadata.get(key + '.encoding')
+        return self._get_encoded_data('metadata.' + key, enc_type, data)
+
+    def _get_encoded_data(self, key, enc_type, data):
+        LOG.debug("Getting encoded data for key=%s, enc=%s", key, enc_type)
+        if enc_type == "gzip+base64" or enc_type == "gz+b64":
+            LOG.debug("Decoding %s format %s", enc_type, key)
+            return zlib.decompress(base64.b64decode(data), zlib.MAX_WBITS | 16)
+        elif enc_type == "base64" or enc_type == "b64":
+            LOG.debug("Decoding %s format %s", enc_type, key)
+            return base64.b64decode(data)
+        else:
+            LOG.debug("Plain-text data %s", key)
+            return data
+
+    def _get_guestinfo_value(self, key):
+        NOVAL = "No value found"
+        LOG.debug("Getting guestinfo value for key %s", key)
+        try:
+            (stdout, stderr) = util.subp([self.vmtoolsd, "--cmd", "info-get guestinfo." + key])
+            if stderr == NOVAL:
+                LOG.debug("No value found for key %s", key)
+            elif not stdout:
+                LOG.error("Failed to get guestinfo value for key %s", key)
+            else:
+                return stdout.rstrip()
+        except util.ProcessExecutionError as error:
+            if error.stderr == NOVAL:
+                LOG.debug("No value found for key %s", key)
+            else:
+                util.logexc(LOG,"Failed to get guestinfo value for key %s: %s", key, error)
+        except Exception:
+            util.logexc(LOG,"Unexpected error while trying to get guestinfo value for key %s", key)
+        return None
+
 def get_datasource_list(depends):
     """
     Return a list of data sources that match this set of dependencies
     """
-    return [DataSourceVmxGuestinfo]
\ No newline at end of file
+    return [DataSourceVmxGuestinfo]
diff --git a/README.md b/README.md
index 5bd27ad..236060e 100644
--- a/README.md
+++ b/README.md
@@ -1,27 +1,119 @@
-# CentOS Cloud-Init Datasource for VMware VMX Guestinfo
-This project uses Docker to build an RPM for CentOS that provides a
-cloud-init datasource for VMware's VMX Guestinfo interface.
-
-## Getting Started
-Docker is required to build the RPM. Once Docker is installed simply run:
-
-```shell
-$ make
-```
-
-The RPM is created at `rpmbuild/RPMS/noarch/cloud-init-vmx-guestinfo-VERSION-RELEASE.noarch.rpm`.
+# Cloud-Init Datasource for VMware VMX Guestinfo
+This project provides a cloud-init datasource for pulling meta,
+user, and vendor data from VMware's VMX Guestinfo interface.
 
 ## Installation
-Either the `rpm` or `yum` tools can be used to install the RPM on CentOS.
+There are multiple methods of installing the data source.
+
+### Installing on RHEL/CentOS 7
+There is an RPM available for installing on RedHat/CentOS:
 
 ```shell
-$ yum install https://s3-us-west-2.amazonaws.com/cnx.vmware/cicd/centos/cloud-init-vmx-guestinfo-1.0.4-0.noarch.rpm
+$ yum install https://s3-us-west-2.amazonaws.com/cnx.vmware/cicd/centos/cloud-init-vmx-guestinfo-1.0.0-0.noarch.rpm
 ```
 
-The above command will also install the required `cloud-init` dependency.
+### Installing on other Linux distributions
+The VMX Guestinfo datasource can be installed on any Linux distribution
+where cloud-init is already present. To do so, please follow these steps:
 
-## Creating a cloud-config file
-The first step to use the data source is to create a cloud config file:
+1. Find the path to the `cloudinit/sources` Python package:
+```shell
+$ PY_SCRIPT='import os; from cloudinit import sources; print(os.path.dirname(sources.__file__));'
+$ CLOUDINIT_SOURCES=$(python -c ''"${PY_SCRIPT}"'' 2>/dev/null || python3 -c ''"${PY_SCRIPT}"'')
+```
+
+2. Verify `CLOUDINIT_SOURCES` is set to a valid path. If it isn't, then
+cloud-init is likely not installed and these instructions should be
+aborted.
+```shell
+$ [ -n "${CLOUDINIT_SOURCES}" ] || echo "cloud-init not found"
+```
+
+3. Write `DataSourceVmxGuestinfo.py` to `$CLOUDINIT_SOURCES`:
+```shell
+$ curl -sSLo "${CLOUDINIT_SOURCES}/DataSourceVmxGuestinfo.py" \
+  https://raw.githubusercontent.com/akutz/cloudinit-ds-vmx-guestinfo/master/DataSourceVmxGuestinfo.py
+```
+
+4. Update the list of available datasources. This step can vary based on
+the Linux distribution. On Ubuntu 14.04 LTS (Trusty) the file
+`/etc/cloud/cloud.cfg.d/90_dpkg.cfg` contains the list of datasources that 
+needs to be amended to include `VmxGuestinfo`. On Ubuntu 18.04 (Bionic)
+and other Linux distributions this file may not be present. In those cases
+use the following command to add a configuration file to cloud-init that
+overrides any existing datasource selection with the VMX Guestinfo
+datasource:
+
+```shell
+$ curl -sSLo /etc/cloud/cloud.cfg.d/10_vmx_guestinfo.cfg \
+  https://raw.githubusercontent.com/akutz/cloudinit-ds-vmx-guestinfo/master/10_vmx_guestinfo.cfg
+```
+
+## Configuration
+The data source is configured by setting `guestinfo` properties on a 
+VM's `extraconfig` data or a customizable vApp's `properties` data.
+
+| Property | Description |
+|----------|-------------|
+| `guestinfo.metadata` | A JSON string containing the cloud-init metadata. |
+| `guestinfo.metadata.encoding` | The encoding type for `guestinfo.metadata`. |
+| `guestinfo.userdata` | A YAML document containing the cloud-init user data. |
+| `guestinfo.userdata.encoding` | The encoding type for `guestinfo.userdata`. |
+| `guestinfo.vendordata` | A YAML document containing the cloud-init vendor data. |
+| `guestinfo.vendordata.encoding` | The encoding type for `guestinfo.vendordata`. |
+
+All `guestinfo.*.encoding` property values may be set to `base64` or 
+`gzip+base64`.
+
+## Walkthrough
+The following series of steps is a demonstration on how to configure a VM
+with cloud-init and the VMX Guestinfo datasource.
+
+### Create a network configuration file
+First, create the network configuration for the VM. Save the following 
+YAML to a file named `network-config.yaml`:
+
+```yaml
+version: 1
+config:
+  - type: physical
+    name: ens192
+    subnets:
+      - type: static
+        address: 192.168.1.200
+        gateway: 192.168.1.1
+        dns_nameservers:
+          - 8.8.8.8
+          - 8.8.4.4
+        dns_search:
+          - vmware.ci
+```
+
+See the section on [configuring the network](#configuring-the-network) for
+more information on the network configuration schema.
+
+### Create a metadata file
+Next, create a JSON file named `metadata.json`:
+
+```json
+{
+  "network": "NETWORK_CONFIG",
+  "network.encoding": "gzip+base64",
+  "local-hostname": "cloud-vm",
+  "instance-id": "cloud-vm"
+}
+```
+
+Please note that in addition to the `network` key in the metadata there
+is also a key named `network.encoding`. This key informs the datasource
+how to decode the `network` data. Valid values for `network.encoding`
+include:
+
+* `base64`
+* `gzip+base64`
+
+### Create a cloud-config file
+Finally, create the cloud-config file `cloud-config.yaml`:
 
 ```yaml
 #cloud-config
@@ -38,22 +130,84 @@
       - ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQDE0c5FczvcGSh/tG4iw+Fhfi/O5/EvUM/96js65tly4++YTXK1d9jcznPS5ruDlbIZ30oveCBd3kT8LLVFwzh6hepYTf0YmCTpF4eDunyqmpCXDvVscQYRXyasEm5olGmVe05RrCJSeSShAeptv4ueIn40kZKOghinGWLDSZG4+FFfgrmcMCpx5YSCtX2gvnEYZJr0czt4rxOZuuP7PkJKgC/mt2PcPjooeX00vAj81jjU2f3XKrjjz2u2+KIt9eba+vOQ6HiC8c2IzRkUAJ5i1atLy8RIbejo23+0P4N2jjk17QySFOVHwPBDTYb0/0M/4ideeU74EN/CgVsvO6JrLsPBR4dojkV5qNbMNxIVv5cUwIy2ThlLgqpNCeFIDLCWNZEFKlEuNeSQ2mPtIO7ETxEL2Cz5y/7AIuildzYMc6wi2bofRC8HmQ7rMXRWdwLKWsR0L7SKjHblIwarxOGqLnUI+k2E71YoP7SZSlxaKi17pqkr0OMCF+kKqvcvHAQuwGqyumTEWOlH6TCx1dSPrW+pVCZSHSJtSTfDW2uzL6y8k10MT06+pVunSrWo5LHAXcS91htHV1M1UrH/tZKSpjYtjMb5+RonfhaFRNzvj7cCE1f3Kp8UVqAdcGBTtReoE8eRUT63qIxjw03a7VwAyB2w+9cu1R9/vAo8SBeRqw== sakutz@gmail.com
 ```
 
-## Assigning the cloud-config data to the VM's Guestinfo
-Please note that this step requires that the VM be powered off.
+### Assigning the cloud-config data to the VM's Guestinfo
+Please note that this step requires that the VM be powered off. All of
+the commands below use the VMware CLI tool, 
+[`govc`](https://github.com/vmware/govmomi/blob/master/govc).
 
-Once the cloud config file has been created, use the 
-[`govc`](https://github.com/vmware/govmomi/blob/master/govc/USAGE.md#vmchange)
-tool's `vm.change` command to set the appropriate keys on the powered-off VM:
-
+Go ahead and assign the path to the VM to the environment variable `VM`:
 ```shell
-$ govc vm.change -vm $VM -e guestinfo.userdata=$(cat cloud-config.yaml | gzip -9 | base64)
-$ govc vm.change -vm $VM -e guestinfo.userdata.encoding=gzip+base64
+$ export VM="/inventory/path/to/the/vm"
 ```
 
-## Using the cloud-init VMX Guestinfo datasource
-Power the VM back on. If all went according to plan, the CentOS box has been
-locked down to SSH access only for the user defined in the above cloud-config
-YAML file.
+Next, power off the VM:
+```shell
+$ govc vm.power -off "${VM}"
+```
+
+Export the environment variables that contain the cloud-init metadata
+and cloud-config:
+```shell
+$ export CLOUD_CONFIG=$(gzip -c9 <cloud-config.yaml | base64)
+$ export METADATA=$(sed 's~NETWORK_CONFIG~'"$(gzip -c9 <network-config.yaml | \
+                    base64)"'~' <metadata.json | gzip -9 | base64)
+```
+
+Assign the metadata and cloud-config to the VM's extra configuration
+dictionary, `guestinfo`:
+```shell
+$ govc vm.change -vm "${VM}" -e guestinfo.metadata="${METADATA}"
+$ govc vm.change -vm "${VM}" -e guestinfo.metadata.encoding=gzip+base64
+$ govc vm.change -vm "${VM}" -e guestinfo.userdata="${CLOUD_CONFIG}"
+$ govc vm.change -vm "${VM}" -e guestinfo.userdata.encoding=gzip+base64
+```
+
+Please note the above commands include specifying the encoding for the
+properties. This is important as it informs the datasource how to decode
+the data for cloud-init. Valid values for `metadata.encoding` and
+`userdata.encoding` include:
+
+* `base64`
+* `gzip+base64`
+
+### Using the cloud-init VMX Guestinfo datasource
+Power the VM back on.
+```shell
+$ govc vm.power -on "${VM}"
+``` 
+
+If all went according to plan, the CentOS box is:
+* Locked down, allowing SSH access only for the user in the cloud-config
+* Configured for a static IP address, 192.168.1.200
+* Assigned the hostname `cloud-vm`
+
+## Examples
+This section reviews common configurations:
+
+### Setting the hostname
+The hostname is set by way of the metadata key `local-hostname`.
+
+### Setting the instance ID
+The instance ID may be set by way of the metadata key `instance-id`.
+However, if this value is absent then the instance ID is
+read from the file `/sys/class/dmi/id/product_uuid`.
+
+### Configuring the network
+The network is configured by setting the metadata key `network`
+with a value consistent with Network Config Versions 
+[1](http://bit.ly/cloudinit-net-conf-v1) or 
+[2](http://bit.ly/cloudinit-net-conf-v2),
+depending on the Linux distro's version of cloud-init.
+
+For example, CentOS 7's official cloud-init package is version
+0.7.9 and does not support Network Config Version 2. However,
+this datasource still supports supplying Network Config Version 2
+data as long as the Linux distro's cloud-init package is new
+enough to parse the data.
+
+The metadata key `network.encoding` may be used to indicate the
+format of the metadata key `network`. Valid encodings are `base64`
+and `gzip+base64`.
 
 ## Conclusion
 To learn more about how to use cloud-init with CentOS, please see the cloud-init
diff --git a/rpm.spec b/rpm.spec
index b2344d2..f08c956 100644
--- a/rpm.spec
+++ b/rpm.spec
@@ -6,7 +6,7 @@
 # common
 #################################################################################
 Name:           cloud-init-vmx-guestinfo
-Version:        1.0.4
+Version:        1.0.0
 Release:        0
 Summary:        A cloud-init datasource that uses VMX Guestinfo
 License:        Apache2