Merge pull request #25 from akutz/feature/cleanup-userdata

Cleanup guestinfo keys such as userdata
diff --git a/DataSourceVMwareGuestInfo.py b/DataSourceVMwareGuestInfo.py
index 3cf3450..ab392be 100644
--- a/DataSourceVMwareGuestInfo.py
+++ b/DataSourceVMwareGuestInfo.py
@@ -40,6 +40,7 @@
 NOVAL = "No value found"
 VMWARE_RPCTOOL = find_executable("vmware-rpctool")
 VMX_GUESTINFO = "VMX_GUESTINFO"
+GUESTINFO_EMPTY_YAML_VAL = "---"
 
 
 class NetworkConfigError(Exception):
@@ -106,7 +107,8 @@
         that the get_data functions in newer versions of cloud-init do,
         such as calling persist_instance_data.
         """
-        if not get_data_access_method():
+        data_access_method = get_data_access_method()
+        if not data_access_method:
             LOG.error("vmware-rpctool is required to fetch guestinfo value")
             return False
 
@@ -119,6 +121,10 @@
         # Get the vendor data.
         self.vendordata_raw = guestinfo('vendordata')
 
+        # Check to see if any of the guestinfo data should be removed.
+        if data_access_method == VMWARE_RPCTOOL:
+            clear_guestinfo_keys(self.metadata['cleanup-guestinfo'])
+
         if self.metadata or self.userdata_raw or self.vendordata_raw:
             return True
         else:
@@ -229,6 +235,39 @@
     return raw_data
 
 
+def get_none_if_empty_val(val):
+    '''
+    get_none_if_empty_val returns None if the provided value, once stripped
+    of its trailing whitespace, is empty or equal to GUESTINFO_EMPTY_YAML_VAL.
+
+    Otherwise the stripped value is returned, always as a string, regardless
+    of whether the input is a bytes class or a string.
+    '''
+
+    # If the provided value is a bytes class, convert it to a string to
+    # simplify the rest of this function's logic.
+    if isinstance(val, bytes):
+        val = val.decode()
+
+    val = val.rstrip()
+    if len(val) == 0 or val == GUESTINFO_EMPTY_YAML_VAL:
+        return None
+    return val
+
+
+def handle_returned_guestinfo_val(key, val):
+    '''
+    handle_returned_guestinfo_val returns the provided value, stripped of
+    trailing whitespace, unless it is empty or equal to
+    GUESTINFO_EMPTY_YAML_VAL, in which case None is returned
+    '''
+    val = get_none_if_empty_val(val)
+    if val:
+        return val
+    LOG.debug("No value found for key %s", key)
+    return None
+
+
 def get_guestinfo_value(key):
     '''
     Returns a guestinfo value for the specified key.
@@ -239,11 +278,7 @@
 
     if data_access_method == VMX_GUESTINFO:
         env_key = ("vmx.guestinfo." + key).upper().replace(".", "_", -1)
-        val = os.environ.get(env_key, "")
-        if val == "":
-            LOG.debug("No value found for key %s", key)
-        else:
-            return val
+        return handle_returned_guestinfo_val(key, os.environ.get(env_key, ""))
 
     if data_access_method == VMWARE_RPCTOOL:
         try:
@@ -254,7 +289,7 @@
             elif not stdout:
                 LOG.error("Failed to get guestinfo value for key %s", key)
             else:
-                return stdout.rstrip()
+                return handle_returned_guestinfo_val(key, stdout)
         except util.ProcessExecutionError as error:
             if error.stderr == NOVAL:
                 LOG.debug("No value found for key %s", key)
@@ -268,6 +303,60 @@
     return None
 
 
+def set_guestinfo_value(key, value):
+    '''
+    Sets a guestinfo value for the specified key. Set value to an empty string
+    to clear an existing guestinfo key.
+    '''
+
+    # If value is an empty string then set it to a single space as it is not
+    # possible to set a guestinfo key to an empty string. Setting a guestinfo
+    # key to a single space is as close as it gets to clearing an existing
+    # guestinfo key.
+    if value == "":
+        value = " "
+
+    LOG.debug("Setting guestinfo key=%s to value=%s", key, value)
+
+    data_access_method = get_data_access_method()
+
+    if data_access_method == VMX_GUESTINFO:
+        return True
+
+    if data_access_method == VMWARE_RPCTOOL:
+        try:
+            util.subp(
+                [VMWARE_RPCTOOL, ("info-set guestinfo.%s %s" % (key, value))])
+            return True
+        except util.ProcessExecutionError as error:
+            util.logexc(
+                LOG, "Failed to set guestinfo key=%s to value=%s: %s", key, value, error)
+        except Exception:
+            util.logexc(
+                LOG, "Unexpected error while trying to set guestinfo key=%s to value=%s", key, value)
+
+    return None
+
+
+def clear_guestinfo_keys(keys):
+    '''
+    clear_guestinfo_keys clears guestinfo of all of the keys in the given list.
+    Each key will have its value set to "---". Since the value is valid YAML,
+    cloud-init can still read it if it tries.
+    '''
+    if not keys:
+        return
+    if not type(keys) in (list, tuple):
+        keys = [keys]
+    for key in keys:
+        LOG.info("clearing guestinfo.%s", key)
+        if not set_guestinfo_value(key, GUESTINFO_EMPTY_YAML_VAL):
+            LOG.error("failed to clear guestinfo.%s", key)
+        LOG.info("clearing guestinfo.%s.encoding", key)
+        if not set_guestinfo_value(key + ".encoding", ""):
+            LOG.error("failed to clear guestinfo.%s.encoding", key)
+
+
 def guestinfo(key):
     '''
     guestinfo returns the guestinfo value for the provided key, decoding
diff --git a/README.md b/README.md
index 73f3698..487f441 100644
--- a/README.md
+++ b/README.md
@@ -1,28 +1,30 @@
 # Cloud-Init Datasource for VMware GuestInfo
-This project provides a cloud-init datasource for pulling meta,
-user, and vendor data from VMware vSphere's GuestInfo [interface](https://github.com/vmware/govmomi/blob/master/govc/USAGE.md#vmchange).
+
+This project provides a cloud-init datasource for pulling meta, user, and vendor data from VMware vSphere's GuestInfo [interface](https://github.com/vmware/govmomi/blob/master/govc/USAGE.md#vmchange).
 
 ## Installation
+
 There are multiple methods of installing the data source.
 
 ### Installing on RHEL/CentOS 7
+
 There is an RPM available for installing on RedHat/CentOS:
 
 ```shell
-$ yum install https://github.com/vmware/cloud-init-vmware-guestinfo/releases/download/v1.1.0/cloud-init-vmware-guestinfo-1.1.0-1.el7.noarch.rpm
+yum install https://github.com/vmware/cloud-init-vmware-guestinfo/releases/download/v1.1.0/cloud-init-vmware-guestinfo-1.1.0-1.el7.noarch.rpm
 ```
 
 ### Installing on other Linux distributions
-The VMware GuestInfo datasource can be installed on any Linux distribution
-where cloud-init is already present. To do so, simply execute the following:
+
+The VMware GuestInfo datasource can be installed on any Linux distribution where cloud-init is already present. To do so, simply execute the following:
 
 ```shell
-$ curl -sSL https://raw.githubusercontent.com/vmware/cloud-init-vmware-guestinfo/master/install.sh | sh -
+curl -sSL https://raw.githubusercontent.com/vmware/cloud-init-vmware-guestinfo/master/install.sh | sh -
 ```
 
 ## Configuration
-The data source is configured by setting `guestinfo` properties on a
-VM's `extraconfig` data or a customizable vApp's `properties` data.
+
+The data source is configured by setting `guestinfo` properties on a VM's `extraconfig` data or a customizable vApp's `properties` data.
 
 | Property | Description |
 |----------|-------------|
@@ -33,58 +35,31 @@
 | `guestinfo.vendordata` | A YAML document containing the cloud-init vendor data. |
 | `guestinfo.vendordata.encoding` | The encoding type for `guestinfo.vendordata`. |
 
-All `guestinfo.*.encoding` property values may be set to `base64` or
-`gzip+base64`.
+All `guestinfo.*.encoding` property values may be set to `base64` or `gzip+base64`.
 
 ## Walkthrough
-The following series of steps is a demonstration on how to configure a VM
-with cloud-init and the VMX GuestInfo datasource.
 
-### Create a network configuration file
-First, create the network configuration for the VM. Save the following
-YAML to a file named `network.config.yaml`:
-
-```yaml
-version: 1
-config:
-  - type: physical
-    name: ens192
-    subnets:
-      - type: static
-        address: 192.168.1.200
-        gateway: 192.168.1.1
-        dns_nameservers:
-          - 8.8.8.8
-          - 8.8.4.4
-        dns_search:
-          - vmware.ci
-```
-
-See the section on [configuring the network](#configuring-the-network) for
-more information on the network configuration schema.
+The following series of steps is a demonstration on how to configure a VM with cloud-init and the VMX GuestInfo datasource.
 
 ### Create a metadata file
-Next, create a JSON file named `metadata.json`:
 
-```json
-{
-  "network": "NETWORK_CONFIG",
-  "network.encoding": "gzip+base64",
-  "local-hostname": "cloud-vm",
-  "instance-id": "cloud-vm"
-}
+First, create the metadata file for the VM. Save the following YAML to a file named `metadata.yaml`:
+
+```yaml
+instance-id: cloud-vm
+local-hostname: cloud-vm
+network:
+  version: 2
+  ethernets:
+    nics:
+      match:
+        name: ens*
+      dhcp4: yes
 ```
 
-Please note that in addition to the `network` key in the metadata there
-is also a key named `network.encoding`. This key informs the datasource
-how to decode the `network` data. Valid values for `network.encoding`
-include:
+### Create a userdata file
 
-* `base64`
-* `gzip+base64`
-
-### Create a cloud-config file
-Finally, create the cloud-config file `cloud-config.yaml`:
+Finally, create the userdata file `userdata.yaml`:
 
 ```yaml
 #cloud-config
@@ -98,109 +73,116 @@
     ssh_import_id: None
     lock_passwd: true
     ssh_authorized_keys:
-      - ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQDE0c5FczvcGSh/tG4iw+Fhfi/O5/EvUM/96js65tly4++YTXK1d9jcznPS5ruDlbIZ30oveCBd3kT8LLVFwzh6hepYTf0YmCTpF4eDunyqmpCXDvVscQYRXyasEm5olGmVe05RrCJSeSShAeptv4ueIn40kZKOghinGWLDSZG4+FFfgrmcMCpx5YSCtX2gvnEYZJr0czt4rxOZuuP7PkJKgC/mt2PcPjooeX00vAj81jjU2f3XKrjjz2u2+KIt9eba+vOQ6HiC8c2IzRkUAJ5i1atLy8RIbejo23+0P4N2jjk17QySFOVHwPBDTYb0/0M/4ideeU74EN/CgVsvO6JrLsPBR4dojkV5qNbMNxIVv5cUwIy2ThlLgqpNCeFIDLCWNZEFKlEuNeSQ2mPtIO7ETxEL2Cz5y/7AIuildzYMc6wi2bofRC8HmQ7rMXRWdwLKWsR0L7SKjHblIwarxOGqLnUI+k2E71YoP7SZSlxaKi17pqkr0OMCF+kKqvcvHAQuwGqyumTEWOlH6TCx1dSPrW+pVCZSHSJtSTfDW2uzL6y8k10MT06+pVunSrWo5LHAXcS91htHV1M1UrH/tZKSpjYtjMb5+RonfhaFRNzvj7cCE1f3Kp8UVqAdcGBTtReoE8eRUT63qIxjw03a7VwAyB2w+9cu1R9/vAo8SBeRqw== sakutz@gmail.com
+    - ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQDE0c5FczvcGSh/tG4iw+Fhfi/O5/EvUM/96js65tly4++YTXK1d9jcznPS5ruDlbIZ30oveCBd3kT8LLVFwzh6hepYTf0YmCTpF4eDunyqmpCXDvVscQYRXyasEm5olGmVe05RrCJSeSShAeptv4ueIn40kZKOghinGWLDSZG4+FFfgrmcMCpx5YSCtX2gvnEYZJr0czt4rxOZuuP7PkJKgC/mt2PcPjooeX00vAj81jjU2f3XKrjjz2u2+KIt9eba+vOQ6HiC8c2IzRkUAJ5i1atLy8RIbejo23+0P4N2jjk17QySFOVHwPBDTYb0/0M/4ideeU74EN/CgVsvO6JrLsPBR4dojkV5qNbMNxIVv5cUwIy2ThlLgqpNCeFIDLCWNZEFKlEuNeSQ2mPtIO7ETxEL2Cz5y/7AIuildzYMc6wi2bofRC8HmQ7rMXRWdwLKWsR0L7SKjHblIwarxOGqLnUI+k2E71YoP7SZSlxaKi17pqkr0OMCF+kKqvcvHAQuwGqyumTEWOlH6TCx1dSPrW+pVCZSHSJtSTfDW2uzL6y8k10MT06+pVunSrWo5LHAXcS91htHV1M1UrH/tZKSpjYtjMb5+RonfhaFRNzvj7cCE1f3Kp8UVqAdcGBTtReoE8eRUT63qIxjw03a7VwAyB2w+9cu1R9/vAo8SBeRqw== sakutz@gmail.com
 ```
 
-### Assigning the cloud-config data to the VM's GuestInfo
-Please note that this step requires that the VM be powered off. All of
-the commands below use the VMware CLI tool,
-[`govc`](https://github.com/vmware/govmomi/blob/master/govc).
+### Assigning the userdata to the VM's GuestInfo
+
+Please note that this step requires that the VM be powered off. All of the commands below use the VMware CLI tool, [`govc`](https://github.com/vmware/govmomi/blob/master/govc).
 
 Go ahead and assign the path to the VM to the environment variable `VM`:
+
 ```shell
-$ export VM="/inventory/path/to/the/vm"
+export VM="/inventory/path/to/the/vm"
 ```
 
 Next, power off the VM:
+
 ```shell
-$ govc vm.power -off "${VM}"
+govc vm.power -off "${VM}"
 ```
 
-Export the environment variables that contain the cloud-init metadata
-and cloud-config:
+Export the environment variables that contain the cloud-init metadata and userdata:
+
 ```shell
-$ export CLOUD_CONFIG=$(gzip -c9 <cloud-config.yaml | base64)
-$ export METADATA=$(sed 's~NETWORK_CONFIG~'"$(gzip -c9 <network.config.yaml | \
-                    base64)"'~' <metadata.json | gzip -9 | base64)
+export METADATA=$(gzip -c9 <metadata.yaml | { base64 -w0 2>/dev/null || base64; }) \
+       USERDATA=$(gzip -c9 <userdata.yaml | { base64 -w0 2>/dev/null || base64; })
 ```
 
-Assign the metadata and cloud-config to the VM's extra configuration
-dictionary, `guestinfo`:
+Assign the metadata and userdata to the VM's extra configuration dictionary, `guestinfo`:
+
 ```shell
-$ govc vm.change -vm "${VM}" -e guestinfo.metadata="${METADATA}"
-$ govc vm.change -vm "${VM}" -e guestinfo.metadata.encoding=gzip+base64
-$ govc vm.change -vm "${VM}" -e guestinfo.userdata="${CLOUD_CONFIG}"
-$ govc vm.change -vm "${VM}" -e guestinfo.userdata.encoding=gzip+base64
+govc vm.change -vm "${VM}" \
+  -e guestinfo.metadata="${METADATA}" \
+  -e guestinfo.metadata.encoding="gzip+base64" \
+  -e guestinfo.userdata="${USERDATA}" \
+  -e guestinfo.userdata.encoding="gzip+base64"
 ```
 
-Please note the above commands include specifying the encoding for the
-properties. This is important as it informs the datasource how to decode
-the data for cloud-init. Valid values for `metadata.encoding` and
-`userdata.encoding` include:
+Please note the above commands include specifying the encoding for the properties. This is important as it informs the datasource how to decode the data for cloud-init. Valid values for `metadata.encoding` and `userdata.encoding` include:
 
 * `base64`
 * `gzip+base64`
 
 ### Using the cloud-init VMX GuestInfo datasource
+
 Power the VM back on.
+
 ```shell
-$ govc vm.power -vm "${VM}" -on
+govc vm.power -vm "${VM}" -on
 ```
 
 If all went according to plan, the CentOS box is:
-* Locked down, allosing SSH access only for the user in the cloud-config
-* Configured for a static IP address, 192.168.1.200
-* Has a hostname of `centos-cloud`
+
+* Locked down, allowing SSH access only for the user in the userdata
+* Configured for a dynamic IP address via DHCP
+* Has a hostname of `cloud-vm`
 
 ## Examples
+
 This section reviews common configurations:
 
 ### Setting the hostname
+
 The hostname is set by way of the metadata key `local-hostname`.
 
 ### Setting the instance ID
-The instance ID may be set by way of the metadata key `instance-id`.
-However, if this value is absent then then the instance ID is
-read from the file `/sys/class/dmi/id/product_uuid`.
+
+The instance ID may be set by way of the metadata key `instance-id`. However, if this value is absent then the instance ID is read from the file `/sys/class/dmi/id/product_uuid`.
 
 ### Providing public SSH keys
-The public SSH keys may be set by way of the metadata key `public-keys-data`.
-Each newline-terminated string will be interpreted as a separate
-SSH public key, which will be placed in distro's default user's
-`~/.ssh/authorized_keys`. If the value is empty or absent,
-then nothing will be written to `~/.ssh/authorized_keys`.
+
+The public SSH keys may be set by way of the metadata key `public-keys-data`. Each newline-terminated string will be interpreted as a separate SSH public key, which will be placed in the distro's default user's `~/.ssh/authorized_keys`. If the value is empty or absent, then nothing will be written to `~/.ssh/authorized_keys`.
 
 ### Configuring the network
-The network is configured by setting the metadata key `network`
-with a value consistent with Network Config Versions
-[1](http://bit.ly/cloudinit-net-conf-v1) or
-[2](http://bit.ly/cloudinit-net-conf-v2),
-depending on the Linux distro's version of cloud-init.
 
-For example, CentOS 7's official cloud-init package is version
-0.7.9 and does not support Network Config Version 2. However,
-this datasource still supports supplying Network Config Version 2
-data as long as the Linux distro's cloud-init package is new
-enough to parse the data.
+The network is configured by setting the metadata key `network` with a value consistent with Network Config Versions [1](http://bit.ly/cloudinit-net-conf-v1) or [2](http://bit.ly/cloudinit-net-conf-v2), depending on the Linux distro's version of cloud-init.
 
-The metadata key `network.encoding` may be used to indicate the
-format of the metadata key "network". Valid encodings are `base64`
-and `gzip+base64`.
+The metadata key `network.encoding` may be used to indicate the format of the metadata key "network". Valid encodings are `base64` and `gzip+base64`.
 
-## Building the RPM
-Building the RPM locally is handled via Docker. Simple execute the following
-command:
+### Cleaning up the guestinfo keys
 
-```shell
-$ make rpm
+Sometimes the cloud-init userdata might contain sensitive information, and it may be desirable to have the `guestinfo.userdata` key (or other guestinfo keys) cleared as soon as its data is read by the datasource. This is possible by adding the following to the metadata:
+
+```yaml
+cleanup-guestinfo:
+- userdata
+- vendordata
 ```
 
-The resulting RPMs are located in `rpmbuild/$OS/RPMS/noarch/`. The list
-of supported `$OS` platforms are:
+When the above snippet is added to the metadata, the datasource will iterate over the elements in the `cleanup-guestinfo` array and clear each of the keys. For example, the above snippet will cause the following commands to be executed:
+
+```shell
+vmware-rpctool "info-set guestinfo.userdata ---"
+vmware-rpctool "info-set guestinfo.userdata.encoding  "
+vmware-rpctool "info-set guestinfo.vendordata ---"
+vmware-rpctool "info-set guestinfo.vendordata.encoding  "
+```
+
+Please note that keys are set to the valid YAML string `---` as it is not possible to remove an existing key from the guestinfo key-space. A key's analogous encoding property will be set to a single white-space character, causing the datasource to treat the actual key value as plain-text, thereby loading it as an empty YAML doc (hence the aforementioned `---`).
+
+## Building the RPM
+
+Building the RPM locally is handled via Docker. Simply execute the following command:
+
+```shell
+make rpm
+```
+
+The resulting RPMs are located in `rpmbuild/$OS/RPMS/noarch/`. The list of supported `$OS` platforms is:
 
 * el7 (RHEL/CentOS 7)
 
 ## Conclusion
-To learn more about how to use cloud-init with CentOS, please see the cloud-init
-[documentation](https://cloudinit.readthedocs.io/en/latest/index.html) for more
-examples and reference information for the cloud-config files.
+
+To learn more about how to use cloud-init with CentOS, please see the cloud-init [documentation](https://cloudinit.readthedocs.io/en/latest/index.html) for more examples and reference information for the cloud-config files.