Merge pull request #25 from akutz/feature/cleanup-userdata
Cleanup guestinfo keys such as userdata
diff --git a/DataSourceVMwareGuestInfo.py b/DataSourceVMwareGuestInfo.py
index 3cf3450..ab392be 100644
--- a/DataSourceVMwareGuestInfo.py
+++ b/DataSourceVMwareGuestInfo.py
@@ -40,6 +40,7 @@
NOVAL = "No value found"
VMWARE_RPCTOOL = find_executable("vmware-rpctool")
VMX_GUESTINFO = "VMX_GUESTINFO"
+# Placeholder written to a guestinfo key when it is cleared. A value equal to
+# this placeholder is treated the same as an empty value when it is read back.
+GUESTINFO_EMPTY_YAML_VAL = "---"
class NetworkConfigError(Exception):
@@ -106,7 +107,8 @@
that the get_data functions in newer versions of cloud-init do,
such as calling persist_instance_data.
"""
- if not get_data_access_method():
+ data_access_method = get_data_access_method()
+ if not data_access_method:
LOG.error("vmware-rpctool is required to fetch guestinfo value")
return False
@@ -119,6 +121,10 @@
# Get the vendor data.
self.vendordata_raw = guestinfo('vendordata')
+ # Check to see if any of the guestinfo data should be removed.
+ if data_access_method == VMWARE_RPCTOOL:
+ clear_guestinfo_keys(self.metadata['cleanup-guestinfo'])
+
if self.metadata or self.userdata_raw or self.vendordata_raw:
return True
else:
@@ -229,6 +235,39 @@
return raw_data
+def get_none_if_empty_val(val):
+    '''
+    get_none_if_empty_val returns None if the provided value is None or,
+    once stripped of its trailing whitespace, is empty or equal to
+    GUESTINFO_EMPTY_YAML_VAL.
+
+    Otherwise the value, stripped of its trailing whitespace, is returned.
+    A non-None return value is always a string, regardless of whether the
+    input is a bytes class or a string.
+    '''
+
+    # A missing value is treated like an empty one rather than failing on
+    # the rstrip call below.
+    if val is None:
+        return None
+
+    # If the provided value is a bytes class, convert it to a string to
+    # simplify the rest of this function's logic.
+    if isinstance(val, bytes):
+        val = val.decode()
+
+    val = val.rstrip()
+    if not val or val == GUESTINFO_EMPTY_YAML_VAL:
+        return None
+    return val
+
+
+def handle_returned_guestinfo_val(key, val):
+    '''
+    handle_returned_guestinfo_val normalizes a value read for the given
+    guestinfo key. The normalized value is returned when it is non-empty;
+    when it is empty or equal to GUESTINFO_EMPTY_YAML_VAL a debug message
+    naming the key is logged and None is returned.
+    '''
+    cleaned = get_none_if_empty_val(val)
+    if not cleaned:
+        LOG.debug("No value found for key %s", key)
+        return None
+    return cleaned
+
+
def get_guestinfo_value(key):
'''
Returns a guestinfo value for the specified key.
@@ -239,11 +278,7 @@
if data_access_method == VMX_GUESTINFO:
env_key = ("vmx.guestinfo." + key).upper().replace(".", "_", -1)
- val = os.environ.get(env_key, "")
- if val == "":
- LOG.debug("No value found for key %s", key)
- else:
- return val
+ return handle_returned_guestinfo_val(key, os.environ.get(env_key, ""))
if data_access_method == VMWARE_RPCTOOL:
try:
@@ -254,7 +289,7 @@
elif not stdout:
LOG.error("Failed to get guestinfo value for key %s", key)
else:
- return stdout.rstrip()
+ return handle_returned_guestinfo_val(key, stdout)
except util.ProcessExecutionError as error:
if error.stderr == NOVAL:
LOG.debug("No value found for key %s", key)
@@ -268,6 +303,60 @@
return None
+def set_guestinfo_value(key, value):
+    '''
+    Writes the given value to the guestinfo key with the given name. Pass
+    an empty string as the value to clear an existing guestinfo key.
+
+    Returns True if the value was written with vmware-rpctool, or if the
+    data access method is VMX_GUESTINFO, in which case nothing is written.
+    Returns None if the write failed or no data access method matched.
+    '''
+
+    # A guestinfo key cannot be set to an empty string, so an empty value
+    # is replaced by a single space, which is as close as it gets to
+    # clearing an existing guestinfo key.
+    if value == "":
+        value = " "
+
+    LOG.debug("Setting guestinfo key=%s to value=%s", key, value)
+
+    method = get_data_access_method()
+
+    # Nothing is written for this access method; report success.
+    if method == VMX_GUESTINFO:
+        return True
+
+    if method != VMWARE_RPCTOOL:
+        return None
+
+    try:
+        util.subp(
+            [VMWARE_RPCTOOL, ("info-set guestinfo.%s %s" % (key, value))])
+    except util.ProcessExecutionError as error:
+        util.logexc(
+            LOG, "Failed to set guestinfo key=%s to value=%s: %s", key, value, error)
+    except Exception:
+        util.logexc(
+            LOG, "Unexpected error while trying to set guestinfo key=%s to value=%s", key, value)
+    else:
+        return True
+
+    return None
+
+
+def clear_guestinfo_keys(keys):
+    '''
+    clear_guestinfo_keys clears guestinfo of all of the keys in the given
+    list. Each key will have its value set to "---". Since the value is
+    valid YAML, cloud-init can still read it if it tries. The matching
+    "<key>.encoding" key is cleared as well.
+
+    keys may be a list or tuple of key names, or a single key name. If
+    keys is empty or None then nothing is done. A failure to clear a key
+    is logged and the remaining keys are still processed.
+    '''
+    if not keys:
+        return
+    # isinstance also accepts list and tuple subclasses, which an exact
+    # type comparison would wrap as if they were a single key.
+    if not isinstance(keys, (list, tuple)):
+        keys = [keys]
+    for key in keys:
+        LOG.info("clearing guestinfo.%s", key)
+        if not set_guestinfo_value(key, GUESTINFO_EMPTY_YAML_VAL):
+            LOG.error("failed to clear guestinfo.%s", key)
+        LOG.info("clearing guestinfo.%s.encoding", key)
+        if not set_guestinfo_value(key + ".encoding", ""):
+            LOG.error("failed to clear guestinfo.%s.encoding", key)
+
+
def guestinfo(key):
'''
guestinfo returns the guestinfo value for the provided key, decoding
diff --git a/README.md b/README.md
index 73f3698..487f441 100644
--- a/README.md
+++ b/README.md
@@ -1,28 +1,30 @@
# Cloud-Init Datasource for VMware GuestInfo
-This project provides a cloud-init datasource for pulling meta,
-user, and vendor data from VMware vSphere's GuestInfo [interface](https://github.com/vmware/govmomi/blob/master/govc/USAGE.md#vmchange).
+
+This project provides a cloud-init datasource for pulling meta, user, and vendor data from VMware vSphere's GuestInfo [interface](https://github.com/vmware/govmomi/blob/master/govc/USAGE.md#vmchange).
## Installation
+
There are multiple methods of installing the data source.
### Installing on RHEL/CentOS 7
+
There is an RPM available for installing on RedHat/CentOS:
```shell
-$ yum install https://github.com/vmware/cloud-init-vmware-guestinfo/releases/download/v1.1.0/cloud-init-vmware-guestinfo-1.1.0-1.el7.noarch.rpm
+yum install https://github.com/vmware/cloud-init-vmware-guestinfo/releases/download/v1.1.0/cloud-init-vmware-guestinfo-1.1.0-1.el7.noarch.rpm
```
### Installing on other Linux distributions
-The VMware GuestInfo datasource can be installed on any Linux distribution
-where cloud-init is already present. To do so, simply execute the following:
+
+The VMware GuestInfo datasource can be installed on any Linux distribution where cloud-init is already present. To do so, simply execute the following:
```shell
-$ curl -sSL https://raw.githubusercontent.com/vmware/cloud-init-vmware-guestinfo/master/install.sh | sh -
+curl -sSL https://raw.githubusercontent.com/vmware/cloud-init-vmware-guestinfo/master/install.sh | sh -
```
## Configuration
-The data source is configured by setting `guestinfo` properties on a
-VM's `extraconfig` data or a customizable vApp's `properties` data.
+
+The data source is configured by setting `guestinfo` properties on a VM's `extraconfig` data or a customizable vApp's `properties` data.
| Property | Description |
|----------|-------------|
@@ -33,58 +35,31 @@
| `guestinfo.vendordata` | A YAML document containing the cloud-init vendor data. |
| `guestinfo.vendordata.encoding` | The encoding type for `guestinfo.vendordata`. |
-All `guestinfo.*.encoding` property values may be set to `base64` or
-`gzip+base64`.
+All `guestinfo.*.encoding` property values may be set to `base64` or `gzip+base64`.
## Walkthrough
-The following series of steps is a demonstration on how to configure a VM
-with cloud-init and the VMX GuestInfo datasource.
-### Create a network configuration file
-First, create the network configuration for the VM. Save the following
-YAML to a file named `network.config.yaml`:
-
-```yaml
-version: 1
-config:
- - type: physical
- name: ens192
- subnets:
- - type: static
- address: 192.168.1.200
- gateway: 192.168.1.1
- dns_nameservers:
- - 8.8.8.8
- - 8.8.4.4
- dns_search:
- - vmware.ci
-```
-
-See the section on [configuring the network](#configuring-the-network) for
-more information on the network configuration schema.
+The following series of steps is a demonstration on how to configure a VM with cloud-init and the VMX GuestInfo datasource.
### Create a metadata file
-Next, create a JSON file named `metadata.json`:
-```json
-{
- "network": "NETWORK_CONFIG",
- "network.encoding": "gzip+base64",
- "local-hostname": "cloud-vm",
- "instance-id": "cloud-vm"
-}
+First, create the metadata file for the VM. Save the following YAML to a file named `metadata.yaml`:
+
+```yaml
+instance-id: cloud-vm
+local-hostname: cloud-vm
+network:
+ version: 2
+ ethernets:
+ nics:
+ match:
+ name: ens*
+ dhcp4: yes
```
-Please note that in addition to the `network` key in the metadata there
-is also a key named `network.encoding`. This key informs the datasource
-how to decode the `network` data. Valid values for `network.encoding`
-include:
+### Create a userdata file
-* `base64`
-* `gzip+base64`
-
-### Create a cloud-config file
-Finally, create the cloud-config file `cloud-config.yaml`:
+Finally, create the userdata file `userdata.yaml`:
```yaml
#cloud-config
@@ -98,109 +73,116 @@
ssh_import_id: None
lock_passwd: true
ssh_authorized_keys:
- - ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQDE0c5FczvcGSh/tG4iw+Fhfi/O5/EvUM/96js65tly4++YTXK1d9jcznPS5ruDlbIZ30oveCBd3kT8LLVFwzh6hepYTf0YmCTpF4eDunyqmpCXDvVscQYRXyasEm5olGmVe05RrCJSeSShAeptv4ueIn40kZKOghinGWLDSZG4+FFfgrmcMCpx5YSCtX2gvnEYZJr0czt4rxOZuuP7PkJKgC/mt2PcPjooeX00vAj81jjU2f3XKrjjz2u2+KIt9eba+vOQ6HiC8c2IzRkUAJ5i1atLy8RIbejo23+0P4N2jjk17QySFOVHwPBDTYb0/0M/4ideeU74EN/CgVsvO6JrLsPBR4dojkV5qNbMNxIVv5cUwIy2ThlLgqpNCeFIDLCWNZEFKlEuNeSQ2mPtIO7ETxEL2Cz5y/7AIuildzYMc6wi2bofRC8HmQ7rMXRWdwLKWsR0L7SKjHblIwarxOGqLnUI+k2E71YoP7SZSlxaKi17pqkr0OMCF+kKqvcvHAQuwGqyumTEWOlH6TCx1dSPrW+pVCZSHSJtSTfDW2uzL6y8k10MT06+pVunSrWo5LHAXcS91htHV1M1UrH/tZKSpjYtjMb5+RonfhaFRNzvj7cCE1f3Kp8UVqAdcGBTtReoE8eRUT63qIxjw03a7VwAyB2w+9cu1R9/vAo8SBeRqw== sakutz@gmail.com
+ - ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQDE0c5FczvcGSh/tG4iw+Fhfi/O5/EvUM/96js65tly4++YTXK1d9jcznPS5ruDlbIZ30oveCBd3kT8LLVFwzh6hepYTf0YmCTpF4eDunyqmpCXDvVscQYRXyasEm5olGmVe05RrCJSeSShAeptv4ueIn40kZKOghinGWLDSZG4+FFfgrmcMCpx5YSCtX2gvnEYZJr0czt4rxOZuuP7PkJKgC/mt2PcPjooeX00vAj81jjU2f3XKrjjz2u2+KIt9eba+vOQ6HiC8c2IzRkUAJ5i1atLy8RIbejo23+0P4N2jjk17QySFOVHwPBDTYb0/0M/4ideeU74EN/CgVsvO6JrLsPBR4dojkV5qNbMNxIVv5cUwIy2ThlLgqpNCeFIDLCWNZEFKlEuNeSQ2mPtIO7ETxEL2Cz5y/7AIuildzYMc6wi2bofRC8HmQ7rMXRWdwLKWsR0L7SKjHblIwarxOGqLnUI+k2E71YoP7SZSlxaKi17pqkr0OMCF+kKqvcvHAQuwGqyumTEWOlH6TCx1dSPrW+pVCZSHSJtSTfDW2uzL6y8k10MT06+pVunSrWo5LHAXcS91htHV1M1UrH/tZKSpjYtjMb5+RonfhaFRNzvj7cCE1f3Kp8UVqAdcGBTtReoE8eRUT63qIxjw03a7VwAyB2w+9cu1R9/vAo8SBeRqw== sakutz@gmail.com
```
-### Assigning the cloud-config data to the VM's GuestInfo
-Please note that this step requires that the VM be powered off. All of
-the commands below use the VMware CLI tool,
-[`govc`](https://github.com/vmware/govmomi/blob/master/govc).
+### Assigning the userdata to the VM's GuestInfo
+
+Please note that this step requires that the VM be powered off. All of the commands below use the VMware CLI tool, [`govc`](https://github.com/vmware/govmomi/blob/master/govc).
Go ahead and assign the path to the VM to the environment variable `VM`:
+
```shell
-$ export VM="/inventory/path/to/the/vm"
+export VM="/inventory/path/to/the/vm"
```
Next, power off the VM:
+
```shell
-$ govc vm.power -off "${VM}"
+govc vm.power -off "${VM}"
```
-Export the environment variables that contain the cloud-init metadata
-and cloud-config:
+Export the environment variables that contain the cloud-init metadata and userdata:
+
```shell
-$ export CLOUD_CONFIG=$(gzip -c9 <cloud-config.yaml | base64)
-$ export METADATA=$(sed 's~NETWORK_CONFIG~'"$(gzip -c9 <network.config.yaml | \
- base64)"'~' <metadata.json | gzip -9 | base64)
+export METADATA=$(gzip -c9 <metadata.yaml | { base64 -w0 2>/dev/null || base64; }) \
+ USERDATA=$(gzip -c9 <userdata.yaml | { base64 -w0 2>/dev/null || base64; })
```
-Assign the metadata and cloud-config to the VM's extra configuration
-dictionary, `guestinfo`:
+Assign the metadata and userdata to the VM's extra configuration dictionary, `guestinfo`:
+
```shell
-$ govc vm.change -vm "${VM}" -e guestinfo.metadata="${METADATA}"
-$ govc vm.change -vm "${VM}" -e guestinfo.metadata.encoding=gzip+base64
-$ govc vm.change -vm "${VM}" -e guestinfo.userdata="${CLOUD_CONFIG}"
-$ govc vm.change -vm "${VM}" -e guestinfo.userdata.encoding=gzip+base64
+govc vm.change -vm "${VM}" \
+ -e guestinfo.metadata="${METADATA}" \
+ -e guestinfo.metadata.encoding="gzip+base64" \
+ -e guestinfo.userdata="${USERDATA}" \
+ -e guestinfo.userdata.encoding="gzip+base64"
```
-Please note the above commands include specifying the encoding for the
-properties. This is important as it informs the datasource how to decode
-the data for cloud-init. Valid values for `metadata.encoding` and
-`userdata.encoding` include:
+Please note the above commands include specifying the encoding for the properties. This is important as it informs the datasource how to decode the data for cloud-init. Valid values for `metadata.encoding` and `userdata.encoding` include:
* `base64`
* `gzip+base64`
### Using the cloud-init VMX GuestInfo datasource
+
Power the VM back on.
+
```shell
-$ govc vm.power -vm "${VM}" -on
+govc vm.power -vm "${VM}" -on
```
If all went according to plan, the CentOS box is:
-* Locked down, allosing SSH access only for the user in the cloud-config
-* Configured for a static IP address, 192.168.1.200
-* Has a hostname of `centos-cloud`
+
+* Locked down, allowing SSH access only for the user in the userdata
+* Configured for a dynamic IP address via DHCP
+* Has a hostname of `cloud-vm`
## Examples
+
This section reviews common configurations:
### Setting the hostname
+
The hostname is set by way of the metadata key `local-hostname`.
### Setting the instance ID
-The instance ID may be set by way of the metadata key `instance-id`.
-However, if this value is absent then then the instance ID is
-read from the file `/sys/class/dmi/id/product_uuid`.
+
+The instance ID may be set by way of the metadata key `instance-id`. However, if this value is absent then the instance ID is read from the file `/sys/class/dmi/id/product_uuid`.
### Providing public SSH keys
-The public SSH keys may be set by way of the metadata key `public-keys-data`.
-Each newline-terminated string will be interpreted as a separate
-SSH public key, which will be placed in distro's default user's
-`~/.ssh/authorized_keys`. If the value is empty or absent,
-then nothing will be written to `~/.ssh/authorized_keys`.
+
+The public SSH keys may be set by way of the metadata key `public-keys-data`. Each newline-terminated string will be interpreted as a separate SSH public key, which will be placed in distro's default user's `~/.ssh/authorized_keys`. If the value is empty or absent, then nothing will be written to `~/.ssh/authorized_keys`.
### Configuring the network
-The network is configured by setting the metadata key `network`
-with a value consistent with Network Config Versions
-[1](http://bit.ly/cloudinit-net-conf-v1) or
-[2](http://bit.ly/cloudinit-net-conf-v2),
-depending on the Linux distro's version of cloud-init.
-For example, CentOS 7's official cloud-init package is version
-0.7.9 and does not support Network Config Version 2. However,
-this datasource still supports supplying Network Config Version 2
-data as long as the Linux distro's cloud-init package is new
-enough to parse the data.
+The network is configured by setting the metadata key `network` with a value consistent with Network Config Versions [1](http://bit.ly/cloudinit-net-conf-v1) or [2](http://bit.ly/cloudinit-net-conf-v2), depending on the Linux distro's version of cloud-init.
-The metadata key `network.encoding` may be used to indicate the
-format of the metadata key "network". Valid encodings are `base64`
-and `gzip+base64`.
+The metadata key `network.encoding` may be used to indicate the format of the metadata key "network". Valid encodings are `base64` and `gzip+base64`.
-## Building the RPM
-Building the RPM locally is handled via Docker. Simple execute the following
-command:
+### Cleaning up the guestinfo keys
-```shell
-$ make rpm
+Sometimes the cloud-init userdata might contain sensitive information, and it may be desirable to have the `guestinfo.userdata` key (or other guestinfo keys) cleared as soon as its data is read by the datasource. This is possible by adding the following to the metadata:
+
+```yaml
+cleanup-guestinfo:
+- userdata
+- vendordata
```
-The resulting RPMs are located in `rpmbuild/$OS/RPMS/noarch/`. The list
-of supported `$OS` platforms are:
+When the above snippet is added to the metadata, the datasource will iterate over the elements in the `cleanup-guestinfo` array and clear each of the keys. For example, the above snippet will cause the following commands to be executed:
+
+```shell
+vmware-rpctool "info-set guestinfo.userdata ---"
+vmware-rpctool "info-set guestinfo.userdata.encoding "
+vmware-rpctool "info-set guestinfo.vendordata ---"
+vmware-rpctool "info-set guestinfo.vendordata.encoding "
+```
+
+Please note that keys are set to the valid YAML string `---` as it is not possible to remove an existing key from the guestinfo key-space. A key's analogous encoding property will be set to a single white-space character, causing the datasource to treat the actual key value as plain-text, thereby loading it as an empty YAML doc (hence the aforementioned `---`).
+
+## Building the RPM
+
+Building the RPM locally is handled via Docker. Simply execute the following command:
+
+```shell
+make rpm
+```
+
+The resulting RPMs are located in `rpmbuild/$OS/RPMS/noarch/`. The supported `$OS` platforms are:
* el7 (RHEL/CentOS 7)
## Conclusion
-To learn more about how to use cloud-init with CentOS, please see the cloud-init
-[documentation](https://cloudinit.readthedocs.io/en/latest/index.html) for more
-examples and reference information for the cloud-config files.
+
+To learn more about how to use cloud-init with CentOS, please see the cloud-init [documentation](https://cloudinit.readthedocs.io/en/latest/index.html) for more examples and reference information for the cloud-config files.