Support persisted metadata
This patch adds support for writing persisted metadata that includes
network information. The datasource collects information about the host,
including its host name and network interfaces, and stores that
information in the persisted metadata.
diff --git a/DataSourceVMwareGuestInfo.py b/DataSourceVMwareGuestInfo.py
index b4d2fc0..3a3b884 100644
--- a/DataSourceVMwareGuestInfo.py
+++ b/DataSourceVMwareGuestInfo.py
@@ -1,5 +1,3 @@
-# vi: ts=4 expandtab
-#
# Cloud-Init Datasource for VMware Guestinfo
#
# Copyright (c) 2018 VMware, Inc. All Rights Reserved.
@@ -15,95 +13,150 @@
# Authors: Anish Swaminathan <anishs@vmware.com>
# Andrew Kutz <akutz@vmware.com>
#
-import os
+
+'''
+A cloud init datasource for VMware GuestInfo.
+'''
+
+import collections
import base64
import zlib
import json
+from distutils.spawn import find_executable
from cloudinit import log as logging
from cloudinit import sources
from cloudinit import util
from cloudinit import safeyaml
-from distutils.spawn import find_executable
-
LOG = logging.getLogger(__name__)
+NOVAL = "No value found"
+VMTOOLSD = find_executable("vmtoolsd")
-# This cloud-init datasource was designed for use with CentOS 7,
-# which uses cloud-init 0.7.9. However, this datasource should
-# work with any Linux distribution for which cloud-init is
-# avaialble.
-#
-# The documentation for cloud-init 0.7.9's datasource is
-# available at http://bit.ly/cloudinit-datasource-0-7-9. The
-# current documentation for cloud-init is found at
-# https://cloudinit.readthedocs.io/en/latest/.
-#
-# Setting the hostname:
-# The hostname is set by way of the metadata key "local-hostname".
-#
-# Setting the instance ID:
-# The instance ID may be set by way of the metadata key "instance-id".
-# However, if this value is absent then then the instance ID is
-# read from the file /sys/class/dmi/id/product_uuid.
-#
-# Configuring the network:
-# The network is configured by setting the metadata key "network"
-# with a value consistent with Network Config Versions 1 or 2,
-# depending on the Linux distro's version of cloud-init:
-#
-# Network Config Version 1 - http://bit.ly/cloudinit-net-conf-v1
-# Network Config Version 2 - http://bit.ly/cloudinit-net-conf-v2
-#
-# For example, CentOS 7's official cloud-init package is version
-# 0.7.9 and does not support Network Config Version 2. However,
-# this datasource still supports supplying Network Config Version 2
-# data as long as the Linux distro's cloud-init package is new
-# enough to parse the data.
-#
-# The metadata key "network.encoding" may be used to indicate the
-# format of the metadata key "network". Valid encodings are base64
-# and gzip+base64.
+
+class NetworkConfigError(Exception):
+ '''
+ NetworkConfigError is raised when there is an issue getting or
+ applying network configuration.
+ '''
+ pass
+
+
class DataSourceVMwareGuestInfo(sources.DataSource):
+ '''
+ This cloud-init datasource was designed for use with CentOS 7,
+ which uses cloud-init 0.7.9. However, this datasource should
+ work with any Linux distribution for which cloud-init is
+ available.
+
+ The documentation for cloud-init 0.7.9's datasource is
+ available at http://bit.ly/cloudinit-datasource-0-7-9. The
+ current documentation for cloud-init is found at
+ https://cloudinit.readthedocs.io/en/latest/.
+
+ Setting the hostname:
+ The hostname is set by way of the metadata key "local-hostname".
+
+ Setting the instance ID:
+ The instance ID may be set by way of the metadata key "instance-id".
+ However, if this value is absent then the instance ID is
+ read from the file /sys/class/dmi/id/product_uuid.
+
+ Configuring the network:
+ The network is configured by setting the metadata key "network"
+ with a value consistent with Network Config Versions 1 or 2,
+ depending on the Linux distro's version of cloud-init:
+
+ Network Config Version 1 - http://bit.ly/cloudinit-net-conf-v1
+ Network Config Version 2 - http://bit.ly/cloudinit-net-conf-v2
+
+ For example, CentOS 7's official cloud-init package is version
+ 0.7.9 and does not support Network Config Version 2. However,
+ this datasource still supports supplying Network Config Version 2
+ data as long as the Linux distro's cloud-init package is new
+ enough to parse the data.
+
+ The metadata key "network.encoding" may be used to indicate the
+ format of the metadata key "network". Valid encodings are base64
+ and gzip+base64.
+ '''
+
+ dsname = 'VMwareGuestInfo'
+
def __init__(self, sys_cfg, distro, paths, ud_proc=None):
sources.DataSource.__init__(self, sys_cfg, distro, paths, ud_proc)
- self.vmtoolsd = find_executable("vmtoolsd")
- if not self.vmtoolsd:
+ if not VMTOOLSD:
LOG.error("Failed to find vmtoolsd")
def get_data(self):
- if not self.vmtoolsd:
+ """
+ This method should really be _get_data in accordance with the most
+ recent versions of cloud-init. However, because the datasource
+ supports as far back as cloud-init 0.7.9, get_data is still used.
+
+ Because of this the method attempts to do some of the same things
+ that the get_data functions in newer versions of cloud-init do,
+ such as calling persist_instance_data.
+ """
+ if not VMTOOLSD:
LOG.error("vmtoolsd is required to fetch guestinfo value")
return False
- # Get the JSON metadata. Can be plain-text, base64, or gzip+base64.
- metadata = self._get_encoded_guestinfo_data('metadata')
- if metadata:
- try:
- self.metadata = json.loads(metadata)
- except:
- self.metadata = safeyaml.load(metadata)
+ # Get the metadata.
+ self.metadata = load_metadata()
- # Get the YAML userdata. Can be plain-text, base64, or gzip+base64.
- self.userdata_raw = self._get_encoded_guestinfo_data('userdata')
+ # Get the user data.
+ self.userdata_raw = guestinfo('userdata')
- # Get the YAML vendordata. Can be plain-text, base64, or gzip+base64.
- self.vendordata_raw = self._get_encoded_guestinfo_data('vendordata')
+ # Get the vendor data.
+ self.vendordata_raw = guestinfo('vendordata')
return True
+ def setup(self, is_new_instance):
+ """setup(is_new_instance)
+
+ This is called before user-data and vendor-data have been processed.
+
+ Unless the datasource has set mode to 'local', then networking
+ per 'fallback' or per 'network_config' will have been written and
+ brought up the OS at this point.
+ """
+
+ # Set the hostname.
+ hostname = self.metadata.get('local-hostname')
+ if hostname:
+ self.distro.set_hostname(hostname)
+ LOG.info("set hostname %s", hostname)
+
+ # Update the metadata with the actual host name and actual network
+ # interface information.
+ host_info = get_host_info()
+ LOG.info("got host-info: %s", host_info)
+ hostname = host_info.get('local-hostname', hostname)
+ self.metadata['local-hostname'] = hostname
+ interfaces = host_info['network']['interfaces']
+ self.metadata['network']['interfaces'] = interfaces
+
+ # Persist the instance data for versions of cloud-init that support
+ # doing so. This occurs here rather than in the get_data call in
+ # order to ensure that the network interfaces are up and can be
+ # persisted with the metadata.
+ try:
+ self.persist_instance_data()
+ except AttributeError:
+ pass
+
@property
def network_config(self):
- # Pull the network configuration out of the metadata.
- if self.metadata and 'network' in self.metadata:
- data = self._get_encoded_metadata('network')
- if data:
- # Load the YAML-formatted network data into an object
- # and return it.
- net_config = safeyaml.load(data)
- LOG.debug("Loaded network config: %s", net_config)
- return net_config
- return None
+ if 'network' in self.metadata:
+ LOG.debug("using metadata network config")
+ else:
+ LOG.debug("using fallback network config")
+ self.metadata['network'] = {
+ 'config': self.distro.generate_fallback_config(),
+ }
+ return self.metadata['network']['config']
def get_instance_id(self):
# Pull the instance ID out of the metadata if present. Otherwise
@@ -111,84 +164,211 @@
if self.metadata and 'instance-id' in self.metadata:
return self.metadata['instance-id']
with open('/sys/class/dmi/id/product_uuid', 'r') as id_file:
- return str(id_file.read()).rstrip()
+ self.metadata['instance-id'] = str(id_file.read()).rstrip()
+ return self.metadata['instance-id']
- def _get_encoded_guestinfo_data(self, key):
- data = self._get_guestinfo_value(key)
- if not data:
- return None
- enc_type = self._get_guestinfo_value(key + '.encoding')
- return self._get_encoded_data('guestinfo.' + key, enc_type, data)
- def _get_encoded_metadata(self, key):
- if not self.metadata or not key in self.metadata:
- return None
- data = self.metadata[key]
- enc_type = self.metadata.get(key + '.encoding')
- return self._get_encoded_data('metadata.' + key, enc_type, data)
+def decode(key, enc_type, data):
+ '''
+ decode returns the decoded string value of data
+ key is a string used to identify the data being decoded in log messages
+ ----
+ In py 2.7:
+ json.loads method takes string as input
+ zlib.decompress takes and returns a string
+ base64.b64decode takes and returns a string
+ -----
+ In py 3.6 and newer:
+ json.loads method takes bytes or string as input
+ zlib.decompress takes and returns a bytes
+ base64.b64decode takes bytes or string and returns bytes
+ -----
+ In py > 3, < 3.6:
+ json.loads method takes string as input
+ zlib.decompress takes and returns a bytes
+ base64.b64decode takes bytes or string and returns bytes
+ -----
+ Given the above conditions the output from zlib.decompress and
+ base64.b64decode would be bytes with newer python and str in older
+ version. Thus we would convert the output to str before returning
+ '''
+ LOG.debug("Getting encoded data for key=%s, enc=%s", key, enc_type)
- def _get_encoded_data(self, key, enc_type, data):
- '''
- The _get_encoded_data would always return a str
- ----
- In py 2.7:
- json.loads method takes string as input
- zlib.decompress takes and returns a string
- base64.b64decode takes and returns a string
- -----
- In py 3.6 and newer:
- json.loads method takes bytes or string as input
- zlib.decompress takes and returns a bytes
- base64.b64decode takes bytes or string and returns bytes
- -----
- In py > 3, < 3.6:
- json.loads method takes string as input
- zlib.decompress takes and returns a bytes
- base64.b64decode takes bytes or string and returns bytes
- -----
- Given the above conditions the output from zlib.decompress and
- base64.b64decode would be bytes with newer python and str in older
- version. Thus we would covert the output to str before returning
- '''
- rawdata = self._get_encoded_data_raw(key, enc_type, data)
- if type(rawdata) == bytes:
- return rawdata.decode('utf-8')
- return rawdata
+ raw_data = None
+ if enc_type == "gzip+base64" or enc_type == "gz+b64":
+ LOG.debug("Decoding %s format %s", enc_type, key)
+ raw_data = zlib.decompress(base64.b64decode(data), zlib.MAX_WBITS | 16)
+ elif enc_type == "base64" or enc_type == "b64":
+ LOG.debug("Decoding %s format %s", enc_type, key)
+ raw_data = base64.b64decode(data)
+ else:
+ LOG.debug("Plain-text data %s", key)
+ raw_data = data
- def _get_encoded_data_raw(self, key, enc_type, data):
- LOG.debug("Getting encoded data for key=%s, enc=%s", key, enc_type)
- if enc_type == "gzip+base64" or enc_type == "gz+b64":
- LOG.debug("Decoding %s format %s", enc_type, key)
- return zlib.decompress(base64.b64decode(data), zlib.MAX_WBITS | 16)
- elif enc_type == "base64" or enc_type == "b64":
- LOG.debug("Decoding %s format %s", enc_type, key)
- return base64.b64decode(data)
+ if isinstance(raw_data, bytes):
+ return raw_data.decode('utf-8')
+ return raw_data
+
+
+def get_guestinfo_value(key):
+ '''
+ Returns a guestinfo value for the specified key.
+ '''
+ LOG.debug("Getting guestinfo value for key %s", key)
+ try:
+ (stdout, stderr) = util.subp(
+ [VMTOOLSD, "--cmd", "info-get guestinfo." + key])
+ if stderr == NOVAL:
+ LOG.debug("No value found for key %s", key)
+ elif not stdout:
+ LOG.error("Failed to get guestinfo value for key %s", key)
else:
- LOG.debug("Plain-text data %s", key)
- return data
+ return stdout.rstrip()
+ except util.ProcessExecutionError as error:
+ if error.stderr == NOVAL:
+ LOG.debug("No value found for key %s", key)
+ else:
+ util.logexc(
+ LOG, "Failed to get guestinfo value for key %s: %s", key, error)
+ except Exception:
+ util.logexc(
+ LOG, "Unexpected error while trying to get guestinfo value for key %s", key)
+ return None
- def _get_guestinfo_value(self, key):
- NOVAL = "No value found"
- LOG.debug("Getting guestinfo value for key %s", key)
- try:
- (stdout, stderr) = util.subp([self.vmtoolsd, "--cmd", "info-get guestinfo." + key])
- if stderr == NOVAL:
- LOG.debug("No value found for key %s", key)
- elif not stdout:
- LOG.error("Failed to get guestinfo value for key %s", key)
- else:
- return stdout.rstrip()
- except util.ProcessExecutionError as error:
- if error.stderr == NOVAL:
- LOG.debug("No value found for key %s", key)
- else:
- util.logexc(LOG,"Failed to get guestinfo value for key %s: %s", key, error)
- except Exception:
- util.logexc(LOG,"Unexpected error while trying to get guestinfo value for key %s", key)
+
+def guestinfo(key):
+ '''
+ guestinfo returns the guestinfo value for the provided key, decoding
+ the value when required
+ '''
+ data = get_guestinfo_value(key)
+ if not data:
return None
+ enc_type = get_guestinfo_value(key + '.encoding')
+ return decode('guestinfo.' + key, enc_type, data)
+
+
+def load(data):
+ '''
+ load first attempts to unmarshal the provided data as JSON, and if
+ that fails then attempts to unmarshal the data as YAML. If data is
+ None or empty then a new dictionary is returned.
+ '''
+ if not data:
+ return {}
+ try:
+ return json.loads(data)
+ except Exception:
+ return safeyaml.load(data)
+
+
+def load_metadata():
+ '''
+ load_metadata loads the metadata from the guestinfo data, optionally
+ decoding the network config when required
+ '''
+ data = load(guestinfo('metadata'))
+
+ network = None
+ if 'network' in data:
+ network = data['network']
+ del data['network']
+
+ network_enc = None
+ if 'network.encoding' in data:
+ network_enc = data['network.encoding']
+ del data['network.encoding']
+
+ if network:
+ if not isinstance(network, dict):
+ LOG.debug("decoding network data: %s", network)
+ dec_net = decode('metadata.network', network_enc, network)
+ network = load(dec_net)
+ if 'config' not in network:
+ raise NetworkConfigError("missing 'config' key")
+ data['network'] = network
+
+ return data
+
def get_datasource_list(depends):
- """
+ '''
Return a list of data sources that match this set of dependencies
- """
+ '''
return [DataSourceVMwareGuestInfo]
+
+
+def get_host_info():
+ '''
+ Returns host information such as the host name and network interfaces.
+ '''
+ import netifaces
+ import socket
+
+ host_info = {
+ 'network': {
+ 'interfaces': {
+ 'by-mac': collections.OrderedDict(),
+ 'by-ip4': collections.OrderedDict(),
+ 'by-ip6': collections.OrderedDict(),
+ },
+ },
+ }
+
+ hostname = socket.getfqdn()
+ if hostname:
+ host_info['local-hostname'] = hostname
+
+ by_mac = host_info['network']['interfaces']['by-mac']
+ by_ip4 = host_info['network']['interfaces']['by-ip4']
+ by_ip6 = host_info['network']['interfaces']['by-ip6']
+
+ ifaces = netifaces.interfaces()
+ for dev_name in ifaces:
+ addr_fams = netifaces.ifaddresses(dev_name)
+ af_link = addr_fams.get(netifaces.AF_LINK)
+ af_inet = addr_fams.get(netifaces.AF_INET)
+ af_inet6 = addr_fams.get(netifaces.AF_INET6)
+
+ mac = None
+ if af_link and 'addr' in af_link[0]:
+ mac = af_link[0]['addr']
+
+ # Do not bother recording localhost
+ if mac == "00:00:00:00:00:00":
+ continue
+
+ if mac and (af_inet or af_inet6):
+ key = mac
+ val = {}
+ if af_inet:
+ val["ip4"] = af_inet
+ if af_inet6:
+ val["ip6"] = af_inet6
+ by_mac[key] = val
+
+ if af_inet:
+ for ip_info in af_inet:
+ key = ip_info['addr']
+ val = ip_info.copy()
+ del val['addr']
+ if mac:
+ val['mac'] = mac
+ by_ip4[key] = val
+
+ if af_inet6:
+ for ip_info in af_inet6:
+ key = ip_info['addr']
+ val = ip_info.copy()
+ del val['addr']
+ if mac:
+ val['mac'] = mac
+ by_ip6[key] = val
+
+ return host_info
+
+
+if __name__ == "__main__":
+ print(util.json_dumps(get_host_info()))
+
+# vi: ts=4 expandtab