blob: d4ae0c23c64c6a2c69856f9bf3cf394ea5706156 [file] [log] [blame]
akutzdd794a42018-09-18 10:04:21 -05001# Cloud-Init Datasource for VMware Guestinfo
2#
3# Copyright (c) 2018 VMware, Inc. All Rights Reserved.
4#
5# This product is licensed to you under the Apache 2.0 license (the "License").
6# You may not use this product except in compliance with the Apache 2.0 License.
7#
8# This product may include a number of subcomponents with separate copyright
9# notices and license terms. Your use of these subcomponents is subject to the
10# terms and conditions of the subcomponent's license, as noted in the LICENSE
11# file.
akutz77457a62018-08-22 16:07:21 -050012#
akutz6501f902018-08-24 12:19:05 -050013# Authors: Anish Swaminathan <anishs@vmware.com>
14# Andrew Kutz <akutz@vmware.com>
akutz77457a62018-08-22 16:07:21 -050015#
akutz0d1fce52019-06-01 18:54:29 -050016
17'''
18A cloud init datasource for VMware GuestInfo.
19'''
20
akutz77457a62018-08-22 16:07:21 -050021import base64
akutzffc4dd52019-06-02 11:34:55 -050022import collections
akutz0d1fce52019-06-01 18:54:29 -050023from distutils.spawn import find_executable
akutzffc4dd52019-06-02 11:34:55 -050024import json
25import socket
26import zlib
akutz77457a62018-08-22 16:07:21 -050027
28from cloudinit import log as logging
29from cloudinit import sources
30from cloudinit import util
akutz6501f902018-08-24 12:19:05 -050031from cloudinit import safeyaml
akutz77457a62018-08-22 16:07:21 -050032
akutzffc4dd52019-06-02 11:34:55 -050033from deepmerge import always_merger
34import netifaces
35
# Text compared against vmtoolsd's stderr to detect that a guestinfo key
# has no value.
NOVAL = "No value found"

# Location of the vmtoolsd executable as resolved by find_executable; the
# rest of the module treats a falsy value as "vmtoolsd is not installed".
VMTOOLSD = find_executable("vmtoolsd")

# Logger shared by everything in this module.
LOG = logging.getLogger(__name__)
akutz77457a62018-08-22 16:07:21 -050039
akutz0d1fce52019-06-01 18:54:29 -050040
class NetworkConfigError(Exception):
    '''
    Signals a problem with obtaining or applying the network
    configuration.
    '''
47
48
class DataSourceVMwareGuestInfo(sources.DataSource):
    '''
    Cloud-init datasource backed by VMware GuestInfo keys.

    The datasource was written against CentOS 7, which ships cloud-init
    0.7.9, but it should work on any Linux distribution for which
    cloud-init is available.

    Documentation for the cloud-init 0.7.9 datasource API is at
    http://bit.ly/cloudinit-datasource-0-7-9; the current cloud-init
    documentation is at https://cloudinit.readthedocs.io/en/latest/.

    Hostname:
        Taken from the metadata key "local-hostname".

    Instance ID:
        Taken from the metadata key "instance-id" when present;
        otherwise read from /sys/class/dmi/id/product_uuid.

    Network:
        Taken from the metadata key "network", whose value follows
        Network Config Version 1 or 2 depending on what the distro's
        cloud-init understands:

            Network Config Version 1 - http://bit.ly/cloudinit-net-conf-v1
            Network Config Version 2 - http://bit.ly/cloudinit-net-conf-v2

        For example, CentOS 7's official cloud-init package (0.7.9)
        does not support Version 2. The datasource itself will still
        pass Version 2 data through, provided the installed cloud-init
        is new enough to parse it.

        The metadata key "network.encoding" describes how the value of
        "network" is encoded. Valid encodings are base64 and
        gzip+base64.
    '''

    dsname = 'VMwareGuestInfo'

    def __init__(self, sys_cfg, distro, paths, ud_proc=None):
        sources.DataSource.__init__(self, sys_cfg, distro, paths, ud_proc)
        if VMTOOLSD:
            return
        # Construction still succeeds; get_data() refuses to run later.
        LOG.error("Failed to find vmtoolsd")

    def get_data(self):
        """
        Fetch metadata, user data and vendor data from guestinfo.

        Newer cloud-init releases expect this logic to live in
        _get_data, but get_data is kept because the datasource supports
        releases as old as cloud-init 0.7.9.

        Returns True once the three values have been fetched, or False
        when vmtoolsd is not available.
        """
        if VMTOOLSD:
            self.metadata = load_metadata()
            self.userdata_raw = guestinfo('userdata')
            self.vendordata_raw = guestinfo('vendordata')
            return True

        LOG.error("vmtoolsd is required to fetch guestinfo value")
        return False

    def setup(self, is_new_instance):
        """setup(is_new_instance)

        Invoked before user-data and vendor-data are processed.

        Unless the datasource has set mode to 'local', networking per
        'fallback' or per 'network_config' will already have been
        written and brought up by the OS at this point.
        """

        # Collect what the running host reports about itself.
        discovered = get_host_info()
        LOG.info("got host-info: %s", discovered)

        # Fold the host details (network interfaces, default IP
        # addresses, etc.) into the metadata.
        self.metadata = always_merger.merge(self.metadata, discovered)

        # Persisting happens here instead of in get_data so that the
        # network interfaces are up and get recorded with the metadata.
        # Versions of cloud-init without persist_instance_data raise
        # AttributeError, which is deliberately ignored.
        try:
            self.persist_instance_data()
        except AttributeError:
            pass

    @property
    def network_config(self):
        if 'network' not in self.metadata:
            LOG.debug("using fallback network config")
            fallback = {
                'config': self.distro.generate_fallback_config(),
            }
            self.metadata['network'] = fallback
        else:
            LOG.debug("using metadata network config")
        return self.metadata['network']['config']

    def get_instance_id(self):
        # Prefer an instance ID supplied through the metadata.
        known = self.metadata
        if known and 'instance-id' in known:
            return known['instance-id']

        # Otherwise fall back to the DMI product UUID and cache it in
        # the metadata.
        with open('/sys/class/dmi/id/product_uuid', 'r') as uuid_file:
            product_uuid = str(uuid_file.read()).rstrip()
            self.metadata['instance-id'] = product_uuid
            return self.metadata['instance-id']
akutz77457a62018-08-22 16:07:21 -0500167
akutz6501f902018-08-24 12:19:05 -0500168
def decode(key, enc_type, data):
    '''
    Returns data decoded according to enc_type, always as a string.

    key only identifies the data in log messages. enc_type selects the
    decoding: "gzip+base64"/"gz+b64" is base64-decoded and then
    gunzipped, "base64"/"b64" is base64-decoded, and anything else
    (including None) is treated as plain text.

    Why the final conversion is needed:
      * py 2.7: zlib.decompress and base64.b64decode take and return
        str, and json.loads takes str.
      * py 3.x before 3.6: zlib.decompress and base64.b64decode return
        bytes, while json.loads only takes str.
      * py 3.6 and newer: same as above except json.loads also accepts
        bytes.
    The decoders therefore yield bytes on newer pythons and str on
    older ones, so a bytes result is converted to str before returning.
    '''
    LOG.debug("Getting encoded data for key=%s, enc=%s", key, enc_type)

    if enc_type in ("gzip+base64", "gz+b64"):
        LOG.debug("Decoding %s format %s", enc_type, key)
        compressed = base64.b64decode(data)
        # MAX_WBITS | 16 tells zlib to expect a gzip header and trailer.
        decoded = zlib.decompress(compressed, zlib.MAX_WBITS | 16)
    elif enc_type in ("base64", "b64"):
        LOG.debug("Decoding %s format %s", enc_type, key)
        decoded = base64.b64decode(data)
    else:
        LOG.debug("Plain-text data %s", key)
        decoded = data

    return decoded.decode('utf-8') if isinstance(decoded, bytes) else decoded
209
210
def get_guestinfo_value(key):
    '''
    Asks vmtoolsd for the value of guestinfo.<key>.

    Returns the value with trailing whitespace stripped, or None when
    the key has no value or the lookup fails. Failures are logged and
    never raised.
    '''
    LOG.debug("Getting guestinfo value for key %s", key)
    try:
        out, err = util.subp(
            [VMTOOLSD, "--cmd", "info-get guestinfo." + key])
        if err != NOVAL and out:
            return out.rstrip()
        if err == NOVAL:
            LOG.debug("No value found for key %s", key)
        else:
            LOG.error("Failed to get guestinfo value for key %s", key)
    except util.ProcessExecutionError as error:
        # A missing key is an expected outcome, so it is only logged at
        # debug level.
        if error.stderr != NOVAL:
            util.logexc(
                LOG, "Failed to get guestinfo value for key %s: %s", key, error)
        else:
            LOG.debug("No value found for key %s", key)
    except Exception:
        util.logexc(
            LOG, "Unexpected error while trying to get guestinfo value for key %s", key)
    return None
akutz6501f902018-08-24 12:19:05 -0500235
akutz0d1fce52019-06-01 18:54:29 -0500236
def guestinfo(key):
    '''
    Returns the decoded guestinfo value for key, or None when the key
    has no value. The encoding is looked up under "<key>.encoding".
    '''
    value = get_guestinfo_value(key)
    if value:
        encoding = get_guestinfo_value(key + '.encoding')
        return decode('guestinfo.' + key, encoding, value)
    return None
247
248
def load(data):
    '''
    load first attempts to unmarshal the provided data as JSON, and if
    that fails then attempts to unmarshal the data as YAML.

    If data is None or otherwise empty then a new dictionary is
    returned.
    '''
    if not data:
        return {}
    try:
        return json.loads(data)
    except (TypeError, ValueError):
        # ValueError covers json.JSONDecodeError (and the plain
        # ValueError raised on Python 2); TypeError covers input types
        # json.loads rejects. Only these trigger the YAML fallback, so
        # KeyboardInterrupt and SystemExit are no longer swallowed.
        return safeyaml.load(data)
261
262
def load_metadata():
    '''
    load_metadata loads the metadata from the guestinfo data, optionally
    decoding the network config when required.

    The "network" value may be supplied either as a mapping or as an
    encoded string; in the latter case it is decoded according to the
    "network.encoding" key and then parsed as JSON or YAML. The
    "network.encoding" key is always removed from the returned data.

    Raises NetworkConfigError when a network value is present but does
    not resolve to a mapping containing a 'config' key.
    '''
    # collections.Mapping was removed in Python 3.10; the ABCs live in
    # collections.abc. The fallback keeps Python 2 working.
    try:
        from collections.abc import Mapping
    except ImportError:
        from collections import Mapping

    data = load(guestinfo('metadata'))

    network = None
    if 'network' in data:
        network = data['network']
        del data['network']

    network_enc = None
    if 'network.encoding' in data:
        network_enc = data['network.encoding']
        del data['network.encoding']

    if network:
        if not isinstance(network, Mapping):
            LOG.debug("decoding network data: %s", network)
            dec_net = decode('metadata.network', network_enc, network)
            network = load(dec_net)
        # A decoded value that is not a mapping (e.g. a bare string)
        # would otherwise be checked with a substring test for 'config'.
        if not isinstance(network, Mapping) or 'config' not in network:
            raise NetworkConfigError("missing 'config' key")
        data['network'] = network

    return data
290
akutz6501f902018-08-24 12:19:05 -0500291
def get_datasource_list(depends):
    '''
    Returns the datasources provided by this module. The depends
    argument is accepted but not consulted.
    '''
    datasources = [DataSourceVMwareGuestInfo]
    return datasources
akutz0d1fce52019-06-01 18:54:29 -0500297
298
def _get_single_addr(dev, addr_fams, family, label):
    '''
    Returns the only address of the given family on a device.

    dev is the device name (used in log messages), addr_fams is the
    result of netifaces.ifaddresses(dev), family is a netifaces address
    family constant and label is "ipv4" or "ipv6" for log messages.

    Returns None when the device has no address of that family, when
    it has more than one (a warning is logged, as no single default can
    be chosen), or when the entry carries no 'addr' key.
    '''
    if not addr_fams:
        return None
    addrs = addr_fams.get(family)
    if not addrs:
        return None
    if len(addrs) > 1:
        LOG.warning(
            "device %s has more than one %s address: %s", dev, label, addrs)
        return None
    return addrs[0].get('addr')


def get_default_ip_addrs():
    '''
    Returns the default IPv4 and IPv6 addresses based on the device(s) used for
    the default route. Please note that None may be returned for either address
    family if that family has no default route or if there are multiple
    addresses associated with the device used by the default route for a given
    address family.
    '''
    gateways = netifaces.gateways()
    if 'default' not in gateways:
        return None, None

    default_gw = gateways['default']
    if netifaces.AF_INET not in default_gw and netifaces.AF_INET6 not in default_gw:
        return None, None

    ipv4 = None
    ipv6 = None
    dev4 = addr4_fams = None
    dev6 = addr6_fams = None

    # Default IPv4 address: the single IPv4 address on the device that
    # carries the default IPv4 route.
    gw4 = default_gw.get(netifaces.AF_INET)
    if gw4:
        _, dev4 = gw4
        addr4_fams = netifaces.ifaddresses(dev4)
        ipv4 = _get_single_addr(dev4, addr4_fams, netifaces.AF_INET, 'ipv4')

    # Default IPv6 address: the single IPv6 address on the device that
    # carries the default IPv6 route.
    gw6 = default_gw.get(netifaces.AF_INET6)
    if gw6:
        _, dev6 = gw6
        addr6_fams = netifaces.ifaddresses(dev6)
        ipv6 = _get_single_addr(dev6, addr6_fams, netifaces.AF_INET6, 'ipv6')

    if ipv4 and not ipv6:
        # No default IPv6 address was found, so see whether the device
        # behind the default IPv4 address has a single IPv6 address.
        ipv6 = _get_single_addr(dev4, addr4_fams, netifaces.AF_INET6, 'ipv6')
    elif ipv6 and not ipv4:
        # No default IPv4 address was found, so see whether the device
        # behind the default IPv6 address has a single IPv4 address.
        # netifaces names the IPv4 family AF_INET; there is no AF_INET4.
        ipv4 = _get_single_addr(dev6, addr6_fams, netifaces.AF_INET, 'ipv4')

    return ipv4, ipv6
371
372
def get_host_info():
    '''
    Builds a dictionary describing the host: its name, its default IP
    addresses and its network interfaces indexed by MAC address, IPv4
    address and IPv6 address.
    '''
    by_mac = collections.OrderedDict()
    by_ipv4 = collections.OrderedDict()
    by_ipv6 = collections.OrderedDict()
    host_info = {
        'network': {
            'interfaces': {
                'by-mac': by_mac,
                'by-ipv4': by_ipv4,
                'by-ipv6': by_ipv6,
            },
        },
    }

    fqdn = socket.getfqdn()
    if fqdn:
        host_info['hostname'] = fqdn
        host_info['local-hostname'] = fqdn

    default_ipv4, default_ipv6 = get_default_ip_addrs()
    if default_ipv4:
        host_info['local-ipv4'] = default_ipv4
    if default_ipv6:
        host_info['local-ipv6'] = default_ipv6

    for dev_name in netifaces.interfaces():
        addr_fams = netifaces.ifaddresses(dev_name)
        link_addrs = addr_fams.get(netifaces.AF_LINK)
        v4_addrs = addr_fams.get(netifaces.AF_INET)
        v6_addrs = addr_fams.get(netifaces.AF_INET6)

        if link_addrs and 'addr' in link_addrs[0]:
            mac = link_addrs[0]['addr']
        else:
            mac = None

        # Do not bother recording localhost
        if mac == "00:00:00:00:00:00":
            continue

        if mac and (v4_addrs or v6_addrs):
            entry = {}
            if v4_addrs:
                entry["ipv4"] = v4_addrs
            if v6_addrs:
                entry["ipv6"] = v6_addrs
            by_mac[mac] = entry

        # Index each address under its own family, skipping that
        # family's loopback address.
        per_family = (
            (v4_addrs, '127.0.0.1', by_ipv4),
            (v6_addrs, '::1', by_ipv6),
        )
        for addrs, loopback, index in per_family:
            for ip_info in addrs or ():
                addr = ip_info['addr']
                if addr == loopback:
                    continue
                # Everything except the address itself becomes the value.
                details = ip_info.copy()
                del details['addr']
                if mac:
                    details['mac'] = mac
                index[addr] = details

    return host_info
450
451
def main():
    '''
    Entry point when the file is run as a program: prints, as JSON, a
    sample DHCP network config merged with the discovered host info.
    '''
    seed = {'network': {'config': {'dhcp': True}}}
    merged = always_merger.merge(seed, get_host_info())
    print(util.json_dumps(merged))


if __name__ == "__main__":
    main()
akutz0d1fce52019-06-01 18:54:29 -0500464
465# vi: ts=4 expandtab