%PDF- %PDF-
Direktori : /usr/share/netplan/netplan_cli/cli/ |
Current File : //usr/share/netplan/netplan_cli/cli/sriov.py |
#!/usr/bin/python3
#
# Copyright (C) 2020-2022 Canonical, Ltd.
# Author: Łukasz 'sil2100' Zemczak <lukasz.zemczak@canonical.com>
# Author: Lukas Märdian <slyon@ubuntu.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; version 3.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

import errno
import json
import logging
import os
import subprocess
import typing

from typing import Dict, List, Optional, Set

from . import utils
from ..configmanager import ConfigurationError
import netplan

import netifaces


# PCIDevice class originates from mlnx_switchdev_mode/sriovify.py
# Copyright 2019 Canonical Ltd, Apache License, Version 2.0
# https://github.com/openstack-charmers/mlnx-switchdev-mode
class PCIDevice(object):
    """Helper class for interaction with a PCI device"""

    def __init__(self, pci_addr: str):
        """Initialise a new PCI device handler

        :param pci_addr: PCI address of device
        :type: str
        """
        self.pci_addr = pci_addr

    @property
    def sys(self) -> str:
        """sysfs path (can be overridden for testing)

        :return: full path to /sys filesystem
        :rtype: str
        """
        return "/sys"

    @property
    def path(self) -> str:
        """/sys path for PCI device

        :return: full path to PCI device in /sys filesystem
        :rtype: str
        """
        return os.path.join(self.sys, "bus/pci/devices", self.pci_addr)

    def subpath(self, subpath: str) -> str:
        """/sys subpath helper for PCI device

        :param subpath: subpath to construct path for
        :type: str
        :return: self.path + subpath
        :rtype: str
        """
        return os.path.join(self.path, subpath)

    @property
    def driver(self) -> str:
        """Kernel driver for PCI device

        :return: kernel driver in use for device, or '' when the device
                 has no ``driver`` entry
        :rtype: str
        """
        driver = ''
        if os.path.exists(self.subpath("driver")):
            # "driver" is a symlink; its target's basename is the driver name
            driver = os.path.basename(os.readlink(self.subpath("driver")))
        return driver

    @property
    def bound(self) -> bool:
        """Determine if device is bound to a kernel driver

        :return: whether device is bound to a kernel driver
        :rtype: bool
        """
        return os.path.exists(self.subpath("driver"))

    @property
    def is_pf(self) -> bool:
        """Determine if device is a SR-IOV Physical Function

        :return: whether device is a PF
        :rtype: bool
        """
        return os.path.exists(self.subpath("sriov_numvfs"))

    @property
    def is_vf(self) -> bool:
        """Determine if device is a SR-IOV Virtual Function

        :return: whether device is a VF
        :rtype: bool
        """
        return os.path.exists(self.subpath("physfn"))

    @property
    def vf_addrs(self) -> list:
        """List Virtual Function addresses associated with a Physical Function

        Follows the ``virtfn0``, ``virtfn1``, ... symlinks of the device
        until the first index that does not exist.

        :return: List of PCI addresses of Virtual Functions
        :rtype: list[str]
        """
        vf_addrs = []
        i = 0
        while True:
            try:
                vf_addrs.append(
                    os.path.basename(
                        os.readlink(self.subpath("virtfn{}".format(i)))
                    )
                )
            except FileNotFoundError:
                break
            i += 1
        return vf_addrs

    @property
    def vfs(self) -> list:
        """List Virtual Function associated with a Physical Function

        :return: List of PCI devices of Virtual Functions
        :rtype: list[PCIDevice]
        """
        return [PCIDevice(addr) for addr in self.vf_addrs]

    def devlink_set(self, obj_name: str, prop: str, value: str):
        """Set devlink options for the PCI device

        :param obj_name: devlink object to set options on
        :type: str
        :param prop: property to set
        :type: str
        :param value: value to set for property
        :type: str
        :raises subprocess.CalledProcessError: if devlink exits non-zero
        """
        subprocess.check_call(
            [
                "/sbin/devlink",
                "dev",
                obj_name,
                "set",
                "pci/{}".format(self.pci_addr),
                prop,
                value,
            ]
        )

    def devlink_eswitch_mode(self) -> str:
        """Query eswitch mode via devlink for the PCI device

        :return: the eswitch mode or '__undetermined' if it can't be retrieved
        :rtype: str
        """
        pci = f"pci/{self.pci_addr}"
        try:
            output = subprocess.check_output(
                [
                    "/sbin/devlink",
                    "-j",
                    "dev",
                    "eswitch",
                    "show",
                    pci,
                ],
                stderr=subprocess.DEVNULL,
            )
        except subprocess.CalledProcessError:
            return '__undetermined'

        json_output = json.loads(output)
        # The JSON document looks like this when the 'mode' is available:
        # {"dev":{"pci/0000:03:00.0":{"mode":"switchdev"}}}
        # and like this when it's not available
        # {"dev":{}}
        return json_output.get("dev", {}).get(pci, {}).get('mode', '__undetermined')

    def __str__(self) -> str:
        """String representation of object

        :return: PCI address of the device
        :rtype: str
        """
        return self.pci_addr


def bind_vfs(vfs: typing.Iterable[PCIDevice], driver) -> typing.List[PCIDevice]:
    """Bind unbound VFs to driver.

    Writes the PCI address of every VF that is not currently bound to
    /sys/bus/pci/drivers/<driver>/bind.

    :return: the VFs that were written to the bind file
    """
    bound_vfs = []
    for vf in vfs:
        if not vf.bound:
            with open("/sys/bus/pci/drivers/{}/bind".format(driver), "wt") as f:
                f.write(vf.pci_addr)
                bound_vfs.append(vf)
    return bound_vfs


def unbind_vfs(vfs: typing.Iterable[PCIDevice], driver) -> typing.Iterable[PCIDevice]:
    """Unbind bound VFs from driver.

    Writes the PCI address of every VF that is currently bound to
    /sys/bus/pci/drivers/<driver>/unbind.

    :return: the VFs that were written to the unbind file
    """
    unbound_vfs = []
    for vf in vfs:
        if vf.bound:
            with open("/sys/bus/pci/drivers/{}/unbind".format(driver), "wt") as f:
                f.write(vf.pci_addr)
                unbound_vfs.append(vf)
    return unbound_vfs


def _interface_matches(netdef: netplan.NetDefinition, interface: str) -> bool:
    """Check a system interface against the netdef's match rules, passing
    the interface's name, driver name and MAC address to the matcher."""
    return netdef._match_interface(
        iface_name=interface,
        iface_driver=utils.get_interface_driver_name(interface),
        iface_mac=utils.get_interface_macaddress(interface))


def _get_interface_name_for_netdef(netdef: netplan.NetDefinition) -> Optional[str]:
    """
    Try to match a netdef with the real system network interface.
    Throws ConfigurationError if there is more than one match.

    Returns the interface name, or None if no system interface corresponds
    to the netdef.
    """
    interfaces: List[str] = netifaces.interfaces()
    if netdef._has_match:
        # now here it's a bit tricky
        set_name: str = netdef.set_name
        if set_name and set_name in interfaces:
            # if we had a match: stanza and set-name: this means we should
            # assume that, if found, the interface has already been
            # renamed - use the new name
            return set_name
        else:
            matches: Set[str] = set()
            # we walk through all the system interfaces to determine if there is
            # more than one matched interface
            for interface in interfaces:
                if not _interface_matches(netdef, interface):
                    continue
                # we have a matching PF
                matches.add(interface)
                # error out if we matched more than one; checking *after*
                # the insertion makes the second distinct match fail, as
                # promised by the docstring
                if len(matches) > 1:
                    raise ConfigurationError('matched more than one interface for a PF device: %s' % netdef.id)
            if matches:
                return next(iter(matches))
    else:
        # no match field, assume entry name is the interface name
        if netdef.id in interfaces:
            return netdef.id

    return None


def _get_pci_slot_name(netdev):
    """
    Read PCI slot name for given interface name

    Looks for the PCI_SLOT_NAME= entry in
    /sys/class/net/<netdev>/device/uevent and returns its value, or None
    if the file contains no such entry. Raises RuntimeError if the file
    cannot be read.
    """
    uevent_path = os.path.join('/sys/class/net', netdev, 'device/uevent')
    try:
        with open(uevent_path) as f:
            for line in f:
                line = line.strip()
                if line.startswith('PCI_SLOT_NAME='):
                    # split only once so the full value is kept
                    return line.split('=', 1)[1]
    except IOError as e:
        raise RuntimeError('failed parsing PCI slot name for %s: %s' % (netdev, str(e))) from e
    return None


def _get_physical_functions(np_state: netplan.State) -> Dict[str, str]:
    """
    Go through the list of netplan ethernet devices and identify which are
    PFs matching them with actual network interfaces.

    Returns a mapping of PF netdef ID to the matched system interface name.
    """
    pfs = {}
    for netdef in np_state.ethernets.values():
        # If the sriov_link is present, the interface is a VF and link is the PF
        if link := netdef.links.get('sriov'):
            if iface := _get_interface_name_for_netdef(np_state[link.id]):
                pfs[link.id] = iface
        else:
            # If a netdef also defines the embedded_switch_mode key we consider it's a PF
            # This enables us to change the eswitch mode even when the PF has no VFs.
            if netdef._embedded_switch_mode:
                if iface := _get_interface_name_for_netdef(netdef):
                    pfs[netdef.id] = iface

            # If the netdef has any (positive) number of VFs that's because it's a PF
            try:
                count = netdef._vf_count
            except netplan.NetplanException as e:
                raise ConfigurationError(str(e)) from e
            if count > 0:
                if iface := _get_interface_name_for_netdef(netdef):
                    pfs[netdef.id] = iface

    return pfs


def _get_vf_number_per_pf(np_state: netplan.State) -> Dict[str, int]:
    """
    Go through the list of netplan ethernet devices and identify which ones
    have VFs. netdef._vf_count ultimately calls _netplan_state_get_vf_count_for_def
    from libnetplan which return MAX(sriov_explicit_vf_count, number of VF netdefs).

    Returns a mapping of PF system interface name to its VF count.
    """
    vf_counts = {}
    for netdef in np_state.ethernets.values():
        try:
            count = netdef._vf_count
        except netplan.NetplanException as e:
            raise ConfigurationError(str(e)) from e
        if count > 0:
            if iface := _get_interface_name_for_netdef(netdef):
                vf_counts[iface] = count

    return vf_counts


def _get_virtual_functions(np_state: netplan.State) -> Set[str]:
    """
    Go through the list of netplan ethernet devices and identify which ones
    are virtual functions

    Returns the set of netdef IDs of those VFs whose PF was found on the
    system.
    """
    vfs = set()
    for netdef in np_state.ethernets.values():
        # If the sriov_link is present and the PF is also present in the system we save the VF
        if link := netdef.links.get('sriov'):
            if _get_interface_name_for_netdef(np_state[link.id]):
                vfs.add(netdef.id)

    return vfs


def set_numvfs_for_pf(pf, vf_count):
    """
    Allocate the required number of VFs for the selected PF.

    Raises ConfigurationError if vf_count exceeds 256 or the PF's
    sriov_totalvfs, and RuntimeError if sysfs cannot be read or written.
    Returns True once sriov_numvfs has been written.
    """
    if vf_count > 256:
        raise ConfigurationError(
            'cannot allocate more VFs for PF %s than the SR-IOV maximum: %s > 256' % (pf, vf_count))

    devdir = os.path.join('/sys/class/net', pf, 'device')
    numvfs_path = os.path.join(devdir, 'sriov_numvfs')
    totalvfs_path = os.path.join(devdir, 'sriov_totalvfs')
    try:
        with open(totalvfs_path) as f:
            vf_max = int(f.read().strip())
    except IOError as e:
        raise RuntimeError('failed parsing sriov_totalvfs for %s: %s' % (pf, str(e))) from e
    except ValueError as e:
        raise RuntimeError('invalid sriov_totalvfs value for %s' % pf) from e

    if vf_count > vf_max:
        raise ConfigurationError(
            'cannot allocate more VFs for PF %s than supported: %s > %s (sriov_totalvfs)' % (pf, vf_count, vf_max))

    try:
        with open(numvfs_path, 'w') as f:
            f.write(str(vf_count))
    except IOError as e:
        failure = e
        bail = True
        if e.errno == errno.EBUSY:  # device or resource busy
            logging.warning('device or resource busy while setting sriov_numvfs for %s, trying workaround' % pf)
            try:
                # doing this in two open/close sequences so that
                # it's as close to writing via shell as possible
                with open(numvfs_path, 'w') as f:
                    f.write('0')
                with open(numvfs_path, 'w') as f:
                    f.write(str(vf_count))
            except IOError as e_inner:
                # report the error of the workaround rather than the first one
                failure = e_inner
            else:
                bail = False
        if bail:
            raise RuntimeError('failed setting sriov_numvfs to %s for %s: %s' % (vf_count, pf, str(failure))) from failure

    return True


def perform_hardware_specific_quirks(pf):
    """
    Perform any hardware-specific quirks for the given SR-IOV device to make
    sure all the VF-count changes are applied.

    Raises RuntimeError if the vendor/device ID files cannot be read.
    """
    devdir = os.path.join('/sys/class/net', pf, 'device')
    try:
        # both files hold a "0x"-prefixed value; strip the prefix
        with open(os.path.join(devdir, 'vendor')) as f:
            vendor_id = f.read().strip()[2:]
        with open(os.path.join(devdir, 'device')) as f:
            device_id = f.read().strip()[2:]
    except IOError as e:
        raise RuntimeError('could not determine vendor and device ID of %s: %s' % (pf, str(e))) from e

    combined_id = ':'.join([vendor_id, device_id])
    quirk_devices = ()  # TODO: add entries to the list
    if combined_id in quirk_devices:  # pragma: nocover (empty quirk_devices)
        # some devices need special handling, so this is the place

        # Currently this part is empty, but has been added as a preemptive
        # measure, as apparently a lot of SR-IOV cards have issues with
        # dynamically allocating VFs. Some cards seem to require a full
        # kernel module reload cycle after changing the sriov_numvfs value
        # for the changes to come into effect.
        # Any identified card/vendor can then be special-cased here, if
        # needed.
        pass


def apply_vlan_filter_for_vf(pf, vf, vlan_name, vlan_id, prefix='/'):
    """
    Apply the hardware VLAN filtering for the selected VF.

    Raises RuntimeError if the VF index cannot be determined or the
    'ip link set' command fails.
    """

    # this is more complicated, because to do this, we actually need to have
    # the vf index - just knowing the vf interface name is not enough
    vf_index = None
    # the prefix argument is here only for unit testing purposes
    vf_devdir = os.path.join(prefix, 'sys/class/net', vf, 'device')
    vf_dev_id = os.path.basename(os.readlink(vf_devdir))
    pf_devdir = os.path.join(prefix, 'sys/class/net', pf, 'device')
    for f in os.listdir(pf_devdir):
        # only entries named virtfn<N> are relevant; the index is whatever
        # follows the prefix
        if f.startswith('virtfn'):
            dev_path = os.path.join(pf_devdir, f)
            dev_id = os.path.basename(os.readlink(dev_path))
            if dev_id == vf_dev_id:
                vf_index = f[len('virtfn'):]
                break

    if not vf_index:
        raise RuntimeError(
            'could not determine the VF index for %s while configuring vlan %s' % (vf, vlan_name))

    # now, create the VLAN filter
    # TODO: would be best if we did this directly via python, without calling
    #  the iproute tooling
    try:
        subprocess.check_call(['ip', 'link', 'set',
                               'dev', pf,
                               'vf', vf_index,
                               'vlan', str(vlan_id)],
                              stdout=subprocess.DEVNULL,
                              stderr=subprocess.DEVNULL)
    except subprocess.CalledProcessError as e:
        raise RuntimeError(
            'failed setting SR-IOV VLAN filter for vlan %s (ip link set command failed)' % vlan_name) from e


def apply_sriov_config(config_manager, rootdir='/'):
    """
    Go through all interfaces, identify which ones are SR-IOV VFs, create
    them and perform all other necessary setup.

    The steps are: set the VF count per PF, match VF netdefs to system
    interfaces, switch the eswitch mode of PFs where requested and finally
    apply hardware VLAN filters. The rootdir argument is not used by this
    function body.
    """
    config_manager.parse()
    interfaces = netifaces.interfaces()
    np_state = config_manager.np_state

    # for sr-iov devices, we identify VFs by them having a link: field
    # pointing to an PF. So let's browse through all ethernet devices,
    # find all that are VFs and count how many of those are linked to
    # particular PFs, as we need to then set the numvfs for each.
    vf_counts = _get_vf_number_per_pf(np_state)
    # we also store all matches between VF/PF netplan entry names and
    # interface that they're currently matching to
    vfs_set = _get_virtual_functions(np_state)
    pfs = _get_physical_functions(np_state)

    # setup the required number of VFs per PF
    # at the same time store which PFs got changed in case the NICs
    # require some special quirks for the VF number to change
    vf_count_changed = []
    if vf_counts:
        for pf, vf_count in vf_counts.items():
            if not set_numvfs_for_pf(pf, vf_count):
                continue

            vf_count_changed.append(pf)

    if vf_count_changed:
        # some cards need special treatment when we want to change the
        # number of enabled VFs
        for pf in vf_count_changed:
            perform_hardware_specific_quirks(pf)

        # also, since the VF number changed, the interfaces list also
        # changed, so we need to refresh it
        interfaces = netifaces.interfaces()

    # now in theory we should have all the new VFs set up and existing;
    # this is needed because we will have to now match the defined VF
    # entries to existing interfaces, otherwise we won't be able to set
    # filtered VLANs for those.
    # XXX: does matching those even make sense?
    vfs = {}
    for vf in vfs_set:
        netdef = np_state[vf]
        if netdef._has_match:
            # right now we only match by name, as I don't think matching per
            # driver and/or macaddress makes sense
            # TODO: print warning if other matches are provided
            for interface in interfaces:
                if netdef._match_interface(iface_name=interface):
                    if vf in vfs and vfs[vf]:
                        raise ConfigurationError('matched more than one interface for a VF device: %s' % vf)
                    vfs[vf] = interface
        else:
            if vf in interfaces:
                vfs[vf] = vf

    # Walk the SR-IOV PFs and check if we need to change the eswitch mode
    for netdef_id, iface in pfs.items():
        netdef = np_state[netdef_id]
        eswitch_mode = netdef._embedded_switch_mode
        if eswitch_mode in ['switchdev', 'legacy']:
            pci_addr = _get_pci_slot_name(iface)
            if pci_addr is None:
                # without a PCI address every sysfs path below would be
                # built from None and fail with an opaque TypeError
                raise RuntimeError('could not determine the PCI slot name for %s' % iface)
            pcidev = PCIDevice(pci_addr)
            current_eswitch_mode_system = pcidev.devlink_eswitch_mode()
            if eswitch_mode != current_eswitch_mode_system:
                if pcidev.is_pf:
                    logging.debug("Found VFs of {}: {}".format(pcidev, pcidev.vf_addrs))
                    if pcidev.vfs:
                        # unbinding is best-effort: a failure is logged and
                        # the mode change is still attempted
                        try:
                            unbind_vfs(pcidev.vfs, pcidev.driver)
                        except Exception as e:
                            logging.warning(f'Unbinding of VFs for {netdef_id} failed: {str(e)}')

                    logging.debug(f'Changing eswitch mode from {current_eswitch_mode_system} to {eswitch_mode} for: {netdef_id}')
                    pcidev.devlink_set('eswitch', 'mode', eswitch_mode)
                    if pcidev.vfs:
                        if not netdef._delay_virtual_functions_rebind:
                            bind_vfs(pcidev.vfs, pcidev.driver)

    filtered_vlans_set = set()
    for vlan, netdef in np_state.vlans.items():
        # there is a special sriov vlan renderer that one can use to mark
        # a selected vlan to be done in hardware (VLAN filtering)
        if netdef._has_sriov_vlan_filter:
            # this only works for SR-IOV VF interfaces
            link = netdef.links.get('vlan')
            vlan_id = netdef._vlan_id
            vf = vfs.get(link.id)
            if not vf:
                # it is possible this is not an error, for instance when
                # the configuration has been defined 'for the future'
                # XXX: but maybe we should error out here as well?
                logging.warning(
                    'SR-IOV vlan defined for %s but link %s is either not a VF or has no matches' % (vlan, link.id))
                continue

            # get the parent pf interface
            # first we fetch the related vf netplan entry
            # and finally, get the matched pf interface
            pf = pfs.get(link.links.get('sriov').id)

            if vf in filtered_vlans_set:
                raise ConfigurationError(
                    'interface %s for netplan device %s (%s) already has an SR-IOV vlan defined' % (vf, link.id, vlan))

            # TODO: make sure that we don't apply the filter twice
            apply_vlan_filter_for_vf(pf, vf, vlan, vlan_id)
            filtered_vlans_set.add(vf)