kvm: Refactor VXLAN script and add IPv6 support (#3070)

* vxlan: Code indentation and styling fixes

This script was using TAB instead of 4 spaces and had many blank
lines containing whitespace.

This commit also fixes some Bash styling, but it does not touch the
functionality of the script.

Signed-off-by: Wido den Hollander <wido@widodh.nl>

* vxlan: Improve Bash if-statement logic

Bash suggests using double brackets instead of single brackets in
if-statement test logic.

Signed-off-by: Wido den Hollander <wido@widodh.nl>

* vxlan: Disable IPv6 on bridge and VXLAN devices

They are only transport devices and should not be interacting
in the IPv6 traffic.

If IPv6 is enabled, Instances can connect to the Hypervisor over
Link-Local IPv6, which is a potential security issue.

By disabling IPv6 on the Bridge and VXLAN device they still forward
Layer 2 packets as intended, but they do not respond to anything.

IPv4 and IPv6 traffic towards the Instances is untouched and works
as before.

Signed-off-by: Wido den Hollander <wido@widodh.nl>

* vxlan: Refactor modifyvxlan.sh for KVM by using only iproute2

This commit refactors the modifyvxlan.sh script by using only iproute2,
the 'ip' command for all functions.

brctl is deprecated and most bridge functionality can be performed with
the 'ip' command.

This commit also makes various Bash coding fixes and removes a lot of
redundant exit status checking.

In addition, it adds an IPv6 underlay for VXLAN transport. If the caller (KVM Agent)
adds the '-6' flag, the script will generate IPv6 multicast groups and routes which will
transport the VXLAN-encapsulated packets over IPv6 multicast groups.

Signed-off-by: Wido den Hollander <wido@widodh.nl>
This commit is contained in:
Wido den Hollander 2019-01-09 13:21:07 +01:00 committed by Gabriel Beims Bräscher
parent 7b0ff7f0c7
commit d3e95b98fc

View File

@ -16,180 +16,92 @@
# specific language governing permissions and limitations
# under the License.
# modifyvxlan.sh -- adds and deletes VXLANs from a Routing Server
# set -x
## TODO(VXLAN): MTU, IPv6 underlying
# modifyvxlan.sh -- Manages VXLAN devices and bridges on a Linux KVM hypervisor
# Print a one-line summary of the accepted command-line options to stdout.
# NOTE(review): two forms of the message appear here, a printf form and an
# echo form that also lists the optional (-6) flag. The printf format contains
# a %s placeholder but is given no argument to fill it.
usage() {
printf "Usage: %s: -o <op>(add | delete) -v <vxlan id> -p <pif> -b <bridge name>\n"
echo "Usage: $0: -o <op>(add | delete) -v <vxlan id> -p <pif> -b <bridge name> (-6)"
}
# Derive the multicast group address used for a VXLAN network identifier.
# Arguments: $1 - VXLAN network identifier (VNI)
#            $2 - address family: "inet" (also used when empty) or "inet6"
# Outputs:   the group address on stdout; nothing for any other family
multicastGroup() {
    local vni=$1
    local family=${2:-inet}
    local prefix sep

    # Pick the address prefix and the field separator for the family.
    case "${family}" in
        inet)
            prefix="239."
            sep="."
            ;;
        inet6)
            prefix="ff05::"
            sep=":"
            ;;
        *)
            return 0
            ;;
    esac

    # The three low-order bytes of the VNI become the last three fields.
    local high=$(( (${vni} >> 16) % 256 ))
    local mid=$(( (${vni} >> 8) % 256 ))
    local low=$(( ${vni} % 256 ))

    echo "${prefix}${high}${sep}${mid}${sep}${low}"
}
# Create the VXLAN device for a VXLAN id on a physical interface and attach it
# to a bridge, creating the bridge when it does not exist yet.
# Arguments: $1 - VXLAN id (VNI)
#            $2 - physical interface (pif)
#            $3 - bridge name
#            $4 - address family handed to 'ip -f' (read only by the lines
#                 that use the upper-case variable names)
# NOTE(review): this span appears to interleave two variants of the function:
# one using lower-case locals, brctl and explicit exit-status checks (returning
# 1, 2 or 3 on failure), and one using upper-case locals and only ip/bridge/sysctl.
# The if/fi pairs do not balance as shown; confirm against the real script.
addVxlan() {
local vxlanId=$1
local pif=$2
local vxlanDev=vxlan$vxlanId
local vxlanBr=$3
# group is 239.x.y.z, where x.y.z are the three low-order bytes of the VXLAN id
local mcastGrp="239.$(( ($vxlanId >> 16) % 256 )).$(( ($vxlanId >> 8) % 256 )).$(( $vxlanId % 256 ))"
local VNI=$1
local PIF=$2
local VXLAN_BR=$3
local FAMILY=$4
local VXLAN_DEV=vxlan${VNI}
# group address is produced by the multicastGroup helper for this VNI and family
local GROUP=$(multicastGroup ${VNI} ${FAMILY})
## TODO(VXLAN): $brif (trafficlabel) should be passed from caller because we cannot assume 1:1 mapping between pif and brif.
# lookup bridge interface
local sysfs_dir=/sys/devices/virtual/net/
# search every <dev>/brif/ directory for $pif and strip the path down to <dev>
local brif=`find ${sysfs_dir}*/brif/ -name $pif | sed -e "s,$sysfs_dir,," | sed -e 's,/brif/.*$,,'`
echo "multicast ${GROUP} for VNI ${VNI} on ${PIF}"
# true when the lookup above produced an empty string
if [ "$brif " == " " ]
then
if [ -d "/sys/class/net/${pif}" ]
then
# if bridge is not found, but matches a pif, use it
brif=$pif
else
printf "Failed to lookup bridge interface which includes pif: $pif."
return 1
fi
else
# confirm ip address of $brif
ip addr show $brif | grep -w inet
if [ $? -gt 0 ]
then
printf "Failed to find vxlan multicast source ip address on brif: $brif."
return 1
fi
# create the VXLAN device only when it is not already present in sysfs
if [[ ! -d /sys/class/net/${VXLAN_DEV} ]]; then
ip -f ${FAMILY} link add ${VXLAN_DEV} type vxlan id ${VNI} group ${GROUP} ttl 10 dev ${PIF}
ip link set ${VXLAN_DEV} up
# route the multicast group out of the physical interface
ip -f ${FAMILY} route add ${GROUP} dev ${PIF}
# turn IPv6 off on the VXLAN device itself
sysctl -qw net.ipv6.conf.${VXLAN_DEV}.disable_ipv6=1
fi
# mcast route
## TODO(VXLAN): Can we assume there're only one IP address which can be multicast src IP on the IF?
ip route get $mcastGrp | grep -w "dev $brif"
if [ $? -gt 0 ]
then
ip route add $mcastGrp/32 dev $brif
if [ $? -gt 0 ]
then
printf "Failed to add vxlan multicast route on brif: $brif."
return 1
fi
# create the bridge only when it is not already present in sysfs
if [[ ! -d /sys/class/net/$VXLAN_BR ]]; then
ip link add name ${VXLAN_BR} type bridge
ip link set ${VXLAN_BR} up
# turn IPv6 off on the bridge itself
sysctl -qw net.ipv6.conf.${VXLAN_BR}.disable_ipv6=1
fi
if [ ! -d /sys/class/net/$vxlanDev ]
then
ip link add $vxlanDev type vxlan id $vxlanId group $mcastGrp ttl 10 dev $brif
if [ $? -gt 0 ]
then
# race condition that someone already creates the vxlan
if [ ! -d /sys/class/net/$vxlanDev ]
then
printf "Failed to create vxlan $vxlanId on brif: $brif."
return 1
# look for VXLAN_DEV in the second column of the 'bridge link show' lines
# that mention VXLAN_BR; attach it to the bridge when it is not listed
bridge link show|grep ${VXLAN_BR}|awk '{print $2}'|grep "^${VXLAN_DEV}\$" > /dev/null
if [[ $? -gt 0 ]]; then
ip link set ${VXLAN_DEV} master ${VXLAN_BR}
fi
fi
fi
# is up?
ip link show $vxlanDev | grep -w UP > /dev/null
if [ $? -gt 0 ]
then
ip link set $vxlanDev up > /dev/null
fi
if [ ! -d /sys/class/net/$vxlanBr ]
then
brctl addbr $vxlanBr > /dev/null
if [ $? -gt 0 ]
then
# addbr failed: only an error if the bridge still does not exist
if [ ! -d /sys/class/net/$vxlanBr ]
then
printf "Failed to create br: $vxlanBr"
return 2
fi
fi
brctl setfd $vxlanBr 0
fi
# is vxlanDev already enslaved into vxlanBr?
ls /sys/class/net/$vxlanBr/brif/ | grep -w "$vxlanDev" > /dev/null
if [ $? -gt 0 ]
then
brctl addif $vxlanBr $vxlanDev > /dev/null
if [ $? -gt 0 ]
then
# addif failed: re-check membership before reporting an error
ls /sys/class/net/$vxlanBr/brif/ | grep -w "$vxlanDev" > /dev/null
if [ $? -gt 0 ]
then
printf "Failed to add vxlan: $vxlanDev to $vxlanBr"
return 3
fi
fi
fi
# is vxlanBr up?
ip link show $vxlanBr | grep -w UP > /dev/null
if [ $? -gt 0 ]
then
ip link set $vxlanBr up
fi
return 0
}
# Remove the multicast route, the VXLAN device and the bridge for a VXLAN id.
# Arguments: $1 - VXLAN id (VNI)
#            $2 - physical interface (pif)
#            $3 - bridge name
#            $4 - address family handed to 'ip -f' (read only by the lines
#                 that use the upper-case variable names)
# NOTE(review): this span appears to interleave two variants of the function:
# one with lower-case locals, brctl and exit-status checks, and one with
# upper-case locals that uses only the ip command and checks no exit status.
deleteVxlan() {
local vxlanId=$1
local pif=$2
local vxlanDev=vxlan$vxlanId
local vxlanBr=$3
# group is 239.x.y.z, where x.y.z are the three low-order bytes of the VXLAN id
local mcastGrp="239.$(( ($vxlanId >> 16) % 256 )).$(( ($vxlanId >> 8) % 256 )).$(( $vxlanId % 256 ))"
local VNI=$1
local PIF=$2
local VXLAN_BR=$3
local FAMILY=$4
local VXLAN_DEV=vxlan${VNI}
# group address is produced by the multicastGroup helper for this VNI and family
local GROUP=$(multicastGroup ${VNI} ${FAMILY})
local sysfs_dir=/sys/devices/virtual/net/
# search every <dev>/brif/ directory for $pif and strip the path down to <dev>
local brif=`find ${sysfs_dir}*/brif/ -name $pif | sed -e "s,$sysfs_dir,," | sed -e 's,/brif/.*$,,'`
# drop the multicast route for this group
ip -f ${FAMILY} route del ${GROUP} dev ${PIF}
ip route del $mcastGrp/32 dev $brif
# detach the VXLAN device from its bridge, then delete the device
ip link set ${VXLAN_DEV} nomaster
ip link delete ${VXLAN_DEV}
ip link delete $vxlanDev
if [ $? -gt 0 ]
then
# a failed device delete is reported but does not stop the teardown
printf "Failed to del vxlan: $vxlanId"
printf "Continue..."
fi
ip link set $vxlanBr down
if [ $? -gt 0 ]
then
return 1
fi
brctl delbr $vxlanBr
if [ $? -gt 0 ]
then
printf "Failed to del bridge $vxlanBr"
return 1
fi
return 0
# bring the bridge down and delete it
ip link set ${VXLAN_BR} down
ip link delete ${VXLAN_BR} type bridge
}
# Script entry point: parse the options, make sure the vxlan kernel module is
# loaded, then run the requested operation (add or delete).
# NOTE(review): this span appears to interleave two variants of the script
# (lower-case vs upper-case variable names) and contains a diff hunk header;
# the if/fi pairs do not balance as shown.
op=
vxlanId=
OP=
VNI=
# address family defaults to IPv4; the -6 option switches it to inet6
FAMILY=inet
option=$@
# -o, -v, -p and -b take an argument; -6 (second getopts string only) is a flag
while getopts 'o:v:p:b:' OPTION
while getopts 'o:v:p:b:6' OPTION
do
case $OPTION in
o) oflag=1
op="$OPTARG"
OP="$OPTARG"
;;
v) vflag=1
vxlanId="$OPTARG"
VNI="$OPTARG"
;;
p) pflag=1
pif="$OPTARG"
PIF="$OPTARG"
;;
b) bflag=1
brName="$OPTARG"
BRNAME="$OPTARG"
;;
6)
FAMILY=inet6
;;
?) usage
exit 2
@ -197,43 +109,37 @@ do
esac
done
# Check that all arguments were passed in
# (each of -o, -v, -p and -b sets its flag to 1, so the concatenation must be "1111")
if [ "$oflag$vflag$pflag$bflag" != "1111" ]
then
if [[ "$oflag$vflag$pflag$bflag" != "1111" ]]; then
usage
exit 2
fi
# Do we support Vxlan?
# (try to load the module when lsmod does not list it)
lsmod|grep ^vxlan >& /dev/null
if [ $? -gt 0 ]
then
if [[ $? -gt 0 ]]; then
modprobe=`modprobe vxlan 2>&1`
if [ $? -gt 0 ]
then
printf "Failed to load vxlan kernel module: $modprobe"
if [[ $? -gt 0 ]]; then
echo "Failed to load vxlan kernel module: $modprobe"
exit 1
fi
fi
if [ "$op" == "add" ]
then
# Add the vxlan
addVxlan $vxlanId $pif $brName
# If the add fails then return failure
if [ $? -gt 0 ]
then
#
# Add a lockfile to prevent this script from running twice on the same host
# this can cause a race condition
#
LOCKFILE=/var/run/cloud/vxlan.lock
(
# take an exclusive lock on file descriptor 200, waiting at most 10 seconds;
# leave the subshell with status 1 when the lock cannot be obtained
flock -x -w 10 200 || exit 1
if [[ "$OP" == "add" ]]; then
addVxlan ${VNI} ${PIF} ${BRNAME} ${FAMILY}
if [[ $? -gt 0 ]]; then
exit 1
fi
else
if [ "$op" == "delete" ]
then
# Delete the vxlan
deleteVxlan $vxlanId $pif $brName
# Always exit with success
exit 0
elif [[ "$OP" == "delete" ]]; then
deleteVxlan ${VNI} ${PIF} ${BRNAME} ${FAMILY}
fi
fi
# file descriptor 200 is opened on the lock file for the whole subshell
) 200>${LOCKFILE}