kvm: Refactor VXLAN script and add IPv6 support (#3070)

* vxlan: Code indentation and styling fixes

This script was using TAB instead of 4 spaces and had many blank
lines containing whitespace.

This commit also fixes some Bash styling, but it does not touch the
functionality of the script.

Signed-off-by: Wido den Hollander <wido@widodh.nl>

* vxlan: Improve Bash if-statement logic

Bash recommends using double brackets instead of single brackets in
if-statement test logic.

Signed-off-by: Wido den Hollander <wido@widodh.nl>

* vxlan: Disable IPv6 on bridge and VXLAN devices

They are only transport devices and should not be interacting
in the IPv6 traffic.

If IPv6 is enabled, Instances can connect to the Hypervisor over
Link-Local IPv6, which is a potential security issue.

By disabling IPv6 on the Bridge and VXLAN device they still forward
Layer 2 packets as intended, but they do not respond to anything.

IPv4 and IPv6 traffic towards the Instances is untouched and works
as before.

Signed-off-by: Wido den Hollander <wido@widodh.nl>

* vxlan: Refactor modifyvxlan.sh for KVM by using only iproute2

This commit refactors the modifyvxlan.sh script by using only iproute2,
the 'ip' command for all functions.

brctl is deprecated and most bridge functionality can be performed with
the 'ip' command.

This commit also applies various Bash coding fixes and removes a lot of exit
status checking which was redundant.

In addition, it adds an IPv6 underlay for VXLAN transport. If the caller (KVM Agent)
adds the '-6' flag it will generate IPv6 multicast groups and routes which will
transport the VXLAN encapsulated packets over IPv6 multicast groups.

Signed-off-by: Wido den Hollander <wido@widodh.nl>
This commit is contained in:
Wido den Hollander 2019-01-09 13:21:07 +01:00 committed by Gabriel Beims Bräscher
parent 7b0ff7f0c7
commit d3e95b98fc

View File

@ -16,224 +16,130 @@
# specific language governing permissions and limitations # specific language governing permissions and limitations
# under the License. # under the License.
# modifyvxlan.sh -- adds and deletes VXLANs from a Routing Server # modifyvxlan.sh -- Managed VXLAN devices and Bridges on Linux KVM hypervisor
# set -x
## TODO(VXLAN): MTU, IPv6 underlying
usage() { usage() {
printf "Usage: %s: -o <op>(add | delete) -v <vxlan id> -p <pif> -b <bridge name>\n" echo "Usage: $0: -o <op>(add | delete) -v <vxlan id> -p <pif> -b <bridge name> (-6)"
}
# multicastGroup VNI [FAMILY]
#
# Print the multicast group address derived from a VXLAN Network Identifier.
# The VNI is split into three 8-bit values (bits 16-23, 8-15 and 0-7) which
# are appended to a fixed prefix:
#   inet  -> 239.<high>.<mid>.<low>
#   inet6 -> ff05::<high>:<mid>:<low>
#
# Arguments:
#   $1 - VNI (integer)
#   $2 - address family, "inet" or "inet6"; empty or missing means "inet"
# Outputs:
#   the group address on stdout; a diagnostic on stderr for an unknown family
# Returns:
#   0 on success, 1 when the address family is not supported
multicastGroup() {
    local VNI=$1
    local FAMILY=${2:-inet}
    local HIGH=$(( (VNI >> 16) % 256 ))
    local MID=$(( (VNI >> 8) % 256 ))
    local LOW=$(( VNI % 256 ))

    case "$FAMILY" in
        inet)
            echo "239.${HIGH}.${MID}.${LOW}"
            ;;
        inet6)
            # The three values are deliberately written in decimal notation:
            # every host must derive the same group for a given VNI, so the
            # textual form must not change.
            echo "ff05::${HIGH}:${MID}:${LOW}"
            ;;
        *)
            # Previously this case printed nothing and reported success,
            # leaving the caller with an empty group address.
            echo "Unsupported address family: ${FAMILY}" >&2
            return 1
            ;;
    esac
}
addVxlan() { addVxlan() {
local vxlanId=$1 local VNI=$1
local pif=$2 local PIF=$2
local vxlanDev=vxlan$vxlanId local VXLAN_BR=$3
local vxlanBr=$3 local FAMILY=$4
local mcastGrp="239.$(( ($vxlanId >> 16) % 256 )).$(( ($vxlanId >> 8) % 256 )).$(( $vxlanId % 256 ))" local VXLAN_DEV=vxlan${VNI}
local GROUP=$(multicastGroup ${VNI} ${FAMILY})
## TODO(VXLAN): $brif (trafficlabel) should be passed from caller because we cannot assume 1:1 mapping between pif and brif. echo "multicast ${GROUP} for VNI ${VNI} on ${PIF}"
# lookup bridge interface
local sysfs_dir=/sys/devices/virtual/net/
local brif=`find ${sysfs_dir}*/brif/ -name $pif | sed -e "s,$sysfs_dir,," | sed -e 's,/brif/.*$,,'`
if [ "$brif " == " " ] if [[ ! -d /sys/class/net/${VXLAN_DEV} ]]; then
then ip -f ${FAMILY} link add ${VXLAN_DEV} type vxlan id ${VNI} group ${GROUP} ttl 10 dev ${PIF}
if [ -d "/sys/class/net/${pif}" ] ip link set ${VXLAN_DEV} up
then ip -f ${FAMILY} route add ${GROUP} dev ${PIF}
# if bridge is not found, but matches a pif, use it sysctl -qw net.ipv6.conf.${VXLAN_DEV}.disable_ipv6=1
brif=$pif fi
else
printf "Failed to lookup bridge interface which includes pif: $pif."
return 1
fi
else
# confirm ip address of $brif
ip addr show $brif | grep -w inet
if [ $? -gt 0 ]
then
printf "Failed to find vxlan multicast source ip address on brif: $brif."
return 1
fi
fi
# mcast route if [[ ! -d /sys/class/net/$VXLAN_BR ]]; then
## TODO(VXLAN): Can we assume there're only one IP address which can be multicast src IP on the IF? ip link add name ${VXLAN_BR} type bridge
ip route get $mcastGrp | grep -w "dev $brif" ip link set ${VXLAN_BR} up
if [ $? -gt 0 ] sysctl -qw net.ipv6.conf.${VXLAN_BR}.disable_ipv6=1
then fi
ip route add $mcastGrp/32 dev $brif
if [ $? -gt 0 ]
then
printf "Failed to add vxlan multicast route on brif: $brif."
return 1
fi
fi
if [ ! -d /sys/class/net/$vxlanDev ] bridge link show|grep ${VXLAN_BR}|awk '{print $2}'|grep "^${VXLAN_DEV}\$" > /dev/null
then if [[ $? -gt 0 ]]; then
ip link add $vxlanDev type vxlan id $vxlanId group $mcastGrp ttl 10 dev $brif ip link set ${VXLAN_DEV} master ${VXLAN_BR}
fi
if [ $? -gt 0 ]
then
# race condition that someone already creates the vxlan
if [ ! -d /sys/class/net/$vxlanDev ]
then
printf "Failed to create vxlan $vxlanId on brif: $brif."
return 1
fi
fi
fi
# is up?
ip link show $vxlanDev | grep -w UP > /dev/null
if [ $? -gt 0 ]
then
ip link set $vxlanDev up > /dev/null
fi
if [ ! -d /sys/class/net/$vxlanBr ]
then
brctl addbr $vxlanBr > /dev/null
if [ $? -gt 0 ]
then
if [ ! -d /sys/class/net/$vxlanBr ]
then
printf "Failed to create br: $vxlanBr"
return 2
fi
fi
brctl setfd $vxlanBr 0
fi
#pif is eslaved into vxlanBr?
ls /sys/class/net/$vxlanBr/brif/ | grep -w "$vxlanDev" > /dev/null
if [ $? -gt 0 ]
then
brctl addif $vxlanBr $vxlanDev > /dev/null
if [ $? -gt 0 ]
then
ls /sys/class/net/$vxlanBr/brif/ | grep -w "$vxlanDev" > /dev/null
if [ $? -gt 0 ]
then
printf "Failed to add vxlan: $vxlanDev to $vxlanBr"
return 3
fi
fi
fi
# is vxlanBr up?
ip link show $vxlanBr | grep -w UP > /dev/null
if [ $? -gt 0 ]
then
ip link set $vxlanBr up
fi
return 0
} }
deleteVxlan() { deleteVxlan() {
local vxlanId=$1 local VNI=$1
local pif=$2 local PIF=$2
local vxlanDev=vxlan$vxlanId local VXLAN_BR=$3
local vxlanBr=$3 local FAMILY=$4
local mcastGrp="239.$(( ($vxlanId >> 16) % 256 )).$(( ($vxlanId >> 8) % 256 )).$(( $vxlanId % 256 ))" local VXLAN_DEV=vxlan${VNI}
local GROUP=$(multicastGroup ${VNI} ${FAMILY})
local sysfs_dir=/sys/devices/virtual/net/ ip -f ${FAMILY} route del ${GROUP} dev ${PIF}
local brif=`find ${sysfs_dir}*/brif/ -name $pif | sed -e "s,$sysfs_dir,," | sed -e 's,/brif/.*$,,'`
ip route del $mcastGrp/32 dev $brif ip link set ${VXLAN_DEV} nomaster
ip link delete ${VXLAN_DEV}
ip link delete $vxlanDev ip link set ${VXLAN_BR} down
ip link delete ${VXLAN_BR} type bridge
if [ $? -gt 0 ]
then
printf "Failed to del vxlan: $vxlanId"
printf "Continue..."
fi
ip link set $vxlanBr down
if [ $? -gt 0 ]
then
return 1
fi
brctl delbr $vxlanBr
if [ $? -gt 0 ]
then
printf "Failed to del bridge $vxlanBr"
return 1
fi
return 0
} }
op= OP=
vxlanId= VNI=
FAMILY=inet
option=$@ option=$@
while getopts 'o:v:p:b:' OPTION while getopts 'o:v:p:b:6' OPTION
do do
case $OPTION in case $OPTION in
o) oflag=1 o) oflag=1
op="$OPTARG" OP="$OPTARG"
;; ;;
v) vflag=1 v) vflag=1
vxlanId="$OPTARG" VNI="$OPTARG"
;; ;;
p) pflag=1 p) pflag=1
pif="$OPTARG" PIF="$OPTARG"
;; ;;
b) bflag=1 b) bflag=1
brName="$OPTARG" BRNAME="$OPTARG"
;; ;;
?) usage 6)
exit 2 FAMILY=inet6
;; ;;
?) usage
exit 2
;;
esac esac
done done
# Check that all arguments were passed in if [[ "$oflag$vflag$pflag$bflag" != "1111" ]]; then
if [ "$oflag$vflag$pflag$bflag" != "1111" ] usage
then exit 2
usage
exit 2
fi fi
# Do we support Vxlan?
lsmod|grep ^vxlan >& /dev/null lsmod|grep ^vxlan >& /dev/null
if [ $? -gt 0 ] if [[ $? -gt 0 ]]; then
then modprobe=`modprobe vxlan 2>&1`
modprobe=`modprobe vxlan 2>&1` if [[ $? -gt 0 ]]; then
if [ $? -gt 0 ] echo "Failed to load vxlan kernel module: $modprobe"
then exit 1
printf "Failed to load vxlan kernel module: $modprobe" fi
exit 1
fi
fi fi
if [ "$op" == "add" ]
then
# Add the vxlan
addVxlan $vxlanId $pif $brName
# If the add fails then return failure #
if [ $? -gt 0 ] # Add a lockfile to prevent this script from running twice on the same host
then # this can cause a race condition
exit 1 #
fi
else
if [ "$op" == "delete" ]
then
# Delete the vxlan
deleteVxlan $vxlanId $pif $brName
# Always exit with success LOCKFILE=/var/run/cloud/vxlan.lock
exit 0
fi
fi
# Run the requested operation while holding an exclusive lock on
# ${LOCKFILE} (opened on file descriptor 200 for the lifetime of the
# subshell), so that two invocations on the same host cannot race each other.
# The subshell is the last command, so its status is the script's exit status:
#   - 1 when the lock cannot be obtained within 10 seconds
#   - 1 when addVxlan fails
#   - the status of deleteVxlan for a delete
#   - 0 otherwise (including an operation other than "add" or "delete")
(
    if ! flock -x -w 10 200; then
        echo "Failed to obtain lock on ${LOCKFILE}" >&2
        exit 1
    fi

    # All expansions are quoted so that an empty value or one containing
    # whitespace cannot shift the positional arguments of the functions.
    case "$OP" in
        add)
            addVxlan "${VNI}" "${PIF}" "${BRNAME}" "${FAMILY}" || exit 1
            ;;
        delete)
            deleteVxlan "${VNI}" "${PIF}" "${BRNAME}" "${FAMILY}"
            ;;
    esac
) 200>"${LOCKFILE}"