eval '(exit $?0)' && eval 'exec perl -w -S $0 ${1+"$@"}'
    & eval 'exec perl -w -S $0 $argv:q'
    if 0;

#
# Copyright (c) 2006 Mellanox Technologies. All rights reserved.
#
# This Software is licensed under one of the following licenses:
#
# 1) under the terms of the "Common Public License 1.0" a copy of which is
#    available from the Open Source Initiative, see
#    http://www.opensource.org/licenses/cpl.php.
#
# 2) under the terms of the "The BSD License" a copy of which is
#    available from the Open Source Initiative, see
#    http://www.opensource.org/licenses/bsd-license.php.
#
# 3) under the terms of the "GNU General Public License (GPL) Version 2" a
#    copy of which is available from the Open Source Initiative, see
#    http://www.opensource.org/licenses/gpl-license.php.
#
# Licensee has the right to choose one of the above licenses.
#
# Redistributions of source code must retain the above copyright
# notice and one of the license notices.
#
# Redistributions in binary form must reproduce both the above copyright
# notice, one of the license notices in the documentation
# and/or other materials provided with the distribution.
#
#  $Id: ipoib_ha.pl 9854 2006-10-17 14:03:48Z vlad $
#

use strict;
use threads;
use threads::shared;

sub usage
{
   print "\n Usage: $0 [-p <primary IPoIB interface>] [-s <secondary IPoIB interface>] [--with-arping] [--with-multicast] [-v]\n";
   print "\n";
   print "  -p                  - primary IPoIB interface (default: ib0)\n";
   print "  -s                  - secondary IPoIB interface (default: ib1)\n";
   print "  --with-arping       - use modified arping utility to send unsolicited ARP REPLY\n";
   print "  --with-multicast    - support applications that are using multicast\n";
   print "  -v                  - verbose output\n";
   print "\n";
}

$| = 1;
my $netdir;
my $config;
my $verbose = 0;
my $with_arping = 0;
my $with_multicast = 0;
my $mc_thread;
my $mm_thread;

# The primary and secondary variables are user defined and will not be changed.
my $primary : shared;
$primary = 'ib0';
my $secondary : shared;
$secondary = 'ib1';
my $interface;
my $cont = 1;

my $ha_active : shared;
$ha_active = 0;
my $started_mcast = 0;
my $mcast_cache = '/var/cache/mcast.cache';
unlink($mcast_cache);

my @maddrs;
my @maddrs6;

while ( $#ARGV >= 0 ) {

   my $cmd_flag = shift(@ARGV);

    if ( $cmd_flag eq "-p" ) {
        $primary = shift(@ARGV);
    } elsif ( $cmd_flag eq "-s" ) {
        $secondary = shift(@ARGV);
    } elsif ( $cmd_flag eq "-v" ) {
        $verbose = 1;
    } elsif ( $cmd_flag eq "--with-arping" ) {
        $with_arping = 1;
    } elsif ( $cmd_flag eq "--with-multicast" ) {
        $with_multicast = 1;
    } else {
        &usage();
        exit 1;
    }
}

# active_if is a currently active interface.
# the default value is set to the primary
my $active_if : shared;
$active_if = 'none';

sub get_cfg
{
    my %dev = ();   
        
    my $name = $config;
    $name =~ s@.*ifcfg-@@;

    $dev{$name}{'HA'} = 0;
    $dev{$name}{'status'} = '';
    $dev{$name}{'DEVICE'} = $name;

    while ( ! open ( IFH , $config ) ) {
        print "Can't open config $config: $!\n" if ( $verbose );
        sleep(3);
    }

    while (<IFH>) {
        next unless not m@(\#).*@;
        my $key = (split ( "=", $_) )[0];
        chomp $key;
        my $value = (split ( "=", $_) )[1];
        $value =~ s/\'//g;
        chomp $value;

        $dev{$name}{$key} = $value;
        
    }
    close IFH || die "Can't close config $config: $!";

    return %dev;

}

sub print_cfg
{
    my %cfg = @_;
    print "\nDate:" . localtime(time) . "\n";
    print "Bond:\n";
    print "======================================\n";
    for my $key ( keys %cfg ) {
        print $key . ' = ' . $cfg{$key} . "\n";
    }
}

sub print_ifcfg
{
    my %cfg = @_;
    print "\nDate:" . localtime(time) . "\n";
    for my $key ( keys %cfg ) {
        print "$key:\n";
        print "======================================\n";
        for my $subkey ( keys %{$cfg{$key}} ) {
            print $subkey . ' = ' . $cfg{$key}->{$subkey} . "\n";
        }
    }
}

sub is_newmaddr
{
    my $addr;
    my $if;
    my $family;
    ($addr, $if, $family) = @_;
    my @lines = ();
    
    open (CHK, "/sbin/ipmaddr show $if |");
    while ( <CHK> ) {
        next unless m@inet[6]*@;
        push @lines, $_;
    }
    close CHK;

    if ( grep { /\b$addr\b/ } @lines ) {
        print "Found addr: $addr\n";
        return 1;
    }
    
    return 0;
}

# Set network environment directory following the linux distribution
if ( -f "/etc/SuSE-release" ) {
    $netdir = '/etc/sysconfig/network';
} else {
    $netdir = '/etc/sysconfig/network-scripts';
}
    

$config = $netdir . '/' . 'ifcfg-' . $primary;

if ( not -f $config )
{
    print "No configuration file found for IPoIB primary interface.\n";
    print "Exiting...\n";
    exit 1;
}

my %ifcfg = get_cfg();

my %bond = %{$ifcfg{$primary}};
if ( $verbose ) {
    print_ifcfg(%ifcfg);
    print_cfg(%bond);
}

# Get IPoIB multicast groups
sub get_maddrs
{
    @maddrs = ();
    @maddrs6 = ();
    open (MCAST, "/sbin/ipmaddr show $bond{'DEVICE'} |");
    my $i = 0;
    while ( <MCAST> ) {
        next if (not m@inet[6]*@ or m/\b224.0.0.251\b/ or m/\b224.0.0.1\b/ or m/\bff02::1\s+/);
        my $line = $_;
        $line =~ s/^\s+//;
        $line =~ s/\s+$//;
        my $family = ( split(" ", $line) )[0];
        my $maddr = ( split (" ", $line) )[1];
        if ( $maddr ) {
            if ( $family eq "inet6" ) {
                print "Maddr inet6: $maddr\n" if ( $verbose );
                push (@maddrs6, $maddr);
            } else {
                print "Maddr inet: $maddr\n" if ( $verbose );
                push (@maddrs, $maddr);
            }
        }
        
    }
    close (MCAST);
    
}

sub set_action
{
    my $action;
    my $family;
    my $ipaddr;
    my $multicast_addr;
    my $ifindex;

    ( $action, $family, $ipaddr, $multicast_addr, $ifindex) = @_;
    print "Action: $action, Family: $family, IP: $ipaddr, Multicast Address: $multicast_addr, Ifindex: $ifindex\n" if ( $verbose );
    open (MC, ">> $mcast_cache") or die "Can't open $mcast_cache: $!\n";
        printf MC "$action $family $ipaddr $multicast_addr $ifindex\n";
    close MC;
}

sub get_ifindex
{
    my $name = shift @_;
    while ( ! open (IF, "/sys/class/net/$name/ifindex") ) {
        print "Can't open /sys/class/net/$name/ifindex: $!\n" if ( $verbose );
        sleep(3);
    }
    my $ifindex = <IF>;
    chomp $ifindex;
    close (IF);
    return $ifindex;
}

sub set_down
{
    my $ifdown = shift @_;
    system ("/sbin/ifconfig $ifdown 0.0.0.0 > /dev/null 2>&1");
    # system ("/sbin/ifconfig $ifdown down > /dev/null 2>&1");
    # system("/usr/utils/ip link set $ifdown down > /dev/null 2>&1");
    # system("/usr/utils/ip link set $ifdown arp off > /dev/null 2>&1");
    # system("/usr/utils/ip link set $ifdown multicast off > /dev/null 2>&1");
}

sub set_up_bond
{
    if ( $bond{'BROADCAST'} ) {
	    system ("/sbin/ifconfig $bond{'DEVICE'} $bond{'IPADDR'} netmask $bond{'NETMASK'} broadcast $bond{'BROADCAST'} > /dev/null 2>&1");
    }
    else {
    	system ("/sbin/ifconfig $bond{'DEVICE'} $bond{'IPADDR'} netmask $bond{'NETMASK'} > /dev/null 2>&1");
    }
    # system("/usr/utils/ip link set $bond{'DEVICE'} arp on > /dev/null 2>&1");
    # system("/usr/utils/ip link set $bond{'DEVICE'} multicast on > /dev/null 2>&1");

    if ( $with_multicast ) {
        for my $i ( 0 .. $#maddrs ) {
            if ( ( my $rc = is_newmaddr($maddrs[$i], $bond{'DEVICE'}, "inet") ) == 0 ) {
                set_action("add", "inet", $bond{'IPADDR'}, $maddrs[$i], $bond{'ifindex'});
                print "New multicast address $i = $maddrs[$i]\n" if ( $verbose );
            }
            else {
                print "Existing multicast address $i = $maddrs[$i]\n" if ( $verbose );
            }
        }
        for my $i ( 0 .. $#maddrs6 ) {
            if ( ( my $rc = is_newmaddr($maddrs6[$i], $bond{'DEVICE'}, "inet6") ) == 0 ) {
                set_action("add", "inet6", $bond{'IPADDR'}, $maddrs6[$i], $bond{'ifindex'});
                print "New multicast address $i = $maddrs6[$i]\n" if ( $verbose );
            }
            else {
                print "Existing multicast address $i = $maddrs6[$i]\n" if ( $verbose );
            }
        }
        if ( not $started_mcast ) {
            $mc_thread = threads->new(\&mcasthandle);
            # $mm_thread = threads->new(\&monitor_multicast);
            $started_mcast = 1;
        }
    }
    # Send unsolicited arp reply to update neighbours
    if ( $with_arping ) {
        system("arpingib -c 10 -R  -I $bond{'DEVICE'} $bond{'IPADDR'} > /dev/null 2>&1");
        if ($? == -1) {
            print "failed to execute: $!\n";
        }
        elsif ($? & 127) {
            printf "child died with signal %d, %s coredump\n",
                ($? & 127),  ($? & 128) ? 'with' : 'without';
        }
        else {
            my $res = $? >> 8;
            if ( $res ) {
                printf "command exited with value %d\n", $res;
            }
        }
    }

}

# Migrate IPoIB configuration to the new_active IPoIB interface
sub migrate_conf
{
    my $new_active = shift @_;
    if ( $with_multicast ) {
        get_maddrs();
    }

    my $other = get_other( $new_active );
    print "migrate_conf: Migrating from $other to $new_active\n" if ( $verbose );
    # Disable failed active_if device
    set_down( $other );
    $bond{'DEVICE'} = $new_active;
    $bond{'ifindex'} = get_ifindex( $new_active );

    # Setup secondary device
    set_up_bond();

}

# Check if some of the multicst groups where removed from the active_if IPoIB port
# And update mcast_cache file
sub monitor_multicast
{
    while ( 1 )
    {
        # Wait for active_if IPoIB interface failure
        if ( not $ha_active ) {
            sleep (1);
            print "Waiting for activation\n";
            next;
        }
        my $i;
        my @lines = ();
         
        my $other = get_other( $active_if );
        open (MCASTMON, "/sbin/ipmaddr show $other |");
        while (<MCASTMON>) {
            next unless m@inet[6]*@;
            push @lines, $_;
        }
        close (MCASTMON);

        for $i ( 0 .. $#maddrs6 ) {
            if (not grep  { /$maddrs6[$i]/ } @lines ) {
                print "Removing maddr inet6: $maddrs6[$i]\n" if ( $verbose );
                set_action("del", "inet6", $bond{'IPADDR'}, $maddrs6[$i], $bond{'ifindex'});
                splice (@maddrs6, $i, 1);
            }
        }
        for $i ( 0 .. $#maddrs ) {
            if (not grep  { /$maddrs[$i]/ } @lines ) {
                print "Removing maddr inet: $maddrs[$i]\n" if ( $verbose );
                set_action("del", "inet", $bond{'IPADDR'}, $maddrs[$i], $bond{'ifindex'});
                splice (@maddrs, $i, 1);
            }
        }
        sleep(2);
    }
}

# Join/remove to/from the multicast groups the secondary IPoIB interface
sub mcasthandle
{
    system ("mcasthandle $mcast_cache");
}

sub is_up
{
    my $ifcheck = shift @_;
    open(IFSTATUS, "/usr/utils/ip link show dev $ifcheck |");
    while ( <IFSTATUS> ) {
            next unless m@(\s$ifcheck).*@;
        if( m/<BROADCAST,MULTICAST,UP>/ ) {
            close(IFSTATUS);
            return 1;
        }
    }
    close(IFSTATUS);
    return 0;

}

sub is_carrier
{
    my $ifcheck = shift @_;
    open(IFSTATUS, "/usr/utils/ip link show dev $ifcheck |");
    while ( <IFSTATUS> ) {
            next unless m@(\s$ifcheck).*@;
        if( m/NO-CARRIER/ ) {
            close(IFSTATUS);
            return 0;
        }
    }
    close(IFSTATUS);
    return 1;
}

sub get_other
{
    my $given = shift @_;
    if ( $given eq $primary ) {
        return $secondary;
    } else {
        return $primary;
    }
}

# Wait for one of the interfaces to be active
while ( $active_if eq 'none' ) {
        if ( is_carrier ( $primary ) ) {
                set_down( $secondary );
                $active_if = $primary;
        } elsif ( is_carrier ( $secondary ) ) {
                set_down( $primary );
                migrate_conf( $secondary );
                $active_if = $secondary;
        }
	sleep(1) if ( $active_if eq 'none' );
}

open ( MONITOR, "/usr/utils/ip monitor link |" );

while ( <MONITOR> ) {

    next unless m@(\sib[0-9]+:).*@;
    $interface = ( split ( ":" , $_ ) )[1];
    $interface =~ s/\s//g;

    if ( ( $interface ne $primary ) and ( $interface ne $secondary ) ) {
        next;
    }

    my $has_carrier = is_carrier ( $interface );

    if ( not $has_carrier ) {
        print "Got NO-CARRIER event on $interface.\n" if ( $verbose );
        if ( is_carrier ( $interface ) ) {
            print "Got NO-CARRIER but $interface is UP\n" if ( $verbose );
        }

        print "Interface $interface is down.\n" if ( $verbose );
        print "Currently Active : $active_if\n" if ( $verbose );
        if ( $interface eq $active_if ) {
            my $other = get_other( $interface );
            if ( is_carrier ( $other ) ) {
                print "Other device: $other is UP\n" if ( $verbose );
                $ha_active = 1;
                migrate_conf( $other );
                $active_if = $other;
            }
            else {
                print "Both interfaces are down\n" if ( $verbose );
                $active_if = 'none';
                next;
            }

        }
    
    } else {

    	    if  ( not is_carrier ( $interface ) ) {
                print "****** Got CARRIER-ON but $interface is DOWN\n" if ( $verbose );
            }

            print "Got CARRIER-ON event on $interface.\n" if ( $verbose );
            if ( $active_if eq 'none' ) {
                migrate_conf( $interface );
                $active_if = $interface;
            }
    }

        
    # print_cfg(%bond) if ( $verbose );
}



if ( $with_multicast ) {
    $mc_thread->detach();
    $mm_thread->detach();
}
my $rc = close (MONITOR);
exit($rc);

