Thursday, April 2, 2015

VCS script for troubleshooting

A handy script for Veritas Cluster Server (VCS) that helps to troubleshoot cluster-related issues quickly.
It shows the state of every service group (SG) and its resources on each cluster node, together with the dependencies between resources and between service groups, in a single view. Without it, the same information has to be pieced together from the output of several traditional VCS commands.

root# more chk_cluconf
#!/usr/bin/ksh
# Script:           chk_cluconf
# Purpose:          Show the state of VCS service groups and their resources on
#                   every cluster node, together with group and resource
#                   dependencies.
# Usage:            chk_cluconf [-dgn] <servicegroup list>
#                   chk_cluconf [-dgn] -G
#                   chk_cluconf [-dg]  -p
#                   chk_cluconf -h

#  By default NO fancy graphics options (option -g fills these via tput)
bon=""   # `tput bold` = bold    characters on
ion=""   # `tput smso` = inverse characters on
bof=""   # `tput rmso` = output back to normal (bold/inverse off)

###
###  constants
###
#  VCS command line tools
HASYS=/opt/VRTS/bin/hasys
HAGRP=/opt/VRTS/bin/hagrp
HARES=/opt/VRTS/bin/hares

#  Private scratch directory for all temporary files.
#  Fixed file names directly in /tmp would let two concurrent runs overwrite
#  each other's data and would make this script follow a symlink that somebody
#  planted in /tmp beforehand. mkdir fails if the name already exists (even as
#  a symlink), so a successfully created mode-700 directory is ours alone.
WORKDIR=/tmp/chk_cluconf.$$
if mkdir -m 700 "$WORKDIR"; then
   :
else
   echo "Cannot create work directory $WORKDIR; exiting..." >&2
   exit 1
fi

#  Remove the scratch directory on every exit path; turn the usual
#  termination signals into a normal exit so that the EXIT trap runs.
trap 'rm -rf "$WORKDIR"' EXIT
trap 'exit 1' HUP INT TERM

#  Scratch files holding command output that is parsed more than once
SYSINF=$WORKDIR/sysinf.out       #  sorted list of service group names
DEPINF=$WORKDIR/depinf.out       #  "hagrp -dep" output of the current group
CLUINF=$WORKDIR/cluinf.out       #  "hagrp/hares -display" output
GRPINF=$WORKDIR/grpinf.out       #  resources of the current group
RESINF=$WORKDIR/resinf.out       #  "hares -dep" output of the current resource

S1FILE=$WORKDIR/dep_s1.txt       #  numbers of resources the current one is parent of
S2FILE=$WORKDIR/dep_s2.txt       #  numbers of resources the current one is child of
LTFILE=$WORKDIR/long_types.txt   #  abbreviated / full resource type names

#  Probe for a running VERITAS cluster: "hasys -list" only succeeds when one
#  is active. Its exit status is kept in RC; anything but 0 ends the script
#  with exit code 1.
$HASYS -list >/dev/null 2>&1
RC=$?
[ "$RC" -eq 0 ] || {
   echo "No VERITAS cluster active on this system; exiting..."
   exit 1
}

###
###  Options
###
SpcM=0   #  Space-Marker (newline before/after every sg; 0=no, 1=yes)

ALLSG=0  #  Servicegroups displayed: 0=only SGs given as an argument
         #                           1=ALL SGs
         #                           2=only SGs in PARTIAL state

NSTAT=0  #  Display node status:          0=no; 1=yes
DSDTL=0  #  Display detailed information: 0=no; 1=yes

#  Print the name of every configured service group, one per line, sorted
#  and without duplicates.
list_service_groups() {
   $HAGRP -list | awk '{print $1}' | sort -u
}

#  Print the help text (option -h) to stdout.
show_usage() {
   echo "usage:    chk_cluconf [-dgn] <servicegroup list>"
   echo "          chk_cluconf [-dgn] -G"
   echo "          chk_cluconf [-dg] -p"
   echo "          chk_cluconf -h"
   echo
   echo "function:    show status and dependencies for resources in (blank separated) <servicegroup list>"
   echo "          or show status and dependencies for resources in ALL SGs (-G)"
   echo "          or show status and dependencies for resources in SGs which are in PARTIAL state (-p)"
   echo "          or display this help text (-h)"
   echo
   echo "options: -d: display detailed information"
   echo "         -g: enable fancy graphics"
   echo "         -h: display this help text and exit"
   echo "         -G: show resource information for ALL service groups"
   echo "         -n: display status of cluster nodes also"
   echo "         -p: show resource information for service groups in PARTIAL state"
}

#  Parse the command line.
#  Arguments: the script's command line arguments
#  Sets:      DSDTL, NSTAT, ALLSG, bon/ion/bof and SGLIST (blank separated
#             list of the service groups whose resources are to be shown)
#  Exits:     with status 0 after printing the help text for -h
parse_options() {
   OPTIND=1
   #  The leading ":" selects silent mode: for an unknown option getopts sets
   #  OPT to "?" and puts the offending letter into OPTARG, so that the
   #  message below can name the option that was actually given.
   while getopts ":dgGhnp" OPT
   do
      case $OPT in
      d)   DSDTL=1
           ;;
      g)   bon=`tput bold`   # bold    characters on
           ion=`tput smso`   # inverse characters on
           bof=`tput rmso`   # output back to normal (bold/inverse off)
           ;;
      G)   ALLSG=1
           SGLIST=""
           #  for/in instead of "| while read": SGLIST is then built in the
           #  current shell no matter how the shell runs pipeline stages
           for SG in `list_service_groups`
           do
              SGLIST=$SGLIST" "$SG
           done
           ;;
      h)   show_usage
           exit 0
           ;;
      n)   NSTAT=1
           ;;
      p)   ALLSG=2
           NSTAT=1
           SGLIST=""
           for SG in `list_service_groups`
           do
              #  keep the group if any of its State lines mentions PARTIAL
              if $HAGRP -display $SG | grep -w State | grep PARTIAL >/dev/null 2>&1
              then
                 SGLIST=$SGLIST" "$SG
              fi
           done
           ;;
      *)   echo "Option -$OPTARG is not supported and will be ignored."
           ;;
      esac
   done
   shift $(($OPTIND - 1))

   #  Without -G / -p the remaining arguments are the service group list
   if [ "$ALLSG" -eq 0 ]; then SGLIST=$*; fi
}

#  ${1+"$@"} passes the arguments unchanged and expands to nothing at all
#  (not to one empty word) when the script was called without arguments.
parse_options ${1+"$@"}

###
###  Preliminaries: formatting
###
#  Collect the cluster node names in CLN[0..CC-1]. SPN becomes the length of
#  the longest node name, but never less than 7.
#  (ksh runs the "while" at the end of a pipeline in the current shell, which
#  is why CLN, CC and SPN are still set after the loop.)
typeset -i CC=0
typeset -i SPN=7
$HASYS -list | while read NODENAME
do
   CLN[$CC]=$NODENAME
   if (( ${#NODENAME} > SPN )); then SPN=${#NODENAME}; fi
   (( CC += 1 ))
done

#  SPR: width of the service group / resource name column, i.e. the length
#       of the longest name. It starts at 6 because the column title "SG/Res"
#       is 6 characters long.
typeset -i SPR=6

#  Step 1: the names of all service groups
$HAGRP -list | awk '{print $1}' | sort -u | while read GRPNAME
do
   if (( ${#GRPNAME} > SPR )); then SPR=${#GRPNAME}; fi
done

#  Without any selected service group there are no resource rows, so the
#  detail columns are switched off
case "$SGLIST" in
"")  DSDTL=0
     ;;
esac

#  Step 2: the names of the resources in the selected service groups
for GRPNAME in $SGLIST
do
   $HAGRP -resources $GRPNAME | while read RESNAME
   do
      if (( ${#RESNAME} > SPR )); then SPR=${#RESNAME}; fi
   done
done

#  Write character $1 to stdout $2 times (nothing if $2 is missing or < 1).
hdr_fill() {
   typeset -i hf_k=0
   while (( hf_k < ${2:-0} ))
   do
      printf "%s" "$1"
      (( hf_k += 1 ))
   done
}

#  Print the two header lines of the table.
#
#  Line 1: "NMB>  SG/Res" padded to the name column width, the titles of the
#          detail columns when DSDTL=1, then one title per cluster node.
#  Line 2: the same columns underlined with dashes.
#
#  Reads:  bon, bof (bold on/off), SPR (name column width), DSDTL,
#          CC / CLN[] (number and names of the nodes), SPN (node column width)
#
#  All fixed text is passed as an argument to printf "%s" and never as the
#  format itself: a format that begins with "-" (e.g. "--->  ") is rejected as
#  an invalid option by some printf implementations, and echo "...\c" is only
#  understood by some echo implementations.
print_header() {
   typeset -i ph_i=0

   #  Line 1: column titles
   printf "%s" "NMB>  ${bon}SG${bof}/Res"
   hdr_fill " " $((SPR - 6))       #  "SG/Res" already fills 6 columns
   printf "  "

   if (( DSDTL == 1 )); then
      #  Resource type and the AutoStart/Critical/Enabled flags
      printf "%s" "Resourcetype  ACE  "
   fi

   while (( ph_i < CC ))
   do
      printf "%-${SPN}s  " "${CLN[$ph_i]}"
      (( ph_i += 1 ))
   done
   echo

   #  Line 2: underlines
   printf "%s" "--->  "
   hdr_fill "-" $SPR
   printf "  "

   if (( DSDTL == 1 )); then
      printf "%s" "------------  ---  "
   fi

   ph_i=0
   while (( ph_i < CC ))
   do
      hdr_fill "-" $SPN
      printf "  "
      (( ph_i += 1 ))
   done
   echo
}

print_header

#  Node status row - only printed when NSTAT is set.
#  Detailed view: date and time go below the resource type / ACE columns.
#  Compact view:  a full timestamp is appended after the last node column.
if [ "$NSTAT" -eq 1 ]; then
   #  Row label, left-justified across the number and name columns
   #  (12 characters of text plus SPR-6 blanks)
   printf "%-$(($SPR + 6))s" "Node  status"

   case "$DSDTL" in
   1)   printf "  %s  %s  " "$(date '+%d.%m.%Y')" "$(date '+%H:%M')"
        ;;
   *)   printf "  "
        ;;
   esac

   #  One cell per node; FAULTED and UNKNOWN are shown in inverse video
   typeset -i i=0
   while (( i < CC ))
   do
      $HASYS -display ${CLN[$i]} | grep SysState | awk '{print $3}' | read SST
      case "$SST" in
      FAULTED|UNKNOWN)
           printf "%s" "${ion}"
           printf "%-${SPN}s" $SST
           printf "%s" "${bof}"
           ;;
      *)   printf "%-${SPN}s" $SST
           ;;
      esac
      printf "  "
      (( i += 1 ))
   done

   if [ "$DSDTL" -eq 0 ]; then
      printf "%s" "$(date '+%d.%m.%y %H:%M:%S')"
   fi
   echo

   #  blank line between the node status row and the rest
   echo
fi

#  List of service groups (needed for dependencies).
#  The line number of a group in $SYSINF is the number shown as "SGnn" in the
#  output and is also the number used to refer to it in dependency lists.
$HAGRP -list | awk '{print $1}' | sort -u > $SYSINF

###
###  Show information, service group by service group
###
#  For every service group one "SGnn" line is printed: name, state on each
#  node and the group dependencies in square brackets. For the groups named in
#  $SGLIST this line is followed by one line per resource.
#
#  NOTE: this section depends on ksh running the last command of a pipeline in
#  the current shell: variables assigned inside "... | while read" loops and
#  by "... | read VAR" are used after the pipeline has finished.
integer g=1   #  running number of the service group (= line number in $SYSINF)
cat $SYSINF | while read SGR
do
   #  Determine group dependency
   #    OLD STYLE: depending/dependent groups displayed unordered and ungrouped

   #  "hagrp -dep" prints a header line starting with "#Parent" only if the
   #  group takes part in a dependency; RC is the exit status of that grep
   $HAGRP -dep $SGR 2>/dev/null | grep "^#Parent" >/dev/null 2>&1; RC=$?

   if [ "$RC" -eq 1 ]; then
      SGD="NO_DEP"
   else
      #  Both collectors start as "~", which serves as the "empty" marker.
      #  Numbers are prepended to S1 and appended to S2, so the result
      #  looks like "5,3~" and "~7,9".
      S1="~"  #  Collector for groups that need to be running for THIS sg to be able to start
      S2="~"  #  Collector for dependent groups that can only start when THIS sg is running

      $HAGRP -dep $SGR | grep -v "^#Parent" > $DEPINF

      #  Columns 1 and 2 of each dependency line are read as parent and child
      awk '{print $1, $2}' $DEPINF | while read PAR CHL
      do
         #  THIS group is the parent: look up the number of the child group
         if [ "$PAR" == "$SGR" ]; then
            #  grep -w may also hit longer names that merely contain $CHL as a
            #  word; the exact comparison of $Ex below filters those out
            grep -nw $CHL $SYSINF | awk -F: '{print $1, $2}' | while read Li Ex
            do
               #  Case 1: Collector S1 is not empty
               if [ "[$Ex]" == "[$CHL]" -a "[$S1]" != "[~]" ]; then S1=$Li","$S1; fi

               #  Case 2: Collector S1 is empty: S1==~
               if [ "[$Ex]" == "[$CHL]" -a "[$S1]" == "[~]" ]; then S1=$Li$S1; fi
            done
         fi

         #  THIS group is the child: look up the number of the parent group
         if [ "$CHL" == "$SGR" ]; then
            grep -nw $PAR $SYSINF | awk -F: '{print $1, $2}' | while read Li Ex
            do
               #  Case 1: Collector S2 is not empty
               if [ "[$Ex]" == "[$PAR]" -a "[$S2]" != "[~]" ]; then S2=$S2","$Li; fi

               #  Case 2: Collector S2 is empty: S2==~
               if [ "[$Ex]" == "[$PAR]" -a "[$S2]" == "[~]" ]; then S2=$S2$Li; fi
            done
         fi
      done

      #  e.g. "5,3~*~7": children of this group, "~*~", parents of this group
      SGD=$S1"*"$S2
   fi

   #  Determine which group is active on which node
   #  GRS[i] = state of the group on node CLN[i], "N/A" if "hagrp -display"
   #  has no State line for that node. The "|" separators in the value are
   #  turned into blanks, so "read" stores the first resulting word.
   #  NOTE(review): the node name is matched as a plain substring, so a node
   #  "n1" would also match the State line of "n10" - confirm that node names
   #  are never prefixes of each other.
   $HAGRP -display $SGR > $CLUINF
   integer i=0
   while [ "$i" -lt "$CC" ]
   do
      grep -w "State" $CLUINF | grep "${CLN[$i]}" >/dev/null 2>&1; RC=$?
      if [ "$RC" -eq 0 ]; then
         grep -w "State" $CLUINF | grep "${CLN[$i]}" | awk '{print $NF}' | tr '|' ' ' | read GRS[$i]
      else
         GRS[$i]="N/A"
      fi
      i=$i+1
   done

   #  Newline printed before the resource group?
   #  Yes, if this group is in $SGLIST, it is not the first group of the
   #  output and the previous group did not already end with a blank line
   #  (SpcM=1 means: the previous group printed its resources).
   echo $SGLIST | grep -w $SGR >/dev/null 2>&1; RC=$?
   if [ "$RC" -eq 0 -a "$SpcM" -eq 0 ]; then
      if [ "$g" -gt 1 ]; then echo; fi
      SpcM=1
   fi

   #  Display group info (in bold if option -g was given)
   echo "${bon}\c"
   if [ "$DSDTL" -eq 1 ]; then
      #  +19: the name also spans the "Resourcetype  ACE" detail columns
      printf "SG%02d  %-$(($SPR+19))s  " $g $SGR
   else
      printf "SG%02d  %-${SPR}s  " $g $SGR
   fi
   integer i=0
   while [ "$i" -lt "$CC" ]
   do
      printf "%-${SPN}s  " ${GRS[$i]}
      i=$i+1
   done
   printf "[%s]\n" $SGD
   echo "${bof}\c"

   #  Determine resource info in group
   #    NEW STYLE: depending/dependent resources displayed ordered and grouped
   #  The line number of a resource in $GRPINF is the number shown in front
   #  of it and the number used to refer to it in dependency lists.
   integer r=1
   $HAGRP -resources $SGR > $GRPINF

   echo $SGLIST | grep -w $SGR >/dev/null 2>&1; RC=$?
   if [ "$RC" -eq 0 ]; then
      #  Print resource details for this service group $SGR

      #  Now process resource info in group
      cat $GRPINF | while read RES
      do
         #  Determine resource dependency

         #  "hares -dep" prints a header line starting with "#Group" only if
         #  the resource takes part in a dependency
         $HARES -dep $RES 2>/dev/null | grep "^#Group" >/dev/null 2>&1; RC=$?

         if [ "$RC" -eq 1 ]; then
            RSD="no_dep"
         else
            #  S1FILE: numbers of the resources listed as child of $RES
            #  S2FILE: numbers of the resources listed as parent of $RES
            >$S1FILE  #  empty file $S1FILE
            >$S2FILE  #  empty file $S2FILE

            $HARES -dep $RES 2>/dev/null | grep -v "^#Group" > $RESINF

            #  Columns 2 and 3 of each dependency line are read as parent and
            #  child (column 1 is skipped)
            awk '{print $2, $3}' $RESINF | while read PAR CHL
            do
               if [ "$PAR" == "$RES" ]; then
                  grep -nw $CHL $GRPINF | awk -F: '{print $1, $2}' | while read Li Ex
                  do
                     if [ "[$Ex]" == "[$CHL]" ]; then echo $Li >> $S1FILE; fi
                  done
               fi

               if [ "$CHL" == "$RES" ]; then
                  grep -nw $PAR $GRPINF | awk -F: '{print $1, $2}' | while read Li Ex
                  do
                     if [ "[$Ex]" == "[$PAR]" ]; then echo $Li >> $S2FILE; fi
                  done
               fi
            done

            RSD=""

            #  Collapse the sorted numbers in $S1FILE (rows where $RES was the
            #  parent) into a compact list: runs of consecutive numbers become
            #  "first-last", everything else is comma separated (1,2,3,5 ->
            #  "1-3,5").
            #  LASTR = previous number, CNTNG = 1 while inside a run whose
            #  "first-" has been written but whose last number has not.
            #  NOTE(review): a number occurring twice in the file (difference
            #  0) ends up in the final "else" branch and would garble the
            #  list - presumably "hares -dep" never lists a pair twice.
            LASTR=""
            CNTNG=0
            sort -n $S1FILE | while read RESNR
            do
               if [ "[$LASTR]" == "[]" ]; then
                  #  first number
                  RSD=$RESNR
               elif [ $(($RESNR-$LASTR)) -gt 1 -a "$CNTNG" -eq 0 ]; then
                  #  gap, no run open: plain list element
                  RSD=$RSD","$RESNR
               elif [ $(($RESNR-$LASTR)) -eq 1 -a "$CNTNG" -eq 0 ]; then
                  #  consecutive number: open a run
                  RSD=$RSD"-"
                  CNTNG=1
               elif [ $(($RESNR-$LASTR)) -eq 1 -a "$CNTNG" -eq 1 ]; then
                  #  run continues: nothing to write yet ("sleep 0" is only
                  #  used as a do-nothing command here)
                  sleep 0
               else # [ $(($RESNR-$LASTR)) -gt 1 -a "$CNTNG" -eq 1 ]
                  #  gap while a run is open: close the run, start new element
                  RSD=$RSD$LASTR","$RESNR
                  CNTNG=0
               fi

               LASTR=$RESNR
            done

            #  close a run that reached the end of the list
            if [ "$CNTNG" -eq 1 ]; then RSD=$RSD$LASTR; fi

            #  separator between the two lists
            RSD=$RSD"~*~"

            #  Collapse the sorted numbers in $S2FILE (rows where $RES was the
            #  child) in the same way and append them after the separator
            LASTR=""
            CNTNG=0
            sort -n $S2FILE | while read RESNR
            do
               if [ "[$LASTR]" == "[]" ]; then
                  RSD=$RSD$RESNR
               elif [ $(($RESNR-$LASTR)) -gt 1 -a "$CNTNG" -eq 0 ]; then
                  RSD=$RSD","$RESNR
               elif [ $(($RESNR-$LASTR)) -eq 1 -a "$CNTNG" -eq 0 ]; then
                  RSD=$RSD"-"
                  CNTNG=1
               elif [ $(($RESNR-$LASTR)) -eq 1 -a "$CNTNG" -eq 1 ]; then
                  sleep 0
               else # [ $(($RESNR-$LASTR)) -gt 1 -a "$CNTNG" -eq 1 ]
                  RSD=$RSD$LASTR","$RESNR
                  CNTNG=0
               fi

               LASTR=$RESNR
            done

            if [ "$CNTNG" -eq 1 ]; then RSD=$RSD$LASTR; fi
         fi

         #  From here on $CLUINF holds the attributes of the resource (the
         #  group's own "hagrp -display" output is no longer needed)
         $HARES -display $RES > $CLUINF

         #  Resourcetype
         #  Names longer than 12 characters are cut to 11 characters plus "."
         #  and the abbreviation is recorded in $LTFILE together with the
         #  full name.
         #  NOTE(review): the record is written even when DSDTL=0, although
         #  the type column is only printed for DSDTL=1.
         grep -w "Type" $CLUINF | awk '{print $NF}' | read RST
         if [ "${#RST}" -gt 12 ]; then
            SRT=`echo $RST | cut -c1-11`"."
            echo "$SRT $RST" >> $LTFILE
            RST=$SRT
         fi

         #  Resourcedetails: the values of these three attributes are printed
         #  side by side below the column title "ACE"
         grep -w "AutoStart" $CLUINF | awk '{print $NF}' | read FLA
         grep -w "Critical"  $CLUINF | awk '{print $NF}' | read FLC
         grep -w "Enabled"   $CLUINF | awk '{print $NF}' | read FLE

         #  Resourcestate
         #  RSS[i] = state of the resource on node CLN[i], "-" if there is no
         #  State line for that node
         integer i=0
         while [ "$i" -lt "$CC" ]
         do
            grep -w "State" $CLUINF | awk '$2=="State"' | grep "${CLN[$i]}" >/dev/null 2>&1; RD=$?
            #  Additional filtering 'grep -w "State" $CLUINF' has been made necessary due to
            #  too long lines in $CLUINF which produce error 'awk: record `<res>...' has too many fields'
            #  for resources of type 'MultiVolume'
            if [ "$RD" -eq 0 ]; then
               grep -w "State" $CLUINF | awk '$2=="State"' | grep "${CLN[$i]}" | awk '{print $NF}' \
               | read RSS[$i]
            else
               RSS[$i]="-"
            fi
            i=$i+1
         done

         #  Display resource info: number, name, (type and flags,) the state
         #  on every node and the dependency list in parentheses
         if [ "$DSDTL" -eq 1 ]; then
            printf "%03s>  %-${SPR}s  %12s  %s%s%s  " $r $RES $RST $FLA $FLC $FLE
         else
            printf "%03s>  %-${SPR}s  " $r $RES
         fi

         integer i=0
         while [ "$i" -lt "$CC" ]
         do
            printf "%-${SPN}s  " ${RSS[$i]}
            i=$i+1
         done
         printf "(%s)\n" $RSD

         r=$r+1
      done

      #  blank line after the resources of a group
      echo
   else
      #  Ignore resource details for this service group $SGR
      #  (and remember that no blank line has been printed after it)
      SpcM=0
   fi

   g=$g+1
done

#  Display the list of abbreviated resource type names (detailed view only)
#  and delete the file afterwards.
#  The file is removed in every case: it is appended to on each run, so a
#  leftover from a run without -d would otherwise show up as stale entries in
#  the next run with -d.
show_long_types() {
   if [ -r "$LTFILE" ]; then
      if [ "$DSDTL" -eq 1 ]; then
         echo
         echo "Long (>12 characters) resource type names:"
         echo "Abbreviated  Full name"
         echo "------------ ----------------"
         sort -u "$LTFILE"
         echo
      fi
      rm -f "$LTFILE"
   fi
}

#  Remove the remaining temporary files. rm -f neither fails nor asks for
#  confirmation if a file is missing or write-protected.
cleanup_tmpfiles() {
   rm -f "$SYSINF" "$DEPINF" "$CLUINF" "$GRPINF" "$RESINF"
   rm -f "$S1FILE" "$S2FILE"
}

show_long_types
cleanup_tmpfiles

exit 0
root#

No comments:

Post a Comment