#!/usr/local/bin/perl
#
# $Header: emdb/sysman/admin/scripts/clusterGeneric.pl /st_emdbsa_11.2/5 2009/07/02 22:34:19 rsamaved Exp $
#
#
# Copyright (c) 2007, 2009, Oracle and/or its affiliates. All rights reserved.
#
#    NAME
#      clusterGeneric.pl - run a perl metric script on the nodes of a cluster
#
#    DESCRIPTION
#      To connect to other nodes and execute a perl script from the master agent.
#
#    NOTES
#      USAGE:
#        perl clusterGeneric.pl <crsHome> <scriptsDir> <perlScript> [<nodeList>]
#      OUTPUT:
#        em_result=<hostname>|<columns>
#        [em_result=<hostname>|<columns>]*
#
#    MODIFIED   (MM/DD/YY)
#    rsamaved    11/03/08 - fix typos
#    pboopath    11/06/07 - removing extra threads for performance
#    pboopath    07/26/07 - getting scriptsdir, listofnodes and masteragent
#                           from the java side
#    pboopath    07/19/07 - general cleanup of code
#    pboopath    05/03/07 - writing/parsing and calculating diffs
#    pboopath    04/12/07 - returing em_result for each node
#    pboopath    04/12/07 - removing the hardcode values
#    pboopath    04/12/07 - Creation
#

use strict;
use warnings;

require "emd_common.pl";
use has::Common;
use File::Spec::Functions;
use File::Path;
use Data::Dumper;
use threads;
use threads::shared;

# A recorded timeout stops counting against a host once it is older than
# this many seconds (900 s = 15 minutes).
my $timeout_cache_secs = 900;

# Per-command timeout, in seconds, passed to has::Common::runsystemcommand.
my $script_timeout_secs = 30;

# holds the output format information
# (not referenced anywhere else in the visible part of this file)
my $has_output_format;

#------------------------------------------------------------------------------
# FUNCTION :    clusterGeneric_exitfail
#
# DESC
# Logs the failure, hands it to the has::Common error reporting routines and
# terminates the script with exit status 1.
#
# ARGUMENTS
# none
#------------------------------------------------------------------------------
sub clusterGeneric_exitfail {
    # log the message to the log file
    EMD_PERL_ERROR("clusterGeneric.pl:Failed execution , exiting with error ");
    has::Common::has_handle_error('ERROR:Failed Execution');
    has::Common::has_printerrors('exit_fail');

    # exit with a failure status
    exit(1);
}

# temporary setting of SIGNAL handler for alarm in main thread to handle
# alarm bug in perl 5.8
$SIG{ALRM} = sub { };

# install signal handlers for warn and die: both report through
# has::Common::has_handle_error, and die additionally terminates the script
$SIG{'__WARN__'} = sub { has::Common::has_handle_error(@_) };
$SIG{'__DIE__'}  = sub {
    has::Common::has_handle_error(@_);
    clusterGeneric_exitfail();
};

#------------------------------------------------------------------------------
# Timed-out host cache
#
# $cache_host_list_ref : { hostname => { epoch_seconds => 1, ... }, ... }
#                        as read back from the cache file
# %timeout_host_list   : shared between threads; keys have the form
#                        "<hostname>_separator_<epoch_seconds>"
#------------------------------------------------------------------------------
my $cache_host_list_ref;
my %timeout_host_list : shared;
my $timedout_host_list_file = 'clgentm.txt';
my $cachedir                = has::Common::has_get_cache_dir();

# Declared unconditionally: "my $x = ... if COND" has undefined behaviour.
my $cache_file;
$cache_file = catfile( $cachedir, $timedout_host_list_file ) if $cachedir;

if ( $cache_file and -e $cache_file and -r $cache_file ) {
    # NOTE(review): "do FILE" executes the cache file as Perl code. The file
    # is written by this script with Data::Dumper, but anyone able to write
    # into the cache directory can make this script run arbitrary code.
    $cache_host_list_ref = do $cache_file;

    # Anything other than a plain hash ref is unusable; discard it so later
    # code never dereferences a corrupt value.
    $cache_host_list_ref = undef
        unless ref($cache_host_list_ref) eq 'HASH';

    # do not look at timeouts older than $timeout_cache_secs seconds
    my $cutofftime = time - $timeout_cache_secs;

    if ($cache_host_list_ref) {
        for my $host ( keys %{$cache_host_list_ref} ) {
            my $stamps = $cache_host_list_ref->{$host};

            if ( ref($stamps) eq 'HASH' ) {
                for my $tm ( keys %{$stamps} ) {
                    # keep only well-formed, recent timestamps
                    next if $tm =~ /^\d+$/ and $tm >= $cutofftime;
                    delete $stamps->{$tm};
                }
            }

            # drop hosts with a malformed entry or no recent timeouts left
            delete $cache_host_list_ref->{$host}
                unless ref($stamps) eq 'HASH' and keys %{$stamps};
        }
    }
}

#------------------------------------------------------------------------------
# Command line handling
#------------------------------------------------------------------------------
if ( @ARGV and $ARGV[0] =~ /--help/ ) {
    print "\n Usage: perl clusterGeneric.pl <crsHome> <scriptsDir> <perlScript> [<nodeList>] \n";
    exit(0);
}

if ( @ARGV < 3 ) {
    EMD_PERL_DEBUG("Missing input arguments: Try `perl clusterGeneric.pl --help' for more information\n");
    exit(0);
}

# CRS home, directory holding the scripts, script to run, optional
# comma-separated node list
my ( $crsHomeDir, $scriptsDir, $perlFileToBeCalled, $nodeList ) = @ARGV;

# total number of columns in every em_result row
my $totalColumns = 24;

$perlFileToBeCalled =~ s/^\s+|\s+$//g if $perlFileToBeCalled;
$scriptsDir         =~ s/^\s+|\s+$//g if $scriptsDir;

EMD_PERL_ERROR("Perl script name is not passed to clusterGeneric.p ")
    unless $perlFileToBeCalled;
EMD_PERL_ERROR("Perl script directory is not passed to clusterGeneric.p ")
    unless $scriptsDir;

# Nothing sensible can be built from an empty name or directory.
exit(0) unless $perlFileToBeCalled and $scriptsDir;

$perlFileToBeCalled .= ".pl" if $perlFileToBeCalled !~ /\.pl$/;

my $fullpath = catfile( $scriptsDir, $perlFileToBeCalled );
if ( not -e $fullpath ) {
    EMD_PERL_DEBUG("$perlFileToBeCalled: No such file in $fullpath \n");
    exit(0);
}

EMD_PERL_DEBUG("Starting $fullpath\n");
#------------------------------------------------------------------------------
# Main flow: work out the host list, run the script on every host in its own
# thread, wait for all threads.
#------------------------------------------------------------------------------

# column separator used when padding em_result rows
my $pipeline = "|";

my $masterHostName = has::Common::hasGetLocalHostName();
if ( not $masterHostName ) {
    EMD_PERL_ERROR("Failed to get local nodename ");
}

# Column range (1-based, inclusive) filled by the called script; the other
# columns of the $totalColumns-wide row are emitted empty. Left undefined for
# scripts that are neither "filesystem" nor "osload".
my $startPos;
my $endPos;
if ( $perlFileToBeCalled =~ /filesystem/i ) {
    $startPos = 19;
    $endPos   = 24;
}
elsif ( $perlFileToBeCalled =~ /osload/i ) {
    $startPos = 1;
    $endPos   = 18;
}

my @hostnameList;
if ( $nodeList and $nodeList ne "NA" ) {
    @hostnameList = split( ",", $nodeList );
}
else {
    # no node list was passed: ask the clusterware
    my $hostOls;
    olsnodes_call( $crsHomeDir, $hostOls );
    @hostnameList = split( "\n", $hostOls );
}

# strip surrounding whitespace and drop empty entries
@hostnameList = grep { length }
    map { my $h = $_; $h =~ s/^\s+|\s+$//g; $h } @hostnameList;

my @threadArray;
for my $hostname (@hostnameList) {

    # if a host has timed out in more than 2 recent runs then do not run on
    # that host
    if (    ref($cache_host_list_ref) eq 'HASH'
        and ref( $cache_host_list_ref->{$hostname} ) eq 'HASH'
        and keys %{ $cache_host_list_ref->{$hostname} } > 2 )
    {
        EMD_PERL_WARN("clusterGeneric.pl:Skipping running commands on host $hostname as host as timed out over 3 times in previous runs\n");
        next;
    }

    my $thr = threads->new( \&emresultCall, $hostname, $masterHostName,
        $scriptsDir, $fullpath, $startPos, $endPos );
    unless ($thr) {
        EMD_PERL_ERROR("clusterGeneric.pl:Failed to create thread for host $hostname ");
        next;
    }
    push @threadArray, $thr;
}

for my $thr (@threadArray) {
    $thr->join;
}

EMD_PERL_DEBUG("Ending $perlFileToBeCalled\n");

#------------------------------------------------------------------------------
# FUNCTION :    _timed_command_args
#
# DESC
# Returns the option list shared by every timed runsystemcommand call in this
# file (single try, $script_timeout_secs timeout, timeout reported back).
#
# ARGUMENTS
# none
#------------------------------------------------------------------------------
sub _timed_command_args {
    return (
        exit_failure_list => [],
        timeout           => $script_timeout_secs,
        tries             => 1,
        return_timeout    => 1,
        no_alarm_reset    => 1,
    );
}

#------------------------------------------------------------------------------
# FUNCTION :    olsnodes_call
#
# DESC
# Call 'olsnodes' if node list is not passed. The home directory is taken
# from the first argument, then $ENV{CRS_HOME}, then $ENV{ORACLE_HOME}; if
# none is set, $ENV{EMDROOT} is used and 'lsnodes' is run instead.
# Prints an em_error line and exits the script with status 0 when no home
# can be found, the command fails, or it produces no output.
#
# ARGUMENTS
# $_[0] : CRS home directory (may be empty)
# $_[1] : OUT - set to the command output (one node name per line)
#
# RETURNS
# the same command output that is stored in $_[1]
#------------------------------------------------------------------------------
sub olsnodes_call {
    my ($local_crsHomedir) = @_;
    $local_crsHomedir =~ s/^\s+|\s+$//g if $local_crsHomedir;

    my $cmd = 'olsnodes';
    $local_crsHomedir = $ENV{CRS_HOME}
        if $ENV{CRS_HOME} and not $local_crsHomedir;
    $local_crsHomedir = $ENV{ORACLE_HOME}
        if $ENV{ORACLE_HOME} and not $local_crsHomedir;

    unless ($local_crsHomedir) {
        $local_crsHomedir = $ENV{EMDROOT} if $ENV{EMDROOT};
        $cmd = 'lsnodes';
        unless ($local_crsHomedir) {
            print "em_error= No nodes in the cluster \n\n";
            exit(0);
        }
    }

    my $olspath = catfile( catdir( $local_crsHomedir, 'bin' ), $cmd );

    my %command_args = ( exit_failure_list => [] );
    my $olsList = has::Common::runsystemcommand( $olspath, '', \%command_args );
    if ( $command_args{command_return_status} ) {
        EMD_PERL_ERROR("Failed executing the command $olspath ");
        print "em_error= Failed executing the command $olspath \n\n";
        exit(0);
    }

    chomp $olsList if $olsList;

    # Test the command output itself rather than the caller's variable, so a
    # stale value passed in by the caller cannot mask an empty result.
    unless ($olsList) {
        print "em_error= No nodes in the cluster \n\n";
        exit(0);
    }

    $_[1] = $olsList;
    return $olsList;
}

#------------------------------------------------------------------------------
# FUNCTION :    rshcall
#
# DESC
# Runs the script on a remote host through /usr/bin/ssh; when that yields no
# output the same command is retried through /usr/bin/rsh. If the last
# attempt made reports a timeout, the host is recorded in the shared
# %timeout_host_list.
#
# NOTE(review): host name, script directory and script path are interpolated
# into a shell command string unquoted; they must not contain shell
# metacharacters.
#
# ARGUMENTS
# $_[0] : host name
# $_[1] : scripts directory (passed to perl as -I)
# $_[2] : full path of the script to run
# $_[3] : OUT - set to the command output
#
# RETURNS
# the same command output that is stored in $_[3]
#------------------------------------------------------------------------------
sub rshcall {
    my ( $hostname, $scriptsDir, $fullpath ) = @_;

    my %command_args = _timed_command_args();
    my $resultArray  = has::Common::runsystemcommand(
        "/usr/bin/ssh -o FallBackToRsh=yes -o PasswordAuthentication=no -o NumberOfPasswordPrompts=0 -o StrictHostKeyChecking=yes $hostname perl -I$scriptsDir $fullpath",
        '', \%command_args );

    if ( !$resultArray ) {
        EMD_PERL_DEBUG("ssh not configured for $hostname... connecting with rsh \n");

        # fresh option hash, so the timeout flag checked below belongs to
        # the rsh attempt only
        %command_args = _timed_command_args();
        $resultArray  = has::Common::runsystemcommand(
            "/usr/bin/rsh $hostname perl -I$scriptsDir $fullpath",
            '', \%command_args );
    }

    $_[3] = $resultArray;

    if ( $command_args{command_timedout_mesg} ) {
        EMD_PERL_WARN("clusterGeneric.pl:Timedout executing command $fullpath on $hostname\n");
        my $tm = time;
        $timeout_host_list{ $hostname . '_separator_' . $tm } = 1;
    }

    return $resultArray;
}

#------------------------------------------------------------------------------
# FUNCTION :    emresultCall
#
# DESC
# Thread entry point. Runs the script for one host (directly when the host
# is the master host, through rshcall otherwise) and prints one
# "em_result=<hostname>|..." row per "<key>=<value>" line of its output.
# The value is placed at columns $startPos..$endPos of a $totalColumns-wide
# row; the remaining columns are emitted empty.
#
# NOTE(review): the key in front of '=' is not checked, so any
# "<key>=<value>" line (not only em_result) is re-emitted as em_result.
#
# ARGUMENTS
# $hostname       : host to run on
# $masterHostName : name of the local (master) host
# $scriptsDir     : scripts directory (passed to perl as -I)
# $fullpath       : full path of the script to run
# $startPos       : first column filled by the script (may be undef)
# $endPos         : last column filled by the script (may be undef)
#------------------------------------------------------------------------------
sub emresultCall {
    my ( $hostname, $masterHostName, $scriptsDir, $fullpath, $startPos,
        $endPos )
        = @_;

    my @rawOutput;

    # Plain string comparison: a host name must not be used as a regex
    # ('.' would match any character).
    if ( $masterHostName and $hostname eq $masterHostName ) {
        # master agent, no need to do rsh
        my %command_args = _timed_command_args();
        @rawOutput = has::Common::runsystemcommand( "perl -I$scriptsDir $fullpath",
            '', \%command_args );
        if ( $command_args{command_timedout_mesg} ) {
            EMD_PERL_WARN("clusterGeneric.pl:Timedout executing command $fullpath on master host $hostname\n");
            my $tm = time;
            $timeout_host_list{ $hostname . '_separator_' . $tm } = 1;
        }
    }
    else {
        my $thr_rshReturn = "";
        rshcall( $hostname, $scriptsDir, $fullpath, $thr_rshReturn );
        @rawOutput = ($thr_rshReturn);
    }

    # Split into single lines whether the output arrived as one multi-line
    # string or as a list of lines, so both branches are handled alike.
    my @emAllNodesResult = map { split /\n/, $_ } grep { defined } @rawOutput;

    # An undefined position counts as 0, giving no leading separators and
    # $totalColumns trailing ones.
    my $predecessor = defined $startPos ? $startPos - 1 : 0;
    my $successor   = $totalColumns - ( defined $endPos ? $endPos : 0 );
    my $leading     = $pipeline x $predecessor;
    my $trailing    = $pipeline x $successor;

    for my $eachResult (@emAllNodesResult) {
        # Limit 2: everything after the first '=' belongs to the value,
        # including any further '=' characters.
        my ( undef, $resultForOneNode ) = split /=/, $eachResult, 2;

        # blank lines and lines without '=' carry no result
        next unless defined $resultForOneNode;

        print "\nem_result=$hostname|$leading$resultForOneNode$trailing\n";
    }

    return;
}

#------------------------------------------------------------------------------
# At exit: merge the timeouts recorded during this run into the cached
# per-host list and write the list back to the cache file with Data::Dumper.
#------------------------------------------------------------------------------
END {
    if ($cache_file) {
        # never dereference a corrupt cache value
        $cache_host_list_ref = {}
            if defined $cache_host_list_ref
            and ref($cache_host_list_ref) ne 'HASH';

        for my $k ( keys %timeout_host_list ) {
            my ( $hn, $tm ) = ( $k =~ /\A(.+)_separator_(\d+)\z/ );

            # skip keys that do not carry a host name and a numeric time
            next unless defined $hn and defined $tm;
            $cache_host_list_ref->{$hn}{$tm} = 1;
        }

        my $timedoutdmp = Dumper($cache_host_list_ref);

        if ( -e $cache_file and not -w $cache_file ) {
            EMD_PERL_WARN("clusterGeneric.pl:Failed to write the failed hosts to cache file $cache_file, no write access \n");
        }
        elsif ( open( my $tfh, '>', $cache_file ) ) {
            print {$tfh} $timedoutdmp;
            close($tfh)
                or EMD_PERL_WARN("clusterGeneric.pl:Failed to close cache file $cache_file: $! \n");
        }
        else {
            EMD_PERL_WARN("clusterGeneric.pl:Failed to open cache file $cache_file for writing: $! \n");
        }
    }
}