Rem drv:
Rem
Rem $Header: emcore/source/oracle/sysman/emdrep/sql/core/latest/ping/ping_pkgbodys.sql /st_emcore_10.2.0.4.2db11.2/1 2009/02/26 20:39:36 jsadras Exp $
Rem
Rem ping_pkgbodys.sql
Rem
Rem Copyright (c) 2002, 2009, Oracle and/or its affiliates.
Rem All rights reserved.
Rem
Rem NAME
Rem ping_pkgbodys.sql -
Rem
Rem DESCRIPTION
Rem
Rem
Rem NOTES
Rem
Rem
Rem MODIFIED (MM/DD/YY)
Rem jsadras 11/12/08 - Perf fixes
Rem pmodi 07/31/07 - Backport -Bug-6267173 - Insert CLEAR on clean HB
Rem if current severity of Agent is DOWN
Rem pmodi 03/29/07 - Backport pmodi_bug-5924953+4773137 from main
Rem mkiran 02/10/07 - 5448666: Fix unrch_start_ts for
Rem potentially down agents
Rem scgrover 07/12/06 - Backport scgrover_bug-5043773 from main
Rem rpinnama 08/28/05 - Fix 4538620 : Call job system callback if any
Rem unrch severties are inserted
Rem rkpandey 07/29/05 - Modify update_emd_status
Rem rpinnama 07/28/05 - Fix 4415154 : Do not log unreach clear messages
Rem if they are before marker time
Rem rkpandey 07/21/05 - Bug 3369474: Handle agent_errors
Rem rpinnama 07/07/05 - Fix 3317965 : Lock availability while recording
Rem heartbeats
Rem scgrover 07/07/05 - add extended sql trace
Rem gsbhatia 07/01/05 - New repmgr header impl
Rem gsbhatia 06/26/05 - New repmgr header impl
Rem pmodi 06/20/05 - Bug:4389284- Change for NLS Message
Rem rpinnama 06/06/05 - Fix 4397891 : default down reason to 1024 chars
Rem so as to avoid chained rows on this table
Rem rpinnama 06/14/05 - Add trace messages
Rem jsadras 03/10/05 - Bug:4186866: Remove timezone_delta
Rem neearora 03/02/05 - Added entry for emdw_log
Rem rpinnama 02/17/05 - Guard against errors in job_engine package
Rem gsbhatia 02/13/05 - updating repmgr header
Rem gsbhatia 02/09/05 - updating repmgr header
Rem gsbhatia 02/07/05 - updating repmgr header
Rem scgrover 01/27/05 - move update statement
Rem ktlaw 01/11/05 - add repmgr header
Rem rpinnama 12/16/04 - Fix 4069628: Store the reverse ping job name
Rem snakai 11/22/04 - update beacon avail
Rem rpinnama 09/13/04 - mgmt_severity -> mgmt_violations
Rem gan 08/19/04 - use active mt count
Rem dsahrawa 06/22/04 - use MGMT_JOBS.get_job_schedule_record
Rem rpinnama 04/15/04 - Raise exception on ORA-3113, while recording
Rem lgloyd 11/12/03 -
Rem lgloyd 11/12/03 - perf: remove user defined fn calls
Rem rzazueta 10/30/03 - Add procedures for detailed logging
Rem rzazueta 10/29/03 - Use mgmt_global.elapsed_time_msec function
Rem rpinnama 10/15/03 - Remove special checks for host
Rem skini 09/09/03 - Change in schedule_record structure
Rem rzazueta 08/25/03 - Fix bug 2992498
Rem streddy 04/24/03 - Master agent changes
Rem aholser 04/09/03 - move set_inactive_time to sdk
Rem streddy 04/07/03 - Handle repository only targets when updating marker
Rem snakai 06/26/03 - do not update the marker for beacon avail targets
Rem rpinnama 05/05/03 - truncate the recorder url
Rem rpinnama 04/07/03 - FIx bug 2859446 : Improve hb record performance
Rem sgrover 03/19/03 - remove load_timestamp
Rem rpinnama 02/26/03 -
Rem rpinnama 03/14/03 - Log time taken in milliseconds
Rem ancheng 02/14/03 - target version fix
Rem rpinnama 02/06/03 - Support for unrch_start_time
Rem rpinnama 02/03/03 - Fix bug 2779377; Do not record heartbeats for
Rem potentially down agents.
Rem rpinnama 01/13/03 - Add support for disabling ping system
Rem rpinnama 11/08/02 - Add add_agent API
Rem rpinnama 11/01/02 - Record heartbeats without logging errors
Rem skini 11/07/02 - Check for hosts in pending status
Rem rpinnama 10/23/02 - Record cleanheartbeats and error counts
Rem rpinnama 10/18/02 - Use Agent Ping
Rem rpinnama 10/18/02 - Add performance logging
Rem rpinnama 10/17/02 - Reset DBMSAPPLICATION module
Rem rpinnama 10/16/02 - Fix metric versioning query
Rem rpinnama 10/14/02 - Discard the check for beacon targets while updating avail marker
Rem rpinnama 10/09/02 - Use category_props to get metric guid
Rem rpinnama 10/08/02 - Support for multiple metric versions
Rem skini 10/04/02 - Check for blackout windows before moving marker
Rem rpinnama 09/19/02 - Use target timezone for avail marker
Rem rpinnama 09/16/02 -
Rem rpinnama 09/12/02 - Use global functions for translating dates.
Rem rpinnama 09/10/02 - Use UTC date for last_heartbeat
Rem snakai 07/25/02 - beacon funcs/procs moved to pkg
Rem rpinnama 07/31/02 - insert severity records without notification status
Rem rpinnama 07/16/02 - Exception for target having beacon availability
Rem rpinnama 07/03/02 - Fix the unreachable message
Rem rpinnama 06/28/02 - Append error message to severity message
Rem rpinnama 06/21/02 - Log better error messages
Rem rpinnama 06/18/02 - Generate unrch_clear only if the ping status is DOWN
Rem skini 06/16/02 - Fix args to SCHEDULE_RECORD
Rem rpinnama 06/04/02 - Use the new batch heartbeat definition.
Rem rpinnama 05/15/02 - rpinnama_reorg_rep_scripts
Rem rpinnama 05/15/02 - Created
Rem
CREATE OR REPLACE PACKAGE BODY EM_PING AS
S_DEFAULT_DOWN_REASON_MSG mgmt_emd_ping.down_reason_msg%TYPE;
PROCEDURE agt_up_on_blk_end(p_target_guid RAW, p_collection_timestamp DATE);
--
-- PURPOSE
-- Procedure(s) to enable/disable/verify detailed logging
--
PROCEDURE enable_detailed_logging IS
BEGIN
IF (EMDW_LOG.p_is_info_set)THEN
EMDW_LOG.info('enable_detailed_logging:Entry', MODULE_NAME);
END IF;
BEGIN
INSERT INTO MGMT_PARAMETERS
(parameter_name, parameter_value, parameter_comment,
internal_flag)
VALUES
(DETAILED_PING_LOGGING, 'TRUE', DETAILED_PING_LOGGING_CMT, 1);
EXCEPTION
WHEN DUP_VAL_ON_INDEX THEN
UPDATE MGMT_PARAMETERS
SET parameter_value = 'TRUE'
WHERE parameter_name = DETAILED_PING_LOGGING;
END;
IF (EMDW_LOG.p_is_info_set)THEN
EMDW_LOG.info('enable_detailed_logging:Exit', MODULE_NAME);
END IF;
END enable_detailed_logging;
PROCEDURE disable_detailed_logging IS
BEGIN
IF (EMDW_LOG.p_is_info_set)THEN
EMDW_LOG.info('disable_detailed_logging:Entry', MODULE_NAME);
END IF;
BEGIN
INSERT INTO MGMT_PARAMETERS
(parameter_name, parameter_value, parameter_comment,
internal_flag)
VALUES
(DETAILED_PING_LOGGING, 'FALSE', DETAILED_PING_LOGGING_CMT, 1);
EXCEPTION
WHEN DUP_VAL_ON_INDEX THEN
UPDATE MGMT_PARAMETERS
SET parameter_value = 'FALSE'
WHERE parameter_name = DETAILED_PING_LOGGING;
END;
IF (EMDW_LOG.p_is_info_set)THEN
EMDW_LOG.info('disable_detailed_logging:Exit', MODULE_NAME);
END IF;
END disable_detailed_logging;
FUNCTION is_detailed_logging_enabled
RETURN INTEGER IS
l_retVal INTEGER := 0;
l_cnt INTEGER := 0;
BEGIN
IF (EMDW_LOG.p_is_info_set)THEN
EMDW_LOG.info('is_detailed_logging_enabled:Entry', MODULE_NAME);
END IF;
IF G_DETAILED_LOGGING IS NOT NULL THEN
IF (G_DETAILED_LOGGING) THEN
l_retVal := 1;
END IF;
ELSE
-- Query the table when once per session only
-- (when G_DETAILED_LOGGING is null)
SELECT COUNT(*) into l_cnt
FROM MGMT_PARAMETERS
WHERE parameter_name=DETAILED_PING_LOGGING
AND upper(parameter_value)='TRUE';
IF (l_cnt > 0) THEN
-- detailed_ping_logging property is TRUE
l_retVal := 1;
G_DETAILED_LOGGING := TRUE;
ELSE
G_DETAILED_LOGGING := FALSE;
END IF;
END IF;
IF (EMDW_LOG.p_is_info_set)THEN
EMDW_LOG.info('is_detailed_logging_enabled:Exit l_retVal = '|| l_retVal, MODULE_NAME);
END IF;
RETURN l_retVal;
END is_detailed_logging_enabled;
--
-- PURPOSE
-- Procedure(s) to enable/disable/verify ping subsystem
--
PROCEDURE enable_ping_system IS
BEGIN
IF (EMDW_LOG.p_is_info_set)THEN
EMDW_LOG.info('enable_ping_system:Entry', MODULE_NAME);
END IF;
UPDATE MGMT_PARAMETERS
SET parameter_value = 'FALSE'
WHERE parameter_name = DISABLE_PING_SYSTEM_PROP;
IF (EMDW_LOG.p_is_info_set)THEN
EMDW_LOG.info('enable_ping_system:Exit', MODULE_NAME);
END IF;
END enable_ping_system;
PROCEDURE disable_ping_system IS
BEGIN
IF (EMDW_LOG.p_is_info_set)THEN
EMDW_LOG.info('disable_ping_system:Entry', MODULE_NAME);
END IF;
BEGIN
INSERT INTO MGMT_PARAMETERS
(parameter_name, parameter_value, parameter_comment,
internal_flag)
VALUES
(DISABLE_PING_SYSTEM_PROP, 'TRUE', DISABLE_PING_SYSTEM_PROP_CMT, 1);
EXCEPTION
WHEN DUP_VAL_ON_INDEX THEN
UPDATE MGMT_PARAMETERS
SET parameter_value = 'TRUE'
WHERE parameter_name = DISABLE_PING_SYSTEM_PROP;
END;
IF (EMDW_LOG.p_is_info_set)THEN
EMDW_LOG.info('disable_ping_system:Exit', MODULE_NAME);
END IF;
END disable_ping_system;
FUNCTION is_ping_system_enabled
RETURN INTEGER IS
l_retVal INTEGER := 1;
l_cnt INTEGER := 0;
BEGIN
IF (EMDW_LOG.p_is_info_set)THEN
EMDW_LOG.info('is_ping_system_enabled:Entry', MODULE_NAME);
END IF;
SELECT COUNT(*) into l_cnt
FROM MGMT_PARAMETERS
WHERE parameter_name=DISABLE_PING_SYSTEM_PROP
AND upper(parameter_value)='TRUE';
IF (l_cnt > 0) THEN
-- disable_ping_system_property is TRUE
l_retVal := 0;
END IF;
IF (EMDW_LOG.p_is_info_set)THEN
EMDW_LOG.info('is_ping_system_enabled:Exit l_retVal = '|| l_retVal, MODULE_NAME);
END IF;
RETURN l_retVal;
END is_ping_system_enabled;
--
-- PURPOSE
-- Marks the specified EMD as UP in the ping tables.
-- Down reason code and message are cleared on marking the node as Up.
--
-- IN PARAMETERS
-- emd_url_in - specifies the EMD that needs to be marked up.
--
-- NOTES
-- This is private routine called only in this package
--
PROCEDURE mark_emd_up(v_target_guid_in IN RAW)
IS
BEGIN
IF (EMDW_LOG.p_is_info_set)THEN
EMDW_LOG.info('mark_emd_up:Entry v_target_guid_in =' || v_target_guid_in, MODULE_NAME);
END IF;
-- Update the status and
-- reset the last_checked time, so that the mark_node_stat
-- does not select this agent
UPDATE mgmt_emd_ping
SET status = NODE_STATUS_UP,
down_reason_code = 0,
down_reason_msg = S_DEFAULT_DOWN_REASON_MSG
WHERE target_guid = v_target_guid_in;
IF (EMDW_LOG.p_is_info_set)THEN
EMDW_LOG.info('mark_emd_up:Exit', MODULE_NAME);
END IF;
END mark_emd_up;
--
-- PURPOSE
-- Marks the specified EMD as DOWN with the specified reason code
-- and message.
--
-- IN PARAMETERS
-- emd_url_in - specifies the EMD that needs to be marked as down.
-- down_reason_code_in - error code that specifies the down reason
-- down_reason_msg_in - error message that specifies the down reason
--
-- NOTES
-- This is private routine called only in this package
--
PROCEDURE mark_emd_down(target_guid_in IN RAW,
down_reason_code_in IN NUMBER,
down_reason_msg_in IN VARCHAR2)
IS
BEGIN
IF (EMDW_LOG.p_is_info_set)THEN
EMDW_LOG.info('mark_emd_down:Entry target_guid_in = '|| target_guid_in ||
' down_reason_code_in' || down_reason_code_in ||
' down_reason_msg_in ' || down_reason_msg_in, MODULE_NAME);
END IF;
UPDATE mgmt_emd_ping
SET status = NODE_STATUS_DOWN,
down_reason_code = down_reason_code_in,
down_reason_msg = down_reason_msg_in,
unrch_start_ts = NULL
WHERE target_guid = target_guid_in;
IF (EMDW_LOG.p_is_info_set)THEN
EMDW_LOG.info('mark_emd_down:Exit', MODULE_NAME);
END IF;
END mark_emd_down;
--
-- PURPOSE
-- Marks the specified EMD as POTENTIALLY DOWN
--
-- IN PARAMETERS
-- emd_url_in - specifies the EMD that needs to be marked
--
-- NOTES
-- This is private routine called only in this package
--
PROCEDURE mark_emd_potentially_down(v_target_guid_in IN RAW)
IS
BEGIN
IF (EMDW_LOG.p_is_info_set)THEN
EMDW_LOG.info('mark_emd_potentially_down:Entry v_target_guid_in = ' ||
v_target_guid_in, MODULE_NAME);
END IF;
-- 5448666: There could be delays in recording agent heartbeats.
-- So, set unrch_start_ts to last_heartbeat_ts +1sec instead of
-- +max_inactive_time. It'll prevent raise of UNREACHABLE_START
-- and UNREACHABLE_CLEAR severities in wrong order.
UPDATE mgmt_emd_ping
SET status = NODE_STATUS_POTENTIALLY_DOWN,
down_reason_code = 0,
down_reason_msg = S_DEFAULT_DOWN_REASON_MSG,
unrch_start_ts = last_heartbeat_ts + (1/(60*60*24))
WHERE target_guid = v_target_guid_in;
IF (EMDW_LOG.p_is_info_set)THEN
EMDW_LOG.info('mark_emd_potentially_down:Exit', MODULE_NAME);
END IF;
END mark_emd_potentially_down;
--
-- PURPOSE
-- Insert data for metric name/col 'Response'/'Status'.
-- Inserting data for this will trigger the availability information
-- for the specified target.
--
-- IN PARAMETERS
-- target_guid - target guid of the agent for which data is inserted.
-- collect_time_in - collection timestamp for the data
-- data_value_in - value of the metric data.
--
-- NOTES
-- This is private routine called only in this package
--
PROCEDURE insert_metric_data(target_guid_in IN RAW,
collect_time_in IN DATE,
data_value_in IN NUMBER)
IS
mguid MGMT_METRICS.metric_guid%TYPE;
CURSOR resp_stat_metric_guid (v_target_guid_in RAW) IS
SELECT metric_guid
FROM mgmt_metrics
WHERE target_type = MGMT_GLOBAL.G_AGENT_TARGET_TYPE
AND metric_name = MGMT_GLOBAL.G_AVAIL_METRIC_NAME
AND metric_column = MGMT_GLOBAL.G_AVAIL_METRIC_COLUMN;
BEGIN
IF (EMDW_LOG.p_is_info_set)THEN
EMDW_LOG.info('insert_metric_data:Entry target_guid_in = ' || target_guid_in ||
' collect_time_in = ' || collect_time_in ||
' data_value_in = ' || data_value_in, MODULE_NAME);
END IF;
-- get the Response / Status metric guid for the agent
OPEN resp_stat_metric_guid(target_guid_in);
FETCH resp_stat_metric_guid INTO mguid;
IF (resp_stat_metric_guid%ISOPEN) THEN
CLOSE resp_stat_metric_guid;
END IF;
EMD_LOADER.ADD_METRIC_DATA(target_guid_in, mguid, ' ', data_value_in, collect_time_in);
IF (EMDW_LOG.p_is_info_set)THEN
EMDW_LOG.info('insert_metric_data:Exit', MODULE_NAME);
END IF;
END insert_metric_data;
--
-- PURPOSE
-- Insert severity data for metric name/col 'Response'/'Status'.
-- Inserting severity data for this metric will trigger the
-- availability information for the specified target.
--
-- IN PARAMETERS
-- target_guid_in - target guid for which data has to be inserted.
-- metric_guid_in - metric guid for which data has to be inserted.
-- collect_time_in - timestamp of when the severity was detected.
-- severity_code_in - specified whether the severity is CLEAR/ALERT.
--
-- NOTES
-- This is private routine called only in this package
--
FUNCTION insert_unrch_severity(log_action_name_in IN VARCHAR2,
target_guid_in IN RAW,
metric_guid_in IN RAW,
collect_time_in IN DATE,
severity_code_in IN NUMBER,
severity_msg_in IN VARCHAR2,
severity_msg_nlsid_in IN VARCHAR2 DEFAULT NULL,
severity_msg_params_in IN VARCHAR2 DEFAULT NULL)
RETURN NUMBER
IS
is_beacon_avail BOOLEAN := FALSE;
l_insert_succ NUMBER := 0;
BEGIN
IF (EMDW_LOG.p_is_info_set)THEN
EMDW_LOG.info('insert_unrch_severity:Entry log_action_name_in = ' || log_action_name_in ||
' target_guid_in = ' || target_guid_in ||
' metric_guid_in = ' || metric_guid_in ||
' collect_time_in = ' || to_char(collect_time_in, 'YYYY-MM-DD HH24:MI:SS') ||
' severity_code_in = ' || severity_code_in ||
' severity_msg_in = ' || severity_msg_in ||
' severity_msg_nlsid_in = ' || severity_msg_nlsid_in ||
' severity_msg_params_in = ' || severity_msg_params_in, MODULE_NAME);
END IF;
BEGIN
is_beacon_avail := MGMT_GENSVC_AVAIL.PROCESS_UNREACHABLE(target_guid_in);
IF NOT is_beacon_avail THEN
-- Insert a severity record.
INSERT INTO mgmt_violations
(target_guid, policy_guid, collection_timestamp,
violation_level, message, message_nlsid, message_params)
VALUES
(target_guid_in, metric_guid_in, collect_time_in,
severity_code_in, severity_msg_in, severity_msg_nlsid_in, severity_msg_params_in);
l_insert_succ := 1;
END IF;
EXCEPTION
WHEN MGMT_GLOBAL.target_deletion_in_prog THEN
IF (EMDW_LOG.p_is_info_set)THEN
EMDW_LOG.info('insert_unrch_severity: Ignoring target_deletion_in_prog exception' ||
' (target_guid = ' || target_guid_in || ')' ||
' (metric_guid = ' || metric_guid_in || ')' ||
' (severity_code = ' || severity_code_in || ')' ||
' (error msg = ' || SQLERRM || ')',
MODULE_NAME);
END IF;
-- Ignore exceptions for targets being deleted..
NULL;
WHEN MGMT_GLOBAL.duplicate_record THEN
IF (EMDW_LOG.p_is_info_set)THEN
EMDW_LOG.info('insert_unrch_severity: Ignoring duplicate_record exception' ||
' (target_guid = ' || target_guid_in || ')' ||
' (metric_guid = ' || metric_guid_in || ')' ||
' (severity_code = ' || severity_code_in || ')' ||
' (error msg = ' || SQLERRM || ')',
MODULE_NAME);
END IF;
-- Ignore exceptions for duplicate severities..
NULL;
WHEN MGMT_GLOBAL.severity_in_blackout THEN
IF (EMDW_LOG.p_is_info_set)THEN
EMDW_LOG.info('insert_unrch_severity: Ignoring severity_in_blackout exception' ||
' (target_guid = ' || target_guid_in || ')' ||
' (metric_guid = ' || metric_guid_in || ')' ||
' (severity_code = ' || severity_code_in || ')' ||
' (error msg = ' || SQLERRM || ')',
MODULE_NAME);
END IF;
-- Ignore exceptions for severities inserted during blackout
NULL;
WHEN MGMT_GLOBAL.invalid_unrch_clr_viol THEN
IF (EMDW_LOG.p_is_info_set)THEN
EMDW_LOG.info('insert_unrch_severity: Ignoring invalid_unrch_clr_viol exception' ||
' (target_guid = ' || target_guid_in || ')' ||
' (metric_guid = ' || metric_guid_in || ')' ||
' (severity_code = ' || severity_code_in || ')' ||
' (error msg = ' || SQLERRM || ')',
MODULE_NAME);
END IF;
-- Ignore exceptions for unreach clear severities inserted with errors
NULL;
WHEN OTHERS THEN
mgmt_log.log_error(log_action_name_in, E_INSERT_SEVERITY,
EM_INSERT_SEVERITY ||
' (target_guid = ' || target_guid_in ||
') (metric_guid = ' || metric_guid_in ||
') (severity_code = ' || severity_code_in ||
') (error msg = ' || SQLERRM || ')');
NULL; -- continue inserting severities for other targets..
END;
IF (EMDW_LOG.p_is_info_set)THEN
EMDW_LOG.info('insert_unrch_severity: Exit insert_succ = ' || l_insert_succ,
MODULE_NAME);
END IF;
RETURN l_insert_succ;
END insert_unrch_severity;
--
-- PURPOSE
-- Insert severity data for metric name/col 'Response'/'Status'.
-- Inserting severity data for this metric will trigger the
-- availability information for the specified target.
--
-- IN PARAMETERS
-- target_name_in - name of the host for which data has to be inserted.
-- collect_time_in - timestamp of when the severity was detected.
-- severity_code_in - specified whether the severity is CLEAR/ALERT.
--
-- NOTES
-- This is private routine called only in this package
--
FUNCTION insert_unrch_severities(log_action_name_in IN VARCHAR2,
emd_url_in IN VARCHAR2,
collect_time_in IN DATE,
severity_code_in IN NUMBER,
severity_msg_in IN VARCHAR2,
severity_msg_nlsid_in IN VARCHAR2 DEFAULT NULL,
severity_msg_params_in IN VARCHAR2 DEFAULT NULL)
RETURN NUMBER
IS
mguid MGMT_METRICS.metric_guid%TYPE;
tguids t_guid_list;
mguids t_guid_list;
ttypes t_target_type_list;
agent_guid MGMT_TARGETS.target_guid%TYPE;
l_insert_succ NUMBER := 0;
l_insert_cnt NUMBER := 0;
CURSOR targets_on_agent_cur(emd_url_in VARCHAR2) IS
SELECT t.target_guid, t.target_type, m.metric_guid
FROM mgmt_targets t, mgmt_metrics m
WHERE t.target_type = m.target_type
AND m.type_meta_ver = t.type_meta_ver
AND (m.category_prop_1 = t.category_prop_1 OR m.category_prop_1 = ' ')
AND (m.category_prop_2 = t.category_prop_2 OR m.category_prop_2 = ' ')
AND (m.category_prop_3 = t.category_prop_3 OR m.category_prop_3 = ' ')
AND (m.category_prop_4 = t.category_prop_4 OR m.category_prop_4 = ' ')
AND (m.category_prop_5 = t.category_prop_5 OR m.category_prop_5 = ' ')
AND t.emd_url = emd_url_in
AND t.rep_side_avail = 0
AND m.metric_name = MGMT_GLOBAL.G_AVAIL_METRIC_NAME
AND m.metric_column = MGMT_GLOBAL.G_AVAIL_METRIC_COLUMN;
BEGIN
IF (EMDW_LOG.p_is_info_set)THEN
EMDW_LOG.info('insert_unrch_severities:Entry log_action_name_in = ' || log_action_name_in ||
' emd_url_in = ' || emd_url_in ||
' collect_time_in = ' || TO_CHAR(collect_time_in, HEARTBEAT_TIME_FORMAT) ||
' severity_code_in = ' || severity_code_in ||
' severity_msg_in = ' || severity_msg_in, MODULE_NAME);
END IF;
IF (emd_url_in IS NOT NULL) THEN
-- In the case of multi-agent targets, if this agent happens to be
-- the current master agent for some targets, then we need to pick
-- someother agent that is currently up as the master agent.
IF (severity_code_in = MGMT_GLOBAL.G_SEVERITY_UNREACHABLE_START) THEN
SELECT target_guid INTO agent_guid
FROM mgmt_targets
WHERE emd_url = emd_url_in
AND target_type = mgmt_global.G_AGENT_TARGET_TYPE;
em_master_agent.process_agent_avail_change(agent_guid,
MGMT_GLOBAL.G_STATUS_UNREACHABLE,
collect_time_in);
END IF;
-- Lock the availability records before generating
-- severities
EM_SEVERITY.lock_avail_for_agt(emd_url_in);
OPEN targets_on_agent_cur (emd_url_in);
FETCH targets_on_agent_cur BULK COLLECT
INTO tguids, ttypes, mguids;
IF (targets_on_agent_cur%ISOPEN) THEN
CLOSE targets_on_agent_cur;
END IF;
END IF;
IF (EMDW_LOG.p_is_debug_set) THEN
EMDW_LOG.debug('Got targets on agent..', MODULE_NAME);
END IF;
IF (tguids.EXISTS(1)) THEN
FOR ctr IN tguids.FIRST..tguids.LAST LOOP
IF (EMDW_LOG.p_is_debug_set) THEN
EMDW_LOG.debug('Inserting sev for ttype ' || ttypes(ctr), MODULE_NAME);
END IF;
l_insert_succ := insert_unrch_severity(log_action_name_in, tguids(ctr), mguids(ctr),
collect_time_in, severity_code_in, severity_msg_in,
severity_msg_nlsid_in, severity_msg_params_in );
l_insert_cnt := l_insert_cnt + l_insert_succ;
END LOOP;
END IF;
IF (tguids IS NOT NULL) THEN
tguids.DELETE;
END IF;
IF (ttypes IS NOT NULL) THEN
ttypes.DELETE;
END IF;
IF (mguids IS NOT NULL) THEN
mguids.DELETE;
END IF;
IF (EMDW_LOG.p_is_info_set)THEN
EMDW_LOG.info('insert_unrch_severities:Exit insert_cnt = ' || l_insert_cnt,
MODULE_NAME);
END IF;
RETURN l_insert_cnt;
END insert_unrch_severities;
--
-- PURPOSE
-- Insert severity data for metric name/col 'Response'/'Status'
-- on targets where current availability is unreachable.
-- Inserting severity data for this metric will trigger the
-- availability information for the specified target.
--
-- IN PARAMETERS
-- emd_url_in - emd url of targets for which data has to be inserted.
-- collect_time_in - timestamp of when the severity was detected.
-- severity_code_in - specified whether the severity is CLEAR/ALERT.
--
-- NOTES
-- This is private routine called only in this package
--
FUNCTION insert_unrch_severities2(log_action_name_in IN VARCHAR2,
emd_url_in IN VARCHAR2,
collect_time_in IN DATE,
severity_code_in IN NUMBER,
severity_msg_in IN VARCHAR2)
RETURN NUMBER
IS
tguids t_guid_list;
mguids t_guid_list;
ttypes t_target_type_list;
agent_guid MGMT_TARGETS.target_guid%TYPE;
l_insert_cnt NUMBER := 0;
l_insert_succ NUMBER := 0;
CURSOR unrch_targets_on_agent_cur(emd_url_in VARCHAR2) IS
SELECT t.target_guid, t.target_type, m.metric_guid
FROM mgmt_targets t, mgmt_metrics m, mgmt_current_availability a
WHERE t.target_type = m.target_type
AND t.target_guid = a.target_guid
AND a.current_status = MGMT_GLOBAL.G_STATUS_UNREACHABLE
AND m.type_meta_ver = t.type_meta_ver
AND (m.category_prop_1 = t.category_prop_1 OR m.category_prop_1 = ' ')
AND (m.category_prop_2 = t.category_prop_2 OR m.category_prop_2 = ' ')
AND (m.category_prop_3 = t.category_prop_3 OR m.category_prop_3 = ' ')
AND (m.category_prop_4 = t.category_prop_4 OR m.category_prop_4 = ' ')
AND (m.category_prop_5 = t.category_prop_5 OR m.category_prop_5 = ' ')
AND t.emd_url = emd_url_in
AND t.rep_side_avail = 0
AND m.metric_name = MGMT_GLOBAL.G_AVAIL_METRIC_NAME
AND m.metric_column = MGMT_GLOBAL.G_AVAIL_METRIC_COLUMN;
BEGIN
IF (EMDW_LOG.p_is_info_set)THEN
EMDW_LOG.info('insert_unrch_severities2:Entry log_action_name_in = ' || log_action_name_in ||
' emd_url_in = ' || emd_url_in ||
' collect_time_in = ' || collect_time_in ||
' severity_code_in = ' || severity_code_in ||
' severity_msg_in = ' || severity_msg_in, MODULE_NAME);
END IF;
IF (emd_url_in IS NOT NULL) THEN
OPEN unrch_targets_on_agent_cur (emd_url_in);
FETCH unrch_targets_on_agent_cur BULK COLLECT
INTO tguids, ttypes, mguids;
IF (unrch_targets_on_agent_cur%ISOPEN) THEN
CLOSE unrch_targets_on_agent_cur;
END IF;
END IF;
IF (EMDW_LOG.p_is_debug_set) THEN
EMDW_LOG.debug('Got targets on agent..', MODULE_NAME);
END IF;
IF (tguids.EXISTS(1)) THEN
FOR ctr IN tguids.FIRST..tguids.LAST LOOP
IF (EMDW_LOG.p_is_debug_set) THEN
EMDW_LOG.debug('Inserting sev for ttype ' || ttypes(ctr), MODULE_NAME);
END IF;
l_insert_succ := insert_unrch_severity(log_action_name_in, tguids(ctr), mguids(ctr),
collect_time_in, severity_code_in, severity_msg_in);
l_insert_cnt := l_insert_cnt + l_insert_succ;
END LOOP;
END IF;
IF (tguids IS NOT NULL) THEN
tguids.DELETE;
END IF;
IF (ttypes IS NOT NULL) THEN
ttypes.DELETE;
END IF;
IF (mguids IS NOT NULL) THEN
mguids.DELETE;
END IF;
IF (EMDW_LOG.p_is_info_set)THEN
EMDW_LOG.info('insert_unrch_severities2:Exit insert_cnt = ' || l_insert_cnt,
MODULE_NAME);
END IF;
RETURN l_insert_cnt;
END insert_unrch_severities2;
--
-- PURPOSE
-- Procedure to bootstrap the ping sub system.
-- This procedure sets up the last_checked_ts for all EMDs to current time.
--
PROCEDURE start_ping_system
IS
l_grace_value NUMBER;
BEGIN
IF (EMDW_LOG.p_is_info_set)THEN
EMDW_LOG.info('start_ping_system:Entry', MODULE_NAME);
END IF;
BEGIN
SELECT to_number(parameter_value) INTO l_grace_value
FROM mgmt_parameters
WHERE parameter_name = G_PING_START_GRACE_PROP;
EXCEPTION
WHEN OTHERS THEN
l_grace_value := G_DEFAULT_PING_START_GRACE;
END;
IF (is_ping_system_enabled > 0) THEN
UPDATE mgmt_emd_ping_check
SET last_checked_utc = CAST(SYS_EXTRACT_UTC(SYSTIMESTAMP) AS DATE)
+ l_grace_value/(24*60*60);
END IF;
IF (EMDW_LOG.p_is_info_set)THEN
EMDW_LOG.info('start_ping_system:Exit', MODULE_NAME);
END IF;
END start_ping_system;
PROCEDURE add_target(p_target_guid IN RAW,
p_timezone_region IN VARCHAR2) IS
l_start_time DATE := NULL;
BEGIN
IF (EMDW_LOG.p_is_info_set)THEN
EMDW_LOG.info('add_target:Entry p_target_guid = ' || p_target_guid ||
' p_timezone_region = ' || p_timezone_region, MODULE_NAME);
END IF;
l_start_time := TRUNC( (MGMT_GLOBAL.SYSDATE_TZRGN(p_timezone_region) - 32), 'DD') ;
-- Bug-5955813 : Default last HB time to 32 days back.
INSERT INTO MGMT_EMD_PING
(target_guid, clean_heartbeat_utc, status_sync_utc, emd_uptime_utc,
down_reason_code, down_reason_msg, last_heartbeat_ts)
VALUES
(p_target_guid, l_start_time, l_start_time, l_start_time,
0, S_DEFAULT_DOWN_REASON_MSG, l_start_time);
INSERT INTO MGMT_EMD_PING_CHECK
(target_guid, last_checked_utc)
VALUES
(p_target_guid, MGMT_GLOBAL.SYSDATE_UTC);
IF (EMDW_LOG.p_is_info_set)THEN
EMDW_LOG.info('add_target:Exit', MODULE_NAME);
END IF;
END add_target;
--
--
-- PURPOSE
-- Procedure to bulk update the heartbeats of the Agents
--
PROCEDURE record_batch_heartbeat(
v_agent_hb_arr_in IN EM_AGENT_HEARTBEAT_ARRAY,
v_error_cnt OUT NUMBER,
v_error_info OUT EM_AGENT_HEARTBEAT_RESP_ARRAY) IS
l_agent_errors VARCHAR2(256);
l_agent_guid MGMT_EMD_PING.target_guid%TYPE;
l_agent_status MGMT_EMD_PING.status%TYPE;
l_agent_tzrgn MGMT_TARGETS.timezone_region%TYPE;
l_last_heartbeat_time MGMT_EMD_PING.clean_heartbeat_utc%TYPE;
l_last_unrch_start_ts MGMT_EMD_PING.clean_heartbeat_utc%TYPE;
l_heartbeat_time MGMT_EMD_PING.clean_heartbeat_utc%TYPE;
l_heartbeat_time_utc MGMT_EMD_PING.clean_heartbeat_utc%TYPE;
l_sync_upto_time MGMT_EMD_PING.status_sync_utc%TYPE;
l_sync_upto_time_utc MGMT_EMD_PING.status_sync_utc%TYPE;
l_emd_uptime_utc MGMT_EMD_PING.emd_uptime_utc%TYPE;
l_error_code NUMBER;
l_error_msg VARCHAR2(5000);
l_error_msg_nlsid mgmt_violations.message_nlsid%TYPE;
l_error_msg_params mgmt_violations.message_params%TYPE;
l_start_timestmp TIMESTAMP := SYSTIMESTAMP;
l_clean_hbs NUMBER := 0;
l_dirty_hbs NUMBER := 0;
l_prev_down_reason MGMT_EMD_PING.down_reason_msg%TYPE;
l_down_reason MGMT_EMD_PING.down_reason_msg%TYPE;
l_insert_cnt NUMBER := 0;
BEGIN
IF (EMDW_LOG.p_is_info_set)THEN
EMDW_LOG.info('record_batch_heartbeat:Entry', MODULE_NAME);
END IF;
--
-- If ping system is disabled, do not record the heartbeats
--
IF (is_ping_system_enabled <= 0) THEN
IF (EMDW_LOG.p_is_info_set)THEN
EMDW_LOG.info('record_batch_heartbeat:Exit PING system disabled', MODULE_NAME);
END IF;
RETURN;
END IF;
v_error_cnt := 0;
v_error_info := EM_AGENT_HEARTBEAT_RESP_ARRAY();
l_agent_status := NODE_STATUS_DOWN;
IF (EMDW_LOG.p_is_debug_set) THEN
EMDW_LOG.debug('Entered record_batch_heartbeat procedure :' ||
TO_CHAR(SYSDATE, 'DD-MON-YYYY HH24:MI:SS'), MODULE_NAME);
END IF;
FOR i IN 1..v_agent_hb_arr_in.count
LOOP
BEGIN
IF (EMDW_LOG.p_is_debug_set) THEN
EMDW_LOG.debug('Getting agentguid for i = ' ||
v_agent_hb_arr_in(i).emd_url, MODULE_NAME);
END IF;
l_insert_cnt := 0;
-- GET AGENT GUID and lock the row..
SELECT ping.target_guid, tgt.timezone_region, ping.status, ping.down_reason_msg,
ping.unrch_start_ts, ping.last_heartbeat_ts
INTO l_agent_guid, l_agent_tzrgn, l_agent_status, l_prev_down_reason,
l_last_unrch_start_ts, l_last_heartbeat_time
FROM mgmt_targets tgt, mgmt_emd_ping ping
WHERE tgt.target_guid = ping.target_guid
AND tgt.target_type = MGMT_GLOBAL.G_AGENT_TARGET_TYPE
AND tgt.emd_url = v_agent_hb_arr_in(i).emd_url
FOR UPDATE OF ping.target_guid;
-- perf: replace with direct system function call below to avoid context switch
-- associated with user defined functions
--
-- l_emd_uptime_utc := MGMT_GLOBAL.TO_UTC(
-- to_date(v_agent_hb_arr_in(i).emd_uptime,
-- HEARTBEAT_TIME_FORMAT),
-- l_agent_tzrgn);
l_emd_uptime_utc := CAST(FROM_TZ(CAST(to_date(v_agent_hb_arr_in(i).emd_uptime,
HEARTBEAT_TIME_FORMAT) as TIMESTAMP),
l_agent_tzrgn)
AT TIME ZONE 'GMT' AS DATE);
l_heartbeat_time := to_date(v_agent_hb_arr_in(i).heartbeat_time,
HEARTBEAT_TIME_FORMAT);
--BUG:5955813 - If new HB time is less then prev HB make new HB time same as
-- last unrch start ts/last HB time
-- We don''t do any spl handling for DST fallback time. Even if DST time gets rolled from
-- 1:59:59 to 1:00:59, we change the new HB time to 1:59:59.
IF (l_heartbeat_time < l_last_heartbeat_time) THEN
l_heartbeat_time := NVL(l_last_unrch_start_ts, l_last_heartbeat_time) ;
END IF;
IF (TRIM(v_agent_hb_arr_in(i).agent_errors) IS NOT NULL) THEN
l_down_reason := AGENT_UNRCH_WITH_ERRORS;
l_down_reason := REPLACE(l_down_reason, '{0}', v_agent_hb_arr_in(i).agent_errors);
l_error_msg := AGENT_UNRCH_WITH_ERRORS;
l_error_msg := REPLACE(l_error_msg, '{0}', v_agent_hb_arr_in(i).agent_errors);
l_error_msg_nlsid := NLSID_AGENT_UNRCH_WITH_ERRS;
l_error_msg_params := v_agent_hb_arr_in(i).agent_errors ;
IF (l_agent_status != NODE_STATUS_DOWN) THEN
-- mark the agent as down
UPDATE mgmt_emd_ping
SET last_heartbeat_ts = l_heartbeat_time,
last_heartbeat_utc = CAST(SYS_EXTRACT_UTC(SYSTIMESTAMP) AS DATE),
emd_uptime_utc = l_emd_uptime_utc,
heartbeat_recorder_url = SUBSTR(v_agent_hb_arr_in(i).recorder_url, 0, 256),
status = NODE_STATUS_DOWN,
down_reason_msg = l_down_reason,
down_reason_code = CODE_AGENT_UNRCH_WITH_ERR,
unrch_start_ts = l_heartbeat_time,
ping_job_name = NULL,
job_submit_time = NULL
WHERE target_guid = l_agent_guid;
-- Insert unreachable start severity records.
l_insert_cnt := insert_unrch_severities(HEARTBEAT_RECORD_ACTION,
v_agent_hb_arr_in(i).emd_url,
l_heartbeat_time,
MGMT_GLOBAL.G_SEVERITY_UNREACHABLE_START,
SUBSTR(l_error_msg, 0, 400),
l_error_msg_nlsid,
l_error_msg_params);
-- Call job system Call back.
MGMT_JOB_ENGINE.handle_emd_state_change(v_agent_hb_arr_in(i).emd_url, NODE_STATUS_DOWN);
ELSIF (l_down_reason != l_prev_down_reason) THEN
-- record the severity as the down error has changed.
l_insert_cnt := insert_unrch_severities(HEARTBEAT_RECORD_ACTION,
v_agent_hb_arr_in(i).emd_url,
l_heartbeat_time,
MGMT_GLOBAL.G_SEVERITY_UNREACHABLE_START,
SUBSTR(l_error_msg, 0, 4000),
l_error_msg_nlsid,
l_error_msg_params);
END IF;
ELSIF (v_agent_hb_arr_in(i).is_clean_hb = 'Y') THEN
l_clean_hbs := l_clean_hbs + 1;
-- perf: replace with direct system function call below to avoid context switch
-- associated with user defined functions
--
-- l_heartbeat_time_utc := MGMT_GLOBAL.TO_UTC(l_heartbeat_time,
-- l_agent_tzrgn);
l_heartbeat_time_utc := CAST(FROM_TZ(CAST(l_heartbeat_time as TIMESTAMP),
l_agent_tzrgn)
AT TIME ZONE 'GMT' AS DATE);
l_sync_upto_time := to_date(v_agent_hb_arr_in(i).status_sent_upto,
HEARTBEAT_TIME_FORMAT);
-- perf: replace with direct system function call below to avoid context switch
-- associated with user defined functions
--
-- l_sync_upto_time_utc := MGMT_GLOBAL.TO_UTC(l_sync_upto_time,
-- l_agent_tzrgn);
l_sync_upto_time_utc := CAST(FROM_TZ(CAST(l_sync_upto_time as TIMESTAMP),
l_agent_tzrgn)
AT TIME ZONE 'GMT' AS DATE);
-- RECORD 'CLEAN' HEARTBEAT
UPDATE mgmt_emd_ping
SET last_heartbeat_ts = l_heartbeat_time,
last_heartbeat_utc = CAST(SYS_EXTRACT_UTC(SYSTIMESTAMP) AS DATE),
clean_heartbeat_utc = l_heartbeat_time_utc,
status_sync_utc = l_sync_upto_time_utc,
emd_uptime_utc = l_emd_uptime_utc,
heartbeat_recorder_url = SUBSTR(v_agent_hb_arr_in(i).recorder_url, 0, 256),
unrch_start_ts = null
WHERE target_guid = l_agent_guid;
agt_up_on_blk_end(p_target_guid => l_agent_guid,
p_collection_timestamp => l_heartbeat_time) ;
-- MARK the agent as UP if the current PING status is NOT UP..
IF NOT (l_agent_status = NODE_STATUS_UP) THEN
-- mark_emd_up(l_agent_guid);
-- Set the ping status of the node as UP,
-- DONOT reset the job details..
UPDATE mgmt_emd_ping
SET status = NODE_STATUS_UP,
down_reason_code = 0,
down_reason_msg = S_DEFAULT_DOWN_REASON_MSG
WHERE target_guid = l_agent_guid;
END IF;
IF (l_agent_status = NODE_STATUS_DOWN) THEN
-- Insert data and severity records based on last heartbeat
-- as the Agent was up and heartbeating at that time.
BEGIN
insert_metric_data(l_agent_guid,
l_heartbeat_time,
1);
EXCEPTION
WHEN OTHERS THEN
mgmt_log.log_error(HEARTBEAT_RECORD_ACTION, E_UNKNOWN_HEARTBEAT,
'Error inserting metric data for tguid = ' ||
l_agent_guid || ' ERROR = ' || SQLERRM );
END;
-- Insert unreachable clear severity records.
l_insert_cnt := insert_unrch_severities(HEARTBEAT_RECORD_ACTION,
v_agent_hb_arr_in(i).emd_url,
l_heartbeat_time,
MGMT_GLOBAL.G_SEVERITY_UNREACHABLE_CLEAR,
AGENT_UNRCH_CLEAR_MSG);
ELSE
-- Insert unreachable clear severity records on targets where
-- current availability is unreachable
l_insert_cnt := insert_unrch_severities2(HEARTBEAT_RECORD_ACTION,
v_agent_hb_arr_in(i).emd_url,
l_heartbeat_time,
MGMT_GLOBAL.G_SEVERITY_UNREACHABLE_CLEAR,
AGENT_UNRCH_CLEAR_MSG);
END IF; -- IF NOT (l_agent_status = UP)
IF (l_insert_cnt > 0) THEN
-- Call job system callback, if any unrch clears are inserted
MGMT_JOB_ENGINE.handle_emd_state_change(
v_agent_hb_arr_in(i).emd_url, NODE_STATUS_UP);
END IF;
-- Get availability lock to update availability marker
EM_SEVERITY.lock_avail_for_agt(p_emd_url => v_agent_hb_arr_in(i).emd_url);
-- TO DO: Use l_sync_utc timestamp in GMT..
UPDATE MGMT_AVAILABILITY_MARKER
SET marker_timestamp = l_sync_upto_time
WHERE marker_timestamp <= l_sync_upto_time
AND target_guid IN (SELECT /*+ INDEX(t mgmt_targets_idx_02) */ t.target_guid
FROM mgmt_targets t
WHERE t.emd_url = v_agent_hb_arr_in(i).emd_url
AND t.rep_side_avail = 0
AND NOT EXISTS (SELECT 1 from mgmt_blackout_windows bw
WHERE bw.target_guid = t.target_guid
AND bw.start_time <= l_sync_upto_time
AND (bw.status NOT IN
(MGMT_BLACKOUT_ENGINE.BLK_STATE_STOPPED,
MGMT_BLACKOUT_ENGINE.BLK_STATE_ENDED)
)
)
AND NOT EXISTS (SELECT 1 from MGMT_GENSVC_AVAIL_CONFIG bcnav
WHERE bcnav.target_guid = t.target_guid)
);
ELSE
-- RECORD 'DIRTY' HEARTBEAT..
l_dirty_hbs := l_dirty_hbs + 1;
UPDATE mgmt_emd_ping
SET last_heartbeat_ts = l_heartbeat_time,
last_heartbeat_utc = CAST(SYS_EXTRACT_UTC(SYSTIMESTAMP) AS DATE),
emd_uptime_utc = l_emd_uptime_utc,
heartbeat_recorder_url = SUBSTR(v_agent_hb_arr_in(i).recorder_url, 0, 256)
WHERE target_guid = l_agent_guid;
END IF;
COMMIT; -- commit, so that the lock is gone
EXCEPTION
WHEN NO_DATA_FOUND THEN
ROLLBACK; -- rollback any changes done for this Agent
-- Dont log this error as this is a valid case
-- during agent bootstrap
l_error_msg := SQLERRM;
v_error_cnt := v_error_cnt + 1;
v_error_info.extend(1);
v_error_info(v_error_cnt) :=
EM_AGENT_HEARTBEAT_RESP(v_agent_hb_arr_in(i).emd_url,
0, l_error_msg);
WHEN OTHERS THEN
ROLLBACK; -- rollback any changes done for this Agent
IF (EMDW_LOG.p_is_debug_set) THEN
EMDW_LOG.debug('Error received processing emd_url = ' ||
v_agent_hb_arr_in(i).emd_url || ' message = ' ||
SQLERRM, MODULE_NAME);
END IF;
-- If 3113, raise_exception and retry
l_error_code := SQLCODE;
IF (l_error_code = -3113) THEN
-- Stop recording and raise the exception on ORA-03113 error.
RAISE;
ELSE
l_error_msg := SQLERRM;
mgmt_log.log_error(HEARTBEAT_RECORD_ACTION, E_UNKNOWN_HEARTBEAT,
EM_UNKNOWN_HEARTBEAT || v_agent_hb_arr_in(i).emd_url ||
'. ' || l_error_msg);
v_error_cnt := v_error_cnt + 1;
v_error_info.extend(1);
v_error_info(v_error_cnt) :=
EM_AGENT_HEARTBEAT_RESP(v_agent_hb_arr_in(i).emd_url,
0, l_error_msg);
END IF;
-- CONTINUE processing other heartbeats..
END;
END LOOP;
IF (is_detailed_logging_enabled > 0) THEN
-- Log the time taken
MGMT_LOG.LOG_PERFORMANCE(HEARTBEAT_RECORD_ACTION,
MGMT_GLOBAL.ELAPSED_TIME_MSEC(l_start_timestmp, SYSTIMESTAMP),
l_start_timestmp, 'Y', HEARTBEAT_CNT_NAME, v_agent_hb_arr_in.count);
MGMT_LOG.LOG_PERFORMANCE(HEARTBEAT_RECORD_ACTION, 0,
l_start_timestmp, 'N', CLEAN_HEARTBEAT_CNT_NAME, l_clean_hbs);
MGMT_LOG.LOG_PERFORMANCE(HEARTBEAT_RECORD_ACTION, 0,
l_start_timestmp, 'N', DIRTY_HEARTBEAT_CNT_NAME, l_dirty_hbs);
MGMT_LOG.LOG_PERFORMANCE(HEARTBEAT_RECORD_ACTION, 0,
l_start_timestmp, 'N', HEARTBEAT_ERROR_CNT_NAME, v_error_info.count);
END IF;
IF (EMDW_LOG.p_is_info_set)THEN
EMDW_LOG.info('record_batch_heartbeat:Exit v_error_cnt = ' || v_error_cnt, MODULE_NAME);
END IF;
END record_batch_heartbeat;
PROCEDURE DBMSJOB_EXTENDED_SQL_TRACE_ON(p_value IN BOOLEAN) IS
BEGIN
MGMT_SQLTRACE.EXTENDED_SQL_TRACE_ON(EST_PING_NAME, p_value);
END DBMSJOB_EXTENDED_SQL_TRACE_ON;
--
--
-- PURPOSE
-- Procedure to mark the nodes as UP or 'potentially' DOWN based on
-- whether the EMD performed heartbeat in the last
-- max_emd_inactive_time seconds.
--
-- Pick up nodes that are either down or UP, not 'potentially' DOWN.
--
PROCEDURE mark_node_status
IS
--
-- This cursor gets the list of HOSTs that are not blacked out AND
-- that are marked as UP and not sent a heartbeat in the last max_inactive_time OR
--
CURSOR emd_list(c_curr_date_utc DATE) IS
SELECT /*+ INDEX(ping mgmt_emd_ping_idx_01) */
tgt.target_guid, tgt.emd_url, ping.status
FROM mgmt_emd_ping ping,
mgmt_emd_ping_check pingc,
mgmt_targets tgt,
mgmt_current_availability cavail
WHERE ping.target_guid = tgt.target_guid
AND ping.target_guid = cavail.target_guid
AND ping.target_guid = pingc.target_guid
AND tgt.target_type = MGMT_GLOBAL.G_AGENT_TARGET_TYPE
AND ping.max_inactive_time > 0
AND cavail.current_status != MGMT_GLOBAL.G_STATUS_BLACKOUT
AND ping.status = NODE_STATUS_UP
AND ping.ping_job_name IS NULL
AND ( (c_curr_date_utc-ping.last_heartbeat_utc)*24*60*60 >
ping.max_inactive_time)
AND ( (c_curr_date_utc-pingc.last_checked_utc)*24*60*60 >=
(ping.max_inactive_time)/2)
ORDER BY tgt.emd_url;
l_target_guid MGMT_TARGETS.target_guid%TYPE;
l_job_name MGMT_JOB.job_name%TYPE;
l_job_targets MGMT_JOB_TARGET_LIST;
l_job_params MGMT_JOB_PARAM_LIST;
l_job_id MGMT_JOB.job_id%TYPE;
l_execution_id MGMT_JOB_EXEC_SUMMARY.execution_id%TYPE;
l_schedule MGMT_JOB_SCHEDULE_RECORD;
l_down_emd_cnt NUMBER := 0;
l_down_emd_list MGMT_JOB_VECTOR_PARAMS := MGMT_JOB_VECTOR_PARAMS();
l_hb_rcdr_cnt NUMBER := 0;
l_updated_lscheck NUMBER := 0;
l_curr_date DATE;
l_tmstmptz TIMESTAMP WITH TIME ZONE;
l_error_code NUMBER;
l_error_msg VARCHAR2(5000);
l_start_timestmp TIMESTAMP := SYSTIMESTAMP;
BEGIN
MGMT_SQLTRACE.EXTENDED_SQL_TRACE(EST_PING_NAME);
IF (EMDW_LOG.p_is_info_set)THEN
EMDW_LOG.info('mark_node_status:Entry', MODULE_NAME);
END IF;
-- If ping system is not enabled, do not process the heartbeats..
DBMS_APPLICATION_INFO.SET_MODULE(
module_name => MODULE_NAME,
action_name => MARK_NODE_STATUS_ACTION);
SELECT SYS_EXTRACT_UTC(SYSTIMESTAMP) INTO l_tmstmptz FROM DUAL;
l_curr_date := CAST(l_tmstmptz AS DATE);
l_job_name := PINGCFM_JOBNAME_PREFIX || SYS_GUID();
BEGIN
l_hb_rcdr_cnt := 0;
l_updated_lscheck := 0;
BEGIN
--
-- Call job system to process emd queue entries.
-- Should be done before as this would drop some
-- inactive middletiers.
--
MGMT_JOB_ENGINE.process_emd_queue_entries;
EXCEPTION
WHEN OTHERS THEN
mgmt_log.log_error(MARK_NODE_STATUS_ACTION, e_process_emd_queue,
em_process_emd_queue || SQLERRM);
END;
-- If no hearbeart recorders are registered, no status check
-- should be done.
l_hb_rcdr_cnt := MGMT_FAILOVER.get_active_mt_count;
-- If ping system is not enabled, do not process the heartbeats..
IF ( (l_hb_rcdr_cnt > 0) AND (is_ping_system_enabled > 0) ) THEN
FOR emd_list_rec IN emd_list(l_curr_date) LOOP
l_target_guid := emd_list_rec.target_guid;
-- Lock and update
BEGIN
SELECT target_guid INTO l_target_guid
FROM mgmt_emd_ping
WHERE target_guid = l_target_guid
FOR UPDATE NOWAIT;
-- 5448666: There could be delays in recording agent heartbeats.
-- So, set unrch_start_ts to last_heartbeat_ts +1sec instead of
-- +max_inactive_time. It'll prevent raise of UNREACHABLE_START
-- and UNREACHABLE_CLEAR severities in wrong order.
UPDATE mgmt_emd_ping
SET status = NODE_STATUS_POTENTIALLY_DOWN,
down_reason_code = 0,
down_reason_msg = S_DEFAULT_DOWN_REASON_MSG,
ping_job_name = l_job_name,
job_submit_time = SYSDATE,
unrch_start_ts = last_heartbeat_ts + (1/(60*60*24))
WHERE target_guid = l_target_guid;
l_down_emd_cnt := l_down_emd_cnt + 1;
l_down_emd_list.extend(1);
l_down_emd_list(l_down_emd_cnt) := emd_list_rec.emd_url;
EXCEPTION
WHEN NO_DATA_FOUND THEN
-- Dont log this error, as this could happen
-- if the target is being deleted..
NULL;
WHEN OTHERS THEN
l_error_code := SQLCODE;
IF (l_error_code != -54) THEN
-- Donot log Resource busy (ORA-00054) error
mgmt_log.log_error(MARK_NODE_STATUS_ACTION, e_lock_update_status,
em_lock_update_status || emd_list_rec.emd_url || '. ' || SQLERRM);
END IF;
END;
END LOOP;
--
-- Submit job with all potentially down EMDs.
--
IF (l_down_emd_cnt > 0) THEN
-- This job has no targets.
l_job_targets := MGMT_JOB_TARGET_LIST();
-- Its only parameter is the emd_url_list
l_job_params := MGMT_JOB_PARAM_LIST();
l_job_params.extend(1);
l_job_params(1) := MGMT_JOB_PARAM_RECORD(PINGCFM_JOBPARAM_NAME, 0, null,
l_down_emd_list);
-- Its schedule is immediate.
l_schedule := MGMT_JOBS.get_job_schedule_record(MGMT_JOBS.ONE_TIME_FREQUENCY_CODE,
SYSDATE, null, 0, 0, 0, null, null,
MGMT_JOBS.TIMEZONE_REPOSITORY,
0, 0, null);
-- Submit the job as system job so that system thread pool is used
MGMT_JOBS.submit_job(l_job_name,
PINGCFM_JOB_DESCRIPTION,
PINGCFM_JOBTYPE,
l_job_targets,
l_job_params,
l_schedule,
l_job_id,
l_execution_id,
USER,
MGMT_JOB_ENGINE.SYSTEM_JOB_RETRY);
END IF;
--
-- Update the last status checked time stamp for UP and DOWN EMDs
-- only, dont update the timestamp for potentially down EMDs
--
-- move to end of if so the rows in the table are not locked that long.
UPDATE mgmt_emd_ping_check
SET last_checked_utc = l_curr_date
WHERE target_guid IN (SELECT ping.target_guid
FROM mgmt_emd_ping ping, mgmt_emd_ping_check pingc
WHERE ping.target_guid = pingc.target_guid
AND ping.max_inactive_time > 0
AND ( (ping.status = NODE_STATUS_UP) OR
(ping.status = NODE_STATUS_DOWN) )
AND (l_curr_date - pingc.last_checked_utc)*24*60*60 >=
(ping.max_inactive_time)/2);
l_updated_lscheck := 1;
COMMIT; -- commit all ping changes and submit_job
-- Cleanup the dead job from mgmt_emd_ping table
-- This proc gets called every minute but execute following
-- stmt only once in an hour i.e on 59th Minute.
IF TO_NUMBER(TO_CHAR(SYSDATE, 'MI')) = 59 THEN
FOR ping_rec IN (SELECT p.target_guid
FROM mgmt_emd_ping p
WHERE ping_job_name IS NOT NULL
AND NOT EXISTS ( SELECT NULL
FROM MGMT_JOB j
WHERE j.job_name = p.ping_job_name )
)
LOOP
UPDATE mgmt_emd_ping
SET ping_job_name = NULL,
job_submit_time = NULL,
status = NODE_STATUS_UP
WHERE target_guid = ping_rec.target_guid;
COMMIT;
END LOOP;
END IF;
END IF; -- If (l_hb_rcdr_cnt > 0) (ping system enabled > 0)
EXCEPTION
WHEN OTHERS THEN
ROLLBACK;
IF (l_updated_lscheck = 0) THEN
mgmt_log.log_error(MARK_NODE_STATUS_ACTION, e_mark_node_status,
em_mark_node_status || SQLERRM);
ELSE
mgmt_log.log_error(MARK_NODE_STATUS_ACTION, e_pingjob_submission,
em_pingjob_submission || SQLERRM);
END IF;
END;
-- Log the time taken
MGMT_LOG.LOG_PERFORMANCE(MARK_NODE_STATUS_ACTION,
MGMT_GLOBAL.ELAPSED_TIME_MSEC(l_start_timestmp, SYSTIMESTAMP),
l_start_timestmp, 'Y', MARKED_AGENT_CNT_NAME, l_down_emd_cnt);
-- Reset the module info
DBMS_APPLICATION_INFO.SET_MODULE(
module_name => ' ',
action_name => ' ');
IF (EMDW_LOG.p_is_info_set)THEN
EMDW_LOG.info('mark_node_status:Exit', MODULE_NAME);
END IF;
END mark_node_status;
--
-- Procedure to record the failure details for potentailly down
--
PROCEDURE update_emd_status
(
v_emd_url_in IN VARCHAR2,
v_ping_status_in IN NUMBER,
v_error_msg_1_in IN VARCHAR2 DEFAULT NULL,
v_error_msg_2_in IN VARCHAR2 DEFAULT NULL
)
IS
l_target_guid MGMT_TARGETS.target_guid%TYPE;
l_timezone_region MGMT_TARGETS.timezone_region%TYPE;
l_avail_status MGMT_AVAILABILITY.current_status%TYPE;
l_status MGMT_EMD_PING.status%TYPE;
l_max_inactive_time MGMT_EMD_PING.max_inactive_time%TYPE;
l_last_heartbeat_ts MGMT_EMD_PING.last_heartbeat_ts%TYPE;
l_unrch_start_ts MGMT_EMD_PING.unrch_start_ts%TYPE;
l_start_timestmp TIMESTAMP := SYSTIMESTAMP;
l_error_msg VARCHAR2(32576);
l_error_msg_nlsid mgmt_violations.message_nlsid%TYPE;
l_error_msg_params mgmt_violations.message_params%TYPE;
l_insert_cnt NUMBER := 0;
BEGIN
IF (EMDW_LOG.p_is_info_set)THEN
EMDW_LOG.info('update_emd_status:Entry v_emd_url = ' || v_emd_url_in ||
' v_ping_status_in = ' || v_ping_status_in ||
' v_error_msg_1_in = ' || v_error_msg_1_in ||
' v_error_msg_2_in = ' || v_error_msg_2_in, MODULE_NAME);
END IF;
BEGIN
BEGIN
SELECT p.target_guid, t.timezone_region, p.status, p.max_inactive_time,
p.last_heartbeat_ts, p.unrch_start_ts, cavail.current_status
INTO l_target_guid, l_timezone_region, l_status, l_max_inactive_time,
l_last_heartbeat_ts, l_unrch_start_ts, l_avail_status
FROM mgmt_emd_ping p, mgmt_targets t, mgmt_current_availability cavail
WHERE t.emd_url = v_emd_url_in
AND p.target_guid = t.target_guid
AND p.target_guid = cavail.target_guid
-- AND p.status = NODE_STATUS_POTENTIALLY_DOWN
AND t.target_type = MGMT_GLOBAL.G_AGENT_TARGET_TYPE
FOR UPDATE OF p.target_guid;
EXCEPTION
WHEN NO_DATA_FOUND THEN
-- Dont log this error, as this could happen
-- if the metadata is already deleted for the agent
-- or if the agent started heartbeating and is marked as UP.
IF (EMDW_LOG.p_is_info_set)THEN
EMDW_LOG.info('update_emd_status:Exit Either metadata is already deleted for' ||
' the agent or agent started heartbeating and is marked as UP', MODULE_NAME);
END IF;
RETURN;
END;
IF (l_status = NODE_STATUS_UP) THEN
-- The ping status of the node is already UP, Do nothing
-- This can happen because the agent can start heartbeating before the reverse-ping job
-- finished.
-- Since, we already received a clean heartbeat, retain the status as UP
-- just cleanup the ping job details..
UPDATE mgmt_emd_ping
SET ping_job_name = NULL,
job_submit_time = NULL
WHERE target_guid = l_target_guid;
ELSIF (l_status = NODE_STATUS_DOWN) THEN
-- The ping status of the node is already DOWN, Do nothing
-- This can happen because another reverse ping job might have already
-- marked the node as DOWN
-- So, leave the node status as DOWN, clean up the ping job details..
UPDATE mgmt_emd_ping
SET ping_job_name = NULL,
job_submit_time = NULL
WHERE target_guid = l_target_guid;
ELSE
-- The ping status of the node is POTENTIALLY DOWN,
--
-- If reverse ping succeeded or the node is currently in BLACKOUT,
-- set the ping status to UP
--
IF ( (v_ping_status_in = 0) OR
(l_avail_status = MGMT_GLOBAL.G_STATUS_BLACKOUT) ) THEN
IF (EMDW_LOG.p_is_debug_set) THEN
EMDW_LOG.debug('Marking the Agent ' ||
v_emd_url_in || ' as UP.', MODULE_NAME) ;
END IF;
-- mark_emd_up(l_target_guid);
-- As the ping succeeded, mark the ping status of the node as UP
-- Reset down reason details, unreach time and reverse ping job details
UPDATE mgmt_emd_ping
SET status = NODE_STATUS_UP,
down_reason_code = 0,
down_reason_msg = S_DEFAULT_DOWN_REASON_MSG,
unrch_start_ts = NULL,
ping_job_name = NULL,
job_submit_time = NULL
WHERE target_guid = l_target_guid;
ELSE
IF (EMDW_LOG.p_is_debug_set) THEN
EMDW_LOG.debug('Updating the down reason for Agent ' ||
v_emd_url_in, MODULE_NAME );
END IF;
-- Format a severity msg based on the v_ping_status_in as follows:
-- v_ping_status_in = -1 Agent Unreachable, Host Up.
-- v_ping_status_in = -2 Agent Unreachable, Host Unreachable.
-- v_ping_status_in = -3 Agent configured to upload to different repository.
-- v_ping_status_in = -4 Broken Agent to OMS communication.
IF v_ping_status_in = CODE_AGENT_UNRCH_HOST_UP THEN
l_error_msg := AGENT_UNRCH_HOST_UP ;
l_error_msg := REPLACE(l_error_msg, '{0}', v_error_msg_1_in);
l_error_msg_nlsid := NLSID_AGENT_UNRCH_HOST_UP ;
l_error_msg_params := v_error_msg_1_in ;
ELSIF v_ping_status_in = CODE_AGENT_UNRCH_HOST_UNRCH THEN
l_error_msg := AGENT_UNRCH_HOST_UNRCH;
l_error_msg := REPLACE(l_error_msg, '{0}', v_error_msg_1_in);
l_error_msg := REPLACE(l_error_msg, '{1}', v_error_msg_2_in);
l_error_msg_nlsid := NLSID_AGENT_UNRCH_HOST_UNRCH;
l_error_msg_params := v_error_msg_1_in ||'&'||v_error_msg_2_in;
ELSIF v_ping_status_in = CODE_AGENT_UNRCH_MISCFGD THEN
l_error_msg := AGENT_UNRCH_MISCFGD;
l_error_msg_nlsid := NLSID_AGENT_UNRCH_MISCFGD ;
l_error_msg_params := NULL;
ELSIF v_ping_status_in = CODE_BROKEN_AGENT_OMS_COMM THEN
l_error_msg := BROKEN_AGENT_OMS_COMM;
l_error_msg := REPLACE(l_error_msg, '{0}', v_error_msg_1_in);
l_error_msg_nlsid := NLSID_BROKEN_AGENT_OMS_COMM;
l_error_msg_params := v_error_msg_1_in;
END IF;
-- mark_emd_down(l_target_guid, p_agent_status, l_error_msg);
-- Reverse ping failed, so record the details,
-- Reset the ping job details
--Donot rset unrch start ts as we need it when recording next HB and to use the same
-- if HB clock has moved back in time.
UPDATE mgmt_emd_ping
SET status = NODE_STATUS_DOWN,
down_reason_code = v_ping_status_in,
down_reason_msg = l_error_msg,
ping_job_name = NULL,
job_submit_time = NULL
WHERE target_guid = l_target_guid;
-- 5448666: There could be delays in recording agent heartbeats.
-- So, set unrch_start_ts to last_heartbeat_ts +1sec instead of
-- +max_inactive_time. It'll prevent raise of UNREACHABLE_START
-- and UNREACHABLE_CLEAR severities in wrong order.
IF (l_unrch_start_ts IS NULL) THEN
l_unrch_start_ts := l_last_heartbeat_ts + (1/(60*60*24));
END IF;
BEGIN
insert_metric_data(l_target_guid,
l_unrch_start_ts,
0);
EXCEPTION
WHEN OTHERS THEN
mgmt_log.log_error(UPDATE_AGENT_ACTION, e_update_status,
'Error inserting metric data for tguid = ' ||
l_target_guid || ' ERROR = ' || SQLERRM );
END;
-- It is ONLY now can we confirm that the node is down.
-- So we should use the current targets timestamp here.
--
-- Agent has been in unreachability from lastHeartBeat + (inactive_time/2)
-- Both unreachable start and unreachble end should use the same clock
-- to avoid clock skew problems.
l_insert_cnt := insert_unrch_severities(UPDATE_AGENT_ACTION,
v_emd_url_in,
l_unrch_start_ts,
MGMT_GLOBAL.G_SEVERITY_UNREACHABLE_START,
SUBSTR(l_error_msg, 0, 4000),
l_error_msg_nlsid,
l_error_msg_params
);
-- Call job system Call back.
MGMT_JOB_ENGINE.handle_emd_state_change(v_emd_url_in, NODE_STATUS_DOWN);
END IF;
END IF;
EXCEPTION
WHEN OTHERS THEN
mgmt_log.log_error(UPDATE_AGENT_ACTION, e_update_status,
em_update_status || v_emd_url_in || '. ' || SQLERRM);
RAISE_APPLICATION_ERROR(-20000, 'EMD = ' || v_emd_url_in ||
' ERROR = ' || SQLERRM);
END;
-- Log the time taken
MGMT_LOG.LOG_PERFORMANCE(UPDATE_AGENT_ACTION,
MGMT_GLOBAL.ELAPSED_TIME_MSEC(l_start_timestmp, SYSTIMESTAMP),
l_start_timestmp, 'Y', CONFIRMED_AGENT_CNT_NAME, 1);
IF (EMDW_LOG.p_is_info_set)THEN
EMDW_LOG.info('update_emd_status:Exit', MODULE_NAME);
END IF;
END update_emd_status;
FUNCTION get_default_down_reason_msg
RETURN VARCHAR2
IS
l_down_reason_msg mgmt_emd_ping.down_reason_msg%TYPE;
BEGIN
-- Start with 32 chars 12345678901234567890123456789012
l_down_reason_msg := ' ';
l_down_reason_msg := l_down_reason_msg || l_down_reason_msg; -- 64 chars
l_down_reason_msg := l_down_reason_msg || l_down_reason_msg; -- 128 chars
l_down_reason_msg := l_down_reason_msg || l_down_reason_msg; -- 256 chars
l_down_reason_msg := l_down_reason_msg || l_down_reason_msg; -- 512 chars
l_down_reason_msg := l_down_reason_msg || l_down_reason_msg; -- 1024 chars
RETURN l_down_reason_msg;
END get_default_down_reason_msg;
/*
** Bug6267173 - On blackout end, Agent does not state its own new state upon
** clearstate. The only option is to force CLEAR severity on receipt of CLEAN HB and if
** current severity is DOWN
*/
PROCEDURE agt_up_on_blk_end(p_target_guid RAW, p_collection_timestamp DATE)
IS
l_chk NUMBER := 0;
BEGIN
-- To speed up the HB recording process Agent R/S guid has been stored as constant
SELECT count(*)
INTO l_chk
FROM mgmt_current_violation
WHERE target_guid = p_target_guid
AND policy_guid = AGT_RS_METRIC_GUID
AND violation_level = MGMT_GLOBAL.G_SEVERITY_CRITICAL ;
IF l_chk = 1 THEN
BEGIN
MGMT_VIOLATION.log_violation
(
p_target_guid => p_target_guid,
p_policy_guid => AGT_RS_METRIC_GUID,
p_collection_timestamp => p_collection_timestamp,
p_violation_level => MGMT_GLOBAL.G_SEVERITY_CLEAR,
p_message => AGENT_CLR_OMS_DETECTED,
p_message_nlsid => 'EM__SYS__PING_AGENT_CLR_OMS_DETECTED' );
EXCEPTION
WHEN OTHERS THEN
IF (EMDW_LOG.p_is_debug_set)THEN
EMDW_LOG.debug('agt_up_on_blk_end: Error inserting CLEAR for Agent' ||
' (target_guid = ' || p_target_guid || ')' ||
' (metric_guid = ' || AGT_RS_METRIC_GUID || ')' ||
' (error msg = ' || SQLERRM || ')',
MODULE_NAME);
END IF;
END;
END IF;
END agt_up_on_blk_end;
BEGIN
S_DEFAULT_DOWN_REASON_MSG := get_default_down_reason_msg;
END EM_PING;
/
show errors;