Rem drv: Rem Rem $Header: emcore/source/oracle/sysman/emdrep/sql/core/latest/ping/ping_pkgbodys.sql /st_emcore_10.2.0.4.2db11.2/1 2009/02/26 20:39:36 jsadras Exp $ Rem Rem ping_pkgbodys.sql Rem Rem Copyright (c) 2002, 2009, Oracle and/or its affiliates. Rem All rights reserved. Rem Rem NAME Rem ping_pkgbodys.sql - Rem Rem DESCRIPTION Rem Rem Rem NOTES Rem Rem Rem MODIFIED (MM/DD/YY) Rem jsadras 11/12/08 - Perf fixes Rem pmodi 07/31/07 - Backport -Bug-6267173 - Insert CLEAR on clean HB Rem if current severity of Agent is DOWN Rem pmodi 03/29/07 - Backport pmodi_bug-5924953+4773137 from main Rem mkiran 02/10/07 - 5448666: Fix unrch_start_ts for Rem potentially down agents Rem scgrover 07/12/06 - Backport scgrover_bug-5043773 from main Rem rpinnama 08/28/05 - Fix 4538620 : Call job system callback if any Rem unrch severties are inserted Rem rkpandey 07/29/05 - Modify update_emd_status Rem rpinnama 07/28/05 - Fix 4415154 : Do not log unreach clear messages Rem if they are before marker time Rem rkpandey 07/21/05 - Bug 3369474: Handle agent_errors Rem rpinnama 07/07/05 - Fix 3317965 : Lock availability while recording Rem heartbeats Rem scgrover 07/07/05 - add extended sql trace Rem gsbhatia 07/01/05 - New repmgr header impl Rem gsbhatia 06/26/05 - New repmgr header impl Rem pmodi 06/20/05 - Bug:4389284- Change for NLS Message Rem rpinnama 06/06/05 - Fix 4397891 : default down reason to 1024 chars Rem so as to avoid chained rows on this table Rem rpinnama 06/14/05 - Add trace messages Rem jsadras 03/10/05 - Bug:4186866: Remove timezone_delta Rem neearora 03/02/05 - Added entry for emdw_log Rem rpinnama 02/17/05 - Guard against errors in job_engine package Rem gsbhatia 02/13/05 - updating repmgr header Rem gsbhatia 02/09/05 - updating repmgr header Rem gsbhatia 02/07/05 - updating repmgr header Rem scgrover 01/27/05 - move update statement Rem ktlaw 01/11/05 - add repmgr header Rem rpinnama 12/16/04 - Fix 4069628: Store the reverse ping job name Rem snakai 11/22/04 - update beacon avail Rem rpinnama 09/13/04 - mgmt_severity -> mgmt_violations Rem gan 08/19/04 - use active mt count Rem dsahrawa 06/22/04 - use MGMT_JOBS.get_job_schedule_record Rem rpinnama 04/15/04 - Raise exception on ORA-3113, while recording Rem lgloyd 11/12/03 - Rem lgloyd 11/12/03 - perf: remove user defined fn calls Rem rzazueta 10/30/03 - Add procedures for detailed logging Rem rzazueta 10/29/03 - Use mgmt_global.elapsed_time_msec function Rem rpinnama 10/15/03 - Remove special checks for host Rem skini 09/09/03 - Change in schedule_record structure Rem rzazueta 08/25/03 - Fix bug 2992498 Rem streddy 04/24/03 - Master agent changes Rem aholser 04/09/03 - move set_inactive_time to sdk Rem streddy 04/07/03 - Handle repository only targets when updating marker Rem snakai 06/26/03 - do not update the marker for beacon avail targets Rem rpinnama 05/05/03 - truncate the recorder url Rem rpinnama 04/07/03 - FIx bug 2859446 : Improve hb record performance Rem sgrover 03/19/03 - remove load_timestamp Rem rpinnama 02/26/03 - Rem rpinnama 03/14/03 - Log time taken in milliseconds Rem ancheng 02/14/03 - target version fix Rem rpinnama 02/06/03 - Support for unrch_start_time Rem rpinnama 02/03/03 - Fix bug 2779377; Do not record heartbeats for Rem potentially down agents. Rem rpinnama 01/13/03 - Add support for disabling ping system Rem rpinnama 11/08/02 - Add add_agent API Rem rpinnama 11/01/02 - Record heartbeats without logging errors Rem skini 11/07/02 - Check for hosts in pending status Rem rpinnama 10/23/02 - Record cleanheartbeats and error counts Rem rpinnama 10/18/02 - Use Agent Ping Rem rpinnama 10/18/02 - Add performance logging Rem rpinnama 10/17/02 - Reset DBMSAPPLICATION module Rem rpinnama 10/16/02 - Fix metric versioning query Rem rpinnama 10/14/02 - Discard the check for beacon targets while updating avail marker Rem rpinnama 10/09/02 - Use category_props to get metric guid Rem rpinnama 10/08/02 - Support for multiple metric versions Rem skini 10/04/02 - Check for blackout windows before moving marker Rem rpinnama 09/19/02 - Use target timezone for avail marker Rem rpinnama 09/16/02 - Rem rpinnama 09/12/02 - Use global functions for translating dates. Rem rpinnama 09/10/02 - Use UTC date for last_heartbeat Rem snakai 07/25/02 - beacon funcs/procs moved to pkg Rem rpinnama 07/31/02 - insert severity records without notification status Rem rpinnama 07/16/02 - Exception for target having beacon availability Rem rpinnama 07/03/02 - Fix the unreachable message Rem rpinnama 06/28/02 - Append error message to severity message Rem rpinnama 06/21/02 - Log better error messages Rem rpinnama 06/18/02 - Generate unrch_clear only if the ping status is DOWN Rem skini 06/16/02 - Fix args to SCHEDULE_RECORD Rem rpinnama 06/04/02 - Use the new batch heartbeat definition. Rem rpinnama 05/15/02 - rpinnama_reorg_rep_scripts Rem rpinnama 05/15/02 - Created Rem CREATE OR REPLACE PACKAGE BODY EM_PING AS S_DEFAULT_DOWN_REASON_MSG mgmt_emd_ping.down_reason_msg%TYPE; PROCEDURE agt_up_on_blk_end(p_target_guid RAW, p_collection_timestamp DATE); -- -- PURPOSE -- Procedure(s) to enable/disable/verify detailed logging -- PROCEDURE enable_detailed_logging IS BEGIN IF (EMDW_LOG.p_is_info_set)THEN EMDW_LOG.info('enable_detailed_logging:Entry', MODULE_NAME); END IF; BEGIN INSERT INTO MGMT_PARAMETERS (parameter_name, parameter_value, parameter_comment, internal_flag) VALUES (DETAILED_PING_LOGGING, 'TRUE', DETAILED_PING_LOGGING_CMT, 1); EXCEPTION WHEN DUP_VAL_ON_INDEX THEN UPDATE MGMT_PARAMETERS SET parameter_value = 'TRUE' WHERE parameter_name = DETAILED_PING_LOGGING; END; IF (EMDW_LOG.p_is_info_set)THEN EMDW_LOG.info('enable_detailed_logging:Exit', MODULE_NAME); END IF; END enable_detailed_logging; PROCEDURE disable_detailed_logging IS BEGIN IF (EMDW_LOG.p_is_info_set)THEN EMDW_LOG.info('disable_detailed_logging:Entry', MODULE_NAME); END IF; BEGIN INSERT INTO MGMT_PARAMETERS (parameter_name, parameter_value, parameter_comment, internal_flag) VALUES (DETAILED_PING_LOGGING, 'FALSE', DETAILED_PING_LOGGING_CMT, 1); EXCEPTION WHEN DUP_VAL_ON_INDEX THEN UPDATE MGMT_PARAMETERS SET parameter_value = 'FALSE' WHERE parameter_name = DETAILED_PING_LOGGING; END; IF (EMDW_LOG.p_is_info_set)THEN EMDW_LOG.info('disable_detailed_logging:Exit', MODULE_NAME); END IF; END disable_detailed_logging; FUNCTION is_detailed_logging_enabled RETURN INTEGER IS l_retVal INTEGER := 0; l_cnt INTEGER := 0; BEGIN IF (EMDW_LOG.p_is_info_set)THEN EMDW_LOG.info('is_detailed_logging_enabled:Entry', MODULE_NAME); END IF; IF G_DETAILED_LOGGING IS NOT NULL THEN IF (G_DETAILED_LOGGING) THEN l_retVal := 1; END IF; ELSE -- Query the table when once per session only -- (when G_DETAILED_LOGGING is null) SELECT COUNT(*) into l_cnt FROM MGMT_PARAMETERS WHERE parameter_name=DETAILED_PING_LOGGING AND upper(parameter_value)='TRUE'; IF (l_cnt > 0) THEN -- detailed_ping_logging property is TRUE l_retVal := 1; G_DETAILED_LOGGING := TRUE; ELSE G_DETAILED_LOGGING := FALSE; END IF; END IF; IF (EMDW_LOG.p_is_info_set)THEN EMDW_LOG.info('is_detailed_logging_enabled:Exit l_retVal = '|| l_retVal, MODULE_NAME); END IF; RETURN l_retVal; END is_detailed_logging_enabled; -- -- PURPOSE -- Procedure(s) to enable/disable/verify ping subsystem -- PROCEDURE enable_ping_system IS BEGIN IF (EMDW_LOG.p_is_info_set)THEN EMDW_LOG.info('enable_ping_system:Entry', MODULE_NAME); END IF; UPDATE MGMT_PARAMETERS SET parameter_value = 'FALSE' WHERE parameter_name = DISABLE_PING_SYSTEM_PROP; IF (EMDW_LOG.p_is_info_set)THEN EMDW_LOG.info('enable_ping_system:Exit', MODULE_NAME); END IF; END enable_ping_system; PROCEDURE disable_ping_system IS BEGIN IF (EMDW_LOG.p_is_info_set)THEN EMDW_LOG.info('disable_ping_system:Entry', MODULE_NAME); END IF; BEGIN INSERT INTO MGMT_PARAMETERS (parameter_name, parameter_value, parameter_comment, internal_flag) VALUES (DISABLE_PING_SYSTEM_PROP, 'TRUE', DISABLE_PING_SYSTEM_PROP_CMT, 1); EXCEPTION WHEN DUP_VAL_ON_INDEX THEN UPDATE MGMT_PARAMETERS SET parameter_value = 'TRUE' WHERE parameter_name = DISABLE_PING_SYSTEM_PROP; END; IF (EMDW_LOG.p_is_info_set)THEN EMDW_LOG.info('disable_ping_system:Exit', MODULE_NAME); END IF; END disable_ping_system; FUNCTION is_ping_system_enabled RETURN INTEGER IS l_retVal INTEGER := 1; l_cnt INTEGER := 0; BEGIN IF (EMDW_LOG.p_is_info_set)THEN EMDW_LOG.info('is_ping_system_enabled:Entry', MODULE_NAME); END IF; SELECT COUNT(*) into l_cnt FROM MGMT_PARAMETERS WHERE parameter_name=DISABLE_PING_SYSTEM_PROP AND upper(parameter_value)='TRUE'; IF (l_cnt > 0) THEN -- disable_ping_system_property is TRUE l_retVal := 0; END IF; IF (EMDW_LOG.p_is_info_set)THEN EMDW_LOG.info('is_ping_system_enabled:Exit l_retVal = '|| l_retVal, MODULE_NAME); END IF; RETURN l_retVal; END is_ping_system_enabled; -- -- PURPOSE -- Marks the specified EMD as UP in the ping tables. -- Down reason code and message are cleared on marking the node as Up. -- -- IN PARAMETERS -- emd_url_in - specifies the EMD that needs to be marked up. -- -- NOTES -- This is private routine called only in this package -- PROCEDURE mark_emd_up(v_target_guid_in IN RAW) IS BEGIN IF (EMDW_LOG.p_is_info_set)THEN EMDW_LOG.info('mark_emd_up:Entry v_target_guid_in =' || v_target_guid_in, MODULE_NAME); END IF; -- Update the status and -- reset the last_checked time, so that the mark_node_stat -- does not select this agent UPDATE mgmt_emd_ping SET status = NODE_STATUS_UP, down_reason_code = 0, down_reason_msg = S_DEFAULT_DOWN_REASON_MSG WHERE target_guid = v_target_guid_in; IF (EMDW_LOG.p_is_info_set)THEN EMDW_LOG.info('mark_emd_up:Exit', MODULE_NAME); END IF; END mark_emd_up; -- -- PURPOSE -- Marks the specified EMD as DOWN with the specified reason code -- and message. -- -- IN PARAMETERS -- emd_url_in - specifies the EMD that needs to be marked as down. -- down_reason_code_in - error code that specifies the down reason -- down_reason_msg_in - error message that specifies the down reason -- -- NOTES -- This is private routine called only in this package -- PROCEDURE mark_emd_down(target_guid_in IN RAW, down_reason_code_in IN NUMBER, down_reason_msg_in IN VARCHAR2) IS BEGIN IF (EMDW_LOG.p_is_info_set)THEN EMDW_LOG.info('mark_emd_down:Entry target_guid_in = '|| target_guid_in || ' down_reason_code_in' || down_reason_code_in || ' down_reason_msg_in ' || down_reason_msg_in, MODULE_NAME); END IF; UPDATE mgmt_emd_ping SET status = NODE_STATUS_DOWN, down_reason_code = down_reason_code_in, down_reason_msg = down_reason_msg_in, unrch_start_ts = NULL WHERE target_guid = target_guid_in; IF (EMDW_LOG.p_is_info_set)THEN EMDW_LOG.info('mark_emd_down:Exit', MODULE_NAME); END IF; END mark_emd_down; -- -- PURPOSE -- Marks the specified EMD as POTENTIALLY DOWN -- -- IN PARAMETERS -- emd_url_in - specifies the EMD that needs to be marked -- -- NOTES -- This is private routine called only in this package -- PROCEDURE mark_emd_potentially_down(v_target_guid_in IN RAW) IS BEGIN IF (EMDW_LOG.p_is_info_set)THEN EMDW_LOG.info('mark_emd_potentially_down:Entry v_target_guid_in = ' || v_target_guid_in, MODULE_NAME); END IF; -- 5448666: There could be delays in recording agent heartbeats. -- So, set unrch_start_ts to last_heartbeat_ts +1sec instead of -- +max_inactive_time. It'll prevent raise of UNREACHABLE_START -- and UNREACHABLE_CLEAR severities in wrong order. UPDATE mgmt_emd_ping SET status = NODE_STATUS_POTENTIALLY_DOWN, down_reason_code = 0, down_reason_msg = S_DEFAULT_DOWN_REASON_MSG, unrch_start_ts = last_heartbeat_ts + (1/(60*60*24)) WHERE target_guid = v_target_guid_in; IF (EMDW_LOG.p_is_info_set)THEN EMDW_LOG.info('mark_emd_potentially_down:Exit', MODULE_NAME); END IF; END mark_emd_potentially_down; -- -- PURPOSE -- Insert data for metric name/col 'Response'/'Status'. -- Inserting data for this will trigger the availability information -- for the specified target. -- -- IN PARAMETERS -- target_guid - target guid of the agent for which data is inserted. -- collect_time_in - collection timestamp for the data -- data_value_in - value of the metric data. -- -- NOTES -- This is private routine called only in this package -- PROCEDURE insert_metric_data(target_guid_in IN RAW, collect_time_in IN DATE, data_value_in IN NUMBER) IS mguid MGMT_METRICS.metric_guid%TYPE; CURSOR resp_stat_metric_guid (v_target_guid_in RAW) IS SELECT metric_guid FROM mgmt_metrics WHERE target_type = MGMT_GLOBAL.G_AGENT_TARGET_TYPE AND metric_name = MGMT_GLOBAL.G_AVAIL_METRIC_NAME AND metric_column = MGMT_GLOBAL.G_AVAIL_METRIC_COLUMN; BEGIN IF (EMDW_LOG.p_is_info_set)THEN EMDW_LOG.info('insert_metric_data:Entry target_guid_in = ' || target_guid_in || ' collect_time_in = ' || collect_time_in || ' data_value_in = ' || data_value_in, MODULE_NAME); END IF; -- get the Response / Status metric guid for the agent OPEN resp_stat_metric_guid(target_guid_in); FETCH resp_stat_metric_guid INTO mguid; IF (resp_stat_metric_guid%ISOPEN) THEN CLOSE resp_stat_metric_guid; END IF; EMD_LOADER.ADD_METRIC_DATA(target_guid_in, mguid, ' ', data_value_in, collect_time_in); IF (EMDW_LOG.p_is_info_set)THEN EMDW_LOG.info('insert_metric_data:Exit', MODULE_NAME); END IF; END insert_metric_data; -- -- PURPOSE -- Insert severity data for metric name/col 'Response'/'Status'. -- Inserting severity data for this metric will trigger the -- availability information for the specified target. -- -- IN PARAMETERS -- target_guid_in - target guid for which data has to be inserted. -- metric_guid_in - metric guid for which data has to be inserted. -- collect_time_in - timestamp of when the severity was detected. -- severity_code_in - specified whether the severity is CLEAR/ALERT. -- -- NOTES -- This is private routine called only in this package -- FUNCTION insert_unrch_severity(log_action_name_in IN VARCHAR2, target_guid_in IN RAW, metric_guid_in IN RAW, collect_time_in IN DATE, severity_code_in IN NUMBER, severity_msg_in IN VARCHAR2, severity_msg_nlsid_in IN VARCHAR2 DEFAULT NULL, severity_msg_params_in IN VARCHAR2 DEFAULT NULL) RETURN NUMBER IS is_beacon_avail BOOLEAN := FALSE; l_insert_succ NUMBER := 0; BEGIN IF (EMDW_LOG.p_is_info_set)THEN EMDW_LOG.info('insert_unrch_severity:Entry log_action_name_in = ' || log_action_name_in || ' target_guid_in = ' || target_guid_in || ' metric_guid_in = ' || metric_guid_in || ' collect_time_in = ' || to_char(collect_time_in, 'YYYY-MM-DD HH24:MI:SS') || ' severity_code_in = ' || severity_code_in || ' severity_msg_in = ' || severity_msg_in || ' severity_msg_nlsid_in = ' || severity_msg_nlsid_in || ' severity_msg_params_in = ' || severity_msg_params_in, MODULE_NAME); END IF; BEGIN is_beacon_avail := MGMT_GENSVC_AVAIL.PROCESS_UNREACHABLE(target_guid_in); IF NOT is_beacon_avail THEN -- Insert a severity record. INSERT INTO mgmt_violations (target_guid, policy_guid, collection_timestamp, violation_level, message, message_nlsid, message_params) VALUES (target_guid_in, metric_guid_in, collect_time_in, severity_code_in, severity_msg_in, severity_msg_nlsid_in, severity_msg_params_in); l_insert_succ := 1; END IF; EXCEPTION WHEN MGMT_GLOBAL.target_deletion_in_prog THEN IF (EMDW_LOG.p_is_info_set)THEN EMDW_LOG.info('insert_unrch_severity: Ignoring target_deletion_in_prog exception' || ' (target_guid = ' || target_guid_in || ')' || ' (metric_guid = ' || metric_guid_in || ')' || ' (severity_code = ' || severity_code_in || ')' || ' (error msg = ' || SQLERRM || ')', MODULE_NAME); END IF; -- Ignore exceptions for targets being deleted.. NULL; WHEN MGMT_GLOBAL.duplicate_record THEN IF (EMDW_LOG.p_is_info_set)THEN EMDW_LOG.info('insert_unrch_severity: Ignoring duplicate_record exception' || ' (target_guid = ' || target_guid_in || ')' || ' (metric_guid = ' || metric_guid_in || ')' || ' (severity_code = ' || severity_code_in || ')' || ' (error msg = ' || SQLERRM || ')', MODULE_NAME); END IF; -- Ignore exceptions for duplicate severities.. NULL; WHEN MGMT_GLOBAL.severity_in_blackout THEN IF (EMDW_LOG.p_is_info_set)THEN EMDW_LOG.info('insert_unrch_severity: Ignoring severity_in_blackout exception' || ' (target_guid = ' || target_guid_in || ')' || ' (metric_guid = ' || metric_guid_in || ')' || ' (severity_code = ' || severity_code_in || ')' || ' (error msg = ' || SQLERRM || ')', MODULE_NAME); END IF; -- Ignore exceptions for severities inserted during blackout NULL; WHEN MGMT_GLOBAL.invalid_unrch_clr_viol THEN IF (EMDW_LOG.p_is_info_set)THEN EMDW_LOG.info('insert_unrch_severity: Ignoring invalid_unrch_clr_viol exception' || ' (target_guid = ' || target_guid_in || ')' || ' (metric_guid = ' || metric_guid_in || ')' || ' (severity_code = ' || severity_code_in || ')' || ' (error msg = ' || SQLERRM || ')', MODULE_NAME); END IF; -- Ignore exceptions for unreach clear severities inserted with errors NULL; WHEN OTHERS THEN mgmt_log.log_error(log_action_name_in, E_INSERT_SEVERITY, EM_INSERT_SEVERITY || ' (target_guid = ' || target_guid_in || ') (metric_guid = ' || metric_guid_in || ') (severity_code = ' || severity_code_in || ') (error msg = ' || SQLERRM || ')'); NULL; -- continue inserting severities for other targets.. END; IF (EMDW_LOG.p_is_info_set)THEN EMDW_LOG.info('insert_unrch_severity: Exit insert_succ = ' || l_insert_succ, MODULE_NAME); END IF; RETURN l_insert_succ; END insert_unrch_severity; -- -- PURPOSE -- Insert severity data for metric name/col 'Response'/'Status'. -- Inserting severity data for this metric will trigger the -- availability information for the specified target. -- -- IN PARAMETERS -- target_name_in - name of the host for which data has to be inserted. -- collect_time_in - timestamp of when the severity was detected. -- severity_code_in - specified whether the severity is CLEAR/ALERT. -- -- NOTES -- This is private routine called only in this package -- FUNCTION insert_unrch_severities(log_action_name_in IN VARCHAR2, emd_url_in IN VARCHAR2, collect_time_in IN DATE, severity_code_in IN NUMBER, severity_msg_in IN VARCHAR2, severity_msg_nlsid_in IN VARCHAR2 DEFAULT NULL, severity_msg_params_in IN VARCHAR2 DEFAULT NULL) RETURN NUMBER IS mguid MGMT_METRICS.metric_guid%TYPE; tguids t_guid_list; mguids t_guid_list; ttypes t_target_type_list; agent_guid MGMT_TARGETS.target_guid%TYPE; l_insert_succ NUMBER := 0; l_insert_cnt NUMBER := 0; CURSOR targets_on_agent_cur(emd_url_in VARCHAR2) IS SELECT t.target_guid, t.target_type, m.metric_guid FROM mgmt_targets t, mgmt_metrics m WHERE t.target_type = m.target_type AND m.type_meta_ver = t.type_meta_ver AND (m.category_prop_1 = t.category_prop_1 OR m.category_prop_1 = ' ') AND (m.category_prop_2 = t.category_prop_2 OR m.category_prop_2 = ' ') AND (m.category_prop_3 = t.category_prop_3 OR m.category_prop_3 = ' ') AND (m.category_prop_4 = t.category_prop_4 OR m.category_prop_4 = ' ') AND (m.category_prop_5 = t.category_prop_5 OR m.category_prop_5 = ' ') AND t.emd_url = emd_url_in AND t.rep_side_avail = 0 AND m.metric_name = MGMT_GLOBAL.G_AVAIL_METRIC_NAME AND m.metric_column = MGMT_GLOBAL.G_AVAIL_METRIC_COLUMN; BEGIN IF (EMDW_LOG.p_is_info_set)THEN EMDW_LOG.info('insert_unrch_severities:Entry log_action_name_in = ' || log_action_name_in || ' emd_url_in = ' || emd_url_in || ' collect_time_in = ' || TO_CHAR(collect_time_in, HEARTBEAT_TIME_FORMAT) || ' severity_code_in = ' || severity_code_in || ' severity_msg_in = ' || severity_msg_in, MODULE_NAME); END IF; IF (emd_url_in IS NOT NULL) THEN -- In the case of multi-agent targets, if this agent happens to be -- the current master agent for some targets, then we need to pick -- someother agent that is currently up as the master agent. IF (severity_code_in = MGMT_GLOBAL.G_SEVERITY_UNREACHABLE_START) THEN SELECT target_guid INTO agent_guid FROM mgmt_targets WHERE emd_url = emd_url_in AND target_type = mgmt_global.G_AGENT_TARGET_TYPE; em_master_agent.process_agent_avail_change(agent_guid, MGMT_GLOBAL.G_STATUS_UNREACHABLE, collect_time_in); END IF; -- Lock the availability records before generating -- severities EM_SEVERITY.lock_avail_for_agt(emd_url_in); OPEN targets_on_agent_cur (emd_url_in); FETCH targets_on_agent_cur BULK COLLECT INTO tguids, ttypes, mguids; IF (targets_on_agent_cur%ISOPEN) THEN CLOSE targets_on_agent_cur; END IF; END IF; IF (EMDW_LOG.p_is_debug_set) THEN EMDW_LOG.debug('Got targets on agent..', MODULE_NAME); END IF; IF (tguids.EXISTS(1)) THEN FOR ctr IN tguids.FIRST..tguids.LAST LOOP IF (EMDW_LOG.p_is_debug_set) THEN EMDW_LOG.debug('Inserting sev for ttype ' || ttypes(ctr), MODULE_NAME); END IF; l_insert_succ := insert_unrch_severity(log_action_name_in, tguids(ctr), mguids(ctr), collect_time_in, severity_code_in, severity_msg_in, severity_msg_nlsid_in, severity_msg_params_in ); l_insert_cnt := l_insert_cnt + l_insert_succ; END LOOP; END IF; IF (tguids IS NOT NULL) THEN tguids.DELETE; END IF; IF (ttypes IS NOT NULL) THEN ttypes.DELETE; END IF; IF (mguids IS NOT NULL) THEN mguids.DELETE; END IF; IF (EMDW_LOG.p_is_info_set)THEN EMDW_LOG.info('insert_unrch_severities:Exit insert_cnt = ' || l_insert_cnt, MODULE_NAME); END IF; RETURN l_insert_cnt; END insert_unrch_severities; -- -- PURPOSE -- Insert severity data for metric name/col 'Response'/'Status' -- on targets where current availability is unreachable. -- Inserting severity data for this metric will trigger the -- availability information for the specified target. -- -- IN PARAMETERS -- emd_url_in - emd url of targets for which data has to be inserted. -- collect_time_in - timestamp of when the severity was detected. -- severity_code_in - specified whether the severity is CLEAR/ALERT. -- -- NOTES -- This is private routine called only in this package -- FUNCTION insert_unrch_severities2(log_action_name_in IN VARCHAR2, emd_url_in IN VARCHAR2, collect_time_in IN DATE, severity_code_in IN NUMBER, severity_msg_in IN VARCHAR2) RETURN NUMBER IS tguids t_guid_list; mguids t_guid_list; ttypes t_target_type_list; agent_guid MGMT_TARGETS.target_guid%TYPE; l_insert_cnt NUMBER := 0; l_insert_succ NUMBER := 0; CURSOR unrch_targets_on_agent_cur(emd_url_in VARCHAR2) IS SELECT t.target_guid, t.target_type, m.metric_guid FROM mgmt_targets t, mgmt_metrics m, mgmt_current_availability a WHERE t.target_type = m.target_type AND t.target_guid = a.target_guid AND a.current_status = MGMT_GLOBAL.G_STATUS_UNREACHABLE AND m.type_meta_ver = t.type_meta_ver AND (m.category_prop_1 = t.category_prop_1 OR m.category_prop_1 = ' ') AND (m.category_prop_2 = t.category_prop_2 OR m.category_prop_2 = ' ') AND (m.category_prop_3 = t.category_prop_3 OR m.category_prop_3 = ' ') AND (m.category_prop_4 = t.category_prop_4 OR m.category_prop_4 = ' ') AND (m.category_prop_5 = t.category_prop_5 OR m.category_prop_5 = ' ') AND t.emd_url = emd_url_in AND t.rep_side_avail = 0 AND m.metric_name = MGMT_GLOBAL.G_AVAIL_METRIC_NAME AND m.metric_column = MGMT_GLOBAL.G_AVAIL_METRIC_COLUMN; BEGIN IF (EMDW_LOG.p_is_info_set)THEN EMDW_LOG.info('insert_unrch_severities2:Entry log_action_name_in = ' || log_action_name_in || ' emd_url_in = ' || emd_url_in || ' collect_time_in = ' || collect_time_in || ' severity_code_in = ' || severity_code_in || ' severity_msg_in = ' || severity_msg_in, MODULE_NAME); END IF; IF (emd_url_in IS NOT NULL) THEN OPEN unrch_targets_on_agent_cur (emd_url_in); FETCH unrch_targets_on_agent_cur BULK COLLECT INTO tguids, ttypes, mguids; IF (unrch_targets_on_agent_cur%ISOPEN) THEN CLOSE unrch_targets_on_agent_cur; END IF; END IF; IF (EMDW_LOG.p_is_debug_set) THEN EMDW_LOG.debug('Got targets on agent..', MODULE_NAME); END IF; IF (tguids.EXISTS(1)) THEN FOR ctr IN tguids.FIRST..tguids.LAST LOOP IF (EMDW_LOG.p_is_debug_set) THEN EMDW_LOG.debug('Inserting sev for ttype ' || ttypes(ctr), MODULE_NAME); END IF; l_insert_succ := insert_unrch_severity(log_action_name_in, tguids(ctr), mguids(ctr), collect_time_in, severity_code_in, severity_msg_in); l_insert_cnt := l_insert_cnt + l_insert_succ; END LOOP; END IF; IF (tguids IS NOT NULL) THEN tguids.DELETE; END IF; IF (ttypes IS NOT NULL) THEN ttypes.DELETE; END IF; IF (mguids IS NOT NULL) THEN mguids.DELETE; END IF; IF (EMDW_LOG.p_is_info_set)THEN EMDW_LOG.info('insert_unrch_severities2:Exit insert_cnt = ' || l_insert_cnt, MODULE_NAME); END IF; RETURN l_insert_cnt; END insert_unrch_severities2; -- -- PURPOSE -- Procedure to bootstrap the ping sub system. -- This procedure sets up the last_checked_ts for all EMDs to current time. -- PROCEDURE start_ping_system IS l_grace_value NUMBER; BEGIN IF (EMDW_LOG.p_is_info_set)THEN EMDW_LOG.info('start_ping_system:Entry', MODULE_NAME); END IF; BEGIN SELECT to_number(parameter_value) INTO l_grace_value FROM mgmt_parameters WHERE parameter_name = G_PING_START_GRACE_PROP; EXCEPTION WHEN OTHERS THEN l_grace_value := G_DEFAULT_PING_START_GRACE; END; IF (is_ping_system_enabled > 0) THEN UPDATE mgmt_emd_ping_check SET last_checked_utc = CAST(SYS_EXTRACT_UTC(SYSTIMESTAMP) AS DATE) + l_grace_value/(24*60*60); END IF; IF (EMDW_LOG.p_is_info_set)THEN EMDW_LOG.info('start_ping_system:Exit', MODULE_NAME); END IF; END start_ping_system; PROCEDURE add_target(p_target_guid IN RAW, p_timezone_region IN VARCHAR2) IS l_start_time DATE := NULL; BEGIN IF (EMDW_LOG.p_is_info_set)THEN EMDW_LOG.info('add_target:Entry p_target_guid = ' || p_target_guid || ' p_timezone_region = ' || p_timezone_region, MODULE_NAME); END IF; l_start_time := TRUNC( (MGMT_GLOBAL.SYSDATE_TZRGN(p_timezone_region) - 32), 'DD') ; -- Bug-5955813 : Default last HB time to 32 days back. INSERT INTO MGMT_EMD_PING (target_guid, clean_heartbeat_utc, status_sync_utc, emd_uptime_utc, down_reason_code, down_reason_msg, last_heartbeat_ts) VALUES (p_target_guid, l_start_time, l_start_time, l_start_time, 0, S_DEFAULT_DOWN_REASON_MSG, l_start_time); INSERT INTO MGMT_EMD_PING_CHECK (target_guid, last_checked_utc) VALUES (p_target_guid, MGMT_GLOBAL.SYSDATE_UTC); IF (EMDW_LOG.p_is_info_set)THEN EMDW_LOG.info('add_target:Exit', MODULE_NAME); END IF; END add_target; -- -- -- PURPOSE -- Procedure to bulk update the heartbeats of the Agents -- PROCEDURE record_batch_heartbeat( v_agent_hb_arr_in IN EM_AGENT_HEARTBEAT_ARRAY, v_error_cnt OUT NUMBER, v_error_info OUT EM_AGENT_HEARTBEAT_RESP_ARRAY) IS l_agent_errors VARCHAR2(256); l_agent_guid MGMT_EMD_PING.target_guid%TYPE; l_agent_status MGMT_EMD_PING.status%TYPE; l_agent_tzrgn MGMT_TARGETS.timezone_region%TYPE; l_last_heartbeat_time MGMT_EMD_PING.clean_heartbeat_utc%TYPE; l_last_unrch_start_ts MGMT_EMD_PING.clean_heartbeat_utc%TYPE; l_heartbeat_time MGMT_EMD_PING.clean_heartbeat_utc%TYPE; l_heartbeat_time_utc MGMT_EMD_PING.clean_heartbeat_utc%TYPE; l_sync_upto_time MGMT_EMD_PING.status_sync_utc%TYPE; l_sync_upto_time_utc MGMT_EMD_PING.status_sync_utc%TYPE; l_emd_uptime_utc MGMT_EMD_PING.emd_uptime_utc%TYPE; l_error_code NUMBER; l_error_msg VARCHAR2(5000); l_error_msg_nlsid mgmt_violations.message_nlsid%TYPE; l_error_msg_params mgmt_violations.message_params%TYPE; l_start_timestmp TIMESTAMP := SYSTIMESTAMP; l_clean_hbs NUMBER := 0; l_dirty_hbs NUMBER := 0; l_prev_down_reason MGMT_EMD_PING.down_reason_msg%TYPE; l_down_reason MGMT_EMD_PING.down_reason_msg%TYPE; l_insert_cnt NUMBER := 0; BEGIN IF (EMDW_LOG.p_is_info_set)THEN EMDW_LOG.info('record_batch_heartbeat:Entry', MODULE_NAME); END IF; -- -- If ping system is disabled, do not record the heartbeats -- IF (is_ping_system_enabled <= 0) THEN IF (EMDW_LOG.p_is_info_set)THEN EMDW_LOG.info('record_batch_heartbeat:Exit PING system disabled', MODULE_NAME); END IF; RETURN; END IF; v_error_cnt := 0; v_error_info := EM_AGENT_HEARTBEAT_RESP_ARRAY(); l_agent_status := NODE_STATUS_DOWN; IF (EMDW_LOG.p_is_debug_set) THEN EMDW_LOG.debug('Entered record_batch_heartbeat procedure :' || TO_CHAR(SYSDATE, 'DD-MON-YYYY HH24:MI:SS'), MODULE_NAME); END IF; FOR i IN 1..v_agent_hb_arr_in.count LOOP BEGIN IF (EMDW_LOG.p_is_debug_set) THEN EMDW_LOG.debug('Getting agentguid for i = ' || v_agent_hb_arr_in(i).emd_url, MODULE_NAME); END IF; l_insert_cnt := 0; -- GET AGENT GUID and lock the row.. SELECT ping.target_guid, tgt.timezone_region, ping.status, ping.down_reason_msg, ping.unrch_start_ts, ping.last_heartbeat_ts INTO l_agent_guid, l_agent_tzrgn, l_agent_status, l_prev_down_reason, l_last_unrch_start_ts, l_last_heartbeat_time FROM mgmt_targets tgt, mgmt_emd_ping ping WHERE tgt.target_guid = ping.target_guid AND tgt.target_type = MGMT_GLOBAL.G_AGENT_TARGET_TYPE AND tgt.emd_url = v_agent_hb_arr_in(i).emd_url FOR UPDATE OF ping.target_guid; -- perf: replace with direct system function call below to avoid context switch -- associated with user defined functions -- -- l_emd_uptime_utc := MGMT_GLOBAL.TO_UTC( -- to_date(v_agent_hb_arr_in(i).emd_uptime, -- HEARTBEAT_TIME_FORMAT), -- l_agent_tzrgn); l_emd_uptime_utc := CAST(FROM_TZ(CAST(to_date(v_agent_hb_arr_in(i).emd_uptime, HEARTBEAT_TIME_FORMAT) as TIMESTAMP), l_agent_tzrgn) AT TIME ZONE 'GMT' AS DATE); l_heartbeat_time := to_date(v_agent_hb_arr_in(i).heartbeat_time, HEARTBEAT_TIME_FORMAT); --BUG:5955813 - If new HB time is less then prev HB make new HB time same as -- last unrch start ts/last HB time -- We don''t do any spl handling for DST fallback time. Even if DST time gets rolled from -- 1:59:59 to 1:00:59, we change the new HB time to 1:59:59. IF (l_heartbeat_time < l_last_heartbeat_time) THEN l_heartbeat_time := NVL(l_last_unrch_start_ts, l_last_heartbeat_time) ; END IF; IF (TRIM(v_agent_hb_arr_in(i).agent_errors) IS NOT NULL) THEN l_down_reason := AGENT_UNRCH_WITH_ERRORS; l_down_reason := REPLACE(l_down_reason, '{0}', v_agent_hb_arr_in(i).agent_errors); l_error_msg := AGENT_UNRCH_WITH_ERRORS; l_error_msg := REPLACE(l_error_msg, '{0}', v_agent_hb_arr_in(i).agent_errors); l_error_msg_nlsid := NLSID_AGENT_UNRCH_WITH_ERRS; l_error_msg_params := v_agent_hb_arr_in(i).agent_errors ; IF (l_agent_status != NODE_STATUS_DOWN) THEN -- mark the agent as down UPDATE mgmt_emd_ping SET last_heartbeat_ts = l_heartbeat_time, last_heartbeat_utc = CAST(SYS_EXTRACT_UTC(SYSTIMESTAMP) AS DATE), emd_uptime_utc = l_emd_uptime_utc, heartbeat_recorder_url = SUBSTR(v_agent_hb_arr_in(i).recorder_url, 0, 256), status = NODE_STATUS_DOWN, down_reason_msg = l_down_reason, down_reason_code = CODE_AGENT_UNRCH_WITH_ERR, unrch_start_ts = l_heartbeat_time, ping_job_name = NULL, job_submit_time = NULL WHERE target_guid = l_agent_guid; -- Insert unreachable start severity records. l_insert_cnt := insert_unrch_severities(HEARTBEAT_RECORD_ACTION, v_agent_hb_arr_in(i).emd_url, l_heartbeat_time, MGMT_GLOBAL.G_SEVERITY_UNREACHABLE_START, SUBSTR(l_error_msg, 0, 400), l_error_msg_nlsid, l_error_msg_params); -- Call job system Call back. MGMT_JOB_ENGINE.handle_emd_state_change(v_agent_hb_arr_in(i).emd_url, NODE_STATUS_DOWN); ELSIF (l_down_reason != l_prev_down_reason) THEN -- record the severity as the down error has changed. l_insert_cnt := insert_unrch_severities(HEARTBEAT_RECORD_ACTION, v_agent_hb_arr_in(i).emd_url, l_heartbeat_time, MGMT_GLOBAL.G_SEVERITY_UNREACHABLE_START, SUBSTR(l_error_msg, 0, 4000), l_error_msg_nlsid, l_error_msg_params); END IF; ELSIF (v_agent_hb_arr_in(i).is_clean_hb = 'Y') THEN l_clean_hbs := l_clean_hbs + 1; -- perf: replace with direct system function call below to avoid context switch -- associated with user defined functions -- -- l_heartbeat_time_utc := MGMT_GLOBAL.TO_UTC(l_heartbeat_time, -- l_agent_tzrgn); l_heartbeat_time_utc := CAST(FROM_TZ(CAST(l_heartbeat_time as TIMESTAMP), l_agent_tzrgn) AT TIME ZONE 'GMT' AS DATE); l_sync_upto_time := to_date(v_agent_hb_arr_in(i).status_sent_upto, HEARTBEAT_TIME_FORMAT); -- perf: replace with direct system function call below to avoid context switch -- associated with user defined functions -- -- l_sync_upto_time_utc := MGMT_GLOBAL.TO_UTC(l_sync_upto_time, -- l_agent_tzrgn); l_sync_upto_time_utc := CAST(FROM_TZ(CAST(l_sync_upto_time as TIMESTAMP), l_agent_tzrgn) AT TIME ZONE 'GMT' AS DATE); -- RECORD 'CLEAN' HEARTBEAT UPDATE mgmt_emd_ping SET last_heartbeat_ts = l_heartbeat_time, last_heartbeat_utc = CAST(SYS_EXTRACT_UTC(SYSTIMESTAMP) AS DATE), clean_heartbeat_utc = l_heartbeat_time_utc, status_sync_utc = l_sync_upto_time_utc, emd_uptime_utc = l_emd_uptime_utc, heartbeat_recorder_url = SUBSTR(v_agent_hb_arr_in(i).recorder_url, 0, 256), unrch_start_ts = null WHERE target_guid = l_agent_guid; agt_up_on_blk_end(p_target_guid => l_agent_guid, p_collection_timestamp => l_heartbeat_time) ; -- MARK the agent as UP if the current PING status is NOT UP.. IF NOT (l_agent_status = NODE_STATUS_UP) THEN -- mark_emd_up(l_agent_guid); -- Set the ping status of the node as UP, -- DONOT reset the job details.. UPDATE mgmt_emd_ping SET status = NODE_STATUS_UP, down_reason_code = 0, down_reason_msg = S_DEFAULT_DOWN_REASON_MSG WHERE target_guid = l_agent_guid; END IF; IF (l_agent_status = NODE_STATUS_DOWN) THEN -- Insert data and severity records based on last heartbeat -- as the Agent was up and heartbeating at that time. BEGIN insert_metric_data(l_agent_guid, l_heartbeat_time, 1); EXCEPTION WHEN OTHERS THEN mgmt_log.log_error(HEARTBEAT_RECORD_ACTION, E_UNKNOWN_HEARTBEAT, 'Error inserting metric data for tguid = ' || l_agent_guid || ' ERROR = ' || SQLERRM ); END; -- Insert unreachable clear severity records. l_insert_cnt := insert_unrch_severities(HEARTBEAT_RECORD_ACTION, v_agent_hb_arr_in(i).emd_url, l_heartbeat_time, MGMT_GLOBAL.G_SEVERITY_UNREACHABLE_CLEAR, AGENT_UNRCH_CLEAR_MSG); ELSE -- Insert unreachable clear severity records on targets where -- current availability is unreachable l_insert_cnt := insert_unrch_severities2(HEARTBEAT_RECORD_ACTION, v_agent_hb_arr_in(i).emd_url, l_heartbeat_time, MGMT_GLOBAL.G_SEVERITY_UNREACHABLE_CLEAR, AGENT_UNRCH_CLEAR_MSG); END IF; -- IF NOT (l_agent_status = UP) IF (l_insert_cnt > 0) THEN -- Call job system callback, if any unrch clears are inserted MGMT_JOB_ENGINE.handle_emd_state_change( v_agent_hb_arr_in(i).emd_url, NODE_STATUS_UP); END IF; -- Get availability lock to update availability marker EM_SEVERITY.lock_avail_for_agt(p_emd_url => v_agent_hb_arr_in(i).emd_url); -- TO DO: Use l_sync_utc timestamp in GMT.. UPDATE MGMT_AVAILABILITY_MARKER SET marker_timestamp = l_sync_upto_time WHERE marker_timestamp <= l_sync_upto_time AND target_guid IN (SELECT /*+ INDEX(t mgmt_targets_idx_02) */ t.target_guid FROM mgmt_targets t WHERE t.emd_url = v_agent_hb_arr_in(i).emd_url AND t.rep_side_avail = 0 AND NOT EXISTS (SELECT 1 from mgmt_blackout_windows bw WHERE bw.target_guid = t.target_guid AND bw.start_time <= l_sync_upto_time AND (bw.status NOT IN (MGMT_BLACKOUT_ENGINE.BLK_STATE_STOPPED, MGMT_BLACKOUT_ENGINE.BLK_STATE_ENDED) ) ) AND NOT EXISTS (SELECT 1 from MGMT_GENSVC_AVAIL_CONFIG bcnav WHERE bcnav.target_guid = t.target_guid) ); ELSE -- RECORD 'DIRTY' HEARTBEAT.. l_dirty_hbs := l_dirty_hbs + 1; UPDATE mgmt_emd_ping SET last_heartbeat_ts = l_heartbeat_time, last_heartbeat_utc = CAST(SYS_EXTRACT_UTC(SYSTIMESTAMP) AS DATE), emd_uptime_utc = l_emd_uptime_utc, heartbeat_recorder_url = SUBSTR(v_agent_hb_arr_in(i).recorder_url, 0, 256) WHERE target_guid = l_agent_guid; END IF; COMMIT; -- commit, so that the lock is gone EXCEPTION WHEN NO_DATA_FOUND THEN ROLLBACK; -- rollback any changes done for this Agent -- Dont log this error as this is a valid case -- during agent bootstrap l_error_msg := SQLERRM; v_error_cnt := v_error_cnt + 1; v_error_info.extend(1); v_error_info(v_error_cnt) := EM_AGENT_HEARTBEAT_RESP(v_agent_hb_arr_in(i).emd_url, 0, l_error_msg); WHEN OTHERS THEN ROLLBACK; -- rollback any changes done for this Agent IF (EMDW_LOG.p_is_debug_set) THEN EMDW_LOG.debug('Error received processing emd_url = ' || v_agent_hb_arr_in(i).emd_url || ' message = ' || SQLERRM, MODULE_NAME); END IF; -- If 3113, raise_exception and retry l_error_code := SQLCODE; IF (l_error_code = -3113) THEN -- Stop recording and raise the exception on ORA-03113 error. RAISE; ELSE l_error_msg := SQLERRM; mgmt_log.log_error(HEARTBEAT_RECORD_ACTION, E_UNKNOWN_HEARTBEAT, EM_UNKNOWN_HEARTBEAT || v_agent_hb_arr_in(i).emd_url || '. ' || l_error_msg); v_error_cnt := v_error_cnt + 1; v_error_info.extend(1); v_error_info(v_error_cnt) := EM_AGENT_HEARTBEAT_RESP(v_agent_hb_arr_in(i).emd_url, 0, l_error_msg); END IF; -- CONTINUE processing other heartbeats.. END; END LOOP; IF (is_detailed_logging_enabled > 0) THEN -- Log the time taken MGMT_LOG.LOG_PERFORMANCE(HEARTBEAT_RECORD_ACTION, MGMT_GLOBAL.ELAPSED_TIME_MSEC(l_start_timestmp, SYSTIMESTAMP), l_start_timestmp, 'Y', HEARTBEAT_CNT_NAME, v_agent_hb_arr_in.count); MGMT_LOG.LOG_PERFORMANCE(HEARTBEAT_RECORD_ACTION, 0, l_start_timestmp, 'N', CLEAN_HEARTBEAT_CNT_NAME, l_clean_hbs); MGMT_LOG.LOG_PERFORMANCE(HEARTBEAT_RECORD_ACTION, 0, l_start_timestmp, 'N', DIRTY_HEARTBEAT_CNT_NAME, l_dirty_hbs); MGMT_LOG.LOG_PERFORMANCE(HEARTBEAT_RECORD_ACTION, 0, l_start_timestmp, 'N', HEARTBEAT_ERROR_CNT_NAME, v_error_info.count); END IF; IF (EMDW_LOG.p_is_info_set)THEN EMDW_LOG.info('record_batch_heartbeat:Exit v_error_cnt = ' || v_error_cnt, MODULE_NAME); END IF; END record_batch_heartbeat; PROCEDURE DBMSJOB_EXTENDED_SQL_TRACE_ON(p_value IN BOOLEAN) IS BEGIN MGMT_SQLTRACE.EXTENDED_SQL_TRACE_ON(EST_PING_NAME, p_value); END DBMSJOB_EXTENDED_SQL_TRACE_ON; -- -- -- PURPOSE -- Procedure to mark the nodes as UP or 'potentially' DOWN based on -- whether the EMD performed heartbeat in the last -- max_emd_inactive_time seconds. -- -- Pick up nodes that are either down or UP, not 'potentially' DOWN. -- PROCEDURE mark_node_status IS -- -- This cursor gets the list of HOSTs that are not blacked out AND -- that are marked as UP and not sent a heartbeat in the last max_inactive_time OR -- CURSOR emd_list(c_curr_date_utc DATE) IS SELECT /*+ INDEX(ping mgmt_emd_ping_idx_01) */ tgt.target_guid, tgt.emd_url, ping.status FROM mgmt_emd_ping ping, mgmt_emd_ping_check pingc, mgmt_targets tgt, mgmt_current_availability cavail WHERE ping.target_guid = tgt.target_guid AND ping.target_guid = cavail.target_guid AND ping.target_guid = pingc.target_guid AND tgt.target_type = MGMT_GLOBAL.G_AGENT_TARGET_TYPE AND ping.max_inactive_time > 0 AND cavail.current_status != MGMT_GLOBAL.G_STATUS_BLACKOUT AND ping.status = NODE_STATUS_UP AND ping.ping_job_name IS NULL AND ( (c_curr_date_utc-ping.last_heartbeat_utc)*24*60*60 > ping.max_inactive_time) AND ( (c_curr_date_utc-pingc.last_checked_utc)*24*60*60 >= (ping.max_inactive_time)/2) ORDER BY tgt.emd_url; l_target_guid MGMT_TARGETS.target_guid%TYPE; l_job_name MGMT_JOB.job_name%TYPE; l_job_targets MGMT_JOB_TARGET_LIST; l_job_params MGMT_JOB_PARAM_LIST; l_job_id MGMT_JOB.job_id%TYPE; l_execution_id MGMT_JOB_EXEC_SUMMARY.execution_id%TYPE; l_schedule MGMT_JOB_SCHEDULE_RECORD; l_down_emd_cnt NUMBER := 0; l_down_emd_list MGMT_JOB_VECTOR_PARAMS := MGMT_JOB_VECTOR_PARAMS(); l_hb_rcdr_cnt NUMBER := 0; l_updated_lscheck NUMBER := 0; l_curr_date DATE; l_tmstmptz TIMESTAMP WITH TIME ZONE; l_error_code NUMBER; l_error_msg VARCHAR2(5000); l_start_timestmp TIMESTAMP := SYSTIMESTAMP; BEGIN MGMT_SQLTRACE.EXTENDED_SQL_TRACE(EST_PING_NAME); IF (EMDW_LOG.p_is_info_set)THEN EMDW_LOG.info('mark_node_status:Entry', MODULE_NAME); END IF; -- If ping system is not enabled, do not process the heartbeats.. DBMS_APPLICATION_INFO.SET_MODULE( module_name => MODULE_NAME, action_name => MARK_NODE_STATUS_ACTION); SELECT SYS_EXTRACT_UTC(SYSTIMESTAMP) INTO l_tmstmptz FROM DUAL; l_curr_date := CAST(l_tmstmptz AS DATE); l_job_name := PINGCFM_JOBNAME_PREFIX || SYS_GUID(); BEGIN l_hb_rcdr_cnt := 0; l_updated_lscheck := 0; BEGIN -- -- Call job system to process emd queue entries. -- Should be done before as this would drop some -- inactive middletiers. -- MGMT_JOB_ENGINE.process_emd_queue_entries; EXCEPTION WHEN OTHERS THEN mgmt_log.log_error(MARK_NODE_STATUS_ACTION, e_process_emd_queue, em_process_emd_queue || SQLERRM); END; -- If no hearbeart recorders are registered, no status check -- should be done. l_hb_rcdr_cnt := MGMT_FAILOVER.get_active_mt_count; -- If ping system is not enabled, do not process the heartbeats.. IF ( (l_hb_rcdr_cnt > 0) AND (is_ping_system_enabled > 0) ) THEN FOR emd_list_rec IN emd_list(l_curr_date) LOOP l_target_guid := emd_list_rec.target_guid; -- Lock and update BEGIN SELECT target_guid INTO l_target_guid FROM mgmt_emd_ping WHERE target_guid = l_target_guid FOR UPDATE NOWAIT; -- 5448666: There could be delays in recording agent heartbeats. -- So, set unrch_start_ts to last_heartbeat_ts +1sec instead of -- +max_inactive_time. It'll prevent raise of UNREACHABLE_START -- and UNREACHABLE_CLEAR severities in wrong order. UPDATE mgmt_emd_ping SET status = NODE_STATUS_POTENTIALLY_DOWN, down_reason_code = 0, down_reason_msg = S_DEFAULT_DOWN_REASON_MSG, ping_job_name = l_job_name, job_submit_time = SYSDATE, unrch_start_ts = last_heartbeat_ts + (1/(60*60*24)) WHERE target_guid = l_target_guid; l_down_emd_cnt := l_down_emd_cnt + 1; l_down_emd_list.extend(1); l_down_emd_list(l_down_emd_cnt) := emd_list_rec.emd_url; EXCEPTION WHEN NO_DATA_FOUND THEN -- Dont log this error, as this could happen -- if the target is being deleted.. NULL; WHEN OTHERS THEN l_error_code := SQLCODE; IF (l_error_code != -54) THEN -- Donot log Resource busy (ORA-00054) error mgmt_log.log_error(MARK_NODE_STATUS_ACTION, e_lock_update_status, em_lock_update_status || emd_list_rec.emd_url || '. ' || SQLERRM); END IF; END; END LOOP; -- -- Submit job with all potentially down EMDs. -- IF (l_down_emd_cnt > 0) THEN -- This job has no targets. l_job_targets := MGMT_JOB_TARGET_LIST(); -- Its only parameter is the emd_url_list l_job_params := MGMT_JOB_PARAM_LIST(); l_job_params.extend(1); l_job_params(1) := MGMT_JOB_PARAM_RECORD(PINGCFM_JOBPARAM_NAME, 0, null, l_down_emd_list); -- Its schedule is immediate. l_schedule := MGMT_JOBS.get_job_schedule_record(MGMT_JOBS.ONE_TIME_FREQUENCY_CODE, SYSDATE, null, 0, 0, 0, null, null, MGMT_JOBS.TIMEZONE_REPOSITORY, 0, 0, null); -- Submit the job as system job so that system thread pool is used MGMT_JOBS.submit_job(l_job_name, PINGCFM_JOB_DESCRIPTION, PINGCFM_JOBTYPE, l_job_targets, l_job_params, l_schedule, l_job_id, l_execution_id, USER, MGMT_JOB_ENGINE.SYSTEM_JOB_RETRY); END IF; -- -- Update the last status checked time stamp for UP and DOWN EMDs -- only, dont update the timestamp for potentially down EMDs -- -- move to end of if so the rows in the table are not locked that long. UPDATE mgmt_emd_ping_check SET last_checked_utc = l_curr_date WHERE target_guid IN (SELECT ping.target_guid FROM mgmt_emd_ping ping, mgmt_emd_ping_check pingc WHERE ping.target_guid = pingc.target_guid AND ping.max_inactive_time > 0 AND ( (ping.status = NODE_STATUS_UP) OR (ping.status = NODE_STATUS_DOWN) ) AND (l_curr_date - pingc.last_checked_utc)*24*60*60 >= (ping.max_inactive_time)/2); l_updated_lscheck := 1; COMMIT; -- commit all ping changes and submit_job -- Cleanup the dead job from mgmt_emd_ping table -- This proc gets called every minute but execute following -- stmt only once in an hour i.e on 59th Minute. IF TO_NUMBER(TO_CHAR(SYSDATE, 'MI')) = 59 THEN FOR ping_rec IN (SELECT p.target_guid FROM mgmt_emd_ping p WHERE ping_job_name IS NOT NULL AND NOT EXISTS ( SELECT NULL FROM MGMT_JOB j WHERE j.job_name = p.ping_job_name ) ) LOOP UPDATE mgmt_emd_ping SET ping_job_name = NULL, job_submit_time = NULL, status = NODE_STATUS_UP WHERE target_guid = ping_rec.target_guid; COMMIT; END LOOP; END IF; END IF; -- If (l_hb_rcdr_cnt > 0) (ping system enabled > 0) EXCEPTION WHEN OTHERS THEN ROLLBACK; IF (l_updated_lscheck = 0) THEN mgmt_log.log_error(MARK_NODE_STATUS_ACTION, e_mark_node_status, em_mark_node_status || SQLERRM); ELSE mgmt_log.log_error(MARK_NODE_STATUS_ACTION, e_pingjob_submission, em_pingjob_submission || SQLERRM); END IF; END; -- Log the time taken MGMT_LOG.LOG_PERFORMANCE(MARK_NODE_STATUS_ACTION, MGMT_GLOBAL.ELAPSED_TIME_MSEC(l_start_timestmp, SYSTIMESTAMP), l_start_timestmp, 'Y', MARKED_AGENT_CNT_NAME, l_down_emd_cnt); -- Reset the module info DBMS_APPLICATION_INFO.SET_MODULE( module_name => ' ', action_name => ' '); IF (EMDW_LOG.p_is_info_set)THEN EMDW_LOG.info('mark_node_status:Exit', MODULE_NAME); END IF; END mark_node_status; -- -- Procedure to record the failure details for potentailly down -- PROCEDURE update_emd_status ( v_emd_url_in IN VARCHAR2, v_ping_status_in IN NUMBER, v_error_msg_1_in IN VARCHAR2 DEFAULT NULL, v_error_msg_2_in IN VARCHAR2 DEFAULT NULL ) IS l_target_guid MGMT_TARGETS.target_guid%TYPE; l_timezone_region MGMT_TARGETS.timezone_region%TYPE; l_avail_status MGMT_AVAILABILITY.current_status%TYPE; l_status MGMT_EMD_PING.status%TYPE; l_max_inactive_time MGMT_EMD_PING.max_inactive_time%TYPE; l_last_heartbeat_ts MGMT_EMD_PING.last_heartbeat_ts%TYPE; l_unrch_start_ts MGMT_EMD_PING.unrch_start_ts%TYPE; l_start_timestmp TIMESTAMP := SYSTIMESTAMP; l_error_msg VARCHAR2(32576); l_error_msg_nlsid mgmt_violations.message_nlsid%TYPE; l_error_msg_params mgmt_violations.message_params%TYPE; l_insert_cnt NUMBER := 0; BEGIN IF (EMDW_LOG.p_is_info_set)THEN EMDW_LOG.info('update_emd_status:Entry v_emd_url = ' || v_emd_url_in || ' v_ping_status_in = ' || v_ping_status_in || ' v_error_msg_1_in = ' || v_error_msg_1_in || ' v_error_msg_2_in = ' || v_error_msg_2_in, MODULE_NAME); END IF; BEGIN BEGIN SELECT p.target_guid, t.timezone_region, p.status, p.max_inactive_time, p.last_heartbeat_ts, p.unrch_start_ts, cavail.current_status INTO l_target_guid, l_timezone_region, l_status, l_max_inactive_time, l_last_heartbeat_ts, l_unrch_start_ts, l_avail_status FROM mgmt_emd_ping p, mgmt_targets t, mgmt_current_availability cavail WHERE t.emd_url = v_emd_url_in AND p.target_guid = t.target_guid AND p.target_guid = cavail.target_guid -- AND p.status = NODE_STATUS_POTENTIALLY_DOWN AND t.target_type = MGMT_GLOBAL.G_AGENT_TARGET_TYPE FOR UPDATE OF p.target_guid; EXCEPTION WHEN NO_DATA_FOUND THEN -- Dont log this error, as this could happen -- if the metadata is already deleted for the agent -- or if the agent started heartbeating and is marked as UP. IF (EMDW_LOG.p_is_info_set)THEN EMDW_LOG.info('update_emd_status:Exit Either metadata is already deleted for' || ' the agent or agent started heartbeating and is marked as UP', MODULE_NAME); END IF; RETURN; END; IF (l_status = NODE_STATUS_UP) THEN -- The ping status of the node is already UP, Do nothing -- This can happen because the agent can start heartbeating before the reverse-ping job -- finished. -- Since, we already received a clean heartbeat, retain the status as UP -- just cleanup the ping job details.. UPDATE mgmt_emd_ping SET ping_job_name = NULL, job_submit_time = NULL WHERE target_guid = l_target_guid; ELSIF (l_status = NODE_STATUS_DOWN) THEN -- The ping status of the node is already DOWN, Do nothing -- This can happen because another reverse ping job might have already -- marked the node as DOWN -- So, leave the node status as DOWN, clean up the ping job details.. UPDATE mgmt_emd_ping SET ping_job_name = NULL, job_submit_time = NULL WHERE target_guid = l_target_guid; ELSE -- The ping status of the node is POTENTIALLY DOWN, -- -- If reverse ping succeeded or the node is currently in BLACKOUT, -- set the ping status to UP -- IF ( (v_ping_status_in = 0) OR (l_avail_status = MGMT_GLOBAL.G_STATUS_BLACKOUT) ) THEN IF (EMDW_LOG.p_is_debug_set) THEN EMDW_LOG.debug('Marking the Agent ' || v_emd_url_in || ' as UP.', MODULE_NAME) ; END IF; -- mark_emd_up(l_target_guid); -- As the ping succeeded, mark the ping status of the node as UP -- Reset down reason details, unreach time and reverse ping job details UPDATE mgmt_emd_ping SET status = NODE_STATUS_UP, down_reason_code = 0, down_reason_msg = S_DEFAULT_DOWN_REASON_MSG, unrch_start_ts = NULL, ping_job_name = NULL, job_submit_time = NULL WHERE target_guid = l_target_guid; ELSE IF (EMDW_LOG.p_is_debug_set) THEN EMDW_LOG.debug('Updating the down reason for Agent ' || v_emd_url_in, MODULE_NAME ); END IF; -- Format a severity msg based on the v_ping_status_in as follows: -- v_ping_status_in = -1 Agent Unreachable, Host Up. -- v_ping_status_in = -2 Agent Unreachable, Host Unreachable. -- v_ping_status_in = -3 Agent configured to upload to different repository. -- v_ping_status_in = -4 Broken Agent to OMS communication. IF v_ping_status_in = CODE_AGENT_UNRCH_HOST_UP THEN l_error_msg := AGENT_UNRCH_HOST_UP ; l_error_msg := REPLACE(l_error_msg, '{0}', v_error_msg_1_in); l_error_msg_nlsid := NLSID_AGENT_UNRCH_HOST_UP ; l_error_msg_params := v_error_msg_1_in ; ELSIF v_ping_status_in = CODE_AGENT_UNRCH_HOST_UNRCH THEN l_error_msg := AGENT_UNRCH_HOST_UNRCH; l_error_msg := REPLACE(l_error_msg, '{0}', v_error_msg_1_in); l_error_msg := REPLACE(l_error_msg, '{1}', v_error_msg_2_in); l_error_msg_nlsid := NLSID_AGENT_UNRCH_HOST_UNRCH; l_error_msg_params := v_error_msg_1_in ||'&'||v_error_msg_2_in; ELSIF v_ping_status_in = CODE_AGENT_UNRCH_MISCFGD THEN l_error_msg := AGENT_UNRCH_MISCFGD; l_error_msg_nlsid := NLSID_AGENT_UNRCH_MISCFGD ; l_error_msg_params := NULL; ELSIF v_ping_status_in = CODE_BROKEN_AGENT_OMS_COMM THEN l_error_msg := BROKEN_AGENT_OMS_COMM; l_error_msg := REPLACE(l_error_msg, '{0}', v_error_msg_1_in); l_error_msg_nlsid := NLSID_BROKEN_AGENT_OMS_COMM; l_error_msg_params := v_error_msg_1_in; END IF; -- mark_emd_down(l_target_guid, p_agent_status, l_error_msg); -- Reverse ping failed, so record the details, -- Reset the ping job details --Donot rset unrch start ts as we need it when recording next HB and to use the same -- if HB clock has moved back in time. UPDATE mgmt_emd_ping SET status = NODE_STATUS_DOWN, down_reason_code = v_ping_status_in, down_reason_msg = l_error_msg, ping_job_name = NULL, job_submit_time = NULL WHERE target_guid = l_target_guid; -- 5448666: There could be delays in recording agent heartbeats. -- So, set unrch_start_ts to last_heartbeat_ts +1sec instead of -- +max_inactive_time. It'll prevent raise of UNREACHABLE_START -- and UNREACHABLE_CLEAR severities in wrong order. IF (l_unrch_start_ts IS NULL) THEN l_unrch_start_ts := l_last_heartbeat_ts + (1/(60*60*24)); END IF; BEGIN insert_metric_data(l_target_guid, l_unrch_start_ts, 0); EXCEPTION WHEN OTHERS THEN mgmt_log.log_error(UPDATE_AGENT_ACTION, e_update_status, 'Error inserting metric data for tguid = ' || l_target_guid || ' ERROR = ' || SQLERRM ); END; -- It is ONLY now can we confirm that the node is down. -- So we should use the current targets timestamp here. -- -- Agent has been in unreachability from lastHeartBeat + (inactive_time/2) -- Both unreachable start and unreachble end should use the same clock -- to avoid clock skew problems. l_insert_cnt := insert_unrch_severities(UPDATE_AGENT_ACTION, v_emd_url_in, l_unrch_start_ts, MGMT_GLOBAL.G_SEVERITY_UNREACHABLE_START, SUBSTR(l_error_msg, 0, 4000), l_error_msg_nlsid, l_error_msg_params ); -- Call job system Call back. MGMT_JOB_ENGINE.handle_emd_state_change(v_emd_url_in, NODE_STATUS_DOWN); END IF; END IF; EXCEPTION WHEN OTHERS THEN mgmt_log.log_error(UPDATE_AGENT_ACTION, e_update_status, em_update_status || v_emd_url_in || '. ' || SQLERRM); RAISE_APPLICATION_ERROR(-20000, 'EMD = ' || v_emd_url_in || ' ERROR = ' || SQLERRM); END; -- Log the time taken MGMT_LOG.LOG_PERFORMANCE(UPDATE_AGENT_ACTION, MGMT_GLOBAL.ELAPSED_TIME_MSEC(l_start_timestmp, SYSTIMESTAMP), l_start_timestmp, 'Y', CONFIRMED_AGENT_CNT_NAME, 1); IF (EMDW_LOG.p_is_info_set)THEN EMDW_LOG.info('update_emd_status:Exit', MODULE_NAME); END IF; END update_emd_status; FUNCTION get_default_down_reason_msg RETURN VARCHAR2 IS l_down_reason_msg mgmt_emd_ping.down_reason_msg%TYPE; BEGIN -- Start with 32 chars 12345678901234567890123456789012 l_down_reason_msg := ' '; l_down_reason_msg := l_down_reason_msg || l_down_reason_msg; -- 64 chars l_down_reason_msg := l_down_reason_msg || l_down_reason_msg; -- 128 chars l_down_reason_msg := l_down_reason_msg || l_down_reason_msg; -- 256 chars l_down_reason_msg := l_down_reason_msg || l_down_reason_msg; -- 512 chars l_down_reason_msg := l_down_reason_msg || l_down_reason_msg; -- 1024 chars RETURN l_down_reason_msg; END get_default_down_reason_msg; /* ** Bug6267173 - On blackout end, Agent does not state its own new state upon ** clearstate. The only option is to force CLEAR severity on receipt of CLEAN HB and if ** current severity is DOWN */ PROCEDURE agt_up_on_blk_end(p_target_guid RAW, p_collection_timestamp DATE) IS l_chk NUMBER := 0; BEGIN -- To speed up the HB recording process Agent R/S guid has been stored as constant SELECT count(*) INTO l_chk FROM mgmt_current_violation WHERE target_guid = p_target_guid AND policy_guid = AGT_RS_METRIC_GUID AND violation_level = MGMT_GLOBAL.G_SEVERITY_CRITICAL ; IF l_chk = 1 THEN BEGIN MGMT_VIOLATION.log_violation ( p_target_guid => p_target_guid, p_policy_guid => AGT_RS_METRIC_GUID, p_collection_timestamp => p_collection_timestamp, p_violation_level => MGMT_GLOBAL.G_SEVERITY_CLEAR, p_message => AGENT_CLR_OMS_DETECTED, p_message_nlsid => 'EM__SYS__PING_AGENT_CLR_OMS_DETECTED' ); EXCEPTION WHEN OTHERS THEN IF (EMDW_LOG.p_is_debug_set)THEN EMDW_LOG.debug('agt_up_on_blk_end: Error inserting CLEAR for Agent' || ' (target_guid = ' || p_target_guid || ')' || ' (metric_guid = ' || AGT_RS_METRIC_GUID || ')' || ' (error msg = ' || SQLERRM || ')', MODULE_NAME); END IF; END; END IF; END agt_up_on_blk_end; BEGIN S_DEFAULT_DOWN_REASON_MSG := get_default_down_reason_msg; END EM_PING; / show errors;