From 9be1a9bbb8cafe5fd1dea1d79d58c043cef9de80 Mon Sep 17 00:00:00 2001 From: DurgaPrasad-54 Date: Mon, 23 Feb 2026 10:46:06 +0530 Subject: [PATCH 1/4] feat(health,version): update version and health endpoints and add advance check for database --- pom.xml | 26 ++ .../controller/version/VersionController.java | 8 +- .../common/service/health/HealthService.java | 374 ++++++++++++++++++ .../utils/JwtUserIdValidationFilter.java | 4 +- 4 files changed, 409 insertions(+), 3 deletions(-) create mode 100644 src/main/java/com/iemr/common/service/health/HealthService.java diff --git a/pom.xml b/pom.xml index 171ab162..b1cb3ea4 100644 --- a/pom.xml +++ b/pom.xml @@ -520,6 +520,32 @@ ${artifactId}-${version} + + io.github.git-commit-id + git-commit-id-maven-plugin + 9.0.2 + + + get-the-git-infos + + revision + + initialize + + + + true + ${project.build.outputDirectory}/git.properties + + ^git.branch$ + ^git.commit.id.abbrev$ + ^git.build.version$ + ^git.build.time$ + + false + false + + org.apache.maven.plugins maven-jar-plugin diff --git a/src/main/java/com/iemr/common/controller/version/VersionController.java b/src/main/java/com/iemr/common/controller/version/VersionController.java index 705fccdc..814c99a1 100644 --- a/src/main/java/com/iemr/common/controller/version/VersionController.java +++ b/src/main/java/com/iemr/common/controller/version/VersionController.java @@ -28,8 +28,9 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import org.springframework.web.bind.annotation.RequestMapping; -import org.springframework.web.bind.annotation.RequestMethod; +import org.springframework.http.MediaType; +import org.springframework.http.ResponseEntity; +import org.springframework.web.bind.annotation.GetMapping; import org.springframework.web.bind.annotation.RestController; import com.iemr.common.utils.response.OutputResponse; @@ -52,6 +53,9 @@ public String versionInformation() { } catch (Exception e) { output.setError(e); } + logger.info("version Controller End"); + return ResponseEntity.ok(response); + } logger.info("version Controller End"); return output.toString(); diff --git a/src/main/java/com/iemr/common/service/health/HealthService.java b/src/main/java/com/iemr/common/service/health/HealthService.java new file mode 100644 index 00000000..d117337e --- /dev/null +++ b/src/main/java/com/iemr/common/service/health/HealthService.java @@ -0,0 +1,374 @@ +/* + * AMRIT – Accessible Medical Records via Integrated Technology + * Integrated EHR (Electronic Health Records) Solution + * + * Copyright (C) "Piramal Swasthya Management and Research Institute" + * + * This file is part of AMRIT. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see https://www.gnu.org/licenses/. + */ +package com.iemr.common.service.health; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.beans.factory.ObjectProvider; +import org.springframework.data.redis.connection.RedisConnection; +import org.springframework.data.redis.connection.RedisConnectionFactory; +import org.springframework.stereotype.Service; + +import javax.annotation.PreDestroy; +import javax.sql.DataSource; +import java.sql.Connection; +import java.sql.ResultSet; +import java.sql.Statement; +import java.time.Instant; +import java.util.LinkedHashMap; +import java.util.Map; +import java.util.concurrent.Executors; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicLong; +import java.util.concurrent.atomic.AtomicReference; + +@Service +public class HealthService { + + private static final Logger logger = LoggerFactory.getLogger(HealthService.class); + + private static final String LOG_EVENT_STUCK_PROCESS = "MYSQL_STUCK_PROCESS"; + private static final String LOG_EVENT_LOCK_WAIT = "MYSQL_LOCK_WAIT"; + private static final String LOG_EVENT_DEADLOCK = "MYSQL_DEADLOCK"; + private static final String LOG_EVENT_SLOW_QUERIES = "MYSQL_SLOW_QUERIES"; + private static final String LOG_EVENT_CONN_USAGE = "MYSQL_CONNECTION_USAGE"; + private static final String LOG_EVENT_POOL_EXHAUSTED = "MYSQL_POOL_EXHAUSTED"; + private static final long RESPONSE_TIME_SLOW_MS = 2000; // > 2s → SLOW + private static final int STUCK_PROCESS_THRESHOLD = 5; // > 5 stuck → WARNING + private static final int STUCK_PROCESS_SECONDS = 30; // process age in seconds + private static final int CONNECTION_USAGE_WARNING = 80; // > 80% → WARNING + private static final int CONNECTION_USAGE_CRITICAL= 95; // > 95% → CRITICAL + private static final long DIAGNOSTIC_INTERVAL_SEC = 30; // background run interval + private static final long DIAGNOSTIC_GUARD_SEC = 25; // safety dedup guard + private final DataSource dataSource; + private final RedisConnectionFactory redisConnectionFactory; + + private final ScheduledExecutorService diagnosticScheduler = + Executors.newSingleThreadScheduledExecutor(r -> { + Thread t = new Thread(r, "mysql-diagnostic-thread"); + t.setDaemon(true); + return t; + }); + + private final AtomicLong lastDiagnosticRunAt = new AtomicLong(0); + private final AtomicReference cachedDbSeverity = + new AtomicReference<>("INFO"); + private final AtomicLong previousDeadlockCount = new AtomicLong(0); + public HealthService(ObjectProvider dataSourceProvider, + ObjectProvider redisProvider) { + this.dataSource = dataSourceProvider.getIfAvailable(); + this.redisConnectionFactory = redisProvider.getIfAvailable(); + + // Start background diagnostics only if DB is configured. + // Initial delay = 0 so the first run happens at startup. + if (this.dataSource != null) { + diagnosticScheduler.scheduleAtFixedRate( + this::runAdvancedMySQLDiagnostics, + 0, + DIAGNOSTIC_INTERVAL_SEC, + TimeUnit.SECONDS + ); + } + } + + @PreDestroy + public void shutdownDiagnostics() { + logger.info("[HEALTH_SERVICE_SHUTDOWN] Shutting down diagnostic scheduler..."); + diagnosticScheduler.shutdown(); + try { + if (!diagnosticScheduler.awaitTermination(5, TimeUnit.SECONDS)) { + logger.warn("[HEALTH_SERVICE_SHUTDOWN] Diagnostic scheduler did not terminate gracefully"); + diagnosticScheduler.shutdownNow(); + } + logger.info("[HEALTH_SERVICE_SHUTDOWN] Diagnostic scheduler shut down successfully"); + } catch (InterruptedException e) { + logger.error("[HEALTH_SERVICE_SHUTDOWN] Interrupted while shutting down scheduler", e); + diagnosticScheduler.shutdownNow(); + Thread.currentThread().interrupt(); + } + } + + // PUBLIC — Called by the /health controller + public Map checkHealth() { + Map response = new LinkedHashMap<>(); + + Map mysqlResult = checkDatabaseConnectivity(); + Map redisResult = checkRedisConnectivity(); + + String mysqlStatus = (String) mysqlResult.get("status"); + String redisStatus = (String) redisResult.get("status"); + + boolean overallUp = !"DOWN".equals(mysqlStatus) && !"DOWN".equals(redisStatus); + + response.put("status", overallUp ? "UP" : "DOWN"); + response.put("checkedAt", Instant.now().toString()); + + // Expose only status and severity, keep diagnostics internal + Map mysqlSummary = new LinkedHashMap<>(); + mysqlSummary.put("status", mysqlResult.get("status")); + mysqlSummary.put("severity", mysqlResult.get("severity")); + + Map redisSummary = new LinkedHashMap<>(); + redisSummary.put("status", redisResult.get("status")); + redisSummary.put("severity", redisResult.get("severity")); + + response.put("mysql", mysqlSummary); + response.put("redis", redisSummary); + + return response; + } + // Runs only SELECT 1 with a hard 3-second timeout. + private Map checkDatabaseConnectivity() { + Map result = new LinkedHashMap<>(); + + if (dataSource == null) { + result.put("status", "NOT_CONFIGURED"); + result.put("severity", "INFO"); + return result; + } + + try (Connection conn = dataSource.getConnection(); + Statement stmt = conn.createStatement()) { + + stmt.setQueryTimeout(3); // Hard cap — /health must never block > 3s + stmt.execute("SELECT 1"); + + // If SELECT 1 succeeds, use cached severity from background diagnostics + String severity = cachedDbSeverity.get(); + result.put("status", resolveDatabaseStatus(severity)); + result.put("severity", severity); + + } catch (Exception e) { + // Log connection failure as a structured event + logger.error( + "[MYSQL_CONNECT_FAILED] MySQL connectivity check failed | error=\"{}\"", + e.getMessage() + ); + + result.put("status", "DOWN"); + result.put("severity", "CRITICAL"); + } + + return result; + } + + private Map checkRedisConnectivity() { + Map result = new LinkedHashMap<>(); + + if (redisConnectionFactory == null) { + result.put("status", "NOT_CONFIGURED"); + result.put("severity", "INFO"); + return result; + } + + try (RedisConnection conn = redisConnectionFactory.getConnection()) { + conn.ping(); + result.put("status", "UP"); + result.put("severity", "OK"); + + } catch (Exception e) { + logger.error( + "[REDIS_CONNECT_FAILED] Redis connectivity check failed | error=\"{}\"", + e.getMessage() + ); + + result.put("status", "DOWN"); + result.put("severity", "CRITICAL"); + } + + return result; + } + + private void runAdvancedMySQLDiagnostics() { + // Dedup guard: skip if last run was within the past 25 seconds + long now = System.currentTimeMillis(); + if (now - lastDiagnosticRunAt.get() < TimeUnit.SECONDS.toMillis(DIAGNOSTIC_GUARD_SEC)) { + return; + } + lastDiagnosticRunAt.set(now); + + String worstSeverity = "INFO"; // Escalates during checks, never descends + + try (Connection conn = dataSource.getConnection()) { + + // CHECK 1 — Stuck / Long-Running Processes + try (Statement stmt = conn.createStatement(); + ResultSet rs = stmt.executeQuery( + "SELECT COUNT(*) AS cnt FROM information_schema.PROCESSLIST " + + "WHERE TIME > " + STUCK_PROCESS_SECONDS + " AND COMMAND != 'Sleep'")) { + + if (rs.next()) { + int stuckCount = rs.getInt("cnt"); + if (stuckCount > 0) { + logger.warn( + "[{}] Stuck MySQL processes detected | count={} | thresholdSeconds={}", + LOG_EVENT_STUCK_PROCESS, stuckCount, STUCK_PROCESS_SECONDS + ); + if (stuckCount > STUCK_PROCESS_THRESHOLD) { + worstSeverity = escalate(worstSeverity, "WARNING"); + } + } + } + } catch (Exception e) { + logger.error("[MYSQL_DIAGNOSTIC_ERROR] Stuck process check failed | error=\"{}\"", + e.getMessage()); + } + + // CHECK 2 — InnoDB Long-Running Transactions (MYSQL_LONG_TX) + // Note: INNODB_TRX shows all active transactions. True lock-wait detection via + // INNODB_LOCK_WAITS requires PERFORMANCE_SCHEMA enabled and explicit permissions. + // This query flags transactions older than STUCK_PROCESS_SECONDS as potentially problematic. + try (Statement stmt = conn.createStatement(); + ResultSet rs = stmt.executeQuery( + "SELECT COUNT(*) AS cnt FROM information_schema.INNODB_TRX " + + "WHERE TIME_TO_SEC(TIMEDIFF(NOW(), trx_started)) > " + STUCK_PROCESS_SECONDS)) { + + if (rs.next()) { + int lockCount = rs.getInt("cnt"); + if (lockCount > 0) { + logger.error( + "[{}] InnoDB long-running transaction detected | count={} | thresholdSeconds={}", + LOG_EVENT_LOCK_WAIT, lockCount, STUCK_PROCESS_SECONDS + ); + worstSeverity = escalate(worstSeverity, "CRITICAL"); + } + } + } catch (Exception e) { + logger.error("[MYSQL_DIAGNOSTIC_ERROR] Long transaction check failed | error=\"{}\"", + e.getMessage()); + } + + // CHECK 3 — InnoDB Deadlocks (Delta Tracking to avoid permanent WARNING) + try (Statement stmt = conn.createStatement(); + ResultSet rs = stmt.executeQuery("SHOW STATUS LIKE 'Innodb_deadlocks'")) { + + if (rs.next()) { + long currentDeadlocks = rs.getLong("Value"); + long previousDeadlocks = previousDeadlockCount.getAndSet(currentDeadlocks); + + // Only warn if deadlocks have *increased* since last run + if (currentDeadlocks > previousDeadlocks) { + long deltaDeadlocks = currentDeadlocks - previousDeadlocks; + logger.warn( + "[{}] InnoDB deadlocks detected since last run | deltaCount={} | cumulativeCount={}", + LOG_EVENT_DEADLOCK, deltaDeadlocks, currentDeadlocks + ); + worstSeverity = escalate(worstSeverity, "WARNING"); + } + } + } catch (Exception e) { + logger.error("[MYSQL_DIAGNOSTIC_ERROR] Deadlock check failed | error=\"{}\"", + e.getMessage()); + } + + // CHECK 4 — Slow Queries + try (Statement stmt = conn.createStatement(); + ResultSet rs = stmt.executeQuery("SHOW STATUS LIKE 'Slow_queries'")) { + + if (rs.next()) { + long slowQueries = rs.getLong("Value"); + if (slowQueries > 0) { + logger.warn( + "[{}] Slow queries detected | cumulativeCount={}", + LOG_EVENT_SLOW_QUERIES, slowQueries + ); + worstSeverity = escalate(worstSeverity, "WARNING"); + } + } + } catch (Exception e) { + logger.error("[MYSQL_DIAGNOSTIC_ERROR] Slow query check failed | error=\"{}\"", + e.getMessage()); + } + + // CHECK 5 — Server Connection Usage + try (Statement stmt = conn.createStatement()) { + int threadsConnected = 0; + int maxConnections = 0; + + try (ResultSet rs = stmt.executeQuery("SHOW STATUS LIKE 'Threads_connected'")) { + if (rs.next()) threadsConnected = rs.getInt("Value"); + } + + try (ResultSet rs = stmt.executeQuery("SHOW VARIABLES LIKE 'max_connections'")) { + if (rs.next()) maxConnections = rs.getInt("Value"); + } + + if (maxConnections > 0) { + int usagePct = (int) ((threadsConnected * 100.0) / maxConnections); + + if (usagePct >= CONNECTION_USAGE_CRITICAL) { + logger.error( + "[{}] MySQL connection pool near exhaustion | threadsConnected={} | maxConnections={} | usagePercent={}", + LOG_EVENT_POOL_EXHAUSTED, threadsConnected, maxConnections, usagePct + ); + worstSeverity = escalate(worstSeverity, "CRITICAL"); + + } else if (usagePct > CONNECTION_USAGE_WARNING) { + logger.warn( + "[{}] MySQL connection usage is high | threadsConnected={} | maxConnections={} | usagePercent={}", + LOG_EVENT_CONN_USAGE, threadsConnected, maxConnections, usagePct + ); + worstSeverity = escalate(worstSeverity, "WARNING"); + } + } + } catch (Exception e) { + logger.error("[MYSQL_DIAGNOSTIC_ERROR] Connection usage check failed | error=\"{}\"", + e.getMessage()); + } + + } catch (Exception e) { + // Cannot open connection for diagnostics — treat as CRITICAL + logger.error( + "[MYSQL_DIAGNOSTIC_ERROR] Could not open connection for diagnostics | error=\"{}\"", + e.getMessage() + ); + worstSeverity = "CRITICAL"; + } + + // Persist computed severity so /health can read it instantly + cachedDbSeverity.set(worstSeverity); + + logger.debug( + "[MYSQL_DIAGNOSTIC_COMPLETE] Background diagnostic cycle complete | severity={}", + worstSeverity + ); + } + private String resolveDatabaseStatus(String severity) { + return switch (severity) { + case "CRITICAL" -> "DOWN"; + case "WARNING" -> "DEGRADED"; + default -> "UP"; + }; + } + private String escalate(String current, String candidate) { + return severityRank(candidate) > severityRank(current) ? candidate : current; + } + + private int severityRank(String severity) { + return switch (severity) { + case "CRITICAL" -> 2; + case "WARNING" -> 1; + default -> 0; + }; + } +} \ No newline at end of file diff --git a/src/main/java/com/iemr/common/utils/JwtUserIdValidationFilter.java b/src/main/java/com/iemr/common/utils/JwtUserIdValidationFilter.java index 81d79221..364aa12d 100644 --- a/src/main/java/com/iemr/common/utils/JwtUserIdValidationFilter.java +++ b/src/main/java/com/iemr/common/utils/JwtUserIdValidationFilter.java @@ -251,7 +251,9 @@ private boolean shouldSkipAuthentication(String path, String contextPath) { || path.startsWith(contextPath + "/user/userLogout") || path.startsWith(contextPath + "/user/validateSecurityQuestionAndAnswer") || path.startsWith(contextPath + "/user/logOutUserFromConcurrentSession") - || path.startsWith(contextPath + "/user/refreshToken"); + || path.startsWith(contextPath + "/user/refreshToken") + || path.equals(contextPath + "/health") + || path.equals(contextPath + "/version"); } private String getJwtTokenFromCookies(HttpServletRequest request) { From af877c24a383781bf8886d2ed364f44e2cb14ec5 Mon Sep 17 00:00:00 2001 From: DurgaPrasad-54 Date: Mon, 23 Feb 2026 11:15:14 +0530 Subject: [PATCH 2/4] fix(health): normalize severity and fix slow query false positives --- .../common/service/health/HealthService.java | 336 ++++++++++-------- 1 file changed, 186 insertions(+), 150 deletions(-) diff --git a/src/main/java/com/iemr/common/service/health/HealthService.java b/src/main/java/com/iemr/common/service/health/HealthService.java index d117337e..894013a2 100644 --- a/src/main/java/com/iemr/common/service/health/HealthService.java +++ b/src/main/java/com/iemr/common/service/health/HealthService.java @@ -47,12 +47,35 @@ public class HealthService { private static final Logger logger = LoggerFactory.getLogger(HealthService.class); + // Event log constants private static final String LOG_EVENT_STUCK_PROCESS = "MYSQL_STUCK_PROCESS"; private static final String LOG_EVENT_LOCK_WAIT = "MYSQL_LOCK_WAIT"; private static final String LOG_EVENT_DEADLOCK = "MYSQL_DEADLOCK"; private static final String LOG_EVENT_SLOW_QUERIES = "MYSQL_SLOW_QUERIES"; private static final String LOG_EVENT_CONN_USAGE = "MYSQL_CONNECTION_USAGE"; private static final String LOG_EVENT_POOL_EXHAUSTED = "MYSQL_POOL_EXHAUSTED"; + + // Response field constants + private static final String FIELD_STATUS = "status"; + private static final String FIELD_SEVERITY = "severity"; + private static final String FIELD_MYSQL = "mysql"; + private static final String FIELD_REDIS = "redis"; + private static final String FIELD_CHECKED_AT = "checkedAt"; + + // Severity constants + private static final String SEVERITY_CRITICAL = "CRITICAL"; + private static final String SEVERITY_WARNING = "WARNING"; + private static final String SEVERITY_OK = "OK"; + private static final String SEVERITY_INFO = "INFO"; + + // Database query constants + private static final String STATUS_VALUE = "Value"; + private static final String STATUS_UP = "UP"; + private static final String STATUS_DOWN = "DOWN"; + private static final String STATUS_DEGRADED = "DEGRADED"; + private static final String STATUS_NOT_CONFIGURED = "NOT_CONFIGURED"; + + // Thresholds private static final long RESPONSE_TIME_SLOW_MS = 2000; // > 2s → SLOW private static final int STUCK_PROCESS_THRESHOLD = 5; // > 5 stuck → WARNING private static final int STUCK_PROCESS_SECONDS = 30; // process age in seconds @@ -72,8 +95,9 @@ public class HealthService { private final AtomicLong lastDiagnosticRunAt = new AtomicLong(0); private final AtomicReference cachedDbSeverity = - new AtomicReference<>("INFO"); + new AtomicReference<>(SEVERITY_OK); private final AtomicLong previousDeadlockCount = new AtomicLong(0); + private final AtomicLong previousSlowQueryCount = new AtomicLong(0); public HealthService(ObjectProvider dataSourceProvider, ObjectProvider redisProvider) { this.dataSource = dataSourceProvider.getIfAvailable(); @@ -115,25 +139,25 @@ public Map checkHealth() { Map mysqlResult = checkDatabaseConnectivity(); Map redisResult = checkRedisConnectivity(); - String mysqlStatus = (String) mysqlResult.get("status"); - String redisStatus = (String) redisResult.get("status"); + String mysqlStatus = (String) mysqlResult.get(FIELD_STATUS); + String redisStatus = (String) redisResult.get(FIELD_STATUS); - boolean overallUp = !"DOWN".equals(mysqlStatus) && !"DOWN".equals(redisStatus); + boolean overallUp = !STATUS_DOWN.equals(mysqlStatus) && !STATUS_DOWN.equals(redisStatus); - response.put("status", overallUp ? "UP" : "DOWN"); - response.put("checkedAt", Instant.now().toString()); + response.put(FIELD_STATUS, overallUp ? STATUS_UP : STATUS_DOWN); + response.put(FIELD_CHECKED_AT, Instant.now().toString()); // Expose only status and severity, keep diagnostics internal Map mysqlSummary = new LinkedHashMap<>(); - mysqlSummary.put("status", mysqlResult.get("status")); - mysqlSummary.put("severity", mysqlResult.get("severity")); + mysqlSummary.put(FIELD_STATUS, mysqlResult.get(FIELD_STATUS)); + mysqlSummary.put(FIELD_SEVERITY, mysqlResult.get(FIELD_SEVERITY)); Map redisSummary = new LinkedHashMap<>(); - redisSummary.put("status", redisResult.get("status")); - redisSummary.put("severity", redisResult.get("severity")); + redisSummary.put(FIELD_STATUS, redisResult.get(FIELD_STATUS)); + redisSummary.put(FIELD_SEVERITY, redisResult.get(FIELD_SEVERITY)); - response.put("mysql", mysqlSummary); - response.put("redis", redisSummary); + response.put(FIELD_MYSQL, mysqlSummary); + response.put(FIELD_REDIS, redisSummary); return response; } @@ -142,8 +166,8 @@ private Map checkDatabaseConnectivity() { Map result = new LinkedHashMap<>(); if (dataSource == null) { - result.put("status", "NOT_CONFIGURED"); - result.put("severity", "INFO"); + result.put(FIELD_STATUS, STATUS_NOT_CONFIGURED); + result.put(FIELD_SEVERITY, SEVERITY_INFO); return result; } @@ -155,8 +179,8 @@ private Map checkDatabaseConnectivity() { // If SELECT 1 succeeds, use cached severity from background diagnostics String severity = cachedDbSeverity.get(); - result.put("status", resolveDatabaseStatus(severity)); - result.put("severity", severity); + result.put(FIELD_STATUS, resolveDatabaseStatus(severity)); + result.put(FIELD_SEVERITY, severity); } catch (Exception e) { // Log connection failure as a structured event @@ -165,8 +189,8 @@ private Map checkDatabaseConnectivity() { e.getMessage() ); - result.put("status", "DOWN"); - result.put("severity", "CRITICAL"); + result.put(FIELD_STATUS, STATUS_DOWN); + result.put(FIELD_SEVERITY, SEVERITY_CRITICAL); } return result; @@ -176,15 +200,15 @@ private Map checkRedisConnectivity() { Map result = new LinkedHashMap<>(); if (redisConnectionFactory == null) { - result.put("status", "NOT_CONFIGURED"); - result.put("severity", "INFO"); + result.put(FIELD_STATUS, STATUS_NOT_CONFIGURED); + result.put(FIELD_SEVERITY, SEVERITY_INFO); return result; } try (RedisConnection conn = redisConnectionFactory.getConnection()) { conn.ping(); - result.put("status", "UP"); - result.put("severity", "OK"); + result.put(FIELD_STATUS, STATUS_UP); + result.put(FIELD_SEVERITY, SEVERITY_OK); } catch (Exception e) { logger.error( @@ -192,8 +216,8 @@ private Map checkRedisConnectivity() { e.getMessage() ); - result.put("status", "DOWN"); - result.put("severity", "CRITICAL"); + result.put(FIELD_STATUS, STATUS_DOWN); + result.put(FIELD_SEVERITY, SEVERITY_CRITICAL); } return result; @@ -207,157 +231,169 @@ private void runAdvancedMySQLDiagnostics() { } lastDiagnosticRunAt.set(now); - String worstSeverity = "INFO"; // Escalates during checks, never descends + String worstSeverity = SEVERITY_OK; try (Connection conn = dataSource.getConnection()) { + worstSeverity = escalate(worstSeverity, performStuckProcessCheck(conn)); + worstSeverity = escalate(worstSeverity, performLongTransactionCheck(conn)); + worstSeverity = escalate(worstSeverity, performDeadlockCheck(conn)); + worstSeverity = escalate(worstSeverity, performSlowQueryCheck(conn)); + worstSeverity = escalate(worstSeverity, performConnectionUsageCheck(conn)); - // CHECK 1 — Stuck / Long-Running Processes - try (Statement stmt = conn.createStatement(); - ResultSet rs = stmt.executeQuery( - "SELECT COUNT(*) AS cnt FROM information_schema.PROCESSLIST " + - "WHERE TIME > " + STUCK_PROCESS_SECONDS + " AND COMMAND != 'Sleep'")) { - - if (rs.next()) { - int stuckCount = rs.getInt("cnt"); - if (stuckCount > 0) { - logger.warn( - "[{}] Stuck MySQL processes detected | count={} | thresholdSeconds={}", - LOG_EVENT_STUCK_PROCESS, stuckCount, STUCK_PROCESS_SECONDS - ); - if (stuckCount > STUCK_PROCESS_THRESHOLD) { - worstSeverity = escalate(worstSeverity, "WARNING"); - } + } catch (Exception e) { + logger.error( + "[MYSQL_DIAGNOSTIC_ERROR] Could not open connection for diagnostics | error=\"{}\"", + e.getMessage() + ); + worstSeverity = SEVERITY_CRITICAL; + } + + cachedDbSeverity.set(worstSeverity); + logger.debug( + "[MYSQL_DIAGNOSTIC_COMPLETE] Background diagnostic cycle complete | severity={}", + worstSeverity + ); + } + + private String performStuckProcessCheck(Connection conn) { + try (Statement stmt = conn.createStatement(); + ResultSet rs = stmt.executeQuery( + "SELECT COUNT(*) AS cnt FROM information_schema.PROCESSLIST " + + "WHERE TIME > " + STUCK_PROCESS_SECONDS + " AND COMMAND != 'Sleep'")) { + + if (rs.next()) { + int stuckCount = rs.getInt("cnt"); + if (stuckCount > 0) { + logger.warn( + "[{}] Stuck MySQL processes detected | count={} | thresholdSeconds={}", + LOG_EVENT_STUCK_PROCESS, stuckCount, STUCK_PROCESS_SECONDS + ); + if (stuckCount > STUCK_PROCESS_THRESHOLD) { + return SEVERITY_WARNING; } } - } catch (Exception e) { - logger.error("[MYSQL_DIAGNOSTIC_ERROR] Stuck process check failed | error=\"{}\"", - e.getMessage()); } + } catch (Exception e) { + logger.error("[MYSQL_DIAGNOSTIC_ERROR] Stuck process check failed | error=\"{}\"", + e.getMessage()); + } + return SEVERITY_OK; + } - // CHECK 2 — InnoDB Long-Running Transactions (MYSQL_LONG_TX) - // Note: INNODB_TRX shows all active transactions. True lock-wait detection via - // INNODB_LOCK_WAITS requires PERFORMANCE_SCHEMA enabled and explicit permissions. - // This query flags transactions older than STUCK_PROCESS_SECONDS as potentially problematic. - try (Statement stmt = conn.createStatement(); - ResultSet rs = stmt.executeQuery( - "SELECT COUNT(*) AS cnt FROM information_schema.INNODB_TRX " + - "WHERE TIME_TO_SEC(TIMEDIFF(NOW(), trx_started)) > " + STUCK_PROCESS_SECONDS)) { - - if (rs.next()) { - int lockCount = rs.getInt("cnt"); - if (lockCount > 0) { - logger.error( - "[{}] InnoDB long-running transaction detected | count={} | thresholdSeconds={}", - LOG_EVENT_LOCK_WAIT, lockCount, STUCK_PROCESS_SECONDS - ); - worstSeverity = escalate(worstSeverity, "CRITICAL"); - } + private String performLongTransactionCheck(Connection conn) { + try (Statement stmt = conn.createStatement(); + ResultSet rs = stmt.executeQuery( + "SELECT COUNT(*) AS cnt FROM information_schema.INNODB_TRX " + + "WHERE TIME_TO_SEC(TIMEDIFF(NOW(), trx_started)) > " + STUCK_PROCESS_SECONDS)) { + + if (rs.next()) { + int lockCount = rs.getInt("cnt"); + if (lockCount > 0) { + logger.error( + "[{}] InnoDB long-running transaction detected | count={} | thresholdSeconds={}", + LOG_EVENT_LOCK_WAIT, lockCount, STUCK_PROCESS_SECONDS + ); + return SEVERITY_CRITICAL; } - } catch (Exception e) { - logger.error("[MYSQL_DIAGNOSTIC_ERROR] Long transaction check failed | error=\"{}\"", - e.getMessage()); } + } catch (Exception e) { + logger.error("[MYSQL_DIAGNOSTIC_ERROR] Long transaction check failed | error=\"{}\"", + e.getMessage()); + } + return SEVERITY_OK; + } - // CHECK 3 — InnoDB Deadlocks (Delta Tracking to avoid permanent WARNING) - try (Statement stmt = conn.createStatement(); - ResultSet rs = stmt.executeQuery("SHOW STATUS LIKE 'Innodb_deadlocks'")) { + private String performDeadlockCheck(Connection conn) { + try (Statement stmt = conn.createStatement(); + ResultSet rs = stmt.executeQuery("SHOW STATUS LIKE 'Innodb_deadlocks'")) { + + if (rs.next()) { + long currentDeadlocks = rs.getLong(STATUS_VALUE); + long previousDeadlocks = previousDeadlockCount.getAndSet(currentDeadlocks); - if (rs.next()) { - long currentDeadlocks = rs.getLong("Value"); - long previousDeadlocks = previousDeadlockCount.getAndSet(currentDeadlocks); - - // Only warn if deadlocks have *increased* since last run - if (currentDeadlocks > previousDeadlocks) { - long deltaDeadlocks = currentDeadlocks - previousDeadlocks; - logger.warn( - "[{}] InnoDB deadlocks detected since last run | deltaCount={} | cumulativeCount={}", - LOG_EVENT_DEADLOCK, deltaDeadlocks, currentDeadlocks - ); - worstSeverity = escalate(worstSeverity, "WARNING"); - } + if (currentDeadlocks > previousDeadlocks) { + long deltaDeadlocks = currentDeadlocks - previousDeadlocks; + logger.warn( + "[{}] InnoDB deadlocks detected since last run | deltaCount={} | cumulativeCount={}", + LOG_EVENT_DEADLOCK, deltaDeadlocks, currentDeadlocks + ); + return SEVERITY_WARNING; } - } catch (Exception e) { - logger.error("[MYSQL_DIAGNOSTIC_ERROR] Deadlock check failed | error=\"{}\"", - e.getMessage()); } + } catch (Exception e) { + logger.error("[MYSQL_DIAGNOSTIC_ERROR] Deadlock check failed | error=\"{}\"", + e.getMessage()); + } + return SEVERITY_OK; + } - // CHECK 4 — Slow Queries - try (Statement stmt = conn.createStatement(); - ResultSet rs = stmt.executeQuery("SHOW STATUS LIKE 'Slow_queries'")) { + private String performSlowQueryCheck(Connection conn) { + try (Statement stmt = conn.createStatement(); + ResultSet rs = stmt.executeQuery("SHOW STATUS LIKE 'Slow_queries'")) { + + if (rs.next()) { + long slowQueries = rs.getLong(STATUS_VALUE); + long previousSlow = previousSlowQueryCount.getAndSet(slowQueries); - if (rs.next()) { - long slowQueries = rs.getLong("Value"); - if (slowQueries > 0) { - logger.warn( - "[{}] Slow queries detected | cumulativeCount={}", - LOG_EVENT_SLOW_QUERIES, slowQueries - ); - worstSeverity = escalate(worstSeverity, "WARNING"); - } + // Only warn if slow queries have *increased* since last run + if (slowQueries > previousSlow) { + long delta = slowQueries - previousSlow; + logger.warn( + "[{}] New slow queries detected since last run | deltaCount={} | cumulativeCount={}", + LOG_EVENT_SLOW_QUERIES, delta, slowQueries + ); + return SEVERITY_WARNING; } - } catch (Exception e) { - logger.error("[MYSQL_DIAGNOSTIC_ERROR] Slow query check failed | error=\"{}\"", - e.getMessage()); } + } catch (Exception e) { + logger.error("[MYSQL_DIAGNOSTIC_ERROR] Slow query check failed | error=\"{}\"", + e.getMessage()); + } + return SEVERITY_OK; + } - // CHECK 5 — Server Connection Usage - try (Statement stmt = conn.createStatement()) { - int threadsConnected = 0; - int maxConnections = 0; + private String performConnectionUsageCheck(Connection conn) { + try (Statement stmt = conn.createStatement()) { + int threadsConnected = 0; + int maxConnections = 0; - try (ResultSet rs = stmt.executeQuery("SHOW STATUS LIKE 'Threads_connected'")) { - if (rs.next()) threadsConnected = rs.getInt("Value"); - } + try (ResultSet rs = stmt.executeQuery("SHOW STATUS LIKE 'Threads_connected'")) { + if (rs.next()) threadsConnected = rs.getInt(STATUS_VALUE); + } - try (ResultSet rs = stmt.executeQuery("SHOW VARIABLES LIKE 'max_connections'")) { - if (rs.next()) maxConnections = rs.getInt("Value"); - } + try (ResultSet rs = stmt.executeQuery("SHOW VARIABLES LIKE 'max_connections'")) { + if (rs.next()) maxConnections = rs.getInt(STATUS_VALUE); + } - if (maxConnections > 0) { - int usagePct = (int) ((threadsConnected * 100.0) / maxConnections); - - if (usagePct >= CONNECTION_USAGE_CRITICAL) { - logger.error( - "[{}] MySQL connection pool near exhaustion | threadsConnected={} | maxConnections={} | usagePercent={}", - LOG_EVENT_POOL_EXHAUSTED, threadsConnected, maxConnections, usagePct - ); - worstSeverity = escalate(worstSeverity, "CRITICAL"); - - } else if (usagePct > CONNECTION_USAGE_WARNING) { - logger.warn( - "[{}] MySQL connection usage is high | threadsConnected={} | maxConnections={} | usagePercent={}", - LOG_EVENT_CONN_USAGE, threadsConnected, maxConnections, usagePct - ); - worstSeverity = escalate(worstSeverity, "WARNING"); - } + if (maxConnections > 0) { + int usagePct = (int) ((threadsConnected * 100.0) / maxConnections); + + if (usagePct >= CONNECTION_USAGE_CRITICAL) { + logger.error( + "[{}] MySQL connection pool near exhaustion | threadsConnected={} | maxConnections={} | usagePercent={}", + LOG_EVENT_POOL_EXHAUSTED, threadsConnected, maxConnections, usagePct + ); + return SEVERITY_CRITICAL; + + } else if (usagePct > CONNECTION_USAGE_WARNING) { + logger.warn( + "[{}] MySQL connection usage is high | threadsConnected={} | maxConnections={} | usagePercent={}", + LOG_EVENT_CONN_USAGE, threadsConnected, maxConnections, usagePct + ); + return SEVERITY_WARNING; } - } catch (Exception e) { - logger.error("[MYSQL_DIAGNOSTIC_ERROR] Connection usage check failed | error=\"{}\"", - e.getMessage()); } - } catch (Exception e) { - // Cannot open connection for diagnostics — treat as CRITICAL - logger.error( - "[MYSQL_DIAGNOSTIC_ERROR] Could not open connection for diagnostics | error=\"{}\"", - e.getMessage() - ); - worstSeverity = "CRITICAL"; + logger.error("[MYSQL_DIAGNOSTIC_ERROR] Connection usage check failed | error=\"{}\"", + e.getMessage()); } - - // Persist computed severity so /health can read it instantly - cachedDbSeverity.set(worstSeverity); - - logger.debug( - "[MYSQL_DIAGNOSTIC_COMPLETE] Background diagnostic cycle complete | severity={}", - worstSeverity - ); + return SEVERITY_OK; } private String resolveDatabaseStatus(String severity) { return switch (severity) { - case "CRITICAL" -> "DOWN"; - case "WARNING" -> "DEGRADED"; - default -> "UP"; + case SEVERITY_CRITICAL -> STATUS_DOWN; + case SEVERITY_WARNING -> STATUS_DEGRADED; + default -> STATUS_UP; }; } private String escalate(String current, String candidate) { @@ -366,9 +402,9 @@ private String escalate(String current, String candidate) { private int severityRank(String severity) { return switch (severity) { - case "CRITICAL" -> 2; - case "WARNING" -> 1; - default -> 0; + case SEVERITY_CRITICAL -> 2; + case SEVERITY_WARNING -> 1; + default -> 0; }; } } \ No newline at end of file From 3f033dd4c04c36162aa08ddbb5b3c0b68878370c Mon Sep 17 00:00:00 2001 From: DurgaPrasad-54 Date: Mon, 23 Feb 2026 11:29:01 +0530 Subject: [PATCH 3/4] fix(health): avoid false CRITICAL on single long-running MySQL transaction --- .../common/service/health/HealthService.java | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/src/main/java/com/iemr/common/service/health/HealthService.java b/src/main/java/com/iemr/common/service/health/HealthService.java index 894013a2..532b0302 100644 --- a/src/main/java/com/iemr/common/service/health/HealthService.java +++ b/src/main/java/com/iemr/common/service/health/HealthService.java @@ -79,6 +79,9 @@ public class HealthService { private static final long RESPONSE_TIME_SLOW_MS = 2000; // > 2s → SLOW private static final int STUCK_PROCESS_THRESHOLD = 5; // > 5 stuck → WARNING private static final int STUCK_PROCESS_SECONDS = 30; // process age in seconds + private static final int LONG_TXN_WARNING_THRESHOLD = 1; // ≥1 long txn → WARNING + private static final int LONG_TXN_CRITICAL_THRESHOLD = 5; // ≥5 long txns → CRITICAL + private static final int LONG_TXN_SECONDS = 60; // transaction age threshold private static final int CONNECTION_USAGE_WARNING = 80; // > 80% → WARNING private static final int CONNECTION_USAGE_CRITICAL= 95; // > 95% → CRITICAL private static final long DIAGNOSTIC_INTERVAL_SEC = 30; // background run interval @@ -284,16 +287,18 @@ private String performLongTransactionCheck(Connection conn) { try (Statement stmt = conn.createStatement(); ResultSet rs = stmt.executeQuery( "SELECT COUNT(*) AS cnt FROM information_schema.INNODB_TRX " + - "WHERE TIME_TO_SEC(TIMEDIFF(NOW(), trx_started)) > " + STUCK_PROCESS_SECONDS)) { + "WHERE TIME_TO_SEC(TIMEDIFF(NOW(), trx_started)) > " + LONG_TXN_SECONDS)) { if (rs.next()) { int lockCount = rs.getInt("cnt"); - if (lockCount > 0) { - logger.error( - "[{}] InnoDB long-running transaction detected | count={} | thresholdSeconds={}", - LOG_EVENT_LOCK_WAIT, lockCount, STUCK_PROCESS_SECONDS + if (lockCount >= LONG_TXN_WARNING_THRESHOLD) { + logger.warn( + "[{}] InnoDB long-running transaction(s) detected | count={} | thresholdSeconds={}", + LOG_EVENT_LOCK_WAIT, lockCount, LONG_TXN_SECONDS ); - return SEVERITY_CRITICAL; + // Graduated escalation: WARNING for 1-4, CRITICAL for 5+ + return lockCount >= LONG_TXN_CRITICAL_THRESHOLD + ? SEVERITY_CRITICAL : SEVERITY_WARNING; } } } catch (Exception e) { From f131f3abf9b64d92ac22e6020e761063ae1b5647 Mon Sep 17 00:00:00 2001 From: DurgaPrasad-54 Date: Mon, 23 Feb 2026 11:46:52 +0530 Subject: [PATCH 4/4] fix(health): enforce 3s DB connection timeout via HikariCP --- .../common/service/health/HealthService.java | 22 +++++++++++++------ 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/src/main/java/com/iemr/common/service/health/HealthService.java b/src/main/java/com/iemr/common/service/health/HealthService.java index 532b0302..7714efce 100644 --- a/src/main/java/com/iemr/common/service/health/HealthService.java +++ b/src/main/java/com/iemr/common/service/health/HealthService.java @@ -28,7 +28,7 @@ import org.springframework.data.redis.connection.RedisConnectionFactory; import org.springframework.stereotype.Service; -import javax.annotation.PreDestroy; +import jakarta.annotation.PreDestroy; import javax.sql.DataSource; import java.sql.Connection; import java.sql.ResultSet; @@ -164,7 +164,10 @@ public Map checkHealth() { return response; } - // Runs only SELECT 1 with a hard 3-second timeout. + // Runs only SELECT 1 with a hard 3-second timeout on query execution. + // NOTE: getConnection() is NOT bounded by this timeout — it respects the pool's + // connectionTimeout (default 30s in HikariCP). For true 3-second /health guarantees, + // configure the DataSource connectionTimeout ≤ 3 seconds or wrap in an ExecutorService timeout. private Map checkDatabaseConnectivity() { Map result = new LinkedHashMap<>(); @@ -177,7 +180,7 @@ private Map checkDatabaseConnectivity() { try (Connection conn = dataSource.getConnection(); Statement stmt = conn.createStatement()) { - stmt.setQueryTimeout(3); // Hard cap — /health must never block > 3s + stmt.setQueryTimeout(3); // Bounds only the SELECT 1 execution stmt.execute("SELECT 1"); // If SELECT 1 succeeds, use cached severity from background diagnostics @@ -267,12 +270,17 @@ private String performStuckProcessCheck(Connection conn) { if (rs.next()) { int stuckCount = rs.getInt("cnt"); if (stuckCount > 0) { - logger.warn( - "[{}] Stuck MySQL processes detected | count={} | thresholdSeconds={}", - LOG_EVENT_STUCK_PROCESS, stuckCount, STUCK_PROCESS_SECONDS - ); if (stuckCount > STUCK_PROCESS_THRESHOLD) { + logger.warn( + "[{}] Stuck MySQL processes detected above threshold | count={} | threshold={} | thresholdSeconds={}", + LOG_EVENT_STUCK_PROCESS, stuckCount, STUCK_PROCESS_THRESHOLD, STUCK_PROCESS_SECONDS + ); return SEVERITY_WARNING; + } else { + logger.info( + "[{}] Stuck MySQL processes below threshold | count={} | threshold={} | thresholdSeconds={}", + LOG_EVENT_STUCK_PROCESS, stuckCount, STUCK_PROCESS_THRESHOLD, STUCK_PROCESS_SECONDS + ); } } }