From 5daf358ecaa60b7af15abb100d87415315865770 Mon Sep 17 00:00:00 2001 From: labkey-jeckels Date: Mon, 23 Mar 2026 20:30:01 -0700 Subject: [PATCH 1/5] GitHub Issue 959: Fallback when indexing hits IO problems --- .../search/model/AbstractSearchService.java | 40 +++++++++++-- .../org/labkey/search/model/DavCrawler.java | 58 +++++-------------- .../search/model/IndexCommitException.java | 13 +++++ .../search/model/LuceneSearchServiceImpl.java | 5 +- .../model/WritableIndexManagerImpl.java | 15 ++--- 5 files changed, 75 insertions(+), 56 deletions(-) create mode 100644 search/src/org/labkey/search/model/IndexCommitException.java diff --git a/search/src/org/labkey/search/model/AbstractSearchService.java b/search/src/org/labkey/search/model/AbstractSearchService.java index 683b37437a1..c93867b8879 100644 --- a/search/src/org/labkey/search/model/AbstractSearchService.java +++ b/search/src/org/labkey/search/model/AbstractSearchService.java @@ -40,6 +40,7 @@ import org.labkey.api.search.SearchService; import org.labkey.api.security.User; import org.labkey.api.services.ServiceRegistry; +import org.labkey.api.util.ConfigurationException; import org.labkey.api.util.ContextListener; import org.labkey.api.util.DebugInfoDumper; import org.labkey.api.util.ExceptionUtil; @@ -436,6 +437,7 @@ public boolean isBusy() final Object _idleEvent = new Object(); boolean _idleWaiting = false; + static final Object INDEX_EVENT = new Object(); @Override public void waitForIdle() throws InterruptedException @@ -1032,7 +1034,7 @@ protected void startThreads() int countIndexingThreads = Math.max(1, getCountIndexingThreads()); for (int i = 0; i < countIndexingThreads; i++) { - startThread(new Thread(indexRunnable, "SearchService:index")); + startThread(new Thread(indexRunnable, "SearchService:index" + i)); } startThread(new Thread(runRunnable, "SearchService:runner")); @@ -1065,6 +1067,7 @@ public void shutdownPre() _itemQueue.clear(); for (Thread t : _threads) t.interrupt(); + INDEX_EVENT.notifyAll(); } @@ -1220,16 +1223,20 @@ public final void commit() Runnable indexRunnable = () -> { + int consecutiveCommitFailures = 0; while (!_shuttingDown) { try { _indexLoop(); + consecutiveCommitFailures = 0; } - catch (Throwable t) + catch (Throwable e) { - // this should only happen if the catch/finally of the inner loop throws - try {_log.warn("error in indexer", t);} catch (Throwable x){/* */} + if (!_shuttingDown) + { + postFailureDelay(e, "Error in indexer", _log, ++consecutiveCommitFailures, INDEX_EVENT); + } } } synchronized (_commitLock) @@ -1242,6 +1249,28 @@ public final void commit() } }; + public static void postFailureDelay(Throwable e, String logPrefix, Logger log, int consecutiveCommitFailures, final Object syncObject) + { + long delayMs = TimeUnit.SECONDS.toMillis(30L * Math.min(10, consecutiveCommitFailures)); + log.error("{}, delaying next attempt by {}s ({} consecutive failures)", + logPrefix, + TimeUnit.MILLISECONDS.toSeconds(delayMs), + consecutiveCommitFailures, + e); + if (delayMs > 0) + { + try + { + //noinspection SynchronizationOnLocalVariableOrMethodParameter + synchronized (syncObject) + { + syncObject.wait(delayMs); + } + } + catch (InterruptedException ignored) {} + } + } + private void commitCheck(long ms) { synchronized (_commitLock) @@ -1337,6 +1366,7 @@ private void _indexLoop() _log.debug("skipping " + i._id); } catch (InterruptedException ignored) {} + catch (IndexCommitException | ConfigurationException e) { throw e; } // let outer loop handle backoff catch (Throwable x) { _log.error("Error indexing " + (null != i ? i._id : ""), x); @@ -1424,7 +1454,7 @@ public List getCategories(String categories) } - protected abstract void commitIndex(); + protected abstract void commitIndex() throws ConfigurationException, IndexCommitException; protected abstract void deleteDocument(String id); protected abstract void deleteDocuments(Collection ids); protected abstract void deleteDocumentsForPrefix(String prefix); diff --git a/search/src/org/labkey/search/model/DavCrawler.java b/search/src/org/labkey/search/model/DavCrawler.java index f694a9ab94b..30ae722d79a 100644 --- a/search/src/org/labkey/search/model/DavCrawler.java +++ b/search/src/org/labkey/search/model/DavCrawler.java @@ -81,9 +81,6 @@ public class DavCrawler implements ShutdownListener { // SearchService.SearchCategory folderCategory = new SearchService.SearchCategory("Folder", "Folder"); - long _defaultWait = TimeUnit.SECONDS.toMillis(60); - long _defaultBusyWait = TimeUnit.SECONDS.toMillis(1); - // UNDONE: configurable // NOTE: we want to use these to control how fast we SUBMIT jobs to the indexer, // we don't want to hold up the actual indexer threads if possible @@ -194,9 +191,7 @@ public void shutdownStarted() { _crawlerThread.join(1000); } - catch (InterruptedException x) - { - } + catch (InterruptedException _) {} } @@ -465,7 +460,7 @@ void addRecent(WebdavResource r) while (!_recent.isEmpty() && _recent.getFirst().second.getTime() < d.getTime()-10*60000) _recent.removeFirst(); String text = r.isCollection() ? r.getName() + "/" : r.getName(); - _recent.add(new Pair(text,d)); + _recent.add(new Pair<>(text,d)); } } @@ -481,33 +476,6 @@ void pingCrawler() } - void _wait(Object event, long wait) - { - if (wait == 0 || _shuttingDown) - return; - try - { - synchronized (event) - { - event.wait(wait); - } - } - catch (InterruptedException ignored) {} - } - - -// final Runnable pingJob = new Runnable() -// { -// public void run() -// { -// synchronized (_crawlerEvent) -// { -// _crawlerEvent.notifyAll(); -// } -// } -// }; - - void waitForIndexerIdle() throws InterruptedException { SearchService ss = getSearchService(); @@ -526,7 +494,7 @@ public void run() { while (!_shuttingDown && null == getSearchService()) { - try { Thread.sleep(1000); } catch (InterruptedException x) {} + try { Thread.sleep(1000); } catch (InterruptedException _) {} } int consecutiveConfigExceptionCount = 0; @@ -544,7 +512,17 @@ public void run() } else { - _wait(_crawlerEvent, _defaultWait); + if (!_shuttingDown) + { + try + { + synchronized (_crawlerEvent) + { + _crawlerEvent.wait(TimeUnit.MINUTES.toMillis(1)); + } + } + catch (InterruptedException ignored) {} + } } consecutiveConfigExceptionCount = 0; } @@ -552,11 +530,7 @@ public void run() catch (ConfigurationException e) { // Issue 49785. Avoid spinning in a tight loop if the DB is unresponsive. - consecutiveConfigExceptionCount++; - _log.error("Unexpected error, delaying next attempt" + (consecutiveConfigExceptionCount > 1 ? (". " + consecutiveConfigExceptionCount + " consecutive ConfigurationExceptions") : ""), e); - - // Fallback strategy based on the number of consecutive failed attempts - _wait(_crawlerEvent, TimeUnit.MINUTES.toMillis(Math.min(10, consecutiveConfigExceptionCount))); + AbstractSearchService.postFailureDelay(e, "Unexpected error", _log, ++consecutiveConfigExceptionCount, _crawlerEvent); } catch (Throwable t) { @@ -728,7 +702,7 @@ String getActivityHtml() recent = new ArrayList<>(_recent); } - recent.sort(Comparator.comparing(Pair::getValue, Comparator.reverseOrder())); + recent.sort(Map.Entry.comparingByValue(Comparator.reverseOrder())); StringBuilder activity = new StringBuilder(""); //"); String last = ""; diff --git a/search/src/org/labkey/search/model/IndexCommitException.java b/search/src/org/labkey/search/model/IndexCommitException.java new file mode 100644 index 00000000000..4f28427593c --- /dev/null +++ b/search/src/org/labkey/search/model/IndexCommitException.java @@ -0,0 +1,13 @@ +package org.labkey.search.model; + +/** + * Thrown when a Lucene index commit fails. Propagates to the outer indexer loop so that backoff and retry + * are handled there, consistent with the pattern used by {@link DavCrawler}. + */ +public class IndexCommitException extends RuntimeException +{ + IndexCommitException(Throwable cause) + { + super(cause); + } +} diff --git a/search/src/org/labkey/search/model/LuceneSearchServiceImpl.java b/search/src/org/labkey/search/model/LuceneSearchServiceImpl.java index f3cea527fdf..4a04bd72d71 100644 --- a/search/src/org/labkey/search/model/LuceneSearchServiceImpl.java +++ b/search/src/org/labkey/search/model/LuceneSearchServiceImpl.java @@ -1573,7 +1573,7 @@ protected void clearIndexedFileSystemFiles(Container container) } @Override - protected void commitIndex() + protected void commitIndex() throws ConfigurationException, IndexCommitException { try { @@ -1588,9 +1588,10 @@ protected void commitIndex() catch (Throwable t) { // If any exceptions happen during commit() the IndexManager will attempt to close the IndexWriter, making - // the IndexManager unusable. Attempt to reset the index. + // the IndexManager unusable. Attempt to reset the index, then let the outer loop handle backoff. ExceptionUtil.logExceptionToMothership(null, t); initializeIndex(); + throw new IndexCommitException(t); } } diff --git a/search/src/org/labkey/search/model/WritableIndexManagerImpl.java b/search/src/org/labkey/search/model/WritableIndexManagerImpl.java index 96a13fd5522..30f3715e132 100644 --- a/search/src/org/labkey/search/model/WritableIndexManagerImpl.java +++ b/search/src/org/labkey/search/model/WritableIndexManagerImpl.java @@ -51,6 +51,7 @@ import org.quartz.TriggerKey; import org.quartz.impl.StdSchedulerFactory; +import java.io.FileNotFoundException; import java.io.IOException; import java.nio.file.AccessDeniedException; import java.nio.file.Path; @@ -258,16 +259,16 @@ public void commit() { iw.commit(); _manager.maybeRefreshBlocking(); - } - catch (AccessDeniedException e) - { - // Index is unwritable wrap in configuration exception to notify Admin - throw new ConfigurationException("Unable to write to Full-Text search index: " + e.getMessage(), e); + boolean b = false; + if (b) + { + throw new FileNotFoundException("fake"); + } } catch (IOException e) { - // Close IndexWriter here as well? - ExceptionUtil.logExceptionToMothership(null, e); + // Index is unwritable. Wrap in a ConfigurationException to notify admins + throw new ConfigurationException("Unable to write to Full-Text search index: " + e.getMessage(), e); } catch (OutOfMemoryError e) { From baa06d0eed02e013f2d16ff3b8cd6ae88015ee79 Mon Sep 17 00:00:00 2001 From: labkey-jeckels Date: Mon, 23 Mar 2026 20:32:09 -0700 Subject: [PATCH 2/5] Back out debugging code --- .../search/model/WritableIndexManagerImpl.java | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/search/src/org/labkey/search/model/WritableIndexManagerImpl.java b/search/src/org/labkey/search/model/WritableIndexManagerImpl.java index 30f3715e132..02f7a9f8e5d 100644 --- a/search/src/org/labkey/search/model/WritableIndexManagerImpl.java +++ b/search/src/org/labkey/search/model/WritableIndexManagerImpl.java @@ -259,11 +259,13 @@ public void commit() { iw.commit(); _manager.maybeRefreshBlocking(); - boolean b = false; - if (b) - { - throw new FileNotFoundException("fake"); - } + + // Forced repro for GitHub Issue 959 +// boolean b = false; +// if (b) +// { +// throw new FileNotFoundException("fake"); +// } } catch (IOException e) { From 27af9e02da4ac3daba0bea106ebf72bf49c365a1 Mon Sep 17 00:00:00 2001 From: labkey-jeckels Date: Mon, 23 Mar 2026 20:42:19 -0700 Subject: [PATCH 3/5] Cleanup --- .../src/org/labkey/search/model/AbstractSearchService.java | 5 ++++- .../org/labkey/search/model/WritableIndexManagerImpl.java | 4 +--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/search/src/org/labkey/search/model/AbstractSearchService.java b/search/src/org/labkey/search/model/AbstractSearchService.java index c93867b8879..692f51035bb 100644 --- a/search/src/org/labkey/search/model/AbstractSearchService.java +++ b/search/src/org/labkey/search/model/AbstractSearchService.java @@ -1067,7 +1067,10 @@ public void shutdownPre() _itemQueue.clear(); for (Thread t : _threads) t.interrupt(); - INDEX_EVENT.notifyAll(); + synchronized (INDEX_EVENT) + { + INDEX_EVENT.notifyAll(); + } } diff --git a/search/src/org/labkey/search/model/WritableIndexManagerImpl.java b/search/src/org/labkey/search/model/WritableIndexManagerImpl.java index 02f7a9f8e5d..48a13ad5ed1 100644 --- a/search/src/org/labkey/search/model/WritableIndexManagerImpl.java +++ b/search/src/org/labkey/search/model/WritableIndexManagerImpl.java @@ -51,9 +51,7 @@ import org.quartz.TriggerKey; import org.quartz.impl.StdSchedulerFactory; -import java.io.FileNotFoundException; import java.io.IOException; -import java.nio.file.AccessDeniedException; import java.nio.file.Path; import java.util.Collection; import java.util.Collections; @@ -260,7 +258,7 @@ public void commit() iw.commit(); _manager.maybeRefreshBlocking(); - // Forced repro for GitHub Issue 959 + // Forced repro for GitHub Issue 959: Fallback when indexing hits IO problems. Retained for debugging purposes. // boolean b = false; // if (b) // { From 367d5365065daa792fceba1330a5495340c00fb6 Mon Sep 17 00:00:00 2001 From: labkey-jeckels Date: Mon, 23 Mar 2026 20:43:33 -0700 Subject: [PATCH 4/5] Better parameter name --- .../src/org/labkey/search/model/AbstractSearchService.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/search/src/org/labkey/search/model/AbstractSearchService.java b/search/src/org/labkey/search/model/AbstractSearchService.java index 692f51035bb..fc285084eb8 100644 --- a/search/src/org/labkey/search/model/AbstractSearchService.java +++ b/search/src/org/labkey/search/model/AbstractSearchService.java @@ -1252,13 +1252,13 @@ public final void commit() } }; - public static void postFailureDelay(Throwable e, String logPrefix, Logger log, int consecutiveCommitFailures, final Object syncObject) + public static void postFailureDelay(Throwable e, String logPrefix, Logger log, int consecutiveFailures, final Object syncObject) { - long delayMs = TimeUnit.SECONDS.toMillis(30L * Math.min(10, consecutiveCommitFailures)); + long delayMs = TimeUnit.SECONDS.toMillis(30L * Math.min(10, consecutiveFailures)); log.error("{}, delaying next attempt by {}s ({} consecutive failures)", logPrefix, TimeUnit.MILLISECONDS.toSeconds(delayMs), - consecutiveCommitFailures, + consecutiveFailures, e); if (delayMs > 0) { From d293775baeb31d9b29fc865e7ab63db7bed535f3 Mon Sep 17 00:00:00 2001 From: labkey-jeckels Date: Wed, 25 Mar 2026 16:59:16 -0700 Subject: [PATCH 5/5] Don't start backing off immediately --- search/src/org/labkey/search/model/AbstractSearchService.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/search/src/org/labkey/search/model/AbstractSearchService.java b/search/src/org/labkey/search/model/AbstractSearchService.java index fc285084eb8..b108e01beb4 100644 --- a/search/src/org/labkey/search/model/AbstractSearchService.java +++ b/search/src/org/labkey/search/model/AbstractSearchService.java @@ -1238,7 +1238,8 @@ public final void commit() { if (!_shuttingDown) { - postFailureDelay(e, "Error in indexer", _log, ++consecutiveCommitFailures, INDEX_EVENT); + // Postincrement so that we don't start backing off until the second error + postFailureDelay(e, "Error in indexer", _log, consecutiveCommitFailures++, INDEX_EVENT); } } }