diff --git a/search/src/org/labkey/search/model/AbstractSearchService.java b/search/src/org/labkey/search/model/AbstractSearchService.java index 683b37437a1..b108e01beb4 100644 --- a/search/src/org/labkey/search/model/AbstractSearchService.java +++ b/search/src/org/labkey/search/model/AbstractSearchService.java @@ -40,6 +40,7 @@ import org.labkey.api.search.SearchService; import org.labkey.api.security.User; import org.labkey.api.services.ServiceRegistry; +import org.labkey.api.util.ConfigurationException; import org.labkey.api.util.ContextListener; import org.labkey.api.util.DebugInfoDumper; import org.labkey.api.util.ExceptionUtil; @@ -436,6 +437,7 @@ public boolean isBusy() final Object _idleEvent = new Object(); boolean _idleWaiting = false; + static final Object INDEX_EVENT = new Object(); @Override public void waitForIdle() throws InterruptedException @@ -1032,7 +1034,7 @@ protected void startThreads() int countIndexingThreads = Math.max(1, getCountIndexingThreads()); for (int i = 0; i < countIndexingThreads; i++) { - startThread(new Thread(indexRunnable, "SearchService:index")); + startThread(new Thread(indexRunnable, "SearchService:index" + i)); } startThread(new Thread(runRunnable, "SearchService:runner")); @@ -1065,6 +1067,10 @@ public void shutdownPre() _itemQueue.clear(); for (Thread t : _threads) t.interrupt(); + synchronized (INDEX_EVENT) + { + INDEX_EVENT.notifyAll(); + } } @@ -1220,16 +1226,21 @@ public final void commit() Runnable indexRunnable = () -> { + int consecutiveCommitFailures = 0; while (!_shuttingDown) { try { _indexLoop(); + consecutiveCommitFailures = 0; } - catch (Throwable t) + catch (Throwable e) { - // this should only happen if the catch/finally of the inner loop throws - try {_log.warn("error in indexer", t);} catch (Throwable x){/* */} + if (!_shuttingDown) + { + // Postincrement so that we don't start backing off until the second error + postFailureDelay(e, "Error in indexer", _log, consecutiveCommitFailures++, INDEX_EVENT); + } } } synchronized (_commitLock) @@ -1242,6 +1253,28 @@ public final void commit() } }; + public static void postFailureDelay(Throwable e, String logPrefix, Logger log, int consecutiveFailures, final Object syncObject) + { + long delayMs = TimeUnit.SECONDS.toMillis(30L * Math.min(10, consecutiveFailures)); + log.error("{}, delaying next attempt by {}s ({} consecutive failures)", + logPrefix, + TimeUnit.MILLISECONDS.toSeconds(delayMs), + consecutiveFailures, + e); + if (delayMs > 0) + { + try + { + //noinspection SynchronizationOnLocalVariableOrMethodParameter + synchronized (syncObject) + { + syncObject.wait(delayMs); + } + } + catch (InterruptedException ignored) {} + } + } + private void commitCheck(long ms) { synchronized (_commitLock) @@ -1337,6 +1370,7 @@ private void _indexLoop() _log.debug("skipping " + i._id); } catch (InterruptedException ignored) {} + catch (IndexCommitException | ConfigurationException e) { throw e; } // let outer loop handle backoff catch (Throwable x) { _log.error("Error indexing " + (null != i ? i._id : ""), x); @@ -1424,7 +1458,7 @@ public List getCategories(String categories) } - protected abstract void commitIndex(); + protected abstract void commitIndex() throws ConfigurationException, IndexCommitException; protected abstract void deleteDocument(String id); protected abstract void deleteDocuments(Collection ids); protected abstract void deleteDocumentsForPrefix(String prefix); diff --git a/search/src/org/labkey/search/model/DavCrawler.java b/search/src/org/labkey/search/model/DavCrawler.java index f694a9ab94b..30ae722d79a 100644 --- a/search/src/org/labkey/search/model/DavCrawler.java +++ b/search/src/org/labkey/search/model/DavCrawler.java @@ -81,9 +81,6 @@ public class DavCrawler implements ShutdownListener { // SearchService.SearchCategory folderCategory = new SearchService.SearchCategory("Folder", "Folder"); - long _defaultWait = TimeUnit.SECONDS.toMillis(60); - long _defaultBusyWait = TimeUnit.SECONDS.toMillis(1); - // UNDONE: configurable // NOTE: we want to use these to control how fast we SUBMIT jobs to the indexer, // we don't want to hold up the actual indexer threads if possible @@ -194,9 +191,7 @@ public void shutdownStarted() { _crawlerThread.join(1000); } - catch (InterruptedException x) - { - } + catch (InterruptedException _) {} } @@ -465,7 +460,7 @@ void addRecent(WebdavResource r) while (!_recent.isEmpty() && _recent.getFirst().second.getTime() < d.getTime()-10*60000) _recent.removeFirst(); String text = r.isCollection() ? r.getName() + "/" : r.getName(); - _recent.add(new Pair(text,d)); + _recent.add(new Pair<>(text,d)); } } @@ -481,33 +476,6 @@ void pingCrawler() } - void _wait(Object event, long wait) - { - if (wait == 0 || _shuttingDown) - return; - try - { - synchronized (event) - { - event.wait(wait); - } - } - catch (InterruptedException ignored) {} - } - - -// final Runnable pingJob = new Runnable() -// { -// public void run() -// { -// synchronized (_crawlerEvent) -// { -// _crawlerEvent.notifyAll(); -// } -// } -// }; - - void waitForIndexerIdle() throws InterruptedException { SearchService ss = getSearchService(); @@ -526,7 +494,7 @@ public void run() { while (!_shuttingDown && null == getSearchService()) { - try { Thread.sleep(1000); } catch (InterruptedException x) {} + try { Thread.sleep(1000); } catch (InterruptedException _) {} } int consecutiveConfigExceptionCount = 0; @@ -544,7 +512,17 @@ public void run() } else { - _wait(_crawlerEvent, _defaultWait); + if (!_shuttingDown) + { + try + { + synchronized (_crawlerEvent) + { + _crawlerEvent.wait(TimeUnit.MINUTES.toMillis(1)); + } + } + catch (InterruptedException ignored) {} + } } consecutiveConfigExceptionCount = 0; } @@ -552,11 +530,7 @@ public void run() catch (ConfigurationException e) { // Issue 49785. Avoid spinning in a tight loop if the DB is unresponsive. - consecutiveConfigExceptionCount++; - _log.error("Unexpected error, delaying next attempt" + (consecutiveConfigExceptionCount > 1 ? (". " + consecutiveConfigExceptionCount + " consecutive ConfigurationExceptions") : ""), e); - - // Fallback strategy based on the number of consecutive failed attempts - _wait(_crawlerEvent, TimeUnit.MINUTES.toMillis(Math.min(10, consecutiveConfigExceptionCount))); + AbstractSearchService.postFailureDelay(e, "Unexpected error", _log, ++consecutiveConfigExceptionCount, _crawlerEvent); } catch (Throwable t) { @@ -728,7 +702,7 @@ String getActivityHtml() recent = new ArrayList<>(_recent); } - recent.sort(Comparator.comparing(Pair::getValue, Comparator.reverseOrder())); + recent.sort(Map.Entry.comparingByValue(Comparator.reverseOrder())); StringBuilder activity = new StringBuilder(""); //"); String last = ""; diff --git a/search/src/org/labkey/search/model/IndexCommitException.java b/search/src/org/labkey/search/model/IndexCommitException.java new file mode 100644 index 00000000000..4f28427593c --- /dev/null +++ b/search/src/org/labkey/search/model/IndexCommitException.java @@ -0,0 +1,13 @@ +package org.labkey.search.model; + +/** + * Thrown when a Lucene index commit fails. Propagates to the outer indexer loop so that backoff and retry + * are handled there, consistent with the pattern used by {@link DavCrawler}. + */ +public class IndexCommitException extends RuntimeException +{ + IndexCommitException(Throwable cause) + { + super(cause); + } +} diff --git a/search/src/org/labkey/search/model/LuceneSearchServiceImpl.java b/search/src/org/labkey/search/model/LuceneSearchServiceImpl.java index f3cea527fdf..4a04bd72d71 100644 --- a/search/src/org/labkey/search/model/LuceneSearchServiceImpl.java +++ b/search/src/org/labkey/search/model/LuceneSearchServiceImpl.java @@ -1573,7 +1573,7 @@ protected void clearIndexedFileSystemFiles(Container container) } @Override - protected void commitIndex() + protected void commitIndex() throws ConfigurationException, IndexCommitException { try { @@ -1588,9 +1588,10 @@ protected void commitIndex() catch (Throwable t) { // If any exceptions happen during commit() the IndexManager will attempt to close the IndexWriter, making - // the IndexManager unusable. Attempt to reset the index. + // the IndexManager unusable. Attempt to reset the index, then let the outer loop handle backoff. ExceptionUtil.logExceptionToMothership(null, t); initializeIndex(); + throw new IndexCommitException(t); } } diff --git a/search/src/org/labkey/search/model/WritableIndexManagerImpl.java b/search/src/org/labkey/search/model/WritableIndexManagerImpl.java index 96a13fd5522..48a13ad5ed1 100644 --- a/search/src/org/labkey/search/model/WritableIndexManagerImpl.java +++ b/search/src/org/labkey/search/model/WritableIndexManagerImpl.java @@ -52,7 +52,6 @@ import org.quartz.impl.StdSchedulerFactory; import java.io.IOException; -import java.nio.file.AccessDeniedException; import java.nio.file.Path; import java.util.Collection; import java.util.Collections; @@ -258,16 +257,18 @@ public void commit() { iw.commit(); _manager.maybeRefreshBlocking(); - } - catch (AccessDeniedException e) - { - // Index is unwritable wrap in configuration exception to notify Admin - throw new ConfigurationException("Unable to write to Full-Text search index: " + e.getMessage(), e); + + // Forced repro for GitHub Issue 959: Fallback when indexing hits IO problems. Retained for debugging purposes. +// boolean b = false; +// if (b) +// { +// throw new FileNotFoundException("fake"); +// } } catch (IOException e) { - // Close IndexWriter here as well? - ExceptionUtil.logExceptionToMothership(null, e); + // Index is unwritable. Wrap in a ConfigurationException to notify admins + throw new ConfigurationException("Unable to write to Full-Text search index: " + e.getMessage(), e); } catch (OutOfMemoryError e) {