diff --git a/src/main/java/org/apache/commons/text/TextStringBuilder.java b/src/main/java/org/apache/commons/text/TextStringBuilder.java index 2977fa3232..d6618a717f 100644 --- a/src/main/java/org/apache/commons/text/TextStringBuilder.java +++ b/src/main/java/org/apache/commons/text/TextStringBuilder.java @@ -651,6 +651,15 @@ public TextStringBuilder append(final String str) { return append(str, 0, StringUtils.length(str)); } + /* Checks that the range starting at startIndex and spanning length elements lies inside a sequence of strLength elements; throws StringIndexOutOfBoundsException otherwise. */ private void validateRange(final int startIndex, final int length, final int strLength) { + if (startIndex < 0 || startIndex > strLength) { + throw new StringIndexOutOfBoundsException("startIndex must be valid"); + } + /* Compare length with the remaining room rather than summing the two arguments: the sum can overflow int and wrap negative, which would let an oversized length through. The subtraction cannot overflow because startIndex is already known to be between 0 and strLength. */ if (length < 0 || length > strLength - startIndex) { + throw new StringIndexOutOfBoundsException("length must be valid"); + } + } + /** * Appends part of a string to this string builder. Appending null will call {@link #appendNull()}. * @@ -666,12 +675,7 @@ public TextStringBuilder append(final String str, final int startIndex, final in if (str == null) { return appendNull(); } - if (startIndex < 0 || startIndex > str.length()) { - throw new StringIndexOutOfBoundsException("startIndex must be valid"); - } - if (length < 0 || startIndex + length > str.length()) { - throw new StringIndexOutOfBoundsException("length must be valid"); - } + validateRange(startIndex, length, str.length()); if (length > 0) { final int len = length(); ensureCapacityInternal(len + length); @@ -715,12 +719,7 @@ public TextStringBuilder append(final StringBuffer str, final int startIndex, fi if (str == null) { return appendNull(); } - if (startIndex < 0 || startIndex > str.length()) { - throw new StringIndexOutOfBoundsException("startIndex must be valid"); - } - if (length < 0 || startIndex + length > str.length()) { - throw new StringIndexOutOfBoundsException("length must be valid"); - } + validateRange(startIndex, length, str.length()); if (length > 0) { final int len = length(); ensureCapacityInternal(len + length); @@ -752,12 +751,7 @@ public 
TextStringBuilder append(final StringBuilder str, final int startIndex, f if (str == null) { return appendNull(); } - if (startIndex < 0 || startIndex > str.length()) { - throw new StringIndexOutOfBoundsException("startIndex must be valid"); - } - if (length < 0 || startIndex + length > str.length()) { - throw new StringIndexOutOfBoundsException("length must be valid"); - } + validateRange(startIndex, length, str.length()); if (length > 0) { final int len = length(); ensureCapacityInternal(len + length); @@ -789,12 +783,7 @@ public TextStringBuilder append(final TextStringBuilder str, final int startInde if (str == null) { return appendNull(); } - if (startIndex < 0 || startIndex > str.length()) { - throw new StringIndexOutOfBoundsException("startIndex must be valid"); - } - if (length < 0 || startIndex + length > str.length()) { - throw new StringIndexOutOfBoundsException("length must be valid"); - } + validateRange(startIndex, length, str.length()); if (length > 0) { final int len = length(); ensureCapacityInternal(len + length); diff --git a/src/main/java/org/apache/commons/text/similarity/DamerauLevenshteinDistance.java b/src/main/java/org/apache/commons/text/similarity/DamerauLevenshteinDistance.java index 93f4627cc1..ab963579ca 100644 --- a/src/main/java/org/apache/commons/text/similarity/DamerauLevenshteinDistance.java +++ b/src/main/java/org/apache/commons/text/similarity/DamerauLevenshteinDistance.java @@ -41,6 +41,32 @@ private static int clampDistance(final int distance, final int threshold) { return distance > threshold ? -1 : distance; } + /* Returns the value that callers store in curr[rightIndex]: the cheapest of deletion (read from prev), insertion (read from curr), substitution (read from prev) and, when the last two elements of left and right are swapped, transposition (read from prevPrev). The three row arrays are only read, never written. */ private static int calculateCost(final SimilarityInput left, final SimilarityInput right, + final int leftIndex, final int rightIndex, + final int[] curr, final int[] prev, final int[] prevPrev) { + final int cost = left.at(leftIndex - 1) == right.at(rightIndex - 1) ? 
0 : 1; /* NOTE(review): == is an identity comparison if at() returns a reference type such as a boxed Character; the return type of SimilarityInput.at is not visible here, so confirm whether equals() is needed. The same applies to the two == tests in the transposition check below. */ + + // Select cheapest operation + int value = Math.min( + Math.min( + prev[rightIndex] + 1, // Delete current character + curr[rightIndex - 1] + 1 // Insert current character + ), + prev[rightIndex - 1] + cost // Replace (or no cost if same character) + ); + + // Check if adjacent characters are the same -> transpose if cheaper + if (leftIndex > 1 + && rightIndex > 1 + && left.at(leftIndex - 1) == right.at(rightIndex - 2) + && left.at(leftIndex - 2) == right.at(rightIndex - 1)) { + // Use cost here, to properly handle two subsequent equal letters + value = Math.min(value, prevPrev[rightIndex - 2] + cost); + } + + return value; + } + /** * Finds the Damerau-Levenshtein distance between two CharSequences if it's less than or equal to a given threshold. * @@ -89,7 +115,7 @@ private static int limitedCompare(SimilarityInput left, SimilarityInput int limitedCompare(SimilarityInput left, SimilarityInput transpose if cheaper - if (leftIndex > 1 - && rightIndex > 1 - && left.at(leftIndex - 1) == right.at(rightIndex - 2) - && left.at(leftIndex - 2) == right.at(rightIndex - 1)) { - // Use cost here, to properly handle two subsequent equal letters - curr[rightIndex] = Math.min(curr[rightIndex], prevPrev[rightIndex - 2] + cost); - } + curr[rightIndex] = calculateCost(left, right, leftIndex, rightIndex, curr, prev, prevPrev); minCost = Math.min(curr[rightIndex], minCost); } @@ -196,7 +204,7 @@ private static int unlimitedCompare(SimilarityInput left, SimilarityInput int[] prevPrev = new int[rightLength + 1]; int[] temp; // Temp variable use to shuffle arrays at the end of each iteration - int rightIndex, leftIndex, cost; + int rightIndex, leftIndex; // Changing empty sequence to [0..i] requires i insertions for (rightIndex = 0; rightIndex <= rightLength; rightIndex++) { @@ -216,25 +224,7 @@ private static int unlimitedCompare(SimilarityInput left, SimilarityInput curr[0] = leftIndex; for (rightIndex = 1; rightIndex <= rightLength; rightIndex++) { - cost = 
left.at(leftIndex - 1) == right.at(rightIndex - 1) ? 0 : 1; - - // Select cheapest operation - curr[rightIndex] = Math.min( - Math.min( - prev[rightIndex] + 1, // Delete current character - curr[rightIndex - 1] + 1 // Insert current character - ), - prev[rightIndex - 1] + cost // Replace (or no cost if same character) - ); - - // Check if adjacent characters are the same -> transpose if cheaper - if (leftIndex > 1 - && rightIndex > 1 - && left.at(leftIndex - 1) == right.at(rightIndex - 2) - && left.at(leftIndex - 2) == right.at(rightIndex - 1)) { - // Use cost here, to properly handle two subsequent equal letters - curr[rightIndex] = Math.min(curr[rightIndex], prevPrev[rightIndex - 2] + cost); - } + curr[rightIndex] = calculateCost(left, right, leftIndex, rightIndex, curr, prev, prevPrev); } // Rotate arrays for next iteration