Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 13 additions & 24 deletions src/main/java/org/apache/commons/text/TextStringBuilder.java
Original file line number Diff line number Diff line change
Expand Up @@ -651,6 +651,15 @@ public TextStringBuilder append(final String str) {
return append(str, 0, StringUtils.length(str));
}

/**
 * Validates that the range {@code [startIndex, startIndex + length)} lies within a sequence of
 * {@code strLength} characters.
 *
 * @param startIndex the first index of the range, must be in {@code [0, strLength]}
 * @param length the number of characters in the range, must be non-negative and must not extend
 *        past {@code strLength}
 * @param strLength the length of the sequence the range is checked against
 * @throws StringIndexOutOfBoundsException if {@code startIndex} or {@code length} is out of range
 */
private void validateRange(final int startIndex, final int length, final int strLength) {
    if (startIndex < 0 || startIndex > strLength) {
        throw new StringIndexOutOfBoundsException("startIndex must be valid");
    }
    // Compare against the remaining room instead of computing startIndex + length: the sum can
    // overflow int (e.g. startIndex = 1, length = Integer.MAX_VALUE) and wrap to a negative value,
    // which would let an invalid range slip through. The subtraction cannot overflow because
    // 0 <= startIndex <= strLength is guaranteed at this point.
    if (length < 0 || length > strLength - startIndex) {
        throw new StringIndexOutOfBoundsException("length must be valid");
    }
}

/**
* Appends part of a string to this string builder. Appending null will call {@link #appendNull()}.
*
Expand All @@ -666,12 +675,7 @@ public TextStringBuilder append(final String str, final int startIndex, final in
if (str == null) {
return appendNull();
}
if (startIndex < 0 || startIndex > str.length()) {
throw new StringIndexOutOfBoundsException("startIndex must be valid");
}
if (length < 0 || startIndex + length > str.length()) {
throw new StringIndexOutOfBoundsException("length must be valid");
}
validateRange(startIndex, length, str.length());
if (length > 0) {
final int len = length();
ensureCapacityInternal(len + length);
Expand Down Expand Up @@ -715,12 +719,7 @@ public TextStringBuilder append(final StringBuffer str, final int startIndex, fi
if (str == null) {
return appendNull();
}
if (startIndex < 0 || startIndex > str.length()) {
throw new StringIndexOutOfBoundsException("startIndex must be valid");
}
if (length < 0 || startIndex + length > str.length()) {
throw new StringIndexOutOfBoundsException("length must be valid");
}
validateRange(startIndex, length, str.length());
if (length > 0) {
final int len = length();
ensureCapacityInternal(len + length);
Expand Down Expand Up @@ -752,12 +751,7 @@ public TextStringBuilder append(final StringBuilder str, final int startIndex, f
if (str == null) {
return appendNull();
}
if (startIndex < 0 || startIndex > str.length()) {
throw new StringIndexOutOfBoundsException("startIndex must be valid");
}
if (length < 0 || startIndex + length > str.length()) {
throw new StringIndexOutOfBoundsException("length must be valid");
}
validateRange(startIndex, length, str.length());
if (length > 0) {
final int len = length();
ensureCapacityInternal(len + length);
Expand Down Expand Up @@ -789,12 +783,7 @@ public TextStringBuilder append(final TextStringBuilder str, final int startInde
if (str == null) {
return appendNull();
}
if (startIndex < 0 || startIndex > str.length()) {
throw new StringIndexOutOfBoundsException("startIndex must be valid");
}
if (length < 0 || startIndex + length > str.length()) {
throw new StringIndexOutOfBoundsException("length must be valid");
}
validateRange(startIndex, length, str.length());
if (length > 0) {
final int len = length();
ensureCapacityInternal(len + length);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,32 @@ private static int clampDistance(final int distance, final int threshold) {
return distance > threshold ? -1 : distance;
}

/**
 * Computes one cell of the Damerau-Levenshtein dynamic-programming table: the cheapest way to
 * reach position ({@code leftIndex}, {@code rightIndex}) via deletion, insertion, substitution
 * or transposition of two adjacent elements.
 *
 * @param <E> the element type of the compared inputs
 * @param left the left input
 * @param right the right input
 * @param leftIndex 1-based position in {@code left} (row of the table), must be at least 1
 * @param rightIndex 1-based position in {@code right} (column of the table), must be at least 1
 * @param curr the row currently being filled; {@code curr[rightIndex - 1]} must already be set
 * @param prev the previous row
 * @param prevPrev the row before {@code prev}, read only when a transposition is possible
 * @return the minimal cost for the cell
 */
private static <E> int calculateCost(final SimilarityInput<E> left, final SimilarityInput<E> right,
        final int leftIndex, final int rightIndex,
        final int[] curr, final int[] prev, final int[] prevPrev) {
    final E leftCurrent = left.at(leftIndex - 1);
    final E rightCurrent = right.at(rightIndex - 1);

    // Compare elements by value, not by reference: E is an object type, so == would treat equal
    // but distinct instances (e.g. boxed Characters outside the autobox cache) as different.
    final int cost = java.util.Objects.equals(leftCurrent, rightCurrent) ? 0 : 1;

    // Select cheapest operation
    int value = Math.min(
            Math.min(
                    prev[rightIndex] + 1,    // Delete current element
                    curr[rightIndex - 1] + 1 // Insert current element
            ),
            prev[rightIndex - 1] + cost      // Replace (or no cost if same element)
    );

    // Check if the two adjacent elements are swapped -> transpose if cheaper
    if (leftIndex > 1
            && rightIndex > 1
            && java.util.Objects.equals(leftCurrent, right.at(rightIndex - 2))
            && java.util.Objects.equals(left.at(leftIndex - 2), rightCurrent)) {
        // Use cost here, to properly handle two subsequent equal elements
        value = Math.min(value, prevPrev[rightIndex - 2] + cost);
    }

    return value;
}

/**
* Finds the Damerau-Levenshtein distance between two CharSequences if it's less than or equal to a given threshold.
*
Expand Down Expand Up @@ -89,7 +115,7 @@ private static <E> int limitedCompare(SimilarityInput<E> left, SimilarityInput<E
int[] prevPrev = new int[rightLength + 1];
int[] temp; // Temp variable use to shuffle arrays at the end of each iteration

int rightIndex, leftIndex, cost, minCost;
int rightIndex, leftIndex, minCost;

// Changing empty sequence to [0..i] requires i insertions
for (rightIndex = 0; rightIndex <= rightLength; rightIndex++) {
Expand All @@ -111,25 +137,7 @@ private static <E> int limitedCompare(SimilarityInput<E> left, SimilarityInput<E
minCost = Integer.MAX_VALUE;

for (rightIndex = 1; rightIndex <= rightLength; rightIndex++) {
cost = left.at(leftIndex - 1) == right.at(rightIndex - 1) ? 0 : 1;

// Select cheapest operation
curr[rightIndex] = Math.min(
Math.min(
prev[rightIndex] + 1, // Delete current character
curr[rightIndex - 1] + 1 // Insert current character
),
prev[rightIndex - 1] + cost // Replace (or no cost if same character)
);

// Check if adjacent characters are the same -> transpose if cheaper
if (leftIndex > 1
&& rightIndex > 1
&& left.at(leftIndex - 1) == right.at(rightIndex - 2)
&& left.at(leftIndex - 2) == right.at(rightIndex - 1)) {
// Use cost here, to properly handle two subsequent equal letters
curr[rightIndex] = Math.min(curr[rightIndex], prevPrev[rightIndex - 2] + cost);
}
curr[rightIndex] = calculateCost(left, right, leftIndex, rightIndex, curr, prev, prevPrev);

minCost = Math.min(curr[rightIndex], minCost);
}
Expand Down Expand Up @@ -196,7 +204,7 @@ private static <E> int unlimitedCompare(SimilarityInput<E> left, SimilarityInput
int[] prevPrev = new int[rightLength + 1];
int[] temp; // Temp variable use to shuffle arrays at the end of each iteration

int rightIndex, leftIndex, cost;
int rightIndex, leftIndex;

// Changing empty sequence to [0..i] requires i insertions
for (rightIndex = 0; rightIndex <= rightLength; rightIndex++) {
Expand All @@ -216,25 +224,7 @@ private static <E> int unlimitedCompare(SimilarityInput<E> left, SimilarityInput
curr[0] = leftIndex;

for (rightIndex = 1; rightIndex <= rightLength; rightIndex++) {
cost = left.at(leftIndex - 1) == right.at(rightIndex - 1) ? 0 : 1;

// Select cheapest operation
curr[rightIndex] = Math.min(
Math.min(
prev[rightIndex] + 1, // Delete current character
curr[rightIndex - 1] + 1 // Insert current character
),
prev[rightIndex - 1] + cost // Replace (or no cost if same character)
);

// Check if adjacent characters are the same -> transpose if cheaper
if (leftIndex > 1
&& rightIndex > 1
&& left.at(leftIndex - 1) == right.at(rightIndex - 2)
&& left.at(leftIndex - 2) == right.at(rightIndex - 1)) {
// Use cost here, to properly handle two subsequent equal letters
curr[rightIndex] = Math.min(curr[rightIndex], prevPrev[rightIndex - 2] + cost);
}
curr[rightIndex] = calculateCost(left, right, leftIndex, rightIndex, curr, prev, prevPrev);
}

// Rotate arrays for next iteration
Expand Down
Loading