From 066fe081e2bc4a950952f3418400bf85ffc3ae21 Mon Sep 17 00:00:00 2001 From: William Fiset Date: Mon, 9 Mar 2026 21:06:31 -0700 Subject: [PATCH 1/6] Refactor SuffixArraySlow and expand test coverage (#1271) Refactor SuffixArraySlow: fix package order, add file-level docs with Big-O, replace FQN with import, fix snake_case naming, make fields private, add Javadoc. Expand SuffixArrayTest: add tests for single char, two chars, null input, int[] constructor, sorted suffix verification, getTextLength(), and randomized cross-validation across all 3 implementations. Clean up unused fields and wildcard import. Co-authored-by: Claude Opus 4.6 --- .../suffixarray/SuffixArraySlow.java | 43 ++-- .../suffixarray/SuffixArrayTest.java | 195 +++++++++++------- 2 files changed, 145 insertions(+), 93 deletions(-) diff --git a/src/main/java/com/williamfiset/algorithms/datastructures/suffixarray/SuffixArraySlow.java b/src/main/java/com/williamfiset/algorithms/datastructures/suffixarray/SuffixArraySlow.java index 975e20e6f..91cd37910 100644 --- a/src/main/java/com/williamfiset/algorithms/datastructures/suffixarray/SuffixArraySlow.java +++ b/src/main/java/com/williamfiset/algorithms/datastructures/suffixarray/SuffixArraySlow.java @@ -1,31 +1,41 @@ +package com.williamfiset.algorithms.datastructures.suffixarray; + +import java.util.Arrays; + /** - * Naive suffix array implementation. + * Naive Suffix Array Construction + * + * Builds a suffix array by generating all suffixes, sorting them with + * a standard comparison sort, and extracting the sorted indices. + * Simple to understand but slow for large inputs. * - *

Time Complexity: O(n^2log(n)) + * Compare with SuffixArrayMed (O(n log^2 n)) and SuffixArrayFast (O(n log n)) + * to see progressively more efficient construction algorithms. + * + * Time: O(n^2 log n) — sorting is O(n log n) comparisons, each O(n) + * Space: O(n) * * @author William Fiset, william.alexandre.fiset@gmail.com */ -package com.williamfiset.algorithms.datastructures.suffixarray; - public class SuffixArraySlow extends SuffixArray { private static class Suffix implements Comparable { - // Starting position of suffix in text final int index, len; final int[] text; - public Suffix(int[] text, int index) { + Suffix(int[] text, int index) { this.len = text.length - index; this.index = index; this.text = text; } - // Compare the two suffixes inspired by Robert Sedgewick and Kevin Wayne + // Lexicographic comparison of two suffixes, character by character. + // If one suffix is a prefix of the other, the shorter one comes first. @Override public int compareTo(Suffix other) { if (this == other) return 0; - int min_len = Math.min(len, other.len); - for (int i = 0; i < min_len; i++) { + int minLen = Math.min(len, other.len); + for (int i = 0; i < minLen; i++) { if (text[index + i] < other.text[other.index + i]) return -1; if (text[index + i] > other.text[other.index + i]) return +1; } @@ -38,8 +48,7 @@ public String toString() { } } - // Contains all the suffixes of the SuffixArray - Suffix[] suffixes; + private Suffix[] suffixes; public SuffixArraySlow(String text) { super(toIntArray(text)); @@ -49,8 +58,10 @@ public SuffixArraySlow(int[] text) { super(text); } - // Suffix array construction. This actually takes O(n^2log(n)) time since sorting takes on - // average O(nlog(n)) and each String comparison takes O(n). + /** + * Constructs the suffix array by creating all n suffixes, sorting + * them lexicographically, then storing the sorted starting indices. 
+ */ @Override protected void construct() { sa = new int[N]; @@ -58,12 +69,10 @@ protected void construct() { for (int i = 0; i < N; i++) suffixes[i] = new Suffix(T, i); - java.util.Arrays.sort(suffixes); + Arrays.sort(suffixes); for (int i = 0; i < N; i++) { - Suffix suffix = suffixes[i]; - sa[i] = suffix.index; - suffixes[i] = null; + sa[i] = suffixes[i].index; } suffixes = null; diff --git a/src/test/java/com/williamfiset/algorithms/datastructures/suffixarray/SuffixArrayTest.java b/src/test/java/com/williamfiset/algorithms/datastructures/suffixarray/SuffixArrayTest.java index b8f7c0dc2..463cf2f73 100644 --- a/src/test/java/com/williamfiset/algorithms/datastructures/suffixarray/SuffixArrayTest.java +++ b/src/test/java/com/williamfiset/algorithms/datastructures/suffixarray/SuffixArrayTest.java @@ -1,49 +1,69 @@ package com.williamfiset.algorithms.datastructures.suffixarray; import static com.google.common.truth.Truth.assertThat; +import static org.junit.jupiter.api.Assertions.assertThrows; -import java.security.SecureRandom; import java.util.Random; -import org.junit.jupiter.api.*; +import org.junit.jupiter.api.Test; public class SuffixArrayTest { - static final SecureRandom random = new SecureRandom(); - static final Random rand = new Random(); + static final String ASCII_LETTERS = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"; - static final int LOOPS = 1000; - static final int TEST_SZ = 40; - static final int NUM_NULLS = TEST_SZ / 5; - static final int MAX_RAND_NUM = 250; - - String ASCII_LETTERS = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"; - - @BeforeEach - public void setup() {} + // Helper: create all 3 implementations for the same text + private static SuffixArray[] allImplementations(String text) { + return new SuffixArray[] { + new SuffixArraySlow(text), + new SuffixArrayMed(text), + new SuffixArrayFast(text) + }; + } @Test - public void suffixArrayLength() { - String str = "ABCDE"; - - SuffixArray sa1 = new SuffixArraySlow(str); - 
SuffixArray sa2 = new SuffixArrayMed(str); - SuffixArray sa3 = new SuffixArrayFast(str); + public void testNullTextThrows() { + assertThrows(IllegalArgumentException.class, () -> new SuffixArraySlow((int[]) null)); + assertThrows(IllegalArgumentException.class, () -> new SuffixArrayMed((int[]) null)); + assertThrows(IllegalArgumentException.class, () -> new SuffixArrayFast((int[]) null)); + } - assertThat(sa1.getSa().length).isEqualTo(str.length()); - assertThat(sa2.getSa().length).isEqualTo(str.length()); - assertThat(sa3.getSa().length).isEqualTo(str.length()); + @Test + public void testSingleCharacter() { + for (SuffixArray sa : allImplementations("A")) { + assertThat(sa.getSa()).isEqualTo(new int[] {0}); + assertThat(sa.getLcpArray()).isEqualTo(new int[] {0}); + } } @Test - public void lcsUniqueCharacters() { + public void testTwoCharactersSorted() { + // "AB" -> suffixes: "AB"(0), "B"(1) -> sorted: "AB","B" -> sa=[0,1] + for (SuffixArray sa : allImplementations("AB")) { + assertThat(sa.getSa()).isEqualTo(new int[] {0, 1}); + assertThat(sa.getLcpArray()).isEqualTo(new int[] {0, 0}); + } + } - SuffixArray sa1 = new SuffixArraySlow(ASCII_LETTERS); - SuffixArray sa2 = new SuffixArrayMed(ASCII_LETTERS); - SuffixArray sa3 = new SuffixArrayFast(ASCII_LETTERS); + @Test + public void testTwoCharactersReversed() { + // "BA" -> suffixes: "BA"(0), "A"(1) -> sorted: "A","BA" -> sa=[1,0] + for (SuffixArray sa : allImplementations("BA")) { + assertThat(sa.getSa()).isEqualTo(new int[] {1, 0}); + assertThat(sa.getLcpArray()).isEqualTo(new int[] {0, 0}); + } + } - SuffixArray[] suffixArrays = {sa1, sa2, sa3}; + @Test + public void testSuffixArrayLength() { + String str = "ABCDE"; + for (SuffixArray sa : allImplementations(str)) { + assertThat(sa.getSa().length).isEqualTo(str.length()); + assertThat(sa.getTextLength()).isEqualTo(str.length()); + } + } - for (SuffixArray sa : suffixArrays) { + @Test + public void testLcpAllZerosForUniqueCharacters() { + for (SuffixArray sa : 
allImplementations(ASCII_LETTERS)) { for (int i = 0; i < sa.getSa().length; i++) { assertThat(sa.getLcpArray()[i]).isEqualTo(0); } @@ -51,17 +71,10 @@ public void lcsUniqueCharacters() { } @Test - public void increasingLCPTest() { - - String UNIQUE_CHARS = "KKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK"; - - SuffixArray sa1 = new SuffixArraySlow(UNIQUE_CHARS); - SuffixArray sa2 = new SuffixArrayMed(UNIQUE_CHARS); - SuffixArray sa3 = new SuffixArrayFast(UNIQUE_CHARS); - - SuffixArray[] suffixArrays = {sa1, sa2, sa3}; - - for (SuffixArray sa : suffixArrays) { + public void testLcpIncreasingForRepeatedCharacter() { + // All same character: LCP[i] = i since suffixes are "KKK...", "KK...", "K..." + String repeated = "KKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK"; + for (SuffixArray sa : allImplementations(repeated)) { for (int i = 0; i < sa.getSa().length; i++) { assertThat(sa.getLcpArray()[i]).isEqualTo(i); } @@ -69,60 +82,90 @@ public void increasingLCPTest() { } @Test - public void lcpTest1() { - + public void testLcpKnownValues1() { String text = "ABBABAABAA"; - int[] lcpValues = {0, 1, 2, 1, 4, 2, 0, 3, 2, 1}; - - SuffixArray sa1 = new SuffixArraySlow(text); - SuffixArray sa2 = new SuffixArrayMed(text); - SuffixArray sa3 = new SuffixArrayFast(text); - - SuffixArray[] suffixArrays = {sa1, sa2, sa3}; - - for (SuffixArray sa : suffixArrays) { - for (int i = 0; i < sa.getSa().length; i++) { - assertThat(lcpValues[i]).isEqualTo(sa.getLcpArray()[i]); - } + int[] expected = {0, 1, 2, 1, 4, 2, 0, 3, 2, 1}; + for (SuffixArray sa : allImplementations(text)) { + assertThat(sa.getLcpArray()).isEqualTo(expected); } } @Test - public void lcpTest2() { + public void testLcpKnownValues2() { String text = "ABABABAABB"; - int[] lcpValues = {0, 1, 3, 5, 2, 0, 1, 2, 4, 1}; - - SuffixArray sa1 = new SuffixArraySlow(text); - SuffixArray sa2 = new SuffixArrayMed(text); - SuffixArray sa3 = new SuffixArrayFast(text); - - SuffixArray[] suffixArrays = {sa1, sa2, sa3}; + int[] 
expected = {0, 1, 3, 5, 2, 0, 1, 2, 4, 1}; + for (SuffixArray sa : allImplementations(text)) { + assertThat(sa.getLcpArray()).isEqualTo(expected); + } + } - for (SuffixArray sa : suffixArrays) { - for (int i = 0; i < sa.getSa().length; i++) { - assertThat(lcpValues[i]).isEqualTo(sa.getLcpArray()[i]); + // Verify the suffix array actually produces lexicographically sorted suffixes + @Test + public void testSuffixesAreSorted() { + String text = "ABBABAABAA"; + for (SuffixArray sa : allImplementations(text)) { + int[] arr = sa.getSa(); + for (int i = 0; i < arr.length - 1; i++) { + String s1 = text.substring(arr[i]); + String s2 = text.substring(arr[i + 1]); + assertThat(s1.compareTo(s2)).isLessThan(0); } } } @Test - public void saConstruction() { - // Test inspired by LCS. Make sure constructed SAs are equal. - // Use digits 0-9 to fake unique tokens + public void testConstructionConsistency() { + // All 3 implementations must produce the same SA String text = "BAAAAB0ABAAAAB1BABA2ABA3AAB4BBBB5BB"; + SuffixArray[] impls = allImplementations(text); + for (int i = 0; i < impls.length; i++) { + for (int j = i + 1; j < impls.length; j++) { + assertThat(impls[i].getSa()).isEqualTo(impls[j].getSa()); + } + } + } + @Test + public void testIntArrayConstructor() { + // "CAB" as int array + int[] text = {67, 65, 66}; SuffixArray sa1 = new SuffixArraySlow(text); SuffixArray sa2 = new SuffixArrayMed(text); SuffixArray sa3 = new SuffixArrayFast(text); - SuffixArray[] suffixArrays = {sa1, sa2, sa3}; - - for (int i = 0; i < suffixArrays.length; i++) { - for (int j = i + 1; j < suffixArrays.length; j++) { - SuffixArray s1 = suffixArrays[i]; - SuffixArray s2 = suffixArrays[j]; - for (int k = 0; k < s1.getSa().length; k++) { - assertThat(s1.getSa()[k]).isEqualTo(s2.getSa()[k]); - } + + // Suffixes: "CAB"(0), "AB"(1), "B"(2) -> sorted: "AB","B","CAB" -> sa=[1,2,0] + int[] expected = {1, 2, 0}; + assertThat(sa1.getSa()).isEqualTo(expected); + assertThat(sa2.getSa()).isEqualTo(expected); 
+ assertThat(sa3.getSa()).isEqualTo(expected); + } + + // Randomized cross-validation: all implementations must agree on random inputs + @Test + public void testRandomStringsAllImplementationsAgree() { + Random rand = new Random(42); + for (int loop = 0; loop < 200; loop++) { + int len = 2 + rand.nextInt(20); + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < len; i++) { + sb.append((char) ('A' + rand.nextInt(5))); + } + String text = sb.toString(); + + SuffixArray[] impls = allImplementations(text); + + // All SAs must match + for (int i = 1; i < impls.length; i++) { + assertThat(impls[i].getSa()).isEqualTo(impls[0].getSa()); + assertThat(impls[i].getLcpArray()).isEqualTo(impls[0].getLcpArray()); + } + + // Verify sorted order + int[] sa = impls[0].getSa(); + for (int i = 0; i < sa.length - 1; i++) { + String s1 = text.substring(sa[i]); + String s2 = text.substring(sa[i + 1]); + assertThat(s1.compareTo(s2)).isLessThan(0); } } } From 33a089db098f34c0698e4a096c5cb877af37e059 Mon Sep 17 00:00:00 2001 From: William Fiset Date: Mon, 9 Mar 2026 21:13:54 -0700 Subject: [PATCH 2/6] Add Big-O notation convention to SKILL.md Use explicit multiplication and parentheses in complexity expressions: O(n*log(n)) instead of O(n log n). 
Co-Authored-By: Claude Opus 4.6 --- .claude/SKILL.md | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/.claude/SKILL.md b/.claude/SKILL.md index a4d7c1388..2c7e3f984 100644 --- a/.claude/SKILL.md +++ b/.claude/SKILL.md @@ -282,6 +282,28 @@ Common short names (use consistently across the repo): - Max line length: 100 characters (soft limit) - Imports: group by package, alphabetize within groups, no wildcard imports +### Big-O Notation Convention + +Always use explicit multiplication and parentheses in Big-O expressions for clarity: + +```java +// ✓ GOOD — explicit and unambiguous +// Time: O(n*log(n)) +// Time: O(n*log^2(n)) +// Time: O(n^2*log(n)) + +// ✗ BAD — missing multiplication and parentheses +// Time: O(n log n) +// Time: O(n log^2 n) +// Time: O(n^2 log n) + +// Simple expressions without multiplication are fine as-is +// Time: O(n) +// Time: O(n^2) +// Time: O(log(n)) +// Space: O(n) +``` + ### Avoid Java Streams Streams hurt readability for learners. Use plain loops instead: From 973a3e73ae67c75dbb4dc9e584104b05978f8f25 Mon Sep 17 00:00:00 2001 From: William Fiset Date: Mon, 9 Mar 2026 21:18:00 -0700 Subject: [PATCH 3/6] Refactor SuffixArrayMed, fix Big-O notation in both suffix arrays (#1272) Refactor SuffixArrayMed: fix package order, add file-level docs with algorithm explanation, replace FQN with import, make SuffixRankTuple private, add Javadoc, remove commented-out code and useless null assignments, improve inline comments. Fix Big-O notation to use explicit brackets: O(n*log(n)) not O(n log n). 
Co-authored-by: Claude Opus 4.6 --- .../suffixarray/SuffixArrayMed.java | 81 ++++++++----------- .../suffixarray/SuffixArraySlow.java | 4 +- 2 files changed, 37 insertions(+), 48 deletions(-) diff --git a/src/main/java/com/williamfiset/algorithms/datastructures/suffixarray/SuffixArrayMed.java b/src/main/java/com/williamfiset/algorithms/datastructures/suffixarray/SuffixArrayMed.java index 23e4e9002..cb3965bb5 100644 --- a/src/main/java/com/williamfiset/algorithms/datastructures/suffixarray/SuffixArrayMed.java +++ b/src/main/java/com/williamfiset/algorithms/datastructures/suffixarray/SuffixArrayMed.java @@ -1,18 +1,29 @@ +package com.williamfiset.algorithms.datastructures.suffixarray; + +import java.util.Arrays; + /** - * Medium speed suffix array implementation. Time Complexity: O(nlog^2(n)) + * Medium-speed Suffix Array Construction (Prefix Doubling) + * + * Builds a suffix array by repeatedly doubling the prefix length used for + * ranking. In each round, suffixes are sorted by their first 2^k characters + * using the ranks from the previous round as a two-key comparison. + * + * Compare with SuffixArraySlow (O(n^2*log(n))) for a simpler but slower approach, + * and SuffixArrayFast (O(n*log(n))) for an optimized version using radix sort. + * + * Time: O(n*log^2(n)) — O(log(n)) doubling rounds, each with O(n*log(n)) sort + * Space: O(n) * * @author William Fiset, william.alexandre.fiset@gmail.com */ -package com.williamfiset.algorithms.datastructures.suffixarray; - public class SuffixArrayMed extends SuffixArray { - // Wrapper class to help sort suffix ranks - static class SuffixRankTuple implements Comparable { - + // Holds the two-key rank (first half, second half) and original index + // for sorting suffixes by their first 2^k characters. 
+ private static class SuffixRankTuple implements Comparable { int firstHalf, secondHalf, originalIndex; - // Sort Suffix ranks first on the first half then the second half @Override public int compareTo(SuffixRankTuple other) { int cmp = Integer.compare(firstHalf, other.firstHalf); @@ -34,25 +45,28 @@ public SuffixArrayMed(int[] text) { super(text); } - // Construct a suffix array in O(nlog^2(n)) + /** + * Constructs the suffix array using prefix doubling. Each iteration doubles + * the window size and re-ranks suffixes until all ranks are unique. + */ @Override protected void construct() { sa = new int[N]; - // Maintain suffix ranks in both a matrix with two rows containing the - // current and last rank information as well as some sortable rank objects + // Two-row matrix: row 0 = current ranks, row 1 = new ranks int[][] suffixRanks = new int[2][N]; SuffixRankTuple[] ranks = new SuffixRankTuple[N]; - // Assign a numerical value to each character in the text + // Initial ranks are the character values themselves for (int i = 0; i < N; i++) { suffixRanks[0][i] = T[i]; ranks[i] = new SuffixRankTuple(); } - // O(log(n)) + // Double the prefix length each round: 1, 2, 4, 8, ... 
→ O(log(n)) rounds for (int pos = 1; pos < N; pos *= 2) { + // Build two-key tuples: (rank of first half, rank of second half) for (int i = 0; i < N; i++) { SuffixRankTuple suffixRank = ranks[i]; suffixRank.firstHalf = suffixRanks[0][i]; @@ -60,61 +74,36 @@ protected void construct() { suffixRank.originalIndex = i; } - // O(nlog(n)) - java.util.Arrays.sort(ranks); + Arrays.sort(ranks); + // Assign new ranks based on sorted order int newRank = 0; suffixRanks[1][ranks[0].originalIndex] = 0; for (int i = 1; i < N; i++) { + SuffixRankTuple prev = ranks[i - 1]; + SuffixRankTuple cur = ranks[i]; - SuffixRankTuple lastSuffixRank = ranks[i - 1]; - SuffixRankTuple currSuffixRank = ranks[i]; + // Increment rank only when the tuple differs from the previous + if (cur.firstHalf != prev.firstHalf || cur.secondHalf != prev.secondHalf) + newRank++; - // If the first half differs from the second half - if (currSuffixRank.firstHalf != lastSuffixRank.firstHalf - || currSuffixRank.secondHalf != lastSuffixRank.secondHalf) newRank++; - - suffixRanks[1][currSuffixRank.originalIndex] = newRank; + suffixRanks[1][cur.originalIndex] = newRank; } - // Place top row (current row) to be the last row suffixRanks[0] = suffixRanks[1]; - // Optimization to stop early + // All ranks unique means sorting is complete if (newRank == N - 1) break; } - // Fill suffix array for (int i = 0; i < N; i++) { sa[i] = ranks[i].originalIndex; - ranks[i] = null; } - - // Cleanup - suffixRanks[0] = suffixRanks[1] = null; - suffixRanks = null; - ranks = null; } public static void main(String[] args) { - - // String[] strs = { "AAGAAGC", "AGAAGT", "CGAAGC" }; - // String[] strs = { "abca", "bcad", "daca" }; - // String[] strs = { "abca", "bcad", "daca" }; - // String[] strs = { "AABC", "BCDC", "BCDE", "CDED" }; - // String[] strs = { "abcdefg", "bcdefgh", "cdefghi" }; - // String[] strs = { "xxx", "yyy", "zzz" }; - // TreeSet lcss = SuffixArrayMed.lcs(strs, 2); - // System.out.println(lcss); - - // SuffixArrayMed sa 
= new SuffixArrayMed("abracadabra"); - // System.out.println(sa); - // System.out.println(java.util.Arrays.toString(sa.sa)); - // System.out.println(java.util.Arrays.toString(sa.lcp)); - SuffixArrayMed sa = new SuffixArrayMed("ABBABAABAA"); - // SuffixArrayMed sa = new SuffixArrayMed("GAGAGAGAGAGAG"); System.out.println(sa); } } diff --git a/src/main/java/com/williamfiset/algorithms/datastructures/suffixarray/SuffixArraySlow.java b/src/main/java/com/williamfiset/algorithms/datastructures/suffixarray/SuffixArraySlow.java index 91cd37910..d3d296abf 100644 --- a/src/main/java/com/williamfiset/algorithms/datastructures/suffixarray/SuffixArraySlow.java +++ b/src/main/java/com/williamfiset/algorithms/datastructures/suffixarray/SuffixArraySlow.java @@ -9,10 +9,10 @@ * a standard comparison sort, and extracting the sorted indices. * Simple to understand but slow for large inputs. * - * Compare with SuffixArrayMed (O(n log^2 n)) and SuffixArrayFast (O(n log n)) + * Compare with SuffixArrayMed (O(n*log^2(n))) and SuffixArrayFast (O(n*log(n))) * to see progressively more efficient construction algorithms. * - * Time: O(n^2 log n) — sorting is O(n log n) comparisons, each O(n) + * Time: O(n^2*log(n)) — sorting is O(n*log(n)) comparisons, each O(n) * Space: O(n) * * @author William Fiset, william.alexandre.fiset@gmail.com From 2b9b29258bf95658535289bc0b80bd0280ef00e7 Mon Sep 17 00:00:00 2001 From: William Fiset Date: Mon, 9 Mar 2026 21:33:19 -0700 Subject: [PATCH 4/6] Refactor SuffixArrayFast: add docs, comments, and fix formatting (#1273) Apply SKILL.md conventions: fix package/header order, add detailed file-level docs explaining radix sort approach, add educational inline comments on the dense construct() method, use proper import instead of FQN, make fields private, and fix Big-O notation to O(n*log(n)). 
Co-authored-by: Claude Opus 4.6 --- .../suffixarray/SuffixArrayFast.java | 59 ++++++++++++++++--- 1 file changed, 51 insertions(+), 8 deletions(-) diff --git a/src/main/java/com/williamfiset/algorithms/datastructures/suffixarray/SuffixArrayFast.java b/src/main/java/com/williamfiset/algorithms/datastructures/suffixarray/SuffixArrayFast.java index df176d81b..cfba2c852 100644 --- a/src/main/java/com/williamfiset/algorithms/datastructures/suffixarray/SuffixArrayFast.java +++ b/src/main/java/com/williamfiset/algorithms/datastructures/suffixarray/SuffixArrayFast.java @@ -1,18 +1,29 @@ +package com.williamfiset.algorithms.datastructures.suffixarray; + +import java.util.Arrays; + /** - * Suffix array construction implementation. + * Fast Suffix Array Construction (Prefix Doubling with Radix Sort) + * + * Builds a suffix array using prefix doubling with counting sort (radix sort) + * instead of comparison-based sorting. Each doubling round uses two passes of + * counting sort to sort suffix pairs by their rank, achieving O(n) per round + * instead of O(n*log(n)) with comparison sort. * - *

Time Complexity: O(nlog(n)) + * Compare with SuffixArraySlow (O(n^2*log(n))) for a naive approach, and + * SuffixArrayMed (O(n*log^2(n))) for prefix doubling with comparison sort. + * + * Time: O(n*log(n)) -- O(log(n)) doubling rounds, each O(n) with radix sort + * Space: O(n + alphabetSize) * * @author William Fiset, william.alexandre.fiset@gmail.com */ -package com.williamfiset.algorithms.datastructures.suffixarray; - public class SuffixArrayFast extends SuffixArray { private static final int DEFAULT_ALPHABET_SIZE = 256; - int alphabetSize; - int[] sa2, rank, tmp, c; + private int alphabetSize; + private int[] sa2, rank, tmp, c; public SuffixArrayFast(String text) { this(toIntArray(text), DEFAULT_ALPHABET_SIZE); @@ -22,12 +33,22 @@ public SuffixArrayFast(int[] text) { this(text, DEFAULT_ALPHABET_SIZE); } - // Designated constructor + /** + * Creates a suffix array with a custom alphabet size. + * + * @param text the input text as an integer array + * @param alphabetSize the number of distinct symbols (e.g., 256 for ASCII) + */ public SuffixArrayFast(int[] text, int alphabetSize) { super(text); this.alphabetSize = alphabetSize; } + /** + * Constructs the suffix array using prefix doubling with radix sort. + * Each round doubles the comparison window and re-ranks suffixes using + * counting sort for O(n) per round, giving O(n*log(n)) total. 
+ */ @Override protected void construct() { sa = new int[N]; @@ -36,16 +57,34 @@ protected void construct() { c = new int[Math.max(alphabetSize, N)]; int i, p, r; + + // --- Initial sort: rank suffixes by their first character using counting sort --- + + // Count occurrences of each character for (i = 0; i < N; ++i) c[rank[i] = T[i]]++; + // Convert counts to cumulative positions for (i = 1; i < alphabetSize; ++i) c[i] += c[i - 1]; + // Place suffixes into sa in sorted order (stable, right-to-left) for (i = N - 1; i >= 0; --i) sa[--c[T[i]]] = i; + + // --- Prefix doubling: sort by first 2^k characters each round --- for (p = 1; p < N; p <<= 1) { + + // Build sa2: suffixes sorted by their *second half* (positions i+p). + // Suffixes near the end (i >= N-p) have no second half, so they sort first. for (r = 0, i = N - p; i < N; ++i) sa2[r++] = i; + // Remaining suffixes inherit order from sa (already sorted by first half) for (i = 0; i < N; ++i) if (sa[i] >= p) sa2[r++] = sa[i] - p; - java.util.Arrays.fill(c, 0, alphabetSize, 0); + + // Counting sort sa2 by first-half rank to get the final sorted order. + // This is a radix sort: sa2 provides second-key order, we sort by first key. + Arrays.fill(c, 0, alphabetSize, 0); for (i = 0; i < N; ++i) c[rank[i]]++; for (i = 1; i < alphabetSize; ++i) c[i] += c[i - 1]; for (i = N - 1; i >= 0; --i) sa[--c[rank[sa2[i]]]] = sa2[i]; + + // Compute new ranks from the sorted order. Two suffixes get the same + // rank only if both their first-half and second-half ranks match. 
for (sa2[sa[0]] = r = 0, i = 1; i < N; ++i) { if (!(rank[sa[i - 1]] == rank[sa[i]] && sa[i - 1] + p < N @@ -53,9 +92,13 @@ protected void construct() { && rank[sa[i - 1] + p] == rank[sa[i] + p])) r++; sa2[sa[i]] = r; } + + // Swap rank and sa2 arrays to avoid allocation tmp = rank; rank = sa2; sa2 = tmp; + + // All ranks unique means sorting is complete if (r == N - 1) break; alphabetSize = r + 1; } From 891a227a29bfceca5199892f1f7f10730321502d Mon Sep 17 00:00:00 2001 From: William Fiset Date: Mon, 9 Mar 2026 22:10:36 -0700 Subject: [PATCH 5/6] Refactor CompactSegmentTree and add dedicated tests (#1274) Apply SKILL.md conventions: fix package/header order, add detailed file-level docs explaining bottom-up array layout, replace FQN with import, make UNIQUE static final, add Javadoc on all public methods, add educational inline comments, clean up main(). Add 11 new tests covering sum queries, point updates, negative values, edge cases. Co-authored-by: Claude Opus 4.6 --- .../segmenttree/CompactSegmentTree.java | 126 ++++++++++++------ .../datastructures/segmenttree/BUILD | 11 ++ .../segmenttree/CompactSegmentTreeTest.java | 124 +++++++++++++++++ 3 files changed, 217 insertions(+), 44 deletions(-) create mode 100644 src/test/java/com/williamfiset/algorithms/datastructures/segmenttree/CompactSegmentTreeTest.java diff --git a/src/main/java/com/williamfiset/algorithms/datastructures/segmenttree/CompactSegmentTree.java b/src/main/java/com/williamfiset/algorithms/datastructures/segmenttree/CompactSegmentTree.java index eac9e5960..0c9bf9e8f 100644 --- a/src/main/java/com/williamfiset/algorithms/datastructures/segmenttree/CompactSegmentTree.java +++ b/src/main/java/com/williamfiset/algorithms/datastructures/segmenttree/CompactSegmentTree.java @@ -1,85 +1,123 @@ +package com.williamfiset.algorithms.datastructures.segmenttree; + +import java.util.Arrays; + /** - * A compact array based segment tree implementation. 
This segment tree supports point updates and - * range queries. + * Compact Array-Based Segment Tree + * + * A space-efficient segment tree stored in a flat array of size 2*n (no + * recursion, no pointers). Supports point updates and range queries using + * any associative combine function (sum, min, max, product, GCD, etc.). + * + * The tree is stored bottom-up: leaves occupy indices [n, 2n) and internal + * nodes occupy [1, n). Index 0 is unused. Each internal node i is the + * combination of its children at 2i and 2i+1. + * + * Use cases: + * - Range sum / min / max queries with point updates + * - Competitive programming (very short, cache-friendly implementation) + * + * Time: O(n) construction, O(log(n)) per query and update + * Space: O(n) * * @author Al.Cash & William Fiset, william.alexandre.fiset@gmail.com */ -package com.williamfiset.algorithms.datastructures.segmenttree; - public class CompactSegmentTree { private int N; - // Let UNIQUE be a value which does NOT - // and will not appear in the segment tree - private long UNIQUE = 8123572096793136074L; - - // Segment tree values - private long[] tree; + // Flat array storing the segment tree. Leaves are at indices [N, 2N), + // internal nodes at [1, N). Index 0 is unused. Uninitialized slots + // are null, which acts as the identity element for the combine function. + private Long[] tree; + /** + * Creates an empty segment tree of the given size, with all slots + * initialized to null. + * + * @param size the number of elements (leaves) in the segment tree + */ public CompactSegmentTree(int size) { - tree = new long[2 * (N = size)]; - java.util.Arrays.fill(tree, UNIQUE); + tree = new Long[2 * (N = size)]; } + /** + * Creates a segment tree from an array of values. + * + * @param values the initial leaf values + */ public CompactSegmentTree(long[] values) { this(values.length); - // TODO(william): Implement smarter construction. 
for (int i = 0; i < N; i++) modify(i, values[i]); } - // This is the segment tree function we are using for queries. - // The function must be an associative function, meaning - // the following property must hold: f(f(a,b),c) = f(a,f(b,c)). - // Common associative functions used with segment trees - // include: min, max, sum, product, GCD, and etc... - private long function(long a, long b) { - if (a == UNIQUE) return b; - else if (b == UNIQUE) return a; - - return a + b; // sum over a range - // return (a > b) ? a : b; // maximum value over a range - // return (a < b) ? a : b; // minimum value over a range - // return a * b; // product over a range (watch out for overflow!) + /** + * The associative combine function used for queries. This function must + * satisfy f(f(a,b), c) = f(a, f(b,c)) for correct segment tree behavior. + * Null acts as the identity element: f(null, x) = f(x, null) = x. + * + * Change this to customize the query type: + * return a + b; // sum over a range + * return (a > b) ? a : b; // maximum over a range + * return (a < b) ? a : b; // minimum over a range + * return a * b; // product over a range (watch for overflow!) + */ + private Long function(Long a, Long b) { + if (a == null) return b; + if (b == null) return a; + return a + b; } - // Adjust point i by a value, O(log(n)) + /** + * Updates the value at index i by combining it with the given value + * using the combine function, then propagates changes up to the root. + * + * @param i the leaf index to update (0-based) + * @param value the value to combine at position i + * + * Time: O(log(n)) + */ public void modify(int i, long value) { + // Update the leaf node tree[i + N] = function(tree[i + N], value); + // Propagate up: recompute each ancestor from its two children for (i += N; i > 1; i >>= 1) { tree[i >> 1] = function(tree[i], tree[i ^ 1]); } } - // Query interval [l, r), O(log(n)) + /** + * Queries the aggregate value over the half-open interval [l, r). 
+ * + * Works by starting at the leaves and moving up. At each level, if the + * left boundary is a right child, include it and move right. If the right + * boundary is a right child, move left and include it. + * + * @param l left endpoint (inclusive, 0-based) + * @param r right endpoint (exclusive, 0-based) + * @return the combined result over [l, r) + * @throws IllegalStateException if the query range is empty + * + * Time: O(log(n)) + */ public long query(int l, int r) { - long res = UNIQUE; + Long res = null; for (l += N, r += N; l < r; l >>= 1, r >>= 1) { + // If l is a right child, include it and move to next subtree if ((l & 1) != 0) res = function(res, tree[l++]); + // If r is a right child, include its left sibling if ((r & 1) != 0) res = function(res, tree[--r]); } - if (res == UNIQUE) { - throw new IllegalStateException("UNIQUE should not be the return value."); + if (res == null) { + throw new IllegalStateException("Empty query range."); } return res; } public static void main(String[] args) { - // exmaple1(); - example2(); - } - - private static void example1() { - long[] values = new long[] {3, 0, 8, 9, 8, 2, 5, 3, 7, 1}; - CompactSegmentTree st = new CompactSegmentTree(values); - System.out.println(java.util.Arrays.toString(st.tree)); - } - - private static void example2() { long[] values = new long[] {1, 1, 1, 1, 1, 1}; CompactSegmentTree st = new CompactSegmentTree(values); - System.out.println(java.util.Arrays.toString(st.tree)); - + System.out.println(Arrays.toString(st.tree)); System.out.println(st.query(0, 6)); // 6 System.out.println(st.query(1, 5)); // 4 System.out.println(st.query(0, 2)); // 2 diff --git a/src/test/java/com/williamfiset/algorithms/datastructures/segmenttree/BUILD b/src/test/java/com/williamfiset/algorithms/datastructures/segmenttree/BUILD index 7bafacfb9..f896cfcf9 100644 --- a/src/test/java/com/williamfiset/algorithms/datastructures/segmenttree/BUILD +++ 
b/src/test/java/com/williamfiset/algorithms/datastructures/segmenttree/BUILD @@ -108,5 +108,16 @@ java_test( deps = TEST_DEPS, ) +# bazel test //src/test/java/com/williamfiset/algorithms/datastructures/segmenttree:CompactSegmentTreeTest +java_test( + name = "CompactSegmentTreeTest", + srcs = ["CompactSegmentTreeTest.java"], + main_class = "org.junit.platform.console.ConsoleLauncher", + use_testrunner = False, + args = ["--select-class=com.williamfiset.algorithms.datastructures.segmenttree.CompactSegmentTreeTest"], + runtime_deps = JUNIT5_RUNTIME_DEPS, + deps = TEST_DEPS, +) + # Run all tests # bazel test //src/test/java/com/williamfiset/algorithms/datastructures/segmenttree:all diff --git a/src/test/java/com/williamfiset/algorithms/datastructures/segmenttree/CompactSegmentTreeTest.java b/src/test/java/com/williamfiset/algorithms/datastructures/segmenttree/CompactSegmentTreeTest.java new file mode 100644 index 000000000..c1ad161d1 --- /dev/null +++ b/src/test/java/com/williamfiset/algorithms/datastructures/segmenttree/CompactSegmentTreeTest.java @@ -0,0 +1,124 @@ +package com.williamfiset.algorithms.datastructures.segmenttree; + +import static com.google.common.truth.Truth.assertThat; +import static org.junit.jupiter.api.Assertions.assertThrows; + +import org.junit.jupiter.api.Test; + +public class CompactSegmentTreeTest { + + @Test + public void testSumQueryBasic() { + long[] values = {1, 2, 3, 4, 5}; + CompactSegmentTree st = new CompactSegmentTree(values); + assertThat(st.query(0, 5)).isEqualTo(15); + assertThat(st.query(0, 3)).isEqualTo(6); + assertThat(st.query(2, 5)).isEqualTo(12); + } + + @Test + public void testSingleElement() { + long[] values = {42}; + CompactSegmentTree st = new CompactSegmentTree(values); + assertThat(st.query(0, 1)).isEqualTo(42); + } + + @Test + public void testTwoElements() { + long[] values = {3, 7}; + CompactSegmentTree st = new CompactSegmentTree(values); + assertThat(st.query(0, 2)).isEqualTo(10); + assertThat(st.query(0, 
1)).isEqualTo(3); + assertThat(st.query(1, 2)).isEqualTo(7); + } + + @Test + public void testPointUpdate() { + long[] values = {1, 1, 1, 1, 1, 1}; + CompactSegmentTree st = new CompactSegmentTree(values); + assertThat(st.query(0, 6)).isEqualTo(6); + + // modify combines with existing value (sum), so adding 5 to index 2 + // changes value from 1 to 6 + st.modify(2, 5); + assertThat(st.query(0, 6)).isEqualTo(11); + assertThat(st.query(2, 3)).isEqualTo(6); + } + + @Test + public void testQuerySingleElementInRange() { + long[] values = {10, 20, 30, 40, 50}; + CompactSegmentTree st = new CompactSegmentTree(values); + for (int i = 0; i < values.length; i++) { + assertThat(st.query(i, i + 1)).isEqualTo(values[i]); + } + } + + @Test + public void testAllZeros() { + long[] values = {0, 0, 0, 0}; + CompactSegmentTree st = new CompactSegmentTree(values); + assertThat(st.query(0, 4)).isEqualTo(0); + assertThat(st.query(1, 3)).isEqualTo(0); + } + + @Test + public void testNegativeValues() { + long[] values = {-5, 3, -2, 7, -1}; + CompactSegmentTree st = new CompactSegmentTree(values); + assertThat(st.query(0, 5)).isEqualTo(2); + assertThat(st.query(0, 2)).isEqualTo(-2); + assertThat(st.query(3, 5)).isEqualTo(6); + } + + @Test + public void testMultipleUpdates() { + long[] values = {1, 2, 3}; + CompactSegmentTree st = new CompactSegmentTree(values); + assertThat(st.query(0, 3)).isEqualTo(6); + + st.modify(0, 10); // 1 + 10 = 11 + st.modify(1, 20); // 2 + 20 = 22 + st.modify(2, 30); // 3 + 30 = 33 + assertThat(st.query(0, 3)).isEqualTo(66); + } + + @Test + public void testSizeConstructor() { + // Empty tree created with size constructor, then populated with modify + CompactSegmentTree st = new CompactSegmentTree(4); + st.modify(0, 5); + st.modify(1, 10); + st.modify(2, 15); + st.modify(3, 20); + assertThat(st.query(0, 4)).isEqualTo(50); + assertThat(st.query(1, 3)).isEqualTo(25); + } + + // Query with equal l and r is an empty range — should throw since + // the result would be 
the null identity (nothing was combined). + @Test + public void testEmptyRangeQueryThrows() { + long[] values = {1, 2, 3}; + CompactSegmentTree st = new CompactSegmentTree(values); + assertThrows(IllegalStateException.class, () -> st.query(1, 1)); + } + + @Test + public void testLargerArray() { + int n = 100; + long[] values = new long[n]; + long total = 0; + for (int i = 0; i < n; i++) { + values[i] = i + 1; + total += values[i]; + } + CompactSegmentTree st = new CompactSegmentTree(values); + assertThat(st.query(0, n)).isEqualTo(total); + + // Query first half: sum of 1..50 = 1275 + assertThat(st.query(0, 50)).isEqualTo(1275); + // Query second half: sum of 51..100 = 3775 + assertThat(st.query(50, 100)).isEqualTo(3775); + } +} From d9e51bf33fe991ef19b6c4490c3fa1384120e1aa Mon Sep 17 00:00:00 2001 From: William Fiset Date: Mon, 9 Mar 2026 22:18:36 -0700 Subject: [PATCH 6/6] Move geom algos in readme --- README.md | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 68f88a819..6d09a7cd9 100644 --- a/README.md +++ b/README.md @@ -144,7 +144,6 @@ $ java -cp classes com.williamfiset.algorithms.search.BinarySearch # Geometry - [Angle between 2D vectors](src/main/java/com/williamfiset/algorithms/geometry/AngleBetweenVectors2D.java) **- O(1)** -- [Angle between 3D vectors](src/main/java/com/williamfiset/algorithms/geometry/AngleBetweenVectors3D.java) **- O(1)** - [Circle-circle intersection point(s)](src/main/java/com/williamfiset/algorithms/geometry/CircleCircleIntersectionPoints.js) **- O(1)** - [Circle-line intersection point(s)](src/main/java/com/williamfiset/algorithms/geometry/LineCircleIntersection.js) **- O(1)** - [Circle-line segment intersection point(s)](src/main/java/com/williamfiset/algorithms/geometry/LineSegmentCircleIntersection.js) **- O(1)** @@ -154,21 +153,28 @@ $ java -cp classes com.williamfiset.algorithms.search.BinarySearch - [Convex hull (Graham Scan 
algorithm)](src/main/java/com/williamfiset/algorithms/geometry/ConvexHullGrahamScan.java) **- O(nlog(n))** - [Convex hull (Monotone chain algorithm)](src/main/java/com/williamfiset/algorithms/geometry/ConvexHullMonotoneChainsAlgorithm.java) **- O(nlog(n))** - [Convex polygon area](src/main/java/com/williamfiset/algorithms/geometry/ConvexPolygonArea.java) **- O(n)** -- [Convex polygon cut](src/main/java/com/williamfiset/algorithms/geometry/ConvexPolygonCutWithLineSegment.java) **- O(n)** - [Convex polygon contains points](src/main/java/com/williamfiset/algorithms/geometry/ConvexPolygonContainsPoint.java) **- O(log(n))** +- [Triangle area algorithms](src/main/java/com/williamfiset/algorithms/geometry/TriangleArea.java) **- O(1)** +- [Line segment-circle intersection point(s)](src/main/java/com/williamfiset/algorithms/geometry/LineSegmentCircleIntersection.js) **- O(1)** +- [Line segment-line segment intersection](src/main/java/com/williamfiset/algorithms/geometry/LineSegmentLineSegmentIntersection.java) **- O(1)** + +<details>

+<summary>More geometry algorithms</summary> + +- [Angle between 3D vectors](src/main/java/com/williamfiset/algorithms/geometry/AngleBetweenVectors3D.java) **- O(1)** +- [Convex polygon cut](src/main/java/com/williamfiset/algorithms/geometry/ConvexPolygonCutWithLineSegment.java) **- O(n)** - [Coplanar points test (are four 3D points on the same plane)](src/main/java/com/williamfiset/algorithms/geometry/CoplanarPoints.java) **- O(1)** - [Line class (handy infinite line class)](src/main/java/com/williamfiset/algorithms/geometry/Line.java) **- O(1)** - [Line-circle intersection point(s)](src/main/java/com/williamfiset/algorithms/geometry/LineCircleIntersection.js) **- O(1)** -- [Line segment-circle intersection point(s)](src/main/java/com/williamfiset/algorithms/geometry/LineSegmentCircleIntersection.js) **- O(1)** - [Line segment to general form (ax + by = c)](src/main/java/com/williamfiset/algorithms/geometry/LineSegmentToGeneralForm.java) **- O(1)** -- [Line segment-line segment intersection](src/main/java/com/williamfiset/algorithms/geometry/LineSegmentLineSegmentIntersection.java) **- O(1)** - [Longitude-Latitude geographic distance](src/main/java/com/williamfiset/algorithms/geometry/LongitudeLatitudeGeographicDistance.java) **- O(1)** - [Point is inside triangle check](src/main/java/com/williamfiset/algorithms/geometry/PointInsideTriangle.java) **- O(1)** - [Point rotation about point](src/main/java/com/williamfiset/algorithms/geometry/PointRotation.java) **- O(1)** -- [Triangle area algorithms](src/main/java/com/williamfiset/algorithms/geometry/TriangleArea.java) **- O(1)** - [[UNTESTED] Circle-circle intersection area](src/main/java/com/williamfiset/algorithms/geometry/CircleCircleIntersectionArea.java) **- O(1)** - [[UNTESTED] Circular segment area](src/main/java/com/williamfiset/algorithms/geometry/CircularSegmentArea.java) **- O(1)** +</details>
+ # Graph theory ### Tree algorithms