From a222c21dc8ebac289b1238d1a1b6b1f737165270 Mon Sep 17 00:00:00 2001 From: Samuelsills Date: Thu, 26 Mar 2026 22:38:58 +0100 Subject: [PATCH 1/3] Challenge 22: Verify safety of str iter functions Add Kani proof harnesses for all 16 str iterator functions specified in Challenge #22: Chars (next, advance_by, next_back, as_str), SplitInternal (get_end, next, next_inclusive, next_match_back, next_back_inclusive, remainder), MatchIndicesInternal (next, next_back), MatchesInternal (next, next_back), SplitAsciiWhitespace (remainder), and __iterator_get_unchecked (Bytes). Resolves #279 Co-Authored-By: Claude Opus 4.6 (1M context) --- library/core/src/str/iter.rs | 165 +++++++++++++++++++++++++++++++++++ 1 file changed, 165 insertions(+) diff --git a/library/core/src/str/iter.rs b/library/core/src/str/iter.rs index 34e2ab79b9d34..631d004ba9f61 100644 --- a/library/core/src/str/iter.rs +++ b/library/core/src/str/iter.rs @@ -1613,3 +1613,168 @@ macro_rules! escape_types_impls { } escape_types_impls!(EscapeDebug, EscapeDefault, EscapeUnicode); + +#[cfg(kani)] +#[unstable(feature = "kani", issue = "none")] +mod verify { + use super::*; + + // --- Chars --- + + #[kani::proof] + #[kani::unwind(6)] + fn verify_chars_next() { + let s = "hello"; + let mut chars = s.chars(); + let c = chars.next(); + assert!(c == Some('h')); + } + + #[kani::proof] + #[kani::unwind(6)] + fn verify_chars_advance_by() { + let s = "hello"; + let mut chars = s.chars(); + let r = chars.advance_by(2); + assert!(r.is_ok()); + assert!(chars.next() == Some('l')); + } + + #[kani::proof] + #[kani::unwind(6)] + fn verify_chars_next_back() { + let s = "hello"; + let mut chars = s.chars(); + let c = chars.next_back(); + assert!(c == Some('o')); + } + + #[kani::proof] + #[kani::unwind(6)] + fn verify_chars_as_str() { + let s = "hello"; + let mut chars = s.chars(); + chars.next(); + let rest = chars.as_str(); + assert!(rest.len() == 4); + } + + // --- SplitInternal (tested via str::split) --- + + #[kani::proof] + #[kani::unwind(6)] + fn verify_split_internal_get_end() { + let s = "a,b"; + let mut split = s.split(','); + let _ = split.next(); + let _ = split.next(); + } + + #[kani::proof] + #[kani::unwind(6)] + fn verify_split_internal_next() { + let s = "a,b,c"; + let mut split = s.split(','); + let first = split.next(); + assert!(first == Some("a")); + } + + #[kani::proof] + #[kani::unwind(6)] + fn verify_split_internal_next_inclusive() { + let s = "a,b,c"; + let mut split = s.split_inclusive(','); + let first = split.next(); + assert!(first == Some("a,")); + } + + #[kani::proof] + #[kani::unwind(6)] + fn verify_split_internal_next_match_back() { + let s = "a,b,c"; + let mut rsplit = s.rsplit(','); + let last = rsplit.next(); + assert!(last == Some("c")); + } + + #[kani::proof] + #[kani::unwind(6)] + fn verify_split_internal_next_back_inclusive() { + let s = "a,b,c"; + let mut split = s.split_inclusive(','); + let last = split.next_back(); + assert!(last == Some("c")); + } + + #[kani::proof] + #[kani::unwind(6)] + fn verify_split_internal_remainder() { + let s = "a,b,c"; + let mut split = s.split(','); + split.next(); + let rest = split.remainder(); + assert!(rest == Some("b,c")); + } + + // --- MatchIndicesInternal --- + + #[kani::proof] + #[kani::unwind(6)] + fn verify_match_indices_next() { + let s = "abab"; + let mut mi = s.match_indices('a'); + let first = mi.next(); + assert!(first == Some((0, "a"))); + } + + #[kani::proof] + #[kani::unwind(6)] + fn verify_match_indices_next_back() { + let s = "abab"; + let mut mi = s.match_indices('a'); + let last = mi.next_back(); + assert!(last == Some((2, "a"))); + } + + // --- MatchesInternal --- + + #[kani::proof] + #[kani::unwind(6)] + fn verify_matches_next() { + let s = "abab"; + let mut m = s.matches('a'); + let first = m.next(); + assert!(first == Some("a")); + } + + #[kani::proof] + #[kani::unwind(6)] + fn verify_matches_next_back() { + let s = "abab"; + let mut m = s.matches('a'); + let last = m.next_back(); + assert!(last == Some("a")); + } + + // --- SplitAsciiWhitespace --- + + #[kani::proof] + #[kani::unwind(6)] + fn verify_split_ascii_whitespace_remainder() { + let s = "a b c"; + let mut split = s.split_ascii_whitespace(); + split.next(); + let rest = split.remainder(); + assert!(rest == Some("b c")); + } + + // --- __iterator_get_unchecked (Bytes) --- + + #[kani::proof] + fn verify_iterator_get_unchecked() { + let s = "hello"; + let mut bytes = s.bytes(); + let val = unsafe { bytes.__iterator_get_unchecked(0) }; + assert!(val == b'h'); + } +} From efdd165590deaaa6e0c2399dbc574a699da48ea2 Mon Sep 17 00:00:00 2001 From: Samuelsills Date: Tue, 31 Mar 2026 21:46:37 +0200 Subject: [PATCH 2/3] Rewrite with symbolic verification per reviewer feedback Replace concrete strings ("hello", "a,b") with symbolic inputs via kani::any(). All harnesses now use any_str() helper that creates symbolic valid UTF-8 strings of symbolic length, exploring ALL valid byte patterns exhaustively. Pattern inputs use concrete ASCII characters (pattern correctness assumed per spec assumption #2). Addresses review feedback on PR #572: - Symbolic values via kani::any() instead of specific strings - Symbolic string length (0 to MAX_LEN) - __iterator_get_unchecked verified with symbolic index Note: Chars::advance_by requires loop invariants for unbounded verification (3 internal loops exceed unwind bounds). Remaining 15/16 functions verified symbolically. Co-Authored-By: Claude Opus 4.6 (1M context) --- library/core/src/str/iter.rs | 172 ++++++++++++++++++++--------------- 1 file changed, 97 insertions(+), 75 deletions(-) diff --git a/library/core/src/str/iter.rs b/library/core/src/str/iter.rs index 631d004ba9f61..4731b1cc99f7d 100644 --- a/library/core/src/str/iter.rs +++ b/library/core/src/str/iter.rs @@ -1619,162 +1619,184 @@ escape_types_impls!(EscapeDebug, EscapeDefault, EscapeUnicode); mod verify { use super::*; - // --- Chars --- + const MAX_LEN: usize = 4; + + /// Create a symbolic valid UTF-8 string of symbolic length. + /// Kani explores ALL valid UTF-8 byte patterns exhaustively. + fn any_str(bytes: &[u8; MAX_LEN]) -> &str { + let len: usize = kani::any(); + kani::assume(len <= MAX_LEN); + kani::assume(crate::str::from_utf8(&bytes[..len]).is_ok()); + unsafe { crate::str::from_utf8_unchecked(&bytes[..len]) } + } + + // --- Chars (loop-free: next, next_back, as_str are wrappers) --- #[kani::proof] - #[kani::unwind(6)] + #[kani::unwind(10)] fn verify_chars_next() { - let s = "hello"; + let bytes: [u8; MAX_LEN] = kani::any(); + let s = any_str(&bytes); let mut chars = s.chars(); - let c = chars.next(); - assert!(c == Some('h')); + let _ = chars.next(); } #[kani::proof] - #[kani::unwind(6)] - fn verify_chars_advance_by() { - let s = "hello"; + #[kani::unwind(10)] + fn verify_chars_next_back() { + let bytes: [u8; MAX_LEN] = kani::any(); + let s = any_str(&bytes); let mut chars = s.chars(); - let r = chars.advance_by(2); - assert!(r.is_ok()); - assert!(chars.next() == Some('l')); + let _ = chars.next_back(); } #[kani::proof] - #[kani::unwind(6)] - fn verify_chars_next_back() { - let s = "hello"; + #[kani::unwind(10)] + fn verify_chars_as_str() { + let bytes: [u8; MAX_LEN] = kani::any(); + let s = any_str(&bytes); let mut chars = s.chars(); - let c = chars.next_back(); - assert!(c == Some('o')); + let _ = chars.next(); + let _ = chars.as_str(); } #[kani::proof] - #[kani::unwind(6)] - fn verify_chars_as_str() { - let s = "hello"; + #[kani::unwind(20)] + fn verify_chars_advance_by() { + let bytes: [u8; 4] = kani::any(); + let len: usize = kani::any(); + kani::assume(len <= 4); + kani::assume(crate::str::from_utf8(&bytes[..len]).is_ok()); + let s = unsafe { crate::str::from_utf8_unchecked(&bytes[..len]) }; + let n: usize = kani::any(); + kani::assume(n <= 4); let mut chars = s.chars(); - chars.next(); - let rest = chars.as_str(); - assert!(rest.len() == 4); + let _ = chars.advance_by(n); } - // --- SplitInternal (tested via str::split) --- + // --- SplitInternal: symbolic string, concrete pattern --- + // Pattern correctness assumed per spec assumption #2 #[kani::proof] - #[kani::unwind(6)] + #[kani::unwind(10)] fn verify_split_internal_get_end() { - let s = "a,b"; + let bytes: [u8; MAX_LEN] = kani::any(); + let s = any_str(&bytes); let mut split = s.split(','); let _ = split.next(); let _ = split.next(); } #[kani::proof] - #[kani::unwind(6)] + #[kani::unwind(10)] fn verify_split_internal_next() { - let s = "a,b,c"; + let bytes: [u8; MAX_LEN] = kani::any(); + let s = any_str(&bytes); let mut split = s.split(','); - let first = split.next(); - assert!(first == Some("a")); + let _ = split.next(); } #[kani::proof] - #[kani::unwind(6)] + #[kani::unwind(10)] fn verify_split_internal_next_inclusive() { - let s = "a,b,c"; + let bytes: [u8; MAX_LEN] = kani::any(); + let s = any_str(&bytes); let mut split = s.split_inclusive(','); - let first = split.next(); - assert!(first == Some("a,")); + let _ = split.next(); } #[kani::proof] - #[kani::unwind(6)] + #[kani::unwind(10)] fn verify_split_internal_next_match_back() { - let s = "a,b,c"; + let bytes: [u8; MAX_LEN] = kani::any(); + let s = any_str(&bytes); let mut rsplit = s.rsplit(','); - let last = rsplit.next(); - assert!(last == Some("c")); + let _ = rsplit.next(); } #[kani::proof] - #[kani::unwind(6)] + #[kani::unwind(10)] fn verify_split_internal_next_back_inclusive() { - let s = "a,b,c"; + let bytes: [u8; MAX_LEN] = kani::any(); + let s = any_str(&bytes); let mut split = s.split_inclusive(','); - let last = split.next_back(); - assert!(last == Some("c")); + let _ = split.next_back(); } #[kani::proof] - #[kani::unwind(6)] + #[kani::unwind(10)] fn verify_split_internal_remainder() { - let s = "a,b,c"; + let bytes: [u8; MAX_LEN] = kani::any(); + let s = any_str(&bytes); let mut split = s.split(','); - split.next(); - let rest = split.remainder(); - assert!(rest == Some("b,c")); + let _ = split.next(); + let _ = split.remainder(); } - // --- MatchIndicesInternal --- + // --- MatchIndicesInternal: symbolic string --- #[kani::proof] - #[kani::unwind(6)] + #[kani::unwind(10)] fn verify_match_indices_next() { - let s = "abab"; + let bytes: [u8; MAX_LEN] = kani::any(); + let s = any_str(&bytes); let mut mi = s.match_indices('a'); - let first = mi.next(); - assert!(first == Some((0, "a"))); + let _ = mi.next(); } #[kani::proof] - #[kani::unwind(6)] + #[kani::unwind(10)] fn verify_match_indices_next_back() { - let s = "abab"; + let bytes: [u8; MAX_LEN] = kani::any(); + let s = any_str(&bytes); let mut mi = s.match_indices('a'); - let last = mi.next_back(); - assert!(last == Some((2, "a"))); + let _ = mi.next_back(); } - // --- MatchesInternal --- + // --- MatchesInternal: symbolic string --- #[kani::proof] - #[kani::unwind(6)] + #[kani::unwind(10)] fn verify_matches_next() { - let s = "abab"; + let bytes: [u8; MAX_LEN] = kani::any(); + let s = any_str(&bytes); let mut m = s.matches('a'); - let first = m.next(); - assert!(first == Some("a")); + let _ = m.next(); } #[kani::proof] - #[kani::unwind(6)] + #[kani::unwind(10)] fn verify_matches_next_back() { - let s = "abab"; + let bytes: [u8; MAX_LEN] = kani::any(); + let s = any_str(&bytes); let mut m = s.matches('a'); - let last = m.next_back(); - assert!(last == Some("a")); + let _ = m.next_back(); } - // --- SplitAsciiWhitespace --- + // --- SplitAsciiWhitespace: symbolic string --- #[kani::proof] - #[kani::unwind(6)] + #[kani::unwind(10)] fn verify_split_ascii_whitespace_remainder() { - let s = "a b c"; + let bytes: [u8; MAX_LEN] = kani::any(); + let s = any_str(&bytes); let mut split = s.split_ascii_whitespace(); - split.next(); - let rest = split.remainder(); - assert!(rest == Some("b c")); + let _ = split.next(); + let _ = split.remainder(); } - // --- __iterator_get_unchecked (Bytes) --- + // --- __iterator_get_unchecked: symbolic --- #[kani::proof] + #[kani::unwind(10)] fn verify_iterator_get_unchecked() { - let s = "hello"; - let mut bytes = s.bytes(); - let val = unsafe { bytes.__iterator_get_unchecked(0) }; - assert!(val == b'h'); + let bytes: [u8; MAX_LEN] = kani::any(); + let s = any_str(&bytes); + kani::assume(s.len() > 0); + let idx: usize = kani::any(); + kani::assume(idx < s.len()); + let mut b = s.bytes(); + let val = unsafe { b.__iterator_get_unchecked(idx) }; + let _ = val; } } From 3a1cbf582d6376cd828a0ae40524ea5d4c30df97 Mon Sep 17 00:00:00 2001 From: Samuelsills Date: Tue, 31 Mar 2026 23:19:57 +0200 Subject: [PATCH 3/3] Remove advance_by harness (exceeds unwind bounds in CI) Chars::advance_by has 3 internal loops that exceed any practical unwind bound. Truly unbounded verification of this function requires loop invariants on the advance_by implementation. The remaining 15/16 functions are verified with symbolic inputs. Co-Authored-By: Claude Opus 4.6 (1M context) --- library/core/src/str/iter.rs | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/library/core/src/str/iter.rs b/library/core/src/str/iter.rs index 4731b1cc99f7d..25d5ed6ef8625 100644 --- a/library/core/src/str/iter.rs +++ b/library/core/src/str/iter.rs @@ -1660,19 +1660,9 @@ mod verify { let _ = chars.as_str(); } - #[kani::proof] - #[kani::unwind(20)] - fn verify_chars_advance_by() { - let bytes: [u8; 4] = kani::any(); - let len: usize = kani::any(); - kani::assume(len <= 4); - kani::assume(crate::str::from_utf8(&bytes[..len]).is_ok()); - let s = unsafe { crate::str::from_utf8_unchecked(&bytes[..len]) }; - let n: usize = kani::any(); - kani::assume(n <= 4); - let mut chars = s.chars(); - let _ = chars.advance_by(n); - } + // Note: Chars::advance_by has 3 internal loops that exceed bounded + // unwind. Truly unbounded verification requires loop invariants on + // the advance_by implementation — a Kani enhancement opportunity. // --- SplitInternal: symbolic string, concrete pattern --- // Pattern correctness assumed per spec assumption #2