From 574aaf483d83e6ae3e12066664016deaf0a71dac Mon Sep 17 00:00:00 2001 From: Hamish Peebles Date: Thu, 26 Mar 2026 17:10:40 +0000 Subject: [PATCH 01/17] Implement `entry` for `BTreeMap` similar to std lib --- benchmarks/btreemap/src/main.rs | 40 +++ src/btreemap.rs | 458 ++++++++++++++++++-------------- src/btreemap/entry.rs | 129 +++++++++ src/btreemap/proptests.rs | 41 ++- 4 files changed, 466 insertions(+), 202 deletions(-) create mode 100644 src/btreemap/entry.rs diff --git a/benchmarks/btreemap/src/main.rs b/benchmarks/btreemap/src/main.rs index 167b973c..efee0740 100644 --- a/benchmarks/btreemap/src/main.rs +++ b/benchmarks/btreemap/src/main.rs @@ -1,6 +1,7 @@ use benchmarks::{random::Random, vec::UnboundedVecN}; use canbench_rs::{bench, bench_fn, BenchResult}; use candid::Principal; +use ic_stable_structures::btreemap::entry::Entry::Occupied; use ic_stable_structures::memory_manager::{MemoryId, MemoryManager}; use ic_stable_structures::{storable::Blob, BTreeMap, DefaultMemoryImpl, Memory, Storable}; use std::ops::Bound; @@ -396,6 +397,45 @@ pub fn btreemap_v2_get_10mib_values() -> BenchResult { }) } +#[bench(raw)] +pub fn btreemap_get_and_incr() -> BenchResult { + let count = 10000; + let mut btree = BTreeMap::new(DefaultMemoryImpl::default()); + let mut rng = Rng::from_seed(0); + let values = generate_random_kv::(count, &mut rng); + for (key, value) in values.iter().copied() { + btree.insert(key, value); + } + + bench_fn(|| { + for (key, _) in values { + let existing = btree.get(&key).unwrap(); + btree.insert(key, existing + 1); + } + }) +} + +#[bench(raw)] +pub fn btreemap_get_and_incr_via_entry() -> BenchResult { + let count = 10000; + let mut btree = BTreeMap::new(DefaultMemoryImpl::default()); + let mut rng = Rng::from_seed(0); + let values = generate_random_kv::(count, &mut rng); + for (key, value) in values.iter().copied() { + btree.insert(key, value); + } + + bench_fn(|| { + for (key, _) in values { + let Occupied(e) = btree.entry(key) else { + panic!() + }; + let existing = e.get(); + e.insert(existing + 1); + } + }) +} + // Benchmarks for `BTreeMap::contains_key`. bench_tests! { // blob K x 128 diff --git a/src/btreemap.rs b/src/btreemap.rs index c6f4ae92..ad8a5d51 100644 --- a/src/btreemap.rs +++ b/src/btreemap.rs @@ -49,6 +49,7 @@ //! ---------------------------------------- //! ``` mod allocator; +pub mod entry; mod iter; mod node; use crate::btreemap::iter::{IterInternal, KeysIter, ValuesIter}; @@ -59,7 +60,7 @@ use crate::{ }; use allocator::Allocator; pub use iter::Iter; -use node::{DerivedPageSize, Entry, Node, NodeType, PageSize, Version}; +use node::{DerivedPageSize, Node, NodeType, PageSize, Version}; use std::borrow::Cow; use std::marker::PhantomData; use std::ops::{Bound, RangeBounds}; @@ -500,64 +501,103 @@ where pub fn insert(&mut self, key: K, value: V) -> Option { let value = value.into_bytes_checked(); - let root = if self.root_addr == NULL { + let (mut node, search_result) = self.find_node_for_insert(&key); + + match search_result { + Ok(idx) => Some(V::from_bytes(Cow::Owned( + self.update_value(&mut node, idx, value), + ))), + Err(idx) => { + node.insert_entry(idx, (key, value)); + self.save_node(&mut node); + + // Update the length. + self.length += 1; + self.save_header(); + None + } + } + } + + pub fn entry(&mut self, key: K) -> entry::Entry { + let (node, search_result) = self.find_node_for_insert(&key); + + match search_result { + Ok(idx) => entry::Entry::Occupied(entry::OccupiedEntry { + map: self, + key, + node, + idx, + }), + Err(idx) => entry::Entry::Vacant(entry::VacantEntry { + map: self, + key, + node, + idx, + }), + } + } + + fn find_node_for_insert(&mut self, key: &K) -> (Node, Result) { + if self.root_addr == NULL { // No root present. Allocate one. let node = self.allocate_node(NodeType::Leaf); self.root_addr = node.address(); self.save_header(); - node - } else { - // Load the root from memory. - let mut root = self.load_node(self.root_addr); + return (node, Err(0)); + } - // Check if the key already exists in the root. - if let Ok(idx) = root.search(&key, self.memory()) { - // Key found, replace its value and return the old one. - return Some(V::from_bytes(Cow::Owned( - self.update_value(&mut root, idx, value), - ))); - } + // Load the root from memory. + let mut root = self.load_node(self.root_addr); - // If the root is full, we need to introduce a new node as the root. - // - // NOTE: In the case where we are overwriting an existing key, then introducing - // a new root node isn't strictly necessary. However, that's a micro-optimization - // that adds more complexity than it's worth. - if root.is_full() { - // The root is full. Allocate a new node that will be used as the new root. - let mut new_root = self.allocate_node(NodeType::Internal); - - // The new root has the old root as its only child. - new_root.push_child(self.root_addr); - - // Update the root address. - self.root_addr = new_root.address(); - self.save_header(); + // Check if the key already exists in the root. + if let Ok(idx) = root.search(&key, self.memory()) { + // Key found + return (root, Ok(idx)); + } - // Split the old (full) root. - self.split_child(&mut new_root, 0); + // If the root is full, we need to introduce a new node as the root. + // + // NOTE: In the case where we are overwriting an existing key, then introducing + // a new root node isn't strictly necessary. However, that's a micro-optimization + // that adds more complexity than it's worth. + if root.is_full() { + // The root is full. Allocate a new node that will be used as the new root. + let mut new_root = self.allocate_node(NodeType::Internal); - new_root - } else { - root - } - }; + // The new root has the old root as its only child. + new_root.push_child(self.root_addr); - self.insert_nonfull(root, key, value) - .map(Cow::Owned) - .map(V::from_bytes) + // Update the root address. + self.root_addr = new_root.address(); + self.save_header(); + + // Split the old (full) root. + self.split_child(&mut new_root, 0); + + root = new_root; + } + + self.find_node_for_insert_nonfull(root, key) } - /// Inserts an entry into a node that is *not full*. - fn insert_nonfull(&mut self, mut node: Node, key: K, value: Vec) -> Option> { + /// Finds the node the key (and its corresponding value) should be inserted into + /// + /// PRECONDITION: + /// - `node` is not full. + fn find_node_for_insert_nonfull( + &mut self, + mut node: Node, + key: &K, + ) -> (Node, Result) { // We're guaranteed by the caller that the provided node is not full. assert!(!node.is_full()); // Look for the key in the node. - match node.search(&key, self.memory()) { + match node.search(key, self.memory()) { Ok(idx) => { - // Key found, replace its value and return the old one. - Some(self.update_value(&mut node, idx, value)) + // Key found + (node, Ok(idx)) } Err(idx) => { // The key isn't in the node. `idx` is where that key should be inserted. @@ -565,16 +605,7 @@ where match node.node_type() { NodeType::Leaf => { // The node is a non-full leaf. - // Insert the entry at the proper location. - node.insert_entry(idx, (key, value)); - self.save_node(&mut node); - - // Update the length. - self.length += 1; - self.save_header(); - - // No previous value to return. - None + (node, Err(idx)) } NodeType::Internal => { // The node is an internal node. @@ -583,9 +614,9 @@ where if child.is_full() { // Check if the key already exists in the child. - if let Ok(idx) = child.search(&key, self.memory()) { - // Key found, replace its value and return the old one. - return Some(self.update_value(&mut child, idx, value)); + if let Ok(idx) = child.search(key, self.memory()) { + // Key found + return (child, Ok(idx)); } // The child is full. Split the child. @@ -593,14 +624,14 @@ where // The children have now changed. Search again for // the child where we need to store the entry in. - let idx = node.search(&key, self.memory()).unwrap_or_else(|idx| idx); + let idx = node.search(key, self.memory()).unwrap_or_else(|idx| idx); child = self.load_node(node.child(idx)); } // The child should now be not full. assert!(!child.is_full()); - self.insert_nonfull(child, key, value) + self.find_node_for_insert_nonfull(child, key) } } } @@ -796,25 +827,7 @@ where NodeType::Leaf => { match node.search(key, self.memory()) { Ok(idx) => { - // Case 1: The node is a leaf node and the key exists in it. - // This is the simplest case. The key is removed from the leaf. - let value = node.remove_entry(idx, self.memory()).1; - self.length -= 1; - - if node.entries_len() == 0 { - assert_eq!( - node.address(), self.root_addr, - "Removal can only result in an empty leaf node if that node is the root" - ); - - // Deallocate the empty node. - self.deallocate_node(node); - self.root_addr = NULL; - } else { - self.save_node(&mut node); - } - - self.save_header(); + let value = self.remove_from_leaf_node(node, idx); Some(value) } _ => None, // Key not found. @@ -823,129 +836,8 @@ where NodeType::Internal => { match node.search(key, self.memory()) { Ok(idx) => { - // Case 2: The node is an internal node and the key exists in it. - - let left_child = self.load_node(node.child(idx)); - if left_child.can_remove_entry_without_merging() { - // Case 2.a: A key can be removed from the left child without merging. - // - // parent - // [..., key, ...] - // / \ - // [left child] [...] - // / \ - // [...] [..., key predecessor] - // - // In this case, we replace `key` with the key's predecessor from the - // left child's subtree, then we recursively delete the key's - // predecessor for the following end result: - // - // parent - // [..., key predecessor, ...] - // / \ - // [left child] [...] - // / \ - // [...] [...] - - // Recursively delete the predecessor. - // TODO(EXC-1034): Do this in a single pass. - let predecessor = left_child.get_max(self.memory()); - self.remove_helper(left_child, &predecessor.0)?; - - // Replace the `key` with its predecessor. - let (_, old_value) = node.swap_entry(idx, predecessor, self.memory()); - - // Save the parent node. - self.save_node(&mut node); - return Some(old_value); - } - - let right_child = self.load_node(node.child(idx + 1)); - if right_child.can_remove_entry_without_merging() { - // Case 2.b: A key can be removed from the right child without merging. - // - // parent - // [..., key, ...] - // / \ - // [...] [right child] - // / \ - // [key successor, ...] [...] - // - // In this case, we replace `key` with the key's successor from the - // right child's subtree, then we recursively delete the key's - // successor for the following end result: - // - // parent - // [..., key successor, ...] - // / \ - // [...] [right child] - // / \ - // [...] [...] - - // Recursively delete the successor. - // TODO(EXC-1034): Do this in a single pass. - let successor = right_child.get_min(self.memory()); - self.remove_helper(right_child, &successor.0)?; - - // Replace the `key` with its successor. - let (_, old_value) = node.swap_entry(idx, successor, self.memory()); - - // Save the parent node. - self.save_node(&mut node); - return Some(old_value); - } - - // Case 2.c: Both the left and right child are at their minimum sizes. - // - // parent - // [..., key, ...] - // / \ - // [left child] [right child] - // - // In this case, we merge (left child, key, right child) into a single - // node. The result will look like this: - // - // parent - // [... ...] - // | - // [left child, `key`, right child] <= new child - // - // We then recurse on this new child to delete `key`. - // - // If `parent` becomes empty (which can only happen if it's the root), - // then `parent` is deleted and `new_child` becomes the new root. - assert!(left_child.at_minimum()); - assert!(right_child.at_minimum()); - - // Merge the right child into the left child. - let mut new_child = self.merge( - right_child, - left_child, - node.remove_entry(idx, self.memory()), - ); - - // Remove the right child from the parent node. - node.remove_child(idx + 1); - - if node.entries_len() == 0 { - // Can only happen if this node is root. - assert_eq!(node.address(), self.root_addr); - assert_eq!(node.child(0), new_child.address()); - assert_eq!(node.children_len(), 1); - - self.root_addr = new_child.address(); - - // Deallocate the root node. - self.deallocate_node(node); - self.save_header(); - } else { - self.save_node(&mut node); - } - - self.save_node(&mut new_child); - - // Recursively delete the key. - self.remove_helper(new_child, key) + let value = self.remove_from_internal_node(node, idx, key); + Some(value) } Err(idx) => { // Case 3: The node is an internal node and the key does NOT exist in it. @@ -1150,6 +1042,170 @@ where } } + /// PRECONDITION + /// - `node` is a leaf node + /// - `node.can_remove_entry_without_merging()` is true + fn remove_from_leaf_node(&mut self, mut node: Node, idx: usize) -> Vec { + debug_assert!(node.node_type() == NodeType::Leaf); + debug_assert!(node.can_remove_entry_without_merging()); + + // Case 1: The node is a leaf node and the key exists in it. + // This is the simplest case. The key is removed from the leaf. + let value = node.remove_entry(idx, self.memory()).1; + self.length -= 1; + + if node.entries_len() == 0 { + assert_eq!( + node.address(), + self.root_addr, + "Removal can only result in an empty leaf node if that node is the root" + ); + + // Deallocate the empty node. + self.deallocate_node(node); + self.root_addr = NULL; + } else { + self.save_node(&mut node); + } + + self.save_header(); + value + } + + /// PRECONDITION + /// - `node` is an internal node + /// - `node` contains `key` at index `idx` + /// - `node.can_remove_entry_without_merging()` is true + fn remove_from_internal_node(&mut self, mut node: Node, idx: usize, key: &K) -> Vec { + debug_assert!(node.node_type() == NodeType::Internal); + debug_assert!(node.search(key, self.memory()) == Ok(idx)); + debug_assert!(node.can_remove_entry_without_merging()); + + // Case 2: The node is an internal node and the key exists in it. + + let left_child = self.load_node(node.child(idx)); + if left_child.can_remove_entry_without_merging() { + // Case 2.a: A key can be removed from the left child without merging. + // + // parent + // [..., key, ...] + // / \ + // [left child] [...] + // / \ + // [...] [..., key predecessor] + // + // In this case, we replace `key` with the key's predecessor from the + // left child's subtree, then we recursively delete the key's + // predecessor for the following end result: + // + // parent + // [..., key predecessor, ...] + // / \ + // [left child] [...] + // / \ + // [...] [...] + + // Recursively delete the predecessor. + // TODO(EXC-1034): Do this in a single pass. + let predecessor = left_child.get_max(self.memory()); + self.remove_helper(left_child, &predecessor.0).unwrap(); + + // Replace the `key` with its predecessor. + let (_, old_value) = node.swap_entry(idx, predecessor, self.memory()); + + // Save the parent node. + self.save_node(&mut node); + return old_value; + } + + let right_child = self.load_node(node.child(idx + 1)); + if right_child.can_remove_entry_without_merging() { + // Case 2.b: A key can be removed from the right child without merging. + // + // parent + // [..., key, ...] + // / \ + // [...] [right child] + // / \ + // [key successor, ...] [...] + // + // In this case, we replace `key` with the key's successor from the + // right child's subtree, then we recursively delete the key's + // successor for the following end result: + // + // parent + // [..., key successor, ...] + // / \ + // [...] [right child] + // / \ + // [...] [...] + + // Recursively delete the successor. + // TODO(EXC-1034): Do this in a single pass. + let successor = right_child.get_min(self.memory()); + self.remove_helper(right_child, &successor.0).unwrap(); + + // Replace the `key` with its successor. + let (_, old_value) = node.swap_entry(idx, successor, self.memory()); + + // Save the parent node. + self.save_node(&mut node); + return old_value; + } + + // Case 2.c: Both the left and right child are at their minimum sizes. + // + // parent + // [..., key, ...] + // / \ + // [left child] [right child] + // + // In this case, we merge (left child, key, right child) into a single + // node. The result will look like this: + // + // parent + // [... ...] + // | + // [left child, `key`, right child] <= new child + // + // We then recurse on this new child to delete `key`. + // + // If `parent` becomes empty (which can only happen if it's the root), + // then `parent` is deleted and `new_child` becomes the new root. + assert!(left_child.at_minimum()); + assert!(right_child.at_minimum()); + + // Merge the right child into the left child. + let mut new_child = self.merge( + right_child, + left_child, + node.remove_entry(idx, self.memory()), + ); + + // Remove the right child from the parent node. + node.remove_child(idx + 1); + + if node.entries_len() == 0 { + // Can only happen if this node is root. + assert_eq!(node.address(), self.root_addr); + assert_eq!(node.child(0), new_child.address()); + assert_eq!(node.children_len(), 1); + + self.root_addr = new_child.address(); + + // Deallocate the root node. + self.deallocate_node(node); + self.save_header(); + } else { + self.save_node(&mut node); + } + + self.save_node(&mut new_child); + + // Recursively delete the key. + self.remove_helper(new_child, key).unwrap() + } + /// Returns an iterator over the entries of the map, sorted by key. /// /// # Example @@ -1272,7 +1328,7 @@ where /// Output: /// [1, 2, 3, 4, 5, 6, 7] (stored in the `into` node) /// `source` is deallocated. - fn merge(&mut self, source: Node, mut into: Node, median: Entry) -> Node { + fn merge(&mut self, source: Node, mut into: Node, median: node::Entry) -> Node { into.merge(source, median, &mut self.allocator); into } diff --git a/src/btreemap/entry.rs b/src/btreemap/entry.rs new file mode 100644 index 00000000..a1417c55 --- /dev/null +++ b/src/btreemap/entry.rs @@ -0,0 +1,129 @@ +use crate::btreemap::node::{Node, NodeType}; +use crate::{BTreeMap, Memory, Storable}; +use std::borrow::Cow; + +pub enum Entry<'a, K: 'a + Storable + Ord + Clone, V: 'a + Storable, M: Memory> { + Vacant(VacantEntry<'a, K, V, M>), + Occupied(OccupiedEntry<'a, K, V, M>), +} + +pub struct VacantEntry<'a, K: 'a + Storable + Ord + Clone, V: 'a + Storable, M: Memory> { + pub(crate) map: &'a mut BTreeMap, + pub(crate) key: K, + pub(crate) node: Node, + pub(crate) idx: usize, +} + +pub struct OccupiedEntry<'a, K: 'a + Storable + Ord + Clone, V: 'a + Storable, M: Memory> { + pub(crate) map: &'a mut BTreeMap, + pub(crate) key: K, + pub(crate) node: Node, + pub(crate) idx: usize, +} + +impl<'a, K: 'a + Storable + Ord + Clone, V: 'a + Storable, M: Memory> Entry<'a, K, V, M> { + pub fn key(&self) -> &K { + match self { + Entry::Occupied(entry) => entry.key(), + Entry::Vacant(entry) => entry.key(), + } + } + + pub fn into_key(self) -> K { + match self { + Entry::Occupied(entry) => entry.into_key(), + Entry::Vacant(entry) => entry.into_key(), + } + } +} + +impl<'a, K: 'a + Storable + Ord + Clone, V: 'a + Storable, M: Memory> VacantEntry<'a, K, V, M> { + pub fn key(&self) -> &K { + &self.key + } + + pub fn into_key(self) -> K { + self.key + } + + pub fn insert(mut self, value: V) { + self.node + .insert_entry(self.idx, (self.key, value.into_bytes_checked())); + + self.map.save_node(&mut self.node); + self.map.length += 1; + self.map.save_header(); + } +} + +impl<'a, K: 'a + Storable + Ord + Clone, V: 'a + Storable, M: Memory> OccupiedEntry<'a, K, V, M> { + pub fn key(&self) -> &K { + &self.key + } + + pub fn into_key(self) -> K { + self.key + } + + pub fn get(&self) -> V { + let value_bytes = self.node.value(self.idx, self.map.memory()); + V::from_bytes(Cow::Borrowed(value_bytes)) + } + + pub fn insert(mut self, value: V) { + self.map + .update_value(&mut self.node, self.idx, value.into_bytes_checked()); + } + + pub fn remove(self) { + if self.node.can_remove_entry_without_merging() { + match self.node.node_type() { + NodeType::Leaf => self.map.remove_from_leaf_node(self.node, self.idx), + NodeType::Internal => self + .map + .remove_from_internal_node(self.node, self.idx, &self.key), + }; + } else { + // TODO avoid using this slow path + let root = self.map.load_node(self.map.root_addr); + self.map.remove_helper(root, &self.key); + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::cell::RefCell; + use std::rc::Rc; + + #[test] + fn entry_end_to_end() { + let mut map = BTreeMap::new(Rc::new(RefCell::new(Vec::new()))); + + for i in 0u32..100 { + let Entry::Vacant(e) = map.entry(i) else { + panic!(); + }; + e.insert(i); + } + + for i in 0u32..100 { + let Entry::Occupied(e) = map.entry(i) else { + panic!(); + }; + assert_eq!(i, e.get()); + e.insert(i + 1); + } + + for i in 0u32..100 { + let Entry::Occupied(e) = map.entry(i) else { + panic!(); + }; + assert_eq!(i + 1, e.get()); + e.remove(); + } + + assert!(map.is_empty()); + } +} diff --git a/src/btreemap/proptests.rs b/src/btreemap/proptests.rs index 4403a42b..318bf300 100644 --- a/src/btreemap/proptests.rs +++ b/src/btreemap/proptests.rs @@ -1,3 +1,4 @@ +use crate::btreemap::entry::Entry; use crate::{ btreemap::{ test::{b, make_memory, run_btree_test}, @@ -9,7 +10,8 @@ use crate::{ use proptest::collection::btree_set as pset; use proptest::collection::vec as pvec; use proptest::prelude::*; -use std::collections::{BTreeMap as StdBTreeMap, BTreeSet}; +use std::collections::{btree_map, BTreeMap as StdBTreeMap, BTreeSet}; +use std::ops::BitXor; use test_strategy::proptest; #[derive(Debug, Clone)] @@ -21,6 +23,7 @@ enum Operation { Values { from: usize, len: usize }, Get(usize), Remove(usize), + EntryInsertOrXor { key: Vec, value: Vec }, Range { from: usize, len: usize }, PopLast, PopFirst, @@ -43,6 +46,8 @@ fn operation_strategy() -> impl Strategy { .prop_map(|(from, len)| Operation::Values { from, len }), 50 => (any::()).prop_map(Operation::Get), 15 => (any::()).prop_map(Operation::Remove), + 10 => (any::>(), any::>()) + .prop_map(|(key, value)| Operation::EntryInsertOrXor { key, value }), 5 => (any::(), any::()) .prop_map(|(from, len)| Operation::Range { from, len }), 2 => Just(Operation::PopFirst), @@ -320,6 +325,40 @@ fn execute_operation( assert_eq!(btree.remove(&k), Some(v)); } } + Operation::EntryInsertOrXor { key, value } => { + match std_btree.entry(key.clone()) { + btree_map::Entry::Occupied(mut entry) => { + let existing = entry.get().clone(); + let xor = existing + .into_iter() + .zip(value.clone()) + .map(|(l, r)| l.bitxor(r)) + .collect::>(); + entry.insert(xor); + } + btree_map::Entry::Vacant(entry) => { + entry.insert(value.clone()); + } + }; + + match btree.entry(key.clone()) { + Entry::Occupied(entry) => { + let existing = entry.get(); + let xor = existing + .into_iter() + .zip(value.clone()) + .map(|(l, r)| l.bitxor(r)) + .collect::>(); + entry.insert(xor); + } + Entry::Vacant(entry) => { + entry.insert(value.clone()); + } + } + + assert_eq!(btree.get(&key).as_ref(), std_btree.get(&key)); + } + Operation::Range { from, len } => { assert_eq!(std_btree.len(), btree.len() as usize); if std_btree.is_empty() { From f6d06fb5190a0d75c3e296027b20ae225c6de10c Mon Sep 17 00:00:00 2001 From: Hamish Peebles Date: Thu, 26 Mar 2026 22:56:04 +0000 Subject: [PATCH 02/17] clippy --- benchmarks/nns/src/nns_vote_cascading/tests.rs | 2 +- src/btreemap.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/benchmarks/nns/src/nns_vote_cascading/tests.rs b/benchmarks/nns/src/nns_vote_cascading/tests.rs index a6b7fd2f..2c26b725 100644 --- a/benchmarks/nns/src/nns_vote_cascading/tests.rs +++ b/benchmarks/nns/src/nns_vote_cascading/tests.rs @@ -168,7 +168,7 @@ fn set_up_worst_case( for neuron_id in num_followees..num_neurons { let previous_neuron_ids = (neuron_id - num_half_followees - 1)..neuron_id; let followee_neuron_ids = previous_neuron_ids - .map(|id| NeuronId::from(id)) + .map(NeuronId::from) .chain(not_voting_neuron_ids.clone().into_iter()) .collect::>(); neuron_store.set_followees( diff --git a/src/btreemap.rs b/src/btreemap.rs index ad8a5d51..8524a5fe 100644 --- a/src/btreemap.rs +++ b/src/btreemap.rs @@ -551,7 +551,7 @@ where let mut root = self.load_node(self.root_addr); // Check if the key already exists in the root. - if let Ok(idx) = root.search(&key, self.memory()) { + if let Ok(idx) = root.search(key, self.memory()) { // Key found return (root, Ok(idx)); } From b620979f21036750574270c13851a76e9eae88b6 Mon Sep 17 00:00:00 2001 From: Hamish Peebles Date: Fri, 27 Mar 2026 07:12:03 +0000 Subject: [PATCH 03/17] Add doc comments --- src/btreemap.rs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/btreemap.rs b/src/btreemap.rs index 8524a5fe..66126a7f 100644 --- a/src/btreemap.rs +++ b/src/btreemap.rs @@ -519,6 +519,11 @@ where } } + /// Searches for the entry within the map where the key belongs. + /// The value returned is either a `VacantEntry` (if the key doesn't already exist), or an + /// `OccupiedEntry` (if the key does exist). + /// Once you have the entry you can `get` the value, `insert` a new value, or `remove` + /// the value. pub fn entry(&mut self, key: K) -> entry::Entry { let (node, search_result) = self.find_node_for_insert(&key); @@ -538,6 +543,10 @@ where } } + /// Finds the node the key (and its corresponding value) should be inserted into + /// Return value is a tuple, the first value is the node the key should be inserted into, the + /// seconds value is a `Result` which is `Ok` if the key exists in the node, or `Err` if not. + /// The value in either case is the index at which the key should be inserted. fn find_node_for_insert(&mut self, key: &K) -> (Node, Result) { if self.root_addr == NULL { // No root present. Allocate one. From 4e92c9256d9b9e8f4a4765e07f3704cb33dcbad7 Mon Sep 17 00:00:00 2001 From: Hamish Peebles Date: Fri, 27 Mar 2026 08:00:00 +0000 Subject: [PATCH 04/17] Remove 2 debug asserts which I added but are overly restrictive --- src/btreemap.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/btreemap.rs b/src/btreemap.rs index 66126a7f..e65201f8 100644 --- a/src/btreemap.rs +++ b/src/btreemap.rs @@ -1053,10 +1053,9 @@ where /// PRECONDITION /// - `node` is a leaf node - /// - `node.can_remove_entry_without_merging()` is true + /// - `node.entries_len() > 1` or node is the root node fn remove_from_leaf_node(&mut self, mut node: Node, idx: usize) -> Vec { debug_assert!(node.node_type() == NodeType::Leaf); - debug_assert!(node.can_remove_entry_without_merging()); // Case 1: The node is a leaf node and the key exists in it. // This is the simplest case. The key is removed from the leaf. @@ -1084,11 +1083,9 @@ where /// PRECONDITION /// - `node` is an internal node /// - `node` contains `key` at index `idx` - /// - `node.can_remove_entry_without_merging()` is true fn remove_from_internal_node(&mut self, mut node: Node, idx: usize, key: &K) -> Vec { debug_assert!(node.node_type() == NodeType::Internal); debug_assert!(node.search(key, self.memory()) == Ok(idx)); - debug_assert!(node.can_remove_entry_without_merging()); // Case 2: The node is an internal node and the key exists in it. From ded30880b3aa3e1ae5fb09bf30d772d7c50d5ca4 Mon Sep 17 00:00:00 2001 From: Hamish Peebles Date: Fri, 27 Mar 2026 08:04:24 +0000 Subject: [PATCH 05/17] Only apply restriction to Leaf nodes --- src/btreemap/entry.rs | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/src/btreemap/entry.rs b/src/btreemap/entry.rs index a1417c55..eb6b103b 100644 --- a/src/btreemap/entry.rs +++ b/src/btreemap/entry.rs @@ -76,18 +76,20 @@ impl<'a, K: 'a + Storable + Ord + Clone, V: 'a + Storable, M: Memory> OccupiedEn } pub fn remove(self) { - if self.node.can_remove_entry_without_merging() { - match self.node.node_type() { - NodeType::Leaf => self.map.remove_from_leaf_node(self.node, self.idx), - NodeType::Internal => self - .map - .remove_from_internal_node(self.node, self.idx, &self.key), - }; - } else { - // TODO avoid using this slow path - let root = self.map.load_node(self.map.root_addr); - self.map.remove_helper(root, &self.key); - } + match self.node.node_type() { + NodeType::Leaf if self.node.can_remove_entry_without_merging() => { + self.map.remove_from_leaf_node(self.node, self.idx); + } + NodeType::Leaf => { + // TODO avoid using this slow path + let root = self.map.load_node(self.map.root_addr); + self.map.remove_helper(root, &self.key); + } + NodeType::Internal => { + self.map + .remove_from_internal_node(self.node, self.idx, &self.key); + } + }; } } From 1b1f24742b8916d5257d12b9a6e5857a23ede706 Mon Sep 17 00:00:00 2001 From: Hamish Peebles Date: Fri, 27 Mar 2026 08:23:53 +0000 Subject: [PATCH 06/17] Use `debug_assert_eq` rather than `debug_assert` --- src/btreemap.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/btreemap.rs b/src/btreemap.rs index e65201f8..7e842204 100644 --- a/src/btreemap.rs +++ b/src/btreemap.rs @@ -1055,7 +1055,7 @@ where /// - `node` is a leaf node /// - `node.entries_len() > 1` or node is the root node fn remove_from_leaf_node(&mut self, mut node: Node, idx: usize) -> Vec { - debug_assert!(node.node_type() == NodeType::Leaf); + debug_assert_eq!(node.node_type(), NodeType::Leaf); // Case 1: The node is a leaf node and the key exists in it. // This is the simplest case. The key is removed from the leaf. @@ -1084,8 +1084,8 @@ where /// - `node` is an internal node /// - `node` contains `key` at index `idx` fn remove_from_internal_node(&mut self, mut node: Node, idx: usize, key: &K) -> Vec { - debug_assert!(node.node_type() == NodeType::Internal); - debug_assert!(node.search(key, self.memory()) == Ok(idx)); + debug_assert_eq!(node.node_type(), NodeType::Internal); + debug_assert_eq!(node.search(key, self.memory()), Ok(idx)); // Case 2: The node is an internal node and the key exists in it. From 787ff04740017a5d51a79b66269645739689d20f Mon Sep 17 00:00:00 2001 From: Hamish Peebles Date: Fri, 27 Mar 2026 11:49:31 +0000 Subject: [PATCH 07/17] Add proptests --- src/btreemap/proptests.rs | 52 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/src/btreemap/proptests.rs b/src/btreemap/proptests.rs index 318bf300..2103b7fb 100644 --- a/src/btreemap/proptests.rs +++ b/src/btreemap/proptests.rs @@ -197,6 +197,58 @@ fn no_memory_leaks(#[strategy(pvec(pvec(0..u8::MAX, 100..10_000), 100))] keys: V assert_eq!(btree.allocator.num_allocated_chunks(), 0); } +#[proptest] +fn entry( + #[strategy(pvec(0..255u8, 10))] keys: Vec, + #[strategy(pvec(0..3u8, 10))] operations: Vec, +) { + run_btree_test(|mut btree| { + let mut std_map = StdBTreeMap::new(); + + // Operations (if Occupied): + // 0 - insert + // 1 - increment + // 2 - remove + // + // Operations (if Vacant): + // - always insert + for (key, operation) in keys.iter().copied().zip(operations.iter().copied()) { + let entry = btree.entry(key); + let std_entry = std_map.entry(key); + match (entry, std_entry) { + (Entry::Occupied(e), btree_map::Entry::Occupied(mut std_e)) => match operation { + 0 => { + e.insert(key); + std_e.insert(key); + } + 1 => { + let existing = e.get(); + let std_existing = *std_e.get(); + e.insert(existing.overflowing_add(1).0); + std_e.insert(std_existing.overflowing_add(1).0); + } + 2 => { + e.remove(); + std_e.remove(); + } + _ => unreachable!(), + }, + (Entry::Vacant(e), btree_map::Entry::Vacant(std_e)) => { + e.insert(key); + std_e.insert(key); + } + _ => prop_assert!(false, "Map out of sync with std lib BTreeMap"), + } + } + + let entries: Vec<_> = btree.iter().map(|e| (*e.key(), e.value())).collect(); + let std_entries: Vec<_> = std_map.into_iter().collect(); + + prop_assert_eq!(entries, std_entries); + Ok(()) + }); +} + // Given an operation, executes it on the given stable btreemap and standard btreemap, verifying // that the result of the operation is equal in both btrees. fn execute_operation( From 68f4f81f622195f313f3cb11bf284983ee3c32a7 Mon Sep 17 00:00:00 2001 From: Hamish Peebles Date: Tue, 31 Mar 2026 09:00:25 +0100 Subject: [PATCH 08/17] Add more functionality --- src/btreemap.rs | 2 ++ src/btreemap/entry.rs | 68 +++++++++++++++++++++++++++++++++++++++---- 2 files changed, 64 insertions(+), 6 deletions(-) diff --git a/src/btreemap.rs b/src/btreemap.rs index 7e842204..fb971ebd 100644 --- a/src/btreemap.rs +++ b/src/btreemap.rs @@ -62,6 +62,7 @@ use allocator::Allocator; pub use iter::Iter; use node::{DerivedPageSize, Node, NodeType, PageSize, Version}; use std::borrow::Cow; +use std::cell::Cell; use std::marker::PhantomData; use std::ops::{Bound, RangeBounds}; @@ -533,6 +534,7 @@ where key, node, idx, + cached_value: Cell::default(), }), Err(idx) => entry::Entry::Vacant(entry::VacantEntry { map: self, diff --git a/src/btreemap/entry.rs b/src/btreemap/entry.rs index eb6b103b..c01915a2 100644 --- a/src/btreemap/entry.rs +++ b/src/btreemap/entry.rs @@ -1,6 +1,6 @@ use crate::btreemap::node::{Node, NodeType}; use crate::{BTreeMap, Memory, Storable}; -use std::borrow::Cow; +use std::cell::Cell; pub enum Entry<'a, K: 'a + Storable + Ord + Clone, V: 'a + Storable, M: Memory> { Vacant(VacantEntry<'a, K, V, M>), @@ -19,6 +19,7 @@ pub struct OccupiedEntry<'a, K: 'a + Storable + Ord + Clone, V: 'a + Storable, M pub(crate) key: K, pub(crate) node: Node, pub(crate) idx: usize, + pub(crate) cached_value: Cell>, } impl<'a, K: 'a + Storable + Ord + Clone, V: 'a + Storable, M: Memory> Entry<'a, K, V, M> { @@ -35,6 +36,37 @@ impl<'a, K: 'a + Storable + Ord + Clone, V: 'a + Storable, M: Memory> Entry<'a, Entry::Vacant(entry) => entry.into_key(), } } + + pub fn and_modify(self, f: impl FnOnce(&mut V)) -> Self { + match self { + Entry::Occupied(entry) => Entry::Occupied(entry.and_modify(f)), + Entry::Vacant(entry) => Entry::Vacant(entry), + } + } + + pub fn or_insert(self, default: V) -> OccupiedEntry<'a, K, V, M> { + match self { + Entry::Occupied(entry) => entry, + Entry::Vacant(entry) => entry.insert(default), + } + } + + pub fn or_insert_with V>(self, default: F) -> OccupiedEntry<'a, K, V, M> { + match self { + Entry::Occupied(entry) => entry, + Entry::Vacant(entry) => entry.insert(default()), + } + } + + pub fn or_default(self) -> OccupiedEntry<'a, K, V, M> + where + V: Default, + { + match self { + Entry::Occupied(entry) => entry, + Entry::Vacant(entry) => entry.insert(Default::default()), + } + } } impl<'a, K: 'a + Storable + Ord + Clone, V: 'a + Storable, M: Memory> VacantEntry<'a, K, V, M> { @@ -46,13 +78,23 @@ impl<'a, K: 'a + Storable + Ord + Clone, V: 'a + Storable, M: Memory> VacantEntr self.key } - pub fn insert(mut self, value: V) { - self.node - .insert_entry(self.idx, (self.key, value.into_bytes_checked())); + pub fn insert(mut self, value: V) -> OccupiedEntry<'a, K, V, M> { + self.node.insert_entry( + self.idx, + (self.key.clone(), value.to_bytes_checked().into_owned()), + ); self.map.save_node(&mut self.node); self.map.length += 1; self.map.save_header(); + + OccupiedEntry { + map: self.map, + key: self.key, + node: self.node, + idx: self.idx, + cached_value: Cell::new(Some(value)), + } } } @@ -66,8 +108,22 @@ impl<'a, K: 'a + Storable + Ord + Clone, V: 'a + Storable, M: Memory> OccupiedEn } pub fn get(&self) -> V { - let value_bytes = self.node.value(self.idx, self.map.memory()); - V::from_bytes(Cow::Borrowed(value_bytes)) + if let Some(cached) = self.cached_value.take() { + cached + } else { + self.get() + } + } + + pub fn and_modify(mut self, f: impl FnOnce(&mut V)) -> Self { + let mut value = self.get(); + f(&mut value); + self.map.update_value( + &mut self.node, + self.idx, + value.to_bytes_checked().into_owned(), + ); + self } pub fn insert(mut self, value: V) { From 29f86a3da734a29c034be06549ba12d0d7892e8f Mon Sep 17 00:00:00 2001 From: Hamish Peebles Date: Tue, 31 Mar 2026 09:13:04 +0100 Subject: [PATCH 09/17] Get the AI to polish it up --- src/btreemap.rs | 18 +- src/btreemap/entry.rs | 397 ++++++++++++++++++++++++++++++++++++------ 2 files changed, 359 insertions(+), 56 deletions(-) diff --git a/src/btreemap.rs b/src/btreemap.rs index fb971ebd..72de15c5 100644 --- a/src/btreemap.rs +++ b/src/btreemap.rs @@ -62,7 +62,6 @@ use allocator::Allocator; pub use iter::Iter; use node::{DerivedPageSize, Node, NodeType, PageSize, Version}; use std::borrow::Cow; -use std::cell::Cell; use std::marker::PhantomData; use std::ops::{Bound, RangeBounds}; @@ -526,6 +525,19 @@ where /// Once you have the entry you can `get` the value, `insert` a new value, or `remove` /// the value. pub fn entry(&mut self, key: K) -> entry::Entry { + // For an empty map the key is trivially absent. Avoid calling + // `find_node_for_insert` here because that eagerly allocates a root + // node and persists `root_addr` to the header, which would leave the + // map in an inconsistent state if the returned `VacantEntry` is dropped + // without calling `insert`. + if self.root_addr == NULL { + return entry::Entry::Vacant(entry::VacantEntry { + map: self, + key, + location: None, + }); + } + let (node, search_result) = self.find_node_for_insert(&key); match search_result { @@ -534,13 +546,11 @@ where key, node, idx, - cached_value: Cell::default(), }), Err(idx) => entry::Entry::Vacant(entry::VacantEntry { map: self, key, - node, - idx, + location: Some((node, idx)), }), } } diff --git a/src/btreemap/entry.rs b/src/btreemap/entry.rs index c01915a2..f9e2e9bc 100644 --- a/src/btreemap/entry.rs +++ b/src/btreemap/entry.rs @@ -1,28 +1,73 @@ +//! Entry API for [`BTreeMap`]. +//! +//! This module provides the [`Entry`] type, which gives efficient in-place access to a map's +//! entries, allowing inspection or modification without redundant key lookups. The API mirrors +//! [`std::collections::btree_map::Entry`] as closely as the stable-memory model allows. +//! +//! # Note on `or_insert` return type +//! +//! The standard library's `or_insert` returns `&mut V`, giving a direct reference into the +//! map. Because values in this [`BTreeMap`] live in stable memory, long-lived references are +//! not possible. Instead, `or_insert` (and its variants) return an [`OccupiedEntry`], which +//! lets you continue reading or modifying the entry without a second key lookup. +//! +//! # Examples +//! +//! ```rust +//! use ic_stable_structures::{BTreeMap, DefaultMemoryImpl}; +//! +//! let mut map: BTreeMap = BTreeMap::new(DefaultMemoryImpl::default()); +//! +//! // Insert a value only when the key is absent. +//! map.entry(1).or_insert(42); +//! assert_eq!(map.get(&1), Some(42)); +//! +//! // Increment a counter, seeding it to 1 if absent. +//! map.entry(1).and_modify(|v| *v += 1).or_insert(1); +//! assert_eq!(map.get(&1), Some(43)); +//! ``` + use crate::btreemap::node::{Node, NodeType}; use crate::{BTreeMap, Memory, Storable}; -use std::cell::Cell; +use std::borrow::Cow; +/// A view into a single entry of a [`BTreeMap`], which may either be occupied or vacant. +/// +/// This type is returned by [`BTreeMap::entry`]. pub enum Entry<'a, K: 'a + Storable + Ord + Clone, V: 'a + Storable, M: Memory> { + /// A vacant entry: the key is not present in the map. Vacant(VacantEntry<'a, K, V, M>), + /// An occupied entry: the key is already present in the map. Occupied(OccupiedEntry<'a, K, V, M>), } +/// A view into a vacant entry in a [`BTreeMap`]. +/// +/// Obtained from [`Entry::Vacant`]. pub struct VacantEntry<'a, K: 'a + Storable + Ord + Clone, V: 'a + Storable, M: Memory> { pub(crate) map: &'a mut BTreeMap, pub(crate) key: K, - pub(crate) node: Node, - pub(crate) idx: usize, + /// Pre-computed insertion point from [`BTreeMap::entry`]. + /// + /// `None` when the map was empty at the time `entry` was called — the root + /// node had not yet been allocated, so we defer the full insert to + /// [`VacantEntry::insert`] to avoid corrupting the map if this entry is + /// dropped without inserting. + pub(crate) location: Option<(Node, usize)>, } +/// A view into an occupied entry in a [`BTreeMap`]. +/// +/// Obtained from [`Entry::Occupied`] or as the result of [`VacantEntry::insert`]. pub struct OccupiedEntry<'a, K: 'a + Storable + Ord + Clone, V: 'a + Storable, M: Memory> { pub(crate) map: &'a mut BTreeMap, pub(crate) key: K, pub(crate) node: Node, pub(crate) idx: usize, - pub(crate) cached_value: Cell>, } impl<'a, K: 'a + Storable + Ord + Clone, V: 'a + Storable, M: Memory> Entry<'a, K, V, M> { + /// Returns a reference to this entry's key. pub fn key(&self) -> &K { match self { Entry::Occupied(entry) => entry.key(), @@ -30,6 +75,7 @@ impl<'a, K: 'a + Storable + Ord + Clone, V: 'a + Storable, M: Memory> Entry<'a, } } + /// Consumes the entry and returns its key. pub fn into_key(self) -> K { match self { Entry::Occupied(entry) => entry.into_key(), @@ -37,115 +83,256 @@ impl<'a, K: 'a + Storable + Ord + Clone, V: 'a + Storable, M: Memory> Entry<'a, } } - pub fn and_modify(self, f: impl FnOnce(&mut V)) -> Self { + /// Ensures a value is present by inserting `default` if the entry is vacant, then returns + /// an [`OccupiedEntry`] for further operations. + /// + /// # Examples + /// + /// ```rust + /// use ic_stable_structures::{BTreeMap, DefaultMemoryImpl}; + /// + /// let mut map: BTreeMap = BTreeMap::new(DefaultMemoryImpl::default()); + /// assert_eq!(map.entry(1).or_insert(10).get(), 10); + /// assert_eq!(map.entry(1).or_insert(99).get(), 10); // already present + /// ``` + pub fn or_insert(self, default: V) -> OccupiedEntry<'a, K, V, M> { match self { - Entry::Occupied(entry) => Entry::Occupied(entry.and_modify(f)), - Entry::Vacant(entry) => Entry::Vacant(entry), + Entry::Occupied(entry) => entry, + Entry::Vacant(entry) => entry.insert(default), } } - pub fn or_insert(self, default: V) -> OccupiedEntry<'a, K, V, M> { + /// Ensures a value is present by inserting the result of `default` if the entry is vacant, + /// then returns an [`OccupiedEntry`]. + /// + /// # Examples + /// + /// ```rust + /// use ic_stable_structures::{BTreeMap, DefaultMemoryImpl}; + /// + /// let mut map: BTreeMap = BTreeMap::new(DefaultMemoryImpl::default()); + /// map.entry(1).or_insert_with(|| 42u32); + /// assert_eq!(map.get(&1), Some(42)); + /// ``` + pub fn or_insert_with(self, default: impl FnOnce() -> V) -> OccupiedEntry<'a, K, V, M> { match self { Entry::Occupied(entry) => entry, - Entry::Vacant(entry) => entry.insert(default), + Entry::Vacant(entry) => entry.insert(default()), } } - pub fn or_insert_with V>(self, default: F) -> OccupiedEntry<'a, K, V, M> { + /// Ensures a value is present by inserting the result of `default`, called with the + /// entry's key, if the entry is vacant. Returns an [`OccupiedEntry`]. + /// + /// # Examples + /// + /// ```rust + /// use ic_stable_structures::{BTreeMap, DefaultMemoryImpl}; + /// + /// let mut map: BTreeMap = BTreeMap::new(DefaultMemoryImpl::default()); + /// map.entry(7).or_insert_with_key(|&k| k * 2); + /// assert_eq!(map.get(&7), Some(14)); + /// ``` + pub fn or_insert_with_key( + self, + default: impl FnOnce(&K) -> V, + ) -> OccupiedEntry<'a, K, V, M> { match self { Entry::Occupied(entry) => entry, - Entry::Vacant(entry) => entry.insert(default()), + Entry::Vacant(entry) => { + let val = default(&entry.key); + entry.insert(val) + } } } + /// Ensures a value is present by inserting `V::default()` if the entry is vacant, then + /// returns an [`OccupiedEntry`]. + /// + /// # Examples + /// + /// ```rust + /// use ic_stable_structures::{BTreeMap, DefaultMemoryImpl}; + /// + /// let mut map: BTreeMap = BTreeMap::new(DefaultMemoryImpl::default()); + /// map.entry(1).or_default(); + /// assert_eq!(map.get(&1), Some(0u32)); + /// ``` pub fn or_default(self) -> OccupiedEntry<'a, K, V, M> where V: Default, { + self.or_insert_with(V::default) + } + + /// Provides in-place mutable access to an occupied entry before any potential inserts + /// via `or_insert` and friends. + /// + /// If the entry is vacant the closure is not called and the entry is returned unchanged, + /// making it possible to chain with `or_insert` and friends. + /// + /// # Examples + /// + /// ```rust + /// use ic_stable_structures::{BTreeMap, DefaultMemoryImpl}; + /// + /// let mut map: BTreeMap = BTreeMap::new(DefaultMemoryImpl::default()); + /// map.insert(1, 10); + /// + /// // Increment existing value, or seed with 1 for a new key. + /// map.entry(1).and_modify(|v| *v += 1).or_insert(1); + /// assert_eq!(map.get(&1), Some(11)); + /// + /// map.entry(2).and_modify(|v| *v += 1).or_insert(1); + /// assert_eq!(map.get(&2), Some(1)); + /// ``` + pub fn and_modify(self, f: impl FnOnce(&mut V)) -> Self { match self { - Entry::Occupied(entry) => entry, - Entry::Vacant(entry) => entry.insert(Default::default()), + Entry::Occupied(entry) => Entry::Occupied(entry.and_modify(f)), + Entry::Vacant(entry) => Entry::Vacant(entry), } } } impl<'a, K: 'a + Storable + Ord + Clone, V: 'a + Storable, M: Memory> VacantEntry<'a, K, V, M> { + /// Returns a reference to the entry's key. pub fn key(&self) -> &K { &self.key } + /// Consumes the entry and returns its key. pub fn into_key(self) -> K { self.key } - pub fn insert(mut self, value: V) -> OccupiedEntry<'a, K, V, M> { - self.node.insert_entry( - self.idx, - (self.key.clone(), value.to_bytes_checked().into_owned()), - ); - - self.map.save_node(&mut self.node); - self.map.length += 1; - self.map.save_header(); - - OccupiedEntry { - map: self.map, - key: self.key, - node: self.node, - idx: self.idx, - cached_value: Cell::new(Some(value)), + /// Inserts `value` into the map at this entry's key and returns an [`OccupiedEntry`] + /// pointing at the newly inserted value. + pub fn insert(self, value: V) -> OccupiedEntry<'a, K, V, M> { + match self.location { + Some((mut node, idx)) => { + node.insert_entry(idx, (self.key.clone(), value.into_bytes_checked())); + self.map.save_node(&mut node); + self.map.length += 1; + self.map.save_header(); + OccupiedEntry { + map: self.map, + key: self.key, + node, + idx, + } + } + None => { + // The map was empty when `entry()` was called. Delegate to the regular + // insert path which handles root allocation, then find the new entry. + let map = self.map; + let key = self.key; + map.insert(key.clone(), value); + let (node, result) = map.find_node_for_insert(&key); + let idx = result.expect("key was just inserted"); + OccupiedEntry { map, key, node, idx } + } } } } impl<'a, K: 'a + Storable + Ord + Clone, V: 'a + Storable, M: Memory> OccupiedEntry<'a, K, V, M> { + /// Returns a reference to the entry's key. pub fn key(&self) -> &K { &self.key } + /// Consumes the entry and returns its key. pub fn into_key(self) -> K { self.key } + /// Returns the current value associated with this entry. pub fn get(&self) -> V { - if let Some(cached) = self.cached_value.take() { - cached - } else { - self.get() - } + let value_bytes = self.node.value(self.idx, self.map.memory()); + V::from_bytes(Cow::Borrowed(value_bytes)) } + /// Provides in-place mutable access to the value in this occupied entry. + /// + /// Reads the current value, calls `f` with a mutable reference to it, then writes the + /// modified value back. Returns `self` so the call can be chained. + /// + /// # Examples + /// + /// ```rust + /// use ic_stable_structures::{BTreeMap, DefaultMemoryImpl, btreemap::entry::Entry}; + /// + /// let mut map: BTreeMap = BTreeMap::new(DefaultMemoryImpl::default()); + /// map.insert(1, 10); + /// + /// if let Entry::Occupied(e) = map.entry(1) { + /// e.and_modify(|v| *v *= 2); + /// } + /// assert_eq!(map.get(&1), Some(20)); + /// ``` pub fn and_modify(mut self, f: impl FnOnce(&mut V)) -> Self { let mut value = self.get(); f(&mut value); - self.map.update_value( - &mut self.node, - self.idx, - value.to_bytes_checked().into_owned(), - ); + self.map + .update_value(&mut self.node, self.idx, value.into_bytes_checked()); self } - pub fn insert(mut self, value: V) { - self.map + /// Replaces the current value with `value` and returns the previous value. + /// + /// # Examples + /// + /// ```rust + /// use ic_stable_structures::{BTreeMap, DefaultMemoryImpl, btreemap::entry::Entry}; + /// + /// let mut map: BTreeMap = BTreeMap::new(DefaultMemoryImpl::default()); + /// map.insert(1, 10); + /// + /// if let Entry::Occupied(e) = map.entry(1) { + /// let old = e.insert(99); + /// assert_eq!(old, 10); + /// } + /// assert_eq!(map.get(&1), Some(99)); + /// ``` + pub fn insert(mut self, value: V) -> V { + let old_bytes = self + .map .update_value(&mut self.node, self.idx, value.into_bytes_checked()); + V::from_bytes(Cow::Owned(old_bytes)) } - pub fn remove(self) { - match self.node.node_type() { + /// Removes the entry from the map and returns the value that was stored. + /// + /// # Examples + /// + /// ```rust + /// use ic_stable_structures::{BTreeMap, DefaultMemoryImpl, btreemap::entry::Entry}; + /// + /// let mut map: BTreeMap = BTreeMap::new(DefaultMemoryImpl::default()); + /// map.insert(1, 42); + /// + /// if let Entry::Occupied(e) = map.entry(1) { + /// assert_eq!(e.remove(), 42); + /// } + /// assert!(map.is_empty()); + /// ``` + pub fn remove(self) -> V { + let bytes = match self.node.node_type() { NodeType::Leaf if self.node.can_remove_entry_without_merging() => { - self.map.remove_from_leaf_node(self.node, self.idx); + self.map.remove_from_leaf_node(self.node, self.idx) } NodeType::Leaf => { - // TODO avoid using this slow path + // TODO: avoid this slow path by walking the tree upward from the leaf. let root = self.map.load_node(self.map.root_addr); - self.map.remove_helper(root, &self.key); - } - NodeType::Internal => { self.map - .remove_from_internal_node(self.node, self.idx, &self.key); + .remove_helper(root, &self.key) + .expect("key must exist") } + NodeType::Internal => self + .map + .remove_from_internal_node(self.node, self.idx, &self.key), }; + V::from_bytes(Cow::Owned(bytes)) } } @@ -155,9 +342,13 @@ mod tests { use std::cell::RefCell; use std::rc::Rc; + fn new_map() -> BTreeMap>>> { + BTreeMap::new(Rc::new(RefCell::new(Vec::new()))) + } + #[test] fn entry_end_to_end() { - let mut map = BTreeMap::new(Rc::new(RefCell::new(Vec::new()))); + let mut map = new_map(); for i in 0u32..100 { let Entry::Vacant(e) = map.entry(i) else { @@ -171,7 +362,8 @@ mod tests { panic!(); }; assert_eq!(i, e.get()); - e.insert(i + 1); + let old = e.insert(i + 1); + assert_eq!(old, i); } for i in 0u32..100 { @@ -179,9 +371,110 @@ mod tests { panic!(); }; assert_eq!(i + 1, e.get()); - e.remove(); + let removed = e.remove(); + assert_eq!(removed, i + 1); } assert!(map.is_empty()); } + + #[test] + fn or_insert_vacant() { + let mut map = new_map(); + assert_eq!(map.entry(1).or_insert(42).get(), 42); + assert_eq!(map.get(&1), Some(42)); + } + + #[test] + fn or_insert_occupied() { + let mut map = new_map(); + map.insert(1, 10); + assert_eq!(map.entry(1).or_insert(99).get(), 10); // default ignored + } + + #[test] + fn or_insert_with() { + let mut map = new_map(); + map.entry(1).or_insert_with(|| 7u32); + assert_eq!(map.get(&1), Some(7)); + // closure is not called when key is present + map.entry(1) + .or_insert_with(|| panic!("should not be called")); + assert_eq!(map.get(&1), Some(7)); + } + + #[test] + fn or_insert_with_key() { + let mut map = new_map(); + map.entry(6).or_insert_with_key(|&k| k * 3); + assert_eq!(map.get(&6), Some(18)); + } + + #[test] + fn or_default() { + let mut map = new_map(); + map.entry(1).or_default(); + assert_eq!(map.get(&1), Some(0u32)); + } + + #[test] + fn and_modify_occupied() { + let mut map = new_map(); + map.insert(1, 10); + map.entry(1).and_modify(|v| *v += 5); + assert_eq!(map.get(&1), Some(15)); + } + + #[test] + fn and_modify_vacant() { + let mut map = new_map(); + // closure must not be called; map must stay empty + map.entry(1).and_modify(|_| panic!("should not be called")); + assert_eq!(map.get(&1), None); + } + + #[test] + fn and_modify_then_or_insert() { + let mut map = new_map(); + map.insert(1, 10u32); + + map.entry(1).and_modify(|v| *v += 1).or_insert(1); + assert_eq!(map.get(&1), Some(11)); + + map.entry(2).and_modify(|v| *v += 1).or_insert(1); + assert_eq!(map.get(&2), Some(1)); + } + + #[test] + fn occupied_insert_returns_old_value() { + let mut map = new_map(); + map.insert(1, 10); + let Entry::Occupied(e) = map.entry(1) else { + panic!(); + }; + assert_eq!(e.insert(99), 10); + assert_eq!(map.get(&1), Some(99)); + } + + #[test] + fn occupied_remove_returns_value() { + let mut map = new_map(); + map.insert(1, 42); + let Entry::Occupied(e) = map.entry(1) else { + panic!(); + }; + assert_eq!(e.remove(), 42); + assert!(map.is_empty()); + } + + #[test] + fn or_insert_on_empty_map_then_drop() { + // Dropping a VacantEntry on an empty map without inserting must not corrupt the map. + let mut map = new_map(); + map.entry(1).and_modify(|_| panic!("should not be called")); + assert_eq!(map.get(&1), None); + // The map must still be usable after the drop. + map.insert(1, 99); + assert_eq!(map.get(&1), Some(99)); + } } From d313e668359d1eb4cb8d9c97a6c2cbff58c26cad Mon Sep 17 00:00:00 2001 From: Hamish Peebles Date: Tue, 31 Mar 2026 09:20:04 +0100 Subject: [PATCH 10/17] Return `LazyValue` to avoid deserialization if not reading value --- src/btreemap/entry.rs | 49 ++++++++++++++++++++++++++++++------------- 1 file changed, 35 insertions(+), 14 deletions(-) diff --git a/src/btreemap/entry.rs b/src/btreemap/entry.rs index f9e2e9bc..e40b827e 100644 --- a/src/btreemap/entry.rs +++ b/src/btreemap/entry.rs @@ -30,6 +30,7 @@ use crate::btreemap::node::{Node, NodeType}; use crate::{BTreeMap, Memory, Storable}; use std::borrow::Cow; +use std::marker::PhantomData; /// A view into a single entry of a [`BTreeMap`], which may either be occupied or vacant. /// @@ -66,6 +67,11 @@ pub struct OccupiedEntry<'a, K: 'a + Storable + Ord + Clone, V: 'a + Storable, M pub(crate) idx: usize, } +pub struct LazyValue { + bytes: Vec, + phantom_data: PhantomData, +} + impl<'a, K: 'a + Storable + Ord + Clone, V: 'a + Storable, M: Memory> Entry<'a, K, V, M> { /// Returns a reference to this entry's key. pub fn key(&self) -> &K { @@ -133,10 +139,7 @@ impl<'a, K: 'a + Storable + Ord + Clone, V: 'a + Storable, M: Memory> Entry<'a, /// map.entry(7).or_insert_with_key(|&k| k * 2); /// assert_eq!(map.get(&7), Some(14)); /// ``` - pub fn or_insert_with_key( - self, - default: impl FnOnce(&K) -> V, - ) -> OccupiedEntry<'a, K, V, M> { + pub fn or_insert_with_key(self, default: impl FnOnce(&K) -> V) -> OccupiedEntry<'a, K, V, M> { match self { Entry::Occupied(entry) => entry, Entry::Vacant(entry) => { @@ -229,7 +232,12 @@ impl<'a, K: 'a + Storable + Ord + Clone, V: 'a + Storable, M: Memory> VacantEntr map.insert(key.clone(), value); let (node, result) = map.find_node_for_insert(&key); let idx = result.expect("key was just inserted"); - OccupiedEntry { map, key, node, idx } + OccupiedEntry { + map, + key, + node, + idx, + } } } } @@ -294,11 +302,11 @@ impl<'a, K: 'a + Storable + Ord + Clone, V: 'a + Storable, M: Memory> OccupiedEn /// } /// assert_eq!(map.get(&1), Some(99)); /// ``` - pub fn insert(mut self, value: V) -> V { + pub fn insert(mut self, value: V) -> LazyValue { let old_bytes = self .map .update_value(&mut self.node, self.idx, value.into_bytes_checked()); - V::from_bytes(Cow::Owned(old_bytes)) + LazyValue::new(old_bytes) } /// Removes the entry from the map and returns the value that was stored. @@ -316,13 +324,13 @@ impl<'a, K: 'a + Storable + Ord + Clone, V: 'a + Storable, M: Memory> OccupiedEn /// } /// assert!(map.is_empty()); /// ``` - pub fn remove(self) -> V { + pub fn remove(self) -> LazyValue { let bytes = match self.node.node_type() { NodeType::Leaf if self.node.can_remove_entry_without_merging() => { self.map.remove_from_leaf_node(self.node, self.idx) } NodeType::Leaf => { - // TODO: avoid this slow path by walking the tree upward from the leaf. + // TODO: avoid this slow path let root = self.map.load_node(self.map.root_addr); self.map .remove_helper(root, &self.key) @@ -332,7 +340,20 @@ impl<'a, K: 'a + Storable + Ord + Clone, V: 'a + Storable, M: Memory> OccupiedEn .map .remove_from_internal_node(self.node, self.idx, &self.key), }; - V::from_bytes(Cow::Owned(bytes)) + LazyValue::new(bytes) + } +} + +impl LazyValue { + pub fn new(bytes: Vec) -> Self { + LazyValue { + bytes, + phantom_data: PhantomData, + } + } + + pub fn into_value(self) -> T { + T::from_bytes(Cow::Owned(self.bytes)) } } @@ -362,7 +383,7 @@ mod tests { panic!(); }; assert_eq!(i, e.get()); - let old = e.insert(i + 1); + let old = e.insert(i + 1).into_value(); assert_eq!(old, i); } @@ -371,7 +392,7 @@ mod tests { panic!(); }; assert_eq!(i + 1, e.get()); - let removed = e.remove(); + let removed = e.remove().into_value(); assert_eq!(removed, i + 1); } @@ -452,7 +473,7 @@ mod tests { let Entry::Occupied(e) = map.entry(1) else { panic!(); }; - assert_eq!(e.insert(99), 10); + assert_eq!(e.insert(99).into_value(), 10); assert_eq!(map.get(&1), Some(99)); } @@ -463,7 +484,7 @@ mod tests { let Entry::Occupied(e) = map.entry(1) else { panic!(); }; - assert_eq!(e.remove(), 42); + assert_eq!(e.remove().into_value(), 42); assert!(map.is_empty()); } From ca48b9aabb211a464c19981388530eedca46be64 Mon Sep 17 00:00:00 2001 From: Hamish Peebles Date: Tue, 31 Mar 2026 09:33:05 +0100 Subject: [PATCH 11/17] Update proptests --- src/btreemap/proptests.rs | 81 ++++++++++++++++++++------------------- 1 file changed, 41 insertions(+), 40 deletions(-) diff --git a/src/btreemap/proptests.rs b/src/btreemap/proptests.rs index 2103b7fb..077cf833 100644 --- a/src/btreemap/proptests.rs +++ b/src/btreemap/proptests.rs @@ -215,29 +215,38 @@ fn entry( for (key, operation) in keys.iter().copied().zip(operations.iter().copied()) { let entry = btree.entry(key); let std_entry = std_map.entry(key); - match (entry, std_entry) { - (Entry::Occupied(e), btree_map::Entry::Occupied(mut std_e)) => match operation { - 0 => { - e.insert(key); - std_e.insert(key); - } - 1 => { - let existing = e.get(); - let std_existing = *std_e.get(); - e.insert(existing.overflowing_add(1).0); - std_e.insert(std_existing.overflowing_add(1).0); + let occupied = matches!(entry, Entry::Occupied(_)); + let std_occupied = matches!(std_entry, btree_map::Entry::Occupied(_)); + assert_eq!(occupied, std_occupied); + + match operation { + 0 => { + entry.and_modify(|v| *v = key).or_insert(key); + std_entry.and_modify(|v| *v = key).or_insert(key); + } + 1 => { + entry.and_modify(|v| *v += 1).or_insert(key); + std_entry.and_modify(|v| *v += 1).or_insert(key); + } + 2 => { + match entry { + Entry::Occupied(e) => { + e.remove(); + } + Entry::Vacant(e) => { + e.insert(key); + } } - 2 => { - e.remove(); - std_e.remove(); + match std_entry { + btree_map::Entry::Occupied(e) => { + e.remove(); + } + btree_map::Entry::Vacant(e) => { + e.insert(key); + } } - _ => unreachable!(), - }, - (Entry::Vacant(e), btree_map::Entry::Vacant(std_e)) => { - e.insert(key); - std_e.insert(key); } - _ => prop_assert!(false, "Map out of sync with std lib BTreeMap"), + _ => unreachable!(), } } @@ -378,35 +387,27 @@ fn execute_operation( } } Operation::EntryInsertOrXor { key, value } => { - match std_btree.entry(key.clone()) { - btree_map::Entry::Occupied(mut entry) => { - let existing = entry.get().clone(); - let xor = existing + std_btree + .entry(key.clone()) + .and_modify(|existing| { + *existing = existing .into_iter() .zip(value.clone()) .map(|(l, r)| l.bitxor(r)) .collect::>(); - entry.insert(xor); - } - btree_map::Entry::Vacant(entry) => { - entry.insert(value.clone()); - } - }; + }) + .or_insert(value.clone()); - match btree.entry(key.clone()) { - Entry::Occupied(entry) => { - let existing = entry.get(); - let xor = existing + btree + .entry(key.clone()) + .and_modify(|existing| { + *existing = existing .into_iter() .zip(value.clone()) .map(|(l, r)| l.bitxor(r)) .collect::>(); - entry.insert(xor); - } - Entry::Vacant(entry) => { - entry.insert(value.clone()); - } - } + }) + .or_insert(value); assert_eq!(btree.get(&key).as_ref(), std_btree.get(&key)); } From 0d1bbd42b0a9b2118d507e4fbeebba2231e9e9d9 Mon Sep 17 00:00:00 2001 From: Hamish Peebles Date: Tue, 31 Mar 2026 09:43:43 +0100 Subject: [PATCH 12/17] clippy --- src/btreemap/proptests.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/btreemap/proptests.rs b/src/btreemap/proptests.rs index 077cf833..787f054c 100644 --- a/src/btreemap/proptests.rs +++ b/src/btreemap/proptests.rs @@ -391,7 +391,7 @@ fn execute_operation( .entry(key.clone()) .and_modify(|existing| { *existing = existing - .into_iter() + .iter() .zip(value.clone()) .map(|(l, r)| l.bitxor(r)) .collect::>(); @@ -402,7 +402,7 @@ fn execute_operation( .entry(key.clone()) .and_modify(|existing| { *existing = existing - .into_iter() + .iter() .zip(value.clone()) .map(|(l, r)| l.bitxor(r)) .collect::>(); From fdf8571f838194fbf02234340e6512f96916c1ec Mon Sep 17 00:00:00 2001 From: Hamish Peebles Date: Tue, 31 Mar 2026 15:45:41 +0100 Subject: [PATCH 13/17] Improve doc comments --- src/btreemap.rs | 46 ++++++++++++++++++++++++++++++++++++++----- src/btreemap/entry.rs | 38 ++++++++++++++++++++++++++++++----- 2 files changed, 74 insertions(+), 10 deletions(-) diff --git a/src/btreemap.rs b/src/btreemap.rs index 72de15c5..78b97348 100644 --- a/src/btreemap.rs +++ b/src/btreemap.rs @@ -519,11 +519,47 @@ where } } - /// Searches for the entry within the map where the key belongs. - /// The value returned is either a `VacantEntry` (if the key doesn't already exist), or an - /// `OccupiedEntry` (if the key does exist). - /// Once you have the entry you can `get` the value, `insert` a new value, or `remove` - /// the value. + /// Gets an [`Entry`](entry::Entry) for the given `key`, which gives efficient in-place access + /// to a map's entries, allowing inspection or modification without redundant key lookups. The + /// API mirrors [`std::collections::btree_map::Entry`] as closely as the stable-memory model + /// allows. + /// + /// Returns [`Entry::Occupied`](entry::Entry::Occupied) when `key` is already present, or + /// [`Entry::Vacant`](entry::Entry::Vacant) when it is not. Both variants expose methods to + /// read, overwrite, or remove the value. + /// + /// # Differences from `std::collections::BTreeMap::entry` + /// + /// The standard library's `or_insert` returns `&mut V`, giving a direct reference into the + /// map. Because values live in stable memory, long-lived references are not possible here. + /// Instead, `or_insert` (and its variants) return an [`OccupiedEntry`](entry::OccupiedEntry), + /// which lets you continue operating on the entry without a second key lookup. + /// + /// # Examples + /// + /// ```rust + /// use ic_stable_structures::{BTreeMap, DefaultMemoryImpl}; + /// + /// let mut map: BTreeMap = BTreeMap::new(DefaultMemoryImpl::default()); + /// + /// // Insert a default value when the key is absent, then read it back. + /// let val = map.entry(1).or_insert(0).get(); + /// assert_eq!(val, 0); + /// + /// // Increment a counter, seeding it to 1 when first seen. + /// for key in [1, 1, 2, 3, 1] { + /// map.entry(key).and_modify(|v| *v += 1).or_insert(1); + /// } + /// assert_eq!(map.get(&1), Some(3)); + /// assert_eq!(map.get(&2), Some(1)); + /// assert_eq!(map.get(&3), Some(1)); + /// ``` + /// + /// # See also + /// + /// - [`entry::Entry`] — the type returned by this method + /// - [`entry::OccupiedEntry`] — methods available when the key is present + /// - [`entry::VacantEntry`] — methods available when the key is absent pub fn entry(&mut self, key: K) -> entry::Entry { // For an empty map the key is trivially absent. Avoid calling // `find_node_for_insert` here because that eagerly allocates a root diff --git a/src/btreemap/entry.rs b/src/btreemap/entry.rs index e40b827e..73746dba 100644 --- a/src/btreemap/entry.rs +++ b/src/btreemap/entry.rs @@ -67,6 +67,31 @@ pub struct OccupiedEntry<'a, K: 'a + Storable + Ord + Clone, V: 'a + Storable, M pub(crate) idx: usize, } +/// A value returned by [`OccupiedEntry::insert`] or [`OccupiedEntry::remove`] that has not +/// yet been deserialised. +/// +/// Deserialisation is deferred so that callers who do not need the previous value pay no +/// decode cost. Call [`into_value`](LazyValue::into_value) to obtain the concrete `T`. +/// +/// # Examples +/// +/// ```rust +/// use ic_stable_structures::{BTreeMap, DefaultMemoryImpl, btreemap::entry::Entry}; +/// +/// let mut map: BTreeMap = BTreeMap::new(DefaultMemoryImpl::default()); +/// map.insert(1, 10); +/// +/// if let Entry::Occupied(e) = map.entry(1) { +/// // Discard the old value without deserialising it. +/// let _old: _ = e.insert(99); +/// } +/// +/// if let Entry::Occupied(e) = map.entry(1) { +/// // Deserialise only when the value is actually needed. +/// let old_value = e.insert(0).into_value(); +/// assert_eq!(old_value, 99); +/// } +/// ``` pub struct LazyValue { bytes: Vec, phantom_data: PhantomData, @@ -286,7 +311,8 @@ impl<'a, K: 'a + Storable + Ord + Clone, V: 'a + Storable, M: Memory> OccupiedEn self } - /// Replaces the current value with `value` and returns the previous value. + /// Replaces the current value with `value` and returns the previous value as a + /// [`LazyValue`], which is only deserialised if you call [`LazyValue::into_value`]. /// /// # Examples /// @@ -297,7 +323,7 @@ impl<'a, K: 'a + Storable + Ord + Clone, V: 'a + Storable, M: Memory> OccupiedEn /// map.insert(1, 10); /// /// if let Entry::Occupied(e) = map.entry(1) { - /// let old = e.insert(99); + /// let old = e.insert(99).into_value(); /// assert_eq!(old, 10); /// } /// assert_eq!(map.get(&1), Some(99)); @@ -309,7 +335,8 @@ impl<'a, K: 'a + Storable + Ord + Clone, V: 'a + Storable, M: Memory> OccupiedEn LazyValue::new(old_bytes) } - /// Removes the entry from the map and returns the value that was stored. + /// Removes the entry from the map and returns the stored value as a [`LazyValue`], which + /// is only deserialised if you call [`LazyValue::into_value`]. /// /// # Examples /// @@ -320,7 +347,7 @@ impl<'a, K: 'a + Storable + Ord + Clone, V: 'a + Storable, M: Memory> OccupiedEn /// map.insert(1, 42); /// /// if let Entry::Occupied(e) = map.entry(1) { - /// assert_eq!(e.remove(), 42); + /// assert_eq!(e.remove().into_value(), 42); /// } /// assert!(map.is_empty()); /// ``` @@ -345,13 +372,14 @@ impl<'a, K: 'a + Storable + Ord + Clone, V: 'a + Storable, M: Memory> OccupiedEn } impl LazyValue { - pub fn new(bytes: Vec) -> Self { + pub(crate) fn new(bytes: Vec) -> Self { LazyValue { bytes, phantom_data: PhantomData, } } + /// Deserialises and returns the value. pub fn into_value(self) -> T { T::from_bytes(Cow::Owned(self.bytes)) } From 51f4a84202dcc231aa59f354196887903c030914 Mon Sep 17 00:00:00 2001 From: Hamish Peebles Date: Tue, 31 Mar 2026 15:46:56 +0100 Subject: [PATCH 14/17] s -> z --- src/btreemap/entry.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/btreemap/entry.rs b/src/btreemap/entry.rs index 73746dba..4a810c0c 100644 --- a/src/btreemap/entry.rs +++ b/src/btreemap/entry.rs @@ -68,9 +68,9 @@ pub struct OccupiedEntry<'a, K: 'a + Storable + Ord + Clone, V: 'a + Storable, M } /// A value returned by [`OccupiedEntry::insert`] or [`OccupiedEntry::remove`] that has not -/// yet been deserialised. +/// yet been deserialized. /// -/// Deserialisation is deferred so that callers who do not need the previous value pay no +/// Deserialization is deferred so that callers who do not need the previous value pay no /// decode cost. Call [`into_value`](LazyValue::into_value) to obtain the concrete `T`. /// /// # Examples @@ -82,12 +82,12 @@ pub struct OccupiedEntry<'a, K: 'a + Storable + Ord + Clone, V: 'a + Storable, M /// map.insert(1, 10); /// /// if let Entry::Occupied(e) = map.entry(1) { -/// // Discard the old value without deserialising it. +/// // Discard the old value without deserializing it. /// let _old: _ = e.insert(99); /// } /// /// if let Entry::Occupied(e) = map.entry(1) { -/// // Deserialise only when the value is actually needed. +/// // Deserialize only when the value is actually needed. /// let old_value = e.insert(0).into_value(); /// assert_eq!(old_value, 99); /// } @@ -312,7 +312,7 @@ impl<'a, K: 'a + Storable + Ord + Clone, V: 'a + Storable, M: Memory> OccupiedEn } /// Replaces the current value with `value` and returns the previous value as a - /// [`LazyValue`], which is only deserialised if you call [`LazyValue::into_value`]. + /// [`LazyValue`], which is only deserialized if you call [`LazyValue::into_value`]. /// /// # Examples /// @@ -336,7 +336,7 @@ impl<'a, K: 'a + Storable + Ord + Clone, V: 'a + Storable, M: Memory> OccupiedEn } /// Removes the entry from the map and returns the stored value as a [`LazyValue`], which - /// is only deserialised if you call [`LazyValue::into_value`]. + /// is only deserialized if you call [`LazyValue::into_value`]. /// /// # Examples /// @@ -379,7 +379,7 @@ impl LazyValue { } } - /// Deserialises and returns the value. + /// Deserializes and returns the value. pub fn into_value(self) -> T { T::from_bytes(Cow::Owned(self.bytes)) } From 63c7c587c632cdd282489e06fb4f94bd2b6e6aa5 Mon Sep 17 00:00:00 2001 From: Hamish Peebles Date: Wed, 1 Apr 2026 10:43:49 +0100 Subject: [PATCH 15/17] Revert `insert` back to how it was since "find node -> insert" is slower --- src/btreemap.rs | 195 +++++++++++++++++++++++++++++------------- src/btreemap/entry.rs | 8 +- 2 files changed, 140 insertions(+), 63 deletions(-) diff --git a/src/btreemap.rs b/src/btreemap.rs index 79eeadbe..cf2f1063 100644 --- a/src/btreemap.rs +++ b/src/btreemap.rs @@ -637,20 +637,109 @@ where pub fn insert(&mut self, key: K, value: V) -> Option { let value = value.into_bytes_checked(); - let (mut node, search_result) = self.find_node_for_insert(&key); + let root = if self.root_addr == NULL { + // No root present. Allocate one. + let node = self.allocate_node(NodeType::Leaf); + self.root_addr = node.address(); + self.save_header(); + node + } else { + // Load the root from memory. + let mut root = self.load_node(self.root_addr); + + // Check if the key already exists in the root. + if let Ok(idx) = root.search(&key, self.memory()) { + // Key found, replace its value and return the old one. + return Some(V::from_bytes(Cow::Owned( + self.update_value(&mut root, idx, value), + ))); + } - match search_result { - Ok(idx) => Some(V::from_bytes(Cow::Owned( - self.update_value(&mut node, idx, value), - ))), + // If the root is full, we need to introduce a new node as the root. + // + // NOTE: In the case where we are overwriting an existing key, then introducing + // a new root node isn't strictly necessary. However, that's a micro-optimization + // that adds more complexity than it's worth. + if root.is_full() { + // The root is full. Allocate a new node that will be used as the new root. + let mut new_root = self.allocate_node(NodeType::Internal); + + // The new root has the old root as its only child. + new_root.push_child(self.root_addr); + + // Update the root address. + self.root_addr = new_root.address(); + self.save_header(); + + // Split the old (full) root. + self.split_child(&mut new_root, 0); + + new_root + } else { + root + } + }; + + self.insert_nonfull(root, key, value) + .map(Cow::Owned) + .map(V::from_bytes) + } + + /// Inserts an entry into a node that is *not full*. + fn insert_nonfull(&mut self, mut node: Node, key: K, value: Vec) -> Option> { + // We're guaranteed by the caller that the provided node is not full. + assert!(!node.is_full()); + + // Look for the key in the node. + match node.search(&key, self.memory()) { + Ok(idx) => { + // Key found, replace its value and return the old one. + Some(self.update_value(&mut node, idx, value)) + } Err(idx) => { - node.insert_entry(idx, (key, value)); - self.save_node(&mut node); + // The key isn't in the node. `idx` is where that key should be inserted. - // Update the length. - self.length += 1; - self.save_header(); - None + match node.node_type() { + NodeType::Leaf => { + // The node is a non-full leaf. + // Insert the entry at the proper location. + node.insert_entry(idx, (key, value)); + self.save_node(&mut node); + + // Update the length. + self.length += 1; + self.save_header(); + + // No previous value to return. + None + } + NodeType::Internal => { + // The node is an internal node. + // Load the child that we should add the entry to. + let mut child = self.load_node(node.child(idx)); + + if child.is_full() { + // Check if the key already exists in the child. + if let Ok(idx) = child.search(&key, self.memory()) { + // Key found, replace its value and return the old one. + return Some(self.update_value(&mut child, idx, value)); + } + + // The child is full. Split the child. + self.split_child(&mut node, idx); + + // The children have now changed. Search again for + // the child where we need to store the entry in. + let idx = node.search(&key, self.memory()).unwrap_or_else(|idx| idx); + child = self.load_node(node.child(idx)); + } + + // The child should now be not full. + assert!(!child.is_full()); + + self.insert_nonfull(child, key, value) + } + } } } } @@ -710,7 +799,40 @@ where }); } - let (node, search_result) = self.find_node_for_insert(&key); + // Load the root from memory. + let mut root = self.load_node(self.root_addr); + + let (node, search_result) = { + // Check if the key already exists in the root. + if let Ok(idx) = root.search(&key, self.memory()) { + // Key found + (root, Ok(idx)) + } else { + // If the root is full, we need to introduce a new node as the root. + // + // NOTE: In the case where we are overwriting an existing key, then introducing + // a new root node isn't strictly necessary. However, that's a micro-optimization + // that adds more complexity than it's worth. + if root.is_full() { + // The root is full. Allocate a new node that will be used as the new root. + let mut new_root = self.allocate_node(NodeType::Internal); + + // The new root has the old root as its only child. + new_root.push_child(self.root_addr); + + // Update the root address. + self.root_addr = new_root.address(); + self.save_header(); + + // Split the old (full) root. + self.split_child(&mut new_root, 0); + + root = new_root; + } + + self.find_node_for_insert_nonfull(root, &key) + } + }; match search_result { Ok(idx) => entry::Entry::Occupied(entry::OccupiedEntry { @@ -727,53 +849,6 @@ where } } - /// Finds the node the key (and its corresponding value) should be inserted into - /// Return value is a tuple, the first value is the node the key should be inserted into, the - /// seconds value is a `Result` which is `Ok` if the key exists in the node, or `Err` if not. - /// The value in either case is the index at which the key should be inserted. - fn find_node_for_insert(&mut self, key: &K) -> (Node, Result) { - if self.root_addr == NULL { - // No root present. Allocate one. - let node = self.allocate_node(NodeType::Leaf); - self.root_addr = node.address(); - self.save_header(); - return (node, Err(0)); - } - - // Load the root from memory. - let mut root = self.load_node(self.root_addr); - - // Check if the key already exists in the root. - if let Ok(idx) = root.search(key, self.memory()) { - // Key found - return (root, Ok(idx)); - } - - // If the root is full, we need to introduce a new node as the root. - // - // NOTE: In the case where we are overwriting an existing key, then introducing - // a new root node isn't strictly necessary. However, that's a micro-optimization - // that adds more complexity than it's worth. - if root.is_full() { - // The root is full. Allocate a new node that will be used as the new root. - let mut new_root = self.allocate_node(NodeType::Internal); - - // The new root has the old root as its only child. - new_root.push_child(self.root_addr); - - // Update the root address. - self.root_addr = new_root.address(); - self.save_header(); - - // Split the old (full) root. - self.split_child(&mut new_root, 0); - - root = new_root; - } - - self.find_node_for_insert_nonfull(root, key) - } - /// Finds the node the key (and its corresponding value) should be inserted into /// /// PRECONDITION: @@ -1308,6 +1383,7 @@ where /// PRECONDITION /// - `node` is a leaf node /// - `node.entries_len() > 1` or node is the root node + #[inline(always)] fn remove_from_leaf_node(&mut self, mut node: Node, idx: usize) -> Vec { debug_assert_eq!(node.node_type(), NodeType::Leaf); @@ -1337,6 +1413,7 @@ where /// PRECONDITION /// - `node` is an internal node /// - `node` contains `key` at index `idx` + #[inline(always)] fn remove_from_internal_node(&mut self, mut node: Node, idx: usize, key: &K) -> Vec { debug_assert_eq!(node.node_type(), NodeType::Internal); debug_assert_eq!(node.search(key, self.memory()), Ok(idx)); diff --git a/src/btreemap/entry.rs b/src/btreemap/entry.rs index 4a810c0c..2b4ee884 100644 --- a/src/btreemap/entry.rs +++ b/src/btreemap/entry.rs @@ -251,17 +251,17 @@ impl<'a, K: 'a + Storable + Ord + Clone, V: 'a + Storable, M: Memory> VacantEntr } None => { // The map was empty when `entry()` was called. Delegate to the regular - // insert path which handles root allocation, then find the new entry. + // insert path which handles root allocation, then set the node and idx of the + // new `OccupiedEntry` to the root node and index 0. let map = self.map; let key = self.key; map.insert(key.clone(), value); - let (node, result) = map.find_node_for_insert(&key); - let idx = result.expect("key was just inserted"); + let node = map.load_node(map.root_addr); OccupiedEntry { map, key, node, - idx, + idx: 0, } } } From 13f74c30bd160d08a6371238ab1a539221c48307 Mon Sep 17 00:00:00 2001 From: Hamish Peebles Date: Wed, 1 Apr 2026 10:44:47 +0100 Subject: [PATCH 16/17] Remove these --- src/btreemap.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/btreemap.rs b/src/btreemap.rs index cf2f1063..b4ab86c9 100644 --- a/src/btreemap.rs +++ b/src/btreemap.rs @@ -1383,7 +1383,6 @@ where /// PRECONDITION /// - `node` is a leaf node /// - `node.entries_len() > 1` or node is the root node - #[inline(always)] fn remove_from_leaf_node(&mut self, mut node: Node, idx: usize) -> Vec { debug_assert_eq!(node.node_type(), NodeType::Leaf); @@ -1413,7 +1412,6 @@ where /// PRECONDITION /// - `node` is an internal node /// - `node` contains `key` at index `idx` - #[inline(always)] fn remove_from_internal_node(&mut self, mut node: Node, idx: usize, key: &K) -> Vec { debug_assert_eq!(node.node_type(), NodeType::Internal); debug_assert_eq!(node.search(key, self.memory()), Ok(idx)); From f0a56ce61b48e07885fb9d41a879d373be63358e Mon Sep 17 00:00:00 2001 From: Hamish Peebles Date: Wed, 1 Apr 2026 15:57:11 +0100 Subject: [PATCH 17/17] Deduplicate by passing callback into `find_node_for_insert` --- src/btreemap.rs | 212 ++++++++++++++++++++---------------------------- 1 file changed, 86 insertions(+), 126 deletions(-) diff --git a/src/btreemap.rs b/src/btreemap.rs index b4ab86c9..e9092a03 100644 --- a/src/btreemap.rs +++ b/src/btreemap.rs @@ -655,93 +655,25 @@ where ))); } - // If the root is full, we need to introduce a new node as the root. - // // NOTE: In the case where we are overwriting an existing key, then introducing // a new root node isn't strictly necessary. However, that's a micro-optimization // that adds more complexity than it's worth. - if root.is_full() { - // The root is full. Allocate a new node that will be used as the new root. - let mut new_root = self.allocate_node(NodeType::Internal); - - // The new root has the old root as its only child. - new_root.push_child(self.root_addr); - - // Update the root address. - self.root_addr = new_root.address(); - self.save_header(); - - // Split the old (full) root. - self.split_child(&mut new_root, 0); - - new_root - } else { - root - } + self.split_root_if_full(root) }; - self.insert_nonfull(root, key, value) - .map(Cow::Owned) - .map(V::from_bytes) - } - - /// Inserts an entry into a node that is *not full*. - fn insert_nonfull(&mut self, mut node: Node, key: K, value: Vec) -> Option> { - // We're guaranteed by the caller that the provided node is not full. - assert!(!node.is_full()); - - // Look for the key in the node. - match node.search(&key, self.memory()) { - Ok(idx) => { - // Key found, replace its value and return the old one. - Some(self.update_value(&mut node, idx, value)) - } - Err(idx) => { - // The key isn't in the node. `idx` is where that key should be inserted. - - match node.node_type() { - NodeType::Leaf => { - // The node is a non-full leaf. - // Insert the entry at the proper location. - node.insert_entry(idx, (key, value)); - self.save_node(&mut node); - - // Update the length. - self.length += 1; - self.save_header(); - - // No previous value to return. - None - } - NodeType::Internal => { - // The node is an internal node. - // Load the child that we should add the entry to. - let mut child = self.load_node(node.child(idx)); - - if child.is_full() { - // Check if the key already exists in the child. - if let Ok(idx) = child.search(&key, self.memory()) { - // Key found, replace its value and return the old one. - return Some(self.update_value(&mut child, idx, value)); - } - - // The child is full. Split the child. - self.split_child(&mut node, idx); - - // The children have now changed. Search again for - // the child where we need to store the entry in. - let idx = node.search(&key, self.memory()).unwrap_or_else(|idx| idx); - child = self.load_node(node.child(idx)); - } - - // The child should now be not full. - assert!(!child.is_full()); - - self.insert_nonfull(child, key, value) - } - } + self.find_node_for_insert(root, key, move |map, mut node, idx, key, key_exists| { + if key_exists { + Some(map.update_value(&mut node, idx, value)) + } else { + node.insert_entry(idx, (key, value)); + map.save_node(&mut node); + map.length += 1; + map.save_header(); + None } - } + }) + .map(Cow::Owned) + .map(V::from_bytes) } /// Gets an [`Entry`](entry::Entry) for the given `key`, which gives efficient in-place access @@ -802,37 +734,27 @@ where // Load the root from memory. let mut root = self.load_node(self.root_addr); - let (node, search_result) = { - // Check if the key already exists in the root. - if let Ok(idx) = root.search(&key, self.memory()) { - // Key found - (root, Ok(idx)) - } else { - // If the root is full, we need to introduce a new node as the root. - // - // NOTE: In the case where we are overwriting an existing key, then introducing - // a new root node isn't strictly necessary. However, that's a micro-optimization - // that adds more complexity than it's worth. - if root.is_full() { - // The root is full. Allocate a new node that will be used as the new root. - let mut new_root = self.allocate_node(NodeType::Internal); - - // The new root has the old root as its only child. - new_root.push_child(self.root_addr); - - // Update the root address. - self.root_addr = new_root.address(); - self.save_header(); - - // Split the old (full) root. - self.split_child(&mut new_root, 0); - - root = new_root; - } + // Check if the key already exists in the root. + if let Ok(idx) = root.search(&key, self.memory()) { + // Key found + return entry::Entry::Occupied(entry::OccupiedEntry { + map: self, + key, + node: root, + idx, + }); + } - self.find_node_for_insert_nonfull(root, &key) - } - }; + root = self.split_root_if_full(root); + + let (key, node, search_result) = + self.find_node_for_insert(root, key, |_, node, idx, key, key_exists| { + if key_exists { + (key, node, Ok(idx)) + } else { + (key, node, Err(idx)) + } + }); match search_result { Ok(idx) => entry::Entry::Occupied(entry::OccupiedEntry { @@ -849,23 +771,61 @@ where } } - /// Finds the node the key (and its corresponding value) should be inserted into + /// Ensures the root node is not full, called before an insertion. /// - /// PRECONDITION: - /// - `node` is not full. - fn find_node_for_insert_nonfull( + /// If `root` is full, a new internal node is allocated, made the parent of the old + /// root, and the old root is split into two children. The new (non-full) root is + /// returned. If `root` is not full it is returned unchanged. + fn split_root_if_full(&mut self, root: Node) -> Node { + if !root.is_full() { + return root; + } + + // Allocate a new node that will become the new root. + let mut new_root = self.allocate_node(NodeType::Internal); + + // The new root has the old root as its only child. + new_root.push_child(self.root_addr); + + // Persist the updated root address before splitting. + self.root_addr = new_root.address(); + self.save_header(); + + // Split the old (full) root into two children of new_root. + self.split_child(&mut new_root, 0); + + new_root + } + + /// Core B-tree insertion traversal, shared by [`BTreeMap::insert`] and [`BTreeMap::entry`]. + /// + /// Descends from `node` (which must not be full) to the leaf or internal node where + /// `key` either already exists or should be inserted, then invokes `callback` with: + /// + /// * `map` — mutable access to the map (for saving nodes, updating length, etc.) + /// * `node` — the target node + /// * `idx` — the relevant slot index within `node` + /// * `key` — the key being inserted + /// * `key_exists` — `true` if `key` is already present at `idx`; `false` if `idx` is + /// the position where `key` should be inserted + /// + /// The callback's return value is propagated back to the caller. + /// + /// PRECONDITION: `node` is not full. + fn find_node_for_insert( &mut self, mut node: Node, - key: &K, - ) -> (Node, Result) { + key: K, + callback: impl FnOnce(&mut Self, Node, usize, K, bool) -> R, + ) -> R { // We're guaranteed by the caller that the provided node is not full. assert!(!node.is_full()); // Look for the key in the node. - match node.search(key, self.memory()) { + match node.search(&key, self.memory()) { Ok(idx) => { - // Key found - (node, Ok(idx)) + // Key found. + callback(self, node, idx, key, true) } Err(idx) => { // The key isn't in the node. `idx` is where that key should be inserted. @@ -873,7 +833,7 @@ where match node.node_type() { NodeType::Leaf => { // The node is a non-full leaf. - (node, Err(idx)) + callback(self, node, idx, key, false) } NodeType::Internal => { // The node is an internal node. @@ -882,9 +842,9 @@ where if child.is_full() { // Check if the key already exists in the child. - if let Ok(idx) = child.search(key, self.memory()) { - // Key found - return (child, Ok(idx)); + if let Ok(idx) = child.search(&key, self.memory()) { + // Key found. + return callback(self, child, idx, key, true); } // The child is full. Split the child. @@ -892,14 +852,14 @@ where // The children have now changed. Search again for // the child where we need to store the entry in. - let idx = node.search(key, self.memory()).unwrap_or_else(|idx| idx); + let idx = node.search(&key, self.memory()).unwrap_or_else(|idx| idx); child = self.load_node(node.child(idx)); } // The child should now be not full. assert!(!child.is_full()); - self.find_node_for_insert_nonfull(child, key) + self.find_node_for_insert(child, key, callback) } } }