diff --git a/benchmarks/btreemap/src/main.rs b/benchmarks/btreemap/src/main.rs
index 87c01902..50a6bc97 100644
--- a/benchmarks/btreemap/src/main.rs
+++ b/benchmarks/btreemap/src/main.rs
@@ -1,6 +1,7 @@
 use benchmarks::{random::Random, vec::UnboundedVecN};
 use canbench_rs::{bench, bench_fn, BenchResult};
 use candid::Principal;
+use ic_stable_structures::btreemap::entry::Entry::Occupied;
 use ic_stable_structures::memory_manager::{MemoryId, MemoryManager};
 use ic_stable_structures::{storable::Blob, BTreeMap, DefaultMemoryImpl, Memory, Storable};
 use std::ops::Bound;
@@ -396,6 +397,45 @@ pub fn btreemap_v2_get_10mib_values() -> BenchResult {
     })
 }

+#[bench(raw)] // Baseline: bumps each stored value with a `get` followed by an `insert`.
+pub fn btreemap_get_and_incr() -> BenchResult {
+    let count = 10000;
+    let mut btree = BTreeMap::new(DefaultMemoryImpl::default());
+    let mut rng = Rng::from_seed(0); // fixed seed
+    let values = generate_random_kv::(count, &mut rng);
+    for (key, value) in values.iter().copied() { // populate the map before `bench_fn` runs
+        btree.insert(key, value);
+    }
+
+    bench_fn(|| { // the closure is the work handed to canbench's `bench_fn`
+        for (key, _) in values {
+            let existing = btree.get(&key).unwrap(); // lookup #1
+            btree.insert(key, existing + 1); // lookup #2 for the same key
+        }
+    })
+}
+
+#[bench(raw)] // Same workload as `btreemap_get_and_incr`, but through the entry API.
+pub fn btreemap_get_and_incr_via_entry() -> BenchResult {
+    let count = 10000;
+    let mut btree = BTreeMap::new(DefaultMemoryImpl::default());
+    let mut rng = Rng::from_seed(0); // fixed seed
+    let values = generate_random_kv::(count, &mut rng);
+    for (key, value) in values.iter().copied() { // populate the map before `bench_fn` runs
+        btree.insert(key, value);
+    }
+
+    bench_fn(|| { // the closure is the work handed to canbench's `bench_fn`
+        for (key, _) in values {
+            let Occupied(e) = btree.entry(key) else { // the only lookup for this key
+                panic!() // not expected: every key was inserted above
+            };
+            let existing = e.get();
+            e.insert(existing + 1); // overwrites through the node the entry already holds
+        }
+    })
+}
+
 // Benchmarks for `BTreeMap::contains_key`.
 // Reduced grid: contains_key traversal is identical to get, only skips value deserialization.
 bench_tests! {
diff --git a/src/btreemap.rs b/src/btreemap.rs
index 80d1dea6..e9092a03 100644
--- a/src/btreemap.rs
+++ b/src/btreemap.rs
@@ -49,6 +49,7 @@
 //! ----------------------------------------
 //! 
```
 mod allocator;
+pub mod entry;
 mod iter;
 mod node;
 mod node_cache;
@@ -60,7 +61,7 @@ use crate::{
 };
 use allocator::Allocator;
 pub use iter::Iter;
-use node::{DerivedPageSize, Entry, Node, NodeType, PageSize, Version};
+use node::{DerivedPageSize, Node, NodeType, PageSize, Version};
 use node_cache::NodeCache;
 pub use node_cache::NodeCacheMetrics;
 use std::borrow::Cow;
@@ -654,46 +655,177 @@ where
                 )));
             }

-            // If the root is full, we need to introduce a new node as the root.
-            //
             // NOTE: In the case where we are overwriting an existing key, then introducing
             // a new root node isn't strictly necessary. However, that's a micro-optimization
             // that adds more complexity than it's worth.
-            if root.is_full() {
-                // The root is full. Allocate a new node that will be used as the new root.
-                let mut new_root = self.allocate_node(NodeType::Internal);
+            self.split_root_if_full(root)
+        };
+
+        self.find_node_for_insert(root, key, move |map, mut node, idx, key, key_exists| {
+            if key_exists {
+                Some(map.update_value(&mut node, idx, value)) // overwrite; yields the old bytes
+            } else {
+                node.insert_entry(idx, (key, value));
+                map.save_node(&mut node);
+                map.length += 1;
+                map.save_header();
+                None // no previous value to return
+            }
+        })
+        .map(Cow::Owned)
+        .map(V::from_bytes)
+    }
+
+    /// Gets an [`Entry`](entry::Entry) for the given `key`, which gives efficient in-place access
+    /// to a map's entries, allowing inspection or modification without redundant key lookups. The
+    /// API mirrors [`std::collections::btree_map::Entry`] as closely as the stable-memory model
+    /// allows.
+    ///
+    /// Returns [`Entry::Occupied`](entry::Entry::Occupied) when `key` is already present, or
+    /// [`Entry::Vacant`](entry::Entry::Vacant) when it is not. An occupied entry can read,
+    /// overwrite, or remove its value; a vacant entry can only insert one.
+    ///
+    /// # Differences from `std::collections::BTreeMap::entry`
+    ///
+    /// The standard library's `or_insert` returns `&mut V`, giving a direct reference into the
+    /// map. Because values live in stable memory, long-lived references are not possible here.
+    /// Instead, `or_insert` (and its variants) return an [`OccupiedEntry`](entry::OccupiedEntry),
+    /// which lets you continue operating on the entry without a second key lookup.
+    ///
+    /// # Examples
+    ///
+    /// ```rust
+    /// use ic_stable_structures::{BTreeMap, DefaultMemoryImpl};
+    ///
+    /// let mut map: BTreeMap<u32, u32, _> = BTreeMap::new(DefaultMemoryImpl::default());
+    ///
+    /// // Insert a default value when the key is absent, then read it back.
+    /// let val = map.entry(1).or_insert(0).get();
+    /// assert_eq!(val, 0);
+    ///
+    /// // Increment a counter, seeding it to 1 when first seen.
+    /// for key in [1, 1, 2, 3, 1] {
+    ///     map.entry(key).and_modify(|v| *v += 1).or_insert(1);
+    /// }
+    /// assert_eq!(map.get(&1), Some(3));
+    /// assert_eq!(map.get(&2), Some(1));
+    /// assert_eq!(map.get(&3), Some(1));
+    /// ```
+    ///
+    /// # See also
+    ///
+    /// - [`entry::Entry`] — the type returned by this method
+    /// - [`entry::OccupiedEntry`] — methods available when the key is present
+    /// - [`entry::VacantEntry`] — methods available when the key is absent
+    pub fn entry(&mut self, key: K) -> entry::Entry {
+        // For an empty map the key is trivially absent. Avoid calling
+        // `find_node_for_insert` here because that eagerly allocates a root
+        // node and persists `root_addr` to the header, which would leave the
+        // map in an inconsistent state if the returned `VacantEntry` is dropped
+        // without calling `insert`.
+        if self.root_addr == NULL {
+            return entry::Entry::Vacant(entry::VacantEntry {
+                map: self,
+                key,
+                location: None, // resolved later by `VacantEntry::insert`
+            });
+        }

-                // The new root has the old root as its only child.
-                new_root.push_child(self.root_addr);
+        // Load the root from memory.
+        let mut root = self.load_node(self.root_addr);
+
+        // Check if the key already exists in the root.
+        if let Ok(idx) = root.search(&key, self.memory()) {
+            // Key found: the root already holds the key, so no split is needed.
+            return entry::Entry::Occupied(entry::OccupiedEntry {
+                map: self,
+                key,
+                node: root,
+                idx,
+            });
+        }

-                // Update the root address.
-                self.root_addr = new_root.address();
-                self.save_header();
+        root = self.split_root_if_full(root); // persists even if the entry is dropped

-                // Split the old (full) root.
-                self.split_child(&mut new_root, 0);
+        let (key, node, search_result) =
+            self.find_node_for_insert(root, key, |_, node, idx, key, key_exists| {
+                if key_exists {
+                    (key, node, Ok(idx)) // key already stored at `idx`
+                } else {
+                    (key, node, Err(idx)) // `idx` is the slot where the key would go
+                }
+            });
+
+        match search_result {
+            Ok(idx) => entry::Entry::Occupied(entry::OccupiedEntry {
+                map: self,
+                key,
+                node,
+                idx,
+            }),
+            Err(idx) => entry::Entry::Vacant(entry::VacantEntry {
+                map: self,
+                key,
+                location: Some((node, idx)),
+            }),
+        }
+    }

-                new_root
-            } else {
-                root
-            }
-        };
+    /// Ensures the root node is not full, called before an insertion.
+    ///
+    /// If `root` is full, a new internal node is allocated, made the parent of the old
+    /// root, and the old root is split into two children. The new (non-full) root is
+    /// returned. If `root` is not full it is returned unchanged.
+    fn split_root_if_full(&mut self, root: Node) -> Node {
+        if !root.is_full() {
+            return root;
+        }

-        self.insert_nonfull(root, key, value)
-            .map(Cow::Owned)
-            .map(V::from_bytes)
+        // Allocate a new node that will become the new root.
+        let mut new_root = self.allocate_node(NodeType::Internal);
+
+        // The new root has the old root as its only child.
+        new_root.push_child(self.root_addr);
+
+        // Persist the updated root address before splitting.
+        self.root_addr = new_root.address();
+        self.save_header();
+
+        // Split the old (full) root into two children of new_root.
+        self.split_child(&mut new_root, 0);
+
+        new_root
     }

-    /// Inserts an entry into a node that is *not full*.
- fn insert_nonfull(&mut self, mut node: Node, key: K, value: Vec) -> Option> { + /// Core B-tree insertion traversal, shared by [`BTreeMap::insert`] and [`BTreeMap::entry`]. + /// + /// Descends from `node` (which must not be full) to the leaf or internal node where + /// `key` either already exists or should be inserted, then invokes `callback` with: + /// + /// * `map` — mutable access to the map (for saving nodes, updating length, etc.) + /// * `node` — the target node + /// * `idx` — the relevant slot index within `node` + /// * `key` — the key being inserted + /// * `key_exists` — `true` if `key` is already present at `idx`; `false` if `idx` is + /// the position where `key` should be inserted + /// + /// The callback's return value is propagated back to the caller. + /// + /// PRECONDITION: `node` is not full. + fn find_node_for_insert( + &mut self, + mut node: Node, + key: K, + callback: impl FnOnce(&mut Self, Node, usize, K, bool) -> R, + ) -> R { // We're guaranteed by the caller that the provided node is not full. assert!(!node.is_full()); // Look for the key in the node. match node.search(&key, self.memory()) { Ok(idx) => { - // Key found, replace its value and return the old one. - Some(self.update_value(&mut node, idx, value)) + // Key found. + callback(self, node, idx, key, true) } Err(idx) => { // The key isn't in the node. `idx` is where that key should be inserted. @@ -701,16 +833,7 @@ where match node.node_type() { NodeType::Leaf => { // The node is a non-full leaf. - // Insert the entry at the proper location. - node.insert_entry(idx, (key, value)); - self.save_node(&mut node); - - // Update the length. - self.length += 1; - self.save_header(); - - // No previous value to return. - None + callback(self, node, idx, key, false) } NodeType::Internal => { // The node is an internal node. @@ -720,8 +843,8 @@ where if child.is_full() { // Check if the key already exists in the child. 
if let Ok(idx) = child.search(&key, self.memory()) { - // Key found, replace its value and return the old one. - return Some(self.update_value(&mut child, idx, value)); + // Key found. + return callback(self, child, idx, key, true); } // The child is full. Split the child. @@ -736,7 +859,7 @@ where // The child should now be not full. assert!(!child.is_full()); - self.insert_nonfull(child, key, value) + self.find_node_for_insert(child, key, callback) } } } @@ -873,12 +996,12 @@ where } #[inline(always)] - fn first_entry_inner(&self, node: &Node) -> Entry { + fn first_entry_inner(&self, node: &Node) -> node::Entry { self.find_first_or_last(node, true, 0, |n, i, m| n.get_key_read_value_uncached(i, m)) } #[inline(always)] - fn last_entry_inner(&self, node: &Node) -> Entry { + fn last_entry_inner(&self, node: &Node) -> node::Entry { self.find_first_or_last(node, false, 0, |n, i, m| { n.get_key_read_value_uncached(i, m) }) @@ -1002,25 +1125,7 @@ where NodeType::Leaf => { match node.search(key, self.memory()) { Ok(idx) => { - // Case 1: The node is a leaf node and the key exists in it. - // This is the simplest case. The key is removed from the leaf. - let value = node.remove_entry(idx, self.memory()).1; - self.length -= 1; - - if node.entries_len() == 0 { - assert_eq!( - node.address(), self.root_addr, - "Removal can only result in an empty leaf node if that node is the root" - ); - - // Deallocate the empty node. - self.deallocate_node(node); - self.root_addr = NULL; - } else { - self.save_node(&mut node); - } - - self.save_header(); + let value = self.remove_from_leaf_node(node, idx); Some(value) } _ => None, // Key not found. @@ -1029,129 +1134,8 @@ where NodeType::Internal => { match node.search(key, self.memory()) { Ok(idx) => { - // Case 2: The node is an internal node and the key exists in it. 
- - let left_child = self.load_node(node.child(idx)); - if left_child.can_remove_entry_without_merging() { - // Case 2.a: A key can be removed from the left child without merging. - // - // parent - // [..., key, ...] - // / \ - // [left child] [...] - // / \ - // [...] [..., key predecessor] - // - // In this case, we replace `key` with the key's predecessor from the - // left child's subtree, then we recursively delete the key's - // predecessor for the following end result: - // - // parent - // [..., key predecessor, ...] - // / \ - // [left child] [...] - // / \ - // [...] [...] - - // Recursively delete the predecessor. - // TODO(EXC-1034): Do this in a single pass. - let predecessor = self.last_entry_inner(&left_child); - self.remove_helper(left_child, &predecessor.0)?; - - // Replace the `key` with its predecessor. - let (_, old_value) = node.swap_entry(idx, predecessor, self.memory()); - - // Save the parent node. - self.save_node(&mut node); - return Some(old_value); - } - - let right_child = self.load_node(node.child(idx + 1)); - if right_child.can_remove_entry_without_merging() { - // Case 2.b: A key can be removed from the right child without merging. - // - // parent - // [..., key, ...] - // / \ - // [...] [right child] - // / \ - // [key successor, ...] [...] - // - // In this case, we replace `key` with the key's successor from the - // right child's subtree, then we recursively delete the key's - // successor for the following end result: - // - // parent - // [..., key successor, ...] - // / \ - // [...] [right child] - // / \ - // [...] [...] - - // Recursively delete the successor. - // TODO(EXC-1034): Do this in a single pass. - let successor = self.first_entry_inner(&right_child); - self.remove_helper(right_child, &successor.0)?; - - // Replace the `key` with its successor. - let (_, old_value) = node.swap_entry(idx, successor, self.memory()); - - // Save the parent node. 
- self.save_node(&mut node); - return Some(old_value); - } - - // Case 2.c: Both the left and right child are at their minimum sizes. - // - // parent - // [..., key, ...] - // / \ - // [left child] [right child] - // - // In this case, we merge (left child, key, right child) into a single - // node. The result will look like this: - // - // parent - // [... ...] - // | - // [left child, `key`, right child] <= new child - // - // We then recurse on this new child to delete `key`. - // - // If `parent` becomes empty (which can only happen if it's the root), - // then `parent` is deleted and `new_child` becomes the new root. - assert!(left_child.at_minimum()); - assert!(right_child.at_minimum()); - - // Merge the right child into the left child. - let mut new_child = self.merge( - right_child, - left_child, - node.remove_entry(idx, self.memory()), - ); - - // Remove the right child from the parent node. - node.remove_child(idx + 1); - - if node.entries_len() == 0 { - // Can only happen if this node is root. - assert_eq!(node.address(), self.root_addr); - assert_eq!(node.child(0), new_child.address()); - assert_eq!(node.children_len(), 1); - - self.root_addr = new_child.address(); - - // Deallocate the root node. - self.deallocate_node(node); - self.save_header(); - } else { - self.save_node(&mut node); - } - - self.save_node(&mut new_child); - - // Recursively delete the key. - self.remove_helper(new_child, key) + let value = self.remove_from_internal_node(node, idx, key); + Some(value) } Err(idx) => { // Case 3: The node is an internal node and the key does NOT exist in it. @@ -1356,6 +1340,167 @@ where } } + /// PRECONDITION + /// - `node` is a leaf node + /// - `node.entries_len() > 1` or node is the root node + fn remove_from_leaf_node(&mut self, mut node: Node, idx: usize) -> Vec { + debug_assert_eq!(node.node_type(), NodeType::Leaf); + + // Case 1: The node is a leaf node and the key exists in it. + // This is the simplest case. 
The key is removed from the leaf. + let value = node.remove_entry(idx, self.memory()).1; + self.length -= 1; + + if node.entries_len() == 0 { + assert_eq!( + node.address(), + self.root_addr, + "Removal can only result in an empty leaf node if that node is the root" + ); + + // Deallocate the empty node. + self.deallocate_node(node); + self.root_addr = NULL; + } else { + self.save_node(&mut node); + } + + self.save_header(); + value + } + + /// PRECONDITION + /// - `node` is an internal node + /// - `node` contains `key` at index `idx` + fn remove_from_internal_node(&mut self, mut node: Node, idx: usize, key: &K) -> Vec { + debug_assert_eq!(node.node_type(), NodeType::Internal); + debug_assert_eq!(node.search(key, self.memory()), Ok(idx)); + + // Case 2: The node is an internal node and the key exists in it. + + let left_child = self.load_node(node.child(idx)); + if left_child.can_remove_entry_without_merging() { + // Case 2.a: A key can be removed from the left child without merging. + // + // parent + // [..., key, ...] + // / \ + // [left child] [...] + // / \ + // [...] [..., key predecessor] + // + // In this case, we replace `key` with the key's predecessor from the + // left child's subtree, then we recursively delete the key's + // predecessor for the following end result: + // + // parent + // [..., key predecessor, ...] + // / \ + // [left child] [...] + // / \ + // [...] [...] + + // Recursively delete the predecessor. + // TODO(EXC-1034): Do this in a single pass. + let predecessor = self.last_entry_inner(&left_child); + self.remove_helper(left_child, &predecessor.0).unwrap(); + + // Replace the `key` with its predecessor. + let (_, old_value) = node.swap_entry(idx, predecessor, self.memory()); + + // Save the parent node. 
+ self.save_node(&mut node); + return old_value; + } + + let right_child = self.load_node(node.child(idx + 1)); + if right_child.can_remove_entry_without_merging() { + // Case 2.b: A key can be removed from the right child without merging. + // + // parent + // [..., key, ...] + // / \ + // [...] [right child] + // / \ + // [key successor, ...] [...] + // + // In this case, we replace `key` with the key's successor from the + // right child's subtree, then we recursively delete the key's + // successor for the following end result: + // + // parent + // [..., key successor, ...] + // / \ + // [...] [right child] + // / \ + // [...] [...] + + // Recursively delete the successor. + // TODO(EXC-1034): Do this in a single pass. + let successor = self.first_entry_inner(&right_child); + self.remove_helper(right_child, &successor.0).unwrap(); + + // Replace the `key` with its successor. + let (_, old_value) = node.swap_entry(idx, successor, self.memory()); + + // Save the parent node. + self.save_node(&mut node); + return old_value; + } + + // Case 2.c: Both the left and right child are at their minimum sizes. + // + // parent + // [..., key, ...] + // / \ + // [left child] [right child] + // + // In this case, we merge (left child, key, right child) into a single + // node. The result will look like this: + // + // parent + // [... ...] + // | + // [left child, `key`, right child] <= new child + // + // We then recurse on this new child to delete `key`. + // + // If `parent` becomes empty (which can only happen if it's the root), + // then `parent` is deleted and `new_child` becomes the new root. + assert!(left_child.at_minimum()); + assert!(right_child.at_minimum()); + + // Merge the right child into the left child. + let mut new_child = self.merge( + right_child, + left_child, + node.remove_entry(idx, self.memory()), + ); + + // Remove the right child from the parent node. 
+ node.remove_child(idx + 1); + + if node.entries_len() == 0 { + // Can only happen if this node is root. + assert_eq!(node.address(), self.root_addr); + assert_eq!(node.child(0), new_child.address()); + assert_eq!(node.children_len(), 1); + + self.root_addr = new_child.address(); + + // Deallocate the root node. + self.deallocate_node(node); + self.save_header(); + } else { + self.save_node(&mut node); + } + + self.save_node(&mut new_child); + + // Recursively delete the key. + self.remove_helper(new_child, key).unwrap() + } + /// Returns an iterator over the entries of the map, sorted by key. /// /// # Example @@ -1478,7 +1623,7 @@ where /// Output: /// [1, 2, 3, 4, 5, 6, 7] (stored in the `into` node) /// `source` is deallocated. - fn merge(&mut self, source: Node, mut into: Node, median: Entry) -> Node { + fn merge(&mut self, source: Node, mut into: Node, median: node::Entry) -> Node { let source_addr = source.address(); into.merge(source, median, &mut self.allocator); // Node::merge saves `into` and deallocates `source` directly through diff --git a/src/btreemap/entry.rs b/src/btreemap/entry.rs new file mode 100644 index 00000000..2b4ee884 --- /dev/null +++ b/src/btreemap/entry.rs @@ -0,0 +1,529 @@ +//! Entry API for [`BTreeMap`]. +//! +//! This module provides the [`Entry`] type, which gives efficient in-place access to a map's +//! entries, allowing inspection or modification without redundant key lookups. The API mirrors +//! [`std::collections::btree_map::Entry`] as closely as the stable-memory model allows. +//! +//! # Note on `or_insert` return type +//! +//! The standard library's `or_insert` returns `&mut V`, giving a direct reference into the +//! map. Because values in this [`BTreeMap`] live in stable memory, long-lived references are +//! not possible. Instead, `or_insert` (and its variants) return an [`OccupiedEntry`], which +//! lets you continue reading or modifying the entry without a second key lookup. +//! +//! # Examples +//! +//! 
```rust
+//! use ic_stable_structures::{BTreeMap, DefaultMemoryImpl};
+//!
+//! let mut map: BTreeMap<u32, u32, _> = BTreeMap::new(DefaultMemoryImpl::default());
+//!
+//! // Insert a value only when the key is absent.
+//! map.entry(1).or_insert(42);
+//! assert_eq!(map.get(&1), Some(42));
+//!
+//! // Increment a counter, seeding it to 1 if absent.
+//! map.entry(1).and_modify(|v| *v += 1).or_insert(1);
+//! assert_eq!(map.get(&1), Some(43));
+//! ```
+
+use crate::btreemap::node::{Node, NodeType};
+use crate::{BTreeMap, Memory, Storable};
+use std::borrow::Cow;
+use std::marker::PhantomData;
+
+/// A view into a single entry of a [`BTreeMap`], which may either be occupied or vacant.
+///
+/// This type is returned by [`BTreeMap::entry`].
+pub enum Entry<'a, K: 'a + Storable + Ord + Clone, V: 'a + Storable, M: Memory> {
+    /// A vacant entry: the key is not present in the map.
+    Vacant(VacantEntry<'a, K, V, M>),
+    /// An occupied entry: the key is already present in the map.
+    Occupied(OccupiedEntry<'a, K, V, M>),
+}
+
+/// A view into a vacant entry in a [`BTreeMap`].
+///
+/// Obtained from [`Entry::Vacant`].
+pub struct VacantEntry<'a, K: 'a + Storable + Ord + Clone, V: 'a + Storable, M: Memory> {
+    pub(crate) map: &'a mut BTreeMap, // the map this entry will insert into
+    pub(crate) key: K, // the key that was looked up and found absent
+    /// Pre-computed insertion point from [`BTreeMap::entry`].
+    ///
+    /// `None` when the map was empty at the time `entry` was called — the root
+    /// node had not yet been allocated, so we defer the full insert to
+    /// [`VacantEntry::insert`] to avoid corrupting the map if this entry is
+    /// dropped without inserting.
+    pub(crate) location: Option<(Node, usize)>,
+}
+
+/// A view into an occupied entry in a [`BTreeMap`].
+///
+/// Obtained from [`Entry::Occupied`] or as the result of [`VacantEntry::insert`].
+pub struct OccupiedEntry<'a, K: 'a + Storable + Ord + Clone, V: 'a + Storable, M: Memory> {
+    pub(crate) map: &'a mut BTreeMap, // the map that owns the entry
+    pub(crate) key: K, // the key that was looked up and found present
+    pub(crate) node: Node, // in-memory copy of the node that holds the key
+    pub(crate) idx: usize, // position of the key within `node`
+}
+
+/// A value returned by [`OccupiedEntry::insert`] or [`OccupiedEntry::remove`] that has not
+/// yet been deserialized.
+///
+/// Deserialization is deferred so that callers who do not need the previous value pay no
+/// decode cost. Call [`into_value`](LazyValue::into_value) to obtain the concrete `T`.
+///
+/// # Examples
+///
+/// ```rust
+/// use ic_stable_structures::{BTreeMap, DefaultMemoryImpl, btreemap::entry::Entry};
+///
+/// let mut map: BTreeMap<u32, u32, _> = BTreeMap::new(DefaultMemoryImpl::default());
+/// map.insert(1, 10);
+///
+/// if let Entry::Occupied(e) = map.entry(1) {
+///     // Discard the old value without deserializing it.
+///     let _old: _ = e.insert(99);
+/// }
+///
+/// if let Entry::Occupied(e) = map.entry(1) {
+///     // Deserialize only when the value is actually needed.
+///     let old_value = e.insert(0).into_value();
+///     assert_eq!(old_value, 99);
+/// }
+/// ```
+pub struct LazyValue {
+    bytes: Vec, // the still-serialized value, decoded only by `into_value`
+    phantom_data: PhantomData, // type marker only; holds no data
+}
+
+impl<'a, K: 'a + Storable + Ord + Clone, V: 'a + Storable, M: Memory> Entry<'a, K, V, M> {
+    /// Returns a reference to this entry's key.
+    pub fn key(&self) -> &K {
+        match self {
+            Entry::Occupied(entry) => entry.key(),
+            Entry::Vacant(entry) => entry.key(),
+        }
+    }
+
+    /// Consumes the entry and returns its key.
+    pub fn into_key(self) -> K {
+        match self {
+            Entry::Occupied(entry) => entry.into_key(),
+            Entry::Vacant(entry) => entry.into_key(),
+        }
+    }
+
+    /// Ensures a value is present by inserting `default` if the entry is vacant, then returns
+    /// an [`OccupiedEntry`] for further operations.
+    ///
+    /// # Examples
+    ///
+    /// ```rust
+    /// use ic_stable_structures::{BTreeMap, DefaultMemoryImpl};
+    ///
+    /// let mut map: BTreeMap<u32, u32, _> = BTreeMap::new(DefaultMemoryImpl::default());
+    /// assert_eq!(map.entry(1).or_insert(10).get(), 10);
+    /// assert_eq!(map.entry(1).or_insert(99).get(), 10); // already present
+    /// ```
+    pub fn or_insert(self, default: V) -> OccupiedEntry<'a, K, V, M> {
+        match self {
+            Entry::Occupied(entry) => entry, // `default` is dropped unused
+            Entry::Vacant(entry) => entry.insert(default),
+        }
+    }
+
+    /// Ensures a value is present by inserting the result of `default` if the entry is vacant,
+    /// then returns an [`OccupiedEntry`].
+    ///
+    /// # Examples
+    ///
+    /// ```rust
+    /// use ic_stable_structures::{BTreeMap, DefaultMemoryImpl};
+    ///
+    /// let mut map: BTreeMap<u32, u32, _> = BTreeMap::new(DefaultMemoryImpl::default());
+    /// map.entry(1).or_insert_with(|| 42u32);
+    /// assert_eq!(map.get(&1), Some(42));
+    /// ```
+    pub fn or_insert_with(self, default: impl FnOnce() -> V) -> OccupiedEntry<'a, K, V, M> {
+        match self {
+            Entry::Occupied(entry) => entry, // `default` is never called
+            Entry::Vacant(entry) => entry.insert(default()),
+        }
+    }
+
+    /// Ensures a value is present by inserting the result of `default`, called with the
+    /// entry's key, if the entry is vacant. Returns an [`OccupiedEntry`].
+    ///
+    /// # Examples
+    ///
+    /// ```rust
+    /// use ic_stable_structures::{BTreeMap, DefaultMemoryImpl};
+    ///
+    /// let mut map: BTreeMap<u32, u32, _> = BTreeMap::new(DefaultMemoryImpl::default());
+    /// map.entry(7).or_insert_with_key(|&k| k * 2);
+    /// assert_eq!(map.get(&7), Some(14));
+    /// ```
+    pub fn or_insert_with_key(self, default: impl FnOnce(&K) -> V) -> OccupiedEntry<'a, K, V, M> {
+        match self {
+            Entry::Occupied(entry) => entry,
+            Entry::Vacant(entry) => {
+                let val = default(&entry.key); // compute first: `insert` consumes `entry`
+                entry.insert(val)
+            }
+        }
+    }
+
+    /// Ensures a value is present by inserting `V::default()` if the entry is vacant, then
+    /// returns an [`OccupiedEntry`].
+    ///
+    /// # Examples
+    ///
+    /// ```rust
+    /// use ic_stable_structures::{BTreeMap, DefaultMemoryImpl};
+    ///
+    /// let mut map: BTreeMap<u32, u32, _> = BTreeMap::new(DefaultMemoryImpl::default());
+    /// map.entry(1).or_default();
+    /// assert_eq!(map.get(&1), Some(0u32));
+    /// ```
+    pub fn or_default(self) -> OccupiedEntry<'a, K, V, M>
+    where
+        V: Default,
+    {
+        self.or_insert_with(V::default)
+    }
+
+    /// Provides in-place mutable access to an occupied entry before any potential inserts
+    /// via `or_insert` and friends.
+    ///
+    /// If the entry is vacant the closure is not called and the entry is returned unchanged,
+    /// making it possible to chain with `or_insert` and friends.
+    ///
+    /// # Examples
+    ///
+    /// ```rust
+    /// use ic_stable_structures::{BTreeMap, DefaultMemoryImpl};
+    ///
+    /// let mut map: BTreeMap<u32, u32, _> = BTreeMap::new(DefaultMemoryImpl::default());
+    /// map.insert(1, 10);
+    ///
+    /// // Increment existing value, or seed with 1 for a new key.
+    /// map.entry(1).and_modify(|v| *v += 1).or_insert(1);
+    /// assert_eq!(map.get(&1), Some(11));
+    ///
+    /// map.entry(2).and_modify(|v| *v += 1).or_insert(1);
+    /// assert_eq!(map.get(&2), Some(1));
+    /// ```
+    pub fn and_modify(self, f: impl FnOnce(&mut V)) -> Self {
+        match self {
+            Entry::Occupied(entry) => Entry::Occupied(entry.and_modify(f)),
+            Entry::Vacant(entry) => Entry::Vacant(entry), // nothing to modify; stays chainable
+        }
+    }
+}
+
+impl<'a, K: 'a + Storable + Ord + Clone, V: 'a + Storable, M: Memory> VacantEntry<'a, K, V, M> {
+    /// Returns a reference to the entry's key.
+    pub fn key(&self) -> &K {
+        &self.key
+    }
+
+    /// Consumes the entry and returns its key.
+    pub fn into_key(self) -> K {
+        self.key
+    }
+
+    /// Inserts `value` into the map at this entry's key and returns an [`OccupiedEntry`]
+    /// pointing at the newly inserted value.
+    pub fn insert(self, value: V) -> OccupiedEntry<'a, K, V, M> {
+        match self.location {
+            Some((mut node, idx)) => { // slot already located by `BTreeMap::entry`
+                node.insert_entry(idx, (self.key.clone(), value.into_bytes_checked()));
+                self.map.save_node(&mut node);
+                self.map.length += 1;
+                self.map.save_header(); // writes the header after the length bump
+                OccupiedEntry {
+                    map: self.map,
+                    key: self.key,
+                    node,
+                    idx,
+                }
+            }
+            None => {
+                // The map was empty when `entry()` was called. Delegate to the regular
+                // insert path which handles root allocation, then set the node and idx of the
+                // new `OccupiedEntry` to the root node and index 0.
+                let map = self.map;
+                let key = self.key;
+                map.insert(key.clone(), value); // clone: the returned entry keeps `key`
+                let node = map.load_node(map.root_addr);
+                OccupiedEntry {
+                    map,
+                    key,
+                    node,
+                    idx: 0,
+                }
+            }
+        }
+    }
+}
+
+impl<'a, K: 'a + Storable + Ord + Clone, V: 'a + Storable, M: Memory> OccupiedEntry<'a, K, V, M> {
+    /// Returns a reference to the entry's key.
+    pub fn key(&self) -> &K {
+        &self.key
+    }
+
+    /// Consumes the entry and returns its key.
+    pub fn into_key(self) -> K {
+        self.key
+    }
+
+    /// Returns the current value associated with this entry, decoded anew on every call.
+    pub fn get(&self) -> V {
+        let value_bytes = self.node.value(self.idx, self.map.memory());
+        V::from_bytes(Cow::Borrowed(value_bytes))
+    }
+
+    /// Provides in-place mutable access to the value in this occupied entry.
+    ///
+    /// Reads the current value, calls `f` with a mutable reference to it, then writes the
+    /// modified value back. Returns `self` so the call can be chained.
+    ///
+    /// # Examples
+    ///
+    /// ```rust
+    /// use ic_stable_structures::{BTreeMap, DefaultMemoryImpl, btreemap::entry::Entry};
+    ///
+    /// let mut map: BTreeMap<u32, u32, _> = BTreeMap::new(DefaultMemoryImpl::default());
+    /// map.insert(1, 10);
+    ///
+    /// if let Entry::Occupied(e) = map.entry(1) {
+    ///     e.and_modify(|v| *v *= 2);
+    /// }
+    /// assert_eq!(map.get(&1), Some(20));
+    /// ```
+    pub fn and_modify(mut self, f: impl FnOnce(&mut V)) -> Self {
+        let mut value = self.get();
+        f(&mut value);
+        self.map // written back unconditionally, even if `f` left the value unchanged
+            .update_value(&mut self.node, self.idx, value.into_bytes_checked());
+        self
+    }
+
+    /// Replaces the current value with `value` and returns the previous value as a
+    /// [`LazyValue`], which is only deserialized if you call [`LazyValue::into_value`].
+    ///
+    /// # Examples
+    ///
+    /// ```rust
+    /// use ic_stable_structures::{BTreeMap, DefaultMemoryImpl, btreemap::entry::Entry};
+    ///
+    /// let mut map: BTreeMap<u32, u32, _> = BTreeMap::new(DefaultMemoryImpl::default());
+    /// map.insert(1, 10);
+    ///
+    /// if let Entry::Occupied(e) = map.entry(1) {
+    ///     let old = e.insert(99).into_value();
+    ///     assert_eq!(old, 10);
+    /// }
+    /// assert_eq!(map.get(&1), Some(99));
+    /// ```
+    pub fn insert(mut self, value: V) -> LazyValue {
+        let old_bytes = self // `update_value` hands back the bytes it replaced
+            .map
+            .update_value(&mut self.node, self.idx, value.into_bytes_checked());
+        LazyValue::new(old_bytes)
+    }
+
+    /// Removes the entry from the map and returns the stored value as a [`LazyValue`], which
+    /// is only deserialized if you call [`LazyValue::into_value`].
+    ///
+    /// # Examples
+    ///
+    /// ```rust
+    /// use ic_stable_structures::{BTreeMap, DefaultMemoryImpl, btreemap::entry::Entry};
+    ///
+    /// let mut map: BTreeMap<u32, u32, _> = BTreeMap::new(DefaultMemoryImpl::default());
+    /// map.insert(1, 42);
+    ///
+    /// if let Entry::Occupied(e) = map.entry(1) {
+    ///     assert_eq!(e.remove().into_value(), 42);
+    /// }
+    /// assert!(map.is_empty());
+    /// ```
+    pub fn remove(self) -> LazyValue {
+        // The single-node fast paths may take an entry out of `self.node` without rebalancing
+        // its parent, so they are only sound when the node is the root or can spare an entry.
+        let shrinkable = self.node.can_remove_entry_without_merging()
+            || self.node.address() == self.map.root_addr;
+        let bytes = match self.node.node_type() {
+            NodeType::Leaf if shrinkable => self.map.remove_from_leaf_node(self.node, self.idx),
+            NodeType::Internal if shrinkable => {
+                self.map.remove_from_internal_node(self.node, self.idx, &self.key)
+            }
+            _ => {
+                // TODO: avoid this slow path (a full top-down removal starting at the root).
+                let root = self.map.load_node(self.map.root_addr);
+                self.map.remove_helper(root, &self.key).expect("key must exist")
+            }
+        };
+        LazyValue::new(bytes)
+    }
+}
+
+impl LazyValue {
+    pub(crate) fn new(bytes: Vec) -> Self { // wraps raw bytes; nothing is decoded here
+        LazyValue {
+            bytes,
+            phantom_data: PhantomData,
+        }
+    }
+
+    /// Deserializes and returns the value.
+ pub fn into_value(self) -> T { + T::from_bytes(Cow::Owned(self.bytes)) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::cell::RefCell; + use std::rc::Rc; + + fn new_map() -> BTreeMap>>> { + BTreeMap::new(Rc::new(RefCell::new(Vec::new()))) + } + + #[test] + fn entry_end_to_end() { + let mut map = new_map(); + + for i in 0u32..100 { + let Entry::Vacant(e) = map.entry(i) else { + panic!(); + }; + e.insert(i); + } + + for i in 0u32..100 { + let Entry::Occupied(e) = map.entry(i) else { + panic!(); + }; + assert_eq!(i, e.get()); + let old = e.insert(i + 1).into_value(); + assert_eq!(old, i); + } + + for i in 0u32..100 { + let Entry::Occupied(e) = map.entry(i) else { + panic!(); + }; + assert_eq!(i + 1, e.get()); + let removed = e.remove().into_value(); + assert_eq!(removed, i + 1); + } + + assert!(map.is_empty()); + } + + #[test] + fn or_insert_vacant() { + let mut map = new_map(); + assert_eq!(map.entry(1).or_insert(42).get(), 42); + assert_eq!(map.get(&1), Some(42)); + } + + #[test] + fn or_insert_occupied() { + let mut map = new_map(); + map.insert(1, 10); + assert_eq!(map.entry(1).or_insert(99).get(), 10); // default ignored + } + + #[test] + fn or_insert_with() { + let mut map = new_map(); + map.entry(1).or_insert_with(|| 7u32); + assert_eq!(map.get(&1), Some(7)); + // closure is not called when key is present + map.entry(1) + .or_insert_with(|| panic!("should not be called")); + assert_eq!(map.get(&1), Some(7)); + } + + #[test] + fn or_insert_with_key() { + let mut map = new_map(); + map.entry(6).or_insert_with_key(|&k| k * 3); + assert_eq!(map.get(&6), Some(18)); + } + + #[test] + fn or_default() { + let mut map = new_map(); + map.entry(1).or_default(); + assert_eq!(map.get(&1), Some(0u32)); + } + + #[test] + fn and_modify_occupied() { + let mut map = new_map(); + map.insert(1, 10); + map.entry(1).and_modify(|v| *v += 5); + assert_eq!(map.get(&1), Some(15)); + } + + #[test] + fn and_modify_vacant() { + let mut map = new_map(); + // closure must not 
be called; map must stay empty + map.entry(1).and_modify(|_| panic!("should not be called")); + assert_eq!(map.get(&1), None); + } + + #[test] + fn and_modify_then_or_insert() { + let mut map = new_map(); + map.insert(1, 10u32); + + map.entry(1).and_modify(|v| *v += 1).or_insert(1); + assert_eq!(map.get(&1), Some(11)); + + map.entry(2).and_modify(|v| *v += 1).or_insert(1); + assert_eq!(map.get(&2), Some(1)); + } + + #[test] + fn occupied_insert_returns_old_value() { + let mut map = new_map(); + map.insert(1, 10); + let Entry::Occupied(e) = map.entry(1) else { + panic!(); + }; + assert_eq!(e.insert(99).into_value(), 10); + assert_eq!(map.get(&1), Some(99)); + } + + #[test] + fn occupied_remove_returns_value() { + let mut map = new_map(); + map.insert(1, 42); + let Entry::Occupied(e) = map.entry(1) else { + panic!(); + }; + assert_eq!(e.remove().into_value(), 42); + assert!(map.is_empty()); + } + + #[test] + fn or_insert_on_empty_map_then_drop() { + // Dropping a VacantEntry on an empty map without inserting must not corrupt the map. + let mut map = new_map(); + map.entry(1).and_modify(|_| panic!("should not be called")); + assert_eq!(map.get(&1), None); + // The map must still be usable after the drop. 
+ map.insert(1, 99); + assert_eq!(map.get(&1), Some(99)); + } +} diff --git a/src/btreemap/proptests.rs b/src/btreemap/proptests.rs index 523c0b4c..f3431a5b 100644 --- a/src/btreemap/proptests.rs +++ b/src/btreemap/proptests.rs @@ -1,3 +1,4 @@ +use crate::btreemap::entry::Entry; use crate::{ btreemap::{ tests::{b, make_memory, run_btree_test}, @@ -9,7 +10,8 @@ use crate::{ use proptest::collection::btree_set as pset; use proptest::collection::vec as pvec; use proptest::prelude::*; -use std::collections::{BTreeMap as StdBTreeMap, BTreeSet}; +use std::collections::{btree_map, BTreeMap as StdBTreeMap, BTreeSet}; +use std::ops::BitXor; use test_strategy::proptest; #[derive(Debug, Clone)] @@ -21,6 +23,7 @@ enum Operation { Values { from: usize, len: usize }, Get(usize), Remove(usize), + EntryInsertOrXor { key: Vec, value: Vec }, Range { from: usize, len: usize }, PopLast, PopFirst, @@ -43,6 +46,8 @@ fn operation_strategy() -> impl Strategy { .prop_map(|(from, len)| Operation::Values { from, len }), 50 => (any::()).prop_map(Operation::Get), 15 => (any::()).prop_map(Operation::Remove), + 10 => (any::>(), any::>()) + .prop_map(|(key, value)| Operation::EntryInsertOrXor { key, value }), 5 => (any::(), any::()) .prop_map(|(from, len)| Operation::Range { from, len }), 2 => Just(Operation::PopFirst), @@ -204,6 +209,67 @@ fn no_memory_leaks(#[strategy(pvec(pvec(0..u8::MAX, 100..10_000), 100))] keys: V assert_eq!(btree.allocator.num_allocated_chunks(), 0); } +#[proptest] +fn entry( + #[strategy(pvec(0..255u8, 10))] keys: Vec, + #[strategy(pvec(0..3u8, 10))] operations: Vec, +) { + run_btree_test(|mut btree| { + let mut std_map = StdBTreeMap::new(); + + // Operations (if Occupied): + // 0 - insert + // 1 - increment + // 2 - remove + // + // Operations (if Vacant): + // - always insert + for (key, operation) in keys.iter().copied().zip(operations.iter().copied()) { + let entry = btree.entry(key); + let std_entry = std_map.entry(key); + let occupied = matches!(entry, 
Entry::Occupied(_)); + let std_occupied = matches!(std_entry, btree_map::Entry::Occupied(_)); + assert_eq!(occupied, std_occupied); + + match operation { + 0 => { + entry.and_modify(|v| *v = key).or_insert(key); + std_entry.and_modify(|v| *v = key).or_insert(key); + } + 1 => { + entry.and_modify(|v| *v += 1).or_insert(key); + std_entry.and_modify(|v| *v += 1).or_insert(key); + } + 2 => { + match entry { + Entry::Occupied(e) => { + e.remove(); + } + Entry::Vacant(e) => { + e.insert(key); + } + } + match std_entry { + btree_map::Entry::Occupied(e) => { + e.remove(); + } + btree_map::Entry::Vacant(e) => { + e.insert(key); + } + } + } + _ => unreachable!(), + } + } + + let entries: Vec<_> = btree.iter().map(|e| (*e.key(), e.value())).collect(); + let std_entries: Vec<_> = std_map.into_iter().collect(); + + prop_assert_eq!(entries, std_entries); + Ok(()) + }); +} + // Given an operation, executes it on the given stable btreemap and standard btreemap, verifying // that the result of the operation is equal in both btrees. fn execute_operation( @@ -332,6 +398,32 @@ fn execute_operation( assert_eq!(btree.remove(&k), Some(v)); } } + Operation::EntryInsertOrXor { key, value } => { + std_btree + .entry(key.clone()) + .and_modify(|existing| { + *existing = existing + .iter() + .zip(value.clone()) + .map(|(l, r)| l.bitxor(r)) + .collect::>(); + }) + .or_insert(value.clone()); + + btree + .entry(key.clone()) + .and_modify(|existing| { + *existing = existing + .iter() + .zip(value.clone()) + .map(|(l, r)| l.bitxor(r)) + .collect::>(); + }) + .or_insert(value); + + assert_eq!(btree.get(&key).as_ref(), std_btree.get(&key)); + } + Operation::Range { from, len } => { assert_eq!(std_btree.len(), btree.len() as usize); if std_btree.is_empty() {