A B+ tree is a balanced search tree optimized for block devices (disks) and used as the primary indexing mechanism in relational databases and file systems. Key properties:
BETWEEN, index scans).M, the minimum is ⌈M/2⌉ - 1. This ensures height O(log n) and efficient memory/disk utilization.Internal node: stores up to m-1 keys and m child pointers. For keys k₁ < k₂ < … < kₙ, the following holds:
all keys in subtree child[0] are < k₁;
i = 1..n-1: kᵢ ≤ all keys in child[i] < kᵢ₊₁;child[n] are ≥ kₙ.m-1 records (key → value), as well as next and prev links for range traversal.The parameter m (branching factor) is chosen so that a node fits into a disk page or cache line.
i such that keys[i] ≤ target < keys[i+1]; move to children[i].O(log_m n) block reads.m, create a new leaf L', moving the right half into it. Copy the first key from L' (or the minimum separator) into the parent node.⌈m/2⌉ - 1 records (underflow), try to borrow one record from the left or right neighboring leaf (if it has enough entries). Update the corresponding separator in the parent.Below is an educational implementation of a B+ tree with order=3 (max keys = 2), supporting insert, search, and delete. The code contains detailed comments in English.
from bisect import bisect_left, bisect_right
class Node:
"""Base class for a B+ tree node."""
def __init__(self, is_leaf=False):
self.keys = [] # keys (for leaves – together with data)
self.children = [] # for internal nodes – references to children
self.is_leaf = is_leaf
self.next = None # linked list of leaves
self.prev = None
class Leaf(Node):
"""Leaf node: stores data as key-value mappings."""
def __init__(self):
super().__init__(is_leaf=True)
self.data = [] # values corresponding to keys
class InternalNode(Node):
"""Internal node."""
pass
class BPlusTree:
def __init__(self, order=3):
self.root = Leaf() # initially, root is an empty leaf
self.order = order # maximum number of children
@property
def max_keys(self):
return self.order - 1
@property
def min_keys(self):
# minimum number of keys for a non-root node
return (self.order + 1) // 2 - 1 # ceil(order/2) - 1
# ---------- SEARCH ----------
def search(self, key):
"""Returns the value for a key or None."""
leaf = self._find_leaf(key)
idx = bisect_left(leaf.keys, key)
if idx < len(leaf.keys) and leaf.keys[idx] == key:
return leaf.data[idx]
return None
def _find_leaf(self, key):
"""Finds the leaf where the key should be located."""
node = self.root
while not node.is_leaf:
# for internal nodes: keys[i] are separators,
# children[i] points to subtree with keys < keys[i]
# typically find the last index i where key >= keys[i]
i = bisect_right(node.keys, key)
node = node.children[i]
return node
# ---------- INSERTION ----------
def insert(self, key, value):
"""Inserts key and value."""
leaf = self._find_leaf(key)
self._insert_into_leaf(leaf, key, value)
def _insert_into_leaf(self, leaf, key, value):
idx = bisect_left(leaf.keys, key)
# if key already exists, update value
if idx < len(leaf.keys) and leaf.keys[idx] == key:
leaf.data[idx] = value
return
leaf.keys.insert(idx, key)
leaf.data.insert(idx, value)
if len(leaf.keys) > self.max_keys:
self._split_leaf(leaf)
def _split_leaf(self, leaf):
"""Splits an overflowing leaf into two."""
mid = len(leaf.keys) // 2
new_leaf = Leaf()
# move right half into new leaf
new_leaf.keys = leaf.keys[mid:]
new_leaf.data = leaf.data[mid:]
leaf.keys = leaf.keys[:mid]
leaf.data = leaf.data[:mid]
# relink leaf linked list
new_leaf.next = leaf.next
new_leaf.prev = leaf
if leaf.next:
leaf.next.prev = new_leaf
leaf.next = new_leaf
# separator key = first key of new leaf
separator = new_leaf.keys[0]
# insert separator into parent
if leaf is self.root:
# create new root if root leaf splits
new_root = InternalNode()
new_root.keys = [separator]
new_root.children = [leaf, new_leaf]
self.root = new_root
else:
self._insert_into_parent(leaf, separator, new_leaf)
def _insert_into_parent(self, left_child, key, right_child):
"""Inserts separator key and right child into parent node."""
parent = self._find_parent(self.root, left_child)
idx = parent.children.index(left_child)
parent.keys.insert(idx, key)
parent.children.insert(idx + 1, right_child)
if len(parent.keys) > self.max_keys:
self._split_internal(parent)
def _split_internal(self, node):
"""Splits an overflowing internal node."""
mid = len(node.keys) // 2
mid_key = node.keys[mid]
new_node = InternalNode()
new_node.keys = node.keys[mid+1:]
new_node.children = node.children[mid+1:]
node.keys = node.keys[:mid]
node.children = node.children[:mid+1]
if node is self.root:
new_root = InternalNode()
new_root.keys = [mid_key]
new_root.children = [node, new_node]
self.root = new_root
else:
parent = self._find_parent(self.root, node)
self._insert_into_parent(node, mid_key, new_node)
def _find_parent(self, node, child):
"""Finds parent of child node starting from node."""
if node.is_leaf or child is self.root:
return None
for c in node.children:
if c is child:
return node
if not c.is_leaf:
p = self._find_parent(c, child)
if p:
return p
return None
# ---------- DELETION ----------
def delete(self, key):
"""Deletes a key from the tree if it exists."""
leaf = self._find_leaf(key)
idx = bisect_left(leaf.keys, key)
if idx >= len(leaf.keys) or leaf.keys[idx] != key:
return False
leaf.keys.pop(idx)
leaf.data.pop(idx)
if leaf is self.root:
return True
if len(leaf.keys) < self.min_keys:
self._handle_underflow(leaf)
return True
def _handle_underflow(self, node):
"""Handles node underflow."""
parent = self._find_parent(self.root, node)
if parent is None:
return
idx = parent.children.index(node)
left_sibling = parent.children[idx - 1] if idx > 0 else None
right_sibling = parent.children[idx + 1] if idx < len(parent.children)-1 else None
if left_sibling and len(left_sibling.keys) > self.min_keys:
self._borrow_from_left(node, left_sibling, parent, idx)
elif right_sibling and len(right_sibling.keys) > self.min_keys:
self._borrow_from_right(node, right_sibling, parent, idx)
else:
if left_sibling:
self._merge(left_sibling, node, parent, idx - 1)
elif right_sibling:
self._merge(node, right_sibling, parent, idx)
# Example usage omitted for brevity (logic remains unchanged)
What this code does:
order=4 means a maximum of 3 keys per node (both leaf and internal).Important: In this simplified version, redistribution for internal nodes (borrow_from_left/right for is_leaf=False) is not fully implemented to avoid overcomplicating the example. In production code, internal-node underflow is usually handled by first borrowing a key from a sibling via the parent separator and only then merging. Full implementations can be found in open-source libraries such as py-bplustree.
[deepseek-v4-pro]