[btree_builder] Fix reference counts of btree nodes

A leaf node should be counted only if it is referenced by some internal
nodes, since the leaves generated by <def> sections might be unshifted
and merged with exclusive mappings or other shared defs, or they might
not even be used by any of the devices if the xml was tampered. The
internal nodes should be handled in the same manner.

The new 'reserve' operation is designed for this purpose. Applications
could reserve a block for later use, without setting its ref count
immediately. This way saves the hassles of tracking unused leaves.
This commit is contained in:
Ming-Hung Tsai 2021-06-09 00:28:55 +08:00
parent 9e061a03a8
commit 6d16c5816e
3 changed files with 86 additions and 24 deletions

View File

@ -36,10 +36,16 @@ impl<Value> RefCounter<Value> for NoopRC {
}
/// Wraps a space map up to become a RefCounter.
struct SMRefCounter {
pub struct SMRefCounter {
sm: Arc<Mutex<dyn SpaceMap>>,
}
impl SMRefCounter {
pub fn new(sm: Arc<Mutex<dyn SpaceMap>>) -> SMRefCounter {
SMRefCounter { sm }
}
}
impl RefCounter<u64> for SMRefCounter {
fn get(&self, v: &u64) -> Result<u32> {
self.sm.lock().unwrap().get(*v)
@ -135,12 +141,16 @@ pub struct WriteResult {
loc: u64,
}
/// Write a node to a free metadata block.
fn write_node_<V: Unpack + Pack>(w: &mut WriteBatcher, mut node: Node<V>) -> Result<WriteResult> {
/// Write a node to a free metadata block, and mark the block as reserved,
/// without increasing its reference count.
fn write_reserved_node_<V: Unpack + Pack>(
w: &mut WriteBatcher,
mut node: Node<V>,
) -> Result<WriteResult> {
let keys = node.get_keys();
let first_key = *keys.first().unwrap_or(&0u64);
let b = w.alloc()?;
let b = w.reserve()?;
node.set_block(b.loc);
let mut cursor = Cursor::new(b.get_data());
@ -177,7 +187,7 @@ impl<V: Unpack + Pack> NodeIO<V> for LeafIO {
values,
};
write_node_(w, node)
write_reserved_node_(w, node)
}
fn read(&self, w: &mut WriteBatcher, block: u64) -> Result<(Vec<u64>, Vec<V>)> {
@ -210,7 +220,7 @@ impl NodeIO<u64> for InternalIO {
values,
};
write_node_(w, node)
write_reserved_node_(w, node)
}
fn read(&self, w: &mut WriteBatcher, block: u64) -> Result<(Vec<u64>, Vec<u64>)> {
@ -314,7 +324,6 @@ impl<'a, V: Pack + Unpack + Clone> NodeBuilder<V> {
// Add the remaining nodes.
for i in 1..nodes.len() {
let n = nodes.get(i).unwrap();
w.sm.lock().unwrap().inc(n.block, 1)?;
self.nodes.push(n.clone());
}
} else {
@ -323,7 +332,6 @@ impl<'a, V: Pack + Unpack + Clone> NodeBuilder<V> {
// add the nodes
for n in nodes {
w.sm.lock().unwrap().inc(n.block, 1)?;
self.nodes.push(n.clone());
}
}
@ -425,7 +433,6 @@ impl<'a, V: Pack + Unpack + Clone> NodeBuilder<V> {
fn unshift_node(&mut self, w: &mut WriteBatcher) -> Result<()> {
let ls = self.nodes.pop().unwrap();
let (keys, values) = self.read_node(w, ls.block)?;
w.sm.lock().unwrap().dec(ls.block)?;
let mut vals = VecDeque::new();
@ -473,7 +480,7 @@ impl<V: Unpack + Pack + Clone> Builder<V> {
while nodes.len() > 1 {
let mut builder = NodeBuilder::new(
Box::new(InternalIO {}),
Box::new(SMRefCounter { sm: w.sm.clone() }),
Box::new(SMRefCounter::new(w.sm.clone())),
);
for n in nodes {
@ -484,7 +491,14 @@ impl<V: Unpack + Pack + Clone> Builder<V> {
}
assert!(nodes.len() == 1);
Ok(nodes[0].block)
// The root is expected to be referenced by only one parent,
// hence the ref count is increased before the availability
// of it's parent.
let root = nodes[0].block;
w.sm.lock().unwrap().inc(root, 1)?;
Ok(root)
}
}

View File

@ -330,7 +330,6 @@ pub fn restore(opts: ThinRestoreOptions) -> Result<()> {
// Build data space map
let data_sm_root = build_data_sm(&mut w, pass.data_sm.lock().unwrap().deref())?;
// FIXME: I think we need to decrement the shared leaves
// Build metadata space map
let metadata_sm_root = build_metadata_sm(&mut w)?;

View File

@ -1,5 +1,6 @@
use anyhow::{anyhow, Result};
use std::collections::BTreeSet;
use std::ops::DerefMut;
use std::sync::{Arc, Mutex};
use crate::checksum;
@ -17,7 +18,33 @@ pub struct WriteBatcher {
batch_size: usize,
queue: Vec<Block>,
// The actual blocks allocated or reserved by this WriteBatcher
allocations: BTreeSet<u64>,
// The reserved range covers all the blocks allocated or reserved by this
// WriteBatcher, and the blocks already occupied. No blocks in this range
// are expected to be freed, hence a single range is used for the representation.
reserved: std::ops::Range<u64>,
}
pub fn find_free(sm: &mut dyn SpaceMap, reserved: &std::ops::Range<u64>) -> Result<u64> {
let nr_blocks = sm.get_nr_blocks()?;
let mut b;
if reserved.end >= reserved.start {
b = sm.find_free(reserved.end, nr_blocks)?;
if b.is_none() {
b = sm.find_free(0, reserved.start)?;
}
} else {
b = sm.find_free(reserved.end, reserved.start)?;
}
if b.is_none() {
return Err(anyhow!("out of metadata space"));
}
Ok(b.unwrap())
}
impl WriteBatcher {
@ -26,37 +53,59 @@ impl WriteBatcher {
sm: Arc<Mutex<dyn SpaceMap>>,
batch_size: usize,
) -> WriteBatcher {
let alloc_begin = sm.lock().unwrap().get_alloc_begin().unwrap_or(0);
WriteBatcher {
engine,
sm,
batch_size,
queue: Vec::with_capacity(batch_size),
allocations: BTreeSet::new(),
reserved: std::ops::Range {
start: alloc_begin,
end: alloc_begin,
},
}
}
pub fn alloc(&mut self) -> Result<Block> {
let mut sm = self.sm.lock().unwrap();
let b = sm.alloc()?;
let b = find_free(sm.deref_mut(), &self.reserved)?;
self.reserved.end = b + 1;
self.allocations.insert(b);
if b.is_none() {
return Err(anyhow!("out of metadata space"));
}
sm.set(b, 1)?;
self.allocations.insert(b.unwrap());
Ok(Block::new(b.unwrap()))
Ok(Block::new(b))
}
pub fn alloc_zeroed(&mut self) -> Result<Block> {
let mut sm = self.sm.lock().unwrap();
let b = sm.alloc()?;
let b = find_free(sm.deref_mut(), &self.reserved)?;
self.reserved.end = b + 1;
self.allocations.insert(b);
if b.is_none() {
return Err(anyhow!("out of metadata space"));
sm.set(b, 1)?;
Ok(Block::zeroed(b))
}
self.allocations.insert(b.unwrap());
Ok(Block::zeroed(b.unwrap()))
pub fn reserve(&mut self) -> Result<Block> {
let mut sm = self.sm.lock().unwrap();
let b = find_free(sm.deref_mut(), &self.reserved)?;
self.reserved.end = b + 1;
self.allocations.insert(b);
Ok(Block::new(b))
}
pub fn reserve_zeroed(&mut self) -> Result<Block> {
let mut sm = self.sm.lock().unwrap();
let b = find_free(sm.deref_mut(), &self.reserved)?;
self.reserved.end = b + 1;
self.allocations.insert(b);
Ok(Block::zeroed(b))
}
pub fn clear_allocations(&mut self) -> BTreeSet<u64> {