From 861b2f21ff66413826e7ef46a0198f7c45269bb4 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Wed, 24 Jun 2020 13:32:47 +0100 Subject: [PATCH 01/17] [thin_shrink] Write thinp xml format reader and writer. Still need to tidy up the error handling. --- Cargo.lock | 17 ++ Cargo.toml | 2 + src/bin/thin_shrink.rs | 41 +++++ src/lib.rs | 2 + src/shrink/mod.rs | 3 + src/shrink/toplevel.rs | 22 +++ src/shrink/xml.rs | 353 +++++++++++++++++++++++++++++++++++++++++ 7 files changed, 440 insertions(+) create mode 100644 src/bin/thin_shrink.rs create mode 100644 src/shrink/mod.rs create mode 100644 src/shrink/toplevel.rs create mode 100644 src/shrink/xml.rs diff --git a/Cargo.lock b/Cargo.lock index 0ca6d78..508bd52 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -21,6 +21,11 @@ dependencies = [ "winapi 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "anyhow" +version = "1.0.31" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "arrayvec" version = "0.4.12" @@ -240,6 +245,14 @@ dependencies = [ "unicode-xid 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "quick-xml" +version = "0.18.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "memchr 2.3.3 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "quickcheck" version = "0.9.2" @@ -380,6 +393,7 @@ dependencies = [ name = "thinp" version = "0.1.0" dependencies = [ + "anyhow 1.0.31 (registry+https://github.com/rust-lang/crates.io-index)", "byteorder 1.3.4 (registry+https://github.com/rust-lang/crates.io-index)", "clap 2.33.1 (registry+https://github.com/rust-lang/crates.io-index)", "crc32c 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", @@ -390,6 +404,7 @@ dependencies = [ "num-derive 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", "num-traits 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)", "num_cpus 
1.13.0 (registry+https://github.com/rust-lang/crates.io-index)", + "quick-xml 0.18.1 (registry+https://github.com/rust-lang/crates.io-index)", "quickcheck 0.9.2 (registry+https://github.com/rust-lang/crates.io-index)", "quickcheck_macros 0.9.1 (registry+https://github.com/rust-lang/crates.io-index)", "rand 0.7.3 (registry+https://github.com/rust-lang/crates.io-index)", @@ -456,6 +471,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum adler32 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "5d2e7343e7fc9de883d1b0341e0b13970f764c14101234857d2ddafa1cb1cac2" "checksum aho-corasick 0.7.10 (registry+https://github.com/rust-lang/crates.io-index)" = "8716408b8bc624ed7f65d223ddb9ac2d044c0547b6fa4b0d554f3a9540496ada" "checksum ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b" +"checksum anyhow 1.0.31 (registry+https://github.com/rust-lang/crates.io-index)" = "85bb70cc08ec97ca5450e6eba421deeea5f172c0fc61f78b5357b2a8e8be195f" "checksum arrayvec 0.4.12 (registry+https://github.com/rust-lang/crates.io-index)" = "cd9fd44efafa8690358b7408d253adf110036b88f55672a933f01d616ad9b1b9" "checksum atty 0.2.14 (registry+https://github.com/rust-lang/crates.io-index)" = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" "checksum autocfg 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "f8aac770f1885fd7e387acedd76065302551364496e46b3dd00860b2f8359b9d" @@ -484,6 +500,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum num_cpus 1.13.0 (registry+https://github.com/rust-lang/crates.io-index)" = "05499f3756671c15885fee9034446956fff3f243d6077b91e5767df161f766b3" "checksum ppv-lite86 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "237a5ed80e274dbc66f86bd59c1e25edc039660be53194b5fe0a482e0f2612ea" "checksum proc-macro2 1.0.18 (registry+https://github.com/rust-lang/crates.io-index)" = 
"beae6331a816b1f65d04c45b078fd8e6c93e8071771f41b8163255bbd8d7c8fa" +"checksum quick-xml 0.18.1 (registry+https://github.com/rust-lang/crates.io-index)" = "3cc440ee4802a86e357165021e3e255a9143724da31db1e2ea540214c96a0f82" "checksum quickcheck 0.9.2 (registry+https://github.com/rust-lang/crates.io-index)" = "a44883e74aa97ad63db83c4bf8ca490f02b2fc02f92575e720c8551e843c945f" "checksum quickcheck_macros 0.9.1 (registry+https://github.com/rust-lang/crates.io-index)" = "608c156fd8e97febc07dc9c2e2c80bf74cfc6ef26893eae3daf8bc2bc94a4b7f" "checksum quote 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)" = "54a21852a652ad6f610c9510194f398ff6f8692e334fd1145fed931f7fbe44ea" diff --git a/Cargo.toml b/Cargo.toml index e1e1c7c..cf59ab7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,11 +6,13 @@ edition = "2018" license = "GPL3" [dependencies] +anyhow = "1.0" byteorder = "1.3" clap = "2.33" crc32c = "0.4" flate2 = "1.0" libc = "0.2.71" +quick-xml = "0.18" nix = "0.17" nom = "5.1" num_cpus = "1.13" diff --git a/src/bin/thin_shrink.rs b/src/bin/thin_shrink.rs new file mode 100644 index 0000000..55632c5 --- /dev/null +++ b/src/bin/thin_shrink.rs @@ -0,0 +1,41 @@ +extern crate clap; +extern crate thinp; + +use clap::{App, Arg}; +use std::process::exit; +use thinp::file_utils; + +fn main() { + let parser = App::new("thin_shrink") + .version(thinp::version::TOOLS_VERSION) + .about("Rewrite xml metadata and move data in an inactive pool.") + .arg(Arg::with_name("INPUT") + .help("Specify thinp metadata xml file") + .required(true) + .long("input") + .value_name("INPUT") + .takes_value(true)) + .arg(Arg::with_name("OUTPUT") + .help("Specify output xml file") + .required(true) + .long("output") + .value_name("OUTPUT") + .takes_value(true)); + + let matches = parser.get_matches(); + + // FIXME: check these look like xml + let input_file = matches.value_of("INPUT").unwrap(); + let map_file = matches.value_of("MAP").unwrap(); + let output_file = matches.value_of("OUTPUT").unwrap(); + 
+ if !file_utils::file_exists(input_file) { + eprintln!("Couldn't find input file '{}'.", &input_file); + exit(1); + } + + if let Err(reason) = thinp::shrink::toplevel::shrink(&input_file, &output_file) { + println!("Application error: {}\n", reason); + exit(1); + } +} diff --git a/src/lib.rs b/src/lib.rs index cf410c5..412f468 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,3 +1,4 @@ +extern crate anyhow; extern crate byteorder; extern crate crc32c; extern crate flate2; @@ -18,4 +19,5 @@ pub mod block_manager; pub mod check; pub mod file_utils; pub mod pack; +pub mod shrink; pub mod version; diff --git a/src/shrink/mod.rs b/src/shrink/mod.rs new file mode 100644 index 0000000..a2c8345 --- /dev/null +++ b/src/shrink/mod.rs @@ -0,0 +1,3 @@ +pub mod toplevel; + +mod xml; diff --git a/src/shrink/toplevel.rs b/src/shrink/toplevel.rs new file mode 100644 index 0000000..0a647c2 --- /dev/null +++ b/src/shrink/toplevel.rs @@ -0,0 +1,22 @@ +use anyhow::Result; +use std::fs::OpenOptions; +use std::os::unix::fs::OpenOptionsExt; + +use crate::shrink::xml; + +//--------------------------------------- + +pub fn shrink(input_file: &str, _output_file: &str, _map_file: &str) -> Result<()> { + let input = OpenOptions::new() + .read(true) + .write(false) + .custom_flags(libc::O_EXCL) + .open(input_file)?; + + let mut visitor = xml::XmlWriter::new(std::io::stdout()); + xml::read(input, &mut visitor)?; + + Ok(()) +} + +//--------------------------------------- diff --git a/src/shrink/xml.rs b/src/shrink/xml.rs new file mode 100644 index 0000000..60cd3c4 --- /dev/null +++ b/src/shrink/xml.rs @@ -0,0 +1,353 @@ +use anyhow::Result; +use std::{ + borrow::{Cow}, + fmt::Display, + io::prelude::*, + io::BufReader, + io::Write, +}; + +use quick_xml::events::attributes::Attribute; +use quick_xml::events::{BytesEnd, BytesStart, Event}; +use quick_xml::{Reader, Writer}; + +//--------------------------------------- + +pub struct Superblock { + uuid: String, + time: u64, + transaction: u64, + flags: 
Option, + version: Option, + data_block_size: u32, + nr_data_blocks: u64, + metadata_snap: Option, +} + +pub struct Device { + dev_id: u32, + mapped_blocks: u64, + transaction: u64, + creation_time: u64, + snap_time: u64, +} + +pub struct Map { + thin_begin: u64, + data_begin: u64, + time: u32, + len: u64, +} + +pub trait MetadataVisitor { + fn superblock_b(&mut self, sb: &Superblock) -> Result<()>; + fn superblock_e(&mut self) -> Result<()>; + + fn device_b(&mut self, d: &Device) -> Result<()>; + fn device_e(&mut self) -> Result<()>; + + fn map(&mut self, m: Map) -> Result<()>; + + fn eof(&mut self) -> Result<()>; +} + +pub struct XmlWriter { + w: Writer, +} + +impl XmlWriter { + pub fn new(w: W) -> XmlWriter { + XmlWriter { w: Writer::new_with_indent(w, 0x20, 2) } + } +} + +fn mk_attr_<'a, T: Display>(n: T) -> Cow<'a, [u8]> { + let str = format!("{}", n); + Cow::Owned(str.into_bytes()) +} + +fn mk_attr<'a, T: Display>(key: &[u8], value: T) -> Attribute { + Attribute { + key, + value: mk_attr_(value), + } +} + +const XML_VERSION: u32 = 2; + +impl MetadataVisitor for XmlWriter { + fn superblock_b(&mut self, sb: &Superblock) -> Result<()> { + let tag = b"superblock"; + let mut elem = BytesStart::owned(tag.to_vec(), tag.len()); + elem.push_attribute(mk_attr(b"uuid", sb.uuid.clone())); + elem.push_attribute(mk_attr(b"time", sb.time)); + elem.push_attribute(mk_attr(b"transaction", sb.transaction)); + if let Some(flags) = sb.flags { + // FIXME: is this really a nr? 
+ elem.push_attribute(mk_attr(b"flags", flags)); + } + + elem.push_attribute(mk_attr(b"version", XML_VERSION)); + elem.push_attribute(mk_attr(b"data_block_size", sb.data_block_size)); + elem.push_attribute(mk_attr(b"nr_data_blocks", sb.nr_data_blocks)); + + if let Some(snap) = sb.metadata_snap { + elem.push_attribute(mk_attr(b"metadata_snap", snap)); + } + + self.w.write_event(Event::Start(elem))?; + Ok(()) + } + + fn superblock_e(&mut self) -> Result<()> { + self.w + .write_event(Event::End(BytesEnd::borrowed(b"superblock")))?; + Ok(()) + } + + fn device_b(&mut self, d: &Device) -> Result<()> { + let tag = b"device"; + let mut elem = BytesStart::owned(tag.to_vec(), tag.len()); + elem.push_attribute(mk_attr(b"dev_id", d.dev_id)); + elem.push_attribute(mk_attr(b"mapped_blocks", d.mapped_blocks)); + elem.push_attribute(mk_attr(b"transaction", d.transaction)); + elem.push_attribute(mk_attr(b"creation_time", d.creation_time)); + elem.push_attribute(mk_attr(b"snap_time", d.snap_time)); + self.w.write_event(Event::Start(elem))?; + Ok(()) + } + + fn device_e(&mut self) -> Result<()> { + self.w + .write_event(Event::End(BytesEnd::borrowed(b"device")))?; + Ok(()) + } + + fn map(&mut self, m: Map) -> Result<()> { + match m.len { + 1 => { + let tag = b"single_mapping"; + let mut elem = BytesStart::owned(tag.to_vec(), tag.len()); + elem.push_attribute(mk_attr(b"origin_block", m.thin_begin)); + elem.push_attribute(mk_attr(b"data_block", m.data_begin)); + elem.push_attribute(mk_attr(b"time", m.time)); + self.w.write_event(Event::Empty(elem))?; + } + _ => { + let tag = b"range_mapping"; + let mut elem = BytesStart::owned(tag.to_vec(), tag.len()); + elem.push_attribute(mk_attr(b"origin_begin", m.thin_begin)); + elem.push_attribute(mk_attr(b"data_begin", m.data_begin)); + elem.push_attribute(mk_attr(b"length", m.len)); + elem.push_attribute(mk_attr(b"time", m.time)); + self.w.write_event(Event::Empty(elem))?; + } + } + Ok(()) + } + + fn eof(&mut self) -> Result<()> { + let w = 
self.w.inner(); + w.flush()?; + Ok(()) + } +} + +//--------------------------------------- + +// FIXME: nasty unwraps +fn string_val(kv: &Attribute) -> String { + let v = kv.unescaped_value().unwrap(); + let bytes = v.to_vec(); + String::from_utf8(bytes).unwrap() +} + +// FIXME: there's got to be a way of doing this without copying the string +fn u64_val(kv: &Attribute) -> Result { + let n = string_val(kv).parse::()?; + Ok(n) +} + +fn u32_val(kv: &Attribute) -> Result { + let n = string_val(kv).parse::()?; + Ok(n) +} + +fn bad_attr(_tag: &str, _attr: &[u8]) -> Result { + todo!(); +} + +fn missing_attr(_tag: &str, _attr: &str) -> Result { + todo!(); +} + +fn check_attr(tag: &str, name: &str, maybe_v: Option) -> Result { + match maybe_v { + None => missing_attr(tag, name), + Some(v) => Ok(v) + } +} + +fn parse_superblock(e: &BytesStart) -> Result { + let mut uuid: Option = None; + let mut time: Option = None; + let mut transaction: Option = None; + let mut flags: Option = None; + let mut version: Option = None; + let mut data_block_size: Option = None; + let mut nr_data_blocks: Option = None; + let mut metadata_snap: Option = None; + + for a in e.attributes() { + let kv = a.unwrap(); + match kv.key { + b"uuid" => uuid = Some(string_val(&kv)), + b"time" => time = Some(u64_val(&kv)?), + b"transaction" => transaction = Some(u64_val(&kv)?), + b"flags" => flags = Some(u32_val(&kv)?), + b"version" => version = Some(u32_val(&kv)?), + b"data_block_size" => data_block_size = Some(u32_val(&kv)?), + b"nr_data_blocks" => nr_data_blocks = Some(u64_val(&kv)?), + b"metadata_snap" => metadata_snap = Some(u64_val(&kv)?), + _ => return bad_attr("superblock", kv.key), + } + } + + let tag = "superblock"; + + Ok(Superblock { + uuid: check_attr(tag, "uuid", uuid)?, + time: check_attr(tag, "time", time)?, + transaction: check_attr(tag, "transaction", transaction)?, + flags: flags, + version: version, + data_block_size: check_attr(tag, "data_block_size", data_block_size)?, + nr_data_blocks: 
check_attr(tag, "nr_data_blocks", nr_data_blocks)?, + metadata_snap: metadata_snap, + }) +} + +fn parse_device(e: &BytesStart) -> Result { + let mut dev_id: Option = None; + let mut mapped_blocks: Option = None; + let mut transaction: Option = None; + let mut creation_time: Option = None; + let mut snap_time: Option = None; + + for a in e.attributes() { + let kv = a.unwrap(); + match kv.key { + b"dev_id" => dev_id = Some(u32_val(&kv)?), + b"mapped_blocks" => mapped_blocks = Some(u64_val(&kv)?), + b"transaction" => transaction = Some(u64_val(&kv)?), + b"creation_time" => creation_time = Some(u64_val(&kv)?), + b"snap_time" => snap_time = Some(u64_val(&kv)?), + _ => return bad_attr("device", kv.key), + } + } + + let tag = "device"; + + Ok(Device { + dev_id: check_attr(tag, "dev_id", dev_id)?, + mapped_blocks: check_attr(tag, "mapped_blocks", mapped_blocks)?, + transaction: check_attr(tag, "transaction", transaction)?, + creation_time: check_attr(tag, "creation_time", creation_time)?, + snap_time: check_attr(tag, "snap_time", snap_time)?, + }) +} + +fn parse_single_map(e: &BytesStart) -> Result { + let mut thin_begin: Option = None; + let mut data_begin: Option = None; + let mut time: Option = None; + + for a in e.attributes() { + let kv = a.unwrap(); + match kv.key { + b"origin_block" => thin_begin = Some(u64_val(&kv)?), + b"data_block" => data_begin = Some(u64_val(&kv)?), + b"time" => time = Some(u32_val(&kv)?), + _ => return bad_attr("single_mapping", kv.key), + } + } + + let tag = "single_mapping"; + + Ok(Map { + thin_begin: check_attr(tag, "origin_block", thin_begin)?, + data_begin: check_attr(tag, "data_block", data_begin)?, + time: check_attr(tag, "time", time)?, + len: 1 + }) +} + +fn parse_range_map(e: &BytesStart) -> Result { + let mut thin_begin: Option = None; + let mut data_begin: Option = None; + let mut time: Option = None; + let mut length: Option = None; + + for a in e.attributes() { + let kv = a.unwrap(); + match kv.key { + b"origin_begin" => 
thin_begin = Some(u64_val(&kv)?), + b"data_begin" => data_begin = Some(u64_val(&kv)?), + b"time" => time = Some(u32_val(&kv)?), + b"length" => length = Some(u64_val(&kv)?), + _ => return bad_attr("range_mapping", kv.key), + } + } + + let tag = "range_mapping"; + + Ok(Map { + thin_begin: check_attr(tag, "origin_begin", thin_begin)?, + data_begin: check_attr(tag, "data_begin", data_begin)?, + time: check_attr(tag, "time", time)?, + len: check_attr(tag, "length", length)?, + }) +} + +pub fn read(input: R, visitor: &mut M) -> Result<()> +where + R: Read, + M: MetadataVisitor, +{ + let input = BufReader::new(input); + let mut reader = Reader::from_reader(input); + + reader.trim_text(true); + let mut buf = Vec::new(); + + loop { + match reader.read_event(&mut buf) { + Ok(Event::Start(ref e)) => match e.name() { + b"superblock" => visitor.superblock_b(&parse_superblock(e)?)?, + b"device" => visitor.device_b(&parse_device(e)?)?, + _ => todo!(), + }, + Ok(Event::End(ref e)) => match e.name() { + b"superblock" => visitor.superblock_e()?, + b"device" => visitor.device_e()?, + _ => todo!(), + }, + Ok(Event::Empty(ref e)) => match e.name() { + b"single_mapping" => visitor.map(parse_single_map(e)?)?, + b"range_mapping" => visitor.map(parse_range_map(e)?)?, + _ => todo!(), + }, + Ok(Event::Text(_)) => {} + Ok(Event::Comment(_)) => {} + Ok(Event::Eof) => break, + Ok(_) => todo!(), + + // FIXME: don't panic! 
+ Err(e) => panic!("error parsing xml {:?}", e), + } + } + + Ok(()) +} + +//--------------------------------------- From 3f1b776359c2b344a26032947290e357eb2efb07 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Wed, 24 Jun 2020 13:55:08 +0100 Subject: [PATCH 02/17] [thin_shrink] Add NoopVisitor --- src/bin/thin_shrink.rs | 1 - src/shrink/toplevel.rs | 5 +++-- src/shrink/xml.rs | 19 +++++++++++++++++++ 3 files changed, 22 insertions(+), 3 deletions(-) diff --git a/src/bin/thin_shrink.rs b/src/bin/thin_shrink.rs index 55632c5..7c533be 100644 --- a/src/bin/thin_shrink.rs +++ b/src/bin/thin_shrink.rs @@ -26,7 +26,6 @@ fn main() { // FIXME: check these look like xml let input_file = matches.value_of("INPUT").unwrap(); - let map_file = matches.value_of("MAP").unwrap(); let output_file = matches.value_of("OUTPUT").unwrap(); if !file_utils::file_exists(input_file) { diff --git a/src/shrink/toplevel.rs b/src/shrink/toplevel.rs index 0a647c2..53dfbe1 100644 --- a/src/shrink/toplevel.rs +++ b/src/shrink/toplevel.rs @@ -6,14 +6,15 @@ use crate::shrink::xml; //--------------------------------------- -pub fn shrink(input_file: &str, _output_file: &str, _map_file: &str) -> Result<()> { +pub fn shrink(input_file: &str, _output_file: &str) -> Result<()> { let input = OpenOptions::new() .read(true) .write(false) .custom_flags(libc::O_EXCL) .open(input_file)?; - let mut visitor = xml::XmlWriter::new(std::io::stdout()); + // let mut visitor = xml::XmlWriter::new(std::io::stdout()); + let mut visitor = xml::NoopVisitor::new(); xml::read(input, &mut visitor)?; Ok(()) diff --git a/src/shrink/xml.rs b/src/shrink/xml.rs index 60cd3c4..f060016 100644 --- a/src/shrink/xml.rs +++ b/src/shrink/xml.rs @@ -51,6 +51,25 @@ pub trait MetadataVisitor { fn eof(&mut self) -> Result<()>; } +pub struct NoopVisitor { +} + +impl NoopVisitor { + pub fn new() -> NoopVisitor { NoopVisitor {} } +} + +impl MetadataVisitor for NoopVisitor { + fn superblock_b(&mut self, _sb: &Superblock) -> Result<()> {Ok(())} 
+ fn superblock_e(&mut self) -> Result<()> {Ok(())} + + fn device_b(&mut self, _d: &Device) -> Result<()> {Ok(())} + fn device_e(&mut self) -> Result<()> {Ok(())} + + fn map(&mut self, _m: Map) -> Result<()> {Ok(())} + + fn eof(&mut self) -> Result<()> {Ok(())} +} + pub struct XmlWriter { w: Writer, } From 259eef9eee9d94b6b4a636ca30281eef77ae3b89 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Thu, 25 Jun 2020 10:44:57 +0100 Subject: [PATCH 03/17] [thin_shrink] calculate remaps --- Cargo.lock | 7 ++ Cargo.toml | 1 + src/bin/thin_shrink.rs | 42 +++++--- src/shrink/toplevel.rs | 212 ++++++++++++++++++++++++++++++++++++++++- src/shrink/xml.rs | 40 ++++---- 5 files changed, 265 insertions(+), 37 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 508bd52..23083a1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -105,6 +105,11 @@ dependencies = [ "regex 1.3.9 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "fixedbitset" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "flate2" version = "1.0.14" @@ -397,6 +402,7 @@ dependencies = [ "byteorder 1.3.4 (registry+https://github.com/rust-lang/crates.io-index)", "clap 2.33.1 (registry+https://github.com/rust-lang/crates.io-index)", "crc32c 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", + "fixedbitset 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", "flate2 1.0.14 (registry+https://github.com/rust-lang/crates.io-index)", "libc 0.2.71 (registry+https://github.com/rust-lang/crates.io-index)", "nix 0.17.0 (registry+https://github.com/rust-lang/crates.io-index)", @@ -483,6 +489,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum crc32c 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "77ba37ef26c12988c1cee882d522d65e1d5d2ad8c3864665b88ee92767ed84c5" "checksum crc32fast 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = 
"ba125de2af0df55319f41944744ad91c71113bf74a4646efff39afe1f6842db1" "checksum env_logger 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)" = "44533bbbb3bb3c1fa17d9f2e4e38bbbaf8396ba82193c4cb1b6445d711445d36" +"checksum fixedbitset 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "2fc4fcacf5cd3681968f6524ea159383132937739c6c40dabab9e37ed515911b" "checksum flate2 1.0.14 (registry+https://github.com/rust-lang/crates.io-index)" = "2cfff41391129e0a856d6d822600b8d71179d46879e310417eb9c762eb178b42" "checksum getrandom 0.1.14 (registry+https://github.com/rust-lang/crates.io-index)" = "7abc8dd8451921606d809ba32e95b6111925cd2906060d2dcc29c070220503eb" "checksum hermit-abi 0.1.13 (registry+https://github.com/rust-lang/crates.io-index)" = "91780f809e750b0a89f5544be56617ff6b1227ee485bcb06ebe10cdf89bd3b71" diff --git a/Cargo.toml b/Cargo.toml index cf59ab7..dce61a9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,6 +11,7 @@ byteorder = "1.3" clap = "2.33" crc32c = "0.4" flate2 = "1.0" +fixedbitset = "0.3" libc = "0.2.71" quick-xml = "0.18" nix = "0.17" diff --git a/src/bin/thin_shrink.rs b/src/bin/thin_shrink.rs index 7c533be..ffb29ad 100644 --- a/src/bin/thin_shrink.rs +++ b/src/bin/thin_shrink.rs @@ -7,33 +7,47 @@ use thinp::file_utils; fn main() { let parser = App::new("thin_shrink") - .version(thinp::version::TOOLS_VERSION) + .version(thinp::version::TOOLS_VERSION) .about("Rewrite xml metadata and move data in an inactive pool.") - .arg(Arg::with_name("INPUT") - .help("Specify thinp metadata xml file") - .required(true) - .long("input") - .value_name("INPUT") - .takes_value(true)) - .arg(Arg::with_name("OUTPUT") - .help("Specify output xml file") - .required(true) - .long("output") - .value_name("OUTPUT") - .takes_value(true)); + .arg( + Arg::with_name("INPUT") + .help("Specify thinp metadata xml file") + .required(true) + .long("input") + .value_name("INPUT") + .takes_value(true), + ) + .arg( + Arg::with_name("OUTPUT") + .help("Specify output xml 
file") + .required(true) + .long("output") + .value_name("OUTPUT") + .takes_value(true), + ) + // FIXME: support various disk units + .arg( + Arg::with_name("SIZE") + .help("Specify new size for the pool (in data blocks)") + .required(true) + .long("nr-blocks") + .value_name("SIZE") + .takes_value(true), + ); let matches = parser.get_matches(); // FIXME: check these look like xml let input_file = matches.value_of("INPUT").unwrap(); let output_file = matches.value_of("OUTPUT").unwrap(); + let size = matches.value_of("SIZE").unwrap().parse::().unwrap(); if !file_utils::file_exists(input_file) { eprintln!("Couldn't find input file '{}'.", &input_file); exit(1); } - if let Err(reason) = thinp::shrink::toplevel::shrink(&input_file, &output_file) { + if let Err(reason) = thinp::shrink::toplevel::shrink(&input_file, &output_file, size) { println!("Application error: {}\n", reason); exit(1); } diff --git a/src/shrink/toplevel.rs b/src/shrink/toplevel.rs index 53dfbe1..0e9da5b 100644 --- a/src/shrink/toplevel.rs +++ b/src/shrink/toplevel.rs @@ -1,4 +1,5 @@ use anyhow::Result; +use fixedbitset::{FixedBitSet, IndexRange}; use std::fs::OpenOptions; use std::os::unix::fs::OpenOptionsExt; @@ -6,7 +7,178 @@ use crate::shrink::xml; //--------------------------------------- -pub fn shrink(input_file: &str, _output_file: &str) -> Result<()> { +#[derive(Debug)] +struct Pass1 { + // FIXME: Inefficient, use a range_set of some description + allocated_blocks: FixedBitSet, + + nr_blocks: u64, + + /// High blocks are beyond the new, reduced end of the pool. These + /// will need to be moved. 
+ nr_high_blocks: u64, +} + +impl Pass1 { + fn new(nr_blocks: u64) -> Pass1 { + Pass1 { + allocated_blocks: FixedBitSet::with_capacity(0), + nr_blocks, + nr_high_blocks: 0, + } + } +} + +impl xml::MetadataVisitor for Pass1 { + fn superblock_b(&mut self, sb: &xml::Superblock) -> Result<()> { + self.allocated_blocks.grow(sb.nr_data_blocks as usize); + Ok(()) + } + + fn superblock_e(&mut self) -> Result<()> { + Ok(()) + } + + fn device_b(&mut self, _d: &xml::Device) -> Result<()> { + Ok(()) + } + + fn device_e(&mut self) -> Result<()> { + Ok(()) + } + + fn map(&mut self, m: xml::Map) -> Result<()> { + for i in m.data_begin..(m.data_begin + m.len) { + if i > self.nr_blocks { + self.nr_high_blocks += 1; + } + self.allocated_blocks.insert(i as usize); + } + Ok(()) + } + + fn eof(&mut self) -> Result<()> { + Ok(()) + } +} + +type BlockRange = std::ops::Range; + +fn bits_to_ranges(bits: &FixedBitSet) -> Vec { + let mut ranges = Vec::new(); + let mut start = None; + + for i in 0..bits.len() { + match (bits[i], start) { + (false, None) => {} + (true, None) => { + start = Some((i as u64, 1)); + } + (false, Some((b, len))) => { + ranges.push(b..(b + len)); + start = None; + } + (true, Some((b, len))) => { + start = Some((b, len + 1)); + } + } + } + + if let Some((b, len)) = start { + ranges.push(b..(b + len)); + } + + ranges +} + +// Splits the ranges into those below threshold, and those equal or +// above threshold below threshold, and those equal or above threshold +fn ranges_split(ranges: &Vec, threshold: u64) -> (Vec, Vec) { + use std::ops::Range; + + let mut below = Vec::new(); + let mut above = Vec::new(); + for r in ranges { + match r { + Range { start, end } if *end <= threshold => below.push(*start..*end), + Range { start, end } if *start < threshold => { + below.push(*start..threshold); + above.push(threshold..*end); + } + Range { start, end } => above.push(*start..*end), + } + } + (below, above) +} + +fn negate_ranges(ranges: &Vec) -> Vec { + use std::ops::Range; + 
+ let mut result = Vec::new(); + let mut cursor = 0; + + for r in ranges { + match r { + Range { start, end } if cursor < *start => { + result.push(cursor..*start); + cursor = *end; + } + Range { start: _, end } => { + cursor = *end; + } + } + } + + result +} + +fn range_len(r: &BlockRange) -> u64 { + r.end - r.start +} + +fn ranges_total(rs: &Vec) -> u64 { + rs.into_iter().fold(0, |sum, r| sum + range_len(r)) +} + +// Assumes there is enough space to remap. +fn remap_ranges(ranges: Vec, free: Vec) -> Vec<(BlockRange, BlockRange)> { + use std::cmp::Ordering; + + let mut remap = Vec::new(); + let mut range_iter = ranges.into_iter(); + let mut free_iter = free.into_iter(); + + let mut r_ = range_iter.next(); + let mut f_ = free_iter.next(); + + while let (Some(r), Some(f)) = (r_, f_) { + let rlen = range_len(&r); + let flen = range_len(&f); + + match rlen.cmp(&flen) { + Ordering::Less => { + // range fits into the free chunk + remap.push((r, f.start..(f.start + rlen))); + f_ = Some((f.start + rlen)..f.end); + r_ = range_iter.next(); + }, + Ordering::Equal => { + remap.push((r, f)); + r_ = range_iter.next(); + f_ = free_iter.next(); + }, + Ordering::Greater => { + remap.push((r.start..(r.start + flen), f)); + r_ = Some((r.start + flen)..r.end); + f_ = free_iter.next(); + } + } + } + + remap +} + +pub fn shrink(input_file: &str, _output_file: &str, nr_blocks: u64) -> Result<()> { let input = OpenOptions::new() .read(true) .write(false) @@ -14,8 +186,42 @@ pub fn shrink(input_file: &str, _output_file: &str) -> Result<()> { .open(input_file)?; // let mut visitor = xml::XmlWriter::new(std::io::stdout()); - let mut visitor = xml::NoopVisitor::new(); - xml::read(input, &mut visitor)?; + // let mut visitor = xml::NoopVisitor::new(); + let mut pass1 = Pass1::new(nr_blocks); + xml::read(input, &mut pass1)?; + eprintln!("{} blocks need moving", pass1.nr_high_blocks); + + let mut free_blocks = 0u64; + for i in 0..pass1.allocated_blocks.len() { + if !pass1.allocated_blocks[i] { + 
free_blocks += 1; + } + } + eprintln!("{} free blocks below new end.", free_blocks); + + let ranges = bits_to_ranges(&pass1.allocated_blocks); + eprintln!("{} allocated ranges:", ranges.len()); + + eprintln!("{:?}", &ranges); + + let (below, above) = ranges_split(&ranges, nr_blocks); + eprintln!("ranges split at {}: ({:?}, {:?})", nr_blocks, below, above); + + let free = negate_ranges(&below); + eprintln!("free {:?}.", free); + + let nr_moving = ranges_total(&above); + eprintln!("{} blocks need to be remapped.", nr_moving); + + let free_blocks = ranges_total(&free); + eprintln!("{} free blocks.", free_blocks); + + if free_blocks < nr_moving { + panic!("Insufficient space"); + } + + let remaps = remap_ranges(above, free); + eprintln!("remappings {:?}.", remaps); Ok(()) } diff --git a/src/shrink/xml.rs b/src/shrink/xml.rs index f060016..1a3c9b7 100644 --- a/src/shrink/xml.rs +++ b/src/shrink/xml.rs @@ -14,29 +14,29 @@ use quick_xml::{Reader, Writer}; //--------------------------------------- pub struct Superblock { - uuid: String, - time: u64, - transaction: u64, - flags: Option, - version: Option, - data_block_size: u32, - nr_data_blocks: u64, - metadata_snap: Option, + pub uuid: String, + pub time: u64, + pub transaction: u64, + pub flags: Option, + pub version: Option, + pub data_block_size: u32, + pub nr_data_blocks: u64, + pub metadata_snap: Option, } pub struct Device { - dev_id: u32, - mapped_blocks: u64, - transaction: u64, - creation_time: u64, - snap_time: u64, + pub dev_id: u32, + pub mapped_blocks: u64, + pub transaction: u64, + pub creation_time: u64, + pub snap_time: u64, } pub struct Map { - thin_begin: u64, - data_begin: u64, - time: u32, - len: u64, + pub thin_begin: u64, + pub data_begin: u64, + pub time: u32, + pub len: u64, } pub trait MetadataVisitor { @@ -238,11 +238,11 @@ fn parse_superblock(e: &BytesStart) -> Result { uuid: check_attr(tag, "uuid", uuid)?, time: check_attr(tag, "time", time)?, transaction: check_attr(tag, "transaction", 
transaction)?, - flags: flags, - version: version, + flags, + version, data_block_size: check_attr(tag, "data_block_size", data_block_size)?, nr_data_blocks: check_attr(tag, "nr_data_blocks", nr_data_blocks)?, - metadata_snap: metadata_snap, + metadata_snap, }) } From d8957e3d86fed432cd1603f78b65c088f0061e61 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Thu, 25 Jun 2020 14:57:37 +0100 Subject: [PATCH 04/17] [thin_shrink] Rewrites xml Just need to do copying now --- src/shrink/toplevel.rs | 272 +++++++++++++++++++++++++++++++++++++++-- src/shrink/xml.rs | 10 +- 2 files changed, 267 insertions(+), 15 deletions(-) diff --git a/src/shrink/toplevel.rs b/src/shrink/toplevel.rs index 0e9da5b..394f9d3 100644 --- a/src/shrink/toplevel.rs +++ b/src/shrink/toplevel.rs @@ -1,6 +1,7 @@ use anyhow::Result; use fixedbitset::{FixedBitSet, IndexRange}; use std::fs::OpenOptions; +use std::io::Write; use std::os::unix::fs::OpenOptionsExt; use crate::shrink::xml; @@ -47,7 +48,7 @@ impl xml::MetadataVisitor for Pass1 { Ok(()) } - fn map(&mut self, m: xml::Map) -> Result<()> { + fn map(&mut self, m: &xml::Map) -> Result<()> { for i in m.data_begin..(m.data_begin + m.len) { if i > self.nr_blocks { self.nr_high_blocks += 1; @@ -62,6 +63,80 @@ impl xml::MetadataVisitor for Pass1 { } } +//--------------------------------------- + +// Writes remapped xml +struct Pass2 { + writer: xml::XmlWriter, + nr_blocks: u64, + remaps: Vec<(BlockRange, BlockRange)>, +} + +impl Pass2 { + fn new(w: W, nr_blocks: u64, remaps: Vec<(BlockRange, BlockRange)>) -> Pass2 { + Pass2 { + writer: xml::XmlWriter::new(w), + nr_blocks, + remaps, + } + } + + fn remap(&self, r: BlockRange) -> Vec { + let mut rmap = Vec::new(); + + // id + rmap.push(r.clone()); + + rmap + } +} + +impl xml::MetadataVisitor for Pass2 { + fn superblock_b(&mut self, sb: &xml::Superblock) -> Result<()> { + self.writer.superblock_b(sb) + } + + fn superblock_e(&mut self) -> Result<()> { + self.writer.superblock_e() + } + + fn device_b(&mut 
self, d: &xml::Device) -> Result<()> { + self.writer.device_b(d) + } + + fn device_e(&mut self) -> Result<()> { + self.writer.device_e() + } + + fn map(&mut self, m: &xml::Map) -> Result<()> { + if m.data_begin + m.len < self.nr_blocks { + // no remapping needed. + self.writer.map(m)?; + } else { + let r = m.data_begin..(m.data_begin + m.len); + let remaps = remap(&r, &self.remaps); + let mut written = 0; + + for r in remaps { + self.writer.map(&xml::Map { + thin_begin: m.thin_begin + written, + data_begin: r.start, + time: m.time, + len: range_len(&r), + })?; + written += range_len(&r); + } + } + + Ok(()) + } + + fn eof(&mut self) -> Result<()> { + self.writer.eof() + } +} + +//--------------------------------------- type BlockRange = std::ops::Range; fn bits_to_ranges(bits: &FixedBitSet) -> Vec { @@ -141,7 +216,7 @@ fn ranges_total(rs: &Vec) -> u64 { } // Assumes there is enough space to remap. -fn remap_ranges(ranges: Vec, free: Vec) -> Vec<(BlockRange, BlockRange)> { +fn build_remaps(ranges: Vec, free: Vec) -> Vec<(BlockRange, BlockRange)> { use std::cmp::Ordering; let mut remap = Vec::new(); @@ -161,12 +236,12 @@ fn remap_ranges(ranges: Vec, free: Vec) -> Vec<(BlockRan remap.push((r, f.start..(f.start + rlen))); f_ = Some((f.start + rlen)..f.end); r_ = range_iter.next(); - }, + } Ordering::Equal => { remap.push((r, f)); r_ = range_iter.next(); f_ = free_iter.next(); - }, + } Ordering::Greater => { remap.push((r.start..(r.start + flen), f)); r_ = Some((r.start + flen)..r.end); @@ -178,17 +253,184 @@ fn remap_ranges(ranges: Vec, free: Vec) -> Vec<(BlockRan remap } -pub fn shrink(input_file: &str, _output_file: &str, nr_blocks: u64) -> Result<()> { +fn overlaps(r1: &BlockRange, r2: &BlockRange, index: usize) -> Option { + if r1.start >= r2.end { + return None; + } + + if r2.start >= r1.end { + return None; + } + + Some(index) +} + +// Finds the index of the first entry that overlaps r. 
+fn find_first(r: &BlockRange, remaps: &Vec<(BlockRange, BlockRange)>) -> Option { + if remaps.len() == 0 { + return None + } + + match remaps.binary_search_by_key(&r.start, |(from, _)| from.start) { + Ok(n) => Some(n), + Err(n) => { + if n == 0 { + let (from, _) = &remaps[n]; + overlaps(&r, &from, n) + } else if n == remaps.len() { + let (from, _) = &remaps[n - 1]; + overlaps(&r, from, n - 1) + } else { + // Need to check the previous entry + let (from, _) = &remaps[n - 1]; + overlaps(&r, &from, n - 1).or_else(|| { + let (from, to) = &remaps[n]; + overlaps(&r, &from, n) + }) + } + } + } +} + +fn is_empty(r: &BlockRange) -> bool { + r.start == r.end +} + +// remaps must be in sorted order by from.start. +fn remap(r: &BlockRange, remaps: &Vec<(BlockRange, BlockRange)>) -> Vec { + let mut remap = Vec::new(); + let mut r = r.start..r.end; + + if let Some(index) = find_first(&r, &remaps) { + let mut index = index; + loop { + let (from, to) = &remaps[index]; + println!("from = {:?}", from); + + // There may be a prefix that doesn't overlap with 'from' + if r.start < from.start { + println!("pushing prefix"); + let len = u64::min(range_len(&r), from.start - r.start); + remap.push(r.start..(r.start + len)); + r = (r.start + len)..r.end; + + if is_empty(&r) { + break; + } + } + + let to = (to.start + (r.start - from.start))..to.end; + let from = r.start..from.end; + println!("to = {:?}", to); + let rlen = range_len(&r); + let flen = range_len(&from); + + let len = u64::min(rlen, flen); + println!("pushing overlap"); + remap.push(to.start..(to.start + len)); + + r = (r.start + len)..r.end; + if is_empty(&r) { + break; + } + + if len == flen { + index += 1; + } + + if index == remaps.len() { + remap.push(r.start..r.end); + break; + } + } + } else { + remap.push(r.start..r.end); + } + + remap +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn remap_test() { + struct Test { + remaps: Vec<(BlockRange, BlockRange)>, + input: BlockRange, + output: Vec, + } + + let 
tests = [ + Test { + remaps: vec![], + input: 0..1, + output: vec![0..1], + }, + Test { + remaps: vec![], + input: 100..1000, + output: vec![100..1000], + }, + Test { + remaps: vec![(10..20, 110..120)], + input: 0..5, + output: vec![0..5], + }, + Test { + remaps: vec![(10..20, 110..120)], + input: 10..20, + output: vec![110..120], + }, + Test { + remaps: vec![(10..20, 110..120)], + input: 5..15, + output: vec![5..10, 110..115], + }, + Test { + remaps: vec![(10..20, 110..120)], + input: 5..25, + output: vec![5..10, 110..120, 20..25], + }, + Test { + remaps: vec![(10..20, 110..120)], + input: 15..25, + output: vec![115..120, 20..25], + }, + Test { + remaps: vec![(10..20, 110..120)], + input: 25..35, + output: vec![25..35], + }, + Test { + remaps: vec![(10..20, 110..120), (30..40, 230..240)], + input: 0..50, + output: vec![0..10, 110..120, 20..30, 230..240, 40..50], + }, + ]; + + for t in &tests { + let rs = remap(&t.input, &t.remaps); + assert_eq!(rs, t.output); + } + } +} + +fn process_xml(input_path: &str, pass: &mut MV) -> Result<()> { let input = OpenOptions::new() .read(true) .write(false) .custom_flags(libc::O_EXCL) - .open(input_file)?; + .open(input_path)?; - // let mut visitor = xml::XmlWriter::new(std::io::stdout()); - // let mut visitor = xml::NoopVisitor::new(); + xml::read(input, pass)?; + Ok(()) +} + +pub fn shrink(input_path: &str, output_path: &str, nr_blocks: u64) -> Result<()> { let mut pass1 = Pass1::new(nr_blocks); - xml::read(input, &mut pass1)?; + process_xml(input_path, &mut pass1); eprintln!("{} blocks need moving", pass1.nr_high_blocks); let mut free_blocks = 0u64; @@ -220,9 +462,19 @@ pub fn shrink(input_file: &str, _output_file: &str, nr_blocks: u64) -> Result<() panic!("Insufficient space"); } - let remaps = remap_ranges(above, free); + let remaps = build_remaps(above, free); eprintln!("remappings {:?}.", remaps); + let output = OpenOptions::new() + .read(false) + .write(true) + .create(true) + .open(output_path)?; + let mut pass2 = 
Pass2::new(output, nr_blocks, remaps); + eprint!("writing new xml..."); + process_xml(input_path, &mut pass2)?; + eprintln!("done."); + Ok(()) } diff --git a/src/shrink/xml.rs b/src/shrink/xml.rs index 1a3c9b7..6a70f31 100644 --- a/src/shrink/xml.rs +++ b/src/shrink/xml.rs @@ -46,7 +46,7 @@ pub trait MetadataVisitor { fn device_b(&mut self, d: &Device) -> Result<()>; fn device_e(&mut self) -> Result<()>; - fn map(&mut self, m: Map) -> Result<()>; + fn map(&mut self, m: &Map) -> Result<()>; fn eof(&mut self) -> Result<()>; } @@ -65,7 +65,7 @@ impl MetadataVisitor for NoopVisitor { fn device_b(&mut self, _d: &Device) -> Result<()> {Ok(())} fn device_e(&mut self) -> Result<()> {Ok(())} - fn map(&mut self, _m: Map) -> Result<()> {Ok(())} + fn map(&mut self, m: &Map) -> Result<()> {Ok(())} fn eof(&mut self) -> Result<()> {Ok(())} } @@ -142,7 +142,7 @@ impl MetadataVisitor for XmlWriter { Ok(()) } - fn map(&mut self, m: Map) -> Result<()> { + fn map(&mut self, m: &Map) -> Result<()> { match m.len { 1 => { let tag = b"single_mapping"; @@ -352,8 +352,8 @@ where _ => todo!(), }, Ok(Event::Empty(ref e)) => match e.name() { - b"single_mapping" => visitor.map(parse_single_map(e)?)?, - b"range_mapping" => visitor.map(parse_range_map(e)?)?, + b"single_mapping" => visitor.map(&parse_single_map(e)?)?, + b"range_mapping" => visitor.map(&parse_range_map(e)?)?, _ => todo!(), }, Ok(Event::Text(_)) => {} From 31abc468be6c75f079cf21c75a5fca23edf75d0e Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Thu, 25 Jun 2020 15:28:45 +0100 Subject: [PATCH 05/17] [thin_shrink] Stub the copier --- src/bin/thin_shrink.rs | 12 ++++++++++-- src/shrink/copier.rs | 14 ++++++++++++++ src/shrink/mod.rs | 1 + src/shrink/toplevel.rs | 21 ++++++++++++++++----- 4 files changed, 41 insertions(+), 7 deletions(-) create mode 100644 src/shrink/copier.rs diff --git a/src/bin/thin_shrink.rs b/src/bin/thin_shrink.rs index ffb29ad..26b70a4 100644 --- a/src/bin/thin_shrink.rs +++ b/src/bin/thin_shrink.rs @@ -25,7 
+25,14 @@ fn main() { .value_name("OUTPUT") .takes_value(true), ) - // FIXME: support various disk units + .arg( + Arg::with_name("DATA") + .help("Specify pool data device where data will be moved") + .required(true) + .long("data") + .value_name("DATA") + .takes_value(true), + ) .arg( Arg::with_name("SIZE") .help("Specify new size for the pool (in data blocks)") @@ -41,13 +48,14 @@ fn main() { let input_file = matches.value_of("INPUT").unwrap(); let output_file = matches.value_of("OUTPUT").unwrap(); let size = matches.value_of("SIZE").unwrap().parse::().unwrap(); + let data_file = matches.value_of("DATA").unwrap(); if !file_utils::file_exists(input_file) { eprintln!("Couldn't find input file '{}'.", &input_file); exit(1); } - if let Err(reason) = thinp::shrink::toplevel::shrink(&input_file, &output_file, size) { + if let Err(reason) = thinp::shrink::toplevel::shrink(&input_file, &output_file, &data_file, size) { println!("Application error: {}\n", reason); exit(1); } diff --git a/src/shrink/copier.rs b/src/shrink/copier.rs new file mode 100644 index 0000000..3df0042 --- /dev/null +++ b/src/shrink/copier.rs @@ -0,0 +1,14 @@ +use anyhow::Result; + +pub type Sector = u64; + +pub struct Region { + src: Sector, + dest: Sector, + len: Sector, +} + +// FIXME: pass in +pub fn copy(path: &str, regions: &Vec) -> Result<()> { + Ok(()) +} diff --git a/src/shrink/mod.rs b/src/shrink/mod.rs index a2c8345..56f9dd5 100644 --- a/src/shrink/mod.rs +++ b/src/shrink/mod.rs @@ -1,3 +1,4 @@ pub mod toplevel; +mod copier; mod xml; diff --git a/src/shrink/toplevel.rs b/src/shrink/toplevel.rs index 394f9d3..a0f4007 100644 --- a/src/shrink/toplevel.rs +++ b/src/shrink/toplevel.rs @@ -5,6 +5,7 @@ use std::io::Write; use std::os::unix::fs::OpenOptionsExt; use crate::shrink::xml; +use crate::shrink::copier::{self, Region}; //--------------------------------------- @@ -268,7 +269,7 @@ fn overlaps(r1: &BlockRange, r2: &BlockRange, index: usize) -> Option { // Finds the index of the first entry 
that overlaps r. fn find_first(r: &BlockRange, remaps: &Vec<(BlockRange, BlockRange)>) -> Option { if remaps.len() == 0 { - return None + return None; } match remaps.binary_search_by_key(&r.start, |(from, _)| from.start) { @@ -319,9 +320,9 @@ fn remap(r: &BlockRange, remaps: &Vec<(BlockRange, BlockRange)>) -> Vec) -> Vec { + let rs = Vec::new(); + rs +} + fn process_xml(input_path: &str, pass: &mut MV) -> Result<()> { let input = OpenOptions::new() .read(true) @@ -428,7 +434,7 @@ fn process_xml(input_path: &str, pass: &mut MV) -> Res Ok(()) } -pub fn shrink(input_path: &str, output_path: &str, nr_blocks: u64) -> Result<()> { +pub fn shrink(input_path: &str, output_path: &str, data_path: &str, nr_blocks: u64) -> Result<()> { let mut pass1 = Pass1::new(nr_blocks); process_xml(input_path, &mut pass1); eprintln!("{} blocks need moving", pass1.nr_high_blocks); @@ -465,6 +471,11 @@ pub fn shrink(input_path: &str, output_path: &str, nr_blocks: u64) -> Result<()> let remaps = build_remaps(above, free); eprintln!("remappings {:?}.", remaps); + let regions = build_copy_regions(&remaps); + eprint!("Copying data..."); + copier::copy(data_path, &regions); + eprintln!("done."); + let output = OpenOptions::new() .read(false) .write(true) From fcf44d46b7e403c67d3d4e0f2e93e520f249dbfc Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Thu, 25 Jun 2020 16:12:35 +0100 Subject: [PATCH 06/17] [thin_shrink] remove some debug printlns --- src/shrink/toplevel.rs | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/shrink/toplevel.rs b/src/shrink/toplevel.rs index a0f4007..9aa800b 100644 --- a/src/shrink/toplevel.rs +++ b/src/shrink/toplevel.rs @@ -4,8 +4,8 @@ use std::fs::OpenOptions; use std::io::Write; use std::os::unix::fs::OpenOptionsExt; -use crate::shrink::xml; use crate::shrink::copier::{self, Region}; +use crate::shrink::xml; //--------------------------------------- @@ -306,11 +306,9 @@ fn remap(r: &BlockRange, remaps: &Vec<(BlockRange, BlockRange)>) -> Vec) ->
Vec Date: Thu, 25 Jun 2020 16:50:11 +0100 Subject: [PATCH 07/17] [thin_shrink] Very simple copier implementation --- src/shrink/copier.rs | 55 ++++++++++++++++++++++++++++++++++++++---- src/shrink/toplevel.rs | 24 +++++++++++++----- 2 files changed, 68 insertions(+), 11 deletions(-) diff --git a/src/shrink/copier.rs b/src/shrink/copier.rs index 3df0042..9fca58a 100644 --- a/src/shrink/copier.rs +++ b/src/shrink/copier.rs @@ -1,14 +1,59 @@ use anyhow::Result; +use std::fs::OpenOptions; +use std::io::{Seek, SeekFrom, Write, Read}; +use std::os::unix::fs::OpenOptionsExt; pub type Sector = u64; +#[derive(Debug)] pub struct Region { - src: Sector, - dest: Sector, - len: Sector, + pub src: Sector, + pub dest: Sector, + pub len: Sector, } -// FIXME: pass in -pub fn copy(path: &str, regions: &Vec) -> Result<()> { + +fn copy_step(file: &mut W, src_byte: u64, dest_byte: u64, len: usize) -> Result<()> +where + W: Write + Seek + Read, +{ + let mut buf = vec![0; len]; + file.seek(SeekFrom::Start(src_byte))?; + file.read_exact(&mut buf[0..])?; + file.seek(SeekFrom::Start(dest_byte))?; + file.write_all(&buf)?; + Ok(()) +} + +fn copy_region(file: &mut W, r: &Region) -> Result<()> +where + W: Write + Seek + Read, +{ + const MAX_BYTES: Sector = 1024 * 1024 * 64; + + let src_bytes = r.src * 512; + let dest_bytes = r.dest * 512; + let len_bytes = r.len * 512; + let mut written = 0; + while written != len_bytes { + let step = u64::min(len_bytes - written, MAX_BYTES); + copy_step(file, src_bytes + written, dest_bytes + written, step as usize)?; + written += step; + } + Ok(()) +} + +pub fn copy(path: &str, regions: &Vec) -> Result<()> { + let mut input = OpenOptions::new() + .read(true) + .write(true) + //.custom_flags(libc::O_DIRECT) + .open(path)?; + + for r in regions { + eprintln!("copying {:?}", r); + copy_region(&mut input, r)?; + } + Ok(()) } diff --git a/src/shrink/toplevel.rs b/src/shrink/toplevel.rs index 9aa800b..bddc448 100644 --- a/src/shrink/toplevel.rs +++ 
b/src/shrink/toplevel.rs @@ -19,6 +19,7 @@ struct Pass1 { /// High blocks are beyond the new, reduced end of the pool. These /// will need to be moved. nr_high_blocks: u64, + block_size: Option, } impl Pass1 { @@ -27,6 +28,7 @@ impl Pass1 { allocated_blocks: FixedBitSet::with_capacity(0), nr_blocks, nr_high_blocks: 0, + block_size: None, } } } @@ -34,6 +36,7 @@ impl Pass1 { impl xml::MetadataVisitor for Pass1 { fn superblock_b(&mut self, sb: &xml::Superblock) -> Result<()> { self.allocated_blocks.grow(sb.nr_data_blocks as usize); + self.block_size = Some(sb.data_block_size as u64); Ok(()) } @@ -414,8 +417,17 @@ mod tests { } } -fn build_copy_regions(remaps: &Vec<(BlockRange, BlockRange)>) -> Vec { - let rs = Vec::new(); +fn build_copy_regions(remaps: &Vec<(BlockRange, BlockRange)>, block_size: u64) -> Vec { + let mut rs = Vec::new(); + + for (from, to) in remaps { + rs.push(Region { + src: from.start * block_size, + dest: to.start * block_size, + len: range_len(&from) * block_size, + }); + } + rs } @@ -467,10 +479,10 @@ pub fn shrink(input_path: &str, output_path: &str, data_path: &str, nr_blocks: u let remaps = build_remaps(above, free); eprintln!("remappings {:?}.", remaps); - let regions = build_copy_regions(&remaps); - eprint!("Copying data..."); - copier::copy(data_path, &regions); - eprintln!("done."); + let regions = build_copy_regions(&remaps, pass1.block_size.unwrap() as u64); + //eprint!("Copying data..."); + copier::copy(data_path, &regions)?; + //eprintln!("done."); let output = OpenOptions::new() .read(false) From 7df56a5a047056e9b45d15a188e6634c95ba9df5 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Fri, 26 Jun 2020 08:00:53 +0100 Subject: [PATCH 08/17] [thin_shrink] Squash some warnings --- src/shrink/copier.rs | 2 +- src/shrink/toplevel.rs | 15 +++------------ src/shrink/xml.rs | 19 ------------------- 3 files changed, 4 insertions(+), 32 deletions(-) diff --git a/src/shrink/copier.rs b/src/shrink/copier.rs index 9fca58a..83e3c51 100644 ---
a/src/shrink/copier.rs +++ b/src/shrink/copier.rs @@ -1,7 +1,7 @@ use anyhow::Result; use std::fs::OpenOptions; use std::io::{Seek, SeekFrom, Write, Read}; -use std::os::unix::fs::OpenOptionsExt; +//use std::os::unix::fs::OpenOptionsExt; pub type Sector = u64; diff --git a/src/shrink/toplevel.rs b/src/shrink/toplevel.rs index bddc448..796110e 100644 --- a/src/shrink/toplevel.rs +++ b/src/shrink/toplevel.rs @@ -1,5 +1,5 @@ use anyhow::Result; -use fixedbitset::{FixedBitSet, IndexRange}; +use fixedbitset::{FixedBitSet}; use std::fs::OpenOptions; use std::io::Write; use std::os::unix::fs::OpenOptionsExt; @@ -84,15 +84,6 @@ impl Pass2 { remaps, } } - - fn remap(&self, r: BlockRange) -> Vec { - let mut rmap = Vec::new(); - - // id - rmap.push(r.clone()); - - rmap - } } impl xml::MetadataVisitor for Pass2 { @@ -288,7 +279,7 @@ fn find_first(r: &BlockRange, remaps: &Vec<(BlockRange, BlockRange)>) -> Option< // Need to check the previous entry let (from, _) = &remaps[n - 1]; overlaps(&r, &from, n - 1).or_else(|| { - let (from, to) = &remaps[n]; + let (from, _) = &remaps[n]; overlaps(&r, &from, n) }) } @@ -444,7 +435,7 @@ fn process_xml(input_path: &str, pass: &mut MV) -> Res pub fn shrink(input_path: &str, output_path: &str, data_path: &str, nr_blocks: u64) -> Result<()> { let mut pass1 = Pass1::new(nr_blocks); - process_xml(input_path, &mut pass1); + process_xml(input_path, &mut pass1)?; eprintln!("{} blocks need moving", pass1.nr_high_blocks); let mut free_blocks = 0u64; diff --git a/src/shrink/xml.rs b/src/shrink/xml.rs index 6a70f31..46c3f3b 100644 --- a/src/shrink/xml.rs +++ b/src/shrink/xml.rs @@ -51,25 +51,6 @@ pub trait MetadataVisitor { fn eof(&mut self) -> Result<()>; } -pub struct NoopVisitor { -} - -impl NoopVisitor { - pub fn new() -> NoopVisitor { NoopVisitor {} } -} - -impl MetadataVisitor for NoopVisitor { - fn superblock_b(&mut self, _sb: &Superblock) -> Result<()> {Ok(())} - fn superblock_e(&mut self) -> Result<()> {Ok(())} - - fn device_b(&mut self, _d: 
&Device) -> Result<()> {Ok(())} - fn device_e(&mut self) -> Result<()> {Ok(())} - - fn map(&mut self, m: &Map) -> Result<()> {Ok(())} - - fn eof(&mut self) -> Result<()> {Ok(())} -} - pub struct XmlWriter { w: Writer, } From d8a0805753687a09736914b71b211dea75cf937f Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Fri, 26 Jun 2020 08:31:02 +0100 Subject: [PATCH 09/17] [thin_shrink] fix some clippy warnings --- src/shrink/copier.rs | 2 +- src/shrink/toplevel.rs | 18 +++++++++--------- src/shrink/xml.rs | 2 +- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/shrink/copier.rs b/src/shrink/copier.rs index 83e3c51..ef04a02 100644 --- a/src/shrink/copier.rs +++ b/src/shrink/copier.rs @@ -43,7 +43,7 @@ where Ok(()) } -pub fn copy(path: &str, regions: &Vec) -> Result<()> { +pub fn copy(path: &str, regions: &[Region]) -> Result<()> { let mut input = OpenOptions::new() .read(true) .write(true) diff --git a/src/shrink/toplevel.rs b/src/shrink/toplevel.rs index 796110e..2717631 100644 --- a/src/shrink/toplevel.rs +++ b/src/shrink/toplevel.rs @@ -1,5 +1,5 @@ use anyhow::Result; -use fixedbitset::{FixedBitSet}; +use fixedbitset::FixedBitSet; use std::fs::OpenOptions; use std::io::Write; use std::os::unix::fs::OpenOptionsExt; @@ -163,7 +163,7 @@ fn bits_to_ranges(bits: &FixedBitSet) -> Vec { // Splits the ranges into those below threshold, and those equal or // above threshold below threshold, and those equal or above threshold -fn ranges_split(ranges: &Vec, threshold: u64) -> (Vec, Vec) { +fn ranges_split(ranges: &[BlockRange], threshold: u64) -> (Vec, Vec) { use std::ops::Range; let mut below = Vec::new(); @@ -181,7 +181,7 @@ fn ranges_split(ranges: &Vec, threshold: u64) -> (Vec, V (below, above) } -fn negate_ranges(ranges: &Vec) -> Vec { +fn negate_ranges(ranges: &[BlockRange]) -> Vec { use std::ops::Range; let mut result = Vec::new(); @@ -206,8 +206,8 @@ fn range_len(r: &BlockRange) -> u64 { r.end - r.start } -fn ranges_total(rs: &Vec) -> u64 { - 
rs.into_iter().fold(0, |sum, r| sum + range_len(r)) +fn ranges_total(rs: &[BlockRange]) -> u64 { + rs.iter().fold(0, |sum, r| sum + range_len(r)) } // Assumes there is enough space to remap. @@ -261,8 +261,8 @@ fn overlaps(r1: &BlockRange, r2: &BlockRange, index: usize) -> Option { } // Finds the index of the first entry that overlaps r. -fn find_first(r: &BlockRange, remaps: &Vec<(BlockRange, BlockRange)>) -> Option { - if remaps.len() == 0 { +fn find_first(r: &BlockRange, remaps: &[(BlockRange, BlockRange)]) -> Option { + if remaps.is_empty() { return None; } @@ -292,7 +292,7 @@ fn is_empty(r: &BlockRange) -> bool { } // remaps must be in sorted order by from.start. -fn remap(r: &BlockRange, remaps: &Vec<(BlockRange, BlockRange)>) -> Vec { +fn remap(r: &BlockRange, remaps: &[(BlockRange, BlockRange)]) -> Vec { let mut remap = Vec::new(); let mut r = r.start..r.end; @@ -408,7 +408,7 @@ mod tests { } } -fn build_copy_regions(remaps: &Vec<(BlockRange, BlockRange)>, block_size: u64) -> Vec { +fn build_copy_regions(remaps: &[(BlockRange, BlockRange)], block_size: u64) -> Vec { let mut rs = Vec::new(); for (from, to) in remaps { diff --git a/src/shrink/xml.rs b/src/shrink/xml.rs index 46c3f3b..5c4999d 100644 --- a/src/shrink/xml.rs +++ b/src/shrink/xml.rs @@ -66,7 +66,7 @@ fn mk_attr_<'a, T: Display>(n: T) -> Cow<'a, [u8]> { Cow::Owned(str.into_bytes()) } -fn mk_attr<'a, T: Display>(key: &[u8], value: T) -> Attribute { +fn mk_attr(key: &[u8], value: T) -> Attribute { Attribute { key, value: mk_attr_(value), From abf06236b1605efaea858062bfc156f8cceb4a3d Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Fri, 26 Jun 2020 08:47:40 +0100 Subject: [PATCH 10/17] [thin_shrink] Tidy up some messages --- src/shrink/toplevel.rs | 26 ++++---------------------- 1 file changed, 4 insertions(+), 22 deletions(-) diff --git a/src/shrink/toplevel.rs b/src/shrink/toplevel.rs index 2717631..aaae3ea 100644 --- a/src/shrink/toplevel.rs +++ b/src/shrink/toplevel.rs @@ -435,45 +435,27 @@ fn 
process_xml(input_path: &str, pass: &mut MV) -> Res pub fn shrink(input_path: &str, output_path: &str, data_path: &str, nr_blocks: u64) -> Result<()> { let mut pass1 = Pass1::new(nr_blocks); + eprint!("Reading xml..."); process_xml(input_path, &mut pass1)?; + eprintln!("done"); eprintln!("{} blocks need moving", pass1.nr_high_blocks); - let mut free_blocks = 0u64; - for i in 0..pass1.allocated_blocks.len() { - if !pass1.allocated_blocks[i] { - free_blocks += 1; - } - } - eprintln!("{} free blocks below new end.", free_blocks); - let ranges = bits_to_ranges(&pass1.allocated_blocks); - eprintln!("{} allocated ranges:", ranges.len()); - - eprintln!("{:?}", &ranges); - let (below, above) = ranges_split(&ranges, nr_blocks); - eprintln!("ranges split at {}: ({:?}, {:?})", nr_blocks, below, above); let free = negate_ranges(&below); - eprintln!("free {:?}.", free); - - let nr_moving = ranges_total(&above); - eprintln!("{} blocks need to be remapped.", nr_moving); - let free_blocks = ranges_total(&free); eprintln!("{} free blocks.", free_blocks); - if free_blocks < nr_moving { + if free_blocks < pass1.nr_high_blocks { + // FIXME: return error panic!("Insufficient space"); } let remaps = build_remaps(above, free); - eprintln!("remappings {:?}.", remaps); let regions = build_copy_regions(&remaps, pass1.block_size.unwrap() as u64); - //eprint!("Copying data..."); copier::copy(data_path, &regions)?; - //eprintln!("done."); let output = OpenOptions::new() .read(false) From 07da5704d50b1be01c09f7c3782ace0f399157f8 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Fri, 26 Jun 2020 08:57:26 +0100 Subject: [PATCH 11/17] [thin_shrink] add --no-copy --- src/bin/thin_shrink.rs | 11 ++++++++++- src/shrink/toplevel.rs | 10 +++++++--- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/src/bin/thin_shrink.rs b/src/bin/thin_shrink.rs index 26b70a4..54d053f 100644 --- a/src/bin/thin_shrink.rs +++ b/src/bin/thin_shrink.rs @@ -33,6 +33,14 @@ fn main() { .value_name("DATA")
.takes_value(true), ) + .arg( + Arg::with_name("NOCOPY") + .help("Skip the copying of data, useful for benchmarking") + .required(false) + .long("no-copy") + .value_name("NOCOPY") + .takes_value(false), + ) .arg( Arg::with_name("SIZE") .help("Specify new size for the pool (in data blocks)") @@ -49,13 +57,14 @@ fn main() { let output_file = matches.value_of("OUTPUT").unwrap(); let size = matches.value_of("SIZE").unwrap().parse::().unwrap(); let data_file = matches.value_of("DATA").unwrap(); + let do_copy = !matches.is_present("NOCOPY"); if !file_utils::file_exists(input_file) { eprintln!("Couldn't find input file '{}'.", &input_file); exit(1); } - if let Err(reason) = thinp::shrink::toplevel::shrink(&input_file, &output_file, &data_file, size) { + if let Err(reason) = thinp::shrink::toplevel::shrink(&input_file, &output_file, &data_file, size, do_copy) { println!("Application error: {}\n", reason); exit(1); } diff --git a/src/shrink/toplevel.rs b/src/shrink/toplevel.rs index aaae3ea..0612abf 100644 --- a/src/shrink/toplevel.rs +++ b/src/shrink/toplevel.rs @@ -433,7 +433,7 @@ fn process_xml(input_path: &str, pass: &mut MV) -> Res Ok(()) } -pub fn shrink(input_path: &str, output_path: &str, data_path: &str, nr_blocks: u64) -> Result<()> { +pub fn shrink(input_path: &str, output_path: &str, data_path: &str, nr_blocks: u64, do_copy: bool) -> Result<()> { let mut pass1 = Pass1::new(nr_blocks); eprint!("Reading xml..."); process_xml(input_path, &mut pass1)?; @@ -454,8 +454,12 @@ pub fn shrink(input_path: &str, output_path: &str, data_path: &str, nr_blocks: u let remaps = build_remaps(above, free); - let regions = build_copy_regions(&remaps, pass1.block_size.unwrap() as u64); - copier::copy(data_path, &regions)?; + if do_copy { + let regions = build_copy_regions(&remaps, pass1.block_size.unwrap() as u64); + copier::copy(data_path, &regions)?; + } else { + eprintln!("skipping copy"); + } let output = OpenOptions::new() .read(false) From 
d03dac8f75288abf2ddecc551d18301212143130
Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Fri, 26 Jun 2020 16:44:47 +0100 Subject: [PATCH 12/17] [thin_shrink] write test harness --- Cargo.lock | 30 +++++ Cargo.toml | 1 + src/file_utils.rs | 45 ++++--- src/lib.rs | 1 + src/shrink/mod.rs | 1 - src/shrink/toplevel.rs | 48 ++++--- src/thin/mod.rs | 1 + src/{shrink => thin}/xml.rs | 162 +++++++++++++++------- tests/thin_shrink.rs | 259 ++++++++++++++++++++++++++++++++++++ 9 files changed, 458 insertions(+), 90 deletions(-) create mode 100644 src/thin/mod.rs rename src/{shrink => thin}/xml.rs (73%) create mode 100644 tests/thin_shrink.rs diff --git a/Cargo.lock b/Cargo.lock index 23083a1..d42d598 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -324,6 +324,11 @@ dependencies = [ "rand_core 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "redox_syscall" +version = "0.1.56" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "regex" version = "1.3.9" @@ -340,6 +345,14 @@ name = "regex-syntax" version = "0.6.18" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "remove_dir_all" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "winapi 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "rustc_version" version = "0.2.3" @@ -386,6 +399,19 @@ dependencies = [ "unicode-xid 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "tempfile" +version = "3.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "cfg-if 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.71 (registry+https://github.com/rust-lang/crates.io-index)", + "rand 0.7.3 (registry+https://github.com/rust-lang/crates.io-index)", + "redox_syscall 0.1.56 (registry+https://github.com/rust-lang/crates.io-index)", + "remove_dir_all 0.5.3 
(registry+https://github.com/rust-lang/crates.io-index)", + "winapi 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "textwrap" version = "0.11.0" @@ -414,6 +440,7 @@ dependencies = [ "quickcheck 0.9.2 (registry+https://github.com/rust-lang/crates.io-index)", "quickcheck_macros 0.9.1 (registry+https://github.com/rust-lang/crates.io-index)", "rand 0.7.3 (registry+https://github.com/rust-lang/crates.io-index)", + "tempfile 3.1.0 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -515,8 +542,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum rand_chacha 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "f4c8ed856279c9737206bf725bf36935d8666ead7aa69b52be55af369d193402" "checksum rand_core 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19" "checksum rand_hc 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c" +"checksum redox_syscall 0.1.56 (registry+https://github.com/rust-lang/crates.io-index)" = "2439c63f3f6139d1b57529d16bc3b8bb855230c8efcc5d3a896c8bea7c3b1e84" "checksum regex 1.3.9 (registry+https://github.com/rust-lang/crates.io-index)" = "9c3780fcf44b193bc4d09f36d2a3c87b251da4a046c87795a0d35f4f927ad8e6" "checksum regex-syntax 0.6.18 (registry+https://github.com/rust-lang/crates.io-index)" = "26412eb97c6b088a6997e05f69403a802a92d520de2f8e63c2b65f9e0f47c4e8" +"checksum remove_dir_all 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "3acd125665422973a33ac9d3dd2df85edad0f4ae9b00dafb1a05e43a9f5ef8e7" "checksum rustc_version 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a" "checksum ryu 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)" = 
"71d301d4193d031abdd79ff7e3dd721168a9572ef3fe51a1517aba235bd8f86e" "checksum semver 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "1d7eb9ef2c18661902cc47e535f9bc51b78acd254da71d375c2f6720d9a40403" @@ -524,6 +553,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum static_assertions 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "7f3eb36b47e512f8f1c9e3d10c2c1965bc992bd9cdb024fa581e2194501c83d3" "checksum strsim 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" "checksum syn 1.0.30 (registry+https://github.com/rust-lang/crates.io-index)" = "93a56fabc59dce20fe48b6c832cc249c713e7ed88fa28b0ee0a3bfcaae5fe4e2" +"checksum tempfile 3.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "7a6e24d9338a0a5be79593e2fa15a648add6138caa803e2d5bc782c371732ca9" "checksum textwrap 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" "checksum thread_local 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "d40c6d1b69745a6ec6fb1ca717914848da4b44ae29d9b3080cbee91d72a69b14" "checksum unicode-width 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)" = "caaa9d531767d1ff2150b9332433f32a24622147e5ebb1f26409d5da67afd479" diff --git a/Cargo.toml b/Cargo.toml index dce61a9..892ea4c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,6 +18,7 @@ nix = "0.17" nom = "5.1" num_cpus = "1.13" rand = "0.7" +tempfile = "3.1" num-traits = "0.2" num-derive = "0.3" diff --git a/src/file_utils.rs b/src/file_utils.rs index 3f5dce8..064e59a 100644 --- a/src/file_utils.rs +++ b/src/file_utils.rs @@ -1,8 +1,10 @@ use nix::sys::stat; use nix::sys::stat::{FileStat, SFlag}; -use std::io; use std::fs::File; +use std::io; +use std::io::{Seek, Write}; use std::os::unix::io::AsRawFd; +use tempfile::tempfile; //--------------------------------------- @@ -11,15 +13,13 
@@ fn check_bits(mode: u32, flag: &SFlag) -> bool { } pub fn is_file_or_blk(info: FileStat) -> bool { - check_bits(info.st_mode, &stat::SFlag::S_IFBLK) || - check_bits(info.st_mode, &stat::SFlag::S_IFREG) + check_bits(info.st_mode, &stat::SFlag::S_IFBLK) + || check_bits(info.st_mode, &stat::SFlag::S_IFREG) } pub fn file_exists(path: &str) -> bool { match stat::stat(path) { - Ok(info) => { - is_file_or_blk(info) - } + Ok(info) => is_file_or_blk(info), _ => { // FIXME: assuming all errors indicate the file doesn't // exist. @@ -40,14 +40,14 @@ pub fn fail(msg: &str) -> io::Result { } fn get_device_size(path: &str) -> io::Result { - let file = File::open(path)?; + let file = File::open(path)?; let fd = file.as_raw_fd(); let mut cap = 0u64; unsafe { - match ioctl_blkgetsize64(fd, &mut cap) { - Ok(_) => {Ok(cap)} - _ => {fail("BLKGETSIZE64 ioctl failed")} - } + match ioctl_blkgetsize64(fd, &mut cap) { + Ok(_) => Ok(cap), + _ => fail("BLKGETSIZE64 ioctl failed"), + } } } @@ -60,12 +60,25 @@ pub fn file_size(path: &str) -> io::Result { get_device_size(path) } else { fail("not a regular file or block device") - } + } } - _ => { - fail("stat failed") - } - } + _ => fail("stat failed"), + } +} + +//--------------------------------------- + +pub fn temp_file_sized(nr_bytes: u64) -> io::Result { + let mut file = tempfile()?; + + let zeroes: Vec = vec![0; 1]; + + if nr_bytes > 0 { + file.seek(io::SeekFrom::Start(nr_bytes - 1))?; + file.write_all(&zeroes)?; + } + + Ok(file) } //--------------------------------------- diff --git a/src/lib.rs b/src/lib.rs index 412f468..714c81c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -20,4 +20,5 @@ pub mod check; pub mod file_utils; pub mod pack; pub mod shrink; +pub mod thin; pub mod version; diff --git a/src/shrink/mod.rs b/src/shrink/mod.rs index 56f9dd5..235bf23 100644 --- a/src/shrink/mod.rs +++ b/src/shrink/mod.rs @@ -1,4 +1,3 @@ pub mod toplevel; mod copier; -mod xml; diff --git a/src/shrink/toplevel.rs b/src/shrink/toplevel.rs index 
0612abf..500a5ef 100644 --- a/src/shrink/toplevel.rs +++ b/src/shrink/toplevel.rs @@ -5,7 +5,7 @@ use std::io::Write; use std::os::unix::fs::OpenOptionsExt; use crate::shrink::copier::{self, Region}; -use crate::shrink::xml; +use crate::thin::xml::{self, Visit}; //--------------------------------------- @@ -34,36 +34,36 @@ impl Pass1 { } impl xml::MetadataVisitor for Pass1 { - fn superblock_b(&mut self, sb: &xml::Superblock) -> Result<()> { + fn superblock_b(&mut self, sb: &xml::Superblock) -> Result { self.allocated_blocks.grow(sb.nr_data_blocks as usize); self.block_size = Some(sb.data_block_size as u64); - Ok(()) + Ok(Visit::Continue) } - fn superblock_e(&mut self) -> Result<()> { - Ok(()) + fn superblock_e(&mut self) -> Result { + Ok(Visit::Continue) } - fn device_b(&mut self, _d: &xml::Device) -> Result<()> { - Ok(()) + fn device_b(&mut self, _d: &xml::Device) -> Result { + Ok(Visit::Continue) } - fn device_e(&mut self) -> Result<()> { - Ok(()) + fn device_e(&mut self) -> Result { + Ok(Visit::Continue) } - fn map(&mut self, m: &xml::Map) -> Result<()> { + fn map(&mut self, m: &xml::Map) -> Result { for i in m.data_begin..(m.data_begin + m.len) { if i > self.nr_blocks { self.nr_high_blocks += 1; } self.allocated_blocks.insert(i as usize); } - Ok(()) + Ok(Visit::Continue) } - fn eof(&mut self) -> Result<()> { - Ok(()) + fn eof(&mut self) -> Result { + Ok(Visit::Continue) } } @@ -87,23 +87,23 @@ impl Pass2 { } impl xml::MetadataVisitor for Pass2 { - fn superblock_b(&mut self, sb: &xml::Superblock) -> Result<()> { + fn superblock_b(&mut self, sb: &xml::Superblock) -> Result { self.writer.superblock_b(sb) } - fn superblock_e(&mut self) -> Result<()> { + fn superblock_e(&mut self) -> Result { self.writer.superblock_e() } - fn device_b(&mut self, d: &xml::Device) -> Result<()> { + fn device_b(&mut self, d: &xml::Device) -> Result { self.writer.device_b(d) } - fn device_e(&mut self) -> Result<()> { + fn device_e(&mut self) -> Result { self.writer.device_e() } - fn 
map(&mut self, m: &xml::Map) -> Result<()> { + fn map(&mut self, m: &xml::Map) -> Result { if m.data_begin + m.len < self.nr_blocks { // no remapping needed. self.writer.map(m)?; @@ -123,10 +123,10 @@ impl xml::MetadataVisitor for Pass2 { } } - Ok(()) + Ok(Visit::Continue) } - fn eof(&mut self) -> Result<()> { + fn eof(&mut self) -> Result { self.writer.eof() } } @@ -181,7 +181,7 @@ fn ranges_split(ranges: &[BlockRange], threshold: u64) -> (Vec, Vec< (below, above) } -fn negate_ranges(ranges: &[BlockRange]) -> Vec { +fn negate_ranges(ranges: &[BlockRange], upper_limit: u64) -> Vec { use std::ops::Range; let mut result = Vec::new(); @@ -199,6 +199,10 @@ fn negate_ranges(ranges: &[BlockRange]) -> Vec { } } + if cursor < upper_limit { + result.push(cursor..upper_limit); + } + result } @@ -443,7 +447,7 @@ pub fn shrink(input_path: &str, output_path: &str, data_path: &str, nr_blocks: u let ranges = bits_to_ranges(&pass1.allocated_blocks); let (below, above) = ranges_split(&ranges, nr_blocks); - let free = negate_ranges(&below); + let free = negate_ranges(&below, nr_blocks); let free_blocks = ranges_total(&free); eprintln!("{} free blocks.", free_blocks); diff --git a/src/thin/mod.rs b/src/thin/mod.rs new file mode 100644 index 0000000..2910ec6 --- /dev/null +++ b/src/thin/mod.rs @@ -0,0 +1 @@ +pub mod xml; diff --git a/src/shrink/xml.rs b/src/thin/xml.rs similarity index 73% rename from src/shrink/xml.rs rename to src/thin/xml.rs index 5c4999d..7da0fe2 100644 --- a/src/shrink/xml.rs +++ b/src/thin/xml.rs @@ -1,11 +1,5 @@ use anyhow::Result; -use std::{ - borrow::{Cow}, - fmt::Display, - io::prelude::*, - io::BufReader, - io::Write, -}; +use std::{borrow::Cow, fmt::Display, io::prelude::*, io::BufReader, io::Write}; use quick_xml::events::attributes::Attribute; use quick_xml::events::{BytesEnd, BytesStart, Event}; @@ -13,6 +7,7 @@ use quick_xml::{Reader, Writer}; //--------------------------------------- +#[derive(Clone)] pub struct Superblock { pub uuid: String, pub 
time: u64, @@ -24,6 +19,7 @@ pub struct Superblock { pub metadata_snap: Option, } +#[derive(Clone)] pub struct Device { pub dev_id: u32, pub mapped_blocks: u64, @@ -32,6 +28,7 @@ pub struct Device { pub snap_time: u64, } +#[derive(Clone)] pub struct Map { pub thin_begin: u64, pub data_begin: u64, @@ -39,16 +36,22 @@ pub struct Map { pub len: u64, } +#[derive(Clone)] +pub enum Visit { + Continue, + Stop, +} + pub trait MetadataVisitor { - fn superblock_b(&mut self, sb: &Superblock) -> Result<()>; - fn superblock_e(&mut self) -> Result<()>; + fn superblock_b(&mut self, sb: &Superblock) -> Result; + fn superblock_e(&mut self) -> Result; - fn device_b(&mut self, d: &Device) -> Result<()>; - fn device_e(&mut self) -> Result<()>; + fn device_b(&mut self, d: &Device) -> Result; + fn device_e(&mut self) -> Result; - fn map(&mut self, m: &Map) -> Result<()>; + fn map(&mut self, m: &Map) -> Result; - fn eof(&mut self) -> Result<()>; + fn eof(&mut self) -> Result; } pub struct XmlWriter { @@ -57,7 +60,9 @@ pub struct XmlWriter { impl XmlWriter { pub fn new(w: W) -> XmlWriter { - XmlWriter { w: Writer::new_with_indent(w, 0x20, 2) } + XmlWriter { + w: Writer::new_with_indent(w, 0x20, 2), + } } } @@ -76,7 +81,7 @@ fn mk_attr(key: &[u8], value: T) -> Attribute { const XML_VERSION: u32 = 2; impl MetadataVisitor for XmlWriter { - fn superblock_b(&mut self, sb: &Superblock) -> Result<()> { + fn superblock_b(&mut self, sb: &Superblock) -> Result { let tag = b"superblock"; let mut elem = BytesStart::owned(tag.to_vec(), tag.len()); elem.push_attribute(mk_attr(b"uuid", sb.uuid.clone())); @@ -96,16 +101,16 @@ impl MetadataVisitor for XmlWriter { } self.w.write_event(Event::Start(elem))?; - Ok(()) + Ok(Visit::Continue) } - fn superblock_e(&mut self) -> Result<()> { + fn superblock_e(&mut self) -> Result { self.w .write_event(Event::End(BytesEnd::borrowed(b"superblock")))?; - Ok(()) + Ok(Visit::Continue) } - fn device_b(&mut self, d: &Device) -> Result<()> { + fn device_b(&mut self, d: 
&Device) -> Result { let tag = b"device"; let mut elem = BytesStart::owned(tag.to_vec(), tag.len()); elem.push_attribute(mk_attr(b"dev_id", d.dev_id)); @@ -114,16 +119,16 @@ impl MetadataVisitor for XmlWriter { elem.push_attribute(mk_attr(b"creation_time", d.creation_time)); elem.push_attribute(mk_attr(b"snap_time", d.snap_time)); self.w.write_event(Event::Start(elem))?; - Ok(()) + Ok(Visit::Continue) } - fn device_e(&mut self) -> Result<()> { + fn device_e(&mut self) -> Result { self.w .write_event(Event::End(BytesEnd::borrowed(b"device")))?; - Ok(()) + Ok(Visit::Continue) } - fn map(&mut self, m: &Map) -> Result<()> { + fn map(&mut self, m: &Map) -> Result { match m.len { 1 => { let tag = b"single_mapping"; @@ -143,13 +148,13 @@ impl MetadataVisitor for XmlWriter { self.w.write_event(Event::Empty(elem))?; } } - Ok(()) + Ok(Visit::Continue) } - fn eof(&mut self) -> Result<()> { + fn eof(&mut self) -> Result { let w = self.w.inner(); w.flush()?; - Ok(()) + Ok(Visit::Continue) } } @@ -184,7 +189,7 @@ fn missing_attr(_tag: &str, _attr: &str) -> Result { fn check_attr(tag: &str, name: &str, maybe_v: Option) -> Result { match maybe_v { None => missing_attr(tag, name), - Some(v) => Ok(v) + Some(v) => Ok(v), } } @@ -278,7 +283,7 @@ fn parse_single_map(e: &BytesStart) -> Result { thin_begin: check_attr(tag, "origin_block", thin_begin)?, data_begin: check_attr(tag, "data_block", data_begin)?, time: check_attr(tag, "time", time)?, - len: 1 + len: 1, }) } @@ -309,6 +314,40 @@ fn parse_range_map(e: &BytesStart) -> Result { }) } +fn handle_event(reader: &mut Reader, buf: &mut Vec, visitor: &mut M) -> Result +where + R: Read + BufRead, + M: MetadataVisitor, +{ + match reader.read_event(buf) { + Ok(Event::Start(ref e)) => match e.name() { + b"superblock" => visitor.superblock_b(&parse_superblock(e)?), + b"device" => visitor.device_b(&parse_device(e)?), + _ => todo!(), + }, + Ok(Event::End(ref e)) => match e.name() { + b"superblock" => visitor.superblock_e(), + b"device" => 
visitor.device_e(), + _ => todo!(), + }, + Ok(Event::Empty(ref e)) => match e.name() { + b"single_mapping" => visitor.map(&parse_single_map(e)?), + b"range_mapping" => visitor.map(&parse_range_map(e)?), + _ => todo!(), + }, + Ok(Event::Text(_)) => Ok(Visit::Continue), + Ok(Event::Comment(_)) => Ok(Visit::Continue), + Ok(Event::Eof) => { + visitor.eof()?; + Ok(Visit::Stop) + } + Ok(_) => todo!(), + + // FIXME: don't panic! + Err(e) => panic!("error parsing xml {:?}", e), + } +} + pub fn read(input: R, visitor: &mut M) -> Result<()> where R: Read, @@ -321,29 +360,9 @@ where let mut buf = Vec::new(); loop { - match reader.read_event(&mut buf) { - Ok(Event::Start(ref e)) => match e.name() { - b"superblock" => visitor.superblock_b(&parse_superblock(e)?)?, - b"device" => visitor.device_b(&parse_device(e)?)?, - _ => todo!(), - }, - Ok(Event::End(ref e)) => match e.name() { - b"superblock" => visitor.superblock_e()?, - b"device" => visitor.device_e()?, - _ => todo!(), - }, - Ok(Event::Empty(ref e)) => match e.name() { - b"single_mapping" => visitor.map(&parse_single_map(e)?)?, - b"range_mapping" => visitor.map(&parse_range_map(e)?)?, - _ => todo!(), - }, - Ok(Event::Text(_)) => {} - Ok(Event::Comment(_)) => {} - Ok(Event::Eof) => break, - Ok(_) => todo!(), - - // FIXME: don't panic! - Err(e) => panic!("error parsing xml {:?}", e), + match handle_event(&mut reader, &mut buf, visitor)? 
{ + Visit::Continue => {} + Visit::Stop => break, } } @@ -351,3 +370,44 @@ where } //--------------------------------------- + +struct SBVisitor { + superblock: Option, +} + +impl MetadataVisitor for SBVisitor { + fn superblock_b(&mut self, sb: &Superblock) -> Result { + self.superblock = Some(sb.clone()); + Ok(Visit::Stop) + } + + fn superblock_e(&mut self) -> Result { + Ok(Visit::Continue) + } + + fn device_b(&mut self, _d: &Device) -> Result { + Ok(Visit::Continue) + } + fn device_e(&mut self) -> Result { + Ok(Visit::Continue) + } + + fn map(&mut self, _m: &Map) -> Result { + Ok(Visit::Continue) + } + + fn eof(&mut self) -> Result { + Ok(Visit::Stop) + } +} + +pub fn read_superblock(input: R) -> Result +where + R: Read, +{ + let mut v = SBVisitor {superblock: None}; + read(input, &mut v)?; + Ok(v.superblock.unwrap()) +} + +//--------------------------------------- diff --git a/tests/thin_shrink.rs b/tests/thin_shrink.rs new file mode 100644 index 0000000..82d6c78 --- /dev/null +++ b/tests/thin_shrink.rs @@ -0,0 +1,259 @@ +use anyhow::Result; +use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt}; +use rand::Rng; +use std::fs::OpenOptions; +use std::io::{Cursor, Read, Seek, SeekFrom, Write}; + +use thinp::file_utils; +use thinp::thin::xml::{self, Visit}; + +//------------------------------------ + +struct ThinBlock { + thin_id: u32, + thin_block: u64, + data_block: u64, + block_size: usize, +} + +struct ThinReadRef { + pub data: Vec, +} + +struct ThinWriteRef<'a, W: Write + Seek> { + file: &'a mut W, + pub data: Vec, +} + +impl ThinBlock { + fn read_ref(&self, r: &mut R) -> Result { + let mut rr = ThinReadRef { + data: vec![0; self.block_size], + }; + r.seek(SeekFrom::Start(self.data_block * (self.block_size as u64)))?; + r.read_exact(&mut rr.data[0..])?; + Ok(rr) + } + + fn zero_ref<'a, W: Write + Seek>(&self, w: &'a mut W) -> ThinWriteRef<'a, W> { + ThinWriteRef { + file: w, + data: vec![0; self.block_size], + } + } + + fn write_ref<'a, W>(&self, w: &'a 
mut W) -> Result> + where + W: Read + Write + Seek, + { + let mut data = vec![0; self.block_size]; + w.seek(SeekFrom::Start(self.data_block * (self.block_size as u64)))?; + w.read_exact(&mut data[0..])?; + + let wr = ThinWriteRef { + file: w, + data: vec![0; self.block_size], + }; + + Ok(wr) + } +} + +impl<'a, W: Write + Seek> Drop for ThinWriteRef<'a, W> { + fn drop(&mut self) { + self.file.write_all(&self.data[0..]).unwrap(); + } +} + +//------------------------------------ + +trait ThinVisitor { + fn thin_block(&mut self, tb: &ThinBlock) -> Result<()>; +} + +struct ThinXmlVisitor<'a, V: ThinVisitor> { + inner: &'a mut V, + block_size: Option, + thin_id: Option, +} + +impl<'a, V: ThinVisitor> xml::MetadataVisitor for ThinXmlVisitor<'a, V> { + fn superblock_b(&mut self, sb: &xml::Superblock) -> Result { + self.block_size = Some(sb.data_block_size); + Ok(Visit::Continue) + } + + fn superblock_e(&mut self) -> Result { + Ok(Visit::Continue) + } + + fn device_b(&mut self, d: &xml::Device) -> Result { + self.thin_id = Some(d.dev_id); + Ok(Visit::Continue) + } + + fn device_e(&mut self) -> Result { + Ok(Visit::Continue) + } + + fn map(&mut self, m: &xml::Map) -> Result { + for i in 0..m.len { + let block = ThinBlock { + thin_id: self.thin_id.unwrap(), + thin_block: m.thin_begin + i, + data_block: m.data_begin + i, + block_size: self.block_size.unwrap() as usize, + }; + self.inner.thin_block(&block)?; + } + Ok(Visit::Continue) + } + + fn eof(&mut self) -> Result { + Ok(Visit::Stop) + } +} + +fn thin_visit(input: R, visitor: &mut M) -> Result<()> +where + R: Read, + M: ThinVisitor, +{ + let mut xml_visitor = ThinXmlVisitor { + inner: visitor, + block_size: None, + thin_id: None, + }; + + xml::read(input, &mut xml_visitor) +} + +//------------------------------------ + +// To test thin_shrink we'd like to stamp a known pattern across the +// provisioned areas of the thins in the pool, do the shrink, verify +// the patterns. 
+ +// A simple linear congruence generator used to create the data to +// go into the thin blocks. +struct Generator { + x: u64, + a: u64, + c: u64, +} + +impl Generator { + fn new() -> Generator { + Generator { + x: 0, + a: 6364136223846793005, + c: 1442695040888963407, + } + } + + fn step(&mut self) { + self.x = (self.a * self.x) + self.c + } + + fn fill_buffer(&mut self, seed: u64, bytes: &mut [u8]) { + self.x = seed; + + assert!(bytes.len() % 8 == 64); + let nr_words = bytes.len() / 8; + let mut out = Cursor::new(bytes); + + for _ in 0..nr_words { + out.write_u64::(self.x).unwrap(); + self.step(); + } + } + + fn verify_buffer(&mut self, seed: u64, bytes: &[u8]) { + self.x = seed; + + assert!(bytes.len() % 8 == 64); + let nr_words = bytes.len() / 8; + let mut input = Cursor::new(bytes); + + for _ in 0..nr_words { + let w = input.read_u64::().unwrap(); + assert_eq!(w, self.x); + self.step(); + } + } +} + +//------------------------------------ + +struct Stamper<'a, W: Write + Seek> { + data_file: &'a mut W, + seed: u64, +} + +impl<'a, W: Write + Seek> Stamper<'a, W> { + fn new(w: &'a mut W, seed: u64) -> Stamper<'a, W> { + Stamper { data_file: w, seed } + } +} + +impl<'a, W: Write + Seek> ThinVisitor for Stamper<'a, W> { + fn thin_block(&mut self, b: &ThinBlock) -> Result<()> { + let mut wr = b.zero_ref(self.data_file); + let mut gen = Generator::new(); + gen.fill_buffer( + self.seed ^ (b.thin_id as u64) ^ b.thin_block, + &mut wr.data[0..], + ); + Ok(()) + } +} + +//------------------------------------ + +struct Verifier<'a, R: Read + Seek> { + data_file: &'a mut R, + seed: u64, +} + +impl<'a, R: Read + Seek> Verifier<'a, R> { + fn new(r: &'a mut R, seed: u64) -> Verifier<'a, R> { + Verifier { data_file: r, seed } + } +} + +impl<'a, R: Read + Seek> ThinVisitor for Verifier<'a, R> { + fn thin_block(&mut self, b: &ThinBlock) -> Result<()> { + let rr = b.read_ref(self.data_file)?; + let mut gen = Generator::new(); + gen.verify_buffer(self.seed ^ (b.thin_id as u64) ^ 
b.thin_block, &rr.data[0..]); + Ok(()) + } +} + +//------------------------------------ + +fn create_data_file(xml_path: &str) -> Result { + let input = OpenOptions::new().read(true).write(false).open(xml_path)?; + + let sb = xml::read_superblock(input)?; + let nr_blocks = sb.nr_data_blocks as u64; + let block_size = sb.data_block_size as u64 * 512; + + let file = file_utils::temp_file_sized(nr_blocks * block_size)?; + Ok(file) +} + +fn main(xml_path: &str) -> Result<()> { + let mut data_file = create_data_file(xml_path)?; + let mut xml_in = OpenOptions::new().read(true).write(false).open(xml_path)?; + + let mut rng = rand::thread_rng(); + let seed = rng.gen::(); + + let mut stamper = Stamper::new(&mut data_file, seed); + thin_visit(&mut xml_in, &mut stamper)?; + + let mut verifier = Verifier::new(&mut data_file, seed); + thin_visit(&mut xml_in, &mut verifier)?; + Ok(()) +} From 3618da3a124747a870acd25f1151055eb086eb77 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Mon, 29 Jun 2020 10:49:40 +0100 Subject: [PATCH 13/17] [thin_shrink] Get test framework built This verifies data has been moved to the correct place. 
--- src/bin/thin_metadata_pack.rs | 11 ++-- src/bin/thin_metadata_unpack.rs | 39 +++++++----- src/bin/thin_shrink.rs | 13 ++-- src/file_utils.rs | 34 +++++++--- src/pack/toplevel.rs | 7 ++- src/shrink/copier.rs | 3 +- src/shrink/toplevel.rs | 11 +++- tests/thin_shrink.rs | 106 +++++++++++++++++++++++++++++--- 8 files changed, 172 insertions(+), 52 deletions(-) diff --git a/src/bin/thin_metadata_pack.rs b/src/bin/thin_metadata_pack.rs index ff3afa7..d4b731f 100644 --- a/src/bin/thin_metadata_pack.rs +++ b/src/bin/thin_metadata_pack.rs @@ -2,6 +2,7 @@ extern crate clap; extern crate thinp; use clap::{App, Arg}; +use std::path::Path; use std::process::exit; use thinp::file_utils; @@ -23,14 +24,14 @@ fn main() { .takes_value(true)); let matches = parser.get_matches(); - let input_file = matches.value_of("INPUT").unwrap(); - let output_file = matches.value_of("OUTPUT").unwrap(); + let input_file = Path::new(matches.value_of("INPUT").unwrap()); + let output_file = Path::new(matches.value_of("OUTPUT").unwrap()); - if !file_utils::file_exists(input_file) { - eprintln!("Couldn't find input file '{}'.", &input_file); + if !file_utils::file_exists(&input_file) { + eprintln!("Couldn't find input file '{}'.", &input_file.display()); exit(1); } - + if let Err(reason) = thinp::pack::toplevel::pack(&input_file, &output_file) { println!("Application error: {}\n", reason); exit(1); diff --git a/src/bin/thin_metadata_unpack.rs b/src/bin/thin_metadata_unpack.rs index a87dc8a..ca6403f 100644 --- a/src/bin/thin_metadata_unpack.rs +++ b/src/bin/thin_metadata_unpack.rs @@ -2,6 +2,7 @@ extern crate clap; extern crate thinp; use clap::{App, Arg}; +use std::path::Path; use std::process; use thinp::file_utils; @@ -9,30 +10,34 @@ use std::process::exit; fn main() { let parser = App::new("thin_metadata_unpack") - .version(thinp::version::TOOLS_VERSION) + .version(thinp::version::TOOLS_VERSION) .about("Unpack a compressed file of thin metadata.") - .arg(Arg::with_name("INPUT") - .help("Specify 
thinp metadata binary device/file") - .required(true) - .short("i") - .value_name("DEV") - .takes_value(true)) - .arg(Arg::with_name("OUTPUT") - .help("Specify packed output file") - .required(true) - .short("o") - .value_name("FILE") - .takes_value(true)); + .arg( + Arg::with_name("INPUT") + .help("Specify thinp metadata binary device/file") + .required(true) + .short("i") + .value_name("DEV") + .takes_value(true), + ) + .arg( + Arg::with_name("OUTPUT") + .help("Specify packed output file") + .required(true) + .short("o") + .value_name("FILE") + .takes_value(true), + ); let matches = parser.get_matches(); - let input_file = matches.value_of("INPUT").unwrap(); - let output_file = matches.value_of("OUTPUT").unwrap(); + let input_file = Path::new(matches.value_of("INPUT").unwrap()); + let output_file = Path::new(matches.value_of("OUTPUT").unwrap()); if !file_utils::file_exists(input_file) { - eprintln!("Couldn't find input file '{}'.", &input_file); + eprintln!("Couldn't find input file '{}'.", &input_file.display()); exit(1); } - + if let Err(reason) = thinp::pack::toplevel::unpack(&input_file, &output_file) { println!("Application error: {}", reason); process::exit(1); diff --git a/src/bin/thin_shrink.rs b/src/bin/thin_shrink.rs index 54d053f..2e4e270 100644 --- a/src/bin/thin_shrink.rs +++ b/src/bin/thin_shrink.rs @@ -2,6 +2,7 @@ extern crate clap; extern crate thinp; use clap::{App, Arg}; +use std::path::Path; use std::process::exit; use thinp::file_utils; @@ -53,18 +54,20 @@ fn main() { let matches = parser.get_matches(); // FIXME: check these look like xml - let input_file = matches.value_of("INPUT").unwrap(); - let output_file = matches.value_of("OUTPUT").unwrap(); + let input_file = Path::new(matches.value_of("INPUT").unwrap()); + let output_file = Path::new(matches.value_of("OUTPUT").unwrap()); let size = matches.value_of("SIZE").unwrap().parse::().unwrap(); - let data_file = matches.value_of("DATA").unwrap(); + let data_file = 
Path::new(matches.value_of("DATA").unwrap()); let do_copy = !matches.is_present("NOCOPY"); if !file_utils::file_exists(input_file) { - eprintln!("Couldn't find input file '{}'.", &input_file); + eprintln!("Couldn't find input file '{}'.", input_file.display()); exit(1); } - if let Err(reason) = thinp::shrink::toplevel::shrink(&input_file, &output_file, &data_file, size, do_copy) { + if let Err(reason) = + thinp::shrink::toplevel::shrink(&input_file, &output_file, &data_file, size, do_copy) + { println!("Application error: {}\n", reason); exit(1); } diff --git a/src/file_utils.rs b/src/file_utils.rs index 064e59a..85f0714 100644 --- a/src/file_utils.rs +++ b/src/file_utils.rs @@ -1,9 +1,10 @@ use nix::sys::stat; use nix::sys::stat::{FileStat, SFlag}; -use std::fs::File; +use std::fs::{File, OpenOptions}; use std::io; use std::io::{Seek, Write}; use std::os::unix::io::AsRawFd; +use std::path::Path; use tempfile::tempfile; //--------------------------------------- @@ -17,7 +18,7 @@ pub fn is_file_or_blk(info: FileStat) -> bool { || check_bits(info.st_mode, &stat::SFlag::S_IFREG) } -pub fn file_exists(path: &str) -> bool { +pub fn file_exists(path: &Path) -> bool { match stat::stat(path) { Ok(info) => is_file_or_blk(info), _ => { @@ -39,7 +40,7 @@ pub fn fail(msg: &str) -> io::Result { Err(e) } -fn get_device_size(path: &str) -> io::Result { +fn get_device_size(path: &Path) -> io::Result { let file = File::open(path)?; let fd = file.as_raw_fd(); let mut cap = 0u64; @@ -51,7 +52,7 @@ fn get_device_size(path: &str) -> io::Result { } } -pub fn file_size(path: &str) -> io::Result { +pub fn file_size(path: &Path) -> io::Result { match stat::stat(path) { Ok(info) => { if check_bits(info.st_mode, &SFlag::S_IFREG) { @@ -68,16 +69,31 @@ pub fn file_size(path: &str) -> io::Result { //--------------------------------------- -pub fn temp_file_sized(nr_bytes: u64) -> io::Result { - let mut file = tempfile()?; - +fn set_size(w: &mut W, nr_bytes: u64) -> io::Result<()> { let zeroes: 
Vec = vec![0; 1]; if nr_bytes > 0 { - file.seek(io::SeekFrom::Start(nr_bytes - 1))?; - file.write_all(&zeroes)?; + w.seek(io::SeekFrom::Start(nr_bytes - 1))?; + w.write_all(&zeroes)?; } + Ok(()) +} + +pub fn temp_file_sized(nr_bytes: u64) -> io::Result { + let mut file = tempfile()?; + set_size(&mut file, nr_bytes)?; + Ok(file) +} + +pub fn create_sized_file(path: &Path, nr_bytes: u64) -> io::Result { + let mut file = OpenOptions::new() + .read(false) + .write(true) + .create(true) + .truncate(true) + .open(path)?; + set_size(&mut file, nr_bytes)?; Ok(file) } diff --git a/src/pack/toplevel.rs b/src/pack/toplevel.rs index bd1757d..6179899 100644 --- a/src/pack/toplevel.rs +++ b/src/pack/toplevel.rs @@ -6,6 +6,7 @@ use std::os::unix::fs::OpenOptionsExt; use std::{ error::Error, fs::OpenOptions, + path::Path, io, io::prelude::*, io::Cursor, @@ -67,7 +68,7 @@ fn mk_chunk_vecs(nr_blocks: u64, nr_jobs: u64) -> Vec> { vs } -pub fn pack(input_file: &str, output_file: &str) -> Result<(), Box> { +pub fn pack(input_file: &Path, output_file: &Path) -> Result<(), Box> { let nr_blocks = get_nr_blocks(&input_file)?; let nr_jobs = std::cmp::max(1, std::cmp::min(num_cpus::get() as u64, nr_blocks / 128)); let chunk_vecs = mk_chunk_vecs(nr_blocks, nr_jobs); @@ -192,7 +193,7 @@ where r.read_u64::() } -fn get_nr_blocks(path: &str) -> io::Result { +fn get_nr_blocks(path: &Path) -> io::Result { let len = file_utils::file_size(path)?; Ok(len / (BLOCK_SIZE as u64)) } @@ -306,7 +307,7 @@ where Ok(()) } -pub fn unpack(input_file: &str, output_file: &str) -> Result<(), Box> { +pub fn unpack(input_file: &Path, output_file: &Path) -> Result<(), Box> { let mut input = OpenOptions::new() .read(true) .write(false) diff --git a/src/shrink/copier.rs b/src/shrink/copier.rs index ef04a02..f112e28 100644 --- a/src/shrink/copier.rs +++ b/src/shrink/copier.rs @@ -1,5 +1,6 @@ use anyhow::Result; use std::fs::OpenOptions; +use std::path::Path; use std::io::{Seek, SeekFrom, Write, Read}; //use 
std::os::unix::fs::OpenOptionsExt; @@ -43,7 +44,7 @@ where Ok(()) } -pub fn copy(path: &str, regions: &[Region]) -> Result<()> { +pub fn copy(path: &Path, regions: &[Region]) -> Result<()> { let mut input = OpenOptions::new() .read(true) .write(true) diff --git a/src/shrink/toplevel.rs b/src/shrink/toplevel.rs index 500a5ef..db08ee2 100644 --- a/src/shrink/toplevel.rs +++ b/src/shrink/toplevel.rs @@ -3,6 +3,7 @@ use fixedbitset::FixedBitSet; use std::fs::OpenOptions; use std::io::Write; use std::os::unix::fs::OpenOptionsExt; +use std::path::Path; use crate::shrink::copier::{self, Region}; use crate::thin::xml::{self, Visit}; @@ -426,7 +427,7 @@ fn build_copy_regions(remaps: &[(BlockRange, BlockRange)], block_size: u64) -> V rs } -fn process_xml(input_path: &str, pass: &mut MV) -> Result<()> { +fn process_xml(input_path: &Path, pass: &mut MV) -> Result<()> { let input = OpenOptions::new() .read(true) .write(false) @@ -437,7 +438,13 @@ fn process_xml(input_path: &str, pass: &mut MV) -> Res Ok(()) } -pub fn shrink(input_path: &str, output_path: &str, data_path: &str, nr_blocks: u64, do_copy: bool) -> Result<()> { +pub fn shrink( + input_path: &Path, + output_path: &Path, + data_path: &Path, + nr_blocks: u64, + do_copy: bool, +) -> Result<()> { let mut pass1 = Pass1::new(nr_blocks); eprint!("Reading xml..."); process_xml(input_path, &mut pass1)?; diff --git a/tests/thin_shrink.rs b/tests/thin_shrink.rs index 82d6c78..96c6296 100644 --- a/tests/thin_shrink.rs +++ b/tests/thin_shrink.rs @@ -3,6 +3,8 @@ use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt}; use rand::Rng; use std::fs::OpenOptions; use std::io::{Cursor, Read, Seek, SeekFrom, Write}; +use std::path::{Path, PathBuf}; +use tempfile::tempdir; use thinp::file_utils; use thinp::thin::xml::{self, Visit}; @@ -22,6 +24,7 @@ struct ThinReadRef { struct ThinWriteRef<'a, W: Write + Seek> { file: &'a mut W, + block_byte: u64, pub data: Vec, } @@ -38,6 +41,7 @@ impl ThinBlock { fn zero_ref<'a, W: Write + 
Seek>(&self, w: &'a mut W) -> ThinWriteRef<'a, W> { ThinWriteRef { file: w, + block_byte: self.data_block * (self.block_size as u64), data: vec![0; self.block_size], } } @@ -52,6 +56,7 @@ impl ThinBlock { let wr = ThinWriteRef { file: w, + block_byte: self.data_block * (self.block_size as u64), data: vec![0; self.block_size], }; @@ -61,6 +66,10 @@ impl ThinBlock { impl<'a, W: Write + Seek> Drop for ThinWriteRef<'a, W> { fn drop(&mut self) { + // FIXME: We shouldn't panic in a drop function, so any IO + // errors will have to make their way back to the user + // another way (eg, via a flush() method). + self.file.seek(SeekFrom::Start(self.block_byte)).unwrap(); self.file.write_all(&self.data[0..]).unwrap(); } } @@ -232,28 +241,105 @@ impl<'a, R: Read + Seek> ThinVisitor for Verifier<'a, R> { //------------------------------------ -fn create_data_file(xml_path: &str) -> Result { +fn mk_path(dir: &Path, file: &str) -> PathBuf { + let mut p = PathBuf::new(); + p.push(dir); + p.push(PathBuf::from(file)); + p +} + +fn generate_xml(path: &Path, g: &mut G) -> Result<()> +where + G: XmlGenerator, +{ + let xml_out = OpenOptions::new() + .read(false) + .write(true) + .create(true) + .truncate(true) + .open(path)?; + let mut w = xml::XmlWriter::new(xml_out); + + g.generate(&mut w) +} + +fn create_data_file(data_path: &Path, xml_path: &Path) -> Result<()> { let input = OpenOptions::new().read(true).write(false).open(xml_path)?; let sb = xml::read_superblock(input)?; let nr_blocks = sb.nr_data_blocks as u64; let block_size = sb.data_block_size as u64 * 512; - let file = file_utils::temp_file_sized(nr_blocks * block_size)?; - Ok(file) + let _file = file_utils::create_sized_file(data_path, nr_blocks * block_size)?; + Ok(()) } -fn main(xml_path: &str) -> Result<()> { - let mut data_file = create_data_file(xml_path)?; - let mut xml_in = OpenOptions::new().read(true).write(false).open(xml_path)?; +fn stamp(xml_path: &Path, data_path: &Path, seed: u64) -> Result<()> { + let mut data = 
OpenOptions::new() + .read(false) + .write(true) + .open(&data_path)?; + let xml = OpenOptions::new().read(true).write(false).open(&xml_path)?; + + let mut stamper = Stamper::new(&mut data, seed); + thin_visit(xml, &mut stamper) +} + +fn verify(xml_path: &Path, data_path: &Path, seed: u64) -> Result<()> { + let mut data = OpenOptions::new() + .read(true) + .write(false) + .open(&data_path)?; + let xml = OpenOptions::new().read(true).write(false).open(&xml_path)?; + + let mut verifier = Verifier::new(&mut data, seed); + thin_visit(xml, &mut verifier) +} + +//------------------------------------ + +trait XmlGenerator { + fn generate(&mut self, v: &mut dyn xml::MetadataVisitor) -> Result<()>; +} + +struct EmptyPoolG {} + +impl XmlGenerator for EmptyPoolG { + fn generate(&mut self, v: &mut dyn xml::MetadataVisitor) -> Result<()> { + v.superblock_b(&xml::Superblock { + uuid: "".to_string(), + time: 0, + transaction: 0, + flags: None, + version: None, + data_block_size: 64, + nr_data_blocks: 1024, + metadata_snap: None, + })?; + v.superblock_e()?; + Ok(()) + } +} + +#[test] +fn shrink_empty_pool() -> Result<()> { + let dir = tempdir()?; + let xml_before = mk_path(dir.path(), "before.xml"); + let xml_after = mk_path(dir.path(), "after.xml"); + let data_path = mk_path(dir.path(), "bin"); + + let mut gen = EmptyPoolG {}; + generate_xml(&xml_before, &mut gen)?; + create_data_file(&data_path, &xml_before)?; let mut rng = rand::thread_rng(); let seed = rng.gen::(); - let mut stamper = Stamper::new(&mut data_file, seed); - thin_visit(&mut xml_in, &mut stamper)?; + stamp(&xml_before, &data_path, seed)?; - let mut verifier = Verifier::new(&mut data_file, seed); - thin_visit(&mut xml_in, &mut verifier)?; + let new_nr_blocks = 10; + thinp::shrink::toplevel::shrink(&xml_before, &xml_after, &data_path, new_nr_blocks, true)?; + + verify(&xml_after, &data_path, seed)?; Ok(()) } From 4d7f9c7ddcf29dd7f455ceea891b07fac57139fc Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Tue, 30 Jun 
2020 08:32:04 +0100 Subject: [PATCH 14/17] [thin_shrink] add some more test cases --- src/shrink/copier.rs | 3 +- tests/thin_shrink.rs | 225 ++++++++++++++++++++++++++++++------------- 2 files changed, 161 insertions(+), 67 deletions(-) diff --git a/src/shrink/copier.rs b/src/shrink/copier.rs index f112e28..1be1f2a 100644 --- a/src/shrink/copier.rs +++ b/src/shrink/copier.rs @@ -20,7 +20,7 @@ where { let mut buf = vec![0; len]; file.seek(SeekFrom::Start(src_byte))?; - file.read_exact(&mut buf[0..])?; + file.read_exact(&mut buf)?; file.seek(SeekFrom::Start(dest_byte))?; file.write_all(&buf)?; Ok(()) @@ -55,6 +55,7 @@ pub fn copy(path: &Path, regions: &[Region]) -> Result<()> { eprintln!("copying {:?}", r); copy_region(&mut input, r)?; } + input.flush()?; Ok(()) } diff --git a/tests/thin_shrink.rs b/tests/thin_shrink.rs index 96c6296..1759890 100644 --- a/tests/thin_shrink.rs +++ b/tests/thin_shrink.rs @@ -1,4 +1,4 @@ -use anyhow::Result; +use anyhow::{anyhow, Result}; use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt}; use rand::Rng; use std::fs::OpenOptions; @@ -11,6 +11,7 @@ use thinp::thin::xml::{self, Visit}; //------------------------------------ +#[derive(Debug)] struct ThinBlock { thin_id: u32, thin_block: u64, @@ -33,35 +34,36 @@ impl ThinBlock { let mut rr = ThinReadRef { data: vec![0; self.block_size], }; - r.seek(SeekFrom::Start(self.data_block * (self.block_size as u64)))?; - r.read_exact(&mut rr.data[0..])?; + let byte = self.data_block * (self.block_size as u64) * 512; + r.seek(SeekFrom::Start(byte))?; + r.read_exact(&mut rr.data)?; Ok(rr) } fn zero_ref<'a, W: Write + Seek>(&self, w: &'a mut W) -> ThinWriteRef<'a, W> { ThinWriteRef { file: w, - block_byte: self.data_block * (self.block_size as u64), + block_byte: self.data_block * (self.block_size as u64) * 512, data: vec![0; self.block_size], } } - fn write_ref<'a, W>(&self, w: &'a mut W) -> Result> - where - W: Read + Write + Seek, - { - let mut data = vec![0; self.block_size]; - 
w.seek(SeekFrom::Start(self.data_block * (self.block_size as u64)))?; - w.read_exact(&mut data[0..])?; - - let wr = ThinWriteRef { - file: w, - block_byte: self.data_block * (self.block_size as u64), - data: vec![0; self.block_size], - }; - - Ok(wr) - } + //fn write_ref<'a, W>(&self, w: &'a mut W) -> Result> + //where + //W: Read + Write + Seek, + //{ + //let mut data = vec![0; self.block_size]; + //w.seek(SeekFrom::Start(self.data_block * (self.block_size as u64)))?; + //w.read_exact(&mut data[0..])?; + // + //let wr = ThinWriteRef { + //file: w, + //block_byte: self.data_block * (self.block_size as u64), + //data: vec![0; self.block_size], + //}; + // + //Ok(wr) + //} } impl<'a, W: Write + Seek> Drop for ThinWriteRef<'a, W> { @@ -70,7 +72,7 @@ impl<'a, W: Write + Seek> Drop for ThinWriteRef<'a, W> { // errors will have to make their way back to the user // another way (eg, via a flush() method). self.file.seek(SeekFrom::Start(self.block_byte)).unwrap(); - self.file.write_all(&self.data[0..]).unwrap(); + self.file.write_all(&self.data).unwrap(); } } @@ -161,34 +163,41 @@ impl Generator { } fn step(&mut self) { - self.x = (self.a * self.x) + self.c + self.x = self.a.wrapping_mul(self.x).wrapping_add(self.c) } - fn fill_buffer(&mut self, seed: u64, bytes: &mut [u8]) { + fn fill_buffer(&mut self, seed: u64, bytes: &mut [u8]) -> Result<()> { self.x = seed; - assert!(bytes.len() % 8 == 64); + assert!(bytes.len() % 8 == 0); let nr_words = bytes.len() / 8; let mut out = Cursor::new(bytes); for _ in 0..nr_words { - out.write_u64::(self.x).unwrap(); + out.write_u64::(self.x)?; self.step(); } + + Ok(()) } - fn verify_buffer(&mut self, seed: u64, bytes: &[u8]) { + fn verify_buffer(&mut self, seed: u64, bytes: &[u8]) -> Result { self.x = seed; - assert!(bytes.len() % 8 == 64); + assert!(bytes.len() % 8 == 0); let nr_words = bytes.len() / 8; let mut input = Cursor::new(bytes); for _ in 0..nr_words { - let w = input.read_u64::().unwrap(); - assert_eq!(w, self.x); + let w = 
input.read_u64::()?; + if w != self.x { + eprintln!("{} != {}", w, self.x); + return Ok(false); + } self.step(); } + + Ok(true) } } @@ -212,7 +221,7 @@ impl<'a, W: Write + Seek> ThinVisitor for Stamper<'a, W> { gen.fill_buffer( self.seed ^ (b.thin_id as u64) ^ b.thin_block, &mut wr.data[0..], - ); + )?; Ok(()) } } @@ -234,7 +243,9 @@ impl<'a, R: Read + Seek> ThinVisitor for Verifier<'a, R> { fn thin_block(&mut self, b: &ThinBlock) -> Result<()> { let rr = b.read_ref(self.data_file)?; let mut gen = Generator::new(); - gen.verify_buffer(self.seed ^ (b.thin_id as u64) ^ b.thin_block, &rr.data[0..]); + if !gen.verify_buffer(self.seed ^ (b.thin_id as u64) ^ b.thin_block, &rr.data[0..])? { + return Err(anyhow!("data verify failed for {:?}", b)); + } Ok(()) } } @@ -248,10 +259,7 @@ fn mk_path(dir: &Path, file: &str) -> PathBuf { p } -fn generate_xml(path: &Path, g: &mut G) -> Result<()> -where - G: XmlGenerator, -{ +fn generate_xml(path: &Path, g: &mut dyn Scenario) -> Result<()> { let xml_out = OpenOptions::new() .read(false) .write(true) @@ -260,7 +268,7 @@ where .open(path)?; let mut w = xml::XmlWriter::new(xml_out); - g.generate(&mut w) + g.generate_xml(&mut w) } fn create_data_file(data_path: &Path, xml_path: &Path) -> Result<()> { @@ -296,50 +304,135 @@ fn verify(xml_path: &Path, data_path: &Path, seed: u64) -> Result<()> { thin_visit(xml, &mut verifier) } -//------------------------------------ - -trait XmlGenerator { - fn generate(&mut self, v: &mut dyn xml::MetadataVisitor) -> Result<()>; +trait Scenario { + fn generate_xml(&mut self, v: &mut dyn xml::MetadataVisitor) -> Result<()>; + fn get_new_nr_blocks(&self) -> u64; } -struct EmptyPoolG {} - -impl XmlGenerator for EmptyPoolG { - fn generate(&mut self, v: &mut dyn xml::MetadataVisitor) -> Result<()> { - v.superblock_b(&xml::Superblock { - uuid: "".to_string(), - time: 0, - transaction: 0, - flags: None, - version: None, - data_block_size: 64, - nr_data_blocks: 1024, - metadata_snap: None, - })?; - 
v.superblock_e()?; - Ok(()) - } -} - -#[test] -fn shrink_empty_pool() -> Result<()> { +fn test_shrink(scenario: &mut dyn Scenario) -> Result<()> { let dir = tempdir()?; let xml_before = mk_path(dir.path(), "before.xml"); let xml_after = mk_path(dir.path(), "after.xml"); - let data_path = mk_path(dir.path(), "bin"); + let data_path = mk_path(dir.path(), "metadata.bin"); - let mut gen = EmptyPoolG {}; - generate_xml(&xml_before, &mut gen)?; + generate_xml(&xml_before, scenario)?; create_data_file(&data_path, &xml_before)?; let mut rng = rand::thread_rng(); let seed = rng.gen::(); stamp(&xml_before, &data_path, seed)?; - - let new_nr_blocks = 10; + + let new_nr_blocks = scenario.get_new_nr_blocks(); thinp::shrink::toplevel::shrink(&xml_before, &xml_after, &data_path, new_nr_blocks, true)?; verify(&xml_after, &data_path, seed)?; Ok(()) } + +//------------------------------------ + +fn common_sb(nr_blocks: u64) -> xml::Superblock { + xml::Superblock { + uuid: "".to_string(), + time: 0, + transaction: 0, + flags: None, + version: None, + data_block_size: 32, + nr_data_blocks: nr_blocks, + metadata_snap: None, + } +} + +struct EmptyPoolS {} + +impl Scenario for EmptyPoolS { + fn generate_xml(&mut self, v: &mut dyn xml::MetadataVisitor) -> Result<()> { + v.superblock_b(&common_sb(1024))?; + v.superblock_e()?; + Ok(()) + } + + fn get_new_nr_blocks(&self) -> u64 { + 512 + } +} + +#[test] +fn shrink_empty_pool() -> Result<()> { + let mut s = EmptyPoolS {}; + test_shrink(&mut s) +} + +//------------------------------------ + +struct SingleThinS { + offset: u64, + len: u64, + old_nr_data_blocks: u64, + new_nr_data_blocks: u64, +} + +impl SingleThinS { + fn new(offset: u64, len: u64, old_nr_data_blocks: u64, new_nr_data_blocks: u64) -> Self { + SingleThinS { + offset, + len, + old_nr_data_blocks, + new_nr_data_blocks, + } + } +} + +impl Scenario for SingleThinS { + fn generate_xml(&mut self, v: &mut dyn xml::MetadataVisitor) -> Result<()> { + 
v.superblock_b(&common_sb(self.old_nr_data_blocks))?; + v.device_b(&xml::Device { + dev_id: 0, + mapped_blocks: self.len, + transaction: 0, + creation_time: 0, + snap_time: 0, + })?; + v.map(&xml::Map { + thin_begin: 0, + data_begin: self.offset, + time: 0, + len: self.len, + })?; + v.device_e()?; + v.superblock_e()?; + Ok(()) + } + + fn get_new_nr_blocks(&self) -> u64 { + self.new_nr_data_blocks + } +} + +#[test] +fn shrink_single_no_move_1() -> Result<()> { + let mut s = SingleThinS::new(0, 1024, 2048, 1280); + test_shrink(&mut s) +} + +#[test] +fn shrink_single_no_move_2() -> Result<()> { + let mut s = SingleThinS::new(100, 1024, 2048, 1280); + test_shrink(&mut s) +} + +#[test] +fn shrink_single_no_move_3() -> Result<()> { + let mut s = SingleThinS::new(1024, 1024, 2048, 2048); + test_shrink(&mut s) +} + +#[test] +fn shrink_single_partial_move() -> Result<()> { + let mut s = SingleThinS::new(1024, 1024, 2048, 1280); + test_shrink(&mut s) +} + +//------------------------------------ From 470d0b7532c9615d3621962b76db1fc4bbf9402f Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Wed, 1 Jul 2020 07:52:53 +0100 Subject: [PATCH 15/17] [thin_shrink] more test cases --- src/shrink/toplevel.rs | 5 +- tests/thin_shrink.rs | 230 +++++++++++++++++++++++++++++++++++++++-- 2 files changed, 223 insertions(+), 12 deletions(-) diff --git a/src/shrink/toplevel.rs b/src/shrink/toplevel.rs index db08ee2..62a2052 100644 --- a/src/shrink/toplevel.rs +++ b/src/shrink/toplevel.rs @@ -1,4 +1,4 @@ -use anyhow::Result; +use anyhow::{anyhow, Result}; use fixedbitset::FixedBitSet; use std::fs::OpenOptions; use std::io::Write; @@ -459,8 +459,7 @@ pub fn shrink( eprintln!("{} free blocks.", free_blocks); if free_blocks < pass1.nr_high_blocks { - // FIXME: return error - panic!("Insufficient space"); + return Err(anyhow!("Insufficient space")); } let remaps = build_remaps(above, free); diff --git a/tests/thin_shrink.rs b/tests/thin_shrink.rs index 1759890..279a26a 100644 --- 
a/tests/thin_shrink.rs +++ b/tests/thin_shrink.rs @@ -1,6 +1,6 @@ use anyhow::{anyhow, Result}; use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt}; -use rand::Rng; +use rand::prelude::*; use std::fs::OpenOptions; use std::io::{Cursor, Read, Seek, SeekFrom, Write}; use std::path::{Path, PathBuf}; @@ -32,7 +32,7 @@ struct ThinWriteRef<'a, W: Write + Seek> { impl ThinBlock { fn read_ref(&self, r: &mut R) -> Result { let mut rr = ThinReadRef { - data: vec![0; self.block_size], + data: vec![0; self.block_size * 512], }; let byte = self.data_block * (self.block_size as u64) * 512; r.seek(SeekFrom::Start(byte))?; @@ -44,7 +44,7 @@ impl ThinBlock { ThinWriteRef { file: w, block_byte: self.data_block * (self.block_size as u64) * 512, - data: vec![0; self.block_size], + data: vec![0; self.block_size * 512], } } @@ -218,10 +218,7 @@ impl<'a, W: Write + Seek> ThinVisitor for Stamper<'a, W> { fn thin_block(&mut self, b: &ThinBlock) -> Result<()> { let mut wr = b.zero_ref(self.data_file); let mut gen = Generator::new(); - gen.fill_buffer( - self.seed ^ (b.thin_id as u64) ^ b.thin_block, - &mut wr.data[0..], - )?; + gen.fill_buffer(self.seed ^ (b.thin_id as u64) ^ b.thin_block, &mut wr.data)?; Ok(()) } } @@ -243,7 +240,7 @@ impl<'a, R: Read + Seek> ThinVisitor for Verifier<'a, R> { fn thin_block(&mut self, b: &ThinBlock) -> Result<()> { let rr = b.read_ref(self.data_file)?; let mut gen = Generator::new(); - if !gen.verify_buffer(self.seed ^ (b.thin_id as u64) ^ b.thin_block, &rr.data[0..])? { + if !gen.verify_buffer(self.seed ^ (b.thin_id as u64) ^ b.thin_block, &rr.data)? 
{ return Err(anyhow!("data verify failed for {:?}", b)); } Ok(()) @@ -322,7 +319,8 @@ fn test_shrink(scenario: &mut dyn Scenario) -> Result<()> { let seed = rng.gen::(); stamp(&xml_before, &data_path, seed)?; - + verify(&xml_before, &data_path, seed)?; + let new_nr_blocks = scenario.get_new_nr_blocks(); thinp::shrink::toplevel::shrink(&xml_before, &xml_after, &data_path, new_nr_blocks, true)?; @@ -435,4 +433,218 @@ fn shrink_single_partial_move() -> Result<()> { test_shrink(&mut s) } +#[test] +fn shrink_single_total_move() -> Result<()> { + let mut s = SingleThinS::new(2048, 1024, 1024 + 2048, 1280); + test_shrink(&mut s) +} + +#[test] +fn shrink_insufficient_space() -> Result<()> { + let mut s = SingleThinS::new(0, 2048, 3000, 1280); + match test_shrink(&mut s) { + Ok(_) => Err(anyhow!("Shrink unexpectedly succeeded")), + Err(_) => Ok(()), + } +} + +//------------------------------------ + +struct FragmentedS { + nr_thins: u32, + thin_size: u64, + old_nr_data_blocks: u64, + new_nr_data_blocks: u64, +} + +impl FragmentedS { + fn new(nr_thins: u32, thin_size: u64) -> Self { + let old_size = (nr_thins as u64) * thin_size; + FragmentedS { + nr_thins, + thin_size, + old_nr_data_blocks: (nr_thins as u64) * thin_size, + new_nr_data_blocks: old_size * 3 / 4, + } + } +} + +#[derive(Clone)] +struct ThinRun { + thin_id: u32, + thin_begin: u64, + len: u64, +} + +#[derive(Clone, Debug, Copy)] +struct MappedRun { + thin_id: u32, + thin_begin: u64, + data_begin: u64, + len: u64, +} + +fn mk_runs(thin_id: u32, total_len: u64, run_len: std::ops::Range) -> Vec { + let mut runs = Vec::new(); + let mut b = 0u64; + while b < total_len { + let len = u64::min( + total_len - b, + thread_rng().gen_range(run_len.start, run_len.end), + ); + runs.push(ThinRun { + thin_id: thin_id, + thin_begin: b, + len, + }); + b += len; + } + runs +} + +impl Scenario for FragmentedS { + fn generate_xml(&mut self, v: &mut dyn xml::MetadataVisitor) -> Result<()> { + // Allocate each thin fully, in runs 
between 1 and 16. + let mut runs = Vec::new(); + for thin in 0..self.nr_thins { + runs.append(&mut mk_runs(thin, self.thin_size, 1..17)); + } + + // Shuffle + runs.shuffle(&mut rand::thread_rng()); + + // map across the data + let mut maps = Vec::new(); + let mut b = 0; + for r in &runs { + maps.push(MappedRun { + thin_id: r.thin_id, + thin_begin: r.thin_begin, + data_begin: b, + len: r.len, + }); + b += r.len; + } + + // drop half the mappings, which leaves us free runs + let mut dropped = Vec::new(); + for i in 0..maps.len() { + if i % 2 == 0 { + dropped.push(maps[i].clone()); + } + } + + // Unshuffle. This isn't strictly necc. but makes the xml + // more readable. + use std::cmp::Ordering; + maps.sort_by(|&l, &r| match l.thin_id.cmp(&r.thin_id) { + Ordering::Equal => l.thin_begin.cmp(&r.thin_begin), + o => o, + }); + + // write the xml + v.superblock_b(&common_sb(self.old_nr_data_blocks))?; + for thin in 0..self.nr_thins { + v.device_b(&xml::Device { + dev_id: thin, + mapped_blocks: self.thin_size, + transaction: 0, + creation_time: 0, + snap_time: 0, + })?; + + for m in &dropped { + if m.thin_id != thin { + continue; + } + + v.map(&xml::Map { + thin_begin: m.thin_begin, + data_begin: m.data_begin, + time: 0, + len: m.len, + })?; + } + + v.device_e()?; + } + v.superblock_e()?; + Ok(()) + } + + fn get_new_nr_blocks(&self) -> u64 { + self.new_nr_data_blocks + } +} + +#[test] +fn shrink_fragmented_thin_1() -> Result<()> { + let mut s = FragmentedS::new(1, 2048); + test_shrink(&mut s) +} + +#[test] +fn shrink_fragmented_thin_2() -> Result<()> { + let mut s = FragmentedS::new(2, 2048); + test_shrink(&mut s) +} + +#[test] +fn shrink_fragmented_thin_8() -> Result<()> { + let mut s = FragmentedS::new(2, 2048); + test_shrink(&mut s) +} + +#[test] +fn shrink_fragmented_thin_64() -> Result<()> { + let mut s = FragmentedS::new(2, 2048); + test_shrink(&mut s) +} + +//------------------------------------ + +/* +// Snapshots share mappings, not neccessarily the entire ranges. 
+struct SnapS { + len: u64, + nr_snaps: u32, + + // Snaps will differ from the origin by this percentage + percent_change: usize, + old_nr_data_blocks: u64, + new_nr_data_blocks: u64, +} + +impl SnapS { + fn new(len: u64, nr_snaps: u32, percent_change: usize) -> Self { + let delta = len * (nr_snaps as u64) * (percent_change as u64) / 100; + let old_nr_data_blocks = len + 3 * delta; + let new_nr_data_blocks = len + 2 * delta; + + SnapS { + len, + nr_snaps, + percent_change, + old_nr_data_blocks, + new_nr_data_blocks, + } + } +} + +impl Scenario for SnapS { + fn generate_xml(&mut self, v: &mut dyn xml::MetadataVisitor) -> Result<()> { + let origin = mk_runs(0, self.len, 8..64); + } + + fn get_new_nr_blocks(&self) -> u64 { + self.new_nr_data_blocks + } +} + +#[test] +fn shrink_identical_snap() -> Result<()> { + let mut s = SnapS::new(1024, 1, 0); + test_shrink(&mut s) +} +*/ //------------------------------------ From 19138dbd8187d4a93cfa14f527d46b22c4f65e9e Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Mon, 6 Jul 2020 11:44:49 +0100 Subject: [PATCH 16/17] [thin_shrink] add unit tests for build_remaps --- src/shrink/toplevel.rs | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/src/shrink/toplevel.rs b/src/shrink/toplevel.rs index 62a2052..38ffe82 100644 --- a/src/shrink/toplevel.rs +++ b/src/shrink/toplevel.rs @@ -253,6 +253,47 @@ fn build_remaps(ranges: Vec, free: Vec) -> Vec<(BlockRan remap } +#[test] +fn test_build_remaps() { + struct Test { + ranges: Vec, + free: Vec, + result: Vec<(BlockRange, BlockRange)>, + } + + let tests = vec![ + Test { + ranges: vec![], + free: vec![], + result: vec![], + }, + Test { + ranges: vec![], + free: vec![0..100], + result: vec![], + }, + Test { + ranges: vec![1000..1002], + free: vec![0..100], + result: vec![(1000..1002, 0..2)], + }, + Test { + ranges: vec![1000..1002, 1100..1110], + free: vec![0..100], + result: vec![(1000..1002, 0..2), (1100..1110, 2..12)], + }, + Test { + ranges: 
vec![100..120], + free: vec![0..5, 20..23, 30..50], + result: vec![(100..105, 0..5), (105..108, 20..23), (108..120, 30..42)], + }, + ]; + + for t in tests { + assert_eq!(build_remaps(t.ranges, t.free), t.result); + } +} + fn overlaps(r1: &BlockRange, r2: &BlockRange, index: usize) -> Option { if r1.start >= r2.end { return None; From 32019ac388e0443bc61470cbb98659b3bc2e9bc2 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Wed, 22 Jul 2020 16:07:34 +0100 Subject: [PATCH 17/17] [thin_shrink] add a trivial snapshot test. Still need much more work on snap tests. --- tests/thin_shrink.rs | 290 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 287 insertions(+), 3 deletions(-) diff --git a/tests/thin_shrink.rs b/tests/thin_shrink.rs index 279a26a..0291340 100644 --- a/tests/thin_shrink.rs +++ b/tests/thin_shrink.rs @@ -1,8 +1,10 @@ use anyhow::{anyhow, Result}; use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt}; use rand::prelude::*; +use std::collections::VecDeque; use std::fs::OpenOptions; use std::io::{Cursor, Read, Seek, SeekFrom, Write}; +use std::ops::Range; use std::path::{Path, PathBuf}; use tempfile::tempdir; @@ -603,7 +605,282 @@ fn shrink_fragmented_thin_64() -> Result<()> { //------------------------------------ -/* +struct Allocator { + runs: VecDeque>, +} + +impl Allocator { + fn new_shuffled(total_len: u64, run_len: Range) -> Allocator { + let mut runs = Vec::new(); + + let mut b = 0u64; + while b < total_len { + let len = u64::min( + total_len - b, + thread_rng().gen_range(run_len.start, run_len.end), + ); + runs.push(b..(b + len)); + b += len; + } + + runs.shuffle(&mut thread_rng()); + let runs: VecDeque> = runs.iter().map(|r| r.clone()).collect(); + Allocator { runs } + } + + fn is_empty(&self) -> bool { + self.runs.is_empty() + } + + fn alloc(&mut self, len: u64) -> Result>> { + let mut len = len; + let mut runs = Vec::new(); + + while len > 0 { + let r = self.runs.pop_front(); + + if r.is_none() { + return Err(anyhow!("could not 
allocate; out of space")); + } + + let mut r = r.unwrap(); + let rlen = r.end - r.start; + if len < rlen { + runs.push(r.start..(r.start + len)); + + // We need to push something back. + self.runs.push_front((r.start + len)..r.end); + len = 0; + } else { + runs.push(r.start..r.end); + len -= rlen; + } + } + + Ok(runs) + } +} + +// Having explicitly unmapped regions makes it easier to +// apply snapshots. +#[derive(Clone)] +enum Run { + Mapped { data_begin: u64, len: u64 }, + UnMapped { len: u64 }, +} + +impl Run { + fn len(&self) -> u64 { + match self { + Run::Mapped { + data_begin: _data_begin, + len, + } => *len, + Run::UnMapped { len } => *len, + } + } + + fn split(&self, n: u64) -> (Option, Option) { + if n == 0 { + return (None, Some(self.clone())); + } else { + if self.len() <= n { + return (Some(self.clone()), None); + } else { + match self { + Run::Mapped { data_begin, len } => ( + Some(Run::Mapped { + data_begin: *data_begin, + len: n, + }), + Some(Run::Mapped { + data_begin: data_begin + n, + len: len - n, + }), + ), + Run::UnMapped { len } => ( + Some(Run::UnMapped { len: n }), + Some(Run::UnMapped { len: len - n }), + ), + } + } + } + } +} + +#[derive(Clone)] +struct ThinDev { + thin_id: u32, + dev_size: u64, + runs: Vec, +} + +impl ThinDev { + fn emit(&self, v: &mut dyn xml::MetadataVisitor) -> Result<()> { + v.device_b(&xml::Device { + dev_id: self.thin_id, + mapped_blocks: self.dev_size, + transaction: 0, + creation_time: 0, + snap_time: 0, + })?; + + let mut b = 0; + for r in &self.runs { + match r { + Run::Mapped { data_begin, len } => { + v.map(&xml::Map { + thin_begin: b, + data_begin: *data_begin, + time: 0, + len: *len, + })?; + b += len; + } + Run::UnMapped { len } => { + b += len; + } + } + } + + v.device_e()?; + Ok(()) + } +} + +#[derive(Clone)] +enum SnapRunType { + Same, + Diff, + Hole, +} + +#[derive(Clone)] +struct SnapRun(SnapRunType, u64); + +fn mk_origin(thin_id: u32, total_len: u64, allocator: &mut Allocator) -> Result { + let mut 
runs = Vec::new(); + let mut b = 0; + while b < total_len { + let len = u64::min(thread_rng().gen_range(16, 64), total_len - b); + match thread_rng().gen_range(0, 2) { + 0 => { + for data in allocator.alloc(len)? { + assert!(data.end >= data.start); + runs.push(Run::Mapped { + data_begin: data.start, + len: data.end - data.start, + }); + } + } + 1 => { + runs.push(Run::UnMapped { len }); + } + _ => { + return Err(anyhow!("bad value returned from rng")); + } + }; + + b += len; + } + + Ok(ThinDev { + thin_id, + dev_size: total_len, + runs, + }) +} + +fn mk_snap_mapping( + total_len: u64, + run_len: Range, + same_percent: usize, + diff_percent: usize, +) -> Vec { + let mut runs = Vec::new(); + + let mut b = 0u64; + while b < total_len { + let len = u64::min( + total_len - b, + thread_rng().gen_range(run_len.start, run_len.end), + ); + + let n = thread_rng().gen_range(0, 100); + + if n < same_percent { + runs.push(SnapRun(SnapRunType::Same, len)); + } else if n < diff_percent { + runs.push(SnapRun(SnapRunType::Diff, len)); + } else { + runs.push(SnapRun(SnapRunType::Hole, len)); + } + + b += len; + } + + runs +} + +fn split_runs(mut n: u64, runs: &Vec) -> (Vec, Vec) { + let mut before = Vec::new(); + let mut after = Vec::new(); + + for r in runs { + match r.split(n) { + (Some(lhs), None) => { + before.push(lhs); + } + (Some(lhs), Some(rhs)) => { + before.push(lhs); + after.push(rhs); + } + (None, Some(rhs)) => { + after.push(rhs); + } + (None, None) => {} + } + n -= r.len(); + } + + (before, after) +} + +fn apply_snap_runs( + origin: &Vec, + snap: &Vec, + allocator: &mut Allocator, +) -> Result> { + let mut origin = origin.clone(); + let mut runs = Vec::new(); + + for SnapRun(st, slen) in snap { + let (os, rest) = split_runs(*slen, &origin); + match st { + SnapRunType::Same => { + for o in os { + runs.push(o); + } + } + SnapRunType::Diff => { + for data in allocator.alloc(*slen)? 
{ + runs.push(Run::Mapped { + data_begin: data.start, + len: data.end - data.start, + }); + } + } + SnapRunType::Hole => { + runs.push(Run::UnMapped { len: *slen }); + } + } + + origin = rest; + } + + Ok(runs) +} + // Snapshots share mappings, not neccessarily the entire ranges. struct SnapS { len: u64, @@ -633,7 +910,14 @@ impl SnapS { impl Scenario for SnapS { fn generate_xml(&mut self, v: &mut dyn xml::MetadataVisitor) -> Result<()> { - let origin = mk_runs(0, self.len, 8..64); + let mut allocator = Allocator::new_shuffled(self.old_nr_data_blocks, 64..512); + let origin = mk_origin(0, self.len, &mut allocator)?; + + v.superblock_b(&common_sb(self.old_nr_data_blocks))?; + origin.emit(v)?; + v.superblock_e()?; + + Ok(()) } fn get_new_nr_blocks(&self) -> u64 { @@ -646,5 +930,5 @@ fn shrink_identical_snap() -> Result<()> { let mut s = SnapS::new(1024, 1, 0); test_shrink(&mut s) } -*/ + //------------------------------------