diff --git a/Cargo.toml b/Cargo.toml index 8b4c9c3adce6834ada5f9fcad08271867a1107fa..34fa12f568bdf518942b32db0961adbcfe7f3532 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,10 +13,17 @@ path = "src/lib.rs" rand = "0.8.5" tokio = "1.24.1" jemallocator = "0.5" -jemalloc-sys = {version = "0.5", features = ["stats"]} +jemalloc-sys = { version = "0.5", features = ["stats"] } +# 自定义过程宏的crate +custom_proc_macro = { path = "custom_proc_macro" } + +# error +anyhow = { version = "1.0.65" } +anyerror = { version = "=0.1.8" } +thiserror = { version = "1.0.40" } [dev-dependencies] -criterion = {version = "0.4.0", features = ["html_reports"]} +criterion = { version = "0.4.0", features = ["html_reports"] } crc32fast = "1.3.2" skiplist = "0.4.0" diff --git a/benches/bloom_filter_bench.rs b/benches/bloom_filter_bench.rs new file mode 100644 index 0000000000000000000000000000000000000000..9dd1211903c65fd44252cdbf2b521bef862da7e9 --- /dev/null +++ b/benches/bloom_filter_bench.rs @@ -0,0 +1,49 @@ +use std::borrow::Borrow; +use criterion::{Criterion, criterion_group, criterion_main}; +use rand::Rng; +use level_db_rust::util::coding::Coding; +use level_db_rust::util::filter_policy_bloom::BloomFilterPolicy; +use level_db_rust::util::slice::Slice; + +const KEY_SIZE: usize = 10_000_000; +const BENCH_TIMES: usize = 128; + +/// BloomFilter bench Test +pub fn bloom_filter_bench(c: &mut Criterion) { + let data: Vec<&Slice> = vec![&Slice::default(); KEY_SIZE]; + for i in 0..KEY_SIZE { + data[i] = format!("{}", i).into(); + } + + let mut every_bench_times = [0; BENCH_TIMES]; + for i in 0..BENCH_TIMES { + every_bench_times[i] = rnd.gen_range(32..20480); + } + + c.bench_function("default_test", |b| { + let mut i = 0; + b.iter(|| { + let filter = BloomFilterPolicy::new(); + let bloom_filter_data = filter.create_filter_with_len(KEY_SIZE, data); + + bench_default(filter, &bloom_filter_data, every_bench_times[i % BENCH_TIMES]); + i += 1; + }); + }); +} + +fn bench_default(filter: BloomFilterPolicy, 
bloom_filter_data: &Slice, record_count: usize) { + for j in 0..record_count { + let key_may_match = filter.key_may_match(format!("{}", i).into(), bloom_filter_data); + assert!(key_may_match) + } + + for j in (KEY_SIZE+1)..(KEY_SIZE+100) { + let key_may_match = filter.key_may_match(format!("{}", i).into(), bloom_filter_data); + // key_may_match 可能为 true, 可能为 false + println!("key_may_match:{}.", key_may_match) + } +} + +criterion_group!(benches, skiplist_bench); +criterion_main!(benches); \ No newline at end of file diff --git a/custom_proc_macro/Cargo.toml b/custom_proc_macro/Cargo.toml new file mode 100644 index 0000000000000000000000000000000000000000..3a37db55f9dea91d5dad3dc235c7562bd7526887 --- /dev/null +++ b/custom_proc_macro/Cargo.toml @@ -0,0 +1,14 @@ +[package] +name = "custom_proc_macro" +version = "0.1.0" +edition = "2021" + +[lib] +# 过程宏 +proc-macro = true +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +syn = { version = "2.0.15", features = ["full"] } + +[features] \ No newline at end of file diff --git a/custom_proc_macro/src/lib.rs b/custom_proc_macro/src/lib.rs new file mode 100644 index 0000000000000000000000000000000000000000..868ee8c68b99c4db061eae65e35d474a39f2e56e --- /dev/null +++ b/custom_proc_macro/src/lib.rs @@ -0,0 +1,64 @@ +use proc_macro::{TokenStream}; +use std::ops::Deref; +use syn::{ExprRepeat, Lit, Expr}; +use syn::__private::quote::quote; +use syn::parse; + +/// 生成数组的宏 主要用于没有实现copy语义的结构体 在无法使用[T; 32] 这种方式生成数组的情况下 +/// +/// # Arguments +/// +/// * `input`: TokenStream(ExprRepeat) 以分号(;)为分割符, 第一个参数为表达式, 第二个参数为数量. 
例: T::default(); 16 +/// +/// returns: TokenStream +/// +/// # Examples +/// +/// ``` +/// struct Test; +/// let arr: [Test; 16] = arr!([Test; 16]); +/// ``` +/// # Expansion +/// ``` +/// [Test; 16]; +/// [0; 16] +/// ``` +#[proc_macro] +pub fn arr(input: TokenStream) -> TokenStream { + let repeat_expr: ExprRepeat = parse(input) + .expect("Like arr!([Test::new(); 16])"); + + let mut len = 0; + // 获取表达式中的长度信息并转为usize + if let Expr::Lit(expr_lit) = repeat_expr.len.deref() { + if let Lit::Int(int_lit) = &expr_lit.lit { + len = int_lit.base10_parse::().expect("Failed to parse integer literal"); + } + } + // 解析并拼接成数组 + let _expr = repeat_expr.expr; + // 1.生成数组的集合 + let mut _all = quote!(); + for _i in 0..len { + // 2.将每个元素向数组中追加 + if let Expr::Path(path) = _expr.as_ref() { + // 如果是element宏的情况会调用element宏并传入index + let _mac_name = &path; + _all = quote! { #_all #_mac_name!(#_i, capacity, default_length), }; + } else { + _all = quote! { #_all #_expr, }; + } + } + // 3.加上中括号 + let arr = quote! { [ #_all ] }; + return arr.into(); +} + +/// 生成调用NonNull::new_unchecked()的方法, 会自动包裹unsafe{}代码块 +#[proc_macro] +pub fn non_null_new_uncheck(input: TokenStream) -> TokenStream { + let ptr_expr: Expr = parse(input.into()) + .expect("Like non_null_new_uncheck!(ptr), ptr must a variable with a raw point"); + let output = quote! 
{ unsafe { std::ptr::NonNull::new_unchecked(#ptr_expr) } }; + output.into() +} \ No newline at end of file diff --git a/src/db/db_format.rs b/src/db/db_format.rs index f318f2c4804de49d4fa8124c01c5c2bc20c49ccd..4b4a7c791d252768c90e29f2a4bda4591af1de86 100644 --- a/src/db/db_format.rs +++ b/src/db/db_format.rs @@ -3,9 +3,8 @@ use std::io::Write; use std::sync::Arc; use crate::db::db_format::ValueType::{KTypeDeletion, KTypeValue}; use crate::db::file_meta_data::FileMetaData; -use crate::traits::coding_trait::CodingTrait; use crate::traits::comparator_trait::Comparator; -use crate::util::coding::Coding; +use crate::util::coding::{Encoder, varint_length}; use crate::util::slice::Slice; use crate::util::unsafe_slice::UnsafeSlice; @@ -20,17 +19,17 @@ pub enum ValueType { pub struct ParsedInternalKey { user_key: Slice, sequence: u64, - value_type: ValueType + value_type: ValueType, } #[derive(Debug)] pub struct InternalKey { - rep_: Slice + rep_: Slice, } /// InternalKeyComparator pub struct InternalKeyComparator { - user_comparator_: Arc + user_comparator_: Arc, } /// 查找键 @@ -93,7 +92,6 @@ impl Default for ParsedInternalKey { } impl ParsedInternalKey { - pub fn debug_string(&self) -> Slice { Slice::default() } @@ -119,13 +117,13 @@ impl ParsedInternalKey { /// Attempt to parse an internal key from "internal_key". On success, /// stores the parsed data in "*result", and returns true. /// On error, returns false, leaves "*result" in an undefined state. - pub fn parse_internal_key(internal_key : Slice, target: ParsedInternalKey) -> bool { + pub fn parse_internal_key(internal_key: Slice, target: ParsedInternalKey) -> bool { // line 173 todo!() } /// Returns the user key portion of an internal key. 
- pub fn extract_user_key(internal_key : Slice) -> Slice { + pub fn extract_user_key(internal_key: Slice) -> Slice { todo!() } } @@ -154,7 +152,7 @@ impl InternalKey { ParsedInternalKey::new(user_key, sequence, value_type) .append_internal_key(result); - Self{ + Self { // rep_: result // todo result值如何赋值 rep_: Slice::default() @@ -261,16 +259,17 @@ impl LookupKey { let need = user_key_size + 13; // A conservative estimate let mut data = Vec::with_capacity(need); let buf = data.as_mut_slice(); - let klength = Coding::varint_length(user_key_size + 8); - let mut offset = 0; + let mut encoder = Encoder::with_buf(buf); // write key size - offset = Coding::encode_varint32(klength as u32, buf, offset); - // write key slice - offset += (&mut buf[offset..]).write(user_key.as_ref()).expect("write user_key"); - // write sequence number and value type - Coding::encode_fixed64( - pack_sequence_and_type(sequence, ValueType::KTypeValue), - buf, offset); + let klength = varint_length((user_key_size + 8) as u64); + // 需要保证写入的数据不会超过buf.len(), 否则会溢出 + unsafe { + encoder.uncheck_put_varint32(klength as u32); + // write key slice + encoder.uncheck_put_buf(user_key.as_ref()); + // write sequence number and value type + encoder.uncheck_put_fixed64(pack_sequence_and_type(sequence, ValueType::KTypeValue)); + } LookupKey { data: Slice::from_vec(data), @@ -327,6 +326,7 @@ pub fn pack_sequence_and_type(seq_no: usize, v_type: ValueType) -> u64 { } pub struct Config {} + impl Config { /// Maximum encoding length of a BlockHandle pub const K_NUM_LEVELS: usize = 7; diff --git a/src/db/log_reader.rs b/src/db/log_reader.rs index c7b69ddc0bddef07154941ce3dd2010bd90b3c8c..099240adcdac9bca46dd7375dc85b6df21d2162e 100644 --- a/src/db/log_reader.rs +++ b/src/db/log_reader.rs @@ -3,8 +3,7 @@ use std::io::{Read, Seek, Write}; use std::io::SeekFrom::Start; use crate::db::log_writer::{K_BLOCK_SIZE, K_FIRST_TYPE, K_FULL_TYPE, K_LAST_TYPE, K_MIDDLE_TYPE}; -use crate::traits::coding_trait::CodingTrait; -use 
crate::util::coding::Coding; +use crate::util::coding::Decoder; use crate::util::crc::{AsCrc, CRC}; use crate::util::Result; use crate::util::slice::Slice; @@ -101,7 +100,9 @@ impl LogReader { return Ok(()); } let crc_bytes = &self.buf[(self.buf_read_idx - 7)..(self.buf_read_idx - 3)]; - let expect = Coding::decode_fixed32(crc_bytes); + let mut decoder = Decoder::with_buf(crc_bytes); + + let expect = decoder.get_fixed32()?; let data = &self.buf[(self.buf_read_idx - 1)..(self.buf_read_idx + data_len)]; let crc = data.as_crc(); let mask = CRC::mask(crc); diff --git a/src/db/log_wr_test.rs b/src/db/log_wr_test.rs index 001b1e750bb3cb5d9803aee1011324de4eb10b5b..95f67eeedda8e17e395b1cb7065bce8d7418a02c 100644 --- a/src/db/log_wr_test.rs +++ b/src/db/log_wr_test.rs @@ -1,10 +1,8 @@ - mod test { use std::fs::File; use crate::db::log_reader::LogReader; use crate::db::log_writer::LogWriter; - use crate::traits::coding_trait::CodingTrait; - use crate::util::coding::Coding; + use crate::util::coding::Decoder; use crate::util::crc::{AsCrc, ToMask}; use crate::util::slice::Slice; use crate::util::Result; @@ -13,7 +11,7 @@ mod test { fn write() -> Result<()> { let file = Box::new(File::create("../../1.bin")?); let mut writer = LogWriter::new(file); - let sample: Vec = ('0'..='9').map(|a|a as u8).collect(); + let sample: Vec = ('0'..='9').map(|a| a as u8).collect(); for i in 0..100 { let slice = generate_slice(i, &sample); writer.add_record(slice)?; @@ -25,7 +23,7 @@ mod test { fn read() -> Result<()> { let file = Box::new(File::open("../../1.bin")?); let mut reader = LogReader::new(file, true, 0); - let sample: Vec = ('0'..='9').map(|a|a as u8).collect(); + let sample: Vec = ('0'..='9').map(|a| a as u8).collect(); for i in 0..100 { let slice = reader.read_next().expect("not error").expect("must have record"); let expect = generate_slice(i, &sample); @@ -38,15 +36,16 @@ mod test { fn generate_slice(i: usize, sample: &Vec) -> Slice { let mut slice = Vec::with_capacity(64); for j 
in 0..=i { - slice.push(sample[j%10]); + slice.push(sample[j % 10]); } Slice::from_vec(slice) } #[test] fn test() { - let expect_crc_bytes = [0xD1, 0xB1, 0x09, 0x9A]; - let expect_crc = Coding::decode_fixed32(expect_crc_bytes.as_ref()); + let expect_crc_bytes: [u8; 4] = [0xD1, 0xB1, 0x09, 0x9A]; + let mut decoder = Decoder::with_buf(&expect_crc_bytes); + let expect_crc = unsafe { decoder.uncheck_get_fixed32() }; let raw_bytes = [0x01_u8, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39]; let crc = raw_bytes.as_crc().to_mask(); @@ -55,7 +54,5 @@ mod test { println!("expect_crc: {}, crc: {}, crc1: {}", expect_crc, crc, crc1); assert_eq!(expect_crc, crc); assert_eq!(expect_crc, crc1); - } - } \ No newline at end of file diff --git a/src/db/log_writer.rs b/src/db/log_writer.rs index 42406db3f609f86e330a8e039f8f53ee9541a3c3..485ce95374567f7a0cac9138b293d68f2b833f59 100644 --- a/src/db/log_writer.rs +++ b/src/db/log_writer.rs @@ -1,6 +1,5 @@ use std::io::Write; -use crate::traits::coding_trait::CodingTrait; -use crate::util::coding::Coding; +use crate::util::coding::Encoder; use crate::util::crc::{AsCrc, CRC}; use crate::util::slice::Slice; use crate::util::Result; @@ -73,13 +72,13 @@ impl LogWriter { } fn emit_physical_record(&mut self, record_type: u8, data: &[u8]) -> Result<()> { + let mut crc = CRC::extend(self.type_crc[record_type as usize], data); + crc = CRC::mask(crc); let mut header = [0_u8; K_HEADER_SIZE]; + Encoder::with_buf(&mut header).put_varint32(crc); header[4] = (data.len() & 0xff) as u8; header[5] = (data.len() >> 8) as u8; header[6] = record_type; - let mut crc = CRC::extend(self.type_crc[record_type as usize], data); - crc = CRC::mask(crc); - Coding::encode_fixed32(crc, header.as_mut(), 0); self.file_writer.write(header.as_ref())?; self.block_offset += K_HEADER_SIZE; if !data.is_empty() { diff --git a/src/db/mem_table.rs b/src/db/mem_table.rs index 
3a521fb483ff8ce8da7905820b41f625e9dd77e8..a00340098649c4da1da8205f608971f48accc226 100644 --- a/src/db/mem_table.rs +++ b/src/db/mem_table.rs @@ -2,13 +2,12 @@ use std::io::Write; use std::sync::{Arc, Mutex}; use crate::db::db_format::{LookupKey, ValueType}; use crate::db::skip_list::SkipList; -use crate::traits::coding_trait::CodingTrait; use crate::traits::comparator_trait::Comparator; use crate::traits::DataIterator; use crate::util::arena::ArenaRef; use crate::util::slice::Slice; use crate::util::{Arena, Result}; -use crate::util::coding::Coding; +use crate::util::coding::{Encoder, varint_length}; use crate::util::unsafe_slice::UnsafeSlice; /// 内存表 @@ -18,8 +17,7 @@ pub struct MemTable { arena: ArenaRef, } -impl MemTable { - +impl MemTable { /// 创建内存表 /// /// # Arguments @@ -39,7 +37,7 @@ impl MemTable { Self { cmp, list, - arena + arena, } } @@ -72,21 +70,24 @@ impl MemTable { let key_size = key_buf.len(); let value_size = value_buf.len(); let internal_key_size = key_size + 8; - let encoded_len = Coding::varint_length(key_size) + let encoded_len = varint_length(key_size as u64) + internal_key_size - + Coding::varint_length(value_size) + + varint_length(value_size as u64) + value_size; let mut lock = self.arena.lock()?; let buf = lock.allocate(encoded_len); - let mut offset = 0; - // write key size - offset = Coding::encode_varint32(internal_key_size as u32, buf, offset); - // write key slice - offset += (&mut buf[offset..]).write(key_buf)?; - // write seq_no and type - offset = Coding::encode_fixed64((seq_no << 8 | v_type.get_value()) as u64, buf, offset); - // write value slice - (&mut buf[offset..]).write(value_buf)?; + let mut encoder = Encoder::with_buf(buf); + // 需要保证写入的数据不会超过buf.len(), 否则溢出 + unsafe { + // write key size + encoder.uncheck_put_varint32(internal_key_size as u32); + // write key slice + encoder.uncheck_put_buf(key_buf); + // write seq_no and type + encoder.uncheck_put_fixed64((seq_no << 8 | v_type.get_value()) as u64); + // write value 
slice + encoder.uncheck_put_buf(value_buf); + } // let slice = Slice::from_buf(buf); self.list.insert(UnsafeSlice::new_with_arena(buf, self.arena.clone())?) } @@ -95,12 +96,9 @@ impl MemTable { pub fn get(&self, _key: &LookupKey) -> Result> { todo!() } - } mod test { #[test] - fn test() { - - } + fn test() {} } \ No newline at end of file diff --git a/src/db/version_edit.rs b/src/db/version_edit.rs index 792d00afe2b09ccd289cba0486b0ff2d98a5d4d5..ec0a502b3554b0f152a14ed448957f8be3dc9f97 100644 --- a/src/db/version_edit.rs +++ b/src/db/version_edit.rs @@ -3,8 +3,7 @@ use std::iter::Map; use crate::db::db_format::InternalKey; use crate::db::file_meta_data::FileMetaData; use crate::db::version_edit; -use crate::traits::coding_trait::CodingTrait; -use crate::util::coding::Coding; +use crate::util::coding::Encoder; use crate::util::slice::Slice; use crate::util::Result; use crate::util::status::{LevelError, Status}; @@ -37,7 +36,7 @@ pub enum Tag { kDeletedFile = 6, kNewFile = 7, // 8 was used for large value refs - kPrevLogNumber = 9 + kPrevLogNumber = 9, } impl Tag { @@ -81,7 +80,7 @@ impl VersionEdit { #[inline] pub fn new() -> Self { Self { - comparator_ : String::new(), + comparator_: String::new(), log_number_: 0, prev_log_number_: 0, next_file_number_: 0, @@ -93,7 +92,7 @@ impl VersionEdit { has_last_sequence_: false, compact_pointers_: vec![], deleted_files_: vec![], - new_files_: vec![] + new_files_: vec![], } } @@ -131,27 +130,27 @@ impl VersionEdit { // compact_pointers_ don't clear } - pub fn set_comparator_name(&mut self, name: Slice){ + pub fn set_comparator_name(&mut self, name: Slice) { self.has_comparator_ = true; self.comparator_ = name.into(); } - pub fn set_log_number(&mut self, num: u64){ + pub fn set_log_number(&mut self, num: u64) { self.has_log_number_ = true; self.log_number_ = num; } - pub fn set_prev_log_number(&mut self, num: u64){ + pub fn set_prev_log_number(&mut self, num: u64) { self.has_prev_log_number_ = true; self.prev_log_number_ = 
num; } - pub fn set_next_file(&mut self, num: u64){ + pub fn set_next_file(&mut self, num: u64) { self.has_next_file_number_ = true; self.next_file_number_ = num; } - pub fn set_last_sequence(&mut self, seq: u64){ + pub fn set_last_sequence(&mut self, seq: u64) { self.has_last_sequence_ = true; self.last_sequence_ = seq; } @@ -202,59 +201,59 @@ impl VersionEdit { /// ``` /// /// ``` - pub fn encode_to(&self, target: &mut Vec) { - let mut position: usize = 0; + pub fn encode_to(&self, target: &mut Vec) -> Result<()> { + let mut encoder = Encoder::with_vec(target); if self.has_comparator_ { - position += Coding::put_varint32(target, position, Tag::k_comparator.get_value() as u32); - position += Coding::put_length_prefixed_slice(target, position, self.comparator_.len()); + encoder.put_varint32(Tag::k_comparator.get_value() as u32)?; + encoder.put_varint32(self.comparator_.len() as u32)?; + // fixme 需要正确使用 put_length_prefixed_slice 将slice长度及slice内容编码到target + // encoder.put_length_prefixed_slice(self.comparator_.len())?; } - if self.has_log_number_ { - let mut offset = Coding::put_varint32(target, position, Tag::kLogNumber.get_value() as u32); - position = position + offset; - - offset = Coding::put_varint64(target, position, self.log_number_); - position = position + offset; + encoder.put_varint32(Tag::kLogNumber.get_value() as u32)?; + encoder.put_varint64(self.log_number_)?; } if self.has_prev_log_number_ { - position += Coding::put_varint32(target, position, Tag::kPrevLogNumber.get_value() as u32); - position += Coding::put_varint64(target, position, self.prev_log_number_); + encoder.put_varint32(Tag::kPrevLogNumber.get_value() as u32)?; + encoder.put_varint64(self.prev_log_number_)?; } if self.has_next_file_number_ { - position += Coding::put_varint32(target, position, Tag::kNextFileNumber.get_value() as u32); - position += Coding::put_varint64(target, position, self.next_file_number_); + encoder.put_varint32(Tag::kNextFileNumber.get_value() as u32)?; + 
encoder.put_varint64(self.next_file_number_)?; } if self.has_last_sequence_ { - position += Coding::put_varint32(target, position, Tag::kLastSequence.get_value() as u32); - position += Coding::put_varint64(target, position, self.last_sequence_); + encoder.put_varint32(Tag::kLastSequence.get_value() as u32)?; + encoder.put_varint64(self.last_sequence_)?; } for i in 0..self.compact_pointers_.len() { - position += Coding::put_varint32(target, position, Tag::kCompactPointer.get_value() as u32); - position += Coding::put_varint32(target, position, self.compact_pointers_[i].0); - position += Coding::put_length_prefixed_slice(target, position, - self.compact_pointers_[i].1.encode_len()); + encoder.put_varint32(Tag::kCompactPointer.get_value() as u32)?; + encoder.put_varint32(self.compact_pointers_[i].0)?; + // fixme 需要正确使用put_length_prefixed_slice + // encoder.put_length_prefixed_slice( self.compact_pointers_[i].1.encode_len())?; } for i in 0..self.deleted_files_.len() { - position += Coding::put_varint32(target, position, Tag::kDeletedFile.get_value() as u32); - position += Coding::put_varint32(target, position, self.deleted_files_[i].0); - position += Coding::put_varint64(target, position, self.deleted_files_[i].1); + encoder.put_varint32(Tag::kDeletedFile.get_value() as u32)?; + encoder.put_varint32(self.deleted_files_[i].0)?; + encoder.put_varint64(self.deleted_files_[i].1)?; } for i in 0..self.new_files_.len() { let f: &FileMetaData = &self.new_files_[i].1; - position += Coding::put_varint32(target, position, Tag::kNewFile.get_value() as u32); + encoder.put_varint32(Tag::kNewFile.get_value() as u32)?; // level - position += Coding::put_varint32(target, position, self.new_files_[i].0); - position += Coding::put_varint64(target, position, f.get_number()); - position += Coding::put_varint64(target, position, f.get_file_size()); - position += Coding::put_length_prefixed_slice(target, position, f.get_smallest().encode_len()); - position += 
Coding::put_length_prefixed_slice(target, position, f.get_largest().encode_len()); + encoder.put_varint32(self.new_files_[i].0)?; + encoder.put_varint64(f.get_number())?; + encoder.put_varint64(f.get_file_size())?; + // fixme 需要正确使用put_length_prefixed_slice + // encoder.put_length_prefixed_slice( f.get_smallest().encode_len())?; + // encoder.put_length_prefixed_slice( f.get_largest().encode_len())?; } + Ok(()) } /// 将 source 中的数据解码至 self VersionEdit 中 @@ -275,18 +274,18 @@ impl VersionEdit { let version_edit = VersionEdit::new(); - let msg : Option = Option::None; + let msg: Option = Option::None; // todo Coding::get_varint32 存在问题。开发暂停 - while msg.is_none() && Coding::get_varint32(source) != 0_u32 { - let tag_value = Coding::get_varint32(source); - let tag = Tag::from_value(tag_value); - - if tag.is_none() { - return LevelError::corruption_string("VersionEdit", "unknown tag"); - } - - } + // while msg.is_none() && Coding::get_varint32(source) != 0_u32 { + // let tag_value = Coding::get_varint32(source); + // let tag = Tag::from_value(tag_value); + // + // if tag.is_none() { + // return LevelError::corruption_string("VersionEdit", "unknown tag"); + // } + // + // } todo!() } @@ -295,13 +294,13 @@ impl VersionEdit { let debug_str = String::from("VersionEdit {"); let mut has_comparator_str = String::default(); - if(self.has_comparator_){ + if (self.has_comparator_) { has_comparator_str.push_str(format!("\n Comparator: {}", self.comparator_.as_str()).as_str()); } let mut has_log_number__str = String::default(); // if(self.has_log_number_){ - // todo + // todo // // let append_log_number = logging.AppendNumberTo(&r, self.log_number_); // let append_log_number = self.log_number_ + "".as_ref(); // has_log_number__str.push_str(format!("\n LogNumber: {}", append_log_number).as_str()); @@ -316,7 +315,7 @@ impl VersionEdit { /// 静态方法 impl<'a> VersionEdit { pub fn get_internal_key(input: Slice) -> Result { - let key= InternalKey::default(); + let key = InternalKey::default(); 
todo!() diff --git a/src/db/version_edit_test.rs b/src/db/version_edit_test.rs index 4791a04f6f1f2fd4910150ecc122eb63779a0bb6..47dc33c22de21196608df01304d45960034e1a0b 100644 --- a/src/db/version_edit_test.rs +++ b/src/db/version_edit_test.rs @@ -3,6 +3,7 @@ mod test { use crate::db::version_edit; use crate::db::version_edit::{Tag, VersionEdit}; use crate::util::slice::Slice; + use crate::util::Result; #[test] fn test_tag() { @@ -15,14 +16,16 @@ mod test { } #[test] - fn test_version_edit_encode_to() { + fn test_version_edit_encode_to() -> Result<()> { let mut target: Vec = vec![]; let version_edit = VersionEdit::new_with_log_number(6); - version_edit.encode_to(&mut target); + version_edit.encode_to(&mut target)?; println!("target: {}.", &target.len()); // todo // assert_eq!(target.len(), 2); + + Ok(()) } #[test] diff --git a/src/db/version_set.rs b/src/db/version_set.rs index 61752032b02be96ba0c8102c0bc1ecde972fae92..2435778198cfb35108c5dd40654a500b3483f2b2 100644 --- a/src/db/version_set.rs +++ b/src/db/version_set.rs @@ -5,7 +5,6 @@ use crate::db::file_meta_data::FileMetaData; use crate::db::table_cache::TableCache; use crate::db::version_edit::VersionEdit; use crate::traits::comparator_trait::Comparator; -use crate::util::cache::Cache; use crate::util::env::Env; use crate::util::options::{Options, ReadOptions}; use crate::util::slice::Slice; diff --git a/src/lib.rs b/src/lib.rs index c0dce0021b0ec5283db9efe17a9e28a72b023211..c1c31315b33dcfda861afa319b3505378cc7c1c7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,3 +1,5 @@ +#![feature(core_ffi_c)] +#![feature(core_intrinsics)] extern crate core; pub mod db; diff --git a/src/table/block_builder.rs b/src/table/block_builder.rs index f24a9956152436e27ef210d98c111ad68425426b..97f20e3bf6a9f2ad9fb4c07d0b6b80b3a49e2f69 100644 --- a/src/table/block_builder.rs +++ b/src/table/block_builder.rs @@ -17,10 +17,13 @@ use crate::util::status::Status; /// BlockBuilder 的 `Arc` 别名 pub type BlockBuilderPtr = Arc; +/// 生成块 pub 
struct BlockBuilder { // 在 BlockBuilder 初始化时,指定的配置项 options: OptionsPtr, - index_block_options: OptionsPtr, + + // 目标缓冲区,也就是按照输出格式处理好的内存区域 + buffer: Slice, // SSTable 生成后的文件 file: Arc, diff --git a/src/table/filter_block.rs b/src/table/filter_block.rs index 00133829d48b3d8763179b3ac941527325ed275d..9f0fb38b1bf21a0ebefc31257ed30baf7f5cff5e 100644 --- a/src/table/filter_block.rs +++ b/src/table/filter_block.rs @@ -1,14 +1,17 @@ use std::io::Write; use std::sync::Arc; -use crate::traits::coding_trait::CodingTrait; +use crate::debug; use crate::traits::filter_policy_trait::{FilterPolicy, FilterPolicyPtr}; -use crate::util::coding::Coding; +use crate::util::coding::Encoder; use crate::util::slice::Slice; use crate::util::Result; -// Generate new filter every 2KB of data +// 对2K取2的对数,也就是得到11 const FILTER_BASE_LG: usize = 11; + +// 在每当data block的大小2K的时候(FILTER_BASE的值),开始创建一个filter +// Generate new filter every 2KB of data const FILTER_BASE: usize = 1 << FILTER_BASE_LG; /// @@ -19,7 +22,7 @@ pub trait FilterBlock { fn new_with_policy(policy: FilterPolicyPtr) -> Self; /// - /// 构造一个 FilterBlockBuilder + /// 构造一个 FilterBlockBuilder, 分配初始化容量大小 /// /// # Arguments /// @@ -39,8 +42,9 @@ pub trait FilterBlock { /// /// # Arguments /// - /// * `_block_offset`: 偏移量 - /// + /// * `_block_offset`: sstable 里 data block 的偏移量. 
+ /// 注意这里传入的参数block_offset跟 filter block 内的数据无关,这个值是 sstable 里 data block 的偏移量,新的 data block 产生时就会调用。 + /// 根据这个值,计算总共需要多少个 filter,然后依次调用GenerateFilter,如果block_offset较小可能一次也不会调用,较大可能多次调用,因此,data block 和 filter data 不是一一对应的。 /// returns: () /// /// # Examples @@ -69,6 +73,9 @@ pub trait FilterBlock { /// 构造filterBlock /// + /// Filter block的结构: + /// + /// /// # Examples /// /// ``` @@ -91,15 +98,24 @@ pub trait FilterBlock { /// SSTable 文件里面的 meta block 构建器, 按内存里面指定的格式整理在内存中 pub struct FilterBlockBuilder { + // 指向一个具体的filter_policy policy: FilterPolicyPtr, - // Flattened key contents + + /* keys 记录了参数key,start 则记录了在 keys 的偏移量,两者结合可以还原出key */ + // 包含了所有展开的keys。并且这些所有的keys都是存放在一起的。(通过 AddKey 达到这个目的) keys: Vec, - // Starting index in keys_ of each key + // 记录当前这个key在keys_里面的offset start: Vec, + // Filter data computed so far + // 用result_来记录所有的输入. + // result_变量就是表示的是一个filter计算之后的输出。 + // 比如 BloomFilter 经过各种key计算之后,可能会得到一个 filter_str。这个 filter_str 就是放到result里面。 result: Vec, + // policy_->CreateFilter() argument tmp_keys: Vec, + // 里面的每个元素就是用来记录每个filter内容的offset filter_offsets: Vec, } @@ -112,7 +128,7 @@ pub struct FilterBlockReader { // Number of entries in offset array num: usize, // Encoding parameter (see kFilterBaseLg in .cc file) - base_lg: usize + base_lg: usize, } impl FilterBlock for FilterBlockBuilder { @@ -121,11 +137,11 @@ impl FilterBlock for FilterBlockBuilder { } fn new_with_policy_capacity(policy: FilterPolicyPtr, capacity: usize) -> Self { - let keys:Vec = Vec::with_capacity(capacity); - let start:Vec = Vec::with_capacity(capacity); - let result:Vec = Vec::with_capacity(capacity); - let tmp_keys:Vec = vec![]; - let filter_offsets:Vec = vec![]; + let keys: Vec = Vec::with_capacity(capacity); + let start: Vec = Vec::with_capacity(capacity); + let result: Vec = Vec::with_capacity(capacity); + let tmp_keys: Vec = vec![]; + let filter_offsets: Vec = vec![]; Self { policy, @@ -133,16 +149,20 @@ impl FilterBlock for FilterBlockBuilder { start, result, 
tmp_keys, - filter_offsets + filter_offsets, } } fn start_block(&mut self, block_offset: u64) { - let filter_index = block_offset / (FILTER_BASE as u64); - assert!(filter_index >= self.filter_offsets.len() as u64); + // 计算出需要创建的filter的总数目. filters_number ==> filter_index + let filters_number = block_offset / (FILTER_BASE as u64); + + let len = self.filter_offsets.len() as u64; + assert!(filters_number >= len); - while filter_index > self.filter_offsets.len() as u64 { - self.generate_filter(); + // 当已经生成的filter的数目小于需要生成的filter的总数时,那么就继续创建filter。 + while filters_number > len { + self.generate_new_filter(); } } @@ -151,36 +171,30 @@ impl FilterBlock for FilterBlockBuilder { } fn add_key(&mut self, key: &Slice) { - self.start.push(key.len()); + // start_记录key在keys的offset,因此可以还原出key + self.start.push(self.keys.len()); self.keys.write(key.as_str().as_bytes()).expect("add_key error!"); } fn finish(&mut self) -> Result { if self.start.len() != 0 { - self.generate_filter(); + self.generate_new_filter(); } // Append array of per-filter offsets let array_offset = self.result.len() as u32; - // 当前需要写入的位置。result 中可能存在数据,因此为 self.result.len() 的位置 - let mut pos: usize = self.result.len(); - - // todo 判断是否需要扩容 - let result_total_capacity = self.result.capacity(); - - let dst_append = self.result.as_mut_slice(); + // 当前需要写入的位置。result 中可能存在数据,因此为 offset ==> self.result.len() 的位置 + let dst: &mut Vec = &mut self.result; + // let mut dst_append = self.result.as_mut_slice(); + let mut encoder = Encoder::with_vec(dst); for i in 0..self.filter_offsets.len() { - // 判断当前 pos + len 4 - let filter_offset_val = self.filter_offsets[i]; - pos = Coding::put_fixed32(dst_append, pos, filter_offset_val); + encoder.put_fixed32(self.filter_offsets[i])?; } - - pos = Coding::put_fixed32(dst_append, pos, array_offset); + encoder.put_fixed32(array_offset)?; // Save encoding parameter in result - // todo 判断是否需要扩容 - Coding::put_varint64(self.result.as_mut_slice(), pos, FILTER_BASE_LG as u64); + 
encoder.put_varint64(FILTER_BASE_LG as u64)?; Ok(Slice::from_buf(&self.result)) } @@ -211,43 +225,58 @@ impl FilterBlock for FilterBlockBuilder { } impl FilterBlockBuilder { - fn generate_filter(&mut self) { + /// 创建新的 filter + /// 主要是更新result_和filter_offsets_ + fn generate_new_filter(&mut self) { + // 拿到key的数目 let num_keys = self.start.len(); + // 如果相比上一个filter data没有新的key, 那么只更新offsets数组就返回 if num_keys == 0 { + // 如果key数目为0,这里应该是表示要新生成一个filter. 这时应该是重新记录下offset了 // Fast path if there are no keys for this filter self.filter_offsets.push(self.result.len() as u32); return; } /* Make list of keys from flattened key structure */ - // Simplify length computation + // start_里面记录下offset. + // starts最后一个元素是keys_的总大小,此时starts元素个数=num_keys + 1. 这样 [starts[i], starts[i+1]) 就可以还原所有的key了 self.start.push(self.keys.len()); - // 如果 new_len 大于 len ,则 Vec 由差异扩展,每个额外的插槽都用 value 填充。如果 new_len 小于 len ,则 Vec 将被截断。 + // 需要多少个key + // 如果 new_len 大于 len ,则 Vec 由差异扩展,每个额外的插槽都用 value 填充。 + // 如果 new_len 小于 len ,则 Vec 将被截断。 self.tmp_keys.resize(num_keys, Slice::default()); + // 依次拿到每个key for i in 0..num_keys { - let base = &self.keys[self.start[i]..]; - let length = self.start[i+1] - self.start[i]; + // 拿到key的长度 + let length = self.start[i + 1] - self.start[i]; + // 这里拿到每个key的数据 + let base = &self.keys[self.start[i]..(self.start[i] + length)]; + // 生成相应的key,并且放到tmp_keys里面 let mut tmp_key = Vec::with_capacity(length); tmp_key.write(&base); self.tmp_keys[i] = Slice::from_vec(tmp_key); } // Generate filter for current set of keys and append to result_. 
+ // 记录下offset self.filter_offsets.push(self.result.len() as u32); + // 利用tmp_keys生成输出,并且放到result里面。 let mut keys: Vec<&Slice> = Vec::new(); - keys.push(&self.tmp_keys[0]); - let create_filter:Slice = self.policy.create_filter_with_len(num_keys, keys); + for tmp_key in &self.tmp_keys { + keys.push(&tmp_key); + } + // let create_filter:Slice = self.policy.create_filter_with_len(num_keys, keys); + let create_filter: Slice = self.policy.create_filter(keys); + debug!("create_filter:{:?}.", create_filter); - // let result_len = self.result.len(); - // let result_total_capacity = self.result.capacity(); self.result.write(create_filter.as_ref()); - // let result_len = self.result.len(); - // let result_total_capacity = self.result.capacity(); + // 清空keys/start变量 self.tmp_keys.clear(); self.keys.clear(); self.start.clear(); @@ -255,7 +284,7 @@ impl FilterBlockBuilder { } impl FilterBlockReader { - pub fn new_with_policy(policy: FilterPolicyPtr, contents: Slice) -> Self { + pub fn new_with_policy(policy: FilterPolicyPtr, contents: &Slice) -> Self { let data = Vec::new(); let offset = Vec::new(); @@ -268,8 +297,8 @@ impl FilterBlockReader { data, offset, num: 0, - base_lg: 0 - } + base_lg: 0, + }; }; // let buf = contents.as_ref()[contents_len-5..]; @@ -282,7 +311,7 @@ impl FilterBlockReader { data, offset, num: 0, - base_lg: 0 + base_lg: 0, } } diff --git a/src/table/filter_block_test.rs b/src/table/filter_block_test.rs index 23506196fcc758d919834ae36258e569d3477d23..db7e9df8a6775eb52f47fb0f7d6cc9b61e5a6c3c 100644 --- a/src/table/filter_block_test.rs +++ b/src/table/filter_block_test.rs @@ -2,115 +2,16 @@ mod test { use std::borrow::BorrowMut; use std::sync::Arc; + use crate::debug; use crate::table::filter_block; use crate::table::filter_block::{FilterBlock, FilterBlockBuilder, FilterBlockReader}; - use crate::traits::coding_trait::CodingTrait; + use crate::table::filter_block_test_filter_policy::TestHashFilter; use crate::traits::filter_policy_trait::FilterPolicy; - use 
crate::util::coding::Coding; - use crate::util::filter_policy::BloomFilterPolicy; use crate::util::slice::Slice; use crate::util::hash::{Hash, ToHash}; use crate::util::Result; - pub struct TestHashFilter { - //. - } - - impl TestHashFilter { - fn new() -> Self { - Self { - - } - } - } - - impl FilterPolicy for TestHashFilter { - fn name(&self) -> String { - String::from("TestHashFilter") - } - - fn create_filter(&self, keys: Vec<&Slice>) -> Slice { - let mut n: usize = 0; - for i in 0..keys.len() { - n += keys[i].len(); - } - - self.create_filter_with_len(n, keys) - } - - fn create_filter_with_len(&self, len: usize, keys: Vec<&Slice>) -> Slice { - let mut n: usize = len; - - let mut dst_chars = vec![0; n]; - let dst_chars_u8 = dst_chars.borrow_mut(); - - let mut offset: usize = 0; - for i in 0..keys.len() { - let h = Hash::hash_code(keys[i].as_ref(), 1); - let of = Coding::put_fixed32(dst_chars_u8, offset, h); - offset += of; - } - - Slice::from_buf(dst_chars_u8) - } - - fn key_may_match(&self, key: &Slice, bloom_filter: &Slice) -> bool { - let h = Hash::hash_code(key.to_vec().as_slice(), 1); - - let mut pos = 0; - while pos <= bloom_filter.size() { - let buf = &bloom_filter.as_ref()[pos..]; - - if h == Coding::decode_fixed32(buf) { - return true - } - - pos += 4; - } - - false - } - } - - // #[test] - // fn test_create_filter() { - // let policy = TestHashFilter::new(); - // - // let s1 = Slice::try_from(String::from("hello")).unwrap(); - // let s2 = Slice::try_from(String::from("world")).unwrap(); - // let mut keys : Vec<&Slice> = Vec::new(); - // keys.push(&s1); - // keys.push(&s2); - // - // let bloom_filter: Slice = policy.create_filter(keys); - // - // let mut key_may_match = policy.key_may_match( - // &Slice::try_from(String::from("hello")).unwrap(), - // &bloom_filter); - // assert!(key_may_match); - // - // key_may_match = policy.key_may_match(&Slice::try_from(String::from("world")).unwrap(), - // &bloom_filter); - // assert!(key_may_match); - // - // let 
mut key_not_match = policy.key_may_match(&Slice::try_from(String::from("x")).unwrap(), - // &bloom_filter); - // assert!(!key_not_match); - // - // key_not_match = policy.key_may_match(&Slice::try_from(String::from("helloworld")).unwrap(), - // &bloom_filter); - // assert!(!key_not_match); - // - // key_not_match = policy.key_may_match(&Slice::try_from(String::from("hello world")).unwrap(), - // &bloom_filter); - // assert!(!key_not_match); - // - // key_not_match = policy.key_may_match(&Slice::try_from(String::from("foo")).unwrap(), - // &bloom_filter); - // assert!(!key_not_match); - // } - #[test] fn test_filter_block_new_with_policy() { let policy: Arc> = Arc::new(Box::new(TestHashFilter::new())); @@ -133,7 +34,7 @@ mod test { let policy: Arc> = Arc::new(Box::new(TestHashFilter::new())); let contents = Slice::default(); - let filter_block_reader: FilterBlockReader = FilterBlockReader::new_with_policy(policy, contents); + let filter_block_reader: FilterBlockReader = FilterBlockReader::new_with_policy(policy, &contents); let fp_reader = filter_block_reader.get_policy(); let _reader_filter_policy_name = fp_reader.name(); @@ -147,9 +48,10 @@ mod test { #[test] fn test_filter_block_new_with_policy_and_addkey() { let policy: Arc> = Arc::new(Box::new(TestHashFilter::new())); - let mut filter_block_builder: FilterBlockBuilder = FilterBlockBuilder::new_with_policy_capacity( - policy, 10); + let mut filter_block_builder: FilterBlockBuilder = + FilterBlockBuilder::new_with_policy(policy.clone()); + // filter block 的 offset filter_block_builder.start_block(100); filter_block_builder.add_key_from_str("foo"); filter_block_builder.add_key_from_str("bar"); @@ -160,7 +62,19 @@ mod test { filter_block_builder.add_key_from_str("hello"); let sliceRs: Result = filter_block_builder.finish(); - assert_eq!("a", "leveldb.BuiltinBloomFilter"); + debug!("sliceRs:{:?}", &sliceRs); + + let reader = FilterBlockReader::new_with_policy( + policy.clone(), &sliceRs.unwrap()); + + // todo 
key_may_match not impl + // assert!(reader.key_may_match(100, &Slice::from("foo"))); + // assert!(reader.key_may_match(100, &Slice::from("bar"))); + // assert!(reader.key_may_match(100, &Slice::from("box"))); + // assert!(reader.key_may_match(100, &Slice::from("hello"))); + // assert!(reader.key_may_match(100, &Slice::from("foo"))); + // assert!(!reader.key_may_match(100, &Slice::from("missing"))); + // assert!(!reader.key_may_match(100, &Slice::from("other"))); } // #[test] diff --git a/src/table/filter_block_test_filter_policy.rs b/src/table/filter_block_test_filter_policy.rs new file mode 100644 index 0000000000000000000000000000000000000000..c1567ed53f601e1f1b0460f8f473d011ec654946 --- /dev/null +++ b/src/table/filter_block_test_filter_policy.rs @@ -0,0 +1,234 @@ +use std::borrow::BorrowMut; +use std::cmp::max; +use std::usize::MAX; +use crate::debug; +use crate::traits::filter_policy_trait::FilterPolicy; +use crate::util::coding::{Decoder, Encoder}; +use crate::util::hash::Hash; +use crate::util::slice::Slice; + +/// 内部使用。专门用于测试用例的 FilterPolicy +pub struct TestHashFilter { + //. 
+} + +impl TestHashFilter { + pub(crate) fn new() -> Self { + Self {} + } +} + +impl FilterPolicy for TestHashFilter { + fn name(&self) -> String { + String::from("TestHashFilter") + } + + fn create_filter(&self, keys: Vec<&Slice>) -> Slice { + // 每个 key 都会 hash_code 转为 u32, 所以 * 4 + let mut len: usize = keys.len() * 4; + + self.create_filter_with_len(len, keys) + } + + fn create_filter_with_len(&self, capacity: usize, keys: Vec<&Slice>) -> Slice { + // Actually capacity + let mut len: usize = capacity; + + let need_capacity = keys.len() * 4; + // 指定大小和 need_capacity 取最大值 + len = max(len, need_capacity); + + let mut dst_chars = vec![0; len]; + let mut encoder = Encoder::with_vec(&mut dst_chars); + // for [0, len) + for i in 0..keys.len() { + let h = Hash::hash_code(keys[i].as_ref(), 1); // seed 固定为 1 + + encoder.put_fixed32(h).expect("Encoder:with_vec.put_fixed32 error"); + } + debug!("debug: dst_chars:{:?}", dst_chars); + + Slice::from_vec(dst_chars) + } + + fn key_may_match(&self, key: &Slice, bloom_filter: &Slice) -> bool { + let h = Hash::hash_code(key.to_vec().as_ref(), 1); + + let mut decoder = Decoder::with_buf(bloom_filter); + loop { + if !decoder.can_get() { + return false; + } + let h_bl = unsafe { decoder.uncheck_get_fixed32() }; + if h == h_bl { + return true; + } + } + } +} + +// #################### FilterPolicy test +#[test] +fn test_create_filter() { + let policy = TestHashFilter::new(); + + // 如下三个值, 存放在 BloomFilter 中 + let s1 = Slice::try_from(String::from("hello")).unwrap(); + let s2 = Slice::try_from("world").unwrap(); + let s3 = Slice::try_from("hello world").unwrap(); + + let mut keys: Vec<&Slice> = Vec::new(); + keys.push(&s1); + keys.push(&s2); + keys.push(&s3); + + let bloom_filter: Slice = policy.create_filter(keys); + debug!("bloom_filter:{:?}", bloom_filter); + + // 验证通过 + let mut key_may_match = policy.key_may_match( + &Slice::try_from("hello").unwrap(), + &bloom_filter); + assert!(key_may_match); + + // 验证通过 + key_may_match = 
policy.key_may_match(&Slice::try_from("world").unwrap(), + &bloom_filter); + assert!(key_may_match); + + // 因为不存在,所以验证不通过 + let key_not_match = policy.key_may_match(&Slice::try_from("helloworld").unwrap(), + &bloom_filter); + assert!(!key_not_match); + + // 因为存在,所以验证通过 + let key_may_match = policy.key_may_match(&Slice::try_from("hello world").unwrap(), + &bloom_filter); + assert!(key_may_match); + + // 因为不存在,所以验证不通过 + let key_not_match = policy.key_may_match(&Slice::try_from("foo").unwrap(), + &bloom_filter); + assert!(!key_not_match); + + // 验证通过 + let mut key_may_match = policy.key_may_match( + &Slice::try_from("hello").unwrap(), + &bloom_filter); + assert!(key_may_match); + + // 因为不存在,所以验证不通过 + let mut key_not_match = policy.key_may_match(&Slice::try_from("x").unwrap(), + &bloom_filter); + assert!(!key_not_match); +} + +/// 指定超长长度。可以超过放置的值 +#[test] +fn test_create_filter_with_long_len() { + let policy = TestHashFilter::new(); + + // 如下三个值, 存放在 BloomFilter 中 + let s1 = Slice::try_from(String::from("hello")).unwrap(); + let s2 = Slice::try_from("world").unwrap(); + let s3 = Slice::try_from("hello world").unwrap(); + + let mut keys: Vec<&Slice> = Vec::new(); + keys.push(&s1); + keys.push(&s2); + keys.push(&s3); + + let bloom_filter: Slice = policy.create_filter_with_len(500, keys); + + // 验证通过 + let mut key_may_match = policy.key_may_match( + &Slice::try_from("hello").unwrap(), + &bloom_filter); + assert!(key_may_match); + + // 验证通过 + key_may_match = policy.key_may_match(&Slice::try_from("world").unwrap(), + &bloom_filter); + assert!(key_may_match); + + // 因为不存在,所以验证不通过 + let key_not_match = policy.key_may_match(&Slice::try_from("helloworld").unwrap(), + &bloom_filter); + assert!(!key_not_match); + + // 因为存在,所以验证通过 + let key_may_match = policy.key_may_match(&Slice::try_from("hello world").unwrap(), + &bloom_filter); + assert!(key_may_match); + + // 因为不存在,所以验证不通过 + let key_not_match = policy.key_may_match(&Slice::try_from("foo").unwrap(), + &bloom_filter); + 
assert!(!key_not_match); + + // 验证通过 + let mut key_may_match = policy.key_may_match( + &Slice::try_from("hello").unwrap(), + &bloom_filter); + assert!(key_may_match); + + // 因为不存在,所以验证不通过 + let mut key_not_match = policy.key_may_match(&Slice::try_from("x").unwrap(), + &bloom_filter); + assert!(!key_not_match); +} + +/// 指定端长度。放不开放置的值。 此时需要扩容 +#[test] +fn test_create_filter_with_short_len() { + let policy = TestHashFilter::new(); + + // 如下三个值, 存放在 BloomFilter 中 + let s1 = Slice::try_from(String::from("hello")).unwrap(); + let s2 = Slice::try_from("world").unwrap(); + let s3 = Slice::try_from("hello world").unwrap(); + + let mut keys: Vec<&Slice> = Vec::new(); + keys.push(&s1); + keys.push(&s2); + keys.push(&s3); + + let bloom_filter: Slice = policy.create_filter_with_len(5, keys); + + // 验证通过 + let mut key_may_match = policy.key_may_match( + &Slice::try_from("hello").unwrap(), + &bloom_filter); + assert!(key_may_match); + + // 验证通过 + key_may_match = policy.key_may_match(&Slice::try_from("world").unwrap(), + &bloom_filter); + assert!(key_may_match); + + // 因为不存在,所以验证不通过 + let key_not_match = policy.key_may_match(&Slice::try_from("helloworld").unwrap(), + &bloom_filter); + assert!(!key_not_match); + + // 因为存在,所以验证通过 + let key_may_match = policy.key_may_match(&Slice::try_from("hello world").unwrap(), + &bloom_filter); + assert!(key_may_match); + + // 因为不存在,所以验证不通过 + let key_not_match = policy.key_may_match(&Slice::try_from("foo").unwrap(), + &bloom_filter); + assert!(!key_not_match); + + // 验证通过 + let mut key_may_match = policy.key_may_match( + &Slice::try_from("hello").unwrap(), + &bloom_filter); + assert!(key_may_match); + + // 因为不存在,所以验证不通过 + let mut key_not_match = policy.key_may_match(&Slice::try_from("x").unwrap(), + &bloom_filter); + assert!(!key_not_match); +} diff --git a/src/table/format.rs b/src/table/format.rs index e082810331dc2420b1e28b128bde167fa603fc8f..28596cf5eb191c1d38512f280043fd7df7c16a70 100644 --- a/src/table/format.rs +++ b/src/table/format.rs 
@@ -1,5 +1,3 @@ -use crate::traits::coding_trait::CodingTrait; -use crate::util::coding; use crate::util::slice::Slice; use crate::util::Result; use crate::util::status::Status; @@ -12,42 +10,20 @@ pub const k_max_encoded_length: u32 = 10 + 10; /// of two block handles and a magic number. pub const k_encoded_length: u32 = 2 * k_max_encoded_length + 8; -/// Footer 的大小为 48 字节,内容是一个 8 字节的 magic number 和两个 BlockHandle 构成 -/// 在 Footer::EncodeTo 和 Footer::DecodeFrom 中起作用 -/// kTableMagicNumber was picked by running -/// echo http://code.google.com/p/leveldb/ | sha1sum -/// and taking the leading 64 bits. +/// kTableMagicNumber was picked by running echo http://code.google.com/p/leveldb/ | sha1sum and taking the leading 64 bits. pub const k_table_magic_number: u64 = 0xdb4775248b80fb57; /// 1-byte type + 32-bit crc pub const k_block_trailer_size: usize = 5; pub struct BlockHandle { - // 偏移量 + // 偏移量, 编码为可变长度的64位整列,最多占用10个字节 offset: u64, - // + // 大小, 编码为可变长度的64位整列,最多占用10个字节 size: u64 } -/// Footer encapsulates the fixed information stored at the tail -/// end of every table file. -pub struct Footer { - meta_index_handle: BlockHandle, - index_handle: BlockHandle -} - -pub struct BlockContents { - // Actual contents of data - data: Slice, - - // True if data can be cached - cachable: bool, - - // True if caller should delete[] data.data() - heap_allocated:bool, -} - -trait BlockHandleTrait { +trait ToBlockHandle { /// /// The offset of the block in the file. /// @@ -102,7 +78,20 @@ trait BlockHandleTrait { fn decode_from(&mut self, input: Slice) -> Result<()>; } -trait FootTrait { +/// Footer 的大小为 48 字节,最后8个字节为 magic number, 通过魔术对比,可以判断一个文件是否为 SST 文件。 +/// 其余40个字节由三部分构成: +/// 1、前两个部分是两个 BlockHandle。BlockHandle 中主要包括两个变量:偏移量offset,大小size。 +/// 通过这两个 BlockHandle 可以分别定位到数据索引区域(data block index)以及元数据索引区域(meta block index). 
+/// 2、 由于 BlockHandle 的成员变量使用可变长度编码,每个 BlockHandle 最大占用20字节, +/// 因此如果前两部分不足40字节,则需要padding结构补充,这也构成了第三部分。 +/// PS: 可变长度编码 变长的64位整型。 +/// +pub struct Footer { + meta_index_handle: BlockHandle, + index_handle: BlockHandle +} + +trait ToFoot { // The block handle for the metaindex block of the table fn meta_index_handle(&self) -> BlockHandle; @@ -142,18 +131,7 @@ trait FootTrait { fn decode_from(&mut self, input: Slice) -> Result<()>; } -trait BlockContent { - /// Read the block identified by "handle" from "file". On failure - /// return non-OK. On success fill *result and return OK. - fn read_block(&self, - // todo RandomAccessFile, ReadOptions 未提供 - // file: RandomAccessFile, options: ReadOptions, - handle: BlockHandle - ) -> Result; - -} - -impl BlockHandleTrait for BlockHandle { +impl ToBlockHandle for BlockHandle { fn offset(&self) -> u64 { self.offset } @@ -198,7 +176,7 @@ impl Default for BlockHandle { } } -impl FootTrait for Footer { +impl ToFoot for Footer { /// The block handle for the metaindex block of the table fn meta_index_handle(&self) -> BlockHandle { todo!() @@ -225,8 +203,31 @@ impl FootTrait for Footer { } } -impl BlockContent for BlockContents { - fn read_block(&self, handle: BlockHandle) -> Result { +/// ############################# BlockContent +pub struct BlockContent { + // Actual contents of data + data: Slice, + + // True if data can be cached + cachable: bool, + + // True if caller should delete[] data.data() + heap_allocated:bool, +} + +trait ToBlockContent { + /// Read the block identified by "handle" from "file". On failure + /// return non-OK. On success fill *result and return OK. 
+ fn read_block(&self, + // todo RandomAccessFile, ReadOptions 未提供 + // file: RandomAccessFile, options: ReadOptions, + handle: BlockHandle + ) -> Result; + +} + +impl ToBlockContent for BlockContent { + fn read_block(&self, handle: BlockHandle) -> Result { todo!() } } diff --git a/src/table/mod.rs b/src/table/mod.rs index f4e0a9449b88927c779a3f00f5aa0a0c546fb351..214c79658e8c118e56e3016c63f2f6cb7c08daf9 100644 --- a/src/table/mod.rs +++ b/src/table/mod.rs @@ -2,6 +2,7 @@ pub mod block; pub mod block_builder; pub mod filter_block; mod filter_block_test; +mod filter_block_test_filter_policy; pub mod format; mod format_test; pub mod ss_table; diff --git a/src/table/ss_table.rs b/src/table/ss_table.rs index 869c7a708fd72d4566dd5a18f02cb54253f1c437..d8f5c5340f08fee7068a73aa1ef45ca154c5371c 100644 --- a/src/table/ss_table.rs +++ b/src/table/ss_table.rs @@ -1,4 +1,22 @@ +/// SST文件又一个个块组成,块中可以保存数据、数据索引、元数据或者元数据索引。 +/// +/// SST文件的格式: +/// +/// [data block 1] -- data block 数据区域(保存具体的键值对数据), 块格式保存 +/// [data block 2] -- 每当 data block 的大小2K的时候,开始创建一个filter +/// ... +/// [data block N] +/// [meta block 1] -- 元数据区域(保存元数据,如布隆过滤器数据),只有一个 meta block。 +/// 不按照块格式保存. 通过 FilterBlockBuilder 构建 +/// +/// [meta block index] -- 元数据索引区域, 块格式保存, BlockHandler +/// [data block index] -- 数据索引区域, 块格式保存, BlockHandler +/// [Footer] -- 尾部(总大小固定48个字节) @see format#Footer +/// +/// +/// 通过读取 Footer,可以定位到 数据索引区域(data block index)以及元数据索引区域(meta block index). 
+/// 通过索引区域后,可以继续定位到具体的数据。 +/// pub struct SSTable { - } \ No newline at end of file diff --git a/src/traits/coding_trait.rs b/src/traits/coding_trait.rs deleted file mode 100644 index fd498821991260e1d1546a55b8f04d1911029030..0000000000000000000000000000000000000000 --- a/src/traits/coding_trait.rs +++ /dev/null @@ -1,287 +0,0 @@ -use crate::util::slice::Slice; - -pub trait CodingTrait { - ///32位定长编码写入字符串 - /// - /// # Arguments - /// - /// * `dst`: 目标字符串 - /// * `value`: 编码值 - /// - /// returns: () - /// - /// # Examples - /// - /// ``` - /// let mut string = String::from("encode:"); - /// put_fixed32(&mut string, 65535); - /// ``` - fn put_fixed32(dst: &mut [u8], offset: usize, value: u32) -> usize; - ///64位定长编码写入字符串 - /// - /// # Arguments - /// - /// * `dst`: 目标字符串 - /// * `value`: 编码值 - /// - /// returns: () - /// - /// # Examples - /// - /// ``` - /// let mut string = String::from("encode:"); - /// put_fixed64(&mut string, 65535); - /// ``` - fn put_fixed64(dst: &mut [u8], offset: usize, value: u64) -> usize; - /// 32位变长编码写入字符串 - /// - /// # Arguments - /// - /// * `dst`: 目标字符串 - /// * `value`: 编码值 - /// - /// returns: () - /// - /// # Examples - /// - /// ``` - /// let mut string = String::from("encode:"); - /// put_varint32(&mut string, 65535); - /// ``` - fn put_varint32(dst: &mut [u8], offset: usize, value: u32) -> usize; - /// 64位变长编码写入字符串 - /// - /// # Arguments - /// - /// * `dst`: 目标字符串 - /// * `value`: 编码值 - /// - /// returns: () - /// - /// # Examples - /// - /// ``` - /// let mut string = String::from("encode:"); - /// put_varint64(&mut string, 65535); - /// ``` - fn put_varint64(dst: &mut [u8], offset: usize, value: u64) -> usize; - /// 将slice的长度写入目标字符串 - /// - /// # Arguments - /// - /// * `dst`: 目标字符串 - /// * `value_len`: Slice类型的编码值长度 - /// - /// returns: () - /// - /// # Examples - /// - /// ``` - /// - /// ``` - // fn put_length_prefixed_slice(dst: &mut [u8], offset: usize, value: &Slice) -> usize; - fn put_length_prefixed_slice(dst: &mut 
[u8], offset: usize, value_len: usize) -> usize; - /// 从slice的开头解码一个32位的变长整数, 并将slice的索引置于解码后的位置 - /// - /// # Arguments - /// - /// * `input`: slice - /// - /// returns: u32 - /// - /// # Examples - /// - /// ``` - /// - /// ``` - fn get_varint32(input: &Slice) -> u32; - /// 从slice的开头解码一个64位的变长整数, 并将slice的索引置于解码后的位置 - /// - /// # Arguments - /// - /// * `input`: slice - /// - /// returns: u32 - /// - /// # Examples - /// - /// ``` - /// - /// ``` - fn get_varint64(input: &Slice) -> u64; - /// 从slice数据中读取长度 返回长度的Slice - /// - /// # Arguments - /// - /// * `input`: 输入数据 - /// - /// returns: () - /// - /// # Examples - /// - /// ``` - /// - /// ``` - fn get_length_prefixed_slice(input: &mut Slice) -> Slice; - /// 32位变长正整数编码 - /// - /// # Arguments - /// - /// * `value`: 编码值 - /// * `buf`: 目标数组 - /// * `offset`: 偏移量 - /// - /// returns: usize : 编码后的偏移量 - /// - /// # Examples - /// - /// ``` - /// let mut buf: [u8; 4] = [0, 0, 0, 0]; - /// let value: u32 = 65534; - /// let offset =encode_varint32(value, &mut buf, 0); - /// ``` - fn encode_varint32(value: u32, buf: &mut [u8], offset: usize) -> usize; - /// 变长正整数编码 - /// - /// # Arguments - /// - /// * `value`: 编码值 - /// * `buf`: 目标数组 - /// * `offset`: 偏移量 - /// - /// returns: usize : 编码后的偏移量 - /// - /// # Examples - /// - /// ``` - /// let mut buf: [u8; 4] = [0, 0, 0, 0]; - /// let value: u32 = 65534; - /// let offset =encode_varint64(value, &mut buf, 0); - /// ``` - fn encode_varint64(value: u64, buf: &mut [u8], offset: usize) -> usize; - /// 获取变长编码后的长度 - /// - /// # Arguments - /// - /// * `value`: 编码值 - /// - /// returns: i32 - /// - /// # Examples - /// - /// ``` - /// - /// ``` - /// 从slice的开头解码一个32位的变长整数, 并将slice的索引置于解码后的位置 - fn varint_length(value: usize) -> usize; - /// 32位定长正整数编码 - /// - /// # Arguments - /// - /// * `value`: 编码值 - /// * `buf`: 目标数组 - /// * `offset`: 偏移量 - /// - /// returns: usize : 编码后的偏移量 - /// - /// # Examples - /// - /// ``` - /// let mut buf: [u8; 4] = [0, 0, 0, 0]; - /// let value: u32 = 
65534; - /// let offset = Self::encode_fixed32(value, &mut buf, 0); - /// ``` - fn encode_fixed32(value: u32, buf: &mut [u8], offset: usize) -> usize; - /// 64位定长正整数编码 - /// - /// # Arguments - /// - /// * `value`: - /// * `buf`: - /// * `offset`: - /// - /// returns: usize - /// - /// # Examples - /// - /// ``` - /// let mut buf: [u8; 8] = [0, 0, 0, 0, 0, 0, 0, 0]; - /// let value: u64 = 65534; - /// let offset = encode_fixed64(value, &mut buf, 0); - /// ``` - fn encode_fixed64(value: u64, buf: &mut [u8], offset: usize) -> usize; - /// 32位定长解码 - /// - /// # Arguments - /// - /// * `buf`: 待解码数据 - /// - /// returns: u32 - /// - /// # Examples - /// - /// ``` - /// - /// ``` - fn decode_fixed32(buf: &[u8]) -> u32; - /// 64位定长解码 - /// - /// # Arguments - /// - /// * `buf`: 待解码数据 - /// - /// returns: u64 - /// - /// # Examples - /// - /// ``` - /// - /// ``` - fn decode_fixed64(buf: &[u8]) -> u64; -} - -macro_rules! coding_trait { - {$TRAIT: ident, $TYPE: ty} => { - pub trait $ TRAIT { - /// 变长正整数编码 - /// - /// # Arguments - /// - /// * `buf`: 目标数组 - /// * `offset`: 偏移量 - /// - /// returns: usize : 编码后的偏移量 - /// - /// # Examples - /// - /// ``` - /// let mut buf: [u8; 4] = [0, 0, 0, 0]; - /// let value: u32 = 65534; - /// let offset = value.varint(&mut buf, 0); - /// ``` - fn varint(self, buf: &mut [u8], offset: usize) -> usize; - /// 定长正整数编码 - /// - /// # Arguments - /// - /// * `buf`: 目标数组 - /// * `offset`: 偏移量 - /// - /// returns: usize : 编码后的偏移量 - /// - /// # Examples - /// - /// ``` - /// let mut buf: [u8; 4] = [0, 0, 0, 0]; - /// let value: u32 = 65534; - /// let offset = value.fixedint(&mut buf, 0); - /// ``` - fn fixedint(self, buf: &mut [u8], offset: usize) -> usize; - } - } -} - -coding_trait!(Coding32,u32); - -coding_trait!(Coding64,u64); - diff --git a/src/traits/filter_policy_trait.rs b/src/traits/filter_policy_trait.rs index aaafafd2b0af82d7bbf3435e74b3922bf0be716c..7b9eca9cdb71d081931804efb8e206e0108727cd 100644 --- a/src/traits/filter_policy_trait.rs 
+++ b/src/traits/filter_policy_trait.rs @@ -17,35 +17,46 @@ pub trait FilterPolicy { /// fn name(&self) -> String; - /// 根据 keys 创建过滤器,并返回 bloom_filter Slice + fn create_filter(&self, keys: Vec<&Slice>) -> Slice; + + /// + /// 根据 key 列表创建一个BloomFilter + /// + /// 有n个整数set,以及一个m位的bit数组,以及k个哈希函数。m[i]表示访问第i个bit位。 /// /// # Arguments /// - /// * `keys`: 创建过滤器的数据清单 + /// * `capacity`: key的个数 + /// * `keys`: key列表 /// - /// returns: bloom_filter Slice + /// returns: bloom filter Slice /// /// # Examples /// /// ``` - /// use crate::util::slice::Slice; + /// use level_db_rust::util::filter_policy_bloom::BloomFilterPolicy; + /// use level_db_rust::util::slice::Slice; /// - /// let mut keys : Vec = Vec::new(); - /// keys.push(Slice::try_from(String::from("hello")).unwrap()); - /// keys.push(Slice::try_from(String::from("world")).unwrap()); + /// let mut keys : Vec<&Slice> = Vec::new(); + /// keys.push(&Slice::try_from(String::from("hello")).unwrap()); + /// keys.push(&Slice::try_from(String::from("world")).unwrap()); /// - /// let policy = BloomFilterPolicy::new(800); + /// let policy = BloomFilterPolicy::new(); /// let bloom_filter: Slice = policy.create_filter(keys); /// ``` - fn create_filter(&self, keys: Vec<&Slice>) -> Slice; + fn create_filter_with_len(&self, capacity: usize, keys: Vec<&Slice>) -> Slice; - fn create_filter_with_len(&self, len: usize, keys: Vec<&Slice>) -> Slice; + // fn create_filter_u8(&self, keys: Vec) -> Slice; + // fn create_filter_u8_with_len(&self, capacity: usize, keys: Vec) -> Slice; + /// 判断一个 key 是否可能存在。 /// + /// 如果 key 存在,一定返回 true。 + /// 如果 key 不存在,可能返回 true 也可能返回 false。 /// /// # Arguments /// - /// * `key`: + /// * `key`: 判断的key 值 /// * `bloom_filter`: /// /// returns: bool diff --git a/src/traits/mod.rs b/src/traits/mod.rs index a1332ef7548ed05a5d266799f00c677e3720015f..c772c64e0af8dd8aa920a43c2a5da6f147397d51 100644 --- a/src/traits/mod.rs +++ b/src/traits/mod.rs @@ -1,7 +1,6 @@ pub mod iterator; pub mod comparator_trait; -pub mod 
coding_trait; pub mod filter_policy_trait; diff --git a/src/util/bloom_filter.rs b/src/util/bloom_filter.rs index b3d1b9e0b780de161b4833533f8495dd0b0d3519..f17a458eaab9e353e9ed8ce78e8d40bd023e3c12 100644 --- a/src/util/bloom_filter.rs +++ b/src/util/bloom_filter.rs @@ -1,10 +1,10 @@ -/// 布隆过滤器 -/// - -pub struct BloomFilter { - -} - -impl BloomFilter { - -} \ No newline at end of file +// /// 布隆过滤器 +// /// +// +// pub struct BloomFilter { +// +// } +// +// impl BloomFilter { +// +// } \ No newline at end of file diff --git a/src/util/bloom_filter_test.rs b/src/util/bloom_filter_test.rs index d148c518aac8b9163e3785966d6e86193371b8b3..e87f3b4d8f472da842b0068d93e4df7c84f69a07 100644 --- a/src/util/bloom_filter_test.rs +++ b/src/util/bloom_filter_test.rs @@ -1,8 +1,8 @@ - -mod test { - - #[test] - fn test_by() { - println!("{}", "a"); - } -} \ No newline at end of file +// +// mod test { +// +// #[test] +// fn test_by() { +// println!("{}", "a"); +// } +// } \ No newline at end of file diff --git a/src/util/cache.rs b/src/util/cache.rs index 8d69c2d2621a2775d5f270a6f922127093127ca1..e15d7defc5198e3e80087ed17e12922f2f1ec0af 100644 --- a/src/util/cache.rs +++ b/src/util/cache.rs @@ -1,170 +1,1044 @@ -use std::cell::RefCell; -use std::rc::Rc; -use crate::util::slice::Slice; +use std::{ptr, thread, usize}; +use std::fmt::Debug; +use std::marker::PhantomData; +use std::ops::Deref; +use std::ptr::NonNull; +use std::sync::{Arc, RwLock}; +use std::sync::atomic::{AtomicUsize, Ordering}; + +use custom_proc_macro::{arr, non_null_new_uncheck}; +use crate::util::hash::Hash; use crate::util::Result; +use crate::util::slice::Slice; -pub struct Handle {} +/// handle类型定义 +type HandleRef = NonNull>; +// 缓存的对象, 以Handle为单位进行数据传递和共享, 其中的value是只读的, 带有读写锁 +#[derive(Debug)] pub struct LRUHandle { + // 缓存的键, 当hash出现冲突时判断key是否相等 key: Slice, - value: T, + // 缓存的数据, 只读 + value: Arc, + // key的hash值, 用于在HandleTable中寻址 hash: u32, + // 是否在缓存中 in_cache: bool, + // key的长度 key_length: usize, + // 
value的长度或者数据量的大小, 用于统计当前缓存了多少数据量 charge: usize, - prev: Option>>>, - next: Option>>>, - next_hash: Option>>, + // 上一节点(lruCache中的双向链表的上一节点) + prev: Option>, + // 下一节点(lruCache中的双向链表的下一节点) + next: Option>, + // 上一节点(handleTable中的双向链表的上一节点) + prev_hash: Option>, + // 下一节点(handleTable中的双向链表的下一节点) + next_hash: Option>, } impl LRUHandle { - pub fn key(&self) -> Slice { - todo!() + /// 从栈上分配内存 + fn new(key: Slice, + value: T, + hash: u32, + charge: usize, + ) -> Self { + let key_length = key.size(); + Self { + key, + value: Arc::new(value), + hash, + in_cache: true, + key_length, + charge, + prev: None, + next: None, + prev_hash: None, + next_hash: None, + } + } + /// 从堆上分配内存 + /// + /// # Arguments + /// + /// * `key`: 键 + /// * `value`: 值 + /// * `hash`: 键的hash + /// * `charge`: 值的长度或者数据大小 + /// + /// returns: HandleRef + /// + /// # Examples + /// + /// ``` + /// + /// ``` + fn new_on_heap(key: Slice, value: T, hash: u32, charge: usize) -> HandleRef { + let key_length = key.size(); + // 在堆上分配 LRUHandle 使用的内存 + let data = Box::new(Self { + key, + value: Arc::new(value), + hash, + in_cache: true, + key_length, + charge, + prev: None, + next: None, + prev_hash: None, + next_hash: None, + }); + // 不检查是否为空指针 异常情况可能会导致程序崩溃 + // 转为裸指针后这块内存不会被自动回收 + non_null_new_uncheck!(Box::into_raw(data)) + } + /// 返回handle的键 + pub fn key(&self) -> &Slice { + &self.key + } + /// 返回handle的值 + pub fn value(&self) -> Arc { + self.value.clone() + } +} + +impl Deref for LRUHandle { + type Target = T; + + fn deref(&self) -> &Self::Target { + // 解引用为value + &self.value } } -pub struct HandleTable { +/// hash表 +/// 当写入达到阈值后会进行扩容, 可以传入default_length避免扩容 +struct HandleTable { + // hash表中已写入的数据量 + elements: usize, + // hash表默认大小, prune时会恢复到这个长度 + default_length: usize, + // hash表的大小 length: usize, + // hash表的table, 堆上分配数组 + list: Vec>>, + // shard号, 用于debug + _shard: usize, + // 标识LRUHandle属于HandleTable, 编译器会检查LRUHandle的生命周期小于HandleTable的生命周期 + _marker: PhantomData<*mut LRUHandle>, } -impl 
HandleTable { - pub fn look_up(&self, _key: &Slice, _hash: u32) -> &LRUHandle { - todo!() +/// 格式化长度, 返回2的次幂 +fn format_length(length: usize) -> usize { + // 最小长度是DEFAULT_HASH_TABLE_LENGTH + if length <= DEFAULT_HASH_TABLE_LENGTH { + return DEFAULT_HASH_TABLE_LENGTH; } + let mut shift = 0; + while length > 1 << shift { + shift += 1; + if 1_usize.checked_shl(shift).is_none() { + // 如果发生了溢出, 返回不溢出的最大值 + return 1 << (shift - 1); + } + } + 1 << shift +} - pub fn insert(&mut self, _handle: LRUHandle) -> &LRUHandle { - todo!() +impl HandleTable { + fn new(shard: usize) -> Self { + Self::new_with_length(shard, DEFAULT_HASH_TABLE_LENGTH) } - pub fn remove(&mut self, _key: &Slice, _hash: u32) -> LRUHandle { - todo!() + fn new_with_length(shard: usize, default_length: usize) -> Self <> { + // 格式化用户输出的长度为2的次幂 + let length = format_length(default_length); + Self { + elements: 0, + default_length: length, + length, + list: vec![None; length], + _shard: shard, + _marker: PhantomData::default(), + } } + /// 从hash表中查询数据 + pub fn look_up(&self, key: &Slice, hash: u32) -> Result>> { + let index = self.find_index(hash); + // 获取hash槽位上的数据, 存在则遍历链表 + let mut head = self.list[index]; + while let Some(handle) = head { + let handle_ref = unsafe { handle.as_ref() }; + if &handle_ref.key == key { + return Ok(Some(handle)); + } + head = handle_ref.next_hash; + } + Ok(None) + } + + /// 向hash表中插入数据 + pub fn insert(&mut self, mut handle: HandleRef) -> Result<()> { + let handle_mut = unsafe { handle.as_mut() }; + let index = self.find_index(handle_mut.hash); + // 获取hash槽位上的头节点 + match self.list[index] { + Some(mut head) => { + let head_mut = unsafe { head.as_mut() }; + // 头插法插入数据 + self.list[index] = Some(handle); + handle_mut.next_hash = Some(head); + head_mut.prev_hash = Some(handle); + } + None => { + self.list[index] = Some(handle); + } + } + self.elements += 1; + self.should_resize()?; + Ok(()) + } + + /// 从hash表中删除数据, 并回收内存 + pub fn remove(&mut self, key: &Slice, hash: u32) -> Result>> { + 
let index = self.find_index(hash); + let mut head = self.list[index]; + // 获取hash槽位上的数据, 遍历到key相等时删除handle + while let Some(mut handle) = head { + let handle_mut = unsafe { handle.as_mut() }; + // key相等进行删除, 这里只断开链表的连接, 内存在lru链表上回收 + if &handle_mut.key == key { + if handle_mut.prev_hash.is_none() && handle_mut.next_hash.is_none() { + // 只有一个节点, 直接置空 + self.list[index] = None; + } else if handle_mut.prev_hash.is_none() { + // 是头节点, 将头节点移交至下一节点 + self.list[index] = handle_mut.next_hash; + // 下一节点的prev_hash要置空 + handle_mut.prev_hash = None; + } else { + // 是其余中间节点或尾节点, 删除当前节点并将下一节点移交给上一节点 + let prev_hash_ptr = unsafe { handle_mut.prev_hash.unwrap().as_mut() }; + prev_hash_ptr.next_hash = handle_mut.next_hash; + // 下一结点不为空时, 将当前节点的prev移交给下一节点的prev + if let Some(mut next_hash) = handle_mut.next_hash { + let next_hash_ptr = unsafe { next_hash.as_mut() }; + next_hash_ptr.prev_hash = handle_mut.prev_hash; + } + } + // 回收内存 + Self::drop_handle(handle.as_ptr()); + self.elements -= 1; + return Ok(Some(handle)); + } + head = handle_mut.next_hash; + } + Ok(None) + } + + + /// 清空hash表 并回收内存 + pub fn prune(&mut self) { + for handle in self.list.iter().filter(|v| v.is_some()) { + // 回收内存 + Self::drop_handle(handle.unwrap().as_ptr()); + } + // 清空list恢复内存 + self.list.clear(); + self.elements = 0; + // 恢复到初始的默认容量 + self.list.resize(self.default_length, None); + self.length = self.default_length; + } + + /// 获取hash表的长度 + #[inline] + #[allow(dead_code)] pub fn length(&self) -> usize { self.length } - fn resize(&mut self) { - todo!() + /// 是否需要扩容 + /// 需要扩容时调用扩容方法 + #[inline] + fn should_resize(&mut self) -> Result<()> { + // 负载因子需要平衡寻址速度与内存占用, 如果扩容后将溢出, 则不扩容 + if (self.elements as f32 > self.list.len() as f32 * LOAD_FACTOR) && self.list.len().checked_shl(1).is_some() { + self.resize()? 
+ } + Ok(()) + } + + /// 获取hash槽位 + #[inline] + fn find_index(&self, hash: u32) -> usize { + hash as usize & self.length.wrapping_sub(1) + } + + /// hash表扩容 + /// 扩容操作较少使用, 标记为cold + #[cold] + fn resize(&mut self) -> Result<()> { + let old_len = self.list.len(); + let new_len = self.list.len() << 1; + self.list.resize(new_len, None); + self.length = new_len; + let list = &mut self.list; + let list_ptr = list.as_mut_ptr(); + // 遍历原hash表 + for (index, handle_option) in list[0..old_len].iter_mut().enumerate() { + if handle_option.is_none() { + // 为空的直接跳过 + continue; + } + let mut current_option = *handle_option; + let (mut low_head, mut low_tail) = (None, None); + let (mut high_head, mut high_tail) = (None, None); + while let Some(mut current) = current_option { + let current_mut = unsafe { current.as_mut() }; + let next = current_mut.next_hash; + // 与原来的容量进行与运算, 可能落在原位置 或者 原位置 + old_len + if current_mut.hash as usize & old_len == 0 { + // 低位 + if low_head.is_none() { + low_head = current_option; + low_tail = current_option; + } else { + // 头插法 + current_mut.next_hash = low_head; + unsafe { low_head.unwrap().as_mut().prev_hash = current_option }; + low_head = current_option; + } + } else { + // 高位 + if high_head.is_none() { + high_head = current_option; + high_tail = current_option; + } else { + // 头插法 + current_mut.next_hash = high_head; + unsafe { high_head.unwrap().as_mut().prev_hash = current_option }; + high_head = current_option; + } + } + current_option = next; + } + if low_head.is_some() { + unsafe { + // 头节点的prev_hash需要置空 + low_head.unwrap().as_mut().prev_hash = None; + // 尾节点的next_hash需要置空 + low_tail.unwrap().as_mut().next_hash = None; + } + } + unsafe { ptr::write(list_ptr.add(index), low_head); } + if high_head.is_some() { + unsafe { + // 头节点的prev_hash需要置空 + high_head.unwrap().as_mut().prev_hash = None; + // 尾节点的next_hash需要置空 + high_tail.unwrap().as_mut().next_hash = None; + } + } + unsafe { ptr::write(list_ptr.add(old_len + index), high_head); } + } + 
Ok(()) + } + + /// 将裸指针包装回Box并回收 + /// 只能在hash表删除后回收内存, 在其他位置回收内存可能会double free, 或其他未定义行为 + #[inline] + fn drop_handle(handle_ptr: *mut LRUHandle) { + // 将指针包装回box, box会在作用域结束之后自动drop掉 + unsafe { Box::from_raw(handle_ptr) }; } } -pub struct LRUCache { +struct LRUCache { + // hash表, 用于存放缓存数据 + table: HandleTable, + // cache的容量 capacity: usize, + // cache的当前使用量, 使用量超过容量会进行扩容 usage: usize, - in_use: LRUHandle, - table: HandleTable, + // lru链表的头指针, 最近使用的 + head_of_lru: Option>, + // lru链表的尾指针, 最先被删除 + tail_of_lru: Option>, + // shard号, 用于debug + _shard: usize, } +/// 默认容量 值的总长度或者是数据总大小 +const DEFAULT_CACHE_PRE_SHARD_CAPACITY: usize = (DEFAULT_CACHE_CAPACITY + (K_NUM_SHARDS - 1)) / K_NUM_SHARDS; + impl LRUCache { - pub fn set_capacity(&mut self, capacity: usize) { - self.capacity = capacity; + fn new(shard: usize) -> Self { + Self::new_with_capacity(shard, DEFAULT_CACHE_PRE_SHARD_CAPACITY, DEFAULT_SHARD_LENGTH) + } + /// 创建LruCache, 使用默认table, 指定容量 + fn new_with_capacity(shard: usize, capacity: usize, default_length: usize) -> Self { + Self::new_with_table_capacity(shard, capacity, default_length) } - pub fn insert(&mut self, _key: &Slice, _hash: u32, _value: T, _charge: usize) -> &LRUHandle { - todo!() + /// 创建LruCache, 指定table, 指定容量 + fn new_with_table_capacity(shard: usize, capacity: usize, default_length: usize) -> Self { + Self { + table: HandleTable::new_with_length(shard, default_length), + capacity, + usage: 0, + head_of_lru: None, + tail_of_lru: None, + _shard: shard, + } } - pub fn look_up(&self, _key: &Slice, _hash: u32) -> &LRUHandle { - todo!() + /// 向lru缓存中插入数据 + /// # Arguments + /// * `key`: 键 + /// * `hash`: 键的hash + /// * `value`: 值 + /// * `charge`: 值的长度或数据大小 + /// returns: Result<(), Status> + /// # Examples + /// ``` + /// + /// ``` + pub fn insert(&mut self, key: Slice, hash: u32, value: T, charge: usize) -> Result<()> { + let handle = LRUHandle::new_on_heap( + key.clone(), + value, + hash, + charge); + // hash表中插入数据 + self.table.insert(handle)?; + 
// 插入lru + self.lru_append(handle)?; + // 使用量加上写入的value的长度或者数据大小 + self.usage += charge; + + // 使用量已经达到容量, 那么删除最少使用的 + if self.usage >= self.capacity { + if let Some(tail) = self.tail_of_lru { + let tail_ref = unsafe { tail.as_ref() }; + // 先删除lru链表尾 + self.lru_remove(tail)?; + // 于从hash表中删除链表尾, 同时回收内存 + self.table.remove(&tail_ref.key, tail_ref.hash)?; + } + } + + Ok(()) } - pub fn release(&mut self, _handle: &LRUHandle) { - todo!() + /// 从lru缓存查询数据 + pub fn look_up(&self, key: &Slice, hash: u32) -> Result>> { + match self.table.look_up(key, hash) { + Ok(handle) => { + match handle { + Some(handle) => { + // 返回为Arc, 这样用户才可以和缓存在多个线程中共享数据 + Ok(Some(unsafe { handle.as_ref() }.value.clone())) + } + None => { Ok(None) } + } + } + Err(err) => { + Err(err) + } + } } - pub fn erase(&mut self, _key: &Slice, _hash: u32) { - todo!() + /// 从lru缓存中删除数据, 同时回收内存 + pub fn erase(&mut self, key: &Slice, hash: u32) -> Result { + let mut charge = 0; + // 先从hash表中删除, 同时回收内存 + let removed_handle = self.table.remove(key, hash)?; + if let Some(removed) = removed_handle { + // 再删除lru链表中的数据 + self.lru_remove(removed)?; + charge = unsafe { removed.as_ref().charge }; + } + + // 返回删除了多少数据量 + Ok(charge) } - pub fn prune(&mut self) { - todo!() + + /// 清空lru缓存, 同时回收内存 + pub fn prune(&mut self) -> Result<()> { + // hash表清空, 回收内存 + self.table.prune(); + // lru头尾指针置空 + self.head_of_lru = None; + self.tail_of_lru = None; + // 使用量归零 + self.usage = 0; + Ok(()) } + + /// 获取当前缓存的数据量 + #[inline] pub fn total_charge(&self) -> usize { - todo!() + self.usage } - pub fn lru_remove(&mut self, _handle: &LRUHandle) { - todo!() - } - pub fn lru_append(&mut self, _head_of_list: &LRUHandle, _e: LRUHandle) { - todo!() - } - pub fn refer(&self, _e: &LRUHandle) { - todo!() + /// 获取当前hash表的槽位数 + pub fn slots(&self) -> usize { + self.table.length } - pub fn unref(&self, _e: &LRUHandle) { - todo!() - } -} -pub trait Cache { - /// 向缓存中插入数据 + /// 向lru链表中插入新缓存, 头插法 /// /// # Arguments /// - /// * `key`: 键 - /// * `value`: 
值 - /// * `charge`: 长度 - /// * `deleter`: 删除的回调函数 + /// * `head_of_list`: + /// * `handle`: /// - /// returns: Handle + /// returns: () /// /// # Examples /// /// ``` - /// let element = cache.insert(Slice::from("123"), block, 10, move || {}); + /// /// ``` - fn insert(&mut self, key: &Slice, value: T, charge: usize, deleter: F) -> Handle - where F: FnOnce(&Slice, T); + fn lru_append(&mut self, mut handle: HandleRef) -> Result<()> { + if let None = self.head_of_lru { + // 头节点为空时, 尾节点也为空 + self.head_of_lru = Some(handle); + self.tail_of_lru = Some(handle); + return Ok(()); + } + // 头插法, 插入lru链表头 + let handle_mut = unsafe { handle.as_mut() }; + let mut head = self.head_of_lru.unwrap(); + let head_mut = unsafe { head.as_mut() }; + head_mut.prev = Some(handle); + handle_mut.next = Some(head); + + // 更新头指针 + self.head_of_lru = Some(handle); + + Ok(()) + } + + /// 删除lru链表中的数据, 同时回收内存 + fn lru_remove(&mut self, mut handle: HandleRef) -> Result<()> { + let handle_mut = unsafe { handle.as_mut() }; + + // 有上一节点, 上一节点直接连接到下一节点 + if let Some(mut prev) = handle_mut.prev { + unsafe { prev.as_mut() }.next = handle_mut.next; + } else { + // 没有上一节点代表是链表头, 需要更新头指针 + self.head_of_lru = handle_mut.next; + } + + // 有下一节点, 下一节点直接连接到上一节点 + if let Some(mut next) = handle_mut.next { + unsafe { next.as_mut() }.prev = handle_mut.prev; + } else { + // 没有下一节点代表是链表尾, 需要更新尾指针 + self.tail_of_lru = handle_mut.prev; + } + + // 使用量 + self.usage -= handle_mut.charge; + + // 删除后, 标记数据已经不在缓存中 + handle_mut.in_cache = false; + + Ok(()) + } +} + +macro_rules! cache_element { + ($shard:expr, $capacity:expr, $default_length:expr) => (RwLock::new(LRUCache::new_with_capacity($shard, pre_shard($capacity), $default_length))); +} + +macro_rules! 
cache_element_default { + ($shard:expr, $capacity:expr, $default_length:expr) => (RwLock::new(LRUCache::new($shard))); +} + +const K_NUM_SHARD_BITS: usize = 5; +/// 默认shard数 32 +const K_NUM_SHARDS: usize = 1 << K_NUM_SHARD_BITS; +/// 默认1000万条或者10M数据 +const DEFAULT_CACHE_CAPACITY: usize = 10_000_000; +/// 负载因子不要太小, 否则会浪费内存 +const LOAD_FACTOR: f32 = 0.75; +const DEFAULT_HASH_TABLE_LENGTH: usize = 16; +// 默认hash表长度为默认shard数*默认的hash表长度 +const DEFAULT_SHARD_LENGTH: usize = K_NUM_SHARDS * DEFAULT_HASH_TABLE_LENGTH; + +/// 具有多个shard的lru缓存 +/// shard的实现可以降低锁粒度, 提高并发度 +/// shard之间的lru容量是相等的, 会进行独立的lru淘汰, hash表扩容等操作 +/// 每个shard拥有独立的读写锁, 一个shard的读写操作不会影响另一个shard的读写 +/// 插入和删除数据时会更新容量, 当容量达到上限时会进行扩容操作 +/// 目前没有实现自动的缩容操作, 可以调用total_charge判断当前容量并进行手动清空 +/// +/// ### Note +/// 1.当使用RC构成双向链表时, 请不要尝试打印cache, 否则会无限递归 +/// ShardLRUCache, LRUCache, HandleTable 不实现Debug +/// 2. 加读锁后请勿再加次读锁, 否则可能死锁 +/// | 线程1 | 线程2 | +/// | ------ | ------- | +/// | read | | +/// | | write(block) | +/// | read(dead) | | +/// +pub struct ShardLRUCache { + // shard用于降低锁粒度 + shard: [RwLock>; 32], + // 默认的初始化hash表长度, 用于初始化hash表 + // 使用较大的值可以避免扩容, 但是不要使用过大的值避免浪费空间 + default_length: usize, + // 当前所有shard中lru cache的最大容量, 超过这个容量将会淘汰数据 + capacity: usize, +} + +#[inline] +fn hash_slice(slice: &Slice) -> u32 { + Hash::hash_code(slice, 0) +} - /// 从缓存中读取数据 +#[inline] +fn shard(hash: u32) -> usize { + (hash >> (32 - K_NUM_SHARD_BITS)) as usize +} + +#[inline] +fn pre_shard(capacity: usize) -> usize { + (capacity + (K_NUM_SHARDS - 1)) / K_NUM_SHARDS +} + +/// 所有权可以多线程传递 +unsafe impl Send for ShardLRUCache {} + +/// 不可变借用可以多线程共享, 内部shard具有可变性并且加锁, 可以安全的在多线程环境下使用 +unsafe impl Sync for ShardLRUCache {} + +impl ShardLRUCache { + /// 私有化构造器 + /// 请使用ShardLRUCache::new()进行构造, 请勿尝试自行构造结构体 + fn default() -> Self { + Self { + shard: arr!([cache_element_default; 32]), + default_length: DEFAULT_SHARD_LENGTH, + capacity: DEFAULT_CACHE_CAPACITY, + } + } + + /// 创建ShardLruCache单线程使用 + /// 单线程使用时内部的读写锁会被编译器消除 /// /// # Arguments 
/// - /// * `key`: 键 + /// * `capacity`: 最大容量, 超出这个容量时, 将会开始淘汰数据 + /// * `default_length`: 默认的hash表容量, 使用较大的值可以避免扩容, 但不要使用太大的值, 避免空间浪费 /// - /// returns: Handle + /// returns: ShardLRUCache /// /// # Examples /// /// ``` - /// let element = cache.lookup(Slice::from("123")); + /// use level_db_rust::util::cache::ShardLRUCache; + /// let charge = 4; + /// let total_length = 10000; + /// ShardLRUCache::new_with_capacity(charge * total_length, 1000); /// ``` - fn lookup(&self, key: &Slice) -> Handle; + pub fn new_with_capacity(capacity: usize, default_length: usize) -> ShardLRUCache { + let mut default_length = if default_length <= DEFAULT_SHARD_LENGTH { + DEFAULT_SHARD_LENGTH + } else { + default_length + }; + default_length = default_length / K_NUM_SHARDS; + Self { + shard: arr!([cache_element; 32]), + default_length, + capacity, + } + } + - /// 从缓存中释放元素 + /// 创建ShardLruCache多线程使用 + /// lookUp会加读锁, insert/erase/prune等写操作会加写锁 + /// 持有写锁的线程panic后, 会导致锁中毒, 数据无法访问, 持有读锁线程panic不会中毒 /// /// # Arguments /// - /// * `handle`: 元素 + /// * `capacity`: 最大容量, 超出这个容量时, 将会开始淘汰数据 + /// * `default_length`: 默认的hash表容量, 使用较大的值可以避免扩容, 但不要使用太大的值, 避免空间浪费 /// - /// returns: () + /// returns: Arc> /// /// # Examples /// /// ``` - /// cache.release(element); + /// use std::thread; + /// use level_db_rust::util::cache::ShardLRUCache; + /// let charge = 4; + /// let total_length = 10000; + /// let cache = ShardLRUCache::new_arc_with_capacity(charge * total_length, 1000); + /// thread::spawn(move || -> Result<(),E>{ + /// cache_clone.insert("key".into(), 1, charge)?; + /// Ok(()) + /// }); /// ``` - fn release(&mut self, handle: Handle); + pub fn new_arc_with_capacity(capacity: usize, default_length: usize) -> Arc> { + let default_length = if default_length <= DEFAULT_SHARD_LENGTH { + DEFAULT_SHARD_LENGTH + } else { + default_length + }; + let default_length_per_shard = default_length / K_NUM_SHARDS; + Arc::new(Self { + shard: arr!([cache_element; 32]), + default_length: 
default_length_per_shard, + capacity, + }) + } - /// 从缓存中删除元素 - /// + + /// 向shard中插入数据 + /// 插入时会将值写入指定的shard, 每个 /// # Arguments + /// * `key`: 键 + /// * `value`: 值 + /// * `charge`: 值长度或者数据大小 + /// returns: Result<(), Status> + /// # Examples + /// ``` + /// use level_db_rust::util::cache::ShardLRUCache; + /// let cache = ShardLRUCache::new_with_capacity(40_0000, 1000); + /// let value = 1; + /// cache.insert("key".into(), value, charge)?; + /// ``` + pub fn insert(&self, key: &Slice, value: T, charge: usize) -> Result<()> { + let hash = hash_slice(key); + let shard = shard(hash); + let result = self.shard[shard].write()?.insert(key.clone(), hash, value, charge); + result + } + + /// 从shard中查询缓存数据 + /// 返回Arc包装的数据, 便于多线程共享value的引用, 请不要在cache外回收value的内存 /// + /// # Arguments /// * `key`: 键 + /// returns: Result>>>, Status> + /// # Examples /// - /// returns: Result<(), Status> + /// ``` + /// use std::sync::Arc; + /// use level_db_rust::util::cache::ShardLRUCache; + /// use level_db_rust::util::slice::Slice; + /// + /// let cache = ShardLRUCache::new_with_capacity(40_0000, 1000); + /// let key: Slice = "key".into(); + /// let value: Option> = cache.lookup(&key)?; + /// ``` + pub fn lookup(&self, key: &Slice) -> Result>> { + let hash = hash_slice(key); + let shard = shard(hash); + self.shard[shard].read()?.look_up(key, hash) + } + + /// 从shard中删除缓存数据 /// + /// # Arguments + /// * `key`: 键 + /// returns: Result<(), Status> /// # Examples /// /// ``` - /// cache.erase(Slice::from("123"))?; + /// use level_db_rust::util::cache::ShardLRUCache; + /// use level_db_rust::util::slice::Slice; + /// + /// let mut cache = ShardLRUCache::new_with_capacity(40_0000, 1000); + /// let key: Slice = "key".into(); + /// cache.erase(&key)?; + /// ``` + pub fn erase(&mut self, key: &Slice) -> Result<()> { + let hash = hash_slice(key); + // 删除缓存 + self.shard[shard(hash)].write()?.erase(key, hash)?; + Ok(()) + } + + /// 清空全部shard的缓存 + /// + /// returns: Result<(), Status> + /// # 
Examples + /// ``` + /// use level_db_rust::util::cache::ShardLRUCache; + /// + /// let mut cache = ShardLRUCache::new_with_capacity(40_0000, 1000); + /// cache.prune()?; /// ``` - fn erase(&mut self, key: &Slice) -> Result<()>; + pub fn prune(&mut self) -> Result<()> { + // 清空全部shard的缓存 + for shard in &mut self.shard { + shard.write()?.prune()? + } + Ok(()) + } + + /// 获取当前缓存的总数据量 + pub fn total_charge(&self) -> Result { + let mut total_charge = 0; + for shard in &self.shard { + total_charge += shard.read()?.total_charge(); + } + Ok(total_charge) + } + + /// 获取当前缓存的最大容量 + #[inline] + #[allow(dead_code)] + pub fn capacity(&self) -> usize { + self.capacity + } + + /// 获取当前全部shard中的槽位数, 可用于判断内存占用情况及扩容效果 + #[allow(dead_code)] + pub fn slots(&self) -> Result { + let mut slots = 0; + for shard in &self.shard { + slots += shard.read()?.slots(); + } + Ok(slots) + } +} + +#[test] +fn test_insert_lookup_single() -> Result<()> { + let capacity = 10_0000; + let cache = ShardLRUCache::new_with_capacity(capacity, 100); + let key = Slice::from("test_key".to_owned() + &1.to_string()); + cache.insert(&key, 1, 4)?; + + let result = cache.lookup(&key)?; + assert_eq!(true, result.is_some()); + assert_eq!(1, *result.unwrap()); + + Ok(()) +} + +#[test] +fn test_insert_cache() -> Result<()> { + let size = 100_0000; + let capacity = 1_0000_0000; + let cache = ShardLRUCache::new_with_capacity(capacity, size); + + let slots = cache.slots()?; + eprintln!("init slots: {}", slots); + + let charge = 4; + for i in 0..size { + let key = Slice::from("test_key".to_owned() + &i.to_string()); + // dbg!(key.clone().to_string()); + cache.insert(&key, i, charge)?; + } + + let total_charge = cache.total_charge()?; + dbg!(total_charge); + assert_eq!(size * charge, total_charge); + + println!("insert count: {}", size); + + let slots = cache.slots()?; + println!("slots after insert: {}", slots); + + for i in 0..size { + let key = Slice::from("test_key".to_owned() + &i.to_string()); + let value = 
/// LRU eviction: insert far more data than the cache may hold and check that the
/// stored amount stays below the configured capacity.
#[test]
fn test_insert_lru() -> Result<()> {
    let size = 100_0000;
    let capacity = 4_0000;
    let charge = 4;
    let cache = ShardLRUCache::new_with_capacity(capacity, size);
    // Same keys as before: "test_key" followed by the decimal index.
    let key_of = |i: usize| Slice::from(format!("test_key{}", i));

    for i in 0..size {
        cache.insert(&key_of(i), i, charge)?;
    }

    let total_charge = cache.total_charge()?;
    dbg!(total_charge);
    // Keys may be spread unevenly over the shards, so only an upper bound is checked.
    assert_eq!(true, total_charge < capacity);

    let mut count = 0;
    for i in 0..size {
        if let Some(v) = cache.lookup(&key_of(i))? {
            assert_eq!(i, *v, "i: {}", i);
            count += 1;
        }
    }

    // For the same reason the number of readable entries is only bounded from above.
    dbg!(count);
    assert_eq!(true, count < capacity / charge);

    // Every entry that is still readable accounts for exactly one `charge`.
    assert_eq!(count, total_charge / charge);

    Ok(())
}
Arc::new(AtomicUsize::new(0)); + // 创建多线程读取缓存 + for i in 0..thread_count { + let share_cache = cache.clone(); + let share_in_cache_count = in_cache_count.clone(); + let share_out_cache_count = out_cache_count.clone(); + let thread = thread::spawn(move || -> Result<()>{ + let key = Slice::from("test_key".to_string() + &i.to_string()); + let read = share_cache.lookup(&key)?; + if read.is_some() { + assert_eq!(i, *read.clone().unwrap().as_ref()); + share_in_cache_count.fetch_add(1, Ordering::Relaxed); + } else { + share_out_cache_count.fetch_add(1, Ordering::Relaxed); + } + Ok(()) + }); + thread_vec.push(thread); + } + + for thread in thread_vec { + thread.join().unwrap()?; + } + + println!("in cache count: {}", in_cache_count.load(Ordering::Acquire)); + println!("out cache count: {}", out_cache_count.load(Ordering::Acquire)); + let total_charge = cache.total_charge()?; + println!("thread_count: {}, charge: {}, capacity: {}, total_charge: {}", thread_count, charge, capacity, total_charge); + assert_eq!(true, charge * in_cache_count.load(Ordering::Acquire) < capacity); + + Ok(()) +} + +#[test] +fn test_erase_cache() -> Result<()> { + let mut cache = ShardLRUCache::new_with_capacity(1000000000, 1024); + let key = Slice::from("test_key"); + cache.insert(&key, 10, 4)?; + cache.erase(&key)?; + cache.insert(&key, 10, 4)?; + cache.erase(&key)?; + let handle = cache.lookup(&key)?; + println!("{:?}", handle); + assert_eq!(true, handle.is_none()); + + Ok(()) +} + +#[test] +fn test_prune() -> Result<()> { + let default_length = 1024; + let mut cache = ShardLRUCache::new_with_capacity(1000000000, default_length); + + let slots = cache.slots()?; + dbg!(slots); + + let count = 100_0000; + + let charge = 4; + println!("-------------------- before insert --------------------"); + for i in 0..count { + let key: Slice = ("key".to_owned() + &i.to_string()).into(); + cache.insert(&key, i, charge)?; + } + println!("-------------------- after insert --------------------"); + + + let 
total_charge = cache.total_charge()?; + dbg!(total_charge); + assert_eq!(charge * count, total_charge); + + for i in 0..count { + let key: Slice = ("key".to_owned() + &i.to_string()).into(); + let value = cache.lookup(&key)?; + assert_eq!(true, value.is_some(), "i: {}", i); + assert_eq!(i, *value.unwrap()); + } + + dbg!(cache.capacity()); + let slots = cache.slots()?; + dbg!(slots); + + println!("-------------------- before prune --------------------"); + cache.prune()?; + println!("-------------------- after prune --------------------"); + + let slots = cache.slots()?; + dbg!(slots); + assert_eq!(default_length, slots); + dbg!(cache.capacity()); + + // 清空后 总存储的数据量为0 + let total_charge = cache.total_charge()?; + dbg!(total_charge); + assert_eq!(0, total_charge); + + // 清空后 数据不能再查询出来 + for i in 0..count { + let key: Slice = ("key".to_owned() + &i.to_string()).into(); + let value = cache.lookup(&key)?; + assert_eq!(true, value.is_none(), "i: {}", i); + } - fn new_id(&self) -> Result; - fn prune(&mut self) -> Result<()>; - fn total_charge(&self) -> usize; - // fn value(&self, key: Handle) -> Handle; + Ok(()) } \ No newline at end of file diff --git a/src/util/cache_test.rs b/src/util/cache_test.rs deleted file mode 100644 index dd63ab23658544ff681ceea772298a1e406ea3c7..0000000000000000000000000000000000000000 --- a/src/util/cache_test.rs +++ /dev/null @@ -1,6 +0,0 @@ -mod test { - #[test] - fn test_insert() { - - } -} \ No newline at end of file diff --git a/src/util/coding.rs b/src/util/coding.rs index 421ea97a39fb695fae2bf327210c7545e4fe48b5..c4320b88924de4c1712d0a0942145e8a19f9bcbe 100644 --- a/src/util/coding.rs +++ b/src/util/coding.rs @@ -1,224 +1,1928 @@ -use std::io::Write; -use crate::traits::coding_trait::CodingTrait; -use crate::traits::coding_trait::Coding32; -use crate::traits::coding_trait::Coding64; +use std::{intrinsics, ptr}; +use std::alloc::{alloc, Layout}; +use std::ops::Deref; + +use crate::util::coding::EncodeData::{Buffer, Slices, Vector}; +use 
/// Grows `$vec` so that `$len` bytes can be written starting at `$offset`.
///
/// When `$offset + $len` exceeds the current length, the vector is extended to exactly
/// that length and the new bytes are zero-filled, so the vector never exposes
/// uninitialized memory. A vector that is already long enough is left untouched.
macro_rules! vec_resize {
    ($vec: ident, $len: expr, $offset: expr) => {{
        let required = $offset + $len;
        if required > $vec.len() {
            // `resize` reserves the capacity and initializes the new tail in one step.
            $vec.resize(required, 0);
        }
    }};
}
/// Bounds check for the checked encode/decode paths.
///
/// Each arm returns an `invalid_argument` error from the enclosing function when the
/// requested range does not fit; otherwise it evaluates to `()`.
///
/// * `(offset, write_len, data_len, write)`: fails unless `offset + write_len <= data_len`
/// * `(offset, limit)`: fails unless `offset < limit`
/// * `(offset, read_len, limit, read)`: fails unless `offset + read_len <= limit`
///
/// Every argument is evaluated exactly once, and a sum that overflows `usize` is treated
/// as out of range.
macro_rules! check_length {
    ($offset: expr, $write_len: expr, $data_len: expr, write) => {{
        let (offset, write_len, data_len): (usize, usize, usize) = ($offset, $write_len, $data_len);
        // An overflowing end position can never fit into the container.
        if offset.checked_add(write_len).map_or(true, |end| end > data_len) {
            return Err(LevelError::invalid_argument(
                Slice::from("offset + write_len must <= data_len"),
                Slice::from(format!("offset = {}, write_len = {}, data_len = {}", offset, write_len, data_len))));
        }
    }};
    ($offset: expr, $limit: expr) => {{
        let (offset, limit): (usize, usize) = ($offset, $limit);
        if offset >= limit {
            return Err(LevelError::invalid_argument(
                Slice::from("offset must < limit"),
                Slice::from(format!("offset = {}, limit = {}", offset, limit))
            ));
        }
    }};
    ($offset: expr, $read_len: expr, $limit: expr, read) => {{
        let (offset, read_len, limit): (usize, usize, usize) = ($offset, $read_len, $limit);
        // An overflowing end position can never fit below the limit.
        if offset.checked_add(read_len).map_or(true, |end| end > limit) {
            return Err(LevelError::invalid_argument(
                Slice::from("offset + read_len must <= limit"),
                Slice::from(format!("offset = {}, read_len = {}, limit = {}", offset, read_len, limit))
            ));
        }
    }};
}
/// Generates an unchecked fixed-width encoder.
///
/// * `$name`: name of the generated function
/// * `$type`: integer type to encode
/// * `$capacity`: type token handed to `type_capacity!` to obtain the byte width
macro_rules! encode_fixed {
    ($name: ident, $type: ty, $capacity: ident) => {
        /// Writes `value` in little-endian byte order at `offset`, without bounds checks.
        ///
        /// # Safety
        /// * For buffer and slice containers, `offset + size_of(value)` must not exceed
        ///   the container length; otherwise the write goes out of bounds.
        ///   Vector containers are grown before the write.
        ///
        /// # Arguments
        ///
        /// * `data`: container receiving the encoded bytes
        /// * `offset`: position at which encoding starts
        /// * `value`: value to encode
        ///
        /// # Examples
        ///
        /// ```
        /// let mut vec = vec![];
        /// // [210, 4, 0, 0]
        /// unsafe {
        ///     uncheck_encode_fixed32(&mut MutVector(&mut vec), 0, 1234);
        /// }
        /// ```
        #[inline]
        unsafe fn $name(data: &mut MutEncodeData, offset: usize, value: $type) {
            // Fetch the base pointer (this grows a vector container when necessary).
            let mut_ptr = get_mut_ptr!(data, type_capacity!($capacity), offset);
            // SAFETY: the caller guarantees that the target range lies inside the
            // container. The destination is an arbitrary byte offset and therefore not
            // necessarily aligned for `$type`, so an unaligned write is required.
            unsafe {
                ptr::write_unaligned(mut_ptr.add(offset) as *mut $type, swap_bytes!(value));
            }
        }
    };
}
+ } else if length == 3 { + ptr::write(mut_ptr.add(offset) as *mut [u8; 3], [ + (value | 128) as u8, + (value >> 7 | 128) as u8, + (value >> 14) as u8, + ]); + offset + 3 + } else if length == 4 { + ptr::write(mut_ptr.add(offset) as *mut [u8; 4], [ + (value | 128) as u8, + (value >> 7 | 128) as u8, + (value >> 14 | 128) as u8, + (value >> 21) as u8 + ]); + offset + 4 + } else { + ptr::write(mut_ptr.add(offset) as *mut [u8; 5], [ + (value | 128) as u8, + (value >> 7 | 128) as u8, + (value >> 14 | 128) as u8, + (value >> 21 | 128) as u8, + (value >> 28) as u8, + ]); + offset + 5 + }; +} + + +/// 64位变长编码 +/// # Safety +/// * offset + 写入字节数 < data.len(), 否则溢出(vec除外) +/// +/// # Arguments +/// +/// * `data`: 存储编码的数据 +/// * `offset`: 编码的偏移量 +/// * `value`: 待编码数据 +/// +/// returns: usize +/// +/// # Examples +/// +/// ``` +/// let mut vec = vec![]; +/// let mut offset = 0; +/// // offset = 7, vec = [255, 255, 208, 148, 181, 244, 1] +/// unsafe { offset = uncheck_encode_varint64(&mut MutVector(&mut vec), offset, 8_3980_4651_1103) }; +/// ``` +unsafe fn uncheck_encode_varint64(data: &mut MutEncodeData, mut offset: usize, mut value: u64) -> usize { + let length = varint_length(value); + let mut_ptr = get_mut_ptr!(data, length, offset); + + // 每次写7个bit, 如果剩于的值 >= 128, 说明还需要再编码, 最后一位会小于128直接写入即可 + while value >= 128 { + ptr::write(mut_ptr.add(offset), (value | 128) as u8); + value >>= 7; offset += 1; - dst[offset] = buf[3]; - offset } + ptr::write(mut_ptr.add(offset), value as u8); + offset + 1 +} - fn put_fixed64(dst: &mut [u8], mut offset: usize, value: u64) -> usize { - let mut buf: [u8; 8] = [0, 0, 0, 0, 0, 0, 0, 0]; - Self::encode_fixed64(value, &mut buf, 0); - dst[offset] = buf[0]; - offset += 1; - dst[offset] = buf[1]; - offset += 1; - dst[offset] = buf[2]; - offset += 1; - dst[offset] = buf[3]; - offset += 1; - dst[offset] = buf[4]; - offset += 1; - dst[offset] = buf[5]; - offset += 1; - dst[offset] = buf[6]; - offset += 1; - dst[offset] = buf[7]; - offset +/// 定长解码 
/// Generates an unchecked fixed-width decoder.
///
/// * `$name`: name of the generated function
/// * `$type`: integer type to decode
macro_rules! decode_fixed {
    {$name: ident, $type: ty} => {
        /// Reads a little-endian fixed-width integer at `offset`, without bounds checks.
        ///
        /// # Safety
        /// * `offset + size_of(result)` must not exceed the length of `data`; otherwise
        ///   the read goes out of bounds.
        ///
        /// # Arguments
        ///
        /// * `data`: container holding the encoded bytes
        /// * `offset`: position at which decoding starts
        ///
        /// returns: the decoded integer
        ///
        /// # Examples
        ///
        /// ```
        /// let vec = vec![255, 255, 0, 0];
        /// // 65535
        /// let result = unsafe { uncheck_decode_fixed32(&Vector(&vec), 0) };
        /// ```
        #[inline]
        unsafe fn $name(data: &EncodeData, offset: usize) -> $type {
            let ptr = get_ptr!(data);
            // SAFETY: the caller guarantees that the source range lies inside the
            // container. The source is an arbitrary byte offset and therefore not
            // necessarily aligned for `$type`, so an unaligned read is required.
            swap_bytes!(unsafe { ptr::read_unaligned(ptr.add(offset) as *const $type) })
        }
    }
}
+decode_varint!(uncheck_decode_varint64, u64, 63); + +/// 写入buf +/// +/// # Safety +/// * offset + buf.len() < data.len() , 否则在data不是vec类型的的情况下不会自动扩容, 写入时会溢出 +/// +/// # Arguments +/// +/// * `data`: 存储编码的数据 +/// * `offset`: 编码的偏移量 +/// * `buf`: 待写入的buf +/// +/// returns: () +/// +/// # Examples +/// +/// ``` +/// let mut vec = vec![]; +/// +/// let buf = [1, 2, 3, 4, 5]; +/// // vec = [1, 2, 3, 4, 5] +/// unsafe { uncheck_write_buf(&mut MutVector(&mut vec), 0, &buf); } +/// ``` +unsafe fn uncheck_write_buf(data: &mut MutEncodeData, offset: usize, buf: &[u8]) { + let mut_ptr = get_mut_ptr!(data, buf.len(), offset).add(offset); + // 从buf中拷贝数据写入到指针中 + ptr::copy_nonoverlapping(buf.as_ptr(), mut_ptr, buf.len()); +} + - varint!(u32,encode_varint32); +/// 读取buf 读取时需要知道需要读取的长度 +/// +/// # Safety +/// * offset + len < data.len() , 否则溢出 +/// +/// # Arguments +/// +/// * `data`: 存储编码的数据 +/// * `offset`: 解码的偏移量 +/// +/// returns: &[u8] +/// +/// # Examples +/// +/// ``` +/// let vec = vec![1, 2, 3, 4, 5, 1, 2, 3, 4]; +/// // [1, 2, 3, 4, 5] +/// let buf = unsafe { uncheck_read_buf(&Vector(&vec), 0, 5) }; +/// ``` +unsafe fn uncheck_read_buf(data: &EncodeData, offset: usize, len: usize) -> Slice { + let ptr: *const u8 = get_ptr!(data).add(offset); + // 分配一块内存长度为buf的长度 + let dst: *mut u8 = alloc(Layout::from_size_align_unchecked(len, 4)); + // 将数据拷贝到这块内存上 + intrinsics::copy_nonoverlapping(ptr, dst, len); + // 使用slice包装内存 + Slice::from_raw_parts(dst, len) +} - varint!(u64,encode_varint64); +/// 编码的数据 只读的 +#[derive(Debug)] +enum EncodeData<'a> { + // vec类型 + Vector(&'a Vec), + // buf类型 + Buffer(&'a [u8]), + // slice类型 + Slices(&'a Slice), +} - fn put_varint32(dst: &mut [u8], mut offset: usize, value: u32) -> usize { - let mut buf: [u8; 4] = [0, 0, 0, 0]; - let var_offset = Self::encode_varint32(value, &mut buf, 0); - for i in 0..var_offset { - dst[offset] = buf[i]; - offset += 1; +/// 编码的数据 可变的 +#[derive(Debug)] +enum MutEncodeData<'a> { + // vec类型, 可以扩容 + MutVector(&'a mut Vec), 
/// Generates the fixed-width `put_*` methods of `Encoder`.
///
/// * `$name`: name of the generated method
/// * `$var_name`: free function performing the raw encoding
/// * `$type`: integer type to encode
/// * `$capacity`: type token handed to `type_capacity!` to obtain the byte width
/// * trailing `uncheck` / `check`: selects the unchecked or the bounds-checked flavour
macro_rules! put_fixed {
    ($name:ident, $var_name:ident, $type:ty, $capacity: ident, uncheck) => {
        /// Appends a fixed-width integer at the current offset, without bounds checks.
        ///
        /// # Safety
        /// * For buffer and slice containers, the current offset plus the width of the
        ///   type must not exceed the container length; otherwise the write goes out of
        ///   bounds.
        ///
        /// # Arguments
        ///
        /// * `value`: value to encode
        ///
        /// # Examples
        ///
        /// ```
        /// use level_db_rust::util::coding::Encoder;
        /// let mut vec = vec![];
        /// let mut encoder = Encoder::with_vec(&mut vec);
        /// unsafe {
        ///     // [255, 255, 0, 0]
        ///     encoder.uncheck_put_fixed32(65535);
        ///     // [255, 255, 0, 0, 255, 255, 0, 0]
        ///     encoder.uncheck_put_fixed32(65535);
        /// }
        /// ```
        pub unsafe fn $name(&mut self, value: $type) {
            let width = type_capacity!($capacity);
            // Encode at the current position, then advance behind the written bytes.
            $var_name(&mut self.data, self.offset, value);
            self.offset += width;
        }
    };
    ($name:ident, $var_name:ident, $type:ty, $capacity: ident, check) => {
        /// Appends a fixed-width integer at the current offset after checking that it fits.
        ///
        /// # Arguments
        ///
        /// * `value`: value to encode
        ///
        /// returns: Result<()>
        ///
        /// # Examples
        ///
        /// ```
        /// use level_db_rust::util::coding::Encoder;
        /// let mut vec = vec![];
        /// let mut encoder = Encoder::with_vec(&mut vec);
        /// // [255, 255, 0, 0]
        /// encoder.put_fixed32(65535)?;
        /// ```
        pub fn $name(&mut self, value: $type) -> Result<()> {
            let width = type_capacity!($capacity);
            // Vector containers grow on demand; buffers and slices must already be
            // long enough.
            if !matches!(self.data, MutVector(_)) {
                check_length!(self.offset, width, self.len(), write);
            }
            unsafe { $var_name(&mut self.data, self.offset, value) };
            self.offset += width;
            Ok(())
        }
    };
}
put_varint { + ($name:ident, $var_name:ident, $type:ty, uncheck) => { + /// 编码变长整数 不检查长度 + /// + /// # Safety + /// * offset + varint_length < data.len() , 否则溢出 + /// + /// # Arguments + /// + /// * `value`: 待编码的数据 + /// + /// # Examples + /// + /// ``` + /// use level_db_rust::util::coding::Encoder; + /// let mut vec = vec![]; + /// let mut encoder = Encoder::with_vec(&mut vec); + /// unsafe { + /// // [255, 255, 3] + /// encoder.uncheck_put_varint32(65535); + /// // [255, 255, 3, 255, 255, 3] + /// encoder.uncheck_put_varint64(65535); + /// } + /// ``` + pub unsafe fn $name(&mut self, value: $type) { + // 调用编码方法 + self.offset = $var_name(&mut self.data, self.offset, value); + } + }; + ($name:ident, $var_name:ident, $type:ty, check) => { + /// 编码变长整数 会检查长度 + /// + /// # Arguments + /// + /// * `value`: 待编码的数据 + /// + /// # Examples + /// + /// ``` + /// use level_db_rust::util::coding::Encoder; + /// let mut vec = vec![]; + /// let mut encoder = Encoder::with_vec(&mut vec); + /// // [255, 255, 3] + /// encoder.put_varint32(65535)?; + /// // [255, 255, 3, 255, 255, 3] + /// encoder.put_varint64(65535)?; + /// ``` + pub fn $name(&mut self, value: $type) -> Result<()> { + // vec类型自动扩容, buf 和 slice类型检查长度 + if let MutVector(_) = self.data {} else { check_length!(self.offset, varint_length(value as u64), self.len(), write) }; + // 调用编码方法 + unsafe { self.offset = $var_name(&mut self.data, self.offset, value) } + Ok(()) } - offset } +} - fn put_varint64(dst: &mut [u8], mut offset: usize, value: u64) -> usize { - let mut buf: [u8; 8] = [0, 0, 0, 0, 0, 0, 0, 0]; - let var_offset = Self::encode_varint64(value, &mut buf, 0); - for i in 0..var_offset { - dst[offset] = buf[i]; - offset += 1; +impl<'a> Encoder<'a> { + /// 以vec做为容器生成encoder + /// 编码时当容量不足时会扩容 + /// 如果以追加的方式进行编码推荐使用vec做为容器 + /// 使用vec容器时, 推荐使用uncheck的方法 + /// + /// # Arguments + /// + /// * `vec`: vec + /// + /// returns: Encoder + /// + /// # Examples + /// + /// ``` + /// use 
level_db_rust::util::coding::Encoder; + /// let mut vec = vec![]; + /// let mut encoder = Encoder::with_vec(&mut vec); + /// ``` + pub fn with_vec(vec: &'a mut Vec) -> Self { + Self { + offset: 0, + data: MutVector(vec), } - offset + } + /// 以切片做为容器生成encoder + /// 编码时当容量不足时可能会造成内存溢出 + /// 需要提前规划好需要使用的容量, 并保证调用编码方式时不会溢出 + /// + /// # Arguments + /// + /// * `buf`: buf + /// + /// returns: Encoder + /// + /// # Examples + /// + /// ``` + /// use level_db_rust::util::coding::Encoder; + /// let mut buf = [0; 20]; + /// unsafe { + /// let mut encoder = Encoder::with_buf(&mut buf); + /// } + /// ``` + pub fn with_buf(buf: &'a mut [u8]) -> Self { + Self { + offset: 0, + data: MutBuffer(buf), + } + } + + /// 以slice做为容器生成encoder + /// 编码时当容量不足时会溢出 + /// + /// # Arguments + /// + /// * `slice`: slice + /// + /// returns: Encoder + /// + /// # Examples + /// + /// ``` + /// use level_db_rust::util::coding::Encoder; + /// use level_db_rust::util::slice::Slice; + /// let mut slice = Slice::from_vec(vec![0; 20]); + /// unsafe { + /// let mut encoder = Encoder::with_slice(&mut slice); + /// } + /// ``` + pub fn with_slice(slice: &'a mut Slice) -> Self { + Self { + offset: 0, + data: MutSlices(slice), + } + } + + /// 从encoder的数据中生成decoder + /// + /// returns: Decoder + /// + /// # Examples + /// + /// ``` + /// use level_db_rust::util::coding::Encoder; + /// let mut data = vec![1, 2, 3]; + /// let encoder = Encoder::with_vec(&mut data); + /// let decoder = encoder.create_decoder(); + /// ``` + pub fn create_decoder(&'a self) -> Decoder<'a> { + Decoder::from_encoder(self) + } + + put_fixed!(uncheck_put_fixed32, uncheck_encode_fixed32, u32, u32, uncheck); + put_fixed!(uncheck_put_fixed64, uncheck_encode_fixed64, u64, u64, uncheck); + put_fixed!(put_fixed32, uncheck_encode_fixed32, u32, u32, check); + put_fixed!(put_fixed64, uncheck_encode_fixed64, u64, u64, check); + + put_varint!(uncheck_put_varint32, uncheck_encode_varint32, u32, uncheck); + put_varint!(uncheck_put_varint64, 
uncheck_encode_varint64, u64, uncheck); + put_varint!(put_varint32, uncheck_encode_varint32, u32, check); + put_varint!(put_varint64, uncheck_encode_varint64, u64, check); + + /// 向encoder中直接写入数据不用进行编码 + /// 向vec中写入时会自动扩容 + /// # Safety + /// * self.offset + buf.len() > self.data , 如果data不是vec的话不会自动扩容, 会溢出 + /// + /// # Arguments + /// + /// * `buf`: 待写入的数据 + /// + /// returns: () + /// + /// # Examples + /// + /// ``` + /// use level_db_rust::util::coding::Encoder; + /// let mut vec = vec![]; + /// let mut encoder = Encoder::with_vec(&mut vec); + /// let buf = [1, 2, 3]; + /// // vec: [1, 2, 3] + /// unsafe { encoder.uncheck_put_buf(&buf) } + /// ``` + pub unsafe fn uncheck_put_buf(&mut self, buf: &[u8]) { + uncheck_write_buf(&mut self.data, self.offset, buf); + self.offset += buf.len(); + } + + /// 向encoder中直接写入数据不用进行编码 + /// 向vec中写入时会自动扩容 + /// 会检查是否能够写入 + /// + /// # Arguments + /// + /// * `buf`: 待写入的数据 + /// + /// returns: Result<(), Status> + /// + /// # Examples + /// + /// ``` + /// use level_db_rust::util::coding::Encoder; + /// let mut vec = vec![]; + /// let mut encoder = Encoder::with_vec(&mut vec); + /// let buf = [1, 2, 3]; + /// // vec: [1, 2, 3] + /// encoder.put_buf(&buf)? 
+ /// ``` + pub fn put_buf(&mut self, buf: &[u8]) -> Result<()> { + // vec类型自动扩容 buf 和 slice类型检查长度 + if let MutVector(_) = self.data {} else { check_length!(self.offset, buf.len(), self.len(), write) }; + unsafe { uncheck_write_buf(&mut self.data, self.offset, buf); } + self.offset += buf.len(); + Ok(()) + } + + /// 写入slice时先写入slice的长度做为前缀 + /// slice(data:[1,2,3],size:3), 写入后[3,1,2,3] + /// + /// # Safety + /// * u32的字节数(4) + slice的字节数(slice.size()) < self.data.len(), 否则溢出(vec除外) + /// + /// # Arguments + /// + /// * `slice`: 待写入的slice + /// + /// returns: () + /// + /// # Examples + /// + /// ``` + /// use level_db_rust::util::coding::Encoder; + /// use level_db_rust::util::slice::Slice; + /// let mut vec = vec![]; + /// let mut encoder = Encoder::with_vec(&mut vec); + /// let slice = Slice::from_vec(vec![1, 2, 3]); + /// // vec: [3, 1, 2, 3] + /// // The first '3' of the vec is the length of the slice, + /// // and the following '1,2,3' is the data of the slice + /// unsafe { encoder.uncheck_put_length_prefixed_slice(&slice); } + /// ``` + pub unsafe fn uncheck_put_length_prefixed_slice(&mut self, slice: &Slice) { + self.uncheck_put_varint32(slice.size() as u32); + self.uncheck_put_buf(slice); + } + + /// 写入slice时先写入slice的长度做为前缀 + /// + /// # Arguments + /// + /// * `slice`: 待写入的slice + /// + /// returns: Result<(), Status> + /// + /// # Examples + /// + /// ``` + /// use level_db_rust::util::coding::Encoder; + /// use level_db_rust::util::slice::Slice; + /// let mut vec = vec![]; + /// let mut encoder = Encoder::with_vec(&mut vec); + /// let slice = Slice::from_vec(vec![1, 2, 3]); + /// // vec: [3, 1, 2, 3] + /// // The first '3' of the vec is the length of the slice, + /// // and the following '1,2,3' is the data of the slice + /// encoder.put_length_prefixed_slice(&slice)?; + /// ``` + pub fn put_length_prefixed_slice(&mut self, slice: &Slice) -> Result<()> { + self.put_varint32(slice.size() as u32)?; + self.put_buf(slice)?; + Ok(()) } - // fn 
put_length_prefixed_slice(dst: &mut [u8], offset: usize, value: &Slice) -> usize { - fn put_length_prefixed_slice(dst: &mut [u8], offset: usize, value_len: usize) -> usize { - Self::put_varint64(dst, offset, value_len as u64); - offset + /// 获取当前编码到的位置 + /// + /// returns: usize + /// + /// # Examples + /// + /// ``` + /// use level_db_rust::util::coding::Encoder; + /// let mut vec = vec![]; + /// let mut encoder = Encoder::with_vec(&mut vec); + /// // offset: 0 + /// let offset = encoder.offset(); + /// encoder.put_varint32(65535)?; + /// // offset: 3 + /// let offset = encoder.offset(); + /// ``` + #[inline] + pub fn offset(&self) -> usize { + self.offset } - fn get_varint32(input: &Slice) -> u32 { - let cow = input.borrow_data(); - let bytes = cow.as_bytes(); - let mut result = 0_u32; - let mut shift = 0_u32; - let limit = input.size(); - let mut i = 0; - while shift <= 28 && i < limit { - let b = bytes[i]; - i += 1; - if (b & 128) != 0 { - result |= ((b & 127) << shift) as u32; - } else { - result |= (b << shift) as u32; + /// 获取容器的长度 + /// + /// returns: usize + /// + /// # Examples + /// + /// ``` + /// use level_db_rust::util::coding::Encoder; + /// let mut vec = vec![]; + /// let mut encoder = Encoder::with_vec(&mut vec); + /// // len: 0 + /// let len = encoder.len(); + /// encoder.put_varint32(65535)?; + /// // len: 3 + /// let len = encoder.len(); + /// ``` + pub fn len(&self) -> usize { + match self.data { + MutVector(ref vec) => { + vec.len() + } + MutBuffer(ref buf) => { + buf.len() } - shift += 7; - } - result - } - - fn get_varint64(input: &Slice) -> u64 { - let cow = input.borrow_data(); - let bytes = cow.as_bytes(); - let mut result = 0_u64; - let mut shift = 0_u64; - let limit = input.size(); - let mut i = 0; - while shift <= 63 && i < limit { - let b = bytes[i]; - i += 1; - if (b & 128) != 0 { - result |= ((b & 127) << shift) as u64; - } else { - result |= (b << shift) as u64; + MutSlices(ref slice) => { + slice.size() } - shift += 7; } - result 
} +} - fn get_length_prefixed_slice(input: &mut Slice) -> Slice { - let decode = Coding::get_varint32(input); - Slice::from_buf(decode.to_le_bytes().as_mut_slice()) +macro_rules! get_fixed { + ($name:ident, $var_name:ident, $type:ty, $capacity: ident, uncheck) => { + /// 定长解码 + /// # Safety + /// * self.offset < self.limit 先调用 encoder.can_get() 确定可以解码再调用, 否则溢出 + /// + /// returns: u32/u64 + /// + /// # Examples + /// + /// ``` + /// use level_db_rust::util::coding::Decoder; + /// let mut vec = vec![0, 0, 255, 255]; + /// let mut decoder = Decoder::with_vec(&mut vec); + /// // 65535 + /// let value = unsafe { decoder.uncheck_get_fixed32() }; + /// ``` + #[inline] + pub unsafe fn $name(&mut self) -> $type { + // 调用解码方法 + let value = $var_name(&self.data, self.offset); + self.offset += type_capacity!($capacity); + value + } + }; + ($name:ident, $var_name:ident, $type:ty, $capacity: ident, check) => { + /// 定长解码 + /// # Safety + /// * self.offset < self.limit 先调用 encoder.can_get() 确定可以解码再调用 否则溢出 + /// + /// returns: u32/u64 + /// + /// # Examples + /// + /// ``` + /// use level_db_rust::util::coding::Decoder; + /// let mut vec = vec![0, 0, 255, 255]; + /// let mut decoder = Decoder::with_vec(&mut vec); + /// // 65535 + /// let value = decoder.get_fixed32()?; + /// ``` + #[inline] + pub fn $name(&mut self) -> Result<$type> { + check_length!(self.offset, type_capacity!($capacity), self.limit, read); + // 调用解码方法 + let value = unsafe { $var_name(&self.data, self.offset) }; + self.offset += type_capacity!($capacity); + Ok(value) + } } +} - fn varint_length(mut value: usize) -> usize { - let mut len = 1; - while value >= 128 { - value >>= 7; - len += 1; +macro_rules! 
get_varint { + ($name:ident, $var_name:ident, $type:ty, uncheck) => { + /// 变长解码 + /// + /// returns: u32/u64 + /// + /// # Examples + /// + /// ``` + /// use level_db_rust::util::coding::Decoder; + /// let mut vec = vec![255, 255, 3]; + /// let mut decoder = Decoder::with_vec(&mut vec); + /// // 65535 + /// let value = unsafe { decoder.uncheck_get_varint32() }; + /// ``` + #[inline] + pub unsafe fn $name(&mut self) -> $type { + // 调用解码方法 + let res = $var_name(&self.data, self.offset, self.limit); + self.offset = res.1; + res.0 + } + }; + ($name:ident, $var_name:ident, $type:ty, check) => { + /// 变长解码 + /// + /// returns: u32/u64 + /// + /// # Examples + /// + /// ``` + /// use level_db_rust::util::coding::Decoder; + /// let mut vec = vec![255, 255, 3]; + /// let mut decoder = Decoder::with_vec(&mut vec); + /// // 65535 + /// let value = decoder.get_varint32()?; + /// ``` + #[inline] + pub fn $name(&mut self) -> Result<$type> { + check_length!(self.offset, self.limit); + // 调用解码方法 + let res = unsafe { $var_name(&self.data, self.offset, self.limit) }; + self.offset = res.1; + Ok(res.0) } - len } +} - fn encode_fixed32(value: u32, buf: &mut [u8], mut offset: usize) -> usize { - (&mut buf[offset..]).write(&value.to_le_bytes()).unwrap(); - offset+4 +impl<'a> Decoder<'a> { + pub fn with_slice(slice: &'a Slice) -> Self { + Self { + offset: 0, + limit: slice.size(), + data: Slices(slice), + } + } + pub fn with_buf(buf: &'a [u8]) -> Self { + Self { + offset: 0, + limit: buf.len(), + data: Buffer(buf), + } + } + pub fn with_vec(vec: &'a Vec) -> Self { + Self { + offset: 0, + data: Vector(vec), + limit: vec.len(), + } } - fn encode_fixed64(value: u64, buf: &mut [u8], mut offset: usize) -> usize { - (&mut buf[offset..]).write(&value.to_le_bytes()).unwrap(); - offset+8 + pub fn from_encoder(encoder: &'a Encoder) -> Self { + Self { + offset: 0, + limit: encoder.len(), + data: match encoder.data { + MutVector(ref vec) => { + Vector(vec) + } + MutBuffer(ref buf) => { + 
Buffer(buf) + } + MutSlices(ref slice) => { + Slices(slice) + } + }, + } } + /// 判断是否有数据可以读取 + /// 数据读取到末尾 不满足 offset < limit 时为false + /// 如果使用了uncheck的方法 需要调用这个方法判断是否可以读取 否则可能会溢出 + /// + /// returns: bool + /// + /// # Examples + /// + /// ``` + /// use level_db_rust::util::coding::Decoder; + /// let mut vec = vec![255, 255, 3]; + /// let mut decoder = Decoder::with_vec(&mut vec); + /// // true + /// let can_get = decoder.can_get(); + /// decoder.get_varint32()?; + /// // false + /// let can_get = decoder.can_get(); + /// ``` + #[inline] + pub fn can_get(&self) -> bool { + self.offset < self.limit + } - fn decode_fixed32(buf: &[u8]) -> u32 { - return (buf[0] as u32) | - (buf[1] as u32) << 8 | - (buf[2] as u32) << 16 | - (buf[3] as u32) << 24; + get_fixed!(uncheck_get_fixed32, uncheck_decode_fixed32, u32, u32, uncheck); + get_fixed!(uncheck_get_fixed64, uncheck_decode_fixed64, u64, u64, uncheck); + get_fixed!(get_fixed32, uncheck_decode_fixed32, u32, u32, check); + get_fixed!(get_fixed64, uncheck_decode_fixed64, u64, u64, check); + + get_varint!(uncheck_get_varint32, uncheck_decode_varint32, u32, uncheck); + get_varint!(uncheck_get_varint64, uncheck_decode_varint64, u64, uncheck); + get_varint!(get_varint32, uncheck_decode_varint32, u32, check); + get_varint!(get_varint64, uncheck_decode_varint64, u64, check); + + /// 解码出slice 不检查长度 + /// + /// # Safety + /// * self.offset < self.len() , 否则溢出 + /// + /// returns: Slice + /// + /// # Examples + /// + /// ``` + /// use level_db_rust::util::coding::Decoder; + /// let vec = vec![3, 1, 2, 3]; + /// let mut decoder = Decoder::with_vec(&vec); + /// // [1, 2, 3] + /// let slice = unsafe { decoder.uncheck_get_length_prefixed_slice() }; + /// ``` + pub unsafe fn uncheck_get_length_prefixed_slice(&mut self) -> Slice { + let size = self.uncheck_get_varint32() as usize; + self.uncheck_get_buf(size) + } + + /// 解码出slice + /// + /// returns: Result + /// + /// # Examples + /// + /// ``` + /// use 
level_db_rust::util::coding::Decoder; + /// let vec = vec![3, 1, 2, 3]; + /// let mut decoder = Decoder::with_vec(&vec); + /// // [1, 2, 3] + /// let slice = decoder.get_length_prefixed_slice()?; + /// ``` + pub fn get_length_prefixed_slice(&mut self) -> Result { + check_length!(self.offset, self.limit); + let size = unsafe { self.uncheck_get_varint32() } as usize; + check_length!(self.offset, size, self.limit, read); + unsafe { Ok(self.uncheck_get_buf(size)) } + } + + /// 获取buf 不检查长度 + /// + /// # Safety + /// * self.offset + len < self.limit, 否则溢出 + /// + /// # Arguments + /// + /// * `data`: 待解码数据 + /// * `len`: 解码buf的长度, 必须要指定的, 否则无法正确读取 + /// + /// returns: Slice + /// + /// # Examples + /// + /// ``` + /// use level_db_rust::util::coding::Decoder; + /// let vec = vec![1, 2, 3]; + /// let mut decoder = Decoder::with_vec(&vec); + /// // [1, 2, 3] + /// let buf = unsafe { decoder.uncheck_get_buf(3) }; + /// ``` + pub unsafe fn uncheck_get_buf(&mut self, len: usize) -> Slice { + let slice = uncheck_read_buf(&self.data, self.offset, len); + self.offset += len; + slice + } + + /// 读取buf + /// + /// # Arguments + /// + /// * `data`: 待解码数据 + /// * `len`: 读取buf的长度, 必须要指定的, 否则无法正确读取 + /// + /// returns: Result, Status> + /// + /// # Examples + /// + /// ``` + /// use level_db_rust::util::coding::Decoder; + /// let vec = vec![1, 2, 3]; + /// let mut decoder = Decoder::with_vec(&vec); + /// // [1, 2, 3] + /// let buf = decoder.get_buf(3)?; + /// ``` + pub fn get_buf(&self, len: usize) -> Result { + check_length!(self.offset, len, self.limit, read); + unsafe { + Ok(uncheck_read_buf(&self.data, self.offset, len)) + } + } + + /// 跳过一段长度 偏移量会移动到跳过后的位置继续读取 未检查偏移量 + /// + /// # Safety + /// * offset + skip < self.limit, 否则会出现未定义行为, 读取将溢出 + /// + /// # Arguments + /// + /// * `skip`: 需要跳过的长度 + /// + /// returns: usize + /// + /// # Examples + /// + /// ``` + /// use level_db_rust::util::coding::Decoder; + /// let vec = vec![255, 1, 255, 255, 3]; + /// // offset: 0 + /// let mut 
decoder = Decoder::with_vec(&vec); + /// // offset: 2 + /// unsafe { decoder.uncheck_skip(2) }; + /// // value: 65535 + /// let value = decoder.get_varint32()?; + /// ``` + pub unsafe fn uncheck_skip(&mut self, skip: usize) -> usize { + self.offset += skip; + self.offset + } + + /// 跳过一段长度 偏移量会移动到跳过后的位置继续读取 + /// + /// # Arguments + /// + /// * `skip`: 需要跳过的长度 + /// + /// returns: Result + /// + /// # Examples + /// + /// ``` + /// use level_db_rust::util::coding::Decoder; + /// let vec = vec![255, 1, 255, 255, 3]; + /// // offset: 0 + /// let mut decoder = Decoder::with_vec(&vec); + /// // offset: 2 + /// decoder.skip(2)?; + /// // value: 65535 + /// let value = decoder.get_varint32()?; + /// ``` + pub fn skip(&mut self, skip: usize) -> Result { + check_length!(self.offset, skip, self.limit, read); + self.offset += skip; + Ok(self.offset) + } + + /// 获取当前编码到的位置 + /// + /// returns: usize + /// + /// # Examples + /// + /// ``` + /// use level_db_rust::util::coding::Decoder; + /// let vec = vec![255, 1, 255, 255, 3]; + /// let mut decoder = Decoder::with_vec(&vec); + /// // offset: 0 + /// let value = decoder.get_varint32()?; + /// // offset: 2 + /// let offset = decoder.offset(); + /// ``` + pub fn offset(&self) -> usize { + self.offset } - fn decode_fixed64(buf: &[u8]) -> u64 { - return (buf[0]) as u64 | - (buf[1] as u64) << 8 | - (buf[2] as u64) << 16 | - (buf[3] as u64) << 24 | - (buf[4] as u64) << 32 | - (buf[5] as u64) << 40 | - (buf[6] as u64) << 48 | - (buf[7] as u64) << 56; + /// 获取编码数据的可解码限制 + /// offset < limit + /// + /// returns: usize + /// + /// # Examples + /// + /// ``` + /// use level_db_rust::util::coding::Decoder; + /// let vec = vec![255, 1, 255, 255, 3]; + /// let mut decoder = Decoder::with_vec(&vec); + /// // limit: 5 + /// let limit = decoder.limit(); + /// ``` + pub fn limit(&self) -> usize { + self.limit } } -macro_rules! 
coding_impl { - {$TRAIT: ident, $TYPE: ty, $VAR_NAME: ident, $FIXED_NAME: ident} => { - impl $TRAIT for $TYPE { - /// 变长正整数编码 - /// - /// # Arguments - /// - /// * `buf`: 目标数组 - /// * `offset`: 偏移量 - /// - /// returns: usize : 编码后的偏移量 - /// - /// # Examples - /// - /// ``` - /// let mut buf: [u8; 4] = [0, 0, 0, 0]; - /// let value: u32 = 65534; - /// let offset = value.varint(&mut buf, 0); - /// ``` - fn varint(self, buf: &mut [u8], offset: usize) -> usize { - Coding::$VAR_NAME (self, buf, offset) - } - /// 定长正整数编码 - /// - /// # Arguments - /// - /// * `buf`: 目标数组 - /// * `offset`: 偏移量 - /// - /// returns: usize : 编码后的偏移量 - /// - /// # Examples - /// - /// ``` - /// let mut buf: [u8; 4] = [0, 0, 0, 0]; - /// let value: u32 = 65534; - /// let offset = value.fixedint(&mut buf, 0); - /// ``` - fn fixedint(self, buf: &mut [u8], offset: usize) -> usize { - Coding::$FIXED_NAME (self, buf, offset) - } +#[test] +fn test_varint_length() { + let length = varint_length(1); + assert_eq!(1, length); + let length = varint_length(127); + assert_eq!(1, length); + let length = varint_length(128); + assert_eq!(2, length); + let length = varint_length(255); + assert_eq!(2, length); + let length = varint_length(16383); + assert_eq!(2, length); + let length = varint_length(16384); + assert_eq!(3, length); + let length = varint_length(65535); + assert_eq!(3, length); + let length = varint_length(209_7151); + assert_eq!(3, length); + let length = varint_length(209_7152); + assert_eq!(4, length); + let length = varint_length(2_6843_5455); + assert_eq!(4, length); + let length = varint_length(2_6843_5456); + assert_eq!(5, length); + // 1 << 35 + let length = varint_length(343_5973_8367); + assert_eq!(5, length); + let length = varint_length(343_5973_8368); + assert_eq!(6, length); + let length = varint_length(4_3980_4651_1103); + assert_eq!(6, length); + let length = varint_length(4_3980_4651_1104); + assert_eq!(7, length); + let length = varint_length(562_9499_5342_1311); + assert_eq!(7, 
length); + let length = varint_length(562_9499_5342_1312); + assert_eq!(8, length); + let length = varint_length(7_2057_5940_3792_7935); + assert_eq!(8, length); + let length = varint_length(7_2057_5940_3792_7936); + assert_eq!(9, length); + let length = varint_length(922_3372_0368_5477_5807); + assert_eq!(9, length); + let length = varint_length(922_3372_0368_5477_5808); + assert_eq!(10, length); +} + +#[test] +fn test_encode_fixed() { + let mut vec = vec![]; + unsafe { uncheck_encode_fixed32(&mut MutVector(&mut vec), 0, 1234); } + println!("{:?}", vec); + assert_eq!(vec![210, 4, 0, 0], vec); + assert_eq!(4, vec.len()); + + unsafe { uncheck_encode_fixed32(&mut MutVector(&mut vec), 4, 3_0000_0000); } + println!("{:?}", vec); + assert_eq!(8, vec.len()); + assert_eq!(vec![210, 4, 0, 0, 0, 163, 225, 17], vec); + + let mut vec = vec![]; + unsafe { uncheck_encode_fixed64(&mut MutVector(&mut vec), 0, 8_3980_4651_1103); } + println!("{:?}", vec); + assert_eq!(8, vec.len()); + + unsafe { uncheck_encode_fixed64(&mut MutVector(&mut vec), 8, 900_3372_0368_5477_5808); } + println!("{:?}", vec); + assert_eq!(16, vec.len()); + assert_eq!(vec![255, 63, 148, 82, 163, 7, 0, 0, 0, 0, 106, 101, 42, 103, 242, 124], vec); +} + +#[test] +fn test_decode_fixed() { + let mut vec = vec![]; + unsafe { + uncheck_encode_fixed32(&mut MutVector(&mut vec), 0, 1234); + uncheck_encode_fixed32(&mut MutVector(&mut vec), 4, 128); + uncheck_encode_fixed32(&mut MutVector(&mut vec), 8, 255); + uncheck_encode_fixed32(&mut MutVector(&mut vec), 12, 65535); + uncheck_encode_fixed32(&mut MutVector(&mut vec), 16, 10000000); + } + println!("{:?}", vec); + assert_eq!(vec![210, 4, 0, 0, 128, 0, 0, 0, 255, 0, 0, 0, 255, 255, 0, 0, 128, 150, 152, 0], vec); + + let result = unsafe { uncheck_decode_fixed32(&Vector(&vec), 0) }; + println!("{}", result); + assert_eq!(1234, result); + + let result = unsafe { uncheck_decode_fixed32(&Vector(&vec), 4) }; + println!("{}", result); + assert_eq!(128, result); + + let result = 
unsafe { uncheck_decode_fixed32(&Vector(&vec), 8) }; + println!("{}", result); + assert_eq!(255, result); + + let result = unsafe { uncheck_decode_fixed32(&Vector(&vec), 12) }; + println!("{}", result); + assert_eq!(65535, result); + + let result = unsafe { uncheck_decode_fixed32(&Vector(&vec), 16) }; + println!("{}", result); + assert_eq!(10000000, result); + + let mut vec = vec![]; + unsafe { + uncheck_encode_fixed64(&mut MutVector(&mut vec), 0, 8_3980_4651_1103); + uncheck_encode_fixed64(&mut MutVector(&mut vec), 8, 900_3372_0368_5477_5808); + } + println!("{:?}", vec); + assert_eq!(vec![255, 63, 148, 82, 163, 7, 0, 0, 0, 0, 106, 101, 42, 103, 242, 124], vec); + + let result = unsafe { uncheck_decode_fixed64(&Vector(&vec), 0) }; + println!("{}", result); + assert_eq!(8_3980_4651_1103, result); + + let result = unsafe { uncheck_decode_fixed64(&Vector(&vec), 8) }; + println!("{}", result); + assert_eq!(900_3372_0368_5477_5808, result); +} + +#[test] +fn test_encode_varint() { + let mut vec = vec![]; + let mut offset = 0; + unsafe { offset = uncheck_encode_varint32(&mut MutVector(&mut vec), offset, 2); } + println!("{:?}", vec); + println!("offset: {}", offset); + + unsafe { offset = uncheck_encode_varint32(&mut MutVector(&mut vec), offset, 128); } + println!("{:?}", vec); + println!("offset: {}", offset); + + unsafe { offset = uncheck_encode_varint32(&mut MutVector(&mut vec), offset, 255); } + println!("{:?}", vec); + println!("offset: {}", offset); + + unsafe { offset = uncheck_encode_varint32(&mut MutVector(&mut vec), offset, 65535); } + println!("{:?}", vec); + println!("offset: {}", offset); + + unsafe { offset = uncheck_encode_varint32(&mut MutVector(&mut vec), offset, 10000000); } + println!("{:?}", vec); + println!("offset: {}", offset); + + unsafe { offset = uncheck_encode_varint32(&mut MutVector(&mut vec), offset, 209_7152); } + println!("{:?}", vec); + println!("offset: {}", offset); + + unsafe { offset = uncheck_encode_varint32(&mut MutVector(&mut vec), 
offset, 2_6843_5456); } + println!("{:?}", vec); + println!("offset: {}", offset); + + assert_eq!(21, offset); + assert_eq!(vec![2, 128, 1, 255, 1, 255, 255, 3, 128, 173, 226, 4, 128, 128, 128, 1, 128, 128, 128, 128, 1], vec); + + let mut vec = vec![]; + let mut offset = 0; + + unsafe { offset = uncheck_encode_varint64(&mut MutVector(&mut vec), offset, 65535) }; + println!("{:?}", vec); + println!("offset: {}", offset); + + unsafe { offset = uncheck_encode_varint64(&mut MutVector(&mut vec), offset, 8_3980_4651_1103) }; + println!("{:?}", vec); + println!("offset: {}", offset); + + unsafe { offset = uncheck_encode_varint64(&mut MutVector(&mut vec), offset, 900_3372_0368_5477_5808) }; + println!("{:?}", vec); + println!("offset: {}", offset); + + assert_eq!(19, offset); + assert_eq!(vec![255, 255, 3, 255, 255, 208, 148, 181, 244, 1, 128, 128, 168, 171, 166, 229, 153, 249, 124], vec); +} + +#[test] +fn test_decode_varint() { + let vec = vec![2, 128, 1, 255, 1, 255, 255, 3, 128, 173, 226, 4, 128, 128, 128, 1, 128, 128, 128, 128, 1]; + println!("{:?}", vec); + let mut offset = 0; + let res = unsafe { uncheck_decode_varint32(&Vector(&vec), offset, vec.len()) }; + offset = res.1; + println!("value: {}", res.0); + println!("offset: {}", offset); + assert_eq!(res.0, 2); + + let res = unsafe { uncheck_decode_varint32(&Vector(&vec), offset, vec.len()) }; + offset = res.1; + println!("value: {}", res.0); + println!("offset: {}", offset); + assert_eq!(res.0, 128); + + let res = unsafe { uncheck_decode_varint32(&Vector(&vec), offset, vec.len()) }; + offset = res.1; + println!("value: {}", res.0); + println!("offset: {}", offset); + assert_eq!(res.0, 255); + + let res = unsafe { uncheck_decode_varint32(&Vector(&vec), offset, vec.len()) }; + offset = res.1; + println!("value: {}", res.0); + println!("offset: {}", offset); + assert_eq!(res.0, 65535); + + let res = unsafe { uncheck_decode_varint32(&Vector(&vec), offset, vec.len()) }; + offset = res.1; + println!("value: {}", res.0); 
+ println!("offset: {}", offset); + assert_eq!(res.0, 10000000); + + let res = unsafe { uncheck_decode_varint32(&Vector(&vec), offset, vec.len()) }; + offset = res.1; + println!("value: {}", res.0); + println!("offset: {}", offset); + assert_eq!(res.0, 209_7152); + + let res = unsafe { uncheck_decode_varint32(&Vector(&vec), offset, vec.len()) }; + offset = res.1; + println!("value: {}", res.0); + println!("offset: {}", offset); + assert_eq!(res.0, 2_6843_5456); + + println!("decode varint64: "); + let vec = vec![255, 255, 3, 255, 255, 208, 148, 181, 244, 1, 128, 128, 168, 171, 166, 229, 153, 249, 124]; + println!("{:?}", vec); + let mut offset = 0; + let res = unsafe { uncheck_decode_varint64(&Vector(&vec), offset, vec.len()) }; + offset = res.1; + println!("value: {}", res.0); + println!("offset: {}", offset); + assert_eq!(65535, res.0); + + let res = unsafe { uncheck_decode_varint64(&Vector(&vec), offset, vec.len()) }; + offset = res.1; + println!("value: {}", res.0); + println!("offset: {}", offset); + assert_eq!(8_3980_4651_1103, res.0); + + let res = unsafe { uncheck_decode_varint64(&Vector(&vec), offset, vec.len()) }; + offset = res.1; + println!("value: {}", res.0); + println!("offset: {}", offset); + assert_eq!(900_3372_0368_5477_5808, res.0); +} + + +#[test] +fn test_write_buf() { + let mut vec = vec![]; + + let buf = [1, 2, 3, 4, 5]; + unsafe { uncheck_write_buf(&mut MutVector(&mut vec), 0, &buf); } + + println!("{:?}", vec); + assert_eq!(vec![1, 2, 3, 4, 5], vec); + + let buf = [1, 2, 3, 4]; + unsafe { uncheck_write_buf(&mut MutVector(&mut vec), 5, &buf); } + + println!("{:?}", vec); + assert_eq!(vec![1, 2, 3, 4, 5, 1, 2, 3, 4], vec); +} + +#[test] +fn test_read_buf() { + let vec = vec![1, 2, 3, 4, 5, 1, 2, 3, 4]; + let buf = unsafe { uncheck_read_buf(&Vector(&vec), 0, 5) }; + println!("{:?}", buf); + assert_eq!(&[1_u8, 2, 3, 4, 5] as &[u8; 5], buf.deref()); + let buf = unsafe { uncheck_read_buf(&Vector(&vec), 5, 4) }; + println!("{:?}", buf); + 
assert_eq!(&[1_u8, 2, 3, 4] as &[u8; 4], buf.deref()); +} + +#[test] +fn test_mixed_encode_decode() { + // 混合类型编码 解码 varint32 varint64 fixed32 fixed64 write_buf read_buf + let mut vec = vec![]; + let mut offset = 0; + unsafe { uncheck_encode_fixed32(&mut MutVector(&mut vec), offset, 3) }; + offset += 4; + offset = unsafe { uncheck_encode_varint32(&mut MutVector(&mut vec), offset, 655535) }; + unsafe { uncheck_encode_fixed64(&mut MutVector(&mut vec), offset, 7) }; + offset += 8; + offset = unsafe { uncheck_encode_varint64(&mut MutVector(&mut vec), offset, 8_3980_4651_1103) }; + let buf = [1, 2, 3, 4]; + unsafe { uncheck_write_buf(&mut MutVector(&mut vec), offset, &buf) }; + offset += buf.len(); + println!("{:?}", vec); + println!("offset: {}", offset); + + offset = 0; + let value = unsafe { uncheck_decode_fixed32(&Vector(&vec), offset) }; + println!("{}", value); + assert_eq!(3, value); + offset += 4; + let res = unsafe { uncheck_decode_varint32(&Vector(&vec), offset, (&vec).len()) }; + println!("{}", res.0); + assert_eq!(655535, res.0); + offset = res.1; + let value = unsafe { uncheck_decode_fixed64(&Vector(&vec), offset) }; + println!("{}", value); + assert_eq!(7, value); + offset += 8; + let res = unsafe { uncheck_decode_varint64(&Vector(&vec), offset, (&vec).len()) }; + println!("{}", res.0); + assert_eq!(8_3980_4651_1103, res.0); + offset = res.1; + + let buf = unsafe { uncheck_read_buf(&Vector(&vec), offset, 4) }; + println!("{:?}", buf); + assert_eq!(&[1_u8, 2, 3, 4] as &[u8; 4], buf.deref()); + + println!("offset: {}", offset); + assert_eq!(22, offset); +} + +#[test] +fn test_put_fixed() -> Result<()> { + let mut vec = vec![]; + + unsafe { + let mut encoder = Encoder::with_vec(&mut vec); + println!("{:?}", encoder); + encoder.uncheck_put_fixed32(2); + encoder.uncheck_put_fixed32(128); + encoder.uncheck_put_fixed32(255); + encoder.uncheck_put_fixed32(65535); + encoder.uncheck_put_fixed32(10000000); + encoder.uncheck_put_fixed64(655535); + 
encoder.uncheck_put_fixed64(8_3980_4651_1103); + encoder.uncheck_put_fixed64(900_3372_0368_5477_5808); + println!("{:?}", &encoder); + if let MutVector(data) = encoder.data { + assert_eq!(&mut vec![ + 2, 0, 0, 0, 128, 0, 0, 0, 255, 0, 0, 0, 255, 255, 0, 0, 128, 150, 152, 0, 175, 0, + 10, 0, 0, 0, 0, 0, 255, 63, 148, 82, 163, 7, 0, 0, 0, 0, 106, 101, 42, 103, 242, 124 + ], + data); + } + } + + let mut encoder = Encoder::with_vec(&mut vec); + println!("{:?}", encoder); + encoder.put_fixed32(2)?; + encoder.put_fixed32(128)?; + encoder.put_fixed32(255)?; + encoder.put_fixed32(65535)?; + encoder.put_fixed32(10000000)?; + encoder.put_fixed64(655535)?; + encoder.put_fixed64(8_3980_4651_1103)?; + encoder.put_fixed64(900_3372_0368_5477_5808)?; + println!("{:?}", &encoder); + if let MutVector(data) = encoder.data { + assert_eq!(&mut vec![ + 2, 0, 0, 0, 128, 0, 0, 0, 255, 0, 0, 0, 255, 255, 0, 0, 128, 150, 152, 0, 175, 0, 10, + 0, 0, 0, 0, 0, 255, 63, 148, 82, 163, 7, 0, 0, 0, 0, 106, 101, 42, 103, 242, 124], + data); + } + + let mut buf = [0; 20]; + unsafe { + let mut encoder = Encoder::with_buf(&mut buf); + println!("{:?}", encoder); + encoder.uncheck_put_fixed32(2); + encoder.uncheck_put_fixed64(655535); + encoder.uncheck_put_fixed64(8_3980_4651_1103); + println!("{:?}", &encoder); + if let MutVector(data) = encoder.data { + assert_eq!(&mut vec![0, 0, 0, 2, 0, 0, 0, 0, 0, 10, 0, 175, 0, 0, 7, 163, 82, 148, 63, 255], + data); } } + + + let mut slice = Slice::from_vec(vec![0; 20]); + unsafe { + let mut encoder = Encoder::with_slice(&mut slice); + println!("{:?}", encoder); + encoder.uncheck_put_fixed32(2); + encoder.uncheck_put_fixed64(655535); + encoder.uncheck_put_fixed64(8_3980_4651_1103); + println!("{:?}", &encoder); + if let MutVector(data) = encoder.data { + assert_eq!(&mut vec![0, 0, 0, 2, 0, 0, 0, 0, 0, 10, 0, 175, 0, 0, 7, 163, 82, 148, 63, 255], + data); + } + } + + Ok(()) } -coding_impl!(Coding32,u32,encode_varint32,encode_fixed32); +#[test] +fn test_get_fixed() 
-> Result<()> { + let mut vec = vec![]; + + unsafe { + let mut encoder = Encoder::with_vec(&mut vec); + println!("{:?}", encoder); + encoder.uncheck_put_fixed32(2); + encoder.uncheck_put_fixed32(128); + encoder.uncheck_put_fixed32(255); + encoder.uncheck_put_fixed32(65535); + encoder.uncheck_put_fixed32(10000000); + encoder.uncheck_put_fixed64(655535); + encoder.uncheck_put_fixed64(8_3980_4651_1103); + encoder.uncheck_put_fixed64(900_3372_0368_5477_5808); + println!("{:?}", &encoder.data); + println!("{:?}", &encoder); + if let MutVector(data) = encoder.data { + assert_eq!(&mut vec![2, 0, 0, 0, 128, 0, 0, 0, 255, 0, 0, 0, 255, 255, 0, 0, 128, 150, 152, 0, 175, 0, 10, 0, 0, 0, 0, 0, 255, 63, 148, 82, 163, 7, 0, 0, 0, 0, 106, 101, 42, 103, 242, 124], + data); + } + } + + let mut decoder = Decoder::with_vec(&mut vec); + + while decoder.can_get() { + let value = unsafe { decoder.uncheck_get_fixed32() }; + println!("{}", value); + } + let mut decoder = Decoder::with_vec(&mut vec); + + println!("can_get: {}", decoder.can_get()); + assert_eq!(true, decoder.can_get()); + + assert_eq!(2, unsafe { decoder.uncheck_get_fixed32() }); + assert_eq!(128, unsafe { decoder.uncheck_get_fixed32() }); + assert_eq!(255, unsafe { decoder.uncheck_get_fixed32() }); + assert_eq!(65535, unsafe { decoder.uncheck_get_fixed32() }); + assert_eq!(10000000, unsafe { decoder.uncheck_get_fixed32() }); + assert_eq!(655535, unsafe { decoder.uncheck_get_fixed64() }); + assert_eq!(8_3980_4651_1103, unsafe { decoder.uncheck_get_fixed64() }); + assert_eq!(900_3372_0368_5477_5808, unsafe { decoder.uncheck_get_fixed64() }); + + println!("can_get: {}", decoder.can_get()); + assert_eq!(false, decoder.can_get()); + + let mut decoder = Decoder::with_vec(&mut vec); + + println!("can_get: {}", decoder.can_get()); + assert_eq!(true, decoder.can_get()); + + assert_eq!(2, decoder.get_fixed32()?); + assert_eq!(128, decoder.get_fixed32()?); + assert_eq!(255, decoder.get_fixed32()?); + assert_eq!(65535, 
decoder.get_fixed32()?); + assert_eq!(10000000, decoder.get_fixed32()?); + assert_eq!(655535, decoder.get_fixed64()?); + assert_eq!(8_3980_4651_1103, decoder.get_fixed64()?); + assert_eq!(900_3372_0368_5477_5808, decoder.get_fixed64()?); + + println!("{}", decoder.can_get()); + assert_eq!(false, decoder.can_get()); + + Ok(()) +} + +#[test] +fn test_put_varint() -> Result<()> { + let mut vec = vec![]; + unsafe { + let mut encoder = Encoder::with_vec(&mut vec); + encoder.uncheck_put_varint32(2); + encoder.uncheck_put_varint32(128); + encoder.uncheck_put_varint32(255); + encoder.uncheck_put_varint32(65535); + encoder.uncheck_put_varint32(10000000); + encoder.uncheck_put_varint64(655535); + encoder.uncheck_put_varint64(8_3980_4651_1103); + encoder.uncheck_put_varint64(900_3372_0368_5477_5808); + println!("{:?}", vec); + assert_eq!(vec![2, 128, 1, 255, 1, 255, 255, 3, 128, 173, 226, 4, 175, 129, 40, 255, 255, 208, 148, 181, 244, 1, 128, 128, 168, 171, 166, 229, 153, 249, 124], + vec); + } + { + let mut encoder = Encoder::with_vec(&mut vec); + encoder.put_varint32(2)?; + encoder.put_varint32(128)?; + encoder.put_varint32(255)?; + encoder.put_varint32(65535)?; + encoder.put_varint32(10000000)?; + encoder.put_varint64(655535)?; + encoder.put_varint64(8_3980_4651_1103)?; + encoder.put_varint64(900_3372_0368_5477_5808)?; + println!("{:?}", vec); + assert_eq!(vec![2, 128, 1, 255, 1, 255, 255, 3, 128, 173, 226, 4, 175, 129, 40, 255, 255, 208, 148, 181, 244, 1, 128, 128, 168, 171, 166, 229, 153, 249, 124], + vec); + } + Ok(()) +} + +#[test] +fn test_get_varint() -> Result<()> { + let mut vec = vec![]; + unsafe { + let mut encoder = Encoder::with_vec(&mut vec); + encoder.uncheck_put_varint32(2); + encoder.uncheck_put_varint32(128); + encoder.uncheck_put_varint32(255); + encoder.uncheck_put_varint32(65535); + encoder.uncheck_put_varint32(10000000); + encoder.uncheck_put_varint64(655535); + encoder.uncheck_put_varint64(8_3980_4651_1103); + 
encoder.uncheck_put_varint64(900_3372_0368_5477_5808); + println!("{:?}", vec); + }; + { + let mut decoder = Decoder::with_vec(&mut vec); + assert_eq!(2, decoder.get_varint32()?); + assert_eq!(128, decoder.get_varint32()?); + assert_eq!(255, decoder.get_varint32()?); + assert_eq!(65535, decoder.get_varint32()?); + assert_eq!(10000000, decoder.get_varint32()?); + assert_eq!(655535, decoder.get_varint64()?); + assert_eq!(8_3980_4651_1103, decoder.get_varint64()?); + assert_eq!(900_3372_0368_5477_5808, decoder.get_varint64()?); + }; + Ok(()) +} + +#[test] +fn test_put_buf() -> Result<()> { + let mut vec = vec![]; + let mut encoder = Encoder::with_vec(&mut vec); + let buf = [1, 2, 3]; + unsafe { encoder.uncheck_put_buf(&buf) } + println!("{:?}", buf); + encoder.put_buf(&buf)?; + assert_eq!(&[1_u8, 2, 3, 1, 2, 3], vec.as_slice()); + println!("{:?}", vec); + + Ok(()) +} + +#[test] +fn test_get_buf() -> Result<()> { + let mut vec = vec![]; + { + let mut encoder = Encoder::with_vec(&mut vec); + let buf = [1, 2, 3]; + unsafe { encoder.uncheck_put_buf(&buf) } + println!("{:?}", buf); + assert_eq!(&[1_u8, 2, 3], vec.clone().as_slice()); + } + let mut decoder = Decoder::with_vec(&vec); + let buf = unsafe { decoder.uncheck_get_buf(3) }; + println!("{:?}", buf); + assert_eq!(Slice::from_vec(vec![1, 2, 3]), buf); + assert_eq!(3, decoder.offset); + + Ok(()) +} + +#[test] +fn test_put_length_prefixed_slice() { + let mut vec = vec![]; + { + let mut encoder = Encoder::with_vec(&mut vec); + let slice = Slice::from_vec(vec![1, 2, 3]); + unsafe { encoder.uncheck_put_length_prefixed_slice(&slice); } + assert_eq!(4, encoder.offset) + } + println!("{:?}", vec); + assert_eq!(&vec![3, 1, 2, 3], &vec); +} + +#[test] +fn test_get_length_prefixed_slice() { + let mut vec = vec![]; + { + let mut encoder = Encoder::with_vec(&mut vec); + let slice = Slice::from_vec(vec![1, 2, 3]); + unsafe { encoder.uncheck_put_length_prefixed_slice(&slice); } + } + println!("{:?}", vec); + assert_eq!(vec![3, 1, 2, 
3], vec); + + let mut decoder = Decoder::with_vec(&vec); + let slice = unsafe { decoder.uncheck_get_length_prefixed_slice() }; + println!("{:?}", slice); + assert_eq!(&[1_u8, 2, 3], &*slice); + assert_eq!(4, decoder.offset) +} + +#[test] +fn test_mixed_put_get() { + let mut vec = vec![]; + let mut encoder = Encoder::with_vec(&mut vec); + + unsafe { + encoder.uncheck_put_fixed32(3); + encoder.uncheck_put_varint32(65535); + encoder.uncheck_put_fixed64(7); + encoder.uncheck_put_varint64(8_3980_4651_1103); + let buf = [1, 2, 3]; + encoder.uncheck_put_buf(&buf); + let slice = Slice::from_vec(vec![1, 2, 3]); + encoder.uncheck_put_length_prefixed_slice(&slice); + } + + let mut decoder = Decoder::with_vec(&vec); + unsafe { + assert_eq!(3, decoder.uncheck_get_fixed32()); + assert_eq!(65535, decoder.uncheck_get_varint32()); + assert_eq!(7, decoder.uncheck_get_fixed64()); + assert_eq!(8_3980_4651_1103, decoder.uncheck_get_varint64()); + let buf = [1_u8, 2, 3]; + assert_eq!(&buf, &*decoder.uncheck_get_buf(3)); + let slice = Slice::from_vec(vec![1, 2, 3]); + assert_eq!(slice, decoder.uncheck_get_length_prefixed_slice()) + } +} + +#[test] +fn test_offset_len_skip() -> Result<()> { + let mut vec = vec![]; + let mut encoder = Encoder::with_vec(&mut vec); + assert_eq!(0, encoder.offset()); + assert_eq!(0, encoder.len()); + encoder.put_varint32(65535)?; + assert_eq!(3, encoder.offset()); + assert_eq!(3, encoder.len()); + + encoder.put_varint32(65535)?; + assert_eq!(6, encoder.offset()); + assert_eq!(6, encoder.len()); + + encoder.put_varint32(65535)?; + assert_eq!(9, encoder.offset()); + assert_eq!(9, encoder.len()); + + let mut decoder = Decoder::with_vec(&vec); + assert_eq!(0, decoder.offset()); + assert_eq!(9, decoder.limit()); + + let value = decoder.get_varint32()?; + assert_eq!(3, decoder.offset()); + assert_eq!(9, decoder.limit()); + assert_eq!(65535, value); + + decoder.skip(3)?; + + let value = decoder.get_varint32()?; + assert_eq!(9, decoder.offset()); + assert_eq!(9, 
decoder.limit()); + assert_eq!(65535, value); + + let mut decoder = Decoder::with_vec(&vec); + assert_eq!(0, decoder.offset()); + assert_eq!(9, decoder.limit()); + + let value = decoder.get_varint32()?; + assert_eq!(3, decoder.offset()); + assert_eq!(9, decoder.limit()); + assert_eq!(65535, value); + + unsafe { decoder.uncheck_skip(3); } + + let value = decoder.get_varint32()?; + assert_eq!(9, decoder.offset()); + assert_eq!(9, decoder.limit()); + assert_eq!(65535, value); + + Ok(()) +} + +#[test] +fn test_from_into() { + let mut data = vec![1, 2, 3]; + let encoder = Encoder::with_vec(&mut data); + println!("{:?}", encoder); + + let decoder = encoder.create_decoder(); + println!("{:?}", decoder); + assert_eq!(0, decoder.offset); + let empty = &vec![]; + assert_eq!(vec![1, 2, 3], *if let Vector(data) = decoder.data { data } else { empty }); + assert_eq!(3, decoder.limit); +} + +#[test] +fn test_type_capacity() { + let type_capacity = type_capacity!(u32); + println!("u32: {}", type_capacity); + assert_eq!(4, type_capacity); + let type_capacity = type_capacity!(u64); + println!("u64: {}", type_capacity); + assert_eq!(8, type_capacity); +} -coding_impl!(Coding64,u64,encode_varint64,encode_fixed64); \ No newline at end of file +#[test] +fn test_swap_bytes() { + let value = 0x04030201_u32; + let new_value = swap_bytes!(value); + println!("value: {:?}, new_value: {:?}", value, new_value); + assert_eq!(value, new_value); + // 小端存储bytes + let buf = [0x01, 0x02, 0x03, 0x04]; + let decode = unsafe { uncheck_decode_fixed32(&Buffer(&buf), 0) }; + // 小端存储的0x01,0x02,0x03,0x04解出来的数据要等于0x04030201_u32 + println!("value: {:?}, decode: {:?}", value, decode); + assert_eq!(value, decode); +} \ No newline at end of file diff --git a/src/util/coding_test.rs b/src/util/coding_test.rs deleted file mode 100644 index 1531ad0f440208695cb6adbc6f38c3312315a585..0000000000000000000000000000000000000000 --- a/src/util/coding_test.rs +++ /dev/null @@ -1,174 +0,0 @@ -mod test { - use 
crate::traits::coding_trait::{Coding32, Coding64, CodingTrait}; - use crate::util::coding::{Coding}; - - #[test] - fn test_put_fixed32() { - let mut dst = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]; - let value = 65535; - Coding::put_fixed32(&mut dst, 2, value); - assert_eq!([0, 0, 255, 255, 0, 0, 0, 0, 0, 0, 0, 0] as [u8; 12], dst); - } - - #[test] - fn test_put_fixed64() { - let mut dst = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]; - let value = 65535; - Coding::put_fixed64(&mut dst, 2, value); - assert_eq!([0, 0, 255, 255, 0, 0, 0, 0, 0, 0, 0, 0] as [u8; 12], dst); - } - - #[test] - fn test_put_varint32() { - let mut value = 65535; - let mut dst = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]; - let offset = Coding::put_varint32(&mut dst, 2, value); - println!("offset:{:?}", offset); - assert_eq!(offset, 4); - println!("dst:{:?}", dst); - assert_eq!([0, 0, 255, 255, 0, 0, 0, 0, 0, 0, 0, 0] as [u8; 12], dst); - } - - #[test] - fn test_put_varint64() { - let mut value = 65535; - let mut dst = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]; - let offset = Coding::put_varint64(&mut dst, 2, value); - println!("offset:{:?}", offset); - assert_eq!(offset, 4); - println!("dst:{:?}", dst); - assert_eq!([0, 0, 255, 255, 0, 0, 0, 0, 0, 0, 0, 0] as [u8; 12], dst); - } - - #[test] - fn test_encode_varint32() { - let mut buf: [u8; 4] = [0, 0, 0, 0]; - let mut value: u32 = 65534; - let offset = Coding::encode_varint32(value, &mut buf, 0); - println!("offset:{:?}", offset); - assert_eq!(offset, 2); - println!("buf:{:?}", buf); - assert_eq!(buf, [254, 255, 3, 0]); - } - - #[test] - fn test_encode_varint64() { - let mut buf: [u8; 4] = [0, 0, 0, 0]; - let mut value: u64 = 65535; - let offset = Coding::encode_varint64(value, &mut buf, 0); - println!("offset:{:?}", offset); - assert_eq!(offset, 2); - println!("buf:{:?}", buf); - assert_eq!(buf, [255, 255, 3, 0]); - } - - #[test] - fn test_encode_fixed32() { - let mut buf: [u8; 4] = [0, 0, 0, 0]; - let mut value: u32 = 65534; - let offset = 
Coding::encode_fixed32(value, &mut buf, 0); - assert_eq!(offset, 4); - println!("offset:{:?}", offset); - assert_eq!(buf, [254, 255, 0, 0]); - println!("buf:{:?}", buf); - } - - #[test] - fn test_encode_fixed64() { - let mut buf: [u8; 8] = [0, 0, 0, 0, 0, 0, 0, 0]; - let mut value: u64 = 65535; - let offset = Coding::encode_fixed64(value, &mut buf, 0); - assert_eq!(offset, 8); - println!("offset:{:?}", offset); - assert_eq!(buf, [255, 255, 0, 0, 0, 0, 0, 0]); - println!("buf:{:?}", buf); - } - - #[test] - fn test_varint_u32() { - let mut buf: [u8; 4] = [0, 0, 0, 0]; - let value: u32 = 65534; - println!("value[binary]:{:b}", value); - let offset = value.varint(&mut buf, 0); - println!("offset:{:?}", offset); - println!("buf:{:?}", buf); - assert_eq!(buf, [255, 255, 3, 0]); - } - - #[test] - fn test_varint_u64() { - let mut buf: [u8; 4] = [0, 0, 0, 0]; - let value: u64 = 65534; - println!("value[binary]:{:b}", value); - let offset = value.varint(&mut buf, 0); - println!("offset:{:?}", offset); - println!("buf:{:?}", buf); - } - - #[test] - fn test_fixed_u32() { - let mut buf: [u8; 4] = [0, 0, 0, 0]; - let value: u32 = 123; - println!("value[binary]:{:b}", value); - let offset = value.fixedint(&mut buf, 0); - println!("offset:{:?}", offset); - println!("buf:{:?}", buf); - } - - #[test] - fn test_fixed_u64() { - let mut buf: [u8; 8] = [0, 0, 0, 0, 0, 0, 0, 0]; - let value: u64 = 123; - println!("value[binary]:{:b}", value); - let offset = value.fixedint(&mut buf, 0); - println!("offset:{:?}", offset); - println!("buf:{:?}", buf); - } - - #[test] - fn test_varint_length() { - let len = Coding::varint_length(65535 as u64 as usize); - println!("len: {:?}", len); - assert_eq!(len, 3); - } - - #[test] - fn test_put_length_prefixed_slice() { - // let mut buf: [u8; 4] = [0, 0, 0, 0]; - // Coding::encode_fixed32(&data, &mut buf, 0); - // let mut string = String::from("len:"); - // let slice = Slice::from("data"); - // let mut slice = 
Slice::from_buf(data.to_le_bytes().as_mut_slice()); - // Coding::put_length_prefixed_slice(&mut string, &mut slice); - // println!("{:?}", string) - } - - #[test] - fn test_get_length_prefixed_slice() { - // let data = 12_u32; - // Coding::put_fixed32(, data); - // let mut string = String::from("len:"); - // let mut slice = Slice::from_buf(data.to_le_bytes().as_mut_slice()); - // Coding::put_length_prefixed_slice(&mut string, &mut slice); - } - - #[test] - fn test_decode_fixed32() { - let mut value = 65535_u32; - let mut buf: [u8; 4] = [0, 0, 0, 0]; - Coding::encode_fixed32(value, &mut buf, 0); - let decode = Coding::decode_fixed32(&mut buf); - println!("value:{:?}", value); - assert_eq!(decode, value); - } - - #[test] - fn test_decode_fixed64() { - let mut value = 65535_u64; - let mut buf: [u8; 8] = [0, 0, 0, 0, 0, 0, 0, 0]; - Coding::encode_fixed64(value, &mut buf, 0); - let decode = Coding::decode_fixed64(&mut buf); - println!("value:{:?}", value); - assert_eq!(decode, value); - } -} diff --git a/src/util/crc.rs b/src/util/crc.rs index d15bb3aa856d980cc8a4c033bace50a46b0cfbf9..c2f0443aa4bcb40b5403a499f6c8eb914567234d 100644 --- a/src/util/crc.rs +++ b/src/util/crc.rs @@ -1,7 +1,6 @@ use std::mem::size_of; use std::slice; -use crate::traits::coding_trait::CodingTrait; -use crate::util::coding::Coding; +use crate::util::coding::{Decoder}; use crate::util::slice::Slice; static K_MASK_DELTA: u32 = 0xa282ead8; @@ -240,7 +239,6 @@ const K_STRIDE_EXTENSION_TABLE3: [u32; 256] = [ /// 可以被计算 crc 值的特质 /// 默认实现了 &[T], Vec[T], Slice, &str, String pub trait AsCrc { - #[inline] fn as_crc(&self) -> u32 { self.as_crc_extend(0) @@ -281,7 +279,7 @@ impl AsCrc for Slice { } } -impl AsCrc for Vec { +impl AsCrc for Vec { #[inline] fn as_crc_extend(&self, crc: u32) -> u32 { self.as_slice().as_crc_extend(crc) @@ -324,8 +322,8 @@ macro_rules! step1 { /// Process one of the 4 strides of 4-byte data. macro_rules! 
step4 { - ($name: ident, $data: tt, $s: tt, $i: tt) => { - $name = Coding::decode_fixed32(&$data[$s+$i*4..]) ^ + ($name: ident, $data: tt, $decoder: ident, $s: tt, $i: tt) => { + $name = unsafe{ $decoder.uncheck_get_fixed32() } ^ K_STRIDE_EXTENSION_TABLE3[$name as u8 as usize] ^ K_STRIDE_EXTENSION_TABLE2[($name >> 8) as u8 as usize] ^ K_STRIDE_EXTENSION_TABLE1[($name >> 16) as u8 as usize] ^ @@ -335,11 +333,11 @@ macro_rules! step4 { /// Process a 16-byte swath of 4 strides, each of which has 4 bytes of data. macro_rules! step16 { - ($c0: tt, $c1: tt, $c2: tt, $c3: tt, $data: tt, $s: tt) => { - step4!($c0, $data, $s, 0); - step4!($c1, $data, $s, 1); - step4!($c2, $data, $s, 2); - step4!($c3, $data, $s, 3); + ($c0: tt, $c1: tt, $c2: tt, $c3: tt, $data: tt, $decoder: ident, $s: tt) => { + step4!($c0, $data, $decoder, $s, 0); + step4!($c1, $data, $decoder, $s, 1); + step4!($c2, $data, $decoder, $s, 2); + step4!($c3, $data, $decoder, $s, 3); $s += 16; } } @@ -386,18 +384,19 @@ impl CRC { } if n - s >= 16 { - let mut crc0 = Coding::decode_fixed32(&data[s..]) ^ l; - let mut crc1 = Coding::decode_fixed32(&data[(s + 4)..]); - let mut crc2 = Coding::decode_fixed32(&data[(s + 8)..]); - let mut crc3 = Coding::decode_fixed32(&data[(s + 12)..]); + let mut decoder = Decoder::with_buf(data); + let mut crc0 = unsafe { decoder.uncheck_get_fixed32() } ^ l; + let mut crc1 = unsafe { decoder.uncheck_get_fixed32() }; + let mut crc2 = unsafe { decoder.uncheck_get_fixed32() }; + let mut crc3 = unsafe { decoder.uncheck_get_fixed32() }; s += 16; // println!("c0: {:x}, c1: {:x}, c2: {:x}, c3: {:x}, s: {}", crc0, crc1, crc2, crc3, s); while (n - s) >= 16 { - step16!(crc0, crc1, crc2, crc3, data, s); + step16!(crc0, crc1, crc2, crc3, data, decoder, s); // println!("step16, c0: {:x}, c1: {:x}, c2: {:x}, c3: {:x}, s: {}", crc0, crc1, crc2, crc3, s); } while (n - s) >= 4 { - step4!(crc0, data, s, 0); + step4!(crc0, data, decoder, s, 0); // swap variables (crc1, crc2, crc3) = (crc0, crc1, crc2); 
s += 4; diff --git a/src/util/debug.rs b/src/util/debug.rs index 464919b011763959d8fec31668f7ebd04115d0a6..5e6926e6f59c1af2a508c5b492b1846df478e8b9 100644 --- a/src/util/debug.rs +++ b/src/util/debug.rs @@ -10,7 +10,7 @@ macro_rules! debug { }; ($($arg:tt)*) => {{ use std::io::Write; - std::io::stdout().write(format!($($arg)*).as_bytes()); + std::io::stdout().write(format!($($arg)*).as_bytes()).unwrap(); debug!(); }}; } diff --git a/src/util/error.rs b/src/util/error.rs new file mode 100644 index 0000000000000000000000000000000000000000..f3d6be25778da0370549b862c5d791b72947b844 --- /dev/null +++ b/src/util/error.rs @@ -0,0 +1,272 @@ +// use std::backtrace::{Backtrace, BacktraceStatus}; +// use std::fmt::{Debug, Display, Formatter}; +// use std::io; +// use std::sync::Arc; +// use thiserror::Error; +// +// /// ErrorCodeBacktrace +// #[derive(Clone)] +// pub enum ErrorCodeBacktrace { +// Serialized(Arc), +// Origin(Arc), +// } +// +// impl ToString for ErrorCodeBacktrace { +// fn to_string(&self) -> String { +// match self { +// ErrorCodeBacktrace::Serialized(backtrace) => Arc::as_ref(backtrace).clone(), +// ErrorCodeBacktrace::Origin(backtrace) => { +// format!("{:?}", backtrace) +// } +// } +// } +// } +// +// impl From<&str> for ErrorCodeBacktrace { +// fn from(s: &str) -> Self { +// Self::Serialized(Arc::new(s.to_string())) +// } +// } +// +// impl From for ErrorCodeBacktrace { +// fn from(s: String) -> Self { +// Self::Serialized(Arc::new(s)) +// } +// } +// +// impl From> for ErrorCodeBacktrace { +// fn from(s: Arc) -> Self { +// Self::Serialized(s) +// } +// } +// +// impl From for ErrorCodeBacktrace { +// fn from(bt: Backtrace) -> Self { +// Self::Origin(Arc::new(bt)) +// } +// } +// +// impl From<&Backtrace> for ErrorCodeBacktrace { +// fn from(bt: &Backtrace) -> Self { +// Self::Serialized(Arc::new(bt.to_string())) +// } +// } +// +// impl From> for ErrorCodeBacktrace { +// fn from(bt: Arc) -> Self { +// Self::Origin(bt) +// } +// } +// +// +// /// 
ErrorCodeBacktrace +// /// Provides the `map_err_to_code` method for `Result`. +// /// +// /// ``` +// /// use crate::util::error::ErrorCode; +// /// use crate::util::error::ToErrorCode; +// /// +// /// let x: std::result::Result<(), std::fmt::Error> = Err(std::fmt::Error {}); +// /// let y: common_exception::Result<()> = x.map_err_to_code(ErrorCode::UnknownException, || 123); +// /// +// /// assert_eq!( +// /// "Code: 1067, Text = 123, cause: an error occurred when formatting an argument.", +// /// y.unwrap_err().to_string() +// /// ); +// /// ``` +// pub trait ToErrorCode +// where E: Display + Send + Sync + 'static +// { +// /// Wrap the error value with ErrorCode. It is lazily evaluated: +// /// only when an error does occur. +// /// +// /// `err_code_fn` is one of the ErrorCode builder function such as `ErrorCode::Ok`. +// /// `context_fn` builds display_text for the ErrorCode. +// fn map_err_to_code(self, err_code_fn: ErrFn, context_fn: CtxFn) -> Result +// where +// ErrFn: FnOnce(String) -> ErrorCode, +// D: Display, +// CtxFn: FnOnce() -> D; +// } +// +// impl ToErrorCode for std::result::Result +// where E: Display + Send + Sync + 'static +// { +// fn map_err_to_code(self, make_exception: ErrFn, context_fn: CtxFn) -> Result +// where +// ErrFn: FnOnce(String) -> ErrorCode, +// D: Display, +// CtxFn: FnOnce() -> D, +// { +// self.map_err(|error| { +// let err_text = format!("{}, cause: {}", context_fn(), error); +// make_exception(err_text) +// }) +// } +// } +// +// +// +// +// +// #[derive(Error)] +// pub struct ErrorCode { +// code: u16, +// display_text: String, +// cause: Option>, +// backtrace: Option, +// } +// +// pub type Result = std::result::Result; +// +// impl ErrorCode { +// pub fn code(&self) -> u16 { +// self.code +// } +// +// pub fn message(&self) -> String { +// self.cause +// .as_ref() +// .map(|cause| format!("{}\n{:?}", self.display_text, cause)) +// .unwrap_or_else(|| self.display_text.clone()) +// } +// +// #[must_use] +// pub fn 
add_message(self, msg: impl AsRef) -> Self { +// Self { +// display_text: format!("{}\n{}", msg.as_ref(), self.display_text), +// ..self +// } +// } +// +// #[must_use] +// pub fn add_message_back(self, msg: impl AsRef) -> Self { +// Self { +// display_text: format!("{}{}", self.display_text, msg.as_ref()), +// ..self +// } +// } +// +// /// Set backtrace info for this error. +// /// +// /// Useful when trying to keep original backtrace +// pub fn set_backtrace(mut self, bt: Option>) -> Self { +// if let Some(b) = bt { +// self.backtrace = Some(b.into()); +// } +// self +// } +// +// pub fn backtrace(&self) -> Option { +// self.backtrace.clone() +// } +// +// pub fn backtrace_str(&self) -> String { +// self.backtrace +// .as_ref() +// .map_or("".to_string(), |x| x.to_string()) +// } +// } +// +// impl ErrorCode { +// /// All std error will be converted to InternalError +// pub fn from_std_error(error: T) -> Self { +// ErrorCode { +// code: 1001, +// display_text: error.to_string(), +// cause: None, +// backtrace: Some(ErrorCodeBacktrace::Origin(Arc::new(Backtrace::capture()))), +// } +// } +// +// pub fn from_string(error: String) -> Self { +// ErrorCode { +// code: 1001, +// display_text: error, +// cause: None, +// backtrace: Some(ErrorCodeBacktrace::Origin(Arc::new(Backtrace::capture()))), +// } +// } +// +// pub fn from_string_no_backtrace(error: String) -> Self { +// ErrorCode { +// code: 1001, +// display_text: error, +// cause: None, +// backtrace: None, +// } +// } +// +// pub fn create( +// code: u16, +// display_text: String, +// cause: Option>, +// backtrace: Option, +// ) -> ErrorCode { +// ErrorCode { +// code, +// display_text, +// cause, +// backtrace, +// } +// } +// } +// +// impl Debug for ErrorCode { +// fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { +// write!(f, "Code: {}, Text = {}.", self.code(), self.message(),)?; +// +// match self.backtrace.as_ref() { +// None => Ok(()), // no backtrace +// Some(backtrace) => { +// // TODO: 
Custom stack frame format for print +// match backtrace { +// ErrorCodeBacktrace::Origin(backtrace) => { +// if backtrace.status() == BacktraceStatus::Disabled { +// write!( +// f, +// "\n\n " +// ) +// } else { +// write!(f, "\n\n{}", backtrace) +// } +// } +// ErrorCodeBacktrace::Serialized(backtrace) => write!(f, "\n\n{}", backtrace), +// } +// } +// } +// } +// } +// +// impl Clone for ErrorCode { +// fn clone(&self) -> Self { +// ErrorCode::create(self.code(), self.message(), None, +// self.backtrace()) +// } +// } +// +// impl Display for ErrorCode { +// fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { +// write!(f, "Code: {}, Text = {}.", self.code(), self.message(),) +// } +// } +// +// #[derive(Error, Debug)] +// pub enum StatusError { +// #[error("KOk")] +// KOk(), +// +// #[error("the key `{0}` is not found")] +// KNotFound(String), +// // KCorruption, +// // KNotSupported, +// // KInvalidArgument, +// #[error("data io Error")] +// KIOError(#[from] io::Error), +// // KBadRecord, +// // KRepeatedRecord, +// } +// +// impl StatusError for ErrorCode{ +// +// } \ No newline at end of file diff --git a/src/util/error_code.rs b/src/util/error_code.rs new file mode 100644 index 0000000000000000000000000000000000000000..9b841669f4f84305c0db3cc90456369f1d6352a8 --- /dev/null +++ b/src/util/error_code.rs @@ -0,0 +1,41 @@ +// +// #![allow(non_snake_case)] +// +// use std::backtrace::Backtrace; +// use std::sync::Arc; +// +// use crate::util::error::ErrorCodeBacktrace; +// use crate::util::error::ErrorCode; +// +// macro_rules! build_exceptions { +// ($($(#[$meta:meta])* $body:ident($code:expr)),*$(,)*) => { +// impl ErrorCode { +// $( +// +// paste::item! 
{ +// $( +// #[$meta] +// )* +// pub const [< $body:snake:upper >]: u16 = $code; +// } +// $( +// #[$meta] +// )* +// pub fn $body(display_text: impl Into) -> ErrorCode { +// let bt = Some(ErrorCodeBacktrace::Origin(Arc::new(Backtrace::capture()))); +// ErrorCode::create( +// $code, +// display_text.into(), +// None, +// bt, +// ) +// } +// )* +// } +// } +// } +// k +// build_exceptions! { +// Ok(0), +// Internal(1001), +// } diff --git a/src/util/error_test.rs b/src/util/error_test.rs new file mode 100644 index 0000000000000000000000000000000000000000..7dbc8e084585bc9efd86cbc155cbd1c926fbbb4f --- /dev/null +++ b/src/util/error_test.rs @@ -0,0 +1,169 @@ +// +// mod test { +// use std::borrow::Borrow; +// use crate::debug; +// use crate::util::r#const::COLON_WHITE_SPACE; +// use crate::util::slice::Slice; +// use crate::util::status::{LevelError, Status}; +// use crate::util::error::{ErrorCode, StatusError}; +// +// #[test] +// fn test_wraper() { +// ErrorCode::Ok; +// let err: StatusError = StatusError::KNotFound("a".to_string()); +// let ok_err: StatusError = StatusError::KOk(); +// +// debug!("{:?}", err.borrow()); +// // assert_eq!("KNotFound("a")", err.borrow()); +// +// // let status = Status::wrapper(LevelError::KIOError, String::from(msg1).into()); +// // assert!(&status.is_io_error()); +// // let slice: Slice = status.into_msg(); +// // assert_eq!("abcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabc", +// // String::from(slice)); +// // +// // let ss = Status::wrapper(LevelError::KOk, String::from(msg1).into()); +// // assert!(&ss.is_ok()); +// // assert_eq!("OK", &ss.to_string()); +// } +// +// // #[test] +// // fn test_wrappers() { +// // let msg1 = "abcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabc"; +// // let msg2 = "456456456456456456456456456456456456456456456456"; +// // +// // let status = Status::wrappers(LevelError::KIOError, String::from(msg1).into(), String::from(msg2).into()); +// // let slice: Slice = status.into_msg(); +// // 
assert_eq!("abcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabc: 456456456456456456456456456456456456456456456456", +// // String::from(slice)); +// // +// // let err: Status = LevelError::invalid_argument(String::from(msg1).into(), +// // String::from(msg2).into()); +// // assert!(&err.is_invalid_argument()); +// // +// // let err: Status = LevelError::corruption(String::from(msg1).into(), +// // String::from(msg2).into()); +// // assert!(&err.is_corruption()); +// // +// // let err1: Status = LevelError::corruption_string("AAaaa", "bbhugy"); +// // assert!(&err1.is_corruption()); +// // +// // let err: Status = LevelError::not_found(String::from(msg1).into(), +// // String::from(msg2).into()); +// // assert!(&err.is_not_found()); +// // +// // let err: Status = LevelError::not_supported(String::from(msg1).into(), +// // String::from(msg2).into()); +// // assert!(&err.is_not_supported_error()); +// // +// // let err: LevelError = LevelError::KOk; +// // assert!(&err.is_ok()); +// // +// // let err: LevelError = LevelError::default(); +// // assert!(&err.is_ok()); +// // } +// // +// // #[test] +// // fn test_is_default() { +// // let err: Status = LevelError::ok(); +// // assert!(err.is_ok()); +// // +// // let err: Status = LevelError::io_error(String::from("a").into(), +// // String::from("b").into()); +// // assert!(!err.is_ok()); +// // +// // let status: Status = LevelError::not_found(String::from("a").into(), +// // String::from("b").into()); +// // assert!(status.is_not_found()); +// // assert!(status.get_error().is_not_found()); +// // } +// // +// // #[test] +// // fn test_status_to_string() { +// // // ok +// // let status: Status = LevelError::ok(); +// // assert_eq!("OK", status.to_string()); +// // +// // let msg1 = "abcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabc\ +// // 
abcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabc\ +// // abcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabc\ +// // abcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabc\ +// // abcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabc"; +// // let msglet error: Status = LevelError::invalid_argument(String::from(msg1).into(), +// // String::from(msg2).into()); +// // +// // let binding = error.to_string(); +// // let error_msg = binding.as_str(); +// // println!("{}", error_msg); +// // +// // let expect_string: String = format!("Invalid argument: {}{}{}", String::from(msg1), COLON_WHITE_SPACE, +// // String::from(msg2)); +// // assert_eq!(expect_string, error_msg); +// // } +// // +// // #[test] +// // fn test_level_error_to_string() { +// // // ok +// // let status: Status = LevelError::ok(); +// // assert_eq!("OK", status.to_string()); +// // +// // // err invalid_argument +// // let msg1 = "bcabcabcabcabcabcbc"; +// // let msg2 = "56"; +// // let error: Status = LevelError::invalid_argument(String::from(msg1).into(), +// // String::from(msg2).into()); +// // +// // let le_err: LevelError = error.get_error(); +// // println!("{}", &le_err); +// // +// // // Display +// // assert_eq!(String::from("Invalid argument: "), le_err.to_string()); +// // } +// // +// // #[test] +// // fn test_level_error_try_from() -> Result<(), String> { +// // let rs = 
LevelError::try_from(1)?; +// // assert!(&rs.is_not_found()); +// // assert_eq!(rs.get_value(), 1); +// // let rs: Result = 1.try_into(); +// // assert!(rs.ok().unwrap().is_not_found()); +// // +// // let rs = LevelError::try_from(0)?; +// // assert!(&rs.is_ok()); +// // assert_eq!(rs.get_value(), 0); +// // let rs: Result = 0.try_into(); +// // assert!(rs.ok().unwrap().is_ok()); +// // +// // let rs = LevelError::try_from(2)?; +// // assert!(&rs.is_corruption()); +// // assert_eq!(rs.get_value(), 2); +// // let rs: LevelError = 2.try_into()?; +// // assert!(rs.is_corruption()); +// // +// // let rs: LevelError = LevelError::try_from(3)?; +// // assert!(&rs.is_not_supported_error()); +// // assert_eq!(rs.get_value(), 3); +// // let rs: LevelError = 3.try_into()?; +// // assert!(rs.is_not_supported_error()); +// // +// // let rs = LevelError::try_from(4)?; +// // assert!(&rs.is_invalid_argument()); +// // assert_eq!(rs.get_value(), 4); +// // +// // let rs = LevelError::try_from(5)?; +// // assert!(&rs.is_io_error()); +// // assert_eq!(rs.get_value(), 5); +// // +// // let rs = LevelError::try_from(66); +// // assert_eq!("Unknown code: 66", rs.err().unwrap()); +// // +// // Ok(()) +// // } +// +// } diff --git a/src/util/filter_policy.rs b/src/util/filter_policy.rs index 726e5411b9fb034396b426a875163bd35a6e5741..e66ddc79b874347fd43ca6897e4ecf40fda7ae4e 100644 --- a/src/util/filter_policy.rs +++ b/src/util/filter_policy.rs @@ -1,7 +1,5 @@ -use std::ops::{BitOr, Mul, Shl}; -use crate::traits::filter_policy_trait::{FilterPolicy}; +use crate::util::filter_policy_bloom::BloomFilterPolicy; use crate::util::hash::{Hash, ToHash}; -use crate::util::r#const::HASH_DEFAULT_SEED; use crate::util::slice::Slice; pub trait FromPolicy { @@ -29,168 +27,4 @@ impl AsBloomHash for Slice { fn bloom_hash(&self) -> u32 { BloomFilterPolicy::bloom_hash(self) } -} - -// ######################### BloomFilterPolicy -pub struct BloomFilterPolicy { - bits_per_key: usize, - k: usize -} - -impl 
BloomFilterPolicy { - pub fn new(bits_per_key: usize) -> Self { - // We intentionally round down to reduce probing cost a little bit - // 0.69 =~ ln(2) - let factor: f64 = 0.69; - let mut k_k: usize = factor.mul(bits_per_key as f64).round() as usize; - - if k_k < 1 { - k_k = 1; - } - if k_k > 30{ - k_k = 30; - } - - Self { - bits_per_key, - k : k_k - } - } -} - -impl<'a> BloomFilterPolicy { - pub fn bloom_hash(key: &Slice) -> u32 { - key.to_hash_with_seed(HASH_DEFAULT_SEED) - } -} - -/// get struct BloomFilterPolicy 属性 -impl FromPolicy for BloomFilterPolicy { - fn from_bits_per_key(&self) -> usize { - self.bits_per_key - } - - fn from_k(&self) -> usize { - self.k - } -} - -// dyn FilterPolicy + FromPolicy -impl FilterPolicy for BloomFilterPolicy { - - fn name(&self) -> String { - String::from("leveldb.BuiltinBloomFilter") - } - - fn create_filter(&self, keys: Vec<&Slice>) -> Slice { - self.create_filter_with_len(keys.len(), keys) - } - - fn create_filter_with_len(&self, len: usize, keys: Vec<&Slice>) -> Slice { - let n: usize = len; - - let mut bits: usize = n * self.bits_per_key; - - // For small n, we can see a very high false positive rate. - // Fix it by enforcing a minimum bloom filter length. 
- if bits < 64 { - bits = 64; - } - - let bytes: usize = (bits + 7) / 8; - bits = bytes * 8; - - let mut dst_chars: Vec = vec![0; bytes + 1]; - dst_chars[bytes] = self.k as u8; - - for i in 0..n { - let slice = keys[i]; - - let mut h : u32 = slice.bloom_hash(); - let delta : u32 = (h >> 17) | (h << 15); - - for j in 0..self.k { - let bitpos:usize = ((h as usize) % bits); - - // a |= b --> 按位或, 后赋值给a - let position: usize = bitpos / 8; - let mod_val: usize = bitpos % 8; - let val = (1 as u8).wrapping_shl(mod_val as u32); - - dst_chars[position] |= val; - - h = h.wrapping_add(delta); - } - } - - // Vec 转 Slice - Slice::from_buf(&dst_chars) - } - - fn key_may_match(&self, key: &Slice, bloom_filter: &Slice) -> bool { - let filter_size: usize = bloom_filter.size(); - if filter_size < 2 { - return false; - } - - let bloom_filter_array:Vec = bloom_filter.to_vec(); - let bits: usize = (filter_size - 1) * 8; - - // Use the encoded k so that we can read filters generated by bloom filters created using different parameters. - let k: u8 = bloom_filter_array[filter_size - 1]; - if k > 30 { - // Reserved for potentially new encodings for short bloom filters. Consider it a match. 
- return true; - } - - let mut h : u32 = key.bloom_hash(); - // Rotate right 17 bits - let delta = (h >> 17) | (h << 15); - - for j in 0..k { - let bitpos:usize = ((h as usize) % bits); - if (bloom_filter_array[bitpos/8] & (1 << (bitpos % 8))) == 0 { - return false; - } - - h = h.wrapping_add(delta); - } - - return true; - } -} - -// ######################### InternalFilterPolicy -pub struct InternalFilterPolicy { - user_policy_: dyn FilterPolicy -} - -impl InternalFilterPolicy { - fn new(policy: Box) -> Box { - // InternalFilterPolicy{ user_policy_: policy } - todo!() - } -} - -impl FilterPolicy for InternalFilterPolicy { - fn name(&self) -> String { - todo!() - } - - fn create_filter(&self, keys: Vec<&Slice>) -> Slice { - self.create_filter_with_len(keys.len(), keys) - } - - fn create_filter_with_len(&self, len: usize, keys: Vec<&Slice>) -> Slice { - // 根据指定的参数创建过滤器,并返回结果, 结果为dst的原始内容 + append结果。 - // 参数keys[0,n-1]包含依据用户提供的comparator排序的key列表--可重复, - // 并把根据这些key创建的filter追加到 dst中。 - // - todo!() - } - - fn key_may_match(&self, key: &Slice, bloom_filter: &Slice) -> bool { - todo!() - } - } \ No newline at end of file diff --git a/src/util/filter_policy_bloom.rs b/src/util/filter_policy_bloom.rs new file mode 100644 index 0000000000000000000000000000000000000000..eba1d17a3859a31393047b7e130f52e51b5ca397 --- /dev/null +++ b/src/util/filter_policy_bloom.rs @@ -0,0 +1,213 @@ +use std::ops::Mul; +use crate::traits::filter_policy_trait::FilterPolicy; +use crate::util::filter_policy::{AsBloomHash, FromPolicy}; +use crate::util::hash::ToHash; +use crate::util::r#const::HASH_DEFAULT_SEED; +use crate::util::slice::Slice; + +// ######################### BloomFilterPolicy +pub struct BloomFilterPolicy { + // 每个key需要多少bit来存储表示 + bits_per_key: usize, + + // k为布隆过滤器重hash function数(hash个数) + k: usize +} + +impl BloomFilterPolicy { + /// + /// + /// Return a new filter policy that uses a bloom filter with approximately the specified number of bits per key. 
+ /// A good value for bits_per_key is 10, which yields a filter with ~ 1% false positive rate. + /// + /// # Arguments + /// + /// * `bits_per_key`: m位的bit数组 / n个整数set 的值 + /// + /// returns: BloomFilterPolicy + /// + /// # Examples + /// + /// ``` + /// + /// ``` + pub fn new() -> Self { + BloomFilterPolicy::new_with_bits_per_key(10) + } + + pub fn new_with_bits_per_key(bits_per_key: usize) -> Self { + // We intentionally round down to reduce probing cost a little bit + // 最优的 k_ 是 ln2 * (m/n) -> factor * bits_per_key + + // factor = 0.69 =~ ln(2) + let factor: f64 = 0.69; + let mut k_: usize = factor.mul(bits_per_key as f64).round() as usize; + + // 计算哈希函数个数,控制在 1~30个范围。 + if k_ < 1 { + k_ = 1; + } + if k_ > 30{ + k_ = 30; + } + + Self { + bits_per_key, + k : k_ + } + } +} + +impl<'a> BloomFilterPolicy { + pub fn bloom_hash(key: &Slice) -> u32 { + key.to_hash_with_seed(HASH_DEFAULT_SEED) + } +} + +/// get struct BloomFilterPolicy 属性 +impl FromPolicy for BloomFilterPolicy { + fn from_bits_per_key(&self) -> usize { + self.bits_per_key + } + + fn from_k(&self) -> usize { + self.k + } +} + +impl FilterPolicy for BloomFilterPolicy { + + fn name(&self) -> String { + String::from("leveldb.BuiltinBloomFilter") + } + + fn create_filter(&self, keys: Vec<&Slice>) -> Slice { + let len: usize = keys.len(); + + self.create_filter_with_len(len, keys) + } + + fn create_filter_with_len(&self, capacity: usize, keys: Vec<&Slice>) -> Slice { + let n: usize = capacity; + + // Compute bloom filter size (in both bits and bytes) + // 计算出中的需要的bits个数, n * bits_per_key, 也就是说,对于每一个key需要这么多bit + // 因为bits_per_key_表示 m/n,所以bits = bits_per_key_ * n = m(m 的意思是: m位的bit数组) + let mut bits: usize = n * self.bits_per_key; + + // For small n, we can see a very high false positive rate. Fix it by enforcing a minimum bloom filter length. 
+ // bits太小的话会导致很高的查询错误率, 这里强制bits个数不能小于64 + if bits < 64 { + bits = 64; + } + + //向上按8bit,一个Byte对齐 + let bytes: usize = (bits + 7) / 8; + // 根据 bytes 算出bits数 + bits = bytes * 8; + + // 扩展下要存储BloomFilter的内存空间, 并在尾部一个Byte存哈希函数的个数。 + let mut dst_chars: Vec = vec![0; bytes + 1]; // 相当于是 append 了bytes个0 + // 在filter的最后压入哈希函数的个数。 在最后一位, 记录k 值。 这个k是位于bytes之后。 + dst_chars[bytes] = self.k as u8; + + // 开始依次存储每个key值。 + // 对于每个key采用double hash的方式生成k_个bitpos,然后在 dst_chars 的相应位置设置1。 + for i in 0..keys.len() { + let slice = keys[i]; + + /* 计算哈希值 */ + // BloomFilter理论是通过多个hash计算来减少冲突, + // 但leveldb实际上并未真正去计算多个hash,而是通过double-hashing的方式来达到同样的效果。 + // double-hashing的理论如下: + // h(i,k) = (h1(k) + i*h2(k)) % T.size + // h1(k) = h, h2(k) = delta, h(i,k) = bitpos + // + // 1、计算hash值; + // 2、hash值的高15位,低17位对调 + // 3、按k_个数来存储当前hash值。 + // 3-1、计算存储位置; + // 3-2、按bit存; + // 3-3、累加hash值用于下次计算 + // + // Use double-hashing to generate a sequence of hash values. + // See analysis in [Kirsch,Mitzenmacher 2006]. + let mut h : u32 = slice.bloom_hash(); + // Rotate right 17 bits + let delta : u32 = (h >> 17) | (h << 15); + + for j in 0..self.k { + let bitpos:usize = ((h as usize) % bits); + + // val ==> 1 << (bitpos % 8) + let mod_val: usize = bitpos % 8; + let val = (1 as u8).wrapping_shl(mod_val as u32); + + // 本来应该直接把h bit设置为1的。但是这里总共只有bits个bit, 访问m[i] 把相应位设置为1 + // a |= b ==> 按位或, 后赋值给a + // let position: usize = bitpos / 8; + dst_chars[bitpos / 8] |= val; + + // 累加来实现k个hash函数, h.wrapping_add(delta) ==> h += delta + // LevelDB中并没有真正创建k个哈希函数。而是使用旧有的哈希值累加。 + // 使用了最原始的h哈希值位移来得到。(h >> 17) | (h << 15);,累加delta得到下一次hash值。 + h = h.wrapping_add(delta); + } + } + + // Vec 转 Slice + Slice::from_buf(&dst_chars) + } + + // fn create_filter_u8(&self, keys: Vec) -> Slice { + // self.create_filter_u8_with_len(keys.len(), keys) + // } + // + // fn create_filter_u8_with_len(&self, capacity: usize, keys: Vec) -> Slice { + // todo!() + // } + + fn key_may_match(&self, key: &Slice, bloom_filter: &Slice) -> bool { 
+ // 1、插入时按1Byte对齐; + // 2、尾部插入了一个Byte的hash个数 + // 所以大小不能小于2个字节 + let len: usize = bloom_filter.size(); + if len < 2 { + return false; + } + + // 获得相应的内存区域的数据: 除去尾部的1Byte对应的hash个数,就是当前位数组容器的大小 + let bloom_filter_array:Vec = bloom_filter.to_vec(); + // 总共的bits数目 + let bits: usize = (len - 1) * 8; + + // 取得k哈希函数的数目 + // Use the encoded k so that we can read filters generated by bloom filters created using different parameters. + let k: u8 = bloom_filter_array[len - 1]; + // 对于大于30个哈希函数的情况,这里直接返回存在 + if k > 30 { + // Reserved for potentially new encodings for short bloom filters. Consider it a match. + return true; + } + + // 1、计算查询key对应的hash值 + // 2、按插入规则去 &,只要有1bit不相同,那就不存在。 + + // 计算哈希值 + let mut h : u32 = key.bloom_hash(); + // Rotate right 17 bits + let delta = (h >> 17) | (h << 15); + + // 计算key的hash值,重复计算阶段的步骤,循环计算k_个hash值,只要有一个结果对应的bit位为0,就认为不匹配,否则认为匹配 + for j in 0..k { + let bitpos:usize = ((h as usize) % bits); + if (bloom_filter_array[bitpos/8] & (1 << (bitpos % 8))) == 0 { + return false; + } + + h = h.wrapping_add(delta); + } + + return true; + } +} \ No newline at end of file diff --git a/src/util/filter_policy_bloom_test.rs b/src/util/filter_policy_bloom_test.rs new file mode 100644 index 0000000000000000000000000000000000000000..bbb8ebab69a196f275d332701eda219414d5c9de --- /dev/null +++ b/src/util/filter_policy_bloom_test.rs @@ -0,0 +1,193 @@ +use crate::traits::filter_policy_trait::FilterPolicy; +use crate::util::filter_policy::{AsBloomHash, FromPolicy}; +use crate::util::filter_policy_bloom::BloomFilterPolicy; +use crate::util::hash::ToHash; +use crate::util::slice::Slice; + +// #################### BloomFilterPolicy test +#[test] +fn test_bloom_hash() { + let val = "aabbccd"; + let slice: Slice = Slice::from_buf(val.as_bytes()); + + let hash_val = BloomFilterPolicy::bloom_hash(&slice); + let hash_val_1 = slice.bloom_hash(); + assert_eq!(hash_val, hash_val_1); + assert_eq!(hash_val, 2085241752); +} + +#[test] +fn test_new() { + let bloom_filter: 
BloomFilterPolicy = BloomFilterPolicy::new_with_bits_per_key(8); + assert_eq!(bloom_filter.from_bits_per_key(), 8); + assert_eq!(bloom_filter.from_k(), 6); + + let bloom_filter = BloomFilterPolicy::new(); + assert_eq!(bloom_filter.from_bits_per_key(), 10); + assert_eq!(bloom_filter.from_k(), 7); +} + +// #################### FilterPolicy test +#[test] +fn test_create_filter() { + let policy = BloomFilterPolicy::new_with_bits_per_key(800); + + // 如下三个值, 存放在 BloomFilter 中 + let s1 = Slice::try_from(String::from("hello")).unwrap(); + let s2 = Slice::try_from(String::from("world")).unwrap(); + let s3 = Slice::try_from(String::from("hello world")).unwrap(); + + let mut keys : Vec<&Slice> = Vec::new(); + keys.push(&s1); + keys.push(&s2); + keys.push(&s3); + + let bloom_filter: Slice = policy.create_filter(keys); + + // 验证通过 + let mut key_may_match = policy.key_may_match( + &Slice::try_from(String::from("hello")).unwrap(), + &bloom_filter); + assert!(key_may_match); + + // 验证通过 + key_may_match = policy.key_may_match(&Slice::try_from(String::from("world")).unwrap(), + &bloom_filter); + assert!(key_may_match); + + // 因为不存在,所以验证不通过 + let mut key_not_match = policy.key_may_match(&Slice::try_from(String::from("x")).unwrap(), + &bloom_filter); + assert!(!key_not_match); + + // 因为不存在,所以验证不通过 + key_not_match = policy.key_may_match(&Slice::try_from(String::from("helloworld")).unwrap(), + &bloom_filter); + assert!(!key_not_match); + + // 因为存在,所以验证通过 + let key_match = policy.key_may_match(&Slice::try_from(String::from("hello world")).unwrap(), + &bloom_filter); + assert!(key_match); + + // 因为不存在,所以验证不通过 + key_not_match = policy.key_may_match(&Slice::try_from(String::from("foo")).unwrap(), + &bloom_filter); + assert!(!key_not_match); + + // 验证通过 + let mut key_may_match = policy.key_may_match( + &Slice::try_from(String::from("hello")).unwrap(), + &bloom_filter); + assert!(key_may_match); +} + +/// 指定超长长度。可以超过放置的值 +#[test] +fn test_create_filter_with_long_len(){ + let policy = 
BloomFilterPolicy::new_with_bits_per_key(800); + + // 如下三个值, 存放在 BloomFilter 中 + let s1 = Slice::try_from(String::from("hello")).unwrap(); + let s2 = Slice::try_from(String::from("world")).unwrap(); + let s3 = Slice::try_from(String::from("hello world")).unwrap(); + + let mut keys : Vec<&Slice> = Vec::new(); + keys.push(&s1); + keys.push(&s2); + keys.push(&s3); + + let bloom_filter: Slice = policy.create_filter_with_len(600, keys); + + // 验证通过 + let mut key_may_match = policy.key_may_match( + &Slice::try_from(String::from("hello")).unwrap(), + &bloom_filter); + assert!(key_may_match); + + // 验证通过 + key_may_match = policy.key_may_match(&Slice::try_from(String::from("world")).unwrap(), + &bloom_filter); + assert!(key_may_match); + + // 因为不存在,所以验证不通过 + let mut key_not_match = policy.key_may_match(&Slice::try_from(String::from("x")).unwrap(), + &bloom_filter); + assert!(!key_not_match); + + // 因为不存在,所以验证不通过 + key_not_match = policy.key_may_match(&Slice::try_from(String::from("helloworld")).unwrap(), + &bloom_filter); + assert!(!key_not_match); + + // 因为存在,所以验证通过 + let key_match = policy.key_may_match(&Slice::try_from(String::from("hello world")).unwrap(), + &bloom_filter); + assert!(key_match); + + // 因为不存在,所以验证不通过 + key_not_match = policy.key_may_match(&Slice::try_from(String::from("foo")).unwrap(), + &bloom_filter); + assert!(!key_not_match); + + // 验证通过 + let mut key_may_match = policy.key_may_match( + &Slice::try_from(String::from("hello")).unwrap(), + &bloom_filter); + assert!(key_may_match); +} + +/// 指定端长度。放不开放置的值。 此时对于 BloomFilterPolicy 来讲不需要扩容 +#[test] +fn test_create_filter_with_short_len(){ + let policy = BloomFilterPolicy::new_with_bits_per_key(800); + + // 如下三个值, 存放在 BloomFilter 中 + let s1 = Slice::try_from(String::from("hello")).unwrap(); + let s2 = Slice::try_from(String::from("world")).unwrap(); + let s3 = Slice::try_from(String::from("hello world")).unwrap(); + + let mut keys : Vec<&Slice> = Vec::new(); + keys.push(&s1); + keys.push(&s2); + 
keys.push(&s3); + + let bloom_filter: Slice = policy.create_filter_with_len(2, keys); + + // 验证通过 + let mut key_may_match = policy.key_may_match( + &Slice::try_from(String::from("hello")).unwrap(), + &bloom_filter); + assert!(key_may_match); + + // 验证通过 + key_may_match = policy.key_may_match(&Slice::try_from(String::from("world")).unwrap(), + &bloom_filter); + assert!(key_may_match); + + // 因为不存在,所以验证不通过 + let mut key_not_match = policy.key_may_match(&Slice::try_from(String::from("x")).unwrap(), + &bloom_filter); + assert!(!key_not_match); + + // 因为不存在,所以验证不通过 + key_not_match = policy.key_may_match(&Slice::try_from(String::from("helloworld")).unwrap(), + &bloom_filter); + assert!(!key_not_match); + + // 因为存在,所以验证通过 + let key_match = policy.key_may_match(&Slice::try_from(String::from("hello world")).unwrap(), + &bloom_filter); + assert!(key_match); + + // 因为不存在,所以验证不通过 + key_not_match = policy.key_may_match(&Slice::try_from(String::from("foo")).unwrap(), + &bloom_filter); + assert!(!key_not_match); + + // 验证通过 + let mut key_may_match = policy.key_may_match( + &Slice::try_from(String::from("hello")).unwrap(), + &bloom_filter); + assert!(key_may_match); +} \ No newline at end of file diff --git a/src/util/filter_policy_internal.rs b/src/util/filter_policy_internal.rs new file mode 100644 index 0000000000000000000000000000000000000000..4b5516c722543795b985693ed88bb27f9a8717cf --- /dev/null +++ b/src/util/filter_policy_internal.rs @@ -0,0 +1,37 @@ +use crate::traits::filter_policy_trait::FilterPolicy; +use crate::util::slice::Slice; + +// ######################### InternalFilterPolicy +pub struct InternalFilterPolicy { + user_policy_: dyn FilterPolicy +} + +impl InternalFilterPolicy { + fn new(policy: Box) -> Box { + // InternalFilterPolicy{ user_policy_: policy } + todo!() + } +} + +impl FilterPolicy for InternalFilterPolicy { + fn name(&self) -> String { + todo!() + } + + fn create_filter(&self, keys: Vec<&Slice>) -> Slice { + self.create_filter_with_len(keys.len(), 
keys) + } + + fn create_filter_with_len(&self, capacity: usize, keys: Vec<&Slice>) -> Slice { + // 根据指定的参数创建过滤器,并返回结果, 结果为dst的原始内容 + append结果。 + // 参数keys[0,n-1]包含依据用户提供的comparator排序的key列表--可重复, + // 并把根据这些key创建的filter追加到 dst中。 + // + todo!() + } + + fn key_may_match(&self, key: &Slice, bloom_filter: &Slice) -> bool { + todo!() + } + +} \ No newline at end of file diff --git a/src/util/filter_policy_internal_test.rs b/src/util/filter_policy_internal_test.rs new file mode 100644 index 0000000000000000000000000000000000000000..f2003fedfb09d5166707bbffc62421841b290ee7 --- /dev/null +++ b/src/util/filter_policy_internal_test.rs @@ -0,0 +1,5 @@ + +#[test] +fn test__() { + +} diff --git a/src/util/filter_policy_test.rs b/src/util/filter_policy_test.rs index bea6d2aafb9bf43e610f680a568aa0859754e1ef..f2003fedfb09d5166707bbffc62421841b290ee7 100644 --- a/src/util/filter_policy_test.rs +++ b/src/util/filter_policy_test.rs @@ -1,69 +1,5 @@ -use std::ptr::null; -use crate::traits::filter_policy_trait::FilterPolicy; -use crate::util::bloom_filter; -use crate::util::filter_policy::{AsBloomHash, BloomFilterPolicy, FromPolicy}; -use crate::util::hash::ToHash; -use crate::util::slice::Slice; -// #################### BloomFilterPolicy test #[test] -fn test_bloom_hash() { - let val = "aabbccd"; - let slice: Slice = Slice::from_buf(val.as_bytes()); +fn test__() { - let hash_val = BloomFilterPolicy::bloom_hash(&slice); - let hash_val_1 = slice.bloom_hash(); - assert_eq!(hash_val, hash_val_1); - assert_eq!(hash_val, 2085241752); } - -#[test] -fn test_new() { - let bloom_filter: BloomFilterPolicy = BloomFilterPolicy::new(8); - assert_eq!(bloom_filter.from_bits_per_key(), 8); - assert_eq!(bloom_filter.from_k(), 6); - - let bloom_filter = BloomFilterPolicy::new(800); - assert_eq!(bloom_filter.from_bits_per_key(), 800); - assert_eq!(bloom_filter.from_k(), 30); -} - -// #################### FilterPolicy test -#[test] -fn test_create_filter() { - let policy = BloomFilterPolicy::new(800); - - 
let s1 = Slice::try_from(String::from("hello")).unwrap(); - let s2 = Slice::try_from(String::from("world")).unwrap(); - - let mut keys : Vec<&Slice> = Vec::new(); - keys.push(&s1); - keys.push(&s2); - - let bloom_filter: Slice = policy.create_filter(keys); - - let mut key_may_match = policy.key_may_match( - &Slice::try_from(String::from("hello")).unwrap(), - &bloom_filter); - assert!(key_may_match); - - key_may_match = policy.key_may_match(&Slice::try_from(String::from("world")).unwrap(), - &bloom_filter); - assert!(key_may_match); - - let mut key_not_match = policy.key_may_match(&Slice::try_from(String::from("x")).unwrap(), - &bloom_filter); - assert!(!key_not_match); - - key_not_match = policy.key_may_match(&Slice::try_from(String::from("helloworld")).unwrap(), - &bloom_filter); - assert!(!key_not_match); - - key_not_match = policy.key_may_match(&Slice::try_from(String::from("hello world")).unwrap(), - &bloom_filter); - assert!(!key_not_match); - - key_not_match = policy.key_may_match(&Slice::try_from(String::from("foo")).unwrap(), - &bloom_filter); - assert!(!key_not_match); -} \ No newline at end of file diff --git a/src/util/hash.rs b/src/util/hash.rs index 15a1a0328f2639f07d7d9da34d64e3e822cf8328..71a02198f601caf62c0584258738f9448cb5c6a4 100644 --- a/src/util/hash.rs +++ b/src/util/hash.rs @@ -1,9 +1,7 @@ use std::ops::{BitXor, Mul}; use std::mem::size_of; use std::slice as stds; - -use crate::traits::coding_trait::CodingTrait; -use crate::util::coding::Coding; +use crate::util::coding::Decoder; use crate::util::r#const::HASH_DEFAULT_SEED; @@ -11,7 +9,6 @@ use crate::util::slice::Slice; /// 一种可以计算 hash 的特质 pub trait ToHash { - fn to_hash(&self) -> u32; fn to_hash_with_seed(&self, seed: u32) -> u32; @@ -20,6 +17,7 @@ pub trait ToHash { /// 所有基本类型 u8, i8, u16, u32 ... 
的Vec都可以实现 hash 值计算 /// Sample: /// ``` +/// use level_db_rust::util::hash::ToHash; /// let hash = vec!['a','b','c'].to_hash(); /// ``` impl ToHash for Vec { @@ -41,6 +39,8 @@ impl ToHash for Vec { /// 所有基本类型 u8, i8, u16, u32 ... 的slice都可以实现 hash 值计算 /// Sample: /// ``` +/// use level_db_rust::util::hash::ToHash; +/// /// let buf = ['a','b','c']; /// let hash_val = &buf.as_slice().to_hash(); /// ``` @@ -65,6 +65,7 @@ impl ToHash for &[T] { /// 实现了 &str 转 ToHash 的特质 /// Sample: /// ``` +/// use level_db_rust::util::hash::ToHash; /// let hash = "abc".to_hash(); /// ``` impl ToHash for &str { @@ -82,6 +83,9 @@ impl ToHash for &str { /// 实现了 Slice 转 ToHash 的特质 /// Sample: /// ``` +/// use level_db_rust::util::hash::ToHash; +/// use level_db_rust::util::slice::Slice; +/// /// let val = "aabbccd"; /// let slice: Slice = Slice::from_buf(val.as_bytes()); /// let slice_hash_val = slice.to_hash(); @@ -101,6 +105,8 @@ impl ToHash for Slice { /// 实现了 String 转 ToHash 的特质 /// Sample: /// ``` +/// use level_db_rust::util::hash::ToHash; +/// /// let val = "aabbccd"; /// let val_s = String::from(val); /// let string_hash_val = val_s.to_hash(); @@ -123,21 +129,26 @@ pub struct Hash {} impl Hash { #[inline] pub fn hash_code(data: &[u8], seed: u32) -> u32 { + let n = data.len(); + + // Similar to murmur hash + // uint32_t ==> unsigned int ==> u32 let murmur_hash: u32 = 0xc6a4a793; let r: u32 = 24; - let limit: usize = data.len(); - let mul_first = limit.mul(murmur_hash as usize); // x = data_size * murmur_hash + let limit: usize = n; + let mul_first = n.mul(murmur_hash as usize); // x = data_size * murmur_hash let mut h: u32 = seed.bitxor(mul_first as u32); // h = seed ^ x + let mut decoder = Decoder::with_buf(data); + // 每次按照四字节长度读取字节流中的数据 w,并使用普通的哈希函数计算哈希值。 let mut position: usize = 0; - while position + 4 <= limit { + while decoder.can_get() && position + 4 <= limit { //每次解码前4个字节,直到最后剩下小于4个字节 // rust的 &[u8] 是胖指针,带长度信息的,会做range check,所以是安全的。 // 虽然decode_fixed32 
中也是解码4字节,但传入整个data在方法上不明确,因此传 [position..(position + 4)], 可以更加方便理解,对性能无影响 - let w = Coding::decode_fixed32(&data[position..(position + 4)]); - + let w = unsafe { decoder.uncheck_get_fixed32() }; // 向后移动4个字节 position += 4; diff --git a/src/util/mem_debug.rs b/src/util/mem_debug.rs index bef8a837f271a8644bd1d2365d2e17c3dba126a4..93a79a2e49f6915f0aa1d2fb259dc6dfdf14b41a 100644 --- a/src/util/mem_debug.rs +++ b/src/util/mem_debug.rs @@ -1,4 +1,4 @@ -use std::ffi::{c_char, c_void}; +use core::ffi::{c_char, c_void}; use std::ptr::{null, null_mut}; extern "C" fn write_cb(_: *mut c_void, message: *const c_char) { diff --git a/src/util/mod.rs b/src/util/mod.rs index 527f6e559e38757531de80316b2beabdc9b252c7..2f81a7b09e2b942a873b0bccda6fd63b974b3f12 100644 --- a/src/util/mod.rs +++ b/src/util/mod.rs @@ -11,9 +11,7 @@ pub mod r#const; pub mod slice; mod slice_test; pub mod cache; -mod cache_test; pub mod coding; -mod coding_test; pub mod arena; mod arena_test; @@ -23,10 +21,14 @@ pub mod comparator; mod comparator_test; pub mod crc; mod crc_test; -pub mod bloom_filter; -mod bloom_filter_test; +// pub mod bloom_filter; +// mod bloom_filter_test; pub mod filter_policy; mod filter_policy_test; +pub mod filter_policy_bloom; +mod filter_policy_bloom_test; +pub mod filter_policy_internal; +mod filter_policy_internal_test; pub mod histogram; mod histogram_test; @@ -44,6 +46,9 @@ pub mod unsafe_slice; pub mod env; mod env_test; pub mod mem_debug; +// pub mod error; +// mod error_test; +// pub mod error_code; /// 定义别名 pub type Result = result::Result; diff --git a/src/util/slice.rs b/src/util/slice.rs index 1f923b8f8c61d1ca03cd47a75658d7f5ef6237f1..215e17f7835c44e358509720d2a0d6335d479f03 100644 --- a/src/util/slice.rs +++ b/src/util/slice.rs @@ -1,9 +1,10 @@ +use core::ops::{Range, RangeFrom}; use std::mem; use std::borrow::Cow; use std::cmp::Ordering; use std::fmt::{Display, Formatter}; use std::mem::ManuallyDrop; -use std::ops::Deref; +use std::ops::{Deref, DerefMut, RangeTo}; 
#[derive(Debug)] pub struct Slice { @@ -26,7 +27,6 @@ impl Default for Slice { } impl Slice { - /// 从 &mut [u8] 转到 Slice, 这里存在内存拷贝开销 #[inline] pub fn from_buf(buf: &[u8]) -> Self { @@ -62,7 +62,7 @@ impl Slice { #[inline] pub fn as_sub_ref(&self, start: usize, length: usize) -> &[u8] { - &(**self)[start..(start+length)] + &(**self)[start..(start + length)] } /// 移除头部 n 个元素 @@ -145,7 +145,7 @@ impl From for Vec { } } -impl > From for Slice { +impl> From for Slice { #[inline] fn from(r: R) -> Self { Self { @@ -207,13 +207,72 @@ impl core::ops::Index for Slice { } } +impl core::ops::Index> for Slice { + type Output = [u8]; + + /// 获取指定下标范围的数据 + fn index(&self, range: Range) -> &Self::Output { + assert!(range.end <= self.size()); + &(**self)[range.start..range.end] + } +} + +impl core::ops::Index> for Slice { + type Output = [u8]; + + /// 获取指定下标范围的数据 + fn index(&self, range: RangeFrom) -> &Self::Output { + &(**self)[range.start..] + } +} + +impl core::ops::Index> for Slice { + type Output = [u8]; + + /// 获取指定下标范围的数据 + fn index(&self, range: RangeTo) -> &Self::Output { + assert!(range.end <= self.size()); + &(**self)[..range.end] + } +} + +impl core::ops::IndexMut> for Slice { + /// 获取指定下标范围的数据 + fn index_mut(&mut self, index: Range) -> &mut Self::Output { + assert!(index.end <= self.size()); + &mut (**self)[..index.end] + } +} + +impl core::ops::IndexMut> for Slice { + /// 获取指定下标范围的数据 + fn index_mut(&mut self, index: RangeFrom) -> &mut Self::Output { + &mut (**self)[index.start..] 
+ } +} + +impl core::ops::IndexMut> for Slice { + /// 获取指定下标范围的数据 + fn index_mut(&mut self, index: RangeTo) -> &mut Self::Output { + assert!(index.end <= self.size()); + &mut (**self)[..index.end] + } +} + + impl Deref for Slice { type Target = [u8]; /// Slice 解引用到 &[u8] #[inline] fn deref(&self) -> &Self::Target { - &*self.data + &*self.data + } +} + +impl DerefMut for Slice { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut *self.data } } diff --git a/src/util/status.rs b/src/util/status.rs index 97b33d6ecb53e76dd74a8320c3a8cc2add26bad9..c41900fcae051286920d78373f3c5b16cd032965 100644 --- a/src/util/status.rs +++ b/src/util/status.rs @@ -35,6 +35,7 @@ impl Status { /// # Examples /// /// ``` + /// use level_db_rust::util::status::{LevelError, Status}; /// Status::wrapper_str(LevelError::KInvalidArgument, "IndexOutOfRange"); /// ``` #[inline] diff --git a/tests/custom_proc_macro_test.rs b/tests/custom_proc_macro_test.rs new file mode 100644 index 0000000000000000000000000000000000000000..e3bd6424b014f38ac5e083a6d3360751f4c2c040 --- /dev/null +++ b/tests/custom_proc_macro_test.rs @@ -0,0 +1,39 @@ +use custom_proc_macro::arr; + +#[derive(Debug, PartialEq)] +struct Test; + +#[test] +fn test_arr() { + let origin = [0; 16]; + + let u32_arr = arr!([0_u32; 16]); + println!("{:?}", u32_arr); + assert_eq!(origin, u32_arr); + + let num_arr = arr!([0; 16]); + println!("{:?}", num_arr); + assert_eq!(origin, num_arr); + + let u32_arr: [u32; 16] = arr!([0_u32; 16]); + println!("{:?}", u32_arr); + assert_eq!(origin, u32_arr); + + let num_arr: [u32; 16] = arr!([0; 16]); + println!("{:?}", num_arr); + assert_eq!(origin, num_arr); + + let num_arr: [u64; 16] = arr!([0; 16]); + println!("{:?}", num_arr); + assert_eq!(origin, u32_arr); + + let test_origin = [ + Test, Test, Test, Test, Test, Test, Test, Test, + Test, Test, Test, Test, Test, Test, Test, Test + ]; + let test_arr = arr!([Test; 16]); + println!("{:?}", test_arr); + assert_eq!(test_origin, test_arr); + + let err = 
arr!(Test;16); +} \ No newline at end of file