diff --git a/Cargo.toml b/Cargo.toml index 6f688ef24e926e517e32af2d66ae4932fa50edca..663baf70a491415bd5d55f7d82e56a9ecc5ef241 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,7 +10,7 @@ path = "src/lib.rs" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] - +tokio = "1.24.1" [profile.dev] diff --git a/README.md b/README.md index 2b201830652b816a4c7ccc9a6ae8a4c02d3e94b6..c31a5a8f45bee2d97f083203d49430375b6c3e1b 100644 --- a/README.md +++ b/README.md @@ -46,50 +46,51 @@ LSM tree 是许多 KV型或日志型数据库所依赖的核心实现,例如Bi ### 1.0.0 1.0.0 版本, 完成 util 相关的内容 -| 功能模块 | 完成人 | 进度 | -|----------------------------------------|-----------------|------| -| Arena (Memory Management) | wangboo | 100% | -| bloom | fengyang | 10% | -| Cache | colagy | 10% | -| Coding (Primitive Type SerDe) | colagy | 100% | -| Comparator | fengyang | 90% | -| CRC | wangboo、lxd5866 | 100% | -| Env | lxd5866 | | -| filter_policy | fengyang | 10% | -| Hash | fengyang | 100% | -| Histgram | kazeseiriou | 100% | -| loging | | | -| MutexLock | kazeseiriou | 100% | -| Random | colagy | | -| Status | fengyang | 100% | -| Slice | wangboo | 100% | +| 功能模块 | 完成人 | 进度 | +|-------------------------------|-----------------|------| +| Arena (Memory Management) | wangboo | 100% | +| bloom | fengyang | 100% | +| Cache | colagy | 10% | +| Coding (Primitive Type SerDe) | colagy | 100% | +| Comparator | fengyang | 100% | +| CRC | wangboo、lxd5866 | 100% | +| Env | lxd5866 | | +| filter_policy | fengyang | 100% | +| Hash | fengyang | 100% | +| Histgram | kazeseiriou | 100% | +| loging | | | +| MutexLock | kazeseiriou | 100% | +| Random | colagy | | +| Status | fengyang | 100% | +| Slice | wangboo | 100% | ### 1.1.0 1.1.0 版本, 完成基础零部件 -| 功能模块 | 完成人 | 进度 | -|----------------------------------------------------------------------------------|-------------|-----| -| util.Options(ReadOptions, WriteOptions) | kazeseiriou | | -| util.ENV(WritableFile, SequentialFile, 
RandomAccessFile, FileLock) | lxd5866 | | -| util.Logger | peach | | -| table.Block, BlockBuilder, FilterBlockBuilder | colagy | | -| FilterBlock, FilterBlockReader | colagy | | -| table.format(Footer, BlockHandle) | fengyang | | -| db.dbformat(InternalKeyComparator, InternalFilterPolicy, LookupKey, InternalKey) | fengyang | | -| db.SkipList | wangboo | | -| table.Iterator(DBIter, MergingIterator, TwoLevelIterator...) | kazeseiriou | | -| IteratorWrapper | kazeseiriou | | -| db.MemTable(MemTable, MemTableIterator) | wangboo | | -| SSTable | fengyang | | -| table.Table | peach | | -| db.leveldb_util | hui | | -| db.log_format | hui | | -| db.LogReader | wangboo | 90% | -| db.LogWriter | wangboo | 90% | -| db.TableCache | colagy | | -| db.VersionEdit(Tag, VersionEdit, FileMetaData) | fengyang | | -| db.VersionSet(Version, LevelFileNumIterator, SaverState) | fengyang | | -| WriteBatch | peach | | +| 功能模块 | 完成人 | 进度 | +|----------------------------------------------------------------------------------|--------------|-----| +| util.Options(ReadOptions, WriteOptions) | kazeseiriou | | +| util.ENV(WritableFile, SequentialFile, RandomAccessFile, FileLock) | lxd5866 | | +| util.Logger | peach | | +| table.Block, BlockBuilder, FilterBlockBuilder | colagy | | +| FilterBlock, FilterBlockReader | colagy | | +| table.format(Footer, BlockHandle) | fengyang、半支烟 | | +| db.dbformat(InternalKeyComparator, InternalFilterPolicy, LookupKey, InternalKey) | fengyang、半支烟 | | +| db.SkipList | wangboo | | +| table.Iterator(DBIter, MergingIterator, TwoLevelIterator...) 
| kazeseiriou | | +| IteratorWrapper | kazeseiriou | | +| db.MemTable(MemTable, MemTableIterator) | wangboo | | +| SSTable | fengyang | | +| table.Table | peach | | +| db.leveldb_util | wangboo | | +| db.log_format | wangboo | | +| db.LogReader | wangboo | 90% | +| db.LogWriter | wangboo | 90% | +| db.TableCache | colagy | | +| LinkedList | fengyang | | +| db.VersionEdit(Tag, VersionEdit, FileMetaData) | fengyang | | +| db.VersionSet(Version, LevelFileNumIterator, SaverState) | fengyang | | +| WriteBatch | peach | | #### 1.1.0 计划 * 完成gitee -> github (同步) 主仓库gitee diff --git a/src/db/db.rs b/src/db/db.rs new file mode 100644 index 0000000000000000000000000000000000000000..84902bce5a6cdd6272387451d60621bf69e42029 --- /dev/null +++ b/src/db/db.rs @@ -0,0 +1,62 @@ +use std::ops::Range; +use crate::traits::DataIterator; +use crate::util::options::{Options, ReadOptions, WriteOptions}; +use crate::util::Result; +use crate::util::slice::Slice; + +struct DB { + +} + +//TODO temp +struct WriteBatchOptions {} +struct WriteBatch {} +pub trait Snapshot {} + +impl DB { + + async fn open(_opt: Options, _name: String) -> Result { + todo!() + } + + async fn put(&mut self, _opt: WriteOptions, _key: Slice, _value: Slice) -> Result<()> { + todo!() + } + + async fn delete(&mut self, _key: Slice) -> Result { + todo!() + } + + async fn write(&mut self, _opt: WriteBatchOptions, _updates: WriteBatch) -> Result<()> { + todo!() + } + + async fn get(&mut self, _opt: ReadOptions, _key: Slice) -> Result> { + todo!() + } + + fn new_iterator(&mut self, _opt: ReadOptions) -> Result> { + todo!() + } + + fn get_snapshot(&self) -> Box { + todo!() + } + + fn release_snapshot(&mut self, _snap: Box) -> Result<()> { + todo!() + } + + fn get_property(&self, _key: Slice) -> Option { + todo!() + } + + fn get_approximate_sizes(&self, _rng: Range, _n: u32) -> Result> { + todo!() + } + + async fn compact_range(&mut self, _rng: Range) -> Result<()> { + todo!() + } + +} \ No newline at end of file diff --git 
a/src/db/db_format.rs b/src/db/db_format.rs new file mode 100644 index 0000000000000000000000000000000000000000..891b90651e6c4f77f50fa837693675794d3045b9 --- /dev/null +++ b/src/db/db_format.rs @@ -0,0 +1,329 @@ +use std::cmp::Ordering; +use std::ops::Deref; +use crate::db::db_format::ValueType::{K_TYPE_DELETION, K_TYPE_VALUE}; +use crate::db::file_meta_data::FileMetaData; +use crate::traits::comparator_trait::Comparator; +use crate::util::slice::Slice; + +pub enum ValueType { + /// 0x0 + K_TYPE_DELETION, + + /// 0x1 + K_TYPE_VALUE, +} + +pub struct ParsedInternalKey { + user_key: Slice, + sequence: u64, + value_type: ValueType +} + +#[derive(Debug)] +pub struct InternalKey { + rep_: Slice +} + +/// InternalKeyComparator +pub struct InternalKeyComparator { + user_comparator_: dyn Comparator +} + +/// 查找键 +// todo add clone trait +pub struct LookupKey { + // We construct a char array of the form: + // klength varint32 <-- start_ + // userkey char[klength] <-- kstart_ + // tag uint64 + // <-- end_ + // The array is a suitable MemTable key. + // The suffix starting with "userkey" can be used as an InternalKey. 
+ + start_: Slice, + kstart_: Slice, + end_: Slice, + + // Avoid allocation for short keys + space_: [u8; 200], +} + +impl ValueType { + pub fn get_value(&self) -> i32 { + let le = match self { + K_TYPE_DELETION => 0, + K_TYPE_VALUE => 1 + }; + + le + } +} + +impl TryFrom for ValueType { + type Error = String; + + /// i32 转 ValueType + /// + /// # Arguments + /// + /// * `value`: 值 + /// + /// returns: Result>::Error> + /// + /// # Examples + /// + /// ``` + /// let rs: ValueType = ValueType::try_from(1)?; + /// assert!(&rs == K_TYPE_DELETION); + /// ``` + #[inline] + fn try_from(value: i32) -> Result { + match value { + 0 => Ok(K_TYPE_DELETION), + 1 => Ok(K_TYPE_VALUE), + // all other numbers + _ => Err(String::from(format!("Unknown code: {}", value))) + } + } +} + +impl Default for ParsedInternalKey { + #[inline] + fn default() -> Self { + ParsedInternalKey { + user_key: Default::default(), + sequence: 0, + value_type: K_TYPE_DELETION, + } + } +} + +impl ParsedInternalKey { + + fn debug_string(&self) -> Slice { + Slice::default() + } + + /// Return the length of the encoding of "key". + fn internal_key_encoding_length(&self, key: ParsedInternalKey) -> usize { + key.user_key.size() + 8 + } + + // 将 self 的数据追加到 result 中 + fn append_internal_key(&self, result: Slice) { + todo!() + } + + fn new(user_key: Slice, sequence: u64, value_type: ValueType) -> Self { + Self { + user_key, + sequence, + value_type, + } + } + + /// Attempt to parse an internal key from "internal_key". On success, + /// stores the parsed data in "*result", and returns true. + /// On error, returns false, leaves "*result" in an undefined state. + fn parse_internal_key(internal_key : Slice, target: ParsedInternalKey) -> bool { + // line 173 + todo!() + } + + /// Returns the user key portion of an internal key. 
+ fn extract_user_key(internal_key : Slice) -> Slice { + todo!() + } +} + +impl Default for InternalKey { + #[inline] + fn default() -> Self { + Self { + rep_: Slice::default() + } + } +} + +impl PartialEq for InternalKey { + /// 判断两个 InternalKey 是否相同 + #[inline] + fn eq(&self, other: &Self) -> bool { + self.rep_.eq(&other.rep_) + } +} + +impl InternalKey { + fn new(user_key: Slice, sequence: u64, value_type: ValueType) -> Self { + // line 145 + let result: Slice = Slice::default(); + ParsedInternalKey::new(user_key, sequence, value_type) + .append_internal_key(result); + + Self{ + // rep_: result + // todo result值如何赋值 + rep_: Slice::default() + } + } + + /// + /// xxx + /// + /// # Arguments + /// * `input`: + /// + /// # Examples + /// + /// ``` + /// + /// ``` + fn decode_from(&self, input: Slice) { + todo!() + + // wangbo + // self.rep_.assign(input.borrow_data().bytes()); + } + + /// 输出 InternalKey 调试信息 + fn debug_string(&self) -> Slice { + // line 164 + todo!() + } + + fn encode(self) -> Slice { + self.rep_ + } + + fn user_key(self) -> Slice { + ParsedInternalKey::extract_user_key(self.rep_) + } + + fn set_from(self, p: ParsedInternalKey) { + // self.rep_.clear(); + p.append_internal_key(self.rep_); + } + + fn clear(self) { + // self.rep_.clear(); + } +} + +impl InternalKeyComparator { + pub fn create(c: Box) -> Box { + todo!() + } + + pub fn user_comparator(&self) -> Box { + todo!() + } + + pub fn compare_internal_key(&self, key1: &InternalKey, key2: &InternalKey) -> u32 { + // line 122, 167 + todo!() + } +} + +/// InternalKeyComparator 比较器: 用来比较内部键(Internal Key)。 +/// 内部键值是为了方便处理,将原普通键、序列号和值类型组成的新键。 +impl Comparator for InternalKeyComparator { + // fn new(c: Box) -> InternalKeyComparator { + // todo!() + // } + + fn compare(&self, _a: &Slice, _b: &Slice) -> Option { + todo!() + } + + fn get_name(&self) -> String { + String::from("leveldb.InternalKeyComparator") + } + + fn find_shortest_separator(&self, _start: &String, _limit: &Slice) -> String { + todo!() 
+ } + + fn find_short_successor(&self, _key: &String) -> String { + todo!() + } +} + +impl LookupKey { + /// Initialize *this for looking up user_key at a snapshot with + /// the specified sequence number. + fn new(user_key: Slice, sequence: u64) -> Self { + // todo + todo!() + } + + /// Return a key suitable for lookup in a MemTable. + fn mem_table_key(&self) -> Slice { + todo!() + } + + /// Return an internal key (suitable for passing to an internal iterator) + fn internal_key(&self) -> Slice { + // line 204 + todo!() + } + + /// Return the user key + fn user_key(&self) -> Slice { + // line 207 + todo!() + } +} + +// like ~LookupKey() +// impl Drop for LookupKey { +// fn drop(&mut self) { +// todo!() +// } +// } +// +// impl Default for LookupKey { +// #[inline] +// fn default() -> Self { +// Self { +// start_: Default::default(), +// kstart_: Default::default(), +// end_: Default::default(), +// space_: [u8; 200], +// } +// } +// } + + +pub struct Config {} +impl Config { + /// Maximum encoding length of a BlockHandle + pub const K_NUM_LEVELS: usize = 7; + + // Level-0 compaction is started when we hit this many files. + pub const KL0_COMPACTION_TRIGGER: usize = 4; + + // Soft limit on number of level-0 files. We slow down writes at this point. + pub const KL0_SLOWDOWN_WRITES_TRIGGER: usize = 8; + + // Maximum number of level-0 files. We stop writes at this point. + pub const K_L0_STOP_WRITES_TRIGGER: usize = 12; + + // Maximum level to which a new compacted memtable is pushed if it +// does not create overlap. We try to push to level 2 to avoid the +// relatively expensive level 0=>1 compactions and to avoid some +// expensive manifest file operations. We do not push all the way to +// the largest level since that can generate a lot of wasted disk +// space if the same key space is being repeatedly overwritten. + pub const K_MAX_MEM_COMPACT_LEVEL: usize = 2; + + // Approximate gap in bytes between samples of data read during iteration. 
+ pub const K_READ_BYTES_PERIOD: usize = 1048576; + + // kValueTypeForSeek defines the ValueType that should be passed when +// constructing a ParsedInternalKey object for seeking to a particular +// sequence number (since we sort sequence numbers in decreasing order +// and the value type is embedded as the low 8 bits in the sequence +// number in internal keys, we need to use the highest-numbered +// ValueType, not the lowest). + pub const K_VALUE_TYPE_FOR_SEEK: ValueType = ValueType::K_TYPE_VALUE; +} \ No newline at end of file diff --git a/src/db/db_format_test.rs b/src/db/db_format_test.rs new file mode 100644 index 0000000000000000000000000000000000000000..da87a75674d604de74b340edf3bf3367c59728b4 --- /dev/null +++ b/src/db/db_format_test.rs @@ -0,0 +1,6 @@ +use crate::db::db_format::Config; + +#[test] +fn test_db_format() { + Config::K_NUM_LEVELS; +} \ No newline at end of file diff --git a/src/db/file_meta_data.rs b/src/db/file_meta_data.rs new file mode 100644 index 0000000000000000000000000000000000000000..bc67682954616e298755d3232aff10decc9772d9 --- /dev/null +++ b/src/db/file_meta_data.rs @@ -0,0 +1,108 @@ +use std::cmp; +use std::cmp::Ordering; +use std::collections::HashMap; +use crate::db::db_format::{InternalKey, InternalKeyComparator}; + +/// @see version_edit FileMetaData +#[derive(Debug)] +pub struct FileMetaData { + // todo 参考rc的实现 + refs: i32, + // Seeks allowed until compaction + allowed_seeks: i32, + number: u64, + // File size in bytes + file_size: u64, + // Smallest internal key served by table + smallest: InternalKey, + // Largest internal key served by table + largest: InternalKey +} + +impl Default for FileMetaData { + #[inline] + fn default() -> Self { + Self { + refs: 0, + allowed_seeks: 1 << 30, + number: 0, + file_size: 0, + smallest: InternalKey::default(), + largest: InternalKey::default() + } + } +} + +impl FileMetaData { + #[inline] + pub fn new_with_refs(refs: i32) -> Self { + FileMetaData::new_with_refs_allowed_seeks(refs, 1 << 
30) + } + + #[inline] + pub fn new_with_refs_allowed_seeks(refs: i32, allowed_seeks: i32) -> Self { + FileMetaData::new_with_refs_allowed_seeks_file_size(refs, allowed_seeks, 0) + } + + pub fn new_with_refs_allowed_seeks_file_size(refs: i32, allowed_seeks: i32, file_size: u64) -> Self { + FileMetaData::new_with_refs_allowed_seeks_file_size_internal_key(refs, allowed_seeks, file_size, InternalKey::default(), InternalKey::default()) + } + + pub fn new_with_refs_allowed_seeks_file_size_internal_key(refs: i32, allowed_seeks: i32, file_size: u64, + smallest: InternalKey, largest: InternalKey) -> Self { + FileMetaData::new(refs, allowed_seeks, 0, file_size, smallest, largest) + } + + pub fn new_with_number_file_size_internal_key(number: u64, file_size: u64, smallest: InternalKey, largest: InternalKey) -> Self { + FileMetaData::new(0, 1 << 30, number, file_size, smallest, largest) + } + + pub fn new(refs: i32, allowed_seeks: i32, number: u64, file_size: u64, smallest: InternalKey, largest: InternalKey) -> Self { + Self { + refs, + allowed_seeks, + number, + file_size, + smallest, + largest + } + } + + pub fn get_smallest(&self) -> &InternalKey { + &self.smallest + } + + pub fn get_number(&self) -> u64 { + self.number + } + + pub fn get_refs(&self) -> i32 { + self.refs + } + + pub fn add_refs(&mut self, num: i32) { + self.refs += num; + } +} + +impl PartialEq for FileMetaData { + /// 判断两个 FileMetaData 是否相同 + #[inline] + fn eq(&self, other: &Self) -> bool { + self.refs == other.refs + && self.allowed_seeks == other.allowed_seeks + && self.number == other.number + && self.file_size == other.file_size + && self.smallest.eq(&other.smallest) + && self.largest.eq(&other.largest) + } +} + +impl PartialOrd for FileMetaData { + /// 判断两个 FileMetaData 的大小关系 + fn partial_cmp(&self, other: &Self) -> Option { + // todo + // InternalKeyComparator::compare() + Option::Some(Ordering::Equal) + } +} \ No newline at end of file diff --git a/src/db/file_meta_data_test.rs 
b/src/db/file_meta_data_test.rs new file mode 100644 index 0000000000000000000000000000000000000000..7c1b079b0c7bb1663718333dcadb8b7e5b171d4b --- /dev/null +++ b/src/db/file_meta_data_test.rs @@ -0,0 +1,29 @@ + +mod test { + use std::cmp::Ordering; + use crate::db::file_meta_data::FileMetaData; + + #[test] + fn test_eq() { + let meta1: FileMetaData = FileMetaData::default(); + let meta2: FileMetaData = FileMetaData::default(); + let meta3: FileMetaData = FileMetaData::new_with_refs(6); + let meta3_1: FileMetaData = FileMetaData::new_with_refs(6); + + assert!(meta1.eq(&meta2)); + assert!(!meta1.eq(&meta3)); + assert!(meta3.eq(&meta3_1)); + } + + #[test] + fn test_partial_ord() { + let meta0: FileMetaData = FileMetaData::default(); + let meta0_1: FileMetaData = FileMetaData::default(); + let meta3: FileMetaData = FileMetaData::new_with_refs(3); + let meta6: FileMetaData = FileMetaData::new_with_refs(6); + let meta7: FileMetaData = FileMetaData::new_with_refs(7); + + assert!(meta0.partial_cmp(&meta0_1).is_some()); + } + +} \ No newline at end of file diff --git a/src/db/mem_table.rs b/src/db/mem_table.rs new file mode 100644 index 0000000000000000000000000000000000000000..e0d99ce520db67dc86b2af526805eec584638ebb --- /dev/null +++ b/src/db/mem_table.rs @@ -0,0 +1,80 @@ +use std::rc::Rc; +use crate::traits::comparator_trait::Comparator; +use crate::traits::DataIterator; +use crate::util::slice::Slice; + +use crate::util::Result; + +pub enum ValueType { + Insert, + Deletion, +} + +/// 内存表 +pub struct MemTable { + cmp: Rc, +} + +/// 临时, 查找键 +pub struct LookupKey {} + +impl MemTable { + + /// 创建内存表 + /// + /// # Arguments + /// + /// * `_comparator`: 比较器 + /// + /// returns: MemTable + /// + /// # Examples + /// + /// ``` + /// let mt = MemTable::create(cmp); + /// ``` + pub fn create(cmp: Rc) -> Self { + Self { + cmp, + } + } + + /// 返回该表使用的内存近似值 + pub fn approximate_memory_usage(&self) -> usize { + todo!() + } + + /// 创建内存表迭代器 + /// + /// # Arguments + /// + /// returns: 
MemTable + /// + /// # Examples + /// + /// ``` + /// let mem = MemTable::create(comp); + /// let it = mem::new_new_iterator()?; + /// ``` + pub fn new_iterator(&self) -> Result> { + todo!() + } + + /// 像内存表中写入或删除一个元素 + pub fn add(&mut self, _seq_no: usize, _v_type: ValueType, _key: &Slice, _value: Slice) -> Result<()> { + todo!() + } + + /// 通过 key 查找结果 + pub fn get(&self, _key: &LookupKey) -> Result> { + todo!() + } + +} + +mod test { + #[test] + fn test() { + + } +} \ No newline at end of file diff --git a/src/db/mod.rs b/src/db/mod.rs index e696750c9f453f8fb7390e07732e4fed64189c27..0acd557b822986f8a0c2af170953efbcd4f949b2 100644 --- a/src/db/mod.rs +++ b/src/db/mod.rs @@ -1,3 +1,26 @@ +use crate::db::db_format::InternalKeyComparator; +use crate::db::skip_list::SkipList; +use crate::db::mem_table::MemTable; +use crate::util::comparator::{BytewiseComparatorImpl}; +use crate::util::slice::Slice; + pub mod log_writer; pub mod log_reader; -mod log_wr_test; \ No newline at end of file +pub mod table_cache; +mod log_wr_test; +pub mod skip_list; +pub mod mem_table; +pub mod db; +pub mod db_format; +mod db_format_test; +pub mod file_meta_data; +mod file_meta_data_test; +pub mod version_set; +mod version_set_test; +pub mod version_edit; +mod version_edit_test; + +/// 默认调表 +pub type DefaultSkipList = SkipList; +/// 默认内存表 +pub type DefaultMemTable = MemTable; \ No newline at end of file diff --git a/src/db/skip_list.rs b/src/db/skip_list.rs new file mode 100644 index 0000000000000000000000000000000000000000..01331f22de72120f0ce50c5255778640c6a2bc2b --- /dev/null +++ b/src/db/skip_list.rs @@ -0,0 +1,39 @@ +use std::rc::Rc; + +use crate::traits::comparator_trait::Comparator; +use crate::util::Arena; +use crate::util::comparator::BytewiseComparatorImpl; +use crate::util::Result; +use crate::util::slice::Slice; + +// todo +struct Node { + value: T, +} + +pub struct SkipList { + node: Option>, + comp: Rc, +} + +impl SkipList { + + pub fn create(comparator: Rc, _arena: Rc) -> 
Self { + Self { + node: None, + comp: comparator, + } + } + + pub fn insert(&mut self, _seq_no: usize, _key: &Slice) -> Result<()> { + todo!() + } + + pub fn contains(&self, _key: &Slice) -> bool { + todo!() + } + + pub fn get_max_height(&self) -> usize { + todo!() + } +} \ No newline at end of file diff --git a/src/db/table_cache.rs b/src/db/table_cache.rs new file mode 100644 index 0000000000000000000000000000000000000000..ad6079bddebb55f87430bf04a6d0f08f777b0e91 --- /dev/null +++ b/src/db/table_cache.rs @@ -0,0 +1,71 @@ +use crate::traits::DataIterator; +use crate::util::slice::Slice; +use crate::util::Result; + +struct Saver {} + +struct ReadOptions {} + +struct Table {} + +pub struct TableCache {} + +impl TableCache { + /// 从缓存中获取Table + /// + /// # Arguments + /// + /// * `options`: 读取的配置 + /// * `file_number`: 文件号 + /// * `file_size`: 文件大小 + /// * `k`: key + /// * `handle_result`: 回调函数 + /// + /// returns: () + /// + /// # Examples + /// + /// ``` + /// + /// ``` + fn get(&self, _options: &ReadOptions, _file_number: u64, _file_size: usize, _k: &Slice, _arg: &mut Saver, _handle_result: F) + where F: FnMut(&mut Saver, &Slice, &Slice) -> Result<()> { + () + } + /// 根据文件号消除缓存 + /// + /// # Arguments + /// + /// * `file_number`: 文件号 + /// + /// returns: () + /// + /// # Examples + /// + /// ``` + /// + /// ``` + fn evict(&mut self, _file_number: u64) { + todo!() + } + + /// 获取一个迭代器 + /// + /// # Arguments + /// + /// * `options`: 读取的配置 + /// * `file_number`: 文件号 + /// * `file_size`: 文件大小 + /// * `table`: 表 + /// + /// returns: Result>, Status> + /// + /// # Examples + /// + /// ``` + /// + /// ``` + fn new_iterator(&self, _options: &ReadOptions, _file_number: u64, _file_size: usize, _table: &Table) -> Result> { + todo!() + } +} \ No newline at end of file diff --git a/src/db/version_edit.rs b/src/db/version_edit.rs new file mode 100644 index 0000000000000000000000000000000000000000..e820e778d9c66e6d1b9c3c9349b33411a9792c30 --- /dev/null +++ 
b/src/db/version_edit.rs @@ -0,0 +1,181 @@ +use std::iter::Map; +use crate::db::db_format::InternalKey; +use crate::db::file_meta_data::FileMetaData; +use crate::util::slice::Slice; +use crate::util::Result; + +pub struct VersionEdit { + comparator_: String, + log_number_: u64, + prev_log_number_: u64, + next_file_number_: u64, + last_sequence_: u64, + has_comparator_: bool, + has_log_number_: bool, + has_prev_log_number_: bool, + has_next_file_number_: bool, + has_last_sequence_: bool, + + compact_pointers_: Vec<(u32, InternalKey)>, + deleted_files_: Vec<(u32, u64)>, + new_files_: Vec<(u32, FileMetaData)>, +} + +enum Tag { + // kComparator = 1, + // kLogNumber = 2, + // kNextFileNumber = 3, + // kLastSequence = 4, + // kCompactPointer = 5, + // kDeletedFile = 6, + // kNewFile = 7, + // // 8 was used for large value refs + // kPrevLogNumber = 9 + + kComparator, + kLogNumber, + kNextFileNumber, + kLastSequence, + kCompactPointer, + kDeletedFile, + kNewFile, + // 8 was used for large value refs + kPrevLogNumber +} + +impl VersionEdit { + /// 清空 + fn clear(&mut self) { + self.comparator_.clear(); + self.log_number_ = 0; + self.prev_log_number_ = 0; + self.last_sequence_ = 0; + self.next_file_number_ = 0; + self.has_comparator_ = false; + self.has_log_number_ = false; + self.has_prev_log_number_ = false; + self.has_next_file_number_ = false; + self.has_last_sequence_ = false; + self.deleted_files_.clear(); + self.new_files_.clear(); + + // compact_pointers_ don't clear + } + + fn set_comparator_name(&mut self, name: Slice){ + self.has_comparator_ = true; + self.comparator_ = name.into(); + } + + fn set_prev_log_number(&mut self, num: u64){ + self.has_prev_log_number_ = true; + self.prev_log_number_ = num; + } + + fn set_next_file(&mut self, num: u64){ + self.has_next_file_number_ = true; + self.next_file_number_ = num; + } + + fn set_last_sequence(&mut self, seq: u64){ + self.has_last_sequence_ = true; + self.last_sequence_ = seq; + } + + fn set_compact_pointer(&mut 
self, level: u32, key: InternalKey) { + self.compact_pointers_.push((level, key)) + } + + /// Add the specified file at the specified number. + /// REQUIRES: This version has not been saved (see VersionSet::SaveTo) + /// REQUIRES: "smallest" and "largest" are smallest and largest keys in file + /// + /// # Arguments + /// + /// * `level`: + /// * `file`: + /// * `file_size`: + /// * `smallest`: 移交所有权 + /// * `largest`: 移交所有权 + /// + /// returns: () + /// + /// # Examples + /// + /// ``` + /// + /// ``` + fn add_file(&mut self, level: u32, file: u64, file_size: u64, smallest: InternalKey, largest: InternalKey) { + let file_meta_data = FileMetaData::new_with_number_file_size_internal_key(file, file_size, smallest, largest); + + self.new_files_.push((level, file_meta_data)); + } + + fn delete_file(&mut self, level: u32, file: u64) { + self.deleted_files_.push((level, file)); + } + + /// 将 VersionEdit 对象编码至 target 中 + /// + /// # Arguments + /// + /// * `target`: + /// + /// returns: () + /// + /// # Examples + /// + /// ``` + /// + /// ``` + fn encode_to(&self, target: Vec) { + todo!() + } + + /// 将 source 中的数据解码至 self VersionEdit 中 + /// + /// # Arguments + /// + /// * `source`: + /// + /// returns: () + /// + /// # Examples + /// + /// ``` + /// + /// ``` + fn decode_from(&mut self, source: Slice) { + self.clear(); + + todo!() + } + + /// VersionEdit 输出调试信息 + fn debug_string(&self) -> Slice { + todo!() + } +} + +impl<'a> VersionEdit { + pub fn get_internal_key(inout: Slice) -> Result { + todo!() + } + + /// 从 Slice 中解出 level 值 + /// + /// # Arguments + /// + /// * `input`: + /// + /// returns: bool + /// + /// # Examples + /// + /// ``` + /// + /// ``` + pub fn get_level(input: Slice) -> Result { + todo!() + } +} \ No newline at end of file diff --git a/src/db/version_edit_test.rs b/src/db/version_edit_test.rs new file mode 100644 index 0000000000000000000000000000000000000000..3024e557cdf3bbb6f694195504a00074f8eaa364 --- /dev/null +++ b/src/db/version_edit_test.rs 
@@ -0,0 +1,10 @@ + +mod test { + #[test] + fn test_() { + + println!("get_name: {}", "a"); + + + } +} \ No newline at end of file diff --git a/src/db/version_set.rs b/src/db/version_set.rs new file mode 100644 index 0000000000000000000000000000000000000000..15dabb08fee721512b2ddc46b95a5cc0cf463376 --- /dev/null +++ b/src/db/version_set.rs @@ -0,0 +1,415 @@ +use std::collections::{BTreeMap, BTreeSet}; +use std::rc::Rc; +use crate::db::db_format::{Config, InternalKeyComparator, LookupKey}; +use crate::db::file_meta_data::FileMetaData; +use crate::db::table_cache::TableCache; +use crate::db::version_edit::VersionEdit; +use crate::traits::comparator_trait::Comparator; +use crate::util::cache::Cache; +use crate::util::options::{Env, Options, ReadOptions}; +use crate::util::slice::Slice; +use crate::util::Result; + +// .h line 58 - 162 +pub struct Version { + // todo 链表实现,前驱后继 + // VersionSet to which this Version belongs + vset_: Rc, + // Next version in linked list + next_: Rc, + // Previous version in linked list + prev_: Rc, + + // Number of live refs to this version + refs_: i32, + + // List of files per level, 内部vec 初始化长度 config::kNumLevels + files_: Vec>, + + // Next file to compact based on seek stats. + file_to_compact_: FileMetaData, + file_to_compact_level_: i32, + + // Level that should be compacted next and its compaction score. + // Score < 1 means compaction is not strictly needed. These fields are initialized by Finalize(). 
+ compaction_score_: f64, + compaction_level_: i32 +} + +// .h line 164 - 320 +pub struct VersionSet { + env_: Env, + dbname_: Slice, + options_: Options, + table_cache_: TableCache, + icmp_: Box, + next_file_number_: u64, + manifest_file_number_: u64, + last_sequence_: u64, + log_number_: u64, + // 0 or backing store for memtable being compacted + prev_log_number_: u64, + + // Opened lazily + // todo WritableFile + // descriptor_file_: tokio.File, + // todo log::Writer + // descriptor_log_: log::Writer, + + // Head of circular doubly-linked list of versions. + dummy_versions_: Version, + // dummy_versions_.prev_ + current_: Version, + + // Per-level key at which the next compaction at that level should start. + // Either an empty string, or a valid InternalKey. + compact_pointer_: [String; Config::K_NUM_LEVELS] +} + +// .h line 323 - 393 +pub struct Compaction { + level_: u32, + max_output_file_size_: u64, + input_version_: Version, + edit_: VersionEdit + + // // Each compaction reads inputs from "level_" and "level_+1" + // std::vector inputs_[2]; // The two sets of inputs + // + // // State used to check for number of overlapping grandparent files + // // (parent == level_ + 1, grandparent == level_ + 2) + // std::vector grandparents_; + // size_t grandparent_index_; // Index in grandparent_starts_ + // bool seen_key_; // Some output key has been seen + // int64_t overlapped_bytes_; // Bytes of overlap between current output + // // and grandparent files + // + // // State for implementing IsBaseLevelForKey + // + // // level_ptrs_ holds indices into input_version_->levels_: our state + // // is that we are positioned at one of the file ranges for each + // // higher level than the ones involved in this compaction (i.e. for + // // all L >= level_ + 2). 
+ // size_t level_ptrs_[config::kNumLevels]; +} + +// .h line 68 - 71 +struct GetStats { + seek_file: Rc, + seek_file_level: i32 +} + +// ,cc line 163 +struct LevelFileNumIterator { + icmp_: Rc, + flist_: Vec, + index_: u32, + + // // Backing store for value(). Holds the file number and size. + // mutable char value_buf_[16]; + value_buf_: [u8] +} + +// line 604 +pub struct Builder { + vset_: Rc, + + // VersionSet* ; + // Version* base_; + // LevelState levels_[config::kNumLevels]; + + // a : BTreeSet, +} + +struct BySmallestKey { + internal_comparator: InternalKeyComparator +} + +struct LevelState { + deleted_files: Vec, + + // replace std::set FileSet -> added_files + added_files: BTreeSet +} + +impl Version { + fn new(vset: Rc) -> Self { + todo!() + } + + fn set_next(&mut self, data: &Rc) -> bool { + todo!() + } + + fn set_prev(&mut self, data: &Rc) -> bool { + todo!() + } + + fn get_refs(&self) -> i32 { + self.refs_ + } + + /// todo 等待 Iterator 接口 + /// + /// 通过self.versionSet中的TableCache.NewIterator, 将 self 对象数据追加到 iters 中 + /// + /// Append to *iters a sequence of iterators that will yield the contents of this Version when merged together. + /// + /// REQUIRES: This version has been saved (see VersionSet::SaveTo) + /// + /// # Arguments + /// + /// * `options`: + /// * `iters`: + /// + /// returns: () + /// + /// # Examples + /// + /// ``` + /// + /// ``` + fn add_iterators(&self, options: ReadOptions, /* mut iters: Vec */) { + todo!() + } + + /// 数据搜索 + /// + /// 一级一级地搜索,因为条目不会跨越级别。如果在较小的级别上发现数据,则后面的级别是不相关的。 + /// + /// # Arguments + /// + /// * `options`: + /// * `key`: + /// * `value`: + /// + /// returns: Result + /// + /// # Examples + /// + /// ``` + /// + /// ``` + fn get(&self, options: ReadOptions, key: LookupKey, value: Slice) -> Result { + todo!() + } + + /// Adds "stats" into the current state. + /// Returns true if a new compaction may need to be triggered, false otherwise. 
+ /// + /// REQUIRES: lock is held + /// + /// # Arguments + /// + /// * `stats`: + /// + /// returns: bool + /// + /// # Examples + /// + /// ``` + /// + /// ``` + fn update_stats(&mut self, stats: GetStats) -> bool{ + todo!() + } + + /// 记录在指定内部键处读取的字节样本。 + /// 大约每 config::K_READ_BYTES_PERIOD 字节采样一次。如果可能需要触发新的压缩,则返回true + /// + /// Record a sample of bytes read at the specified internal key. + /// Samples are taken approximately once every config::kReadBytesPeriod + /// bytes. Returns true if a new compaction may need to be triggered. + /// + /// REQUIRES: lock is held 要求:锁定 + /// + /// # Arguments + /// + /// * `key`: + /// + /// returns: bool + /// + /// # Examples + /// + /// ``` + /// + /// ``` + fn record_read_sample(&self, key: Slice) -> bool { + todo!() + } + + // // + // // Reference count management (so Versions do not disappear out from + // // under live iterators) + // void Ref(); + // void Unref(); + // + // void GetOverlappingInputs( + // int level, + // const InternalKey* begin, // nullptr means before all keys + // const InternalKey* end, // nullptr means after all keys + // std::vector* inputs); + // + // // Returns true iff some file in the specified level overlaps + // // some part of [*smallest_user_key,*largest_user_key]. + // // smallest_user_key==nullptr represents a key smaller than all the DB's keys. + // // largest_user_key==nullptr represents a key largest than all the DB's keys. + // bool OverlapInLevel(int level, + // const Slice* smallest_user_key, + // const Slice* largest_user_key); + // + // // Return the level at which we should place a new memtable compaction + // // result that covers the range [smallest_user_key,largest_user_key]. + // int PickLevelForMemTableOutput(const Slice& smallest_user_key, + // const Slice& largest_user_key); + // + // int NumFiles(int level) const { return files_[level].size(); } + // + // // Return a human readable string that describes this version's contents. 
+ // std::string DebugString() const; + + // Iterator* NewConcatenatingIterator(const ReadOptions&, int level) const; + // + // // Call func(arg, level, f) for every file that overlaps user_key in + // // order from newest to oldest. If an invocation of func returns + // // false, makes no more calls. + // // + // // REQUIRES: user portion of internal_key == user_key. + // void ForEachOverlapping(Slice user_key, Slice internal_key, + // void* arg, + // bool (*func)(void*, int, FileMetaData*)); +} + +impl Drop for Version { + /// version_set.cc 中析构方法 Version::~Version() 的对应实现 + fn drop(&mut self) { + assert_eq!(self.get_refs(), 0); + + // // Remove from linked list + // self.prev_.set_next(&self.next_); + // self.next_.set_prev(&self.prev_); + + // // Drop references to files + // for level in 0..Config::K_NUM_LEVELS { + // for i in 0..self.files_[level].len() { + // let mut meta: FileMetaData = files_[level][i]; + // assert!(meta.get_refs() > 0); + // + // meta.add_refs(-1); + // if meta.get_refs() <= 0 { + // // todo C 语义 delete f + // // delete f; + // } + // } + // } + } +} + +// impl VersionSet { +// //. +// } + +impl VersionSet { + /// 返回文件源数据中最小的索引。 如果文件不存在,则返回文件数量 + /// + /// Return the smallest index i such that files[i]->largest >= key. + /// Return files.size() if there is no such file. + /// + /// REQUIRES: "files" contains a sorted list of non-overlapping files. + /// # Arguments + /// + /// * `icmp`: + /// * `files`: + /// * `key`: + /// + /// returns: u32 + /// + /// # Examples + /// + /// ``` + /// + /// ``` + fn find_file(icmp: &InternalKeyComparator, files:&Vec, key:&Slice) -> u32 { + todo!() + } + + /// 如果 user key 范围[smallest_user_key, largest_user_key] 与 “files”中的 [smallest.user_key(), largest.user_key()] 重叠,则返回true + /// minimal==nullptr表示比DB中的所有键都小的键。 + /// maximum==nullptr表示比DB中的所有键都大的键。 + /// + /// Returns true iff some file in "files" overlaps the user key range [smallest_user_key, largest_user_key]. 
+ /// smallest==nullptr represents a key smaller than all keys in the DB. + /// largest==nullptr represents a key largest than all keys in the DB. + /// + /// REQUIRES: If disjoint_sorted_files, files[] contains disjoint ranges + /// in sorted order. + /// + /// # Arguments + /// + /// * `icmp`: + /// * `disjoint_sorted_files`: + /// * `files`: + /// * `smallest_user_key`: + /// * `largest_user_key`: + /// + /// returns: bool + /// + /// # Examples + /// + /// ``` + /// + /// ``` + fn some_file_overlaps_range(icmp: &InternalKeyComparator, disjoint_sorted_files:bool, + files:&Vec, smallest_user_key:&Slice,largest_user_key:&Slice) -> bool { + todo!() + } +} + +impl Compaction { + // fn create(options: Options, level: u32) -> Self { + // Self { + // + // } + // } +} + +// todo 等到 Iterator 接口 +// impl Iterator for LevelFileNumIterator { +// +// } + +impl Builder { + +} + +impl BySmallestKey { + + /// FileMetaData 比较 + /// + /// # Arguments + /// + /// * `f1`: + /// * `f2`: + /// + /// returns: bool + /// + /// # Examples + /// + /// ``` + /// + /// ``` + pub fn operator(&self, f1: &FileMetaData, f2: &FileMetaData) -> bool { + // line 607 + let r: u32 = self.internal_comparator.compare_internal_key( + f1.get_smallest(), + f2.get_smallest() + ); + + if r != 0 { + return r < 0; + } + + // Break ties by file number + f1.get_number() < f2.get_number() + } +} \ No newline at end of file diff --git a/src/db/version_set_test.rs b/src/db/version_set_test.rs new file mode 100644 index 0000000000000000000000000000000000000000..44c68c4e8377677941131d584124539c8c27fd11 --- /dev/null +++ b/src/db/version_set_test.rs @@ -0,0 +1,9 @@ + +mod test { + #[test] + fn test_() { + + println!("get_name: {}", "a"); + + } +} \ No newline at end of file diff --git a/src/table/block.rs b/src/table/block.rs new file mode 100644 index 0000000000000000000000000000000000000000..e5592f972f63bfe454acd719ad09dd07643531ce --- /dev/null +++ b/src/table/block.rs @@ -0,0 +1,37 @@ +use std::rc::Rc; +use 
crate::traits::comparator_trait::Comparator; +use crate::traits::DataIterator; + +use crate::util::Result; + +pub struct Block {} + +impl Block { + /// 获取block的大小 + /// + /// # Examples + /// + /// ``` + /// let block = Block {}; + /// let size = block.size(); + /// ``` + pub fn size(&self) { + todo!() + } + /// 生成迭代器 + /// + /// # Arguments + /// + /// * `_comparator`: 比较器 + /// + /// returns: Result, Status> + /// + /// # Examples + /// + /// ``` + /// + /// ``` + pub fn new_iterator(&self, _comparator: Rc) -> Result> { + todo!() + } +} \ No newline at end of file diff --git a/src/table/block_builder.rs b/src/table/block_builder.rs new file mode 100644 index 0000000000000000000000000000000000000000..a58922a09598e533c26255af1605d7dcd7362acb --- /dev/null +++ b/src/table/block_builder.rs @@ -0,0 +1,66 @@ +use crate::util::slice::Slice; + +use crate::util::Result; + +pub struct BlockBuilder {} + +impl BlockBuilder { + /// 添加数据到block + /// + /// # Arguments + /// + /// * `key`: 键 + /// * `value`: 值 + /// + /// returns: () + /// + /// # Examples + /// + /// ``` + /// + /// ``` + pub fn add(&mut self, _key: &Slice, _value: &Slice) { + todo!() + } + /// 重置builder + /// + /// # Examples + /// + /// ``` + /// block_builder.reset(); + /// ``` + pub fn reset(&mut self) { + todo!() + } + /// 构造block + /// + /// + /// # Examples + /// + /// ``` + /// let block = block_builder.finish(); + /// ``` + pub fn finish(&mut self) -> Result { + todo!() + } + /// 判断builder是否为空 + /// + /// # Examples + /// + /// ``` + /// let is_empty = block_builder.empty(); + /// ``` + pub fn empty(&self) -> bool { + todo!() + } + /// 估算当前的block大小 + /// + /// # Examples + /// + /// ``` + /// let estimate_size = block_builder.current_size_estimate(); + /// ``` + pub fn current_size_estimate(&self) -> usize { + todo!() + } +} \ No newline at end of file diff --git a/src/table/filter_block.rs b/src/table/filter_block.rs new file mode 100644 index 
0000000000000000000000000000000000000000..524c324c4c7c82cbfef98718c4f3617e15104828 --- /dev/null +++ b/src/table/filter_block.rs @@ -0,0 +1,59 @@ +use crate::util::slice::Slice; + +use crate::util::Result; + +pub struct FilterBlockBuilder {} + +impl FilterBlockBuilder { + /// 设置block的起始位置 + /// + /// # Arguments + /// + /// * `_block_offset`: 偏移量 + /// + /// returns: () + /// + /// # Examples + /// + /// ``` + /// filter_block_builder.start_block(1024_u64); + /// ``` + pub fn start_block(&mut self, _block_offset: u64) { + todo!() + } + + /// 添加key到builder + /// + /// # Arguments + /// + /// * `_key`: 键 + /// + /// returns: () + /// + /// # Examples + /// + /// ``` + /// + /// ``` + pub fn add_key(&mut self, _key: &Slice) { + todo!() + } + /// 构造filterBlock + /// + /// # Examples + /// + /// ``` + /// filter_block_builder.finish(); + /// ``` + pub fn finish(&mut self) -> Result { + todo!() + } +} + +pub struct FilterBlockReader {} + +impl FilterBlockReader { + pub fn key_may_match(&self, _block_offset: u64, _key: &Slice) -> bool { + todo!() + } +} \ No newline at end of file diff --git a/src/table/format.rs b/src/table/format.rs new file mode 100644 index 0000000000000000000000000000000000000000..01c9610c998bfc3f3532bf7df37e4868de6d1577 --- /dev/null +++ b/src/table/format.rs @@ -0,0 +1,229 @@ +use crate::traits::coding_trait::CodingTrait; +use crate::util::coding; +use crate::util::slice::Slice; +use crate::util::Result; +use crate::util::status::Status; + +/// Maximum encoding length of a BlockHandle +pub const k_max_encoded_length: u32 = 10 + 10; + +/// Encoded length of a Footer. Note that the serialization of a +/// Footer will always occupy exactly this many bytes. It consists +/// of two block handles and a magic number. +pub const k_encoded_length: u32 = 2 * k_max_encoded_length + 8; + +// // kTableMagicNumber was picked by running +// // echo http://code.google.com/p/leveldb/ | sha1sum +// // and taking the leading 64 bits. 
+// pub const k_table_magic_number: &str = 0xdb4775248b80fb57ull; + +/// 1-byte type + 32-bit crc +pub const k_block_trailer_size: usize = 5; + +pub struct BlockHandle { + // 偏移量 + offset_: u64, + // + size_: u64 +} + +/// Footer encapsulates the fixed information stored at the tail +/// end of every table file. +pub struct Footer { + metaindex_handle_: BlockHandle, + index_handle_: BlockHandle +} + +pub struct BlockContents { + // Actual contents of data + data: Slice, + + // True if data can be cached + cachable: bool, + + // True if caller should delete[] data.data() + heap_allocated:bool, +} + +trait BlockTrait { + /// + /// The offset of the block in the file. + /// + fn offset(&self) -> u64; + + /// + /// set offset + /// # Arguments + /// + /// * `offset`: + /// + fn set_offset(&mut self, offset: u64); + + // The size of the stored block + fn size(&self) -> u64; + + /// + /// set size + /// # Arguments + /// + /// * `size`: + /// + fn set_size(&mut self, size: u64); + + /// + /// 将 Block 对象编码成 Slice + /// + /// # Arguments + /// + /// returns: Slice + /// + /// # Examples + /// + /// ``` + /// + /// ``` + fn encode_to(&self) -> Result; + + /// + /// 将 Slice 对象解码后与BlockHandle set field + /// + /// # Arguments + /// * `input`: + /// + /// returns: Result + /// + /// # Examples + /// + /// ``` + /// + /// ``` + fn decode_from(&mut self, input: Slice) -> Result<()>; +} + +trait FootTrait { + // The block handle for the metaindex block of the table + fn metaindex_handle(&self) -> BlockHandle; + + fn set_metaindex_handle(&mut self, h: BlockHandle); + + fn index_handle(&self) -> BlockHandle; + + fn set_index_handle(&mut self, h: BlockHandle); + + /// + /// 将 Foot 对象编码成 Slice + /// + /// # Arguments + /// + /// returns: Slice + /// + /// # Examples + /// + /// ``` + /// + /// ``` + fn encode_to(&self) -> Result; + + /// + /// 将 Slice 对象解码后与 BlockHandle 比较,是否可以成功 + /// + /// # Arguments + /// * `input`: + /// + /// returns: Result + /// + /// # Examples + /// + /// 
``` + /// + /// ``` + fn decode_from(&mut self, input: Slice) -> Result<()>; +} + +trait BlockContent { + /// Read the block identified by "handle" from "file". On failure + /// return non-OK. On success fill *result and return OK. + fn read_block(&self, + // todo RandomAccessFile, ReadOptions 未提供 + // file: RandomAccessFile, options: ReadOptions, + handle: BlockHandle + ) -> Result; + +} + +impl BlockTrait for BlockHandle { + fn offset(&self) -> u64 { + self.offset_ + } + + fn set_offset(&mut self, offset: u64) { + self.offset_ = offset; + } + + fn size(&self) -> u64 { + self.size_ + } + + fn set_size(&mut self, size: u64) { + self.size_ = size; + } + + fn encode_to(&self) -> Result { + todo!() + + // // Sanity check that all fields have been set + // assert!(self.offset_ != 0); + // assert!(self.size_ != 0); + // + // let mut buf: [u8; 4] = [0, 0, 0, 0]; + // coding::Coding::put_varint64(&mut buf, 0, &self.offset_); + // + // Slice::default() + } + + fn decode_from(&mut self, input: Slice) -> Result<()> { + todo!() + } +} + +impl Default for BlockHandle { + #[inline] + fn default() -> Self { + BlockHandle { + offset_: 0, + size_: 0, + } + } +} + +impl FootTrait for Footer { + fn metaindex_handle(&self) -> BlockHandle { + todo!() + } + + fn set_metaindex_handle(&mut self, h: BlockHandle) { + todo!() + } + + fn index_handle(&self) -> BlockHandle { + todo!() + } + + fn set_index_handle(&mut self, h: BlockHandle) { + todo!() + } + + fn encode_to(&self) -> Result { + todo!() + } + + fn decode_from(&mut self, input: Slice) -> Result<()> { + todo!() + } +} + +impl BlockContent for BlockContents { + fn read_block(&self, handle: BlockHandle) -> Result { + todo!() + } +} diff --git a/src/table/format_test.rs b/src/table/format_test.rs new file mode 100644 index 0000000000000000000000000000000000000000..e106bee8cbcc8205bb4a200e6d18dfe5f320a017 --- /dev/null +++ b/src/table/format_test.rs @@ -0,0 +1,7 @@ + + +// #################### BlockHandle test +#[test] +fn 
test_block_handle() { + todo!() +} \ No newline at end of file diff --git a/src/table/iterator_wrapper.rs b/src/table/iterator_wrapper.rs new file mode 100644 index 0000000000000000000000000000000000000000..e9f849327424f6eba489109f08bf2557e3fd2259 --- /dev/null +++ b/src/table/iterator_wrapper.rs @@ -0,0 +1,3 @@ +pub struct IteratorWrapper { + +} \ No newline at end of file diff --git a/src/table/mod.rs b/src/table/mod.rs index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..ade478fc4045f8c85b832af1adda4f74d428e097 100644 --- a/src/table/mod.rs +++ b/src/table/mod.rs @@ -0,0 +1,8 @@ +pub mod block; +pub mod block_builder; +pub mod filter_block; +pub mod format; +mod format_test; +pub(crate) mod ss_table; +mod ss_table_test; +mod iterator_wrapper; \ No newline at end of file diff --git a/src/table/ss_table.rs b/src/table/ss_table.rs new file mode 100644 index 0000000000000000000000000000000000000000..869c7a708fd72d4566dd5a18f02cb54253f1c437 --- /dev/null +++ b/src/table/ss_table.rs @@ -0,0 +1,4 @@ + +pub struct SSTable { + +} \ No newline at end of file diff --git a/src/table/ss_table_test.rs b/src/table/ss_table_test.rs new file mode 100644 index 0000000000000000000000000000000000000000..bb5f800dbae1937e33f8194db84dc4adb0baa1a6 --- /dev/null +++ b/src/table/ss_table_test.rs @@ -0,0 +1,5 @@ + +#[test] +fn test_ss_table() { + todo!() +} \ No newline at end of file diff --git a/src/traits/coding_trait.rs b/src/traits/coding_trait.rs index ed7badeae4e0d1e743901b356b8725491633ec73..3a0a49e42ebb574d2c92272b3e57f0ff406bf999 100644 --- a/src/traits/coding_trait.rs +++ b/src/traits/coding_trait.rs @@ -16,7 +16,7 @@ pub trait CodingTrait { /// let mut string = String::from("encode:"); /// put_fixed32(&mut string, 65535); /// ``` - fn put_fixed32(dst: &mut [u8], offset: usize, value: &mut u32) -> usize; + fn put_fixed32(dst: &mut [u8], offset: usize, value: u32) -> usize; ///64位定长编码写入字符串 /// /// # Arguments @@ -32,7 +32,7 @@ pub trait CodingTrait { /// let mut string = 
String::from("encode:"); /// put_fixed64(&mut string, 65535); /// ``` - fn put_fixed64(dst: &mut [u8], offset: usize, value: &mut u64) -> usize; + fn put_fixed64(dst: &mut [u8], offset: usize, value: u64) -> usize; /// 32位变长编码写入字符串 /// /// # Arguments @@ -48,7 +48,7 @@ pub trait CodingTrait { /// let mut string = String::from("encode:"); /// put_varint32(&mut string, 65535); /// ``` - fn put_varint32(dst: &mut [u8], offset: usize, value: &mut u32) -> usize; + fn put_varint32(dst: &mut [u8], offset: usize, value: u32) -> usize; /// 64位变长编码写入字符串 /// /// # Arguments @@ -64,7 +64,7 @@ pub trait CodingTrait { /// let mut string = String::from("encode:"); /// put_varint64(&mut string, 65535); /// ``` - fn put_varint64(dst: &mut [u8], offset: usize, value: &mut u64) -> usize; + fn put_varint64(dst: &mut [u8], offset: usize, value: u64) -> usize; /// 将slice的长度写入目标字符串 /// /// # Arguments @@ -79,7 +79,7 @@ pub trait CodingTrait { /// ``` /// /// ``` - fn put_length_prefixed_slice(dst: &mut [u8], offset: usize, value: &mut Slice) -> usize; + fn put_length_prefixed_slice(dst: &mut [u8], offset: usize, value: Slice) -> usize; /// 从slice的开头解码一个32位的变长整数, 并将slice的索引置于解码后的位置 /// /// # Arguments @@ -139,7 +139,7 @@ pub trait CodingTrait { /// let value: u32 = 65534; /// let offset =encode_varint32(value, &mut buf, 0); /// ``` - fn encode_varint32(value: &mut u32, buf: &mut [u8], offset: usize) -> usize; + fn encode_varint32(value: u32, buf: &mut [u8], offset: usize) -> usize; /// 变长正整数编码 /// /// # Arguments @@ -157,7 +157,7 @@ pub trait CodingTrait { /// let value: u32 = 65534; /// let offset =encode_varint64(value, &mut buf, 0); /// ``` - fn encode_varint64(value: &mut u64, buf: &mut [u8], offset: usize) -> usize; + fn encode_varint64(value: u64, buf: &mut [u8], offset: usize) -> usize; /// 获取变长编码后的长度 /// /// # Arguments @@ -172,7 +172,7 @@ pub trait CodingTrait { /// /// ``` /// 从slice的开头解码一个32位的变长整数, 并将slice的索引置于解码后的位置 - fn varint_length(value: &mut u64) -> i32; + fn 
varint_length(value: u64) -> i32; /// 32位定长正整数编码 /// /// # Arguments @@ -190,7 +190,7 @@ pub trait CodingTrait { /// let value: u32 = 65534; /// let offset = Self::encode_fixed32(value, &mut buf, 0); /// ``` - fn encode_fixed32(value: &mut u32, buf: &mut [u8], offset: usize) -> usize; + fn encode_fixed32(value: u32, buf: &mut [u8], offset: usize) -> usize; /// 64位定长正整数编码 /// /// # Arguments @@ -208,7 +208,7 @@ pub trait CodingTrait { /// let value: u64 = 65534; /// let offset = encode_fixed64(value, &mut buf, 0); /// ``` - fn encode_fixed64(value: &mut u64, buf: &mut [u8], offset: usize) -> usize; + fn encode_fixed64(value: u64, buf: &mut [u8], offset: usize) -> usize; /// 32位定长解码 /// /// # Arguments diff --git a/src/traits/comparator_trait.rs b/src/traits/comparator_trait.rs index 47388a276f6f79912f5cbba844eeaf54f882aa61..a7a74c1d637fb4626079b3af33d3caf5420b24a0 100644 --- a/src/traits/comparator_trait.rs +++ b/src/traits/comparator_trait.rs @@ -2,7 +2,7 @@ use std::cmp::Ordering; use crate::util::slice::Slice; /// 比较器 -pub trait ComparatorTrait { +pub trait Comparator { /// Slice 的大小比较, 按字典逐字节序进行比较 /// @@ -29,7 +29,7 @@ pub trait ComparatorTrait { fn compare(&self, a: &Slice, b: &Slice) -> Option; /// 返回comparator的名字 - fn get_name() -> String; + fn get_name(&self) -> String; /// 函数:找到start、limit之间最短的字符串,如“helloworld”和”hellozoomer”之间最短的key可以是”hellox” /// diff --git a/src/traits/filter_policy_trait.rs b/src/traits/filter_policy_trait.rs index 361e5a8446380ecfad26cbe11cad0219795df915..f3e4ad07bdcfb2c33b39b569c527890873dbface 100644 --- a/src/traits/filter_policy_trait.rs +++ b/src/traits/filter_policy_trait.rs @@ -3,30 +3,48 @@ use crate::util::slice::Slice; /// 用于key过滤,可以快速的排除不存在的key pub trait FilterPolicy { + /// /// filter的名字 /// Return the name of this policy. Note that if the filter encoding /// changes in an incompatible way, the name returned by this method /// must be changed. Otherwise, old incompatible filters may be /// passed to methods of this type. 
- fn name() -> String; + /// + fn name(&self) -> String; - /// 根据指定的参数创建过滤器,并返回结果, 结果为dst的原始内容 + append结果。 - /// 参数keys[0,n-1]包含依据用户提供的comparator排序的key列表--可重复, - /// 并把根据这些key创建的filter追加返回。 + /// 根据 keys 创建过滤器,并返回 bloom_filter Slice /// /// # Arguments /// - /// * `keys`: - /// * `n`: + /// * `keys`: 创建过滤器的数据清单 /// - /// returns: String + /// returns: bloom_filter Slice /// /// # Examples /// /// ``` + /// let mut keys : Vec = Vec::new(); + /// keys.push(Slice::try_from(String::from("hello")).unwrap()); + /// keys.push(Slice::try_from(String::from("world")).unwrap()); /// + /// let policy = BloomFilterPolicy::new(800); + /// let bloom_filter: Slice = policy.create_filter(keys); /// ``` - fn create_filter(&self, keys: Slice, n: u32) -> String; + fn create_filter(&self, keys: Vec) -> Slice; - fn key_may_match(key: &Slice, filter: &Slice) -> bool; + /// + /// + /// # Arguments + /// + /// * `key`: + /// * `bloom_filter`: + /// + /// returns: bool + /// + /// # Examples + /// + /// ``` + /// + /// ``` + fn key_may_match(&self, key: &Slice, bloom_filter: &Slice) -> bool; } \ No newline at end of file diff --git a/src/traits/iterator.rs b/src/traits/iterator.rs index a985e81ad68b49c971c249bca5fefd5ae1908854..7717574de82560f01aac082d4790953581d0b93c 100644 --- a/src/traits/iterator.rs +++ b/src/traits/iterator.rs @@ -1,21 +1,110 @@ use crate::util::slice::Slice; -pub trait Iterator { +pub trait DataIterator { + /// 检查当前位置是否有效 + /// + /// # Arguments + /// + /// + /// returns: bool + /// + /// # Examples + /// + /// ``` + /// + /// ``` fn valid(&self) -> bool; - + /// 将迭代器定位到开始位置 + /// + /// # Arguments + /// + /// + /// returns: () + /// + /// # Examples + /// + /// ``` + /// + /// ``` fn seek_to_first(&mut self); - + /// 将迭代器定位到末尾位置 + /// + /// # Arguments + /// + /// + /// returns: () + /// + /// # Examples + /// + /// ``` + /// + /// ``` fn seek_to_last(&mut self); - + /// 将迭代器定位到指定位置 + /// + /// # Arguments + /// + /// * `target`: 迭代器位置 + /// + /// returns: () + /// + 
/// # Examples + /// + /// ``` + /// + /// ``` fn seek(&mut self, target: &Slice); - + /// 定位到下一个元素 + /// + /// # Arguments + /// + /// + /// returns: () + /// + /// # Examples + /// + /// ``` + /// + /// ``` fn next(&mut self); - + /// 定位到上一个元素 + /// + /// # Arguments + /// + /// + /// returns: () + /// + /// # Examples + /// + /// ``` + /// + /// ``` fn pre(&mut self); - + /// 获取key值 + /// + /// # Arguments + /// + /// + /// returns: Slice + /// + /// # Examples + /// + /// ``` + /// + /// ``` fn key(&self) -> &Slice; - + /// 获取value值 + /// + /// # Arguments + /// + /// + /// returns: Slice + /// + /// # Examples + /// + /// ``` + /// + /// ``` fn value(&self) -> &Slice; -} -trait AAA {} +} diff --git a/src/traits/mod.rs b/src/traits/mod.rs index ffe5726d898f6ef4ede5a1cbdd9f7f5bc0e14a96..47c662e6d0774ea213fb7838379a7ae1e0073d24 100644 --- a/src/traits/mod.rs +++ b/src/traits/mod.rs @@ -4,3 +4,5 @@ pub mod comparator_trait; pub mod coding_trait; pub mod filter_policy_trait; +use std::rc::Rc; +pub use iterator::DataIterator; diff --git a/src/util/cache.rs b/src/util/cache.rs new file mode 100644 index 0000000000000000000000000000000000000000..8d69c2d2621a2775d5f270a6f922127093127ca1 --- /dev/null +++ b/src/util/cache.rs @@ -0,0 +1,170 @@ +use std::cell::RefCell; +use std::rc::Rc; +use crate::util::slice::Slice; + +use crate::util::Result; + +pub struct Handle {} + +pub struct LRUHandle { + key: Slice, + value: T, + hash: u32, + in_cache: bool, + key_length: usize, + charge: usize, + prev: Option>>>, + next: Option>>>, + next_hash: Option>>, +} + +impl LRUHandle { + pub fn key(&self) -> Slice { + todo!() + } +} + +pub struct HandleTable { + length: usize, +} + +impl HandleTable { + pub fn look_up(&self, _key: &Slice, _hash: u32) -> &LRUHandle { + todo!() + } + + pub fn insert(&mut self, _handle: LRUHandle) -> &LRUHandle { + todo!() + } + + pub fn remove(&mut self, _key: &Slice, _hash: u32) -> LRUHandle { + todo!() + } + + pub fn length(&self) -> usize { + 
self.length + } + + fn resize(&mut self) { + todo!() + } +} + +pub struct LRUCache { + capacity: usize, + usage: usize, + in_use: LRUHandle, + table: HandleTable, +} + +impl LRUCache { + pub fn set_capacity(&mut self, capacity: usize) { + self.capacity = capacity; + } + + pub fn insert(&mut self, _key: &Slice, _hash: u32, _value: T, _charge: usize) -> &LRUHandle { + todo!() + } + + pub fn look_up(&self, _key: &Slice, _hash: u32) -> &LRUHandle { + todo!() + } + + pub fn release(&mut self, _handle: &LRUHandle) { + todo!() + } + + pub fn erase(&mut self, _key: &Slice, _hash: u32) { + todo!() + } + pub fn prune(&mut self) { + todo!() + } + pub fn total_charge(&self) -> usize { + todo!() + } + + pub fn lru_remove(&mut self, _handle: &LRUHandle) { + todo!() + } + pub fn lru_append(&mut self, _head_of_list: &LRUHandle, _e: LRUHandle) { + todo!() + } + pub fn refer(&self, _e: &LRUHandle) { + todo!() + } + pub fn unref(&self, _e: &LRUHandle) { + todo!() + } +} + +pub trait Cache { + /// 向缓存中插入数据 + /// + /// # Arguments + /// + /// * `key`: 键 + /// * `value`: 值 + /// * `charge`: 长度 + /// * `deleter`: 删除的回调函数 + /// + /// returns: Handle + /// + /// # Examples + /// + /// ``` + /// let element = cache.insert(Slice::from("123"), block, 10, move || {}); + /// ``` + fn insert(&mut self, key: &Slice, value: T, charge: usize, deleter: F) -> Handle + where F: FnOnce(&Slice, T); + + /// 从缓存中读取数据 + /// + /// # Arguments + /// + /// * `key`: 键 + /// + /// returns: Handle + /// + /// # Examples + /// + /// ``` + /// let element = cache.lookup(Slice::from("123")); + /// ``` + fn lookup(&self, key: &Slice) -> Handle; + + /// 从缓存中释放元素 + /// + /// # Arguments + /// + /// * `handle`: 元素 + /// + /// returns: () + /// + /// # Examples + /// + /// ``` + /// cache.release(element); + /// ``` + fn release(&mut self, handle: Handle); + + /// 从缓存中删除元素 + /// + /// # Arguments + /// + /// * `key`: 键 + /// + /// returns: Result<(), Status> + /// + /// # Examples + /// + /// ``` + /// 
cache.erase(Slice::from("123"))?; + /// ``` + fn erase(&mut self, key: &Slice) -> Result<()>; + + fn new_id(&self) -> Result; + fn prune(&mut self) -> Result<()>; + fn total_charge(&self) -> usize; + // fn value(&self, key: Handle) -> Handle; +} \ No newline at end of file diff --git a/src/util/cache_test.rs b/src/util/cache_test.rs new file mode 100644 index 0000000000000000000000000000000000000000..dd63ab23658544ff681ceea772298a1e406ea3c7 --- /dev/null +++ b/src/util/cache_test.rs @@ -0,0 +1,6 @@ +mod test { + #[test] + fn test_insert() { + + } +} \ No newline at end of file diff --git a/src/util/coding.rs b/src/util/coding.rs index c1f85c18eb2b9c359c0c2305823cd415c689d3db..3a13b7dd95b7ac2a1c47de34c178dfbb328487bb 100644 --- a/src/util/coding.rs +++ b/src/util/coding.rs @@ -5,27 +5,27 @@ use crate::util::slice::Slice; macro_rules! varint { ($TYPE: ty, $NAME: ident, $SNAME: expr) => { - fn $NAME(value: $TYPE, buf: &mut [u8], mut offset: usize) -> usize { - while *value >= 128 { - buf[offset] = (*value | 128) as u8; + fn $NAME(mut value: $TYPE, buf: &mut [u8], mut offset: usize) -> usize { + while value >= 128 { + buf[offset] = (value | 128) as u8; offset += 1; - *value >>= 7; + value >>= 7; } - buf[offset] = *value as u8; + buf[offset] = value as u8; offset } - }; + }; - ($TYPE: ty, $NAME: ident) => { - varint!( $TYPE, $NAME, stringify!($NAME)); - } + ($TYPE: ty, $NAME: ident) => { + varint!( $TYPE, $NAME, stringify!($NAME)); + } } pub struct Coding {} impl CodingTrait for Coding { - fn put_fixed32(dst: &mut [u8], mut offset: usize, value: &mut u32) -> usize { + fn put_fixed32(dst: &mut [u8], mut offset: usize, value: u32) -> usize { let mut buf: [u8; 4] = [0, 0, 0, 0]; Self::encode_fixed32(value, &mut buf, 0); dst[offset] = buf[0]; @@ -38,7 +38,7 @@ impl CodingTrait for Coding { offset } - fn put_fixed64(dst: &mut [u8], mut offset: usize, value: &mut u64) -> usize { + fn put_fixed64(dst: &mut [u8], mut offset: usize, value: u64) -> usize { let mut buf: [u8; 8] = 
[0, 0, 0, 0, 0, 0, 0, 0]; Self::encode_fixed64(value, &mut buf, 0); dst[offset] = buf[0]; @@ -59,11 +59,11 @@ impl CodingTrait for Coding { offset } - varint!(&mut u32,encode_varint32); + varint!(u32,encode_varint32); - varint!(&mut u64,encode_varint64); + varint!(u64,encode_varint64); - fn put_varint32(dst: &mut [u8], mut offset: usize, value: &mut u32) -> usize { + fn put_varint32(dst: &mut [u8], mut offset: usize, value: u32) -> usize { let mut buf: [u8; 4] = [0, 0, 0, 0]; let var_offset = Self::encode_varint32(value, &mut buf, 0); for i in 0..var_offset { @@ -73,7 +73,7 @@ impl CodingTrait for Coding { offset } - fn put_varint64(dst: &mut [u8], mut offset: usize, value: &mut u64) -> usize { + fn put_varint64(dst: &mut [u8], mut offset: usize, value: u64) -> usize { let mut buf: [u8; 8] = [0, 0, 0, 0, 0, 0, 0, 0]; let var_offset = Self::encode_varint64(value, &mut buf, 0); for i in 0..var_offset { @@ -83,8 +83,8 @@ impl CodingTrait for Coding { offset } - fn put_length_prefixed_slice(dst: &mut [u8], offset: usize, value: &mut Slice) -> usize { - Self::put_varint64(dst, offset, &mut (value.size() as u64)); + fn put_length_prefixed_slice(dst: &mut [u8], offset: usize, value: Slice) -> usize { + Self::put_varint64(dst, offset, value.size() as u64); offset } @@ -133,43 +133,43 @@ impl CodingTrait for Coding { Slice::from_buf(decode.to_le_bytes().as_mut_slice()) } - fn varint_length(value: &mut u64) -> i32 { + fn varint_length(mut value: u64) -> i32 { let mut len = 1; - while *value >= 128 { - *value >>= 7; + while value >= 128 { + value >>= 7; len += 1; } len } - fn encode_fixed32(value: &mut u32, buf: &mut [u8], mut offset: usize) -> usize { - buf[offset] = *value as u8; + fn encode_fixed32(value: u32, buf: &mut [u8], mut offset: usize) -> usize { + buf[offset] = value as u8; offset += 1; - buf[offset] = (*value >> 8) as u8; + buf[offset] = (value >> 8) as u8; offset += 1; - buf[offset] = (*value >> 16) as u8; + buf[offset] = (value >> 16) as u8; offset += 1; - 
buf[offset] = (*value >> 24) as u8; + buf[offset] = (value >> 24) as u8; offset += 1; offset } - fn encode_fixed64(value: &mut u64, buf: &mut [u8], mut offset: usize) -> usize { - buf[offset] = *value as u8; + fn encode_fixed64(value: u64, buf: &mut [u8], mut offset: usize) -> usize { + buf[offset] = value as u8; offset += 1; - buf[offset] = (*value >> 8) as u8; + buf[offset] = (value >> 8) as u8; offset += 1; - buf[offset] = (*value >> 16) as u8; + buf[offset] = (value >> 16) as u8; offset += 1; - buf[offset] = (*value >> 24) as u8; + buf[offset] = (value >> 24) as u8; offset += 1; - buf[offset] = (*value >> 32) as u8; + buf[offset] = (value >> 32) as u8; offset += 1; - buf[offset] = (*value >> 40) as u8; + buf[offset] = (value >> 40) as u8; offset += 1; - buf[offset] = (*value >> 48) as u8; + buf[offset] = (value >> 48) as u8; offset += 1; - buf[offset] = (*value >> 56) as u8; + buf[offset] = (value >> 56) as u8; offset += 1; offset } @@ -197,11 +197,11 @@ impl CodingTrait for Coding { macro_rules! 
coding_impl { {$TRAIT: ident, $TYPE: ty, $VAR_NAME: ident, $FIXED_NAME: ident} => { impl $TRAIT for $TYPE { - fn varint(mut self, buf: &mut [u8], offset: usize) -> usize { - Coding::$VAR_NAME (&mut self, buf, offset) + fn varint(self, buf: &mut [u8], offset: usize) -> usize { + Coding::$VAR_NAME (self, buf, offset) } - fn fixedint(mut self, buf: &mut [u8], offset: usize) -> usize { - Coding::$FIXED_NAME (&mut self, buf, offset) + fn fixedint(self, buf: &mut [u8], offset: usize) -> usize { + Coding::$FIXED_NAME (self, buf, offset) } } } diff --git a/src/util/coding_test.rs b/src/util/coding_test.rs index 00104de7f2d15464dc49ba7ca2f720d79de0175b..c924acb41d896f96c9038bb993ff5762e28944c9 100644 --- a/src/util/coding_test.rs +++ b/src/util/coding_test.rs @@ -5,16 +5,16 @@ mod test { #[test] fn test_put_fixed32() { let mut dst = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]; - let mut value = 65535; - Coding::put_fixed32(&mut dst, 2, &mut value); + let value = 65535; + Coding::put_fixed32(&mut dst, 2, value); assert_eq!([0, 0, 255, 255, 0, 0, 0, 0, 0, 0, 0, 0] as [u8; 12], dst); } #[test] fn test_put_fixed64() { let mut dst = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]; - let mut value = 65535; - Coding::put_fixed64(&mut dst, 2, &mut value); + let value = 65535; + Coding::put_fixed64(&mut dst, 2, value); assert_eq!([0, 0, 255, 255, 0, 0, 0, 0, 0, 0, 0, 0] as [u8; 12], dst); } @@ -22,7 +22,7 @@ mod test { fn test_put_varint32() { let mut value = 65535; let mut dst = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]; - let offset = Coding::put_varint32(&mut dst, 2, &mut value); + let offset = Coding::put_varint32(&mut dst, 2, value); println!("offset:{:?}", offset); assert_eq!(offset, 4); println!("dst:{:?}", dst); @@ -33,7 +33,7 @@ mod test { fn test_put_varint64() { let mut value = 65535; let mut dst = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]; - let offset = Coding::put_varint64(&mut dst, 2, &mut value); + let offset = Coding::put_varint64(&mut dst, 2, value); println!("offset:{:?}", offset); 
assert_eq!(offset, 4); println!("dst:{:?}", dst); @@ -44,7 +44,7 @@ mod test { fn test_encode_varint32() { let mut buf: [u8; 4] = [0, 0, 0, 0]; let mut value: u32 = 65534; - let offset = Coding::encode_varint32(&mut value, &mut buf, 0); + let offset = Coding::encode_varint32(value, &mut buf, 0); println!("offset:{:?}", offset); assert_eq!(offset, 2); println!("buf:{:?}", buf); @@ -55,7 +55,7 @@ mod test { fn test_encode_varint64() { let mut buf: [u8; 4] = [0, 0, 0, 0]; let mut value: u64 = 65535; - let offset = Coding::encode_varint64(&mut value, &mut buf, 0); + let offset = Coding::encode_varint64(value, &mut buf, 0); println!("offset:{:?}", offset); assert_eq!(offset, 2); println!("buf:{:?}", buf); @@ -66,7 +66,7 @@ mod test { fn test_encode_fixed32() { let mut buf: [u8; 4] = [0, 0, 0, 0]; let mut value: u32 = 65534; - let offset = Coding::encode_fixed32(&mut value, &mut buf, 0); + let offset = Coding::encode_fixed32(value, &mut buf, 0); assert_eq!(offset, 4); println!("offset:{:?}", offset); assert_eq!(buf, [254, 255, 0, 0]); @@ -77,7 +77,7 @@ mod test { fn test_encode_fixed64() { let mut buf: [u8; 8] = [0, 0, 0, 0, 0, 0, 0, 0]; let mut value: u64 = 65535; - let offset = Coding::encode_fixed64(&mut value, &mut buf, 0); + let offset = Coding::encode_fixed64(value, &mut buf, 0); assert_eq!(offset, 8); println!("offset:{:?}", offset); assert_eq!(buf, [255, 255, 0, 0, 0, 0, 0, 0]); @@ -127,7 +127,7 @@ mod test { #[test] fn test_varint_length() { - let len = Coding::varint_length(&mut (65535 as u64)); + let len = Coding::varint_length( 65535 as u64); println!("len: {:?}", len); assert_eq!(len, 3); } @@ -156,7 +156,7 @@ mod test { fn test_decode_fixed32() { let mut value = 65535_u32; let mut buf: [u8; 4] = [0, 0, 0, 0]; - Coding::encode_fixed32(&mut value, &mut buf, 0); + Coding::encode_fixed32(value, &mut buf, 0); let decode = Coding::decode_fixed32(&mut buf); println!("value:{:?}", value); assert_eq!(decode, value); @@ -166,7 +166,7 @@ mod test { fn 
test_decode_fixed64() { let mut value = 65535_u64; let mut buf: [u8; 8] = [0, 0, 0, 0, 0, 0, 0, 0]; - Coding::encode_fixed64(&mut value, &mut buf, 0); + Coding::encode_fixed64(value, &mut buf, 0); let decode = Coding::decode_fixed64(&mut buf); println!("value:{:?}", value); assert_eq!(decode, value); diff --git a/src/util/comparator.rs b/src/util/comparator.rs index 173c68a6ab8a098de67c6c6c721b40e4eafaa2cb..5159a95788853e3d27565a1ff1e08f2913b65a05 100644 --- a/src/util/comparator.rs +++ b/src/util/comparator.rs @@ -1,6 +1,6 @@ use std::cmp::{min, Ordering}; -use crate::traits::comparator_trait::{ComparatorTrait}; +use crate::traits::comparator_trait::{Comparator}; use crate::util::slice::Slice; pub struct BytewiseComparatorImpl {} @@ -15,13 +15,13 @@ impl Default for BytewiseComparatorImpl { } } -impl ComparatorTrait for BytewiseComparatorImpl { +impl Comparator for BytewiseComparatorImpl { fn compare(&self, a: &Slice, b: &Slice) -> Option { a.partial_cmp(b) } - fn get_name() -> String { + fn get_name(&self) -> String { String::from("leveldb.BytewiseComparator") } @@ -88,34 +88,3 @@ impl ComparatorTrait for BytewiseComparatorImpl { Slice::from_buf(key.as_bytes()).into() } } - -/// InternalKeyComparator -pub struct InternalKeyComparator { - // fn user_comparator(&self) -> Box { - // todo!() - // } - - // fn Compare(InternalKey, InternalKey) -} - -/// InternalKeyComparator 比较器: 用来比较内部键(Internal Key)。 -/// 内部键值是为了方便处理,将原普通键、序列号和值类型组成的新键。 -impl ComparatorTrait for InternalKeyComparator { - // todo InternalKeyComparator 的构造方法 - - fn compare(&self, _a: &Slice, _b: &Slice) -> Option { - todo!() - } - - fn get_name() -> String { - String::from("leveldb.InternalKeyComparator") - } - - fn find_shortest_separator(&self, _start: &String, _limit: &Slice) -> String { - todo!() - } - - fn find_short_successor(&self, _key: &String) -> String { - todo!() - } -} diff --git a/src/util/comparator_test.rs b/src/util/comparator_test.rs index 
a9b28072864bafda6df6746c3cf8e86916aea592..e1ad1b62b0e57ea8971d2b56564c216ccaf9e208 100644 --- a/src/util/comparator_test.rs +++ b/src/util/comparator_test.rs @@ -2,13 +2,14 @@ mod test { use std::cmp::Ordering; use std::io::Write; - use crate::traits::comparator_trait::ComparatorTrait; - use crate::util::comparator::{BytewiseComparatorImpl, InternalKeyComparator}; + use crate::traits::comparator_trait::Comparator; + use crate::util::comparator::{BytewiseComparatorImpl}; use crate::util::slice::Slice; + // ##################### BytewiseComparatorImpl test #[test] fn test_bytewise_comparator_impl_get_name() { - let name = BytewiseComparatorImpl::get_name(); + let name = BytewiseComparatorImpl::default().get_name(); println!("get_name: {}", &name); assert_eq!("leveldb.BytewiseComparator", name); } @@ -19,7 +20,7 @@ mod test { let option_val = comp.compare(&Slice::from("a"), &Slice::from("ab")); assert_eq!(option_val.unwrap(), Ordering::Less); - // todo Slice 存在 bug 未修复 + // // todo Slice 存在 bug 未修复 // let comp = BytewiseComparatorImpl::default(); // let option_val = comp.compare(&Slice::from("b"), &Slice::from("abcd")); // assert_eq!(option_val.unwrap(), Ordering::Greater); @@ -122,10 +123,4 @@ mod test { String::from(Slice::from_buf(expect_u8_max_vec.as_slice()))); } - #[test] - fn test_internal_key_comparator_get_name() { - let name = InternalKeyComparator::get_name(); - assert_eq!("leveldb.InternalKeyComparator", name); - } - } \ No newline at end of file diff --git a/src/util/filter_policy.rs b/src/util/filter_policy.rs index 46303bff56dcac8119142fa997a24a70919264c5..3ba1ac8cd58dd096b0d497859885db96c87999fd 100644 --- a/src/util/filter_policy.rs +++ b/src/util/filter_policy.rs @@ -1,33 +1,39 @@ -use std::ops::Mul; +use std::ops::{BitOr, Mul, Shl}; use crate::traits::filter_policy_trait::{FilterPolicy}; use crate::util::hash::{Hash, ToHash}; +use crate::util::r#const::HASH_DEFAULT_SEED; use crate::util::slice::Slice; pub trait FromPolicy { fn 
from_bits_per_key(&self) -> usize; + fn from_k(&self) -> usize; } -pub struct BloomFilterPolicy { - bits_per_key: usize, - k: usize +/// 其他成员的语义扩展 +pub trait AsBloomHash { + #[inline] + fn bloom_hash(&self) -> u32; } -impl<'a> BloomFilterPolicy { - pub fn bloom_hash(key: Slice) -> u32 { - key.to_hash_with_seed(0xbc9f1d34) +/// 实现了 Slice 转 bloom_hash 的特质 +/// Sample: +/// ``` +/// let val = "aabbccd"; +/// let slice: Slice = Slice::from_buf(val.as_bytes()); +/// let hash_val = slice.bloom_hash(); +/// ``` +impl AsBloomHash for Slice { + #[inline] + fn bloom_hash(&self) -> u32 { + BloomFilterPolicy::bloom_hash(self) } } -/// get struct BloomFilterPolicy 属性 -impl FromPolicy for BloomFilterPolicy { - fn from_bits_per_key(&self) -> usize { - self.bits_per_key - } - - fn from_k(&self) -> usize { - self.k - } +// ######################### BloomFilterPolicy +pub struct BloomFilterPolicy { + bits_per_key: usize, + k: usize } impl BloomFilterPolicy { @@ -51,19 +57,37 @@ impl BloomFilterPolicy { } } +impl<'a> BloomFilterPolicy { + pub fn bloom_hash(key: &Slice) -> u32 { + key.to_hash_with_seed(HASH_DEFAULT_SEED) + } +} + +/// get struct BloomFilterPolicy 属性 +impl FromPolicy for BloomFilterPolicy { + fn from_bits_per_key(&self) -> usize { + self.bits_per_key + } + + fn from_k(&self) -> usize { + self.k + } +} + +// dyn FilterPolicy + FromPolicy impl FilterPolicy for BloomFilterPolicy { - fn name() -> String { + fn name(&self) -> String { String::from("leveldb.BuiltinBloomFilter2") } - fn create_filter(&self, keys: Slice, n: usize) -> String { - // 根据指定的参数创建过滤器,并返回结果, 结果为dst的原始内容 + append结果。 - // 参数keys[0,n-1]包含依据用户提供的comparator排序的key列表--可重复, 并把根据这些key创建的filter追加到 dst中。 + fn create_filter(&self, keys: Vec) -> Slice { + let n: usize = keys.len(); + let mut bits: usize = n * self.bits_per_key; - // For small n, we can see a very high false positive rate. Fix it - // by enforcing a minimum bloom filter length. + // For small n, we can see a very high false positive rate. 
+ // Fix it by enforcing a minimum bloom filter length. if bits < 64 { bits = 64; } @@ -71,9 +95,88 @@ impl FilterPolicy for BloomFilterPolicy { let bytes: usize = (bits + 7) / 8; bits = bytes * 8; + let mut dst_chars: Vec = vec![0; bytes + 1]; + dst_chars[bytes] = self.k as u8; + + for i in 0..n { + let slice = keys.get(i).unwrap(); + + let mut h : u32 = slice.bloom_hash(); + let delta : u32 = (h >> 17) | (h << 15); + + for j in 0..self.k { + let bitpos:usize = ((h as usize) % bits); + + // a |= b --> 按位或, 后赋值给a + let position: usize = bitpos / 8; + let mod_val: usize = bitpos % 8; + let val = (1 as u8).wrapping_shl(mod_val as u32); + + dst_chars[position] |= val; + + h = h.wrapping_add(delta); + } + } + + // Vec 转 Slice + Slice::from_buf(&dst_chars) + } + + fn key_may_match(&self, key: &Slice, bloom_filter: &Slice) -> bool { + let filter_size: usize = bloom_filter.size(); + if filter_size < 2 { + return false; + } + + let bloom_filter_array:Vec = bloom_filter.to_vec(); + let bits: usize = (filter_size - 1) * 8; + + // Use the encoded k so that we can read filters generated by bloom filters created using different parameters. + let k: u8 = bloom_filter_array[filter_size - 1]; + if k > 30 { + // Reserved for potentially new encodings for short bloom filters. Consider it a match. 
+ return true; + } + + let mut h : u32 = key.bloom_hash(); + // Rotate right 17 bits + let delta = (h >> 17) | (h << 15); + + for j in 0..k { + let bitpos:usize = ((h as usize) % bits); + if (bloom_filter_array[bitpos/8] & (1 << (bitpos % 8))) == 0 { + return false; + } + + h = h.wrapping_add(delta); + } + + return true; + } +} + +// ######################### InternalFilterPolicy +pub struct InternalFilterPolicy { + user_policy_: dyn FilterPolicy +} + +impl InternalFilterPolicy { + fn new(policy: Box) -> Box { + // InternalFilterPolicy{ user_policy_: policy } + todo!() + } +} + +impl FilterPolicy for InternalFilterPolicy { + fn name(&self) -> String { + todo!() + } + + fn create_filter(&self, keys: Vec) -> Slice { + todo!() } - fn key_may_match(key: &Slice, filter: &Slice) -> bool { + fn key_may_match(&self, key: &Slice, bloom_filter: &Slice) -> bool { todo!() } } \ No newline at end of file diff --git a/src/util/filter_policy_test.rs b/src/util/filter_policy_test.rs index e3247fe2b019cab3dafba7f070b83553926e7a13..e84ee121de60da5aa508025477dcad7d8ab595bd 100644 --- a/src/util/filter_policy_test.rs +++ b/src/util/filter_policy_test.rs @@ -1,15 +1,19 @@ use std::ptr::null; use crate::traits::filter_policy_trait::FilterPolicy; use crate::util::bloom_filter; -use crate::util::filter_policy::{BloomFilterPolicy, FromPolicy}; +use crate::util::filter_policy::{AsBloomHash, BloomFilterPolicy, FromPolicy}; +use crate::util::hash::ToHash; use crate::util::slice::Slice; +// #################### BloomFilterPolicy test #[test] fn test_bloom_hash() { let val = "aabbccd"; let slice: Slice = Slice::from_buf(val.as_bytes()); - let hash_val = BloomFilterPolicy::bloom_hash(slice); + let hash_val = BloomFilterPolicy::bloom_hash(&slice); + let hash_val_1 = slice.bloom_hash(); + assert_eq!(hash_val, hash_val_1); assert_eq!(hash_val, 2085241752); } @@ -22,12 +26,41 @@ fn test_new() { let bloom_filter = BloomFilterPolicy::new(800); assert_eq!(bloom_filter.from_bits_per_key(), 800); 
assert_eq!(bloom_filter.from_k(), 30); - } -// #[test] -// fn test_create_filter() { -// let bloom_filter: BloomFilterPolicy = BloomFilterPolicy::create_filter(8); -// println!("{}", "aa") -// -// } \ No newline at end of file +// #################### FilterPolicy test +#[test] +fn test_create_filter() { + let policy = BloomFilterPolicy::new(800); + + let mut keys : Vec = Vec::new(); + keys.push(Slice::try_from(String::from("hello")).unwrap()); + keys.push(Slice::try_from(String::from("world")).unwrap()); + + let bloom_filter: Slice = policy.create_filter(keys); + + let mut key_may_match = policy.key_may_match( + &Slice::try_from(String::from("hello")).unwrap(), + &bloom_filter); + assert!(key_may_match); + + key_may_match = policy.key_may_match(&Slice::try_from(String::from("world")).unwrap(), + &bloom_filter); + assert!(key_may_match); + + let mut key_not_match = policy.key_may_match(&Slice::try_from(String::from("x")).unwrap(), + &bloom_filter); + assert!(!key_not_match); + + key_not_match = policy.key_may_match(&Slice::try_from(String::from("helloworld")).unwrap(), + &bloom_filter); + assert!(!key_not_match); + + key_not_match = policy.key_may_match(&Slice::try_from(String::from("hello world")).unwrap(), + &bloom_filter); + assert!(!key_not_match); + + key_not_match = policy.key_may_match(&Slice::try_from(String::from("foo")).unwrap(), + &bloom_filter); + assert!(!key_not_match); +} \ No newline at end of file diff --git a/src/util/hash.rs b/src/util/hash.rs index 7d111c7b063c857432121bf8cdf5ceeb635505ba..2b2e0ba316ec83c1b6cad637debd678457a472bc 100644 --- a/src/util/hash.rs +++ b/src/util/hash.rs @@ -147,6 +147,7 @@ impl Hash { h = h.wrapping_add(w); // h *= m h = h.wrapping_mul(murmur_hash); + // Rust的位运算符包括:按位取反(!)、按位与(&)、按位或(|)、按位异或(^)、左移(<<)、右移(>>) // ^ 按位异或 bitxor , >> 右移位 shr, << 左移位 shl // h ^= (h >> 16) == h ^= h.shr(16); h = h.bitxor(h.wrapping_shr(16)); diff --git a/src/util/linked_list.rs b/src/util/linked_list.rs new file mode 100644 index 
0000000000000000000000000000000000000000..a97ac60e505f4b7e64daa58ec292a3c1079f92d5 --- /dev/null +++ b/src/util/linked_list.rs @@ -0,0 +1,429 @@ +use std::fmt::{Display, Formatter}; +use std::marker::PhantomData; +use std::ptr::NonNull; +use crate::util::Result; +use crate::util::slice::Slice; +use crate::util::status::{LevelError, Status}; + +/// 节点 +#[derive(Debug)] +struct Node { + // 节点值. 如果存在这个 Node,则该Node中 val 必定是有值的; 如果 val 的值为空,则这个 Node 就应该为 None + val: T, + // 前驱 + // 因为会出现一个节点同时存在多个可变引用的情况,因此需要使用裸指针(裸指针的包装 NonNull) + prev: Option>>, + // 后继. Option 表示该节点为空,即不存在 prev 前置节点(整个链表为空时)、或不存在next 后置节点(链表的尾节点) + next: Option>>, +} + +/// 双向链表 +#[derive(Debug)] +pub struct LinkedList { + // 双向链表的当前长度 + length: usize, + // 头 + head: Option>>, + // 尾 + tail: Option>>, +} + +pub trait LinkedListBuilder: Default { + /// 构造函数, 构造空的双向链表 + fn new() -> Self; + + fn length(&self) -> usize; + + /// 链表末尾添加元素 + /// + /// 等同于 add_last + /// + /// # Arguments + /// + /// * `val`: 插入的元素 + /// + /// # Examples + /// + /// ``` + /// use std::collections::LinkedList; + /// + /// let mut list: LinkedList = LinkedList::new(); + /// list.add(8); + /// list.add(9); + /// ``` + fn add(&mut self, val: T); + + /// 在链表头部压入元素,等同于 add_first + /// 在将一个元素压入双向链表时,需要注意:我们需要获取元素完整的所有权 + /// + /// # Arguments + /// + /// * `val`: 插入数据 + /// + /// returns: () + /// + /// # Examples + /// + /// ``` + /// use std::collections::LinkedList; + /// + /// let mut list: LinkedList = LinkedList::new(); + /// list.push(8); + /// list.push(9); + /// ``` + fn push(&mut self, val: T); + + /// 元素添加到头部,返回是否成功,成功为 true,失败为 false。 + /// + /// # Arguments + /// + /// * `val`: 插入数据 + /// + /// returns: Result + /// + /// # Examples + /// + /// ``` + /// use std::collections::LinkedList; + /// + /// let mut list: LinkedList = LinkedList::new(); + /// list.add_first(8).unwrap(); + /// list.add_first(9).unwrap(); + /// ``` + fn add_first(&mut self, val: T) -> Result; + + /// 在列表结尾添加元素,返回是否成功,成功为 true,失败为 false。 + fn 
add_last(&mut self, val: T) -> Result; + + /// 向指定位置插入元素。 + /// + /// # Arguments + /// + /// * `position`: 插入位置。 从0开始,小于等于 len + /// * `data`: 插入的数据 + /// + /// returns: Result + /// + /// # Examples + /// + /// ``` + /// use std::collections::LinkedList; + /// + /// let mut list_link: LinkedList = LinkedList::new(); + /// for i in 0..10 { + /// list_link.add_last(i).unwrap(); + /// } + /// // list_link.add_by_position(0, 100).expect("panic message"); + /// // list_link.add_by_position(7, 100).expect("panic message"); + /// list_link.add_by_position(10, 100).expect("panic message"); + /// ``` + fn add_by_position(&mut self, position: usize, data: T) -> Result; + + /// 删除并返回第一个元素。 + fn remove_first(&mut self) -> Result>; + + /// 删除并返回最后一个元素。 + fn remove_last(&mut self) -> Result>; + + /// 删除指定位置的元素并返回。 + fn remove(&mut self, position: usize) -> Result>; + // public boolean remove(Object o) 删除某一元素,返回是否成功,成功为 true,失败为 false。 + + /// 获取列表开头的元素 + fn get_first(&self) -> Result>; + + /// 获取列表结尾的元素 + fn get_last(&self) -> Result>; + + /// 得到第 position 元素, 不可变引用类型 + /// + /// # Arguments + /// + /// * `position`: 插入位置。 从0开始,小于 len + /// + /// returns: Result, Status> + /// + /// # Examples + /// + /// ``` + /// + /// ``` + fn get(&self, position: usize) -> Result>; + + /// 得到第 position 元素, 返回可变引用类型 + fn get_mut(&self, position: usize) -> Result>; + + // jdk LinkedList 中的方法: + // public E set(int index, E element) replace, 用指定的元素替换此列表中指定位置的元素。 + // public boolean addAll(Collection c) 将一个集合的所有元素添加到链表后面,返回是否成功,成功为 true,失败为 false。 + // public boolean addAll(int index, Collection c) 将一个集合的所有元素添加到链表的指定位置后面,返回是否成功,成功为 true,失败为 false。 + + // public void clear() 清空链表。 + // public boolean removeAll(Collection c) 删除此集合的所有元素,这些元素也包含在指定的集合中(可选操作)。 此调用返回后,此集合将不包含与指定集合相同的元素。 + // public boolean retainAll(Collection c) 仅保留此集合中包含在指定集合中的元素(可选操作)。 换句话说,从此集合中删除未包含在指定集合中的所有元素。 + // public String toString() 返回此集合的字符串表示形式。 字符串表示由一个集合元素的列表组成,它们的迭代器返回它们的顺序,用方括号括起来( "[]" )。 相邻元素由字符", " (逗号和空格)分隔。 
元素将转换为字符串,如String.valueOf(Object)所示 。 + + // public boolean offer(E e) 向链表末尾添加元素,返回是否成功,成功为 true,失败为 false。 + // public boolean offerFirst(E e) 头部插入元素,返回是否成功,成功为 true,失败为 false。 + // public boolean offerLast(E e) 尾部插入元素,返回是否成功,成功为 true,失败为 false。 + + // public E poll() 删除并返回第一个元素。 + // public E pollFirst() 检索并删除此列表的第一个元素,如果此列表为空,则返回 null 。 + // public E pollLast() 检索并删除此列表的最后一个元素,如果此列表为空,则返回 null 。 + + // public E pop() 弹出此列表所代表的堆栈中的元素。(将元素从链表中删除,并且返回) + // public E popFirst() + // public E popLast() + + // public E element() 返回第一个元素。 + // public E peek() 返回第一个元素。不可变引用类型 + // public E peek_mut() 返回第一个元素。可变引用类型 + // public E peekFirst() 返回头部元素。 + // public E peekLast() 返回尾部元素。 + + // public Iterator descendingIterator() 返回倒序迭代器。 + // public ListIterator listIterator(int index) 返回从指定位置开始到末尾的迭代器。 + // public Object[] toArray() 返回一个由链表元素组成的数组。 + // public T[] toArray(T[] a) 返回一个由链表元素转换类型而成的数组。 + + // public boolean isEmpty() 如果此集合不包含任何元素,则返回 true 。 + // public boolean contains(Object o) 判断是否含有某一元素。 + + // public int indexOf(Object o) 查找指定元素从前往后第一次出现的索引。 + // public int lastIndexOf(Object o) 查找指定元素最后一次出现的索引。 +} + +impl Node { + fn new(val: T) -> Node { + Node { + val, + prev: None, + next: None, + } + } + + /// 用于将 Box 中的 Node 转为含有所有权的 T 类型 + fn into_val(self: Box) -> T { + self.val + } +} + +impl Default for LinkedList { + /// 构造空的双向链表 + fn default() -> Self { + Self::new() + } +} + +impl LinkedListBuilder for LinkedList { + fn new() -> Self { + Self { + length: 0, + head: None, + tail: None, + } + } + + fn length(&self) -> usize { + self.length + } + + fn add(&mut self, val: T) { + self.add_last(val).unwrap(); + } + + fn push(&mut self, val: T) { + self.add_first(val).unwrap(); + } + + fn add_first(&mut self, val: T) -> Result { + // 使用入参中的 val 创建一个链表节点Node,为了方便后续直接从 Box 获取到 raw ptr 裸指针, 使用 Box 包装 + let mut node = Box::new(Node::new(val)); + + node.next = self.head; + // 新节点node 的上一个元素是空 + node.prev = None; + + // 使用 Box::into_raw(node),将 node 转为裸指针 + let node = 
NonNull::new(Box::into_raw(node)); + + // 判断当前链表的头节点是否为空 + match self.head { + // 如果为空,则将链表的尾节点指向这个新节点 + None => self.tail = node, + // 如果头节点不为空,则需要将当前链表头节点的前一个元素赋值为新的节点 + Some(head) => unsafe { (*head.as_ptr()).prev = node }, + } + + // 将新的节点设为链表的头节点,链表长度加一 + self.head = node; + self.length += 1; + + Ok(true) + } + + fn add_last(&mut self, val: T) -> Result { + let mut node = Box::new(Node::new(val)); + + node.next = None; + node.prev = self.tail; + let node = NonNull::new(Box::into_raw(node)); + + match self.tail { + None => self.head = node, + Some(tail) => unsafe { (*tail.as_ptr()).next = node }, + } + + self.tail = node; + self.length += 1; + + Ok(true) + } + + fn add_by_position(&mut self, position: usize, data: T) -> Result { + let len = self.length; + + if position > len { + return Err(Status::wrapper(LevelError::KInvalidArgument, + Slice::from(String::from("IndexOutOfRange")))); + } + + if position == 0 { + return self.add_first(data); + } else if position == len { + return self.add_last(data); + } + + unsafe { + // Create Node + let mut spliced_node = Box::new(Node::new(data)); + + let before_node = self.find_by_position_mut(position - 1)?; + let after_node = before_node.unwrap().as_mut().next; + spliced_node.prev = before_node; + spliced_node.next = after_node; + let spliced_node = NonNull::new(Box::into_raw(spliced_node)); + + // Insert Node + before_node.unwrap().as_mut().next = spliced_node; + after_node.unwrap().as_mut().prev = spliced_node; + } + + self.length += 1; + + Ok(true) + } + + fn remove_first(&mut self) -> Result> { + todo!() + } + + fn remove_last(&mut self) -> Result> { + todo!() + } + + fn remove(&mut self, position: usize) -> Result> { + todo!() + } + + fn get_first(&self) -> Result> { + todo!() + } + + fn get_last(&self) -> Result> { + todo!() + } + + fn get(&self, position: usize) -> Result> { + let len = self.length; + + if position >= len { + return Err(Status::wrapper(LevelError::KInvalidArgument, + 
Slice::from(String::from("IndexOutOfRange")))); + } + + // Iterate towards the node at the given index, either from the start or the end, + // depending on which would be faster. + let offset_from_end = len - position - 1; + let mut cur; + if position <= offset_from_end { + // Head to Tail + cur = self.head; + for _ in 0..position { + match cur.take() { + None => { + cur = self.head; + } + Some(current) => unsafe { + cur = current.as_ref().next; + }, + } + } + } else { + // Tail to Head + cur = self.tail; + for _ in 0..offset_from_end { + match cur.take() { + None => { + cur = self.tail; + } + Some(current) => unsafe { + cur = current.as_ref().prev; + }, + } + } + } + + unsafe { Ok(cur.as_ref().map(|node| &node.as_ref().val)) } + } + + fn get_mut(&self, position: usize) -> Result> { + let mut cur = self.find_by_position_mut(position)?; + unsafe { Ok(cur.as_mut().map(|node| &mut node.as_mut().val)) } + } +} + +impl LinkedList { + fn find_by_position_mut(&self, position: usize) -> Result>>> { + let len = self.length; + + if position >= len { + return Err(Status::wrapper(LevelError::KInvalidArgument, + Slice::from(String::from("IndexOutOfRange")))); + } + + // Iterate towards the node at the given index, either from the start or the end, + // depending on which would be faster. 
+ let offset_from_end = len - position - 1; + let mut cur; + if position <= offset_from_end { + // Head to Tail + cur = self.head; + for _ in 0..position { + match cur.take() { + None => { + cur = self.head; + } + Some(current) => unsafe { + cur = current.as_ref().next; + }, + } + } + } else { + // Tail to Head + cur = self.tail; + for _ in 0..offset_from_end { + match cur.take() { + None => { + cur = self.tail; + } + Some(current) => unsafe { + cur = current.as_ref().prev; + }, + } + } + } + + Ok(cur) + } +} diff --git a/src/util/linked_list_test.rs b/src/util/linked_list_test.rs new file mode 100644 index 0000000000000000000000000000000000000000..8eb30c30b919ae71977a632d80a27257b1427dc4 --- /dev/null +++ b/src/util/linked_list_test.rs @@ -0,0 +1,138 @@ + +mod test { + use std::borrow::Borrow; + use crate::util::linked_list::{LinkedList, LinkedListBuilder}; + use crate::util::slice::Slice; + + #[test] + fn test_() { + let list_i32: LinkedList = LinkedList::default(); + let list_i32_1: LinkedList = LinkedList::new(); + println!("linked list_i32: {:#?}", list_i32); + println!("linked list_i32_1: {:#?}", list_i32_1); + + let mut list:LinkedList = LinkedList::new(); + + println!("linked list: {:#?}", list); + } + + #[test] + fn test_push() { + let mut list: LinkedList = LinkedList::new(); + list.push(8); + list.push(9); + println!("linked list: {:#?}", list); + + assert_eq!(list.length(), 2); + + let first = list.get(0).unwrap().unwrap(); + assert!(first.eq(9.borrow())); + + let second = list.get(1).unwrap().unwrap(); + assert!(second.eq(8.borrow())); + } + + #[test] + fn test_add_first() { + let mut list: LinkedList = LinkedList::new(); + list.add_first(8).unwrap(); + list.add_first(9).unwrap(); + println!("linked list: {:#?}", list); + + assert_eq!(list.length(), 2); + + let first = list.get(0).unwrap().unwrap(); + assert!(first.eq(9.borrow())); + + let second = list.get(1).unwrap().unwrap(); + assert!(second.eq(8.borrow())); + } + + #[test] + fn test_add() { + let 
mut list: LinkedList = LinkedList::new(); + list.add(8); + list.add(9); + println!("linked list: {:#?}", list); + + assert_eq!(list.length(), 2); + + let first = list.get(0).unwrap().unwrap(); + assert!(first.eq(8.borrow())); + + let second = list.get(1).unwrap().unwrap(); + assert!(second.eq(9.borrow())); + } + + #[test] + fn test_add_last() { + let mut list: LinkedList = LinkedList::new(); + list.add_last(8).unwrap(); + list.add_last(9).unwrap(); + println!("linked list: {:#?}", list); + + assert_eq!(list.length(), 2); + + let first = list.get(0).unwrap().unwrap(); + assert!(first.eq(8.borrow())); + + let second = list.get(1).unwrap().unwrap(); + assert!(second.eq(9.borrow())); + } + + #[test] + fn test_add_by_position() { + // 头部添加 + let mut list_first: LinkedList = LinkedList::new(); + for i in 0..10 { + list_first.add_last(i).unwrap(); + } + list_first.add_by_position(0, 100).expect("TODO: panic message"); + assert_eq!(list_first.length(), 11); + + let first = list_first.get(0).unwrap().unwrap(); + assert!(first.eq(100.borrow())); + let second = list_first.get(1).unwrap().unwrap(); + assert!(second.eq(0.borrow())); + + // 尾部添加 + let mut list_second: LinkedList = LinkedList::new(); + for i in 0..10 { + list_second.add_last(i).unwrap(); + } + list_second.add_by_position(10, 100).expect("TODO: panic message"); + assert_eq!(list_second.length(), 11); + + let first = list_second.get(0).unwrap().unwrap(); + assert!(first.eq(0.borrow())); + let mut second = list_second.get(1).unwrap().unwrap(); + assert!(second.eq(1.borrow())); + second = list_second.get(9).unwrap().unwrap(); + assert!(second.eq(9.borrow())); + let thrid = list_second.get(10).unwrap().unwrap(); + assert!(thrid.eq(100.borrow())); + let thrid1 = list_second.get(list_second.length()-1).unwrap().unwrap(); + assert!(thrid1.eq(100.borrow())); + + // 中间位置添加 + let mut list_random: LinkedList = LinkedList::new(); + for i in 0..10 { + list_random.add_last(i).unwrap(); + } + list_random.add_by_position(7, 
100).expect("TODO: panic message"); + assert_eq!(list_random.length(), 11); + + let first = list_random.get(0).unwrap().unwrap(); + assert!(first.eq(0.borrow())); + let second = list_random.get(1).unwrap().unwrap(); + assert!(second.eq(1.borrow())); + let thrid = list_random.get(7).unwrap().unwrap(); + assert!(thrid.eq(100.borrow())); + let dt = list_random.get(8).unwrap().unwrap(); + assert!(dt.eq(7.borrow())); + let mut fo = list_random.get(10).unwrap().unwrap(); + assert!(fo.eq(9.borrow())); + fo = list_random.get(list_random.length()-1).unwrap().unwrap(); + assert!(fo.eq(9.borrow())); + } +} \ No newline at end of file diff --git a/src/util/mod.rs b/src/util/mod.rs index cb17c906aae7e8102a3beda4c8cc2dee0d572ff7..3291ada92977f5b76f9281e8d6ab7940be6adf22 100644 --- a/src/util/mod.rs +++ b/src/util/mod.rs @@ -1,7 +1,9 @@ +use std::rc::Rc; use std::result; pub use arena::Arena; +use crate::util::comparator::{BytewiseComparatorImpl}; use crate::util::status::Status; /// 常量定义 @@ -9,6 +11,8 @@ pub mod r#const; pub mod slice; mod slice_test; +pub mod cache; +mod cache_test; pub mod coding; mod coding_test; pub mod arena; @@ -25,12 +29,17 @@ mod bloom_filter_test; pub mod filter_policy; mod filter_policy_test; -/// 定义别名 -pub type Result = result::Result; - pub mod histogram; mod histogram_test; mod hash; mod hash_test; mod mutex_lock; mod mutex_lock_test; +pub mod random; +mod random_test; +pub mod options; +pub mod linked_list; +mod linked_list_test; + +/// 定义别名 +pub type Result = result::Result; diff --git a/src/util/options.rs b/src/util/options.rs new file mode 100644 index 0000000000000000000000000000000000000000..a63c9f675ae203452e3f73603a0f13cdb4024253 --- /dev/null +++ b/src/util/options.rs @@ -0,0 +1,169 @@ +use crate::db::db::Snapshot; +use crate::traits::comparator_trait::Comparator; +use crate::util::comparator::BytewiseComparatorImpl; + +pub enum CompressionType { + NoCompression, + SnappyCompression +} + +/// TODO temp +pub struct Env {} + +pub struct 
Cache {} + +pub struct FilterPolicy {} + +pub struct Options { + + /// Comparator used to define the order of keys in the table. + /// Default: a comparator that uses lexicographic byte-wise ordering + /// + /// REQUIRES: The client must ensure that the comparator supplied + /// here has the same name and orders keys *exactly* the same as the + /// comparator provided to previous open calls on the same DB. + pub cmp: Box, + /// If true, the database will be created if it is missing. + pub create_if_missing: bool, + /// If true, an error is raised if the database already exists. + pub error_if_exists: bool, + /// If true, the implementation will do aggressive checking of the + /// data it is processing and will stop early if it detects any + /// errors. This may have unforeseen ramifications: for example, a + /// corruption of one DB entry may cause a large number of entries to + /// become unreadable or for the entire DB to become unopenable. + pub paranoid_checks: bool, + /// Use the specified object to interact with the environment, + /// e.g. to read/write files, schedule background work, etc. + /// Default: Env::Default() + pub env: Env, + + /// Amount of data to build up in memory (backed by an unsorted log + /// on disk) before converting to a sorted on-disk file. + /// /// Larger values increase performance, especially during bulk loads. + /// Up to two write buffers may be held in memory at the same time, + /// so you may wish to adjust this parameter to control memory usage. + /// Also, a larger write buffer will result in a longer recovery time + /// the next time the database is opened. + pub write_buffer_size: usize, + /// Number of open files that can be used by the DB. You may need to + /// increase this if your database has a large working set (budget + /// one open file per 2MB of working set). + pub max_open_files: u32, + /// Control over blocks (user data is stored in a set of blocks, and + /// a block is the unit of reading from disk). 
+ + /// If non-null, use the specified cache for blocks. + /// If null, leveldb will automatically create and use an 8MB internal cache. + pub block_cache: Option, + + /// Approximate size of user data packed per block. Note that the + /// block size specified here corresponds to uncompressed data. The + /// actual size of the unit read from disk may be smaller if + /// compression is enabled. This parameter can be changed dynamically. + pub block_size: usize, + /// Number of keys between restart points for delta encoding of keys. + /// This parameter can be changed dynamically. Most clients should + /// leave this parameter alone. + pub block_restart_interval: u32, + /// Leveldb will write up to this amount of bytes to a file before + /// switching to a new one. + /// Most clients should leave this parameter alone. However if your + /// filesystem is more efficient with larger files, you could + /// consider increasing the value. The downside will be longer + /// compactions and hence longer latency/performance hiccups. + /// Another reason to increase this parameter might be when you are + /// initially populating a large database. + pub max_file_size: usize, + /// Compress blocks using the specified compression algorithm. This + /// parameter can be changed dynamically. + /// /// Default: kSnappyCompression, which gives lightweight but fast + /// compression. + /// /// Typical speeds of kSnappyCompression on an Intel(R) Core(TM)2 2.4GHz: + /// ~200-500MB/s compression + /// ~400-800MB/s decompression + /// Note that these speeds are significantly faster than most + /// persistent storage speeds, and therefore it is typically never + /// worth switching to kNoCompression. Even if the input data is + /// incompressible, the kSnappyCompression implementation will + /// efficiently detect that and will switch to uncompressed mode. 
+ pub compression: CompressionType, + /// EXPERIMENTAL: If true, append to existing MANIFEST and log files + /// when a database is opened. This can significantly speed up open. + /// /// Default: currently false, but may become true later. + pub reuse_logs: bool, + /// If non-null, use the specified filter policy to reduce disk reads. + /// Many applications will benefit from passing the result of + /// NewBloomFilterPolicy() here. + pub filter_policy: Option, +} +/// Options that control read operations +pub struct ReadOptions { + /// If true, all data read from underlying storage will be + /// verified against corresponding checksums. + pub verify_checksums: bool, + /// Should the data read for this iteration be cached in memory? + /// Callers may wish to set this field to false for bulk scans. + pub fill_cache: bool, + /// If "snapshot" is non-null, read as of the supplied snapshot + /// (which must belong to the DB that is being read and which must + /// not have been released). If "snapshot" is null, use an implicit + /// snapshot of the state at the beginning of this read operation. + pub snapshot: Option>, +} + +/// Options that control write operations +pub struct WriteOptions { + /// If true, the write will be flushed from the operating system + /// buffer cache (by calling WritableFile::Sync()) before the write + /// is considered complete. If this flag is true, writes will be + /// slower. + /// /// If this flag is false, and the machine crashes, some recent + /// writes may be lost. Note that if it is just the process that + /// crashes (i.e., the machine does not reboot), no writes will be + /// lost even if sync==false. + /// /// In other words, a DB write with sync==false has similar + /// crash semantics as the "write()" system call. A DB write + /// with sync==true has similar crash semantics to a "write()" + /// system call followed by "fsync()". 
/// A small deterministic pseudo-random number generator.
///
/// The state advances as `seed = (seed * 16807) mod (2^31 - 1)`, so the same
/// seed always produces the same sequence. Not suitable for cryptography.
pub struct Random {
    seed: u32,
}

impl Random {
    /// Modulus of the recurrence: `2^31 - 1`.
    const M: u32 = 2_147_483_647;
    /// Multiplier of the recurrence.
    const A: u64 = 16_807;

    /// Creates a generator from the seed `s`.
    ///
    /// Only the low 31 bits of `s` are used. The two seeds for which the
    /// recurrence would get stuck (`0` and `2^31 - 1`) are replaced by `1`.
    ///
    /// # Arguments
    ///
    /// * `s`: seed value
    ///
    /// returns: Random
    ///
    /// # Examples
    ///
    /// ```ignore
    /// let random = Random::new(8192);
    /// ```
    pub fn new(s: u32) -> Random {
        // Keep the state inside the generator's range [0, 2^31 - 1].
        let masked = s & 0x7fff_ffff;
        // Avoid bad seeds: 0 and M are fixed points of the recurrence.
        let seed = if masked == 0 || masked == Self::M {
            1
        } else {
            masked
        };
        Random { seed }
    }

    /// Advances the generator and returns the next value, in `[1, 2^31 - 2]`.
    ///
    /// # Examples
    ///
    /// ```ignore
    /// let num = random.next();
    /// ```
    pub fn next(&mut self) -> u32 {
        // The product needs up to 46 bits, so it must be formed in 64 bits;
        // a 32-bit multiplication would drop the high bits and break the
        // recurrence.
        let product = u64::from(self.seed) * Self::A;
        // Since 2^31 is congruent to 1 modulo M, (high + low) is congruent to
        // product modulo M. The sum is below 2^31 + 2^15, so it fits in u32.
        let mut reduced = ((product >> 31) + (product & u64::from(Self::M))) as u32;
        // The first reduction may exceed M by a small amount; fold it once.
        if reduced > Self::M {
            reduced -= Self::M;
        }
        self.seed = reduced;
        reduced
    }

    /// Returns a pseudo-random boolean (true about half of the time).
    ///
    /// # Examples
    ///
    /// ```ignore
    /// let flag = random.next_bool();
    /// ```
    pub fn next_bool(&mut self) -> bool {
        self.one_in(2)
    }

    /// Draws the next value and reduces it modulo `n`, giving a result in
    /// `[0, n - 1]`.
    ///
    /// # Arguments
    ///
    /// * `n`: modulus, must be non-zero
    ///
    /// returns: u32
    ///
    /// # Panics
    ///
    /// Panics if `n` is 0 (remainder by zero).
    ///
    /// # Examples
    ///
    /// ```ignore
    /// let num = random.uniform(10);
    /// ```
    pub fn uniform(&mut self, n: u32) -> u32 {
        self.next() % n
    }

    /// Draws the next value and reports whether it is divisible by `n`,
    /// i.e. returns true roughly once every `n` calls.
    ///
    /// # Arguments
    ///
    /// * `n`: modulus, must be non-zero
    ///
    /// returns: bool
    ///
    /// # Panics
    ///
    /// Panics if `n` is 0 (remainder by zero).
    ///
    /// # Examples
    ///
    /// ```ignore
    /// let is_zero = random.one_in(2);
    /// ```
    pub fn one_in(&mut self, n: u32) -> bool {
        (self.next() % n) == 0
    }

    /// Returns a value skewed towards small numbers.
    ///
    /// A bit width is first picked uniformly from `[0, max_log]`, then a
    /// value is drawn uniformly from `[0, 2^width - 1]`. The shift amount
    /// wraps modulo 32, so `max_log` is expected to be at most 31.
    ///
    /// # Arguments
    ///
    /// * `max_log`: largest bit width that may be picked
    ///
    /// returns: u32
    ///
    /// # Examples
    ///
    /// ```ignore
    /// let skewed_num = random.skewed(10);
    /// ```
    pub fn skewed(&mut self, max_log: u32) -> u32 {
        let bits = 1_u32.wrapping_shl(self.uniform(max_log + 1));
        self.uniform(bits)
    }
}
get_timestamp(), + false => 32 + } + } + + #[test] + pub fn test_next() { + let mut random = Random::new(get_seed(false)); + let num = random.next(); + println!("{}", num); + assert_eq!(num, 537824); + let num2 = random.next(); + println!("{}", num2); + assert_ne!(num, num2); + assert_eq!(num2, 449273376); + } + + #[test] + pub fn test_uniform() { + let mut random = Random::new(get_seed(false)); + let num = random.uniform(4); + println!("{}", num); + assert_eq!(num, 0); + + let num2 = random.uniform(5); + println!("{}", num2); + assert_eq!(num2, 1); + } + + #[test] + pub fn test_one_in() { + let mut random = Random::new(get_seed(false)); + let one_in = random.one_in(2); + println!("{}", one_in); + assert_eq!(one_in, true); + + let mut random = Random::new(get_seed(false) + 1); + let one_in = random.one_in(2); + println!("{}", one_in); + assert_eq!(one_in, false); + } + + #[test] + pub fn test_next_bool() { + let mut random = Random::new(get_seed(false)); + let num = random.next_bool(); + println!("{}", num); + assert_eq!(num, true); + + let mut random = Random::new(get_seed(false) + 1); + let num = random.next_bool(); + println!("{}", num); + assert_eq!(num, false); + } + + #[test] + pub fn test_skesed() { + let mut random = Random::new(1021); + let base = random.next(); + let num = random.skewed(31); + let uniform = random.uniform(31); + println!("num: {}", num); + println!("base: {}", base); + println!("uniform: {}", uniform); + assert_eq!(num != base, true); + assert_eq!(num != uniform, true); + } +} \ No newline at end of file diff --git a/src/util/status.rs b/src/util/status.rs index e7607f580c7910c12a47e556322920b75b623363..4af55973969df732fb9aa9e9a58cd463d09bd2e1 100644 --- a/src/util/status.rs +++ b/src/util/status.rs @@ -1,5 +1,6 @@ use std::fmt::{Display, Formatter}; use std::io; +use std::ops::Deref; use crate::util::r#const::COLON_WHITE_SPACE; use crate::util::slice::Slice; use crate::util::status::LevelError::{KCorruption, KIOError, KInvalidArgument, 
KNotSupported, KNotFound, KOk, KBadRecord}; @@ -288,6 +289,20 @@ impl LevelError { msg } } + + pub fn get_value(&self) -> i32 { + let le = match self { + KOk => 0, + KNotFound => 1, + KCorruption => 2, + KNotSupported => 3, + KInvalidArgument => 4, + KIOError => 5, + KBadRecord => 6 + }; + + le + } } impl Default for LevelError { @@ -355,21 +370,3 @@ impl Display for LevelError { write!(f, "{}", print) } } - -// impl Deref for LevelError { -// type Target = i32; -// -// /// StatusTrait 解引用到 i32 -// fn deref(&self) -> &Self::Target { -// let le = match self { -// KOk => 0, -// KNotFound(_) => 1, -// KCorruption(_) => 2, -// KNotSupported(_) => 3, -// KInvalidArgument(_) => 4, -// KIOError(_) => 5, -// }; -// -// &*le -// } -// } \ No newline at end of file diff --git a/src/util/status_test.rs b/src/util/status_test.rs index 2ac1030cd30292af50470c762a3b33658f52d5cd..baad8329e2ec23eb0947c41ace8fff6759b67772 100644 --- a/src/util/status_test.rs +++ b/src/util/status_test.rs @@ -32,12 +32,10 @@ mod test { let err: Status = LevelError::invalid_argument(String::from(msg1).into(), String::from(msg2).into()); assert!(&err.is_invalid_argument()); - // assert_eq!(err.into_code(), 4); let err: Status = LevelError::corruption(String::from(msg1).into(), String::from(msg2).into()); assert!(&err.is_corruption()); - // assert_eq!(err.into_code(), 2); let err: Status = LevelError::not_found(String::from(msg1).into(), String::from(msg2).into()); @@ -121,32 +119,38 @@ mod test { fn test_level_error_try_from() -> Result<(), String> { let rs = LevelError::try_from(1)?; assert!(&rs.is_not_found()); + assert_eq!(rs.get_value(), 1); let rs: Result = 1.try_into(); assert!(rs.ok().unwrap().is_not_found()); let rs = LevelError::try_from(0)?; assert!(&rs.is_ok()); + assert_eq!(rs.get_value(), 0); let rs: Result = 0.try_into(); assert!(rs.ok().unwrap().is_ok()); let rs = LevelError::try_from(2)?; assert!(&rs.is_corruption()); + assert_eq!(rs.get_value(), 2); let rs: LevelError = 2.try_into()?; 
assert!(rs.is_corruption()); let rs: LevelError = LevelError::try_from(3)?; assert!(&rs.is_not_supported_error()); + assert_eq!(rs.get_value(), 3); let rs: LevelError = 3.try_into()?; assert!(rs.is_not_supported_error()); let rs = LevelError::try_from(4)?; assert!(&rs.is_invalid_argument()); + assert_eq!(rs.get_value(), 4); let rs = LevelError::try_from(5)?; assert!(&rs.is_io_error()); + assert_eq!(rs.get_value(), 5); - let rs = LevelError::try_from(6); - assert_eq!("Unknown code: 6", rs.err().unwrap()); + let rs = LevelError::try_from(66); + assert_eq!("Unknown code: 66", rs.err().unwrap()); Ok(()) }