From f88ab0e33ae137417b7ef8e7b1320fc1d1651b2e Mon Sep 17 00:00:00 2001 From: HouXiaoxuan Date: Thu, 28 Dec 2023 22:23:44 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E6=94=B9Blob=E9=80=BB=E8=BE=91?= =?UTF-8?q?=EF=BC=8C=E4=B8=BB=E8=A6=81=E5=8F=98=E6=9B=B4=E4=B8=BA=20Blob?= =?UTF-8?q?=20=E4=B8=8E=20objects=E6=96=87=E4=BB=B6=E8=A7=A3=E6=9E=84?= =?UTF-8?q?=E3=80=82store=E4=BD=9C=E4=B8=BA=E4=B8=8Eobjects=E6=96=87?= =?UTF-8?q?=E4=BB=B6=E4=BA=A4=E4=BA=92=E7=9A=84=E5=94=AF=E4=B8=80=E4=BE=9D?= =?UTF-8?q?=E8=B5=96=E3=80=82=E5=85=B7=E4=BD=93=E4=B8=BA=EF=BC=9A?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1. Blob由文件内容(content)新建,而不是由path新建 2. workdir的读写能力由util::read_workfile和util::write_workfile提供 3. 判断文件是否更改时不再直接计算hash,而是通过新建一个不保存的Blob(Blob::dry_new)进行。现在Hash算法只由Store决定。 --- src/commands/add.rs | 4 ++-- src/commands/restore.rs | 14 +++++++----- src/commands/status.rs | 8 +++---- src/commands/switch.rs | 3 ++- src/models/blob.rs | 48 +++++++++++++++++++++++++---------------- src/models/index.rs | 6 +++--- src/models/tree.rs | 11 +++++----- src/utils/store.rs | 31 +++++++++++++++----------- src/utils/util.rs | 33 +++++++++++++++------------- 9 files changed, 91 insertions(+), 67 deletions(-) diff --git a/src/commands/add.rs b/src/commands/add.rs index d3aeea6..b041dc2 100644 --- a/src/commands/add.rs +++ b/src/commands/add.rs @@ -60,14 +60,14 @@ fn add_a_file(file: &Path, index: &mut Index) { //文件存在 if !index.contains(file) { //文件未被跟踪 - let blob = Blob::new(file); + let blob = Blob::new(util::read_workfile(file)); index.add(file.to_path_buf(), FileMetaData::new(&blob, file)); println!("add(stage): {}", rel_path.display()); } else { //文件已被跟踪,可能被修改 if index.is_modified(file) { //文件被修改,但不一定内容更改 - let blob = Blob::new(file); //到这一步才创建blob是为了优化 + let blob = Blob::new(util::read_workfile(file)); //到这一步才创建blob是为了优化 if !index.verify_hash(file, &blob.get_hash()) { //比较hash 确认内容更改 index.update(file.to_path_buf(), FileMetaData::new(&blob, file)); diff --git a/src/commands/restore.rs
b/src/commands/restore.rs index 14cb5dd..0b79292 100644 --- a/src/commands/restore.rs +++ b/src/commands/restore.rs @@ -9,6 +9,11 @@ use crate::{ utils::{head, store, util}, }; +fn restore_to_file(hash: &Hash, path: &PathBuf) { + let blob = Blob::load(hash); + util::write_workfile(blob.get_content(), path); +} + /// 统计[工作区]中相对于target_blobs已删除的文件(根据filters进行过滤) fn get_worktree_deleted_files_in_filters( filters: &Vec, @@ -68,7 +73,6 @@ pub fn restore_worktree(filter: Option<&Vec>, target_blobs: &Vec<(PathB file_paths.extend(deleted_files); //已删除的文件 let index = Index::get_instance(); - let store = store::Store::new(); for path in &file_paths { assert!(path.is_absolute()); // 绝对路径 @@ -76,7 +80,7 @@ pub fn restore_worktree(filter: Option<&Vec>, target_blobs: &Vec<(PathB //文件不存在于workdir if target_blobs.contains_key(path) { //文件存在于target_commit (deleted),需要恢复 - store.restore_to_file(&target_blobs[path], &path); + restore_to_file(&target_blobs[path], &path); } else { //在target_commit和workdir中都不存在(非法路径), 用户输入 println!("fatal: pathspec '{}' did not match any files", path.display()); @@ -85,9 +89,9 @@ pub fn restore_worktree(filter: Option<&Vec>, target_blobs: &Vec<(PathB //文件存在,有两种情况:1.修改 2.新文件 if target_blobs.contains_key(path) { //文件已修改(modified) - let file_hash = util::calc_file_hash(&path); //TODO tree没有存修改时间,所以这里只能用hash判断 - if file_hash != target_blobs[path] { - store.restore_to_file(&target_blobs[path], &path); + let dry_blob = Blob::dry_new(util::read_workfile(&path)); //TODO tree没有存修改时间,所以这里只能用hash判断 + if dry_blob.get_hash() != target_blobs[path] { + restore_to_file(&target_blobs[path], &path); } } else { //新文件,也分两种情况:1.已跟踪,需要删除 2.未跟踪,保留 diff --git a/src/commands/status.rs b/src/commands/status.rs index 21a73ee..f2850c4 100644 --- a/src/commands/status.rs +++ b/src/commands/status.rs @@ -1,7 +1,6 @@ use crate::{ - utils::head, - models::{Commit, Index}, - utils::util, + models::{Blob, Commit, Index}, + utils::{head, util}, }; use colored::Colorize; use 
std::path::PathBuf; @@ -116,7 +115,8 @@ pub fn changes_to_be_staged() -> Changes { change.deleted.push(util::to_workdir_relative_path(&file)); } else if index.is_modified(&file) { // 若文件元数据被修改,才需要比较暂存区与文件的hash来判别内容修改 - if !index.verify_hash(&file, &util::calc_file_hash(&file)) { + let dry_blob = Blob::dry_new(util::read_workfile(&file)); + if !index.verify_hash(&file, &dry_blob.get_hash()) { change.modified.push(util::to_workdir_relative_path(&file)); } } diff --git a/src/commands/switch.rs b/src/commands/switch.rs index 90053a1..69e623c 100644 --- a/src/commands/switch.rs +++ b/src/commands/switch.rs @@ -139,7 +139,8 @@ mod test { /* test 4: switch to branch */ let result = switch_to(test_branch_2.clone(), false); assert!(result.is_ok()); - assert!(status::changes_to_be_staged().is_empty() && status::changes_to_be_committed().is_empty()); + assert!(status::changes_to_be_staged().is_empty()); + assert!(status::changes_to_be_committed().is_empty()); assert!(match head::current_head() { head::Head::Branch(branch) => branch == test_branch_2, _ => false, diff --git a/src/models/blob.rs b/src/models/blob.rs index 3df606a..a365cca 100644 --- a/src/models/blob.rs +++ b/src/models/blob.rs @@ -2,11 +2,7 @@ use base64::Engine; use flate2::{read::GzDecoder, write::GzEncoder, Compression}; use std::io::{Read, Write}; -use crate::{ - models::Hash, - utils::{store, util}, -}; -use std::{fs, path::Path}; +use crate::{models::Hash, utils::store}; /**Blob
git中最基本的对象,他储存一份文件的内容,并使用hash作为标识符。 @@ -19,31 +15,46 @@ pub struct Blob { impl Blob { /// 从源文件新建blob对象,并直接保存到/objects/中 - pub fn new(file: &Path) -> Blob { - let data = fs::read_to_string(file).expect(format!("无法读取文件:{:?}", file).as_str()); + pub fn new(data: String) -> Blob { let mut blob = Blob { hash: "".to_string(), data }; blob.save(); blob } - pub fn load(hash: &String) -> Blob { + /// 从源文件新建blob对象,但不保存到/objects/中 + pub fn dry_new(data: String) -> Blob { + let mut blob = Blob { hash: "".to_string(), data }; let s = store::Store::new(); - let encoded = s.load(hash); + let hash: String = s.dry_save(&Blob::encode(blob.data.clone())); + blob.hash = hash; + blob + } + + fn encode(data: String) -> String { + let mut cmopress_encoder = GzEncoder::new(Vec::new(), Compression::default()); + cmopress_encoder.write_all(data.as_bytes()).unwrap(); + let compressed_data = cmopress_encoder.finish().unwrap(); + base64::engine::general_purpose::STANDARD_NO_PAD.encode(&compressed_data) + } + fn decode(encoded: String) -> String { let compressed_data = base64::engine::general_purpose::STANDARD_NO_PAD.decode(&encoded).unwrap(); let mut decompress_decoder = GzDecoder::new(&compressed_data[..]); let mut data = String::new(); decompress_decoder.read_to_string(&mut data).unwrap(); + data + } + + pub fn load(hash: &String) -> Blob { + let s = store::Store::new(); + let encoded_data = s.load(hash); + let data = Blob::decode(encoded_data); Blob { hash: hash.clone(), data } } /// 写入文件 pub fn save(&mut self) -> Hash { let s = store::Store::new(); - let mut cmopress_encoder = GzEncoder::new(Vec::new(), Compression::default()); - cmopress_encoder.write_all(self.data.as_bytes()).unwrap(); - let compressed_data = cmopress_encoder.finish().unwrap(); - let encoded_data = base64::engine::general_purpose::STANDARD_NO_PAD.encode(&compressed_data); - let hash: String = s.save(&encoded_data); + let hash: String = s.save(&Blob::encode(self.data.clone())); self.hash = hash; self.hash.clone() } @@ 
-51,20 +62,21 @@ impl Blob { pub fn get_hash(&self) -> String { self.hash.clone() } + + pub fn get_content(&self) -> String { + self.data.clone() + } } #[cfg(test)] mod test { - use std::path::PathBuf; - use crate::utils::test_util; #[test] fn test_save_and_load() { test_util::setup_test_with_clean_mit(); let test_data = "hello world"; - test_util::ensure_test_file(&PathBuf::from("a.txt"), Some(test_data)); - let blob = super::Blob::new(&PathBuf::from("a.txt")); + let blob = super::Blob::new(test_data.into()); let blob2 = super::Blob::load(&blob.hash); assert_eq!(blob2.get_hash(), blob.get_hash()); diff --git a/src/models/index.rs b/src/models/index.rs index b6afb98..463ea75 100644 --- a/src/models/index.rs +++ b/src/models/index.rs @@ -233,12 +233,12 @@ mod tests { test_util::setup_test_with_clean_mit(); let index = Index::get_instance(); let path = PathBuf::from("../mit_test_storage/.mit/HEAD"); //测试../相对路径的处理 - index.add(path.clone(), FileMetaData::new(&Blob::new(&path), &path)); + index.add(path.clone(), FileMetaData::new(&Blob::new(util::read_workfile(&path)), &path)); let 中文路径 = "中文路径.txt"; test_util::ensure_test_file(Path::new(中文路径), None); let path = PathBuf::from(中文路径); - index.add(path.clone(), FileMetaData::new(&Blob::new(&path), &path)); + index.add(path.clone(), FileMetaData::new(&Blob::new(util::read_workfile(&path)), &path)); index.save(); println!("{:?}", index.entries); } @@ -248,7 +248,7 @@ mod tests { test_util::setup_test_with_empty_workdir(); let index = Index::get_instance(); let path = PathBuf::from(".mit/HEAD"); - index.add(path.clone(), FileMetaData::new(&Blob::new(&path), &path)); + index.add(path.clone(), FileMetaData::new(&Blob::new(util::read_workfile(&path)), &path)); assert!(Index::new().is_empty()); //未保存前,新读取的index应该是空的 index.save(); assert!(!Index::new().is_empty()); //保存后,新读取的index不是空的 diff --git a/src/models/tree.rs b/src/models/tree.rs index 644c8d8..632ee90 100644 --- a/src/models/tree.rs +++ b/src/models/tree.rs @@ -137,7 
+137,7 @@ mod test { use crate::{ models::*, - utils::test_util, + utils::{test_util, util}, }; #[test] @@ -147,8 +147,7 @@ mod test { for test_file in vec!["b.txt", "mit_src/a.txt", "test/test.txt"] { let test_file = PathBuf::from(test_file); test_util::ensure_test_file(&test_file, None); - index.add(test_file.clone(), FileMetaData::new(&Blob::new(&test_file), &test_file)); - index.add(test_file.clone(), FileMetaData::new(&Blob::new(&test_file), &test_file)); + index.add(test_file.clone(), FileMetaData::new(&Blob::new(util::read_workfile(&test_file)), &test_file)); } let tree = Tree::new(&index); @@ -164,7 +163,7 @@ mod test { for test_file in test_files.clone() { let test_file = PathBuf::from(test_file); test_util::ensure_test_file(&test_file, None); - index.add(test_file.clone(), FileMetaData::new(&Blob::new(&test_file), &test_file)); + index.add(test_file.clone(), FileMetaData::new(&Blob::new(util::read_workfile(&test_file)), &test_file)); } let tree = Tree::new(&index); @@ -185,9 +184,9 @@ mod test { for test_file in test_files.clone() { let test_file = PathBuf::from(test_file); test_util::ensure_test_file(&test_file, None); - let blob = Blob::new(&test_file); + let blob = Blob::new(util::read_workfile(&test_file)); test_blobs.push(blob.clone()); - index.add(test_file.clone(), FileMetaData::new(&Blob::new(&test_file), &test_file)); + index.add(test_file.clone(), FileMetaData::new(&Blob::new(util::read_workfile(&test_file)), &test_file)); } let tree = Tree::new(&index); diff --git a/src/utils/store.rs b/src/utils/store.rs index 8170925..adc64fc 100644 --- a/src/utils/store.rs +++ b/src/utils/store.rs @@ -1,5 +1,7 @@ use std::path::PathBuf; +use sha1::{Digest, Sha1}; + use crate::models::Hash; use super::util; @@ -13,6 +15,13 @@ pub struct Store { * 每一个object文件名与内容的hash值相同 */ impl Store { + fn calc_hash(data: &String) -> String { + let mut hasher = Sha1::new(); + hasher.update(data); + let hash = hasher.finalize(); + hex::encode(hash) + } + pub fn new() -> Store 
{ util::check_repo_exist(); let store_path = util::get_storage_path().unwrap(); @@ -29,16 +38,6 @@ impl Store { } } - /// 将hash对应的文件内容(主要是blob)还原到file - pub fn restore_to_file(&self, hash: &Hash, file: &PathBuf) { - let content = self.load(hash); - // 保证文件层次存在 - let mut parent = file.clone(); - parent.pop(); - std::fs::create_dir_all(parent).unwrap(); - std::fs::write(file, content).unwrap(); - } - /** 根据前缀搜索,有歧义时返回 None */ pub fn search(&self, hash: &String) -> Option { if hash.is_empty() { @@ -65,10 +64,9 @@ impl Store { } } - - pub fn save(&self, content: &String) -> String { + pub fn save(&self, content: &String) -> Hash { /* 保存文件内容 */ - let hash = util::calc_hash(content); + let hash = Self::calc_hash(content); let mut path = self.store_path.clone(); path.push("objects"); path.push(&hash); @@ -82,6 +80,13 @@ impl Store { Err(_) => panic!("储存库疑似损坏,无法写入文件"), } } + + pub fn dry_save(&self, content: &String) -> Hash { + /* 不实际保存文件,返回Hash */ + let hash = Self::calc_hash(content); + // TODO more such as check + hash + } } #[cfg(test)] mod tests { diff --git a/src/utils/util.rs b/src/utils/util.rs index 1520b7d..3405802 100644 --- a/src/utils/util.rs +++ b/src/utils/util.rs @@ -1,4 +1,4 @@ -use sha1::{Digest, Sha1}; + use std::{ collections::HashSet, fs, io, @@ -10,18 +10,6 @@ use crate::models::{commit::Commit, object::Hash, tree::Tree}; pub const ROOT_DIR: &str = ".mit"; /* tools for mit */ -pub fn calc_hash(data: &String) -> String { - let mut hasher = Sha1::new(); - hasher.update(data); - let hash = hasher.finalize(); - hex::encode(hash) -} - -/// 计算文件的hash -pub fn calc_file_hash(path: &Path) -> String { - let data = fs::read_to_string(path).expect(&format!("无法读取文件:{}", path.display())); - calc_hash(&data) -} pub fn storage_exist() -> bool { /*检查是否存在储存库 */ @@ -427,11 +415,25 @@ pub fn is_typeof_commit(hash: Hash) -> bool { check_object_type(hash) == ObjectType::Commit } + +/// 将内容对应的文件内容(主要是blob)还原到file +pub fn write_workfile(content: String, file: &PathBuf) { + 
let mut parent = file.clone(); + parent.pop(); + std::fs::create_dir_all(parent).unwrap(); + std::fs::write(file, content).unwrap(); +} + +/// 从工作区读取文件内容 +pub fn read_workfile(file: &Path) -> String { + std::fs::read_to_string(file).unwrap() +} + #[cfg(test)] mod tests { use crate::{ models::{blob::Blob, index::Index}, - utils::{test_util, util::*}, + utils::{test_util, util::{*, self}}, }; #[test] @@ -536,7 +538,8 @@ mod tests { test_util::setup_test_with_clean_mit(); assert_eq!(check_object_type("123".into()), ObjectType::Invalid); test_util::ensure_test_file(Path::new("test.txt"), Some("test")); - let hash = Blob::new(get_working_dir().unwrap().join("test.txt").as_path()).get_hash(); + let content = util::read_workfile(get_working_dir().unwrap().join("test.txt").as_path()); + let hash = Blob::new(content).get_hash(); assert_eq!(check_object_type(hash), ObjectType::Blob); let mut commit = Commit::new(&Index::get_instance(), vec![], "test".to_string()); assert_eq!(check_object_type(commit.get_tree_hash()), ObjectType::Tree);