修改 Blob 逻辑,主要变更为 Blob 与 objects 文件解耦,store 作为与 objects 文件交互的唯一依赖。具体为:

1. Blob 使用文件内容(content)新建,而不是文件路径(path)
2. workdir 的读写能力由 util::read_workfile 和 util::write_workfile 提供
3. 判断文件是否更改时不再直接计算 hash,而是通过新建一个不保存的 Blob(Blob::dry_new)取得 hash。现在 Hash 算法只由 Store 决定。
This commit is contained in:
HouXiaoxuan
2023-12-28 22:23:44 +08:00
parent b3aea1d0d1
commit f88ab0e33a
9 changed files with 91 additions and 67 deletions

View File

@@ -60,14 +60,14 @@ fn add_a_file(file: &Path, index: &mut Index) {
//文件存在
if !index.contains(file) {
//文件未被跟踪
let blob = Blob::new(file);
let blob = Blob::new(util::read_workfile(file));
index.add(file.to_path_buf(), FileMetaData::new(&blob, file));
println!("add(stage): {}", rel_path.display());
} else {
//文件已被跟踪,可能被修改
if index.is_modified(file) {
//文件被修改,但不一定内容更改
let blob = Blob::new(file); //到这一步才创建blob是为了优化
let blob = Blob::new(util::read_workfile(file)); //到这一步才创建blob是为了优化
if !index.verify_hash(file, &blob.get_hash()) {
//比较hash 确认内容更改
index.update(file.to_path_buf(), FileMetaData::new(&blob, file));

View File

@@ -9,6 +9,11 @@ use crate::{
utils::{head, store, util},
};
/// Restores the blob identified by `hash` into the working-directory file at `path`.
///
/// The blob is loaded with `Blob::load` and its content is handed to
/// `util::write_workfile`.
fn restore_to_file(hash: &Hash, path: &PathBuf) {
    util::write_workfile(Blob::load(hash).get_content(), path);
}
/// 统计[工作区]中相对于target_blobs已删除的文件根据filters进行过滤
fn get_worktree_deleted_files_in_filters(
filters: &Vec<PathBuf>,
@@ -68,7 +73,6 @@ pub fn restore_worktree(filter: Option<&Vec<PathBuf>>, target_blobs: &Vec<(PathB
file_paths.extend(deleted_files); //已删除的文件
let index = Index::get_instance();
let store = store::Store::new();
for path in &file_paths {
assert!(path.is_absolute()); // 绝对路径
@@ -76,7 +80,7 @@ pub fn restore_worktree(filter: Option<&Vec<PathBuf>>, target_blobs: &Vec<(PathB
//文件不存在于workdir
if target_blobs.contains_key(path) {
//文件存在于target_commit (deleted),需要恢复
store.restore_to_file(&target_blobs[path], &path);
restore_to_file(&target_blobs[path], &path);
} else {
//在target_commit和workdir中都不存在(非法路径) 用户输入
println!("fatal: pathspec '{}' did not match any files", path.display());
@@ -85,9 +89,9 @@ pub fn restore_worktree(filter: Option<&Vec<PathBuf>>, target_blobs: &Vec<(PathB
//文件存在有两种情况1.修改 2.新文件
if target_blobs.contains_key(path) {
//文件已修改(modified)
let file_hash = util::calc_file_hash(&path); //TODO tree没有存修改时间所以这里只能用hash判断
if file_hash != target_blobs[path] {
store.restore_to_file(&target_blobs[path], &path);
let dry_blob = Blob::dry_new(util::read_workfile(&path)); //TODO tree没有存修改时间所以这里只能用hash判断
if dry_blob.get_hash() != target_blobs[path] {
restore_to_file(&target_blobs[path], &path);
}
} else {
//新文件也分两种情况1.已跟踪,需要删除 2.未跟踪,保留

View File

@@ -1,7 +1,6 @@
use crate::{
utils::head,
models::{Commit, Index},
utils::util,
models::{Blob, Commit, Index},
utils::{head, util},
};
use colored::Colorize;
use std::path::PathBuf;
@@ -116,7 +115,8 @@ pub fn changes_to_be_staged() -> Changes {
change.deleted.push(util::to_workdir_relative_path(&file));
} else if index.is_modified(&file) {
// 若文件元数据被修改才需要比较暂存区与文件的hash来判别内容修改
if !index.verify_hash(&file, &util::calc_file_hash(&file)) {
let dry_blob = Blob::dry_new(util::read_workfile(&file));
if !index.verify_hash(&file, &dry_blob.get_hash()) {
change.modified.push(util::to_workdir_relative_path(&file));
}
}

View File

@@ -139,7 +139,8 @@ mod test {
/* test 4: switch to branch */
let result = switch_to(test_branch_2.clone(), false);
assert!(result.is_ok());
assert!(status::changes_to_be_staged().is_empty() && status::changes_to_be_committed().is_empty());
assert!(status::changes_to_be_staged().is_empty());
assert!(status::changes_to_be_committed().is_empty());
assert!(match head::current_head() {
head::Head::Branch(branch) => branch == test_branch_2,
_ => false,

View File

@@ -2,11 +2,7 @@ use base64::Engine;
use flate2::{read::GzDecoder, write::GzEncoder, Compression};
use std::io::{Read, Write};
use crate::{
models::Hash,
utils::{store, util},
};
use std::{fs, path::Path};
use crate::{models::Hash, utils::store};
/**Blob<br>
git中最基本的对象他储存一份文件的内容并使用hash作为标识符。
@@ -19,31 +15,46 @@ pub struct Blob {
impl Blob {
/// 从源文件新建blob对象并直接保存到/objects/中
pub fn new(file: &Path) -> Blob {
let data = fs::read_to_string(file).expect(format!("无法读取文件:{:?}", file).as_str());
pub fn new(data: String) -> Blob {
let mut blob = Blob { hash: "".to_string(), data };
blob.save();
blob
}
pub fn load(hash: &String) -> Blob {
/// 从源文件新建blob对象但不保存到/objects/中
pub fn dry_new(data: String) -> Blob {
let mut blob = Blob { hash: "".to_string(), data };
let s = store::Store::new();
let encoded = s.load(hash);
let hash: String = s.dry_save(&Blob::encode(blob.data.clone()));
blob.hash = hash;
blob
}
/// Gzip-compresses `data` at the default compression level and returns the
/// compressed bytes as unpadded standard base64 text.
///
/// # Panics
/// Panics if writing to or finishing the in-memory gzip encoder fails.
fn encode(data: String) -> String {
    let packed = {
        let mut gz = GzEncoder::new(Vec::new(), Compression::default());
        gz.write_all(data.as_bytes()).unwrap();
        gz.finish().unwrap()
    };
    base64::engine::general_purpose::STANDARD_NO_PAD.encode(&packed)
}
/// Decodes unpadded standard base64 text, gunzips the resulting bytes and
/// returns the decompressed content as a `String`.
///
/// # Panics
/// Panics if `encoded` is not valid base64, or if reading the decompressed
/// stream into a `String` fails.
fn decode(encoded: String) -> String {
    let packed = base64::engine::general_purpose::STANDARD_NO_PAD
        .decode(&encoded)
        .unwrap();
    let mut text = String::new();
    GzDecoder::new(packed.as_slice())
        .read_to_string(&mut text)
        .unwrap();
    text
}
pub fn load(hash: &String) -> Blob {
let s = store::Store::new();
let encoded_data = s.load(hash);
let data = Blob::decode(encoded_data);
Blob { hash: hash.clone(), data }
}
/// 写入文件
pub fn save(&mut self) -> Hash {
let s = store::Store::new();
let mut cmopress_encoder = GzEncoder::new(Vec::new(), Compression::default());
cmopress_encoder.write_all(self.data.as_bytes()).unwrap();
let compressed_data = cmopress_encoder.finish().unwrap();
let encoded_data = base64::engine::general_purpose::STANDARD_NO_PAD.encode(&compressed_data);
let hash: String = s.save(&encoded_data);
let hash: String = s.save(&Blob::encode(self.data.clone()));
self.hash = hash;
self.hash.clone()
}
@@ -51,20 +62,21 @@ impl Blob {
/// Returns an owned copy of this blob's hash.
pub fn get_hash(&self) -> String {
    self.hash.to_owned()
}
/// Returns an owned copy of this blob's content.
pub fn get_content(&self) -> String {
    self.data.to_owned()
}
}
#[cfg(test)]
mod test {
use std::path::PathBuf;
use crate::utils::test_util;
#[test]
fn test_save_and_load() {
test_util::setup_test_with_clean_mit();
let test_data = "hello world";
test_util::ensure_test_file(&PathBuf::from("a.txt"), Some(test_data));
let blob = super::Blob::new(&PathBuf::from("a.txt"));
let blob = super::Blob::new(test_data.into());
let blob2 = super::Blob::load(&blob.hash);
assert_eq!(blob2.get_hash(), blob.get_hash());

View File

@@ -233,12 +233,12 @@ mod tests {
test_util::setup_test_with_clean_mit();
let index = Index::get_instance();
let path = PathBuf::from("../mit_test_storage/.mit/HEAD"); //测试../相对路径的处理
index.add(path.clone(), FileMetaData::new(&Blob::new(&path), &path));
index.add(path.clone(), FileMetaData::new(&Blob::new(util::read_workfile(&path)), &path));
let = "中文路径.txt";
test_util::ensure_test_file(Path::new(), None);
let path = PathBuf::from();
index.add(path.clone(), FileMetaData::new(&Blob::new(&path), &path));
index.add(path.clone(), FileMetaData::new(&Blob::new(util::read_workfile(&path)), &path));
index.save();
println!("{:?}", index.entries);
}
@@ -248,7 +248,7 @@ mod tests {
test_util::setup_test_with_empty_workdir();
let index = Index::get_instance();
let path = PathBuf::from(".mit/HEAD");
index.add(path.clone(), FileMetaData::new(&Blob::new(&path), &path));
index.add(path.clone(), FileMetaData::new(&Blob::new(util::read_workfile(&path)), &path));
assert!(Index::new().is_empty()); //未保存前新读取的index应该是空的
index.save();
assert!(!Index::new().is_empty()); //保存后新读取的index不是空的

View File

@@ -137,7 +137,7 @@ mod test {
use crate::{
models::*,
utils::test_util,
utils::{test_util, util},
};
#[test]
@@ -147,8 +147,7 @@ mod test {
for test_file in vec!["b.txt", "mit_src/a.txt", "test/test.txt"] {
let test_file = PathBuf::from(test_file);
test_util::ensure_test_file(&test_file, None);
index.add(test_file.clone(), FileMetaData::new(&Blob::new(&test_file), &test_file));
index.add(test_file.clone(), FileMetaData::new(&Blob::new(&test_file), &test_file));
index.add(test_file.clone(), FileMetaData::new(&Blob::new(util::read_workfile(&test_file)), &test_file));
}
let tree = Tree::new(&index);
@@ -164,7 +163,7 @@ mod test {
for test_file in test_files.clone() {
let test_file = PathBuf::from(test_file);
test_util::ensure_test_file(&test_file, None);
index.add(test_file.clone(), FileMetaData::new(&Blob::new(&test_file), &test_file));
index.add(test_file.clone(), FileMetaData::new(&Blob::new(util::read_workfile(&test_file)), &test_file));
}
let tree = Tree::new(&index);
@@ -185,9 +184,9 @@ mod test {
for test_file in test_files.clone() {
let test_file = PathBuf::from(test_file);
test_util::ensure_test_file(&test_file, None);
let blob = Blob::new(&test_file);
let blob = Blob::new(util::read_workfile(&test_file));
test_blobs.push(blob.clone());
index.add(test_file.clone(), FileMetaData::new(&Blob::new(&test_file), &test_file));
index.add(test_file.clone(), FileMetaData::new(&Blob::new(util::read_workfile(&test_file)), &test_file));
}
let tree = Tree::new(&index);

View File

@@ -1,5 +1,7 @@
use std::path::PathBuf;
use sha1::{Digest, Sha1};
use crate::models::Hash;
use super::util;
@@ -13,6 +15,13 @@ pub struct Store {
* 每一个object文件名与内容的hash值相同
*/
impl Store {
/// Returns the hex-encoded SHA-1 digest of `data`.
fn calc_hash(data: &String) -> String {
    // One-shot digest: equivalent to new() + update() + finalize().
    hex::encode(Sha1::digest(data))
}
pub fn new() -> Store {
util::check_repo_exist();
let store_path = util::get_storage_path().unwrap();
@@ -29,16 +38,6 @@ impl Store {
}
}
/// Writes the stored content for `hash` (mainly a blob) to `file`.
///
/// # Panics
/// Panics if the parent directories cannot be created or the file cannot be written.
pub fn restore_to_file(&self, hash: &Hash, file: &PathBuf) {
    let content = self.load(hash);
    // Make sure the directory hierarchy above `file` exists before writing.
    let mut dir = file.to_path_buf();
    dir.pop();
    std::fs::create_dir_all(dir).unwrap();
    std::fs::write(file, content).unwrap();
}
/** 根据前缀搜索,有歧义时返回 None */
pub fn search(&self, hash: &String) -> Option<Hash> {
if hash.is_empty() {
@@ -65,10 +64,9 @@ impl Store {
}
}
pub fn save(&self, content: &String) -> String {
pub fn save(&self, content: &String) -> Hash {
/* 保存文件内容 */
let hash = util::calc_hash(content);
let hash = Self::calc_hash(content);
let mut path = self.store_path.clone();
path.push("objects");
path.push(&hash);
@@ -82,6 +80,13 @@ impl Store {
Err(_) => panic!("储存库疑似损坏,无法写入文件"),
}
}
/// Returns the hash `content` would be stored under, without writing anything.
pub fn dry_save(&self, content: &String) -> Hash {
    // TODO more such as check
    Self::calc_hash(content)
}
}
#[cfg(test)]
mod tests {

View File

@@ -1,4 +1,4 @@
use sha1::{Digest, Sha1};
use std::{
collections::HashSet,
fs, io,
@@ -10,18 +10,6 @@ use crate::models::{commit::Commit, object::Hash, tree::Tree};
pub const ROOT_DIR: &str = ".mit";
/* tools for mit */
/// Returns the hex-encoded SHA-1 digest of `data`.
pub fn calc_hash(data: &String) -> String {
    // One-shot digest: equivalent to new() + update() + finalize().
    hex::encode(Sha1::digest(data))
}
/// Computes the hash of the file at `path`.
///
/// The file is read as UTF-8 text and the text is hashed with `calc_hash`.
///
/// # Panics
/// Panics with a message naming `path` if the file cannot be read.
pub fn calc_file_hash(path: &Path) -> String {
    // Build the panic message lazily: `expect(&format!(..))` formats (and
    // allocates) the message even when the read succeeds.
    let data = fs::read_to_string(path)
        .unwrap_or_else(|err| panic!("无法读取文件:{}: {:?}", path.display(), err));
    calc_hash(&data)
}
pub fn storage_exist() -> bool {
/*检查是否存在储存库 */
@@ -427,11 +415,25 @@ pub fn is_typeof_commit(hash: Hash) -> bool {
check_object_type(hash) == ObjectType::Commit
}
/// Writes `content` to the working-directory file `file`, creating any
/// missing parent directories first.
///
/// # Panics
/// Panics if the directories cannot be created or the file cannot be written.
pub fn write_workfile(content: String, file: &PathBuf) {
    // Directory that must exist before the file itself can be written.
    let mut dir = file.to_path_buf();
    dir.pop();
    std::fs::create_dir_all(&dir).unwrap();
    std::fs::write(file, content).unwrap();
}
/// Reads the working-directory file `file` and returns its content as a `String`.
///
/// # Panics
/// Panics if the file cannot be read as UTF-8 text; the panic message names
/// the offending path so the failure can be traced to a concrete file.
pub fn read_workfile(file: &Path) -> String {
    std::fs::read_to_string(file)
        .unwrap_or_else(|err| panic!("无法读取文件:{}: {}", file.display(), err))
}
#[cfg(test)]
mod tests {
use crate::{
models::{blob::Blob, index::Index},
utils::{test_util, util::*},
utils::{test_util, util::{*, self}},
};
#[test]
@@ -536,7 +538,8 @@ mod tests {
test_util::setup_test_with_clean_mit();
assert_eq!(check_object_type("123".into()), ObjectType::Invalid);
test_util::ensure_test_file(Path::new("test.txt"), Some("test"));
let hash = Blob::new(get_working_dir().unwrap().join("test.txt").as_path()).get_hash();
let content = util::read_workfile(get_working_dir().unwrap().join("test.txt").as_path());
let hash = Blob::new(content).get_hash();
assert_eq!(check_object_type(hash), ObjectType::Blob);
let mut commit = Commit::new(&Index::get_instance(), vec![], "test".to_string());
assert_eq!(check_object_type(commit.get_tree_hash()), ObjectType::Tree);