From 7ac3d891af647906a8eb4a3f94d95f827df15534 Mon Sep 17 00:00:00 2001 From: Lgf <2246221508@qq.com> Date: Tue, 21 Apr 2026 17:22:57 +0800 Subject: [PATCH] =?UTF-8?q?feat(compaction):=20=E6=B7=BB=E5=8A=A0=E5=9F=BA?= =?UTF-8?q?=E4=BA=8E=E5=A2=93=E7=A2=91=E5=AF=86=E5=BA=A6=E7=9A=84=E6=96=87?= =?UTF-8?q?=E4=BB=B6=E5=8E=8B=E7=BC=A9=E7=AD=96=E7=95=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 添加墓碑条目统计功能,当文件中的墓碑密度超过50%时优先进行压缩 在FileMetaData中新增num_entries和num_tombstones字段记录条目数量 修改VersionSet以支持基于墓碑密度的压缩优先级判断 更新相关文件处理逻辑以维护和传递墓碑统计信息 --- db/builder.cc | 7 +++++++ db/db_impl.cc | 24 +++++++++++++++++++----- db/repair.cc | 10 ++++++++-- db/version_edit.cc | 45 ++++++++++++++++++++++++++++++++++++++------- db/version_edit.h | 32 ++++++++++++++++++++++++++++---- db/version_set.cc | 46 ++++++++++++++++++++++++++++++++++++++++++---- db/version_set.h | 10 +++++++++- 7 files changed, 151 insertions(+), 23 deletions(-) diff --git a/db/builder.cc b/db/builder.cc index e6329e05e4..111a54e21f 100644 --- a/db/builder.cc +++ b/db/builder.cc @@ -18,6 +18,8 @@ Status BuildTable(const std::string& dbname, Env* env, const Options& options, TableCache* table_cache, Iterator* iter, FileMetaData* meta) { Status s; meta->file_size = 0; + meta->num_entries = 0; + meta->num_tombstones = 0; iter->SeekToFirst(); std::string fname = TableFileName(dbname, meta->number); @@ -31,8 +33,13 @@ Status BuildTable(const std::string& dbname, Env* env, const Options& options, TableBuilder* builder = new TableBuilder(options, file); meta->smallest.DecodeFrom(iter->key()); Slice key; + ParsedInternalKey ikey; for (; iter->Valid(); iter->Next()) { key = iter->key(); + meta->num_entries++; + if (ParseInternalKey(key, &ikey) && ikey.type == kTypeDeletion) { + meta->num_tombstones++; + } builder->Add(key, iter->value()); } if (!key.empty()) { diff --git a/db/db_impl.cc b/db/db_impl.cc index ce4ef8276a..71ef43d1cc 100644 --- a/db/db_impl.cc +++ b/db/db_impl.cc @@ -56,6 +56,8 @@ struct DBImpl::CompactionState { struct Output { uint64_t number; uint64_t file_size; + uint64_t num_tombstones; // Number of tombstone entries in output file + uint64_t num_entries; // Total number of entries in output file InternalKey smallest, largest; }; @@ -535,8 +537,8 @@ Status DBImpl::WriteLevel0Table(MemTable* mem, VersionEdit* edit, if (base != nullptr) { level = base->PickLevelForMemTableOutput(min_user_key, max_user_key); } - edit->AddFile(level, meta.number, meta.file_size, meta.smallest, - meta.largest); + edit->AddFile(level, meta.number, meta.file_size, meta.num_tombstones, + meta.num_entries, meta.smallest, meta.largest); } CompactionStats stats; @@ -740,8 +742,8 @@ void DBImpl::BackgroundCompaction() { assert(c->num_input_files(0) == 1); FileMetaData* f = c->input(0, 0); c->edit()->RemoveFile(c->level(), f->number); - c->edit()->AddFile(c->level() + 1, f->number, f->file_size, f->smallest, - f->largest); + c->edit()->AddFile(c->level() + 1, f->number, f->file_size, f->num_tombstones, + f->num_entries, f->smallest, f->largest); status = versions_->LogAndApply(c->edit(), &mutex_); if (!status.ok()) { RecordBackgroundError(status); @@ -813,6 +815,9 @@ Status DBImpl::OpenCompactionOutputFile(CompactionState* compact) { pending_outputs_.insert(file_number); CompactionState::Output out; out.number = file_number; + out.file_size = 0; + out.num_tombstones = 0; + out.num_entries = 0; out.smallest.Clear(); out.largest.Clear(); compact->outputs.push_back(out); @@ -890,6 +895,7 @@ Status DBImpl::InstallCompactionResults(CompactionState* compact) { for (size_t i = 0; i < compact->outputs.size(); i++) { const CompactionState::Output& out = compact->outputs[i]; compact->compaction->edit()->AddFile(level + 1, out.number, out.file_size, + out.num_tombstones, out.num_entries, out.smallest, out.largest); } return versions_->LogAndApply(compact->compaction->edit(), &mutex_); @@ -949,7 +955,8 @@ Status DBImpl::DoCompactionWork(CompactionState* compact) { // Handle key/value, add to state, etc. bool drop = false; - if (!ParseInternalKey(key, &ikey)) { + const bool parsed_ok = ParseInternalKey(key, &ikey); + if (!parsed_ok) { // Do not hide error keys current_user_key.clear(); has_current_user_key = false; @@ -1004,6 +1011,13 @@ Status DBImpl::DoCompactionWork(CompactionState* compact) { compact->current_output()->smallest.DecodeFrom(key); } compact->current_output()->largest.DecodeFrom(key); + + // Update tombstone statistics for output file + compact->current_output()->num_entries++; + if (parsed_ok && ikey.type == kTypeDeletion) { + compact->current_output()->num_tombstones++; + } + compact->builder->Add(key, input->value()); // Close output file if it is big enough diff --git a/db/repair.cc b/db/repair.cc index 97a27c6698..c2b5a54ead 100644 --- a/db/repair.cc +++ b/db/repair.cc @@ -259,6 +259,8 @@ class Repairer { bool empty = true; ParsedInternalKey parsed; t.max_sequence = 0; + t.meta.num_entries = 0; + t.meta.num_tombstones = 0; for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { Slice key = iter->key(); if (!ParseInternalKey(key, &parsed)) { @@ -268,6 +270,10 @@ class Repairer { } counter++; + t.meta.num_entries++; + if (parsed.type == kTypeDeletion) { + t.meta.num_tombstones++; + } if (empty) { empty = false; t.meta.smallest.DecodeFrom(key); @@ -368,8 +374,8 @@ class Repairer { for (size_t i = 0; i < tables_.size(); i++) { // TODO(opt): separate out into multiple levels const TableInfo& t = tables_[i]; - edit_.AddFile(0, t.meta.number, t.meta.file_size, t.meta.smallest, - t.meta.largest); + edit_.AddFile(0, t.meta.number, t.meta.file_size, t.meta.num_tombstones, + t.meta.num_entries, t.meta.smallest, t.meta.largest); } // std::fprintf(stderr, diff --git a/db/version_edit.cc b/db/version_edit.cc index 356ce883ee..0f4fd92a9c 100644 --- a/db/version_edit.cc +++ b/db/version_edit.cc @@ -20,7 +20,8 @@ enum Tag { kDeletedFile = 6, kNewFile = 7, // 8 was used for large value refs - kPrevLogNumber = 9 + kPrevLogNumber = 9, + kNewFileWithStats = 10 // New file with tombstone statistics }; void VersionEdit::Clear() { @@ -75,12 +76,23 @@ void VersionEdit::EncodeTo(std::string* dst) const { for (size_t i = 0; i < new_files_.size(); i++) { const FileMetaData& f = new_files_[i].second; - PutVarint32(dst, kNewFile); - PutVarint32(dst, new_files_[i].first); // level - PutVarint64(dst, f.number); - PutVarint64(dst, f.file_size); - PutLengthPrefixedSlice(dst, f.smallest.Encode()); - PutLengthPrefixedSlice(dst, f.largest.Encode()); + if (f.num_entries > 0 || f.num_tombstones > 0) { + PutVarint32(dst, kNewFileWithStats); + PutVarint32(dst, new_files_[i].first); // level + PutVarint64(dst, f.number); + PutVarint64(dst, f.file_size); + PutVarint64(dst, f.num_tombstones); + PutVarint64(dst, f.num_entries); + PutLengthPrefixedSlice(dst, f.smallest.Encode()); + PutLengthPrefixedSlice(dst, f.largest.Encode()); + } else { + PutVarint32(dst, kNewFile); + PutVarint32(dst, new_files_[i].first); // level + PutVarint64(dst, f.number); + PutVarint64(dst, f.file_size); + PutLengthPrefixedSlice(dst, f.smallest.Encode()); + PutLengthPrefixedSlice(dst, f.largest.Encode()); + } } } @@ -176,6 +188,8 @@ Status VersionEdit::DecodeFrom(const Slice& src) { break; case kNewFile: + f.num_tombstones = 0; + f.num_entries = 0; if (GetLevel(&input, &level) && GetVarint64(&input, &f.number) && GetVarint64(&input, &f.file_size) && GetInternalKey(&input, &f.smallest) && @@ -186,6 +200,19 @@ Status VersionEdit::DecodeFrom(const Slice& src) { } break; + case kNewFileWithStats: + if (GetLevel(&input, &level) && GetVarint64(&input, &f.number) && + GetVarint64(&input, &f.file_size) && + GetVarint64(&input, &f.num_tombstones) && + GetVarint64(&input, &f.num_entries) && + GetInternalKey(&input, &f.smallest) && + GetInternalKey(&input, &f.largest)) { + new_files_.push_back(std::make_pair(level, f)); + } else { + msg = "new-file with stats entry"; + } + break; + default: msg = "unknown tag"; break; @@ -246,6 +273,10 @@ std::string VersionEdit::DebugString() const { AppendNumberTo(&r, f.number); r.append(" "); AppendNumberTo(&r, f.file_size); + r.append(" tombstones:"); + AppendNumberTo(&r, f.num_tombstones); + r.append(" entries:"); + AppendNumberTo(&r, f.num_entries); r.append(" "); r.append(f.smallest.DebugString()); r.append(" .. "); diff --git a/db/version_edit.h b/db/version_edit.h index 137b4b10e0..342e246f91 100644 --- a/db/version_edit.h +++ b/db/version_edit.h @@ -16,14 +16,25 @@ namespace leveldb { class VersionSet; struct FileMetaData { - FileMetaData() : refs(0), allowed_seeks(1 << 30), file_size(0) {} + FileMetaData() + : refs(0), + allowed_seeks(1 << 30), + file_size(0), + num_tombstones(0), + num_entries(0) {} int refs; int allowed_seeks; // Seeks allowed until compaction uint64_t number; - uint64_t file_size; // File size in bytes - InternalKey smallest; // Smallest internal key served by table - InternalKey largest; // Largest internal key served by table + uint64_t file_size; // File size in bytes + uint64_t num_tombstones; // Number of tombstone entries (kTypeDeletion) + uint64_t num_entries; // Total number of entries in the file + InternalKey smallest; // Smallest internal key served by table + InternalKey largest; // Largest internal key served by table + + double TombstoneDensity() const { + return (num_entries > 0) ? static_cast(num_tombstones) / num_entries : 0.0; + } }; class VersionEdit { @@ -70,6 +81,19 @@ class VersionEdit { new_files_.push_back(std::make_pair(level, f)); } + void AddFile(int level, uint64_t file, uint64_t file_size, + uint64_t num_tombstones, uint64_t num_entries, + const InternalKey& smallest, const InternalKey& largest) { + FileMetaData f; + f.number = file; + f.file_size = file_size; + f.num_tombstones = num_tombstones; + f.num_entries = num_entries; + f.smallest = smallest; + f.largest = largest; + new_files_.push_back(std::make_pair(level, f)); + } + // Delete the specified "file" from the specified "level". void RemoveFile(int level, uint64_t file) { deleted_files_.insert(std::make_pair(level, file)); diff --git a/db/version_set.cc b/db/version_set.cc index 4e37bf90e9..35eb39f218 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -1033,6 +1033,15 @@ void VersionSet::Finalize(Version* v) { int best_level = -1; double best_score = -1; + // Reset tombstone compaction fields + v->tombstone_file_to_compact_ = nullptr; + v->tombstone_file_to_compact_level_ = -1; + + // Track the file with highest tombstone density (> 50%) + FileMetaData* highest_tombstone_file = nullptr; + int highest_tombstone_level = -1; + double highest_tombstone_density = 0.5; // Threshold: 50% + for (int level = 0; level < config::kNumLevels - 1; level++) { double score; if (level == 0) { @@ -1054,6 +1063,16 @@ void VersionSet::Finalize(Version* v) { const uint64_t level_bytes = TotalFileSize(v->files_[level]); score = static_cast(level_bytes) / MaxBytesForLevel(options_, level); + + // Check tombstone density for levels > 0 + for (FileMetaData* f : v->files_[level]) { + double density = f->TombstoneDensity(); + if (density > highest_tombstone_density) { + highest_tombstone_density = density; + highest_tombstone_file = f; + highest_tombstone_level = level; + } + } } if (score > best_score) { @@ -1064,6 +1083,12 @@ void VersionSet::Finalize(Version* v) { v->compaction_level_ = best_level; v->compaction_score_ = best_score; + + // Set tombstone compaction file if found (density > 50%) + if (highest_tombstone_file != nullptr) { + v->tombstone_file_to_compact_ = highest_tombstone_file; + v->tombstone_file_to_compact_level_ = highest_tombstone_level; + } } Status VersionSet::WriteSnapshot(log::Writer* log) { @@ -1087,7 +1112,8 @@ Status VersionSet::WriteSnapshot(log::Writer* log) { const std::vector& files = current_->files_[level]; for (size_t i = 0; i < files.size(); i++) { const FileMetaData* f = files[i]; - edit.AddFile(level, f->number, f->file_size, f->smallest, f->largest); + edit.AddFile(level, f->number, f->file_size, f->num_tombstones, + f->num_entries, f->smallest, f->largest); } } @@ -1253,11 +1279,23 @@ Compaction* VersionSet::PickCompaction() { Compaction* c; int level; - // We prefer compactions triggered by too much data in a level over - // the compactions triggered by seeks. + // Priority order: + // 1. Tombstone compaction (density > 50%) - highest priority + // 2. Size compaction (based on file size/number limits) + // 3. Seek compaction (based on seek stats) + const bool tombstone_compaction = (current_->tombstone_file_to_compact_ != nullptr); const bool size_compaction = (current_->compaction_score_ >= 1); const bool seek_compaction = (current_->file_to_compact_ != nullptr); - if (size_compaction) { + + if (tombstone_compaction) { + // Prioritize compaction for files with high tombstone density (> 50%) + level = current_->tombstone_file_to_compact_level_; + assert(level >= 0); + assert(level > 0); // Tombstone compaction only for levels > 0 + assert(level + 1 < config::kNumLevels); + c = new Compaction(options_, level); + c->inputs_[0].push_back(current_->tombstone_file_to_compact_); + } else if (size_compaction) { level = current_->compaction_level_; assert(level >= 0); assert(level + 1 < config::kNumLevels); diff --git a/db/version_set.h b/db/version_set.h index ea0c925da2..6b0a034f30 100644 --- a/db/version_set.h +++ b/db/version_set.h @@ -127,6 +127,8 @@ class Version { refs_(0), file_to_compact_(nullptr), file_to_compact_level_(-1), + tombstone_file_to_compact_(nullptr), + tombstone_file_to_compact_level_(-1), compaction_score_(-1), compaction_level_(-1) {} @@ -157,6 +159,11 @@ class Version { FileMetaData* file_to_compact_; int file_to_compact_level_; + // Next file to compact based on tombstone density. + // When tombstone density > 50%, we prioritize this file for compaction. + FileMetaData* tombstone_file_to_compact_; + int tombstone_file_to_compact_level_; + // Level that should be compacted next and its compaction score. // Score < 1 means compaction is not strictly needed. These fields // are initialized by Finalize(). @@ -251,7 +258,8 @@ class VersionSet { // Returns true iff some level needs a compaction. bool NeedsCompaction() const { Version* v = current_; - return (v->compaction_score_ >= 1) || (v->file_to_compact_ != nullptr); + return (v->compaction_score_ >= 1) || (v->file_to_compact_ != nullptr) || + (v->tombstone_file_to_compact_ != nullptr); } // Add all files listed in any live version to *live.