diff --git a/Cargo.lock b/Cargo.lock index 69791cb6aa2..20aa509fa1a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -305,7 +305,7 @@ dependencies = [ "time 0.3.20", "tokio", "url", - "uuid 1.2.1", + "uuid 1.7.0", ] [[package]] @@ -331,7 +331,7 @@ dependencies = [ "serde_json", "time 0.3.20", "url", - "uuid 1.2.1", + "uuid 1.7.0", ] [[package]] @@ -351,7 +351,7 @@ dependencies = [ "serde_json", "time 0.3.20", "url", - "uuid 1.2.1", + "uuid 1.7.0", ] [[package]] @@ -387,7 +387,7 @@ dependencies = [ "serde_json", "time 0.3.20", "url", - "uuid 1.2.1", + "uuid 1.7.0", ] [[package]] @@ -406,7 +406,7 @@ dependencies = [ "serde_json", "time 0.3.20", "url", - "uuid 1.2.1", + "uuid 1.7.0", ] [[package]] @@ -516,6 +516,7 @@ dependencies = [ "security", "slog", "slog-global", + "sst_importer", "tempdir", "tempfile", "test_pd", @@ -869,6 +870,7 @@ dependencies = [ "semver 1.0.4", "slog", "slog-global", + "sst_importer", "tempfile", "test_pd_client", "test_raftstore", @@ -1380,7 +1382,7 @@ version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bef552e6f588e446098f6ba40d89ac146c8c7b64aade83c051ee00bb5d2bc18d" dependencies = [ - "uuid 1.2.1", + "uuid 1.7.0", ] [[package]] @@ -2838,7 +2840,7 @@ dependencies = [ [[package]] name = "kvproto" version = "0.0.2" -source = "git+https://github.com/pingcap/kvproto.git#705bb9244fd9557b45c0f4f1530ba239c782068b" +source = "git+https://github.com/overvenus/kvproto?branch=resolved-ts-compat#516bc755c6c1ad6c795feb5e569287b65e89079d" dependencies = [ "futures 0.3.15", "grpcio", @@ -3054,15 +3056,6 @@ dependencies = [ "winapi 0.3.9", ] -[[package]] -name = "memmap2" -version = "0.5.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "83faa42c0a078c393f6b29d5db232d8be22776a891f8f56e5284faee4a20b327" -dependencies = [ - "libc 0.2.151", -] - [[package]] name = "memmap2" version = "0.9.3" @@ -3159,14 +3152,13 @@ dependencies = [ [[package]] name = "mio" -version = "0.8.5" +version = "0.8.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5d732bc30207a6423068df043e3d02e0735b155ad7ce1a6f76fe2baa5b158de" +checksum = "a4a650543ca06a924e8b371db273b2756685faae30f8487da1b56505a8f78b0c" dependencies = [ "libc 0.2.151", - "log", "wasi 0.11.0+wasi-snapshot-preview1", - "windows-sys 0.42.0", + "windows-sys 0.48.0", ] [[package]] @@ -3893,9 +3885,9 @@ dependencies = [ [[package]] name = "pprof" -version = "0.11.1" +version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "196ded5d4be535690899a4631cc9f18cdc41b7ebf24a79400f46f48e49a11059" +checksum = "ef5c97c51bd34c7e742402e216abdeb44d415fbe6ae41d56b114723e953711cb" dependencies = [ "backtrace", "cfg-if 1.0.0", @@ -4177,7 +4169,7 @@ dependencies = [ "libc 0.2.151", "log", "lz4-sys", - "memmap2 0.9.3", + "memmap2", "nix 0.26.2", "num-derive 0.4.0", "num-traits", @@ -4676,6 +4668,7 @@ dependencies = [ "security", "slog", "slog-global", + "sst_importer", "tempfile", "test_raftstore", "test_sst_importer", @@ -4925,9 +4918,9 @@ dependencies = [ [[package]] name = "rustc-demangle" -version = "0.1.16" +version = "0.1.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c691c0e608126e00913e33f0ccf3727d5fc84573623b8d65b2df340b5201783" +checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76" [[package]] name = "rustc-hash" @@ -5307,6 +5300,7 @@ dependencies = [ "slog", "slog-global", "snap_recovery", + "sst_importer", "tempfile", "tikv", "tikv_alloc", @@ -5705,21 
+5699,21 @@ dependencies = [ [[package]] name = "symbolic-common" -version = "10.1.1" +version = "12.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac457d054f793cedfde6f32d21d692b8351cfec9084fefd0470c0373f6d799bc" +checksum = "1cccfffbc6bb3bb2d3a26cd2077f4d055f6808d266f9d4d158797a4c60510dfe" dependencies = [ "debugid", - "memmap2 0.5.10", + "memmap2", "stable_deref_trait", - "uuid 1.2.1", + "uuid 1.7.0", ] [[package]] name = "symbolic-demangle" -version = "10.1.1" +version = "12.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48808b846eef84e0ac06365dc620f028ae632355e5dcffc007bf1b2bf5eab17b" +checksum = "76a99812da4020a67e76c4eb41f08c87364c14170495ff780f30dd519c221a68" dependencies = [ "rustc-demangle", "symbolic-common", @@ -5997,6 +5991,7 @@ dependencies = [ "service", "slog", "slog-global", + "sst_importer", "tempfile", "test_pd_client", "test_util", @@ -6046,6 +6041,7 @@ dependencies = [ "service", "slog", "slog-global", + "sst_importer", "tempfile", "test_pd_client", "test_raftstore", @@ -6073,8 +6069,14 @@ dependencies = [ "crc32fast", "engine_rocks", "engine_traits", + "external_storage", + "futures 0.3.15", + "grpcio", "keys", "kvproto", + "tempfile", + "tikv_util", + "txn_types", "uuid 0.8.2", ] @@ -6846,7 +6848,7 @@ dependencies = [ "autocfg", "bytes", "libc 0.2.151", - "mio 0.8.5", + "mio 0.8.11", "num_cpus", "parking_lot 0.12.1", "pin-project-lite", @@ -7175,9 +7177,9 @@ dependencies = [ [[package]] name = "uuid" -version = "1.2.1" +version = "1.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "feb41e78f93363bb2df8b0e86a2ca30eed7806ea16ea0c790d757cf93f79be83" +checksum = "f00cc9702ca12d3c81455259621e676d0f7251cec66a21e98fe2e9a37db93b2a" dependencies = [ "getrandom 0.2.11", ] diff --git a/Cargo.toml b/Cargo.toml index d1f60059580..87c90e82965 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -144,7 +144,7 @@ paste = "1.0" pd_client = { workspace = true } pin-project = "1.0" pnet_datalink = "0.23" -pprof = { version = "0.11", default-features = false, features = [ +pprof = { version = "0.13", default-features = false, features = [ "flamegraph", "protobuf-codec", ] } @@ -249,6 +249,8 @@ procinfo = { git = "https://github.com/tikv/procinfo-rs", rev = "7693954bd1dd86e # After the PR to kvproto is merged, remember to comment this out and run `cargo update -p kvproto`. # [patch.'https://github.com/pingcap/kvproto'] # kvproto = { git = "https://github.com/your_github_id/kvproto", branch = "your_branch" } +[patch.'https://github.com/pingcap/kvproto'] +kvproto = { git = "https://github.com/overvenus/kvproto", branch = "resolved-ts-compat" } # # After the PR to rust-rocksdb is merged, remember to comment this out and run `cargo update -p rocksdb`. 
# [patch.'https://github.com/tikv/rust-rocksdb'] diff --git a/components/backup-stream/Cargo.toml b/components/backup-stream/Cargo.toml index d37ba3cacb6..005e4d1a1a1 100644 --- a/components/backup-stream/Cargo.toml +++ b/components/backup-stream/Cargo.toml @@ -64,6 +64,7 @@ resolved_ts = { workspace = true } security = { path = "../security" } slog = { workspace = true } slog-global = { workspace = true } +sst_importer = { workspace = true } thiserror = "1" tidb_query_datatype = { workspace = true } tikv = { workspace = true } diff --git a/components/cdc/Cargo.toml b/components/cdc/Cargo.toml index eb9de9d4e5d..7248e471afd 100644 --- a/components/cdc/Cargo.toml +++ b/components/cdc/Cargo.toml @@ -58,6 +58,7 @@ security = { workspace = true } semver = "1.0" slog = { workspace = true } slog-global = { workspace = true } +sst_importer = { workspace = true } thiserror = "1.0" tikv = { workspace = true } tikv_kv = { workspace = true } diff --git a/components/error_code/src/sst_importer.rs b/components/error_code/src/sst_importer.rs index 9e568ee00c1..aac2358d86c 100644 --- a/components/error_code/src/sst_importer.rs +++ b/components/error_code/src/sst_importer.rs @@ -22,10 +22,12 @@ define_error_codes!( TTL_LEN_NOT_EQUALS_TO_PAIRS => ("TtlLenNotEqualsToPairs", "", ""), INCOMPATIBLE_API_VERSION => ("IncompatibleApiVersion", "", ""), INVALID_KEY_MODE => ("InvalidKeyMode", "", ""), - RESOURCE_NOT_ENOUTH => ("ResourceNotEnough", "", ""), + RESOURCE_NOT_ENOUGH => ("ResourceNotEnough", "", ""), SUSPENDED => ("Suspended", "this request has been suspended.", "Probably there are some export tools don't support exporting data inserted by `ingest`(say, snapshot backup). Check the user manual and stop them."), REQUEST_TOO_NEW => ("RequestTooNew", "", ""), - REQUEST_TOO_OLD => ("RequestTooOld", "", "") + REQUEST_TOO_OLD => ("RequestTooOld", "", ""), + LEASE_EXPIRED => ("LeaseExpired", "", ""), + INVALID_LEASE => ("InvalidLease", "", "") ); diff --git a/components/raftstore-v2/src/operation/mod.rs b/components/raftstore-v2/src/operation/mod.rs index df82f1abfe9..8165b9e2f28 100644 --- a/components/raftstore-v2/src/operation/mod.rs +++ b/components/raftstore-v2/src/operation/mod.rs @@ -42,16 +42,28 @@ pub mod test_util { use kvproto::{kvrpcpb::ApiVersion, metapb::RegionEpoch, raft_cmdpb::RaftRequestHeader}; use raft::prelude::{Entry, EntryType}; use raftstore::store::simple_write::SimpleWriteEncoder; - use sst_importer::SstImporter; + use sst_importer::{IngestMediator, IngestObserver, Mediator, SstImporter}; use tempfile::TempDir; use super::{CatchUpLogs, SimpleWriteReqEncoder}; use crate::{fsm::ApplyResReporter, router::ApplyRes}; pub fn create_tmp_importer() -> (TempDir, Arc>) { + let mut ingest_mediator = IngestMediator::default(); + let ingest_observer = Arc::new(IngestObserver::default()); + ingest_mediator.register(ingest_observer.clone()); let dir = TempDir::new().unwrap(); let importer = Arc::new( - SstImporter::new(&Default::default(), dir.path(), None, ApiVersion::V1, true).unwrap(), + SstImporter::new( + &Default::default(), + dir.path(), + None, + ApiVersion::V1, + true, + Arc::new(ingest_mediator), + ingest_observer, + ) + .unwrap(), ); (dir, importer) } diff --git a/components/raftstore-v2/tests/integrations/cluster.rs b/components/raftstore-v2/tests/integrations/cluster.rs index 1a52e86f098..13cdf8b78d9 100644 --- a/components/raftstore-v2/tests/integrations/cluster.rs +++ b/components/raftstore-v2/tests/integrations/cluster.rs @@ -48,7 +48,7 @@ use resource_control::{ResourceController, 
ResourceGroupManager}; use resource_metering::CollectorRegHandle; use service::service_manager::GrpcServiceManager; use slog::{debug, o, Logger}; -use sst_importer::SstImporter; +use sst_importer::{IngestMediator, IngestObserver, Mediator, SstImporter}; use tempfile::TempDir; use test_pd::mocker::Service; use tikv_util::{ @@ -326,6 +326,9 @@ impl RunningState { .unwrap(); let coprocessor_host = CoprocessorHost::new(router.store_router().clone(), cop_cfg.value().clone()); + let mut ingest_mediator = IngestMediator::default(); + let ingest_observer = Arc::new(IngestObserver::default()); + ingest_mediator.register(ingest_observer.clone()); let importer = Arc::new( SstImporter::new( &Default::default(), @@ -333,6 +336,8 @@ impl RunningState { key_manager.clone(), ApiVersion::V1, true, + Arc::new(ingest_mediator), + ingest_observer, ) .unwrap(), ); diff --git a/components/raftstore/src/store/fsm/apply.rs b/components/raftstore/src/store/fsm/apply.rs index 2332e8844f0..8f5b4fdf25d 100644 --- a/components/raftstore/src/store/fsm/apply.rs +++ b/components/raftstore/src/store/fsm/apply.rs @@ -5037,7 +5037,7 @@ mod tests { }; use protobuf::Message; use raft::eraftpb::{ConfChange, ConfChangeV2}; - use sst_importer::Config as ImportConfig; + use sst_importer::{Config as ImportConfig, IngestMediator, IngestObserver, Mediator}; use tempfile::{Builder, TempDir}; use test_sst_importer::*; use tikv_util::{ @@ -5074,6 +5074,9 @@ mod tests { } pub fn create_tmp_importer(path: &str) -> (TempDir, Arc>) { + let mut ingest_mediator = IngestMediator::default(); + let ingest_observer = Arc::new(IngestObserver::default()); + ingest_mediator.register(ingest_observer.clone()); let dir = Builder::new().prefix(path).tempdir().unwrap(); let importer = Arc::new( SstImporter::new( @@ -5082,6 +5085,8 @@ mod tests { None, ApiVersion::V1, false, + Arc::new(ingest_mediator), + ingest_observer, ) .unwrap(), ); diff --git a/components/raftstore/src/store/fsm/store.rs b/components/raftstore/src/store/fsm/store.rs index 92618efbd47..397dd33f8d4 100644 --- a/components/raftstore/src/store/fsm/store.rs +++ b/components/raftstore/src/store/fsm/store.rs @@ -36,6 +36,7 @@ use futures::{compat::Future01CompatExt, FutureExt}; use health_controller::{types::LatencyInspector, HealthController}; use keys::{self, data_end_key, data_key, enc_end_key, enc_start_key}; use kvproto::{ + import_sstpb::SstMeta, metapb::{self, Region, RegionEpoch}, pdpb::{self, QueryStats, StoreStats}, raft_cmdpb::{AdminCmdType, AdminRequest}, @@ -3024,6 +3025,7 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER // we will remove 1-week old version 1 SST files. const VERSION_1_SST_CLEANUP_DURATION: Duration = Duration::from_secs(7 * 24 * 60 * 60); +const WARN_LINGER_SST_DURATION: Duration = Duration::from_secs(60); impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER, T> { fn on_cleanup_import_sst(&mut self) -> Result<()> { @@ -3033,6 +3035,8 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER if ssts.is_empty() { return Ok(()); } + let mut longest_linger_sst: Option<(SstMeta, Duration)> = None; + let mut linger_sst_count = 0; let now = SystemTime::now(); { let meta = self.ctx.store_meta.lock().unwrap(); @@ -3042,6 +3046,15 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER if util::is_epoch_stale(sst.0.get_region_epoch(), region_epoch) { // If the SST epoch is stale, it will not be ingested anymore. 
                        delete_ssts.push(sst.0);
+                    } else if let Ok(duration) = now.duration_since(sst.2) {
+                        if duration > WARN_LINGER_SST_DURATION {
+                            linger_sst_count += 1;
+                            if longest_linger_sst.is_none()
+                                || duration > longest_linger_sst.as_ref().unwrap().1
+                            {
+                                longest_linger_sst = Some((sst.0.clone(), duration));
+                            }
+                        }
                     }
                 } else if sst.1 >= sst_importer::API_VERSION_2 {
                     // The write RPC of import sst service have make sure the region do exist at
@@ -3066,6 +3079,14 @@
                 }
             }
         }
+        if let Some((sst, duration)) = longest_linger_sst {
+            warn!(
+                "found lingering import SST file";
+                "linger_sst_count" => linger_sst_count,
+                "longest_linger_sst_meta" => ?sst,
+                "longest_linger_duration" => ?duration,
+            );
+        }
         if !delete_ssts.is_empty() {
             let task = CleanupSstTask::DeleteSst { ssts: delete_ssts };
diff --git a/components/resolved_ts/Cargo.toml b/components/resolved_ts/Cargo.toml
index 8bcca29480d..0ed706ecb7c 100644
--- a/components/resolved_ts/Cargo.toml
+++ b/components/resolved_ts/Cargo.toml
@@ -6,6 +6,7 @@ publish = false
 license = "Apache-2.0"
 
 [features]
+default = ["test-engine-kv-rocksdb", "test-engine-raft-raft-engine"]
 tcmalloc = ["tikv/tcmalloc"]
 jemalloc = ["tikv/jemalloc"]
 mimalloc = ["tikv/mimalloc"]
@@ -44,6 +45,7 @@ raftstore = { workspace = true }
 security = { workspace = true }
 slog = { workspace = true }
 slog-global = { workspace = true }
+sst_importer = { workspace = true }
 thiserror = "1.0"
 tikv = { workspace = true }
 tikv_util = { workspace = true }
diff --git a/components/resolved_ts/src/advance.rs b/components/resolved_ts/src/advance.rs
index 856d042a75d..072b40c77d7 100644
--- a/components/resolved_ts/src/advance.rs
+++ b/components/resolved_ts/src/advance.rs
@@ -30,6 +30,7 @@ use raftstore::{
     store::{msg::Callback, util::RegionReadProgressRegistry},
 };
 use security::SecurityManager;
+use sst_importer::Observer;
 use tikv_util::{
     info,
     sys::thread::ThreadBuildWrapper,
@@ -49,11 +50,12 @@ pub(crate) const DEFAULT_CHECK_LEADER_TIMEOUT_DURATION: Duration = Duration::fro
 const DEFAULT_GRPC_GZIP_COMPRESSION_LEVEL: usize = 2;
 const DEFAULT_GRPC_MIN_MESSAGE_SIZE_TO_COMPRESS: usize = 4096;
 
-pub struct AdvanceTsWorker {
+pub(crate) struct AdvanceTsWorker {
     pd_client: Arc<dyn PdClient>,
     timer: SteadyTimer,
     worker: Runtime,
     scheduler: Scheduler<Task>,
+    ingest_observer: Option<Arc<dyn Observer>>,
     /// The concurrency manager for transactions. It's needed for CDC to check
     /// locks when calculating resolved_ts.
     pub(crate) concurrency_manager: ConcurrencyManager,
@@ -67,6 +69,7 @@ impl AdvanceTsWorker {
         pd_client: Arc<dyn PdClient>,
         scheduler: Scheduler<Task>,
         concurrency_manager: ConcurrencyManager,
+        ingest_observer: Option<Arc<dyn Observer>>,
     ) -> Self {
         let worker = Builder::new_multi_thread()
             .thread_name("advance-ts")
@@ -80,6 +83,7 @@
             pd_client,
             worker,
             timer: SteadyTimer::default(),
+            ingest_observer,
             concurrency_manager,
             last_pd_tso: Arc::new(std::sync::Mutex::new(None)),
         }
@@ -95,6 +99,7 @@
         advance_ts_interval: Duration,
         advance_notify: Arc<Notify>,
     ) {
+        let ingest_observer = self.ingest_observer.clone();
         let cm = self.concurrency_manager.clone();
         let pd_client = self.pd_client.clone();
         let scheduler = self.scheduler.clone();
@@ -125,7 +130,23 @@
             }
         }
 
-            let regions = leader_resolver.resolve(regions, min_ts).await;
+            let mut regions = leader_resolver.resolve(regions, min_ts).await;
+
+            // Skip regions that are currently ingesting SSTs.
+ if let Some(observer) = ingest_observer { + regions.retain(|region_id| { + if let Some(uuid) = observer.get_region_lease(*region_id) { + info!("skip advancing resolved ts due to ingest sst"; + "region_id" => region_id, + "lease_uuid" => ?uuid, + ); + false + } else { + true + } + }); + } + if !regions.is_empty() { if let Err(e) = scheduler.schedule(Task::ResolvedTsAdvanced { regions, diff --git a/components/resolved_ts/src/endpoint.rs b/components/resolved_ts/src/endpoint.rs index dfbafc0531f..38b9ce4436e 100644 --- a/components/resolved_ts/src/endpoint.rs +++ b/components/resolved_ts/src/endpoint.rs @@ -27,6 +27,7 @@ use raftstore::{ }, }; use security::SecurityManager; +use sst_importer::Observer; use tikv::config::ResolvedTsConfig; use tikv_util::{ memory::{HeapSize, MemoryQuota}, @@ -655,13 +656,18 @@ where concurrency_manager: ConcurrencyManager, env: Arc, security_mgr: Arc, + ingest_observer: Option>, ) -> Self { let (region_read_progress, store_id) = { let meta = store_meta.lock().unwrap(); (meta.region_read_progress().clone(), meta.store_id()) }; - let advance_worker = - AdvanceTsWorker::new(pd_client.clone(), scheduler.clone(), concurrency_manager); + let advance_worker = AdvanceTsWorker::new( + pd_client.clone(), + scheduler.clone(), + concurrency_manager, + ingest_observer, + ); let scanner_pool = ScannerPool::new(cfg.scan_lock_pool_size, cdc_handle); let store_resolver_gc_interval = Duration::from_secs(60); let leader_resolver = LeadershipResolver::new( diff --git a/components/resolved_ts/tests/integrations/mod.rs b/components/resolved_ts/tests/integrations/mod.rs index 881d0b299f1..d3ca592f9a3 100644 --- a/components/resolved_ts/tests/integrations/mod.rs +++ b/components/resolved_ts/tests/integrations/mod.rs @@ -1,7 +1,9 @@ // Copyright 2021 TiKV Project Authors. Licensed under Apache-2.0. +mod test_sst_lease; #[path = "../mod.rs"] mod testsuite; + use std::{sync::mpsc::channel, time::Duration}; use futures::executor::block_on; @@ -69,10 +71,15 @@ fn test_resolved_ts_basic() { meta.set_region_id(r1.id); meta.set_region_epoch(sst_epoch); - suite.upload_sst(r1.id, &meta, &data).unwrap(); + let import = suite.get_import_client(r1.id); + must_acquire_sst_lease(import, &meta, Duration::MAX); + let resp = send_upload_sst(import, &meta, &data).unwrap(); + assert!(!resp.has_error(), "{:?}", resp); let tracked_index_before = suite.region_tracked_index(r1.id); - suite.must_ingest_sst(r1.id, meta); + let ctx = suite.get_context(r1.id); + let import = suite.get_import_client(r1.id); + must_ingest_sst(import, ctx, meta); let mut tracked_index_after = suite.region_tracked_index(r1.id); for _ in 0..10 { if tracked_index_after > tracked_index_before { diff --git a/components/resolved_ts/tests/integrations/test_sst_lease.rs b/components/resolved_ts/tests/integrations/test_sst_lease.rs new file mode 100644 index 00000000000..13158984faf --- /dev/null +++ b/components/resolved_ts/tests/integrations/test_sst_lease.rs @@ -0,0 +1,93 @@ +// Copyright 2024 TiKV Project Authors. Licensed under Apache-2.0. + +use std::time::Duration; + +use futures::executor::block_on; +use pd_client::PdClient; +use tempfile::Builder; +use test_raftstore::sleep_ms; +use test_sst_importer::*; + +use super::testsuite::*; + +#[test] +fn test_lease_block_resolved_ts() { + let mut suite = TestSuite::new(1); + let region = suite.cluster.get_region(&[]); + + let initial_ts = block_on(suite.cluster.pd_client.get_tso()).unwrap(); + + // The resolved-ts must advance. 
+ suite.must_get_rts_ge(region.id, initial_ts); + + let temp_dir = Builder::new().tempdir().unwrap(); + let sst_path = temp_dir.path().join("test.sst"); + let sst_range = (0, 100); + let (mut meta, data) = gen_sst_file(sst_path, sst_range); + meta.set_region_id(region.id); + meta.set_region_epoch(region.get_region_epoch().clone()); + // The resolved-ts won't be updated from acquiring sst lease till ingest sst. + let import = suite.get_import_client(region.id); + must_acquire_sst_lease(import, &meta, Duration::MAX); + // The resolved-ts must be less than the latest ts. + let sst_commit_ts = block_on(suite.cluster.pd_client.get_tso()).unwrap(); + let blocked_rts = suite.region_resolved_ts(region.id).unwrap(); + assert!( + sst_commit_ts > blocked_rts, + "{:?}", + (sst_commit_ts, blocked_rts) + ); + + let import = suite.get_import_client(region.id); + let resp = send_upload_sst(import, &meta, &data).unwrap(); + assert!(!resp.has_error(), "{:?}", resp); + // The resolved-ts must be blocked. + suite.must_get_rts(region.id, blocked_rts); + + let ctx = suite.get_context(region.id); + let import = suite.get_import_client(region.id); + must_ingest_sst(import, ctx, meta); + + // The resolved-ts must advance after ingest sst. + suite.must_get_rts_ge(region.id, blocked_rts); + + suite.stop(); +} + +#[test] +fn test_lease_release_unblock_resolved_ts() { + let mut suite = TestSuite::new(1); + let region = suite.cluster.get_region(&[]); + + let initial_ts = block_on(suite.cluster.pd_client.get_tso()).unwrap(); + + // The resolved-ts must advance. + suite.must_get_rts_ge(region.id, initial_ts); + + let temp_dir = Builder::new().tempdir().unwrap(); + let sst_path = temp_dir.path().join("test.sst"); + let sst_range = (0, 100); + let (mut meta, _) = gen_sst_file(sst_path, sst_range); + meta.set_region_id(region.id); + meta.set_region_epoch(region.get_region_epoch().clone()); + // The resolved-ts won't be updated from acquiring sst lease till ingest sst. + let import = suite.get_import_client(region.id); + must_acquire_sst_lease(import, &meta, Duration::MAX); + let blocked_rts1 = suite.region_resolved_ts(region.id).unwrap(); + sleep_ms(100); + // The resolved-ts must be blocked within lease. + suite.must_get_rts(region.id, blocked_rts1); + // Until we explicitly release the lease. + let import = suite.get_import_client(region.id); + must_release_sst_lease(import, &meta); + suite.must_get_rts_ge(region.id, blocked_rts1); + + // Block resolved ts until lease expires. 
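    // (With a 100 ms deadline and no `LeaseRef` held, the lease acquired
    // below simply lapses during the 200 ms sleep; no explicit release is
    // needed before the resolved-ts may advance again.)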
+ let import = suite.get_import_client(region.id); + must_acquire_sst_lease(import, &meta, Duration::from_millis(100)); + let blocked_rts2 = suite.region_resolved_ts(region.id).unwrap(); + sleep_ms(200); + suite.must_get_rts_ge(region.id, blocked_rts2); + + suite.stop(); +} diff --git a/components/resolved_ts/tests/mod.rs b/components/resolved_ts/tests/mod.rs index fc3d5720929..5547fef461a 100644 --- a/components/resolved_ts/tests/mod.rs +++ b/components/resolved_ts/tests/mod.rs @@ -3,23 +3,15 @@ use std::{sync::*, time::Duration}; use collections::HashMap; -use concurrency_manager::ConcurrencyManager; use engine_rocks::RocksEngine; -use futures::{executor::block_on, stream, SinkExt}; -use grpcio::{ChannelBuilder, ClientUnaryReceiver, Environment, Result, WriteFlags}; -use kvproto::{ - import_sstpb::{IngestRequest, SstMeta, UploadRequest, UploadResponse}, - import_sstpb_grpc::ImportSstClient, - kvrpcpb::{PrewriteRequestPessimisticAction::*, *}, - tikvpb::TikvClient, -}; +use grpcio::{ChannelBuilder, Environment}; +use kvproto::{import_sstpb_grpc::ImportSstClient, kvrpcpb::*, tikvpb::TikvClient}; use online_config::ConfigValue; -use raftstore::{coprocessor::CoprocessorHost, router::CdcRaftRouter}; -use resolved_ts::{Observer, Task}; +use resolved_ts::Task; use test_raftstore::*; -use tikv::config::ResolvedTsConfig; -use tikv_util::{worker::LazyWorker, HandyRwLock}; +use tikv_util::{config::ReadableDuration, HandyRwLock}; use txn_types::TimeStamp; + static INIT: Once = Once::new(); pub fn init() { @@ -28,11 +20,8 @@ pub fn init() { pub struct TestSuite { pub cluster: Cluster>, - pub endpoints: HashMap>, - pub obs: HashMap, tikv_cli: HashMap, import_cli: HashMap, - concurrency_managers: HashMap, env: Arc, } @@ -42,68 +31,14 @@ impl TestSuite { let mut cluster = new_server_cluster(1, count); // Increase the Raft tick interval to make this test case running reliably. configure_for_lease_read(&mut cluster.cfg, Some(100), None); - Self::with_cluster(count, cluster) - } - - pub fn with_cluster( - count: usize, - mut cluster: Cluster>, - ) -> Self { - init(); - let pd_cli = cluster.pd_client.clone(); - let mut endpoints = HashMap::default(); - let mut obs = HashMap::default(); - let mut concurrency_managers = HashMap::default(); - // Hack! node id are generated from 1..count+1. - for id in 1..=count as u64 { - // Create and run cdc endpoints. - let worker = LazyWorker::new(format!("cdc-{}", id)); - let mut sim = cluster.sim.wl(); - - // Register cdc service to gRPC server. - let scheduler = worker.scheduler(); - let rts_ob = resolved_ts::Observer::new(scheduler.clone()); - obs.insert(id, rts_ob.clone()); - sim.coprocessor_hooks.entry(id).or_default().push(Box::new( - move |host: &mut CoprocessorHost<_>| { - // Migrated to 2021 migration. This let statement is probably not needed, see - // https://doc.rust-lang.org/edition-guide/rust-2021/disjoint-capture-in-closures.html - let _ = &rts_ob; - rts_ob.register_to(host); - }, - )); - endpoints.insert(id, worker); - } + // Start resolved ts endpoint. 
+ cluster.cfg.resolved_ts.enable = true; + cluster.cfg.resolved_ts.advance_ts_interval = ReadableDuration::millis(10); cluster.run(); - for (id, worker) in &mut endpoints { - let sim = cluster.sim.wl(); - let raft_router = sim.get_server_router(*id); - let cm = sim.get_concurrency_manager(*id); - let env = Arc::new(Environment::new(1)); - let cfg = ResolvedTsConfig { - advance_ts_interval: tikv_util::config::ReadableDuration(Duration::from_millis(10)), - ..Default::default() - }; - let rts_endpoint = resolved_ts::Endpoint::new( - &cfg, - worker.scheduler(), - CdcRaftRouter(raft_router), - cluster.store_metas[id].clone(), - pd_cli.clone(), - cm.clone(), - env, - sim.security_mgr.clone(), - ); - concurrency_managers.insert(*id, cm); - worker.start(rts_endpoint); - } TestSuite { cluster, - endpoints, - obs, - concurrency_managers, env: Arc::new(Environment::new(1)), tikv_cli: HashMap::default(), import_cli: HashMap::default(), @@ -111,9 +46,6 @@ impl TestSuite { } pub fn stop(mut self) { - for (_, mut worker) in self.endpoints { - worker.stop(); - } self.cluster.shutdown(); } @@ -139,7 +71,13 @@ impl TestSuite { } pub fn must_schedule_task(&self, store_id: u64, task: Task) { - let scheduler = self.endpoints.get(&store_id).unwrap().scheduler(); + let scheduler = self + .cluster + .sim + .read() + .unwrap() + .get_resolved_ts_scheduler(store_id) + .unwrap(); scheduler.schedule(task).unwrap(); } @@ -222,112 +160,6 @@ impl TestSuite { ); } - pub fn must_check_txn_status( - &mut self, - region_id: u64, - primary_key: Vec, - lock_ts: TimeStamp, - caller_start_ts: TimeStamp, - current_ts: TimeStamp, - rollback_if_not_exist: bool, - ) -> Action { - let mut req = CheckTxnStatusRequest::default(); - req.set_context(self.get_context(region_id)); - req.set_primary_key(primary_key); - req.set_lock_ts(lock_ts.into_inner()); - req.set_caller_start_ts(caller_start_ts.into_inner()); - req.set_current_ts(current_ts.into_inner()); - req.set_rollback_if_not_exist(rollback_if_not_exist); - let resp = self - .get_tikv_client(region_id) - .kv_check_txn_status(&req) - .unwrap(); - assert!(!resp.has_region_error(), "{:?}", resp.get_region_error()); - assert!(!resp.has_error(), "{:?}", resp.get_error()); - resp.get_action() - } - - pub fn must_acquire_pessimistic_lock( - &mut self, - region_id: u64, - muts: Vec, - pk: Vec, - start_ts: TimeStamp, - for_update_ts: TimeStamp, - ) { - let mut lock_req = PessimisticLockRequest::default(); - lock_req.set_context(self.get_context(region_id)); - lock_req.set_mutations(muts.into_iter().collect()); - lock_req.start_version = start_ts.into_inner(); - lock_req.for_update_ts = for_update_ts.into_inner(); - lock_req.primary_lock = pk; - let lock_resp = self - .get_tikv_client(region_id) - .kv_pessimistic_lock(&lock_req) - .unwrap(); - assert!( - !lock_resp.has_region_error(), - "{:?}", - lock_resp.get_region_error() - ); - assert!( - lock_resp.get_errors().is_empty(), - "{:?}", - lock_resp.get_errors() - ); - } - - pub fn must_kv_pessimistic_prewrite( - &mut self, - region_id: u64, - muts: Vec, - pk: Vec, - ts: TimeStamp, - for_update_ts: TimeStamp, - ) { - let mut prewrite_req = PrewriteRequest::default(); - prewrite_req.set_context(self.get_context(region_id)); - prewrite_req.set_mutations(muts.into_iter().collect()); - prewrite_req.primary_lock = pk; - prewrite_req.start_version = ts.into_inner(); - prewrite_req.lock_ttl = prewrite_req.start_version + 1; - prewrite_req.for_update_ts = for_update_ts.into_inner(); - prewrite_req - .mut_pessimistic_actions() - 
.push(DoPessimisticCheck); - let prewrite_resp = self - .get_tikv_client(region_id) - .kv_prewrite(&prewrite_req) - .unwrap(); - assert!( - !prewrite_resp.has_region_error(), - "{:?}", - prewrite_resp.get_region_error() - ); - assert!( - prewrite_resp.errors.is_empty(), - "{:?}", - prewrite_resp.get_errors() - ); - } - - pub fn async_kv_commit( - &mut self, - region_id: u64, - keys: Vec>, - start_ts: TimeStamp, - commit_ts: TimeStamp, - ) -> ClientUnaryReceiver { - let mut commit_req = CommitRequest::default(); - commit_req.set_context(self.get_context(region_id)); - commit_req.start_version = start_ts.into_inner(); - commit_req.set_keys(keys.into_iter().collect()); - commit_req.commit_version = commit_ts.into_inner(); - self.get_tikv_client(region_id) - .kv_commit_async(&commit_req) - .unwrap() - } - pub fn get_context(&mut self, region_id: u64) -> Context { let epoch = self.cluster.get_region_epoch(region_id); let leader = self.cluster.leader_of_region(region_id).unwrap(); @@ -364,14 +196,6 @@ impl TestSuite { }) } - pub fn get_txn_concurrency_manager(&self, store_id: u64) -> Option { - self.concurrency_managers.get(&store_id).cloned() - } - - pub fn set_tso(&self, ts: impl Into) { - self.cluster.pd_client.set_tso(ts.into()); - } - pub fn region_resolved_ts(&mut self, region_id: u64) -> Option { let leader = self.cluster.leader_of_region(region_id)?; let meta = self.cluster.store_metas[&leader.store_id].lock().unwrap(); @@ -420,45 +244,4 @@ impl TestSuite { } panic!("fail to get greater ts after 50 trys"); } - - pub fn upload_sst( - &mut self, - region_id: u64, - meta: &SstMeta, - data: &[u8], - ) -> Result { - let import = self.get_import_client(region_id); - let mut r1 = UploadRequest::default(); - r1.set_meta(meta.clone()); - let mut r2 = UploadRequest::default(); - r2.set_data(data.to_vec()); - let reqs: Vec<_> = vec![r1, r2] - .into_iter() - .map(|r| Result::Ok((r, WriteFlags::default()))) - .collect(); - let (mut tx, rx) = import.upload().unwrap(); - let mut stream = stream::iter(reqs); - block_on(async move { - tx.send_all(&mut stream).await?; - tx.close().await?; - rx.await - }) - } - - pub fn must_ingest_sst(&mut self, region_id: u64, meta: SstMeta) { - let mut ingest_request = IngestRequest::default(); - ingest_request.set_context(self.get_context(region_id)); - ingest_request.set_sst(meta); - - let ingest_sst_resp = self - .get_import_client(region_id) - .ingest(&ingest_request) - .unwrap(); - - assert!( - !ingest_sst_resp.has_error(), - "{:?}", - ingest_sst_resp.get_error() - ); - } } diff --git a/components/server/Cargo.toml b/components/server/Cargo.toml index 64476107adf..e1babe70f69 100644 --- a/components/server/Cargo.toml +++ b/components/server/Cargo.toml @@ -81,6 +81,7 @@ service = { workspace = true } slog = { workspace = true } slog-global = { workspace = true } snap_recovery = { workspace = true } +sst_importer = { workspace = true } tempfile = "3.0" tikv = { workspace = true } tikv_alloc = { workspace = true } diff --git a/components/server/src/server.rs b/components/server/src/server.rs index 09f4ac3449a..fcc9d97cf28 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -79,6 +79,7 @@ use resource_control::ResourceGroupManager; use security::SecurityManager; use service::{service_event::ServiceEvent, service_manager::GrpcServiceManager}; use snap_recovery::RecoveryService; +use sst_importer::{IngestMediator, IngestObserver, Mediator}; use tikv::{ config::{ ConfigController, DbConfigManger, DbType, LogConfigManager, 
MemoryConfigManager, TikvConfig, @@ -894,6 +895,8 @@ where .unwrap() .region_read_progress .clone(); + // TODO: backup_stream may need to write its own observer so that + // its resolved ts does not advance until ingested ssts are backed up. let leadership_resolver = LeadershipResolver::new( node.id(), self.pd_client.clone(), @@ -925,37 +928,6 @@ where None }; - let import_path = self.core.store_path.join("import"); - let mut importer = SstImporter::new( - &self.core.config.import, - import_path, - self.core.encryption_key_manager.clone(), - self.core.config.storage.api_version(), - false, - ) - .unwrap(); - for (cf_name, compression_type) in &[ - ( - CF_DEFAULT, - self.core - .config - .rocksdb - .defaultcf - .bottommost_level_compression, - ), - ( - CF_WRITE, - self.core - .config - .rocksdb - .writecf - .bottommost_level_compression, - ), - ] { - importer.set_compression_type(cf_name, from_rocks_compression_type(*compression_type)); - } - let importer = Arc::new(importer); - let split_check_runner = SplitCheckRunner::new( engines.engines.kv.clone(), self.router.clone(), @@ -1000,39 +972,6 @@ where .registry .register_consistency_check_observer(100, observer); - node.start( - engines.engines.clone(), - server.transport(), - snap_mgr, - pd_worker, - engines.store_meta.clone(), - self.coprocessor_host.clone().unwrap(), - importer.clone(), - split_check_scheduler, - auto_split_controller, - self.concurrency_manager.clone(), - collector_reg_handle, - self.causal_ts_provider.clone(), - self.grpc_service_mgr.clone(), - safe_point.clone(), - ) - .unwrap_or_else(|e| fatal!("failed to start node: {}", e)); - - // Start auto gc. Must after `Node::start` because `node_id` is initialized - // there. - assert!(node.id() > 0); // Node id should never be 0. - let auto_gc_config = AutoGcConfig::new( - self.pd_client.clone(), - self.region_info_accessor.clone(), - node.id(), - ); - gc_worker - .start(node.id()) - .unwrap_or_else(|e| fatal!("failed to start gc worker: {}", e)); - if let Err(e) = gc_worker.start_auto_gc(auto_gc_config, safe_point) { - fatal!("failed to start auto_gc on storage, error: {}", e); - } - initial_metric(&self.core.config.metric); if self.core.config.storage.enable_ttl { ttl_checker.start_with_timer(TtlChecker::new( @@ -1069,6 +1008,8 @@ where // Start resolved ts if let Some(mut rts_worker) = rts_worker { + // Do not let sst ingest block resolved ts in production. + let ingest_observer = None; let rts_endpoint = resolved_ts::Endpoint::new( &self.core.config.resolved_ts, rts_worker.scheduler(), @@ -1078,12 +1019,82 @@ where self.concurrency_manager.clone(), server.env(), self.security_mgr.clone(), + ingest_observer, ); self.resolved_ts_scheduler = Some(rts_worker.scheduler()); rts_worker.start_with_timer(rts_endpoint); self.core.to_stop.push(rts_worker); } + // Start SST importer. 
+ let ingest_observer = Arc::new(IngestObserver::default()); + let mut ingest_mediator = IngestMediator::default(); + ingest_mediator.register(ingest_observer.clone()); + let import_path = self.core.store_path.join("import"); + let mut importer = SstImporter::new( + &self.core.config.import, + import_path, + self.core.encryption_key_manager.clone(), + self.core.config.storage.api_version(), + false, + Arc::new(ingest_mediator), + ingest_observer, + ) + .unwrap(); + for (cf_name, compression_type) in &[ + ( + CF_DEFAULT, + self.core + .config + .rocksdb + .defaultcf + .bottommost_level_compression, + ), + ( + CF_WRITE, + self.core + .config + .rocksdb + .writecf + .bottommost_level_compression, + ), + ] { + importer.set_compression_type(cf_name, from_rocks_compression_type(*compression_type)); + } + let importer = Arc::new(importer); + + node.start( + engines.engines.clone(), + server.transport(), + snap_mgr, + pd_worker, + engines.store_meta.clone(), + self.coprocessor_host.clone().unwrap(), + importer.clone(), + split_check_scheduler, + auto_split_controller, + self.concurrency_manager.clone(), + collector_reg_handle, + self.causal_ts_provider.clone(), + self.grpc_service_mgr.clone(), + safe_point.clone(), + ) + .unwrap_or_else(|e| fatal!("failed to start node: {}", e)); + + // Start auto gc. Must after `Node::start` because `node_id` is initialized + // there. + assert!(node.id() > 0); // Node id should never be 0. + let auto_gc_config = AutoGcConfig::new( + self.pd_client.clone(), + self.region_info_accessor.clone(), + node.id(), + ); + gc_worker + .start(node.id()) + .unwrap_or_else(|e| fatal!("failed to start gc worker: {}", e)); + if let Err(e) = gc_worker.start_auto_gc(auto_gc_config, safe_point) { + fatal!("failed to start auto_gc on storage, error: {}", e); + } cfg_controller.register( tikv::config::Module::Raftstore, Box::new(RaftstoreConfigManager::new( diff --git a/components/server/src/server2.rs b/components/server/src/server2.rs index 750e73b0e5b..d03c440a533 100644 --- a/components/server/src/server2.rs +++ b/components/server/src/server2.rs @@ -71,6 +71,7 @@ use resolved_ts::Task; use resource_control::ResourceGroupManager; use security::SecurityManager; use service::{service_event::ServiceEvent, service_manager::GrpcServiceManager}; +use sst_importer::{IngestMediator, IngestObserver, Mediator}; use tikv::{ config::{ loop_registry, ConfigController, ConfigurableDb, DbConfigManger, DbType, LogConfigManager, @@ -680,6 +681,8 @@ where rts_worker.scheduler(), )), ); + // Do not let sst ingest block resolved ts in production. + let ingest_observer = None; let rts_endpoint = resolved_ts::Endpoint::new( &self.core.config.resolved_ts, rts_worker.scheduler(), @@ -689,6 +692,7 @@ where self.concurrency_manager.clone(), self.env.clone(), self.security_mgr.clone(), + ingest_observer, ); self.resolved_ts_scheduler = Some(rts_worker.scheduler()); rts_worker.start_with_timer(rts_endpoint); @@ -735,6 +739,43 @@ where None }; + // Start SST importer. 
+ let ingest_observer = Arc::new(IngestObserver::default()); + let mut ingest_mediator = IngestMediator::default(); + ingest_mediator.register(ingest_observer.clone()); + let import_path = self.core.store_path.join("import"); + let mut importer = SstImporter::new( + &self.core.config.import, + import_path, + self.core.encryption_key_manager.clone(), + self.core.config.storage.api_version(), + true, + Arc::new(ingest_mediator), + ingest_observer, + ) + .unwrap(); + for (cf_name, compression_type) in &[ + ( + CF_DEFAULT, + self.core + .config + .rocksdb + .defaultcf + .bottommost_level_compression, + ), + ( + CF_WRITE, + self.core + .config + .rocksdb + .writecf + .bottommost_level_compression, + ), + ] { + importer.set_compression_type(cf_name, from_rocks_compression_type(*compression_type)); + } + let importer = Arc::new(importer); + let server_config = Arc::new(VersionTrack::new(self.core.config.server.clone())); self.core.config.raft_store.optimize_for(true); @@ -789,37 +830,6 @@ where )), ); - let import_path = self.core.store_path.join("import"); - let mut importer = SstImporter::new( - &self.core.config.import, - import_path, - self.core.encryption_key_manager.clone(), - self.core.config.storage.api_version(), - true, - ) - .unwrap(); - for (cf_name, compression_type) in &[ - ( - CF_DEFAULT, - self.core - .config - .rocksdb - .defaultcf - .bottommost_level_compression, - ), - ( - CF_WRITE, - self.core - .config - .rocksdb - .writecf - .bottommost_level_compression, - ), - ] { - importer.set_compression_type(cf_name, from_rocks_compression_type(*compression_type)); - } - let importer = Arc::new(importer); - // V2 starts split-check worker within raftstore. let split_config_manager = diff --git a/components/sst_importer/src/errors.rs b/components/sst_importer/src/errors.rs index e5e235e9761..73ddbccda97 100644 --- a/components/sst_importer/src/errors.rs +++ b/components/sst_importer/src/errors.rs @@ -33,6 +33,8 @@ pub fn error_inc(type_: &str, err: &Error) { Error::Encryption(..) => "encryption", Error::CodecError(..) => "codec", Error::Suspended { .. } => "suspended", + Error::LeaseExpired => "lease_expired", + Error::InvalidLease => "invalid_lease", _ => return, }; IMPORTER_ERROR_VEC.with_label_values(&[type_, label]).inc(); @@ -136,6 +138,12 @@ pub enum Error { #[error("imports are suspended for {time_to_lease_expire:?}")] Suspended { time_to_lease_expire: Duration }, + + #[error("lease has expired")] + LeaseExpired, + + #[error("invalid lease")] + InvalidLease, } impl Error { @@ -218,10 +226,12 @@ impl ErrorCodeExt for Error { Error::TtlLenNotEqualsToPairs => error_code::sst_importer::TTL_LEN_NOT_EQUALS_TO_PAIRS, Error::IncompatibleApiVersion => error_code::sst_importer::INCOMPATIBLE_API_VERSION, Error::InvalidKeyMode { .. } => error_code::sst_importer::INVALID_KEY_MODE, - Error::ResourceNotEnough(_) => error_code::sst_importer::RESOURCE_NOT_ENOUTH, + Error::ResourceNotEnough(_) => error_code::sst_importer::RESOURCE_NOT_ENOUGH, Error::Suspended { .. 
} => error_code::sst_importer::SUSPENDED, Error::RequestTooNew(_) => error_code::sst_importer::REQUEST_TOO_NEW, Error::RequestTooOld(_) => error_code::sst_importer::REQUEST_TOO_OLD, + Error::LeaseExpired => error_code::sst_importer::LEASE_EXPIRED, + Error::InvalidLease => error_code::sst_importer::INVALID_LEASE, } } } diff --git a/components/sst_importer/src/lib.rs b/components/sst_importer/src/lib.rs index ff137005b09..b38c2519d2c 100644 --- a/components/sst_importer/src/lib.rs +++ b/components/sst_importer/src/lib.rs @@ -2,6 +2,8 @@ //! Importing RocksDB SST files into TiKV #![feature(min_specialization)] +#![feature(let_chains)] +#![feature(assert_matches)] #[macro_use] extern crate lazy_static; @@ -15,6 +17,7 @@ extern crate tikv_alloc; mod config; mod errors; mod import_file; +mod mediate; mod sst_writer; mod util; #[macro_use] @@ -29,6 +32,7 @@ pub use self::{ errors::{error_inc, Error, Result}, import_file::{sst_meta_to_path, API_VERSION_2}, import_mode2::range_overlaps, + mediate::{IngestMediator, IngestObserver, Mediator, Observer}, sst_importer::SstImporter, sst_writer::{RawSstWriter, TxnSstWriter}, util::{copy_sst_for_ingestion, prepare_sst_for_ingestion}, diff --git a/components/sst_importer/src/mediate/mod.rs b/components/sst_importer/src/mediate/mod.rs new file mode 100644 index 00000000000..c5a49a25ba1 --- /dev/null +++ b/components/sst_importer/src/mediate/mod.rs @@ -0,0 +1,229 @@ +// Copyright 2024 TiKV Project Authors. Licensed under Apache-2.0. + +//! This module contains the implementation of the mediator pattern for the +//! sst_importer component in TiKV. It provides the `Mediator` trait and the +//! `IngestMediator` struct, which act as a central hub for communication +//! between different observers. Observers can register with the mediator and +//! receive events through the `Observer` trait. + +use std::{ + sync::{ + atomic::{AtomicU64, Ordering}, + Arc, Weak, + }, + time::{Duration, Instant}, +}; + +use futures::compat::Future01CompatExt; +use uuid::Uuid; + +mod observer; +pub use observer::IngestObserver; + +/// The event type that the mediator can send to observers. +#[derive(Clone)] +pub enum Event { + Acquire { + region_id: u64, + uuid: Uuid, + deadline: Instant, + }, + Release { + region_id: u64, + uuid: Uuid, + }, +} + +#[derive(Debug)] +struct LeaseState { + deadline: Instant, + ref_count: Arc, +} + +impl LeaseState { + fn ref_(&self) -> LeaseRef { + self.ref_count.fetch_add(1, Ordering::SeqCst); + LeaseRef { + lease: LeaseState { + deadline: self.deadline, + ref_count: self.ref_count.clone(), + }, + } + } + + fn has_ref(&self) -> bool { + self.ref_count.load(Ordering::SeqCst) != 0 + } + + fn expire(&mut self) { + self.deadline = Instant::now() - Duration::from_secs(1); + } + + fn is_expired(&self) -> bool { + Instant::now() > self.deadline + } +} + +/// A reference to a lease. It should be hold when a long time import RPC is in +/// progress. +#[derive(Debug)] +pub struct LeaseRef { + lease: LeaseState, +} + +impl LeaseRef { + /// Checks if the lease is expired. + pub fn is_expired(&self) -> bool { + self.lease.is_expired() + } +} + +impl Drop for LeaseRef { + fn drop(&mut self) { + self.lease.ref_count.fetch_sub(1, Ordering::SeqCst); + } +} + +pub trait Observer: Sync + Send { + fn update(&self, event: Event); + /// Returns a lease specified by region_id and uuid if the lease is valid. + fn get_lease(&self, region_id: u64, uuid: &Uuid) -> Option; + /// Returns a valid lease uuid for a region. + /// Called by Resolver before advancing resolved_ts. 
+ fn get_region_lease(&self, region_id: u64) -> Option; + /// Garbage collection and it should never block. + fn gc(&self); +} + +pub trait Mediator: Sync + Send { + fn acquire(&self, region_id: u64, uuid: Uuid, deadline: Instant); + fn release(&self, region_id: u64, uuid: Uuid); + fn register(&mut self, comp: Arc); + fn gc(&self); +} + +#[derive(Default)] +pub struct IngestMediator { + comps: Vec>, +} + +impl Mediator for IngestMediator { + fn acquire(&self, region_id: u64, uuid: Uuid, deadline: Instant) { + let event = Event::Acquire { + region_id, + uuid, + deadline, + }; + for comp in &self.comps { + comp.update(event.clone()) + } + } + + fn release(&self, region_id: u64, uuid: Uuid) { + let event = Event::Release { region_id, uuid }; + for comp in &self.comps { + comp.update(event.clone()) + } + } + + fn register(&mut self, comp: Arc) { + self.comps.push(comp) + } + + fn gc(&self) { + for comp in &self.comps { + comp.gc(); + } + } +} + +/// Periodically triggers garbage collection on the mediator. +pub async fn periodic_gc_mediator(mediator: Weak, duration: Duration) { + loop { + let Some(m) = mediator.upgrade() else { + return; + }; + m.gc(); + let _ = tikv_util::timer::GLOBAL_TIMER_HANDLE + .delay(Instant::now() + duration) + .compat() + .await; + } +} + +#[cfg(test)] +mod tests { + use std::{thread, time::Duration}; + + use futures::executor::block_on; + + use super::*; + + #[test] + fn test_lease_ref_is_expired() { + let ttl = Duration::from_millis(200); + let lease = LeaseState { + deadline: Instant::now() + ttl, + ref_count: Arc::default(), + }; + let ref_ = lease.ref_(); + assert!(!ref_.is_expired()); + std::thread::sleep(2 * ttl); + assert!(ref_.is_expired()); + } + + #[test] + fn test_lease_ref_count() { + let lease = LeaseState { + deadline: Instant::now(), + ref_count: Arc::default(), + }; + assert!(!lease.has_ref()); + + let ref1 = lease.ref_(); + assert!(lease.has_ref()); + let ref2 = lease.ref_(); + assert_eq!(lease.ref_count.load(Ordering::SeqCst), 2); + + drop(ref1); + assert_eq!(lease.ref_count.load(Ordering::SeqCst), 1); + drop(ref2); + assert_eq!(lease.ref_count.load(Ordering::SeqCst), 0); + assert!(!lease.has_ref()); + } + + #[test] + fn test_gc() { + struct Mock { + gc_count: AtomicU64, + } + impl Observer for Mock { + fn update(&self, _: Event) {} + fn get_lease(&self, _: u64, _: &Uuid) -> Option { + None + } + fn get_region_lease(&self, _: u64) -> Option { + None + } + fn gc(&self) { + self.gc_count.fetch_add(1, Ordering::SeqCst); + } + } + + let mock = Arc::new(Mock { + gc_count: AtomicU64::new(0), + }); + let mut mediator = IngestMediator::default(); + mediator.register(mock.clone()); + let mediator = Arc::new(mediator); + let mediator_weak = Arc::downgrade(&mediator); + thread::spawn(move || { + block_on(periodic_gc_mediator( + mediator_weak, + Duration::from_millis(100), + )) + }); + thread::sleep(Duration::from_millis(500)); + assert!(mock.gc_count.load(Ordering::SeqCst) > 2); + } +} diff --git a/components/sst_importer/src/mediate/observer.rs b/components/sst_importer/src/mediate/observer.rs new file mode 100644 index 00000000000..e3a0c5bf40e --- /dev/null +++ b/components/sst_importer/src/mediate/observer.rs @@ -0,0 +1,338 @@ +// Copyright 2024 TiKV Project Authors. Licensed under Apache-2.0. 
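
Before reading the concrete observer below, a minimal usage sketch of the mediate API may help (hypothetical driver code, not taken from this patch; the region id, uuid, and one-hour deadline are arbitrary):

    use std::{
        sync::Arc,
        time::{Duration, Instant},
    };

    use sst_importer::{IngestMediator, IngestObserver, Mediator, Observer};
    use uuid::Uuid;

    fn lease_round_trip() {
        // Wire an observer into a mediator, as the servers do at startup.
        let observer = Arc::new(IngestObserver::default());
        let mut mediator = IngestMediator::default();
        mediator.register(observer.clone());

        // An import RPC acquires a lease before it starts writing SSTs
        // for region 1.
        let uuid = Uuid::new_v4();
        mediator.acquire(1, uuid, Instant::now() + Duration::from_secs(3600));

        // The resolved-ts advancer consults the observer and holds back
        // region 1 while the lease is live.
        assert_eq!(observer.get_region_lease(1), Some(uuid));

        // Ingest finishes (or aborts) and releases the lease, unblocking
        // resolved-ts for the region.
        mediator.release(1, uuid);
        assert!(observer.get_region_lease(1).is_none());
    }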
+
+use std::{
+    sync::{Arc, RwLock},
+    time::Instant,
+};
+
+use collections::{HashMap, HashMapEntry};
+use uuid::Uuid;
+
+use super::*;
+
+const MIN_SHRINK_CAP: usize = 1024;
+
+// region_id -> (uuid -> deadline)
+#[derive(Default)]
+struct SstLeases(HashMap<u64, HashMap<Uuid, LeaseState>>);
+
+impl SstLeases {
+    fn upsert_lease(&mut self, region_id: u64, uuid: Uuid, deadline: Instant) {
+        let ssts = &mut self.0;
+        let region_leases = ssts.entry(region_id).or_default();
+        match region_leases.entry(uuid) {
+            HashMapEntry::Vacant(e) => {
+                e.insert(LeaseState {
+                    deadline,
+                    ref_count: Arc::default(),
+                });
+            }
+            HashMapEntry::Occupied(mut e) => {
+                // Update deadline and keep ref_count as it is.
+                e.get_mut().deadline = deadline;
+            }
+        };
+    }
+    fn expire_lease(&mut self, region_id: u64, uuid: &Uuid) {
+        let ssts = &mut self.0;
+        if let HashMapEntry::Occupied(mut leases) = ssts.entry(region_id) {
+            if let Some(lease) = leases.get_mut().get_mut(uuid) {
+                if lease.has_ref() {
+                    // Do not remove a lease that has refs.
+                    lease.expire();
+                } else {
+                    leases.get_mut().remove(uuid);
+                }
+            }
+            if leases.get().is_empty() {
+                leases.remove();
+            }
+        } else {
+            warn!("sst lease not found"; "region_id" => region_id, "uuid" => ?uuid);
+        };
+    }
+
+    fn gc(&mut self) {
+        let ssts = &mut self.0;
+        let mut empty_regions = vec![];
+        for (region_id, leases) in ssts.iter_mut() {
+            leases.retain(|_, lease| {
+                // Keep leases that still have refs or have not expired yet.
+                lease.has_ref() || !lease.is_expired()
+            });
+            if leases.is_empty() {
+                empty_regions.push(*region_id);
+            }
+        }
+        // Remove regions that have no lease.
+        for region_id in empty_regions {
+            ssts.remove(&region_id);
+        }
+        if ssts.capacity() > MIN_SHRINK_CAP && ssts.capacity() > ssts.len() * 2 {
+            ssts.shrink_to(MIN_SHRINK_CAP);
+        }
+    }
+}
+
+#[derive(Default)]
+pub struct IngestObserver {
+    sst_leases: RwLock<SstLeases>,
+}
+
+impl Observer for IngestObserver {
+    fn update(&self, event: Event) {
+        match event {
+            Event::Acquire {
+                region_id,
+                uuid,
+                deadline,
+            } => {
+                self.upsert_lease(region_id, uuid, deadline);
+            }
+            Event::Release { region_id, uuid } => {
+                self.expire_lease(region_id, &uuid);
+            }
+        }
+    }
+
+    fn get_lease(&self, region_id: u64, uuid: &Uuid) -> Option<LeaseRef> {
+        let ssts = self.sst_leases.read().unwrap();
+        let Some(leases) = ssts.0.get(&region_id) else {
+            return None;
+        };
+        leases.get(uuid).map(|lease| lease.ref_())
+    }
+
+    fn get_region_lease(&self, region_id: u64) -> Option<Uuid> {
+        let ssts = self.sst_leases.read().unwrap();
+        let leases = ssts.0.get(&region_id)?;
+        for (uuid, lease) in leases {
+            if !lease.is_expired() || lease.has_ref() {
+                return Some(*uuid);
+            }
+        }
+        None
+    }
+
+    fn gc(&self) {
+        let Ok(mut leases) = self.sst_leases.try_write() else {
+            return;
+        };
+        leases.gc()
+    }
+}
+
+impl IngestObserver {
+    fn upsert_lease(&self, region_id: u64, uuid: Uuid, deadline: Instant) {
+        let mut ssts = self.sst_leases.write().unwrap();
+        ssts.upsert_lease(region_id, uuid, deadline)
+    }
+    fn expire_lease(&self, region_id: u64, uuid: &Uuid) {
+        let mut ssts = self.sst_leases.write().unwrap();
+        ssts.expire_lease(region_id, uuid)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::{assert_matches::assert_matches, sync::mpsc::channel, thread, time::Duration};
+
+    use super::*;
+
+    #[test]
+    fn
test_observer_get_region_lease() { + let observer = IngestObserver::default(); + let region_id = 1; + assert!(observer.get_region_lease(region_id).is_none()); + + let uuid = Uuid::new_v4(); + let deadline = Instant::now() + Duration::from_secs(60); + observer.update(Event::Acquire { + region_id, + uuid, + deadline, + }); + assert_eq!(observer.get_region_lease(region_id).unwrap(), uuid); + + observer.update(Event::Release { region_id, uuid }); + assert!(observer.get_region_lease(region_id).is_none()); + + observer.update(Event::Acquire { + region_id, + uuid, + deadline: Instant::now(), + }); + thread::sleep(Duration::from_millis(200)); + assert!(observer.get_region_lease(region_id).is_none()); + } + + #[test] + fn test_observer_upsert_lease() { + let observer = IngestObserver::default(); + let region_id = 1; + let uuid1 = Uuid::new_v4(); + let deadline1 = Instant::now(); + let uuid2 = Uuid::new_v4(); + let deadline2 = Instant::now(); + observer.update(Event::Acquire { + region_id, + uuid: uuid1, + deadline: deadline1, + }); + observer.update(Event::Acquire { + region_id, + uuid: uuid2, + deadline: deadline2, + }); + + let assert_ref_count = |uuid, count: u64| { + assert_eq!( + observer.sst_leases.read().unwrap().0[®ion_id][&uuid] + .ref_count + .load(Ordering::SeqCst), + count, + ); + }; + + let ref1 = observer.get_lease(region_id, &uuid1).unwrap(); + assert_eq!(ref1.lease.deadline, deadline1); + assert_ref_count(uuid1, 1); + let ref2 = observer.get_lease(region_id, &uuid2).unwrap(); + assert_eq!(ref2.lease.deadline, deadline2); + + // Make sure upsert does not overwrite ref_count. + let deadline3 = Instant::now(); + observer.update(Event::Acquire { + region_id, + uuid: uuid1, + deadline: deadline3, + }); + let ref3 = observer.get_lease(region_id, &uuid1).unwrap(); + assert_eq!(ref3.lease.deadline, deadline3); + assert_ref_count(uuid1, 2); + } + + #[test] + fn test_observer_expire_lease() { + let observer = IngestObserver::default(); + let region_id = 1; + let uuid1 = Uuid::new_v4(); + let deadline1 = Instant::now(); + let uuid2 = Uuid::new_v4(); + let deadline2 = Instant::now(); + observer.update(Event::Acquire { + region_id, + uuid: uuid1, + deadline: deadline1, + }); + observer.update(Event::Acquire { + region_id, + uuid: uuid2, + deadline: deadline2, + }); + + // Hold a ref to uuid1. + let ref1 = observer.get_lease(region_id, &uuid1).unwrap(); + assert_eq!(ref1.lease.deadline, deadline1); + + observer.update(Event::Release { + region_id, + uuid: uuid1, + }); + observer.update(Event::Release { + region_id, + uuid: uuid2, + }); + + // Make sure expire does not remove a lease that has refs. + let ref11 = observer.get_lease(region_id, &uuid1).unwrap(); + // Make sure the unremoved lease is indeed expired. 
+ assert!(ref11.is_expired()); + + assert_matches!(observer.get_lease(region_id, &uuid2), None); + } + + #[test] + fn test_observer_gc() { + let observer = IngestObserver::default(); + + let region_id1 = 1; + let uuid11 = Uuid::new_v4(); + let deadline11 = Instant::now() + Duration::from_secs(60); + let uuid12 = Uuid::new_v4(); + let deadline12 = Instant::now() + Duration::from_secs(60); + observer.update(Event::Acquire { + region_id: region_id1, + uuid: uuid11, + deadline: deadline11, + }); + observer.update(Event::Acquire { + region_id: region_id1, + uuid: uuid12, + deadline: deadline12, + }); + + let region_id2 = 2; + let uuid2 = Uuid::new_v4(); + let deadline2 = Instant::now() + Duration::from_secs(60); + observer.update(Event::Acquire { + region_id: region_id2, + uuid: uuid2, + deadline: deadline2, + }); + + // Gc does not remove valid leases. + observer.gc(); + observer.get_lease(region_id1, &uuid11).unwrap(); + observer.get_lease(region_id1, &uuid12).unwrap(); + observer.get_lease(region_id2, &uuid2).unwrap(); + + // Gc does not remove leases that have refs. + let ref2 = observer.get_lease(region_id2, &uuid2).unwrap(); + observer.update(Event::Release { + region_id: region_id2, + uuid: uuid2, + }); + observer.gc(); + observer.get_lease(region_id2, &uuid2).unwrap(); + + // Gc does remove regions that has no valid lease. + drop(ref2); + observer.gc(); + assert!(observer.get_lease(region_id2, &uuid2).is_none()); + + // Gc can handle concurrent leases. + observer.update(Event::Release { + region_id: region_id1, + uuid: uuid12, + }); + observer.gc(); + observer.get_lease(region_id1, &uuid11).unwrap(); + assert!(observer.get_lease(region_id1, &uuid12).is_none()); + + // Gc reclaims memory. + observer + .sst_leases + .write() + .unwrap() + .0 + .reserve(MIN_SHRINK_CAP * 2); + observer.gc(); + let cap = observer.sst_leases.write().unwrap().0.capacity(); + assert!(cap < MIN_SHRINK_CAP * 2); + + // Gc never block. + let observer = Arc::new(observer); + let _guard = observer.sst_leases.write().unwrap(); + let observer_ = observer.clone(); + let (tx, rx) = channel(); + thread::spawn(move || { + observer_.gc(); + tx.send(()).unwrap(); + }); + rx.recv_timeout(Duration::from_secs(5)).unwrap(); + } +} diff --git a/components/sst_importer/src/sst_importer.rs b/components/sst_importer/src/sst_importer.rs index e74a1f6978c..30b8acef76f 100644 --- a/components/sst_importer/src/sst_importer.rs +++ b/components/sst_importer/src/sst_importer.rs @@ -11,7 +11,7 @@ use std::{ atomic::{AtomicU64, Ordering}, Arc, }, - time::{Duration, SystemTime}, + time::{Duration, Instant as StdInstant, SystemTime}, }; use collections::HashSet; @@ -47,15 +47,17 @@ use tokio::{ sync::OnceCell, }; use txn_types::{Key, TimeStamp, WriteRef}; +use uuid::Uuid; use crate::{ caching::cache_map::{CacheMap, ShareOwned}, import_file::{ImportDir, ImportFile}, import_mode::{ImportModeSwitcher, RocksDbMetricsFn}, import_mode2::{HashRange, ImportModeSwitcherV2}, + mediate::{periodic_gc_mediator, LeaseRef}, metrics::*, sst_writer::{RawSstWriter, TxnSstWriter}, - util, Config, ConfigManager as ImportConfigManager, Error, Result, + util, Config, ConfigManager as ImportConfigManager, Error, Mediator, Observer, Result, }; pub struct LoadedFile { @@ -162,10 +164,12 @@ pub struct SstImporter { cached_storage: CacheMap, // We need to keep reference to the runtime so background tasks won't be dropped. 
-    _download_rt: Runtime,
+    _auxiliary_rt: Runtime,
     file_locks: Arc>,
     mem_use: Arc,
     mem_limit: Arc,
+    ingest_mediator: Arc,
+    ingest_observer: Arc,
 }
 
 impl SstImporter {
@@ -175,6 +179,8 @@ impl SstImporter {
         key_manager: Option>,
         api_version: ApiVersion,
         raft_kv_v2: bool,
+        ingest_mediator: Arc,
+        ingest_observer: Arc,
     ) -> Result {
         let switcher = if raft_kv_v2 {
             Either::Right(ImportModeSwitcherV2::new(cfg))
@@ -183,10 +189,10 @@
         };
         let cached_storage = CacheMap::default();
         // We are going to run some background tasks here, (hyper needs to maintain the
-        // connection, the cache map needs gc intervally.) so we must create a
+        // connection, the cache map needs gc periodically.) so we must create a
        // multi-thread runtime, given there isn't blocking, a single thread runtime is
        // enough.
-        let download_rt = tokio::runtime::Builder::new_multi_thread()
+        let auxiliary_rt = tokio::runtime::Builder::new_multi_thread()
            .worker_threads(1)
            .thread_name("sst_import_misc")
            .with_sys_and_custom_hooks( , )
            .enable_all()
            .build()?;
-        download_rt.spawn(cached_storage.gc_loop());
+        auxiliary_rt.spawn(cached_storage.gc_loop());
+        let mediator = Arc::downgrade(&ingest_mediator);
+        let gc_mediator_duration = Duration::from_secs(60);
+        auxiliary_rt.spawn(async move {
+            periodic_gc_mediator(mediator, gc_mediator_duration).await;
+        });
         let memory_limit = Self::calcualte_usage_mem(cfg.memory_use_ratio);
         info!(
@@ -216,12 +227,57 @@
             compression_types: HashMap::with_capacity(2),
             file_locks: Arc::new(DashMap::default()),
             cached_storage,
-            _download_rt: download_rt,
+            _auxiliary_rt: auxiliary_rt,
             mem_use: Arc::new(AtomicU64::new(0)),
             mem_limit: Arc::new(AtomicU64::new(memory_limit)),
+            ingest_mediator,
+            ingest_observer,
         })
     }
 
+    pub fn acquire_lease(
+        &self,
+        region_id: u64,
+        uuid_bytes: &[u8],
+        deadline: StdInstant,
+    ) -> Result<()> {
+        let uuid = parse_uuid_from_slice(uuid_bytes)?;
+        self.ingest_mediator.acquire(region_id, uuid, deadline);
+        Ok(())
+    }
+
+    pub fn release_lease(&self, region_id: u64, uuid_bytes: &[u8]) -> Result<()> {
+        let uuid = parse_uuid_from_slice(uuid_bytes)?;
+        self.ingest_mediator.release(region_id, uuid);
+        Ok(())
+    }
+
+    // TODO: To avoid a race condition, maybe we should check the lease and
+    // hold it, and let the caller decide when to unhold. Holding makes a
+    // lease valid even if it exceeds its deadline.
+    pub fn check_lease(
+        &self,
+        region_id: u64,
+        uuid_bytes: &[u8],
+        rpc: &'static str,
+    ) -> Result<LeaseRef> {
+        let uuid = parse_uuid_from_slice(uuid_bytes)?;
+        let lease_ref = self.ingest_observer.get_lease(region_id, &uuid);
+        debug!("check lease"; "region_id" => region_id, "now" => ?StdInstant::now(), "lease" => ?lease_ref);
+        if let Some(lease) = lease_ref
+            && !lease.is_expired()
+        {
+            Ok(lease)
+        } else {
+            info!("sst lease expired";
+                "region_id" => region_id,
+                "lease_uuid" => ?uuid,
+                "rpc" => rpc,
+            );
+            Err(Error::LeaseExpired)
+        }
+    }
+
     pub fn ranges_enter_import_mode(&self, ranges: Vec) {
         if let Either::Right(ref switcher) = self.switcher {
             switcher.ranges_enter_import_mode(ranges)
@@ -471,7 +527,7 @@
         speed_limiter: &Limiter,
         restore_config: external_storage::RestoreConfig,
     ) -> Result<()> {
-        self._download_rt
+        self._auxiliary_rt
             .block_on(self.async_download_file_from_external_storage(
                 file_length,
                 src_file_name,
@@ -856,7 +912,7 @@
         }
     }
 
-    pub async fn do_download_kv_file(
+    async fn do_download_kv_file(
         &self,
         meta: &KvMeta,
         backend: &StorageBackend,
@@ -1090,7 +1146,7 @@
         speed_limiter: Limiter,
         engine: E,
     ) -> Result> {
-        self._download_rt.block_on(self.download_ext(
+        self._auxiliary_rt.block_on(self.download_ext(
             meta,
            backend,
            name,
@@ -1469,12 +1525,26 @@ fn is_after_end_bound>(value: &[u8], bound: &Bound) -> bool {
     }
 }
 
+fn parse_uuid_from_slice(uuid_bytes: &[u8]) -> Result<Uuid> {
+    match Uuid::from_slice(uuid_bytes) {
+        Ok(uuid) => Ok(uuid),
+        Err(e) => {
+            warn!("failed to parse uuid from bytes";
+                "error" => ?e,
+                "uuid_bytes" => ?log_wrappers::hex_encode_upper(uuid_bytes),
+            );
+            Err(Error::InvalidLease)
+        }
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use std::{
+        assert_matches::assert_matches,
         io::{self, BufWriter, Write},
         ops::Sub,
-        usize,
+        thread, usize,
     };
 
     use engine_rocks::get_env;
@@ -1487,7 +1557,7 @@
     use kvproto::encryptionpb::EncryptionMethod;
     use online_config::{ConfigManager, OnlineConfig};
     use openssl::hash::{Hasher, MessageDigest};
-    use tempfile::Builder;
+    use tempfile::{Builder, TempDir};
     use test_sst_importer::*;
     use test_util::new_test_key_manager;
    use tikv_util::{codec::stream_event::EventEncoder, stream::block_on_external_io};
@@ -1992,6 +2062,27 @@
         assert_eq!(err.kind(), io::ErrorKind::TimedOut);
     }
 
+    fn new_importer(
+        cfg: &Config,
+        key_manager: Option>,
+    ) -> (SstImporter, TempDir) {
+        let mut ingest_mediator = IngestMediator::default();
+        let ingest_observer = Arc::new(IngestObserver::default());
+        ingest_mediator.register(ingest_observer.clone());
+        let import_dir = tempfile::tempdir().unwrap();
+        let importer = SstImporter::::new(
+            cfg,
+            import_dir.path(),
+            key_manager,
+            ApiVersion::V1,
+            false,
+            Arc::new(ingest_mediator),
+            ingest_observer,
+        )
+        .unwrap();
+        (importer, import_dir)
+    }
+
     #[test]
     fn test_update_config_memory_use_ratio() {
         // create SstImpoter with default.
@@ -1999,9 +2090,7 @@
             memory_use_ratio: 0.3,
             ..Default::default()
         };
-        let import_dir = tempfile::tempdir().unwrap();
-        let importer =
-            SstImporter::::new(&cfg, import_dir, None, ApiVersion::V1, false).unwrap();
+        let (importer, _import_dir) = new_importer::(&cfg, None);
         let mem_limit_old = importer.mem_limit.load(Ordering::SeqCst);
 
         // create new config and get the diff config.
@@ -2046,16 +2135,9 @@
         let (_temp_dir, backend, kv_meta, buff) = create_sample_external_kv_file().unwrap();
 
         // create importer object.
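Together, `acquire_lease`, `check_lease`, and `release_lease` compose into a check-then-hold pattern: the caller re-checks before each lease-gated step and keeps the returned lease ref alive for the duration of that step (the TODO above records the race this is meant to close). A caller-side sketch against a hypothetical trait mirroring these methods; the trait, guard, and error types are illustrative stand-ins, not this crate's API:

```rust
use std::time::{Duration, Instant};

#[derive(Debug)]
struct LeaseExpired;

// Hypothetical mirror of the SstImporter lease methods above; `Guard` stands
// in for the LeaseRef that keeps the lease entry alive while held.
trait LeaseApi {
    type Guard;
    fn acquire_lease(&self, region_id: u64, uuid: &[u8], deadline: Instant)
        -> Result<(), LeaseExpired>;
    fn check_lease(&self, region_id: u64, uuid: &[u8], rpc: &'static str)
        -> Result<Self::Guard, LeaseExpired>;
    fn release_lease(&self, region_id: u64, uuid: &[u8]) -> Result<(), LeaseExpired>;
}

// One write+ingest round under a lease: acquire with a bounded TTL, re-check
// (and hold the guard) across each lease-gated step, then release so the
// server can reclaim the entry before its deadline.
fn ingest_under_lease<A: LeaseApi>(api: &A, region_id: u64, uuid: &[u8])
    -> Result<(), LeaseExpired> {
    let deadline = Instant::now() + Duration::from_secs(60);
    api.acquire_lease(region_id, uuid, deadline)?;
    let result = (|| {
        let _write_guard = api.check_lease(region_id, uuid, "write")?;
        // ... stream SST data while _write_guard is alive ...
        let _ingest_guard = api.check_lease(region_id, uuid, "ingest")?;
        // ... ingest the SST while _ingest_guard is alive ...
        Ok(())
    })();
    // Best effort: expire the lease early rather than waiting for the TTL.
    let _ = api.release_lease(region_id, uuid);
    result
}
```

Releasing at the end is best effort: if it fails, the lease simply runs out at its deadline and the periodic gc reclaims it.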
- let import_dir = tempfile::tempdir().unwrap(); let (_, key_manager) = new_key_manager_for_test(); - let importer = SstImporter::::new( - &Config::default(), - import_dir, - Some(key_manager), - ApiVersion::V1, - false, - ) - .unwrap(); + let (importer, _import_dir) = + new_importer::(&Config::default(), Some(key_manager)); let ext_storage = { importer.wrap_kms( importer.external_storage_or_cache(&backend, "").unwrap(), @@ -2103,16 +2185,9 @@ mod tests { let (_temp_dir, backend, kv_meta, buff) = create_sample_external_kv_file().unwrap(); // create importer object. - let import_dir = tempfile::tempdir().unwrap(); let (_, key_manager) = new_key_manager_for_test(); - let importer = SstImporter::::new( - &Config::default(), - import_dir, - Some(key_manager), - ApiVersion::V1, - false, - ) - .unwrap(); + let (importer, _import_dir) = + new_importer::(&Config::default(), Some(key_manager)); let ext_storage = { let inner = importer.wrap_kms( importer.external_storage_or_cache(&backend, "").unwrap(), @@ -2167,20 +2242,12 @@ mod tests { let (_temp_dir, backend, kv_meta, buff) = create_sample_external_kv_file().unwrap(); // create importer object. - let import_dir = tempfile::tempdir().unwrap(); let (_, key_manager) = new_key_manager_for_test(); let cfg = Config { memory_use_ratio: 0.0, ..Default::default() }; - let importer = SstImporter::::new( - &cfg, - import_dir, - Some(key_manager), - ApiVersion::V1, - false, - ) - .unwrap(); + let (importer, _import_dir) = new_importer::(&cfg, Some(key_manager)); let ext_storage = { importer.wrap_kms( importer.external_storage_or_cache(&backend, "").unwrap(), @@ -2225,16 +2292,9 @@ mod tests { let (_ext_sst_dir, backend, meta) = create_sample_external_sst_file().unwrap(); // create importer object. - let import_dir = tempfile::tempdir().unwrap(); let (_, key_manager) = new_key_manager_for_test(); - let importer = SstImporter::::new( - &Config::default(), - import_dir, - Some(key_manager.clone()), - ApiVersion::V1, - false, - ) - .unwrap(); + let (importer, _import_dir) = + new_importer::(&Config::default(), Some(key_manager.clone())); // perform download file into .temp dir. let file_name = "sample.sst"; @@ -2262,16 +2322,8 @@ mod tests { fn test_download_file_from_external_storage_for_kv() { let (_temp_dir, backend, kv_meta, _) = create_sample_external_kv_file().unwrap(); let (_, key_manager) = new_key_manager_for_test(); - - let import_dir = tempfile::tempdir().unwrap(); - let importer = SstImporter::::new( - &Config::default(), - import_dir, - Some(key_manager), - ApiVersion::V1, - false, - ) - .unwrap(); + let (importer, _import_dir) = + new_importer::(&Config::default(), Some(key_manager)); let path = importer.dir.get_import_path(kv_meta.get_name()).unwrap(); let restore_config = external_storage::RestoreConfig { @@ -2302,11 +2354,7 @@ mod tests { let (_ext_sst_dir, backend, meta) = create_sample_external_sst_file().unwrap(); // performs the download. - let importer_dir = tempfile::tempdir().unwrap(); - let cfg = Config::default(); - let importer = - SstImporter::::new(&cfg, &importer_dir, None, ApiVersion::V1, false) - .unwrap(); + let (importer, _import_dir) = new_importer::(&Config::default(), None); let db = create_sst_test_engine().unwrap(); let range = importer @@ -2353,17 +2401,9 @@ mod tests { let (_ext_sst_dir, backend, meta) = create_sample_external_sst_file().unwrap(); // performs the download. 
- let importer_dir = tempfile::tempdir().unwrap(); - let cfg = Config::default(); let (temp_dir, key_manager) = new_key_manager_for_test(); - let importer = SstImporter::::new( - &cfg, - &importer_dir, - Some(key_manager.clone()), - ApiVersion::V1, - false, - ) - .unwrap(); + let (importer, _import_dir) = + new_importer::(&Config::default(), Some(key_manager.clone())); let db_path = temp_dir.path().join("db"); let env = get_env(Some(key_manager), None /* io_rate_limiter */).unwrap(); @@ -2413,11 +2453,7 @@ mod tests { let (_ext_sst_dir, backend, meta) = create_sample_external_sst_file().unwrap(); // performs the download. - let importer_dir = tempfile::tempdir().unwrap(); - let cfg = Config::default(); - let importer = - SstImporter::::new(&cfg, &importer_dir, None, ApiVersion::V1, false) - .unwrap(); + let (importer, _import_dir) = new_importer::(&Config::default(), None); let db = create_sst_test_engine().unwrap(); let range = importer @@ -2460,11 +2496,7 @@ mod tests { #[test] fn test_download_sst_with_key_rewrite_ts_default() { // performs the download. - let importer_dir = tempfile::tempdir().unwrap(); - let cfg = Config::default(); - let importer = - SstImporter::::new(&cfg, &importer_dir, None, ApiVersion::V1, false) - .unwrap(); + let (importer, _import_dir) = new_importer::(&Config::default(), None); // creates a sample SST file. let (_ext_sst_dir, backend, meta) = create_sample_external_sst_file_txn_default().unwrap(); @@ -2506,11 +2538,7 @@ mod tests { #[test] fn test_download_sst_with_key_rewrite_ts_write() { // performs the download. - let importer_dir = tempfile::tempdir().unwrap(); - let cfg = Config::default(); - let importer = - SstImporter::::new(&cfg, &importer_dir, None, ApiVersion::V1, false) - .unwrap(); + let (importer, _import_dir) = new_importer::(&Config::default(), None); // creates a sample SST file. let (_ext_sst_dir, backend, meta) = create_sample_external_sst_file_txn_write().unwrap(); @@ -2574,11 +2602,7 @@ mod tests { meta.set_cf_name((*cf).to_string()); // performs the download. - let importer_dir = tempfile::tempdir().unwrap(); - let cfg = Config::default(); - let importer = - SstImporter::::new(&cfg, &importer_dir, None, ApiVersion::V1, false) - .unwrap(); + let (importer, _import_dir) = new_importer::(&Config::default(), None); let db = create_sst_test_engine().unwrap(); let range = importer @@ -2648,11 +2672,7 @@ mod tests { #[test] fn test_download_sst_partial_range() { let (_ext_sst_dir, backend, mut meta) = create_sample_external_sst_file().unwrap(); - let importer_dir = tempfile::tempdir().unwrap(); - let cfg = Config::default(); - let importer = - SstImporter::::new(&cfg, &importer_dir, None, ApiVersion::V1, false) - .unwrap(); + let (importer, _import_dir) = new_importer::(&Config::default(), None); let db = create_sst_test_engine().unwrap(); // note: the range doesn't contain the DATA_PREFIX 'z'. 
meta.mut_range().set_start(b"t123_r02".to_vec()); @@ -2696,11 +2716,7 @@ mod tests { #[test] fn test_download_sst_partial_range_with_key_rewrite() { let (_ext_sst_dir, backend, mut meta) = create_sample_external_sst_file().unwrap(); - let importer_dir = tempfile::tempdir().unwrap(); - let cfg = Config::default(); - let importer = - SstImporter::::new(&cfg, &importer_dir, None, ApiVersion::V1, false) - .unwrap(); + let (importer, _import_dir) = new_importer::(&Config::default(), None); let db = create_sst_test_engine().unwrap(); meta.mut_range().set_start(b"t5_r02".to_vec()); meta.mut_range().set_end(b"t5_r12".to_vec()); @@ -2745,11 +2761,7 @@ mod tests { file_system::write(ext_sst_dir.path().join("sample.sst"), b"not an SST file").unwrap(); let mut meta = SstMeta::default(); meta.set_uuid(vec![0u8; 16]); - let importer_dir = tempfile::tempdir().unwrap(); - let cfg = Config::default(); - let importer = - SstImporter::::new(&cfg, &importer_dir, None, ApiVersion::V1, false) - .unwrap(); + let (importer, _import_dir) = new_importer::(&Config::default(), None); let db = create_sst_test_engine().unwrap(); let backend = external_storage::make_local_backend(ext_sst_dir.path()); @@ -2772,11 +2784,7 @@ mod tests { #[test] fn test_download_sst_empty() { let (_ext_sst_dir, backend, mut meta) = create_sample_external_sst_file().unwrap(); - let importer_dir = tempfile::tempdir().unwrap(); - let cfg = Config::default(); - let importer = - SstImporter::::new(&cfg, &importer_dir, None, ApiVersion::V1, false) - .unwrap(); + let (importer, _import_dir) = new_importer::(&Config::default(), None); let db = create_sst_test_engine().unwrap(); meta.mut_range().set_start(vec![b'x']); meta.mut_range().set_end(vec![b'y']); @@ -2800,11 +2808,7 @@ mod tests { #[test] fn test_download_sst_wrong_key_prefix() { let (_ext_sst_dir, backend, meta) = create_sample_external_sst_file().unwrap(); - let importer_dir = tempfile::tempdir().unwrap(); - let cfg = Config::default(); - let importer = - SstImporter::::new(&cfg, &importer_dir, None, ApiVersion::V1, false) - .unwrap(); + let (importer, _import_dir) = new_importer::(&Config::default(), None); let db = create_sst_test_engine().unwrap(); let result = importer.download( @@ -2839,10 +2843,20 @@ mod tests { create_sample_external_rawkv_sst_file(b"0", b"z", false).unwrap(); // performs the download. - let importer_dir = tempfile::tempdir().unwrap(); - let cfg = Config::default(); - let importer = - SstImporter::::new(&cfg, &importer_dir, None, api_version, false).unwrap(); + let mut ingest_mediator = IngestMediator::default(); + let ingest_observer = Arc::new(IngestObserver::default()); + ingest_mediator.register(ingest_observer.clone()); + let import_dir = tempfile::tempdir().unwrap(); + let importer = SstImporter::::new( + &Config::default(), + import_dir, + None, + api_version, + false, + Arc::new(ingest_mediator), + ingest_observer, + ) + .unwrap(); let db = create_sst_test_engine().unwrap(); let range = importer @@ -2899,10 +2913,20 @@ mod tests { create_sample_external_rawkv_sst_file(b"b", b"c\x00", false).unwrap(); // performs the download. 
- let importer_dir = tempfile::tempdir().unwrap(); - let cfg = Config::default(); - let importer = - SstImporter::::new(&cfg, &importer_dir, None, api_version, false).unwrap(); + let mut ingest_mediator = IngestMediator::default(); + let ingest_observer = Arc::new(IngestObserver::default()); + ingest_mediator.register(ingest_observer.clone()); + let import_dir = tempfile::tempdir().unwrap(); + let importer = SstImporter::::new( + &Config::default(), + import_dir, + None, + api_version, + false, + Arc::new(ingest_mediator), + ingest_observer, + ) + .unwrap(); let db = create_sst_test_engine().unwrap(); let range = importer @@ -2955,10 +2979,20 @@ mod tests { create_sample_external_rawkv_sst_file(b"b", b"c\x00", true).unwrap(); // performs the download. - let importer_dir = tempfile::tempdir().unwrap(); - let cfg = Config::default(); - let importer = - SstImporter::::new(&cfg, &importer_dir, None, api_version, false).unwrap(); + let mut ingest_mediator = IngestMediator::default(); + let ingest_observer = Arc::new(IngestObserver::default()); + ingest_mediator.register(ingest_observer.clone()); + let import_dir = tempfile::tempdir().unwrap(); + let importer = SstImporter::::new( + &Config::default(), + import_dir, + None, + api_version, + false, + Arc::new(ingest_mediator), + ingest_observer, + ) + .unwrap(); let db = create_sst_test_engine().unwrap(); let range = importer @@ -3003,11 +3037,7 @@ mod tests { let (_ext_sst_dir, backend, meta) = create_sample_external_sst_file().unwrap(); // performs the download. - let importer_dir = tempfile::tempdir().unwrap(); - let cfg = Config::default(); - let mut importer = - SstImporter::::new(&cfg, &importer_dir, None, ApiVersion::V1, false) - .unwrap(); + let (mut importer, _import_dir) = new_importer::(&Config::default(), None); importer.set_compression_type(CF_DEFAULT, Some(SstCompressionType::Snappy)); let db = create_sst_test_engine().unwrap(); @@ -3037,13 +3067,9 @@ mod tests { let mut meta = SstMeta::default(); meta.set_uuid(Uuid::new_v4().as_bytes().to_vec()); - let importer_dir = tempfile::tempdir().unwrap(); - let cfg = Config::default(); - let mut importer = - SstImporter::::new(&cfg, &importer_dir, None, ApiVersion::V1, false) - .unwrap(); + let (mut importer, import_dir) = new_importer::(&Config::default(), None); importer.set_compression_type(CF_DEFAULT, Some(SstCompressionType::Zstd)); - let db_path = importer_dir.path().join("db"); + let db_path = import_dir.path().join("db"); let db = new_test_engine(db_path.to_str().unwrap(), DATA_CFS); let mut w = importer.new_txn_writer(&db, meta).unwrap(); @@ -3088,44 +3114,21 @@ mod tests { #[test] fn test_import_support_download() { - let import_dir = tempfile::tempdir().unwrap(); - let importer = SstImporter::::new( - &Config::default(), - import_dir, - None, - ApiVersion::V1, - false, - ) - .unwrap(); + let (importer, _import_dir) = new_importer::(&Config::default(), None); assert_eq!(importer.import_support_download(), false); - let import_dir = tempfile::tempdir().unwrap(); - let importer = SstImporter::::new( - &Config { - memory_use_ratio: 0.0, - ..Default::default() - }, - import_dir, - None, - ApiVersion::V1, - false, - ) - .unwrap(); + let cfg = Config { + memory_use_ratio: 0.0, + ..Default::default() + }; + let (importer, _import_dir) = new_importer::(&cfg, None); assert_eq!(importer.import_support_download(), true); } #[test] fn test_inc_mem_and_check() { // create importer object. 
- let import_dir = tempfile::tempdir().unwrap(); - let importer = SstImporter::::new( - &Config::default(), - import_dir, - None, - ApiVersion::V1, - false, - ) - .unwrap(); + let (importer, _import_dir) = new_importer::(&Config::default(), None); assert_eq!(importer.mem_use.load(Ordering::SeqCst), 0); // test inc_mem_and_check() and dec_mem() successfully. @@ -3151,16 +3154,7 @@ mod tests { #[test] fn test_dashmap_lock() { - let import_dir = tempfile::tempdir().unwrap(); - let importer = SstImporter::::new( - &Config::default(), - import_dir, - None, - ApiVersion::V1, - false, - ) - .unwrap(); - + let (importer, _import_dir) = new_importer::(&Config::default(), None); let key = "file1"; let r = Arc::new(OnceCell::new()); let value = (CacheKvFile::Mem(r), Instant::now()); @@ -3180,4 +3174,69 @@ mod tests { let _buff = v.0.clone(); assert_eq!(v.0.ref_count(), 2); } + + #[test] + fn test_ingest_lease() { + let (importer, _import_dir) = new_importer::(&Config::default(), None); + let region_id = 1; + let uuid = Uuid::new_v4(); + let rpc = "test"; + + // Not found means lease expired. + assert_matches!( + importer.check_lease(region_id, uuid.as_bytes(), rpc), + Err(Error::LeaseExpired), + ); + + let expired_deadline = StdInstant::now() - Duration::from_secs(1); + importer + .acquire_lease(region_id, uuid.as_bytes(), expired_deadline) + .unwrap(); + assert_matches!( + importer.check_lease(region_id, uuid.as_bytes(), rpc), + Err(Error::LeaseExpired), + ); + + // Renew the lease. + let deadline = StdInstant::now() + Duration::from_millis(500); + importer + .acquire_lease(region_id, uuid.as_bytes(), deadline) + .unwrap(); + importer + .check_lease(region_id, uuid.as_bytes(), rpc) + .unwrap(); + + // Wait for expiring. + thread::sleep(Duration::from_millis(800)); + assert_matches!( + importer.check_lease(region_id, uuid.as_bytes(), rpc), + Err(Error::LeaseExpired), + ); + + // Renew two leases. + let deadline = StdInstant::now() + Duration::from_millis(500); + importer + .acquire_lease(region_id, uuid.as_bytes(), deadline) + .unwrap(); + importer + .check_lease(region_id, uuid.as_bytes(), rpc) + .unwrap(); + let new_uuid = Uuid::new_v4(); + importer + .acquire_lease(region_id, new_uuid.as_bytes(), deadline) + .unwrap(); + importer + .check_lease(region_id, new_uuid.as_bytes(), rpc) + .unwrap(); + + // Expire one lease does not affect the other. + importer.release_lease(region_id, uuid.as_bytes()).unwrap(); + assert_matches!( + importer.check_lease(region_id, uuid.as_bytes(), rpc), + Err(Error::LeaseExpired), + ); + importer + .check_lease(region_id, new_uuid.as_bytes(), rpc) + .unwrap(); + } } diff --git a/components/sst_importer/src/sst_writer.rs b/components/sst_importer/src/sst_writer.rs index 1c6b06902a4..4fcf8fa2378 100644 --- a/components/sst_importer/src/sst_writer.rs +++ b/components/sst_importer/src/sst_writer.rs @@ -298,7 +298,7 @@ mod tests { use uuid::Uuid; use super::*; - use crate::{Config, SstImporter}; + use crate::{Config, IngestMediator, IngestObserver, Mediator, SstImporter}; // Return the temp dir path to avoid it drop out of the scope. 
fn new_writer, &RocksEngine, SstMeta) -> Result>( @@ -308,10 +308,21 @@ mod tests { let mut meta = SstMeta::default(); meta.set_uuid(Uuid::new_v4().as_bytes().to_vec()); + let mut ingest_mediator = IngestMediator::default(); + let ingest_observer = Arc::new(IngestObserver::default()); + ingest_mediator.register(ingest_observer.clone()); let importer_dir = tempfile::tempdir().unwrap(); let cfg = Config::default(); - let importer = - SstImporter::::new(&cfg, &importer_dir, None, api_version, false).unwrap(); + let importer = SstImporter::::new( + &cfg, + &importer_dir, + None, + api_version, + false, + Arc::new(ingest_mediator), + ingest_observer, + ) + .unwrap(); let db_path = importer_dir.path().join("db"); let db = new_test_engine(db_path.to_str().unwrap(), DATA_CFS); (f(&importer, &db, meta).unwrap(), importer_dir) diff --git a/components/test_raftstore-v2/Cargo.toml b/components/test_raftstore-v2/Cargo.toml index 7df2462fe3d..77d85fef437 100644 --- a/components/test_raftstore-v2/Cargo.toml +++ b/components/test_raftstore-v2/Cargo.toml @@ -60,6 +60,7 @@ service = { workspace = true } slog = { workspace = true } # better to not use slog-global, but pass in the logger slog-global = { workspace = true } +sst_importer = { workspace = true } tempfile = "3.0" test_pd_client = { workspace = true } test_raftstore = { workspace = true } diff --git a/components/test_raftstore-v2/src/node.rs b/components/test_raftstore-v2/src/node.rs index 70b6ccb1407..9c35b13796e 100644 --- a/components/test_raftstore-v2/src/node.rs +++ b/components/test_raftstore-v2/src/node.rs @@ -34,6 +34,7 @@ use raftstore_v2::{ use resource_control::ResourceGroupManager; use resource_metering::CollectorRegHandle; use service::service_manager::GrpcServiceManager; +use sst_importer::{IngestMediator, IngestObserver, Mediator}; use tempfile::TempDir; use test_pd_client::TestPdClient; use test_raftstore::{Config, Filter}; @@ -295,6 +296,9 @@ impl Simulator for NodeCluster { None, ); let importer = { + let mut ingest_mediator = IngestMediator::default(); + let ingest_observer = Arc::new(IngestObserver::default()); + ingest_mediator.register(ingest_observer.clone()); let dir = Path::new(raft_engine.get_engine_path()).join("../import-sst"); Arc::new( SstImporter::new( @@ -303,6 +307,8 @@ impl Simulator for NodeCluster { key_manager.clone(), cfg.storage.api_version(), true, + Arc::new(ingest_mediator), + ingest_observer, ) .unwrap(), ) diff --git a/components/test_raftstore-v2/src/server.rs b/components/test_raftstore-v2/src/server.rs index 7f6d036403d..04b84cd32fd 100644 --- a/components/test_raftstore-v2/src/server.rs +++ b/components/test_raftstore-v2/src/server.rs @@ -46,6 +46,7 @@ use resource_metering::{CollectorRegHandle, ResourceTagFactory}; use security::SecurityManager; use service::service_manager::GrpcServiceManager; use slog_global::debug; +use sst_importer::{IngestMediator, IngestObserver, Mediator}; use tempfile::TempDir; use test_pd_client::TestPdClient; use test_raftstore::{filter_send, AddressMap, Config, Filter}; @@ -457,6 +458,7 @@ impl ServerCluster { ); gc_worker.start(node_id).unwrap(); + let mut ingest_mediator = IngestMediator::default(); let rts_worker = if cfg.resolved_ts.enable { // Resolved ts worker let mut rts_worker = LazyWorker::new("resolved-ts"); @@ -465,6 +467,8 @@ impl ServerCluster { // resolved ts endpoint needs store id. 
store_meta.lock().unwrap().store_id = node_id; // Resolved ts endpoint + let ingest_observer = Arc::new(IngestObserver::default()); + ingest_mediator.register(ingest_observer.clone()); let rts_endpoint = resolved_ts::Endpoint::new( &cfg.resolved_ts, rts_worker.scheduler(), @@ -474,6 +478,7 @@ impl ServerCluster { concurrency_manager.clone(), self.env.clone(), self.security_mgr.clone(), + Some(ingest_observer), ); // Start the worker rts_worker.start(rts_endpoint); @@ -542,6 +547,9 @@ impl ServerCluster { // Create import service. let importer = { + let mut ingest_mediator = IngestMediator::default(); + let ingest_observer = Arc::new(IngestObserver::default()); + ingest_mediator.register(ingest_observer.clone()); let dir = Path::new(raft_engine.get_engine_path()).join("../import-sst"); Arc::new( SstImporter::new( @@ -550,6 +558,8 @@ impl ServerCluster { key_manager.clone(), cfg.storage.api_version(), true, + Arc::new(ingest_mediator), + ingest_observer, ) .unwrap(), ) diff --git a/components/test_raftstore/Cargo.toml b/components/test_raftstore/Cargo.toml index 1f5064f0544..4bb94e4106b 100644 --- a/components/test_raftstore/Cargo.toml +++ b/components/test_raftstore/Cargo.toml @@ -61,6 +61,7 @@ service = { workspace = true } slog = { workspace = true } # better to not use slog-global, but pass in the logger slog-global = { workspace = true } +sst_importer = { workspace = true } tempfile = "3.0" test_pd_client = { workspace = true } test_util = { workspace = true } diff --git a/components/test_raftstore/src/node.rs b/components/test_raftstore/src/node.rs index 5a5b86150c2..96002a59e33 100644 --- a/components/test_raftstore/src/node.rs +++ b/components/test_raftstore/src/node.rs @@ -34,6 +34,7 @@ use raftstore::{ use resource_control::ResourceGroupManager; use resource_metering::CollectorRegHandle; use service::service_manager::GrpcServiceManager; +use sst_importer::{IngestMediator, IngestObserver, Mediator}; use tempfile::TempDir; use test_pd_client::TestPdClient; use tikv::{ @@ -297,9 +298,21 @@ impl Simulator for NodeCluster { ReplicaReadLockChecker::new(cm.clone()).register(&mut coprocessor_host); let importer = { + let mut ingest_mediator = IngestMediator::default(); + let ingest_observer = Arc::new(IngestObserver::default()); + ingest_mediator.register(ingest_observer.clone()); let dir = Path::new(engines.kv.path()).join("import-sst"); Arc::new( - SstImporter::new(&cfg.import, dir, None, cfg.storage.api_version(), false).unwrap(), + SstImporter::new( + &cfg.import, + dir, + None, + cfg.storage.api_version(), + false, + Arc::new(ingest_mediator), + ingest_observer, + ) + .unwrap(), ) }; diff --git a/components/test_raftstore/src/server.rs b/components/test_raftstore/src/server.rs index 09eb5a11f66..e9e0ae49c28 100644 --- a/components/test_raftstore/src/server.rs +++ b/components/test_raftstore/src/server.rs @@ -46,6 +46,7 @@ use resource_control::ResourceGroupManager; use resource_metering::{CollectorRegHandle, ResourceTagFactory}; use security::SecurityManager; use service::service_manager::GrpcServiceManager; +use sst_importer::{IngestMediator, IngestObserver, Mediator}; use tempfile::TempDir; use test_pd_client::TestPdClient; use tikv::{ @@ -77,7 +78,7 @@ use tikv_util::{ quota_limiter::QuotaLimiter, sys::thread::ThreadBuildWrapper, time::ThreadReadId, - worker::{Builder as WorkerBuilder, LazyWorker}, + worker::{Builder as WorkerBuilder, LazyWorker, Scheduler}, HandyRwLock, }; use tokio::runtime::Builder as TokioBuilder; @@ -344,6 +345,7 @@ impl ServerCluster { ); 
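Each test harness repeats the same wiring: create an `IngestMediator`, register the `IngestObserver`s interested in lease events (the resolved-ts endpoint's and the importer's), then hand the mediator and one observer to `SstImporter::new`. A minimal sketch of what such a mediator plausibly looks like; the trait shape and event variants are inferred from usage in this diff, not the crate's actual definitions:

```rust
use std::{sync::Arc, time::Instant};

type RegionId = u64;
type LeaseId = u128; // stand-in for uuid::Uuid

#[derive(Clone, Copy)]
enum Event {
    Acquire { region_id: RegionId, uuid: LeaseId, deadline: Instant },
    Release { region_id: RegionId, uuid: LeaseId },
}

trait Observer: Send + Sync {
    fn update(&self, event: Event);
}

#[derive(Default)]
struct MediatorImpl {
    observers: Vec<Arc<dyn Observer>>,
}

impl MediatorImpl {
    // Registration happens before the mediator is shared (hence `&mut self`),
    // matching the `let mut ingest_mediator = ...; register(...)` sequence.
    fn register(&mut self, ob: Arc<dyn Observer>) {
        self.observers.push(ob);
    }

    fn acquire(&self, region_id: RegionId, uuid: LeaseId, deadline: Instant) {
        self.broadcast(Event::Acquire { region_id, uuid, deadline });
    }

    fn release(&self, region_id: RegionId, uuid: LeaseId) {
        self.broadcast(Event::Release { region_id, uuid });
    }

    // Fan the event out to every registered observer.
    fn broadcast(&self, event: Event) {
        for ob in &self.observers {
            ob.update(event);
        }
    }
}
```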
gc_worker.start(node_id).unwrap(); + let mut ingest_mediator = IngestMediator::default(); let rts_worker = if cfg.resolved_ts.enable { // Resolved ts worker let mut rts_worker = LazyWorker::new("resolved-ts"); @@ -351,6 +353,9 @@ impl ServerCluster { rts_ob.register_to(&mut coprocessor_host); // resolved ts endpoint needs store id. store_meta.lock().unwrap().store_id = Some(node_id); + // Setup ingest observer. + let ingest_observer = Arc::new(IngestObserver::default()); + ingest_mediator.register(ingest_observer.clone()); // Resolved ts endpoint let rts_endpoint = resolved_ts::Endpoint::new( &cfg.resolved_ts, @@ -361,6 +366,7 @@ impl ServerCluster { concurrency_manager.clone(), self.env.clone(), self.security_mgr.clone(), + Some(ingest_observer), ); // Start the worker rts_worker.start(rts_endpoint); @@ -428,6 +434,8 @@ impl ServerCluster { // Create import service. let importer = { + let ingest_observer = Arc::new(IngestObserver::default()); + ingest_mediator.register(ingest_observer.clone()); let dir = Path::new(engines.kv.path()).join("import-sst"); Arc::new( SstImporter::new( @@ -436,6 +444,8 @@ impl ServerCluster { key_manager.clone(), cfg.storage.api_version(), false, + Arc::new(ingest_mediator), + ingest_observer, ) .unwrap(), ) @@ -663,6 +673,12 @@ impl ServerCluster { self.raft_clients.insert(node_id, client); Ok(node_id) } + + pub fn get_resolved_ts_scheduler(&self, store_id: u64) -> Option> { + let meta = self.metas.get(&store_id)?; + let w = meta.rts_worker.as_ref()?; + Some(w.scheduler()) + } } impl Simulator for ServerCluster { diff --git a/components/test_sst_importer/Cargo.toml b/components/test_sst_importer/Cargo.toml index 56d00183180..5975f38163e 100644 --- a/components/test_sst_importer/Cargo.toml +++ b/components/test_sst_importer/Cargo.toml @@ -13,6 +13,12 @@ test = false crc32fast = "1.2" engine_rocks = { workspace = true } engine_traits = { workspace = true } +external_storage ={ workspace = true } +futures = "0.3" +grpcio = { workspace = true } keys = { workspace = true } kvproto = { workspace = true } +tempfile = "3.0" +tikv_util = { workspace = true } +txn_types = { workspace = true } uuid = { version = "0.8.1", features = ["serde", "v4"] } diff --git a/components/test_sst_importer/src/lib.rs b/components/test_sst_importer/src/lib.rs index 2f8c195a6bf..721add2b58d 100644 --- a/components/test_sst_importer/src/lib.rs +++ b/components/test_sst_importer/src/lib.rs @@ -12,6 +12,9 @@ use engine_traits::{KvEngine, SstWriter, SstWriterBuilder}; use kvproto::import_sstpb::*; use uuid::Uuid; +mod util; +pub use util::*; + pub const PROP_TEST_MARKER_CF_NAME: &[u8] = b"tikv.test_marker_cf_name"; pub fn new_test_engine(path: &str, cfs: &[&str]) -> RocksEngine { diff --git a/components/test_sst_importer/src/util.rs b/components/test_sst_importer/src/util.rs new file mode 100644 index 00000000000..ba469fa14c6 --- /dev/null +++ b/components/test_sst_importer/src/util.rs @@ -0,0 +1,311 @@ +// Copyright 2020 TiKV Project Authors. Licensed under Apache-2.0. 
+ +use std::{ + collections::HashMap, + io::{Cursor, Write}, + thread, + time::Duration, +}; + +use engine_traits::CF_DEFAULT; +use external_storage::{ExternalStorage, UnpinReader}; +use futures::{executor::block_on, io::Cursor as AsyncCursor, stream, SinkExt}; +use grpcio::{Result, WriteFlags}; +use kvproto::{ + brpb::{Local, StorageBackend}, + import_sstpb::{KvMeta, *}, + kvrpcpb::*, + tikvpb::*, +}; +use tempfile::TempDir; +use tikv_util::{codec::stream_event::EventEncoder, stream::block_on_external_io}; +use txn_types::Key; +use uuid::Uuid; + +const CLEANUP_SST_MILLIS: u64 = 10; + +pub fn new_sst_meta(crc32: u32, length: u64) -> SstMeta { + let mut m = SstMeta::default(); + m.set_uuid(Uuid::new_v4().as_bytes().to_vec()); + m.set_crc32(crc32); + m.set_length(length); + m +} + +pub fn must_acquire_sst_lease(client: &ImportSstClient, meta: &SstMeta, ttl: Duration) { + let resp = send_acquire_sst_lease(client, meta, ttl).unwrap(); + let acquired_lease = &resp.get_acquired()[0]; + assert_eq!(meta.get_region_id(), acquired_lease.get_region().get_id()); + assert_eq!(meta.get_uuid(), acquired_lease.get_uuid()); +} + +pub fn send_acquire_sst_lease( + client: &ImportSstClient, + meta: &SstMeta, + ttl: Duration, +) -> Result { + let region_id = meta.get_region_id(); + let mut acquire = AcquireLease::default(); + acquire.mut_lease().mut_region().set_id(region_id); + acquire.mut_lease().set_uuid(meta.get_uuid().into()); + acquire.set_ttl(ttl.as_secs()); + let mut req = LeaseRequest::default(); + req.mut_acquire().push(acquire); + let resp = client.lease(&req)?; + Ok(resp) +} + +pub fn must_release_sst_lease(client: &ImportSstClient, meta: &SstMeta) { + let resp = send_release_sst_lease(client, meta).unwrap(); + let released_lease = &resp.get_released()[0]; + assert_eq!(meta.get_region_id(), released_lease.get_region().get_id()); + assert_eq!(meta.get_uuid(), released_lease.get_uuid()); +} + +fn send_release_sst_lease(client: &ImportSstClient, meta: &SstMeta) -> Result { + let region_id = meta.get_region_id(); + let mut release = ReleaseLease::default(); + release.mut_lease().mut_region().set_id(region_id); + release.mut_lease().set_uuid(meta.get_uuid().into()); + let mut req = LeaseRequest::default(); + req.mut_release().push(release); + let resp = client.lease(&req)?; + Ok(resp) +} + +pub fn send_upload_sst( + client: &ImportSstClient, + meta: &SstMeta, + data: &[u8], +) -> Result { + let mut r1 = UploadRequest::default(); + r1.set_meta(meta.clone()); + let mut r2 = UploadRequest::default(); + r2.set_data(data.to_vec()); + let reqs: Vec<_> = vec![r1, r2] + .into_iter() + .map(|r| Result::Ok((r, WriteFlags::default()))) + .collect(); + let (mut tx, rx) = client.upload().unwrap(); + let mut stream = stream::iter(reqs); + block_on(async move { + let send_res = tx.send_all(&mut stream).await; + let close_res = tx.close().await; + match rx.await { + Ok(resp) => Ok(resp), + Err(e) => { + send_res?; + close_res?; + Err(e) + } + } + }) +} + +pub fn send_write_sst( + client: &ImportSstClient, + meta: &SstMeta, + keys: Vec>, + values: Vec>, + commit_ts: u64, +) -> Result { + let mut r1 = WriteRequest::default(); + r1.set_meta(meta.clone()); + let mut r2 = WriteRequest::default(); + + let mut batch = WriteBatch::default(); + let mut pairs = vec![]; + + for (i, key) in keys.iter().enumerate() { + let mut pair = Pair::default(); + pair.set_key(key.to_vec()); + pair.set_value(values[i].to_vec()); + pairs.push(pair); + } + batch.set_commit_ts(commit_ts); + batch.set_pairs(pairs.into()); + r2.set_batch(batch); + 
+ let reqs: Vec<_> = vec![r1, r2] + .into_iter() + .map(|r| Result::Ok((r, WriteFlags::default()))) + .collect(); + + let (mut tx, rx) = client.write().unwrap(); + let mut stream = stream::iter(reqs); + block_on(async move { + let send_res = tx.send_all(&mut stream).await; + let close_res = tx.close().await; + match rx.await { + Ok(resp) => Ok(resp), + Err(e) => { + send_res?; + close_res?; + Err(e) + } + } + }) +} + +pub fn must_ingest_sst(client: &ImportSstClient, context: Context, meta: SstMeta) { + let mut ingest_request = IngestRequest::default(); + ingest_request.set_context(context); + ingest_request.set_sst(meta); + + let resp = client.ingest(&ingest_request).unwrap(); + + assert!(!resp.has_error(), "{:?}", resp); +} + +pub fn must_ingest_sst_error(client: &ImportSstClient, context: Context, meta: SstMeta) { + let mut ingest_request = IngestRequest::default(); + ingest_request.set_context(context); + ingest_request.set_sst(meta); + + let resp = client.ingest(&ingest_request).unwrap(); + + assert!(resp.has_error(), "{:?}", resp); +} + +pub fn check_ingested_kvs(tikv: &TikvClient, ctx: &Context, sst_range: (u8, u8)) { + check_ingested_kvs_cf(tikv, ctx, "", sst_range); +} + +pub fn check_ingested_kvs_cf(tikv: &TikvClient, ctx: &Context, cf: &str, sst_range: (u8, u8)) { + for i in sst_range.0..sst_range.1 { + let mut m = RawGetRequest::default(); + m.set_context(ctx.clone()); + m.set_key(vec![i]); + m.set_cf(cf.to_owned()); + let resp = tikv.raw_get(&m).unwrap(); + assert!(resp.get_error().is_empty()); + assert!(!resp.has_region_error()); + assert_eq!(resp.get_value(), &[i]); + } +} + +#[track_caller] +pub fn check_applied_kvs_cf, V: AsRef<[u8]> + std::fmt::Debug>( + tikv: &TikvClient, + ctx: &Context, + cf: &str, + entries: impl Iterator, +) { + let mut get = RawBatchGetRequest::default(); + get.set_cf(cf.to_owned()); + get.set_context(ctx.clone()); + let mut keymap = HashMap::new(); + for (key, value, ts) in entries { + let the_key = Key::from_raw(key.as_ref()) + .append_ts(ts.into()) + .into_encoded(); + keymap.insert(the_key.clone(), value); + get.mut_keys().push(the_key); + } + for pair in tikv.raw_batch_get(&get).unwrap().get_pairs() { + let entry = keymap.remove(pair.get_key()).expect("unexpected key"); + assert_eq!( + entry.as_ref(), + pair.get_value(), + "key is {:?}", + pair.get_key() + ); + } + assert!( + keymap.is_empty(), + "not all keys consumed, remained {:?}", + keymap + ); +} + +pub fn check_ingested_txn_kvs( + tikv: &TikvClient, + ctx: &Context, + sst_range: (u8, u8), + start_ts: u64, +) { + for i in sst_range.0..sst_range.1 { + let mut m = GetRequest::default(); + m.set_context(ctx.clone()); + m.set_key(vec![i]); + m.set_version(start_ts); + let resp = tikv.kv_get(&m).unwrap(); + assert!(!resp.has_region_error()); + assert_eq!(resp.get_value(), &[i]); + } +} + +pub fn check_sst_deleted(client: &ImportSstClient, meta: &SstMeta, data: &[u8]) { + for _ in 0..10 { + if send_upload_sst(client, meta, data).is_ok() { + // If we can upload the file, it means the previous file has been deleted. 
+ return; + } + thread::sleep(Duration::from_millis(CLEANUP_SST_MILLIS)); + } + send_upload_sst(client, meta, data).unwrap(); +} + +pub fn make_plain_file(storage: &dyn ExternalStorage, name: &str, kvs: I) -> KvMeta +where + I: Iterator, + K: AsRef<[u8]>, + V: AsRef<[u8]>, +{ + let mut buf = vec![]; + let mut file = Cursor::new(&mut buf); + let mut start_ts: Option = None; + for (key, value, ts) in kvs { + let the_key = Key::from_raw(key.as_ref()) + .append_ts(ts.into()) + .into_encoded(); + start_ts = Some(start_ts.map_or(ts, |ts0| ts0.min(ts))); + for segment in EventEncoder::encode_event(&the_key, value.as_ref()) { + file.write_all(segment.as_ref()).unwrap(); + } + } + file.flush().unwrap(); + let len = buf.len() as u64; + block_on_external_io(storage.write(name, UnpinReader(Box::new(AsyncCursor::new(buf))), len)) + .unwrap(); + let mut meta = KvMeta::new(); + meta.set_start_ts(start_ts.unwrap_or_default()); + meta.set_length(len); + meta.set_restore_ts(u64::MAX); + meta.set_compression_type(kvproto::brpb::CompressionType::Unknown); + meta.set_name(name.to_owned()); + meta.set_cf(CF_DEFAULT.to_owned()); + meta +} + +pub fn rewrite_for(meta: &mut KvMeta, old_prefix: &[u8], new_prefix: &[u8]) -> RewriteRule { + assert_eq!(old_prefix.len(), new_prefix.len()); + fn rewrite(key: &mut Vec, old_prefix: &[u8], new_prefix: &[u8]) { + assert!(key.starts_with(old_prefix)); + let len = old_prefix.len(); + key.splice(..len, new_prefix.iter().cloned()); + } + rewrite(meta.mut_start_key(), old_prefix, new_prefix); + rewrite(meta.mut_end_key(), old_prefix, new_prefix); + let mut rule = RewriteRule::default(); + rule.set_old_key_prefix(old_prefix.to_vec()); + rule.set_new_key_prefix(new_prefix.to_vec()); + rule +} + +pub fn register_range_for(meta: &mut KvMeta, start: &[u8], end: &[u8]) { + let start = Key::from_raw(start); + let end = Key::from_raw(end); + meta.set_start_key(start.into_encoded()); + meta.set_end_key(end.into_encoded()); +} + +pub fn local_storage(tmp: &TempDir) -> StorageBackend { + let mut backend = StorageBackend::default(); + backend.set_local({ + let mut local = Local::default(); + local.set_path(tmp.path().to_str().unwrap().to_owned()); + local + }); + backend +} diff --git a/deny.toml b/deny.toml index ee4099d1370..29d9ef65c09 100644 --- a/deny.toml +++ b/deny.toml @@ -107,4 +107,4 @@ exceptions = [ [sources] unknown-git = "deny" unknown-registry = "deny" -allow-org = { github = ["tikv", "pingcap", "rust-lang"] } +allow-org = { github = ["tikv", "pingcap", "rust-lang", "overvenus"] } diff --git a/src/import/ingest.rs b/src/import/ingest.rs new file mode 100644 index 00000000000..c16a61fe726 --- /dev/null +++ b/src/import/ingest.rs @@ -0,0 +1,329 @@ +// Copyright 2024 TiKV Project Authors. Licensed under Apache-2.0. 
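The new `src/import/ingest.rs` starting here pulls the ingest path out of `sst_service.rs`. Its first building block is a latch keyed by SST path that serializes requests touching the same file. A compact sketch of the idea, using string keys where the real latch uses `sst_meta_to_path`-derived `PathBuf`s:

```rust
use std::{collections::HashSet, sync::Mutex};

#[derive(Default)]
struct PathLatch(Mutex<HashSet<String>>);

impl PathLatch {
    // true => this caller took the slot; false => someone else holds it.
    fn acquire(&self, key: &str) -> bool {
        self.0.lock().unwrap().insert(key.to_owned())
    }

    fn release(&self, key: &str) -> bool {
        self.0.lock().unwrap().remove(key)
    }
}

// Batch acquisition as the ingest path does it: take every slot, and if any
// was already held, roll back the ones just taken and fail the whole batch.
fn try_lock_all(latch: &PathLatch, keys: &[&str]) -> bool {
    let mut taken = Vec::new();
    for k in keys {
        if latch.acquire(k) {
            taken.push(*k);
        } else {
            for t in taken {
                latch.release(t);
            }
            return false;
        }
    }
    true
}
```

The all-or-nothing acquisition mirrors the `ingest()` function later in this file: if any SST in the request is already being ingested, the slots just taken are released again and the batch fails with `FileConflict`.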
+
+use std::{
+    collections::HashSet,
+    future::Future,
+    path::PathBuf,
+    sync::{
+        atomic::{AtomicU64, Ordering},
+        Arc, Mutex,
+    },
+    time::Duration,
+};
+
+use engine_traits::{KvEngine, CF_WRITE};
+use kvproto::{
+    errorpb,
+    import_sstpb::{Error as ImportPbError, SstMeta, SwitchMode, *},
+    kvrpcpb::Context,
+};
+use raftstore_v2::StoreMeta;
+use sst_importer::{metrics::*, sst_meta_to_path, Error, Result, SstImporter};
+use tikv_kv::{
+    Engine, LocalTablets, Modify, SnapContext, Snapshot, SnapshotExt, WriteData, WriteEvent,
+};
+use txn_types::TimeStamp;
+
+use super::{pb_error_inc, raft_writer::wait_write};
+use crate::storage::{self, errors::extract_region_error_from_error};
+
+#[derive(Default)]
+pub(super) struct IngestLatch(Mutex<HashSet<PathBuf>>);
+
+impl IngestLatch {
+    pub(super) fn acquire_lock(&self, meta: &SstMeta) -> Result<bool> {
+        let mut slots = self.0.lock().unwrap();
+        let p = sst_meta_to_path(meta)?;
+        Ok(slots.insert(p))
+    }
+
+    pub(super) fn release_lock(&self, meta: &SstMeta) -> Result<bool> {
+        let mut slots = self.0.lock().unwrap();
+        let p = sst_meta_to_path(meta)?;
+        Ok(slots.remove(&p))
+    }
+}
+
+#[derive(Default)]
+pub(super) struct SuspendDeadline(AtomicU64);
+
+impl SuspendDeadline {
+    /// Check whether we should suspend the current request.
+    pub(super) fn check_suspend(&self) -> Result<()> {
+        let now = TimeStamp::physical_now();
+        let suspend_until = self.0.load(Ordering::SeqCst);
+        if now < suspend_until {
+            Err(Error::Suspended {
+                time_to_lease_expire: Duration::from_millis(suspend_until - now),
+            })
+        } else {
+            Ok(())
+        }
+    }
+
+    /// Suspend requests for a period.
+    ///
+    /// # Returns
+    ///
+    /// Whether the requests had already been suspended.
+    pub(super) fn suspend_requests(&self, for_time: Duration) -> bool {
+        let now = TimeStamp::physical_now();
+        let last_suspend_until = self.0.load(Ordering::SeqCst);
+        let suspended = now < last_suspend_until;
+        let suspend_until = TimeStamp::physical_now() + for_time.as_millis() as u64;
+        self.0.store(suspend_until, Ordering::SeqCst);
+        suspended
+    }
+
+    /// Allow all requests to enter.
+    ///
+    /// # Returns
+    ///
+    /// Whether requests had previously been suspended.
+ pub(super) fn allow_requests(&self) -> bool { + let now = TimeStamp::physical_now(); + let last_suspend_until = self.0.load(Ordering::SeqCst); + let suspended = now < last_suspend_until; + self.0.store(0, Ordering::SeqCst); + suspended + } +} + +fn check_write_stall( + region_id: u64, + tablets: &LocalTablets, + store_meta: &Option>>>, + importer: &SstImporter, +) -> Option { + let tablet = match tablets.get(region_id) { + Some(tablet) => tablet, + None => { + let mut errorpb = errorpb::Error::default(); + errorpb.set_message(format!("region {} not found", region_id)); + errorpb.mut_region_not_found().set_region_id(region_id); + return Some(errorpb); + } + }; + + let reject_error = |region_id: Option| -> Option { + let mut errorpb = errorpb::Error::default(); + let err = if let Some(id) = region_id { + format!("too many sst files are ingesting for region {}", id) + } else { + "too many sst files are ingesting".to_string() + }; + let mut server_is_busy_err = errorpb::ServerIsBusy::default(); + server_is_busy_err.set_reason(err.clone()); + errorpb.set_message(err); + errorpb.set_server_is_busy(server_is_busy_err); + Some(errorpb) + }; + + // store_meta being Some means it is v2 + if let Some(ref store_meta) = store_meta { + if let Some((region, _)) = store_meta.lock().unwrap().regions.get(®ion_id) { + if !importer.region_in_import_mode(region) + && tablet.ingest_maybe_slowdown_writes(CF_WRITE).expect("cf") + { + return reject_error(Some(region_id)); + } + } else { + let mut errorpb = errorpb::Error::default(); + errorpb.set_message(format!("region {} not found", region_id)); + errorpb.mut_region_not_found().set_region_id(region_id); + return Some(errorpb); + } + } else if importer.get_mode() == SwitchMode::Normal + && tablet.ingest_maybe_slowdown_writes(CF_WRITE).expect("cf") + { + match tablet.get_sst_key_ranges(CF_WRITE, 0) { + Ok(l0_sst_ranges) => { + warn!( + "sst ingest is too slow"; + "sst_ranges" => ?l0_sst_ranges, + ); + } + Err(e) => { + error!("get sst key ranges failed"; "err" => ?e); + } + } + return reject_error(None); + } + + None +} + +pub(super) fn async_snapshot( + engine: &mut E, + context: &Context, +) -> impl Future> { + let res = engine.async_snapshot(SnapContext { + pb_ctx: context, + ..Default::default() + }); + async move { + res.await.map_err(|e| { + let err: storage::Error = e.into(); + if let Some(e) = extract_region_error_from_error(&err) { + e + } else { + let mut e = errorpb::Error::default(); + e.set_message(format!("{}", err)); + e + } + }) + } +} + +async fn ingest_files_impl( + mut context: Context, + ssts: Vec, + mut engine: E, + importer: &SstImporter, + label: &'static str, +) -> Result { + // check api version + if !importer.check_api_version(&ssts)? { + return Err(Error::IncompatibleApiVersion); + } + + let snapshot_res = async_snapshot(&mut engine, &context); + let mut resp = IngestResponse::default(); + let res = match snapshot_res.await { + Ok(snap) => snap, + Err(e) => { + pb_error_inc(label, &e); + resp.set_error(e); + return Ok(resp); + } + }; + + fail_point!("import::sst_service::ingest"); + // Here we shall check whether the file has been ingested before. This operation + // must execute after geting a snapshot from raftstore to make sure that the + // current leader has applied to current term. + for sst in &ssts { + if !importer.exist(sst) { + warn!( + "sst [{:?}] not exist. 
we may retry an operation that has already succeeded", + sst + ); + let mut errorpb = errorpb::Error::default(); + let err = "The file which would be ingested doest not exist."; + let stale_err = errorpb::StaleCommand::default(); + errorpb.set_message(err.to_string()); + errorpb.set_stale_command(stale_err); + resp.set_error(errorpb); + return Ok(resp); + } + } + let modifies = ssts + .into_iter() + .map(|s| Modify::Ingest(Box::new(s))) + .collect(); + context.set_term(res.ext().get_term().unwrap().into()); + let region_id = context.get_region_id(); + let res = engine.async_write( + &context, + WriteData::from_modifies(modifies), + WriteEvent::BASIC_EVENT, + None, + ); + + let mut resp = IngestResponse::default(); + if let Err(e) = wait_write(res).await { + if let Some(e) = extract_region_error_from_error(&e) { + pb_error_inc(label, &e); + resp.set_error(e); + } else { + IMPORTER_ERROR_VEC + .with_label_values(&[label, "unknown"]) + .inc(); + resp.mut_error() + .set_message(format!("[region {}] ingest failed: {:?}", region_id, e)); + } + } + Ok(resp) +} + +pub async fn ingest( + mut req: MultiIngestRequest, + engine: E, + suspend: &Arc, + tablets: &LocalTablets, + store_meta: &Option>>>, + importer: &SstImporter, + ingest_latch: &Arc, + label: &'static str, +) -> Result { + let mut resp = IngestResponse::default(); + if let Err(err) = suspend.check_suspend() { + resp.set_error(ImportPbError::from(err).take_store_error()); + return Ok(resp); + } + + if let Some(errorpb) = check_write_stall( + req.get_context().get_region_id(), + tablets, + store_meta, + importer, + ) { + resp.set_error(errorpb); + return Ok(resp); + } + + // Make sure all ssts have valid leases. + let mut lease_refs = Vec::with_capacity(req.get_ssts().len()); + for meta in req.get_ssts() { + match importer.check_lease(meta.get_region_id(), meta.get_uuid(), label) { + Ok(lease) => lease_refs.push(lease), + Err(e) => { + let mut resp = IngestResponse::default(); + let mut errorpb = errorpb::Error::default(); + errorpb.set_message(e.to_string()); + resp.set_error(errorpb); + return Ok(resp); + } + }; + } + + let mut errorpb = errorpb::Error::default(); + let mut metas = vec![]; + for meta in req.get_ssts() { + if ingest_latch.acquire_lock(meta).unwrap_or(false) { + metas.push(meta.clone()); + } + } + if metas.len() < req.get_ssts().len() { + for m in metas { + ingest_latch.release_lock(&m).unwrap(); + } + errorpb.set_message(Error::FileConflict.to_string()); + resp.set_error(errorpb); + return Ok(resp); + } + let res = ingest_files_impl( + req.take_context(), + req.take_ssts().into(), + engine, + importer, + label, + ) + .await; + for meta in &metas { + ingest_latch.release_lock(meta).unwrap(); + } + // We should drop lease refs, so that they can be expired. + drop(lease_refs); + for meta in &metas { + if let Err(e) = importer.release_lease(meta.get_region_id(), meta.get_uuid()) { + warn!("expire sst lease failed after ingest"; + "region_id" => meta.get_region_id(), + "uuid" => log_wrappers::hex_encode_upper(meta.get_uuid()), + "error" => ?e, + ); + } + } + + res +} diff --git a/src/import/mod.rs b/src/import/mod.rs index 6fe43b9aa32..20d637a387b 100644 --- a/src/import/mod.rs +++ b/src/import/mod.rs @@ -13,12 +13,15 @@ //! inside TiKV because it needs to interact with raftstore. 
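`SuspendDeadline` above encodes the suspend window as one atomic physical timestamp: a request is admitted only when `now >= suspend_until`. A self-contained sketch of the same mechanism, substituting `SystemTime` for TiKV's `TimeStamp::physical_now()`; note that, like the original, `suspend` is a load-then-store rather than a compare-and-swap, so concurrent suspends may overwrite each other's deadlines:

```rust
use std::{
    sync::atomic::{AtomicU64, Ordering},
    time::{Duration, SystemTime, UNIX_EPOCH},
};

fn physical_now_ms() -> u64 {
    SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .unwrap()
        .as_millis() as u64
}

#[derive(Default)]
struct SuspendWindow(AtomicU64);

impl SuspendWindow {
    // Err carries how long the caller should back off before retrying.
    fn check(&self) -> Result<(), Duration> {
        let now = physical_now_ms();
        let until = self.0.load(Ordering::SeqCst);
        if now < until {
            Err(Duration::from_millis(until - now))
        } else {
            Ok(())
        }
    }

    // Returns whether requests were already suspended beforehand.
    fn suspend(&self, for_time: Duration) -> bool {
        let now = physical_now_ms();
        let was_suspended = now < self.0.load(Ordering::SeqCst);
        self.0
            .store(now + for_time.as_millis() as u64, Ordering::SeqCst);
        was_suspended
    }

    // Lifts the window; returns whether one was in effect.
    fn allow(&self) -> bool {
        physical_now_ms() < self.0.swap(0, Ordering::SeqCst)
    }
}
```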
mod duplicate_detect; +mod ingest; mod raft_writer; mod sst_service; use std::fmt::Debug; use grpcio::{RpcStatus, RpcStatusCode}; +use kvproto::errorpb; +use sst_importer::metrics::IMPORTER_ERROR_VEC; pub use sst_importer::{Config, Error, Result, SstImporter, TxnSstWriter}; pub use self::sst_service::ImportSstService; @@ -49,3 +52,28 @@ macro_rules! send_rpc_response { let _ = res.map_err(|e| warn!("send rpc response"; "err" => %e)).await; }}; } + +// add error statistics from pb error response +fn pb_error_inc(type_: &str, e: &errorpb::Error) { + let label = if e.has_not_leader() { + "not_leader" + } else if e.has_store_not_match() { + "store_not_match" + } else if e.has_region_not_found() { + "region_not_found" + } else if e.has_key_not_in_region() { + "key_not_in_range" + } else if e.has_epoch_not_match() { + "epoch_not_match" + } else if e.has_server_is_busy() { + "server_is_busy" + } else if e.has_stale_command() { + "stale_command" + } else if e.has_raft_entry_too_large() { + "raft_entry_too_large" + } else { + "unknown" + }; + + IMPORTER_ERROR_VEC.with_label_values(&[type_, label]).inc(); +} diff --git a/src/import/sst_service.rs b/src/import/sst_service.rs index bd12053031f..c35003c3f30 100644 --- a/src/import/sst_service.rs +++ b/src/import/sst_service.rs @@ -1,18 +1,13 @@ // Copyright 2018 TiKV Project Authors. Licensed under Apache-2.0. use std::{ - collections::{HashMap, HashSet, VecDeque}, + collections::{HashMap, VecDeque}, convert::identity, - future::Future, - path::PathBuf, - sync::{ - atomic::{AtomicU64, Ordering}, - Arc, Mutex, - }, - time::Duration, + sync::{Arc, Mutex}, + time::{Duration, Instant as StdInstant}, }; -use engine_traits::{CompactExt, MiscExt, CF_DEFAULT, CF_WRITE}; +use engine_traits::{CompactExt, CF_DEFAULT, CF_WRITE}; use file_system::{set_io_type, IoType}; use futures::{sink::SinkExt, stream::TryStreamExt, FutureExt, TryFutureExt}; use grpcio::{ @@ -22,11 +17,10 @@ use kvproto::{ encryptionpb::EncryptionMethod, errorpb, import_sstpb::{ - Error as ImportPbError, ImportSst, Range, RawWriteRequest_oneof_chunk as RawChunk, SstMeta, + Error as ImportPbError, ImportSst, Range, RawWriteRequest_oneof_chunk as RawChunk, SuspendImportRpcRequest, SuspendImportRpcResponse, SwitchMode, WriteRequest_oneof_chunk as Chunk, *, }, - kvrpcpb::Context, metapb::RegionEpoch, }; use raftstore::{ @@ -37,12 +31,10 @@ use raftstore::{ use raftstore_v2::StoreMeta; use resource_control::{with_resource_limiter, ResourceGroupManager}; use sst_importer::{ - error_inc, metrics::*, sst_importer::DownloadExt, sst_meta_to_path, Config, ConfigManager, - Error, Result, SstImporter, -}; -use tikv_kv::{ - Engine, LocalTablets, Modify, SnapContext, Snapshot, SnapshotExt, WriteData, WriteEvent, + error_inc, metrics::*, sst_importer::DownloadExt, Config, ConfigManager, Error, Result, + SstImporter, }; +use tikv_kv::{Engine, LocalTablets, Modify, WriteData}; use tikv_util::{ config::ReadableSize, future::{create_stream_with_buffer, paired_future_callback}, @@ -51,11 +43,11 @@ use tikv_util::{ HandyRwLock, }; use tokio::{runtime::Runtime, time::sleep}; -use txn_types::{Key, TimeStamp, WriteRef, WriteType}; +use txn_types::{Key, WriteRef, WriteType}; use super::{ - make_rpc_error, - raft_writer::{self, wait_write}, + ingest::{async_snapshot, ingest, IngestLatch, SuspendDeadline}, + make_rpc_error, pb_error_inc, raft_writer, }; use crate::{ import::duplicate_detect::DuplicateDetector, @@ -95,6 +87,7 @@ const WRITER_GC_INTERVAL: Duration = Duration::from_secs(300); /// This may save us from some 
client sending insane value to the server. const SUSPEND_REQUEST_MAX_SECS: u64 = // 6h 6 * 60 * 60; +const SST_LEASE_DURATION_MAX_SECS: u64 = 6 * 60 * 60; fn transfer_error(err: storage::Error) -> ImportPbError { let mut e = ImportPbError::default(); @@ -128,7 +121,7 @@ pub struct ImportSstService { threads: Arc, importer: Arc>, limiter: Limiter, - task_slots: Arc>>, + ingest_latch: Arc, raft_entry_max_size: ReadableSize, region_info_accessor: Arc, @@ -139,7 +132,7 @@ pub struct ImportSstService { resource_manager: Option>, // When less than now, don't accept any requests. - suspend_req_until: Arc, + suspend: Arc, } struct RequestCollector { @@ -184,8 +177,8 @@ impl RequestCollector { } fn accept_kv(&mut self, cf: &str, is_delete: bool, k: Vec, v: Vec) { - debug!("Accepting KV."; "cf" => %cf, - "key" => %log_wrappers::Value::key(&k), + debug!("Accepting KV."; "cf" => %cf, + "key" => %log_wrappers::Value::key(&k), "value" => %log_wrappers::Value::key(&v)); // Need to skip the empty key/value that could break the transaction or cause // data corruption. see details at https://github.com/pingcap/tiflow/issues/5468. @@ -371,13 +364,13 @@ impl ImportSstService { engine, importer, limiter: Limiter::new(f64::INFINITY), - task_slots: Arc::new(Mutex::new(HashSet::default())), + ingest_latch: Arc::default(), raft_entry_max_size, region_info_accessor, writer, store_meta, resource_manager, - suspend_req_until: Arc::new(AtomicU64::new(0)), + suspend: Arc::default(), } } @@ -394,173 +387,6 @@ impl ImportSstService { } } - fn acquire_lock(task_slots: &Arc>>, meta: &SstMeta) -> Result { - let mut slots = task_slots.lock().unwrap(); - let p = sst_meta_to_path(meta)?; - Ok(slots.insert(p)) - } - - fn release_lock(task_slots: &Arc>>, meta: &SstMeta) -> Result { - let mut slots = task_slots.lock().unwrap(); - let p = sst_meta_to_path(meta)?; - Ok(slots.remove(&p)) - } - - fn async_snapshot( - engine: &mut E, - context: &Context, - ) -> impl Future> { - let res = engine.async_snapshot(SnapContext { - pb_ctx: context, - ..Default::default() - }); - async move { - res.await.map_err(|e| { - let err: storage::Error = e.into(); - if let Some(e) = extract_region_error_from_error(&err) { - e - } else { - let mut e = errorpb::Error::default(); - e.set_message(format!("{}", err)); - e - } - }) - } - } - - fn check_write_stall(&self, region_id: u64) -> Option { - let tablet = match self.tablets.get(region_id) { - Some(tablet) => tablet, - None => { - let mut errorpb = errorpb::Error::default(); - errorpb.set_message(format!("region {} not found", region_id)); - errorpb.mut_region_not_found().set_region_id(region_id); - return Some(errorpb); - } - }; - - let reject_error = |region_id: Option| -> Option { - let mut errorpb = errorpb::Error::default(); - let err = if let Some(id) = region_id { - format!("too many sst files are ingesting for region {}", id) - } else { - "too many sst files are ingesting".to_string() - }; - let mut server_is_busy_err = errorpb::ServerIsBusy::default(); - server_is_busy_err.set_reason(err.clone()); - errorpb.set_message(err); - errorpb.set_server_is_busy(server_is_busy_err); - Some(errorpb) - }; - - // store_meta being Some means it is v2 - if let Some(ref store_meta) = self.store_meta { - if let Some((region, _)) = store_meta.lock().unwrap().regions.get(®ion_id) { - if !self.importer.region_in_import_mode(region) - && tablet.ingest_maybe_slowdown_writes(CF_WRITE).expect("cf") - { - return reject_error(Some(region_id)); - } - } else { - let mut errorpb = errorpb::Error::default(); - 
-                errorpb.set_message(format!("region {} not found", region_id));
-                errorpb.mut_region_not_found().set_region_id(region_id);
-                return Some(errorpb);
-            }
-        } else if self.importer.get_mode() == SwitchMode::Normal
-            && tablet.ingest_maybe_slowdown_writes(CF_WRITE).expect("cf")
-        {
-            match tablet.get_sst_key_ranges(CF_WRITE, 0) {
-                Ok(l0_sst_ranges) => {
-                    warn!(
-                        "sst ingest is too slow";
-                        "sst_ranges" => ?l0_sst_ranges,
-                    );
-                }
-                Err(e) => {
-                    error!("get sst key ranges failed"; "err" => ?e);
-                }
-            }
-            return reject_error(None);
-        }
-
-        None
-    }
-
-    fn ingest_files(
-        &mut self,
-        mut context: Context,
-        label: &'static str,
-        ssts: Vec<SstMeta>,
-    ) -> impl Future<Output = Result<IngestResponse>> {
-        let snapshot_res = Self::async_snapshot(&mut self.engine, &context);
-        let engine = self.engine.clone();
-        let importer = self.importer.clone();
-        async move {
-            // check api version
-            if !importer.as_ref().check_api_version(&ssts)? {
-                return Err(Error::IncompatibleApiVersion);
-            }
-
-            let mut resp = IngestResponse::default();
-            let res = match snapshot_res.await {
-                Ok(snap) => snap,
-                Err(e) => {
-                    pb_error_inc(label, &e);
-                    resp.set_error(e);
-                    return Ok(resp);
-                }
-            };
-
-            fail_point!("import::sst_service::ingest");
-            // Here we shall check whether the file has been ingested before. This operation
-            // must execute after geting a snapshot from raftstore to make sure that the
-            // current leader has applied to current term.
-            for sst in ssts.iter() {
-                if !importer.exist(sst) {
-                    warn!(
-                        "sst [{:?}] not exist. we may retry an operation that has already succeeded",
-                        sst
-                    );
-                    let mut errorpb = errorpb::Error::default();
-                    let err = "The file which would be ingested doest not exist.";
-                    let stale_err = errorpb::StaleCommand::default();
-                    errorpb.set_message(err.to_string());
-                    errorpb.set_stale_command(stale_err);
-                    resp.set_error(errorpb);
-                    return Ok(resp);
-                }
-            }
-            let modifies = ssts
-                .iter()
-                .map(|s| Modify::Ingest(Box::new(s.clone())))
-                .collect();
-            context.set_term(res.ext().get_term().unwrap().into());
-            let region_id = context.get_region_id();
-            let res = engine.async_write(
-                &context,
-                WriteData::from_modifies(modifies),
-                WriteEvent::BASIC_EVENT,
-                None,
-            );
-
-            let mut resp = IngestResponse::default();
-            if let Err(e) = wait_write(res).await {
-                if let Some(e) = extract_region_error_from_error(&e) {
-                    pb_error_inc(label, &e);
-                    resp.set_error(e);
-                } else {
-                    IMPORTER_ERROR_VEC
-                        .with_label_values(&[label, "unknown"])
-                        .inc();
-                    resp.mut_error()
-                        .set_message(format!("[region {}] ingest failed: {:?}", region_id, e));
-                }
-            }
-            Ok(resp)
-        }
-    }
-
     async fn apply_imp(
         mut req: ApplyRequest,
         importer: Arc<SstImporter<E::Local>>,
@@ -641,47 +467,6 @@ impl<E: Engine> ImportSstService<E> {
         Ok(range)
     }
-
-    /// Check whether we should suspend the current request.
-    fn check_suspend(&self) -> Result<()> {
-        let now = TimeStamp::physical_now();
-        let suspend_until = self.suspend_req_until.load(Ordering::SeqCst);
-        if now < suspend_until {
-            Err(Error::Suspended {
-                time_to_lease_expire: Duration::from_millis(suspend_until - now),
-            })
-        } else {
-            Ok(())
-        }
-    }
-
-    /// suspend requests for a period.
-    ///
-    /// # returns
-    ///
-    /// whether for now, the requests has already been suspended.
-    pub fn suspend_requests(&self, for_time: Duration) -> bool {
-        let now = TimeStamp::physical_now();
-        let last_suspend_until = self.suspend_req_until.load(Ordering::SeqCst);
-        let suspended = now < last_suspend_until;
-        let suspend_until = TimeStamp::physical_now() + for_time.as_millis() as u64;
-        self.suspend_req_until
-            .store(suspend_until, Ordering::SeqCst);
-        suspended
-    }
-
-    /// allow all requests to enter.
-    ///
-    /// # returns
-    ///
-    /// whether requests has already been previously suspended.
-    pub fn allow_requests(&self) -> bool {
-        let now = TimeStamp::physical_now();
-        let last_suspend_until = self.suspend_req_until.load(Ordering::SeqCst);
-        let suspended = now < last_suspend_until;
-        self.suspend_req_until.store(0, Ordering::SeqCst);
-        suspended
-    }
 }
 
 fn check_local_region_stale(
@@ -765,6 +550,21 @@ macro_rules! impl_write {
                 }
                 _ => return (Err(Error::InvalidChunk), Some(rx)),
             };
+
+            // Make sure it has a valid lease.
+            let _lease =
+                match import.check_lease(meta.get_region_id(), meta.get_uuid(), label) {
+                    Ok(lease) => lease,
+                    Err(e) => {
+                        let mut resp = $resp_ty::default();
+                        resp.mut_error().set_message(e.to_string());
+                        let mut errorpb = errorpb::Error::default();
+                        errorpb.set_message(e.to_string());
+                        resp.mut_error().set_store_error(errorpb);
+                        return (Ok(resp), Some(rx));
+                    }
+                };
+
             // wait the region epoch on this TiKV to catch up with the epoch
             // in request, which comes from PD and represents the majority
             // peers' status.
@@ -935,6 +735,60 @@ impl<E: Engine> ImportSst for ImportSstService<E> {
         ctx.spawn(task);
     }
 
+    fn lease(&mut self, _ctx: RpcContext<'_>, req: LeaseRequest, sink: UnarySink<LeaseResponse>) {
+        let label = "lease";
+        let timer = Instant::now_coarse();
+        let import = self.importer.clone();
+        let acquire_lease_task = async move {
+            let mut resp = LeaseResponse::default();
+            let now = StdInstant::now();
+            info!("dbg rpc lease"; "req" => ?req);
+
+            // Acquire leases.
+            for acquire_lease in req.get_acquire() {
+                let region_id = acquire_lease.get_lease().get_region().get_id();
+                let uuid = acquire_lease.get_lease().get_uuid();
+                let lease_secs =
+                    std::cmp::min(acquire_lease.get_ttl(), SST_LEASE_DURATION_MAX_SECS);
+                info!("dbg rpc lease"; "region_id" => region_id, "lease_secs" => lease_secs);
+                let deadline = now + Duration::from_secs(lease_secs);
+                match import.acquire_lease(region_id, uuid, deadline) {
+                    Ok(_) => {
+                        resp.mut_acquired().push(acquire_lease.get_lease().clone());
+                    }
+                    Err(e) => {
+                        warn!("acquire lease failed";
+                            "region_id" => region_id,
+                            "err" => %e);
+                        continue;
+                    }
+                };
+            }
+
+            // Release leases.
+            for release_lease in req.get_release() {
+                let region_id = release_lease.get_lease().get_region().get_id();
+                let uuid = release_lease.get_lease().get_uuid();
+                match import.release_lease(region_id, uuid) {
+                    Ok(_) => {
+                        resp.mut_released().push(release_lease.get_lease().clone());
+                    }
+                    Err(e) => {
+                        warn!("release lease failed";
+                            "region_id" => region_id,
+                            "err" => %e);
+                        continue;
+                    }
+                };
+            }
+
+            // TODO should we set the timestamp?
+            // resp.set_timestamp();
+            crate::send_rpc_response!(Ok(resp), sink, label, timer);
+        };
+        self.threads.spawn(acquire_lease_task);
+    }
+
     /// Receive SST from client and save the file for later ingesting.
     fn upload(
         &mut self,
@@ -958,6 +812,19 @@ impl<E: Engine> ImportSst for ImportSstService<E> {
                 Some(ref chunk) if chunk.has_meta() => chunk.get_meta(),
                 _ => return Err(Error::InvalidChunk),
             };
+            // Make sure it has a valid lease.
+            let _lease = match import.check_lease(meta.get_region_id(), meta.get_uuid(), label)
+            {
+                Ok(lease) => lease,
+                Err(e) => {
+                    let mut resp = UploadResponse::default();
+                    let mut errorpb = errorpb::Error::default();
+                    errorpb.set_message(e.to_string());
+                    resp.set_error(errorpb);
+                    return Ok(resp);
+                }
+            };
+
             let file = import.create(meta)?;
             let mut file = rx
                 .try_fold(file, |mut file, chunk| async move {
@@ -1074,6 +941,20 @@ impl<E: Engine> ImportSst for ImportSstService<E> {
             .with_label_values(&["queue"])
             .observe(start.saturating_elapsed().as_secs_f64());
 
+        // Make sure it has a valid lease.
+        let meta = req.get_sst();
+        let _lease = match importer.check_lease(meta.get_region_id(), meta.get_uuid(), label) {
+            Ok(lease) => lease,
+            Err(e) => {
+                let mut resp = DownloadResponse::default();
+                resp.mut_error().set_message(e.to_string());
+                let mut errorpb = errorpb::Error::default();
+                errorpb.set_message(e.to_string());
+                resp.mut_error().set_store_error(errorpb);
+                return crate::send_rpc_response!(Ok(resp), sink, label, timer);
+            }
+        };
+
         // FIXME: download() should be an async fn, to allow BR to cancel
         // a download task.
         // Unfortunately, this currently can't happen because the S3Storage
@@ -1133,47 +1014,34 @@ impl<E: Engine> ImportSst for ImportSstService<E> {
     /// CleanupSstWorker.
     fn ingest(
         &mut self,
-        ctx: RpcContext<'_>,
+        _: RpcContext<'_>,
         mut req: IngestRequest,
         sink: UnarySink<IngestResponse>,
     ) {
         let label = "ingest";
         let timer = Instant::now_coarse();
-        let mut resp = IngestResponse::default();
-
-        if let Err(err) = self.check_suspend() {
-            resp.set_error(ImportPbError::from(err).take_store_error());
-            ctx.spawn(async move { crate::send_rpc_response!(Ok(resp), sink, label, timer) });
-            return;
-        }
-
-        let region_id = req.get_context().get_region_id();
-        if let Some(errorpb) = self.check_write_stall(region_id) {
-            resp.set_error(errorpb);
-            ctx.spawn(
-                sink.success(resp)
-                    .unwrap_or_else(|e| warn!("send rpc failed"; "err" => %e)),
-            );
-            return;
-        }
-
-        let mut errorpb = errorpb::Error::default();
-        if !Self::acquire_lock(&self.task_slots, req.get_sst()).unwrap_or(false) {
-            errorpb.set_message(Error::FileConflict.to_string());
-            resp.set_error(errorpb);
-            ctx.spawn(
-                sink.success(resp)
-                    .unwrap_or_else(|e| warn!("send rpc failed"; "err" => %e)),
-            );
-            return;
-        }
+        let import = self.importer.clone();
+        let engine = self.engine.clone();
+        let suspend = self.suspend.clone();
+        let tablets = self.tablets.clone();
+        let store_meta = self.store_meta.clone();
+        let ingest_latch = self.ingest_latch.clone();
 
-        let task_slots = self.task_slots.clone();
-        let meta = req.take_sst();
-        let f = self.ingest_files(req.take_context(), label, vec![meta.clone()]);
         let handle_task = async move {
-            let res = f.await;
-            Self::release_lock(&task_slots, &meta).unwrap();
+            let mut multi_ingest = MultiIngestRequest::default();
+            multi_ingest.set_context(req.take_context());
+            multi_ingest.mut_ssts().push(req.take_sst());
+            let res = ingest(
+                multi_ingest,
+                engine,
+                &suspend,
+                &tablets,
+                &store_meta,
+                &import,
+                &ingest_latch,
+                label,
+            )
+            .await;
             crate::send_rpc_response!(res, sink, label, timer);
        };
         self.threads.spawn(handle_task);
@@ -1182,54 +1050,31 @@ impl<E: Engine> ImportSst for ImportSstService<E> {
     /// Ingest multiple files by sending a raft command to raftstore.
     fn multi_ingest(
         &mut self,
-        ctx: RpcContext<'_>,
-        mut req: MultiIngestRequest,
+        _: RpcContext<'_>,
+        req: MultiIngestRequest,
         sink: UnarySink<IngestResponse>,
     ) {
         let label = "multi-ingest";
         let timer = Instant::now_coarse();
-        let mut resp = IngestResponse::default();
-        if let Err(err) = self.check_suspend() {
-            resp.set_error(ImportPbError::from(err).take_store_error());
-            ctx.spawn(async move { crate::send_rpc_response!(Ok(resp), sink, label, timer) });
-            return;
-        }
-
-        if let Some(errorpb) = self.check_write_stall(req.get_context().get_region_id()) {
-            resp.set_error(errorpb);
-            ctx.spawn(
-                sink.success(resp)
-                    .unwrap_or_else(|e| warn!("send rpc failed"; "err" => %e)),
-            );
-            return;
-        }
+        let import = self.importer.clone();
+        let engine = self.engine.clone();
+        let suspend = self.suspend.clone();
+        let tablets = self.tablets.clone();
+        let store_meta = self.store_meta.clone();
+        let ingest_latch = self.ingest_latch.clone();
 
-        let mut errorpb = errorpb::Error::default();
-        let mut metas = vec![];
-        for sst in req.get_ssts() {
-            if Self::acquire_lock(&self.task_slots, sst).unwrap_or(false) {
-                metas.push(sst.clone());
-            }
-        }
-        if metas.len() < req.get_ssts().len() {
-            for m in metas {
-                Self::release_lock(&self.task_slots, &m).unwrap();
-            }
-            errorpb.set_message(Error::FileConflict.to_string());
-            resp.set_error(errorpb);
-            ctx.spawn(
-                sink.success(resp)
-                    .unwrap_or_else(|e| warn!("send rpc failed"; "err" => %e)),
-            );
-            return;
-        }
-        let task_slots = self.task_slots.clone();
-        let f = self.ingest_files(req.take_context(), label, req.take_ssts().into());
         let handle_task = async move {
-            let res = f.await;
-            for m in metas {
-                Self::release_lock(&task_slots, &m).unwrap();
-            }
+            let res = ingest(
+                req,
+                engine,
+                &suspend,
+                &tablets,
+                &store_meta,
+                &import,
+                &ingest_latch,
+                label,
+            )
+            .await;
             crate::send_rpc_response!(res, sink, label, timer);
         };
         self.threads.spawn(handle_task);
@@ -1339,7 +1184,7 @@ impl<E: Engine> ImportSst for ImportSstService<E> {
             Some(request.take_end_key())
         };
         let key_only = request.get_key_only();
-        let snap_res = Self::async_snapshot(&mut self.engine, &context);
+        let snap_res = async_snapshot(&mut self.engine, &context);
         let handle_task = async move {
             let res = snap_res.await;
             let snapshot = match res {
@@ -1411,7 +1256,7 @@ impl<E: Engine> ImportSst for ImportSstService<E> {
             ctx.spawn(async move { send_rpc_response!(Err(Error::Io(
                 std::io::Error::new(std::io::ErrorKind::InvalidInput,
-                    format!("you are going to suspend the import RPCs too long.
(for {} seconds, max acceptable duration is {} seconds)", req.get_duration_in_secs(), SUSPEND_REQUEST_MAX_SECS)))), sink, label, timer); }); return; @@ -1419,10 +1264,11 @@ impl ImportSst for ImportSstService { let suspended = if req.should_suspend_imports { info!("suspend incoming import RPCs."; "for_second" => req.get_duration_in_secs(), "caller" => req.get_caller()); - self.suspend_requests(Duration::from_secs(req.get_duration_in_secs())) + self.suspend + .suspend_requests(Duration::from_secs(req.get_duration_in_secs())) } else { info!("allow incoming import RPCs."; "caller" => req.get_caller()); - self.allow_requests() + self.suspend.allow_requests() }; let mut resp = SuspendImportRpcResponse::default(); resp.set_already_suspended(suspended); @@ -1430,31 +1276,6 @@ impl ImportSst for ImportSstService { } } -// add error statistics from pb error response -fn pb_error_inc(type_: &str, e: &errorpb::Error) { - let label = if e.has_not_leader() { - "not_leader" - } else if e.has_store_not_match() { - "store_not_match" - } else if e.has_region_not_found() { - "region_not_found" - } else if e.has_key_not_in_region() { - "key_not_in_range" - } else if e.has_epoch_not_match() { - "epoch_not_match" - } else if e.has_server_is_busy() { - "server_is_busy" - } else if e.has_stale_command() { - "stale_command" - } else if e.has_raft_entry_too_large() { - "raft_entry_too_large" - } else { - "unknown" - }; - - IMPORTER_ERROR_VEC.with_label_values(&[type_, label]).inc(); -} - fn write_needs_restore(write: &[u8]) -> bool { let w = WriteRef::parse(write); match w { diff --git a/tests/failpoints/cases/test_import_service.rs b/tests/failpoints/cases/test_import_service.rs index 010d12177b6..8cd8d9cd5ca 100644 --- a/tests/failpoints/cases/test_import_service.rs +++ b/tests/failpoints/cases/test_import_service.rs @@ -6,8 +6,8 @@ use std::{ }; use file_system::calc_crc32; -use futures::{executor::block_on, stream, SinkExt}; -use grpcio::{ChannelBuilder, Environment, Result, WriteFlags}; +use futures::executor::block_on; +use grpcio::{ChannelBuilder, Environment}; use kvproto::{import_sstpb::*, tikvpb_grpc::TikvClient}; use tempfile::{Builder, TempDir}; use test_raftstore::{must_raw_put, Simulator}; @@ -18,9 +18,11 @@ use tikv_util::{config::ReadableSize, HandyRwLock}; #[allow(dead_code)] #[path = "../../integrations/import/util.rs"] mod util; +use test_sst_importer::{check_ingested_kvs, must_acquire_sst_lease, send_upload_sst}; + use self::util::{ - check_ingested_kvs, new_cluster_and_tikv_import_client, new_cluster_and_tikv_import_client_tde, - open_cluster_and_tikv_import_client_v2, send_upload_sst, + new_cluster_and_tikv_import_client, new_cluster_and_tikv_import_client_tde, + open_cluster_and_tikv_import_client_v2, }; // Opening sst writer involves IO operation, it may block threads for a while. @@ -38,6 +40,7 @@ fn test_download_sst_blocking_sst_writer() { let (mut meta, _) = gen_sst_file(sst_path, sst_range); meta.set_region_id(ctx.get_region_id()); meta.set_region_epoch(ctx.get_region_epoch().clone()); + must_acquire_sst_lease(&import, &meta, Duration::MAX); // Sleep 20s, make sure it is large than grpc_keepalive_timeout (3s). let sst_writer_open_fp = "on_open_sst_writer"; @@ -63,33 +66,10 @@ fn test_download_sst_blocking_sst_writer() { fail::remove(sst_writer_open_fp); // Do an ingest and verify the result is correct. 
-    let mut ingest = IngestRequest::default();
-    ingest.set_context(ctx.clone());
-    ingest.set_sst(meta);
-    let resp = import.ingest(&ingest).unwrap();
-    assert!(!resp.has_error());
-
+    must_ingest_sst(&import, ctx.clone(), meta);
     check_ingested_kvs(&tikv, &ctx, sst_range);
 }
 
-fn upload_sst(import: &ImportSstClient, meta: &SstMeta, data: &[u8]) -> Result<UploadResponse> {
-    let mut r1 = UploadRequest::default();
-    r1.set_meta(meta.clone());
-    let mut r2 = UploadRequest::default();
-    r2.set_data(data.to_vec());
-    let reqs: Vec<_> = vec![r1, r2]
-        .into_iter()
-        .map(|r| Result::Ok((r, WriteFlags::default())))
-        .collect();
-    let (mut tx, rx) = import.upload().unwrap();
-    let mut stream = stream::iter(reqs);
-    block_on(async move {
-        tx.send_all(&mut stream).await?;
-        tx.close().await?;
-        rx.await
-    })
-}
-
 #[test]
 fn test_ingest_reentrant() {
     let (cluster, ctx, _tikv, import) = new_cluster_and_tikv_import_client();
@@ -104,11 +84,8 @@ fn test_ingest_reentrant() {
     let (mut meta, data) = gen_sst_file(sst_path, sst_range);
     meta.set_region_id(ctx.get_region_id());
     meta.set_region_epoch(ctx.get_region_epoch().clone());
-    upload_sst(&import, &meta, &data).unwrap();
-
-    let mut ingest = IngestRequest::default();
-    ingest.set_context(ctx);
-    ingest.set_sst(meta.clone());
+    must_acquire_sst_lease(&import, &meta, Duration::MAX);
+    send_upload_sst(&import, &meta, &data).unwrap();
 
     // Don't delete ingested sst file or we cannot find sst file in next ingest.
     fail::cfg("dont_delete_ingested_sst", "1*return").unwrap();
@@ -124,9 +101,8 @@ fn test_ingest_reentrant() {
         .get_path(&meta);
     let checksum1 = calc_crc32(save_path.clone()).unwrap();
 
-    // Do ingest and it will ingest successs.
-    let resp = import.ingest(&ingest).unwrap();
-    assert!(!resp.has_error());
+    // Do ingest and it will succeed.
+    must_ingest_sst(&import, ctx.clone(), meta.clone());
 
     let checksum2 = calc_crc32(save_path).unwrap();
     // TODO: Remove this once write_global_seqno is deprecated.
@@ -134,8 +110,8 @@ fn test_ingest_reentrant() {
     // updated with the default setting, which is write_global_seqno=false.
     assert_eq!(checksum1, checksum2);
     // Do ingest again and it can be reentrant
-    let resp = import.ingest(&ingest).unwrap();
-    assert!(!resp.has_error());
+    must_acquire_sst_lease(&import, &meta, Duration::MAX);
+    must_ingest_sst(&import, ctx.clone(), meta);
 }
 
 #[test]
@@ -153,7 +129,8 @@ fn test_ingest_key_manager_delete_file_failed() {
     meta.set_region_id(ctx.get_region_id());
     meta.set_region_epoch(ctx.get_region_epoch().clone());
 
-    upload_sst(&import, &meta, &data).unwrap();
+    must_acquire_sst_lease(&import, &meta, Duration::MAX);
+    send_upload_sst(&import, &meta, &data).unwrap();
 
     let deregister_fp = "key_manager_fails_before_delete_file";
     // the first delete is in check before ingest, the second is in ingest cleanup
@@ -164,12 +141,7 @@ fn test_ingest_key_manager_delete_file_failed() {
     // Do an ingest and verify the result is correct. Though the ingest succeeded,
     // the clone file is still in the key manager
     // TODO: how to check the key manager contains the clone key
-    let mut ingest = IngestRequest::default();
-    ingest.set_context(ctx.clone());
-    ingest.set_sst(meta.clone());
-    let resp = import.ingest(&ingest).unwrap();
-
-    assert!(!resp.has_error());
+    must_ingest_sst(&import, ctx.clone(), meta.clone());
 
     fail::remove(deregister_fp);
 
@@ -193,12 +165,9 @@ fn test_ingest_key_manager_delete_file_failed() {
     // Do upload and ingest again, though key manager contains this file, the ingest
     // action should success.
- upload_sst(&import, &meta, &data).unwrap(); - let mut ingest = IngestRequest::default(); - ingest.set_context(ctx); - ingest.set_sst(meta); - let resp = import.ingest(&ingest).unwrap(); - assert!(!resp.has_error()); + must_acquire_sst_lease(&import, &meta, Duration::MAX); + send_upload_sst(&import, &meta, &data).unwrap(); + must_ingest_sst(&import, ctx, meta); } #[test] @@ -215,10 +184,11 @@ fn test_ingest_file_twice_and_conflict() { let (mut meta, data) = gen_sst_file(sst_path, sst_range); meta.set_region_id(ctx.get_region_id()); meta.set_region_epoch(ctx.get_region_epoch().clone()); - upload_sst(&import, &meta, &data).unwrap(); + must_acquire_sst_lease(&import, &meta, Duration::MAX); + send_upload_sst(&import, &meta, &data).unwrap(); let mut ingest = IngestRequest::default(); ingest.set_context(ctx); - ingest.set_sst(meta); + ingest.set_sst(meta.clone()); let latch_fp = "import::sst_service::ingest"; let (tx1, rx1) = channel(); @@ -240,9 +210,10 @@ fn test_ingest_file_twice_and_conflict() { assert_eq!("ingest file conflict", resp.get_error().get_message()); tx2.send(()).unwrap(); let resp = block_on(resp_recv).unwrap(); - assert!(!resp.has_error()); + assert!(!resp.has_error(), "{:?}", resp); fail::remove(latch_fp); + must_acquire_sst_lease(&import, &meta, Duration::MAX); let resp = import.ingest(&ingest).unwrap(); assert!(resp.has_error()); assert_eq!( @@ -264,19 +235,16 @@ fn test_delete_sst_v2_after_epoch_stale() { let sst_path = temp_dir.path().join("test.sst"); let sst_range = (0, 100); let (mut meta, data) = gen_sst_file(sst_path, sst_range); + must_acquire_sst_lease(&import, &meta, Duration::MAX); // disable data flushed fail::cfg("on_flush_completed", "return()").unwrap(); send_upload_sst(&import, &meta, &data).unwrap(); - let mut ingest = IngestRequest::default(); - ingest.set_context(ctx.clone()); - ingest.set_sst(meta.clone()); meta.set_region_id(ctx.get_region_id()); meta.set_region_epoch(ctx.get_region_epoch().clone()); + must_acquire_sst_lease(&import, &meta, Duration::MAX); send_upload_sst(&import, &meta, &data).unwrap(); - ingest.set_sst(meta.clone()); + must_ingest_sst(&import, ctx.clone(), meta.clone()); - let resp = import.ingest(&ingest).unwrap(); - assert!(!resp.has_error(), "{:?}", resp.get_error()); let (tx, rx) = channel::<()>(); let tx = Arc::new(Mutex::new(tx)); fail::cfg_callback("on_cleanup_import_sst_schedule", move || { @@ -332,16 +300,13 @@ fn test_delete_sst_after_applied_sst() { let sst_range = (0, 100); let (mut meta, data) = gen_sst_file(sst_path, sst_range); // No region id and epoch. 
+ must_acquire_sst_lease(&import, &meta, Duration::MAX); send_upload_sst(&import, &meta, &data).unwrap(); - let mut ingest = IngestRequest::default(); - ingest.set_context(ctx.clone()); - ingest.set_sst(meta.clone()); meta.set_region_id(ctx.get_region_id()); meta.set_region_epoch(ctx.get_region_epoch().clone()); + must_acquire_sst_lease(&import, &meta, Duration::MAX); send_upload_sst(&import, &meta, &data).unwrap(); - ingest.set_sst(meta.clone()); - let resp = import.ingest(&ingest).unwrap(); - assert!(!resp.has_error(), "{:?}", resp.get_error()); + must_ingest_sst(&import, ctx.clone(), meta); // restart node cluster.stop_node(1); @@ -390,17 +355,13 @@ fn test_split_buckets_after_ingest_sst_v2() { let sst_path = temp_dir.path().join("test.sst"); let sst_range = (0, 255); let (mut meta, data) = gen_sst_file(sst_path, sst_range); + must_acquire_sst_lease(&import, &meta, Duration::MAX); send_upload_sst(&import, &meta, &data).unwrap(); - let mut ingest = IngestRequest::default(); - ingest.set_context(ctx.clone()); - ingest.set_sst(meta.clone()); meta.set_region_id(ctx.get_region_id()); meta.set_region_epoch(ctx.get_region_epoch().clone()); + must_acquire_sst_lease(&import, &meta, Duration::MAX); send_upload_sst(&import, &meta, &data).unwrap(); - ingest.set_sst(meta.clone()); - - let resp = import.ingest(&ingest).unwrap(); - assert!(!resp.has_error(), "{:?}", resp.get_error()); + must_ingest_sst(&import, ctx.clone(), meta); let (tx, rx) = channel::<()>(); let tx = Arc::new(Mutex::new(tx)); @@ -474,16 +435,13 @@ fn test_flushed_applied_index_after_ingset() { let sst_range = (i * 20, (i + 1) * 20); let (mut meta, data) = gen_sst_file(sst_path.clone(), sst_range); // No region id and epoch. + must_acquire_sst_lease(&import, &meta, Duration::MAX); send_upload_sst(&import, &meta, &data).unwrap(); - let mut ingest = IngestRequest::default(); - ingest.set_context(ctx.clone()); - ingest.set_sst(meta.clone()); meta.set_region_id(ctx.get_region_id()); meta.set_region_epoch(ctx.get_region_epoch().clone()); + must_acquire_sst_lease(&import, &meta, Duration::MAX); send_upload_sst(&import, &meta, &data).unwrap(); - ingest.set_sst(meta.clone()); - let resp = import.ingest(&ingest).unwrap(); - assert!(!resp.has_error(), "{:?}", resp.get_error()); + must_ingest_sst(&import, ctx.clone(), meta); } // only 1 sst left because there is no more event to trigger a raft ready flush. @@ -494,16 +452,13 @@ fn test_flushed_applied_index_after_ingset() { let sst_range = (i * 20, (i + 1) * 20); let (mut meta, data) = gen_sst_file(sst_path.clone(), sst_range); // No region id and epoch. + must_acquire_sst_lease(&import, &meta, Duration::MAX); send_upload_sst(&import, &meta, &data).unwrap(); - let mut ingest = IngestRequest::default(); - ingest.set_context(ctx.clone()); - ingest.set_sst(meta.clone()); meta.set_region_id(ctx.get_region_id()); meta.set_region_epoch(ctx.get_region_epoch().clone()); + must_acquire_sst_lease(&import, &meta, Duration::MAX); send_upload_sst(&import, &meta, &data).unwrap(); - ingest.set_sst(meta.clone()); - let resp = import.ingest(&ingest).unwrap(); - assert!(!resp.has_error(), "{:?}", resp.get_error()); + must_ingest_sst(&import, ctx.clone(), meta); } // ingest more sst files, unflushed index still be 1. 
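Note on the helpers used throughout these tests: must_acquire_sst_lease, must_release_sst_lease, must_ingest_sst, and must_ingest_sst_error now come from the test_sst_importer crate, whose source is not part of this patch. Judging from the inline request-building code they replace, the lease and ingest helpers presumably look roughly like the sketch below; the client and message types are the ones already imported in these tests, everything else is an assumption.

```rust
// Sketch only: reconstructed from the inline code these helpers replace.
// The real implementations live in the test_sst_importer crate, not shown here.
use std::time::Duration;

use kvproto::{import_sstpb::*, kvrpcpb::Context};

// Acquire (or renew) a lease for `meta`'s region/uuid pair. Passing
// Duration::MAX is fine because the server clamps the TTL to
// SST_LEASE_DURATION_MAX_SECS (6h), as the lease RPC handler above shows.
pub fn must_acquire_sst_lease(import: &ImportSstClient, meta: &SstMeta, ttl: Duration) {
    let mut acquire = AcquireLease::default();
    acquire.mut_lease().mut_region().set_id(meta.get_region_id());
    acquire.mut_lease().set_uuid(meta.get_uuid().to_vec());
    acquire.set_ttl(ttl.as_secs());
    let mut req = LeaseRequest::default();
    req.mut_acquire().push(acquire);
    let resp = import.lease(&req).unwrap();
    assert_eq!(resp.get_acquired().len(), 1, "{:?}", resp);
}

// Ingest `meta` and assert success; replaces the repeated
// IngestRequest/ingest/assert boilerplate deleted above.
pub fn must_ingest_sst(import: &ImportSstClient, ctx: Context, meta: SstMeta) {
    let mut ingest = IngestRequest::default();
    ingest.set_context(ctx);
    ingest.set_sst(meta);
    let resp = import.ingest(&ingest).unwrap();
    assert!(!resp.has_error(), "{:?}", resp.get_error());
}
```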
diff --git a/tests/integrations/config/dynamic/raftstore.rs b/tests/integrations/config/dynamic/raftstore.rs index eb5d2dda710..af5d95d3199 100644 --- a/tests/integrations/config/dynamic/raftstore.rs +++ b/tests/integrations/config/dynamic/raftstore.rs @@ -22,6 +22,7 @@ use raftstore::{ }; use resource_metering::CollectorRegHandle; use service::service_manager::GrpcServiceManager; +use sst_importer::{IngestMediator, IngestObserver, Mediator}; use tempfile::TempDir; use test_pd_client::TestPdClient; use tikv::{ @@ -72,13 +73,27 @@ fn start_raftstore( let engines = create_tmp_engine(dir); let host = CoprocessorHost::default(); let importer = { + let mut ingest_mediator = IngestMediator::default(); + let ingest_observer = Arc::new(IngestObserver::default()); + ingest_mediator.register(ingest_observer.clone()); let p = dir .path() .join("store-config-importer") .as_path() .display() .to_string(); - Arc::new(SstImporter::new(&cfg.import, p, None, cfg.storage.api_version(), false).unwrap()) + Arc::new( + SstImporter::new( + &cfg.import, + p, + None, + cfg.storage.api_version(), + false, + Arc::new(ingest_mediator), + ingest_observer, + ) + .unwrap(), + ) }; let snap_mgr = { let p = dir diff --git a/tests/integrations/import/mod.rs b/tests/integrations/import/mod.rs index 4de0fa26472..29aca5e5192 100644 --- a/tests/integrations/import/mod.rs +++ b/tests/integrations/import/mod.rs @@ -1,5 +1,6 @@ // Copyright 2018 TiKV Project Authors. Licensed under Apache-2.0. mod test_apply_log; +mod test_lease; mod test_sst_service; mod util; diff --git a/tests/integrations/import/test_apply_log.rs b/tests/integrations/import/test_apply_log.rs index 9dda44888bb..398f33f6764 100644 --- a/tests/integrations/import/test_apply_log.rs +++ b/tests/integrations/import/test_apply_log.rs @@ -1,13 +1,16 @@ +// Copyright 2024 TiKV Project Authors. Licensed under Apache-2.0. 
+ use engine_traits::CF_DEFAULT; use external_storage::LocalStorage; use kvproto::import_sstpb::ApplyRequest; use tempfile::TempDir; +use test_sst_importer::*; -use crate::import::util; +use super::util::*; #[test] fn test_basic_apply() { - let (_cluster, ctx, tikv, import) = util::new_cluster_and_tikv_import_client(); + let (_cluster, ctx, tikv, import) = new_cluster_and_tikv_import_client(); let tmp = TempDir::new().unwrap(); let storage = LocalStorage::new(tmp.path()).unwrap(); let default = [ @@ -17,20 +20,20 @@ fn test_basic_apply() { (b"k4", b"v4", 4), ]; let default_rewritten = [(b"r1", b"v1", 1), (b"r2", b"v2", 2), (b"r3", b"v3", 3)]; - let mut sst_meta = util::make_plain_file(&storage, "file1.log", default.into_iter()); - util::register_range_for(&mut sst_meta, b"k1", b"k3a"); + let mut sst_meta = make_plain_file(&storage, "file1.log", default.into_iter()); + register_range_for(&mut sst_meta, b"k1", b"k3a"); let mut req = ApplyRequest::new(); req.set_context(ctx.clone()); - req.set_rewrite_rules(vec![util::rewrite_for(&mut sst_meta, b"k", b"r")].into()); + req.set_rewrite_rules(vec![rewrite_for(&mut sst_meta, b"k", b"r")].into()); req.set_metas(vec![sst_meta].into()); - req.set_storage_backend(util::local_storage(&tmp)); + req.set_storage_backend(local_storage(&tmp)); import.apply(&req).unwrap(); - util::check_applied_kvs_cf(&tikv, &ctx, CF_DEFAULT, default_rewritten.into_iter()); + check_applied_kvs_cf(&tikv, &ctx, CF_DEFAULT, default_rewritten.into_iter()); } #[test] fn test_apply_twice() { - let (_cluster, ctx, tikv, import) = util::new_cluster_and_tikv_import_client(); + let (_cluster, ctx, tikv, import) = new_cluster_and_tikv_import_client(); let tmp = TempDir::new().unwrap(); let storage = LocalStorage::new(tmp.path()).unwrap(); let default = [( @@ -49,21 +52,21 @@ fn test_apply_twice() { 1, )]; - let mut sst_meta = util::make_plain_file(&storage, "file2.log", default.into_iter()); - util::register_range_for(&mut sst_meta, b"k1", b"k1a"); + let mut sst_meta = make_plain_file(&storage, "file2.log", default.into_iter()); + register_range_for(&mut sst_meta, b"k1", b"k1a"); let mut req = ApplyRequest::new(); req.set_context(ctx.clone()); - req.set_rewrite_rules(vec![util::rewrite_for(&mut sst_meta, b"k", b"r")].into()); + req.set_rewrite_rules(vec![rewrite_for(&mut sst_meta, b"k", b"r")].into()); req.set_metas(vec![sst_meta.clone()].into()); - req.set_storage_backend(util::local_storage(&tmp)); + req.set_storage_backend(local_storage(&tmp)); import.apply(&req).unwrap(); - util::check_applied_kvs_cf(&tikv, &ctx, CF_DEFAULT, default_fst.into_iter()); + check_applied_kvs_cf(&tikv, &ctx, CF_DEFAULT, default_fst.into_iter()); - util::register_range_for(&mut sst_meta, b"k1", b"k1a"); - req.set_rewrite_rules(vec![util::rewrite_for(&mut sst_meta, b"k", b"z")].into()); + register_range_for(&mut sst_meta, b"k1", b"k1a"); + req.set_rewrite_rules(vec![rewrite_for(&mut sst_meta, b"k", b"z")].into()); req.set_metas(vec![sst_meta].into()); import.apply(&req).unwrap(); - util::check_applied_kvs_cf( + check_applied_kvs_cf( &tikv, &ctx, CF_DEFAULT, diff --git a/tests/integrations/import/test_lease.rs b/tests/integrations/import/test_lease.rs new file mode 100644 index 00000000000..b8a3b88bfd8 --- /dev/null +++ b/tests/integrations/import/test_lease.rs @@ -0,0 +1,250 @@ +// Copyright 2024 TiKV Project Authors. Licensed under Apache-2.0. 
+
+use std::time::Duration;
+
+use futures::{executor::block_on, SinkExt};
+use grpcio::WriteFlags;
+use kvproto::import_sstpb::*;
+use test_raftstore::sleep_ms;
+use test_sst_importer::*;
+use uuid::Uuid;
+
+use super::util::*;
+
+fn new_write_params() -> (Vec<Vec<u8>>, Vec<Vec<u8>>, (u8, u8)) {
+    let mut keys = vec![];
+    let mut values = vec![];
+    let sst_range = (0u8, 10u8);
+    for i in sst_range.0..sst_range.1 {
+        keys.push(vec![i]);
+        values.push(vec![i]);
+    }
+    (keys, values, sst_range)
+}
+
+fn new_acquire(region_id: u64) -> AcquireLease {
+    let meta = new_sst_meta(0, 0);
+    let mut acquire = AcquireLease::default();
+    acquire.mut_lease().mut_region().set_id(region_id);
+    acquire.mut_lease().set_uuid(meta.get_uuid().into());
+    acquire.set_ttl(1);
+    acquire
+}
+
+fn new_release(mut acquire: AcquireLease) -> ReleaseLease {
+    let mut release = ReleaseLease::default();
+    release.set_lease(acquire.take_lease());
+    release
+}
+
+fn new_write_request(v: u8, commit_ts: u64) -> WriteRequest {
+    let mut pairs = vec![];
+    let mut pair = Pair::default();
+    pair.set_key(vec![v]);
+    pair.set_value(vec![v]);
+    pairs.push(pair);
+    let mut batch = WriteBatch::default();
+    batch.set_commit_ts(commit_ts);
+    batch.set_pairs(pairs.into());
+    let mut req = WriteRequest::default();
+    req.set_batch(batch);
+    req
+}
+
+#[test]
+fn test_lease_expire() {
+    let (_cluster, ctx, tikv, import) = new_cluster_and_tikv_import_client();
+
+    let mut meta = new_sst_meta(0, 0);
+    meta.set_region_id(ctx.get_region_id());
+    meta.set_region_epoch(ctx.get_region_epoch().clone());
+
+    let (keys, values, sst_range) = new_write_params();
+    must_acquire_sst_lease(&import, &meta, Duration::MAX);
+    let resp = send_write_sst(&import, &meta, keys.clone(), values.clone(), 1).unwrap();
+    assert_eq!(resp.metas.len(), 1);
+    // A successful ingest expires its lease.
+    must_ingest_sst(&import, ctx.clone(), resp.metas[0].clone());
+    check_ingested_txn_kvs(&tikv, &ctx, sst_range, 2);
+
+    // Must fail with lease expired.
+    let resp = send_write_sst(&import, &meta, keys.clone(), values.clone(), 1).unwrap();
+    assert!(
+        resp.get_error().get_message().contains("lease has expired"),
+        "{:?}",
+        resp
+    );
+
+    // Releasing a lease expires it immediately.
+    meta.set_uuid(Uuid::new_v4().as_bytes().into());
+    must_acquire_sst_lease(&import, &meta, Duration::MAX);
+    let resp = send_write_sst(&import, &meta, keys.clone(), values.clone(), 1).unwrap();
+    assert!(!resp.has_error(), "{:?}", resp);
+    must_release_sst_lease(&import, &meta);
+    let mut ingest = IngestRequest::default();
+    ingest.set_context(ctx.clone());
+    ingest.set_sst(resp.metas[0].clone());
+    let resp = import.ingest(&ingest).unwrap();
+    assert!(
+        resp.get_error().get_message().contains("lease has expired"),
+        "{:?}",
+        resp
+    );
+}
+
+#[test]
+fn test_lease_renew() {
+    let (_cluster, ctx, _tikv, import) = new_cluster_and_tikv_import_client();
+
+    let mut meta = new_sst_meta(0, 0);
+    meta.set_region_id(ctx.get_region_id());
+    meta.set_region_epoch(ctx.get_region_epoch().clone());
+
+    let (keys, values, _) = new_write_params();
+    must_acquire_sst_lease(&import, &meta, Duration::from_millis(10));
+    sleep_ms(200);
+
+    // Must fail with lease expired.
+    let resp = send_write_sst(&import, &meta, keys.clone(), values.clone(), 1).unwrap();
+    assert!(
+        resp.get_error().get_message().contains("lease has expired"),
+        "{:?}",
+        resp
+    );
+
+    must_acquire_sst_lease(&import, &meta, Duration::MAX);
+    let resp = send_write_sst(&import, &meta, keys, values, 1).unwrap();
+    assert!(!resp.has_error(), "{:?}", resp);
+    assert_eq!(resp.metas.len(), 1, "{:?}", resp);
+}
+
+#[test]
+fn test_lease_concurrent_requests() {
+    let (_cluster, ctx, _tikv, import) = new_cluster_and_tikv_import_client();
+
+    // Acquire two leases for the same region.
+    let mut meta1 = new_sst_meta(0, 0);
+    meta1.set_region_id(ctx.get_region_id());
+    meta1.set_region_epoch(ctx.get_region_epoch().clone());
+    must_acquire_sst_lease(&import, &meta1, Duration::MAX);
+
+    let mut meta2 = new_sst_meta(0, 0);
+    meta2.set_region_id(ctx.get_region_id());
+    meta2.set_region_epoch(ctx.get_region_epoch().clone());
+    must_acquire_sst_lease(&import, &meta2, Duration::MAX);
+
+    // Release meta2 lease in advance.
+    sleep_ms(100);
+    must_release_sst_lease(&import, &meta2);
+
+    // Writes with meta2 must fail with lease expired.
+    let (keys, values, _) = new_write_params();
+    let resp = send_write_sst(&import, &meta2, keys.clone(), values.clone(), 1).unwrap();
+    assert_eq!(resp.metas.len(), 0);
+    assert!(
+        resp.get_error().get_message().contains("lease has expired"),
+        "{:?}",
+        resp
+    );
+
+    // Writes with meta1 must succeed.
+    let resp = send_write_sst(&import, &meta1, keys, values, 1).unwrap();
+    assert_eq!(resp.metas.len(), 1);
+}
+
+// A long-running request sent within a lease must still be served even if
+// the lease expires before the request finishes.
+#[test]
+fn test_lease_expire_before_request_finish() {
+    let (_cluster, ctx, _tikv, import) = new_cluster_and_tikv_import_client();
+
+    let mut meta1 = new_sst_meta(0, 0);
+    meta1.set_region_id(ctx.get_region_id());
+    meta1.set_region_epoch(ctx.get_region_epoch().clone());
+    must_acquire_sst_lease(&import, &meta1, Duration::MAX);
+
+    let commit_ts = 1;
+    let (mut tx, rx) = import.write().unwrap();
+    block_on(async {
+        let mut r1 = WriteRequest::default();
+        r1.set_meta(meta1.clone());
+        tx.send((r1, WriteFlags::default())).await.unwrap();
+
+        let req = new_write_request(1, commit_ts);
+        tx.send((req, WriteFlags::default())).await.unwrap();
+    });
+
+    // Expire the lease.
+    must_release_sst_lease(&import, &meta1);
+    // New writes must fail.
+    let (keys, values, _) = new_write_params();
+    let resp = send_write_sst(&import, &meta1, keys, values, 1).unwrap();
+    assert!(
+        resp.get_error().get_message().contains("lease has expired"),
+        "{:?}",
+        resp
+    );
+
+    // The existing write can continue.
+    block_on(async {
+        let req = new_write_request(2, commit_ts);
+        tx.send((req, WriteFlags::default())).await.unwrap();
+        tx.close().await.unwrap();
+
+        let resp = rx.await.unwrap();
+        assert_eq!(resp.metas.len(), 1);
+    });
+}
+
+#[test]
+fn test_lease_invalid_uuid() {
+    let (_cluster, ctx, _tikv, import) = new_cluster_and_tikv_import_client();
+
+    let mut meta = new_sst_meta(0, 0);
+    meta.set_region_id(ctx.get_region_id());
+    meta.set_region_epoch(ctx.get_region_epoch().clone());
+
+    // Set an invalid uuid; a normal uuid should be 16 bytes.
+    meta.set_uuid(vec![7, 7, 7]);
+    let resp = send_acquire_sst_lease(&import, &meta, Duration::MAX).unwrap();
+    assert_eq!(resp.get_acquired().len(), 0, "{:?}", resp);
+
+    let (keys, values, _) = new_write_params();
+    let resp = send_write_sst(&import, &meta, keys.clone(), values.clone(), 1).unwrap();
+    assert!(
+        resp.get_error().get_message().contains("invalid lease"),
+        "{:?}",
+        resp
+    );
+}
+
+#[test]
+fn test_lease_batch_acquire_release() {
+    let (_cluster, _ctx, _tikv, import) = new_cluster_and_tikv_import_client();
+
+    let acquire1 = new_acquire(1);
+    let acquire2 = new_acquire(2);
+    let mut acquire3 = new_acquire(3);
+    // Set an invalid uuid; a normal uuid should be 16 bytes.
+    acquire3.mut_lease().set_uuid(vec![7, 7, 7]);
+
+    // Acquire three leases in the same request.
+    let mut req = LeaseRequest::default();
+    req.mut_acquire().push(acquire1.clone());
+    req.mut_acquire().push(acquire2.clone());
+    req.mut_acquire().push(acquire3.clone());
+    let resp = import.lease(&req).unwrap();
+    assert_eq!(resp.get_acquired().len(), 2, "{:?}", resp);
+    assert_eq!(&resp.get_acquired()[0], acquire1.get_lease(), "{:?}", resp);
+    assert_eq!(&resp.get_acquired()[1], acquire2.get_lease(), "{:?}", resp);
+
+    // Release three leases in the same request.
+    let mut req = LeaseRequest::default();
+    req.mut_release().push(new_release(acquire1.clone()));
+    req.mut_release().push(new_release(acquire2.clone()));
+    req.mut_release().push(new_release(acquire3));
+    let resp = import.lease(&req).unwrap();
+    assert_eq!(resp.get_released().len(), 2, "{:?}", resp);
+    assert_eq!(&resp.get_released()[0], acquire1.get_lease(), "{:?}", resp);
+    assert_eq!(&resp.get_released()[1], acquire2.get_lease(), "{:?}", resp);
+}
diff --git a/tests/integrations/import/test_sst_service.rs b/tests/integrations/import/test_sst_service.rs
index f1b2e23014c..e7f7aa1f6ec 100644
--- a/tests/integrations/import/test_sst_service.rs
+++ b/tests/integrations/import/test_sst_service.rs
@@ -34,11 +34,22 @@ fn test_upload_sst() {
     // Mismatch crc32
     let meta = new_sst_meta(0, length);
+    must_acquire_sst_lease(&import, &meta, Duration::MAX);
     assert_to_string_contains!(send_upload_sst(&import, &meta, &data).unwrap_err(), "crc32");
 
     let mut meta = new_sst_meta(crc32, length);
     meta.set_region_id(ctx.get_region_id());
     meta.set_region_epoch(ctx.get_region_epoch().clone());
+
+    // Without lease, upload must fail.
+    let resp = send_upload_sst(&import, &meta, &data).unwrap();
+    assert!(
+        resp.get_error().get_message().contains("lease has expired"),
+        "{:?}",
+        resp
+    );
+
+    must_acquire_sst_lease(&import, &meta, Duration::MAX);
     send_upload_sst(&import, &meta, &data).unwrap();
 
     // Can't upload the same uuid file again.
@@ -60,14 +71,20 @@ fn run_test_write_sst(ctx: Context, tikv: TikvClient, import: ImportSstClient) {
         keys.push(vec![i]);
         values.push(vec![i]);
     }
+
+    // Without lease, write must fail.
+ let resp = send_write_sst(&import, &meta, keys.clone(), values.clone(), 1).unwrap(); + assert!( + resp.get_error().get_message().contains("lease has expired"), + "{:?}", + resp + ); + + must_acquire_sst_lease(&import, &meta, Duration::MAX); let resp = send_write_sst(&import, &meta, keys, values, 1).unwrap(); for m in resp.metas.into_iter() { - let mut ingest = IngestRequest::default(); - ingest.set_context(ctx.clone()); - ingest.set_sst(m.clone()); - let resp = import.ingest(&ingest).unwrap(); - assert!(!resp.has_error()); + must_ingest_sst(&import, ctx.clone(), m.clone()); } check_ingested_txn_kvs(&tikv, &ctx, sst_range, 2); } @@ -106,27 +123,22 @@ fn test_ingest_sst() { let (mut meta, data) = gen_sst_file(sst_path, sst_range); // No region id and epoch. + must_acquire_sst_lease(&import, &meta, Duration::MAX); send_upload_sst(&import, &meta, &data).unwrap(); - let mut ingest = IngestRequest::default(); - ingest.set_context(ctx.clone()); - ingest.set_sst(meta.clone()); - let resp = import.ingest(&ingest).unwrap(); - assert!(resp.has_error()); + must_ingest_sst_error(&import, ctx.clone(), meta.clone()); // Set region id and epoch. meta.set_region_id(ctx.get_region_id()); meta.set_region_epoch(ctx.get_region_epoch().clone()); + must_acquire_sst_lease(&import, &meta, Duration::MAX); send_upload_sst(&import, &meta, &data).unwrap(); // Can't upload the same file again. assert_to_string_contains!( send_upload_sst(&import, &meta, &data).unwrap_err(), "FileExists" ); - - ingest.set_sst(meta); - let resp = import.ingest(&ingest).unwrap(); - assert!(!resp.has_error(), "{:?}", resp.get_error()); + must_ingest_sst(&import, ctx.clone(), meta.clone()); for _ in 0..10 { let region_keys = cluster @@ -185,6 +197,7 @@ fn test_switch_mode_v2() { // Set region id and epoch. meta.set_region_id(ctx.get_region_id()); meta.set_region_epoch(ctx.get_region_epoch().clone()); + must_acquire_sst_lease(import, &meta, Duration::MAX); send_upload_sst(import, &meta, &data).unwrap(); let mut ingest = IngestRequest::default(); ingest.set_context(ctx.clone()); @@ -241,13 +254,10 @@ fn test_upload_and_ingest_with_tde() { meta.set_region_id(ctx.get_region_id()); meta.set_region_epoch(ctx.get_region_epoch().clone()); + must_acquire_sst_lease(&import, &meta, Duration::MAX); send_upload_sst(&import, &meta, &data).unwrap(); - let mut ingest = IngestRequest::default(); - ingest.set_context(ctx.clone()); - ingest.set_sst(meta); - let resp = import.ingest(&ingest).unwrap(); - assert!(!resp.has_error(), "{:?}", resp.get_error()); + must_ingest_sst(&import, ctx.clone(), meta); check_ingested_kvs(&tikv, &ctx, sst_range); } @@ -268,14 +278,11 @@ fn test_ingest_sst_without_crc32() { meta.set_region_epoch(ctx.get_region_epoch().clone()); // Set crc32 == 0 and length != 0 still ingest success + must_acquire_sst_lease(&import, &meta, Duration::MAX); send_upload_sst(&import, &meta, &data).unwrap(); meta.set_crc32(0); - let mut ingest = IngestRequest::default(); - ingest.set_context(ctx.clone()); - ingest.set_sst(meta); - let resp = import.ingest(&ingest).unwrap(); - assert!(!resp.has_error(), "{:?}", resp.get_error()); + must_ingest_sst(&import, ctx.clone(), meta); // Check ingested kvs check_ingested_kvs(&tikv, &ctx, sst_range); @@ -301,6 +308,18 @@ fn test_download_sst() { download.set_storage_backend(external_storage::make_local_backend(temp_dir.path())); download.set_name("missing.sst".to_owned()); + // Without lease, download must fail. 
+ let result = import.download(&download).unwrap(); + assert!( + result + .get_error() + .get_message() + .contains("lease has expired"), + "{:?}", + result + ); + + must_acquire_sst_lease(&import, &meta, Duration::MAX); let result = import.download(&download).unwrap(); assert!( result.has_error(), @@ -328,11 +347,7 @@ fn test_download_sst() { // Do an ingest and verify the result is correct. - let mut ingest = IngestRequest::default(); - ingest.set_context(ctx.clone()); - ingest.set_sst(meta); - let resp = import.ingest(&ingest).unwrap(); - assert!(!resp.has_error()); + must_ingest_sst(&import, ctx.clone(), meta); check_ingested_kvs(&tikv, &ctx, sst_range); } @@ -349,6 +364,7 @@ fn test_cleanup_sst() { meta.set_region_id(ctx.get_region_id()); meta.set_region_epoch(ctx.get_region_epoch().clone()); + must_acquire_sst_lease(&import, &meta, Duration::MAX); send_upload_sst(&import, &meta, &data).unwrap(); // Can not upload the same file when it exists. @@ -394,6 +410,7 @@ fn test_cleanup_sst_v2() { meta.set_region_id(ctx.get_region_id()); meta.set_region_epoch(ctx.get_region_epoch().clone()); + must_acquire_sst_lease(&import, &meta, Duration::MAX); send_upload_sst(&import, &meta, &data).unwrap(); // Can not upload the same file when it exists. @@ -413,6 +430,7 @@ fn test_cleanup_sst_v2() { let sst_range = (0, 100); let (mut meta, data) = gen_sst_file(sst_path, sst_range); meta.set_region_id(9999); + must_acquire_sst_lease(&import, &meta, Duration::MAX); send_upload_sst(&import, &meta, &data).unwrap(); // This should be cleanuped check_sst_deleted(&import, &meta, &data); @@ -426,6 +444,7 @@ fn test_cleanup_sst_v2() { let sst_range = (60, 80); let (mut meta, data) = gen_sst_file(sst_path, sst_range); meta.set_region_id(9999); + must_acquire_sst_lease(&import, &meta, Duration::MAX); send_upload_sst(&import, &meta, &data).unwrap(); std::thread::sleep(Duration::from_millis(500)); assert_to_string_contains!( @@ -454,6 +473,7 @@ fn test_ingest_sst_region_not_found() { meta.set_region_id(ctx_not_found.get_region_id()); meta.set_region_epoch(ctx_not_found.get_region_epoch().clone()); + must_acquire_sst_lease(&import, &meta, Duration::MAX); let mut ingest = IngestRequest::default(); ingest.set_context(ctx_not_found); ingest.set_sst(meta); @@ -483,6 +503,8 @@ fn test_ingest_multiple_sst() { meta2.set_region_epoch(ctx.get_region_epoch().clone()); meta2.set_cf_name("write".to_owned()); + must_acquire_sst_lease(&import, &meta1, Duration::MAX); + must_acquire_sst_lease(&import, &meta2, Duration::MAX); send_upload_sst(&import, &meta1, &data1).unwrap(); send_upload_sst(&import, &meta2, &data2).unwrap(); @@ -518,13 +540,10 @@ fn test_duplicate_and_close() { keys.push(key.as_bytes().to_vec()); values.push(key.as_bytes().to_vec()); } + must_acquire_sst_lease(&import, &meta, Duration::MAX); let resp = send_write_sst(&import, &meta, keys, values, commit_ts).unwrap(); for m in resp.metas.into_iter() { - let mut ingest = IngestRequest::default(); - ingest.set_context(ctx.clone()); - ingest.set_sst(m.clone()); - let resp = import.ingest(&ingest).unwrap(); - assert!(!resp.has_error()); + must_ingest_sst(&import, ctx.clone(), m.clone()); } } @@ -571,6 +590,7 @@ fn test_suspend_import() { keys.push(vec![i]); values.push(vec![i]); } + must_acquire_sst_lease(&import, &meta, Duration::MAX); send_write_sst(&import, &meta, keys, values, 1) }; let ingest = |sst_meta: &SstMeta| { diff --git a/tests/integrations/import/util.rs b/tests/integrations/import/util.rs index 92804860dd9..d6a8f071163 100644 --- 
a/tests/integrations/import/util.rs
+++ b/tests/integrations/import/util.rs
@@ -1,32 +1,15 @@
 // Copyright 2020 TiKV Project Authors. Licensed under Apache-2.0.
 
-use std::{
-    io::{Cursor, Write},
-    sync::Arc,
-    thread,
-    time::Duration,
-};
+use std::{sync::Arc, time::Duration};
 
-use collections::HashMap;
 use engine_rocks::RocksEngine;
-use engine_traits::CF_DEFAULT;
-use external_storage::{ExternalStorage, UnpinReader};
-use futures::{executor::block_on, io::Cursor as AsyncCursor, stream, SinkExt};
-use grpcio::{ChannelBuilder, Environment, Result, WriteFlags};
+use grpcio::{ChannelBuilder, Environment};
-use kvproto::{
-    brpb::{Local, StorageBackend},
-    import_sstpb::{KvMeta, *},
-    kvrpcpb::*,
-    tikvpb::*,
-};
+use kvproto::{import_sstpb::*, kvrpcpb::*, tikvpb::*};
 use security::SecurityConfig;
-use tempfile::TempDir;
 use test_raftstore::*;
 use test_raftstore_v2::{Cluster as ClusterV2, ServerCluster as ServerClusterV2};
 use tikv::config::TikvConfig;
-use tikv_util::{codec::stream_event::EventEncoder, stream::block_on_external_io, HandyRwLock};
-use txn_types::Key;
-use uuid::Uuid;
+use tikv_util::HandyRwLock;
 
 const CLEANUP_SST_MILLIS: u64 = 10;
 
@@ -184,214 +167,3 @@ pub fn new_cluster_and_tikv_import_client_tde() -> (
     let (cluster, ctx, tikv, import) = open_cluster_and_tikv_import_client(Some(config));
     (tmp_dir, cluster, ctx, tikv, import)
 }
-
-pub fn new_sst_meta(crc32: u32, length: u64) -> SstMeta {
-    let mut m = SstMeta::default();
-    m.set_uuid(Uuid::new_v4().as_bytes().to_vec());
-    m.set_crc32(crc32);
-    m.set_length(length);
-    m
-}
-
-pub fn send_upload_sst(
-    client: &ImportSstClient,
-    meta: &SstMeta,
-    data: &[u8],
-) -> Result<UploadResponse> {
-    let mut r1 = UploadRequest::default();
-    r1.set_meta(meta.clone());
-    let mut r2 = UploadRequest::default();
-    r2.set_data(data.to_vec());
-    let reqs: Vec<_> = vec![r1, r2]
-        .into_iter()
-        .map(|r| Result::Ok((r, WriteFlags::default())))
-        .collect();
-    let (mut tx, rx) = client.upload().unwrap();
-    let mut stream = stream::iter(reqs);
-    block_on(async move {
-        tx.send_all(&mut stream).await?;
-        tx.close().await?;
-        rx.await
-    })
-}
-
-pub fn send_write_sst(
-    client: &ImportSstClient,
-    meta: &SstMeta,
-    keys: Vec<Vec<u8>>,
-    values: Vec<Vec<u8>>,
-    commit_ts: u64,
-) -> Result<WriteResponse> {
-    let mut r1 = WriteRequest::default();
-    r1.set_meta(meta.clone());
-    let mut r2 = WriteRequest::default();
-
-    let mut batch = WriteBatch::default();
-    let mut pairs = vec![];
-
-    for (i, key) in keys.iter().enumerate() {
-        let mut pair = Pair::default();
-        pair.set_key(key.to_vec());
-        pair.set_value(values[i].to_vec());
-        pairs.push(pair);
-    }
-    batch.set_commit_ts(commit_ts);
-    batch.set_pairs(pairs.into());
-    r2.set_batch(batch);
-
-    let reqs: Vec<_> = vec![r1, r2]
-        .into_iter()
-        .map(|r| Result::Ok((r, WriteFlags::default())))
-        .collect();
-
-    let (mut tx, rx) = client.write().unwrap();
-    let mut stream = stream::iter(reqs);
-    block_on(async move {
-        tx.send_all(&mut stream).await?;
-        tx.close().await?;
-        rx.await
-    })
-}
-
-pub fn check_ingested_kvs(tikv: &TikvClient, ctx: &Context, sst_range: (u8, u8)) {
-    check_ingested_kvs_cf(tikv, ctx, "", sst_range);
-}
-
-pub fn check_ingested_kvs_cf(tikv: &TikvClient, ctx: &Context, cf: &str, sst_range: (u8, u8)) {
-    for i in sst_range.0..sst_range.1 {
-        let mut m = RawGetRequest::default();
-        m.set_context(ctx.clone());
-        m.set_key(vec![i]);
-        m.set_cf(cf.to_owned());
-        let resp = tikv.raw_get(&m).unwrap();
-        assert!(resp.get_error().is_empty());
-        assert!(!resp.has_region_error());
-        assert_eq!(resp.get_value(), &[i]);
-    }
-}
-
-#[track_caller]
-pub fn check_applied_kvs_cf<K: AsRef<[u8]>, V: AsRef<[u8]> + std::fmt::Debug>(
-    tikv: &TikvClient,
-    ctx: &Context,
-    cf: &str,
-    entries: impl Iterator<Item = (K, V, u64)>,
-) {
-    let mut get = RawBatchGetRequest::default();
-    get.set_cf(cf.to_owned());
-    get.set_context(ctx.clone());
-    let mut keymap = HashMap::default();
-    for (key, value, ts) in entries {
-        let the_key = Key::from_raw(key.as_ref())
-            .append_ts(ts.into())
-            .into_encoded();
-        keymap.insert(the_key.clone(), value);
-        get.mut_keys().push(the_key);
-    }
-    for pair in tikv.raw_batch_get(&get).unwrap().get_pairs() {
-        let entry = keymap.remove(pair.get_key()).expect("unexpected key");
-        assert_eq!(
-            entry.as_ref(),
-            pair.get_value(),
-            "key is {:?}",
-            pair.get_key()
-        );
-    }
-    assert!(
-        keymap.is_empty(),
-        "not all keys consumed, remained {:?}",
-        keymap
-    );
-}
-
-pub fn check_ingested_txn_kvs(
-    tikv: &TikvClient,
-    ctx: &Context,
-    sst_range: (u8, u8),
-    start_ts: u64,
-) {
-    for i in sst_range.0..sst_range.1 {
-        let mut m = GetRequest::default();
-        m.set_context(ctx.clone());
-        m.set_key(vec![i]);
-        m.set_version(start_ts);
-        let resp = tikv.kv_get(&m).unwrap();
-        assert!(!resp.has_region_error());
-        assert_eq!(resp.get_value(), &[i]);
-    }
-}
-
-pub fn check_sst_deleted(client: &ImportSstClient, meta: &SstMeta, data: &[u8]) {
-    for _ in 0..10 {
-        if send_upload_sst(client, meta, data).is_ok() {
-            // If we can upload the file, it means the previous file has been deleted.
-            return;
-        }
-        thread::sleep(Duration::from_millis(CLEANUP_SST_MILLIS));
-    }
-    send_upload_sst(client, meta, data).unwrap();
-}
-
-pub fn make_plain_file<I, K, V>(storage: &dyn ExternalStorage, name: &str, kvs: I) -> KvMeta
-where
-    I: Iterator<Item = (K, V, u64)>,
-    K: AsRef<[u8]>,
-    V: AsRef<[u8]>,
-{
-    let mut buf = vec![];
-    let mut file = Cursor::new(&mut buf);
-    let mut start_ts: Option<u64> = None;
-    for (key, value, ts) in kvs {
-        let the_key = Key::from_raw(key.as_ref())
-            .append_ts(ts.into())
-            .into_encoded();
-        start_ts = Some(start_ts.map_or(ts, |ts0| ts0.min(ts)));
-        for segment in EventEncoder::encode_event(&the_key, value.as_ref()) {
-            file.write_all(segment.as_ref()).unwrap();
-        }
-    }
-    file.flush().unwrap();
-    let len = buf.len() as u64;
-    block_on_external_io(storage.write(name, UnpinReader(Box::new(AsyncCursor::new(buf))), len))
-        .unwrap();
-    let mut meta = KvMeta::new();
-    meta.set_start_ts(start_ts.unwrap_or_default());
-    meta.set_length(len);
-    meta.set_restore_ts(u64::MAX);
-    meta.set_compression_type(kvproto::brpb::CompressionType::Unknown);
-    meta.set_name(name.to_owned());
-    meta.set_cf(CF_DEFAULT.to_owned());
-    meta
-}
-
-pub fn rewrite_for(meta: &mut KvMeta, old_prefix: &[u8], new_prefix: &[u8]) -> RewriteRule {
-    assert_eq!(old_prefix.len(), new_prefix.len());
-    fn rewrite(key: &mut Vec<u8>, old_prefix: &[u8], new_prefix: &[u8]) {
-        assert!(key.starts_with(old_prefix));
-        let len = old_prefix.len();
-        key.splice(..len, new_prefix.iter().cloned());
-    }
-    rewrite(meta.mut_start_key(), old_prefix, new_prefix);
-    rewrite(meta.mut_end_key(), old_prefix, new_prefix);
-    let mut rule = RewriteRule::default();
-    rule.set_old_key_prefix(old_prefix.to_vec());
-    rule.set_new_key_prefix(new_prefix.to_vec());
-    rule
-}
-
-pub fn register_range_for(meta: &mut KvMeta, start: &[u8], end: &[u8]) {
-    let start = Key::from_raw(start);
-    let end = Key::from_raw(end);
-    meta.set_start_key(start.into_encoded());
-    meta.set_end_key(end.into_encoded());
-}
-
-pub fn local_storage(tmp: &TempDir) -> StorageBackend {
-    let mut backend = StorageBackend::default();
-    backend.set_local({
-        let mut local = Local::default();
-        local.set_path(tmp.path().to_str().unwrap().to_owned());
-        local
-    });
-    backend
-}
diff --git a/tests/integrations/raftstore/test_bootstrap.rs b/tests/integrations/raftstore/test_bootstrap.rs
index e3a1f50100d..3441e0e684a 100644
--- a/tests/integrations/raftstore/test_bootstrap.rs
+++ b/tests/integrations/raftstore/test_bootstrap.rs
@@ -20,6 +20,7 @@ use raftstore::{
 };
 use raftstore_v2::router::PeerMsg;
 use resource_metering::CollectorRegHandle;
 use service::service_manager::GrpcServiceManager;
+use sst_importer::{IngestMediator, IngestObserver, Mediator};
 use tempfile::Builder;
 use test_pd_client::{bootstrap_with_first_region, TestPdClient};
 use test_raftstore::*;
@@ -102,9 +103,21 @@ fn test_node_bootstrap_with_prepared_data() {
     let coprocessor_host = CoprocessorHost::new(node.get_router(), cfg.coprocessor);
     let importer = {
+        let mut ingest_mediator = IngestMediator::default();
+        let ingest_observer = Arc::new(IngestObserver::default());
+        ingest_mediator.register(ingest_observer.clone());
         let dir = tmp_path.path().join("import-sst");
         Arc::new(
-            SstImporter::new(&cfg.import, dir, None, cfg.storage.api_version(), false).unwrap(),
+            SstImporter::new(
+                &cfg.import,
+                dir,
+                None,
+                cfg.storage.api_version(),
+                false,
+                Arc::new(ingest_mediator),
+                ingest_observer,
+            )
+            .unwrap(),
         )
     };
     let (split_check_scheduler, _) = dummy_scheduler();
diff --git a/tests/integrations/server/kv_service.rs b/tests/integrations/server/kv_service.rs
index fadb3de4a8d..50f293dd30d 100644
--- a/tests/integrations/server/kv_service.rs
+++ b/tests/integrations/server/kv_service.rs
@@ -34,6 +34,7 @@ use raftstore::{
 };
 use resource_metering::CollectorRegHandle;
 use service::service_manager::GrpcServiceManager;
+use sst_importer::{IngestMediator, IngestObserver, Mediator};
 use tempfile::Builder;
 use test_raftstore::*;
 use test_raftstore_macro::test_case;
@@ -1390,9 +1391,21 @@ fn test_double_run_node() {
     let snap_mgr = SnapManager::new(tmp.path().to_str().unwrap());
     let coprocessor_host = CoprocessorHost::new(router, raftstore::coprocessor::Config::default());
     let importer = {
+        let mut ingest_mediator = IngestMediator::default();
+        let ingest_observer = Arc::new(IngestObserver::default());
+        ingest_mediator.register(ingest_observer.clone());
         let dir = Path::new(MiscExt::path(&engines.kv)).join("import-sst");
         Arc::new(
-            SstImporter::new(&ImportConfig::default(), dir, None, ApiVersion::V1, false).unwrap(),
+            SstImporter::new(
+                &ImportConfig::default(),
+                dir,
+                None,
+                ApiVersion::V1,
+                false,
+                Arc::new(ingest_mediator),
+                ingest_observer,
+            )
+            .unwrap(),
        )
     };
     let (split_check_scheduler, _) = dummy_scheduler();
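The three test harnesses touched above (dynamic raftstore config, test_bootstrap, and kv_service) repeat the same importer wiring. A minimal sketch of that pattern follows; the patch only shows the argument order of the extended SstImporter::new, so the parameter comments and the concrete return type are assumptions, not a definitive API.

```rust
// Sketch of the importer wiring repeated in the test harnesses above.
// IngestMediator, IngestObserver, Mediator, and the two extra SstImporter::new
// arguments come from this patch; everything commented below is an assumption.
use std::sync::Arc;

use kvproto::kvrpcpb::ApiVersion;
use sst_importer::{Config as ImportConfig, IngestMediator, IngestObserver, Mediator, SstImporter};

// On branches where SstImporter is generic over the local engine, this
// signature would carry the corresponding type parameter.
fn build_importer(import_dir: &str) -> Arc<SstImporter> {
    let mut ingest_mediator = IngestMediator::default();
    let ingest_observer = Arc::new(IngestObserver::default());
    // Register the observer with the mediator so ingest/lease events are routed
    // to it; the importer then receives both handles.
    ingest_mediator.register(ingest_observer.clone());
    Arc::new(
        SstImporter::new(
            &ImportConfig::default(),
            import_dir,
            None,           // no encryption key manager
            ApiVersion::V1,
            false,          // presumably the raft-kv v2 flag
            Arc::new(ingest_mediator),
            ingest_observer,
        )
        .unwrap(),
    )
}
```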