diff --git a/Cargo.lock b/Cargo.lock index 5ff69c7..3e7cf1f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -216,9 +216,9 @@ dependencies = [ [[package]] name = "async-graphql" -version = "7.0.9" +version = "7.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d37c3e9ba322eb00e9e5e997d58f08e8b6de037325b9367ac59bca8e3cd46af" +checksum = "a19415d9541f1758f39bdf0c732848beb7e2e39df9b32f90c6635882c3f9173a" dependencies = [ "async-graphql-derive", "async-graphql-parser", @@ -247,9 +247,9 @@ dependencies = [ [[package]] name = "async-graphql-derive" -version = "7.0.9" +version = "7.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1141703c11c6ad4fa9b3b0e1e476dea01dbd18a44db00f949b804afaab2f344" +checksum = "73a85254454f63ae1e5a475afff931465f11bf76d19fb5bb1b1d0d6a2f2b8db0" dependencies = [ "Inflector", "async-graphql-parser", @@ -264,9 +264,9 @@ dependencies = [ [[package]] name = "async-graphql-parser" -version = "7.0.9" +version = "7.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f66edcce4c38c18f7eb181fdf561c3d3aa2d644ce7358fc7a928c00a4ffef17" +checksum = "2e94b202e404d18429c8482d61f64cb0a8639fd1e7c2caf2b258f035e0b7caff" dependencies = [ "async-graphql-value", "pest", @@ -276,9 +276,9 @@ dependencies = [ [[package]] name = "async-graphql-value" -version = "7.0.9" +version = "7.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b0206011cad065420c27988f17dd7fe201a0e056b20c262209b7bffcd6fa176" +checksum = "a43a7bbb0ddea47c6f51913eba6e17a093b34e000588a93bb80a978ad129f3e9" dependencies = [ "bytes 1.7.2", "indexmap 2.5.0", @@ -316,9 +316,9 @@ checksum = "8b75356056920673b02621b35afd0f7dda9306d03c79a30f5c56c44cf256e3de" [[package]] name = "async-trait" -version = "0.1.82" +version = "0.1.83" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a27b8a3a6e1a44fa4c8baf1f653e4172e81486d4941f2237e20dc2d0cf4ddff1" +checksum = "721cae7de5c34fbb2acd27e21e6d2cf7b886dce0c27388d46c4e6c47ea4318dd" dependencies = [ "proc-macro2", "quote", @@ -342,6 +342,17 @@ version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" +[[package]] +name = "backon" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e4fa97bb310c33c811334143cf64c5bb2b7b3c06e453db6b095d7061eff8f113" +dependencies = [ + "fastrand", + "gloo-timers", + "tokio", +] + [[package]] name = "backtrace" version = "0.3.74" @@ -736,18 +747,18 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.17" +version = "4.5.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3e5a21b8495e732f1b3c364c9949b201ca7bae518c502c80256c96ad79eaf6ac" +checksum = "b0956a43b323ac1afaffc053ed5c4b7c1f1800bacd1683c353aabbb752515dd3" dependencies = [ "clap_builder", ] [[package]] name = "clap_builder" -version = "4.5.17" +version = "4.5.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8cf2dd12af7a047ad9d6da2b6b249759a22a7abc0f474c1dae1777afa4b21a73" +checksum = "4d72166dd41634086d5803a47eb71ae740e61d84709c36f3c34110173db3961b" dependencies = [ "anstyle", "clap_lex", @@ -1374,6 +1385,18 @@ version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" +[[package]] +name = "gloo-timers" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbb143cf96099802033e0d4f4963b19fd2e0b728bcf076cd9cf7f6634f092994" +dependencies = [ + "futures-channel", + "futures-core", + "js-sys", + "wasm-bindgen", +] + [[package]] name = "half" version = "2.4.1" @@ -1560,9 +1583,9 @@ dependencies = [ [[package]] name = "hyper-util" -version = "0.1.8" +version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da62f120a8a37763efb0cf8fdf264b884c7b8b9ac8660b900c8661030c00e6ba" +checksum = "41296eb09f183ac68eec06e03cdbea2e759633d4067b2f6552fc2e009bcad08b" dependencies = [ "bytes 1.7.2", "futures-channel", @@ -1573,7 +1596,6 @@ dependencies = [ "pin-project-lite", "socket2", "tokio", - "tower", "tower-service", "tracing", ] @@ -1823,9 +1845,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.158" +version = "0.2.159" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8adc4bb1803a324070e64a98ae98f38934d91957a99cfb3a43dcbc01bc56439" +checksum = "561d97a539a36e26a9a5fad1ea11a3039a67714694aaa379433e580854bc3dc5" [[package]] name = "libm" @@ -2380,26 +2402,6 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5be167a7af36ee22fe3115051bc51f6e6c7054c9348e28deb4f49bd6f705a315" -[[package]] -name = "pin-project" -version = "1.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6bf43b791c5b9e34c3d182969b4abb522f9343702850a2e57f460d00d09b4b3" -dependencies = [ - "pin-project-internal", -] - -[[package]] -name = "pin-project-internal" -version = "1.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f38a4412a78282e09a2cf38d195ea5420d15ba0602cb375210efbc877243965" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.77", -] - [[package]] name = "pin-project-lite" version = "0.2.14" @@ -2414,9 +2416,9 @@ checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" [[package]] name = "pkg-config" -version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec" +checksum = "953ec861398dccce10c670dfeaf3ec4911ca479e9c02154b3a215178c5f566f2" [[package]] name = "plotters" @@ -2448,9 +2450,9 @@ dependencies = [ [[package]] name = "portable-atomic" -version = "1.7.0" +version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da544ee218f0d287a911e9c99a39a8c9bc8fcad3cb8db5959940044ecfc67265" +checksum = "d30538d42559de6b034bc76fd6dd4c38961b1ee5c6c56e3808c50128fdbc22ce" [[package]] name = "powerfmt" @@ -2756,9 +2758,9 @@ checksum = "568fde39e6aec674be99c9dd38b4c79040faf31038bd5a41ab1908db00c2319b" [[package]] name = "redox_syscall" -version = "0.5.4" +version = "0.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0884ad60e090bf1345b93da0a5de8923c93884cd03f40dfcfddd3b4bee661853" +checksum = "62871f2d65009c0256aed1b9cfeeb8ac272833c404e13d53d400cd0dad7a2ac0" dependencies = [ "bitflags 2.6.0", ] @@ -3340,9 +3342,9 @@ checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" [[package]] name = "simdutf8" -version = "0.1.4" +version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f27f6278552951f1f2b8cf9da965d10969b2efdea95a6ec47987ab46edfe263a" +checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e" [[package]] name = "simple_asn1" @@ -3784,18 +3786,18 @@ dependencies = [ [[package]] name = "thiserror" -version = "1.0.63" +version = "1.0.64" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0342370b38b6a11b6cc11d6a805569958d54cfa061a29969c3b5ce2ea405724" +checksum = "d50af8abc119fb8bb6dbabcfa89656f46f84aa0ac7688088608076ad2b459a84" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.63" +version = "1.0.64" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4558b58466b9ad7ca0f102865eccc95938dca1a74a856f2b57b6629050da261" +checksum = "08904e7672f5eb876eaaf87e0ce17857500934f4981c4a0ab2b4aa98baac7fc3" dependencies = [ "proc-macro2", "quote", @@ -3964,36 +3966,15 @@ checksum = "0dd7358ecb8fc2f8d014bf86f6f638ce72ba252a2c3a2572f2a795f1d23efb41" [[package]] name = "toml_edit" -version = "0.22.21" +version = "0.22.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b072cee73c449a636ffd6f32bd8de3a9f7119139aff882f44943ce2986dc5cf" +checksum = "4ae48d6208a266e853d946088ed816055e556cc6028c5e8e2b84d9fa5dd7c7f5" dependencies = [ "indexmap 2.5.0", "toml_datetime", "winnow", ] -[[package]] -name = "tower" -version = "0.4.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8fa9be0de6cf49e536ce1851f987bd21a43b771b09473c3549a6c853db37c1c" -dependencies = [ - "futures-core", - "futures-util", - "pin-project", - "pin-project-lite", - "tokio", - "tower-layer", - "tower-service", -] - -[[package]] -name = "tower-layer" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "121c2a6cda46980bb0fcd1647ffaf6cd3fc79a013de288782836f6df9c48780e" - [[package]] name = "tower-service" version = "0.3.3" @@ -4141,9 +4122,9 @@ dependencies = [ [[package]] name = "unicode-width" -version = "0.1.13" +version = "0.1.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0336d538f7abc86d282a4189614dfaa90810dfc2c6f6427eaf88e16311dd225d" +checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af" [[package]] name = "unicode-xid" @@ -4361,6 +4342,7 @@ name = "wikidata-to-surrealdb" version = "0.1.0" dependencies = [ "anyhow", + "backon", "bzip2", "criterion", "futures 0.3.30", @@ -4530,9 +4512,9 @@ checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" [[package]] name = "winnow" -version = "0.6.18" +version = "0.6.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68a9bda4691f099d435ad181000724da8e5899daa10713c2d432552b9ccd3a6f" +checksum = "c52ac009d615e79296318c1bcce2d422aaca15ad08515e344feeda07df67a587" dependencies = [ "memchr", ] diff --git a/Cargo.toml b/Cargo.toml index b020128..b8508d5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,6 +16,7 @@ bzip2 = { version = "0.4", features = ["tokio"] } lazy_static = "1.5" indicatif = "0.17" rand = "0.8" +backon = { version = "1.2", features = ["tokio-sleep"] } [dev-dependencies] rstest = "0.22" diff --git a/flake.lock b/flake.lock index 6b16ef9..786d7e0 100644 --- a/flake.lock +++ b/flake.lock @@ -3,11 +3,11 @@ "advisory-db": { "flake": false, "locked": { - "lastModified": 1726496182, - "narHash": "sha256-V5193OeAuy0smfeOF2omeJXk+kq+tlEzcpk2MrUpai0=", + "lastModified": 1727103737, + "narHash": "sha256-otyUwbqaXYkeBxPy3Gf0ACB0rHl23OzAlfpsVvY1hbc=", "owner": "rustsec", "repo": "advisory-db", - "rev": "3cae2352cf82b5815b98aa309e0f4df6aa737cec", + "rev": "45780a4d66ad647bd3c148509fb081943efdacfd", "type": "github" }, "original": { @@ -18,11 +18,11 @@ }, "crane": { "locked": { - "lastModified": 1725409566, - "narHash": "sha256-PrtLmqhM6UtJP7v7IGyzjBFhbG4eOAHT6LPYOFmYfbk=", + "lastModified": 1727060013, + "narHash": "sha256-/fC5YlJy4IoAW9GhkJiwyzk0K/gQd9Qi4rRcoweyG9E=", "owner": "ipetkov", "repo": "crane", - "rev": "7e4586bad4e3f8f97a9271def747cf58c4b68f3c", + "rev": "6b40cc876c929bfe1e3a24bf538ce3b5622646ba", "type": "github" }, "original": { @@ -39,11 +39,11 @@ "rust-analyzer-src": [] }, "locked": { - "lastModified": 1726727555, - "narHash": "sha256-WAYoFXry7bU3UXCFklX6tBOfBdZUHi+MeIk1JxUKNEo=", + "lastModified": 1727159616, + "narHash": "sha256-1VjZ+khJwZphRJZy2HvbMSCgi3OV7mu8RjVzqCxVi2k=", "owner": "nix-community", "repo": "fenix", - "rev": "3b974166133158907839fe20147e11696fade644", + "rev": "4306d494985e00719573bbdeb863c27c6d83dc9c", "type": "github" }, "original": { @@ -72,11 +72,11 @@ }, "nixpkgs": { "locked": { - "lastModified": 1726583932, - "narHash": "sha256-zACxiQx8knB3F8+Ze+1BpiYrI+CbhxyWpcSID9kVhkQ=", + "lastModified": 1727089097, + "narHash": "sha256-ZMHMThPsthhUREwDebXw7GX45bJnBCVbfnH1g5iuSPc=", "owner": "NixOS", "repo": "nixpkgs", - "rev": "658e7223191d2598641d50ee4e898126768fe847", + "rev": "568bfef547c14ca438c56a0bece08b8bb2b71a9c", "type": "github" }, "original": { diff --git a/src/utils.rs b/src/utils.rs index b16ceac..6132f02 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -1,4 +1,5 @@ use anyhow::{Error, Result}; +use backon::Retryable; use core::panic; use futures::future::join_all; use indicatif::ProgressBar; @@ -7,9 +8,9 @@ use rand::{distributions::Alphanumeric, Rng}; use serde_json::{from_str, Value}; use std::{env, io::BufRead}; use surrealdb::{Connection, Surreal}; -use tokio::time::{sleep, Duration}; use wikidata::Entity; +pub mod init_backoff; pub mod init_db; pub mod init_progress_bar; pub mod init_reader; @@ -65,64 +66,50 @@ impl CreateVersion { } fn spawn_chunk( - &self, + self, dbo: Option>, chunk: Vec, pb: Option, batch_size: usize, ) -> tokio::task::JoinHandle<()> { - let create_version = *self; - tokio::spawn(async move { - let mut retries = 0; - - loop { - match dbo { - Some(ref db) => { - if create_version.create(db, &chunk, &pb, batch_size).await { - break; - } - } - None => { - let db = match init_db::create_db_remote().await { - Ok(db) => db, - Err(_) => continue, - }; - if create_version.create(&db, &chunk, &pb, batch_size).await { - break; - } - } + match dbo { + Some(db) => self.create_retry(&db, &chunk, &pb, batch_size).await, + None => { + let db = init_db::create_db_remote + .retry(*init_backoff::exponential) + .await + .expect("Failed to create remote db"); + self.create_retry(&db, &chunk, &pb, batch_size).await } - - // Exponential backoff with cap at 60 seconds - if retries == 30 { - panic!("Failed to create entities, too many retries"); - } - sleep(Duration::from_millis(250) * 2_u32.pow(retries.min(8))).await; - retries += 1; } + .unwrap_or_else(|err| panic!("Failed to create entities, too many retries: {}", err)); }) } + /// Retry create with exponential backoff + async fn create_retry( + self, + db: &Surreal, + chunk: &[String], + pb: &Option, + batch_size: usize, + ) -> Result<(), Error> { + (|| async { self.create(db, chunk, pb, batch_size).await }) + .retry(*init_backoff::exponential) + .await + } + async fn create( self, db: &Surreal, chunk: &[String], pb: &Option, batch_size: usize, - ) -> bool { + ) -> Result<(), Error> { match self { - CreateVersion::Bulk => self.create_bulk(db, chunk, pb, batch_size).await.is_ok(), - CreateVersion::BulkFilter => self - .create_bulk_filter(db, chunk, pb, batch_size) - .await - .is_ok(), - // CreateVersion::BulkFilter => { - // if let Err(err) = self.create_bulk_filter(db, chunk, pb, batch_size).await { - // panic!("Failed to create entities: {}", err); - // } - // true - // } + CreateVersion::Bulk => self.create_bulk(db, chunk, pb, batch_size).await, + CreateVersion::BulkFilter => self.create_bulk_filter(db, chunk, pb, batch_size).await, } } diff --git a/src/utils/init_backoff.rs b/src/utils/init_backoff.rs new file mode 100644 index 0000000..7e26766 --- /dev/null +++ b/src/utils/init_backoff.rs @@ -0,0 +1,9 @@ +use backon::ExponentialBuilder; +use lazy_static::lazy_static; +use tokio::time::Duration; + +lazy_static! { + pub static ref exponential: ExponentialBuilder = ExponentialBuilder::default() + .with_max_times(30) + .with_max_delay(Duration::from_secs(60)); +}