From 2ded1d5b1b5923c11c0f1a704b560f0f3cb225c0 Mon Sep 17 00:00:00 2001 From: NexVeridian Date: Tue, 27 Aug 2024 15:51:51 -0700 Subject: [PATCH] refactor init_db and init_progress_bar --- Cargo.lock | 87 +++++++++------------------------- benches/bench.rs | 2 +- flake.lock | 18 +++---- src/main.rs | 5 +- src/utils.rs | 63 +++--------------------- src/utils/init_db.rs | 38 +++++++++++++++ src/utils/init_progress_bar.rs | 22 +++++++++ tests/integration.rs | 2 +- 8 files changed, 103 insertions(+), 134 deletions(-) create mode 100644 src/utils/init_db.rs create mode 100644 src/utils/init_progress_bar.rs diff --git a/Cargo.lock b/Cargo.lock index 2a6b923..7408b9f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -434,9 +434,9 @@ dependencies = [ [[package]] name = "bytemuck" -version = "1.17.0" +version = "1.17.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6fd4c6dcc3b0aea2f5c0b4b82c2b15fe39ddbc76041a310848f4706edf76bb31" +checksum = "773d90827bc3feecfb67fab12e24de0749aad83c74b9504ecde46237b5cd24e2" [[package]] name = "byteorder" @@ -500,9 +500,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.1.14" +version = "1.1.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50d2eb3cd3d1bf4529e31c215ee6f93ec5a3d536d9f578f93d9d33ee19562932" +checksum = "57b6a275aa2903740dc87da01c62040406b8812552e97129a63ea8850a17c6e6" dependencies = [ "shlex", ] @@ -2385,9 +2385,9 @@ checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c" [[package]] name = "proc-macro-crate" -version = "3.1.0" +version = "3.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d37c51ca738a55da99dc0c4a34860fd675453b8b36209178c2249bb13651284" +checksum = "8ecf48c7ca261d60b74ab1a7b20da18bede46776b2e55535cb958eb595c5fa7b" dependencies = [ "toml_edit", ] @@ -2994,9 +2994,9 @@ dependencies = [ [[package]] name = "rustix" -version = "0.38.34" +version = "0.38.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70dc5ec042f7a43c4a73241207cecc9873a06d45debb38b329f8541d85c2730f" +checksum = "a85d50532239da68e9addb745ba38ff4612a242c1c7ceea689c4bc7c2f43c36f" dependencies = [ "bitflags 2.6.0", "errno", @@ -3041,9 +3041,9 @@ dependencies = [ [[package]] name = "rustls-webpki" -version = "0.102.6" +version = "0.102.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e6b52d4fda176fd835fdc55a835d4a89b8499cad995885a21149d5ad62f852e" +checksum = "84678086bd54edf2b415183ed7a94d0efb049f1b646a33e22a36f3794be6ae56" dependencies = [ "ring", "rustls-pki-types", @@ -3346,15 +3346,15 @@ checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" [[package]] name = "stacker" -version = "0.1.16" +version = "0.1.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95a5daa25ea337c85ed954c0496e3bdd2c7308cc3b24cf7b50d04876654c579f" +checksum = "799c883d55abdb5e98af1a7b3f23b9b6de8ecada0ecac058672d7635eb48ca7b" dependencies = [ "cc", "cfg-if", "libc", "psm", - "windows-sys 0.36.1", + "windows-sys 0.59.0", ] [[package]] @@ -3831,9 +3831,9 @@ checksum = "0dd7358ecb8fc2f8d014bf86f6f638ce72ba252a2c3a2572f2a795f1d23efb41" [[package]] name = "toml_edit" -version = "0.21.1" +version = "0.22.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a8534fd7f78b5405e860340ad6575217ce99f38d4d5c8f2442cb5ecb50090e1" +checksum = "583c44c02ad26b0c3f3066fe629275e50627026c51ac2e595cca4c230ce1ce1d" dependencies = [ "indexmap 2.4.0", "toml_datetime", @@ -4307,19 +4307,6 @@ dependencies = [ "windows-targets", ] -[[package]] -name = "windows-sys" -version = "0.36.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea04155a16a59f9eab786fe12a4a450e75cdb175f9e0d80da1e17db09f55b8d2" -dependencies = [ - "windows_aarch64_msvc 0.36.1", - "windows_i686_gnu 0.36.1", - "windows_i686_msvc 0.36.1", - "windows_x86_64_gnu 0.36.1", - "windows_x86_64_msvc 0.36.1", -] - [[package]] name = "windows-sys" version = "0.52.0" @@ -4345,13 +4332,13 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" dependencies = [ "windows_aarch64_gnullvm", - "windows_aarch64_msvc 0.52.6", - "windows_i686_gnu 0.52.6", + "windows_aarch64_msvc", + "windows_i686_gnu", "windows_i686_gnullvm", - "windows_i686_msvc 0.52.6", - "windows_x86_64_gnu 0.52.6", + "windows_i686_msvc", + "windows_x86_64_gnu", "windows_x86_64_gnullvm", - "windows_x86_64_msvc 0.52.6", + "windows_x86_64_msvc", ] [[package]] @@ -4360,24 +4347,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" -[[package]] -name = "windows_aarch64_msvc" -version = "0.36.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9bb8c3fd39ade2d67e9874ac4f3db21f0d710bee00fe7cab16949ec184eeaa47" - [[package]] name = "windows_aarch64_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" -[[package]] -name = "windows_i686_gnu" -version = "0.36.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "180e6ccf01daf4c426b846dfc66db1fc518f074baa793aa7d9b9aaeffad6a3b6" - [[package]] name = "windows_i686_gnu" version = "0.52.6" @@ -4390,24 +4365,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" -[[package]] -name = "windows_i686_msvc" -version = "0.36.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2e7917148b2812d1eeafaeb22a97e4813dfa60a3f8f78ebe204bcc88f12f024" - [[package]] name = "windows_i686_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" -[[package]] -name = "windows_x86_64_gnu" -version = "0.36.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4dcd171b8776c41b97521e5da127a2d86ad280114807d0b2ab1e462bc764d9e1" - [[package]] name = "windows_x86_64_gnu" version = "0.52.6" @@ -4420,12 +4383,6 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" -[[package]] -name = "windows_x86_64_msvc" -version = "0.36.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c811ca4a8c853ef420abd8592ba53ddbbac90410fab6903b3e79972a631f7680" - [[package]] name = "windows_x86_64_msvc" version = "0.52.6" @@ -4434,9 +4391,9 @@ checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" [[package]] name = "winnow" -version = "0.5.40" +version = "0.6.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f593a95398737aeed53e489c785df13f3618e41dbcd6718c6addbf1395aa6876" +checksum = "68a9bda4691f099d435ad181000724da8e5899daa10713c2d432552b9ccd3a6f" dependencies = [ "memchr", ] diff --git a/benches/bench.rs b/benches/bench.rs index 749c3a4..817b5cf 100644 --- a/benches/bench.rs +++ b/benches/bench.rs @@ -11,7 +11,7 @@ async fn inti_db() -> Result, Error> { env::set_var("WIKIDATA_LANG", "en"); env::set_var("OVERWRITE_DB", "true"); - let db = create_db_mem().await?; + let db = init_db::create_db_mem().await?; Ok(db) } diff --git a/flake.lock b/flake.lock index ef13edb..f265288 100644 --- a/flake.lock +++ b/flake.lock @@ -3,11 +3,11 @@ "advisory-db": { "flake": false, "locked": { - "lastModified": 1724510776, - "narHash": "sha256-K9CHOXzHPfNjZsz3dC9Vhdryz70dyaDTsCjFJHB19xA=", + "lastModified": 1724775741, + "narHash": "sha256-xuj7Ye3Y2EgunLiEEV5zYxUQuLTURV5mgbXDB1fA7h8=", "owner": "rustsec", "repo": "advisory-db", - "rev": "dd0703e582ab7edc2637bc3385d540c3dbffa0db", + "rev": "fe4d5979b34444815287d61bd2a4e193cebbc7a6", "type": "github" }, "original": { @@ -44,11 +44,11 @@ "rust-analyzer-src": [] }, "locked": { - "lastModified": 1724653830, - "narHash": "sha256-88f0KK8h6tGIP4Na5RJDKs0S+7WsGGaCGNkLj/bPV3g=", + "lastModified": 1724740262, + "narHash": "sha256-cpFasbzOTlwLi4fNas6hDznVUdCJn/lMLxi7MAMG6hg=", "owner": "nix-community", "repo": "fenix", - "rev": "9ecf5e7d800ace001320da8acadd4a3deb872a83", + "rev": "703efdd9b5c6a7d5824afa348a24fbbf8ff226be", "type": "github" }, "original": { @@ -77,11 +77,11 @@ }, "nixpkgs": { "locked": { - "lastModified": 1724395761, - "narHash": "sha256-zRkDV/nbrnp3Y8oCADf5ETl1sDrdmAW6/bBVJ8EbIdQ=", + "lastModified": 1724748588, + "narHash": "sha256-NlpGA4+AIf1dKNq76ps90rxowlFXUsV9x7vK/mN37JM=", "owner": "NixOS", "repo": "nixpkgs", - "rev": "ae815cee91b417be55d43781eb4b73ae1ecc396c", + "rev": "a6292e34000dc93d43bccf78338770c1c5ec8a99", "type": "github" }, "original": { diff --git a/src/main.rs b/src/main.rs index db8da2b..848225d 100644 --- a/src/main.rs +++ b/src/main.rs @@ -3,6 +3,7 @@ use lazy_static::lazy_static; use std::{env, io::BufRead}; use surrealdb::{engine::remote::ws::Client, Surreal}; use tokio::time::{sleep, Duration}; + mod utils; use utils::*; @@ -35,9 +36,9 @@ pub enum CreateMode { #[tokio::main] async fn main() -> Result<(), Error> { sleep(Duration::from_secs(10)).await; - let pb = create_pb().await; + let pb = init_progress_bar::create_pb().await; - let db = create_db_ws().await?; + let db = init_db::create_db_ws().await?; let reader = File_Format::new(&WIKIDATA_FILE_FORMAT).reader(&WIKIDATA_FILE_NAME)?; match *CREATE_MODE { diff --git a/src/utils.rs b/src/utils.rs index 1e915a5..4707d7c 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -2,7 +2,7 @@ use anyhow::{Error, Result}; use bzip2::read::MultiBzDecoder; use core::panic; use futures::future::join_all; -use indicatif::{ProgressBar, ProgressState, ProgressStyle}; +use indicatif::ProgressBar; use lazy_static::lazy_static; use rand::{distributions::Alphanumeric, Rng}; use serde_json::{from_str, Value}; @@ -11,17 +11,12 @@ use std::{ fs::File, io::{BufRead, BufReader}, }; -use surrealdb::{ - engine::{ - local::{Db, Mem}, - remote::ws::{Client, Ws}, - }, - opt::auth::Root, - Connection, Surreal, -}; +use surrealdb::{Connection, Surreal}; use tokio::time::{sleep, Duration}; use wikidata::Entity; +pub mod init_db; +pub mod init_progress_bar; mod tables; use tables::*; @@ -30,10 +25,6 @@ lazy_static! { .expect("OVERWRITE_DB not set") .parse() .expect("Failed to parse OVERWRITE_DB"); - static ref DB_USER: String = env::var("DB_USER").expect("DB_USER not set"); - static ref DB_PASSWORD: String = env::var("DB_PASSWORD").expect("DB_PASSWORD not set"); - static ref WIKIDATA_DB_PORT: String = - env::var("WIKIDATA_DB_PORT").expect("WIKIDATA_DB_PORT not set"); static ref FILTER_PATH: String = env::var("FILTER_PATH").unwrap_or("../filter.surql".to_string()); } @@ -172,7 +163,7 @@ pub async fn create_db_entities_bulk_filter( pb: &Option, batch_size: usize, ) -> Result<(), Error> { - let db_mem = create_db_mem().await?; + let db_mem = init_db::create_db_mem().await?; create_db_entities_bulk(&db_mem, lines, &None, batch_size).await?; let filter = tokio::fs::read_to_string(&*FILTER_PATH).await?; @@ -263,7 +254,7 @@ pub async fn create_db_entities_threaded( }; } None => { - let db = if let Ok(db) = create_db_ws().await { + let db = if let Ok(db) = init_db::create_db_ws().await { db } else { continue; @@ -300,49 +291,9 @@ pub async fn create_db_entities_threaded( create_db_entities(&db, &chunk, &pb).await?; } None => { - create_db_entities(&create_db_ws().await?, &chunk, &pb).await?; + create_db_entities(&init_db::create_db_ws().await?, &chunk, &pb).await?; } } join_all(futures).await; Ok(()) } - -pub async fn create_db_ws() -> Result, Error> { - let db = Surreal::new::(WIKIDATA_DB_PORT.as_str()).await?; - - db.signin(Root { - username: &DB_USER, - password: &DB_PASSWORD, - }) - .await?; - db.use_ns("wikidata").use_db("wikidata").await?; - - Ok(db) -} - -pub async fn create_db_mem() -> Result, Error> { - let db = Surreal::new::(()).await?; - db.use_ns("wikidata").use_db("wikidata").await?; - Ok(db) -} - -pub async fn create_pb() -> ProgressBar { - let total_size = 110_000_000; - let pb = ProgressBar::new(total_size); - pb.set_style( - ProgressStyle::with_template( - "[{elapsed_precise}] [{wide_bar:.cyan/blue}] {human_pos}/{human_len} ETA:[{eta}]", - ) - .unwrap() - .with_key( - "eta", - |state: &ProgressState, w: &mut dyn std::fmt::Write| { - let sec = state.eta().as_secs(); - let min = (sec / 60) % 60; - let hr = (sec / 60) / 60; - write!(w, "{}:{:02}:{:02}", hr, min, sec % 60).unwrap() - }, - ), - ); - pb -} diff --git a/src/utils/init_db.rs b/src/utils/init_db.rs new file mode 100644 index 0000000..da1075f --- /dev/null +++ b/src/utils/init_db.rs @@ -0,0 +1,38 @@ +use anyhow::Error; +use anyhow::Result; +use lazy_static::lazy_static; +use std::env; +use surrealdb::{ + engine::{ + local::{Db, Mem}, + remote::ws::{Client, Ws}, + }, + opt::auth::Root, + Surreal, +}; + +lazy_static! { + static ref DB_USER: String = env::var("DB_USER").expect("DB_USER not set"); + static ref DB_PASSWORD: String = env::var("DB_PASSWORD").expect("DB_PASSWORD not set"); + static ref WIKIDATA_DB_PORT: String = + env::var("WIKIDATA_DB_PORT").expect("WIKIDATA_DB_PORT not set"); +} + +pub async fn create_db_ws() -> Result, Error> { + let db = Surreal::new::(WIKIDATA_DB_PORT.as_str()).await?; + + db.signin(Root { + username: &DB_USER, + password: &DB_PASSWORD, + }) + .await?; + db.use_ns("wikidata").use_db("wikidata").await?; + + Ok(db) +} + +pub async fn create_db_mem() -> Result, Error> { + let db = Surreal::new::(()).await?; + db.use_ns("wikidata").use_db("wikidata").await?; + Ok(db) +} diff --git a/src/utils/init_progress_bar.rs b/src/utils/init_progress_bar.rs new file mode 100644 index 0000000..2ae849f --- /dev/null +++ b/src/utils/init_progress_bar.rs @@ -0,0 +1,22 @@ +use indicatif::{ProgressBar, ProgressState, ProgressStyle}; + +pub async fn create_pb() -> ProgressBar { + let total_size = 110_000_000; + let pb = ProgressBar::new(total_size); + pb.set_style( + ProgressStyle::with_template( + "[{elapsed_precise}] [{wide_bar:.cyan/blue}] {human_pos}/{human_len} ETA:[{eta}]", + ) + .unwrap() + .with_key( + "eta", + |state: &ProgressState, w: &mut dyn std::fmt::Write| { + let sec = state.eta().as_secs(); + let min = (sec / 60) % 60; + let hr = (sec / 60) / 60; + write!(w, "{}:{:02}:{:02}", hr, min, sec % 60).unwrap() + }, + ), + ); + pb +} diff --git a/tests/integration.rs b/tests/integration.rs index 2f52af3..e9a62b0 100644 --- a/tests/integration.rs +++ b/tests/integration.rs @@ -9,7 +9,7 @@ async fn inti_db() -> Result, Error> { env::set_var("WIKIDATA_LANG", "en"); env::set_var("OVERWRITE_DB", "true"); - let db = create_db_mem().await?; + let db = init_db::create_db_mem().await?; Ok(db) }