refactor init_db and init_progress_bar

This commit is contained in:
Elijah McMorris 2024-08-27 15:51:51 -07:00
parent 731df97cd2
commit 2ded1d5b1b
Signed by: NexVeridian
SSH key fingerprint: SHA256:bsA1SKZxuEcEVHAy3gY1HUeM5ykRJl0U0kQHQn0hMg8
8 changed files with 103 additions and 134 deletions

87
Cargo.lock generated
View file

@ -434,9 +434,9 @@ dependencies = [
[[package]]
name = "bytemuck"
version = "1.17.0"
version = "1.17.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6fd4c6dcc3b0aea2f5c0b4b82c2b15fe39ddbc76041a310848f4706edf76bb31"
checksum = "773d90827bc3feecfb67fab12e24de0749aad83c74b9504ecde46237b5cd24e2"
[[package]]
name = "byteorder"
@ -500,9 +500,9 @@ dependencies = [
[[package]]
name = "cc"
version = "1.1.14"
version = "1.1.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "50d2eb3cd3d1bf4529e31c215ee6f93ec5a3d536d9f578f93d9d33ee19562932"
checksum = "57b6a275aa2903740dc87da01c62040406b8812552e97129a63ea8850a17c6e6"
dependencies = [
"shlex",
]
@ -2385,9 +2385,9 @@ checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c"
[[package]]
name = "proc-macro-crate"
version = "3.1.0"
version = "3.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6d37c51ca738a55da99dc0c4a34860fd675453b8b36209178c2249bb13651284"
checksum = "8ecf48c7ca261d60b74ab1a7b20da18bede46776b2e55535cb958eb595c5fa7b"
dependencies = [
"toml_edit",
]
@ -2994,9 +2994,9 @@ dependencies = [
[[package]]
name = "rustix"
version = "0.38.34"
version = "0.38.35"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "70dc5ec042f7a43c4a73241207cecc9873a06d45debb38b329f8541d85c2730f"
checksum = "a85d50532239da68e9addb745ba38ff4612a242c1c7ceea689c4bc7c2f43c36f"
dependencies = [
"bitflags 2.6.0",
"errno",
@ -3041,9 +3041,9 @@ dependencies = [
[[package]]
name = "rustls-webpki"
version = "0.102.6"
version = "0.102.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e6b52d4fda176fd835fdc55a835d4a89b8499cad995885a21149d5ad62f852e"
checksum = "84678086bd54edf2b415183ed7a94d0efb049f1b646a33e22a36f3794be6ae56"
dependencies = [
"ring",
"rustls-pki-types",
@ -3346,15 +3346,15 @@ checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3"
[[package]]
name = "stacker"
version = "0.1.16"
version = "0.1.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "95a5daa25ea337c85ed954c0496e3bdd2c7308cc3b24cf7b50d04876654c579f"
checksum = "799c883d55abdb5e98af1a7b3f23b9b6de8ecada0ecac058672d7635eb48ca7b"
dependencies = [
"cc",
"cfg-if",
"libc",
"psm",
"windows-sys 0.36.1",
"windows-sys 0.59.0",
]
[[package]]
@ -3831,9 +3831,9 @@ checksum = "0dd7358ecb8fc2f8d014bf86f6f638ce72ba252a2c3a2572f2a795f1d23efb41"
[[package]]
name = "toml_edit"
version = "0.21.1"
version = "0.22.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6a8534fd7f78b5405e860340ad6575217ce99f38d4d5c8f2442cb5ecb50090e1"
checksum = "583c44c02ad26b0c3f3066fe629275e50627026c51ac2e595cca4c230ce1ce1d"
dependencies = [
"indexmap 2.4.0",
"toml_datetime",
@ -4307,19 +4307,6 @@ dependencies = [
"windows-targets",
]
[[package]]
name = "windows-sys"
version = "0.36.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ea04155a16a59f9eab786fe12a4a450e75cdb175f9e0d80da1e17db09f55b8d2"
dependencies = [
"windows_aarch64_msvc 0.36.1",
"windows_i686_gnu 0.36.1",
"windows_i686_msvc 0.36.1",
"windows_x86_64_gnu 0.36.1",
"windows_x86_64_msvc 0.36.1",
]
[[package]]
name = "windows-sys"
version = "0.52.0"
@ -4345,13 +4332,13 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
dependencies = [
"windows_aarch64_gnullvm",
"windows_aarch64_msvc 0.52.6",
"windows_i686_gnu 0.52.6",
"windows_aarch64_msvc",
"windows_i686_gnu",
"windows_i686_gnullvm",
"windows_i686_msvc 0.52.6",
"windows_x86_64_gnu 0.52.6",
"windows_i686_msvc",
"windows_x86_64_gnu",
"windows_x86_64_gnullvm",
"windows_x86_64_msvc 0.52.6",
"windows_x86_64_msvc",
]
[[package]]
@ -4360,24 +4347,12 @@ version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
[[package]]
name = "windows_aarch64_msvc"
version = "0.36.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9bb8c3fd39ade2d67e9874ac4f3db21f0d710bee00fe7cab16949ec184eeaa47"
[[package]]
name = "windows_aarch64_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
[[package]]
name = "windows_i686_gnu"
version = "0.36.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "180e6ccf01daf4c426b846dfc66db1fc518f074baa793aa7d9b9aaeffad6a3b6"
[[package]]
name = "windows_i686_gnu"
version = "0.52.6"
@ -4390,24 +4365,12 @@ version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
[[package]]
name = "windows_i686_msvc"
version = "0.36.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2e7917148b2812d1eeafaeb22a97e4813dfa60a3f8f78ebe204bcc88f12f024"
[[package]]
name = "windows_i686_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
[[package]]
name = "windows_x86_64_gnu"
version = "0.36.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4dcd171b8776c41b97521e5da127a2d86ad280114807d0b2ab1e462bc764d9e1"
[[package]]
name = "windows_x86_64_gnu"
version = "0.52.6"
@ -4420,12 +4383,6 @@ version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
[[package]]
name = "windows_x86_64_msvc"
version = "0.36.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c811ca4a8c853ef420abd8592ba53ddbbac90410fab6903b3e79972a631f7680"
[[package]]
name = "windows_x86_64_msvc"
version = "0.52.6"
@ -4434,9 +4391,9 @@ checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
[[package]]
name = "winnow"
version = "0.5.40"
version = "0.6.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f593a95398737aeed53e489c785df13f3618e41dbcd6718c6addbf1395aa6876"
checksum = "68a9bda4691f099d435ad181000724da8e5899daa10713c2d432552b9ccd3a6f"
dependencies = [
"memchr",
]

View file

@ -11,7 +11,7 @@ async fn inti_db() -> Result<Surreal<Db>, Error> {
env::set_var("WIKIDATA_LANG", "en");
env::set_var("OVERWRITE_DB", "true");
let db = create_db_mem().await?;
let db = init_db::create_db_mem().await?;
Ok(db)
}

18
flake.lock generated
View file

@ -3,11 +3,11 @@
"advisory-db": {
"flake": false,
"locked": {
"lastModified": 1724510776,
"narHash": "sha256-K9CHOXzHPfNjZsz3dC9Vhdryz70dyaDTsCjFJHB19xA=",
"lastModified": 1724775741,
"narHash": "sha256-xuj7Ye3Y2EgunLiEEV5zYxUQuLTURV5mgbXDB1fA7h8=",
"owner": "rustsec",
"repo": "advisory-db",
"rev": "dd0703e582ab7edc2637bc3385d540c3dbffa0db",
"rev": "fe4d5979b34444815287d61bd2a4e193cebbc7a6",
"type": "github"
},
"original": {
@ -44,11 +44,11 @@
"rust-analyzer-src": []
},
"locked": {
"lastModified": 1724653830,
"narHash": "sha256-88f0KK8h6tGIP4Na5RJDKs0S+7WsGGaCGNkLj/bPV3g=",
"lastModified": 1724740262,
"narHash": "sha256-cpFasbzOTlwLi4fNas6hDznVUdCJn/lMLxi7MAMG6hg=",
"owner": "nix-community",
"repo": "fenix",
"rev": "9ecf5e7d800ace001320da8acadd4a3deb872a83",
"rev": "703efdd9b5c6a7d5824afa348a24fbbf8ff226be",
"type": "github"
},
"original": {
@ -77,11 +77,11 @@
},
"nixpkgs": {
"locked": {
"lastModified": 1724395761,
"narHash": "sha256-zRkDV/nbrnp3Y8oCADf5ETl1sDrdmAW6/bBVJ8EbIdQ=",
"lastModified": 1724748588,
"narHash": "sha256-NlpGA4+AIf1dKNq76ps90rxowlFXUsV9x7vK/mN37JM=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "ae815cee91b417be55d43781eb4b73ae1ecc396c",
"rev": "a6292e34000dc93d43bccf78338770c1c5ec8a99",
"type": "github"
},
"original": {

View file

@ -3,6 +3,7 @@ use lazy_static::lazy_static;
use std::{env, io::BufRead};
use surrealdb::{engine::remote::ws::Client, Surreal};
use tokio::time::{sleep, Duration};
mod utils;
use utils::*;
@ -35,9 +36,9 @@ pub enum CreateMode {
#[tokio::main]
async fn main() -> Result<(), Error> {
sleep(Duration::from_secs(10)).await;
let pb = create_pb().await;
let pb = init_progress_bar::create_pb().await;
let db = create_db_ws().await?;
let db = init_db::create_db_ws().await?;
let reader = File_Format::new(&WIKIDATA_FILE_FORMAT).reader(&WIKIDATA_FILE_NAME)?;
match *CREATE_MODE {

View file

@ -2,7 +2,7 @@ use anyhow::{Error, Result};
use bzip2::read::MultiBzDecoder;
use core::panic;
use futures::future::join_all;
use indicatif::{ProgressBar, ProgressState, ProgressStyle};
use indicatif::ProgressBar;
use lazy_static::lazy_static;
use rand::{distributions::Alphanumeric, Rng};
use serde_json::{from_str, Value};
@ -11,17 +11,12 @@ use std::{
fs::File,
io::{BufRead, BufReader},
};
use surrealdb::{
engine::{
local::{Db, Mem},
remote::ws::{Client, Ws},
},
opt::auth::Root,
Connection, Surreal,
};
use surrealdb::{Connection, Surreal};
use tokio::time::{sleep, Duration};
use wikidata::Entity;
pub mod init_db;
pub mod init_progress_bar;
mod tables;
use tables::*;
@ -30,10 +25,6 @@ lazy_static! {
.expect("OVERWRITE_DB not set")
.parse()
.expect("Failed to parse OVERWRITE_DB");
static ref DB_USER: String = env::var("DB_USER").expect("DB_USER not set");
static ref DB_PASSWORD: String = env::var("DB_PASSWORD").expect("DB_PASSWORD not set");
static ref WIKIDATA_DB_PORT: String =
env::var("WIKIDATA_DB_PORT").expect("WIKIDATA_DB_PORT not set");
static ref FILTER_PATH: String =
env::var("FILTER_PATH").unwrap_or("../filter.surql".to_string());
}
@ -172,7 +163,7 @@ pub async fn create_db_entities_bulk_filter(
pb: &Option<ProgressBar>,
batch_size: usize,
) -> Result<(), Error> {
let db_mem = create_db_mem().await?;
let db_mem = init_db::create_db_mem().await?;
create_db_entities_bulk(&db_mem, lines, &None, batch_size).await?;
let filter = tokio::fs::read_to_string(&*FILTER_PATH).await?;
@ -263,7 +254,7 @@ pub async fn create_db_entities_threaded(
};
}
None => {
let db = if let Ok(db) = create_db_ws().await {
let db = if let Ok(db) = init_db::create_db_ws().await {
db
} else {
continue;
@ -300,49 +291,9 @@ pub async fn create_db_entities_threaded(
create_db_entities(&db, &chunk, &pb).await?;
}
None => {
create_db_entities(&create_db_ws().await?, &chunk, &pb).await?;
create_db_entities(&init_db::create_db_ws().await?, &chunk, &pb).await?;
}
}
join_all(futures).await;
Ok(())
}
/// Opens a WebSocket connection to SurrealDB and prepares it for use.
///
/// The address is taken from `WIKIDATA_DB_PORT`; the session then signs in
/// with the `DB_USER` / `DB_PASSWORD` root credentials and selects the
/// `wikidata` namespace and the `wikidata` database.
///
/// # Errors
/// Propagates any error from connecting, signing in, or selecting the
/// namespace/database.
///
/// # Panics
/// Panics when one of the lazily read environment variables is not set.
pub async fn create_db_ws() -> Result<Surreal<Client>, Error> {
    // Connect first so the statics are evaluated in the same order as before.
    let address = WIKIDATA_DB_PORT.as_str();
    let client = Surreal::new::<Ws>(address).await?;

    let credentials = Root {
        username: &DB_USER,
        password: &DB_PASSWORD,
    };
    client.signin(credentials).await?;

    client.use_ns("wikidata").use_db("wikidata").await?;
    Ok(client)
}
/// Creates an in-memory SurrealDB instance with the `wikidata` namespace
/// and the `wikidata` database already selected.
///
/// # Errors
/// Propagates any error from starting the engine or selecting the
/// namespace/database.
pub async fn create_db_mem() -> Result<Surreal<Db>, Error> {
    let mem_db: Surreal<Db> = Surreal::new::<Mem>(()).await?;
    mem_db.use_ns("wikidata").use_db("wikidata").await?;
    Ok(mem_db)
}
/// Builds the progress bar used while importing.
///
/// The bar has a fixed length of 110,000,000 (presumably the approximate
/// number of entities to import; confirm against the caller) and renders
/// elapsed time, a wide bar, human-readable position/length, and a custom
/// `eta` key formatted as `H:MM:SS`.
///
/// # Panics
/// Panics if the hard-coded template fails to parse.
pub async fn create_pb() -> ProgressBar {
    const BAR_LENGTH: u64 = 110_000_000;
    let bar = ProgressBar::new(BAR_LENGTH);

    let style = ProgressStyle::with_template(
        "[{elapsed_precise}] [{wide_bar:.cyan/blue}] {human_pos}/{human_len} ETA:[{eta}]",
    )
    .unwrap()
    .with_key("eta", |state: &ProgressState, w: &mut dyn std::fmt::Write| {
        // Split the remaining seconds into hours, minutes and seconds.
        let total_secs = state.eta().as_secs();
        let hours = total_secs / 3600;
        let minutes = (total_secs / 60) % 60;
        let seconds = total_secs % 60;
        write!(w, "{}:{:02}:{:02}", hours, minutes, seconds).unwrap()
    });

    bar.set_style(style);
    bar
}

38
src/utils/init_db.rs Normal file
View file

@ -0,0 +1,38 @@
use anyhow::Error;
use anyhow::Result;
use lazy_static::lazy_static;
use std::env;
use surrealdb::{
engine::{
local::{Db, Mem},
remote::ws::{Client, Ws},
},
opt::auth::Root,
Surreal,
};
// Connection settings read from the environment. Each `expect` below panics
// with the given message when its variable cannot be read.
lazy_static! {
// User name passed to `signin` in `create_db_ws`.
static ref DB_USER: String = env::var("DB_USER").expect("DB_USER not set");
// Password passed to `signin` in `create_db_ws`.
static ref DB_PASSWORD: String = env::var("DB_PASSWORD").expect("DB_PASSWORD not set");
// NOTE(review): despite the name, this value is handed to `Surreal::new::<Ws>`
// as the whole connection address, so it presumably holds host:port - confirm.
static ref WIKIDATA_DB_PORT: String =
env::var("WIKIDATA_DB_PORT").expect("WIKIDATA_DB_PORT not set");
}
/// Opens a WebSocket connection to SurrealDB and prepares it for use.
///
/// The address is taken from `WIKIDATA_DB_PORT`; the session then signs in
/// with the `DB_USER` / `DB_PASSWORD` root credentials and selects the
/// `wikidata` namespace and the `wikidata` database.
///
/// # Errors
/// Propagates any error from connecting, signing in, or selecting the
/// namespace/database.
///
/// # Panics
/// Panics when one of the lazily read environment variables is not set.
pub async fn create_db_ws() -> Result<Surreal<Client>, Error> {
    // Connect first so the statics are evaluated in the same order as before.
    let address = WIKIDATA_DB_PORT.as_str();
    let client = Surreal::new::<Ws>(address).await?;

    let credentials = Root {
        username: &DB_USER,
        password: &DB_PASSWORD,
    };
    client.signin(credentials).await?;

    client.use_ns("wikidata").use_db("wikidata").await?;
    Ok(client)
}
/// Creates an in-memory SurrealDB instance with the `wikidata` namespace
/// and the `wikidata` database already selected.
///
/// # Errors
/// Propagates any error from starting the engine or selecting the
/// namespace/database.
pub async fn create_db_mem() -> Result<Surreal<Db>, Error> {
    let mem_db: Surreal<Db> = Surreal::new::<Mem>(()).await?;
    mem_db.use_ns("wikidata").use_db("wikidata").await?;
    Ok(mem_db)
}

View file

@ -0,0 +1,22 @@
use indicatif::{ProgressBar, ProgressState, ProgressStyle};
/// Builds the progress bar used while importing.
///
/// The bar has a fixed length of 110,000,000 (presumably the approximate
/// number of entities to import; confirm against the caller) and renders
/// elapsed time, a wide bar, human-readable position/length, and a custom
/// `eta` key formatted as `H:MM:SS`.
///
/// # Panics
/// Panics if the hard-coded template fails to parse.
pub async fn create_pb() -> ProgressBar {
    const BAR_LENGTH: u64 = 110_000_000;
    let bar = ProgressBar::new(BAR_LENGTH);

    let style = ProgressStyle::with_template(
        "[{elapsed_precise}] [{wide_bar:.cyan/blue}] {human_pos}/{human_len} ETA:[{eta}]",
    )
    .unwrap()
    .with_key("eta", |state: &ProgressState, w: &mut dyn std::fmt::Write| {
        // Split the remaining seconds into hours, minutes and seconds.
        let total_secs = state.eta().as_secs();
        let hours = total_secs / 3600;
        let minutes = (total_secs / 60) % 60;
        let seconds = total_secs % 60;
        write!(w, "{}:{:02}:{:02}", hours, minutes, seconds).unwrap()
    });

    bar.set_style(style);
    bar
}

View file

@ -9,7 +9,7 @@ async fn inti_db() -> Result<Surreal<Db>, Error> {
env::set_var("WIKIDATA_LANG", "en");
env::set_var("OVERWRITE_DB", "true");
let db = create_db_mem().await?;
let db = init_db::create_db_mem().await?;
Ok(db)
}