diff --git a/README.md b/README.md index 74d23f3..426a0f5 100644 --- a/README.md +++ b/README.md @@ -53,17 +53,17 @@ WIKIDATA_FILE_NAME=data/latest-all.json.bz2 WIKIDATA_DB_PORT=surrealdb:8000 # true=overwrite existing data, false=skip if already exists OVERWRITE_DB=false -CREATE_MODE=ThreadedSingle +CREATE_VERSION=Bulk #FILTER_PATH=../filter.surql ``` -Env string CREATE_MODE must be in the enum CreateMode +Env string CREATE_VERSION must be one of the variants of the enum CreateVersion ```rust -pub enum CreateMode { +pub enum CreateVersion { Single, - ThreadedSingle, - ThreadedBulk, - // must create a filter.surql file in the root directory + #[default] + Bulk, + /// must create a filter.surql file in the root directory BulkFilter, } ``` diff --git a/src/main.rs b/src/main.rs index de069bc..23cfb4a 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,6 +1,6 @@ use anyhow::{Error, Ok, Result}; use lazy_static::lazy_static; -use std::{env, io::BufRead}; +use std::env; use surrealdb::{engine::remote::ws::Client, Surreal}; use tokio::time::{sleep, Duration}; @@ -12,96 +12,32 @@ lazy_static! 
{ env::var("WIKIDATA_FILE_FORMAT").expect("FILE_FORMAT not set"); static ref WIKIDATA_FILE_NAME: String = env::var("WIKIDATA_FILE_NAME").expect("FILE_NAME not set"); - static ref CREATE_MODE: CreateMode = match env::var("CREATE_MODE") - .expect("CREATE_MODE not set") + static ref CREATE_VERSION: CreateVersion = match env::var("CREATE_VERSION") + .expect("CREATE_VERSION not set") .as_str() { - "Single" => CreateMode::Single, - "ThreadedSingle" => CreateMode::ThreadedSingle, - "ThreadedBulk" => CreateMode::ThreadedBulk, - "ThreadedBulkFilter" => CreateMode::ThreadedBulkFilter, - _ => panic!("Unknown CREATE_MODE"), + "Single" => CreateVersion::Single, + "Bulk" => CreateVersion::Bulk, + "BulkFilter" => CreateVersion::BulkFilter, + _ => panic!("Unknown CREATE_VERSION"), }; } -#[derive(Clone, Copy)] -pub enum CreateMode { - Single, - ThreadedSingle, - ThreadedBulk, - // must create a filter.surql file in the root directory - ThreadedBulkFilter, -} - #[tokio::main] async fn main() -> Result<(), Error> { sleep(Duration::from_secs(10)).await; let pb = init_progress_bar::create_pb().await; - - let db = init_db::create_db_ws().await?; let reader = File_Format::new(&WIKIDATA_FILE_FORMAT).reader(&WIKIDATA_FILE_NAME)?; - match *CREATE_MODE { - CreateMode::Single => { - let mut counter = 0; - for line in reader.lines() { - let mut retries = 0; - let line = line?; - - loop { - if create_entity(&db, &line).await.is_ok() { - break; - } - if retries >= 60 * 10 { - panic!("Failed to create entities, too many retries"); - } - retries += 1; - sleep(Duration::from_secs(1)).await; - if db.use_ns("wikidata").use_db("wikidata").await.is_err() { - continue; - }; - } - - counter += 1; - if counter % 100 == 0 { - pb.inc(100); - } - } - } - CreateMode::ThreadedSingle => { - CreateVersion::Single - .run_threaded( - None::<Surreal<Client>>, - reader, - Some(pb.clone()), - 2_500, - 100, - ) - .await?; - } - CreateMode::ThreadedBulk => { - CreateVersion::Bulk - .run_threaded( - None::<Surreal<Client>>, - reader, 
Some(pb.clone()), - 500, - 1_000, - ) - .await?; - } - CreateMode::ThreadedBulkFilter => { - CreateVersion::BulkFilter - .run_threaded( - None::<Surreal<Client>>, - reader, - Some(pb.clone()), - 500, - 1_000, - ) - .await?; - } - } + CREATE_VERSION + .run_threaded( + None::<Surreal<Client>>, + reader, + Some(pb.clone()), + 500, + 1_000, + ) + .await?; pb.finish(); Ok(()) diff --git a/src/utils.rs b/src/utils.rs index 4355d64..f1273bc 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -76,11 +76,12 @@ pub async fn create_entity(db: &Surreal, line: &str) -> Result< Ok(()) } -#[derive(Clone, Copy)] +#[derive(Clone, Copy, Default)] pub enum CreateVersion { Single, + #[default] Bulk, - // must create a filter.surql file in the root directory + /// must create a filter.surql file in the root directory BulkFilter, } impl CreateVersion {