refactor: main

This commit is contained in:
Elijah McMorris 2024-08-27 19:11:23 -07:00
parent b885315cd7
commit bb9967ced6
Signed by: NexVeridian
SSH key fingerprint: SHA256:bsA1SKZxuEcEVHAy3gY1HUeM5ykRJl0U0kQHQn0hMg8
3 changed files with 25 additions and 88 deletions

View file

@ -53,17 +53,17 @@ WIKIDATA_FILE_NAME=data/latest-all.json.bz2
WIKIDATA_DB_PORT=surrealdb:8000 WIKIDATA_DB_PORT=surrealdb:8000
# true=overwrite existing data, false=skip if already exists # true=overwrite existing data, false=skip if already exists
OVERWRITE_DB=false OVERWRITE_DB=false
CREATE_MODE=ThreadedSingle CREATE_VERSION=Bulk
#FILTER_PATH=../filter.surql #FILTER_PATH=../filter.surql
``` ```
Env string CREATE_MODE must be in the enum CreateMode Env string CREATE_VERSION must be in the enum CreateVersion
```rust ```rust
pub enum CreateMode { pub enum CreateVersion {
Single, Single,
ThreadedSingle, #[default]
ThreadedBulk, Bulk,
// must create a filter.surql file in the root directory /// must create a filter.surql file in the root directory
BulkFilter, BulkFilter,
} }
``` ```

View file

@ -1,6 +1,6 @@
use anyhow::{Error, Ok, Result}; use anyhow::{Error, Ok, Result};
use lazy_static::lazy_static; use lazy_static::lazy_static;
use std::{env, io::BufRead}; use std::env;
use surrealdb::{engine::remote::ws::Client, Surreal}; use surrealdb::{engine::remote::ws::Client, Surreal};
use tokio::time::{sleep, Duration}; use tokio::time::{sleep, Duration};
@ -12,75 +12,24 @@ lazy_static! {
env::var("WIKIDATA_FILE_FORMAT").expect("FILE_FORMAT not set"); env::var("WIKIDATA_FILE_FORMAT").expect("FILE_FORMAT not set");
static ref WIKIDATA_FILE_NAME: String = static ref WIKIDATA_FILE_NAME: String =
env::var("WIKIDATA_FILE_NAME").expect("FILE_NAME not set"); env::var("WIKIDATA_FILE_NAME").expect("FILE_NAME not set");
static ref CREATE_MODE: CreateMode = match env::var("CREATE_MODE") static ref CREATE_VERSION: CreateVersion = match env::var("CREATE_VERSION")
.expect("CREATE_MODE not set") .expect("CREATE_VERSION not set")
.as_str() .as_str()
{ {
"Single" => CreateMode::Single, "Single" => CreateVersion::Single,
"ThreadedSingle" => CreateMode::ThreadedSingle, "Bulk" => CreateVersion::Bulk,
"ThreadedBulk" => CreateMode::ThreadedBulk, "BulkFilter" => CreateVersion::BulkFilter,
"ThreadedBulkFilter" => CreateMode::ThreadedBulkFilter, _ => panic!("Unknown CREATE_VERSION"),
_ => panic!("Unknown CREATE_MODE"),
}; };
} }
#[derive(Clone, Copy)]
pub enum CreateMode {
Single,
ThreadedSingle,
ThreadedBulk,
// must create a filter.surql file in the root directory
ThreadedBulkFilter,
}
#[tokio::main] #[tokio::main]
async fn main() -> Result<(), Error> { async fn main() -> Result<(), Error> {
sleep(Duration::from_secs(10)).await; sleep(Duration::from_secs(10)).await;
let pb = init_progress_bar::create_pb().await; let pb = init_progress_bar::create_pb().await;
let db = init_db::create_db_ws().await?;
let reader = File_Format::new(&WIKIDATA_FILE_FORMAT).reader(&WIKIDATA_FILE_NAME)?; let reader = File_Format::new(&WIKIDATA_FILE_FORMAT).reader(&WIKIDATA_FILE_NAME)?;
match *CREATE_MODE { CREATE_VERSION
CreateMode::Single => {
let mut counter = 0;
for line in reader.lines() {
let mut retries = 0;
let line = line?;
loop {
if create_entity(&db, &line).await.is_ok() {
break;
}
if retries >= 60 * 10 {
panic!("Failed to create entities, too many retries");
}
retries += 1;
sleep(Duration::from_secs(1)).await;
if db.use_ns("wikidata").use_db("wikidata").await.is_err() {
continue;
};
}
counter += 1;
if counter % 100 == 0 {
pb.inc(100);
}
}
}
CreateMode::ThreadedSingle => {
CreateVersion::Single
.run_threaded(
None::<Surreal<Client>>,
reader,
Some(pb.clone()),
2_500,
100,
)
.await?;
}
CreateMode::ThreadedBulk => {
CreateVersion::Bulk
.run_threaded( .run_threaded(
None::<Surreal<Client>>, None::<Surreal<Client>>,
reader, reader,
@ -89,19 +38,6 @@ async fn main() -> Result<(), Error> {
1_000, 1_000,
) )
.await?; .await?;
}
CreateMode::ThreadedBulkFilter => {
CreateVersion::BulkFilter
.run_threaded(
None::<Surreal<Client>>,
reader,
Some(pb.clone()),
500,
1_000,
)
.await?;
}
}
pb.finish(); pb.finish();
Ok(()) Ok(())

View file

@ -76,11 +76,12 @@ pub async fn create_entity(db: &Surreal<impl Connection>, line: &str) -> Result<
Ok(()) Ok(())
} }
#[derive(Clone, Copy)] #[derive(Clone, Copy, Default)]
pub enum CreateVersion { pub enum CreateVersion {
Single, Single,
#[default]
Bulk, Bulk,
// must create a filter.surql file in the root directory /// must create a filter.surql file in the root directory
BulkFilter, BulkFilter,
} }
impl CreateVersion { impl CreateVersion {