From 2edaeef04200d748895f780e90d78ced12a2e822 Mon Sep 17 00:00:00 2001 From: NexVeridian Date: Sat, 16 Dec 2023 17:11:54 -0800 Subject: [PATCH] is_err create --- src/main.rs | 41 ++++++++++++++++++++++++++++------------- src/utils.rs | 5 +++-- 2 files changed, 31 insertions(+), 15 deletions(-) diff --git a/src/main.rs b/src/main.rs index 920d489..f8eb784 100644 --- a/src/main.rs +++ b/src/main.rs @@ -67,19 +67,32 @@ async fn create_db_entity(db: &Surreal, line: String) -> Result<(), Erro let id = data.id.clone().expect("No ID"); data.id = None; - let _: Option = db.delete(&id).await?; - let _: Option = db.create(&id).content(data.clone()).await?; + let _ = db.create::>(&id).await.is_err(); + { + db.update::>(&id).content(data).await?; + }; let id = claims.id.clone().expect("No ID"); claims.id = None; - let _: Option = db.delete(&id).await?; - let _: Option = db.create(&id).content(claims).await?; + let _ = db.create::>(&id).await.is_err(); + { + db.update::>(&id).content(claims).await?; + } Ok(()) } -async fn create_db_entities(db: &Surreal, lines: Vec) -> Result<(), Error> { +async fn create_db_entities( + db: &Surreal, + lines: Vec, + pb: ProgressBar, +) -> Result<(), Error> { + let mut counter = 0; for line in lines { create_db_entity(db, line.to_string()).await?; + counter += 1; + if counter % 100 == 0 { + pb.inc(100); + } } Ok(()) } @@ -114,9 +127,10 @@ async fn main() -> Result<(), Error> { let reader = File_Format::new(&WIKIDATA_FILE_FORMAT).reader(&WIKIDATA_FILE_NAME)?; if !*THREADED_REQUESTS { - let counter = 0; + let mut counter = 0; for line in reader.lines() { create_db_entity(&db, line?).await?; + counter += 1; if counter % 100 == 0 { pb.inc(100); } @@ -124,34 +138,35 @@ async fn main() -> Result<(), Error> { } else { let mut futures = Vec::new(); let mut chunk = Vec::new(); - let mut chunk_counter: i32 = 0; - const BATCH_AMMOUNT: u16 = 50; + let mut chunk_counter = 0; + const BATCH_SIZE: usize = 1000; + const BATCH_NUM: usize = 100; for line in reader.lines() { chunk.push(line.unwrap()); - if chunk.len() >= BATCH_AMMOUNT.try_into().unwrap() { + if chunk.len() >= BATCH_SIZE { let db = db.clone(); let lines = chunk.clone(); let pb = pb.clone(); futures.push(tokio::spawn(async move { - create_db_entities(&db, lines).await.unwrap(); - pb.inc(BATCH_AMMOUNT.try_into().unwrap()); + create_db_entities(&db, lines, pb).await.unwrap(); })); chunk_counter += 1; chunk.clear(); } - if chunk_counter >= 50 { + if chunk_counter >= BATCH_NUM { join_all(futures).await; futures = Vec::new(); + chunk_counter = 0; } } join_all(futures).await; } - pb.finish_with_message("Done parsing Wikidata"); + pb.finish(); Ok(()) } diff --git a/src/utils.rs b/src/utils.rs index bf3155f..c4453ba 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -75,7 +75,7 @@ impl EntityMini { id: Some(get_id_entity(&entity)), label: get_name(&entity), claims: thing_claim, - description: get_description(&entity).unwrap_or("".to_string()), + description: get_description(&entity), }, ) } @@ -129,9 +129,10 @@ fn get_name(entity: &Entity) -> String { .unwrap_or_default() } -fn get_description(entity: &Entity) -> Option { +fn get_description(entity: &Entity) -> String { entity .descriptions .get(&Lang(WIKIDATA_LANG.to_string())) .cloned() + .unwrap_or_default() }