mirror of
https://github.com/NexVeridian/wikidata-to-surrealdb.git
synced 2025-09-02 09:59:13 +00:00
progress bar, fix docker
This commit is contained in:
parent
8b3311ff21
commit
ee15b46ae2
8 changed files with 71 additions and 25 deletions
38
src/main.rs
38
src/main.rs
|
@ -1,11 +1,15 @@
|
|||
use anyhow::{Error, Ok, Result};
|
||||
use bzip2::read::MultiBzDecoder;
|
||||
use indicatif::{ProgressBar, ProgressState, ProgressStyle};
|
||||
use lazy_static::lazy_static;
|
||||
use serde_json::{from_str, Value};
|
||||
use std::{
|
||||
env,
|
||||
fmt::Write,
|
||||
fs::File,
|
||||
io::{BufRead, BufReader},
|
||||
thread,
|
||||
time::Duration,
|
||||
};
|
||||
use surrealdb::{engine::remote::ws::Ws, opt::auth::Root, Surreal};
|
||||
use wikidata::Entity;
|
||||
|
@ -17,8 +21,9 @@ lazy_static! {
|
|||
#[derive(Debug)]
|
||||
static ref DB_USER: String = env::var("DB_USER").expect("DB_USER not set");
|
||||
static ref DB_PASSWORD: String = env::var("DB_PASSWORD").expect("DB_PASSWORD not set");
|
||||
static ref FILE_FORMAT: String = env::var("FILE_FORMAT").expect("FILE_FORMAT not set");
|
||||
static ref FILE_NAME: String = env::var("FILE_NAME").expect("FILE_NAME not set");
|
||||
static ref WIKIDATA_FILE_FORMAT: String = env::var("WIKIDATA_FILE_FORMAT").expect("FILE_FORMAT not set");
|
||||
static ref WIKIDATA_FILE_NAME: String = env::var("WIKIDATA_FILE_NAME").expect("FILE_NAME not set");
|
||||
static ref WIKIDATA_DB_PORT: String = env::var("WIKIDATA_DB_PORT").expect("WIKIDATA_DB_PORT not set");
|
||||
}
|
||||
|
||||
#[allow(non_camel_case_types)]
|
||||
|
@ -45,7 +50,26 @@ impl File_Format {
|
|||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<(), Error> {
|
||||
let db = Surreal::new::<Ws>("0.0.0.0:8000").await?;
|
||||
thread::sleep(Duration::from_secs(10));
|
||||
|
||||
let mut compleated = 0;
|
||||
let total_size = 113_000_000;
|
||||
|
||||
let pb = ProgressBar::new(total_size);
|
||||
pb.set_style(
|
||||
ProgressStyle::with_template(
|
||||
"[{elapsed_precise}] [{wide_bar:.cyan/blue}] {human_pos}/{human_len} {percent} ETA:{eta}",
|
||||
)?
|
||||
.with_key("eta", |state: &ProgressState, w: &mut dyn Write| {
|
||||
let sec = state.eta().as_secs();
|
||||
let min = (sec / 60) % 60;
|
||||
let hr = (sec / 60) / 60;
|
||||
write!(w, "{}:{:02}:{:02}", hr, min, sec % 60).unwrap()
|
||||
}),
|
||||
);
|
||||
|
||||
let db = Surreal::new::<Ws>(WIKIDATA_DB_PORT.as_str()).await?;
|
||||
|
||||
db.signin(Root {
|
||||
username: &DB_USER,
|
||||
password: &DB_PASSWORD,
|
||||
|
@ -53,7 +77,7 @@ async fn main() -> Result<(), Error> {
|
|||
.await?;
|
||||
db.use_ns("wikidata").use_db("wikidata").await?;
|
||||
|
||||
let reader = File_Format::new(&FILE_FORMAT).reader(&FILE_NAME)?;
|
||||
let reader = File_Format::new(&WIKIDATA_FILE_FORMAT).reader(&WIKIDATA_FILE_NAME)?;
|
||||
|
||||
for line in reader.lines() {
|
||||
let line = line?.trim().trim_end_matches(',').to_string();
|
||||
|
@ -75,7 +99,13 @@ async fn main() -> Result<(), Error> {
|
|||
claims.id = None;
|
||||
let _: Option<Claims> = db.delete(&id).await?;
|
||||
let _: Option<Claims> = db.create(&id).content(claims).await?;
|
||||
|
||||
compleated += 1;
|
||||
if compleated % 1000 == 0 {
|
||||
pb.set_position(compleated);
|
||||
}
|
||||
}
|
||||
|
||||
pb.finish_with_message("Done parsing Wikidata");
|
||||
Ok(())
|
||||
}
|
||||
|
|
|
@ -125,8 +125,8 @@ fn get_name(entity: &Entity) -> String {
|
|||
entity
|
||||
.labels
|
||||
.get(&Lang(WIKIDATA_LANG.to_string()))
|
||||
.expect("No label found")
|
||||
.to_string()
|
||||
.map(|label| label.to_string())
|
||||
.unwrap_or_default()
|
||||
}
|
||||
|
||||
fn get_description(entity: &Entity) -> Option<String> {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue