mirror of
https://github.com/NexVeridian/wikidata-to-surrealdb.git
synced 2025-09-02 09:59:13 +00:00
bz2
This commit is contained in:
parent
08d8d2d63b
commit
7bf0033970
4 changed files with 29 additions and 9 deletions
3
.gitignore
vendored
3
.gitignore
vendored
|
@ -103,7 +103,6 @@ venv.bak/
|
||||||
# mypy
|
# mypy
|
||||||
.mypy_cache/
|
.mypy_cache/
|
||||||
|
|
||||||
|
/data
|
||||||
/target
|
/target
|
||||||
Cargo.lock
|
Cargo.lock
|
||||||
|
|
||||||
data/*
|
|
||||||
|
|
|
@ -6,9 +6,10 @@ license = "MIT OR Apache-2.0"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
anyhow = "1.0"
|
anyhow = "1.0"
|
||||||
dotenv_codegen = "0.15.0"
|
dotenv_codegen = "0.15"
|
||||||
serde = { version = "1.0", features = ["derive"] }
|
serde = { version = "1.0", features = ["derive"] }
|
||||||
serde_json = "1.0"
|
serde_json = "1.0"
|
||||||
surrealdb = "1.0"
|
surrealdb = "1.0"
|
||||||
tokio = "1.35"
|
tokio = "1.35"
|
||||||
wikidata = "0.3"
|
wikidata = "0.3.1"
|
||||||
|
bzip2 = { version = "0.4", features = ["tokio"] }
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
version: "3"
|
version: "3"
|
||||||
services:
|
services:
|
||||||
surrealdb:
|
surrealdb:
|
||||||
|
image: surrealdb/surrealdb:latest
|
||||||
container_name: surrealdb
|
container_name: surrealdb
|
||||||
env_file:
|
env_file:
|
||||||
- .env
|
- .env
|
||||||
|
@ -12,7 +13,6 @@ services:
|
||||||
- --pass
|
- --pass
|
||||||
- $DB_PASSWORD
|
- $DB_PASSWORD
|
||||||
- file:/data/surrealdb
|
- file:/data/surrealdb
|
||||||
image: surrealdb/surrealdb:latest
|
|
||||||
ports:
|
ports:
|
||||||
- 8000:8000
|
- 8000:8000
|
||||||
volumes:
|
volumes:
|
||||||
|
|
28
src/main.rs
28
src/main.rs
|
@ -1,4 +1,5 @@
|
||||||
use anyhow::{Error, Ok, Result};
|
use anyhow::{Error, Ok, Result};
|
||||||
|
use bzip2::read::MultiBzDecoder;
|
||||||
use dotenv_codegen::dotenv;
|
use dotenv_codegen::dotenv;
|
||||||
use serde_json::{from_str, Value};
|
use serde_json::{from_str, Value};
|
||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
|
@ -9,20 +10,39 @@ use wikidata::Entity;
|
||||||
mod utils;
|
mod utils;
|
||||||
use utils::*;
|
use utils::*;
|
||||||
|
|
||||||
|
/// Supported on-disk encodings of the input dump.
///
/// The variant is chosen from a format name by `File_Format::new` and decides
/// how `File_Format::reader` wraps the opened file.
// The type and variant names are kept exactly as existing callers spell them
// (`File_Format::json`, `File_Format::bz2`), hence the lint allowance instead
// of a rename to UpperCamelCase.
#[allow(non_camel_case_types)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum File_Format {
    /// Plain, uncompressed text read line by line.
    json,
    /// bzip2-compressed input that is decompressed while reading.
    bz2,
}
|
||||||
|
impl File_Format {
|
||||||
|
fn new(file: &str) -> Self {
|
||||||
|
match file {
|
||||||
|
"json" => Self::json,
|
||||||
|
"bz2" => Self::bz2,
|
||||||
|
_ => panic!("Unknown file format"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
fn reader(self, file: &str) -> Result<Box<dyn BufRead>, Error> {
|
||||||
|
let file = File::open(file)?;
|
||||||
|
match self {
|
||||||
|
File_Format::json => Ok(Box::new(BufReader::new(file))),
|
||||||
|
File_Format::bz2 => Ok(Box::new(BufReader::new(MultiBzDecoder::new(file)))),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[tokio::main]
|
#[tokio::main]
|
||||||
async fn main() -> Result<(), Error> {
|
async fn main() -> Result<(), Error> {
|
||||||
let db = Surreal::new::<Ws>("0.0.0.0:8000").await?;
|
let db = Surreal::new::<Ws>("0.0.0.0:8000").await?;
|
||||||
|
|
||||||
db.signin(Root {
|
db.signin(Root {
|
||||||
username: dotenv!("DB_USER"),
|
username: dotenv!("DB_USER"),
|
||||||
password: dotenv!("DB_PASSWORD"),
|
password: dotenv!("DB_PASSWORD"),
|
||||||
})
|
})
|
||||||
.await?;
|
.await?;
|
||||||
|
|
||||||
db.use_ns("wikidata").use_db("wikidata").await?;
|
db.use_ns("wikidata").use_db("wikidata").await?;
|
||||||
|
|
||||||
let file = File::open("data/e.json")?;
|
let reader = File_Format::new(dotenv!("FILE_FORMAT")).reader(dotenv!("FILE_NAME"))?;
|
||||||
let reader = BufReader::new(file);
|
|
||||||
|
|
||||||
for line in reader.lines() {
|
for line in reader.lines() {
|
||||||
let line = line?.trim().trim_end_matches(',').to_string();
|
let line = line?.trim().trim_end_matches(',').to_string();
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue