mirror of https://github.com/NexVeridian/wikidata-to-surrealdb.git
synced 2025-09-02 01:49:13 +00:00

fix: file path, swap from ws to http

parent acef3f8f3b
commit 53e3ffe0dd

12 changed files with 29 additions and 15 deletions
.github/workflows/crane.yml (vendored): 1 change

@@ -13,6 +13,7 @@ on:
 
 env:
   CARGO_TERM_COLOR: always
+  NIX_CONFIG: 'download-buffer-size = 5000MB'
 
 jobs:
   check:
.github/workflows/docker.yml (vendored): 1 change

@@ -20,6 +20,7 @@ env:
   REGISTRY: ghcr.io
   # github.repository as <account>/<repo>
   IMAGE_NAME: ${{ github.repository }}
+  NIX_CONFIG: 'download-buffer-size = 5000MB'
 
 jobs:
   build:
.gitignore (vendored): 2 changes

@@ -105,5 +105,3 @@ venv.bak/
 
 /data
 /target
-
-filter.surql
@@ -23,7 +23,7 @@ Run tests with `nix flake check` or `cargo t`
 Remove the cargo cache for buildkit with `docker builder prune --filter type=exec.cachemount`
 
 ### View Progress
-`docker attach wikidata-to-surrealdb`
+`make view`
 
 # License
 All code in this repository is dual-licensed under either [License-MIT](./LICENSE-MIT) or [LICENSE-APACHE](./LICENSE-Apache) at your option. This means you can select the license you prefer. [Why dual license](https://github.com/bevyengine/bevy/issues/2373).
Cargo.lock (generated): 2 changes

@@ -3438,6 +3438,7 @@ dependencies = [
  "thiserror",
  "tokio",
  "tokio-tungstenite",
+ "tokio-util",
  "tracing",
  "trice",
  "url",

@@ -3818,6 +3819,7 @@ checksum = "9cf6b47b3771c49ac75ad09a6162f53ad4b8088b76ac60e8ec1455b31a189fe1"
 dependencies = [
  "bytes 1.7.1",
  "futures-core",
+ "futures-io",
  "futures-sink",
  "pin-project-lite",
  "tokio",
@@ -8,7 +8,7 @@ license = "MIT OR Apache-2.0"
 anyhow = "1.0"
 serde = { version = "1.0", features = ["derive"] }
 serde_json = "1.0"
-surrealdb-alpha = { version = "2.0.9", features = ["protocol-ws", "kv-mem"] }
+surrealdb-alpha = { version = "2.0.9", features = ["protocol-http", "kv-mem"] }
 tokio = { version = "1.39", features = ["fs", "time"] }
 futures = "0.3"
 wikidata = "1.1"
@@ -27,7 +27,9 @@ Create data folder next to docker-compose.yml and .env, place data inside, and s
 ├── data
 │   ├── Entity.json
 │   ├── latest-all.json.bz2
-│   └── surrealdb
+│   ├── filter.surql
+│   ├── surrealdb
+│   └── temp
 ├── Makefile
 ├── docker-compose.yml
 └── .env

@@ -54,7 +56,7 @@ WIKIDATA_DB_PORT=surrealdb:8000
 # true=overwrite existing data, false=skip if already exists
 OVERWRITE_DB=false
 CREATE_VERSION=Bulk
-#FILTER_PATH=../filter.surql
+#FILTER_PATH=data/filter.surql
 ```
 
 Env string CREATE_VERSION must be in the enum CREATE_VERSION
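The commented FILTER_PATH example now matches the code default. A small sketch of how that fallback is resolved, mirroring the lazy_static shown in the src/utils.rs hunks further down:

```rust
use std::env;

// Returns the filter script path, falling back to the new repo-relative default
// when FILTER_PATH is not set in the environment.
fn filter_path() -> String {
    env::var("FILTER_PATH").unwrap_or("data/filter.surql".to_string())
}
```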
@@ -1,7 +1,7 @@
 use anyhow::{Error, Ok, Result};
 use lazy_static::lazy_static;
 use std::env;
-use surrealdb::{engine::remote::ws::Client, Surreal};
+use surrealdb::{engine::remote::http::Client, Surreal};
 use tokio::time::{sleep, Duration};
 
 mod utils;

@@ -30,6 +30,10 @@ async fn main() -> Result<(), Error> {
     let pb = init_progress_bar::create_pb().await;
     let reader = File_Format::new(&WIKIDATA_FILE_FORMAT).reader(&WIKIDATA_FILE_NAME)?;
 
+    tokio::fs::create_dir_all("data/temp").await?;
+    tokio::fs::remove_dir_all("data/temp").await?;
+    tokio::fs::create_dir_all("data/temp").await?;
+
     CREATE_VERSION
         .run(
             None::<Surreal<Client>>,
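A minimal sketch of the create/remove/create sequence added above, assuming the intent is to guarantee data/temp exists and is empty before the bulk run; the helper name is hypothetical:

```rust
use anyhow::Result;

// Hypothetical helper illustrating the reset pattern from the hunk above.
async fn reset_temp_dir(path: &str) -> Result<()> {
    // Create first so the directory is guaranteed to exist...
    tokio::fs::create_dir_all(path).await?;
    // ...which lets remove_dir_all clear it without a "not found" error...
    tokio::fs::remove_dir_all(path).await?;
    // ...then recreate it empty for the .surql export/import step.
    tokio::fs::create_dir_all(path).await?;
    Ok(())
}
```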
src/utils.rs: 12 changes

@@ -1,4 +1,5 @@
 use anyhow::{Error, Result};
+use core::panic;
 use futures::future::join_all;
 use indicatif::ProgressBar;
 use lazy_static::lazy_static;

@@ -21,7 +22,7 @@ lazy_static! {
         .parse()
         .expect("Failed to parse OVERWRITE_DB");
     static ref FILTER_PATH: String =
-        env::var("FILTER_PATH").unwrap_or("../filter.surql".to_string());
+        env::var("FILTER_PATH").unwrap_or("data/filter.surql".to_string());
 }
 
 pub async fn create_entity(db: &Surreal<impl Connection>, line: &str) -> Result<(), Error> {

@@ -108,7 +109,7 @@ impl CreateVersion {
                 }
             }
             None => {
-                let db = match init_db::create_db_ws().await {
+                let db = match init_db::create_db_remote().await {
                     Ok(db) => db,
                     Err(_) => continue,
                 };

@@ -141,6 +142,12 @@ impl CreateVersion {
                 .create_bulk_filter(db, chunk, pb, batch_size)
                 .await
                 .is_ok(),
+            // CreateVersion::BulkFilter => {
+            //     if let Err(err) = self.create_bulk_filter(db, chunk, pb, batch_size).await {
+            //         panic!("Failed to create entities: {}", err);
+            //     }
+            //     true
+            // }
         }
     }
 

@@ -233,7 +240,6 @@ impl CreateVersion {
             .collect();
 
         let file_path = format!("data/temp/{}.surql", file_name);
-        tokio::fs::create_dir_all("data/temp").await?;
 
         db_mem.export(&file_path).await?;
         db.import(&file_path).await?;
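The last hunk drops the per-chunk create_dir_all now that the temp directory is prepared once at startup. A minimal sketch of that export/import hop, assuming an in-memory Surreal<Db> buffer and a remote Surreal<Client> target; the function name and signature are illustrative:

```rust
use anyhow::Result;
use surrealdb::{
    engine::{local::Db, remote::http::Client},
    Surreal,
};

// Illustrative helper: flush a bulk-loaded in-memory database into the remote
// server by exporting it to a .surql file and importing that file remotely.
async fn flush_chunk(db_mem: &Surreal<Db>, db: &Surreal<Client>, file_name: &str) -> Result<()> {
    // data/temp is created once at startup (see the main() hunk above).
    let file_path = format!("data/temp/{}.surql", file_name);
    db_mem.export(&file_path).await?;
    db.import(&file_path).await?;
    Ok(())
}
```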
@@ -5,7 +5,7 @@ use std::env;
 use surrealdb::{
     engine::{
         local::{Db, Mem},
-        remote::ws::{Client, Ws},
+        remote::http::{Client, Http},
     },
     opt::auth::Root,
     Surreal,

@@ -18,8 +18,8 @@ lazy_static! {
         env::var("WIKIDATA_DB_PORT").expect("WIKIDATA_DB_PORT not set");
 }
 
-pub async fn create_db_ws() -> Result<Surreal<Client>, Error> {
-    let db = Surreal::new::<Ws>(WIKIDATA_DB_PORT.as_str()).await?;
+pub async fn create_db_remote() -> Result<Surreal<Client>, Error> {
+    let db = Surreal::new::<Http>(WIKIDATA_DB_PORT.as_str()).await?;
 
     db.signin(Root {
         username: &DB_USER,
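The rename from create_db_ws to create_db_remote pairs with the Cargo feature change from protocol-ws to protocol-http. A minimal standalone sketch of connecting over HTTP with the surrealdb Rust SDK; the endpoint and root credentials here are placeholders, not the repo's env values:

```rust
use surrealdb::{
    engine::remote::http::{Client, Http},
    opt::auth::Root,
    Surreal,
};

// Connects to a SurrealDB server over HTTP instead of WebSocket.
async fn connect(endpoint: &str) -> Result<Surreal<Client>, surrealdb::Error> {
    // With the protocol-ws feature this line would be Surreal::new::<Ws>(endpoint).
    let db = Surreal::new::<Http>(endpoint).await?;
    db.signin(Root {
        username: "root", // placeholder credentials
        password: "root",
    })
    .await?;
    Ok(db)
}
```

In the repo, the endpoint comes from WIKIDATA_DB_PORT (for example surrealdb:8000 in the .env hunk above).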
@@ -1,7 +1,7 @@
 use indicatif::{ProgressBar, ProgressState, ProgressStyle};
 
 pub async fn create_pb() -> ProgressBar {
-    let total_size = 110_000_000;
+    let total_size = 112_500_000;
     let pb = ProgressBar::new(total_size);
     pb.set_style(
         ProgressStyle::with_template(
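For context, a minimal sketch of an indicatif progress bar built the same way; the template string is illustrative, and only the bumped total (110_000_000 to 112_500_000) comes from the diff:

```rust
use indicatif::{ProgressBar, ProgressStyle};

// Builds a bar sized to the new estimate used by create_pb above.
fn make_pb() -> ProgressBar {
    let total_size = 112_500_000;
    let pb = ProgressBar::new(total_size);
    pb.set_style(
        ProgressStyle::with_template("[{elapsed_precise}] {bar:40} {pos}/{len} ({eta})")
            .expect("valid progress template"),
    );
    pb
}
```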