mirror of
https://github.com/NexVeridian/wikidata-to-surrealdb.git
synced 2025-09-02 01:49:13 +00:00
fix: file path, swap from ws to http
This commit is contained in:
parent
acef3f8f3b
commit
53e3ffe0dd
12 changed files with 29 additions and 15 deletions
1
.github/workflows/crane.yml
vendored
1
.github/workflows/crane.yml
vendored
|
@ -13,6 +13,7 @@ on:
|
|||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
NIX_CONFIG: 'download-buffer-size = 5000MB'
|
||||
|
||||
jobs:
|
||||
check:
|
||||
|
|
1
.github/workflows/docker.yml
vendored
1
.github/workflows/docker.yml
vendored
|
@ -20,6 +20,7 @@ env:
|
|||
REGISTRY: ghcr.io
|
||||
# github.repository as <account>/<repo>
|
||||
IMAGE_NAME: ${{ github.repository }}
|
||||
NIX_CONFIG: 'download-buffer-size = 5000MB'
|
||||
|
||||
jobs:
|
||||
build:
|
||||
|
|
2
.gitignore
vendored
2
.gitignore
vendored
|
@ -105,5 +105,3 @@ venv.bak/
|
|||
|
||||
/data
|
||||
/target
|
||||
|
||||
filter.surql
|
||||
|
|
|
@ -23,7 +23,7 @@ Run tests with `nix flake check` or `cargo t`
|
|||
Remove the cargo cache for buildkit with `docker builder prune --filter type=exec.cachemount`
|
||||
|
||||
### View Progress
|
||||
`docker attach wikidata-to-surrealdb`
|
||||
`make view`
|
||||
|
||||
# License
|
||||
All code in this repository is dual-licensed under either [LICENSE-MIT](./LICENSE-MIT) or [LICENSE-APACHE](./LICENSE-Apache) at your option. This means you can select the license you prefer. [Why dual license](https://github.com/bevyengine/bevy/issues/2373).
|
||||
|
|
2
Cargo.lock
generated
2
Cargo.lock
generated
|
@ -3438,6 +3438,7 @@ dependencies = [
|
|||
"thiserror",
|
||||
"tokio",
|
||||
"tokio-tungstenite",
|
||||
"tokio-util",
|
||||
"tracing",
|
||||
"trice",
|
||||
"url",
|
||||
|
@ -3818,6 +3819,7 @@ checksum = "9cf6b47b3771c49ac75ad09a6162f53ad4b8088b76ac60e8ec1455b31a189fe1"
|
|||
dependencies = [
|
||||
"bytes 1.7.1",
|
||||
"futures-core",
|
||||
"futures-io",
|
||||
"futures-sink",
|
||||
"pin-project-lite",
|
||||
"tokio",
|
||||
|
|
|
@ -8,7 +8,7 @@ license = "MIT OR Apache-2.0"
|
|||
anyhow = "1.0"
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_json = "1.0"
|
||||
surrealdb-alpha = { version = "2.0.9", features = ["protocol-ws", "kv-mem"] }
|
||||
surrealdb-alpha = { version = "2.0.9", features = ["protocol-http", "kv-mem"] }
|
||||
tokio = { version = "1.39", features = ["fs", "time"] }
|
||||
futures = "0.3"
|
||||
wikidata = "1.1"
|
||||
|
|
|
@ -27,7 +27,9 @@ Create data folder next to docker-compose.yml and .env, place data inside, and s
|
|||
├── data
|
||||
│ ├── Entity.json
|
||||
│ ├── latest-all.json.bz2
|
||||
│ └── surrealdb
|
||||
│ ├── filter.surql
|
||||
│ ├── surrealdb
|
||||
│ └── temp
|
||||
├── Makefile
|
||||
├── docker-compose.yml
|
||||
└── .env
|
||||
|
@ -54,7 +56,7 @@ WIKIDATA_DB_PORT=surrealdb:8000
|
|||
# true=overwrite existing data, false=skip if already exists
|
||||
OVERWRITE_DB=false
|
||||
CREATE_VERSION=Bulk
|
||||
#FILTER_PATH=../filter.surql
|
||||
#FILTER_PATH=data/filter.surql
|
||||
```
|
||||
|
||||
The `CREATE_VERSION` environment variable must match one of the variants of the `CreateVersion` enum
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
use anyhow::{Error, Ok, Result};
|
||||
use lazy_static::lazy_static;
|
||||
use std::env;
|
||||
use surrealdb::{engine::remote::ws::Client, Surreal};
|
||||
use surrealdb::{engine::remote::http::Client, Surreal};
|
||||
use tokio::time::{sleep, Duration};
|
||||
|
||||
mod utils;
|
||||
|
@ -30,6 +30,10 @@ async fn main() -> Result<(), Error> {
|
|||
let pb = init_progress_bar::create_pb().await;
|
||||
let reader = File_Format::new(&WIKIDATA_FILE_FORMAT).reader(&WIKIDATA_FILE_NAME)?;
|
||||
|
||||
tokio::fs::create_dir_all("data/temp").await?;
|
||||
tokio::fs::remove_dir_all("data/temp").await?;
|
||||
tokio::fs::create_dir_all("data/temp").await?;
|
||||
|
||||
CREATE_VERSION
|
||||
.run(
|
||||
None::<Surreal<Client>>,
|
||||
|
|
12
src/utils.rs
12
src/utils.rs
|
@ -1,4 +1,5 @@
|
|||
use anyhow::{Error, Result};
|
||||
use core::panic;
|
||||
use futures::future::join_all;
|
||||
use indicatif::ProgressBar;
|
||||
use lazy_static::lazy_static;
|
||||
|
@ -21,7 +22,7 @@ lazy_static! {
|
|||
.parse()
|
||||
.expect("Failed to parse OVERWRITE_DB");
|
||||
static ref FILTER_PATH: String =
|
||||
env::var("FILTER_PATH").unwrap_or("../filter.surql".to_string());
|
||||
env::var("FILTER_PATH").unwrap_or("data/filter.surql".to_string());
|
||||
}
|
||||
|
||||
pub async fn create_entity(db: &Surreal<impl Connection>, line: &str) -> Result<(), Error> {
|
||||
|
@ -108,7 +109,7 @@ impl CreateVersion {
|
|||
}
|
||||
}
|
||||
None => {
|
||||
let db = match init_db::create_db_ws().await {
|
||||
let db = match init_db::create_db_remote().await {
|
||||
Ok(db) => db,
|
||||
Err(_) => continue,
|
||||
};
|
||||
|
@ -141,6 +142,12 @@ impl CreateVersion {
|
|||
.create_bulk_filter(db, chunk, pb, batch_size)
|
||||
.await
|
||||
.is_ok(),
|
||||
// CreateVersion::BulkFilter => {
|
||||
// if let Err(err) = self.create_bulk_filter(db, chunk, pb, batch_size).await {
|
||||
// panic!("Failed to create entities: {}", err);
|
||||
// }
|
||||
// true
|
||||
// }
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -233,7 +240,6 @@ impl CreateVersion {
|
|||
.collect();
|
||||
|
||||
let file_path = format!("data/temp/{}.surql", file_name);
|
||||
tokio::fs::create_dir_all("data/temp").await?;
|
||||
|
||||
db_mem.export(&file_path).await?;
|
||||
db.import(&file_path).await?;
|
||||
|
|
|
@ -5,7 +5,7 @@ use std::env;
|
|||
use surrealdb::{
|
||||
engine::{
|
||||
local::{Db, Mem},
|
||||
remote::ws::{Client, Ws},
|
||||
remote::http::{Client, Http},
|
||||
},
|
||||
opt::auth::Root,
|
||||
Surreal,
|
||||
|
@ -18,8 +18,8 @@ lazy_static! {
|
|||
env::var("WIKIDATA_DB_PORT").expect("WIKIDATA_DB_PORT not set");
|
||||
}
|
||||
|
||||
pub async fn create_db_ws() -> Result<Surreal<Client>, Error> {
|
||||
let db = Surreal::new::<Ws>(WIKIDATA_DB_PORT.as_str()).await?;
|
||||
pub async fn create_db_remote() -> Result<Surreal<Client>, Error> {
|
||||
let db = Surreal::new::<Http>(WIKIDATA_DB_PORT.as_str()).await?;
|
||||
|
||||
db.signin(Root {
|
||||
username: &DB_USER,
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
use indicatif::{ProgressBar, ProgressState, ProgressStyle};
|
||||
|
||||
pub async fn create_pb() -> ProgressBar {
|
||||
let total_size = 110_000_000;
|
||||
let total_size = 112_500_000;
|
||||
let pb = ProgressBar::new(total_size);
|
||||
pb.set_style(
|
||||
ProgressStyle::with_template(
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue