fix: file path, swap from ws to http

This commit is contained in:
Elijah McMorris 2024-08-29 17:59:46 -07:00
parent acef3f8f3b
commit 53e3ffe0dd
Signed by: NexVeridian
SSH key fingerprint: SHA256:bsA1SKZxuEcEVHAy3gY1HUeM5ykRJl0U0kQHQn0hMg8
12 changed files with 29 additions and 15 deletions

View file

@ -13,6 +13,7 @@ on:
env:
CARGO_TERM_COLOR: always
NIX_CONFIG: 'download-buffer-size = 5000MB'
jobs:
check:

View file

@ -20,6 +20,7 @@ env:
REGISTRY: ghcr.io
# github.repository as <account>/<repo>
IMAGE_NAME: ${{ github.repository }}
NIX_CONFIG: 'download-buffer-size = 5000MB'
jobs:
build:

2
.gitignore vendored
View file

@ -105,5 +105,3 @@ venv.bak/
/data
/target
filter.surql

View file

@ -23,7 +23,7 @@ Run tests with `nix flake check` or `cargo t`
Remove the cargo cache for buildkit with `docker builder prune --filter type=exec.cachemount`
### View Progress
`docker attach wikidata-to-surrealdb`
`make view`
# License
All code in this repository is dual-licensed under either [LICENSE-MIT](./LICENSE-MIT) or [LICENSE-APACHE](./LICENSE-Apache) at your option. This means you can select the license you prefer. [Why dual license](https://github.com/bevyengine/bevy/issues/2373).

2
Cargo.lock generated
View file

@ -3438,6 +3438,7 @@ dependencies = [
"thiserror",
"tokio",
"tokio-tungstenite",
"tokio-util",
"tracing",
"trice",
"url",
@ -3818,6 +3819,7 @@ checksum = "9cf6b47b3771c49ac75ad09a6162f53ad4b8088b76ac60e8ec1455b31a189fe1"
dependencies = [
"bytes 1.7.1",
"futures-core",
"futures-io",
"futures-sink",
"pin-project-lite",
"tokio",

View file

@ -8,7 +8,7 @@ license = "MIT OR Apache-2.0"
anyhow = "1.0"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
surrealdb-alpha = { version = "2.0.9", features = ["protocol-ws", "kv-mem"] }
surrealdb-alpha = { version = "2.0.9", features = ["protocol-http", "kv-mem"] }
tokio = { version = "1.39", features = ["fs", "time"] }
futures = "0.3"
wikidata = "1.1"

View file

@ -27,7 +27,9 @@ Create data folder next to docker-compose.yml and .env, place data inside, and s
├── data
│ ├── Entity.json
│ ├── latest-all.json.bz2
│ └── surrealdb
│ ├── filter.surql
│ ├── surrealdb
│ └── temp
├── Makefile
├── docker-compose.yml
└── .env
@ -54,7 +56,7 @@ WIKIDATA_DB_PORT=surrealdb:8000
# true=overwrite existing data, false=skip if already exists
OVERWRITE_DB=false
CREATE_VERSION=Bulk
#FILTER_PATH=../filter.surql
#FILTER_PATH=data/filter.surql
```
The value of the `CREATE_VERSION` env variable must be the name of a variant of the `CreateVersion` enum (for example `Bulk` or `BulkFilter`).

View file

@ -1,7 +1,7 @@
use anyhow::{Error, Ok, Result};
use lazy_static::lazy_static;
use std::env;
use surrealdb::{engine::remote::ws::Client, Surreal};
use surrealdb::{engine::remote::http::Client, Surreal};
use tokio::time::{sleep, Duration};
mod utils;
@ -30,6 +30,10 @@ async fn main() -> Result<(), Error> {
let pb = init_progress_bar::create_pb().await;
let reader = File_Format::new(&WIKIDATA_FILE_FORMAT).reader(&WIKIDATA_FILE_NAME)?;
tokio::fs::create_dir_all("data/temp").await?;
tokio::fs::remove_dir_all("data/temp").await?;
tokio::fs::create_dir_all("data/temp").await?;
CREATE_VERSION
.run(
None::<Surreal<Client>>,

View file

@ -1,4 +1,5 @@
use anyhow::{Error, Result};
use core::panic;
use futures::future::join_all;
use indicatif::ProgressBar;
use lazy_static::lazy_static;
@ -21,7 +22,7 @@ lazy_static! {
.parse()
.expect("Failed to parse OVERWRITE_DB");
static ref FILTER_PATH: String =
env::var("FILTER_PATH").unwrap_or("../filter.surql".to_string());
env::var("FILTER_PATH").unwrap_or("data/filter.surql".to_string());
}
pub async fn create_entity(db: &Surreal<impl Connection>, line: &str) -> Result<(), Error> {
@ -108,7 +109,7 @@ impl CreateVersion {
}
}
None => {
let db = match init_db::create_db_ws().await {
let db = match init_db::create_db_remote().await {
Ok(db) => db,
Err(_) => continue,
};
@ -141,6 +142,12 @@ impl CreateVersion {
.create_bulk_filter(db, chunk, pb, batch_size)
.await
.is_ok(),
// CreateVersion::BulkFilter => {
// if let Err(err) = self.create_bulk_filter(db, chunk, pb, batch_size).await {
// panic!("Failed to create entities: {}", err);
// }
// true
// }
}
}
@ -233,7 +240,6 @@ impl CreateVersion {
.collect();
let file_path = format!("data/temp/{}.surql", file_name);
tokio::fs::create_dir_all("data/temp").await?;
db_mem.export(&file_path).await?;
db.import(&file_path).await?;

View file

@ -5,7 +5,7 @@ use std::env;
use surrealdb::{
engine::{
local::{Db, Mem},
remote::ws::{Client, Ws},
remote::http::{Client, Http},
},
opt::auth::Root,
Surreal,
@ -18,8 +18,8 @@ lazy_static! {
env::var("WIKIDATA_DB_PORT").expect("WIKIDATA_DB_PORT not set");
}
pub async fn create_db_ws() -> Result<Surreal<Client>, Error> {
let db = Surreal::new::<Ws>(WIKIDATA_DB_PORT.as_str()).await?;
pub async fn create_db_remote() -> Result<Surreal<Client>, Error> {
let db = Surreal::new::<Http>(WIKIDATA_DB_PORT.as_str()).await?;
db.signin(Root {
username: &DB_USER,

View file

@ -1,7 +1,7 @@
use indicatif::{ProgressBar, ProgressState, ProgressStyle};
pub async fn create_pb() -> ProgressBar {
let total_size = 110_000_000;
let total_size = 112_500_000;
let pb = ProgressBar::new(total_size);
pb.set_style(
ProgressStyle::with_template(

View file

@ -5,4 +5,4 @@ let $entity = return (select id from $delete).id;
let $claims = return (select claims from $delete).claims;
delete $claims;
delete $entity;
delete $entity;