fix: file path, swap from ws to http

This commit is contained in:
Elijah McMorris 2024-08-29 17:59:46 -07:00
parent acef3f8f3b
commit 53e3ffe0dd
Signed by: NexVeridian
SSH key fingerprint: SHA256:bsA1SKZxuEcEVHAy3gY1HUeM5ykRJl0U0kQHQn0hMg8
12 changed files with 29 additions and 15 deletions

View file

@ -13,6 +13,7 @@ on:
env: env:
CARGO_TERM_COLOR: always CARGO_TERM_COLOR: always
NIX_CONFIG: 'download-buffer-size = 5000MB'
jobs: jobs:
check: check:

View file

@ -20,6 +20,7 @@ env:
REGISTRY: ghcr.io REGISTRY: ghcr.io
# github.repository as <account>/<repo> # github.repository as <account>/<repo>
IMAGE_NAME: ${{ github.repository }} IMAGE_NAME: ${{ github.repository }}
NIX_CONFIG: 'download-buffer-size = 5000MB'
jobs: jobs:
build: build:

2
.gitignore vendored
View file

@ -105,5 +105,3 @@ venv.bak/
/data /data
/target /target
filter.surql

View file

@ -23,7 +23,7 @@ Run tests with `nix flake check` or `cargo t`
Remove the cargo cache for buildkit with `docker builder prune --filter type=exec.cachemount` Remove the cargo cache for buildkit with `docker builder prune --filter type=exec.cachemount`
### View Progress ### View Progress
`docker attach wikidata-to-surrealdb` `make view`
# License # License
All code in this repository is dual-licensed under either [License-MIT](./LICENSE-MIT) or [LICENSE-APACHE](./LICENSE-Apache) at your option. This means you can select the license you prefer. [Why dual license](https://github.com/bevyengine/bevy/issues/2373). All code in this repository is dual-licensed under either [License-MIT](./LICENSE-MIT) or [LICENSE-APACHE](./LICENSE-Apache) at your option. This means you can select the license you prefer. [Why dual license](https://github.com/bevyengine/bevy/issues/2373).

2
Cargo.lock generated
View file

@ -3438,6 +3438,7 @@ dependencies = [
"thiserror", "thiserror",
"tokio", "tokio",
"tokio-tungstenite", "tokio-tungstenite",
"tokio-util",
"tracing", "tracing",
"trice", "trice",
"url", "url",
@ -3818,6 +3819,7 @@ checksum = "9cf6b47b3771c49ac75ad09a6162f53ad4b8088b76ac60e8ec1455b31a189fe1"
dependencies = [ dependencies = [
"bytes 1.7.1", "bytes 1.7.1",
"futures-core", "futures-core",
"futures-io",
"futures-sink", "futures-sink",
"pin-project-lite", "pin-project-lite",
"tokio", "tokio",

View file

@ -8,7 +8,7 @@ license = "MIT OR Apache-2.0"
anyhow = "1.0" anyhow = "1.0"
serde = { version = "1.0", features = ["derive"] } serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0" serde_json = "1.0"
surrealdb-alpha = { version = "2.0.9", features = ["protocol-ws", "kv-mem"] } surrealdb-alpha = { version = "2.0.9", features = ["protocol-http", "kv-mem"] }
tokio = { version = "1.39", features = ["fs", "time"] } tokio = { version = "1.39", features = ["fs", "time"] }
futures = "0.3" futures = "0.3"
wikidata = "1.1" wikidata = "1.1"

View file

@ -27,7 +27,9 @@ Create data folder next to docker-compose.yml and .env, place data inside, and s
├── data ├── data
│ ├── Entity.json │ ├── Entity.json
│ ├── latest-all.json.bz2 │ ├── latest-all.json.bz2
│ └── surrealdb │ ├── filter.surql
│ ├── surrealdb
│ └── temp
├── Makefile ├── Makefile
├── docker-compose.yml ├── docker-compose.yml
└── .env └── .env
@ -54,7 +56,7 @@ WIKIDATA_DB_PORT=surrealdb:8000
# true=overwrite existing data, false=skip if already exists # true=overwrite existing data, false=skip if already exists
OVERWRITE_DB=false OVERWRITE_DB=false
CREATE_VERSION=Bulk CREATE_VERSION=Bulk
#FILTER_PATH=../filter.surql #FILTER_PATH=data/filter.surql
``` ```
Env string CREATE_VERSION must be in the enum CREATE_VERSION Env string CREATE_VERSION must be in the enum CREATE_VERSION

View file

@ -1,7 +1,7 @@
use anyhow::{Error, Ok, Result}; use anyhow::{Error, Ok, Result};
use lazy_static::lazy_static; use lazy_static::lazy_static;
use std::env; use std::env;
use surrealdb::{engine::remote::ws::Client, Surreal}; use surrealdb::{engine::remote::http::Client, Surreal};
use tokio::time::{sleep, Duration}; use tokio::time::{sleep, Duration};
mod utils; mod utils;
@ -30,6 +30,10 @@ async fn main() -> Result<(), Error> {
let pb = init_progress_bar::create_pb().await; let pb = init_progress_bar::create_pb().await;
let reader = File_Format::new(&WIKIDATA_FILE_FORMAT).reader(&WIKIDATA_FILE_NAME)?; let reader = File_Format::new(&WIKIDATA_FILE_FORMAT).reader(&WIKIDATA_FILE_NAME)?;
tokio::fs::create_dir_all("data/temp").await?;
tokio::fs::remove_dir_all("data/temp").await?;
tokio::fs::create_dir_all("data/temp").await?;
CREATE_VERSION CREATE_VERSION
.run( .run(
None::<Surreal<Client>>, None::<Surreal<Client>>,

View file

@ -1,4 +1,5 @@
use anyhow::{Error, Result}; use anyhow::{Error, Result};
use core::panic;
use futures::future::join_all; use futures::future::join_all;
use indicatif::ProgressBar; use indicatif::ProgressBar;
use lazy_static::lazy_static; use lazy_static::lazy_static;
@ -21,7 +22,7 @@ lazy_static! {
.parse() .parse()
.expect("Failed to parse OVERWRITE_DB"); .expect("Failed to parse OVERWRITE_DB");
static ref FILTER_PATH: String = static ref FILTER_PATH: String =
env::var("FILTER_PATH").unwrap_or("../filter.surql".to_string()); env::var("FILTER_PATH").unwrap_or("data/filter.surql".to_string());
} }
pub async fn create_entity(db: &Surreal<impl Connection>, line: &str) -> Result<(), Error> { pub async fn create_entity(db: &Surreal<impl Connection>, line: &str) -> Result<(), Error> {
@ -108,7 +109,7 @@ impl CreateVersion {
} }
} }
None => { None => {
let db = match init_db::create_db_ws().await { let db = match init_db::create_db_remote().await {
Ok(db) => db, Ok(db) => db,
Err(_) => continue, Err(_) => continue,
}; };
@ -141,6 +142,12 @@ impl CreateVersion {
.create_bulk_filter(db, chunk, pb, batch_size) .create_bulk_filter(db, chunk, pb, batch_size)
.await .await
.is_ok(), .is_ok(),
// CreateVersion::BulkFilter => {
// if let Err(err) = self.create_bulk_filter(db, chunk, pb, batch_size).await {
// panic!("Failed to create entities: {}", err);
// }
// true
// }
} }
} }
@ -233,7 +240,6 @@ impl CreateVersion {
.collect(); .collect();
let file_path = format!("data/temp/{}.surql", file_name); let file_path = format!("data/temp/{}.surql", file_name);
tokio::fs::create_dir_all("data/temp").await?;
db_mem.export(&file_path).await?; db_mem.export(&file_path).await?;
db.import(&file_path).await?; db.import(&file_path).await?;

View file

@ -5,7 +5,7 @@ use std::env;
use surrealdb::{ use surrealdb::{
engine::{ engine::{
local::{Db, Mem}, local::{Db, Mem},
remote::ws::{Client, Ws}, remote::http::{Client, Http},
}, },
opt::auth::Root, opt::auth::Root,
Surreal, Surreal,
@ -18,8 +18,8 @@ lazy_static! {
env::var("WIKIDATA_DB_PORT").expect("WIKIDATA_DB_PORT not set"); env::var("WIKIDATA_DB_PORT").expect("WIKIDATA_DB_PORT not set");
} }
pub async fn create_db_ws() -> Result<Surreal<Client>, Error> { pub async fn create_db_remote() -> Result<Surreal<Client>, Error> {
let db = Surreal::new::<Ws>(WIKIDATA_DB_PORT.as_str()).await?; let db = Surreal::new::<Http>(WIKIDATA_DB_PORT.as_str()).await?;
db.signin(Root { db.signin(Root {
username: &DB_USER, username: &DB_USER,

View file

@ -1,7 +1,7 @@
use indicatif::{ProgressBar, ProgressState, ProgressStyle}; use indicatif::{ProgressBar, ProgressState, ProgressStyle};
pub async fn create_pb() -> ProgressBar { pub async fn create_pb() -> ProgressBar {
let total_size = 110_000_000; let total_size = 112_500_000;
let pb = ProgressBar::new(total_size); let pb = ProgressBar::new(total_size);
pb.set_style( pb.set_style(
ProgressStyle::with_template( ProgressStyle::with_template(

View file

@ -5,4 +5,4 @@ let $entity = return (select id from $delete).id;
let $claims = return (select claims from $delete).claims; let $claims = return (select claims from $delete).claims;
delete $claims; delete $claims;
delete $entity; delete $entity;