mirror of
https://github.com/NexVeridian/wikidata-to-surrealdb.git
synced 2025-09-02 09:59:13 +00:00
fix!: remove OVERWRITE_DB, swap to query
This commit is contained in:
parent
d9803555d1
commit
8df3ab1d74
5 changed files with 28 additions and 61 deletions
20
Cargo.lock
generated
20
Cargo.lock
generated
|
@ -216,9 +216,9 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "async-graphql"
|
name = "async-graphql"
|
||||||
version = "7.0.10"
|
version = "7.0.11"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "a19415d9541f1758f39bdf0c732848beb7e2e39df9b32f90c6635882c3f9173a"
|
checksum = "0ba6d24703c5adc5ba9116901b92ee4e4c0643c01a56c4fd303f3818638d7449"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"async-graphql-derive",
|
"async-graphql-derive",
|
||||||
"async-graphql-parser",
|
"async-graphql-parser",
|
||||||
|
@ -247,9 +247,9 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "async-graphql-derive"
|
name = "async-graphql-derive"
|
||||||
version = "7.0.10"
|
version = "7.0.11"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "73a85254454f63ae1e5a475afff931465f11bf76d19fb5bb1b1d0d6a2f2b8db0"
|
checksum = "a94c2d176893486bd37cd1b6defadd999f7357bf5804e92f510c08bcf16c538f"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"Inflector",
|
"Inflector",
|
||||||
"async-graphql-parser",
|
"async-graphql-parser",
|
||||||
|
@ -264,9 +264,9 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "async-graphql-parser"
|
name = "async-graphql-parser"
|
||||||
version = "7.0.10"
|
version = "7.0.11"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "2e94b202e404d18429c8482d61f64cb0a8639fd1e7c2caf2b258f035e0b7caff"
|
checksum = "79272bdbf26af97866e149f05b2b546edb5c00e51b5f916289931ed233e208ad"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"async-graphql-value",
|
"async-graphql-value",
|
||||||
"pest",
|
"pest",
|
||||||
|
@ -276,9 +276,9 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "async-graphql-value"
|
name = "async-graphql-value"
|
||||||
version = "7.0.10"
|
version = "7.0.11"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "a43a7bbb0ddea47c6f51913eba6e17a093b34e000588a93bb80a978ad129f3e9"
|
checksum = "ef5ec94176a12a8cbe985cd73f2e54dc9c702c88c766bdef12f1f3a67cedbee1"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"bytes 1.7.2",
|
"bytes 1.7.2",
|
||||||
"indexmap 2.5.0",
|
"indexmap 2.5.0",
|
||||||
|
@ -2752,9 +2752,9 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "reblessive"
|
name = "reblessive"
|
||||||
version = "0.4.0"
|
version = "0.4.1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "568fde39e6aec674be99c9dd38b4c79040faf31038bd5a41ab1908db00c2319b"
|
checksum = "1d4f118ca848dfd632a8c0883f9aacd6b58da548eb0629a78cafee3d330938da"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "redox_syscall"
|
name = "redox_syscall"
|
||||||
|
|
|
@ -53,8 +53,6 @@ WIKIDATA_FILE_FORMAT=bz2
|
||||||
WIKIDATA_FILE_NAME=data/latest-all.json.bz2
|
WIKIDATA_FILE_NAME=data/latest-all.json.bz2
|
||||||
# If not using docker file for Wikidata to SurrealDB, use 0.0.0.0:8000
|
# If not using docker file for Wikidata to SurrealDB, use 0.0.0.0:8000
|
||||||
WIKIDATA_DB_PORT=surrealdb:8000
|
WIKIDATA_DB_PORT=surrealdb:8000
|
||||||
# true=overwrite existing data, false=skip if already exists
|
|
||||||
OVERWRITE_DB=false
|
|
||||||
CREATE_VERSION=Bulk
|
CREATE_VERSION=Bulk
|
||||||
#FILTER_PATH=data/filter.surql
|
#FILTER_PATH=data/filter.surql
|
||||||
```
|
```
|
||||||
|
|
18
flake.lock
generated
18
flake.lock
generated
|
@ -3,11 +3,11 @@
|
||||||
"advisory-db": {
|
"advisory-db": {
|
||||||
"flake": false,
|
"flake": false,
|
||||||
"locked": {
|
"locked": {
|
||||||
"lastModified": 1727306865,
|
"lastModified": 1727353582,
|
||||||
"narHash": "sha256-okbcHbpPc9wDVrppp8kQDOWd+PBwlHNkbiCLQ/GArvM=",
|
"narHash": "sha256-2csMEEOZhvowVKZNBHk1kMJqk72ZMrPj9LQYCzP6EKs=",
|
||||||
"owner": "rustsec",
|
"owner": "rustsec",
|
||||||
"repo": "advisory-db",
|
"repo": "advisory-db",
|
||||||
"rev": "6960f548f3abd498f948105b627d33065e23e37c",
|
"rev": "cb905e6e405834bdff1eb1e20c9b10edb5403889",
|
||||||
"type": "github"
|
"type": "github"
|
||||||
},
|
},
|
||||||
"original": {
|
"original": {
|
||||||
|
@ -39,11 +39,11 @@
|
||||||
"rust-analyzer-src": []
|
"rust-analyzer-src": []
|
||||||
},
|
},
|
||||||
"locked": {
|
"locked": {
|
||||||
"lastModified": 1727245890,
|
"lastModified": 1727332394,
|
||||||
"narHash": "sha256-B4gUhZxqdn24PqL7z7ZuvLOS84HVskhKRByWdgA4/RI=",
|
"narHash": "sha256-dBYQD4DPxu/hBndSbfMA5HhHrVnrxrW9Ju8R3augGzw=",
|
||||||
"owner": "nix-community",
|
"owner": "nix-community",
|
||||||
"repo": "fenix",
|
"repo": "fenix",
|
||||||
"rev": "de3acda8b67b92abeeb35ac236924afd959874ad",
|
"rev": "60a35a47e8ae3721efaae2229ec6037e3fde2d17",
|
||||||
"type": "github"
|
"type": "github"
|
||||||
},
|
},
|
||||||
"original": {
|
"original": {
|
||||||
|
@ -72,11 +72,11 @@
|
||||||
},
|
},
|
||||||
"nixpkgs": {
|
"nixpkgs": {
|
||||||
"locked": {
|
"locked": {
|
||||||
"lastModified": 1727173215,
|
"lastModified": 1727296349,
|
||||||
"narHash": "sha256-OtMlWYCqBDbnEsByoows785Gem9CSMiXYEBiKKtStk4=",
|
"narHash": "sha256-C3SRU3GMDNII9l16o4+nkybuxaDX4x5TBypwmmUBCo0=",
|
||||||
"owner": "NixOS",
|
"owner": "NixOS",
|
||||||
"repo": "nixpkgs",
|
"repo": "nixpkgs",
|
||||||
"rev": "965289e5e07243f1cde3212d8bcaf726d36c5c46",
|
"rev": "fe866c653c24adf1520628236d4e70bbb2fdd949",
|
||||||
"type": "github"
|
"type": "github"
|
||||||
},
|
},
|
||||||
"original": {
|
"original": {
|
||||||
|
|
48
src/utils.rs
48
src/utils.rs
|
@ -17,20 +17,8 @@ pub mod init_reader;
|
||||||
mod tables;
|
mod tables;
|
||||||
use tables::*;
|
use tables::*;
|
||||||
|
|
||||||
static OVERWRITE_DB: OnceCell<bool> = OnceCell::const_new();
|
|
||||||
static FILTER_PATH: OnceCell<String> = OnceCell::const_new();
|
static FILTER_PATH: OnceCell<String> = OnceCell::const_new();
|
||||||
|
|
||||||
async fn get_overwrite_db() -> bool {
|
|
||||||
*OVERWRITE_DB
|
|
||||||
.get_or_init(|| async {
|
|
||||||
env::var("OVERWRITE_DB")
|
|
||||||
.expect("OVERWRITE_DB not set")
|
|
||||||
.parse::<bool>()
|
|
||||||
.expect("Failed to parse OVERWRITE_DB")
|
|
||||||
})
|
|
||||||
.await
|
|
||||||
}
|
|
||||||
|
|
||||||
async fn get_filter_path() -> &'static String {
|
async fn get_filter_path() -> &'static String {
|
||||||
FILTER_PATH
|
FILTER_PATH
|
||||||
.get_or_init(|| async {
|
.get_or_init(|| async {
|
||||||
|
@ -160,33 +148,15 @@ impl CreateVersion {
|
||||||
claims_vec.push(claims);
|
claims_vec.push(claims);
|
||||||
}
|
}
|
||||||
|
|
||||||
if get_overwrite_db().await {
|
db.query("INSERT INTO Entity $entity_vec RETURN NONE;")
|
||||||
db.upsert::<Vec<EntityMini>>("Entity")
|
.bind(("entity_vec", entity_vec))
|
||||||
.content(entity_vec)
|
.query("INSERT INTO Claims $claims_vec RETURN NONE;")
|
||||||
.await?;
|
.bind(("claims_vec", claims_vec))
|
||||||
db.upsert::<Vec<Claims>>("Claims")
|
.query("INSERT INTO Property $property_vec RETURN NONE;")
|
||||||
.content(claims_vec)
|
.bind(("property_vec", property_vec))
|
||||||
.await?;
|
.query("INSERT INTO Lexeme $lexeme_vec RETURN NONE;")
|
||||||
db.upsert::<Vec<EntityMini>>("Property")
|
.bind(("lexeme_vec", lexeme_vec))
|
||||||
.content(property_vec)
|
.await?;
|
||||||
.await?;
|
|
||||||
db.upsert::<Vec<EntityMini>>("Lexeme")
|
|
||||||
.content(lexeme_vec)
|
|
||||||
.await?;
|
|
||||||
} else {
|
|
||||||
db.insert::<Vec<EntityMini>>("Entity")
|
|
||||||
.content(entity_vec)
|
|
||||||
.await?;
|
|
||||||
db.insert::<Vec<Claims>>("Claims")
|
|
||||||
.content(claims_vec)
|
|
||||||
.await?;
|
|
||||||
db.insert::<Vec<EntityMini>>("Property")
|
|
||||||
.content(property_vec)
|
|
||||||
.await?;
|
|
||||||
db.insert::<Vec<EntityMini>>("Lexeme")
|
|
||||||
.content(lexeme_vec)
|
|
||||||
.await?;
|
|
||||||
}
|
|
||||||
|
|
||||||
if let Some(ref p) = pb {
|
if let Some(ref p) = pb {
|
||||||
p.inc(batch_size as u64)
|
p.inc(batch_size as u64)
|
||||||
|
|
|
@ -8,7 +8,6 @@ use wikidata_to_surrealdb::utils::*;
|
||||||
|
|
||||||
async fn inti_db() -> Result<Surreal<Db>, Error> {
|
async fn inti_db() -> Result<Surreal<Db>, Error> {
|
||||||
env::set_var("WIKIDATA_LANG", "en");
|
env::set_var("WIKIDATA_LANG", "en");
|
||||||
env::set_var("OVERWRITE_DB", "false");
|
|
||||||
|
|
||||||
let db = init_db::create_db_mem().await?;
|
let db = init_db::create_db_mem().await?;
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue