mirror of
https://github.com/NexVeridian/wikidata-to-surrealdb.git
synced 2025-09-02 09:59:13 +00:00
move claims to claims table
This commit is contained in:
parent
f0716081fe
commit
60cbb47a10
2 changed files with 59 additions and 40 deletions
15
src/main.rs
15
src/main.rs
|
@ -1,15 +1,13 @@
|
||||||
use anyhow::{Error, Result};
|
use anyhow::{Error, Ok, Result};
|
||||||
use dotenv_codegen::dotenv;
|
use dotenv_codegen::dotenv;
|
||||||
use serde_json::{from_str, Value};
|
use serde_json::{from_str, Value};
|
||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
use std::io::{BufRead, BufReader};
|
use std::io::{BufRead, BufReader};
|
||||||
use surrealdb::engine::remote::ws::Ws;
|
use surrealdb::{engine::remote::ws::Ws, opt::auth::Root, Surreal};
|
||||||
use surrealdb::opt::auth::Root;
|
use wikidata::Entity;
|
||||||
use surrealdb::Surreal;
|
|
||||||
|
|
||||||
mod utils;
|
mod utils;
|
||||||
use utils::*;
|
use utils::*;
|
||||||
use wikidata::Entity;
|
|
||||||
|
|
||||||
#[tokio::main]
|
#[tokio::main]
|
||||||
async fn main() -> Result<(), Error> {
|
async fn main() -> Result<(), Error> {
|
||||||
|
@ -23,7 +21,7 @@ async fn main() -> Result<(), Error> {
|
||||||
|
|
||||||
db.use_ns("wikidata").use_db("wikidata").await?;
|
db.use_ns("wikidata").use_db("wikidata").await?;
|
||||||
|
|
||||||
let file = File::open("data/w.json")?;
|
let file = File::open("data/ex2.json")?;
|
||||||
let reader = BufReader::new(file);
|
let reader = BufReader::new(file);
|
||||||
|
|
||||||
for line in reader.lines() {
|
for line in reader.lines() {
|
||||||
|
@ -35,10 +33,13 @@ async fn main() -> Result<(), Error> {
|
||||||
let json: Value = from_str(&line)?;
|
let json: Value = from_str(&line)?;
|
||||||
let data = Entity::from_json(json).expect("Failed to parse JSON");
|
let data = Entity::from_json(json).expect("Failed to parse JSON");
|
||||||
|
|
||||||
let (id, data) = EntityMini::from_entity(data);
|
let (id, claims, data) = EntityMini::from_entity(data);
|
||||||
|
|
||||||
let _: Option<EntityMini> = db.delete(&id).await?;
|
let _: Option<EntityMini> = db.delete(&id).await?;
|
||||||
let _: Option<EntityMini> = db.create(&id).content(data.clone()).await?;
|
let _: Option<EntityMini> = db.create(&id).content(data.clone()).await?;
|
||||||
|
|
||||||
|
let _: Option<Claims> = db.delete(&claims.0).await?;
|
||||||
|
let _: Option<Claims> = db.create(&claims.0).content(claims.1).await?;
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
|
|
40
src/utils.rs
40
src/utils.rs
|
@ -1,30 +1,44 @@
|
||||||
|
use dotenv_codegen::dotenv;
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
use surrealdb::sql::Thing;
|
use surrealdb::sql::Thing;
|
||||||
use wikidata::ClaimValueData;
|
use wikidata::{ClaimValue, ClaimValueData, Entity, Lang, Pid, WikiId};
|
||||||
use wikidata::{ClaimValue, Entity, Lang, Pid, WikiId};
|
|
||||||
|
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||||
|
pub struct Claims {
|
||||||
|
pub claims: Vec<(Thing, ClaimValueData)>,
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||||
pub struct EntityMini {
|
pub struct EntityMini {
|
||||||
// In English
|
|
||||||
pub label: String,
|
pub label: String,
|
||||||
pub claims: Vec<(Thing, ClaimValueData)>,
|
pub claims: Thing,
|
||||||
pub description: String,
|
pub description: String,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl EntityMini {
|
impl EntityMini {
|
||||||
pub fn from_entity(entity: Entity) -> (Thing, Self) {
|
pub fn from_entity(entity: Entity) -> (Thing, (Thing, Claims), Self) {
|
||||||
|
let thing_claim = Thing {
|
||||||
|
id: get_id(&entity).id,
|
||||||
|
tb: "Claims".to_string(),
|
||||||
|
};
|
||||||
|
|
||||||
(
|
(
|
||||||
get_id(&entity),
|
get_id(&entity),
|
||||||
|
(
|
||||||
|
thing_claim.clone(),
|
||||||
|
Self::flatten_claims(entity.claims.clone()),
|
||||||
|
),
|
||||||
Self {
|
Self {
|
||||||
label: get_name(&entity),
|
label: get_name(&entity),
|
||||||
claims: Self::flatten_claims(entity.claims.clone()),
|
claims: thing_claim,
|
||||||
description: get_description(&entity).unwrap_or("".to_string()),
|
description: get_description(&entity).unwrap_or("".to_string()),
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn flatten_claims(claims: Vec<(Pid, ClaimValue)>) -> Vec<(Thing, ClaimValueData)> {
|
fn flatten_claims(claims: Vec<(Pid, ClaimValue)>) -> Claims {
|
||||||
claims
|
Claims {
|
||||||
|
claims: claims
|
||||||
.iter()
|
.iter()
|
||||||
.flat_map(|(pid, claim_value)| {
|
.flat_map(|(pid, claim_value)| {
|
||||||
let mut flattened = vec![(
|
let mut flattened = vec![(
|
||||||
|
@ -48,7 +62,8 @@ impl EntityMini {
|
||||||
));
|
));
|
||||||
flattened
|
flattened
|
||||||
})
|
})
|
||||||
.collect()
|
.collect(),
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -66,11 +81,14 @@ fn get_id(entity: &Entity) -> Thing {
|
||||||
fn get_name(entity: &Entity) -> String {
|
fn get_name(entity: &Entity) -> String {
|
||||||
entity
|
entity
|
||||||
.labels
|
.labels
|
||||||
.get(&Lang("en".to_string()))
|
.get(&Lang(dotenv!("WIKIDATA_LANG").to_string()))
|
||||||
.expect("No label found")
|
.expect("No label found")
|
||||||
.to_string()
|
.to_string()
|
||||||
}
|
}
|
||||||
|
|
||||||
fn get_description(entity: &Entity) -> Option<String> {
|
fn get_description(entity: &Entity) -> Option<String> {
|
||||||
entity.descriptions.get(&Lang("en".to_string())).cloned()
|
entity
|
||||||
|
.descriptions
|
||||||
|
.get(&Lang(dotenv!("WIKIDATA_LANG").to_string()))
|
||||||
|
.cloned()
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue