mirror of
https://github.com/NexVeridian/wikidata-to-surrealdb.git
synced 2025-09-02 09:59:13 +00:00
flat
This commit is contained in:
parent
47e4dd6b8d
commit
623f1f1d89
2 changed files with 29 additions and 4 deletions
|
@ -3,7 +3,6 @@ use anyhow::{Error, Result};
|
||||||
use dotenv_codegen::dotenv;
|
use dotenv_codegen::dotenv;
|
||||||
use surrealdb::engine::remote::ws::Ws;
|
use surrealdb::engine::remote::ws::Ws;
|
||||||
use surrealdb::opt::auth::Root;
|
use surrealdb::opt::auth::Root;
|
||||||
use surrealdb::sql::Thing;
|
|
||||||
use surrealdb::Surreal;
|
use surrealdb::Surreal;
|
||||||
|
|
||||||
mod utils;
|
mod utils;
|
||||||
|
@ -11,7 +10,7 @@ use utils::*;
|
||||||
|
|
||||||
#[tokio::main]
|
#[tokio::main]
|
||||||
async fn main() -> Result<(), Error> {
|
async fn main() -> Result<(), Error> {
|
||||||
let data = get_entity("data/wiki.json").await?;
|
let data = get_entity("data/e.json").await?;
|
||||||
let (id, data) = EntityMini::from_entity(data);
|
let (id, data) = EntityMini::from_entity(data);
|
||||||
|
|
||||||
let db = Surreal::new::<Ws>("0.0.0.0:8000").await?;
|
let db = Surreal::new::<Ws>("0.0.0.0:8000").await?;
|
||||||
|
|
30
src/utils.rs
30
src/utils.rs
|
@ -3,9 +3,11 @@ use serde::{Deserialize, Serialize};
|
||||||
use serde_json::from_reader;
|
use serde_json::from_reader;
|
||||||
use serde_json::Value;
|
use serde_json::Value;
|
||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
|
use wikidata::ClaimValueData;
|
||||||
use wikidata::{ClaimValue, Entity, Lang, Pid, WikiId};
|
use wikidata::{ClaimValue, Entity, Lang, Pid, WikiId};
|
||||||
|
|
||||||
pub async fn get_entity(path: &str) -> Result<Entity, Error> {
|
pub async fn get_entity(path: &str) -> Result<Entity, Error> {
|
||||||
|
// From here - https://www.wikidata.org/wiki/Special:EntityData/P1476.json
|
||||||
let mut file = File::open(path)?;
|
let mut file = File::open(path)?;
|
||||||
let json: Value = from_reader(&mut file)?;
|
let json: Value = from_reader(&mut file)?;
|
||||||
let data = Entity::from_json(json).expect("Failed to parse JSON");
|
let data = Entity::from_json(json).expect("Failed to parse JSON");
|
||||||
|
@ -28,7 +30,7 @@ impl Id {
|
||||||
pub struct EntityMini {
|
pub struct EntityMini {
|
||||||
// In English
|
// In English
|
||||||
pub label: String,
|
pub label: String,
|
||||||
pub claims: Vec<(Pid, ClaimValue)>,
|
pub claims: Vec<(Id, ClaimValueData)>,
|
||||||
pub description: String,
|
pub description: String,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -38,11 +40,35 @@ impl EntityMini {
|
||||||
get_id(&entity),
|
get_id(&entity),
|
||||||
Self {
|
Self {
|
||||||
label: get_name(&entity),
|
label: get_name(&entity),
|
||||||
claims: entity.claims.clone(),
|
claims: Self::flatten_claims(entity.claims.clone()),
|
||||||
description: get_description(&entity).unwrap_or("".to_string()),
|
description: get_description(&entity).unwrap_or("".to_string()),
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
fn flatten_claims(claims: Vec<(Pid, ClaimValue)>) -> Vec<(Id, ClaimValueData)> {
|
||||||
|
claims
|
||||||
|
.iter()
|
||||||
|
.flat_map(|(pid, claim_value)| {
|
||||||
|
let mut flattened = vec![(
|
||||||
|
Id {
|
||||||
|
id: pid.0,
|
||||||
|
entity_type: "Property".to_string(),
|
||||||
|
},
|
||||||
|
claim_value.data.clone(),
|
||||||
|
)];
|
||||||
|
for (qualifier_pid, qualifier_value) in &claim_value.qualifiers {
|
||||||
|
flattened.push((
|
||||||
|
Id {
|
||||||
|
id: qualifier_pid.0,
|
||||||
|
entity_type: "Property".to_string(),
|
||||||
|
},
|
||||||
|
qualifier_value.clone(),
|
||||||
|
));
|
||||||
|
}
|
||||||
|
flattened
|
||||||
|
})
|
||||||
|
.collect()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn get_id(entity: &Entity) -> Id {
|
fn get_id(entity: &Entity) -> Id {
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue