mirror of
https://github.com/NexVeridian/wikidata-to-surrealdb.git
synced 2025-09-02 09:59:13 +00:00
99 lines
2.9 KiB
Rust
99 lines
2.9 KiB
Rust
use anyhow::{Error, Result};
|
|
use serde::{Deserialize, Serialize};
|
|
use serde_json::from_reader;
|
|
use serde_json::Value;
|
|
use std::fs::File;
|
|
use wikidata::ClaimValueData;
|
|
use wikidata::{ClaimValue, Entity, Lang, Pid, WikiId};
|
|
|
|
pub async fn get_entity(path: &str) -> Result<Entity, Error> {
|
|
// From here - https://www.wikidata.org/wiki/Special:EntityData/P1476.json
|
|
let mut file = File::open(path)?;
|
|
let json: Value = from_reader(&mut file)?;
|
|
let data = Entity::from_json(json).expect("Failed to parse JSON");
|
|
Ok(data)
|
|
}
|
|
|
|
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
|
pub struct Id {
|
|
pub entity_type: String,
|
|
pub id: u64,
|
|
}
|
|
|
|
impl Id {
|
|
pub fn to_string(&self) -> (String, String) {
|
|
(self.entity_type.clone(), self.id.to_string())
|
|
}
|
|
}
|
|
|
|
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
|
pub struct EntityMini {
|
|
// In English
|
|
pub label: String,
|
|
pub claims: Vec<(Id, ClaimValueData)>,
|
|
pub description: String,
|
|
}
|
|
|
|
impl EntityMini {
|
|
pub fn from_entity(entity: Entity) -> (Id, Self) {
|
|
(
|
|
get_id(&entity),
|
|
Self {
|
|
label: get_name(&entity),
|
|
claims: Self::flatten_claims(entity.claims.clone()),
|
|
description: get_description(&entity).unwrap_or("".to_string()),
|
|
},
|
|
)
|
|
}
|
|
|
|
fn flatten_claims(claims: Vec<(Pid, ClaimValue)>) -> Vec<(Id, ClaimValueData)> {
|
|
claims
|
|
.iter()
|
|
.flat_map(|(pid, claim_value)| {
|
|
let mut flattened = vec![(
|
|
Id {
|
|
id: pid.0,
|
|
entity_type: "Property".to_string(),
|
|
},
|
|
claim_value.data.clone(),
|
|
)];
|
|
|
|
flattened.extend(claim_value.qualifiers.iter().map(
|
|
|(qualifier_pid, qualifier_value)| {
|
|
(
|
|
Id {
|
|
id: qualifier_pid.0,
|
|
entity_type: "Property".to_string(),
|
|
},
|
|
qualifier_value.clone(),
|
|
)
|
|
},
|
|
));
|
|
flattened
|
|
})
|
|
.collect()
|
|
}
|
|
}
|
|
|
|
fn get_id(entity: &Entity) -> Id {
|
|
let (id, entity_type) = match entity.id {
|
|
WikiId::EntityId(qid) => (qid.0, "Entity".to_string()),
|
|
WikiId::PropertyId(pid) => (pid.0, "Property".to_string()),
|
|
WikiId::LexemeId(lid) => (lid.0, "Lexeme".to_string()),
|
|
_ => todo!("Not implemented"),
|
|
};
|
|
|
|
Id { id, entity_type }
|
|
}
|
|
|
|
fn get_name(entity: &Entity) -> String {
|
|
entity
|
|
.labels
|
|
.get(&Lang("en".to_string()))
|
|
.expect("No label found")
|
|
.to_string()
|
|
}
|
|
|
|
fn get_description(entity: &Entity) -> Option<String> {
|
|
entity.descriptions.get(&Lang("en".to_string())).cloned()
|
|
}
|