// wikidata/src/entity.rs

use std::{collections::BTreeMap, str::FromStr};
use crate::ids::{Fid, Lid, Pid, Qid, Sid, WikiId, consts};
use crate::text::{Lang, Text};
use chrono::{DateTime, TimeZone, Utc};
use serde::{Deserialize, Serialize};
use serde_json::Value;
/// A Wikibase entity: an entity with a Qid, a property, or a lexeme.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct Entity {
/// Unique identifier
pub id: WikiId,
/// All of the claims on the entity.
pub claims: Vec<(Pid, ClaimValue)>,
/// The type of the entity.
pub entity_type: EntityType,
/// All of the descriptions in all known languages.
pub descriptions: BTreeMap<Lang, String>,
/// All of the labels in all known languages.
pub labels: BTreeMap<Lang, String>,
/// Known aliases of the item.
pub aliases: BTreeMap<Lang, Vec<String>>,
/// Site links (e.g. to Wikipedia, Wikivoyage, ...).
pub sitelinks: BTreeMap<SiteName, SitelinkValue>,
}
/// The type of entity: normal entity with a Qid, a property with a Pid, or a lexeme with a Lid.
///
/// `EntitySchemas` (with E IDs) are currently unsupported.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[non_exhaustive]
pub enum EntityType {
/// An entity with a Qid.
Entity,
/// An entity with a Pid.
Property,
/// An entity with a Lid.
Lexeme,
}
/// Data relating to a claim value.
#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)]
pub enum ClaimValueData {
/// The ID of a file on Wikimedia Commons.
CommonsMedia(String),
/// Coordinates on some globe.
GlobeCoordinate {
/// Latitude.
lat: f64,
/// Longitude.
lon: f64,
/// The precision of the coordinates, in degrees.
precision: f64,
/// The globe the coordinates are on, usually [Earth](consts::EARTH).
globe: Qid,
},
/// A Wikidata item.
Item(Qid),
/// A Wikidata property.
Property(Pid),
/// A language-less string of text.
String(String),
/// Text with a language.
MonolingualText(Text),
/// The same text, translated across multiple languages.
MultilingualText(Vec<Text>),
/// An external identifier.
ExternalID(String),
/// Some numeric quantity of something.
Quantity {
/// How much.
amount: f64, // technically it could exceed the bound, but meh
/// The lowest possible value. If this isn't present then it is exactly the amount.
lower_bound: Option<f64>,
/// The highest possible value. If this isn't present then it is exactly the amount.
upper_bound: Option<f64>,
/// The units used.
unit: Option<Qid>, // *could* be any IRI but in practice almost all are Wikidata entity IRIs
},
/// A point in time.
DateTime {
/// The time as a Chrono `DateTime`.
date_time: DateTime<chrono::offset::Utc>,
/// The precision of the date:
///
/// | precision | time |
/// | --------- | ---- |
/// | `0` | 1 billion years |
/// | `1` | 100 million years |
/// | `2` | 10 million years |
/// | `3` | 1 million years |
/// | `4` | 100k years |
/// | `5` | 10k years |
/// | `6` | 1000 years |
/// | `7` | 100 years |
/// | `8` | decade |
/// | `9` | year |
/// | `10` | month |
/// | `11` | day |
/// | `12` | hour (deprecated) |
/// | `13` | minute (deprecated) |
/// | `14` | second (deprecated) |
precision: u8,
},
/// A URL.
Url(String),
/// A LaTeX math expression.
MathExpr(String),
/// A geometric shape. The value of the string is currently unspecified.
GeoShape(String),
/// `LilyPond` musical notation.
MusicNotation(String),
/// ID of a file with tabular data on Wikimedia commons.
TabularData(String),
/// A lexeme ID on Wikidata.
Lexeme(Lid),
/// A form ID on Wikidata.
Form(Fid),
/// A sense ID on Wikidata.
Sense(Sid),
/// No value.
#[default]
NoValue,
/// Unknown value.
UnknownValue,
}
/// A statement rank.
#[derive(Debug, Copy, Clone, Eq, PartialEq, PartialOrd, Ord, Default, Serialize, Deserialize)]
pub enum Rank {
/// The deprecated rank, indicating outdated/wrong info. Deprecated claims should usually be
/// ignored.
Deprecated,
/// Normal rank, the default.
#[default]
Normal,
/// Preferred rank, indicating the claim is the most recent or most accurate.
Preferred,
}
impl FromStr for Rank {
type Err = EntityError;
fn from_str(x: &str) -> Result<Self, Self::Err> {
match x {
"normal" => Ok(Self::Normal),
"deprecated" => Ok(Self::Deprecated),
"preferred" => Ok(Self::Preferred),
_ => Err(EntityError::UnknownRank),
}
}
}
/// A group of claims that make up a single reference.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct ReferenceGroup {
/// All of the claims.
pub claims: Vec<(Pid, ClaimValueData)>,
/// The hash associated with the reference group.
pub hash: String,
}
/// A claim value.
#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)]
pub struct ClaimValue {
/// The data of the claim.
pub data: ClaimValueData,
/// The rank of this claim.
pub rank: Rank,
/// The globally unique claim ID.
pub id: String,
/// All of the qualifiers for this claim.
pub qualifiers: Vec<(Pid, ClaimValueData)>,
/// All of the groups of references for this claim.
pub references: Vec<ReferenceGroup>,
}
/// A site name, as used in the sitelinks.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
pub struct SiteName(pub String);
/// A sitelink value.
#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)]
pub struct SitelinkValue {
/// The title of the site link.
pub title: String,
/// The badges of the article.
pub badges: Vec<Qid>,
/// The URL of the article, if present.
pub url: Option<String>,
}
impl Entity {
/// All of the values of "instance of" on the entity.
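///
/// ## Example
/// A minimal sketch, assuming the `items/Q42.json` fixture used by the other doc examples:
/// ```
/// # let j: serde_json::Value = serde_json::from_str(include_str!("../items/Q42.json")).unwrap();
/// # let q42 = wikidata::Entity::from_json(j).unwrap();
/// // Q42 (Douglas Adams) is an instance of Q5 (human).
/// assert!(q42.instances().contains(&wikidata::Qid(5)));
/// ```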
#[must_use]
pub fn instances(&self) -> Vec<Qid> {
let mut instances = Vec::with_capacity(1);
for (pid, claim) in &self.claims {
if *pid == consts::INSTANCE_OF
&& let ClaimValueData::Item(qid) = claim.data
{
instances.push(qid);
}
}
instances.shrink_to_fit();
instances
}
/// When the entity started existing, based on its date of birth claim.
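///
/// ## Example
/// A minimal sketch, assuming the `items/Q42.json` fixture used by the other doc examples:
/// ```
/// # let j: serde_json::Value = serde_json::from_str(include_str!("../items/Q42.json")).unwrap();
/// # let q42 = wikidata::Entity::from_json(j).unwrap();
/// // Q42 has a date of birth claim, so a start time is available.
/// assert!(q42.start_time().is_some());
/// ```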
#[must_use]
pub fn start_time(&self) -> Option<DateTime<chrono::offset::Utc>> {
for (pid, claim) in &self.claims {
if *pid == consts::DATE_OF_BIRTH
&& let ClaimValueData::DateTime { date_time, .. } = claim.data
{
return Some(date_time);
}
}
None
}
/// When the entity stopped existing, based on its date of death claim.
#[must_use]
pub fn end_time(&self) -> Option<DateTime<chrono::offset::Utc>> {
for (pid, claim) in &self.claims {
if *pid == consts::DATE_OF_DEATH
&& let ClaimValueData::DateTime { date_time, .. } = claim.data
{
return Some(date_time);
}
}
None
}
/// Construct an entity from the Wikibase JSON representation. The input can either be an
/// object directly containing the Wikibase entity representation, or a multi-entity object
/// returned by some endpoints such as `Special:EntityData`. Multi-entity objects must only
/// contain one entity.
///
/// # Errors
/// If the JSON representation can't be parsed to an `Entity`, an `EntityError` will be returned.
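///
/// ## Example
/// A minimal sketch, assuming the bundled `items/Q42.json` fixture used by the other doc
/// examples in this module:
/// ```
/// let j: serde_json::Value = serde_json::from_str(include_str!("../items/Q42.json")).unwrap();
/// let q42 = wikidata::Entity::from_json(j).unwrap();
/// assert_eq!(q42.entity_type, wikidata::EntityType::Entity);
/// ```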
pub fn from_json(mut json: Value) -> Result<Self, EntityError> {
let mut json = match json.get_mut("entities") {
Some(ents) => {
let obj = ents.as_object_mut().ok_or(EntityError::ExpectedObject)?;
match obj.len() {
0 => return Err(EntityError::NoEntities),
1 => obj
.iter_mut()
.next()
.ok_or(EntityError::ExpectedObject)?
.1
.take(),
_ => return Err(EntityError::MultipleEntities),
}
}
None => json,
};
let raw_id: &str = json
.get_mut("id")
.ok_or(EntityError::ExpectedObject)?
.as_str()
.ok_or(EntityError::ExpectedKeyvalTextString)?;
let id: WikiId = match WikiId::from_str(raw_id) {
Ok(id) => id,
_ => return Err(EntityError::NoId),
};
macro_rules! text_keyval {
($key:literal) => {{
match json.get($key) {
Some(json_map) => {
let json_map = json_map.as_object().ok_or(EntityError::ExpectedObject)?;
let mut map = BTreeMap::new();
for (key, val) in json_map {
map.insert(
Lang(key.to_string()),
val.as_object()
.ok_or(EntityError::ExpectedObject)?
.get("value")
.ok_or(EntityError::ExpectedLangString)?
.as_str()
.ok_or(EntityError::ExpectedKeyvalTextString)?
.to_string(),
);
}
map
}
None => BTreeMap::new(),
}
}};
}
let labels = text_keyval!("labels");
let descriptions = text_keyval!("descriptions");
let aliases = match json.get("aliases") {
Some(json_map) => {
let json_map = json_map.as_object().ok_or(EntityError::ExpectedObject)?;
let mut map = BTreeMap::new();
for (key, val) in json_map {
map.insert(
Lang(key.to_string()),
val.as_array()
.ok_or(EntityError::ExpectedAliasArray)?
.iter()
.filter_map(|val| {
Some(
val.get("value")
.ok_or(EntityError::ExpectedTextValue)
.ok()?
.as_str()
.ok_or(EntityError::ExpectedAliasString)
.ok()?
.to_string(),
)
})
.collect(),
);
}
map
}
None => BTreeMap::new(),
};
let sitelinks = match json.get("sitelinks") {
Some(json_map) => {
let json_map = json_map.as_object().ok_or(EntityError::ExpectedObject)?;
let mut map = BTreeMap::new();
for (key, val) in json_map {
let obj = val.as_object().ok_or(EntityError::ExpectedObject)?;
map.insert(
SiteName(key.to_string()),
SitelinkValue {
title: obj
.get("title")
.ok_or(EntityError::ExpectedSiteTitleString)?
.as_str()
.ok_or(EntityError::ExpectedKeyvalTextString)?
.to_string(),
badges: obj
.get("badges")
.ok_or(EntityError::ExpectedSiteBadgesArray)?
.as_array()
.ok_or(EntityError::ExpectedSiteBadgesArray)?
.iter()
.filter_map(|val| {
let raw_id = val
.as_str()
.ok_or(EntityError::ExpectedKeyvalTextString)
.ok()?;
Qid::from_str(raw_id).ok()
})
.collect(),
url: obj.get("url").map(std::string::ToString::to_string),
},
);
}
map
}
None => BTreeMap::new(),
};
let entity_type = match &json.get("type").ok_or(EntityError::NoEntityType)?.as_str() {
Some("item") => EntityType::Entity,
Some("property") => EntityType::Property,
Some("lexeme") => EntityType::Lexeme,
_ => return Err(EntityError::NoEntityType),
};
let mut claims = Vec::new();
for (pid, claim_list) in json
.get_mut("claims")
.ok_or(EntityError::NoClaims)?
.as_object_mut()
.ok_or(EntityError::ExpectedObject)?
{
let pid = Pid::from_str(pid).map_err(|_| EntityError::BadId)?;
for claim in claim_list
.as_array_mut()
.ok_or(EntityError::ExpectedClaimArray)?
.iter_mut()
{
let references =
if let Some(ref_groups) = claim.get("references").and_then(Value::as_array) {
let mut references = Vec::with_capacity(ref_groups.len());
for group in ref_groups {
let snaks = group
.get("snaks")
.ok_or(EntityError::NoReferenceSnaks)?
.as_object()
.ok_or(EntityError::ExpectedObject)?;
let mut claims = Vec::with_capacity(snaks.len());
for pid in group
.get("snaks-order")
.and_then(Value::as_array)
.ok_or(EntityError::NoSnakOrder)?
{
let pid = pid.as_str().ok_or(EntityError::ExpectedPidString)?;
for subsnak in snaks
.get(pid)
.ok_or(EntityError::SnaksOrderIncludesNonSnak)?
.as_array()
.ok_or(EntityError::ExpectedReferenceArray)?
{
claims.push((
Pid::from_str(pid).map_err(|_| EntityError::BadId)?,
ClaimValueData::parse_snak(subsnak.clone())?,
));
}
}
claims.shrink_to_fit();
references.push(ReferenceGroup {
claims,
hash: group
.get("hash")
.ok_or(EntityError::NoHash)?
.as_str()
.ok_or(EntityError::ExpectedHashString)?
.to_string(),
});
}
references
} else {
Vec::new()
};
let qualifiers = if let Some(order) =
claim.get("qualifiers-order").and_then(Value::as_array)
{
let qualifiers_json = claim
.get("qualifiers")
.ok_or(EntityError::QualifiersOrderButNoObject)?
.as_object()
.ok_or(EntityError::ExpectedObject)?;
let mut qualifiers = Vec::new();
for pid in order {
let pid = pid.as_str().ok_or(EntityError::NoId)?;
let pid_id = Pid::from_str(pid).map_err(|_| EntityError::BadId)?;
let qual_list = qualifiers_json
.get(pid)
.and_then(Value::as_array)
.ok_or(EntityError::QualiferOrderNamesNonQualifier)?;
for qual in qual_list {
qualifiers.push((pid_id, ClaimValueData::parse_snak(qual.clone())?));
}
}
qualifiers
} else {
Vec::new()
};
claims.push((
pid,
ClaimValue {
id: claim
.get("id")
.ok_or(EntityError::NoClaimId)?
.as_str()
.ok_or(EntityError::NoClaimId)?
.to_string(),
rank: Rank::from_str(
claim
.get("rank")
.ok_or(EntityError::NoRank)?
.as_str()
.ok_or(EntityError::NoRank)?,
)?,
data: ClaimValueData::parse_snak(
claim
.get_mut("mainsnak")
.ok_or(EntityError::MissingMainsnak)?
.take(),
)?,
qualifiers,
references,
},
));
}
}
Ok(Self {
id,
claims,
entity_type,
descriptions,
labels,
aliases,
sitelinks,
})
}
/// Returns an iterator of references to all the claim values for a property ID.
///
/// ## Example
/// ```
/// # let j: serde_json::Value = serde_json::from_str(include_str!("../items/Q42.json")).unwrap();
/// # let q42 = wikidata::Entity::from_json(j).unwrap();
/// for claim_value in q42.pid_claims(wikidata::Pid(69)) {
/// if let wikidata::ClaimValueData::Item(value_qid) = claim_value.data {
/// assert!(value_qid == wikidata::Qid(4961791) || value_qid == wikidata::Qid(691283));
/// } else {
/// panic!("Claim value data is not an item");
/// }
/// }
/// ```
pub fn pid_claims(&self, pid: Pid) -> impl Iterator<Item = &ClaimValue> {
self.claims
.iter()
.filter(move |(claim_pid, _)| *claim_pid == pid)
.map(|(_, value)| value)
}
/// Find a claim by its ID.
///
/// ## Example
///
/// ```
/// # let j: serde_json::Value = serde_json::from_str(include_str!("../items/Q42.json")).unwrap();
/// # let q42 = wikidata::Entity::from_json(j).unwrap();
/// let (pid, claim_value) = q42.claim_by_id("Q42$285E0C13-9674-4131-9556-51B316A57AEE").unwrap();
/// assert_eq!(pid, wikidata::Pid(1411));
/// assert_eq!(claim_value.rank, wikidata::Rank::Normal);
/// ```
#[must_use]
pub fn claim_by_id(&self, id: &str) -> Option<(Pid, &ClaimValue)> {
self.claims
.iter()
.find(|(_, value)| value.id == id)
.map(|(pid, value)| (*pid, value))
}
}
/// An error related to entity parsing/creation.
#[derive(Debug, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub enum EntityError {
/// A float couldn't be parsed
FloatParse,
/// A string was expected but not found
ExpectedString,
/// An object was expected but not found
ExpectedObject,
/// An array was expected but not found
ExpectedArray,
/// Expected a string representing a number
ExpectedNumberString,
/// Expected a string representing a URI
ExpectedUriString,
/// A valid Qid URI was expected but not found
ExpectedQidString,
/// Expected a string because the datatype is string
ExpectedStringDatatype,
/// A time string was empty
TimeEmpty,
/// An ID was invalid
BadId,
/// A date didn't have a year
NoDateYear,
/// No date matched the day/month/year
NoDateMatched,
/// An ambiguous date was specified
DateAmbiguous,
/// The datatype was invalid
InvalidDatatype,
/// The datatype was invalid or unknown
UnknownDatatype,
/// The time was missing an hour
MissingHour,
/// The time was missing a minute
MissingMinute,
/// The time was missing a second
MissingSecond,
/// The snaktype was invalid
InvalidSnaktype,
/// The precision level was invalid
InvalidPrecision,
/// No rank was specified
NoRank,
/// A number was out of bounds
NumberOutOfBounds,
/// No ID was found
NoId,
/// No entities are in the object
NoEntities,
/// Multiple entities are in the object
MultipleEntities,
/// The entity had no type
NoEntityType,
/// There are no claims
NoClaims,
/// The claim ID is missing
NoClaimId,
/// That rank is unknown
UnknownRank,
/// A reference group is missing a snaks-order field
NoSnakOrder,
/// A hash is missing on a reference group
NoHash,
/// A reference group has no snaks
NoReferenceSnaks,
/// snaks-order includes a non-snak
SnaksOrderIncludesNonSnak,
/// A qualifiers order exists but qualifiers do not
QualifiersOrderButNoObject,
/// `qualifiers-order` names a property that is not a qualifier
QualiferOrderNamesNonQualifier,
/// Expected a string in a key-val entity info object (name or description)
ExpectedKeyvalTextString,
/// Expected a value in a language+value object
ExpectedTextValue,
/// An array of aliases was not found
ExpectedAliasArray,
/// An array of claims was not found
ExpectedClaimArray,
/// An array of references was not found
ExpectedReferenceArray,
/// An array of reference subsnaks was not found
ExpectedReferenceSubsnakArray,
/// A hash was expected but not found
ExpectedHashString,
/// A string representing a language was expected but not found
ExpectedLangString,
/// A string representing an alias was expected but not found
ExpectedAliasString,
/// A string representing a Pid was expected but not found
ExpectedPidString,
/// A string representing a site title was expected but not found
ExpectedSiteTitleString,
/// An array representing site badges was expected but not found
ExpectedSiteBadgesArray,
/// A mainsnak is missing
MissingMainsnak,
/// An hour/minute/second is out of bounds.
OutOfBoundsTime,
}
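/// Get a JSON value as an owned `String`, failing with `ExpectedString` if it isn't a string.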
fn get_json_string(json: &Value) -> Result<String, EntityError> {
json.as_str()
.map(ToString::to_string)
.ok_or(EntityError::ExpectedString)
}
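/// Parse a Wikibase number, which may be a JSON number or a string (optionally prefixed with `+`).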
fn parse_wb_number(num: &Value) -> Result<f64, EntityError> {
match num {
Value::Number(num) => num.as_f64().ok_or(EntityError::NumberOutOfBounds),
Value::String(s) => {
// "+1" is a valid Wikibase number
let s = if let Some(b'+') = s.bytes().next() {
&s[1..]
} else {
&s[..]
};
match s.parse() {
Ok(x) => Ok(x),
Err(_) => Err(EntityError::FloatParse),
}
}
_ => Err(EntityError::ExpectedNumberString),
}
}
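/// Interpret a datavalue as a Wikidata entity IRI (`http://www.wikidata.org/entity/Q...`) and extract its Qid.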
fn try_get_as_qid(datavalue: &Value) -> Result<Qid, EntityError> {
match datavalue
.as_str()
.ok_or(EntityError::ExpectedUriString)?
.split("http://www.wikidata.org/entity/Q")
.nth(1)
.ok_or(EntityError::ExpectedQidString)?
.parse()
{
Ok(x) => Ok(Qid(x)),
Err(_) => Err(EntityError::FloatParse),
}
}
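/// Remove and return the named key from a JSON object, or `Value::Null` if the key is absent
/// or `claim` is not an object.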
fn take_prop(key: &'static str, claim: &mut Value) -> Value {
match claim.as_object_mut() {
Some(obj) => obj.remove(key).unwrap_or(Value::Null),
None => Value::Null,
}
}
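/// Parse a Wikibase time string such as `"+2001-12-31T00:00:00Z"` into a UTC `DateTime`,
/// treating a leading `-` as BCE.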
fn parse_wb_time(time: &str) -> Result<chrono::DateTime<chrono::offset::Utc>, EntityError> {
if time.is_empty() {
return Err(EntityError::TimeEmpty);
}
// "Negative years are allowed in formatting but not in parsing.", so we
// set the era ourselves, after parsing
let is_ce = time.chars().next().ok_or(EntityError::TimeEmpty)? == '+';
let time = &time[1..];
let time_parts: Vec<&str> = time.split('T').collect();
let dash_parts: Vec<&str> = time_parts[0].split('-').collect();
// could be wrong if the precision is coarser than a year, meh
let year: i32 = match dash_parts[0].parse() {
Ok(x) => x,
Err(_) => return Err(EntityError::NoDateYear),
};
let year: i32 = year * (if is_ce { 1 } else { -1 });
let month: Option<u32> = match dash_parts.get(1) {
Some(month_str) => match month_str.parse() {
Ok(0) | Err(_) => None,
Ok(x) => Some(x),
},
None => None,
};
let day: Option<u32> = match dash_parts.get(2) {
Some(day_str) => match day_str.parse() {
Ok(0) | Err(_) => None,
Ok(x) => Some(x),
},
None => None,
};
#[allow(deprecated)] // TODO: avoid using ymd_opt here
let maybe_date = Utc.ymd_opt(year, month.unwrap_or(1), day.unwrap_or(1));
let date = match maybe_date {
chrono::offset::LocalResult::Single(date) => date,
chrono::offset::LocalResult::None => return Err(EntityError::NoDateMatched),
chrono::offset::LocalResult::Ambiguous(_, _) => return Err(EntityError::DateAmbiguous),
};
let (hour, min, sec) = if time_parts.len() == 2 {
let colon_parts: Vec<&str> = time_parts[1].split(':').collect();
let Ok(hour) = colon_parts.first().ok_or(EntityError::MissingHour)?.parse() else {
return Err(EntityError::FloatParse);
};
let Ok(minute) = colon_parts
.get(1)
.ok_or(EntityError::MissingMinute)?
.parse()
else {
return Err(EntityError::FloatParse);
};
let Ok(sec) = colon_parts.get(2).ok_or(EntityError::MissingSecond)?[0..2].parse() else {
return Err(EntityError::FloatParse);
};
(hour, minute, sec)
} else {
(0, 0, 0)
};
date.and_hms_opt(hour, min, sec)
.ok_or(EntityError::OutOfBoundsTime)
}
impl ClaimValueData {
/// Parses a snak.
///
/// # Errors
/// If the `snak` does not correspond to a valid snak, then an error will be returned.
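///
/// ## Example
/// A minimal sketch with a hand-constructed string snak (the values here are illustrative):
/// ```
/// let snak = serde_json::json!({
///     "snaktype": "value",
///     "datatype": "string",
///     "datavalue": { "type": "string", "value": "hello" }
/// });
/// assert_eq!(
///     wikidata::ClaimValueData::parse_snak(snak).unwrap(),
///     wikidata::ClaimValueData::String("hello".to_string())
/// );
/// ```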
pub fn parse_snak(mut snak: Value) -> Result<Self, EntityError> {
let mut datavalue: Value = take_prop("datavalue", &mut snak);
let datatype: &str = &get_json_string(&take_prop("datatype", &mut snak))?;
let snaktype: &str = &get_json_string(&take_prop("snaktype", &mut snak))?;
match snaktype {
"value" => {}
"somevalue" => return Ok(ClaimValueData::UnknownValue),
"novalue" => return Ok(ClaimValueData::NoValue),
_ => return Err(EntityError::InvalidSnaktype),
}
let type_str = take_prop("type", &mut datavalue)
.as_str()
.ok_or(EntityError::InvalidSnaktype)?
.to_string();
let mut value = take_prop("value", &mut datavalue);
match &type_str[..] {
"string" => {
let s = value
.as_str()
.ok_or(EntityError::ExpectedStringDatatype)?
.to_string();
match datatype {
"string" => Ok(ClaimValueData::String(s)),
"commonsMedia" => Ok(ClaimValueData::CommonsMedia(s)),
"external-id" => Ok(ClaimValueData::ExternalID(s)),
"math" => Ok(ClaimValueData::MathExpr(s)),
"geo-shape" => Ok(ClaimValueData::GeoShape(s)),
"musical-notation" => Ok(ClaimValueData::MusicNotation(s)),
"tabular-data" => Ok(ClaimValueData::TabularData(s)),
"url" => Ok(ClaimValueData::Url(s)),
_ => Err(EntityError::InvalidDatatype),
}
}
"wikibase-entityid" => {
// the ID could be an entity, lexeme, property, form, or sense
let id = get_json_string(&take_prop("id", &mut value))?;
match id.chars().next().ok_or(EntityError::BadId)? {
'Q' => Ok(ClaimValueData::Item(Qid(id[1..]
.parse()
.map_err(|_| EntityError::BadId)?))),
'P' => Ok(ClaimValueData::Property(Pid(id[1..]
.parse()
.map_err(|_| EntityError::BadId)?))),
'L' => {
// sense: "L1-S2", form: "L1-F2", lexeme: "L2"
let parts: Vec<&str> = id.split('-').collect();
match parts.len() {
1 => Ok(ClaimValueData::Lexeme(Lid(id[1..]
.parse()
.map_err(|_| EntityError::BadId)?))),
2 => match parts[1].chars().next().ok_or(EntityError::BadId)? {
'F' => Ok(ClaimValueData::Form(Fid(
Lid(parts[0][1..].parse().map_err(|_| EntityError::BadId)?),
parts[1][1..].parse().map_err(|_| EntityError::BadId)?,
))),
'S' => Ok(ClaimValueData::Sense(Sid(
Lid(parts[0][1..].parse().map_err(|_| EntityError::BadId)?),
parts[1][1..].parse().map_err(|_| EntityError::BadId)?,
))),
_ => Err(EntityError::BadId),
},
_ => Err(EntityError::BadId),
}
}
_ => Err(EntityError::BadId),
}
}
"globecoordinate" => {
Ok(ClaimValueData::GlobeCoordinate {
// altitude field is deprecated and we ignore it
lat: parse_wb_number(&take_prop("latitude", &mut value))?,
lon: parse_wb_number(&take_prop("longitude", &mut value))?,
// sometimes precision is missing, default it to 1.0
precision: parse_wb_number(&take_prop("precision", &mut value)).unwrap_or(1.0),
// globe *can* be any IRI, but in practice it's almost always an entity IRI,
// so parsing fails with an error if it doesn't match that pattern
globe: try_get_as_qid(&take_prop("globe", &mut value))?,
})
}
"quantity" => Ok(ClaimValueData::Quantity {
amount: parse_wb_number(&take_prop("amount", &mut value))?,
upper_bound: parse_wb_number(&take_prop("upperBound", &mut value)).ok(),
lower_bound: parse_wb_number(&take_prop("lowerBound", &mut value)).ok(),
unit: try_get_as_qid(&take_prop("unit", &mut value)).ok(),
}),
// our time parsing code can't handle a few edge cases (really old years),
// so we fall back to `UnknownValue` if parsing fails
"time" => Ok(
match parse_wb_time(&get_json_string(&take_prop("time", &mut value))?) {
Ok(date_time) => ClaimValueData::DateTime {
date_time,
precision: parse_wb_number(&take_prop("precision", &mut value))
.map_err(|_| EntityError::InvalidPrecision)?
as u8,
},
Err(_) => ClaimValueData::UnknownValue,
},
),
"monolingualtext" => Ok(ClaimValueData::MonolingualText(Text {
text: get_json_string(&take_prop("text", &mut value))?,
lang: Lang(get_json_string(&take_prop("language", &mut value))?),
})),
_ => Err(EntityError::UnknownDatatype),
}
}
}
impl ClaimValue {
/// Try to parse a JSON claim to a claim value.
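///
/// Returns `None` if the claim is deprecated or malformed.
///
/// ## Example
/// A minimal sketch with a hand-constructed claim (the ID and values here are illustrative):
/// ```
/// let claim = serde_json::json!({
///     "id": "Q1$example-claim-id",
///     "rank": "normal",
///     "mainsnak": {
///         "snaktype": "value",
///         "datatype": "string",
///         "datavalue": { "type": "string", "value": "hello" }
///     }
/// });
/// let value = wikidata::ClaimValue::get_prop_from_snak(claim, false).unwrap();
/// assert_eq!(value.rank, wikidata::Rank::Normal);
/// assert_eq!(value.data, wikidata::ClaimValueData::String("hello".to_string()));
/// ```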
#[must_use]
pub fn get_prop_from_snak(mut claim: Value, skip_id: bool) -> Option<ClaimValue> {
let rank = match take_prop("rank", &mut claim).as_str()? {
"deprecated" => {
return None;
}
"normal" => Rank::Normal,
"preferred" => Rank::Preferred,
_ => return None,
};
let mainsnak = take_prop("mainsnak", &mut claim);
let data = ClaimValueData::parse_snak(mainsnak).ok()?;
let references = if let Some(arr) = take_prop("references", &mut claim).as_array() {
let mut v: Vec<ReferenceGroup> = Vec::with_capacity(arr.len());
for reference_group in arr {
let reference_group = reference_group.as_object()?;
let mut claims = Vec::with_capacity(reference_group["snaks"].as_array()?.len());
let snaks = reference_group["snaks"].as_object()?;
for (pid, snak_group) in snaks {
for snak in snak_group.as_array()? {
// clone, meh
let owned_snak = snak.clone().take();
if let Ok(x) = ClaimValueData::parse_snak(owned_snak) {
claims.push((Pid(pid[1..].parse().ok()?), x));
}
}
}
v.push(ReferenceGroup {
claims,
hash: reference_group.get("hash")?.as_str()?.to_string(),
});
}
v
} else {
Vec::new()
};
let qualifiers_json = take_prop("qualifiers", &mut claim);
let qualifiers = if qualifiers_json.is_object() {
let mut v: Vec<(Pid, ClaimValueData)> = vec![];
for (pid, claim_array_json) in qualifiers_json.as_object()? {
// yep it's a clone, meh
let Value::Array(mut claim_array) = claim_array_json.clone().take() else {
return None;
};
for claim in claim_array.drain(..) {
if let Ok(x) = ClaimValueData::parse_snak(claim) {
v.push((Pid(pid[1..].parse().ok()?), x));
}
}
}
v
} else {
vec![]
};
Some(ClaimValue {
rank,
id: if skip_id {
String::new()
} else {
take_prop("id", &mut claim).as_str()?.to_string()
},
data,
references,
qualifiers,
})
}
/// Returns an iterator of references to all the qualifier claim data for a property ID.
///
/// ## Example
/// ```
/// # let j: serde_json::Value = serde_json::from_str(include_str!("../items/Q42.json")).unwrap();
/// # let q42 = wikidata::Entity::from_json(j).unwrap();
/// let claim = q42.claim_by_id("Q42$14ec162d-4a7c-3515-19ad-32b0e14fbb44").unwrap().1;
/// let media_legends = claim.qualifier_pid_claims(wikidata::Pid(2096));
/// assert_eq!(media_legends.count(), 5);
/// ```
pub fn qualifier_pid_claims(&self, pid: Pid) -> impl Iterator<Item = &ClaimValueData> {
self.qualifiers
.iter()
.filter(move |(claim_pid, _)| *claim_pid == pid)
.map(|(_, value)| value)
}
}
impl ReferenceGroup {
/// Returns an iterator of references to all the claim data for a property ID.
///
/// ## Example
/// ```
/// # let j: serde_json::Value = serde_json::from_str(include_str!("../items/Q42.json")).unwrap();
/// # let q42 = wikidata::Entity::from_json(j).unwrap();
/// let group = &q42.claim_by_id("q42$881F40DC-0AFE-4FEB-B882-79600D234273").unwrap().1.references[0];
/// let mut claims = group.pid_claims(wikidata::Pid(854));
/// if let Some(wikidata::ClaimValueData::Url(url)) = claims.next() {
/// assert_eq!(url, "http://highgatecemetery.org/visit/who");
/// } else {
/// panic!("Expected a URL");
/// };
/// assert_eq!(claims.next(), None);
/// ```
pub fn pid_claims(&self, pid: Pid) -> impl Iterator<Item = &ClaimValueData> {
self.claims
.iter()
.filter(move |(claim_pid, _)| *claim_pid == pid)
.map(|(_, value)| value)
}
}
#[cfg(test)]
mod test {
use super::*;
#[test]
fn time_parsing() {
let valid_times = vec![
"+2001-12-31T00:00:00Z",
"+12346-12-31T00:00:00Z",
"+311-12-31T00:00:00Z",
"+1979-00-00T00:00:00Z",
"-1979-00-00T00:00:00Z",
"+2001-12-31T00:00:00Z",
"+2001-12-31",
"+2001-12",
"-12561",
"+311-12-31T12:34:56Z",
"+311-12-31T23:45:42Z",
// below are times that *should* work, but chrono doesn't accept them
// "-410000000-00-00T00:00:00Z",
];
for time in valid_times {
println!("Trying \"{time}\"");
assert!(match parse_wb_time(time) {
Ok(val) => {
println!("Got {val:#?}");
true
}
Err(_) => false,
});
}
}
#[test]
fn as_qid_test() {
let qid = try_get_as_qid(
&serde_json::from_str(r#""http://www.wikidata.org/entity/Q1234567""#).unwrap(),
);
assert_eq!(qid, Ok(Qid(1_234_567)));
}
#[test]
fn number_parsing() {
assert_eq!(parse_wb_number(&serde_json::json!("+5")), Ok(5.));
assert_eq!(parse_wb_number(&serde_json::json!("5")), Ok(5.));
assert_eq!(parse_wb_number(&serde_json::json!("-5")), Ok(-5.));
assert_eq!(
parse_wb_number(&serde_json::json!("-81.12683")),
Ok(-81.12683)
);
assert_eq!(parse_wb_number(&serde_json::json!("+0")), Ok(0.));
}
}