From 0d330126feb4a628ff331a47ec1d5445750c5877 Mon Sep 17 00:00:00 2001
From: Smitty
Date: Thu, 27 May 2021 17:36:20 -0400
Subject: [PATCH] init library

---
 Cargo.toml    |  11 ++
 src/entity.rs | 497 ++++++++++++++++++++++++++++++++++++++++++++++++++
 src/ids.rs    | 429 +++++++++++++++++++++++++++++++++++++++++++
 src/lib.rs    |   5 +
 4 files changed, 942 insertions(+)
 create mode 100644 Cargo.toml
 create mode 100755 src/entity.rs
 create mode 100755 src/ids.rs
 create mode 100644 src/lib.rs

diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..ace907c
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,11 @@
+[package]
+name = "wikidata"
+version = "0.1.0"
+authors = ["Smitty"]
+edition = "2018"
+
+[dependencies]
+chrono = { version = "0.4.19", features = ["std", "serde", "clock"], default-features = false }
+serde = { version = "1.0.126", features = ["derive"] }
+json = "0.12.4"
+lazy_static = "1.4.0"
diff --git a/src/entity.rs b/src/entity.rs
new file mode 100755
index 0000000..0124924
--- /dev/null
+++ b/src/entity.rs
@@ -0,0 +1,497 @@
+use crate::ids::{consts, Fid, Lid, Pid, Qid, Sid};
+use chrono::{DateTime, TimeZone, Utc};
+use serde::{Deserialize, Serialize};
+
+/// A Wikibase entity.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct Entity {
+    pub claims: Vec<(Pid, ClaimValue)>,
+}
+
+/// Data relating to a claim value.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub enum ClaimValueData {
+    CommonsMedia(String),
+    GlobeCoordinate {
+        lat: f64,
+        lon: f64,
+        precision: f64,
+        globe: Qid,
+    },
+    Item(Qid),
+    Property(Pid),
+    String(String),
+    MonolingualText {
+        text: String,
+        lang: String,
+    },
+    ExternalID(String),
+    Quantity {
+        amount: f64, // technically it could exceed the bounds of an f64, but meh
+        lower_bound: Option<f64>,
+        upper_bound: Option<f64>,
+        // the unit *could* be any IRI, but in practice almost all are Wikidata entity IRIs
+        unit: Option<Qid>,
+    },
+    DateTime {
+        date_time: DateTime<Utc>,
+        /// 0 - billion years
+        /// 1 - 100 million years
+        /// 2 - 10 million years
+        /// 3 - 1 million years
+        /// 4 - 100k years
+        /// 5 - 10k years
+        /// 6 - 1000 years
+        /// 7 - 100 years
+        /// 8 - decade
+        /// 9 - year
+        /// 10 - month
+        /// 11 - day
+        /// 12 - hour (deprecated)
+        /// 13 - minute (deprecated)
+        /// 14 - second (deprecated)
+        precision: u8,
+    },
+    Url(String),
+    MathExpr(String),
+    GeoShape(String),
+    MusicNotation(String),
+    TabularData(String),
+    Lexeme(Lid),
+    Form(Fid),
+    Sense(Sid),
+    NoValue,
+    UnknownValue,
+}
+
+impl Default for ClaimValueData {
+    fn default() -> Self {
+        ClaimValueData::NoValue
+    }
+}
+
+/// A statement rank.
+#[derive(Debug, Copy, Clone, Eq, PartialEq, PartialOrd, Ord, Serialize, Deserialize)]
+pub enum Rank {
+    Deprecated,
+    Normal,
+    Preferred,
+}
+
+impl Default for Rank {
+    fn default() -> Self {
+        Rank::Normal
+    }
+}
+
+/// A group of claims that make up a single reference.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct ReferenceGroup {
+    pub claims: Vec<(Pid, ClaimValueData)>,
+}
+
+/// A claim value.
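+/// Pairs the parsed data with the statement's rank, ID, qualifiers, and
+/// references.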
+#[derive(Debug, Clone, Default, Serialize, Deserialize)]
+pub struct ClaimValue {
+    pub data: ClaimValueData,
+    pub rank: Rank,
+    pub id: String,
+    pub qualifiers: Vec<(Pid, ClaimValueData)>,
+    pub references: Vec<ReferenceGroup>,
+}
+
+impl Entity {
+    /// All values of the entity's "instance of" (P31) claims.
+    pub fn instances(&self) -> Vec<Qid> {
+        let mut instances = Vec::with_capacity(1);
+        for (pid, claim) in &self.claims {
+            if *pid == consts::INSTANCE_OF {
+                if let ClaimValueData::Item(qid) = claim.data {
+                    instances.push(qid);
+                }
+            }
+        }
+        instances.shrink_to_fit();
+        instances
+    }
+
+    /// The entity's date of birth (P569), if any.
+    pub fn start_time(&self) -> Option<DateTime<Utc>> {
+        for (pid, claim) in &self.claims {
+            if *pid == consts::DATE_OF_BIRTH {
+                if let ClaimValueData::DateTime { date_time, .. } = claim.data {
+                    return Some(date_time);
+                }
+            }
+        }
+        None
+    }
+
+    /// The entity's date of death (P570), if any.
+    pub fn end_time(&self) -> Option<DateTime<Utc>> {
+        for (pid, claim) in &self.claims {
+            if *pid == consts::DATE_OF_DEATH {
+                if let ClaimValueData::DateTime { date_time, .. } = claim.data {
+                    return Some(date_time);
+                }
+            }
+        }
+        None
+    }
+
+    /// The number of days that the entity was in operation. For animals, this
+    /// is lifespan; for corporations and other legal entities it's the time
+    /// between formation and dissolution. Entities without an end date are
+    /// measured up to the present.
+    pub fn timespan(&self) -> Option<u64> {
+        let start = self.start_time()?;
+        let end = self.end_time().unwrap_or_else(Utc::now);
+        let days = end.signed_duration_since(start).num_days();
+        if days < 0 {
+            None
+        } else {
+            Some(days as u64)
+        }
+    }
+}
+
+fn get_json_string(mut json: json::JsonValue) -> String {
+    match json.take_string() {
+        Some(x) => x,
+        None => panic!("get_json_string called with a non-string JsonValue"),
+    }
+}
+
+fn parse_wb_number(num: &json::JsonValue) -> Option<f64> {
+    // could be a string representing a number, or a number
+    if num.is_number() {
+        Some(num.as_number()?.into())
+    } else {
+        let s = num.as_str()?;
+        match s.parse() {
+            Ok(x) => Some(x),
+            Err(_) => None,
+        }
+    }
+}
+
+fn try_get_as_qid(datavalue: &json::JsonValue) -> Option<Qid> {
+    match datavalue
+        .as_str()?
+        .split("http://www.wikidata.org/entity/Q")
+        .nth(1)?
+        .parse()
+    {
+        Ok(x) => Some(Qid(x)),
+        Err(_) => None,
+    }
+}
+
+fn take_prop(key: &'static str, claim: &mut json::JsonValue) -> json::JsonValue {
+    claim.remove(key)
+}
+
+fn parse_wb_time(time: &str) -> Option<DateTime<Utc>> {
+    if time.is_empty() {
+        return None;
+    }
+
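+    // Wikidata timestamps look like "+2001-12-31T00:00:00Z": a leading sign,
+    // a year that may be longer than four digits, and "00" for the month or
+    // day when they are unknown (see the test cases below).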
+    // "Negative years are allowed in formatting but not in parsing.", so we
+    // set the era ourselves, after parsing
+    let is_ce = time.chars().next()? == '+';
+    let time = &time[1..];
+
+    let time_parts: Vec<&str> = time.split('T').collect();
+    let dash_parts: Vec<&str> = time_parts[0].split('-').collect();
+    // this could be wrong if the precision is coarser than a year, meh
+    let year: i32 = match dash_parts[0].parse() {
+        Ok(x) => x,
+        Err(_) => return None,
+    };
+    let year: i32 = year * (if is_ce { 1 } else { -1 });
+    let month: Option<u32> = match dash_parts.get(1) {
+        Some(month_str) => match month_str.parse() {
+            Ok(0) | Err(_) => None,
+            Ok(x) => Some(x),
+        },
+        None => None,
+    };
+    let day: Option<u32> = match dash_parts.get(2) {
+        Some(day_str) => match day_str.parse() {
+            Ok(0) | Err(_) => None,
+            Ok(x) => Some(x),
+        },
+        None => None,
+    };
+    let maybe_date = Utc.ymd_opt(year, month.unwrap_or(1), day.unwrap_or(1));
+    let date = match maybe_date {
+        chrono::offset::LocalResult::Single(date) => date,
+        _ => return None, // matched zero dates or matched multiple
+    };
+    let (hour, min, sec) = if time_parts.len() == 2 {
+        let colon_parts: Vec<&str> = time_parts[1].split(':').collect();
+        let hour = match colon_parts.get(0)?.parse() {
+            Ok(x) => x,
+            Err(_) => return None,
+        };
+        let minute = match colon_parts.get(1)?.parse() {
+            Ok(x) => x,
+            Err(_) => return None,
+        };
+        let sec = match colon_parts.get(2)?[0..2].parse() {
+            Ok(x) => x,
+            Err(_) => return None,
+        };
+        (hour, minute, sec)
+    } else {
+        (0, 0, 0)
+    };
+    Some(date.and_hms(hour, min, sec))
+}
+
+impl ClaimValueData {
+    /// Parses a snak. Returns `None` for value types the library doesn't
+    /// support; panics if the snak itself is structurally malformed.
+    #[must_use]
+    pub fn parse_snak(mut snak: json::JsonValue) -> Option<Self> {
+        let mut datavalue: json::JsonValue = take_prop("datavalue", &mut snak);
+        let datatype: &str = &get_json_string(take_prop("datatype", &mut snak));
+        let snaktype: &str = &get_json_string(take_prop("snaktype", &mut snak));
+        match snaktype {
+            "value" => {}
+            "somevalue" => return Some(ClaimValueData::UnknownValue),
+            "novalue" => return Some(ClaimValueData::NoValue),
+            x => panic!(
+                "Expected snaktype to be value, somevalue, or novalue, but it was {}",
+                x
+            ),
+        };
+        let type_str = take_prop("type", &mut datavalue)
+            .take_string()
+            .expect("Invalid datavalue type. Perhaps a new data type has been added?");
+        let mut value = take_prop("value", &mut datavalue);
+        match &type_str[..] {
+            "string" => {
+                let s = value
+                    .take_string()
+                    .expect("expected string, didn't find one");
+                match datatype {
+                    "string" => Some(ClaimValueData::String(s)),
+                    "commonsMedia" => Some(ClaimValueData::CommonsMedia(s)),
+                    "external-id" => Some(ClaimValueData::ExternalID(s)),
+                    "math" => Some(ClaimValueData::MathExpr(s)),
+                    "geo-shape" => Some(ClaimValueData::GeoShape(s)),
+                    "musical-notation" => Some(ClaimValueData::MusicNotation(s)),
+                    "tabular-data" => Some(ClaimValueData::TabularData(s)),
+                    "url" => Some(ClaimValueData::Url(s)),
+                    _ => {
+                        eprintln!("Invalid datatype {}", datatype);
+                        None
+                    }
+                }
+            }
+            "wikibase-entityid" => {
+                // the ID could be an entity, lexeme, property, form, or sense
+                let id = get_json_string(take_prop("id", &mut value));
+                match id.chars().next().expect("Entity ID was empty string") {
+                    'Q' => Some(ClaimValueData::Item(Qid(id[1..]
+                        .parse()
+                        .expect("Malformed entity ID")))),
+                    'P' => Some(ClaimValueData::Property(Pid(id[1..]
+                        .parse()
+                        .expect("Malformed property ID")))),
+                    'L' => {
+                        // sense: "L1-S2", form: "L1-F2", lexeme: "L2"
+                        let parts: Vec<&str> = id.split('-').collect();
+                        match parts.len() {
+                            1 => Some(ClaimValueData::Lexeme(Lid(id[1..]
+                                .parse()
+                                .expect("Malformed lexeme ID")))),
+                            2 => {
+                                match parts[1]
+                                    .chars()
+                                    .next()
+                                    .expect("Nothing after dash in lexeme ID")
+                                {
+                                    'F' => Some(ClaimValueData::Form(Fid(
+                                        Lid(parts[0][1..].parse().expect("Malformed lexeme ID")),
+                                        parts[1][1..].parse().expect("Invalid form ID"),
+                                    ))),
+                                    'S' => Some(ClaimValueData::Sense(Sid(
+                                        Lid(parts[0][1..].parse().expect("Malformed lexeme ID")),
+                                        parts[1][1..].parse().expect("Invalid sense ID"),
+                                    ))),
+                                    _ => panic!("Invalid second part of lexeme ID"),
+                                }
+                            }
+                            _ => panic!("Lexeme ID had more than 1 dash"),
+                        }
+                    }
+                    _ => panic!("Couldn't parse entity ID"),
+                }
+            }
+            "globecoordinate" => {
+                Some(ClaimValueData::GlobeCoordinate {
+                    lat: parse_wb_number(&take_prop("latitude", &mut value))?,
+                    lon: parse_wb_number(&take_prop("longitude", &mut value))?,
+                    // the altitude field is deprecated and we ignore it
+                    precision: parse_wb_number(&take_prop("precision", &mut value))?,
+                    // globe *can* be any IRI, but in practice it's almost always an entity URI,
+                    // so we return None if it doesn't match our expectations
+                    globe: try_get_as_qid(&take_prop("globe", &mut value))?,
+                })
+            }
+            "quantity" => Some(ClaimValueData::Quantity {
+                amount: parse_wb_number(&take_prop("amount", &mut value))?,
+                upper_bound: parse_wb_number(&take_prop("upperBound", &mut value)),
+                lower_bound: parse_wb_number(&take_prop("lowerBound", &mut value)),
+                unit: try_get_as_qid(&take_prop("unit", &mut value)),
+            }),
+            "time" => Some(ClaimValueData::DateTime {
+                // our time parsing code can't handle a few edge cases (really old years), so we
+                // just give up on parsing the snak if parse_wb_time returns None
+                date_time: parse_wb_time(&get_json_string(take_prop("time", &mut value)))?,
+                precision: parse_wb_number(&take_prop("precision", &mut value))
+                    .expect("Invalid precision") as u8,
+            }),
+            "monolingualtext" => Some(ClaimValueData::MonolingualText {
+                text: get_json_string(take_prop("text", &mut value)),
+                lang: get_json_string(take_prop("language", &mut value)),
+            }),
+            other => {
+                eprintln!("Couldn't parse data type {}", other);
+                None
+            }
+        }
+    }
+}
+
+impl ClaimValue {
+    /// Builds a `ClaimValue` from a claim JSON object. Deprecated-rank claims
+    /// are dropped, and `None` is returned if the mainsnak can't be parsed.
+    #[must_use]
+    pub fn get_prop_from_snak(mut claim: json::JsonValue, skip_id: bool) -> Option<Self> {
+        let claim_str = take_prop("rank", &mut claim)
+            .take_string()
+            .expect("No rank");
+        let rank = match &claim_str[..] {
+            "deprecated" => {
+                return None;
+            }
+            "normal" => Rank::Normal,
+            "preferred" => Rank::Preferred,
+            other => panic!("Invalid rank {}", other),
+        };
+        let mainsnak = take_prop("mainsnak", &mut claim);
+        let data = match ClaimValueData::parse_snak(mainsnak) {
+            Some(x) => x,
+            None => {
+                eprintln!("Failed to parse mainsnak");
+                return None;
+            }
+        };
+        let references_json = take_prop("references", &mut claim);
+        let references = if references_json.is_array() {
+            let mut v: Vec<ReferenceGroup> = Vec::with_capacity(references_json.len());
+            let mut references_vec = if let json::JsonValue::Array(a) = references_json {
+                a
+            } else {
+                panic!("references not an array");
+            };
+            for mut reference_group in references_vec.drain(..) {
+                let mut claims = Vec::with_capacity(reference_group["snaks"].len());
+                let snaks = take_prop("snaks", &mut reference_group);
+                let mut entries: Vec<(&str, &json::JsonValue)> = snaks.entries().collect();
+                for (pid, snak_group) in entries.drain(..) {
+                    let mut members: Vec<&json::JsonValue> = snak_group.members().collect();
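+                    // "snaks" maps each property ID to an array of snaks;
+                    // every snak that parses contributes one (property, value)
+                    // pair to this reference group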
+                    for snak in members.drain(..) {
+                        // clone, meh
+                        let owned_snak = snak.clone().take();
+                        match ClaimValueData::parse_snak(owned_snak) {
+                            Some(x) => claims
+                                .push((Pid(pid[1..].parse().expect("Invalid property ID")), x)),
+                            None => {
+                                eprintln!("Failed to parse reference snak");
+                            }
+                        }
+                    }
+                }
+                v.push(ReferenceGroup { claims });
+            }
+            v
+        } else {
+            vec![]
+        };
+        let qualifiers_json = take_prop("qualifiers", &mut claim);
+        let qualifiers = if qualifiers_json.is_object() {
+            let mut v: Vec<(Pid, ClaimValueData)> = vec![];
+            let mut entries: Vec<(&str, &json::JsonValue)> = qualifiers_json.entries().collect();
+            for (pid, claim_array_json) in entries.drain(..) {
+                // yep it's a clone, meh
+                let mut claim_array =
+                    if let json::JsonValue::Array(x) = claim_array_json.clone().take() {
+                        x
+                    } else {
+                        panic!("qualifiers doesn't have a claim array");
+                    };
+                for claim in claim_array.drain(..) {
+                    match ClaimValueData::parse_snak(claim) {
+                        Some(x) => v.push((Pid(pid[1..].parse().expect("Invalid property ID")), x)),
+                        None => {
+                            eprintln!("Failed to parse qualifier snak");
+                        }
+                    };
+                }
+            }
+            v
+        } else {
+            vec![]
+        };
+        Some(ClaimValue {
+            rank,
+            id: if skip_id {
+                String::new()
+            } else {
+                take_prop("id", &mut claim)
+                    .take_string()
+                    .expect("No id on snak")
+            },
+            data,
+            references,
+            qualifiers,
+        })
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+
+    #[test]
+    fn time_parsing() {
+        let valid_times = vec![
+            "+2001-12-31T00:00:00Z",
+            "+12346-12-31T00:00:00Z",
+            "+311-12-31T00:00:00Z",
+            "+1979-00-00T00:00:00Z",
+            "-1979-00-00T00:00:00Z",
+            "+2001-12-31T00:00:00Z",
+            "+2001-12-31",
+            "+2001-12",
+            "-12561",
+            "+311-12-31T12:34:56Z",
+            "+311-12-31T23:45:42Z",
+            // below are times that *should* work, but chrono doesn't accept
+            // "-410000000-00-00T00:00:00Z",
+        ];
+        for time in valid_times {
+            println!("Trying \"{}\"", time);
+            assert!(match parse_wb_time(time) {
+                Some(val) => {
+                    println!("Got {:#?}", val);
+                    true
+                }
+                None => false,
+            });
+        }
+    }
+
+    #[test]
+    fn as_qid_test() {
+        let qid =
+            try_get_as_qid(&json::parse(r#""http://www.wikidata.org/entity/Q1234567""#).unwrap());
+        assert_eq!(qid, Some(Qid(1234567)));
+    }
+}
diff --git a/src/ids.rs b/src/ids.rs
new file mode 100755
index 0000000..61c5a36
--- /dev/null
+++ b/src/ids.rs
@@ -0,0 +1,429 @@
+//! Various ID types used by Wikidata.
+
+use serde::{Deserialize, Serialize};
+use std::{num::ParseIntError, str::FromStr};
+
+/// An error encountered while parsing an ID from a string.
+#[derive(Debug, Clone)]
+pub enum IdParseError {
+    UnparseableNumber(ParseIntError),
+    InvalidPrefix,
+}
+
+macro_rules! id_def {
+    ($name:ident, $full_name:expr, $letter:expr) => {
+        #[derive(
+            Debug, Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize,
+        )]
+        pub struct $name(pub u32);
+
+        impl $name {
+            /// Get the URL for the JSON data about this ID on Wikidata.
+            #[must_use]
+            pub fn json_url(&self) -> String {
+                let mut ret = String::new();
+                ret.push_str(concat!(
+                    "https://www.wikidata.org/wiki/Special:EntityData/",
+                    $letter
+                ));
+                match self {
+                    $name(num) => {
+                        ret.push_str(&num.to_string());
+                    }
+                }
+                ret.push_str(".json");
+                ret
+            }
+        }
+        impl FromStr for $name {
+            type Err = IdParseError;
+
+            /// Parse the identifier from a string.
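+            /// The input must be the type's letter prefix followed by the
+            /// numeric ID, e.g. `"Q42"` (`Qid` shown below; `Pid` and `Lid`
+            /// work the same way):
+            ///
+            /// ```
+            /// # use std::str::FromStr;
+            /// # use wikidata::ids::Qid;
+            /// assert_eq!(Qid::from_str("Q42").unwrap(), Qid(42));
+            /// ```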
+            fn from_str(x: &str) -> Result<Self, Self::Err> {
+                // reject IDs with the wrong prefix letter (this also covers
+                // the empty string)
+                if !x.starts_with($letter) {
+                    return Err(IdParseError::InvalidPrefix);
+                }
+                let num_str = &x[1..];
+                match num_str.parse() {
+                    Ok(num) => Ok(Self(num)),
+                    Err(e) => Err(IdParseError::UnparseableNumber(e)),
+                }
+            }
+        }
+        impl std::fmt::Display for $name {
+            fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+                write!(f, concat!($letter, "{}"), self.0)
+            }
+        }
+    };
+}
+
+id_def!(Qid, "entity ID", "Q");
+id_def!(Pid, "property ID", "P");
+id_def!(Lid, "lexeme ID", "L");
+
+/// A lexeme ID and associated form ID
+#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
+pub struct Fid(pub Lid, pub u16);
+
+/// A lexeme ID and associated sense ID
+#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
+pub struct Sid(pub Lid, pub u16);
+
+impl ToString for Fid {
+    #[must_use]
+    fn to_string(&self) -> String {
+        match self {
+            Fid(Lid(lid), fid) => format!("L{}-F{}", lid, fid),
+        }
+    }
+}
+
+impl ToString for Sid {
+    #[must_use]
+    fn to_string(&self) -> String {
+        match self {
+            Sid(Lid(lid), sid) => format!("L{}-S{}", lid, sid),
+        }
+    }
+}
+
+macro_rules! qid_consts (
+    { $($key:ident => $value:expr),+, } => {
+        $(
+            pub const $key: Qid = Qid($value);
+        )+
+    };
+);
+macro_rules! pid_consts (
+    { $($key:ident => $value:expr),+, } => {
+        $(
+            pub const $key: Pid = Pid($value);
+        )+
+    };
+);
+
+macro_rules! qid_unit_suffixes {
+    { $($key:ident => $value:expr),+, } => {
+        /// A display suffix for a known unit, e.g. `" m"` for `METRE`.
+        #[must_use]
+        pub fn unit_suffix(qid: Qid) -> Option<&'static str> {
+            $(
+                if qid == $key {
+                    Some($value)
+                } else
+            )+
+            {
+                None
+            }
+        }
+    };
+}
+
+/// Various IDs for commonly used entities/properties on Wikidata.
+#[allow(clippy::unreadable_literal)]
+pub mod consts {
+    use super::*;
+    qid_consts! {
+        EARTH => 2,
+        HUMAN => 5,
+        UNIT_OF_MEASUREMENT => 47574,
+        PHYSICAL_QUANTITY => 107715,
+        SI_BASE_UNIT => 223662,
+        LENGTH => 36253,
+        METRE => 11573,
+        YOTTAMETRE => 10543042,
+        ZETTAMETRE => 3277915,
+        EXAMETRE => 3277907,
+        PETAMETRE => 3277919,
+        TERAMETRE => 3267417,
+        GIGAMETRE => 854546,
+        MEGAMETRE => 1054140,
+        MYRIAMETRE => 1970718,
+        KILOMETRE => 828224,
+        HECTOMETRE => 844338,
+        DECAMETRE => 848856,
+        DECIMETRE => 200323,
+        CENTIMETRE => 174728,
+        MILLIMETRE => 174789,
+        MICROMETRE => 175821,
+        NANOMETRE => 178674,
+        PICOMETRE => 192274,
+        FEMTOMETRE => 208788,
+        ATTOMETRE => 6003257,
+        ZEPTOMETRE => 3270676,
+        YOCTOMETRE => 3221356,
+        PARSEC => 12129,
+        GIGAPARSEC => 14916719,
+        MEGAPARSEC => 3773454,
+        KILOPARSEC => 11929860,
+        ATTOPARSEC => 15784325,
+        LIGHT_YEAR => 531,
+        LIGHT_SECOND => 909315,
+        ASTRONOMICAL_UNIT => 1811,
+        MILE => 253276,
+        FOOT => 3710,
+        INCH => 218593,
+        THOU => 1165799,
+        AREA => 11500,
+        SQUARE_METRE => 25343,
+        SQUARE_KILOMETRE => 712226,
+        SQUARE_CENTIMETRE => 2489298,
+        SQUARE_MILLIMETRE => 2737347,
+        ARE => 185078,
+        HECTARE => 35852,
+        VOLUME => 39297,
+        CUBIC_METRE => 25517,
+        CUBIC_KILOMETRE => 4243638,
+        CUBIC_DECIMETRE => 2175964,
+        CUBIC_CENTIMETRE => 1022113,
+        CUBIC_MILLIMETRE => 3675550,
+        LITER => 11582,
+        HECTOLITER => 2029519,
+        DECALITER => 2637946,
+        CENTILITER => 1815100,
+        MILLILITER => 2332346,
+        MICROLITER => 2282891,
+        PICOLITER => 3902688,
+        FEMTOLITER => 3312063,
+        TIME => 11471,
+        FREQUENCY => 11652,
+        TIME_INTERVAL => 186081,
+        SECOND => 11574,
+        MILLISECOND => 723733,
+        MICROSECOND => 842015,
+        NANOSECOND => 838801,
+        FEMTOSECOND => 1777507,
+        ATTOSECOND => 2483628,
+        PICOSECOND => 3902709,
+        DAY => 573,
+        WEEK => 23387,
+        HOUR => 25235,
+        MINUTE => 7727,
+        MONTH => 5151,
+        ANNUM => 1092296,
+        YEAR => 577,
+        TWENTY_FOUR_HOUR_CLOCK => 216589,
+        HERTZ => 39369,
+        KILOHERTZ => 2143992,
+        MEGAHERTZ => 732707,
+        GIGAHERTZ => 3276763,
+        MASS => 11423,
+        KILOGRAM => 11570,
+        YOTTAGRAM => 613726,
+        ZETTAGRAM => 14754979,
+        EXAGRAM => 2655272,
+        PETAGRAM => 2612219,
+        TERAGRAM => 1770733,
+        GIGAGRAM => 2799294,
+        MEGAGRAM => 11776930,
+        MYRIAGRAM => 2151240,
+        HECTOGRAM => 1057069,
+        DECAGRAM => 6517513,
+        GRAM => 41803,
+        DECIGRAM => 1772386,
+        CENTIGRAM => 2691798,
+        MILLIGRAM => 3241121,
+        MICROGRAM => 1645498,
+        NANOGRAM => 2282906,
+        PICOGRAM => 3239557,
+        FEMTOGRAM => 1913097,
+        ATTOGRAM => 2438073,
+        ZEPTOGRAM => 6171168,
+        YOCTOGRAM => 6170164,
+        POUND => 100995,
+        DALTON => 483261,
+        DENSITY => 29539,
+        KILOGRAM_PER_CUBIC_METRE => 844211,
+        GRAM_PER_CUBIC_CENTIMETRE => 13147228,
+        CONCENTRATION => 3686031,
+        GRAM_PER_LITER => 834105,
+        MILLILITRE_PER_LITRE => 21075844,
+        MILLIGRAM_PER_CUBIC_METER => 21077820,
+        MOL_PER_KILOGRAM_OF_SOLVENT => 21064838,
+        MOL_PER_LITRE_OF_SOLUTION => 21064845,
+        MASS_FRACTION => 899138,
+        MOLE_FRACTION => 125264,
+        VOLUME_FRACTION => 909482,
+        PARTS_PER_MILLION => 21006887,
+        PART_PER_BILLION => 2055118,
+        MILLIGRAM_PER_KILOGRAM => 21091747,
+        GRAM_PER_KILOGRAM => 21061369,
+        TEMPERATURE => 11466,
+        DEGREE_CELSIUS => 25267,
+        KELVIN => 11579,
+        DEGREE_FAHRENHEIT => 42289,
+        RANKINE_SCALE => 207488,
+        PRESSURE => 39552,
+        ATMOSPHERE => 177974,
+        TECHNICAL_ATMOSPHERE => 909066,
+        BAR => 103510,
+        PASCAL => 44395,
+        MEGAPASCAL => 21062777,
+        KILOPASCAL => 21064807,
+        HECTOPASCAL => 5139563,
+        TORR => 185648,
+        MILLIMETER_OF_MERCURY => 6859652,
+        METRE_OF_WATER => 2042279,
+        CENTIMETRE_OF_WATER => 1247300,
+        MILLIMETRE_OF_WATER => 13479685,
+        HEAT_CAPACITY => 179388,
+        JOULE_PER_MOLE_KELVIN => 20966455,
+        THERMAL_CONDUCTIVITY => 487005,
+        WATT_PER_METRE_KELVIN => 1463969,
+        SPEED => 3711325,
+        KILOMETRE_PER_HOUR => 180154,
+        METRE_PER_SECOND => 182429,
+        KNOT => 128822,
+        KINEMATIC_VISCOSITY => 15106259,
+        STOKES => 1569733,
+        ELECTRICAL_CONDUCTIVITY => 4593291,
+        AMPERE_PER_VOLT_METRE => 20966435,
+        LUMINOSITY => 105902,
+        SOLAR_LUMINOSITY => 843877,
+        ENTHALPY => 161064,
+        JOULE_PER_MOLE => 13035094,
+        KILOJOULE_PER_MOLE => 752197,
+        KILOJOULE_PER_KILOGRAM => 21077849,
+        CURRENCY => 8142,
+        EURO => 4916,
+        CRORE => 1137675,
+        INFECTION => 166231,
+        DEGREE => 28390,
+        BUSINESS => 4830453,
+        FICTIONAL_HUMAN => 15632617,
+    }
+    // only include common ones
+    qid_unit_suffixes! {
+        METRE => " m",
+        KILOMETRE => " km",
+        CENTIMETRE => " cm",
+        MILLIMETRE => " mm",
+        SQUARE_METRE => " m²",
+        SQUARE_KILOMETRE => " km²",
+        SQUARE_CENTIMETRE => " cm²",
+        SQUARE_MILLIMETRE => " mm²",
+        CUBIC_METRE => " m³",
+        CUBIC_KILOMETRE => " km³",
+        CUBIC_CENTIMETRE => " cm³",
+        CUBIC_MILLIMETRE => " mm³",
+        GRAM => " g",
+        MILLIGRAM => " mg",
+        KILOGRAM_PER_CUBIC_METRE => " kg/m³",
+        GRAM_PER_CUBIC_CENTIMETRE => " g/cm³",
+        MILLILITRE_PER_LITRE => " ml/l",
+        MILLIGRAM_PER_CUBIC_METER => " mg/m³",
+        PARTS_PER_MILLION => " ppm",
+        MILLIGRAM_PER_KILOGRAM => " mg/kg",
+        GRAM_PER_KILOGRAM => " g/kg",
+        DEGREE_CELSIUS => " °C",
+        KELVIN => " K",
+        DEGREE_FAHRENHEIT => " °F",
+        KILOMETRE_PER_HOUR => " km/h",
+        ASTRONOMICAL_UNIT => " AU",
+        DEGREE => "°",
+    }
+    pid_consts! {
+        INSTANCE_OF => 31,
+        REFERENCE_URL => 854,
+        LANGUAGE => 407, // "language of work or name"
+        TITLE => 1476,
+        AUTHOR => 50,
+        AUTHOR_NAME_STRING => 2093,
+        STATED_IN => 248,
+        HEIGHT => 2048,
+        DATE_OF_BIRTH => 569,
+        DATE_OF_DEATH => 570,
+        NET_WORTH => 2218,
+        SPOUSE => 26,
+        EDUCATED_AT => 69,
+        NUMBER_OF_CHILDREN => 1971,
+        AWARD_RECEIVED => 166,
+        OFFICIAL_NAME => 1448,
+        EMAIL => 968,
+        SIBLING => 3373,
+        NOMINATED_FOR => 1411,
+        PHONE => 1329,
+        EMPLOYEES => 1128,
+        INCEPTION => 571,
+        CEO => 169,
+        TICKER_SYMBOL => 249,
+        LEGAL_FORM => 1454,
+        FOUNDED_BY => 112,
+        SEX_OR_GENDER => 21,
+        CITIZENSHIP => 27,
+        PLACE_OF_BIRTH => 19,
+        PLACE_OF_DEATH => 20,
+        FATHER => 22,
+        UNMARRIED_PARTNER => 451,
+        CHILD => 40,
+        MOTHER => 25,
+        EYE_COLOR => 1340,
+        HAIR_COLOR => 1884,
+        HANDEDNESS => 552,
+        MILITARY_RANK => 410,
+        PRONOUN => 6553,
+        PSEUDONYM => 742,
+        TWITTER_USERNAME => 2002,
+        FB_ID => 2013,
+        YT_CHANNEL_ID => 2397,
+        IG_USERNAME => 2003,
+    }
+}
+
+#[cfg(test)]
+pub mod test {
+    use super::*;
+
+    #[test]
+    fn json_url() {
+        let entity = Qid(42);
+        assert_eq!(
+            entity.json_url(),
+            "https://www.wikidata.org/wiki/Special:EntityData/Q42.json"
+        );
+
+        let prop = Pid(31);
+        assert_eq!(
+            prop.json_url(),
+            "https://www.wikidata.org/wiki/Special:EntityData/P31.json"
+        );
+
+        let lexeme = Lid(1);
+        assert_eq!(
+            lexeme.json_url(),
+            "https://www.wikidata.org/wiki/Special:EntityData/L1.json"
+        )
+    }
+
+    #[test]
+    fn to_string() {
+        let entity = Qid(42);
+        assert_eq!(entity.to_string(), "Q42");
+
+        let prop = Pid(6);
+        assert_eq!(prop.to_string(), "P6");
+
+        let lexeme = Lid(2);
+        assert_eq!(lexeme.to_string(), "L2");
+
+        let sense = Sid(Lid(5), 9);
+        assert_eq!(sense.to_string(), "L5-S9");
+
+        let form = Fid(Lid(3), 11);
+        assert_eq!(form.to_string(), "L3-F11");
+    }
+
+    #[test]
+    fn from_str() {
+        assert_eq!(Qid::from_str("Q42").unwrap(), Qid(42));
+        assert_eq!(Lid::from_str("L944114").unwrap(), Lid(944114));
+        assert_eq!(Pid::from_str("P1341").unwrap(), Pid(1341));
+    }
+
+    #[test]
+    fn unit_suffix() {
+        assert_eq!(consts::unit_suffix(consts::METRE).unwrap(), " m");
+        assert_eq!(consts::unit_suffix(consts::DEGREE).unwrap(), "°");
+    }
+}
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..ab89529
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,5 @@
+//! Rust library for Wikidata. It has some support for Wikibase as well, although the main focus is
+//! supporting the Wikidata instance.
+
+pub mod entity;
+pub mod ids;