init library
This commit is contained in:
parent
8b0655d655
commit
0d330126fe
4 changed files with 942 additions and 0 deletions
429
src/ids.rs
Executable file
429
src/ids.rs
Executable file
|
@ -0,0 +1,429 @@
|
|||
//! Various ID types used by Wikidata.
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::{num::ParseIntError, str::FromStr};
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum IdParseError {
|
||||
UnparseableNumber(ParseIntError),
|
||||
InvalidPrefix,
|
||||
}
|
||||
|
||||
macro_rules! id_def {
|
||||
($name:ident, $full_name:expr, $letter:expr) => {
|
||||
#[derive(
|
||||
Debug, Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize,
|
||||
)]
|
||||
pub struct $name(pub u32);
|
||||
|
||||
impl $name {
|
||||
/// Get the URL to access data about the claim on Wikidata.
|
||||
#[must_use]
|
||||
pub fn json_url(&self) -> String {
|
||||
let mut ret = String::new();
|
||||
ret.push_str(concat!(
|
||||
"https://www.wikidata.org/wiki/Special:EntityData/",
|
||||
$letter
|
||||
));
|
||||
match self {
|
||||
$name(num) => {
|
||||
ret.push_str(&num.to_string());
|
||||
}
|
||||
}
|
||||
ret.push_str(".json");
|
||||
ret
|
||||
}
|
||||
}
|
||||
impl FromStr for $name {
|
||||
type Err = IdParseError;
|
||||
|
||||
/// Parse the identifier from a string.
|
||||
fn from_str(x: &str) -> Result<Self, Self::Err> {
|
||||
if x.is_empty() {
|
||||
return Err(IdParseError::InvalidPrefix);
|
||||
}
|
||||
let num_str = &x[1..];
|
||||
match num_str.parse() {
|
||||
Ok(num) => Ok(Self(num)),
|
||||
Err(e) => Err(IdParseError::UnparseableNumber(e)),
|
||||
}
|
||||
}
|
||||
}
|
||||
impl std::fmt::Display for $name {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, concat!($letter, "{}"), self.0)
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
id_def!(Qid, "entity ID", "Q");
|
||||
id_def!(Pid, "property ID", "P");
|
||||
id_def!(Lid, "lexeme ID", "L");
|
||||
|
||||
/// A lexeme ID and associated form ID
|
||||
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
|
||||
// see id_def! comment about datatype
|
||||
pub struct Fid(pub Lid, pub u16);
|
||||
|
||||
/// A lexeme ID and assoicated sense ID
|
||||
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
|
||||
// see id_def! comment about datatype
|
||||
pub struct Sid(pub Lid, pub u16);
|
||||
|
||||
impl ToString for Fid {
|
||||
#[must_use]
|
||||
fn to_string(&self) -> String {
|
||||
match self {
|
||||
Fid(Lid(lid), fid) => format!("L{}-F{}", lid, fid),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ToString for Sid {
|
||||
#[must_use]
|
||||
fn to_string(&self) -> String {
|
||||
match self {
|
||||
Sid(Lid(lid), sid) => format!("L{}-S{}", lid, sid),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
macro_rules! qid_consts (
|
||||
{ $($key:ident => $value:expr),+, } => {
|
||||
$(
|
||||
pub const $key: Qid = Qid($value);
|
||||
)+
|
||||
};
|
||||
);
|
||||
macro_rules! pid_consts (
|
||||
{ $($key:ident => $value:expr),+, } => {
|
||||
$(
|
||||
pub const $key: Pid = Pid($value);
|
||||
)+
|
||||
};
|
||||
);
|
||||
|
||||
macro_rules! qid_unit_suffixes {
|
||||
{ $($key:ident => $value:expr),+, } => {
|
||||
#[must_use]
|
||||
pub fn unit_suffix(qid: Qid) -> Option<&'static str> {
|
||||
$(
|
||||
if qid == $key {
|
||||
Some($value)
|
||||
} else
|
||||
)+
|
||||
{
|
||||
None
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
#[allow(clippy::unreadable_literal)]
|
||||
/// Various IDs for commonly used entities/properties on Wikidata.
|
||||
pub mod consts {
|
||||
use super::*;
|
||||
qid_consts! {
|
||||
EARTH => 2,
|
||||
HUMAN => 5,
|
||||
UNIT_OF_MEASUREMENT => 47574,
|
||||
PHYSICAL_QUANTITY => 107715,
|
||||
SI_BASE_UNIT => 223662,
|
||||
LENGTH => 36253,
|
||||
METRE => 11573,
|
||||
YOTTAMETRE => 10543042,
|
||||
ZETTAMETRE => 3277915,
|
||||
EXAMETRE => 3277907,
|
||||
PETAMETRE => 3277919,
|
||||
TERAMETRE => 3267417,
|
||||
GIGAMETRE => 854546,
|
||||
MEGAMETRE => 1054140,
|
||||
MYRIAMETRE => 1970718,
|
||||
KILOMETRE => 828224,
|
||||
HECTOMETRE => 844338,
|
||||
DECAMETRE => 848856,
|
||||
DECIMETRE => 200323,
|
||||
CENTIMETRE => 174728,
|
||||
MILLIMETRE => 174789,
|
||||
MICROMETRE => 175821,
|
||||
NANOMETRE => 178674,
|
||||
PICOMETRE => 192274,
|
||||
FEMTOMETRE => 208788,
|
||||
ATTOMETRE => 6003257,
|
||||
ZEPTOMETRE => 3270676,
|
||||
YOCTOMETRE => 3221356,
|
||||
PARSEC => 12129,
|
||||
GIGAPARSEC => 14916719,
|
||||
MEGAPARSEC => 3773454,
|
||||
KILOPARSEC => 11929860,
|
||||
ATTOPARSEC => 15784325,
|
||||
LIGHT_YEAR => 531,
|
||||
LIGHT_SECOND => 909315,
|
||||
ASTRONOMICAL_UNIT => 1811,
|
||||
MILE => 253276,
|
||||
FOOT => 3710,
|
||||
INCH => 218593,
|
||||
THOU => 1165799,
|
||||
AREA => 11500,
|
||||
SQUARE_METRE => 25343,
|
||||
SQUARE_KILOMETRE => 712226,
|
||||
SQUARE_CENTIMETRE => 2489298,
|
||||
SQUARE_MILLIMETRE => 2737347,
|
||||
ARE => 185078,
|
||||
HECTARE => 35852,
|
||||
VOLUME => 39297,
|
||||
CUBIC_METRE => 25517,
|
||||
CUBIC_KILOMETRE => 4243638,
|
||||
CUBIC_DECIMETRE => 2175964,
|
||||
CUBIC_CENTIMETRE => 1022113,
|
||||
CUBIC_MILLIMETRE => 3675550,
|
||||
LITER => 11582,
|
||||
HECTOLITER => 2029519,
|
||||
DECALITER => 2637946,
|
||||
CENTILITER => 1815100,
|
||||
MILLILITER => 2332346,
|
||||
MICROLITER => 2282891,
|
||||
PICOLITER => 3902688,
|
||||
FEMTOLITER => 3312063,
|
||||
TIME => 11471,
|
||||
FREQUENCY => 11652,
|
||||
TIME_INTERVAL => 186081,
|
||||
SECOND => 11574,
|
||||
MILLISECOND => 723733,
|
||||
MICROSECOND => 842015,
|
||||
NANOSECOND => 838801,
|
||||
FEMTOSECOND => 1777507,
|
||||
ATTOSECOND => 2483628,
|
||||
PICOSECOND => 3902709,
|
||||
DAY => 573,
|
||||
WEEK => 23387,
|
||||
HOUR => 25235,
|
||||
MINUTE => 7727,
|
||||
MONTH => 5151,
|
||||
ANNUM => 1092296,
|
||||
YEAR => 577,
|
||||
TWENTY_FOUR_HOUR_CLOCK => 216589,
|
||||
HERTZ => 39369,
|
||||
KILOHERTZ => 2143992,
|
||||
MEGAHERTZ => 732707,
|
||||
GIGAHERTZ => 3276763,
|
||||
MASS => 11423,
|
||||
KILOGRAM => 11570,
|
||||
YOTTAGRAM => 613726,
|
||||
ZETTAGRAM => 14754979,
|
||||
EXAGRAM => 2655272,
|
||||
PETAGRAM => 2612219,
|
||||
TERAGRAM => 1770733,
|
||||
GIGAGRAM => 2799294,
|
||||
MEGAGRAM => 11776930,
|
||||
MYRIAGRAM => 2151240,
|
||||
HECTOGRAM => 1057069,
|
||||
DECAGRAM => 6517513,
|
||||
GRAM => 41803,
|
||||
DECIGRAM => 1772386,
|
||||
CENTIGRAM => 2691798,
|
||||
MILLIGRAM => 3241121,
|
||||
MICROGRAM => 1645498,
|
||||
NANOGRAM => 2282906,
|
||||
PICOGRAM => 3239557,
|
||||
FEMTOGRAM => 1913097,
|
||||
ATTOGRAM => 2438073,
|
||||
ZEPTOGRAM => 6171168,
|
||||
YOCTOGRAM => 6170164,
|
||||
POUND => 100995,
|
||||
DALTON => 483261,
|
||||
DENSITY => 29539,
|
||||
KILOGRAM_PER_CUBIC_METRE => 844211,
|
||||
GRAM_PER_CUBIC_CENTIMETRE => 13147228,
|
||||
CONCENTRATION => 3686031,
|
||||
GRAM_PER_LITER => 834105,
|
||||
MILLILITRE_PER_LITRE => 21075844,
|
||||
MILLIGRAM_PER_CUBIC_METER => 21077820,
|
||||
MOL_PER_KILOGRAM_OF_SOLVENT => 21064838,
|
||||
MOL_PER_LITRE_OF_SOLUTION => 21064845,
|
||||
MASS_FRACTION => 899138,
|
||||
MOLE_FRACTION => 125264,
|
||||
VOLUME_FRACTION => 909482,
|
||||
PARTS_PER_MILLION => 21006887,
|
||||
PART_PER_BILLION => 2055118,
|
||||
MILLIGRAM_PER_KILOGRAM => 21091747,
|
||||
GRAM_PER_KILOGRAM => 21061369,
|
||||
TEMPERATURE => 11466,
|
||||
DEGREE_CELSIUS => 25267,
|
||||
KELVIN => 11579,
|
||||
DEGREE_FAHRENHEIT => 42289,
|
||||
RANKINE_SCALE => 207488,
|
||||
PRESSURE => 39552,
|
||||
ATMOSPHERE => 177974,
|
||||
TECHNICAL_ATMOSPHERE => 909066,
|
||||
BAR => 103510,
|
||||
PASCAL => 44395,
|
||||
MEGAPASCAL => 21062777,
|
||||
KILOPASCAL => 21064807,
|
||||
HECTOPASCAL => 5139563,
|
||||
TORR => 185648,
|
||||
MILLIMETER_OF_MERCURY => 6859652,
|
||||
METRE_OF_WATER => 2042279,
|
||||
CENTIMETRE_OF_WATER => 1247300,
|
||||
MILLIMETRE_OF_WATER => 13479685,
|
||||
HEAT_CAPACITY => 179388,
|
||||
JOULE_PER_MOLE_KELVIN => 20966455,
|
||||
THERMAL_CONDUCTIVITY => 487005,
|
||||
WATT_PER_METRE_KELVIN => 1463969,
|
||||
SPEED => 3711325,
|
||||
KILOMETRE_PER_HOUR => 180154,
|
||||
METRE_PER_SECOND => 182429,
|
||||
KNOT => 128822,
|
||||
KINEMATIC_VISCOSITY => 15106259,
|
||||
STOKES => 1569733,
|
||||
ELECTRICAL_CONDUCTIVITY => 4593291,
|
||||
AMPERE_PER_VOLT_METRE => 20966435,
|
||||
LUMINOSITY => 105902,
|
||||
SOLAR_LUMINOSITY => 843877,
|
||||
ENTHALPY => 161064,
|
||||
JOULE_PER_MOLE => 13035094,
|
||||
KILOJOULE_PER_MOLE => 752197,
|
||||
KILOJOULE_PER_KILOGRAM => 21077849,
|
||||
CURRENCY => 8142,
|
||||
EURO => 4916,
|
||||
CRORE => 1137675,
|
||||
INFECTION => 166231,
|
||||
DEGREE => 28390,
|
||||
BUSINESS => 4830453,
|
||||
FICTIONAL_HUMAN => 15632617,
|
||||
}
|
||||
// only include common ones
|
||||
qid_unit_suffixes! {
|
||||
METRE => " m",
|
||||
KILOMETRE => " km",
|
||||
CENTIMETRE => " cm",
|
||||
MILLIMETRE => " mm",
|
||||
SQUARE_METRE => " m²",
|
||||
SQUARE_KILOMETRE => " km²",
|
||||
SQUARE_CENTIMETRE => " cm²",
|
||||
SQUARE_MILLIMETRE => " mm²",
|
||||
CUBIC_METRE => " m³",
|
||||
CUBIC_KILOMETRE => " km³",
|
||||
CUBIC_CENTIMETRE => " cm³",
|
||||
CUBIC_MILLIMETRE => " mm³",
|
||||
GRAM => " g",
|
||||
MILLIGRAM => " mg",
|
||||
KILOGRAM_PER_CUBIC_METRE => " kg/m³",
|
||||
GRAM_PER_CUBIC_CENTIMETRE => " g/cm³",
|
||||
MILLILITRE_PER_LITRE => " ml/l",
|
||||
MILLIGRAM_PER_CUBIC_METER => " mg/cm³",
|
||||
PARTS_PER_MILLION => " ppm",
|
||||
MILLIGRAM_PER_KILOGRAM => " mg/k",
|
||||
GRAM_PER_KILOGRAM => " g/kg",
|
||||
DEGREE_CELSIUS => " °C",
|
||||
KELVIN => " °K",
|
||||
DEGREE_FAHRENHEIT => " °F",
|
||||
KILOMETRE_PER_HOUR => " km/h",
|
||||
ASTRONOMICAL_UNIT => " AU",
|
||||
DEGREE => "°",
|
||||
}
|
||||
pid_consts! {
|
||||
INSTANCE_OF => 31,
|
||||
REFERENCE_URL => 854,
|
||||
LANGUAGE => 407, // language of work or name fully
|
||||
TITLE => 1476,
|
||||
AUTHOR => 50,
|
||||
AUTHOR_NAME_STRING => 2093,
|
||||
STATED_IN => 248,
|
||||
HEIGHT => 2048,
|
||||
DATE_OF_BIRTH => 569,
|
||||
DATE_OF_DEATH => 570,
|
||||
NET_WORTH => 2218,
|
||||
SPOUSE => 26,
|
||||
EDUCATED_AT => 69,
|
||||
NUMBER_OF_CHILDREN => 1971,
|
||||
AWARD_RECEIVED => 166,
|
||||
OFFICIAL_NAME => 1448,
|
||||
EMAIL => 968,
|
||||
SIBLING => 3373,
|
||||
NOMINATED_FOR => 1411,
|
||||
PHONE => 1329,
|
||||
EMPLOYEES => 1128,
|
||||
INCEPTION => 571,
|
||||
CEO => 169,
|
||||
TICKER_SYMBOL => 249,
|
||||
LEGAL_FORM => 1454,
|
||||
FOUNDED_BY => 112,
|
||||
SEX_OR_GENDER => 21,
|
||||
CITIZENSHIP => 27,
|
||||
PLACE_OF_BIRTH => 19,
|
||||
PLACE_OF_DEATH => 570,
|
||||
FATHER => 22,
|
||||
UNMARRIED_PARTNER => 451,
|
||||
CHILD => 40,
|
||||
MOTHER => 25,
|
||||
EYE_COLOR => 1340,
|
||||
HAIR_COLOR => 1884,
|
||||
HANDEDNESS => 552,
|
||||
MILITARY_RANK => 410,
|
||||
PRONOUN => 6553,
|
||||
PSUEDONYM => 742,
|
||||
TWITTER_USERNAME => 2002,
|
||||
FB_ID => 2013,
|
||||
YT_CHANNEL_ID => 2397,
|
||||
IG_USERNAME => 2003,
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub mod test {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn json_url() {
|
||||
let entity = Qid(42);
|
||||
assert_eq!(
|
||||
entity.json_url(),
|
||||
"https://www.wikidata.org/wiki/Special:EntityData/Q42.json"
|
||||
);
|
||||
|
||||
let prop = Pid(31);
|
||||
assert_eq!(
|
||||
prop.json_url(),
|
||||
"https://www.wikidata.org/wiki/Special:EntityData/P31.json"
|
||||
);
|
||||
|
||||
let lexeme = Lid(1);
|
||||
assert_eq!(
|
||||
lexeme.json_url(),
|
||||
"https://www.wikidata.org/wiki/Special:EntityData/L1.json"
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn to_string() {
|
||||
let entity = Qid(42);
|
||||
assert_eq!(entity.to_string(), "Q42");
|
||||
|
||||
let prop = Pid(6);
|
||||
assert_eq!(prop.to_string(), "P6");
|
||||
|
||||
let lexeme = Lid(2);
|
||||
assert_eq!(lexeme.to_string(), "L2");
|
||||
|
||||
let sense = Sid(Lid(5), 9);
|
||||
assert_eq!(sense.to_string(), "L5-S9");
|
||||
|
||||
let form = Fid(Lid(3), 11);
|
||||
assert_eq!(form.to_string(), "L3-F11");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn from_str() {
|
||||
assert_eq!(Qid::from_str("Q42").unwrap(), Qid(42));
|
||||
assert_eq!(Lid::from_str("L944114").unwrap(), Lid(944114));
|
||||
assert_eq!(Pid::from_str("P1341").unwrap(), Pid(1341));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn unit_suffix() {
|
||||
assert_eq!(consts::unit_suffix(consts::METRE).unwrap(), " m");
|
||||
assert_eq!(consts::unit_suffix(consts::DEGREE).unwrap(), "°");
|
||||
}
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue