From 0d330126feb4a628ff331a47ec1d5445750c5877 Mon Sep 17 00:00:00 2001
From: Smitty
Date: Thu, 27 May 2021 17:36:20 -0400
Subject: [PATCH] init library

---
 Cargo.toml    |  11 ++
 src/entity.rs | 497 ++++++++++++++++++++++++++++++++++++++++++++++++++
 src/ids.rs    | 429 +++++++++++++++++++++++++++++++++++++++++++
 src/lib.rs    |   5 +
 4 files changed, 942 insertions(+)
 create mode 100644 Cargo.toml
 create mode 100755 src/entity.rs
 create mode 100755 src/ids.rs
 create mode 100644 src/lib.rs

diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..ace907c
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,11 @@
+[package]
+name = "wikidata"
+version = "0.1.0"
+authors = ["Smitty"]
+edition = "2018"
+
+[dependencies]
+chrono = { version = "0.4.19", features = ["std", "serde", "clock"], default-features = false }
+serde = { version = "1.0.126", features = ["derive"] }
+json = "0.12.4"
+lazy_static = "1.4.0"
diff --git a/src/entity.rs b/src/entity.rs
new file mode 100755
index 0000000..0124924
--- /dev/null
+++ b/src/entity.rs
@@ -0,0 +1,497 @@
+use crate::ids::{consts, Fid, Lid, Pid, Qid, Sid};
+use chrono::{DateTime, TimeZone, Utc};
+use serde::{Deserialize, Serialize};
+
+/// A Wikibase entity.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct Entity {
+    pub claims: Vec<(Pid, ClaimValue)>,
+}
+
+/// Data relating to a claim value.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub enum ClaimValueData {
+    CommonsMedia(String),
+    GlobeCoordinate {
+        lat: f64,
+        lon: f64,
+        precision: f64,
+        globe: Qid,
+    },
+    Item(Qid),
+    Property(Pid),
+    String(String),
+    MonolingualText {
+        text: String,
+        lang: String,
+    },
+    ExternalID(String),
+    Quantity {
+        amount: f64, // technically it could exceed the bounds of an f64, but meh
+        lower_bound: Option<f64>,
+        upper_bound: Option<f64>,
+        // the unit *could* be any IRI, but in practice almost all are Wikidata entity IRIs
+        unit: Option<Qid>,
+    },
+    DateTime {
+        date_time: DateTime<Utc>,
+        /// 0 - billion years
+        /// 1 - 100 million years
+        /// 2 - 10 million years
+        /// 3 - 1 million years
+        /// 4 - 100k years
+        /// 5 - 10k years
+        /// 6 - 1000 years
+        /// 7 - 100 years
+        /// 8 - decade
+        /// 9 - year
+        /// 10 - month
+        /// 11 - day
+        /// 12 - hour (deprecated)
+        /// 13 - minute (deprecated)
+        /// 14 - second (deprecated)
+        precision: u8,
+    },
+    Url(String),
+    MathExpr(String),
+    GeoShape(String),
+    MusicNotation(String),
+    TabularData(String),
+    Lexeme(Lid),
+    Form(Fid),
+    Sense(Sid),
+    NoValue,
+    UnknownValue,
+}
+
+impl Default for ClaimValueData {
+    fn default() -> Self {
+        ClaimValueData::NoValue
+    }
+}
+
+/// A statement rank.
+#[derive(Debug, Copy, Clone, Eq, PartialEq, PartialOrd, Ord, Serialize, Deserialize)]
+pub enum Rank {
+    Deprecated,
+    Normal,
+    Preferred,
+}
+
+impl Default for Rank {
+    fn default() -> Self {
+        Rank::Normal
+    }
+}
+
+/// A group of claims that make up a single reference.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct ReferenceGroup {
+    pub claims: Vec<(Pid, ClaimValueData)>,
+}
+
+/// A claim value.
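+/// Pairs the parsed data with the statement's rank, ID, qualifiers, and
+/// references.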
+#[derive(Debug, Clone, Default, Serialize, Deserialize)]
+pub struct ClaimValue {
+    pub data: ClaimValueData,
+    pub rank: Rank,
+    pub id: String,
+    pub qualifiers: Vec<(Pid, ClaimValueData)>,
+    pub references: Vec<ReferenceGroup>,
+}
+
+impl Entity {
+    /// All values of the entity's "instance of" (P31) claims.
+    pub fn instances(&self) -> Vec<Qid> {
+        let mut instances = Vec::with_capacity(1);
+        for (pid, claim) in &self.claims {
+            if *pid == consts::INSTANCE_OF {
+                if let ClaimValueData::Item(qid) = claim.data {
+                    instances.push(qid);
+                }
+            }
+        }
+        instances.shrink_to_fit();
+        instances
+    }
+
+    /// The entity's date of birth (P569), if any.
+    pub fn start_time(&self) -> Option<DateTime<Utc>> {
+        for (pid, claim) in &self.claims {
+            if *pid == consts::DATE_OF_BIRTH {
+                if let ClaimValueData::DateTime { date_time, .. } = claim.data {
+                    return Some(date_time);
+                }
+            }
+        }
+        None
+    }
+
+    /// The entity's date of death (P570), if any.
+    pub fn end_time(&self) -> Option<DateTime<Utc>> {
+        for (pid, claim) in &self.claims {
+            if *pid == consts::DATE_OF_DEATH {
+                if let ClaimValueData::DateTime { date_time, .. } = claim.data {
+                    return Some(date_time);
+                }
+            }
+        }
+        None
+    }
+
+    /// The number of days that the entity was in operation. For animals, this
+    /// is lifespan; for corporations and other legal entities it's the time
+    /// between formation and dissolution. Entities without an end date are
+    /// measured up to the present.
+    pub fn timespan(&self) -> Option<u64> {
+        let start = self.start_time()?;
+        let end = self.end_time().unwrap_or_else(Utc::now);
+        let days = end.signed_duration_since(start).num_days();
+        if days < 0 {
+            None
+        } else {
+            Some(days as u64)
+        }
+    }
+}
+
+fn get_json_string(mut json: json::JsonValue) -> String {
+    match json.take_string() {
+        Some(x) => x,
+        None => panic!("get_json_string called with a non-string JsonValue"),
+    }
+}
+
+fn parse_wb_number(num: &json::JsonValue) -> Option<f64> {
+    // could be a string representing a number, or a number
+    if num.is_number() {
+        Some(num.as_number()?.into())
+    } else {
+        let s = num.as_str()?;
+        match s.parse() {
+            Ok(x) => Some(x),
+            Err(_) => None,
+        }
+    }
+}
+
+fn try_get_as_qid(datavalue: &json::JsonValue) -> Option<Qid> {
+    match datavalue
+        .as_str()?
+        .split("http://www.wikidata.org/entity/Q")
+        .nth(1)?
+        .parse()
+    {
+        Ok(x) => Some(Qid(x)),
+        Err(_) => None,
+    }
+}
+
+fn take_prop(key: &'static str, claim: &mut json::JsonValue) -> json::JsonValue {
+    claim.remove(key)
+}
+
+fn parse_wb_time(time: &str) -> Option<DateTime<Utc>> {
+    if time.is_empty() {
+        return None;
+    }
+
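+    // Wikidata timestamps look like "+2001-12-31T00:00:00Z": a leading sign,
+    // a year that may be longer than four digits, and "00" for the month or
+    // day when they are unknown (see the test cases below).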
+    // "Negative years are allowed in formatting but not in parsing.", so we
+    // set the era ourselves, after parsing
+    let is_ce = time.chars().next()? == '+';
+    let time = &time[1..];
+
+    let time_parts: Vec<&str> = time.split('T').collect();
+    let dash_parts: Vec<&str> = time_parts[0].split('-').collect();
+    // this could be wrong if the precision is coarser than a year, meh
+    let year: i32 = match dash_parts[0].parse() {
+        Ok(x) => x,
+        Err(_) => return None,
+    };
+    let year: i32 = year * (if is_ce { 1 } else { -1 });
+    let month: Option<u32> = match dash_parts.get(1) {
+        Some(month_str) => match month_str.parse() {
+            Ok(0) | Err(_) => None,
+            Ok(x) => Some(x),
+        },
+        None => None,
+    };
+    let day: Option<u32> = match dash_parts.get(2) {
+        Some(day_str) => match day_str.parse() {
+            Ok(0) | Err(_) => None,
+            Ok(x) => Some(x),
+        },
+        None => None,
+    };
+    let maybe_date = Utc.ymd_opt(year, month.unwrap_or(1), day.unwrap_or(1));
+    let date = match maybe_date {
+        chrono::offset::LocalResult::Single(date) => date,
+        _ => return None, // matched zero dates or matched multiple
+    };
+    let (hour, min, sec) = if time_parts.len() == 2 {
+        let colon_parts: Vec<&str> = time_parts[1].split(':').collect();
+        let hour = match colon_parts.get(0)?.parse() {
+            Ok(x) => x,
+            Err(_) => return None,
+        };
+        let minute = match colon_parts.get(1)?.parse() {
+            Ok(x) => x,
+            Err(_) => return None,
+        };
+        let sec = match colon_parts.get(2)?[0..2].parse() {
+            Ok(x) => x,
+            Err(_) => return None,
+        };
+        (hour, minute, sec)
+    } else {
+        (0, 0, 0)
+    };
+    Some(date.and_hms(hour, min, sec))
+}
+
+impl ClaimValueData {
+    /// Parses a snak. Returns `None` for value types the library doesn't
+    /// support; panics if the snak itself is structurally malformed.
+    #[must_use]
+    pub fn parse_snak(mut snak: json::JsonValue) -> Option<Self> {
+        let mut datavalue: json::JsonValue = take_prop("datavalue", &mut snak);
+        let datatype: &str = &get_json_string(take_prop("datatype", &mut snak));
+        let snaktype: &str = &get_json_string(take_prop("snaktype", &mut snak));
+        match snaktype {
+            "value" => {}
+            "somevalue" => return Some(ClaimValueData::UnknownValue),
+            "novalue" => return Some(ClaimValueData::NoValue),
+            x => panic!(
+                "Expected snaktype to be value, somevalue, or novalue, but it was {}",
+                x
+            ),
+        };
+        let type_str = take_prop("type", &mut datavalue)
+            .take_string()
+            .expect("Invalid datavalue type. Perhaps a new data type has been added?");
+        let mut value = take_prop("value", &mut datavalue);
+        match &type_str[..] {
+            "string" => {
+                let s = value
+                    .take_string()
+                    .expect("expected string, didn't find one");
+                match datatype {
+                    "string" => Some(ClaimValueData::String(s)),
+                    "commonsMedia" => Some(ClaimValueData::CommonsMedia(s)),
+                    "external-id" => Some(ClaimValueData::ExternalID(s)),
+                    "math" => Some(ClaimValueData::MathExpr(s)),
+                    "geo-shape" => Some(ClaimValueData::GeoShape(s)),
+                    "musical-notation" => Some(ClaimValueData::MusicNotation(s)),
+                    "tabular-data" => Some(ClaimValueData::TabularData(s)),
+                    "url" => Some(ClaimValueData::Url(s)),
+                    _ => {
+                        eprintln!("Invalid datatype {}", datatype);
+                        None
+                    }
+                }
+            }
+            "wikibase-entityid" => {
+                // the ID could be an entity, lexeme, property, form, or sense
+                let id = get_json_string(take_prop("id", &mut value));
+                match id.chars().next().expect("Entity ID was empty string") {
+                    'Q' => Some(ClaimValueData::Item(Qid(id[1..]
+                        .parse()
+                        .expect("Malformed entity ID")))),
+                    'P' => Some(ClaimValueData::Property(Pid(id[1..]
+                        .parse()
+                        .expect("Malformed property ID")))),
+                    'L' => {
+                        // sense: "L1-S2", form: "L1-F2", lexeme: "L2"
+                        let parts: Vec<&str> = id.split('-').collect();
+                        match parts.len() {
+                            1 => Some(ClaimValueData::Lexeme(Lid(id[1..]
+                                .parse()
+                                .expect("Malformed lexeme ID")))),
+                            2 => {
+                                match parts[1]
+                                    .chars()
+                                    .next()
+                                    .expect("Nothing after dash in lexeme ID")
+                                {
+                                    'F' => Some(ClaimValueData::Form(Fid(
+                                        Lid(parts[0][1..].parse().expect("Malformed lexeme ID")),
+                                        parts[1][1..].parse().expect("Invalid form ID"),
+                                    ))),
+                                    'S' => Some(ClaimValueData::Sense(Sid(
+                                        Lid(parts[0][1..].parse().expect("Malformed lexeme ID")),
+                                        parts[1][1..].parse().expect("Invalid sense ID"),
+                                    ))),
+                                    _ => panic!("Invalid second part of lexeme ID"),
+                                }
+                            }
+                            _ => panic!("Lexeme ID had more than 1 dash"),
+                        }
+                    }
+                    _ => panic!("Couldn't parse entity ID"),
+                }
+            }
+            "globecoordinate" => {
+                Some(ClaimValueData::GlobeCoordinate {
+                    lat: parse_wb_number(&take_prop("latitude", &mut value))?,
+                    lon: parse_wb_number(&take_prop("longitude", &mut value))?,
+                    // the altitude field is deprecated and we ignore it
+                    precision: parse_wb_number(&take_prop("precision", &mut value))?,
+                    // globe *can* be any IRI, but in practice it's almost always an entity URI,
+                    // so we return None if it doesn't match our expectations
+                    globe: try_get_as_qid(&take_prop("globe", &mut value))?,
+                })
+            }
+            "quantity" => Some(ClaimValueData::Quantity {
+                amount: parse_wb_number(&take_prop("amount", &mut value))?,
+                upper_bound: parse_wb_number(&take_prop("upperBound", &mut value)),
+                lower_bound: parse_wb_number(&take_prop("lowerBound", &mut value)),
+                unit: try_get_as_qid(&take_prop("unit", &mut value)),
+            }),
+            "time" => Some(ClaimValueData::DateTime {
+                // our time parsing code can't handle a few edge cases (really old years), so we
+                // just give up on parsing the snak if parse_wb_time returns None
+                date_time: parse_wb_time(&get_json_string(take_prop("time", &mut value)))?,
+                precision: parse_wb_number(&take_prop("precision", &mut value))
+                    .expect("Invalid precision") as u8,
+            }),
+            "monolingualtext" => Some(ClaimValueData::MonolingualText {
+                text: get_json_string(take_prop("text", &mut value)),
+                lang: get_json_string(take_prop("language", &mut value)),
+            }),
+            other => {
+                eprintln!("Couldn't parse data type {}", other);
+                None
+            }
+        }
+    }
+}
+
+impl ClaimValue {
+    /// Builds a `ClaimValue` from a claim JSON object. Deprecated-rank claims
+    /// are dropped, and `None` is returned if the mainsnak can't be parsed.
+    #[must_use]
+    pub fn get_prop_from_snak(mut claim: json::JsonValue, skip_id: bool) -> Option<Self> {
+        let claim_str = take_prop("rank", &mut claim)
+            .take_string()
+            .expect("No rank");
+        let rank = match &claim_str[..] {
+            "deprecated" => {
+                return None;
+            }
+            "normal" => Rank::Normal,
+            "preferred" => Rank::Preferred,
+            other => panic!("Invalid rank {}", other),
+        };
+        let mainsnak = take_prop("mainsnak", &mut claim);
+        let data = match ClaimValueData::parse_snak(mainsnak) {
+            Some(x) => x,
+            None => {
+                eprintln!("Failed to parse mainsnak");
+                return None;
+            }
+        };
+        let references_json = take_prop("references", &mut claim);
+        let references = if references_json.is_array() {
+            let mut v: Vec<ReferenceGroup> = Vec::with_capacity(references_json.len());
+            let mut references_vec = if let json::JsonValue::Array(a) = references_json {
+                a
+            } else {
+                panic!("references not an array");
+            };
+            for mut reference_group in references_vec.drain(..) {
+                let mut claims = Vec::with_capacity(reference_group["snaks"].len());
+                let snaks = take_prop("snaks", &mut reference_group);
+                let mut entries: Vec<(&str, &json::JsonValue)> = snaks.entries().collect();
+                for (pid, snak_group) in entries.drain(..) {
+                    let mut members: Vec<&json::JsonValue> = snak_group.members().collect();
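+                    // "snaks" maps each property ID to an array of snaks;
+                    // every snak that parses contributes one (property, value)
+                    // pair to this reference group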
+                    for snak in members.drain(..) {
+                        // clone, meh
+                        let owned_snak = snak.clone().take();
+                        match ClaimValueData::parse_snak(owned_snak) {
+                            Some(x) => claims
+                                .push((Pid(pid[1..].parse().expect("Invalid property ID")), x)),
+                            None => {
+                                eprintln!("Failed to parse reference snak");
+                            }
+                        }
+                    }
+                }
+                v.push(ReferenceGroup { claims });
+            }
+            v
+        } else {
+            vec![]
+        };
+        let qualifiers_json = take_prop("qualifiers", &mut claim);
+        let qualifiers = if qualifiers_json.is_object() {
+            let mut v: Vec<(Pid, ClaimValueData)> = vec![];
+            let mut entries: Vec<(&str, &json::JsonValue)> = qualifiers_json.entries().collect();
+            for (pid, claim_array_json) in entries.drain(..) {
+                // yep it's a clone, meh
+                let mut claim_array =
+                    if let json::JsonValue::Array(x) = claim_array_json.clone().take() {
+                        x
+                    } else {
+                        panic!("qualifiers doesn't have a claim array");
+                    };
+                for claim in claim_array.drain(..) {
+                    match ClaimValueData::parse_snak(claim) {
+                        Some(x) => v.push((Pid(pid[1..].parse().expect("Invalid property ID")), x)),
+                        None => {
+                            eprintln!("Failed to parse qualifier snak");
+                        }
+                    };
+                }
+            }
+            v
+        } else {
+            vec![]
+        };
+        Some(ClaimValue {
+            rank,
+            id: if skip_id {
+                String::new()
+            } else {
+                take_prop("id", &mut claim)
+                    .take_string()
+                    .expect("No id on snak")
+            },
+            data,
+            references,
+            qualifiers,
+        })
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+
+    #[test]
+    fn time_parsing() {
+        let valid_times = vec![
+            "+2001-12-31T00:00:00Z",
+            "+12346-12-31T00:00:00Z",
+            "+311-12-31T00:00:00Z",
+            "+1979-00-00T00:00:00Z",
+            "-1979-00-00T00:00:00Z",
+            "+2001-12-31T00:00:00Z",
+            "+2001-12-31",
+            "+2001-12",
+            "-12561",
+            "+311-12-31T12:34:56Z",
+            "+311-12-31T23:45:42Z",
+            // below are times that *should* work, but chrono doesn't accept
+            // "-410000000-00-00T00:00:00Z",
+        ];
+        for time in valid_times {
+            println!("Trying \"{}\"", time);
+            assert!(match parse_wb_time(time) {
+                Some(val) => {
+                    println!("Got {:#?}", val);
+                    true
+                }
+                None => false,
+            });
+        }
+    }
+
+    #[test]
+    fn as_qid_test() {
+        let qid =
+            try_get_as_qid(&json::parse(r#""http://www.wikidata.org/entity/Q1234567""#).unwrap());
+        assert_eq!(qid, Some(Qid(1234567)));
+    }
+}
diff --git a/src/ids.rs b/src/ids.rs
new file mode 100755
index 0000000..61c5a36
--- /dev/null
+++ b/src/ids.rs
@@ -0,0 +1,429 @@
+//! Various ID types used by Wikidata.
+
+use serde::{Deserialize, Serialize};
+use std::{num::ParseIntError, str::FromStr};
+
+/// An error encountered while parsing an ID from a string.
+#[derive(Debug, Clone)]
+pub enum IdParseError {
+    UnparseableNumber(ParseIntError),
+    InvalidPrefix,
+}
+
+macro_rules! id_def {
+    ($name:ident, $full_name:expr, $letter:expr) => {
+        #[derive(
+            Debug, Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize,
+        )]
+        pub struct $name(pub u32);
+
+        impl $name {
+            /// Get the URL for the JSON data about this ID on Wikidata.
+            #[must_use]
+            pub fn json_url(&self) -> String {
+                let mut ret = String::new();
+                ret.push_str(concat!(
+                    "https://www.wikidata.org/wiki/Special:EntityData/",
+                    $letter
+                ));
+                match self {
+                    $name(num) => {
+                        ret.push_str(&num.to_string());
+                    }
+                }
+                ret.push_str(".json");
+                ret
+            }
+        }
+        impl FromStr for $name {
+            type Err = IdParseError;
+
+            /// Parse the identifier from a string.
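+            /// The input must be the type's letter prefix followed by the
+            /// numeric ID, e.g. `"Q42"` (`Qid` shown below; `Pid` and `Lid`
+            /// work the same way):
+            ///
+            /// ```
+            /// # use std::str::FromStr;
+            /// # use wikidata::ids::Qid;
+            /// assert_eq!(Qid::from_str("Q42").unwrap(), Qid(42));
+            /// ```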
+            fn from_str(x: &str) -> Result<Self, Self::Err> {
+                // reject IDs with the wrong prefix letter (this also covers
+                // the empty string)
+                if !x.starts_with($letter) {
+                    return Err(IdParseError::InvalidPrefix);
+                }
+                let num_str = &x[1..];
+                match num_str.parse() {
+                    Ok(num) => Ok(Self(num)),
+                    Err(e) => Err(IdParseError::UnparseableNumber(e)),
+                }
+            }
+        }
+        impl std::fmt::Display for $name {
+            fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+                write!(f, concat!($letter, "{}"), self.0)
+            }
+        }
+    };
+}
+
+id_def!(Qid, "entity ID", "Q");
+id_def!(Pid, "property ID", "P");
+id_def!(Lid, "lexeme ID", "L");
+
+/// A lexeme ID and associated form ID
+#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
+pub struct Fid(pub Lid, pub u16);
+
+/// A lexeme ID and associated sense ID
+#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
+pub struct Sid(pub Lid, pub u16);
+
+impl ToString for Fid {
+    #[must_use]
+    fn to_string(&self) -> String {
+        match self {
+            Fid(Lid(lid), fid) => format!("L{}-F{}", lid, fid),
+        }
+    }
+}
+
+impl ToString for Sid {
+    #[must_use]
+    fn to_string(&self) -> String {
+        match self {
+            Sid(Lid(lid), sid) => format!("L{}-S{}", lid, sid),
+        }
+    }
+}
+
+macro_rules! qid_consts (
+    { $($key:ident => $value:expr),+, } => {
+        $(
+            pub const $key: Qid = Qid($value);
+        )+
+    };
+);
+macro_rules! pid_consts (
+    { $($key:ident => $value:expr),+, } => {
+        $(
+            pub const $key: Pid = Pid($value);
+        )+
+    };
+);
+
+macro_rules! qid_unit_suffixes {
+    { $($key:ident => $value:expr),+, } => {
+        /// A display suffix for a known unit, e.g. `" m"` for `METRE`.
+        #[must_use]
+        pub fn unit_suffix(qid: Qid) -> Option<&'static str> {
+            $(
+                if qid == $key {
+                    Some($value)
+                } else
+            )+
+            {
+                None
+            }
+        }
+    };
+}
+
+/// Various IDs for commonly used entities/properties on Wikidata.
+#[allow(clippy::unreadable_literal)]
+pub mod consts {
+    use super::*;
+    qid_consts! {
+        EARTH => 2,
+        HUMAN => 5,
+        UNIT_OF_MEASUREMENT => 47574,
+        PHYSICAL_QUANTITY => 107715,
+        SI_BASE_UNIT => 223662,
+        LENGTH => 36253,
+        METRE => 11573,
+        YOTTAMETRE => 10543042,
+        ZETTAMETRE => 3277915,
+        EXAMETRE => 3277907,
+        PETAMETRE => 3277919,
+        TERAMETRE => 3267417,
+        GIGAMETRE => 854546,
+        MEGAMETRE => 1054140,
+        MYRIAMETRE => 1970718,
+        KILOMETRE => 828224,
+        HECTOMETRE => 844338,
+        DECAMETRE => 848856,
+        DECIMETRE => 200323,
+        CENTIMETRE => 174728,
+        MILLIMETRE => 174789,
+        MICROMETRE => 175821,
+        NANOMETRE => 178674,
+        PICOMETRE => 192274,
+        FEMTOMETRE => 208788,
+        ATTOMETRE => 6003257,
+        ZEPTOMETRE => 3270676,
+        YOCTOMETRE => 3221356,
+        PARSEC => 12129,
+        GIGAPARSEC => 14916719,
+        MEGAPARSEC => 3773454,
+        KILOPARSEC => 11929860,
+        ATTOPARSEC => 15784325,
+        LIGHT_YEAR => 531,
+        LIGHT_SECOND => 909315,
+        ASTRONOMICAL_UNIT => 1811,
+        MILE => 253276,
+        FOOT => 3710,
+        INCH => 218593,
+        THOU => 1165799,
+        AREA => 11500,
+        SQUARE_METRE => 25343,
+        SQUARE_KILOMETRE => 712226,
+        SQUARE_CENTIMETRE => 2489298,
+        SQUARE_MILLIMETRE => 2737347,
+        ARE => 185078,
+        HECTARE => 35852,
+        VOLUME => 39297,
+        CUBIC_METRE => 25517,
+        CUBIC_KILOMETRE => 4243638,
+        CUBIC_DECIMETRE => 2175964,
+        CUBIC_CENTIMETRE => 1022113,
+        CUBIC_MILLIMETRE => 3675550,
+        LITER => 11582,
+        HECTOLITER => 2029519,
+        DECALITER => 2637946,
+        CENTILITER => 1815100,
+        MILLILITER => 2332346,
+        MICROLITER => 2282891,
+        PICOLITER => 3902688,
+        FEMTOLITER => 3312063,
+        TIME => 11471,
+        FREQUENCY => 11652,
+        TIME_INTERVAL => 186081,
+        SECOND => 11574,
+        MILLISECOND => 723733,
+        MICROSECOND => 842015,
+        NANOSECOND => 838801,
+        FEMTOSECOND => 1777507,
+        ATTOSECOND => 2483628,
+        PICOSECOND => 3902709,
+        DAY => 573,
+        WEEK => 23387,
+        HOUR => 25235,
+        MINUTE => 7727,
+        MONTH => 5151,
+        ANNUM => 1092296,
+        YEAR => 577,
+        TWENTY_FOUR_HOUR_CLOCK => 216589,
+        HERTZ => 39369,
+        KILOHERTZ => 2143992,
+        MEGAHERTZ => 732707,
+        GIGAHERTZ => 3276763,
+        MASS => 11423,
+        KILOGRAM => 11570,
+        YOTTAGRAM => 613726,
+        ZETTAGRAM => 14754979,
+        EXAGRAM => 2655272,
+        PETAGRAM => 2612219,
+        TERAGRAM => 1770733,
+        GIGAGRAM => 2799294,
+        MEGAGRAM => 11776930,
+        MYRIAGRAM => 2151240,
+        HECTOGRAM => 1057069,
+        DECAGRAM => 6517513,
+        GRAM => 41803,
+        DECIGRAM => 1772386,
+        CENTIGRAM => 2691798,
+        MILLIGRAM => 3241121,
+        MICROGRAM => 1645498,
+        NANOGRAM => 2282906,
+        PICOGRAM => 3239557,
+        FEMTOGRAM => 1913097,
+        ATTOGRAM => 2438073,
+        ZEPTOGRAM => 6171168,
+        YOCTOGRAM => 6170164,
+        POUND => 100995,
+        DALTON => 483261,
+        DENSITY => 29539,
+        KILOGRAM_PER_CUBIC_METRE => 844211,
+        GRAM_PER_CUBIC_CENTIMETRE => 13147228,
+        CONCENTRATION => 3686031,
+        GRAM_PER_LITER => 834105,
+        MILLILITRE_PER_LITRE => 21075844,
+        MILLIGRAM_PER_CUBIC_METER => 21077820,
+        MOL_PER_KILOGRAM_OF_SOLVENT => 21064838,
+        MOL_PER_LITRE_OF_SOLUTION => 21064845,
+        MASS_FRACTION => 899138,
+        MOLE_FRACTION => 125264,
+        VOLUME_FRACTION => 909482,
+        PARTS_PER_MILLION => 21006887,
+        PART_PER_BILLION => 2055118,
+        MILLIGRAM_PER_KILOGRAM => 21091747,
+        GRAM_PER_KILOGRAM => 21061369,
+        TEMPERATURE => 11466,
+        DEGREE_CELSIUS => 25267,
+        KELVIN => 11579,
+        DEGREE_FAHRENHEIT => 42289,
+        RANKINE_SCALE => 207488,
+        PRESSURE => 39552,
+        ATMOSPHERE => 177974,
+        TECHNICAL_ATMOSPHERE => 909066,
+        BAR => 103510,
+        PASCAL => 44395,
+        MEGAPASCAL => 21062777,
+        KILOPASCAL => 21064807,
+        HECTOPASCAL => 5139563,
+        TORR => 185648,
+        MILLIMETER_OF_MERCURY => 6859652,
+        METRE_OF_WATER => 2042279,
+        CENTIMETRE_OF_WATER => 1247300,
+        MILLIMETRE_OF_WATER => 13479685,
+        HEAT_CAPACITY => 179388,
+        JOULE_PER_MOLE_KELVIN => 20966455,
+        THERMAL_CONDUCTIVITY => 487005,
+        WATT_PER_METRE_KELVIN => 1463969,
+        SPEED => 3711325,
+        KILOMETRE_PER_HOUR => 180154,
+        METRE_PER_SECOND => 182429,
+        KNOT => 128822,
+        KINEMATIC_VISCOSITY => 15106259,
+        STOKES => 1569733,
+        ELECTRICAL_CONDUCTIVITY => 4593291,
+        AMPERE_PER_VOLT_METRE => 20966435,
+        LUMINOSITY => 105902,
+        SOLAR_LUMINOSITY => 843877,
+        ENTHALPY => 161064,
+        JOULE_PER_MOLE => 13035094,
+        KILOJOULE_PER_MOLE => 752197,
+        KILOJOULE_PER_KILOGRAM => 21077849,
+        CURRENCY => 8142,
+        EURO => 4916,
+        CRORE => 1137675,
+        INFECTION => 166231,
+        DEGREE => 28390,
+        BUSINESS => 4830453,
+        FICTIONAL_HUMAN => 15632617,
+    }
+    // only include common ones
+    qid_unit_suffixes! {
+        METRE => " m",
+        KILOMETRE => " km",
+        CENTIMETRE => " cm",
+        MILLIMETRE => " mm",
+        SQUARE_METRE => " m²",
+        SQUARE_KILOMETRE => " km²",
+        SQUARE_CENTIMETRE => " cm²",
+        SQUARE_MILLIMETRE => " mm²",
+        CUBIC_METRE => " m³",
+        CUBIC_KILOMETRE => " km³",
+        CUBIC_CENTIMETRE => " cm³",
+        CUBIC_MILLIMETRE => " mm³",
+        GRAM => " g",
+        MILLIGRAM => " mg",
+        KILOGRAM_PER_CUBIC_METRE => " kg/m³",
+        GRAM_PER_CUBIC_CENTIMETRE => " g/cm³",
+        MILLILITRE_PER_LITRE => " ml/l",
+        MILLIGRAM_PER_CUBIC_METER => " mg/m³",
+        PARTS_PER_MILLION => " ppm",
+        MILLIGRAM_PER_KILOGRAM => " mg/kg",
+        GRAM_PER_KILOGRAM => " g/kg",
+        DEGREE_CELSIUS => " °C",
+        KELVIN => " K",
+        DEGREE_FAHRENHEIT => " °F",
+        KILOMETRE_PER_HOUR => " km/h",
+        ASTRONOMICAL_UNIT => " AU",
+        DEGREE => "°",
+    }
+    pid_consts! {
+        INSTANCE_OF => 31,
+        REFERENCE_URL => 854,
+        LANGUAGE => 407, // "language of work or name"
+        TITLE => 1476,
+        AUTHOR => 50,
+        AUTHOR_NAME_STRING => 2093,
+        STATED_IN => 248,
+        HEIGHT => 2048,
+        DATE_OF_BIRTH => 569,
+        DATE_OF_DEATH => 570,
+        NET_WORTH => 2218,
+        SPOUSE => 26,
+        EDUCATED_AT => 69,
+        NUMBER_OF_CHILDREN => 1971,
+        AWARD_RECEIVED => 166,
+        OFFICIAL_NAME => 1448,
+        EMAIL => 968,
+        SIBLING => 3373,
+        NOMINATED_FOR => 1411,
+        PHONE => 1329,
+        EMPLOYEES => 1128,
+        INCEPTION => 571,
+        CEO => 169,
+        TICKER_SYMBOL => 249,
+        LEGAL_FORM => 1454,
+        FOUNDED_BY => 112,
+        SEX_OR_GENDER => 21,
+        CITIZENSHIP => 27,
+        PLACE_OF_BIRTH => 19,
+        PLACE_OF_DEATH => 20,
+        FATHER => 22,
+        UNMARRIED_PARTNER => 451,
+        CHILD => 40,
+        MOTHER => 25,
+        EYE_COLOR => 1340,
+        HAIR_COLOR => 1884,
+        HANDEDNESS => 552,
+        MILITARY_RANK => 410,
+        PRONOUN => 6553,
+        PSEUDONYM => 742,
+        TWITTER_USERNAME => 2002,
+        FB_ID => 2013,
+        YT_CHANNEL_ID => 2397,
+        IG_USERNAME => 2003,
+    }
+}
+
+#[cfg(test)]
+pub mod test {
+    use super::*;
+
+    #[test]
+    fn json_url() {
+        let entity = Qid(42);
+        assert_eq!(
+            entity.json_url(),
+            "https://www.wikidata.org/wiki/Special:EntityData/Q42.json"
+        );
+
+        let prop = Pid(31);
+        assert_eq!(
+            prop.json_url(),
+            "https://www.wikidata.org/wiki/Special:EntityData/P31.json"
+        );
+
+        let lexeme = Lid(1);
+        assert_eq!(
+            lexeme.json_url(),
+            "https://www.wikidata.org/wiki/Special:EntityData/L1.json"
+        )
+    }
+
+    #[test]
+    fn to_string() {
+        let entity = Qid(42);
+        assert_eq!(entity.to_string(), "Q42");
+
+        let prop = Pid(6);
+        assert_eq!(prop.to_string(), "P6");
+
+        let lexeme = Lid(2);
+        assert_eq!(lexeme.to_string(), "L2");
+
+        let sense = Sid(Lid(5), 9);
+        assert_eq!(sense.to_string(), "L5-S9");
+
+        let form = Fid(Lid(3), 11);
+        assert_eq!(form.to_string(), "L3-F11");
+    }
+
+    #[test]
+    fn from_str() {
+        assert_eq!(Qid::from_str("Q42").unwrap(), Qid(42));
+        assert_eq!(Lid::from_str("L944114").unwrap(), Lid(944114));
+        assert_eq!(Pid::from_str("P1341").unwrap(), Pid(1341));
+    }
+
+    #[test]
+    fn unit_suffix() {
+        assert_eq!(consts::unit_suffix(consts::METRE).unwrap(), " m");
+        assert_eq!(consts::unit_suffix(consts::DEGREE).unwrap(), "°");
+    }
+}
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..ab89529
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,5 @@
+//! Rust library for Wikidata. It has some support for Wikibase as well, although the main focus is
+//! supporting the Wikidata instance.
+
+pub mod entity;
+pub mod ids;