Merge pull request #8 from NexVeridian/master
Some checks are pending
Build/test / Build and test (push) Waiting to run

chore: 2024 edition, update dependencies, cargo fmt and clippy
This commit is contained in:
Smitty 2025-07-10 15:09:05 -04:00 committed by GitHub
commit efcbc2697c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 76 additions and 85 deletions

View file

@ -1,19 +1,19 @@
[package]
name = "wikidata"
version = "1.1.0"
edition = "2018"
edition = "2024"
repository = "https://github.com/syvb/wikidata/"
license = "Apache-2.0"
description = "A library for working with Wikidata in Rust"
exclude = [
"items/"
]
exclude = ["items/"]
[dependencies]
chrono = { version = "0.4.31", features = ["std", "serde"], default-features = false }
serde = { version = "1.0.126", features = ["derive"] }
chrono = { version = "0.4.31", features = [
"std",
"serde",
], default-features = false }
serde = { version = "1.0.216", features = ["derive"] }
serde_json = "1.0.64"
lazy_static = "1.4.0"
[dev-dependencies]
reqwest = { version = "0.11", features = ["blocking"] }
reqwest = { version = "0.12", features = ["blocking"] }

View file

@ -2,10 +2,7 @@ use wikidata::*;
fn main() {
for i in 1_usize.. {
let uri = format!(
"https://www.wikidata.org/wiki/Special:EntityData/Q{}.json",
i
);
let uri = format!("https://www.wikidata.org/wiki/Special:EntityData/Q{i}.json");
let res = reqwest::blocking::get(uri).unwrap();
let text = res.text().unwrap();
if text.contains("<h1>Not Found</h1><p>No entity with ID ") {
@ -13,6 +10,6 @@ fn main() {
}
let ent = Entity::from_json(serde_json::from_str(&text).unwrap()).unwrap();
let _ = ent;
println!("verified Q{}", i);
println!("verified Q{i}");
}
}

View file

@ -1,6 +1,6 @@
use std::{collections::BTreeMap, str::FromStr};
use crate::ids::{consts, Fid, Lid, Pid, Qid, Sid, WikiId};
use crate::ids::{Fid, Lid, Pid, Qid, Sid, WikiId, consts};
use crate::text::{Lang, Text};
use chrono::{DateTime, TimeZone, Utc};
use serde::{Deserialize, Serialize};
@ -27,7 +27,7 @@ pub struct Entity {
/// The type of entity: normal entity with a Qid, a property with a Pid, or a lexeme with a Lid.
///
/// EntitySchemas (with E IDs) are currently unsupported.
/// `EntitySchemas` (with E IDs) are currently unsupported.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[non_exhaustive]
pub enum EntityType {
@ -80,7 +80,7 @@ pub enum ClaimValueData {
},
/// A point in time.
DateTime {
/// The time as a Chrono DateTime.
/// The time as a Chrono `DateTime`.
date_time: DateTime<chrono::offset::Utc>,
/// The precision of the date:
///
@ -109,7 +109,7 @@ pub enum ClaimValueData {
MathExpr(String),
/// A geometric shape. The value of the string is currently unspecified.
GeoShape(String),
/// LilyPond musical notation.
/// `LilyPond` musical notation.
MusicNotation(String),
/// ID of a file with tabular data on Wikimedia Commons.
TabularData(String),
@ -197,11 +197,11 @@ impl Entity {
pub fn instances(&self) -> Vec<Qid> {
let mut instances = Vec::with_capacity(1);
for (pid, claim) in &self.claims {
if *pid == consts::INSTANCE_OF {
if let ClaimValueData::Item(qid) = claim.data {
if *pid == consts::INSTANCE_OF
&& let ClaimValueData::Item(qid) = claim.data
{
instances.push(qid);
};
};
}
}
instances.shrink_to_fit();
instances
@ -211,11 +211,11 @@ impl Entity {
#[must_use]
pub fn start_time(&self) -> Option<DateTime<chrono::offset::Utc>> {
for (pid, claim) in &self.claims {
if *pid == consts::DATE_OF_BIRTH {
if let ClaimValueData::DateTime { date_time, .. } = claim.data {
if *pid == consts::DATE_OF_BIRTH
&& let ClaimValueData::DateTime { date_time, .. } = claim.data
{
return Some(date_time);
};
};
}
}
None
}
@ -224,11 +224,11 @@ impl Entity {
#[must_use]
pub fn end_time(&self) -> Option<DateTime<chrono::offset::Utc>> {
for (pid, claim) in &self.claims {
if *pid == consts::DATE_OF_DEATH {
if let ClaimValueData::DateTime { date_time, .. } = claim.data {
if *pid == consts::DATE_OF_DEATH
&& let ClaimValueData::DateTime { date_time, .. } = claim.data
{
return Some(date_time);
};
};
}
}
None
}
@ -355,7 +355,7 @@ impl Entity {
Qid::from_str(raw_id).ok()
})
.collect(),
url: obj.get("url").map(|val| val.to_string()),
url: obj.get("url").map(std::string::ToString::to_string),
},
);
}
@ -505,7 +505,6 @@ impl Entity {
/// }
/// }
/// ```
#[must_use]
pub fn pid_claims(&self, pid: Pid) -> impl Iterator<Item = &ClaimValue> {
self.claims
.iter()
@ -637,7 +636,7 @@ pub enum EntityError {
OutOfBoundsTime,
}
fn get_json_string(json: Value) -> Result<String, EntityError> {
fn get_json_string(json: &Value) -> Result<String, EntityError> {
json.as_str()
.map(ToString::to_string)
.ok_or(EntityError::ExpectedString)
@ -724,29 +723,27 @@ fn parse_wb_time(time: &str) -> Result<chrono::DateTime<chrono::offset::Utc>, En
};
let (hour, min, sec) = if time_parts.len() == 2 {
let colon_parts: Vec<&str> = time_parts[1].split(':').collect();
let hour = match colon_parts.get(0).ok_or(EntityError::MissingHour)?.parse() {
Ok(x) => x,
Err(_) => return Err(EntityError::FloatParse),
let Ok(hour) = colon_parts.first().ok_or(EntityError::MissingHour)?.parse() else {
return Err(EntityError::FloatParse);
};
let minute = match colon_parts
let Ok(minute) = colon_parts
.get(1)
.ok_or(EntityError::MissingMinute)?
.parse()
{
Ok(x) => x,
Err(_) => return Err(EntityError::FloatParse),
else {
return Err(EntityError::FloatParse);
};
let sec = match colon_parts.get(2).ok_or(EntityError::MissingSecond)?[0..2].parse() {
Ok(x) => x,
Err(_) => return Err(EntityError::FloatParse),
let Ok(sec) = colon_parts.get(2).ok_or(EntityError::MissingSecond)?[0..2].parse() else {
return Err(EntityError::FloatParse);
};
(hour, minute, sec)
} else {
(0, 0, 0)
};
Ok(date
.and_hms_opt(hour, min, sec)
.ok_or(EntityError::OutOfBoundsTime)?)
date.and_hms_opt(hour, min, sec)
.ok_or(EntityError::OutOfBoundsTime)
}
impl ClaimValueData {
@ -756,14 +753,14 @@ impl ClaimValueData {
/// If the `snak` does not correspond to a valid snak, then an error will be returned.
pub fn parse_snak(mut snak: Value) -> Result<Self, EntityError> {
let mut datavalue: Value = take_prop("datavalue", &mut snak);
let datatype: &str = &get_json_string(take_prop("datatype", &mut snak))?;
let snaktype: &str = &get_json_string(take_prop("snaktype", &mut snak))?;
let datatype: &str = &get_json_string(&take_prop("datatype", &mut snak))?;
let snaktype: &str = &get_json_string(&take_prop("snaktype", &mut snak))?;
match snaktype {
"value" => {}
"somevalue" => return Ok(ClaimValueData::UnknownValue),
"novalue" => return Ok(ClaimValueData::NoValue),
_ => return Err(EntityError::InvalidSnaktype),
};
}
let type_str = take_prop("type", &mut datavalue)
.as_str()
.ok_or(EntityError::InvalidSnaktype)?
@ -789,7 +786,7 @@ impl ClaimValueData {
}
"wikibase-entityid" => {
// the ID could be an entity, lexeme, property, form, or sense
let id = get_json_string(take_prop("id", &mut value))?;
let id = get_json_string(&take_prop("id", &mut value))?;
match id.chars().next().ok_or(EntityError::BadId)? {
'Q' => Ok(ClaimValueData::Item(Qid(id[1..]
.parse()
@ -841,7 +838,7 @@ impl ClaimValueData {
}),
// our time parsing code can't handle a few edge cases (really old years), so we
"time" => Ok(
match parse_wb_time(&get_json_string(take_prop("time", &mut value))?) {
match parse_wb_time(&get_json_string(&take_prop("time", &mut value))?) {
Ok(date_time) => ClaimValueData::DateTime {
date_time,
precision: parse_wb_number(&take_prop("precision", &mut value))
@ -852,8 +849,8 @@ impl ClaimValueData {
},
),
"monolingualtext" => Ok(ClaimValueData::MonolingualText(Text {
text: get_json_string(take_prop("text", &mut value))?,
lang: Lang(get_json_string(take_prop("language", &mut value))?),
text: get_json_string(&take_prop("text", &mut value))?,
lang: Lang(get_json_string(&take_prop("language", &mut value))?),
})),
_ => Err(EntityError::UnknownDatatype),
}
@ -880,8 +877,8 @@ impl ClaimValue {
let reference_group = reference_group.as_object()?;
let mut claims = Vec::with_capacity(reference_group["snaks"].as_array()?.len());
let snaks = reference_group["snaks"].as_object()?;
for (pid, snak_group) in snaks.iter() {
for snak in snak_group.as_array()?.iter() {
for (pid, snak_group) in snaks {
for snak in snak_group.as_array()? {
// clone, meh
let owned_snak = snak.clone().take();
if let Ok(x) = ClaimValueData::parse_snak(owned_snak) {
@ -901,11 +898,9 @@ impl ClaimValue {
let qualifiers_json = take_prop("qualifiers", &mut claim);
let qualifiers = if qualifiers_json.is_object() {
let mut v: Vec<(Pid, ClaimValueData)> = vec![];
for (pid, claim_array_json) in qualifiers_json.as_object()?.iter() {
for (pid, claim_array_json) in qualifiers_json.as_object()? {
// yep it's a clone, meh
let mut claim_array = if let Value::Array(x) = claim_array_json.clone().take() {
x
} else {
let Value::Array(mut claim_array) = claim_array_json.clone().take() else {
return None;
};
for claim in claim_array.drain(..) {
@ -931,7 +926,6 @@ impl ClaimValue {
})
}
#[must_use]
/// Returns an iterator of references to all the qualifier claim data for a property ID.
///
/// ## Example
@ -951,7 +945,6 @@ impl ClaimValue {
}
impl ReferenceGroup {
#[must_use]
/// Returns an iterator of references to all the claim data for a property ID.
///
/// ## Example
@ -967,7 +960,6 @@ impl ReferenceGroup {
/// };
/// assert_eq!(claims.next(), None);
/// ```
pub fn pid_claims(&self, pid: Pid) -> impl Iterator<Item = &ClaimValueData> {
self.claims
.iter()
@ -998,10 +990,10 @@ mod test {
// "-410000000-00-00T00:00:00Z",
];
for time in valid_times {
println!("Trying \"{}\"", time);
println!("Trying \"{time}\"");
assert!(match parse_wb_time(time) {
Ok(val) => {
println!("Got {:#?}", val);
println!("Got {val:#?}");
true
}
Err(_) => false,
@ -1014,7 +1006,7 @@ mod test {
let qid = try_get_as_qid(
&serde_json::from_str(r#""http://www.wikidata.org/entity/Q1234567""#).unwrap(),
);
assert_eq!(qid, Ok(Qid(1234567)));
assert_eq!(qid, Ok(Qid(1_234_567)));
}
#[test]

View file

@ -7,7 +7,7 @@ pub mod consts;
/// Three main types of IDs entities can have.
///
/// EntitySchemas (with E IDs) are currently unsupported.
/// `EntitySchemas` (with E IDs) are currently unsupported.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[non_exhaustive]
pub enum WikiId {
@ -162,6 +162,7 @@ lexeme_subid_def!(Fid, "form ID", "F", 'F');
lexeme_subid_def!(Sid, "sense ID", "S", 'S');
#[cfg(test)]
#[allow(missing_docs)]
pub mod test {
use super::*;
@ -178,31 +179,31 @@ pub mod test {
assert_eq!(
Lid(1).json_url(),
"https://www.wikidata.org/wiki/Special:EntityData/L1.json"
)
);
}
#[test]
fn to_string() {
let entity = Qid(42);
assert_eq!(format!("{}", entity), "Q42");
assert_eq!(format!("{entity}"), "Q42");
let prop = Pid(6);
assert_eq!(format!("{}", prop), "P6");
assert_eq!(format!("{prop}"), "P6");
let lexeme = Lid(2);
assert_eq!(format!("{}", lexeme), "L2");
assert_eq!(format!("{lexeme}"), "L2");
let sense = Sid(Lid(5), 9);
assert_eq!(format!("{}", sense), "L5-S9");
assert_eq!(format!("{sense}"), "L5-S9");
let form = Fid(Lid(3), 11);
assert_eq!(format!("{}", form), "L3-F11");
assert_eq!(format!("{form}"), "L3-F11");
}
#[test]
fn from_str() {
assert_eq!(Qid::from_str("Q42").unwrap(), Qid(42));
assert_eq!(Lid::from_str("L944114").unwrap(), Lid(944114));
assert_eq!(Lid::from_str("L944114").unwrap(), Lid(944_114));
assert_eq!(Pid::from_str("P1341").unwrap(), Pid(1341));
assert_eq!(Pid::from_str("Q1341"), Err(IdParseError::InvalidPrefix));
assert_eq!(Pid::from_str("1341"), Err(IdParseError::InvalidPrefix));

View file

@ -7,13 +7,13 @@ fn id_snak() {
let j: serde_json::Value =
serde_json::from_str(include_str!("../items/Q106975887.json")).unwrap();
let snak = &j["entities"]["Q106975887"]["claims"]["P31"][0]["mainsnak"];
println!("{:?}", snak);
println!("{snak:?}");
let data = ClaimValueData::parse_snak(snak.clone()).unwrap();
assert_eq!(data, ClaimValueData::Item(Qid(5)));
let j: serde_json::Value = serde_json::from_str(include_str!("../items/Q1.json")).unwrap();
let snak = &j["entities"]["Q1"]["claims"]["P793"][0]["mainsnak"];
println!("{:?}", snak);
println!("{snak:?}");
let data = ClaimValueData::parse_snak(snak.clone()).unwrap();
assert_eq!(data, ClaimValueData::Item(Qid(323)));
}
@ -22,7 +22,7 @@ fn id_snak() {
fn commons_snak() {
let j: serde_json::Value = serde_json::from_str(include_str!("../items/Q42.json")).unwrap();
let snak = &j["entities"]["Q42"]["claims"]["P18"][0]["mainsnak"];
println!("{:?}", snak);
println!("{snak:?}");
let data = ClaimValueData::parse_snak(snak.clone()).unwrap();
assert_eq!(
data,
@ -34,7 +34,7 @@ fn commons_snak() {
fn quantity_snak() {
let j: serde_json::Value = serde_json::from_str(include_str!("../items/Q42.json")).unwrap();
let snak = &j["entities"]["Q42"]["claims"]["P2048"][0]["mainsnak"];
println!("{:?}", snak);
println!("{snak:?}");
let data = ClaimValueData::parse_snak(snak.clone()).unwrap();
assert_eq!(
data,
@ -51,7 +51,8 @@ fn quantity_snak() {
fn external_id_snak() {
let j: serde_json::Value = serde_json::from_str(include_str!("../items/Q42.json")).unwrap();
let snak = &j["entities"]["Q42"]["claims"]["P213"][0]["mainsnak"];
println!("{:?}", snak);
println!("{snak:?}");
let data = ClaimValueData::parse_snak(snak.clone()).unwrap();
assert_eq!(
data,
@ -63,7 +64,7 @@ fn external_id_snak() {
fn coordinates_snak() {
let j: serde_json::Value = serde_json::from_str(include_str!("../items/Q513.json")).unwrap();
let snak = &j["entities"]["Q513"]["claims"]["P625"][0]["mainsnak"];
println!("{:?}", snak);
println!("{snak:?}");
let data = ClaimValueData::parse_snak(snak.clone()).unwrap();
assert_eq!(
data,
@ -80,7 +81,7 @@ fn coordinates_snak() {
fn mono_text_snak() {
let j: serde_json::Value = serde_json::from_str(include_str!("../items/Q42.json")).unwrap();
let snak = &j["entities"]["Q42"]["claims"]["P1477"][0]["mainsnak"];
println!("{:?}", snak);
println!("{snak:?}");
let data = ClaimValueData::parse_snak(snak.clone()).unwrap();
assert_eq!(
data,
@ -95,10 +96,10 @@ fn mono_text_snak() {
fn date_snak() {
let j: serde_json::Value = serde_json::from_str(include_str!("../items/Q42.json")).unwrap();
let snak = &j["entities"]["Q42"]["claims"]["P569"][0]["mainsnak"];
println!("{:?}", snak);
println! {"{snak:?}"};
let data = ClaimValueData::parse_snak(snak.clone()).unwrap();
assert_eq!(
&format!("{:?}", data),
&format!("{data:?}"),
"DateTime { date_time: 1952-03-11T00:00:00Z, precision: 11 }",
);
}
@ -107,7 +108,7 @@ fn date_snak() {
fn lexeme_snak() {
let j: serde_json::Value = serde_json::from_str(include_str!("../items/Q31928.json")).unwrap();
let snak = &j["entities"]["Q31928"]["claims"]["P6254"][0]["mainsnak"];
println!("{:?}", snak);
println!("{snak:?}");
let data = ClaimValueData::parse_snak(snak.clone()).unwrap();
assert_eq!(data, ClaimValueData::Lexeme(Lid(361)));
}