mirror of
https://github.com/NexVeridian/wikidata-to-surrealdb.git
synced 2025-09-02 09:59:13 +00:00
readme
This commit is contained in:
parent
7bf0033970
commit
44b66d43c1
7 changed files with 164 additions and 5 deletions
|
@ -2,6 +2,25 @@
|
||||||
- Make sure the tests pass
|
- Make sure the tests pass
|
||||||
- Run `cargo clippy --fix --allow-dirty`
|
- Run `cargo clippy --fix --allow-dirty`
|
||||||
|
|
||||||
|
# Dev Install
|
||||||
|
## Dev Containers
|
||||||
|
Install Docker, VS Code, and the [Dev Containers Extension](https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.remote-containers)
|
||||||
|
|
||||||
|
`git clone`
|
||||||
|
|
||||||
|
`Ctrl+Shift+P` **Dev Containers: Open Folder in Container**
|
||||||
|
|
||||||
|
Run code with `F5` or `cargo run`
|
||||||
|
|
||||||
|
Run tests with `cargo t`
|
||||||
|
|
||||||
|
## Docker Compose
|
||||||
|
`git clone`
|
||||||
|
|
||||||
|
`docker compose -f docker-compose.dev.yml build && docker compose -f docker-compose.dev.yml up`
|
||||||
|
|
||||||
|
Remove the cargo cache for buildkit with `docker builder prune --filter type=exec.cachemount`
|
||||||
|
|
||||||
# License
|
# License
|
||||||
All code in this repository is dual-licensed under either [License-MIT](./LICENSE-MIT) or [LICENSE-APACHE](./LICENSE-Apache) at your option. This means you can select the license you prefer.
|
All code in this repository is dual-licensed under either [License-MIT](./LICENSE-MIT) or [LICENSE-APACHE](./LICENSE-Apache) at your option. This means you can select the license you prefer.
|
||||||
|
|
||||||
|
|
27
DockerFile
Normal file
27
DockerFile
Normal file
|
@ -0,0 +1,27 @@
|
||||||
|
FROM rust:bookworm AS builder
|
||||||
|
|
||||||
|
RUN apt-get update && \
|
||||||
|
apt install -y musl-tools musl-dev libssl-dev clang mold
|
||||||
|
|
||||||
|
# RUN curl -LsSf https://get.nexte.st/latest/linux | tar zxf - -C ${CARGO_HOME:-~/.cargo}/bin
|
||||||
|
RUN curl -LsSf https://get.nexte.st/latest/linux | tar zxf - -C /usr/local/bin
|
||||||
|
# RUN cargo install cargo-nextest --locked
|
||||||
|
|
||||||
|
WORKDIR /wikidata-to-surrealdb
|
||||||
|
|
||||||
|
COPY . .
|
||||||
|
|
||||||
|
RUN rustup target add x86_64-unknown-linux-musl && rustup update && cargo update
|
||||||
|
|
||||||
|
RUN --mount=type=cache,target=/usr/local/cargo,from=rust,source=/usr/local/cargo \
|
||||||
|
--mount=type=cache,target=./target \
|
||||||
|
cargo build --target x86_64-unknown-linux-musl --release && \
|
||||||
|
cp ./target/target/x86_64-unknown-linux-musl/release/wikidata-to-surrealdb .
|
||||||
|
|
||||||
|
FROM alpine:latest AS main
|
||||||
|
|
||||||
|
WORKDIR /wikidata-to-surrealdb
|
||||||
|
|
||||||
|
COPY --from=builder wikidata-to-surrealdb/wikidata-to-surrealdb .
|
||||||
|
|
||||||
|
CMD ["./wikidata-to-surrealdb"]
|
77
README.md
77
README.md
|
@ -1,8 +1,77 @@
|
||||||
# Similar Libraries
|
A tool for converting Wikidata dumps to a [SurrealDB](https://surrealdb.com/) database, from either a bz2 or a json file.
|
||||||
|
|
||||||
|
# Getting The Data
|
||||||
|
https://www.wikidata.org/wiki/Wikidata:Data_access
|
||||||
|
|
||||||
|
## From bz2 file (Recommended) ~80GB
|
||||||
|
### Dump: [Docs](https://www.wikidata.org/wiki/Wikidata:Database_download)
|
||||||
|
### [Download - latest-all.json.bz2](https://dumps.wikimedia.org/wikidatawiki/entities/latest-all.json.bz2)
|
||||||
|
|
||||||
|
## From json file
|
||||||
|
### Linked Data Interface: [Docs](https://www.wikidata.org/wiki/Wikidata:Data_access#Linked_Data_Interface_(URI))
|
||||||
|
```
|
||||||
|
https://www.wikidata.org/wiki/Special:EntityData/Q60746544.json
|
||||||
|
https://www.wikidata.org/wiki/Special:EntityData/P527.json
|
||||||
|
```
|
||||||
|
|
||||||
|
# Example .env
|
||||||
|
```
|
||||||
|
DB_USER=root
|
||||||
|
DB_PASSWORD=root
|
||||||
|
WIKIDATA_LANG=en
|
||||||
|
FILE_FORMAT=bz2
|
||||||
|
FILE_NAME=data/latest-all.json.bz2
|
||||||
|
```
|
||||||
|
|
||||||
|
# How to Query
|
||||||
|
## See [Useful queries.md](./Useful%20queries.md)
|
||||||
|
|
||||||
|
# Table Layout
|
||||||
|
## Thing
|
||||||
|
```rust
|
||||||
|
pub struct Thing {
|
||||||
|
pub table: String,
|
||||||
|
pub id: Id,
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Table: Entity, Property, Lexeme
|
||||||
|
```rust
|
||||||
|
pub struct EntityMini {
|
||||||
|
pub id: Option<Thing>,
|
||||||
|
pub label: String,
|
||||||
|
pub claims: Thing,
|
||||||
|
pub description: String,
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Table: Claims
|
||||||
|
```rust
|
||||||
|
pub struct Claims {
|
||||||
|
pub id: Option<Thing>,
|
||||||
|
pub claims: Vec<Claim>,
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Table: Claim
|
||||||
|
```rust
|
||||||
|
pub struct Claim {
|
||||||
|
pub id: Thing,
|
||||||
|
pub value: ClaimData,
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## ClaimData
|
||||||
|
```rust
|
||||||
|
pub enum ClaimData {
|
||||||
|
Thing(Thing),
|
||||||
|
ClaimValueData(ClaimValueData),
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
# Similar Projects
|
||||||
- [wd2duckdb](https://github.com/weso/wd2duckdb)
|
- [wd2duckdb](https://github.com/weso/wd2duckdb)
|
||||||
- [wd2sql](https://github.com/p-e-w/wd2sql)
|
- [wd2sql](https://github.com/p-e-w/wd2sql)
|
||||||
|
|
||||||
# License
|
# License
|
||||||
All code in this repository is dual-licensed under either [License-MIT](./LICENSE-MIT) or [LICENSE-APACHE](./LICENSE-Apache) at your option. This means you can select the license you prefer.
|
All code in this repository is dual-licensed under either [License-MIT](./LICENSE-MIT) or [LICENSE-APACHE](./LICENSE-Apache) at your option. This means you can select the license you prefer. [Why dual license](https://github.com/bevyengine/bevy/issues/2373).
|
||||||
|
|
||||||
[Why dual license](https://github.com/bevyengine/bevy/issues/2373)
|
|
||||||
|
|
|
@ -1,11 +1,15 @@
|
||||||
# Get number of episodes
|
# Get number of episodes
|
||||||
|
```
|
||||||
let $number_of_episodes = (select claims.claims[where id = Property:1113][0].value.ClaimValueData.Quantity.amount as number_of_episodes from Entity where label = "Black Clover, season 1")[0].number_of_episodes;
|
let $number_of_episodes = (select claims.claims[where id = Property:1113][0].value.ClaimValueData.Quantity.amount as number_of_episodes from Entity where label = "Black Clover, season 1")[0].number_of_episodes;
|
||||||
|
|
||||||
return $number_of_episodes[0].number_of_episodes;
|
return $number_of_episodes[0].number_of_episodes;
|
||||||
|
|
||||||
update Entity SET number_of_episodes=$number_of_episodes where label = "Black Clover, season 1";
|
update Entity SET number_of_episodes=$number_of_episodes where label = "Black Clover, season 1";
|
||||||
|
```
|
||||||
|
|
||||||
# Get Parts
|
# Get Parts
|
||||||
|
```
|
||||||
let $parts = (select claims.claims[where id = Property:527].value.Thing as parts from Entity where label = "Black Clover")[0].parts;
|
let $parts = (select claims.claims[where id = Property:527].value.Thing as parts from Entity where label = "Black Clover")[0].parts;
|
||||||
|
|
||||||
return $parts;
|
return $parts;
|
||||||
|
```
|
||||||
|
|
30
docker-compose.dev.yml
Normal file
30
docker-compose.dev.yml
Normal file
|
@ -0,0 +1,30 @@
|
||||||
|
version: "3"
|
||||||
|
services:
|
||||||
|
surrealdb:
|
||||||
|
container_name: surrealdb
|
||||||
|
image: surrealdb/surrealdb:latest
|
||||||
|
env_file:
|
||||||
|
- .env
|
||||||
|
entrypoint:
|
||||||
|
- /surreal
|
||||||
|
- start
|
||||||
|
- --user
|
||||||
|
- $DB_USER
|
||||||
|
- --pass
|
||||||
|
- $DB_PASSWORD
|
||||||
|
- file:/data/surrealdb
|
||||||
|
ports:
|
||||||
|
- 8000:8000
|
||||||
|
volumes:
|
||||||
|
- ./data:/data
|
||||||
|
|
||||||
|
wikidata-to-surrealdb:
|
||||||
|
container_name: wikidata-to-surrealdb
|
||||||
|
build:
|
||||||
|
context: .
|
||||||
|
restart: unless-stopped
|
||||||
|
volumes:
|
||||||
|
- ./data:/data
|
||||||
|
|
||||||
|
volumes:
|
||||||
|
data:
|
|
@ -1,8 +1,8 @@
|
||||||
version: "3"
|
version: "3"
|
||||||
services:
|
services:
|
||||||
surrealdb:
|
surrealdb:
|
||||||
image: surrealdb/surrealdb:latest
|
|
||||||
container_name: surrealdb
|
container_name: surrealdb
|
||||||
|
image: surrealdb/surrealdb:latest
|
||||||
env_file:
|
env_file:
|
||||||
- .env
|
- .env
|
||||||
entrypoint:
|
entrypoint:
|
||||||
|
@ -18,5 +18,12 @@ services:
|
||||||
volumes:
|
volumes:
|
||||||
- ./data:/data
|
- ./data:/data
|
||||||
|
|
||||||
|
wikidata-to-surrealdb:
|
||||||
|
container_name: wikidata-to-surrealdb
|
||||||
|
image: ghcr.io/nexveridian/ark-invest-api-rust-data:latest
|
||||||
|
restart: unless-stopped
|
||||||
|
volumes:
|
||||||
|
- ./data:/data
|
||||||
|
|
||||||
volumes:
|
volumes:
|
||||||
data:
|
data:
|
||||||
|
|
|
@ -31,18 +31,21 @@ impl ClaimData {
|
||||||
|
|
||||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||||
pub struct Claims {
|
pub struct Claims {
|
||||||
|
// Table: Claims
|
||||||
pub id: Option<Thing>,
|
pub id: Option<Thing>,
|
||||||
pub claims: Vec<Claim>,
|
pub claims: Vec<Claim>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||||
pub struct Claim {
|
pub struct Claim {
|
||||||
|
// Table: Claim
|
||||||
pub id: Thing,
|
pub id: Thing,
|
||||||
pub value: ClaimData,
|
pub value: ClaimData,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||||
pub struct EntityMini {
|
pub struct EntityMini {
|
||||||
|
// Table: Entity, Property, Lexeme
|
||||||
pub id: Option<Thing>,
|
pub id: Option<Thing>,
|
||||||
pub label: String,
|
pub label: String,
|
||||||
pub claims: Thing,
|
pub claims: Thing,
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue