mirror of
https://github.com/NexVeridian/wikidata-to-surrealdb.git
synced 2025-09-02 01:49:13 +00:00
readme
This commit is contained in:
parent
7bf0033970
commit
44b66d43c1
7 changed files with 164 additions and 5 deletions
|
@ -2,6 +2,25 @@
|
|||
- Make sure the tests pass
|
||||
- Run `cargo clippy --fix --allow-dirty`
|
||||
|
||||
# Dev Install
|
||||
## Dev Containers
|
||||
Install Docker, VS Code, and the [Dev Containers Extension](https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.remote-containers)
|
||||
|
||||
`git clone`
|
||||
|
||||
`Ctrl+Shift+P` **Dev Containers: Open Folder in Container**
|
||||
|
||||
Run code with `F5` or `cargo run`
|
||||
|
||||
Run tests with `cargo t`
|
||||
|
||||
## Docker Compose
|
||||
`git clone`
|
||||
|
||||
`docker compose -f docker-compose.dev.yml build && docker compose -f docker-compose.dev.yml up`
|
||||
|
||||
Remove the cargo cache for buildkit with `docker builder prune --filter type=exec.cachemount`
|
||||
|
||||
# License
|
||||
All code in this repository is dual-licensed under either [License-MIT](./LICENSE-MIT) or [LICENSE-APACHE](./LICENSE-Apache) at your option. This means you can select the license you prefer.
|
||||
|
||||
|
|
27
DockerFile
Normal file
27
DockerFile
Normal file
|
@ -0,0 +1,27 @@
|
|||
FROM rust:bookworm AS builder
|
||||
|
||||
RUN apt-get update && \
|
||||
apt install -y musl-tools musl-dev libssl-dev clang mold
|
||||
|
||||
# RUN curl -LsSf https://get.nexte.st/latest/linux | tar zxf - -C ${CARGO_HOME:-~/.cargo}/bin
|
||||
RUN curl -LsSf https://get.nexte.st/latest/linux | tar zxf - -C /usr/local/bin
|
||||
# RUN cargo install cargo-nextest --locked
|
||||
|
||||
WORKDIR /wikidata-to-surrealdb
|
||||
|
||||
COPY . .
|
||||
|
||||
RUN rustup target add x86_64-unknown-linux-musl && rustup update && cargo update
|
||||
|
||||
RUN --mount=type=cache,target=/usr/local/cargo,from=rust,source=/usr/local/cargo \
|
||||
--mount=type=cache,target=./target \
|
||||
cargo build --target x86_64-unknown-linux-musl --release && \
|
||||
cp ./target/target/x86_64-unknown-linux-musl/release/wikidata-to-surrealdb .
|
||||
|
||||
FROM alpine:latest AS main
|
||||
|
||||
WORKDIR /wikidata-to-surrealdb
|
||||
|
||||
COPY --from=builder wikidata-to-surrealdb/wikidata-to-surrealdb .
|
||||
|
||||
CMD ["./wikidata-to-surrealdb"]
|
77
README.md
77
README.md
|
@ -1,8 +1,77 @@
|
|||
# Similar Libraries
|
||||
A tool for converting Wikidata dumps to a [SurrealDB](https://surrealdb.com/) database, from either a bz2 or a json file.
|
||||
|
||||
# Getting The Data
|
||||
https://www.wikidata.org/wiki/Wikidata:Data_access
|
||||
|
||||
## From bz2 file (Recommended) ~80GB
|
||||
### Dump: [Docs](https://www.wikidata.org/wiki/Wikidata:Database_download)
|
||||
### [Download - latest-all.json.bz2](https://dumps.wikimedia.org/wikidatawiki/entities/latest-all.json.bz2)
|
||||
|
||||
## From json file
|
||||
### Linked Data Interface: [Docs](https://www.wikidata.org/wiki/Wikidata:Data_access#Linked_Data_Interface_(URI))
|
||||
```
|
||||
https://www.wikidata.org/wiki/Special:EntityData/Q60746544.json
|
||||
https://www.wikidata.org/wiki/Special:EntityData/P527.json
|
||||
```
|
||||
|
||||
# Example .env
|
||||
```
|
||||
DB_USER=root
|
||||
DB_PASSWORD=root
|
||||
WIKIDATA_LANG=en
|
||||
FILE_FORMAT=bz2
|
||||
FILE_NAME=data/latest-all.json.bz2
|
||||
```
|
||||
|
||||
# How to Query
|
||||
## See [Useful queries.md](./Useful%20queries.md)
|
||||
|
||||
# Table Layout
|
||||
## Thing
|
||||
```rust
|
||||
pub struct Thing {
|
||||
pub table: String,
|
||||
pub id: Id,
|
||||
}
|
||||
```
|
||||
|
||||
## Table: Entity, Property, Lexeme
|
||||
```rust
|
||||
pub struct EntityMini {
|
||||
pub id: Option<Thing>,
|
||||
pub label: String,
|
||||
pub claims: Thing,
|
||||
pub description: String,
|
||||
}
|
||||
```
|
||||
|
||||
## Table: Claims
|
||||
```rust
|
||||
pub struct Claims {
|
||||
pub id: Option<Thing>,
|
||||
pub claims: Vec<Claim>,
|
||||
}
|
||||
```
|
||||
|
||||
## Table: Claim
|
||||
```rust
|
||||
pub struct Claim {
|
||||
pub id: Thing,
|
||||
pub value: ClaimData,
|
||||
}
|
||||
```
|
||||
|
||||
## ClaimData
|
||||
```rust
|
||||
pub enum ClaimData {
|
||||
Thing(Thing),
|
||||
ClaimValueData(ClaimValueData),
|
||||
}
|
||||
```
|
||||
|
||||
# Similar Projects
|
||||
- [wd2duckdb](https://github.com/weso/wd2duckdb)
|
||||
- [wd2sql](https://github.com/p-e-w/wd2sql)
|
||||
|
||||
# License
|
||||
All code in this repository is dual-licensed under either [License-MIT](./LICENSE-MIT) or [LICENSE-APACHE](./LICENSE-Apache) at your option. This means you can select the license you prefer.
|
||||
|
||||
[Why dual license](https://github.com/bevyengine/bevy/issues/2373)
|
||||
All code in this repository is dual-licensed under either [License-MIT](./LICENSE-MIT) or [LICENSE-APACHE](./LICENSE-Apache) at your option. This means you can select the license you prefer. [Why dual license](https://github.com/bevyengine/bevy/issues/2373).
|
||||
|
|
|
@ -1,11 +1,15 @@
|
|||
# Get number of episodes
|
||||
```
|
||||
let $number_of_episodes = (select claims.claims[where id = Property:1113][0].value.ClaimValueData.Quantity.amount as number_of_episodes from Entity where label = "Black Clover, season 1")[0].number_of_episodes;
|
||||
|
||||
return $number_of_episodes[0].number_of_episodes;
|
||||
|
||||
update Entity SET number_of_episodes=$number_of_episodes where label = "Black Clover, season 1";
|
||||
```
|
||||
|
||||
# Get Parts
|
||||
```
|
||||
let $parts = (select claims.claims[where id = Property:527].value.Thing as parts from Entity where label = "Black Clover")[0].parts;
|
||||
|
||||
return $parts;
|
||||
```
|
||||
|
|
30
docker-compose.dev.yml
Normal file
30
docker-compose.dev.yml
Normal file
|
@ -0,0 +1,30 @@
|
|||
version: "3"
|
||||
services:
|
||||
surrealdb:
|
||||
container_name: surrealdb
|
||||
image: surrealdb/surrealdb:latest
|
||||
env_file:
|
||||
- .env
|
||||
entrypoint:
|
||||
- /surreal
|
||||
- start
|
||||
- --user
|
||||
- $DB_USER
|
||||
- --pass
|
||||
- $DB_PASSWORD
|
||||
- file:/data/surrealdb
|
||||
ports:
|
||||
- 8000:8000
|
||||
volumes:
|
||||
- ./data:/data
|
||||
|
||||
wikidata-to-surrealdb:
|
||||
container_name: wikidata-to-surrealdb
|
||||
build:
|
||||
context: .
|
||||
restart: unless-stopped
|
||||
volumes:
|
||||
- ./data:/data
|
||||
|
||||
volumes:
|
||||
data:
|
|
@ -1,8 +1,8 @@
|
|||
version: "3"
|
||||
services:
|
||||
surrealdb:
|
||||
image: surrealdb/surrealdb:latest
|
||||
container_name: surrealdb
|
||||
image: surrealdb/surrealdb:latest
|
||||
env_file:
|
||||
- .env
|
||||
entrypoint:
|
||||
|
@ -18,5 +18,12 @@ services:
|
|||
volumes:
|
||||
- ./data:/data
|
||||
|
||||
wikidata-to-surrealdb:
|
||||
container_name: wikidata-to-surrealdb
|
||||
image: ghcr.io/nexveridian/ark-invest-api-rust-data:latest
|
||||
restart: unless-stopped
|
||||
volumes:
|
||||
- ./data:/data
|
||||
|
||||
volumes:
|
||||
data:
|
||||
|
|
|
@ -31,18 +31,21 @@ impl ClaimData {
|
|||
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
pub struct Claims {
|
||||
// Table: Claims
|
||||
pub id: Option<Thing>,
|
||||
pub claims: Vec<Claim>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
pub struct Claim {
|
||||
// Table: Claim
|
||||
pub id: Thing,
|
||||
pub value: ClaimData,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
pub struct EntityMini {
|
||||
// Table: Entity, Property, Lexeme
|
||||
pub id: Option<Thing>,
|
||||
pub label: String,
|
||||
pub claims: Thing,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue