mirror of
https://github.com/NexVeridian/wikidata-to-surrealdb.git
synced 2025-09-02 09:59:13 +00:00
docker
This commit is contained in:
parent
44b66d43c1
commit
8b3311ff21
6 changed files with 65 additions and 12 deletions
|
@ -7,4 +7,5 @@
|
||||||
Cargo.lock
|
Cargo.lock
|
||||||
Dockerfile
|
Dockerfile
|
||||||
docker-compose.yml
|
docker-compose.yml
|
||||||
|
docker-compose.dev.yml
|
||||||
*.md
|
*.md
|
||||||
|
|
|
@ -6,10 +6,10 @@ license = "MIT OR Apache-2.0"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
anyhow = "1.0"
|
anyhow = "1.0"
|
||||||
dotenv_codegen = "0.15"
|
|
||||||
serde = { version = "1.0", features = ["derive"] }
|
serde = { version = "1.0", features = ["derive"] }
|
||||||
serde_json = "1.0"
|
serde_json = "1.0"
|
||||||
surrealdb = "1.0"
|
surrealdb = "1.0"
|
||||||
tokio = "1.35"
|
tokio = "1.35"
|
||||||
wikidata = "0.3.1"
|
wikidata = "0.3.1"
|
||||||
bzip2 = { version = "0.4", features = ["tokio"] }
|
bzip2 = { version = "0.4", features = ["tokio"] }
|
||||||
|
lazy_static = "1.4"
|
||||||
|
|
|
@ -17,14 +17,31 @@ services:
|
||||||
- 8000:8000
|
- 8000:8000
|
||||||
volumes:
|
volumes:
|
||||||
- ./data:/data
|
- ./data:/data
|
||||||
|
networks:
|
||||||
|
- surrealdb
|
||||||
|
|
||||||
wikidata-to-surrealdb:
|
wikidata-to-surrealdb:
|
||||||
container_name: wikidata-to-surrealdb
|
container_name: wikidata-to-surrealdb
|
||||||
build:
|
build:
|
||||||
context: .
|
context: .
|
||||||
restart: unless-stopped
|
env_file:
|
||||||
|
- .env
|
||||||
|
environment:
|
||||||
|
- DB_USER=$DB_USER
|
||||||
|
- DB_PASSWORD=$DB_PASSWORD
|
||||||
|
- WIKIDATA_LANG=$WIKIDATA_LANG
|
||||||
|
- FILE_FORMAT=$FILE_FORMAT
|
||||||
|
- FILE_NAME=$FILE_NAME
|
||||||
|
restart: no
|
||||||
|
depends_on:
|
||||||
|
- surrealdb
|
||||||
volumes:
|
volumes:
|
||||||
- ./data:/data
|
- ./data:/data
|
||||||
|
networks:
|
||||||
|
- surrealdb
|
||||||
|
|
||||||
volumes:
|
volumes:
|
||||||
data:
|
data:
|
||||||
|
|
||||||
|
networks:
|
||||||
|
surrealdb:
|
||||||
|
|
|
@ -17,13 +17,30 @@ services:
|
||||||
- 8000:8000
|
- 8000:8000
|
||||||
volumes:
|
volumes:
|
||||||
- ./data:/data
|
- ./data:/data
|
||||||
|
networks:
|
||||||
|
- surrealdb
|
||||||
|
|
||||||
wikidata-to-surrealdb:
|
wikidata-to-surrealdb:
|
||||||
container_name: wikidata-to-surrealdb
|
container_name: wikidata-to-surrealdb
|
||||||
image: ghcr.io/nexveridian/ark-invest-api-rust-data:latest
|
image: ghcr.io/nexveridian/ark-invest-api-rust-data:latest
|
||||||
restart: unless-stopped
|
env_file:
|
||||||
|
- .env
|
||||||
|
environment:
|
||||||
|
- DB_USER=$DB_USER
|
||||||
|
- DB_PASSWORD=$DB_PASSWORD
|
||||||
|
- WIKIDATA_LANG=$WIKIDATA_LANG
|
||||||
|
- FILE_FORMAT=$FILE_FORMAT
|
||||||
|
- FILE_NAME=$FILE_NAME
|
||||||
|
restart: no
|
||||||
|
depends_on:
|
||||||
|
- surrealdb
|
||||||
volumes:
|
volumes:
|
||||||
- ./data:/data
|
- ./data:/data
|
||||||
|
networks:
|
||||||
|
- surrealdb
|
||||||
|
|
||||||
volumes:
|
volumes:
|
||||||
data:
|
data:
|
||||||
|
|
||||||
|
networks:
|
||||||
|
surrealdb:
|
||||||
|
|
23
src/main.rs
23
src/main.rs
|
@ -1,15 +1,26 @@
|
||||||
use anyhow::{Error, Ok, Result};
|
use anyhow::{Error, Ok, Result};
|
||||||
use bzip2::read::MultiBzDecoder;
|
use bzip2::read::MultiBzDecoder;
|
||||||
use dotenv_codegen::dotenv;
|
use lazy_static::lazy_static;
|
||||||
use serde_json::{from_str, Value};
|
use serde_json::{from_str, Value};
|
||||||
use std::fs::File;
|
use std::{
|
||||||
use std::io::{BufRead, BufReader};
|
env,
|
||||||
|
fs::File,
|
||||||
|
io::{BufRead, BufReader},
|
||||||
|
};
|
||||||
use surrealdb::{engine::remote::ws::Ws, opt::auth::Root, Surreal};
|
use surrealdb::{engine::remote::ws::Ws, opt::auth::Root, Surreal};
|
||||||
use wikidata::Entity;
|
use wikidata::Entity;
|
||||||
|
|
||||||
mod utils;
|
mod utils;
|
||||||
use utils::*;
|
use utils::*;
|
||||||
|
|
||||||
|
lazy_static! {
|
||||||
|
#[derive(Debug)]
|
||||||
|
static ref DB_USER: String = env::var("DB_USER").expect("DB_USER not set");
|
||||||
|
static ref DB_PASSWORD: String = env::var("DB_PASSWORD").expect("DB_PASSWORD not set");
|
||||||
|
static ref FILE_FORMAT: String = env::var("FILE_FORMAT").expect("FILE_FORMAT not set");
|
||||||
|
static ref FILE_NAME: String = env::var("FILE_NAME").expect("FILE_NAME not set");
|
||||||
|
}
|
||||||
|
|
||||||
#[allow(non_camel_case_types)]
|
#[allow(non_camel_case_types)]
|
||||||
enum File_Format {
|
enum File_Format {
|
||||||
json,
|
json,
|
||||||
|
@ -36,13 +47,13 @@ impl File_Format {
|
||||||
async fn main() -> Result<(), Error> {
|
async fn main() -> Result<(), Error> {
|
||||||
let db = Surreal::new::<Ws>("0.0.0.0:8000").await?;
|
let db = Surreal::new::<Ws>("0.0.0.0:8000").await?;
|
||||||
db.signin(Root {
|
db.signin(Root {
|
||||||
username: dotenv!("DB_USER"),
|
username: &DB_USER,
|
||||||
password: dotenv!("DB_PASSWORD"),
|
password: &DB_PASSWORD,
|
||||||
})
|
})
|
||||||
.await?;
|
.await?;
|
||||||
db.use_ns("wikidata").use_db("wikidata").await?;
|
db.use_ns("wikidata").use_db("wikidata").await?;
|
||||||
|
|
||||||
let reader = File_Format::new(dotenv!("FILE_FORMAT")).reader(dotenv!("FILE_NAME"))?;
|
let reader = File_Format::new(&FILE_FORMAT).reader(&FILE_NAME)?;
|
||||||
|
|
||||||
for line in reader.lines() {
|
for line in reader.lines() {
|
||||||
let line = line?.trim().trim_end_matches(',').to_string();
|
let line = line?.trim().trim_end_matches(',').to_string();
|
||||||
|
|
13
src/utils.rs
13
src/utils.rs
|
@ -1,8 +1,15 @@
|
||||||
use dotenv_codegen::dotenv;
|
use lazy_static::lazy_static;
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
|
use std::env;
|
||||||
use surrealdb::sql::Thing;
|
use surrealdb::sql::Thing;
|
||||||
use wikidata::{ClaimValue, ClaimValueData, Entity, Lang, Pid, WikiId};
|
use wikidata::{ClaimValue, ClaimValueData, Entity, Lang, Pid, WikiId};
|
||||||
|
|
||||||
|
lazy_static! {
|
||||||
|
static ref WIKIDATA_LANG: String = env::var("WIKIDATA_LANG")
|
||||||
|
.expect("WIKIDATA_LANG not set")
|
||||||
|
.to_string();
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||||
pub enum ClaimData {
|
pub enum ClaimData {
|
||||||
Thing(Thing),
|
Thing(Thing),
|
||||||
|
@ -117,7 +124,7 @@ fn get_id_entity(entity: &Entity) -> Thing {
|
||||||
fn get_name(entity: &Entity) -> String {
|
fn get_name(entity: &Entity) -> String {
|
||||||
entity
|
entity
|
||||||
.labels
|
.labels
|
||||||
.get(&Lang(dotenv!("WIKIDATA_LANG").to_string()))
|
.get(&Lang(WIKIDATA_LANG.to_string()))
|
||||||
.expect("No label found")
|
.expect("No label found")
|
||||||
.to_string()
|
.to_string()
|
||||||
}
|
}
|
||||||
|
@ -125,6 +132,6 @@ fn get_name(entity: &Entity) -> String {
|
||||||
fn get_description(entity: &Entity) -> Option<String> {
|
fn get_description(entity: &Entity) -> Option<String> {
|
||||||
entity
|
entity
|
||||||
.descriptions
|
.descriptions
|
||||||
.get(&Lang(dotenv!("WIKIDATA_LANG").to_string()))
|
.get(&Lang(WIKIDATA_LANG.to_string()))
|
||||||
.cloned()
|
.cloned()
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue