This commit is contained in:
Elijah McMorris 2023-12-15 00:45:08 -08:00
parent 44b66d43c1
commit 8b3311ff21
Signed by: NexVeridian
SSH key fingerprint: SHA256:bsA1SKZxuEcEVHAy3gY1HUeM5ykRJl0U0kQHQn0hMg8
6 changed files with 65 additions and 12 deletions

View file

@@ -7,4 +7,5 @@
Cargo.lock Cargo.lock
Dockerfile Dockerfile
docker-compose.yml docker-compose.yml
docker-compose.dev.yml
*.md *.md

View file

@@ -6,10 +6,10 @@ license = "MIT OR Apache-2.0"
[dependencies] [dependencies]
anyhow = "1.0" anyhow = "1.0"
dotenv_codegen = "0.15"
serde = { version = "1.0", features = ["derive"] } serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0" serde_json = "1.0"
surrealdb = "1.0" surrealdb = "1.0"
tokio = "1.35" tokio = "1.35"
wikidata = "0.3.1" wikidata = "0.3.1"
bzip2 = { version = "0.4", features = ["tokio"] } bzip2 = { version = "0.4", features = ["tokio"] }
lazy_static = "1.4"

View file

@@ -17,14 +17,31 @@ services:
- 8000:8000 - 8000:8000
volumes: volumes:
- ./data:/data - ./data:/data
networks:
- surrealdb
wikidata-to-surrealdb: wikidata-to-surrealdb:
container_name: wikidata-to-surrealdb container_name: wikidata-to-surrealdb
build: build:
context: . context: .
restart: unless-stopped env_file:
- .env
environment:
- DB_USER=$DB_USER
- DB_PASSWORD=$DB_PASSWORD
- WIKIDATA_LANG=$WIKIDATA_LANG
- FILE_FORMAT=$FILE_FORMAT
- FILE_NAME=$FILE_NAME
restart: no
depends_on:
- surrealdb
volumes: volumes:
- ./data:/data - ./data:/data
networks:
- surrealdb
volumes: volumes:
data: data:
networks:
surrealdb:

View file

@@ -17,13 +17,30 @@ services:
- 8000:8000 - 8000:8000
volumes: volumes:
- ./data:/data - ./data:/data
networks:
- surrealdb
wikidata-to-surrealdb: wikidata-to-surrealdb:
container_name: wikidata-to-surrealdb container_name: wikidata-to-surrealdb
image: ghcr.io/nexveridian/ark-invest-api-rust-data:latest image: ghcr.io/nexveridian/ark-invest-api-rust-data:latest
restart: unless-stopped env_file:
- .env
environment:
- DB_USER=$DB_USER
- DB_PASSWORD=$DB_PASSWORD
- WIKIDATA_LANG=$WIKIDATA_LANG
- FILE_FORMAT=$FILE_FORMAT
- FILE_NAME=$FILE_NAME
restart: no
depends_on:
- surrealdb
volumes: volumes:
- ./data:/data - ./data:/data
networks:
- surrealdb
volumes: volumes:
data: data:
networks:
surrealdb:

View file

@@ -1,15 +1,26 @@
use anyhow::{Error, Ok, Result}; use anyhow::{Error, Ok, Result};
use bzip2::read::MultiBzDecoder; use bzip2::read::MultiBzDecoder;
use dotenv_codegen::dotenv; use lazy_static::lazy_static;
use serde_json::{from_str, Value}; use serde_json::{from_str, Value};
use std::fs::File; use std::{
use std::io::{BufRead, BufReader}; env,
fs::File,
io::{BufRead, BufReader},
};
use surrealdb::{engine::remote::ws::Ws, opt::auth::Root, Surreal}; use surrealdb::{engine::remote::ws::Ws, opt::auth::Root, Surreal};
use wikidata::Entity; use wikidata::Entity;
mod utils; mod utils;
use utils::*; use utils::*;
lazy_static! {
#[derive(Debug)]
static ref DB_USER: String = env::var("DB_USER").expect("DB_USER not set");
static ref DB_PASSWORD: String = env::var("DB_PASSWORD").expect("DB_PASSWORD not set");
static ref FILE_FORMAT: String = env::var("FILE_FORMAT").expect("FILE_FORMAT not set");
static ref FILE_NAME: String = env::var("FILE_NAME").expect("FILE_NAME not set");
}
#[allow(non_camel_case_types)] #[allow(non_camel_case_types)]
enum File_Format { enum File_Format {
json, json,
@@ -36,13 +47,13 @@ impl File_Format {
async fn main() -> Result<(), Error> { async fn main() -> Result<(), Error> {
let db = Surreal::new::<Ws>("0.0.0.0:8000").await?; let db = Surreal::new::<Ws>("0.0.0.0:8000").await?;
db.signin(Root { db.signin(Root {
username: dotenv!("DB_USER"), username: &DB_USER,
password: dotenv!("DB_PASSWORD"), password: &DB_PASSWORD,
}) })
.await?; .await?;
db.use_ns("wikidata").use_db("wikidata").await?; db.use_ns("wikidata").use_db("wikidata").await?;
let reader = File_Format::new(dotenv!("FILE_FORMAT")).reader(dotenv!("FILE_NAME"))?; let reader = File_Format::new(&FILE_FORMAT).reader(&FILE_NAME)?;
for line in reader.lines() { for line in reader.lines() {
let line = line?.trim().trim_end_matches(',').to_string(); let line = line?.trim().trim_end_matches(',').to_string();

View file

@@ -1,8 +1,15 @@
use dotenv_codegen::dotenv; use lazy_static::lazy_static;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use std::env;
use surrealdb::sql::Thing; use surrealdb::sql::Thing;
use wikidata::{ClaimValue, ClaimValueData, Entity, Lang, Pid, WikiId}; use wikidata::{ClaimValue, ClaimValueData, Entity, Lang, Pid, WikiId};
lazy_static! {
static ref WIKIDATA_LANG: String = env::var("WIKIDATA_LANG")
.expect("WIKIDATA_LANG not set")
.to_string();
}
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum ClaimData { pub enum ClaimData {
Thing(Thing), Thing(Thing),
@@ -117,7 +124,7 @@ fn get_id_entity(entity: &Entity) -> Thing {
fn get_name(entity: &Entity) -> String { fn get_name(entity: &Entity) -> String {
entity entity
.labels .labels
.get(&Lang(dotenv!("WIKIDATA_LANG").to_string())) .get(&Lang(WIKIDATA_LANG.to_string()))
.expect("No label found") .expect("No label found")
.to_string() .to_string()
} }
@@ -125,6 +132,6 @@ fn get_name(entity: &Entity) -> String {
fn get_description(entity: &Entity) -> Option<String> { fn get_description(entity: &Entity) -> Option<String> {
entity entity
.descriptions .descriptions
.get(&Lang(dotenv!("WIKIDATA_LANG").to_string())) .get(&Lang(WIKIDATA_LANG.to_string()))
.cloned() .cloned()
} }