diff --git a/.dockerignore b/.dockerignore
index 729d6d6..4fcb122 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -1,5 +1,6 @@
 /.devcontainer
 /.vscode
+/data
 /target
 .dockerignore
 Cargo.lock
diff --git a/README.md b/README.md
index 1186fc9..ea95519 100644
--- a/README.md
+++ b/README.md
@@ -4,7 +4,7 @@ Fetches and caches ETF data daily, from csv download or api, and saves the data
 
 Not affiliated with Ark Invest
 
-# Install for csv download
+# Install
 Copy docker-compose.yml
 
 Create data folder next to docker-compose.yml
@@ -16,6 +16,26 @@ Create data folder next to docker-compose.yml
 
 `docker compose up --pull always`
 
+# Changing the data source
+In docker-compose.yml, set the `ARK_SOURCE` environment variable to change the data source:
+```
+environment:
+  - ARK_SOURCE=ApiIncremental
+```
+The value of `ARK_SOURCE` must match a variant of the `Source` enum:
+```
+pub enum Source {
+    // Reads the existing Parquet file, if present
+    Read,
+    // From ARK Invest
+    Ark,
+    // From api.NexVeridian.com (Default)
+    ApiIncremental,
+    // From api.NexVeridian.com; not usually necessary, use ApiIncremental
+    ApiFull,
+}
+```
+
 # Dev Install
 ## Dev Containers
 Install docker, vscode and the [Dev Containers Extension](https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.remote-containers)
@@ -31,15 +51,7 @@ Run tests with `cargo t`
 
 ## Docker Compose
 `git clone`
 
-`docker compose build && docker compose up`
+`docker compose -f docker-compose.dev.yml build && docker compose -f docker-compose.dev.yml up`
 
 Remove the cargo cache for buildkit with `docker builder prune --filter type=exec.cachemount`
-
-# Install for api
-`git clone`
-
-in main.rs change `Source::Ark` to `Source::ApiIncremental` or `Source::ApiFull` for first run
-in docker-compose.yml remove this line`image: ghcr.io/NexVeridian/ark-invest-api-rust-data:latest`
-
-uncomment everything else
diff --git a/docker-compose.dev.yml b/docker-compose.dev.yml
new file mode 100644
index 0000000..2412836
--- /dev/null
+++ b/docker-compose.dev.yml
@@ -0,0 +1,30 @@
+version: "3"
+services:
+  ark-invest-api-rust-data-test:
+    container_name: ark-invest-api-rust-data-test
+    build:
+      context: .
+      target: test
+      args:
+        DOCKER_BUILDKIT: 1
+    restart: "no"
+
+  ark-invest-api-rust-data:
+    # image: ghcr.io/NexVeridian/ark-invest-api-rust-data:latest
+    image: ark-invest-api-rust-data
+    container_name: ark-invest-api-rust-data
+    build:
+      context: .
+      target: main
+      args:
+        DOCKER_BUILDKIT: 1
+    restart: "no"
+    environment:
+      - ARK_SOURCE=Ark
+      # - STARTUP_CSV_MERGE=true
+      # - STARTUP_ARK_ETF=true
+    volumes:
+      - ./data:/ark-invest-api-rust-data/data
+
+volumes:
+  data:
\ No newline at end of file
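For reference, a minimal sketch of how the `ARK_SOURCE` value set above is resolved, mirroring the `lazy_static` block in src/main.rs below. It assumes the `EnumString` derive comes from the strum crate (which the `Source::from_str` call implies); the standalone `main` is illustrative only. Note that strum's derived parsing is case-sensitive unless the enum opts into `ascii_case_insensitive`, so the value must match the variant name exactly (`Ark`, not `ARK`):

```rust
use std::env;
use std::str::FromStr;
use strum_macros::EnumString;

#[derive(EnumString, Clone, Copy, Debug)]
pub enum Source {
    Read,
    Ark,
    ApiIncremental,
    ApiFull,
}

fn main() {
    // An unset variable falls back to ApiIncremental; a set-but-invalid
    // value panics at startup via expect().
    let source = match env::var("ARK_SOURCE") {
        Ok(val) => Source::from_str(val.as_str())
            .expect("Env string ARK_SOURCE is not in enum Source"),
        Err(_e) => Source::ApiIncremental,
    };
    println!("Using source: {:?}", source);
}
```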
diff --git a/docker-compose.yml b/docker-compose.yml
index 675e2e3..a8c1635 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -2,7 +2,6 @@ version: "3"
 services:
   ark-invest-api-rust-data:
     image: ghcr.io/NexVeridian/ark-invest-api-rust-data:latest
-    # image: ark-invest-api-rust-data
     container_name: ark-invest-api-rust-data
     restart: unless-stopped
     # environment:
@@ -12,14 +11,5 @@
     volumes:
       - ./data:/ark-invest-api-rust-data/data
 
-  # ark-invest-api-rust-data-test:
-  #   container_name: ark-invest-api-rust-data-test
-  #   build:
-  #     context: .
-  #     target: test
-  #     args:
-  #       DOCKER_BUILDKIT: 1
-  #   restart: no
-
 volumes:
   data:
diff --git a/src/main.rs b/src/main.rs
index 2b72ccd..b25ee31 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,6 +1,7 @@
 use clokwerk::{AsyncScheduler, Job, TimeUnits};
 use futures::future::join_all;
 use lazy_static::lazy_static;
+use polars::prelude::DataFrame;
 use rand::Rng;
 use std::env;
 use std::error::Error;
@@ -16,19 +17,29 @@ use util::*;
 
 lazy_static! {
     static ref SOURCE: Source = match env::var("ARK_SOURCE") {
-        Ok(val) => Source::from_str(val.as_str()).expect("Env string SOURCE is not in enum Source"),
-        Err(_e) => Source::Ark,
+        Ok(val) =>
+            Source::from_str(val.as_str()).expect("Env string ARK_SOURCE is not in enum Source"),
+        Err(_e) => Source::ApiIncremental,
     };
 }
 
+fn print_df(ticker: &Ticker, df: &DataFrame) {
+    println!(
+        "Ticker: {:#?}\nShape: {:?}\n{:#?}",
+        ticker,
+        df.shape(),
+        df.tail(Some(1))
+    );
+}
+
 fn csv_merge() -> Result<(), Box<dyn Error>> {
     for ticker in Ticker::iter() {
         let df = Ark::merge_old_csv_to_parquet(ticker, None)?
             .format()?
             .sort()?
             .write_parquet()?
-            .collect();
-        println!("Ticker: {:#?}\n{:#?}", ticker, df);
+            .collect()?;
+        print_df(&ticker, &df);
     }
     Ok(())
 }
@@ -44,7 +55,7 @@ fn ark_plan(ticker: Ticker) -> Result<(), Box<dyn Error>> {
         .write_parquet()?
         .collect()?;
 
-    println!("Ticker: {:#?}\n{:#?}", ticker, df.tail(Some(1)));
+    print_df(&ticker, &df);
 
     Ok(())
 }
@@ -73,7 +84,7 @@ async fn main() {
         .map(|v| v == "true")
         .unwrap_or(false)
     {
-        print!("Merging CSVs to Parquet...");
+        println!("Merging CSVs to Parquet");
         csv_merge().unwrap();
     }
 
@@ -84,7 +95,7 @@ async fn main() {
         ark_etf().await;
     }
 
-    scheduler.every(1.day()).at("11:30 pm").run(ark_etf);
+    scheduler.every(1.day()).at("10:00 am").run(ark_etf);
 
     scheduler
         .every(5.day())
diff --git a/src/util.rs b/src/util.rs
index cc91477..171a753 100644
--- a/src/util.rs
+++ b/src/util.rs
@@ -82,9 +82,13 @@ impl DFS for Vec<DataFrame> {
 
 #[derive(EnumString, Clone, Copy)]
 pub enum Source {
+    // Reads the existing Parquet file, if present
     Read,
+    // From ARK Invest
     Ark,
+    // From api.NexVeridian.com
     ApiIncremental,
+    // From api.NexVeridian.com; not usually necessary, use ApiIncremental
     ApiFull,
 }
 pub struct Ark {
@@ -259,25 +263,34 @@ impl Ark {
         }
 
         if !df.fields().contains(&Field::new("date", DataType::Date)) {
-            let date_format = |df: DataFrame, format: &str| -> Result<DataFrame, Box<dyn Error>> {
-                Ok(df
+            let date_format = |mut df: DataFrame, format: Option<String>| -> Result<DataFrame, Box<dyn Error>> {
+                df = df
                     .lazy()
                     .with_column(col("date").str().strptime(
                         DataType::Date,
                         StrptimeOptions {
-                            format: Some(format.into()),
+                            format,
                             strict: false,
                             exact: true,
                             cache: true,
                         },
                     ))
-                    .collect()?)
+                    .collect()?;
+
+                if df.column("date").unwrap().null_count() > df.height() / 10 {
+                    return Err("wrong date format".into());
+                }
+
+                Ok(df)
             };
 
-            if let Ok(x) = date_format(df.clone(), "%m/%d/%Y") {
+            if let Ok(x) = date_format(df.clone(), Some("%m/%d/%Y".into())) {
                 df = x
             }
-            if let Ok(x) = date_format(df.clone(), "%Y/%m/%d") {
+            else if let Ok(x) = date_format(df.clone(), Some("%Y/%m/%d".into())) {
+                df = x
+            }
+            else if let Ok(x) = date_format(df.clone(), None) {
                 df = x
             }
         }
@@ -397,9 +410,8 @@ impl Ark {
 
         if Self::read_parquet(ticker, path.clone()).is_ok() {
             let df_old = Self::read_parquet(ticker, path.clone())?;
-            df = Self::concat_df(vec![Self::df_format(df_old)?, Self::df_format(df)?])?
+            df = Self::concat_df(vec![Self::df_format(df_old)?, Self::df_format(df)?])?;
         }
-
         Ok(Self { df, ticker, path })
     }
 }
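The date-parsing change in src/util.rs is the subtle part of this diff: with `strict: false`, polars turns unparseable date strings into nulls instead of returning an error, so the closure now uses the null count as a heuristic that the wrong format was guessed. Below is a condensed, self-contained sketch of that technique. The polars calls mirror the version this diff targets (`StrptimeOptions` fields differ in newer releases), and `parse_dates`/`parse_dates_any` are illustrative names, not part of the repo:

```rust
use polars::prelude::*;
use std::error::Error;

// Lenient parse: rows that do not match `format` become null rather than
// failing the whole parse. A null count above 10% of rows is treated as
// evidence that the format guess was wrong.
fn parse_dates(df: DataFrame, format: Option<String>) -> Result<DataFrame, Box<dyn Error>> {
    let df = df
        .lazy()
        .with_column(col("date").str().strptime(
            DataType::Date,
            StrptimeOptions {
                format,
                strict: false,
                exact: true,
                cache: true,
            },
        ))
        .collect()?;

    if df.column("date")?.null_count() > df.height() / 10 {
        return Err("wrong date format".into());
    }
    Ok(df)
}

// Candidate formats are tried in order; `None` falls back to polars'
// built-in format inference, matching the new else-if chain in df_format.
fn parse_dates_any(df: DataFrame) -> Result<DataFrame, Box<dyn Error>> {
    parse_dates(df.clone(), Some("%m/%d/%Y".into()))
        .or_else(|_| parse_dates(df.clone(), Some("%Y/%m/%d".into())))
        .or_else(|_| parse_dates(df, None))
}
```

Without the null-count gate, the first lenient parse would "succeed" on the wrong format and silently null out most dates, which is why the threshold check runs before falling through to the next candidate.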