diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index e5a10ec..750dcce 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,6 +1,5 @@ # Contributing code -- Make sure the test pass -- Run `cargo clippy --fix --allow-dirty` +- Run `make precommit` # Dev Install ## Dev Containers diff --git a/Cargo.toml b/Cargo.toml index e41ae4f..aa79d34 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,7 +15,7 @@ polars = { version = "0.32", features = [ "object", "dtype-struct", ] } -reqwest = { version = "0.11", features = ["blocking", "gzip"] } +reqwest = { version = "0.12", features = ["blocking", "gzip"] } glob = { version = "0.3" } clokwerk = "0.4" strum_macros = "0.26" @@ -30,6 +30,6 @@ lazy_static = "1.4" anyhow = "1.0" [dev-dependencies] -serial_test = "*" -rstest = "0.21" +serial_test = "3.1" +rstest = "0.23" pretty_assertions = "1.4" diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..8ab4f9e --- /dev/null +++ b/Makefile @@ -0,0 +1,7 @@ +precommit: + rustup update + cargo update + cargo check + cargo fmt + cargo t + cargo clippy --fix --allow-dirty diff --git a/src/lib.rs b/src/lib.rs index 836ae89..7cda224 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,2 +1,5 @@ pub mod util; pub use util::*; + +#[cfg(test)] +mod test_utils; diff --git a/src/main.rs b/src/main.rs index c15590e..c831fcb 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,4 +1,5 @@ use anyhow::{Error, Result}; +use ark_invest_api_rust_data::{util::ticker::Ticker, *}; use clokwerk::{AsyncScheduler, Job, TimeUnits}; use futures::future::join_all; use lazy_static::lazy_static; @@ -11,10 +12,6 @@ use strum::IntoEnumIterator; use tokio::task; use tokio::time::Duration; -mod util; -use util::ticker::Ticker; -use util::*; - lazy_static! { static ref SOURCE: Source = match env::var("ARK_SOURCE") { Ok(val) => diff --git a/src/test_utils.rs b/src/test_utils.rs new file mode 100644 index 0000000..73dc5ef --- /dev/null +++ b/src/test_utils.rs @@ -0,0 +1,17 @@ +use anyhow::{Error, Result}; +use polars::prelude::*; + +pub fn defualt_df(ticker: &[Option<&str>], company: &[Option<&str>]) -> Result { + let target_len = ticker.len() + 1; + let df = df![ + "date" => vec!["2024-01-01"; target_len], + "ticker" => [ticker, &[Some("TSLA")]].concat(), + "cusip" => vec!["TESLA"; target_len], + "company" => [company, &[Some("TESLA")]].concat(), + "market_value" => vec![10; target_len], + "shares" => vec![10; target_len], + "share_price" => vec![100.00; target_len], + "weight" => vec![10.00; target_len], + ]?; + Ok(df) +} diff --git a/src/util.rs b/src/util.rs index b0c559e..bf1ecd2 100644 --- a/src/util.rs +++ b/src/util.rs @@ -13,7 +13,7 @@ use strum_macros::EnumString; use ticker::{DataSource, Ticker}; pub mod data_reader; pub mod df; -mod df_format; +mod format; pub mod ticker; #[derive(Debug, Default, EnumString, Clone, Copy, PartialEq)] @@ -159,14 +159,14 @@ impl Ark { let mut df = df.collect()?; match data_source { Some(ds) => { - df = df_format::df_format(ds, df.into())?.collect()?; + df = format::data_source(ds, df.into())?.collect()?; } None => { - df = df_format::df_format_europe_csv(df.into())?.collect()?; - df = df_format::df_format_europe_arkfundsio(df.into())?.collect()?; - df = df_format::df_format_21shares(df.into())?.collect()?; - df = df_format::df_format_arkvx(df.into())?.collect()?; - df = df_format::df_format_europe(df.into())?.collect()?; + df = format::df_format_europe_csv(df.into())?.collect()?; + df = format::df_format_europe_arkfundsio(df.into())?.collect()?; + df = format::df_format_21shares(df.into())?.collect()?; + df = format::df_format_arkvx(df.into())?.collect()?; + df = format::df_format_europe(df.into())?.collect()?; } } @@ -242,34 +242,7 @@ impl Ark { } } - // format arkw, ARK BITCOIN ETF HOLDCO (ARKW) to ARKB - if let Ok(x) = df - .clone() - .lazy() - .with_columns(vec![ - when(col("company").eq(lit("ARK BITCOIN ETF HOLDCO (ARKW)"))) - .then(lit("ARKB")) - .otherwise(col("ticker")) - .alias("ticker"), - when(col("company").eq(lit("ARK BITCOIN ETF HOLDCO (ARKW)"))) - .then(lit("ARKB")) - .otherwise(col("company")) - .alias("company"), - ]) - .with_columns(vec![ - when(col("company").eq(lit("ARK BITCOIN ETF HOLDCO (ARKF)"))) - .then(lit("ARKB")) - .otherwise(col("ticker")) - .alias("ticker"), - when(col("company").eq(lit("ARK BITCOIN ETF HOLDCO (ARKF)"))) - .then(lit("ARKB")) - .otherwise(col("company")) - .alias("company"), - ]) - .collect() - { - df = x; - } + df = format::Ticker::all(df.into())?.collect()?; let mut expressions: Vec = vec![]; @@ -581,6 +554,7 @@ impl Ark { #[cfg(test)] mod tests { use super::*; + use crate::test_utils::*; use pretty_assertions::assert_eq; use serial_test::serial; use std::fs; @@ -588,16 +562,7 @@ mod tests { #[test] #[serial] fn read_write_parquet() -> Result<(), Error> { - let test_df = df![ - "date" => ["2023-01-01"], - "ticker" => ["TSLA"], - "cusip" => ["123abc"], - "company" => ["Tesla"], - "market_value" => [100], - "shares" => [10], - "share_price" => [10], - "weight" => [10.00] - ]?; + let test_df = defualt_df(&[Some("COIN")], &[Some("COINBASE")])?; Ark::write_df_parquet("data/test/ARKK.parquet".into(), test_df.clone().into())?; let read = Ark::new(Source::Read, Ticker::ARKK, Some("data/test".to_owned()))?.collect()?; @@ -610,16 +575,14 @@ mod tests { #[test] #[serial] fn arkw_format_arkb() -> Result<(), Error> { - let test_df = df![ - "date" => ["2024-01-01", "2024-01-02"], - "ticker" => [None::<&str>, Some("TSLA")], - "cusip" => ["123abc", "TESLA"], - "company" => ["ARK BITCOIN ETF HOLDCO (ARKW)", "TESLA"], - "market_value" => [100, 400], - "shares" => [10, 20], - "share_price" => [10, 20], - "weight" => [10.00, 20.00] - ]?; + let test_df = defualt_df( + &[None::<&str>, Some("ARKB"), Some("ARKB")], + &[ + Some("ARK BITCOIN ETF HOLDCO (ARKW)"), + Some("ARK BITCOIN ETF HOLDCO (ARKW)"), + Some("ARKB"), + ], + )?; Ark::write_df_parquet("data/test/ARKW.parquet".into(), test_df.clone().into())?; let read = Ark::new(Source::Read, Ticker::ARKW, Some("data/test".to_owned()))?.collect()?; @@ -628,16 +591,10 @@ mod tests { let df = Ark::df_format(read.into(), None)?.collect()?; assert_eq!( df, - df![ - "date" => ["2024-01-01", "2024-01-02"], - "ticker" => ["ARKB", "TSLA"], - "cusip" => ["123abc", "TESLA"], - "company" => ["ARKB", "TESLA"], - "market_value" => [100, 400], - "shares" => [10, 20], - "share_price" => [10, 20], - "weight" => [10.00, 20.00] - ]? + defualt_df( + &[Some("ARKB"), Some("ARKB"), Some("ARKB")], + &[Some("ARKB"), Some("ARKB"), Some("ARKB")] + )?, ); Ok(()) @@ -646,17 +603,14 @@ mod tests { #[test] #[serial] fn arkf_format_arkb() -> Result<(), Error> { - let test_df = df![ - "date" => ["2024-01-01", "2024-01-02"], - "ticker" => [None::<&str>, Some("TSLA")], - "cusip" => ["123abc", "TESLA"], - "company" => ["ARK BITCOIN ETF HOLDCO (ARKF)", "TESLA"], - "market_value" => [100, 400], - "shares" => [10, 20], - "share_price" => [10, 20], - "weight" => [10.00, 20.00] - ]?; - + let test_df = defualt_df( + &[None::<&str>, Some("ARKB"), Some("ARKB")], + &[ + Some("ARK BITCOIN ETF HOLDCO (ARKF)"), + Some("ARK BITCOIN ETF HOLDCO (ARKF)"), + Some("ARKB"), + ], + )?; Ark::write_df_parquet("data/test/ARKF.parquet".into(), test_df.clone().into())?; let read = Ark::new(Source::Read, Ticker::ARKF, Some("data/test".to_owned()))?.collect()?; fs::remove_file("data/test/ARKF.parquet")?; @@ -664,16 +618,10 @@ mod tests { let df = Ark::df_format(read.into(), None)?.collect()?; assert_eq!( df, - df![ - "date" => ["2024-01-01", "2024-01-02"], - "ticker" => ["ARKB", "TSLA"], - "cusip" => ["123abc", "TESLA"], - "company" => ["ARKB", "TESLA"], - "market_value" => [100, 400], - "shares" => [10, 20], - "share_price" => [10, 20], - "weight" => [10.00, 20.00] - ]? + defualt_df( + &[Some("ARKB"), Some("ARKB"), Some("ARKB")], + &[Some("ARKB"), Some("ARKB"), Some("ARKB")] + )?, ); Ok(()) diff --git a/src/util/df_format.rs b/src/util/format/data_source.rs similarity index 98% rename from src/util/df_format.rs rename to src/util/format/data_source.rs index 6b174e7..3ce9ff9 100644 --- a/src/util/df_format.rs +++ b/src/util/format/data_source.rs @@ -3,7 +3,7 @@ use polars::prelude::*; use crate::{ticker::DataSource, util::df::DF}; -pub fn df_format(data_source: DataSource, mut df: DF) -> Result { +pub fn data_source(data_source: DataSource, mut df: DF) -> Result { let df = match data_source { DataSource::ArkVenture => df_format_arkvx(df)?, DataSource::Ark => df, diff --git a/src/util/format/mod.rs b/src/util/format/mod.rs new file mode 100644 index 0000000..af29012 --- /dev/null +++ b/src/util/format/mod.rs @@ -0,0 +1,4 @@ +pub mod data_source; +pub use data_source::*; +pub mod ticker; +pub use ticker::*; diff --git a/src/util/format/ticker.rs b/src/util/format/ticker.rs new file mode 100644 index 0000000..d51e26a --- /dev/null +++ b/src/util/format/ticker.rs @@ -0,0 +1,171 @@ + +use anyhow::{Error, Result}; +use polars::prelude::*; +use strum::IntoEnumIterator; +use strum_macros::EnumIter; + +use crate::util::df::DF; + +#[allow(clippy::upper_case_acronyms, non_camel_case_types)] +#[derive(Debug, strum_macros::Display, EnumIter, Clone, Copy, PartialEq)] +pub enum Ticker { + ARKW, + MKFG, + CASH_USD, +} + +impl Ticker { + pub fn all(mut df: DF) -> Result { + for ticker in Ticker::iter() { + df = ticker.format(df)?; + } + Ok(df) + } + + pub fn format(&self, df: DF) -> Result { + match self { + Ticker::ARKW => Self::arkw(df), + Ticker::MKFG => Self::mkfg(df), + Ticker::CASH_USD => Self::cash_usd(df), + } + } + + fn arkw(df: DF) -> Result { + let mut df = df.collect()?; + + if let Ok(x) = df + .clone() + .lazy() + .with_columns(vec![ + when(col("company").eq(lit("ARK BITCOIN ETF HOLDCO (ARKW)"))) + .then(lit("ARKB")) + .otherwise(col("ticker")) + .alias("ticker"), + when(col("company").eq(lit("ARK BITCOIN ETF HOLDCO (ARKW)"))) + .then(lit("ARKB")) + .otherwise(col("company")) + .alias("company"), + ]) + .with_columns(vec![ + when(col("company").eq(lit("ARK BITCOIN ETF HOLDCO (ARKF)"))) + .then(lit("ARKB")) + .otherwise(col("ticker")) + .alias("ticker"), + when(col("company").eq(lit("ARK BITCOIN ETF HOLDCO (ARKF)"))) + .then(lit("ARKB")) + .otherwise(col("company")) + .alias("company"), + ]) + .collect() + { + df = x; + } + + Ok(df.into()) + } + + fn mkfg(df: DF) -> Result { + let mut df = df.collect()?; + + if let Ok(x) = df + .clone() + .lazy() + .with_columns(vec![when(col("ticker").eq(lit("MARKFORGEDG"))) + .then(lit("MKFG")) + .otherwise(col("ticker")) + .alias("ticker")]) + .collect() + { + df = x; + } + + Ok(df.into()) + } + + fn cash_usd(df: DF) -> Result { + let mut df = df.collect()?; + + let exprs = |company: &str| -> Vec { + vec![ + when(col("company").eq(lit(company))) + .then(lit("CASH USD")) + .otherwise(col("ticker")) + .alias("ticker"), + when(col("company").eq(lit(company))) + .then(lit("CASH USD")) + .otherwise(col("company")) + .alias("company"), + ] + }; + + if let Ok(x) = df + .clone() + .lazy() + .with_columns(exprs("Cash & Cash Equivalents")) + .with_columns(exprs("GOLDMAN FS TRSY OBLIG INST 468")) + .collect() + { + df = x; + } + + Ok(df.into()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::test_utils::*; + use pretty_assertions::assert_eq; + use rstest::rstest; + + #[rstest] + #[case::mkfg( + Ticker::MKFG, + defualt_df( + &[Some("MKFG"), Some("MARKFORGEDG")], + &[Some("MARKFORGEDG"), Some("MARKFORGEDG")], + )?, + defualt_df( + &[Some("MKFG"), Some("MKFG")], + &[Some("MARKFORGEDG"), Some("MARKFORGEDG")] + )?, + )] + #[case::arkb( + Ticker::ARKW, + defualt_df( + &[None::<&str>, Some("ARKB"), Some("ARKB"), Some("ARKB")], + &[ + Some("ARK BITCOIN ETF HOLDCO (ARKW)"), + Some("ARK BITCOIN ETF HOLDCO (ARKW)"), + Some("ARK BITCOIN ETF HOLDCO (ARKF)"), + Some("ARKB"), + ], + )?, + defualt_df( + &[Some("ARKB"), Some("ARKB"), Some("ARKB"), Some("ARKB")], + &[Some("ARKB"), Some("ARKB"), Some("ARKB"), Some("ARKB")], + )?, + )] + #[case::cash_usd( + Ticker::CASH_USD, + defualt_df( + &[None::<&str>, None::<&str>], + &[Some("Cash & Cash Equivalents"), Some("GOLDMAN FS TRSY OBLIG INST 468")], + )?, + defualt_df( + &[Some("CASH USD"), Some("CASH USD")], + &[Some("CASH USD"), Some("CASH USD")], + )?, + )] + fn matrix( + #[case] ticker: Ticker, + #[case] input: DataFrame, + #[case] expected: DataFrame, + ) -> Result<(), Error> { + let test_df = input; + let formatted_df = ticker.format(test_df.into())?.collect()?; + assert_eq!(formatted_df, expected,); + Ok(()) + } +}