feat: ticker format, test_utils, Makefile

This commit is contained in:
Elijah McMorris 2024-10-02 15:59:39 -07:00
parent 493ea31661
commit 126005905b
Signed by: NexVeridian
SSH key fingerprint: SHA256:bsA1SKZxuEcEVHAy3gY1HUeM5ykRJl0U0kQHQn0hMg8
10 changed files with 242 additions and 96 deletions

View file

@ -1,6 +1,5 @@
# Contributing code # Contributing code
- Make sure the test pass - Run `make precommit`
- Run `cargo clippy --fix --allow-dirty`
# Dev Install # Dev Install
## Dev Containers ## Dev Containers

View file

@ -15,7 +15,7 @@ polars = { version = "0.32", features = [
"object", "object",
"dtype-struct", "dtype-struct",
] } ] }
reqwest = { version = "0.11", features = ["blocking", "gzip"] } reqwest = { version = "0.12", features = ["blocking", "gzip"] }
glob = { version = "0.3" } glob = { version = "0.3" }
clokwerk = "0.4" clokwerk = "0.4"
strum_macros = "0.26" strum_macros = "0.26"
@ -30,6 +30,6 @@ lazy_static = "1.4"
anyhow = "1.0" anyhow = "1.0"
[dev-dependencies] [dev-dependencies]
serial_test = "*" serial_test = "3.1"
rstest = "0.21" rstest = "0.23"
pretty_assertions = "1.4" pretty_assertions = "1.4"

7
Makefile Normal file
View file

@ -0,0 +1,7 @@
# Pre-commit routine: refresh the toolchain and dependencies, then check,
# format, test and lint the crate. `cargo t` is Cargo's built-in alias for
# `cargo test`. The clippy step runs with `--fix --allow-dirty`, so it may
# rewrite files in a working tree that has uncommitted changes.
#
# Declared phony because the target names no file: without this, a file or
# directory called `precommit` would make `make precommit` do nothing.
.PHONY: precommit
precommit:
	rustup update
	cargo update
	cargo check
	cargo fmt
	cargo t
	cargo clippy --fix --allow-dirty

View file

@ -1,2 +1,5 @@
pub mod util; pub mod util;
pub use util::*; pub use util::*;
#[cfg(test)]
mod test_utils;

View file

@ -1,4 +1,5 @@
use anyhow::{Error, Result}; use anyhow::{Error, Result};
use ark_invest_api_rust_data::{util::ticker::Ticker, *};
use clokwerk::{AsyncScheduler, Job, TimeUnits}; use clokwerk::{AsyncScheduler, Job, TimeUnits};
use futures::future::join_all; use futures::future::join_all;
use lazy_static::lazy_static; use lazy_static::lazy_static;
@ -11,10 +12,6 @@ use strum::IntoEnumIterator;
use tokio::task; use tokio::task;
use tokio::time::Duration; use tokio::time::Duration;
mod util;
use util::ticker::Ticker;
use util::*;
lazy_static! { lazy_static! {
static ref SOURCE: Source = match env::var("ARK_SOURCE") { static ref SOURCE: Source = match env::var("ARK_SOURCE") {
Ok(val) => Ok(val) =>

17
src/test_utils.rs Normal file
View file

@ -0,0 +1,17 @@
use anyhow::{Error, Result};
use polars::prelude::*;
/// Builds a small holdings `DataFrame` fixture for tests.
///
/// The rows described by `ticker` and `company` come first, followed by one
/// fixed `TSLA` / `TESLA` row. Every other column carries the same constant
/// value on all rows.
///
/// NOTE: the name is a misspelling of `default_df`; it is kept as-is because
/// existing tests call it under this name.
///
/// # Errors
///
/// Propagates whatever error `df!` reports while assembling the frame —
/// presumably when `ticker` and `company` differ in length (TODO confirm).
pub fn defualt_df(ticker: &[Option<&str>], company: &[Option<&str>]) -> Result<DataFrame, Error> {
    // One extra row for the fixed TSLA entry appended below.
    let row_count = ticker.len() + 1;

    let mut tickers = ticker.to_vec();
    tickers.push(Some("TSLA"));

    let mut companies = company.to_vec();
    companies.push(Some("TESLA"));

    Ok(df![
        "date" => vec!["2024-01-01"; row_count],
        "ticker" => tickers,
        "cusip" => vec!["TESLA"; row_count],
        "company" => companies,
        "market_value" => vec![10; row_count],
        "shares" => vec![10; row_count],
        "share_price" => vec![100.00; row_count],
        "weight" => vec![10.00; row_count],
    ]?)
}

View file

@ -13,7 +13,7 @@ use strum_macros::EnumString;
use ticker::{DataSource, Ticker}; use ticker::{DataSource, Ticker};
pub mod data_reader; pub mod data_reader;
pub mod df; pub mod df;
mod df_format; mod format;
pub mod ticker; pub mod ticker;
#[derive(Debug, Default, EnumString, Clone, Copy, PartialEq)] #[derive(Debug, Default, EnumString, Clone, Copy, PartialEq)]
@ -159,14 +159,14 @@ impl Ark {
let mut df = df.collect()?; let mut df = df.collect()?;
match data_source { match data_source {
Some(ds) => { Some(ds) => {
df = df_format::df_format(ds, df.into())?.collect()?; df = format::data_source(ds, df.into())?.collect()?;
} }
None => { None => {
df = df_format::df_format_europe_csv(df.into())?.collect()?; df = format::df_format_europe_csv(df.into())?.collect()?;
df = df_format::df_format_europe_arkfundsio(df.into())?.collect()?; df = format::df_format_europe_arkfundsio(df.into())?.collect()?;
df = df_format::df_format_21shares(df.into())?.collect()?; df = format::df_format_21shares(df.into())?.collect()?;
df = df_format::df_format_arkvx(df.into())?.collect()?; df = format::df_format_arkvx(df.into())?.collect()?;
df = df_format::df_format_europe(df.into())?.collect()?; df = format::df_format_europe(df.into())?.collect()?;
} }
} }
@ -242,34 +242,7 @@ impl Ark {
} }
} }
// format arkw, ARK BITCOIN ETF HOLDCO (ARKW) to ARKB df = format::Ticker::all(df.into())?.collect()?;
if let Ok(x) = df
.clone()
.lazy()
.with_columns(vec![
when(col("company").eq(lit("ARK BITCOIN ETF HOLDCO (ARKW)")))
.then(lit("ARKB"))
.otherwise(col("ticker"))
.alias("ticker"),
when(col("company").eq(lit("ARK BITCOIN ETF HOLDCO (ARKW)")))
.then(lit("ARKB"))
.otherwise(col("company"))
.alias("company"),
])
.with_columns(vec![
when(col("company").eq(lit("ARK BITCOIN ETF HOLDCO (ARKF)")))
.then(lit("ARKB"))
.otherwise(col("ticker"))
.alias("ticker"),
when(col("company").eq(lit("ARK BITCOIN ETF HOLDCO (ARKF)")))
.then(lit("ARKB"))
.otherwise(col("company"))
.alias("company"),
])
.collect()
{
df = x;
}
let mut expressions: Vec<Expr> = vec![]; let mut expressions: Vec<Expr> = vec![];
@ -581,6 +554,7 @@ impl Ark {
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
use crate::test_utils::*;
use pretty_assertions::assert_eq; use pretty_assertions::assert_eq;
use serial_test::serial; use serial_test::serial;
use std::fs; use std::fs;
@ -588,16 +562,7 @@ mod tests {
#[test] #[test]
#[serial] #[serial]
fn read_write_parquet() -> Result<(), Error> { fn read_write_parquet() -> Result<(), Error> {
let test_df = df![ let test_df = defualt_df(&[Some("COIN")], &[Some("COINBASE")])?;
"date" => ["2023-01-01"],
"ticker" => ["TSLA"],
"cusip" => ["123abc"],
"company" => ["Tesla"],
"market_value" => [100],
"shares" => [10],
"share_price" => [10],
"weight" => [10.00]
]?;
Ark::write_df_parquet("data/test/ARKK.parquet".into(), test_df.clone().into())?; Ark::write_df_parquet("data/test/ARKK.parquet".into(), test_df.clone().into())?;
let read = Ark::new(Source::Read, Ticker::ARKK, Some("data/test".to_owned()))?.collect()?; let read = Ark::new(Source::Read, Ticker::ARKK, Some("data/test".to_owned()))?.collect()?;
@ -610,16 +575,14 @@ mod tests {
#[test] #[test]
#[serial] #[serial]
fn arkw_format_arkb() -> Result<(), Error> { fn arkw_format_arkb() -> Result<(), Error> {
let test_df = df![ let test_df = defualt_df(
"date" => ["2024-01-01", "2024-01-02"], &[None::<&str>, Some("ARKB"), Some("ARKB")],
"ticker" => [None::<&str>, Some("TSLA")], &[
"cusip" => ["123abc", "TESLA"], Some("ARK BITCOIN ETF HOLDCO (ARKW)"),
"company" => ["ARK BITCOIN ETF HOLDCO (ARKW)", "TESLA"], Some("ARK BITCOIN ETF HOLDCO (ARKW)"),
"market_value" => [100, 400], Some("ARKB"),
"shares" => [10, 20], ],
"share_price" => [10, 20], )?;
"weight" => [10.00, 20.00]
]?;
Ark::write_df_parquet("data/test/ARKW.parquet".into(), test_df.clone().into())?; Ark::write_df_parquet("data/test/ARKW.parquet".into(), test_df.clone().into())?;
let read = Ark::new(Source::Read, Ticker::ARKW, Some("data/test".to_owned()))?.collect()?; let read = Ark::new(Source::Read, Ticker::ARKW, Some("data/test".to_owned()))?.collect()?;
@ -628,16 +591,10 @@ mod tests {
let df = Ark::df_format(read.into(), None)?.collect()?; let df = Ark::df_format(read.into(), None)?.collect()?;
assert_eq!( assert_eq!(
df, df,
df![ defualt_df(
"date" => ["2024-01-01", "2024-01-02"], &[Some("ARKB"), Some("ARKB"), Some("ARKB")],
"ticker" => ["ARKB", "TSLA"], &[Some("ARKB"), Some("ARKB"), Some("ARKB")]
"cusip" => ["123abc", "TESLA"], )?,
"company" => ["ARKB", "TESLA"],
"market_value" => [100, 400],
"shares" => [10, 20],
"share_price" => [10, 20],
"weight" => [10.00, 20.00]
]?
); );
Ok(()) Ok(())
@ -646,17 +603,14 @@ mod tests {
#[test] #[test]
#[serial] #[serial]
fn arkf_format_arkb() -> Result<(), Error> { fn arkf_format_arkb() -> Result<(), Error> {
let test_df = df![ let test_df = defualt_df(
"date" => ["2024-01-01", "2024-01-02"], &[None::<&str>, Some("ARKB"), Some("ARKB")],
"ticker" => [None::<&str>, Some("TSLA")], &[
"cusip" => ["123abc", "TESLA"], Some("ARK BITCOIN ETF HOLDCO (ARKF)"),
"company" => ["ARK BITCOIN ETF HOLDCO (ARKF)", "TESLA"], Some("ARK BITCOIN ETF HOLDCO (ARKF)"),
"market_value" => [100, 400], Some("ARKB"),
"shares" => [10, 20], ],
"share_price" => [10, 20], )?;
"weight" => [10.00, 20.00]
]?;
Ark::write_df_parquet("data/test/ARKF.parquet".into(), test_df.clone().into())?; Ark::write_df_parquet("data/test/ARKF.parquet".into(), test_df.clone().into())?;
let read = Ark::new(Source::Read, Ticker::ARKF, Some("data/test".to_owned()))?.collect()?; let read = Ark::new(Source::Read, Ticker::ARKF, Some("data/test".to_owned()))?.collect()?;
fs::remove_file("data/test/ARKF.parquet")?; fs::remove_file("data/test/ARKF.parquet")?;
@ -664,16 +618,10 @@ mod tests {
let df = Ark::df_format(read.into(), None)?.collect()?; let df = Ark::df_format(read.into(), None)?.collect()?;
assert_eq!( assert_eq!(
df, df,
df![ defualt_df(
"date" => ["2024-01-01", "2024-01-02"], &[Some("ARKB"), Some("ARKB"), Some("ARKB")],
"ticker" => ["ARKB", "TSLA"], &[Some("ARKB"), Some("ARKB"), Some("ARKB")]
"cusip" => ["123abc", "TESLA"], )?,
"company" => ["ARKB", "TESLA"],
"market_value" => [100, 400],
"shares" => [10, 20],
"share_price" => [10, 20],
"weight" => [10.00, 20.00]
]?
); );
Ok(()) Ok(())

View file

@ -3,7 +3,7 @@ use polars::prelude::*;
use crate::{ticker::DataSource, util::df::DF}; use crate::{ticker::DataSource, util::df::DF};
pub fn df_format(data_source: DataSource, mut df: DF) -> Result<DF, Error> { pub fn data_source(data_source: DataSource, mut df: DF) -> Result<DF, Error> {
let df = match data_source { let df = match data_source {
DataSource::ArkVenture => df_format_arkvx(df)?, DataSource::ArkVenture => df_format_arkvx(df)?,
DataSource::Ark => df, DataSource::Ark => df,

4
src/util/format/mod.rs Normal file
View file

@ -0,0 +1,4 @@
pub mod data_source;
pub use data_source::*;
pub mod ticker;
pub use ticker::*;

171
src/util/format/ticker.rs Normal file
View file

@ -0,0 +1,171 @@
use anyhow::{Error, Result};
use polars::prelude::*;
use strum::IntoEnumIterator;
use strum_macros::EnumIter;
use crate::util::df::DF;
/// Holdings whose `ticker` / `company` labels are rewritten by this module.
///
/// Each variant selects one formatter in `Ticker::format`; `Ticker::all`
/// runs the formatter of every variant.
// The lint allowances let the variants be spelled like the symbols they
// stand for: all-caps acronyms, plus an underscore in `CASH_USD`.
#[allow(clippy::upper_case_acronyms, non_camel_case_types)]
#[derive(Debug, strum_macros::Display, EnumIter, Clone, Copy, PartialEq)]
pub enum Ticker {
/// Relabels the "ARK BITCOIN ETF HOLDCO (ARKW)" and "(ARKF)" companies as `ARKB`.
ARKW,
/// Rewrites the ticker `MARKFORGEDG` to `MKFG`.
MKFG,
/// Relabels the "Cash & Cash Equivalents" and "GOLDMAN FS TRSY OBLIG INST 468"
/// companies as `CASH USD`.
CASH_USD,
}
impl Ticker {
pub fn all(mut df: DF) -> Result<DF, Error> {
for ticker in Ticker::iter() {
df = ticker.format(df)?;
}
Ok(df)
}
pub fn format(&self, df: DF) -> Result<DF, Error> {
match self {
Ticker::ARKW => Self::arkw(df),
Ticker::MKFG => Self::mkfg(df),
Ticker::CASH_USD => Self::cash_usd(df),
}
}
fn arkw(df: DF) -> Result<DF, Error> {
let mut df = df.collect()?;
if let Ok(x) = df
.clone()
.lazy()
.with_columns(vec![
when(col("company").eq(lit("ARK BITCOIN ETF HOLDCO (ARKW)")))
.then(lit("ARKB"))
.otherwise(col("ticker"))
.alias("ticker"),
when(col("company").eq(lit("ARK BITCOIN ETF HOLDCO (ARKW)")))
.then(lit("ARKB"))
.otherwise(col("company"))
.alias("company"),
])
.with_columns(vec![
when(col("company").eq(lit("ARK BITCOIN ETF HOLDCO (ARKF)")))
.then(lit("ARKB"))
.otherwise(col("ticker"))
.alias("ticker"),
when(col("company").eq(lit("ARK BITCOIN ETF HOLDCO (ARKF)")))
.then(lit("ARKB"))
.otherwise(col("company"))
.alias("company"),
])
.collect()
{
df = x;
}
Ok(df.into())
}
fn mkfg(df: DF) -> Result<DF, Error> {
let mut df = df.collect()?;
if let Ok(x) = df
.clone()
.lazy()
.with_columns(vec![when(col("ticker").eq(lit("MARKFORGEDG")))
.then(lit("MKFG"))
.otherwise(col("ticker"))
.alias("ticker")])
.collect()
{
df = x;
}
Ok(df.into())
}
fn cash_usd(df: DF) -> Result<DF, Error> {
let mut df = df.collect()?;
let exprs = |company: &str| -> Vec<Expr> {
vec![
when(col("company").eq(lit(company)))
.then(lit("CASH USD"))
.otherwise(col("ticker"))
.alias("ticker"),
when(col("company").eq(lit(company)))
.then(lit("CASH USD"))
.otherwise(col("company"))
.alias("company"),
]
};
if let Ok(x) = df
.clone()
.lazy()
.with_columns(exprs("Cash & Cash Equivalents"))
.with_columns(exprs("GOLDMAN FS TRSY OBLIG INST 468"))
.collect()
{
df = x;
}
Ok(df.into())
}
}
// Table-driven tests for the individual `Ticker` formatters.
#[cfg(test)]
mod tests {
use super::*;
use crate::test_utils::*;
use pretty_assertions::assert_eq;
use rstest::rstest;
// Each case supplies a formatter, an input frame and the frame expected after
// formatting. Frames come from `defualt_df`, which appends a fixed TSLA/TESLA
// row, so every case also checks that an unrelated row passes through as-is.
#[rstest]
#[case::mkfg(
Ticker::MKFG,
defualt_df(
&[Some("MKFG"), Some("MARKFORGEDG")],
&[Some("MARKFORGEDG"), Some("MARKFORGEDG")],
)?,
defualt_df(
&[Some("MKFG"), Some("MKFG")],
&[Some("MARKFORGEDG"), Some("MARKFORGEDG")]
)?,
)]
#[case::arkb(
Ticker::ARKW,
defualt_df(
&[None::<&str>, Some("ARKB"), Some("ARKB"), Some("ARKB")],
&[
Some("ARK BITCOIN ETF HOLDCO (ARKW)"),
Some("ARK BITCOIN ETF HOLDCO (ARKW)"),
Some("ARK BITCOIN ETF HOLDCO (ARKF)"),
Some("ARKB"),
],
)?,
defualt_df(
&[Some("ARKB"), Some("ARKB"), Some("ARKB"), Some("ARKB")],
&[Some("ARKB"), Some("ARKB"), Some("ARKB"), Some("ARKB")],
)?,
)]
#[case::cash_usd(
Ticker::CASH_USD,
defualt_df(
&[None::<&str>, None::<&str>],
&[Some("Cash & Cash Equivalents"), Some("GOLDMAN FS TRSY OBLIG INST 468")],
)?,
defualt_df(
&[Some("CASH USD"), Some("CASH USD")],
&[Some("CASH USD"), Some("CASH USD")],
)?,
)]
fn matrix(
#[case] ticker: Ticker,
#[case] input: DataFrame,
#[case] expected: DataFrame,
) -> Result<(), Error> {
let test_df = input;
// Run only the formatter under test, then materialise the result for comparison.
let formatted_df = ticker.format(test_df.into())?.collect()?;
assert_eq!(formatted_df, expected,);
Ok(())
}
}