feat: ticker format, test_utils, Makefile

This commit is contained in:
Elijah McMorris 2024-10-02 15:59:39 -07:00
parent 493ea31661
commit 126005905b
Signed by: NexVeridian
SSH key fingerprint: SHA256:bsA1SKZxuEcEVHAy3gY1HUeM5ykRJl0U0kQHQn0hMg8
10 changed files with 242 additions and 96 deletions

View file

@ -0,0 +1,159 @@
use anyhow::{Error, Result};
use polars::prelude::*;
use crate::{ticker::DataSource, util::df::DF};
pub fn data_source(data_source: DataSource, mut df: DF) -> Result<DF, Error> {
let df = match data_source {
DataSource::ArkVenture => df_format_arkvx(df)?,
DataSource::Ark => df,
DataSource::Shares21 => df_format_21shares(df)?,
DataSource::ArkEurope | DataSource::Rize => {
df = df_format_europe_csv(df)?;
df = df_format_europe_arkfundsio(df)?;
df_format_europe(df)?
}
};
Ok(df)
}
/// Renames the columns of a frame that carries a `Weightings` column to the
/// snake_case layout (`date`, `ticker`, `cusip`, `company`, `shares`,
/// `share_price`, `market_value`, `weight`) and drops the bookkeeping columns.
///
/// Frames without a `Weightings` column are returned unchanged.
///
/// # Errors
/// Fails if the incoming frame cannot be collected or if the rename plan
/// cannot be executed.
pub fn df_format_21shares(df: DF) -> Result<DF, Error> {
    // Source column names, paired positionally with `RENAMED_COLUMNS`.
    const RAW_COLUMNS: [&str; 8] = [
        "Date",
        "StockTicker",
        "CUSIP",
        "SecurityName",
        "Shares",
        "Price",
        "MarketValue",
        "Weightings",
    ];
    const RENAMED_COLUMNS: [&str; 8] = [
        "date",
        "ticker",
        "cusip",
        "company",
        "shares",
        "share_price",
        "market_value",
        "weight",
    ];
    // Removed after the rename; a column that is absent is silently skipped.
    const UNUSED_COLUMNS: [&str; 5] = [
        "Account",
        "NetAssets",
        "SharesOutstanding",
        "CreationUnits",
        "MoneyMarketFlag",
    ];

    let frame = df.collect()?;
    if !frame.get_column_names().contains(&"Weightings") {
        return Ok(frame.into());
    }

    let mut renamed = frame
        .lazy()
        .rename(RAW_COLUMNS, RENAMED_COLUMNS)
        .collect()?;
    for column in UNUSED_COLUMNS {
        _ = renamed.drop_in_place(column);
    }
    Ok(renamed.into())
}
/// Brings a frame handled for `DataSource::ArkVenture` to the common column
/// names.
///
/// Three independent steps are applied in order, each only when its trigger
/// column is present (or, for the last one, absent):
///
/// 1. With a `CUSIP` column: rename `CUSIP` -> `cusip` and
///    `weight (%)` -> `weight`.
/// 2. With a (still remaining) `weight (%)` column: rename it to `weight`.
/// 3. Without a `market_value` column: add `market_value`, `shares` and
///    `share_price` columns built from a null literal.
///
/// # Errors
/// Fails if the frame cannot be collected or any of the lazy plans fails.
pub fn df_format_arkvx(df: DF) -> Result<DF, Error> {
    let frame = df.collect()?;

    let frame = if frame.get_column_names().contains(&"CUSIP") {
        frame
            .lazy()
            .rename(["CUSIP", "weight (%)"], ["cusip", "weight"])
            .collect()?
    } else {
        frame
    };

    let frame = if frame.get_column_names().contains(&"weight (%)") {
        frame
            .lazy()
            .rename(["weight (%)"], ["weight"])
            .collect()?
    } else {
        frame
    };

    let frame = if frame.get_column_names().contains(&"market_value") {
        frame
    } else {
        // The source carries no position data, so typed null placeholders
        // are added for the missing columns.
        let placeholders = [
            Series::new("market_value", [None::<i64>]).lit(),
            Series::new("shares", [None::<i64>]).lit(),
            Series::new("share_price", [None::<f64>]).lit(),
        ];
        frame.lazy().with_columns(placeholders).collect()?
    };

    Ok(frame.into())
}
/// Reshapes a frame that carries a `Currency` column.
///
/// The `Currency` column is dropped, `name` / `ISIN` / `Weight` are renamed
/// to `company` / `cusip` / `weight`, and the columns `date` (today's local
/// date), `ticker`, `market_value`, `shares` and `share_price` (null
/// literals) are added. Frames without a `Currency` column are returned
/// unchanged.
///
/// # Errors
/// Fails if the frame cannot be collected or the lazy plan fails.
pub fn df_format_europe(df: DF) -> Result<DF, Error> {
    let mut frame = df.collect()?;
    if !frame.get_column_names().contains(&"Currency") {
        return Ok(frame.into());
    }

    _ = frame.drop_in_place("Currency");

    // The date is stamped with the local day on which formatting runs.
    let added_columns = [
        Series::new("date", [chrono::Local::now().date_naive()]).lit(),
        Series::new("ticker", [None::<String>]).lit(),
        Series::new("market_value", [None::<i64>]).lit(),
        Series::new("shares", [None::<i64>]).lit(),
        Series::new("share_price", [None::<f64>]).lit(),
    ];
    let formatted = frame
        .lazy()
        .rename(["name", "ISIN", "Weight"], ["company", "cusip", "weight"])
        .with_columns(added_columns)
        .collect()?;

    Ok(formatted.into())
}
/// Reshapes a frame whose columns are exactly
/// `company, cusip, date, fund, weight, weight_rank` (in that order).
///
/// `fund` and `weight_rank` are dropped and `ticker`, `market_value`,
/// `shares` and `share_price` are added from null literals. Any other column
/// layout is returned unchanged.
///
/// # Errors
/// Fails if the frame cannot be collected or the lazy plan fails.
pub fn df_format_europe_arkfundsio(df: DF) -> Result<DF, Error> {
    let mut frame = df.collect()?;

    // Both the names and their order must match for the layout to qualify.
    if frame.get_column_names()
        != ["company", "cusip", "date", "fund", "weight", "weight_rank"]
    {
        return Ok(frame.into());
    }

    for unused in ["fund", "weight_rank"] {
        _ = frame.drop_in_place(unused);
    }

    let placeholders = [
        Series::new("ticker", [None::<String>]).lit(),
        Series::new("market_value", [None::<i64>]).lit(),
        Series::new("shares", [None::<i64>]).lit(),
        Series::new("share_price", [None::<f64>]).lit(),
    ];
    let formatted = frame.lazy().with_columns(placeholders).collect()?;

    Ok(formatted.into())
}
/// Reshapes a frame that contains a `_duplicated_0` column.
///
/// The first two rows are skipped, the existing columns are renamed
/// positionally to `company`, `cusip` and `weight`, and the columns `date`
/// (today's local date), `ticker`, `market_value`, `shares` and
/// `share_price` (null literals) are added. Frames without a
/// `_duplicated_0` column are returned unchanged.
///
/// NOTE(review): `_duplicated_0` presumably appears when the CSV reader meets
/// repeated header names — confirm against the loader.
///
/// # Errors
/// Fails if the frame cannot be collected or the lazy plan fails.
pub fn df_format_europe_csv(df: DF) -> Result<DF, Error> {
    let frame = df.collect()?;
    if !frame.get_column_names().contains(&"_duplicated_0") {
        return Ok(frame.into());
    }

    // Skip the first two rows. The requested length is the pre-slice height,
    // i.e. "everything that is left" (presumably clamped by polars — confirm).
    let trimmed = frame.slice(2, frame.height());

    // The clone is required because the current column names are borrowed
    // from `trimmed` while the lazy plan takes ownership of a frame.
    let formatted = trimmed
        .clone()
        .lazy()
        .rename(trimmed.get_column_names(), ["company", "cusip", "weight"])
        .with_columns([
            Series::new("date", [chrono::Local::now().date_naive()]).lit(),
            Series::new("ticker", [None::<String>]).lit(),
            Series::new("market_value", [None::<i64>]).lit(),
            Series::new("shares", [None::<i64>]).lit(),
            Series::new("share_price", [None::<f64>]).lit(),
        ])
        .collect()?;

    Ok(formatted.into())
}

4
src/util/format/mod.rs Normal file
View file

@ -0,0 +1,4 @@
// Formatting helpers, split into one submodule per concern. Each submodule is
// glob re-exported so that its public items are reachable directly through
// this module.

// Per-data-source frame formatters.
pub mod data_source;
pub use data_source::*;
// Per-ticker fix-ups.
pub mod ticker;
pub use ticker::*;

171
src/util/format/ticker.rs Normal file
View file

@ -0,0 +1,171 @@
use anyhow::{Error, Result};
use polars::prelude::*;
use strum::IntoEnumIterator;
use strum_macros::EnumIter;
use crate::util::df::DF;
/// Ticker-specific fix-ups that can be applied to a holdings frame.
///
/// Each variant selects one fix-up routine in `Ticker::format`; `Ticker::all`
/// walks every variant through the derived `EnumIter` implementation.
// The variants are spelled as upper-case symbols (e.g. `CASH_USD`), which is
// why the naming lints are silenced here.
#[allow(clippy::upper_case_acronyms, non_camel_case_types)]
#[derive(Debug, strum_macros::Display, EnumIter, Clone, Copy, PartialEq)]
pub enum Ticker {
/// Rewrites the `ARK BITCOIN ETF HOLDCO (ARKW)` / `(ARKF)` rows to `ARKB`.
ARKW,
/// Rewrites the ticker `MARKFORGEDG` to `MKFG`.
MKFG,
/// Rewrites the `Cash & Cash Equivalents` and
/// `GOLDMAN FS TRSY OBLIG INST 468` rows to `CASH USD`.
CASH_USD,
}
impl Ticker {
pub fn all(mut df: DF) -> Result<DF, Error> {
for ticker in Ticker::iter() {
df = ticker.format(df)?;
}
Ok(df)
}
pub fn format(&self, df: DF) -> Result<DF, Error> {
match self {
Ticker::ARKW => Self::arkw(df),
Ticker::MKFG => Self::mkfg(df),
Ticker::CASH_USD => Self::cash_usd(df),
}
}
fn arkw(df: DF) -> Result<DF, Error> {
let mut df = df.collect()?;
if let Ok(x) = df
.clone()
.lazy()
.with_columns(vec![
when(col("company").eq(lit("ARK BITCOIN ETF HOLDCO (ARKW)")))
.then(lit("ARKB"))
.otherwise(col("ticker"))
.alias("ticker"),
when(col("company").eq(lit("ARK BITCOIN ETF HOLDCO (ARKW)")))
.then(lit("ARKB"))
.otherwise(col("company"))
.alias("company"),
])
.with_columns(vec![
when(col("company").eq(lit("ARK BITCOIN ETF HOLDCO (ARKF)")))
.then(lit("ARKB"))
.otherwise(col("ticker"))
.alias("ticker"),
when(col("company").eq(lit("ARK BITCOIN ETF HOLDCO (ARKF)")))
.then(lit("ARKB"))
.otherwise(col("company"))
.alias("company"),
])
.collect()
{
df = x;
}
Ok(df.into())
}
fn mkfg(df: DF) -> Result<DF, Error> {
let mut df = df.collect()?;
if let Ok(x) = df
.clone()
.lazy()
.with_columns(vec![when(col("ticker").eq(lit("MARKFORGEDG")))
.then(lit("MKFG"))
.otherwise(col("ticker"))
.alias("ticker")])
.collect()
{
df = x;
}
Ok(df.into())
}
fn cash_usd(df: DF) -> Result<DF, Error> {
let mut df = df.collect()?;
let exprs = |company: &str| -> Vec<Expr> {
vec![
when(col("company").eq(lit(company)))
.then(lit("CASH USD"))
.otherwise(col("ticker"))
.alias("ticker"),
when(col("company").eq(lit(company)))
.then(lit("CASH USD"))
.otherwise(col("company"))
.alias("company"),
]
};
if let Ok(x) = df
.clone()
.lazy()
.with_columns(exprs("Cash & Cash Equivalents"))
.with_columns(exprs("GOLDMAN FS TRSY OBLIG INST 468"))
.collect()
{
df = x;
}
Ok(df.into())
}
}
// Table-driven tests for `Ticker::format`: every `#[case]` supplies a variant,
// an input frame and the frame expected after formatting.
// NOTE(review): `defualt_df` comes from `crate::test_utils`; its two slices are
// presumably the `ticker` and `company` columns, in that order — confirm.
#[cfg(test)]
mod tests {
use super::*;
use crate::test_utils::*;
use pretty_assertions::assert_eq;
use rstest::rstest;
#[rstest]
// `MARKFORGEDG` in the first slice becomes `MKFG`; the second slice is unchanged.
#[case::mkfg(
Ticker::MKFG,
defualt_df(
&[Some("MKFG"), Some("MARKFORGEDG")],
&[Some("MARKFORGEDG"), Some("MARKFORGEDG")],
)?,
defualt_df(
&[Some("MKFG"), Some("MKFG")],
&[Some("MARKFORGEDG"), Some("MARKFORGEDG")]
)?,
)]
// Both HOLDCO spellings collapse to `ARKB`, with or without an existing
// ticker; a row that is already `ARKB` stays as it is.
#[case::arkb(
Ticker::ARKW,
defualt_df(
&[None::<&str>, Some("ARKB"), Some("ARKB"), Some("ARKB")],
&[
Some("ARK BITCOIN ETF HOLDCO (ARKW)"),
Some("ARK BITCOIN ETF HOLDCO (ARKW)"),
Some("ARK BITCOIN ETF HOLDCO (ARKF)"),
Some("ARKB"),
],
)?,
defualt_df(
&[Some("ARKB"), Some("ARKB"), Some("ARKB"), Some("ARKB")],
&[Some("ARKB"), Some("ARKB"), Some("ARKB"), Some("ARKB")],
)?,
)]
// Both cash-like company names end up as `CASH USD` in both slices.
#[case::cash_usd(
Ticker::CASH_USD,
defualt_df(
&[None::<&str>, None::<&str>],
&[Some("Cash & Cash Equivalents"), Some("GOLDMAN FS TRSY OBLIG INST 468")],
)?,
defualt_df(
&[Some("CASH USD"), Some("CASH USD")],
&[Some("CASH USD"), Some("CASH USD")],
)?,
)]
// Formats `input` with `ticker` and compares the collected result with
// `expected`. Returning `Result` lets the `?` in the case expressions and in
// the body propagate failures as test errors.
fn matrix(
#[case] ticker: Ticker,
#[case] input: DataFrame,
#[case] expected: DataFrame,
) -> Result<(), Error> {
let test_df = input;
let formatted_df = ticker.format(test_df.into())?.collect()?;
assert_eq!(formatted_df, expected,);
Ok(())
}
}