refactor: df format

This commit is contained in:
Elijah McMorris 2024-06-14 22:50:45 -07:00
parent b344b9ac24
commit 493ea31661
Signed by: NexVeridian
SSH key fingerprint: SHA256:bsA1SKZxuEcEVHAy3gY1HUeM5ykRJl0U0kQHQn0hMg8
3 changed files with 44 additions and 17 deletions

View file

@ -69,11 +69,11 @@ impl Ark {
if let Some(update) = update { if let Some(update) = update {
if existing_file { if existing_file {
ark.df = Self::concat_df(vec![ ark.df = Self::concat_df(vec![
Self::df_format(ark.df)?, Self::df_format(ark.df, None)?,
Self::df_format(update.into())?, Self::df_format(update.into(), None)?,
])?; ])?;
} else { } else {
ark.df = Self::df_format(update.into())?; ark.df = Self::df_format(update.into(), None)?;
} }
} }
@ -150,17 +150,25 @@ impl Ark {
} }
pub fn format(mut self) -> Result<Self, Error> { pub fn format(mut self) -> Result<Self, Error> {
self.df = Self::df_format(self.df)?; // self.df = Self::df_format(self.df, Some(self.ticker.data_source()))?;
self.df = Self::df_format(self.df, None)?;
Ok(self) Ok(self)
} }
pub fn df_format(df: DF) -> Result<DF, Error> { pub fn df_format(df: DF, data_source: Option<DataSource>) -> Result<DF, Error> {
let mut df = df.collect()?; let mut df = df.collect()?;
match data_source {
Some(ds) => {
df = df_format::df_format(ds, df.into())?.collect()?;
}
None => {
df = df_format::df_format_europe_csv(df.into())?.collect()?; df = df_format::df_format_europe_csv(df.into())?.collect()?;
df = df_format::df_format_europe_arkfundsio(df.into())?.collect()?; df = df_format::df_format_europe_arkfundsio(df.into())?.collect()?;
df = df_format::df_format_21shares(df.into())?.collect()?; df = df_format::df_format_21shares(df.into())?.collect()?;
df = df_format::df_format_arkvx(df.into())?.collect()?; df = df_format::df_format_arkvx(df.into())?.collect()?;
df = df_format::df_format_europe(df.into())?.collect()?; df = df_format::df_format_europe(df.into())?.collect()?;
}
}
if df.get_column_names().contains(&"market_value_($)") { if df.get_column_names().contains(&"market_value_($)") {
df = df df = df
@ -326,6 +334,8 @@ impl Ark {
.str() .str()
.replace_all(lit(" UW"), lit(""), true) .replace_all(lit(" UW"), lit(""), true)
.str() .str()
.replace_all(lit("/U"), lit(""), true)
.str()
.replace_all(lit(" CN"), lit(""), true) .replace_all(lit(" CN"), lit(""), true)
.str() .str()
.replace(lit("DKNN"), lit("DKNG"), true) .replace(lit("DKNN"), lit("DKNG"), true)
@ -558,8 +568,11 @@ impl Ark {
if Self::read_parquet(&ticker, path.as_ref()).is_ok() { if Self::read_parquet(&ticker, path.as_ref()).is_ok() {
let df_old = Self::read_parquet(&ticker, path.as_ref())?; let df_old = Self::read_parquet(&ticker, path.as_ref())?;
df = Self::concat_df(vec![Self::df_format(df_old)?, Self::df_format(df)?])?; df = Self::concat_df(vec![
df = Self::df_format(df)?; Self::df_format(df_old, None)?,
Self::df_format(df, None)?,
])?;
df = Self::df_format(df, None)?;
} }
Ok(Self { df, ticker, path }) Ok(Self { df, ticker, path })
} }
@ -612,7 +625,7 @@ mod tests {
let read = Ark::new(Source::Read, Ticker::ARKW, Some("data/test".to_owned()))?.collect()?; let read = Ark::new(Source::Read, Ticker::ARKW, Some("data/test".to_owned()))?.collect()?;
fs::remove_file("data/test/ARKW.parquet")?; fs::remove_file("data/test/ARKW.parquet")?;
let df = Ark::df_format(read.into())?.collect()?; let df = Ark::df_format(read.into(), None)?.collect()?;
assert_eq!( assert_eq!(
df, df,
df![ df![
@ -648,7 +661,7 @@ mod tests {
let read = Ark::new(Source::Read, Ticker::ARKF, Some("data/test".to_owned()))?.collect()?; let read = Ark::new(Source::Read, Ticker::ARKF, Some("data/test".to_owned()))?.collect()?;
fs::remove_file("data/test/ARKF.parquet")?; fs::remove_file("data/test/ARKF.parquet")?;
let df = Ark::df_format(read.into())?.collect()?; let df = Ark::df_format(read.into(), None)?.collect()?;
assert_eq!( assert_eq!(
df, df,
df![ df![

View file

@ -1,7 +1,21 @@
use anyhow::{Error, Result}; use anyhow::{Error, Result};
use polars::prelude::*; use polars::prelude::*;
use crate::util::df::DF; use crate::{ticker::DataSource, util::df::DF};
pub fn df_format(data_source: DataSource, mut df: DF) -> Result<DF, Error> {
let df = match data_source {
DataSource::ArkVenture => df_format_arkvx(df)?,
DataSource::Ark => df,
DataSource::Shares21 => df_format_21shares(df)?,
DataSource::ArkEurope | DataSource::Rize => {
df = df_format_europe_csv(df)?;
df = df_format_europe_arkfundsio(df)?;
df_format_europe(df)?
}
};
Ok(df)
}
pub fn df_format_21shares(df: DF) -> Result<DF, Error> { pub fn df_format_21shares(df: DF) -> Result<DF, Error> {
let mut df = df.collect()?; let mut df = df.collect()?;

View file

@ -46,7 +46,7 @@ fn get_api_format_arkk() -> Result<(), Error> {
Some("data/test".to_owned()), Some("data/test".to_owned()),
)? )?
.get_api(NaiveDate::from_ymd_opt(2023, 5, 18), None)?; .get_api(NaiveDate::from_ymd_opt(2023, 5, 18), None)?;
let df = Ark::df_format(dfl.into())?.collect()?; let df = Ark::df_format(dfl.into(), None)?.collect()?;
assert_eq!( assert_eq!(
(df.get_column_names(), df.dtypes(), df.shape().1 > 1), (df.get_column_names(), df.dtypes(), df.shape().1 > 1),
@ -86,7 +86,7 @@ fn get_api_format_arkvx() -> Result<(), Error> {
Some("data/test".to_owned()), Some("data/test".to_owned()),
)? )?
.get_api(NaiveDate::from_ymd_opt(2023, 1, 1), None)?; .get_api(NaiveDate::from_ymd_opt(2023, 1, 1), None)?;
let df = Ark::df_format(dfl.into())?.collect()?; let df = Ark::df_format(dfl.into(), None)?.collect()?;
assert_eq!( assert_eq!(
(df.get_column_names(), df.dtypes(), df.shape().1 > 1), (df.get_column_names(), df.dtypes(), df.shape().1 > 1),