diff --git a/src/main.rs b/src/main.rs index d75c6e0..e64fec7 100644 --- a/src/main.rs +++ b/src/main.rs @@ -36,24 +36,29 @@ use util::*; // } // } -fn main() { - let read = Ark::new(Source::Read, Ticker::ARKK, None) - .unwrap() - .collect() - .unwrap(); - println!("{:#?}", read.dtypes()); - println!("{:#?}", read); - - let api = Ark::new(Source::ApiFull, Ticker::ARKK, None) - .unwrap() - .collect() - .unwrap(); - println!("{:#?}", api); - - // let ark = Ark::new(Source::Ark, Ticker::ARKK, None) +fn main() -> Result<(), Box> { + // let csv = Ark::merge_old_csv_to_parquet(Ticker::ARKK, None) + // .unwrap() + // .format() + // .unwrap() + // .write_parquet() // .unwrap() // .collect() // .unwrap(); + // println!("{:#?}", csv); + + let read = Ark::new(Source::Read, Ticker::ARKK, None)?.collect()?; + println!("{:#?}", read.dtypes()); + println!("{:#?}", read.get_column_names()); + println!("{:#?}", read); + + // let api = Ark::new(Source::ApiFull, Ticker::ARKK, None) + // .unwrap() + // .collect() + // .unwrap(); + // println!("{:#?}", api); + + // let ark = Ark::new(Source::Ark, Ticker::ARKK, None)?.collect()?; // println!("{:#?}", ark); // let ark = Ark::new(Source::Ark, Ticker::ARKVC, None) @@ -61,4 +66,5 @@ fn main() { // .collect() // .unwrap(); // println!("{:#?}", ark); + Ok(()) } diff --git a/src/util.rs b/src/util.rs index 6338d32..07795c4 100644 --- a/src/util.rs +++ b/src/util.rs @@ -1,14 +1,14 @@ use chrono::NaiveDate; use glob::glob; use polars::datatypes::DataType; +use polars::lazy::dsl::StrptimeOptions; use polars::prelude::*; -use polars::prelude::{DataFrame, StrptimeOptions, UniqueKeepStrategy}; use reqwest::blocking::Client; use serde_json::Value; use std::error::Error; use std::fs::{create_dir_all, File}; use std::io::Cursor; -use std::path::{Path, PathBuf}; +use std::path::Path; use std::result::Result; use strum_macros::EnumIter; @@ -249,21 +249,38 @@ impl Ark { if df.get_column_names().contains(&"weight_rank") { _ = df.drop_in_place("weight_rank"); } - - let mut expressions: Vec = vec![]; + if df.get_column_names().contains(&"") { + let mut cols = df.get_column_names(); + cols.retain(|&item| !item.is_empty()); + df = df.select(cols)?; + } if !df.fields().contains(&Field::new("date", DataType::Date)) { - expressions.push(col("date").str().strptime( - DataType::Date, - StrptimeOptions { - format: Some("%m/%d/%Y".into()), - strict: false, - exact: true, - cache: true, - }, - )); + let date_format = |df: DataFrame, format: &str| -> Result> { + Ok(df + .lazy() + .with_column(col("date").str().strptime( + DataType::Date, + StrptimeOptions { + format: Some(format.into()), + strict: false, + exact: true, + cache: true, + }, + )) + .collect()?) + }; + + if let Ok(x) = date_format(df.clone(), "%m/%d/%Y") { + df = x + } + if let Ok(x) = date_format(df.clone(), "%Y/%m/%d") { + df = x + } } + let mut expressions: Vec = vec![]; + if df.fields().contains(&Field::new("weight", DataType::Utf8)) { expressions.push( col("weight") diff --git a/tests/integration.rs b/tests/integration.rs index 8c273d6..1c75f5e 100644 --- a/tests/integration.rs +++ b/tests/integration.rs @@ -25,8 +25,7 @@ fn get_api_arkk() -> Result<(), Box> { "share_price", "shares", "ticker", - "weight", - "weight_rank" + "weight" ] ); Ok(())