This commit is contained in:
Elijah McMorris 2023-06-08 03:25:11 +00:00
parent 4074a97ae2
commit f3bc96b251
Signed by: NexVeridian
SSH key fingerprint: SHA256:bsA1SKZxuEcEVHAy3gY1HUeM5ykRJl0U0kQHQn0hMg8
7 changed files with 124 additions and 36 deletions

2
.gitignore vendored
View file

@ -24,7 +24,7 @@ wheels/
.installed.cfg .installed.cfg
*.egg *.egg
MANIFEST MANIFEST
.vscode # .vscode
# PyInstaller # PyInstaller
# Usually these files are written by a python script from a template # Usually these files are written by a python script from a template

43
.vscode/launch.json vendored Normal file
View file

@ -0,0 +1,43 @@
{
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"type": "lldb",
"request": "launch",
"name": "Debug executable 'ark-invest-api-rust-data'",
"cargo": {
"args": [
"build",
"--bin=ark-invest-api-rust-data",
"--package=ark-invest-api-rust-data"
],
"filter": {
"name": "ark-invest-api-rust-data",
"kind": "bin"
}
},
"args": [],
"cwd": "${workspaceFolder}"
},
{
"type": "lldb",
"request": "launch",
"name": "Debug unit tests in executable 'ark-invest-api-rust-data'",
"cargo": {
"args": [
"test",
"--no-run",
"--bin=ark-invest-api-rust-data",
"--package=ark-invest-api-rust-data"
],
"filter": {
"name": "ark-invest-api-rust-data",
"kind": "bin"
}
},
"args": [],
"cwd": "${workspaceFolder}"
}
]
}

17
.vscode/settings.json vendored Normal file
View file

@ -0,0 +1,17 @@
{
"[Rust]": {
"editor.defaultFormatter": "rust-lang.rust-analyzer",
"editor.formatOnSave": true,
"editor.formatOnSaveMode": "file"
},
"rust-analyzer.check.command": "clippy",
"rust-analyzer.cargo.buildScripts.overrideCommand": [
"cargo",
"clippy",
"--fix",
"--workspace",
"--message-format=json",
"--all-targets",
"--allow-dirty"
],
}

View file

@ -4,7 +4,7 @@ version = "0.1.0"
edition = "2021" edition = "2021"
[dependencies] [dependencies]
polars = { version = "0.28", features = [ polars = { version = "0.30", features = [
"lazy", "lazy",
"strings", "strings",
"parquet", "parquet",

View file

@ -1 +1,18 @@
Fetches and caches data from csv download and saves the data in parquet format Fetches and caches data from csv download and saves the data in parquet format
# Dev Install
## Dev Containers
Install Docker, VS Code, the [Remote Development Extension Pack](https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.vscode-remote-extensionpack), and the [GitHub Repositories Extension](https://marketplace.visualstudio.com/items?itemName=GitHub.remotehub).
Press `Ctrl+Shift+P` and run **Dev Containers: Clone Repository in Container Volume**.
Select GitHub, then paste the URL `https://github.com/NexVeridian/ark-invest-api-rust-data`.
Run the code with `F5` or `cargo run`.
## Docker Compose
`git clone https://github.com/NexVeridian/ark-invest-api-rust-data`
`docker compose build && docker compose up`
Remove the cargo cache for buildkit with `docker builder prune --filter type=exec.cachemount`

View file

@ -40,10 +40,14 @@ fn main() {
let dfn = df_format(read_parquet(Ticker::ARKVC).unwrap()).unwrap(); let dfn = df_format(read_parquet(Ticker::ARKVC).unwrap()).unwrap();
println!("{:#?}", dfn); println!("{:#?}", dfn);
// update_parquet(Ticker::ARKVC).unwrap(); // let update = df_format(get_csv(Ticker::ARKF).unwrap()).unwrap();
// let update = df_format(Ticker::ARKF, get_csv(Ticker::ARKF).unwrap()).unwrap(); // println!("{:#?}", update);
// let update = get_csv(Ticker::ARKF).unwrap().collect().unwrap();
// update_parquet(Ticker::ARKVC).unwrap();
// let x = df_format(read_parquet(Ticker::ARKVC).unwrap()).unwrap();
// println!("{:#?}", x);
// merge_csv_to_parquet(Ticker::ARKVC).unwrap();
// let x = df_format(read_parquet(Ticker::ARKVC).unwrap()).unwrap(); // let x = df_format(read_parquet(Ticker::ARKVC).unwrap()).unwrap();
// println!("{:#?}", x); // println!("{:#?}", x);
} }

View file

@ -1,8 +1,7 @@
use glob::glob; use glob::glob;
use polars::datatypes::DataType; use polars::datatypes::DataType;
use polars::lazy::dsl::StrpTimeOptions;
use polars::prelude::*; use polars::prelude::*;
use polars::prelude::{DataFrame, UniqueKeepStrategy}; use polars::prelude::{DataFrame, StrptimeOptions, UniqueKeepStrategy};
use reqwest::blocking::Client; use reqwest::blocking::Client;
use std::error::Error; use std::error::Error;
use std::fs::File; use std::fs::File;
@ -35,16 +34,28 @@ impl Ticker {
} }
} }
pub fn merge_csv_to_parquet(folder: Ticker) -> Result<(), Box<dyn Error>> { pub fn merge_csv_to_parquet(ticker: Ticker) -> Result<(), Box<dyn Error>> {
let mut dfs = vec![]; let mut dfs = vec![];
for x in glob(&format!("data/csv/{}/*", folder))?.filter_map(Result::ok) { for x in glob(&format!("data/csv/{}/*", ticker))?.filter_map(Result::ok) {
dfs.push(LazyCsvReader::new(x).finish()?); dfs.push(LazyCsvReader::new(x).finish()?);
} }
let df = concat(dfs, false, true)?; let mut df = concat(dfs, false, true)?;
if read_parquet(ticker).is_ok() {
let df_old = read_parquet(ticker)?;
df = concat(
vec![df_format(df_old)?.lazy(), df_format(df)?.lazy()],
false,
true,
)?
.unique_stable(None, UniqueKeepStrategy::First);
write_parquet(ticker, df_sort(df.collect()?)?)?;
} else {
write_parquet(ticker, df_format(df)?)?;
}
write_parquet(folder, df_format(df)?)?;
Ok(()) Ok(())
} }
@ -60,7 +71,7 @@ pub fn update_parquet(ticker: Ticker) -> Result<(), Box<dyn Error>> {
)? )?
.unique_stable(None, UniqueKeepStrategy::First); .unique_stable(None, UniqueKeepStrategy::First);
write_parquet(ticker, df.collect()?)?; write_parquet(ticker, df_sort(df.collect()?)?)?;
Ok(()) Ok(())
} }
@ -79,6 +90,10 @@ pub fn write_parquet(ticker: Ticker, mut df: DataFrame) -> Result<(), Box<dyn Er
Ok(()) Ok(())
} }
/// Returns `df` sorted by the `date` column and then by the `weight` column.
///
/// The flags `[false, true]` are passed as the second argument of
/// `DataFrame::sort`, one per sort column.
/// NOTE(review): in polars these are presumably per-column "descending" flags
/// (i.e. `date` ascending, `weight` descending) — confirm against the pinned
/// polars version.
///
/// # Errors
/// Propagates the error returned by `DataFrame::sort`.
pub fn df_sort(df: DataFrame) -> Result<DataFrame, Box<dyn Error>> {
    let sorted = df.sort(["date", "weight"], vec![false, true])?;
    Ok(sorted)
}
pub fn df_format(df: LazyFrame) -> Result<DataFrame, Box<dyn Error>> { pub fn df_format(df: LazyFrame) -> Result<DataFrame, Box<dyn Error>> {
let mut df = df.collect()?; let mut df = df.collect()?;
@ -86,8 +101,8 @@ pub fn df_format(df: LazyFrame) -> Result<DataFrame, Box<dyn Error>> {
df = df df = df
.lazy() .lazy()
.rename( .rename(
vec!["market_value_($), weight_(%)"], vec!["market_value_($)", "weight_(%)"],
vec!["market_value, weight"], vec!["market_value", "weight"],
) )
.collect()?; .collect()?;
} }
@ -95,8 +110,8 @@ pub fn df_format(df: LazyFrame) -> Result<DataFrame, Box<dyn Error>> {
df = df df = df
.lazy() .lazy()
.rename( .rename(
vec!["market value ($), weight (%)"], vec!["market value ($)", "weight (%)"],
vec!["market_value, weight"], vec!["market_value", "weight"],
) )
.collect()?; .collect()?;
} }
@ -123,15 +138,15 @@ pub fn df_format(df: LazyFrame) -> Result<DataFrame, Box<dyn Error>> {
let mut expressions: Vec<Expr> = vec![]; let mut expressions: Vec<Expr> = vec![];
if !df.fields().contains(&Field::new("date", DataType::Date)) { if !df.fields().contains(&Field::new("date", DataType::Date)) {
expressions.push(col("date").str().strptime(StrpTimeOptions { expressions.push(col("date").str().strptime(
date_dtype: DataType::Date, DataType::Date,
fmt: Some("%m/%d/%Y".into()), StrptimeOptions {
strict: false, format: Some("%m/%d/%Y".into()),
exact: true, strict: false,
cache: false, exact: true,
tz_aware: false, cache: false,
utc: false, },
})); ));
} }
if df.fields().contains(&Field::new("weight", DataType::Utf8)) { if df.fields().contains(&Field::new("weight", DataType::Utf8)) {
@ -208,16 +223,8 @@ pub fn df_format(df: LazyFrame) -> Result<DataFrame, Box<dyn Error>> {
pub fn get_csv(ticker: Ticker) -> Result<LazyFrame, Box<dyn Error>> { pub fn get_csv(ticker: Ticker) -> Result<LazyFrame, Box<dyn Error>> {
let url = match ticker { let url = match ticker {
Ticker::ARKVC => { Ticker::ARKVC => "https://ark-ventures.com/wp-content/uploads/funds-etf-csv/ARK_VENTURE_FUND_HOLDINGS.csv".to_owned(),
"https://ark-ventures.com/wp-content/uploads/funds-etf-csv/ARK_VENTURE_FUND_HOLDINGS.csv".to_owned() _ => format!("https://ark-funds.com/wp-content/uploads/funds-etf-csv/ARK_{}_ETF_{}_HOLDINGS.csv", ticker.value(), ticker),
}
_ => {
format!(
"https://ark-funds.com/wp-content/uploads/funds-etf-csv/ARK_{}_ETF_{}_HOLDINGS.csv",
ticker.value(),
ticker
)
}
}; };
let response = Client::builder() let response = Client::builder()
@ -232,7 +239,7 @@ pub fn get_csv(ticker: Ticker) -> Result<LazyFrame, Box<dyn Error>> {
.into()); .into());
} }
let data: Vec<u8> = response.text()?.bytes().collect(); let data = response.text()?.into_bytes();
let df = CsvReader::new(Cursor::new(data)) let df = CsvReader::new(Cursor::new(data))
.has_header(true) .has_header(true)