mirror of
https://github.com/NexVeridian/ark-invest-api-rust-data.git
synced 2025-09-02 01:49:12 +00:00
0.2.2
This commit is contained in:
parent
4074a97ae2
commit
f3bc96b251
7 changed files with 124 additions and 36 deletions
2
.gitignore
vendored
2
.gitignore
vendored
|
@ -24,7 +24,7 @@ wheels/
|
|||
.installed.cfg
|
||||
*.egg
|
||||
MANIFEST
|
||||
.vscode
|
||||
# .vscode
|
||||
|
||||
# PyInstaller
|
||||
# Usually these files are written by a python script from a template
|
||||
|
|
43
.vscode/launch.json
vendored
Normal file
43
.vscode/launch.json
vendored
Normal file
|
@ -0,0 +1,43 @@
|
|||
{
|
||||
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
||||
"version": "0.2.0",
|
||||
"configurations": [
|
||||
{
|
||||
"type": "lldb",
|
||||
"request": "launch",
|
||||
"name": "Debug executable 'ark-invest-api-rust-data'",
|
||||
"cargo": {
|
||||
"args": [
|
||||
"build",
|
||||
"--bin=ark-invest-api-rust-data",
|
||||
"--package=ark-invest-api-rust-data"
|
||||
],
|
||||
"filter": {
|
||||
"name": "ark-invest-api-rust-data",
|
||||
"kind": "bin"
|
||||
}
|
||||
},
|
||||
"args": [],
|
||||
"cwd": "${workspaceFolder}"
|
||||
},
|
||||
{
|
||||
"type": "lldb",
|
||||
"request": "launch",
|
||||
"name": "Debug unit tests in executable 'ark-invest-api-rust-data'",
|
||||
"cargo": {
|
||||
"args": [
|
||||
"test",
|
||||
"--no-run",
|
||||
"--bin=ark-invest-api-rust-data",
|
||||
"--package=ark-invest-api-rust-data"
|
||||
],
|
||||
"filter": {
|
||||
"name": "ark-invest-api-rust-data",
|
||||
"kind": "bin"
|
||||
}
|
||||
},
|
||||
"args": [],
|
||||
"cwd": "${workspaceFolder}"
|
||||
}
|
||||
]
|
||||
}
|
17
.vscode/settings.json
vendored
Normal file
17
.vscode/settings.json
vendored
Normal file
|
@ -0,0 +1,17 @@
|
|||
{
|
||||
"[Rust]": {
|
||||
"editor.defaultFormatter": "rust-lang.rust-analyzer",
|
||||
"editor.formatOnSave": true,
|
||||
"editor.formatOnSaveMode": "file"
|
||||
},
|
||||
"rust-analyzer.check.command": "clippy",
|
||||
"rust-analyzer.cargo.buildScripts.overrideCommand": [
|
||||
"cargo",
|
||||
"clippy",
|
||||
"--fix",
|
||||
"--workspace",
|
||||
"--message-format=json",
|
||||
"--all-targets",
|
||||
"--allow-dirty"
|
||||
],
|
||||
}
|
|
@ -4,7 +4,7 @@ version = "0.1.0"
|
|||
edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
polars = { version = "0.28", features = [
|
||||
polars = { version = "0.30", features = [
|
||||
"lazy",
|
||||
"strings",
|
||||
"parquet",
|
||||
|
|
17
README.md
17
README.md
|
@ -1 +1,18 @@
|
|||
Fetches and caches data from CSV downloads and saves it in Parquet format.
|
||||
|
||||
# Dev Install
|
||||
## Dev Containers
|
||||
Install Docker, VS Code, the [Remote Development Extension Pack](https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.vscode-remote-extensionpack), and the [GitHub Repositories Extension](https://marketplace.visualstudio.com/items?itemName=GitHub.remotehub)
|
||||
|
||||
`Ctrl+Shift+P` **Dev Containers: Clone Repository in Container Volume**
|
||||
|
||||
Select GitHub, then paste the URL `https://github.com/NexVeridian/ark-invest-api-rust-data`
|
||||
|
||||
Run the code with `F5` or `cargo run`
|
||||
|
||||
## Docker Compose
|
||||
`git clone https://github.com/NexVeridian/ark-invest-api-rust-data`
|
||||
|
||||
`docker compose build && docker compose up`
|
||||
|
||||
Remove the cargo cache for buildkit with `docker builder prune --filter type=exec.cachemount`
|
||||
|
|
10
src/main.rs
10
src/main.rs
|
@ -40,10 +40,14 @@ fn main() {
|
|||
let dfn = df_format(read_parquet(Ticker::ARKVC).unwrap()).unwrap();
|
||||
println!("{:#?}", dfn);
|
||||
|
||||
// update_parquet(Ticker::ARKVC).unwrap();
|
||||
// let update = df_format(Ticker::ARKF, get_csv(Ticker::ARKF).unwrap()).unwrap();
|
||||
// let update = get_csv(Ticker::ARKF).unwrap().collect().unwrap();
|
||||
// let update = df_format(get_csv(Ticker::ARKF).unwrap()).unwrap();
|
||||
// println!("{:#?}", update);
|
||||
|
||||
// update_parquet(Ticker::ARKVC).unwrap();
|
||||
// let x = df_format(read_parquet(Ticker::ARKVC).unwrap()).unwrap();
|
||||
// println!("{:#?}", x);
|
||||
|
||||
// merge_csv_to_parquet(Ticker::ARKVC).unwrap();
|
||||
// let x = df_format(read_parquet(Ticker::ARKVC).unwrap()).unwrap();
|
||||
// println!("{:#?}", x);
|
||||
}
|
||||
|
|
69
src/util.rs
69
src/util.rs
|
@ -1,8 +1,7 @@
|
|||
use glob::glob;
|
||||
use polars::datatypes::DataType;
|
||||
use polars::lazy::dsl::StrpTimeOptions;
|
||||
use polars::prelude::*;
|
||||
use polars::prelude::{DataFrame, UniqueKeepStrategy};
|
||||
use polars::prelude::{DataFrame, StrptimeOptions, UniqueKeepStrategy};
|
||||
use reqwest::blocking::Client;
|
||||
use std::error::Error;
|
||||
use std::fs::File;
|
||||
|
@ -35,16 +34,28 @@ impl Ticker {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn merge_csv_to_parquet(folder: Ticker) -> Result<(), Box<dyn Error>> {
|
||||
pub fn merge_csv_to_parquet(ticker: Ticker) -> Result<(), Box<dyn Error>> {
|
||||
let mut dfs = vec![];
|
||||
|
||||
for x in glob(&format!("data/csv/{}/*", folder))?.filter_map(Result::ok) {
|
||||
for x in glob(&format!("data/csv/{}/*", ticker))?.filter_map(Result::ok) {
|
||||
dfs.push(LazyCsvReader::new(x).finish()?);
|
||||
}
|
||||
|
||||
let df = concat(dfs, false, true)?;
|
||||
let mut df = concat(dfs, false, true)?;
|
||||
|
||||
if read_parquet(ticker).is_ok() {
|
||||
let df_old = read_parquet(ticker)?;
|
||||
df = concat(
|
||||
vec![df_format(df_old)?.lazy(), df_format(df)?.lazy()],
|
||||
false,
|
||||
true,
|
||||
)?
|
||||
.unique_stable(None, UniqueKeepStrategy::First);
|
||||
write_parquet(ticker, df_sort(df.collect()?)?)?;
|
||||
} else {
|
||||
write_parquet(ticker, df_format(df)?)?;
|
||||
}
|
||||
|
||||
write_parquet(folder, df_format(df)?)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
@ -60,7 +71,7 @@ pub fn update_parquet(ticker: Ticker) -> Result<(), Box<dyn Error>> {
|
|||
)?
|
||||
.unique_stable(None, UniqueKeepStrategy::First);
|
||||
|
||||
write_parquet(ticker, df.collect()?)?;
|
||||
write_parquet(ticker, df_sort(df.collect()?)?)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
@ -79,6 +90,10 @@ pub fn write_parquet(ticker: Ticker, mut df: DataFrame) -> Result<(), Box<dyn Er
|
|||
Ok(())
|
||||
}
|
||||
|
||||
/// Returns `df` sorted on the `"date"` and `"weight"` columns.
///
/// The per-column flags passed to `DataFrame::sort` are `[false, true]`
/// for `"date"` and `"weight"` respectively.
/// NOTE(review): in polars 0.30 this second argument is presumably the
/// `descending` flag, i.e. date ascending then weight descending — confirm
/// against the pinned polars version.
///
/// # Errors
///
/// Propagates the error from `DataFrame::sort` (boxed).
pub fn df_sort(df: DataFrame) -> Result<DataFrame, Box<dyn Error>> {
|
||||
Ok(df.sort(["date", "weight"], vec![false, true])?)
|
||||
}
|
||||
|
||||
pub fn df_format(df: LazyFrame) -> Result<DataFrame, Box<dyn Error>> {
|
||||
let mut df = df.collect()?;
|
||||
|
||||
|
@ -86,8 +101,8 @@ pub fn df_format(df: LazyFrame) -> Result<DataFrame, Box<dyn Error>> {
|
|||
df = df
|
||||
.lazy()
|
||||
.rename(
|
||||
vec!["market_value_($), weight_(%)"],
|
||||
vec!["market_value, weight"],
|
||||
vec!["market_value_($)", "weight_(%)"],
|
||||
vec!["market_value", "weight"],
|
||||
)
|
||||
.collect()?;
|
||||
}
|
||||
|
@ -95,8 +110,8 @@ pub fn df_format(df: LazyFrame) -> Result<DataFrame, Box<dyn Error>> {
|
|||
df = df
|
||||
.lazy()
|
||||
.rename(
|
||||
vec!["market value ($), weight (%)"],
|
||||
vec!["market_value, weight"],
|
||||
vec!["market value ($)", "weight (%)"],
|
||||
vec!["market_value", "weight"],
|
||||
)
|
||||
.collect()?;
|
||||
}
|
||||
|
@ -123,15 +138,15 @@ pub fn df_format(df: LazyFrame) -> Result<DataFrame, Box<dyn Error>> {
|
|||
let mut expressions: Vec<Expr> = vec![];
|
||||
|
||||
if !df.fields().contains(&Field::new("date", DataType::Date)) {
|
||||
expressions.push(col("date").str().strptime(StrpTimeOptions {
|
||||
date_dtype: DataType::Date,
|
||||
fmt: Some("%m/%d/%Y".into()),
|
||||
strict: false,
|
||||
exact: true,
|
||||
cache: false,
|
||||
tz_aware: false,
|
||||
utc: false,
|
||||
}));
|
||||
expressions.push(col("date").str().strptime(
|
||||
DataType::Date,
|
||||
StrptimeOptions {
|
||||
format: Some("%m/%d/%Y".into()),
|
||||
strict: false,
|
||||
exact: true,
|
||||
cache: false,
|
||||
},
|
||||
));
|
||||
}
|
||||
|
||||
if df.fields().contains(&Field::new("weight", DataType::Utf8)) {
|
||||
|
@ -208,16 +223,8 @@ pub fn df_format(df: LazyFrame) -> Result<DataFrame, Box<dyn Error>> {
|
|||
|
||||
pub fn get_csv(ticker: Ticker) -> Result<LazyFrame, Box<dyn Error>> {
|
||||
let url = match ticker {
|
||||
Ticker::ARKVC => {
|
||||
"https://ark-ventures.com/wp-content/uploads/funds-etf-csv/ARK_VENTURE_FUND_HOLDINGS.csv".to_owned()
|
||||
}
|
||||
_ => {
|
||||
format!(
|
||||
"https://ark-funds.com/wp-content/uploads/funds-etf-csv/ARK_{}_ETF_{}_HOLDINGS.csv",
|
||||
ticker.value(),
|
||||
ticker
|
||||
)
|
||||
}
|
||||
Ticker::ARKVC => "https://ark-ventures.com/wp-content/uploads/funds-etf-csv/ARK_VENTURE_FUND_HOLDINGS.csv".to_owned(),
|
||||
_ => format!("https://ark-funds.com/wp-content/uploads/funds-etf-csv/ARK_{}_ETF_{}_HOLDINGS.csv", ticker.value(), ticker),
|
||||
};
|
||||
|
||||
let response = Client::builder()
|
||||
|
@ -232,7 +239,7 @@ pub fn get_csv(ticker: Ticker) -> Result<LazyFrame, Box<dyn Error>> {
|
|||
.into());
|
||||
}
|
||||
|
||||
let data: Vec<u8> = response.text()?.bytes().collect();
|
||||
let data = response.text()?.into_bytes();
|
||||
|
||||
let df = CsvReader::new(Cursor::new(data))
|
||||
.has_header(true)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue