mirror of
https://github.com/NexVeridian/ark-invest-api-rust-data.git
synced 2025-09-02 01:49:12 +00:00
0.2.2
This commit is contained in:
parent
4074a97ae2
commit
f3bc96b251
7 changed files with 124 additions and 36 deletions
2
.gitignore
vendored
2
.gitignore
vendored
|
@ -24,7 +24,7 @@ wheels/
|
||||||
.installed.cfg
|
.installed.cfg
|
||||||
*.egg
|
*.egg
|
||||||
MANIFEST
|
MANIFEST
|
||||||
.vscode
|
# .vscode
|
||||||
|
|
||||||
# PyInstaller
|
# PyInstaller
|
||||||
# Usually these files are written by a python script from a template
|
# Usually these files are written by a python script from a template
|
||||||
|
|
43
.vscode/launch.json
vendored
Normal file
43
.vscode/launch.json
vendored
Normal file
|
@ -0,0 +1,43 @@
|
||||||
|
{
|
||||||
|
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
||||||
|
"version": "0.2.0",
|
||||||
|
"configurations": [
|
||||||
|
{
|
||||||
|
"type": "lldb",
|
||||||
|
"request": "launch",
|
||||||
|
"name": "Debug executable 'ark-invest-api-rust-data'",
|
||||||
|
"cargo": {
|
||||||
|
"args": [
|
||||||
|
"build",
|
||||||
|
"--bin=ark-invest-api-rust-data",
|
||||||
|
"--package=ark-invest-api-rust-data"
|
||||||
|
],
|
||||||
|
"filter": {
|
||||||
|
"name": "ark-invest-api-rust-data",
|
||||||
|
"kind": "bin"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"args": [],
|
||||||
|
"cwd": "${workspaceFolder}"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "lldb",
|
||||||
|
"request": "launch",
|
||||||
|
"name": "Debug unit tests in executable 'ark-invest-api-rust-data'",
|
||||||
|
"cargo": {
|
||||||
|
"args": [
|
||||||
|
"test",
|
||||||
|
"--no-run",
|
||||||
|
"--bin=ark-invest-api-rust-data",
|
||||||
|
"--package=ark-invest-api-rust-data"
|
||||||
|
],
|
||||||
|
"filter": {
|
||||||
|
"name": "ark-invest-api-rust-data",
|
||||||
|
"kind": "bin"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"args": [],
|
||||||
|
"cwd": "${workspaceFolder}"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
17
.vscode/settings.json
vendored
Normal file
17
.vscode/settings.json
vendored
Normal file
|
@ -0,0 +1,17 @@
|
||||||
|
{
|
||||||
|
"[Rust]": {
|
||||||
|
"editor.defaultFormatter": "rust-lang.rust-analyzer",
|
||||||
|
"editor.formatOnSave": true,
|
||||||
|
"editor.formatOnSaveMode": "file"
|
||||||
|
},
|
||||||
|
"rust-analyzer.check.command": "clippy",
|
||||||
|
"rust-analyzer.cargo.buildScripts.overrideCommand": [
|
||||||
|
"cargo",
|
||||||
|
"clippy",
|
||||||
|
"--fix",
|
||||||
|
"--workspace",
|
||||||
|
"--message-format=json",
|
||||||
|
"--all-targets",
|
||||||
|
"--allow-dirty"
|
||||||
|
],
|
||||||
|
}
|
|
@ -4,7 +4,7 @@ version = "0.1.0"
|
||||||
edition = "2021"
|
edition = "2021"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
polars = { version = "0.28", features = [
|
polars = { version = "0.30", features = [
|
||||||
"lazy",
|
"lazy",
|
||||||
"strings",
|
"strings",
|
||||||
"parquet",
|
"parquet",
|
||||||
|
|
17
README.md
17
README.md
|
@ -1 +1,18 @@
|
||||||
Fetches and caches data from csv download and saves the data in parquet format
|
Fetches and caches data from csv download and saves the data in parquet format
|
||||||
|
|
||||||
|
# Dev Install
|
||||||
|
## Dev Containers
|
||||||
|
Install Docker, VS Code, the [Remote Development Extension Pack](https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.vscode-remote-extensionpack), and the [GitHub Repositories Extension](https://marketplace.visualstudio.com/items?itemName=GitHub.remotehub)
|
||||||
|
|
||||||
|
Press `Ctrl+Shift+P` and run **Dev Containers: Clone Repository in Container Volume**
|
||||||
|
|
||||||
|
Select GitHub, then paste the URL `https://github.com/NexVeridian/ark-invest-api-rust-data`
|
||||||
|
|
||||||
|
Run the code with `F5` or `cargo run`
|
||||||
|
|
||||||
|
## Docker Compose
|
||||||
|
`git clone https://github.com/NexVeridian/ark-invest-api-rust-data`
|
||||||
|
|
||||||
|
`docker compose build && docker compose up`
|
||||||
|
|
||||||
|
To clear BuildKit's Cargo cache, run `docker builder prune --filter type=exec.cachemount`
|
||||||
|
|
10
src/main.rs
10
src/main.rs
|
@ -40,10 +40,14 @@ fn main() {
|
||||||
let dfn = df_format(read_parquet(Ticker::ARKVC).unwrap()).unwrap();
|
let dfn = df_format(read_parquet(Ticker::ARKVC).unwrap()).unwrap();
|
||||||
println!("{:#?}", dfn);
|
println!("{:#?}", dfn);
|
||||||
|
|
||||||
// update_parquet(Ticker::ARKVC).unwrap();
|
// let update = df_format(get_csv(Ticker::ARKF).unwrap()).unwrap();
|
||||||
// let update = df_format(Ticker::ARKF, get_csv(Ticker::ARKF).unwrap()).unwrap();
|
// println!("{:#?}", update);
|
||||||
// let update = get_csv(Ticker::ARKF).unwrap().collect().unwrap();
|
|
||||||
|
|
||||||
|
// update_parquet(Ticker::ARKVC).unwrap();
|
||||||
|
// let x = df_format(read_parquet(Ticker::ARKVC).unwrap()).unwrap();
|
||||||
|
// println!("{:#?}", x);
|
||||||
|
|
||||||
|
// merge_csv_to_parquet(Ticker::ARKVC).unwrap();
|
||||||
// let x = df_format(read_parquet(Ticker::ARKVC).unwrap()).unwrap();
|
// let x = df_format(read_parquet(Ticker::ARKVC).unwrap()).unwrap();
|
||||||
// println!("{:#?}", x);
|
// println!("{:#?}", x);
|
||||||
}
|
}
|
||||||
|
|
69
src/util.rs
69
src/util.rs
|
@ -1,8 +1,7 @@
|
||||||
use glob::glob;
|
use glob::glob;
|
||||||
use polars::datatypes::DataType;
|
use polars::datatypes::DataType;
|
||||||
use polars::lazy::dsl::StrpTimeOptions;
|
|
||||||
use polars::prelude::*;
|
use polars::prelude::*;
|
||||||
use polars::prelude::{DataFrame, UniqueKeepStrategy};
|
use polars::prelude::{DataFrame, StrptimeOptions, UniqueKeepStrategy};
|
||||||
use reqwest::blocking::Client;
|
use reqwest::blocking::Client;
|
||||||
use std::error::Error;
|
use std::error::Error;
|
||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
|
@ -35,16 +34,28 @@ impl Ticker {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn merge_csv_to_parquet(folder: Ticker) -> Result<(), Box<dyn Error>> {
|
pub fn merge_csv_to_parquet(ticker: Ticker) -> Result<(), Box<dyn Error>> {
|
||||||
let mut dfs = vec![];
|
let mut dfs = vec![];
|
||||||
|
|
||||||
for x in glob(&format!("data/csv/{}/*", folder))?.filter_map(Result::ok) {
|
for x in glob(&format!("data/csv/{}/*", ticker))?.filter_map(Result::ok) {
|
||||||
dfs.push(LazyCsvReader::new(x).finish()?);
|
dfs.push(LazyCsvReader::new(x).finish()?);
|
||||||
}
|
}
|
||||||
|
|
||||||
let df = concat(dfs, false, true)?;
|
let mut df = concat(dfs, false, true)?;
|
||||||
|
|
||||||
|
if read_parquet(ticker).is_ok() {
|
||||||
|
let df_old = read_parquet(ticker)?;
|
||||||
|
df = concat(
|
||||||
|
vec![df_format(df_old)?.lazy(), df_format(df)?.lazy()],
|
||||||
|
false,
|
||||||
|
true,
|
||||||
|
)?
|
||||||
|
.unique_stable(None, UniqueKeepStrategy::First);
|
||||||
|
write_parquet(ticker, df_sort(df.collect()?)?)?;
|
||||||
|
} else {
|
||||||
|
write_parquet(ticker, df_format(df)?)?;
|
||||||
|
}
|
||||||
|
|
||||||
write_parquet(folder, df_format(df)?)?;
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -60,7 +71,7 @@ pub fn update_parquet(ticker: Ticker) -> Result<(), Box<dyn Error>> {
|
||||||
)?
|
)?
|
||||||
.unique_stable(None, UniqueKeepStrategy::First);
|
.unique_stable(None, UniqueKeepStrategy::First);
|
||||||
|
|
||||||
write_parquet(ticker, df.collect()?)?;
|
write_parquet(ticker, df_sort(df.collect()?)?)?;
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -79,6 +90,10 @@ pub fn write_parquet(ticker: Ticker, mut df: DataFrame) -> Result<(), Box<dyn Er
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn df_sort(df: DataFrame) -> Result<DataFrame, Box<dyn Error>> {
|
||||||
|
Ok(df.sort(["date", "weight"], vec![false, true])?)
|
||||||
|
}
|
||||||
|
|
||||||
pub fn df_format(df: LazyFrame) -> Result<DataFrame, Box<dyn Error>> {
|
pub fn df_format(df: LazyFrame) -> Result<DataFrame, Box<dyn Error>> {
|
||||||
let mut df = df.collect()?;
|
let mut df = df.collect()?;
|
||||||
|
|
||||||
|
@ -86,8 +101,8 @@ pub fn df_format(df: LazyFrame) -> Result<DataFrame, Box<dyn Error>> {
|
||||||
df = df
|
df = df
|
||||||
.lazy()
|
.lazy()
|
||||||
.rename(
|
.rename(
|
||||||
vec!["market_value_($), weight_(%)"],
|
vec!["market_value_($)", "weight_(%)"],
|
||||||
vec!["market_value, weight"],
|
vec!["market_value", "weight"],
|
||||||
)
|
)
|
||||||
.collect()?;
|
.collect()?;
|
||||||
}
|
}
|
||||||
|
@ -95,8 +110,8 @@ pub fn df_format(df: LazyFrame) -> Result<DataFrame, Box<dyn Error>> {
|
||||||
df = df
|
df = df
|
||||||
.lazy()
|
.lazy()
|
||||||
.rename(
|
.rename(
|
||||||
vec!["market value ($), weight (%)"],
|
vec!["market value ($)", "weight (%)"],
|
||||||
vec!["market_value, weight"],
|
vec!["market_value", "weight"],
|
||||||
)
|
)
|
||||||
.collect()?;
|
.collect()?;
|
||||||
}
|
}
|
||||||
|
@ -123,15 +138,15 @@ pub fn df_format(df: LazyFrame) -> Result<DataFrame, Box<dyn Error>> {
|
||||||
let mut expressions: Vec<Expr> = vec![];
|
let mut expressions: Vec<Expr> = vec![];
|
||||||
|
|
||||||
if !df.fields().contains(&Field::new("date", DataType::Date)) {
|
if !df.fields().contains(&Field::new("date", DataType::Date)) {
|
||||||
expressions.push(col("date").str().strptime(StrpTimeOptions {
|
expressions.push(col("date").str().strptime(
|
||||||
date_dtype: DataType::Date,
|
DataType::Date,
|
||||||
fmt: Some("%m/%d/%Y".into()),
|
StrptimeOptions {
|
||||||
strict: false,
|
format: Some("%m/%d/%Y".into()),
|
||||||
exact: true,
|
strict: false,
|
||||||
cache: false,
|
exact: true,
|
||||||
tz_aware: false,
|
cache: false,
|
||||||
utc: false,
|
},
|
||||||
}));
|
));
|
||||||
}
|
}
|
||||||
|
|
||||||
if df.fields().contains(&Field::new("weight", DataType::Utf8)) {
|
if df.fields().contains(&Field::new("weight", DataType::Utf8)) {
|
||||||
|
@ -208,16 +223,8 @@ pub fn df_format(df: LazyFrame) -> Result<DataFrame, Box<dyn Error>> {
|
||||||
|
|
||||||
pub fn get_csv(ticker: Ticker) -> Result<LazyFrame, Box<dyn Error>> {
|
pub fn get_csv(ticker: Ticker) -> Result<LazyFrame, Box<dyn Error>> {
|
||||||
let url = match ticker {
|
let url = match ticker {
|
||||||
Ticker::ARKVC => {
|
Ticker::ARKVC => "https://ark-ventures.com/wp-content/uploads/funds-etf-csv/ARK_VENTURE_FUND_HOLDINGS.csv".to_owned(),
|
||||||
"https://ark-ventures.com/wp-content/uploads/funds-etf-csv/ARK_VENTURE_FUND_HOLDINGS.csv".to_owned()
|
_ => format!("https://ark-funds.com/wp-content/uploads/funds-etf-csv/ARK_{}_ETF_{}_HOLDINGS.csv", ticker.value(), ticker),
|
||||||
}
|
|
||||||
_ => {
|
|
||||||
format!(
|
|
||||||
"https://ark-funds.com/wp-content/uploads/funds-etf-csv/ARK_{}_ETF_{}_HOLDINGS.csv",
|
|
||||||
ticker.value(),
|
|
||||||
ticker
|
|
||||||
)
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
|
|
||||||
let response = Client::builder()
|
let response = Client::builder()
|
||||||
|
@ -232,7 +239,7 @@ pub fn get_csv(ticker: Ticker) -> Result<LazyFrame, Box<dyn Error>> {
|
||||||
.into());
|
.into());
|
||||||
}
|
}
|
||||||
|
|
||||||
let data: Vec<u8> = response.text()?.bytes().collect();
|
let data = response.text()?.into_bytes();
|
||||||
|
|
||||||
let df = CsvReader::new(Cursor::new(data))
|
let df = CsvReader::new(Cursor::new(data))
|
||||||
.has_header(true)
|
.has_header(true)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue