This commit is contained in:
Elijah McMorris 2023-06-30 05:31:02 +00:00 committed by NexVeridian
parent ffb24e7943
commit 9ddfb2a563
Signed by: NexVeridian
SSH key fingerprint: SHA256:bsA1SKZxuEcEVHAy3gY1HUeM5ykRJl0U0kQHQn0hMg8
10 changed files with 119 additions and 56 deletions

View file

@ -57,7 +57,7 @@
"extensions": [ "extensions": [
"vadimcn.vscode-lldb", "vadimcn.vscode-lldb",
"serayuzgur.crates", "serayuzgur.crates",
"bungcip.better-toml", "tamasfe.even-better-toml",
"rust-lang.rust-analyzer", "rust-lang.rust-analyzer",
"mutantdino.resourcemonitor", "mutantdino.resourcemonitor",
"christian-kohler.path-intellisense", "christian-kohler.path-intellisense",

View file

@ -1,2 +1,8 @@
/.devcontainer
/.vscode
/target /target
.dockerignore
Cargo.lock Cargo.lock
Dockerfile
docker-compose.yml
*.md

1
.gitignore vendored
View file

@ -24,7 +24,6 @@ wheels/
.installed.cfg .installed.cfg
*.egg *.egg
MANIFEST MANIFEST
# .vscode
# PyInstaller # PyInstaller
# Usually these files are written by a python script from a template # Usually these files are written by a python script from a template

View file

@ -14,7 +14,7 @@ polars = { version = "0.30", features = [
"object", "object",
"dtype-struct", "dtype-struct",
] } ] }
reqwest = { version = "0.11", features = ["blocking"] } reqwest = { version = "0.11", features = ["blocking", "gzip"] }
glob = { version = "0.3" } glob = { version = "0.3" }
clokwerk = "0.4.0" clokwerk = "0.4.0"
strum_macros = "0.25" strum_macros = "0.25"
@ -24,6 +24,7 @@ openssl = { version = "0.10", features = ["vendored"] }
chrono = { version = "0.4", features = ["serde"] } chrono = { version = "0.4", features = ["serde"] }
serde_json = "1.0" serde_json = "1.0"
rand = "0.8" rand = "0.8"
futures = "0.3"
[dev-dependencies] [dev-dependencies]
serial_test = "*" serial_test = "*"

View file

@ -25,13 +25,10 @@ RUN --mount=type=cache,target=/usr/local/cargo,from=rust,source=/usr/local/cargo
cargo nextest run --release --target x86_64-unknown-linux-musl \ cargo nextest run --release --target x86_64-unknown-linux-musl \
-E "all() - test(get_api) - kind(bin)" -E "all() - test(get_api) - kind(bin)"
FROM alpine:latest FROM alpine:latest AS main
WORKDIR /ark-invest-api-rust-data WORKDIR /ark-invest-api-rust-data
COPY --from=builder ark-invest-api-rust-data/ark-invest-api-rust-data . COPY --from=builder ark-invest-api-rust-data/ark-invest-api-rust-data .
ENV PORT=3000
EXPOSE 3000
CMD ["./ark-invest-api-rust-data"] CMD ["./ark-invest-api-rust-data"]

View file

@ -1,4 +1,13 @@
https://ark-funds.com/ark-trade-notifications/ https://ark-funds.com/ark-trade-notifications/
https://etfs.ark-funds.com/hubfs/idt/trades/ARK_Trade_06072023_0800PM_EST_6480efd1294b5.xls
cargo clean && cargo build --timings cargo clean && cargo build --timings
# Futures
https://stackoverflow.com/questions/68448854/how-to-await-for-the-first-k-futures
# 403
https://docs.rs/http/latest/http/header/index.html
https://docs.rs/http/latest/http/index.html?search=HOST
https://stackoverflow.com/questions/70931027/http-403-forbidden-is-showing-while-scraping-a-data-from-a-website-using-python
https://stackoverflow.com/questions/48756326/web-scraping-results-in-403-forbidden-error

View file

@ -1,4 +1,20 @@
Fetches and caches data from csv download and saves the data in parquet format Fetches and caches ETF data daily, from csv download or api, and saves the data in parquet format
[api.NexVeridian.com](https://api.NexVeridian.com)
Not affiliated with Ark Invest
# Install for csv download
Copy docker-compose.yml
Create data folder next to docker-compose.yml
```
├───data
│ └───parquet
├───docker-compose.yml
```
`docker compose up --pull always`
# Dev Install # Dev Install
## Dev Containers ## Dev Containers
@ -18,3 +34,12 @@ Run tests with `cargo t`
`docker compose build && docker compose up` `docker compose build && docker compose up`
Remove the cargo cache for buildkit with `docker builder prune --filter type=exec.cachemount` Remove the cargo cache for buildkit with `docker builder prune --filter type=exec.cachemount`
# Install for api
`git clone`
in main.rs change `Source::Ark` to `Source::ApiIncremental` or `Source::ApiFull` for first run
in docker-compose.yml remove this line: `image: ghcr.io/NexVeridian/ark-invest-api-rust-data:latest`
uncomment everything else

View file

@ -1,8 +1,8 @@
version: "3" version: "3"
services: services:
ark-invest-api-rust-data: ark-invest-api-rust-data:
# image: ghcr.io/NexVeridian/ark-invest-api-rust-data:latest image: ghcr.io/NexVeridian/ark-invest-api-rust-data:latest
image: ark-invest-api-rust-data # image: ark-invest-api-rust-data
container_name: ark-invest-api-rust-data container_name: ark-invest-api-rust-data
build: build:
context: . context: .
@ -12,6 +12,14 @@ services:
restart: unless-stopped restart: unless-stopped
volumes: volumes:
- ./data:/ark-invest-api-rust-data/data - ./data:/ark-invest-api-rust-data/data
# ark-invest-api-rust-data-test:
# container_name: ark-invest-api-rust-data-test
# build:
# context: .
# target: test
# args:
# DOCKER_BUILDKIT: 1
# restart: no
volumes: volumes:
data: data:

View file

@ -1,38 +1,59 @@
use clokwerk::{AsyncScheduler, Job, TimeUnits}; use clokwerk::{AsyncScheduler, Job, TimeUnits};
// use polars::prelude::LazyFrame; use futures::future::join_all;
// use polars::prelude::*;
use rand::Rng; use rand::Rng;
use std::error::Error; use std::error::Error;
use std::result::Result; use std::result::Result;
use std::time::Duration; use std::thread;
use strum::IntoEnumIterator; use strum::IntoEnumIterator;
use tokio::task;
use tokio::time::{sleep, Duration};
mod util; mod util;
use util::*; use util::*;
#[tokio::main] #[tokio::main]
async fn main() { async fn main() {
let mut scheduler = AsyncScheduler::new(); let mut scheduler = AsyncScheduler::new();
println!("Scheduler Started"); println!("Scheduler Started");
scheduler.every(1.day()).at("11:30 pm").run(|| async {
for x in Ticker::iter() { fn ark_plan(ticker: Ticker) -> Result<(), Box<dyn Error>> {
if x == Ticker::ARKVC { println!("Starting: {:#?}", ticker);
continue; let sec = Duration::from_secs(rand::thread_rng().gen_range(5 * 60..=30 * 60));
} // sleep(sec).await;
let plan = || -> Result<(), Box<dyn Error>> { thread::sleep(sec);
let df = Ark::new(Source::Ark, x, None)?
let df = Ark::new(Source::Ark, ticker, None)?
.format()? .format()?
.write_parquet()? .write_parquet()?
.collect()?; .collect()?;
println!("{:#?}", df.head(Some(1))); println!("Ticker: {:#?}\n{:#?}", ticker, df.tail(Some(1)));
Ok(()) Ok(())
};
if plan().is_ok() {}
let sec = rand::thread_rng().gen_range(10..=30);
tokio::time::sleep(Duration::from_secs(sec)).await;
} }
});
async fn spawn_ark_plan(ticker: Ticker) -> Result<(), Box<dyn Error + Send>> {
task::spawn_blocking(move || ark_plan(ticker).unwrap())
.await
.unwrap();
Ok(())
}
async fn ark_etf() {
let futures = Ticker::iter()
.filter(|&x| x != Ticker::ARKVC)
.map(spawn_ark_plan)
.collect::<Vec<_>>();
join_all(futures).await;
}
// ark_etf().await;
scheduler.every(1.day()).at("11:30 pm").run(ark_etf);
scheduler
.every(5.day())
.at("11:30 pm")
.run(|| async { if spawn_ark_plan(Ticker::ARKVC).await.is_ok() {} });
loop { loop {
scheduler.run_pending().await; scheduler.run_pending().await;
@ -40,24 +61,3 @@ async fn main() {
tokio::time::sleep(Duration::from_secs(1)).await; tokio::time::sleep(Duration::from_secs(1)).await;
} }
} }
// fn main() -> Result<(), Box<dyn std::error::Error>> {
// let csv = Ark::merge_old_csv_to_parquet(Ticker::ARKK, None)?
// .format()?
// .write_parquet()?
// .collect()?;
// println!("{:#?}", csv);
// let read = Ark::new(Source::Read, Ticker::ARKK, None)?.collect()?;
// println!("{:#?}", read.dtypes());
// println!("{:#?}", read.get_column_names());
// println!("{:#?}", read);
// let api = Ark::new(Source::ApiFull, Ticker::ARKK, None)?.collect()?;
// println!("{:#?}", api);
// let ark = Ark::new(Source::Ark, Ticker::ARKK, None)?.collect()?;
// println!("{:#?}", ark);
// let ark = Ark::new(Source::Ark, Ticker::ARKVC, None)?.collect()?;
// println!("{:#?}", ark);
// Ok(())
// }

View file

@ -4,6 +4,8 @@ use polars::datatypes::DataType;
use polars::lazy::dsl::StrptimeOptions; use polars::lazy::dsl::StrptimeOptions;
use polars::prelude::*; use polars::prelude::*;
use reqwest::blocking::Client; use reqwest::blocking::Client;
use reqwest::header;
use reqwest::header::{HeaderMap, HeaderValue};
use serde_json::Value; use serde_json::Value;
use std::error::Error; use std::error::Error;
use std::fs::{create_dir_all, File}; use std::fs::{create_dir_all, File};
@ -12,7 +14,7 @@ use std::path::Path;
use std::result::Result; use std::result::Result;
use strum_macros::EnumIter; use strum_macros::EnumIter;
#[derive(strum_macros::Display, EnumIter, Clone, Copy, PartialEq)] #[derive(strum_macros::Display, EnumIter, Clone, Copy, PartialEq, Debug)]
pub enum Ticker { pub enum Ticker {
ARKVC, ARKVC,
ARKF, ARKF,
@ -408,8 +410,24 @@ pub enum Reader {
impl Reader { impl Reader {
pub fn get_data_url(&self, url: String) -> Result<LazyFrame, Box<dyn Error>> { pub fn get_data_url(&self, url: String) -> Result<LazyFrame, Box<dyn Error>> {
let mut headers = HeaderMap::new();
headers.insert(
header::USER_AGENT,
HeaderValue::from_static("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36"),
);
headers.insert(
header::ACCEPT,
HeaderValue::from_static("text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8"),
);
headers.insert(
header::ACCEPT_LANGUAGE,
HeaderValue::from_static("en-US,en;q=0.8"),
);
let response = Client::builder() let response = Client::builder()
.user_agent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3") .default_headers(headers)
.gzip(true)
.build()?.get(url).send()?; .build()?.get(url).send()?;
if !response.status().is_success() { if !response.status().is_success() {