This commit is contained in:
Elijah McMorris 2023-06-30 05:31:02 +00:00 committed by NexVeridian
parent ffb24e7943
commit 9ddfb2a563
Signed by: NexVeridian
SSH key fingerprint: SHA256:bsA1SKZxuEcEVHAy3gY1HUeM5ykRJl0U0kQHQn0hMg8
10 changed files with 119 additions and 56 deletions

View file

@ -57,7 +57,7 @@
"extensions": [
"vadimcn.vscode-lldb",
"serayuzgur.crates",
"bungcip.better-toml",
"tamasfe.even-better-toml",
"rust-lang.rust-analyzer",
"mutantdino.resourcemonitor",
"christian-kohler.path-intellisense",

View file

@ -1,2 +1,8 @@
/.devcontainer
/.vscode
/target
.dockerignore
Cargo.lock
Dockerfile
docker-compose.yml
*.md

1
.gitignore vendored
View file

@ -24,7 +24,6 @@ wheels/
.installed.cfg
*.egg
MANIFEST
# .vscode
# PyInstaller
# Usually these files are written by a python script from a template

View file

@ -14,7 +14,7 @@ polars = { version = "0.30", features = [
"object",
"dtype-struct",
] }
reqwest = { version = "0.11", features = ["blocking"] }
reqwest = { version = "0.11", features = ["blocking", "gzip"] }
glob = { version = "0.3" }
clokwerk = "0.4.0"
strum_macros = "0.25"
@ -24,6 +24,7 @@ openssl = { version = "0.10", features = ["vendored"] }
chrono = { version = "0.4", features = ["serde"] }
serde_json = "1.0"
rand = "0.8"
futures = "0.3"
[dev-dependencies]
serial_test = "*"

View file

@ -25,13 +25,10 @@ RUN --mount=type=cache,target=/usr/local/cargo,from=rust,source=/usr/local/cargo
cargo nextest run --release --target x86_64-unknown-linux-musl \
-E "all() - test(get_api) - kind(bin)"
FROM alpine:latest
FROM alpine:latest AS main
WORKDIR /ark-invest-api-rust-data
COPY --from=builder ark-invest-api-rust-data/ark-invest-api-rust-data .
ENV PORT=3000
EXPOSE 3000
CMD ["./ark-invest-api-rust-data"]

View file

@ -1,4 +1,13 @@
https://ark-funds.com/ark-trade-notifications/
https://etfs.ark-funds.com/hubfs/idt/trades/ARK_Trade_06072023_0800PM_EST_6480efd1294b5.xls
cargo clean && cargo build --timings
# Futures
https://stackoverflow.com/questions/68448854/how-to-await-for-the-first-k-futures
# 403
https://docs.rs/http/latest/http/header/index.html
https://docs.rs/http/latest/http/index.html?search=HOST
https://stackoverflow.com/questions/70931027/http-403-forbidden-is-showing-while-scraping-a-data-from-a-website-using-python
https://stackoverflow.com/questions/48756326/web-scraping-results-in-403-forbidden-error

View file

@ -1,4 +1,20 @@
Fetches and caches data from csv download and saves the data in parquet format
Fetches and caches ETF data daily, from CSV download or API, and saves the data in Parquet format
[api.NexVeridian.com](https://api.NexVeridian.com)
Not affiliated with Ark Invest
# Install for csv download
Copy docker-compose.yml
Create data folder next to docker-compose.yml
```
├───data
│ └───parquet
├───docker-compose.yml
```
`docker compose up --pull always`
# Dev Install
## Dev Containers
@ -18,3 +34,12 @@ Run tests with `cargo t`
`docker compose build && docker compose up`
Remove the cargo cache for buildkit with `docker builder prune --filter type=exec.cachemount`
# Install for api
`git clone`
In main.rs, change `Source::Ark` to `Source::ApiIncremental`, or to `Source::ApiFull` for the first run
In docker-compose.yml, remove this line: `image: ghcr.io/NexVeridian/ark-invest-api-rust-data:latest`
Uncomment everything else

View file

@ -1,8 +1,8 @@
version: "3"
services:
ark-invest-api-rust-data:
# image: ghcr.io/NexVeridian/ark-invest-api-rust-data:latest
image: ark-invest-api-rust-data
image: ghcr.io/NexVeridian/ark-invest-api-rust-data:latest
# image: ark-invest-api-rust-data
container_name: ark-invest-api-rust-data
build:
context: .
@ -12,6 +12,14 @@ services:
restart: unless-stopped
volumes:
- ./data:/ark-invest-api-rust-data/data
# ark-invest-api-rust-data-test:
# container_name: ark-invest-api-rust-data-test
# build:
# context: .
# target: test
# args:
# DOCKER_BUILDKIT: 1
# restart: "no"
volumes:
data:

View file

@ -1,38 +1,59 @@
use clokwerk::{AsyncScheduler, Job, TimeUnits};
// use polars::prelude::LazyFrame;
// use polars::prelude::*;
use futures::future::join_all;
use rand::Rng;
use std::error::Error;
use std::result::Result;
use std::time::Duration;
use std::thread;
use strum::IntoEnumIterator;
use tokio::task;
use tokio::time::{sleep, Duration};
mod util;
use util::*;
#[tokio::main]
async fn main() {
let mut scheduler = AsyncScheduler::new();
println!("Scheduler Started");
scheduler.every(1.day()).at("11:30 pm").run(|| async {
for x in Ticker::iter() {
if x == Ticker::ARKVC {
continue;
}
let plan = || -> Result<(), Box<dyn Error>> {
let df = Ark::new(Source::Ark, x, None)?
/// Runs the fetch pipeline for a single `ticker` after a random delay.
///
/// Prints a start message, sleeps on the calling thread for a random
/// duration, then evaluates the
/// `Ark::new(Source::Ark, ticker, None)` -> `format` -> `write_parquet` ->
/// `collect` chain and prints rows of the collected frame.
///
/// # Errors
///
/// Any error produced by a step of that chain is returned to the caller
/// through `?`.
fn ark_plan(ticker: Ticker) -> Result<(), Box<dyn Error>> {
println!("Starting: {:#?}", ticker);
// Random delay between 5 and 30 minutes (inclusive), expressed in seconds.
// NOTE(review): presumably meant to spread the requests out over time - confirm.
let sec = Duration::from_secs(rand::thread_rng().gen_range(5 * 60..=30 * 60));
// sleep(sec).await;
// Blocking sleep: this is a plain (non-async) `fn`, so the async `sleep`
// above cannot be awaited here. The calling thread is parked for `sec`.
thread::sleep(sec);
let df = Ark::new(Source::Ark, ticker, None)?
.format()?
.write_parquet()?
.collect()?;
// Print `df.head(Some(1))` of the collected frame.
println!("{:#?}", df.head(Some(1)));
// Print the ticker together with `df.tail(Some(1))` of the collected frame.
println!("Ticker: {:#?}\n{:#?}", ticker, df.tail(Some(1)));
Ok(())
};
if plan().is_ok() {}
let sec = rand::thread_rng().gen_range(10..=30);
tokio::time::sleep(Duration::from_secs(sec)).await;
}
});
/// Runs `ark_plan` for `ticker` on tokio's blocking thread pool and reports
/// the outcome as a `Result` instead of panicking.
///
/// `ark_plan` sleeps with `thread::sleep` and is a synchronous function, so it
/// is handed to `task::spawn_blocking` rather than executed on the async
/// executor.
///
/// # Errors
///
/// Returns an error when
/// * `ark_plan` itself fails - its error is carried out of the blocking task
///   as its `Display` text, because `Box<dyn Error>` is not `Send`; or
/// * the blocking task could not be joined (it panicked or was cancelled).
///
/// Failures are also written to stderr so that callers which only check
/// `is_ok()` (or discard the result) do not hide them completely.
async fn spawn_ark_plan(ticker: Ticker) -> Result<(), Box<dyn Error + Send>> {
    // Convert the non-`Send` error to a `String` inside the blocking task so
    // the result can cross the thread boundary.
    let joined =
        task::spawn_blocking(move || ark_plan(ticker).map_err(|e| e.to_string())).await;

    match joined {
        Ok(Ok(())) => Ok(()),
        Ok(Err(msg)) => {
            eprintln!("Ticker: {:#?} failed: {}", ticker, msg);
            let err: Box<dyn Error + Send> = Box::<dyn Error + Send + Sync>::from(msg);
            Err(err)
        }
        Err(join_err) => {
            eprintln!("Ticker: {:#?} task did not complete: {}", ticker, join_err);
            let err: Box<dyn Error + Send> = Box::new(join_err);
            Err(err)
        }
    }
}
async fn ark_etf() {
let futures = Ticker::iter()
.filter(|&x| x != Ticker::ARKVC)
.map(spawn_ark_plan)
.collect::<Vec<_>>();
join_all(futures).await;
}
// ark_etf().await;
scheduler.every(1.day()).at("11:30 pm").run(ark_etf);
scheduler
.every(5.day())
.at("11:30 pm")
.run(|| async { if spawn_ark_plan(Ticker::ARKVC).await.is_ok() {} });
loop {
scheduler.run_pending().await;
@ -40,24 +61,3 @@ async fn main() {
tokio::time::sleep(Duration::from_secs(1)).await;
}
}
// fn main() -> Result<(), Box<dyn std::error::Error>> {
// let csv = Ark::merge_old_csv_to_parquet(Ticker::ARKK, None)?
// .format()?
// .write_parquet()?
// .collect()?;
// println!("{:#?}", csv);
// let read = Ark::new(Source::Read, Ticker::ARKK, None)?.collect()?;
// println!("{:#?}", read.dtypes());
// println!("{:#?}", read.get_column_names());
// println!("{:#?}", read);
// let api = Ark::new(Source::ApiFull, Ticker::ARKK, None)?.collect()?;
// println!("{:#?}", api);
// let ark = Ark::new(Source::Ark, Ticker::ARKK, None)?.collect()?;
// println!("{:#?}", ark);
// let ark = Ark::new(Source::Ark, Ticker::ARKVC, None)?.collect()?;
// println!("{:#?}", ark);
// Ok(())
// }

View file

@ -4,6 +4,8 @@ use polars::datatypes::DataType;
use polars::lazy::dsl::StrptimeOptions;
use polars::prelude::*;
use reqwest::blocking::Client;
use reqwest::header;
use reqwest::header::{HeaderMap, HeaderValue};
use serde_json::Value;
use std::error::Error;
use std::fs::{create_dir_all, File};
@ -12,7 +14,7 @@ use std::path::Path;
use std::result::Result;
use strum_macros::EnumIter;
#[derive(strum_macros::Display, EnumIter, Clone, Copy, PartialEq)]
#[derive(strum_macros::Display, EnumIter, Clone, Copy, PartialEq, Debug)]
pub enum Ticker {
ARKVC,
ARKF,
@ -408,8 +410,24 @@ pub enum Reader {
impl Reader {
pub fn get_data_url(&self, url: String) -> Result<LazyFrame, Box<dyn Error>> {
let mut headers = HeaderMap::new();
headers.insert(
header::USER_AGENT,
HeaderValue::from_static("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36"),
);
headers.insert(
header::ACCEPT,
HeaderValue::from_static("text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8"),
);
headers.insert(
header::ACCEPT_LANGUAGE,
HeaderValue::from_static("en-US,en;q=0.8"),
);
let response = Client::builder()
.user_agent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3")
.default_headers(headers)
.gzip(true)
.build()?.get(url).send()?;
if !response.status().is_success() {