commit 7e12fc1e657a99faed9ebbdfc1f48e5e836a4103 Author: NexVeridian Date: Mon Apr 3 15:24:38 2023 -0700 0.1.0 diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile new file mode 100644 index 0000000..99521d0 --- /dev/null +++ b/.devcontainer/Dockerfile @@ -0,0 +1,17 @@ +FROM mcr.microsoft.com/devcontainers/rust:bullseye + +RUN rustup target add x86_64-unknown-linux-musl && \ + apt-get update && \ + apt install -y build-essential xz-utils musl-tools musl-dev gcc-multilib && \ + rustup update + +RUN LAZYGIT_VERSION=$(curl -s "https://api.github.com/repos/jesseduffield/lazygit/releases/latest" | grep -Po '"tag_name": "v\K[0-9.]+') && \ + curl -Lo lazygit.tar.gz "https://github.com/jesseduffield/lazygit/releases/latest/download/lazygit_${LAZYGIT_VERSION}_Linux_x86_64.tar.gz" && \ + sudo tar xf lazygit.tar.gz -C /usr/local/bin lazygit && \ + rm -rf lazygit.tar.gz + +RUN BTOP_VERSION=$(curl -s "https://api.github.com/repos/aristocratos/btop/releases/latest" | grep -Po '"tag_name": "v\K[0-9.]+') && \ + wget "https://github.com/aristocratos/btop/releases/download/v${BTOP_VERSION}/btop-x86_64-linux-musl.tbz" && \ + sudo tar -xvf btop-x86_64-linux-musl.tbz && \ + cd btop && ./install.sh && cd .. && \ + rm -rf btop-x86_64-linux-musl.tbz btop diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json new file mode 100644 index 0000000..764ef89 --- /dev/null +++ b/.devcontainer/devcontainer.json @@ -0,0 +1,68 @@ +// For format details, see https://aka.ms/devcontainer.json. For config options, see the +// README at: https://github.com/devcontainers/templates/tree/main/src/rust +// https://containers.dev/implementors/json_reference/#variables-in-devcontainerjson +{ + "name": "Rust", + // Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile + // "image": "mcr.microsoft.com/devcontainers/rust:0-1-bullseye", + "build": { + // Path is relative to the devcontainer.json file. 
+ "dockerfile": "Dockerfile" + }, + // https://github.com/microsoft/vscode-remote-release/issues/2485#issuecomment-1156342780 + "runArgs": [ + "--name", + "devcontainer-${containerWorkspaceFolderBasename}" + ], + "initializeCommand": "docker rm -f devcontainer-${containerWorkspaceFolderBasename} || true", + // Use 'mounts' to make the cargo cache persistent in a Docker Volume. + "mounts": [ + { + // "source": "devcontainer-cargo-cache-${devcontainerId}", + "source": "devcontainer-cargo-cache-${containerWorkspaceFolderBasename}", + "target": "/usr/local/cargo", + "type": "volume" + } + ], + // Features to add to the dev container. More info: https://containers.dev/features. + "features": { + "ghcr.io/devcontainers/features/git:1": {} + // "ghcr.io/devcontainers/features/nix:1": { + // "packages": [ + // "btop" + // // "lazygit" + // ] + // } + }, + // Use 'forwardPorts' to make a list of ports inside the container available locally. + // "forwardPorts": [], + // Use 'postCreateCommand' to run commands after the container is created. + // "postCreateCommand": { + // }, + "postAttachCommand": { + "AddGitSafeDir": "git config --global --add safe.directory /workspaces/${containerWorkspaceFolderBasename}", + "clippy": "cargo clippy" + }, + // Configure tool-specific properties. + // "customizations": {}, + // Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root. 
+ "remoteUser": "root", + "customizations": { + "vscode": { + "extensions": [ + "vadimcn.vscode-lldb", + "serayuzgur.crates", + "bungcip.better-toml", + "rust-lang.rust-analyzer", + "mutantdino.resourcemonitor", + "christian-kohler.path-intellisense", + "Swellaby.vscode-rust-test-adapter", + "Gruntfuggly.todo-tree", + "ms-azuretools.vscode-docker", + "redhat.vscode-yaml", + "tomoki1207.pdf", + "GitHub.copilot" + ] + } + } +} \ No newline at end of file diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..e85ef84 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,2 @@ +/target +Cargo.lock diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..b089282 --- /dev/null +++ b/.gitignore @@ -0,0 +1,111 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST +.vscode + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ + +/target +Cargo.lock + +data/csv/* +data/parquet/* diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..361df35 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,19 @@ +[package] +name = "ark-invest-api-rust-data" +version = "0.1.0" +edition = "2021" + +[dependencies] +polars = { version = "0.28", features = [ + "lazy", + "strings", + "parquet", + "round_series", +] } +reqwest = { version = "0.11", features = ["blocking"] } +glob = { version = "0.3" } +clokwerk = "0.4.0" +strum_macros = "0.24" +strum = "0.24" +tokio = { version = "1.26", features = ["full"] } +openssl = { version = "0.10", features = ["vendored"] } diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..6e62716 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,23 @@ +FROM rust:latest AS builder + +RUN rustup target add x86_64-unknown-linux-musl && apt update && apt install -y musl-tools musl-dev + +WORKDIR /ark-invest-api-rust-data + +COPY . . 
+ +RUN --mount=type=cache,target=/usr/local/cargo,from=rust,source=/usr/local/cargo \ + --mount=type=cache,target=./target \ + cargo build --target x86_64-unknown-linux-musl --release && \ + cp ./target/x86_64-unknown-linux-musl/release/ark-invest-api-rust-data . + +FROM alpine:latest + +WORKDIR /ark-invest-api-rust-data + +COPY --from=builder ark-invest-api-rust-data/ark-invest-api-rust-data . + +ENV PORT=3000 +EXPOSE 3000 + +CMD ["./ark-invest-api-rust-data"] diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..c723ceb --- /dev/null +++ b/LICENSE @@ -0,0 +1,192 @@ +Copyright 2023, Elijah McMorris ( NexVeridian ) + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. 
+ + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." 
+ + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + Copyright 2023, Elijah McMorris ( NexVeridian ) + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
diff --git a/README.md b/README.md new file mode 100644 index 0000000..22c3385 --- /dev/null +++ b/README.md @@ -0,0 +1 @@ +Fetches and caches data from csv download and saves the data in parquet format diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..3cc4a4c --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,17 @@ +version: "3" +services: + ark-invest-api-rust-data: + # image: ghcr.io/NexVeridian/ark-invest-api-rust-data:latest + image: ark-invest-api-rust-data + build: + context: . + args: + DOCKER_BUILDKIT: 1 + container_name: ark-invest-api-rust-data + restart: unless-stopped + volumes: + - ./data:/ark-invest-api-rust-data/data + +volumes: + data: + Data: diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..6f64ce7 --- /dev/null +++ b/src/main.rs @@ -0,0 +1,39 @@ +use clokwerk::Interval::*; +use clokwerk::{AsyncScheduler, Job, TimeUnits}; +use polars::prelude::LazyFrame; +use polars::prelude::*; +use std::error::Error; +use std::result::Result; +use std::time::Duration; +use strum::IntoEnumIterator; + +mod util; +use util::*; + +#[tokio::main] +async fn main() { + let mut scheduler = AsyncScheduler::new(); + scheduler.every(1.day()).at("11:30 pm").run(|| async { + for x in Ticker::iter() { + let plan = || -> Result<(), Box> { + let df = LazyFrame::scan_parquet( + format!("data/old/{}/part.0.parquet", x), + ScanArgsParquet::default(), + )?; + let df = df_format(x, df)?; + write_parquet(x, df)?; + Ok(()) + }; + + if let Ok(_) = plan() {} + } + }); + + let dfn = read_parquet(Ticker::ARKF).unwrap().collect().unwrap(); + println!("{:#?}", dfn); + loop { + scheduler.run_pending().await; + // tokio::time::sleep(Duration::from_millis(10)).await; + tokio::time::sleep(Duration::from_secs(1)).await; + } +} diff --git a/src/util.rs b/src/util.rs new file mode 100644 index 0000000..c20ae74 --- /dev/null +++ b/src/util.rs @@ -0,0 +1,158 @@ +use glob::glob; +use polars::datatypes::DataType; +use 
polars::lazy::dsl::StrpTimeOptions; +use polars::prelude::*; +use polars::prelude::{DataFrame, UniqueKeepStrategy}; +use reqwest::blocking::Client; +use std::error::Error; +use std::fs::File; +use std::io::Cursor; +use std::result::Result; +use strum_macros::EnumIter; + +#[derive(strum_macros::Display, EnumIter, Clone, Copy)] +pub enum Ticker { + ARKVC, + ARKF, + ARKG, + ARKK, + ARKQ, + ARKW, + ARKX, +} + +impl Ticker { + pub fn value(&self) -> &str { + match *self { + Ticker::ARKVC => "ARKVC", + Ticker::ARKF => "FINTECH_INNOVATION", + Ticker::ARKG => "GENOMIC_REVOLUTION", + Ticker::ARKK => "INNOVATION", + Ticker::ARKQ => "AUTONOMOUS_TECH._&_ROBOTICS", + Ticker::ARKW => "NEXT_GENERATION_INTERNET", + Ticker::ARKX => "SPACE_EXPLORATION_&_INNOVATION", + } + } +} + +pub fn merge_csv_to_parquet(folder: Ticker) -> Result<(), Box> { + let mut dfs = vec![]; + + for x in glob(&format!("data/csv/{}/*", folder.to_string()))?.filter_map(Result::ok) { + dfs.push(LazyCsvReader::new(x).finish()?); + } + + let df = concat(dfs, false, true)?; + + write_parquet(folder, df_format(folder, df)?)?; + Ok(()) +} + +pub fn update_parquet(ticker: Ticker) -> Result<(), Box> { + let update = get_csv(ticker)?; + + let mut df = read_parquet(ticker)?; + + df = concat(vec![df, update], false, true)?.unique_stable(None, UniqueKeepStrategy::First); + + write_parquet(ticker, df.collect()?)?; + Ok(()) +} + +pub fn read_parquet(ticker: Ticker) -> Result> { + let df = LazyFrame::scan_parquet( + format!("data/parquet/{}.parquet", ticker.to_string()), + ScanArgsParquet::default(), + )?; + Ok(df) +} + +pub fn write_parquet(ticker: Ticker, mut df: DataFrame) -> Result<(), Box> { + ParquetWriter::new(File::create(format!( + "data/parquet/{}.parquet", + ticker.to_string() + ))?) 
+ .finish(&mut df)?; + + Ok(()) +} + +pub fn df_format(folder: Ticker, mut dfl: LazyFrame) -> Result> { + match folder { + Ticker::ARKVC => { + dfl = dfl.rename(vec!["CUSIP", "weight (%)"], vec!["cusip", "weight"]); + + let df = dfl + .with_columns(vec![ + col("date").str().strptime(StrpTimeOptions { + date_dtype: DataType::Date, + fmt: Some("%m/%d/%Y".into()), + strict: false, + exact: true, + cache: false, + tz_aware: false, + utc: false, + }), + col("weight") + .str() + .extract(r"[0-9]*\.[0-9]+", 0) + .cast(DataType::Float64), + ]) + .filter(col("date").is_not_null()) + .collect()?; + + Ok(df) + } + _ => { + let mut df = dfl.collect()?; + + if let Ok(_) = df.rename("market_value_($)", "market_value") {} + if let Ok(_) = df.rename("weight_(%)", "weight") {} + + if let Ok(x) = df + .clone() + .lazy() + .with_column(col("date").cast(DataType::Date)) + .filter(col("date").is_not_null()) + .collect() + { + df = x + } else if let Ok(x) = df + .clone() + .lazy() + .filter(col("date").is_not_null()) + .collect() + { + df = x + } + + Ok(df) + } + } +} + +pub fn get_csv(ticker: Ticker) -> Result> { + let data: Vec; + let request; + match ticker { + Ticker::ARKVC => { + request = Client::new() + .get("https://ark-ventures.com/wp-content/uploads/funds-etf-csv/ARK_VENTURE_FUND_HOLDINGS.csv") + } + _ => { + request = Client::new().get(format!( + "https://ark-funds.com/wp-content/uploads/funds-etf-csv/ARK_{}_ETF_{}_HOLDINGS.csv", + ticker.value(), + ticker.to_string() + )) + } + } + data = request.send()?.text()?.bytes().collect(); + + let df = CsvReader::new(Cursor::new(data)) + .has_header(true) + .finish()? + .lazy(); + + Ok(df) +}