diff --git a/docker-compose.dev.yml b/docker-compose.dev.yml index 6969575..22cb800 100644 --- a/docker-compose.dev.yml +++ b/docker-compose.dev.yml @@ -10,7 +10,7 @@ services: restart: no ark-invest-api-rust-data: - # image: ghcr.io/NexVeridian/ark-invest-api-rust-data:latest + # image: ghcr.io/nexveridian/ark-invest-api-rust-data:latest image: ark-invest-api-rust-data container_name: ark-invest-api-rust-data build: @@ -20,9 +20,9 @@ services: DOCKER_BUILDKIT: 1 restart: no environment: - - ARK_SOURCE=ARK + - ARK_SOURCE=Ark # - STARTUP_CSV_MERGE=true - # - STARTUP_ARK_ETF=true + - STARTUP_ARK_ETF=true volumes: - ./data:/ark-invest-api-rust-data/data diff --git a/docker-compose.yml b/docker-compose.yml index a8c1635..ff7fb00 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,13 +1,13 @@ version: "3" services: ark-invest-api-rust-data: - image: ghcr.io/NexVeridian/ark-invest-api-rust-data:latest + image: ghcr.io/nexveridian/ark-invest-api-rust-data:latest container_name: ark-invest-api-rust-data restart: unless-stopped - # environment: - # - ARK_SOURCE=ApiIncremental - # - STARTUP_CSV_MERGE=true - # - STARTUP_ARK_ETF=true + environment: + - ARK_SOURCE=ApiIncremental + # - STARTUP_CSV_MERGE=true + # - STARTUP_ARK_ETF=true volumes: - ./data:/ark-invest-api-rust-data/data diff --git a/src/util.rs b/src/util.rs index 5fe11bc..344678a 100644 --- a/src/util.rs +++ b/src/util.rs @@ -228,9 +228,74 @@ impl Ark { Ok(self) } - pub fn df_format(df: DF) -> Result { + fn df_format_21shares(df: DF) -> Result { let mut df = df.collect()?; + if df.get_column_names().contains(&"Weightings") { + df = df + .lazy() + .rename( + vec![ + "Date", + "StockTicker", + "CUSIP", + "SecurityName", + "Shares", + "Price", + "MarketValue", + "Weightings", + ], + vec![ + "date", + "ticker", + "cusip", + "company", + "shares", + "share_price", + "market_value", + "weight", + ], + ) + .collect()?; + + _ = df.drop_in_place("Account"); + _ = df.drop_in_place("NetAssets"); + _ = df.drop_in_place("SharesOutstanding"); + _ = df.drop_in_place("CreationUnits"); + _ = df.drop_in_place("MoneyMarketFlag"); + } + + Ok(df.into()) + } + + fn df_format_arkvx(df: DF) -> Result { + let mut df = df.collect()?; + + if df.get_column_names().contains(&"CUSIP") { + df = df + .lazy() + .rename(vec!["CUSIP", "weight (%)"], vec!["cusip", "weight"]) + .collect()?; + + if !df.get_column_names().contains(&"market_value") { + df = df + .lazy() + .with_columns([ + Series::new("market_value", [None::]).lit(), + Series::new("shares", [None::]).lit(), + Series::new("share_price", [None::]).lit(), + ]) + .collect()?; + } + } + + Ok(df.into()) + } + + pub fn df_format(df: DF) -> Result { + let mut df = Self::df_format_21shares(df)?.collect()?; + df = Self::df_format_arkvx(df.into())?.collect()?; + if df.get_column_names().contains(&"market_value_($)") { df = df .lazy() @@ -249,12 +314,6 @@ impl Ark { ) .collect()?; } - if df.get_column_names().contains(&"CUSIP") { - df = df - .lazy() - .rename(vec!["CUSIP", "weight (%)"], vec!["cusip", "weight"]) - .collect()?; - } // if df.rename("market_value_($)", "market_value").is_ok() {} // if df.rename("market value ($)", "market_value").is_ok() {} @@ -352,7 +411,7 @@ impl Ark { expressions.push( col("ticker") .str() - .replace_all(lit("(?i) fp| uq| un| uw | cn"), lit(""), true) + .replace_all(lit("(?i) fp| uq| un| uw | cn"), lit(""), false) .str() .replace(lit("DKNN"), lit("DKNG"), true) .str() @@ -361,9 +420,9 @@ impl Ark { expressions.push( col("company") .str() - .replace_all(lit("(?i) a| cl| class| inc| ltd| corp| corporation| c| cl| se| hold| holdings| international|-|,|."), lit(""), true) + .replace_all(lit(r"(?i:-A| ADR| A| Cl| Class| Inc| incorporated| Ltd| Corp| Corporation| C| Cl| SE| Hold| Holdings| International|,|\.|-)"), lit(""), false) .str() - .replace(lit("(?i)Coinbase Global"), lit("Coinbase"), true) + .replace(lit("(?i)Coinbase Global"), lit("Coinbase"), false) .str() .replace(lit("Blackdaemon"), lit("Blockdaemon"), true) .str() @@ -412,18 +471,6 @@ impl Ark { df = df.select(["date", "ticker", "cusip", "company", "weight"])?; } - // ARKVX - if !df.get_column_names().contains(&"market_value") { - df = df - .lazy() - .with_columns([ - Series::new("market_value", [None::]).lit(), - Series::new("shares", [None::]).lit(), - Series::new("share_price", [None::]).lit(), - ]) - .collect()?; - } - Ok(df.into()) } @@ -490,6 +537,7 @@ impl Ark { for x in glob(&format!("data/csv/{}/*", ticker))?.filter_map(Result::ok) { dfs.push(LazyCsvReader::new(x).finish()?); } + let mut df = concat( dfs, UnionArgs { @@ -501,6 +549,7 @@ impl Ark { if Self::read_parquet(&ticker, path.as_ref()).is_ok() { let df_old = Self::read_parquet(&ticker, path.as_ref())?; df = Self::concat_df(vec![Self::df_format(df_old)?, Self::df_format(df)?])?; + df = Self::df_format(df)?; } Ok(Self { df, ticker, path }) } @@ -517,7 +566,7 @@ impl Reader { let mut headers = HeaderMap::new(); headers.insert( header::USER_AGENT, - HeaderValue::from_static("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36"), + HeaderValue::from_static("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36"), ); headers.insert(