This commit is contained in:
Elijah McMorris 2023-06-29 20:28:46 +00:00
parent 0de1bcb725
commit 41bd1e8fe7
Signed by: NexVeridian
SSH key fingerprint: SHA256:bsA1SKZxuEcEVHAy3gY1HUeM5ykRJl0U0kQHQn0hMg8
3 changed files with 52 additions and 30 deletions

View file

@ -1,14 +1,14 @@
use chrono::NaiveDate;
use glob::glob;
use polars::datatypes::DataType;
use polars::lazy::dsl::StrptimeOptions;
use polars::prelude::*;
use polars::prelude::{DataFrame, StrptimeOptions, UniqueKeepStrategy};
use reqwest::blocking::Client;
use serde_json::Value;
use std::error::Error;
use std::fs::{create_dir_all, File};
use std::io::Cursor;
use std::path::{Path, PathBuf};
use std::path::Path;
use std::result::Result;
use strum_macros::EnumIter;
@ -249,21 +249,38 @@ impl Ark {
if df.get_column_names().contains(&"weight_rank") {
_ = df.drop_in_place("weight_rank");
}
let mut expressions: Vec<Expr> = vec![];
if df.get_column_names().contains(&"") {
let mut cols = df.get_column_names();
cols.retain(|&item| !item.is_empty());
df = df.select(cols)?;
}
if !df.fields().contains(&Field::new("date", DataType::Date)) {
expressions.push(col("date").str().strptime(
DataType::Date,
StrptimeOptions {
format: Some("%m/%d/%Y".into()),
strict: false,
exact: true,
cache: true,
},
));
// Parses the string column "date" into `DataType::Date` using `format` and
// returns the resulting frame.
//
// `strict: false` turns values that do not match `format` into nulls instead
// of raising, so a completely wrong format would otherwise still come back as
// `Ok` with an all-null column, and the caller's `if let Ok(..)` attempt with
// the next format would never get a chance to produce usable dates. To make
// that fallback meaningful, a parse that introduced nulls and left no value
// parsed at all is reported as an error. Individual unparsable rows are still
// tolerated (they stay null), as before.
//
// Errors: any polars error from looking up or converting the "date" column,
// or the "no value matches" condition described above.
let date_format = |df: DataFrame, format: &str| -> Result<DataFrame, Box<dyn Error>> {
    // Remember how many nulls were already present so pre-existing gaps are
    // not mistaken for parse failures.
    let nulls_before = df.column("date")?.null_count();
    let parsed = df
        .lazy()
        .with_column(col("date").str().strptime(
            DataType::Date,
            StrptimeOptions {
                format: Some(format.into()),
                strict: false,
                exact: true,
                cache: true,
            },
        ))
        .collect()?;
    let nulls_after = parsed.column("date")?.null_count();
    if nulls_after > nulls_before && nulls_after == parsed.height() {
        return Err(format!("no value in column \"date\" matches format {}", format).into());
    }
    Ok(parsed)
};
if let Ok(x) = date_format(df.clone(), "%m/%d/%Y") {
df = x
}
if let Ok(x) = date_format(df.clone(), "%Y/%m/%d") {
df = x
}
}
let mut expressions: Vec<Expr> = vec![];
if df.fields().contains(&Field::new("weight", DataType::Utf8)) {
expressions.push(
col("weight")