from typing import Literal, Optional
import numpy as np
import pandas as pd

# City names that type-check as the ``force_city`` argument of ``get_data``.
City = Literal[
    "Ljubljana",
    "Montelimar",
    "Dusseldorf",
    "Budapest",
    "Kassel",
    "Oslo",
    "Maastricht",
    "Perpignan",
    "Roma",
    "Dresden",
    "Heathrow",
    "Tours",
    "De_bilt",
    "Stockholm",
    "Muenchen",
    "Sonnblick",
    "Basel",
    "Malmo",
]

def get_data(
        path: str = "weather.csv", ntrain: int = 10, ntest: int = 500,
        force_city: Optional[City] = None, seed: Optional[int] = None
        ) -> tuple[str, pd.DataFrame, pd.Series, pd.DataFrame, pd.Series]:
    """Load the weather table and build a train/test split for one city.

    The CSV at ``path`` is read (the literal ``"NA"`` is treated as missing),
    filtered to a single city, and every column that contains a missing
    value for that city is dropped, as is the ``city`` column itself.
    ``ntest`` rows are sampled for the test set and ``ntrain`` rows are
    sampled from the remainder for the training set, so the two never
    overlap. Only the training features are passed through
    ``introduce_mess``; the test features are returned as sampled.

    Args:
        path: Location of the CSV file. It must contain a ``city`` column
            and a ``temp_mean_next_day`` column.
        ntrain: Number of training rows to sample.
        ntest: Number of test rows to sample.
        force_city: City to use. It is normalised with
            ``.lower().capitalize()`` before matching. When falsy, a city is
            picked at random from the sorted distinct values of ``city``.
        seed: When not ``None``, seeds NumPy's global random state and is
            also used as ``random_state`` for both row samples.

    Returns:
        ``(city, train_X, train_y, test_X, test_y)`` where the ``*_y`` series
        hold the ``temp_mean_next_day`` column and the ``*_X`` frames hold
        every other remaining column.

    Raises:
        ValueError: Propagated from ``DataFrame.sample`` when more rows are
            requested than are available for the chosen city.
    """
    # Compare against None rather than relying on truthiness: 0 is a valid
    # seed, and skipping the global seeding for it would leave the city
    # choice and the injected mess non-reproducible.
    if seed is not None:
        np.random.seed(seed)

    weather_all: pd.DataFrame = pd.read_csv(path, na_values="NA")

    city: str
    if force_city:
        city = force_city.lower().capitalize()
    else:
        # Sort first so that the pick depends only on the RNG state and not
        # on set iteration order; str() turns numpy's str_ into a plain str.
        city = str(np.random.choice(sorted(set(weather_all["city"]))))

    weather_city: pd.DataFrame = (
        weather_all[weather_all["city"] == city]
        .dropna(axis=1)
        .drop(columns="city")
        .reset_index(drop=True)
    )

    test_df: pd.DataFrame = weather_city.sample(n=ntest, random_state=seed)
    # Remove the test rows before sampling so train and test cannot overlap.
    train_df: pd.DataFrame = (
        weather_city.drop(test_df.index)
        .sample(n=ntrain, random_state=seed)
        .reset_index(drop=True)
    )
    test_df.reset_index(drop=True, inplace=True)

    return (
        city,
        introduce_mess(train_df.drop(columns="temp_mean_next_day")), train_df["temp_mean_next_day"],
        test_df.drop(columns="temp_mean_next_day"), test_df["temp_mean_next_day"],
        )


def introduce_mess(data: pd.DataFrame) -> pd.DataFrame:
    """Degrade a feature table in place and return it.

    Two kinds of noise are injected, both driven by NumPy's global random
    state:

    * If a ``wind_speed`` column exists then, with probability 0.5, the whole
      column is converted to ``"string"`` dtype. Each value is rendered
      either as ``"<value>m/s"`` or, after multiplying by 2.2369 and rounding
      to two decimals, as ``"<value>mph"``, chosen per value with equal
      probability.
    * Every cell is then independently replaced by a missing value with
      probability 0.02.

    Args:
        data: Frame to corrupt. It is modified in place.

    Returns:
        The same ``data`` object that was passed in.
    """
    # different wind speed units
    if "wind_speed" in data.columns and np.random.random() < 0.5:
        mps_to_mph = 2.2369
        data["wind_speed"] = data["wind_speed"].map(
            lambda x: (
                str(x) + "m/s" if np.random.random() < 0.5
                else str(round(x * mps_to_mph, 2)) + "mph"
            )
        ).astype("string")

    # missing values
    # One draw per cell, made in a single call (filled row by row). The mask
    # is applied by position, so rows that share an index label are never
    # overwritten with each other's values, which label-based per-cell
    # writes would do.
    blank = np.random.random(data.shape) < 0.02
    for pos, col in enumerate(data.columns):
        col_blank = blank[:, pos]
        # Leave untouched columns alone so their dtype is not changed.
        if col_blank.any():
            data[col] = data[col].mask(col_blank)

    return data


def main() -> None:
    """Script entry point; currently a placeholder that does nothing."""
    return None
    
    
# Call main() only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
