"""Plotting referendum results in pandas.

In short, we want to make a beautiful map to report the results of a
referendum. The goal is to depict the results with something similar to the
map that you can find here:
https://github.com/x-datascience-datacamp/datacamp-assignment-pandas/blob/main/example_map.png

To do that, you will load the data as pandas.DataFrame objects, merge the
information, aggregate it by region and finally plot it on a map using
`geopandas`.
"""
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt


def load_data():
    """Read the referendum, regions and departments tables from CSV files.

    The files are looked up in the ``data`` directory relative to the current
    working directory. The referendum file is parsed with ``;`` as separator,
    the two other files with the default separator.

    Returns
    -------
    tuple of pandas.DataFrame
        The tables in the order ``(referendum, regions, departments)``.
    """
    tables = (
        pd.read_csv('data/referendum.csv', sep=';'),
        pd.read_csv('data/regions.csv'),
        pd.read_csv('data/departments.csv'),
    )
    return tables


def merge_regions_and_departments(regions, departments):
    """Join each department with the region it belongs to.

    The join is an inner join between ``regions['code']`` and
    ``departments['region_code']``. Both inputs are expected to carry a
    ``code`` and a ``name`` column, which pandas disambiguates with the
    ``_x`` (regions) and ``_y`` (departments) suffixes.

    Returns
    -------
    pandas.DataFrame
        A table with exactly the columns
        ``['code_reg', 'name_reg', 'code_dep', 'name_dep']``.
    """
    # Suffixed column produced by the merge -> name expected in the output.
    renaming = {
        'code_x': 'code_reg',
        'name_x': 'name_reg',
        'code_y': 'code_dep',
        'name_y': 'name_dep',
    }

    joined = regions.merge(
        departments,
        how='inner',
        left_on='code',
        right_on='region_code',
    )

    return joined[list(renaming)].rename(columns=renaming)


def merge_referendum_and_areas(referendum, regions_and_departments):
    """Attach region and department information to the referendum rows.

    The referendum rows are matched with ``regions_and_departments`` through
    an inner join between ``'Department code'`` and ``'code_dep'``. Rows whose
    department code contains ``Z`` (DOM-TOM-COM departments, i.e. territories
    remote from metropolitan France, and French citizens living abroad) are
    discarded, as are rows containing missing values.

    Neither input DataFrame is modified.

    Returns
    -------
    pandas.DataFrame
        The referendum columns followed by the columns of
        ``regions_and_departments``.
    """
    def _pad_numeric(code):
        # Purely numeric codes get a leading zero ('1' -> '01'); special
        # codes such as '2A' or '2B' are returned untouched.
        return code.zfill(2) if code.isdigit() else code

    # Work on copies so that the caller's tables stay intact.
    votes = referendum.copy()
    votes['Department code'] = (
        votes['Department code'].astype(str).map(_pad_numeric)
    )

    areas = regions_and_departments.copy()
    areas['code_dep'] = areas['code_dep'].astype(str)

    joined = votes.merge(
        areas,
        how='inner',
        left_on='Department code',
        right_on='code_dep',
    )

    # Discard the overseas / abroad rows, then any incomplete row.
    has_z = joined['code_dep'].str.contains('Z', na=False)
    return joined.loc[~has_z].dropna()


def compute_referendum_result_by_regions(referendum_and_areas):
    """Return a table with the absolute count for each region.

    The rows of ``referendum_and_areas`` are grouped by region and the vote
    counts are summed within each group.

    Parameters
    ----------
    referendum_and_areas : pandas.DataFrame
        Table holding at least the columns ``code_reg``, ``name_reg``,
        ``Registered``, ``Abstentions``, ``Null``, ``Choice A`` and
        ``Choice B``.

    Returns
    -------
    pandas.DataFrame
        Table indexed by ``code_reg`` with the columns
        ``['name_reg', 'Registered', 'Abstentions', 'Null', 'Choice A',
        'Choice B']``.
    """
    count_columns = [
        'Registered', 'Abstentions', 'Null', 'Choice A', 'Choice B'
    ]

    totals = (
        referendum_and_areas
        .groupby(['code_reg', 'name_reg'])[count_columns]
        .sum()
    )

    # Move only `name_reg` back to a regular column: `code_reg` must remain
    # the index of the returned table (it must not be dropped or replaced by
    # a positional index).
    return totals.reset_index('name_reg')


def plot_referendum_map(referendum_result_by_regions):
    """Draw the regional referendum results on a map.

    The region geometries are read with geopandas from
    ``data/regions.geojson`` and inner-joined with
    ``referendum_result_by_regions`` by matching the ``nom`` column of the
    geographic data with ``name_reg``. The plotted quantity is the share of
    'Choice A' among expressed ballots, i.e.
    ``Choice A / (Choice A + Choice B)``.

    Returns
    -------
    geopandas.GeoDataFrame
        The merged table, with an additional ``ratio`` column holding the
        plotted values.
    """
    regions_geo = gpd.read_file('data/regions.geojson')

    # Keep the geographic table on the left-hand side of the merge.
    results_geo = pd.merge(
        regions_geo,
        referendum_result_by_regions,
        how='inner',
        left_on='nom',
        right_on='name_reg',
    )

    # Expressed ballots are the ones cast for either choice.
    choice_a = results_geo['Choice A']
    expressed = choice_a + results_geo['Choice B']
    results_geo['ratio'] = choice_a / expressed

    results_geo.plot(
        column='ratio',
        cmap='RdYlGn',
        legend=True,
        figsize=(10, 10),
        edgecolor='black',
    )
    plt.title('Referendum Results by Region; Choice A Ratio')

    return results_geo


if __name__ == "__main__":
    # End-to-end run: load the tables, merge them, aggregate the votes by
    # region, print the aggregated table and finally display the map.
    votes, regions_table, departments_table = load_data()
    areas = merge_regions_and_departments(regions_table, departments_table)
    votes_with_areas = merge_referendum_and_areas(votes, areas)
    regional_totals = compute_referendum_result_by_regions(votes_with_areas)
    print(regional_totals)

    plot_referendum_map(regional_totals)
    plt.show()
