# pip install pandas plotly circlify matplotlib

import pandas as pd
import circlify
import matplotlib.pyplot as plt

# --- 1. Load and clean the data ---
# Circle-packing chart: one enclosing circle per region containing its five
# largest countries by GDP ('PIB'), each taken from the most recent year for
# which that country has a usable GDP value.
file_path = r"C:\Users\alber\OneDrive\Documentos\Indicadores Mundiales_Datos migrados.csv"

df = pd.read_csv(file_path, sep=';', encoding='utf-8-sig')
df['Año'] = pd.to_datetime(df['Año']).dt.year

# Discard rows without a usable GDP (null or non-positive) BEFORE choosing the
# newest row per country. Filtering afterwards would silently drop any country
# whose most recent row has no GDP, instead of falling back to its latest year
# that does have data.
pib_columns = ['Región', 'País/Región', 'PIB']
df_valid = df.dropna(subset=pib_columns)
df_valid = df_valid[df_valid['PIB'] > 0]

# Newest year first; drop_duplicates then keeps that first (newest) row.
df_latest = (
    df_valid
    .sort_values(['Año', 'País/Región'], ascending=[False, True])
    .drop_duplicates(subset='País/Región')
)
df_pib = df_latest[pib_columns]

# Top 5 countries by GDP within each region.
df_top5 = df_pib.sort_values(['Región', 'PIB'], ascending=[True, False])
df_top5 = df_top5.groupby('Región').head(5).reset_index(drop=True)

# --- 2. Support dictionaries ---
# One colour per region, cycling through the 20 entries of 'tab20'.
unique_regions = df_top5['Región'].unique()
cmap = plt.get_cmap('tab20')
region_colors = {region: cmap(i % 20) for i, region in enumerate(unique_regions)}

# Country -> region lookup kept outside the circlify payload, so the hierarchy
# passed to circlify only carries 'id' and 'datum'.
country_to_region = dict(zip(df_top5['País/Región'], df_top5['Región']))

# --- 3. Build the hierarchy for circlify ---
data = []
for region in unique_regions:
    df_region = df_top5[df_top5['Región'] == region]

    # Children carry only 'id' and 'datum'.
    children = [
        {'id': country, 'datum': pib}
        for country, pib in zip(df_region['País/Región'], df_region['PIB'])
    ]

    data.append({
        'id': region,
        'datum': df_region['PIB'].sum(),
        'children': children,
    })

# --- 4. Compute the circle layout ---
circles = circlify.circlify(
    data,
    show_enclosure=True,
    target_enclosure=circlify.Circle(x=0, y=0, r=1)
)

# --- 5. Draw ---
fig, ax = plt.subplots(figsize=(12, 12))
ax.set_xlim(-1.05, 1.05)
ax.set_ylim(-1.05, 1.05)
ax.set_aspect('equal')
ax.axis('off')

for circle in circles:
    x, y, r = circle.x, circle.y, circle.r
    # Circles with no attached data are not drawn (presumably the outer
    # enclosure requested with show_enclosure=True -- confirm against circlify).
    if circle.ex is None:
        continue

    # LEVEL 1: regions
    if circle.level == 1:
        region_name = circle.ex.get('id')
        color = region_colors.get(region_name)

        # Faint region disc with a coloured outline.
        ax.add_patch(plt.Circle((x, y), r, alpha=0.1, facecolor=color, edgecolor=color, linewidth=2))
        # Region label just above the disc.
        ax.text(x, y + r + 0.02, region_name, ha='center', va='bottom', fontsize=11, fontweight='bold')

    # LEVEL 2: countries
    elif circle.level == 2:
        country_name = circle.ex.get('id')

        # Colour comes from the parent region; fall back to grey if unknown.
        parent_region = country_to_region.get(country_name)
        color = region_colors.get(parent_region, (0.5, 0.5, 0.5, 1.0))

        ax.add_patch(plt.Circle((x, y), r, alpha=0.8, facecolor=color, edgecolor='white', linewidth=0.5))

        # Only label circles large enough to hold text.
        if r > 0.02:
            # Pick white or black text from the fill colour's weighted RGB luminance.
            r_c, g_c, b_c = float(color[0]), float(color[1]), float(color[2])
            luminance = 0.299 * r_c + 0.587 * g_c + 0.114 * b_c
            text_col = 'white' if luminance < 0.5 else 'black'

            # Only the country name is shown.
            ax.text(x, y, country_name, ha='center', va='center', fontsize=7, color=text_col, wrap=True)

plt.title('Top 5 Países por PIB en cada Región', fontsize=16, fontweight='bold', pad=40)
plt.show()

import pandas as pd
import plotly.express as px

# --- Load data ---
# Sunburst chart: region -> country, sized by total population and coloured by
# mean life expectancy, using each country's latest year with complete data.
file_path = r"C:\Users\alber\OneDrive\Documentos\Indicadores Mundiales_Datos migrados.csv"

df = pd.read_csv(
    file_path,
    sep=';',
    encoding='utf-8-sig'
)

# --- Normalise the year ---
# Parse 'Año' the same way the other charts in this file do, so that sorting
# for "latest" compares years chronologically rather than comparing the raw
# CSV strings lexicographically.
df['Año'] = pd.to_datetime(df['Año']).dt.year

# --- Mean life expectancy (average of the male and female columns) ---
df['Esperanza_vida'] = (
    df['Esperanza de vida (hombres)'] +
    df['Esperanza de vida (mujeres)']
) / 2

# --- Keep only rows that have every column the chart needs ---
# Done BEFORE picking the latest year: otherwise a country whose newest row is
# incomplete would be dropped entirely instead of using its latest year with
# data, which is what the chart title promises.
key_columns = ['Región', 'País/Región', 'Población total', 'Esperanza_vida']
df_complete = df.dropna(subset=key_columns)

# --- Most recent complete year for each country ---
df_sorted = df_complete.sort_values('Año', ascending=False)

df_latest = df_sorted.drop_duplicates(subset='País/Región', keep='first')

# --- Select the key columns ---
df_clean = df_latest[key_columns]

# --- Sunburst ---
fig = px.sunburst(
    df_clean,
    path=['Región', 'País/Región'],
    values='Población total',
    color='Esperanza_vida',
    title='Població i Esperança de Vida per Regió i País (últim any disponible)'
)

fig.update_layout(margin=dict(t=50, l=0, r=0, b=0))

fig.show()

import pandas as pd
import plotly.express as px

# --- Load data ---
# Bump chart: yearly life-expectancy ranking among a fixed cohort, namely the
# eight European countries with the highest mean life expectancy in the
# latest year that has data.
file_path = r"C:\Users\alber\OneDrive\Documentos\Indicadores Mundiales_Datos migrados.csv"

df = pd.read_csv(
    file_path,
    sep=';',
    encoding='utf-8-sig'
)

# --- Reduce the date column to a plain year ---
df['Año'] = pd.to_datetime(df['Año']).dt.year

# --- Mean life expectancy (average of the male and female columns) ---
df['Esperanza_vida'] = (
    df['Esperanza de vida (hombres)'] +
    df['Esperanza de vida (mujeres)']
) / 2

# --- Keep Europe only, and only rows with a life-expectancy value ---
# NaNs are removed up front so that neither the reference year nor the Top 8
# can be chosen from rows that have no data.
df_eu = df[df['Región'] == 'Europa'].dropna(subset=['Esperanza_vida']).copy()

# --- Latest year that actually has data ---
latest_year = df_eu['Año'].max()

# --- Top 8 in that year ---
top8 = (
    df_eu[df_eu['Año'] == latest_year]
    .sort_values('Esperanza_vida', ascending=False)
    .head(8)['País/Región']
)

# --- Keep every year of just those 8 countries (fixed cohort) ---
df_top = df_eu[df_eu['País/Región'].isin(top8)].copy()

# --- Re-rank within the cohort for each year; ties broken by row order ---
df_top['rank'] = df_top.groupby('Año')['Esperanza_vida'] \
                      .rank(method='first', ascending=False)

# --- Order chronologically per country ---
# px.line joins points in row order, so unsorted rows would make the lines
# zig-zag back and forth in time.
df_top = df_top.sort_values(['País/Región', 'Año'])

# --- Bump chart ---
fig = px.line(
    df_top,
    x='Año',
    y='rank',
    color='País/Región',
    markers=True
)

# --- Invert the Y axis so rank 1 is at the top ---
fig.update_yaxes(autorange="reversed")

# --- Hide the legend; countries are labelled at the end of each line ---
fig.update_layout(showlegend=False)

# --- End-of-line labels: one per country, at its rank in the latest year ---
final_points = df_top[df_top['Año'] == latest_year].drop_duplicates(subset='País/Región')
for pais, final_rank in zip(final_points['País/Región'], final_points['rank']):
    fig.add_annotation(
        x=latest_year,
        y=final_rank,
        text=pais,
        showarrow=False,
        xanchor='left'
    )

# --- Final layout (wide right margin leaves room for the labels) ---
fig.update_layout(
    title='Evolució del ranking d\'esperança de vida (Top 8 Europa - cohort fixa)',
    margin=dict(l=40, r=200, t=60, b=40)
)

fig.show()