# pub-quiz/src/app.py

from flask import Flask, render_template
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import plotly.utils
import statsmodels.api as sm
import numpy as np
import datetime
import json
from stats import generate_stats
from player_table import generate_player_table
import constants

# Flask application instance; the route decorator and the __main__ runner
# further down in this file both operate on it.
app = Flask(__name__)

# ---------------------------------------------------------------------------
# Data loading
# ---------------------------------------------------------------------------

def get_data_frame(filename):
    """
    Load the results CSV at ``filename`` and return it in chronological order.

    The "Date" column is parsed day-first (e.g. 02/01/2024 is 2 January) and
    the returned frame is sorted by that column with a fresh 0..n-1 index.
    """
    raw = pd.read_csv(filename)
    parsed_dates = pd.to_datetime(raw["Date"], dayfirst=True)
    return (
        raw.assign(Date=parsed_dates)
        .sort_values("Date")
        .reset_index(drop=True)
    )


def build_hovertext(df, attendance_columns):
    """
    Build one hover label per row listing the players marked present.

    Only names from ``attendance_columns`` that exist as columns in ``df`` are
    considered. A player counts as present when the cell equals 1. Rows with
    nobody present get the text "No attendance". The result is the row-wise
    ``apply`` output, aligned with ``df``'s index.
    """
    known_players = [name for name in attendance_columns if name in df.columns]

    def describe_squad(game):
        attendees = [name for name in known_players if game[name] == 1]
        joined = ", ".join(attendees)
        if joined:
            return joined
        return "No attendance"

    return df[known_players].apply(describe_squad, axis=1)


# ---------------------------------------------------------------------------
# Charts
# ---------------------------------------------------------------------------

def _fit_date_trend(ordinals, values):
    """
    Fit ``values = intercept + slope * ordinals`` by ordinary least squares.

    Rows whose value is missing are left out of the fit. Returns
    ``(intercept, slope)``, or ``None`` when fewer than two distinct dates
    carry a value, because no line can be fitted through them.
    """
    usable = values.notna()
    ordinals = ordinals[usable]
    values = values[usable]

    # With a single distinct date the regressor column is itself constant;
    # add_constant then skips adding an intercept column and the "const"
    # lookup below would raise KeyError, so bail out first.
    if ordinals.nunique() < 2:
        return None

    design = sm.add_constant(ordinals)
    params = sm.OLS(values, design).fit().params
    return params["const"], params[ordinals.name]


def generate_position_trend(df):
    """
    Line chart of relative position percentile over time (lower is better).
    Overlays a 5-game rolling average and an extended OLS trendline
    projected to the top-8th-percentile target.

    Reads the "Date" and "Relative Position" columns of ``df`` (the latter is
    multiplied by 100 to obtain the plotted percentile) plus any player
    columns named in ``constants.PLAYER_NAME_COLUMNS`` for the hover text.
    ``df`` itself is not modified. Returns a ``plotly.graph_objects.Figure``.

    The trendline is omitted when there are not at least two distinct dates
    with a result. The projection to the target is only drawn when the trend
    is improving (negative slope), the target lies after the last game, and
    the projected date is representable as a ``datetime.date``.
    """
    df = df.copy()
    df["Date_ordinal"] = df["Date"].map(pd.Timestamp.toordinal)
    df["Relative Percentile"] = df["Relative Position"] * 100
    df["Rolling Avg (5)"] = df["Relative Percentile"].rolling(5, min_periods=1).mean()
    df["Attendees"] = build_hovertext(df, constants.PLAYER_NAME_COLUMNS)

    target_percentile = 8.0
    trend = _fit_date_trend(df["Date_ordinal"], df["Relative Percentile"])

    fig = go.Figure()

    fig.add_scatter(
        x=df["Date"],
        y=df["Relative Percentile"],
        mode="lines+markers",
        name="Result",
        line=dict(color="#1e3a8a", width=1.5),
        marker=dict(size=6, color="#1e3a8a"),
        customdata=df["Attendees"],
        hovertemplate="<b>%{x|%d %b %Y}</b><br>Relative percentile: %{y:.0f}th<br>Squad: %{customdata}<extra></extra>",
    )

    fig.add_scatter(
        x=df["Date"],
        y=df["Rolling Avg (5)"],
        mode="lines",
        name="5-Game Avg",
        line=dict(color="#f59e0b", width=2.5),
        hovertemplate="<b>%{x|%d %b %Y}</b><br>5-Game Avg: %{y:.0f}%<extra></extra>",
    )

    if trend is not None:
        intercept, slope = trend
        min_ord = df["Date_ordinal"].min()
        max_ord = df["Date_ordinal"].max()

        # Date on which the fitted line reaches the target. Kept only when it
        # lies in the future and still fits in a datetime.date: a nearly flat
        # slope can push it past year 9999, where date.fromordinal raises.
        predicted_ordinal = None
        if slope < 0:
            crossing = (target_percentile - intercept) / slope
            if max_ord < crossing <= datetime.date.max.toordinal():
                predicted_ordinal = crossing

        end_ord = max_ord if predicted_ordinal is None else predicted_ordinal

        extended_ords = np.linspace(min_ord, end_ord, 200)
        extended_percentile = intercept + slope * extended_ords
        extended_dates = [datetime.date.fromordinal(int(x)) for x in extended_ords]

        fig.add_scatter(
            x=extended_dates,
            y=extended_percentile,
            mode="lines",
            name="Trend",
            line=dict(dash="dot", color="#dc2626", width=1.5),
            hoverinfo="skip",
        )

        if predicted_ordinal is not None:
            target_date = datetime.date.fromordinal(int(predicted_ordinal))
            fig.add_annotation(
                x=target_date,
                y=target_percentile,
                text=f"{target_percentile:.0f}th percentile target: {target_date.strftime('%b %Y')}",
                showarrow=True,
                arrowhead=2,
                font=dict(size=11, color="#dc2626"),
            )

    fig.add_hline(
        y=50,
        line_dash="dot",
        line_color="#9ca3af",
        annotation_text="50th percentile",
        annotation_position="bottom right",
    )

    fig.update_layout(
        title="Relative Position Over Time",
        xaxis_title="Date",
        yaxis=dict(title="Relative Position Percentile (lower is better)", range=[0, 100], ticksuffix="th"),
        template="plotly_white",
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
        hovermode="x unified",
        margin=dict(t=60, b=40),
    )
    return fig


def generate_player_impact(df):
    """
    Horizontal bar chart: average relative position percentile when each player attends.
    Only shows players with >= 3 appearances.
    Green bar = at or below the overall average (better); red = above it (worse).

    Players are taken from ``constants.PLAYER_NAME_COLUMNS`` (names missing
    from ``df`` are ignored); a game counts as an appearance when the player's
    cell equals 1. Players whose attended games have no recorded
    "Relative Position" at all are left out, since they have no average to
    plot. Returns a ``plotly.graph_objects.Figure``; the figure is empty when
    no player qualifies.
    """
    MIN_APPEARANCES = 3
    overall_percentile = df["Relative Position"].mean() * 100

    rows = []
    for name in constants.PLAYER_NAME_COLUMNS:
        if name not in df.columns:
            continue
        attended = df[df[name] == 1]
        n = len(attended)
        if n < MIN_APPEARANCES:
            continue
        percentile = attended["Relative Position"].mean() * 100
        # A NaN average cannot be ranked, and round(nan) would raise when the
        # bar label is built further down.
        if pd.isna(percentile):
            continue
        rows.append({"Player": name, "Relative Percentile": percentile, "Games": n})

    if not rows:
        return go.Figure()

    impact_df = pd.DataFrame(rows).sort_values("Relative Percentile", ascending=True)

    # Keep the unrounded averages for ranking, colouring and the ordinal
    # label. Rounding to one decimal first and then comparing or rounding
    # again can flip a borderline player's colour or shift the label by one.
    averages = impact_df["Relative Percentile"].tolist()
    colors = [
        "#16a34a" if p <= overall_percentile else "#dc2626"
        for p in averages
    ]

    fig = go.Figure(
        go.Bar(
            # Bar lengths are sent at one-decimal precision, matching the hover format.
            x=[round(p, 1) for p in averages],
            y=impact_df["Player"],
            orientation="h",
            marker_color=colors,
            text=[
                f"{constants.ordinal(round(p))}  ({g} games)"
                for p, g in zip(averages, impact_df["Games"])
            ],
            textposition="outside",
            hovertemplate="<b>%{y}</b><br>Avg relative percentile: %{x:.1f}th<extra></extra>",
        )
    )

    fig.add_vline(
        x=overall_percentile,
        line_dash="dot",
        line_color="#6b7280",
        annotation_text=f"Overall avg ({constants.ordinal(round(overall_percentile))})",
        annotation_position="top right",
    )

    fig.update_layout(
        title="Who Helps Most - Avg. Relative Position When Attending",
        xaxis=dict(
            title="Avg. Relative Position Percentile (lower is better)",
            range=[0, 100],
            ticksuffix="th",
        ),
        # Sorted ascending with a reversed axis, so the best average sits on top.
        yaxis=dict(title="", autorange="reversed"),
        template="plotly_white",
        showlegend=False,
        height=max(300, len(rows) * 52),
        margin=dict(t=60, b=40, r=20),
    )
    return fig


def generate_scattergories_chart(df):
    """
    Scatter plot of Scattergories points against relative position percentile,
    with an OLS trendline drawn by Plotly Express.
    A downward-sloping trend means more Scattergories points go with a better finish.

    The percentile is "Relative Position" multiplied by 100; ``df`` itself is
    left untouched. Returns the Plotly figure.
    """
    points_column = "Points on Scattergories"
    percentile_column = "Relative Percentile"

    # assign() returns a new frame, so the caller's data is not modified.
    plot_df = df.assign(**{percentile_column: df["Relative Position"] * 100})

    axis_labels = {
        points_column: "Scattergories Points",
        percentile_column: "Relative Position Percentile (lower is better)",
    }
    fig = px.scatter(
        plot_df,
        x=points_column,
        y=percentile_column,
        trendline="ols",
        title="Scattergories vs Relative Position",
        labels=axis_labels,
        hover_data={percentile_column: ":.1f"},
    )

    # Style the data points and the trendline separately via trace selectors.
    point_style = dict(color="#1e3a8a", size=9, opacity=0.8)
    trendline_style = dict(color="#dc2626", dash="dot", width=2)
    fig.update_traces(marker=point_style, selector=dict(mode="markers"))
    fig.update_traces(line=trendline_style, selector=dict(type="scatter", mode="lines"))

    fig.update_layout(
        template="plotly_white",
        yaxis=dict(ticksuffix="th", range=[0, 100]),
        xaxis=dict(dtick=1),
        margin=dict(t=60, b=40),
    )
    return fig


def generate_player_participation(df):
    """
    Heatmap with one row per player and one column per game showing attendance.

    Players are the names in ``constants.PLAYER_NAME_COLUMNS`` that exist as
    columns in ``df``. Returns the Plotly figure.
    """
    roster = [name for name in constants.PLAYER_NAME_COLUMNS if name in df.columns]

    # Transpose so players run down the y axis and games along the x axis.
    attendance_grid = df[roster].T

    fig = px.imshow(
        attendance_grid,
        labels=dict(x="Game", y="Player", color="Attended"),
        title="Player Attendance by Game",
        color_continuous_scale=constants.ATTENDANCE_COLORSCHEME,
        zmin=0,
        zmax=1,
        aspect="auto",
    )

    # Replace the numeric colour bar ticks with readable labels.
    colorbar_settings = dict(
        tickvals=[0, 1],
        ticktext=["Absent", "Present"],
        lenmode="pixels",
        len=200,
    )
    fig.update_coloraxes(colorbar=colorbar_settings)

    player_axis = dict(
        tickmode="array",
        tickvals=list(range(len(roster))),
        ticktext=roster,
    )
    fig.update_layout(
        template="plotly_white",
        # Grow the figure with the roster so every player row stays legible.
        height=max(300, len(roster) * 40 + 100),
        yaxis=player_axis,
        margin=dict(t=60, b=40),
    )
    return fig


def generate_weekly_attendance_calendar(df):
    """
    Compact weekly attendance heatmap (13-column grid blocks per year).

    Each ISO year found in ``df["Date"]`` gets four rows of 13 weeks, plus a
    fifth row holding only week 53 when any date in the data falls in ISO
    week 53. A cell is 1 when at least one game in that ISO week had a
    positive attendance sum and 0 otherwise. Returns the Plotly figure.
    """
    WEEKS_PER_ROW = 13
    LAST_ISO_WEEK = 53

    # Every column not listed here is summed as a per-player attendance flag.
    # NOTE(review): the other charts take player columns from
    # constants.PLAYER_NAME_COLUMNS instead - confirm this exclusion list
    # still matches the CSV schema.
    non_player_columns = {
        "Date",
        "Relative Position",
        "Number of Players",
        "Number of Teams",
        "Attendees",
        "Year",
        "Week",
        "Year-Week",
        "Absolute Position",
        "Points on Scattergories",
    }
    player_columns = [name for name in df.columns if name not in non_player_columns]

    iso = df["Date"].dt.isocalendar()
    iso_years = iso.year
    iso_weeks = iso.week
    had_attendance = df[player_columns].sum(axis=1) > 0

    # (ISO year, ISO week) pairs in which at least one game was attended.
    attended_weeks = {
        (int(year), int(week))
        for year, week, flag in zip(iso_years, iso_weeks, had_attendance)
        if flag
    }

    all_years = sorted(iso_years.unique())
    rows_per_year = 5 if int(iso_weeks.max()) == LAST_ISO_WEEK else 4

    # Build the labels, cell values and hover text row by row.
    y_labels = []
    z_matrix = []
    text_matrix = []
    for year in all_years:
        for first_week in range(1, rows_per_year * WEEKS_PER_ROW + 1, WEEKS_PER_ROW):
            last_week = min(first_week + WEEKS_PER_ROW - 1, LAST_ISO_WEEK)
            weeks_in_row = range(first_week, last_week + 1)

            y_labels.append(f"{year} · W{first_week}-{last_week}")
            z_matrix.append(
                [1 if (int(year), week) in attended_weeks else 0 for week in weeks_in_row]
            )
            text_matrix.append([f"ISO week {week}" for week in weeks_in_row])

    heatmap = go.Heatmap(
        x=list(range(1, WEEKS_PER_ROW + 1)),
        y=y_labels,
        z=z_matrix,
        text=text_matrix,
        hovertemplate="%{y}<br>Column %{x}<br>%{text}<br>Attended: %{z}<extra></extra>",
        colorscale=constants.ATTENDANCE_COLORSCHEME,
        zmin=0,
        zmax=1,
        showscale=False,
        xgap=3,
        ygap=3,
    )
    fig = go.Figure(data=heatmap)

    fig.update_layout(
        title="Weekly Attendance Calendar",
        xaxis=dict(title="Week Column (1-13)", tickmode="linear", dtick=1),
        yaxis_title="Year / Week Range",
        template="plotly_white",
        # Height scales with the number of grid rows.
        height=180 + len(y_labels) * 34,
        margin=dict(t=60, b=40),
    )
    return fig


# ---------------------------------------------------------------------------
# Visualisation bundle
# ---------------------------------------------------------------------------

def generate_visualisations(df):
    """
    Build every chart for ``df`` and return them as JSON strings.

    The result maps a chart key to the figure serialised with Plotly's JSON
    encoder. Charts are built in the order listed below.
    """
    encoder = plotly.utils.PlotlyJSONEncoder
    chart_builders = {
        "position_trend": generate_position_trend,
        "player_impact": generate_player_impact,
        "scattergories_vs_position": generate_scattergories_chart,
        "player_participation": generate_player_participation,
        "calendar": generate_weekly_attendance_calendar,
    }
    return {
        key: json.dumps(build(df), cls=encoder)
        for key, build in chart_builders.items()
    }


# ---------------------------------------------------------------------------
# Routes
# ---------------------------------------------------------------------------

@app.route("/")
def index():
    """
    Render the dashboard page.

    Loads ``data.csv`` on every request, derives the stats, highlights, player
    table and chart JSON from it, and passes them to ``index.html``.
    """
    results = get_data_frame("data.csv")
    stats, highlights = generate_stats(results)
    context = {
        "stats": stats,
        "highlights": highlights,
        "player_table": generate_player_table(results),
        "plots": generate_visualisations(results),
    }
    return render_template("index.html", **context)


# Start the app only when this file is executed directly, not when imported.
# debug=True turns on Flask's debug mode.
# NOTE(review): presumably meant for local development only - confirm that
# deployments do not start the app through this entry point.
if __name__ == "__main__":

    app.run(debug=True)