enhanced

2026-03-19 21:43:34 +00:00
parent f0a6dd85a6
commit 80a66bc44c
7 changed files with 588 additions and 317 deletions
--- a/src/app.py
+++ b/src/app.py
@@ -2,10 +2,10 @@ from flask import Flask, render_template
 import pandas as pd
 import plotly.express as px
 import plotly.graph_objects as go
+import plotly.utils
 import statsmodels.api as sm
 import numpy as np
 import datetime
-from sklearn.linear_model import LinearRegression
 import json
 from stats import generate_stats
 from player_table import generate_player_table
@@ -13,6 +13,9 @@ import constants

 app = Flask(__name__)

+# ---------------------------------------------------------------------------
+# Data loading
+# ---------------------------------------------------------------------------

 def get_data_frame(filename):
    df = pd.read_csv(filename)
@@ -22,236 +25,383 @@ def get_data_frame(filename):


 def build_hovertext(df, attendance_columns):
-    return df[attendance_columns].apply(
-        lambda row: ", ".join(
-            [
-                player
-                for player in attendance_columns
-                if row[player] == 1
-            ]
-        ) or "No attendance",
-        axis=1
+    present = [c for c in attendance_columns if c in df.columns]
+    return df[present].apply(
+        lambda row: ", ".join(p for p in present if row[p] == 1) or "No attendance",
+        axis=1,
    )


-def generate_weekly_attendance_calendar(df):
-    # Compute ISO year/week and attendance
-    df["Year"] = df["Date"].dt.isocalendar().year
-    df["Week"] = df["Date"].dt.isocalendar().week
+# ---------------------------------------------------------------------------
+# Charts
+# ---------------------------------------------------------------------------

-    attendee_columns = [
-        col for col in df.columns if col not in {
-            "Date", "Relative Position", "Number of Players",
-            "Number of Teams", "Attendees", "Year", "Week", "Year-Week"
-        }
-    ]
-
-    df["Attended"] = df[attendee_columns].sum(axis=1) > 0
-    weekly_attendance = df.groupby(["Year", "Week"])[
-        "Attended"].any().astype(int).reset_index()
-
-    # Build full year/week grid
-    all_years = sorted(df["Year"].unique())
-    all_weeks = list(range(1, 53))
-
-    grid = []
-    for year in all_years:
-        for week in all_weeks:
-            grid.append({"Year": year, "Week": week})
-
-    calendar = pd.DataFrame(grid)
-    calendar = calendar.merge(weekly_attendance, on=[
-                              "Year", "Week"], how="left").fillna(0)
-    calendar["Attended"] = calendar["Attended"].astype(int)
-
-    # Plot
-    fig = go.Figure(data=go.Heatmap(
-        x=calendar["Week"],
-        y=calendar["Year"],
-        z=calendar["Attended"],
-        colorscale=constants.ATTENDANCE_COLORSCHEME,
-        zmin=0,
-        zmax=1,
-        showscale=False
-    ))
-
-    fig.update_layout(
-        title="Pub Quiz Attendance Calendar (Weekly)",
-        xaxis_title="Week Number",
-        yaxis_title="Year",
-        xaxis=dict(tickmode="linear", dtick=4),
-        template="plotly_white",
-        height=180 + len(all_years) * 40
-    )
-
-    return fig
-
-
-def generate_relative_position_over_time(df):
+def generate_position_trend(df):
+    """
+    Line chart of relative position percentile over time (lower is better).
+    Overlays a 5-game rolling average and an extended OLS trendline
+    projected to the top-8th-percentile target.
+    """
+    df = df.copy()
    df["Date_ordinal"] = df["Date"].map(pd.Timestamp.toordinal)
+    df["Relative Percentile"] = df["Relative Position"] * 100
+    df["Rolling Avg (5)"] = df["Relative Percentile"].rolling(5, min_periods=1).mean()
+    df["Attendees"] = build_hovertext(df, constants.PLAYER_NAME_COLUMNS)

    X = sm.add_constant(df["Date_ordinal"])
-    y = df["Relative Position"]
-
-    model = sm.OLS(y, X).fit()
-    df["BestFit"] = model.predict(X)
-
+    model = sm.OLS(df["Relative Percentile"], X).fit()
    intercept = model.params["const"]
    slope = model.params["Date_ordinal"]

-    target_value = 0.08
+    target_percentile = 8.0
+    min_ord = df["Date_ordinal"].min()
+    max_ord = df["Date_ordinal"].max()

-    predicted_ordinal = (target_value - intercept) / slope
+    predicted_ordinal = None
+    if slope < 0:
+        predicted_ordinal = (target_percentile - intercept) / slope

-    min_ordinal = df["Date_ordinal"].min()
-    max_ordinal = df["Date_ordinal"].max()
+    end_ord = max(max_ord, predicted_ordinal) if predicted_ordinal and predicted_ordinal > max_ord else max_ord

-    if predicted_ordinal > max_ordinal:
-        extended_ordinals = np.linspace(min_ordinal, predicted_ordinal, 100)
-    else:
-        extended_ordinals = np.linspace(min_ordinal, max_ordinal, 100)
+    extended_ords = np.linspace(min_ord, end_ord, 200)
+    extended_percentile = intercept + slope * extended_ords
+    extended_dates = [datetime.date.fromordinal(int(x)) for x in extended_ords]

-    extended_bestfit = intercept + slope * extended_ordinals
+    fig = go.Figure()

-    extended_dates = [datetime.date.fromordinal(
-        int(x)) for x in extended_ordinals]
+    fig.add_scatter(
+        x=df["Date"],
+        y=df["Relative Percentile"],
+        mode="lines+markers",
+        name="Result",
+        line=dict(color="#1e3a8a", width=1.5),
+        marker=dict(size=6, color="#1e3a8a"),
+        customdata=df["Attendees"],
+        hovertemplate="<b>%{x|%d %b %Y}</b><br>Relative percentile: %{y:.0f}th<br>Squad: %{customdata}<extra></extra>",
+    )

-    df["Attendees"] = build_hovertext(df, constants.PLAYER_NAME_COLUMNS)
-
-    fig = px.line(
-        df,
-        x="Date",
-        y="Relative Position",
-        title="Quiz Position Over Time with Extended Trendline",
-        hover_data={"Attendees": True}
+    fig.add_scatter(
+        x=df["Date"],
+        y=df["Rolling Avg (5)"],
+        mode="lines",
+        name="5-Game Avg",
+        line=dict(color="#f59e0b", width=2.5),
+        hovertemplate="<b>%{x|%d %b %Y}</b><br>5-Game Avg: %{y:.0f}%<extra></extra>",
    )

    fig.add_scatter(
        x=extended_dates,
-        y=extended_bestfit,
+        y=extended_percentile,
        mode="lines",
-        name="Extended Trendline",
-        line=dict(dash="dot", color="red")
+        name="Trend",
+        line=dict(dash="dot", color="#dc2626", width=1.5),
+        hoverinfo="skip",
    )

-    fig.update_yaxes(range=[0, 1], tickformat=".2f")
+    if predicted_ordinal and predicted_ordinal > max_ord:
+        target_date = datetime.date.fromordinal(int(predicted_ordinal))
+        fig.add_annotation(
+            x=target_date,
+            y=target_percentile,
+            text=f"8th percentile target: {target_date.strftime('%b %Y')}",
+            showarrow=True,
+            arrowhead=2,
+            font=dict(size=11, color="#dc2626"),
+        )
+
+    fig.add_hline(
+        y=50,
+        line_dash="dot",
+        line_color="#9ca3af",
+        annotation_text="50th percentile",
+        annotation_position="bottom right",
+    )
+
+    fig.update_layout(
+        title="Relative Position Over Time",
+        xaxis_title="Date",
+        yaxis=dict(title="Relative Position Percentile (lower is better)", range=[0, 100], ticksuffix="th"),
+        template="plotly_white",
+        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
+        hovermode="x unified",
+        margin=dict(t=60, b=40),
+    )
    return fig


-def generate_visualisations(df):
-    feature_columns = [
-        col
-        for col in df.columns
-        if col in constants.FEATURE_COLUMNS
+def generate_player_impact(df):
+    """
+    Horizontal bar chart: average relative position percentile when each player attends.
+    Only shows players with >= 3 appearances.
+    Green bar = lower than overall average (better); red = higher (worse).
+    """
+    MIN_APPEARANCES = 3
+    overall_percentile = df["Relative Position"].mean() * 100
+
+    rows = []
+    for name in constants.PLAYER_NAME_COLUMNS:
+        if name not in df.columns:
+            continue
+        attended = df[df[name] == 1]
+        n = len(attended)
+        if n >= MIN_APPEARANCES:
+            percentile = attended["Relative Position"].mean() * 100
+            rows.append({"Player": name, "Relative Percentile": round(percentile, 1), "Games": n})
+
+    if not rows:
+        return go.Figure()
+
+    impact_df = pd.DataFrame(rows).sort_values("Relative Percentile", ascending=True)
+    colors = [
+        "#16a34a" if p <= overall_percentile else "#dc2626"
+        for p in impact_df["Relative Percentile"]
    ]
-    x = df[feature_columns]
-    y = df["Relative Position"]

-    model = LinearRegression()
-    model.fit(x, y)
-
-    plots = {}
-
-    plots["relative_pos_over_time"] = json.dumps(
-        generate_relative_position_over_time(df),
-        cls=plotly.utils.PlotlyJSONEncoder
+    fig = go.Figure(
+        go.Bar(
+            x=impact_df["Relative Percentile"],
+            y=impact_df["Player"],
+            orientation="h",
+            marker_color=colors,
+            text=[
+                f"{constants.ordinal(round(p))}  ({g} games)"
+                for p, g in zip(impact_df["Relative Percentile"], impact_df["Games"])
+            ],
+            textposition="outside",
+            hovertemplate="<b>%{y}</b><br>Avg relative percentile: %{x:.1f}th<extra></extra>",
+        )
    )

-    df_line = df.melt(
-        id_vars="Date",
-        value_vars=["Absolute Position", "Number of Teams"],
-        var_name="Metric",
-        value_name="Value"
-    )
-    fig11 = px.line(
-        df_line,
-        x='Date',
-        y='Value',
-        color='Metric',
-        title='Absolute Position and Total Number of Teams Over Time'
-    )
-    plots["absolute_pos_over_time"] = json.dumps(
-        fig11, cls=plotly.utils.PlotlyJSONEncoder
+    fig.add_vline(
+        x=overall_percentile,
+        line_dash="dot",
+        line_color="#6b7280",
+        annotation_text=f"Overall avg ({constants.ordinal(round(overall_percentile))})",
+        annotation_position="top right",
    )

-# 2. Number of players vs position with regression line
-    fig2 = px.scatter(
+    fig.update_layout(
+        title="Who Helps Most - Avg. Relative Position When Attending",
+        xaxis=dict(
+            title="Avg. Relative Position Percentile (lower is better)",
+            range=[0, 100],
+            ticksuffix="th",
+        ),
+        yaxis=dict(title="", autorange="reversed"),
+        template="plotly_white",
+        showlegend=False,
+        height=max(300, len(rows) * 52),
+        margin=dict(t=60, b=40, r=20),
+    )
+    return fig
+
+
+def generate_scattergories_chart(df):
+    """
+    Scatter of Scattergories points vs relative position percentile with OLS trendline.
+    Negative slope = scoring more in Scattergories correlates with better finish.
+    """
+    df = df.copy()
+    df["Relative Percentile"] = df["Relative Position"] * 100
+
+    fig = px.scatter(
        df,
-        x="Number of Players",
-        y="Relative Position",
+        x="Points on Scattergories",
+        y="Relative Percentile",
        trendline="ols",
-        title="Players vs Position (%)",
+        title="Scattergories vs Relative Position",
+        labels={
+            "Points on Scattergories": "Scattergories Points",
+            "Relative Percentile": "Relative Position Percentile (lower is better)",
+        },
+        hover_data={"Relative Percentile": ":.1f"},
    )
-    fig2.update_xaxes(dtick=1)
-    plots["players_vs_position"] = json.dumps(
-        fig2, cls=plotly.utils.PlotlyJSONEncoder)
+    fig.update_traces(
+        marker=dict(color="#1e3a8a", size=9, opacity=0.8),
+        selector=dict(mode="markers"),
+    )
+    fig.update_traces(
+        line=dict(color="#dc2626", dash="dot", width=2),
+        selector=dict(type="scatter", mode="lines"),
+    )
+    fig.update_layout(
+        template="plotly_white",
+        yaxis=dict(ticksuffix="th", range=[0, 100]),
+        xaxis=dict(dtick=1),
+        margin=dict(t=60, b=40),
+    )
+    return fig

-# 3. Player participation heatmap
-    df_players = df[constants.PLAYER_NAME_COLUMNS]
-    fig3 = px.imshow(
+
+def generate_player_participation(df):
+    """Heatmap of which player attended which game."""
+    player_cols = [c for c in constants.PLAYER_NAME_COLUMNS if c in df.columns]
+    df_players = df[player_cols]
+    fig = px.imshow(
        df_players.T,
-        labels=dict(x="Games", y="Player", color="Present"),
-        title="Player Participation Heatmap",
+        labels=dict(x="Game", y="Player", color="Attended"),
+        title="Player Attendance by Game",
        color_continuous_scale=constants.ATTENDANCE_COLORSCHEME,
        zmin=0,
        zmax=1,
-        aspect="auto"
+        aspect="auto",
    )
-    fig3.update_coloraxes(
+    fig.update_coloraxes(
        colorbar=dict(
            tickvals=[0, 1],
            ticktext=["Absent", "Present"],
            lenmode="pixels",
-            len=300,
+            len=200,
        )
    )
-    fig3.update_layout(
-        template="seaborn",
-        height=600,
+    fig.update_layout(
+        template="plotly_white",
+        height=max(300, len(player_cols) * 40 + 100),
        yaxis=dict(
            tickmode="array",
-            tickvals=list(range(len(df_players.columns))),
-            ticktext=df_players.columns
+            tickvals=list(range(len(player_cols))),
+            ticktext=player_cols,
+        ),
+        margin=dict(t=60, b=40),
+    )
+    return fig
+
+
+def generate_weekly_attendance_calendar(df):
+    """Compact weekly attendance heatmap (13-column grid blocks per year)."""
+    df = df.copy()
+    df["Year"] = df["Date"].dt.isocalendar().year
+    df["Week"] = df["Date"].dt.isocalendar().week
+
+    attendee_columns = [
+        col
+        for col in df.columns
+        if col
+        not in {
+            "Date",
+            "Relative Position",
+            "Number of Players",
+            "Number of Teams",
+            "Attendees",
+            "Year",
+            "Week",
+            "Year-Week",
+            "Absolute Position",
+            "Points on Scattergories",
+        }
+    ]
+
+    df["Attended"] = (df[attendee_columns].sum(axis=1) > 0).astype(int)
+    weekly = df.groupby(["Year", "Week"])["Attended"].max().reset_index()
+
+    # Build a compact matrix: 13 columns, 4 (or 5 for week 53) rows per year.
+    all_years = sorted(df["Year"].unique())
+    max_week = int(df["Week"].max())
+    rows_per_year = 5 if max_week == 53 else 4
+
+    grid_rows = []
+    for year in all_years:
+        for block in range(1, rows_per_year + 1):
+            for col in range(1, 14):
+                week = (block - 1) * 13 + col
+                if week > 53:
+                    continue
+                grid_rows.append(
+                    {
+                        "Year": year,
+                        "Block": block,
+                        "Col": col,
+                        "Week": week,
+                    }
+                )
+
+    calendar = pd.DataFrame(grid_rows).merge(
+        weekly,
+        on=["Year", "Week"],
+        how="left",
+    )
+    calendar["Attended"] = calendar["Attended"].fillna(0).astype(int)
+
+    y_labels = []
+    for year in all_years:
+        for block in range(1, rows_per_year + 1):
+            start = (block - 1) * 13 + 1
+            end = min(block * 13, 53)
+            y_labels.append(f"{year} · W{start}-{end}")
+
+    calendar["RowLabel"] = calendar.apply(
+        lambda r: f"{int(r['Year'])} · W{(int(r['Block']) - 1) * 13 + 1}-{min(int(r['Block']) * 13, 53)}",
+        axis=1,
+    )
+
+    z_matrix = []
+    text_matrix = []
+    for label in y_labels:
+        row = calendar[calendar["RowLabel"] == label].sort_values("Col")
+        z_matrix.append(row["Attended"].tolist())
+        text_matrix.append([f"ISO week {int(w)}" for w in row["Week"]])
+
+    fig = go.Figure(
+        data=go.Heatmap(
+            x=list(range(1, 14)),
+            y=y_labels,
+            z=z_matrix,
+            text=text_matrix,
+            hovertemplate="%{y}<br>Column %{x}<br>%{text}<br>Attended: %{z}<extra></extra>",
+            colorscale=constants.ATTENDANCE_COLORSCHEME,
+            zmin=0,
+            zmax=1,
+            showscale=False,
+            xgap=3,
+            ygap=3,
        )
    )
-    plots["player_participation"] = json.dumps(
-        fig3, cls=plotly.utils.PlotlyJSONEncoder)

-# 4. Calendar view
-    plots["calendar"] = json.dumps(
-        generate_weekly_attendance_calendar(df),
-        cls=plotly.utils.PlotlyJSONEncoder
+    fig.update_layout(
+        title="Weekly Attendance Calendar",
+        xaxis=dict(title="Week Column (1-13)", tickmode="linear", dtick=1),
+        yaxis_title="Year / Week Range",
+        template="plotly_white",
+        height=180 + len(y_labels) * 34,
+        margin=dict(t=60, b=40),
    )
+    return fig

-# 5. Coefficient bar chart
-    coefficients = pd.Series(model.coef_, index=x.columns).sort_values()
-    fig5 = px.bar(
-        coefficients,
-        orientation="h",
-        labels={"value": "Coefficient", "index": "Feature"},
-        title="Linear Regression Coefficients",
-    )
-    plots["coefficients"] = json.dumps(
-        fig5, cls=plotly.utils.PlotlyJSONEncoder)

-    return plots
+# ---------------------------------------------------------------------------
+# Visualisation bundle
+# ---------------------------------------------------------------------------

+def generate_visualisations(df):
+    enc = plotly.utils.PlotlyJSONEncoder
+    return {
+        "position_trend": json.dumps(generate_position_trend(df), cls=enc),
+        "player_impact": json.dumps(generate_player_impact(df), cls=enc),
+        "scattergories_vs_position": json.dumps(generate_scattergories_chart(df), cls=enc),
+        "player_participation": json.dumps(generate_player_participation(df), cls=enc),
+        "calendar": json.dumps(generate_weekly_attendance_calendar(df), cls=enc),
+    }
+
+
+# ---------------------------------------------------------------------------
+# Routes
+# ---------------------------------------------------------------------------

@app.route("/")
 def index():
    df = get_data_frame("data.csv")
-    stats = generate_stats(df)
+    stats, highlights = generate_stats(df)
    player_table = generate_player_table(df)
    plots = generate_visualisations(df)
    return render_template(
        "index.html",
        plots=plots,
        stats=stats,
-        player_table=player_table
+        highlights=highlights,
+        player_table=player_table,
    )


 if __name__ == "__main__":
-    import plotly

    app.run(debug=True)