karstenskyt commited on
Commit
c485f29
·
verified ·
1 Parent(s): c724d07

Deploy Taipy app via scripts/manage_space.py

Browse files
.env ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ LAKEBASE_HOST=ep-spring-rain-d2i6lozx.database.us-east-1.cloud.databricks.com
2
+ LAKEBASE_ENDPOINT_NAME=projects/soccer-analytics-dev/branches/production/endpoints/primary
dist/luxury_lakehouse-0.3.0-py3-none-any.whl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3058358a8b38dc4ecd1fe414617ba9b06ee4e3e96c93ed0e3bdc94fd3b19f6c0
3
- size 380959
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:59695805377583cb29800da3688cff9b709baa8dae6a135f6e847b22f67aecf2
3
+ size 381689
requirements.txt CHANGED
@@ -1,5 +1,5 @@
1
  # This file was autogenerated by uv via the following command:
2
- # uv pip compile pyproject.toml --extra taipy-app --python-version 3.10 --python-platform linux -o D:\Development\karstenskyt__luxury-lakehouse-d32\hf_taipy_app\requirements.txt
3
  aniso8601==10.0.1
4
  # via flask-restful
5
  annotated-doc==0.0.4
@@ -266,7 +266,7 @@ requests==2.33.1
266
  # requests-cache
267
  requests-cache==1.3.1
268
  # via luxury-lakehouse (pyproject.toml)
269
- rich==14.3.3
270
  # via
271
  # cookiecutter
272
  # typer
 
1
  # This file was autogenerated by uv via the following command:
2
+ # uv pip compile pyproject.toml --extra taipy-app --python-version 3.10 --python-platform linux -o D:\Development\karstenskyt__luxury-lakehouse\hf_taipy_app\requirements.txt
3
  aniso8601==10.0.1
4
  # via flask-restful
5
  annotated-doc==0.0.4
 
266
  # requests-cache
267
  requests-cache==1.3.1
268
  # via luxury-lakehouse (pyproject.toml)
269
+ rich==14.3.4
270
  # via
271
  # cookiecutter
272
  # typer
src/pages/workflows.py CHANGED
@@ -57,6 +57,15 @@ page_config = PageConfig(
57
  "wf_run_volume_detail",
58
  help_text="Total pipeline runs in the last 30 days. Detail shows daily rate and average cost per run.",
59
  ),
 
 
 
 
 
 
 
 
 
60
  ],
61
  content=[
62
  ContentRow(
 
57
  "wf_run_volume_detail",
58
  help_text="Total pipeline runs in the last 30 days. Detail shows daily rate and average cost per run.",
59
  ),
60
+ StatCard(
61
+ "Avg Cold Start",
62
+ "wf_avg_cold_start",
63
+ "wf_cold_start_detail",
64
+ help_text=(
65
+ "Average serverless environment setup time (spin-up, wheel install, imports) "
66
+ "before pipeline work begins. Lower is better. Range shows fastest to slowest workflow."
67
+ ),
68
+ ),
69
  ],
70
  content=[
71
  ContentRow(
src/queries/workflows.py CHANGED
@@ -1,8 +1,8 @@
1
  """Workflow-related queries — extracted from state/workflows.py.
2
 
3
- Only the 2 SQL queries (cold costs + warm costs). Non-SQL data fetching
4
- (Jobs API, HF Hub) remains in state/workflows.py as those are API calls,
5
- not database queries.
6
  """
7
 
8
  from __future__ import annotations
@@ -17,20 +17,39 @@ from queries.common import execute_query, t, ttl_cache
17
  logger = logging.getLogger(__name__)
18
 
19
 
20
- @ttl_cache(ttl=3600)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  def fetch_cold_costs() -> pd.DataFrame:
22
  """30-day aggregated costs from fct_workflow_costs_synced (cold tier).
23
 
24
- Expected columns: workflow_id, task_key, total_cost_usd, total_dbu, run_count.
25
- Grouped by workflow_id (falls back to task_key when workflow_id is NULL).
 
26
  """
27
- _empty = pd.DataFrame(columns=pd.Index(["workflow_id", "task_key", "total_cost_usd", "total_dbu", "run_count"]))
28
  try:
29
  tbl = t("fct_workflow_costs_synced")
30
  return execute_query(
31
  f"SELECT COALESCE(workflow_id, task_key) AS workflow_id, " # noqa: S608
32
  f" task_key, "
33
- f" SUM(attributed_cost_usd) AS total_cost_usd, "
34
  f" SUM(attributed_dbu) AS total_dbu, "
35
  f" COUNT(DISTINCT job_run_id) AS run_count "
36
  f"FROM {tbl} "
@@ -44,6 +63,37 @@ def fetch_cold_costs() -> pd.DataFrame:
44
  return _empty
45
 
46
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
  @ttl_cache(ttl=120)
48
  def fetch_warm_costs() -> pd.DataFrame:
49
  """Recent cost estimates from workflow_cost_live_synced (warm tier).
 
1
  """Workflow-related queries — extracted from state/workflows.py.
2
 
3
+ Cold costs (30-day aggregate), warm costs (live), and latest-run metrics
4
+ (most recent run per workflow). Non-SQL data fetching (Jobs API, HF Hub)
5
+ remains in state/workflows.py as those are API calls, not database queries.
6
  """
7
 
8
  from __future__ import annotations
 
17
  logger = logging.getLogger(__name__)
18
 
19
 
20
+ _COLD_COST_COLS = [
21
+ "workflow_id",
22
+ "task_key",
23
+ "total_cost_usd",
24
+ "total_dbu",
25
+ "run_count",
26
+ ]
27
+
28
+ _LATEST_RUN_COLS = [
29
+ "workflow_id",
30
+ "cold_start_seconds",
31
+ "duration_seconds",
32
+ "entity_count",
33
+ "row_count",
34
+ "pipeline_state",
35
+ ]
36
+
37
+
38
+ @ttl_cache(ttl=600)
39
  def fetch_cold_costs() -> pd.DataFrame:
40
  """30-day aggregated costs from fct_workflow_costs_synced (cold tier).
41
 
42
+ Cost aggregates only timing and entity data comes from
43
+ fetch_latest_run_metrics() to avoid diluting recent values with
44
+ historical zeros.
45
  """
46
+ _empty = pd.DataFrame(columns=pd.Index(_COLD_COST_COLS))
47
  try:
48
  tbl = t("fct_workflow_costs_synced")
49
  return execute_query(
50
  f"SELECT COALESCE(workflow_id, task_key) AS workflow_id, " # noqa: S608
51
  f" task_key, "
52
+ f" SUM(effective_cost_usd) AS total_cost_usd, "
53
  f" SUM(attributed_dbu) AS total_dbu, "
54
  f" COUNT(DISTINCT job_run_id) AS run_count "
55
  f"FROM {tbl} "
 
63
  return _empty
64
 
65
 
66
+ @ttl_cache(ttl=600)
67
+ def fetch_latest_run_metrics() -> pd.DataFrame:
68
+ """Most recent run per workflow from fct_workflow_costs_synced.
69
+
70
+ Returns one row per workflow with cold_start_seconds, duration_seconds,
71
+ entity_count, row_count, and pipeline_state from the latest run.
72
+ """
73
+ _empty = pd.DataFrame(columns=pd.Index(_LATEST_RUN_COLS))
74
+ try:
75
+ tbl = t("fct_workflow_costs_synced")
76
+ return execute_query(
77
+ f"SELECT workflow_id, cold_start_seconds, duration_seconds, " # noqa: S608
78
+ f" entity_count, row_count, pipeline_state "
79
+ f"FROM ( "
80
+ f" SELECT COALESCE(workflow_id, task_key) AS workflow_id, "
81
+ f" cold_start_seconds, duration_seconds, entity_count, "
82
+ f" row_count, pipeline_state, "
83
+ f" ROW_NUMBER() OVER ("
84
+ f" PARTITION BY COALESCE(workflow_id, task_key) "
85
+ f" ORDER BY usage_date DESC, job_run_id DESC"
86
+ f" ) AS rn "
87
+ f" FROM {tbl} "
88
+ f" WHERE pipeline_state IS NOT NULL "
89
+ f") sub "
90
+ f"WHERE rn = 1",
91
+ )
92
+ except Exception:
93
+ logger.warning("Latest run metrics query failed", exc_info=True)
94
+ return _empty
95
+
96
+
97
  @ttl_cache(ttl=120)
98
  def fetch_warm_costs() -> pd.DataFrame:
99
  """Recent cost estimates from workflow_cost_live_synced (warm tier).
src/state/workflows.py CHANGED
@@ -18,7 +18,7 @@ from typing import Any
18
 
19
  import pandas as pd
20
  from cache import ttl_cache
21
- from queries.workflows import fetch_cold_costs, fetch_warm_costs
22
 
23
  from state.shared import register_page_refresher, register_page_teardown
24
  from state.workflows_dag import (
@@ -74,6 +74,8 @@ wf_total_cost_30d: str = "$0.00"
74
  wf_cost_detail: RawHtml = RawHtml("")
75
  wf_run_volume: str = "0"
76
  wf_run_volume_detail: str = ""
 
 
77
 
78
  wf_table_data: pd.DataFrame = pd.DataFrame(columns=pd.Index(WF_TABLE_COLS))
79
 
@@ -133,6 +135,8 @@ __all__ = [
133
  "wf_cost_detail",
134
  "wf_run_volume",
135
  "wf_run_volume_detail",
 
 
136
  "wf_table_data",
137
  "wf_type_filter",
138
  "wf_type_lov",
@@ -441,6 +445,7 @@ def _refresh_table(state: Any) -> None:
441
  global _wf_card_ids
442
 
443
  cold = fetch_cold_costs()
 
444
  jobs = _fetch_job_runs()
445
  hf_costs = _fetch_hf_cost_history()
446
 
@@ -491,6 +496,7 @@ def _refresh_table(state: Any) -> None:
491
  state.wf_runtime_filter,
492
  state.wf_freshness_filter,
493
  hf_costs=hf_costs,
 
494
  )
495
  state.wf_table_data = table_df
496
  _wf_card_ids = card_ids
@@ -505,6 +511,7 @@ def _refresh_table(state: Any) -> None:
505
  jobs,
506
  visible_card_ids=matched_ids if not all_filters_default else None,
507
  hf_costs=hf_costs,
 
508
  )
509
 
510
 
@@ -550,6 +557,7 @@ def _wf_auto_refresh_tick(state: Any) -> None:
550
 
551
  logger.debug("Auto-refresh tick")
552
  cold = fetch_cold_costs()
 
553
  warm = fetch_warm_costs()
554
  jobs = _fetch_job_runs()
555
  hf_costs = _fetch_hf_cost_history()
@@ -562,10 +570,11 @@ def _wf_auto_refresh_tick(state: Any) -> None:
562
  state.wf_runtime_filter,
563
  state.wf_freshness_filter,
564
  hf_costs=hf_costs,
 
565
  )
566
  state.wf_table_data = table_df
567
  _wf_card_ids = card_ids
568
- compute_stats(state, _cards, cold, warm, jobs, hf_costs=hf_costs)
569
 
570
 
571
  # ---------------------------------------------------------------------------
@@ -608,6 +617,7 @@ def wf_refresh(state: Any) -> None:
608
 
609
  # Query costs + job runs (job_runs already re-keyed to workflow_id)
610
  cold = fetch_cold_costs()
 
611
  warm = fetch_warm_costs()
612
  jobs = _fetch_job_runs()
613
 
@@ -629,12 +639,14 @@ def wf_refresh(state: Any) -> None:
629
  hf_costs = _fetch_hf_cost_history()
630
 
631
  # Build table
632
- table_df, card_ids = build_table_data(_cards, cold, jobs, "All", "All", "All", hf_costs=hf_costs)
 
 
633
  state.wf_table_data = table_df
634
  _wf_card_ids = card_ids
635
 
636
  # Stats (uses jobs for freshness, cold for cost, hf_costs for HF data)
637
- compute_stats(state, _cards, cold, warm, jobs, hf_costs=hf_costs)
638
 
639
  # Clear detail state (dashboard mode)
640
  state.wf_selected_workflow = None
 
18
 
19
  import pandas as pd
20
  from cache import ttl_cache
21
+ from queries.workflows import fetch_cold_costs, fetch_latest_run_metrics, fetch_warm_costs
22
 
23
  from state.shared import register_page_refresher, register_page_teardown
24
  from state.workflows_dag import (
 
74
  wf_cost_detail: RawHtml = RawHtml("")
75
  wf_run_volume: str = "0"
76
  wf_run_volume_detail: str = ""
77
+ wf_avg_cold_start: str = "\u2014"
78
+ wf_cold_start_detail: str = ""
79
 
80
  wf_table_data: pd.DataFrame = pd.DataFrame(columns=pd.Index(WF_TABLE_COLS))
81
 
 
135
  "wf_cost_detail",
136
  "wf_run_volume",
137
  "wf_run_volume_detail",
138
+ "wf_avg_cold_start",
139
+ "wf_cold_start_detail",
140
  "wf_table_data",
141
  "wf_type_filter",
142
  "wf_type_lov",
 
445
  global _wf_card_ids
446
 
447
  cold = fetch_cold_costs()
448
+ latest = fetch_latest_run_metrics()
449
  jobs = _fetch_job_runs()
450
  hf_costs = _fetch_hf_cost_history()
451
 
 
496
  state.wf_runtime_filter,
497
  state.wf_freshness_filter,
498
  hf_costs=hf_costs,
499
+ latest_run_metrics=latest,
500
  )
501
  state.wf_table_data = table_df
502
  _wf_card_ids = card_ids
 
511
  jobs,
512
  visible_card_ids=matched_ids if not all_filters_default else None,
513
  hf_costs=hf_costs,
514
+ latest_run_metrics=latest,
515
  )
516
 
517
 
 
557
 
558
  logger.debug("Auto-refresh tick")
559
  cold = fetch_cold_costs()
560
+ latest = fetch_latest_run_metrics()
561
  warm = fetch_warm_costs()
562
  jobs = _fetch_job_runs()
563
  hf_costs = _fetch_hf_cost_history()
 
570
  state.wf_runtime_filter,
571
  state.wf_freshness_filter,
572
  hf_costs=hf_costs,
573
+ latest_run_metrics=latest,
574
  )
575
  state.wf_table_data = table_df
576
  _wf_card_ids = card_ids
577
+ compute_stats(state, _cards, cold, warm, jobs, hf_costs=hf_costs, latest_run_metrics=latest)
578
 
579
 
580
  # ---------------------------------------------------------------------------
 
617
 
618
  # Query costs + job runs (job_runs already re-keyed to workflow_id)
619
  cold = fetch_cold_costs()
620
+ latest = fetch_latest_run_metrics()
621
  warm = fetch_warm_costs()
622
  jobs = _fetch_job_runs()
623
 
 
639
  hf_costs = _fetch_hf_cost_history()
640
 
641
  # Build table
642
+ table_df, card_ids = build_table_data(
643
+ _cards, cold, jobs, "All", "All", "All", hf_costs=hf_costs, latest_run_metrics=latest
644
+ )
645
  state.wf_table_data = table_df
646
  _wf_card_ids = card_ids
647
 
648
  # Stats (uses jobs for freshness, cold for cost, hf_costs for HF data)
649
+ compute_stats(state, _cards, cold, warm, jobs, hf_costs=hf_costs, latest_run_metrics=latest)
650
 
651
  # Clear detail state (dashboard mode)
652
  state.wf_selected_workflow = None
src/state/workflows_stats.py CHANGED
@@ -235,6 +235,8 @@ WF_TABLE_COLS = [
235
  "Status",
236
  "Last Run",
237
  "Last Duration",
 
 
238
  "Cost (30d)",
239
  "Avg/Run",
240
  "Freshness",
@@ -249,6 +251,7 @@ def build_table_data(
249
  runtime_filter: str | None = "All",
250
  freshness_filter: str | None = "All",
251
  hf_costs: dict[str, HFCostData] | None = None,
 
252
  ) -> tuple[pd.DataFrame, list[str]]:
253
  """Build dashboard table DataFrame from cards + cost data.
254
 
@@ -256,21 +259,32 @@ def build_table_data(
256
  - cold_costs: DB cold-tier costs (workflow_id column)
257
  - job_runs: Databricks Jobs API (re-keyed to workflow_id)
258
  - hf_costs: HF Hub cost history (keyed by workflow_id)
 
259
 
260
  Returns (DataFrame, card_ids) where card_ids is parallel to rows
261
  for mapping row index to card ID.
262
  """
263
  card_ids: list[str] = []
264
  hf = hf_costs or {}
 
265
 
266
  # Build cost lookups keyed by workflow_id
267
  cold_cost_lookup: dict[str, float] = {}
268
  cold_run_count_lookup: dict[str, int] = {}
269
  if not cold_costs.empty and "workflow_id" in cold_costs.columns:
270
- cold_cost_lookup = (
271
- cold_costs.set_index("workflow_id")["total_cost_usd"].apply(lambda x: float(x or 0)).to_dict()
272
- )
273
- cold_run_count_lookup = cold_costs.set_index("workflow_id")["run_count"].apply(lambda x: int(x or 0)).to_dict()
 
 
 
 
 
 
 
 
 
274
 
275
  rows = []
276
  for card_id, card in cards.items():
@@ -358,6 +372,16 @@ def build_table_data(
358
  # --- Status ---
359
  status_str = _resolve_status(hf_data, job_run, jobs_last_run_ts, hf_last_run_ts)
360
 
 
 
 
 
 
 
 
 
 
 
361
  rows.append(
362
  {
363
  "Name": card.get("name", card_id),
@@ -367,6 +391,8 @@ def build_table_data(
367
  "Status": status_str,
368
  "Last Run": last_run_str,
369
  "Last Duration": duration_str,
 
 
370
  "Cost (30d)": cost_val,
371
  "Avg/Run": avg_run_val,
372
  "Freshness": freshness_str,
@@ -493,6 +519,7 @@ def compute_stats(
493
  jobs: dict[str, dict[str, Any]],
494
  visible_card_ids: set[str] | None = None,
495
  hf_costs: dict[str, HFCostData] | None = None,
 
496
  ) -> None:
497
  """Compute stats bar metrics.
498
 
@@ -571,6 +598,12 @@ def compute_stats(
571
  else:
572
  state.wf_run_volume_detail = ""
573
 
 
 
 
 
 
 
574
 
575
  def _compute_hf_cost(
576
  cards_subset: dict[str, dict[str, Any]],
@@ -669,6 +702,38 @@ def _compute_freshness_stats(
669
  state.wf_freshness_detail = RawHtml("")
670
 
671
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
672
  __all__ = [
673
  "HFCostData",
674
  "WF_TABLE_COLS",
 
235
  "Status",
236
  "Last Run",
237
  "Last Duration",
238
+ "Cold Start",
239
+ "Entities",
240
  "Cost (30d)",
241
  "Avg/Run",
242
  "Freshness",
 
251
  runtime_filter: str | None = "All",
252
  freshness_filter: str | None = "All",
253
  hf_costs: dict[str, HFCostData] | None = None,
254
+ latest_run_metrics: pd.DataFrame | None = None,
255
  ) -> tuple[pd.DataFrame, list[str]]:
256
  """Build dashboard table DataFrame from cards + cost data.
257
 
 
259
  - cold_costs: DB cold-tier costs (workflow_id column)
260
  - job_runs: Databricks Jobs API (re-keyed to workflow_id)
261
  - hf_costs: HF Hub cost history (keyed by workflow_id)
262
+ - latest_run_metrics: most recent run per workflow (cold_start, entities)
263
 
264
  Returns (DataFrame, card_ids) where card_ids is parallel to rows
265
  for mapping row index to card ID.
266
  """
267
  card_ids: list[str] = []
268
  hf = hf_costs or {}
269
+ lrm = latest_run_metrics if latest_run_metrics is not None else pd.DataFrame()
270
 
271
  # Build cost lookups keyed by workflow_id
272
  cold_cost_lookup: dict[str, float] = {}
273
  cold_run_count_lookup: dict[str, int] = {}
274
  if not cold_costs.empty and "workflow_id" in cold_costs.columns:
275
+ idx = cold_costs.set_index("workflow_id")
276
+ cold_cost_lookup = idx["total_cost_usd"].apply(lambda x: float(x or 0)).to_dict()
277
+ cold_run_count_lookup = idx["run_count"].apply(lambda x: int(x or 0)).to_dict()
278
+
279
+ # Build latest-run lookups keyed by workflow_id
280
+ cold_start_lookup: dict[str, int] = {}
281
+ entity_count_lookup: dict[str, int] = {}
282
+ if not lrm.empty and "workflow_id" in lrm.columns:
283
+ lrm_idx = lrm.set_index("workflow_id")
284
+ if "cold_start_seconds" in lrm_idx.columns:
285
+ cold_start_lookup = lrm_idx["cold_start_seconds"].dropna().apply(int).to_dict()
286
+ if "entity_count" in lrm_idx.columns:
287
+ entity_count_lookup = lrm_idx["entity_count"].dropna().apply(int).to_dict()
288
 
289
  rows = []
290
  for card_id, card in cards.items():
 
372
  # --- Status ---
373
  status_str = _resolve_status(hf_data, job_run, jobs_last_run_ts, hf_last_run_ts)
374
 
375
+ # --- Cold start + Entities (from enriched cold tier) ---
376
+ cs = cold_start_lookup.get(card_id)
377
+ cold_start_str = "\u2014"
378
+ if cs is not None:
379
+ cs_mins, cs_secs = divmod(int(cs), 60)
380
+ cold_start_str = f"{cs_mins}m {cs_secs}s" if cs_mins else f"{cs_secs}s"
381
+
382
+ ent = entity_count_lookup.get(card_id)
383
+ entity_str = f"{int(ent):,}" if ent is not None else "\u2014"
384
+
385
  rows.append(
386
  {
387
  "Name": card.get("name", card_id),
 
391
  "Status": status_str,
392
  "Last Run": last_run_str,
393
  "Last Duration": duration_str,
394
+ "Cold Start": cold_start_str,
395
+ "Entities": entity_str,
396
  "Cost (30d)": cost_val,
397
  "Avg/Run": avg_run_val,
398
  "Freshness": freshness_str,
 
519
  jobs: dict[str, dict[str, Any]],
520
  visible_card_ids: set[str] | None = None,
521
  hf_costs: dict[str, HFCostData] | None = None,
522
+ latest_run_metrics: pd.DataFrame | None = None,
523
  ) -> None:
524
  """Compute stats bar metrics.
525
 
 
598
  else:
599
  state.wf_run_volume_detail = ""
600
 
601
+ # Cold start: from latest run per workflow (not averaged across 30 days)
602
+ lrm = latest_run_metrics if latest_run_metrics is not None else pd.DataFrame()
603
+ if not lrm.empty and visible_card_ids is not None:
604
+ lrm = lrm[lrm["workflow_id"].isin(list(visible_card_ids))]
605
+ _compute_cold_start_stats(state, lrm)
606
+
607
 
608
  def _compute_hf_cost(
609
  cards_subset: dict[str, dict[str, Any]],
 
702
  state.wf_freshness_detail = RawHtml("")
703
 
704
 
705
+ def _compute_cold_start_stats(state: Any, latest_run: pd.DataFrame) -> None:
706
+ """Compute cold start stat from latest-run-per-workflow metrics.
707
+
708
+ Uses median (robust to outliers) across workflows' most recent runs.
709
+ """
710
+ if latest_run.empty or "cold_start_seconds" not in latest_run.columns:
711
+ state.wf_avg_cold_start = "\u2014"
712
+ state.wf_cold_start_detail = ""
713
+ return
714
+
715
+ valid = latest_run[latest_run["cold_start_seconds"].notna()]
716
+ if valid.empty:
717
+ state.wf_avg_cold_start = "\u2014"
718
+ state.wf_cold_start_detail = "No enrichment data yet"
719
+ return
720
+
721
+ median_cs = float(valid["cold_start_seconds"].median())
722
+ max_cs = float(valid["cold_start_seconds"].max())
723
+ min_cs = float(valid["cold_start_seconds"].min())
724
+
725
+ mins, secs = divmod(int(median_cs), 60)
726
+ state.wf_avg_cold_start = f"{mins}m {secs}s" if mins else f"{secs}s"
727
+
728
+ # Detail: range across workflows
729
+ min_m, min_s = divmod(int(min_cs), 60)
730
+ max_m, max_s = divmod(int(max_cs), 60)
731
+ min_str = f"{min_m}m {min_s}s" if min_m else f"{min_s}s"
732
+ max_str = f"{max_m}m {max_s}s" if max_m else f"{max_s}s"
733
+
734
+ state.wf_cold_start_detail = f"range {min_str}\u2013{max_str}"
735
+
736
+
737
  __all__ = [
738
  "HFCostData",
739
  "WF_TABLE_COLS",
src/template.py CHANGED
@@ -128,6 +128,14 @@ GLOSSARY: dict[str, str] = {
128
  "FAILED = last run errored. SKIPPED = last run was skipped."
129
  ),
130
  "Trigger": ("How a workflow is initiated. Scheduled = runs on a cron interval. Manual = triggered by a human."),
 
 
 
 
 
 
 
 
131
  "Passes with Value": (
132
  "Passes where the off-ball scoring opportunity (actual OBSO) was greater than zero. "
133
  "Used as a quality proxy for 'successful' passes when pass outcome data is unavailable."
@@ -258,6 +266,8 @@ PAGE_TERMS: dict[str, list[str]] = {
258
  "Workflow Card",
259
  "Workflow Status",
260
  "Trigger",
 
 
261
  ],
262
  "Conversion-Funnel": ["A3 Entry", "Conversion Rate", "Possession"],
263
  }
 
128
  "FAILED = last run errored. SKIPPED = last run was skipped."
129
  ),
130
  "Trigger": ("How a workflow is initiated. Scheduled = runs on a cron interval. Manual = triggered by a human."),
131
+ "Cold Start": (
132
+ "Serverless environment setup time before pipeline work begins. "
133
+ "Includes compute spin-up, wheel installation, and Python imports. Lower is better."
134
+ ),
135
+ "Entities": (
136
+ "Average number of input entities (e.g., matches, players) processed per run. "
137
+ "From the pipeline's skip guard, which determines what work is needed."
138
+ ),
139
  "Passes with Value": (
140
  "Passes where the off-ball scoring opportunity (actual OBSO) was greater than zero. "
141
  "Used as a quality proxy for 'successful' passes when pass outcome data is unavailable."
 
266
  "Workflow Card",
267
  "Workflow Status",
268
  "Trigger",
269
+ "Cold Start",
270
+ "Entities",
271
  ],
272
  "Conversion-Funnel": ["A3 Entry", "Conversion Rate", "Possession"],
273
  }