Function bodies 354 total

_build_espn_team_map function · python · L40-L107 (68 LOC)

src/ncaa_eval/ingest/sync.py

def _build_espn_team_map(year: int, spellings: dict[str, int]) -> dict[str, int]:
    """Build ESPN location-name → Kaggle TeamID mapping via cbbpy's bundled team map.

    cbbpy ships a `mens_team_map.csv` that lists every D-I team per season
    with the `location` name ESPN uses internally (e.g. `"UC Santa Barbara"`,
    `"Florida Gulf Coast"`).  Using these location names as the keys in
    `team_name_to_id` means:

    * `_fetch_per_team` queries cbbpy with *exact* ESPN names → no wrong
      fuzzy match (avoids `"california-santa-barbara"` → `"California"`).
    * The schedule DataFrame's `team`/`opponent` columns also use these
      location names → `_resolve_team_id` can do direct dict lookups.

    Each ESPN location is resolved to a Kaggle ID by exact lookup in the
    Kaggle spellings dict (lowercased).  A token-set-ratio fuzzy fallback
    handles any locations not covered by the spellings.

    Falls back to the latest available season in the map if *year* is absent
    (

SyncResult class · python · L111-L118 (8 LOC)

src/ncaa_eval/ingest/sync.py

class SyncResult:
    """Summary of a single source sync operation.

    The sync routines create one instance per source and fill in the
    counters as they go; every counter defaults to zero.
    """

    # Name of the data source the summary describes ("kaggle" or "espn" in
    # the sync methods of this file).
    source: str
    # Number of team records saved (set to ``len(teams)`` by the Kaggle sync).
    teams_written: int = 0
    # Presumably the number of season records saved -- confirm against callers.
    seasons_written: int = 0
    # Presumably the number of game records saved -- confirm against callers.
    games_written: int = 0
    # Presumably the number of seasons skipped because a cached copy already
    # existed -- confirm against callers.
    seasons_cached: int = 0

SyncEngine class · python · L121-L269 (149 LOC)

src/ncaa_eval/ingest/sync.py

class SyncEngine:
    """Orchestrates data sync from external sources into the local repository.

    Args:
        repository: Repository instance used for reading and writing data.
        data_dir: Root directory for local Parquet files and cached CSVs.
    """

    def __init__(self, repository: Repository, data_dir: Path) -> None:
        self._repo = repository
        self._data_dir = data_dir

    def _espn_marker(self, year: int) -> Path:
        """Return the path of the ESPN sync marker file for *year*."""
        return self._data_dir / f".espn_synced_{year}"

    def sync_kaggle(self, force_refresh: bool = False) -> SyncResult:
        """Sync NCAA data from Kaggle with Parquet-level caching.

        Downloads CSVs (if not cached) and converts them to Parquet.
        Skips individual entities whose Parquet files already exist,
        unless *force_refresh* is ``True``.

        Args:
            force_refresh: Bypass all caches and re-fetch everything.

        Returns:

__init__ method · python · L129-L131 (3 LOC)

src/ncaa_eval/ingest/sync.py

    def __init__(self, repository: Repository, data_dir: Path) -> None:
        self._repo = repository
        self._data_dir = data_dir

_espn_marker method · python · L133-L135 (3 LOC)

src/ncaa_eval/ingest/sync.py

    def _espn_marker(self, year: int) -> Path:
        """Return the path of the ESPN sync marker file for *year*."""
        return self._data_dir / f".espn_synced_{year}"

sync_kaggle method · python · L137-L187 (51 LOC)

src/ncaa_eval/ingest/sync.py

    def sync_kaggle(self, force_refresh: bool = False) -> SyncResult:
        """Sync NCAA data from Kaggle with Parquet-level caching.

        Downloads CSVs (if not cached) and converts them to Parquet.
        Skips individual entities whose Parquet files already exist,
        unless *force_refresh* is ``True``.

        Args:
            force_refresh: Bypass all caches and re-fetch everything.

        Returns:
            SyncResult summarising teams/seasons/games written and cached.
        """
        result = SyncResult(source="kaggle")
        connector = KaggleConnector(extract_dir=self._data_dir / "kaggle")
        connector.download(force=force_refresh)  # CSV-level cache

        # Teams: Parquet-level cache
        teams_path = self._data_dir / "teams.parquet"
        if force_refresh or not teams_path.exists():
            teams = connector.fetch_teams()
            self._repo.save_teams(teams)
            result.teams_written = len(teams)
            typer.echo(f"[ka

sync_espn method · python · L189-L256 (68 LOC)

src/ncaa_eval/ingest/sync.py

    def sync_espn(self, force_refresh: bool = False) -> SyncResult:
        """Sync the most recent season's games from ESPN.

        Requires Kaggle data to be synced first (needs team and season
        mappings).  Uses a marker-file cache: if ``.espn_synced_{year}``
        exists the season is considered up-to-date unless *force_refresh*.

        ESPN games are merged with existing Kaggle games for the same
        season partition before saving (because ``save_games`` overwrites).

        Args:
            force_refresh: Bypass marker-file cache and re-fetch from ESPN.

        Returns:
            SyncResult summarising games written and seasons cached.

        Raises:
            RuntimeError: Kaggle data has not been synced yet.
        """
        result = SyncResult(source="espn")
        teams = self._repo.get_teams()
        seasons = self._repo.get_seasons()
        if not teams or not seasons:
            raise RuntimeError(
                "ESPN sync requires Kaggle

About: code-quality intelligence by Repobility · https://repobility.com

sync_all method · python · L258-L269 (12 LOC)

src/ncaa_eval/ingest/sync.py

    def sync_all(self, force_refresh: bool = False) -> list[SyncResult]:
        """Run every configured sync, Kaggle before ESPN.

        Args:
            force_refresh: Bypass caches for all sources.

        Returns:
            List of SyncResult, one per source (kaggle, espn).
        """
        # A list display evaluates its items left to right, so the Kaggle
        # sync still completes before the ESPN sync starts.
        return [self.sync_kaggle(force_refresh), self.sync_espn(force_refresh)]

ModelConfig class · python · L21-L27 (7 LOC)

src/ncaa_eval/model/base.py

class ModelConfig(BaseModel):
    """Base configuration shared by all model implementations.

    Subclasses add model-specific hyperparameters as additional fields.
    """

    # Identifier of the model the configuration belongs to.  Required here
    # (no default); a subclass may narrow it, as EloModelConfig does with the
    # literal "elo".
    model_name: str

Model class · python · L30-L62 (33 LOC)

src/ncaa_eval/model/base.py

class Model(abc.ABC):
    """Abstract base class for all NCAA prediction models.

    Every model — stateful or stateless — must implement these five
    methods so that the training CLI, evaluation engine, and persistence
    layer can treat all models uniformly.
    """

    @abc.abstractmethod
    def fit(self, X: pd.DataFrame, y: pd.Series) -> None:
        """Train the model on feature matrix *X* and labels *y*."""
        ...

    @abc.abstractmethod
    def predict_proba(self, X: pd.DataFrame) -> pd.Series:
        """Return P(team_a wins) in [0, 1] for each row of *X*."""
        ...

    @abc.abstractmethod
    def save(self, path: Path) -> None:
        """Persist the trained model to *path*."""
        ...

    @classmethod
    @abc.abstractmethod
    def load(cls, path: Path) -> Self:
        """Load a previously-saved model from *path*."""
        ...

    @abc.abstractmethod
    def get_config(self) -> ModelConfig:
        """Return the Pydantic-validated configuration fo

fit method · python · L39-L41 (3 LOC)

src/ncaa_eval/model/base.py

    def fit(self, X: pd.DataFrame, y: pd.Series) -> None:
        """Train the model on feature matrix *X* and labels *y*.

        Abstract: the body is intentionally empty and each concrete model
        supplies its own training logic.

        Parameters
        ----------
        X : pd.DataFrame
            Feature matrix to train on.
        y : pd.Series
            Labels belonging to *X*.
        """
        ...

predict_proba method · python · L44-L46 (3 LOC)

src/ncaa_eval/model/base.py

    def predict_proba(self, X: pd.DataFrame) -> pd.Series:
        """Return P(team_a wins) in [0, 1] for each row of *X*.

        Abstract: the body is intentionally empty and each concrete model
        supplies its own prediction logic.

        Parameters
        ----------
        X : pd.DataFrame
            Feature matrix with one matchup per row.

        Returns
        -------
        pd.Series
            One win probability for team_a per row of *X*.
        """
        ...

save method · python · L49-L51 (3 LOC)

src/ncaa_eval/model/base.py

    def save(self, path: Path) -> None:
        """Persist the trained model to *path*.

        Abstract: the body is intentionally empty; the on-disk layout is
        chosen by each concrete model.

        Parameters
        ----------
        path : Path
            Destination to write the model to.
        """
        ...

load method · python · L55-L57 (3 LOC)

src/ncaa_eval/model/base.py

    def load(cls, path: Path) -> Self:
        """Load a previously-saved model from *path*.

        Abstract alternate constructor: the body is intentionally empty and
        each concrete model reads back whatever its ``save`` wrote.

        Parameters
        ----------
        path : Path
            Location a model was previously saved to.

        Returns
        -------
        Self
            A model instance of the class the method is called on.
        """
        ...

get_config method · python · L60-L62 (3 LOC)

src/ncaa_eval/model/base.py

    def get_config(self) -> ModelConfig:
        """Return the Pydantic-validated configuration for this model.

        Abstract: the body is intentionally empty and each concrete model
        returns its own ``ModelConfig`` (or subclass) instance.
        """
        ...

Generated by Repobility's multi-pass static-analysis pipeline (https://repobility.com)

StatefulModel class · python · L71-L198 (128 LOC)

src/ncaa_eval/model/base.py

class StatefulModel(Model):
    """Template base for models that process games sequentially.

    Concrete methods ``fit`` and ``predict_proba`` are provided as
    template methods.  Subclasses implement the abstract hooks:

    * ``update(game)`` — absorb a single game result
    * ``_predict_one(team_a_id, team_b_id)`` — return P(team_a wins)
    * ``start_season(season)`` — reset / prepare for a new season
    * ``get_state()`` / ``set_state(state)`` — snapshot / restore ratings
    """

    # ------------------------------------------------------------------
    # Concrete template methods
    # ------------------------------------------------------------------

    def fit(self, X: pd.DataFrame, y: pd.Series) -> None:
        """Reconstruct games from *X*/*y* and update sequentially."""
        games = self._to_games(X, y)
        current_season: int | None = None
        for game in games:
            if game.season != current_season:
                self.start_season(game.seaso

fit method · python · L87-L95 (9 LOC)

src/ncaa_eval/model/base.py

    def fit(self, X: pd.DataFrame, y: pd.Series) -> None:
        """Replay the games encoded in *X*/*y* through the model, in order.

        The rows are first turned into games by ``_to_games``.  Each game is
        then passed to ``update``; whenever a game's season differs from the
        previous game's season, ``start_season`` is called first.
        """
        active_season: int | None = None
        for played in self._to_games(X, y):
            season = played.season
            if season != active_season:
                # Season boundary: let the model prepare before the first
                # game of the new season is absorbed.
                self.start_season(season)
                active_season = season
            self.update(played)

predict_proba method · python · L97-L100 (4 LOC)

src/ncaa_eval/model/base.py

    def predict_proba(self, X: pd.DataFrame) -> pd.Series:
        """Return P(team_a wins) for every row of *X*.

        Calls ``_predict_one`` once per row (iterating with ``itertuples``),
        passing that row's ``team_a_id`` and ``team_b_id``.

        Parameters
        ----------
        X : pd.DataFrame
            Must contain ``team_a_id`` and ``team_b_id`` columns.

        Returns
        -------
        pd.Series
            Float predictions aligned to ``X.index``; an empty float Series
            when *X* has no rows.
        """
        preds: list[float] = [
            self._predict_one(row.team_a_id, row.team_b_id) for row in X.itertuples()
        ]
        # Pin the dtype: for an empty list pandas would otherwise infer
        # ``object``, so an empty frame would not yield a float Series.
        return pd.Series(preds, index=X.index, dtype=float)

_to_games method · python · L107-L169 (63 LOC)

src/ncaa_eval/model/base.py

    def _to_games(X: pd.DataFrame, y: pd.Series) -> list[Game]:
        """Reconstruct :class:`Game` objects from the feature DataFrame.

        Parameters
        ----------
        X : pd.DataFrame
            Feature matrix with columns: ``team_a_id``, ``team_b_id``,
            ``season``, ``day_num``, ``date``, ``loc_encoding``,
            ``game_id``, ``is_tournament``.  Optionally ``w_score``,
            ``l_score``, ``num_ot``.
        y : pd.Series
            Binary label — ``1`` (or ``True``) means team_a won.
        """
        # Hoist column-existence checks outside the loop (O(1) each, not O(n))
        has_scores = "w_score" in X.columns and "l_score" in X.columns
        has_num_ot = "num_ot" in X.columns

        games: list[Game] = []
        for row in X.itertuples():
            idx = row.Index
            team_a_won = bool(y.loc[idx])

            team_a_id = int(row.team_a_id)
            team_b_id = int(row.team_b_id)

            if team_a_won:

_predict_one method · python · L176-L178 (3 LOC)

src/ncaa_eval/model/base.py

    def _predict_one(self, team_a_id: int, team_b_id: int) -> float:
        """Return P(team_a wins) given team IDs.

        Abstract hook implemented by subclasses; the template
        ``predict_proba`` calls it once per row.

        Parameters
        ----------
        team_a_id : int
            ID of the team whose win probability is returned.
        team_b_id : int
            ID of the opposing team.

        Returns
        -------
        float
            Probability that team_a beats team_b.
        """
        ...

update method · python · L181-L183 (3 LOC)

src/ncaa_eval/model/base.py

    def update(self, game: Game) -> None:
        """Absorb the result of a single game.

        Abstract hook implemented by subclasses; the template ``fit`` calls
        it once per game, in the order the games are reconstructed.

        Parameters
        ----------
        game : Game
            The finished game to fold into the model's internal state.
        """
        ...

start_season method · python · L186-L188 (3 LOC)

src/ncaa_eval/model/base.py

    def start_season(self, season: int) -> None:
        """Called before the first game of each season.

        Abstract hook implemented by subclasses; the template ``fit`` invokes
        it whenever a game's season differs from the previous game's, before
        that game is passed to ``update``.

        Parameters
        ----------
        season : int
            The season that is about to be processed.
        """
        ...

get_state method · python · L191-L193 (3 LOC)

src/ncaa_eval/model/base.py

    def get_state(self) -> dict[str, Any]:
        """Return a serialisable snapshot of internal ratings.

        Abstract hook implemented by subclasses; the counterpart of
        ``set_state``.

        Returns
        -------
        dict[str, Any]
            Snapshot of the model's state; the keys are chosen by the
            subclass.
        """
        ...

All rows scored by the Repobility analyzer (https://repobility.com)

set_state method · python · L196-L198 (3 LOC)

src/ncaa_eval/model/base.py

    def set_state(self, state: dict[str, Any]) -> None:
        """Restore internal ratings from a snapshot.

        Abstract hook implemented by subclasses; the counterpart of
        ``get_state``.

        Parameters
        ----------
        state : dict[str, Any]
            Snapshot to restore, in the shape the subclass's ``get_state``
            produces.
        """
        ...

EloModelConfig class · python · L22-L37 (16 LOC)

src/ncaa_eval/model/elo.py

class EloModelConfig(ModelConfig):
    """Pydantic configuration for the Elo model.

    Fields and defaults mirror :class:`~ncaa_eval.transform.elo.EloConfig`.
    """

    # Restricted to the literal "elo", which is also the default.
    model_name: Literal["elo"] = "elo"
    # NOTE(review): the per-field meanings below are inferred from the field
    # names only -- confirm against EloConfig before relying on them.
    # Presumably the rating assigned to a team with no history.
    initial_rating: float = 1500.0
    # Presumably K-factors for early-season, regular and tournament games.
    k_early: float = 56.0
    k_regular: float = 38.0
    k_tournament: float = 47.5
    # Presumably the game count below which ``k_early`` applies.
    early_game_threshold: int = 20
    # Presumably margin-of-victory scaling: exponent and cap.
    margin_exponent: float = 0.85
    max_margin: int = 25
    # Presumably the rating bonus credited to the home side.
    home_advantage_elo: float = 3.5
    # Presumably the share of a rating pulled back toward the mean between
    # seasons.
    mean_reversion_fraction: float = 0.25

EloModel class · python · L41-L187 (147 LOC)

src/ncaa_eval/model/elo.py

class EloModel(StatefulModel):
    """Elo rating model wrapping :class:`EloFeatureEngine`."""

    def __init__(self, config: EloModelConfig | None = None) -> None:
        self._config = config or EloModelConfig()
        self._engine = EloFeatureEngine(self._to_elo_config(self._config))

    # ------------------------------------------------------------------
    # StatefulModel abstract hooks
    # ------------------------------------------------------------------

    def update(self, game: Game) -> None:
        """Delegate game processing to the engine."""
        self._engine.update_game(
            w_team_id=game.w_team_id,
            l_team_id=game.l_team_id,
            w_score=game.w_score,
            l_score=game.l_score,
            loc=game.loc,
            is_tournament=game.is_tournament,
            num_ot=game.num_ot,
        )

    def start_season(self, season: int) -> None:
        """Delegate season transition to the engine."""
        self._engine.start_new_se

__init__ method · python · L44-L46 (3 LOC)

src/ncaa_eval/model/elo.py

    def __init__(self, config: EloModelConfig | None = None) -> None:
        """Create the model and the Elo engine it wraps.

        Parameters
        ----------
        config : EloModelConfig | None
            Model configuration; a default ``EloModelConfig()`` is used when
            the argument is falsy (e.g. ``None``).
        """
        self._config = config if config else EloModelConfig()
        # The engine takes its own config type, so convert before building it.
        engine_config = self._to_elo_config(self._config)
        self._engine = EloFeatureEngine(engine_config)

update method · python · L52-L62 (11 LOC)

src/ncaa_eval/model/elo.py

    def update(self, game: Game) -> None:
        """Feed one finished game into the Elo engine.

        The winner/loser IDs and scores, the location, the tournament flag and
        the overtime count are read from *game* and passed to the engine's
        ``update_game`` as keyword arguments of the same names.
        """
        apply_result = self._engine.update_game
        forwarded = (
            "w_team_id",
            "l_team_id",
            "w_score",
            "l_score",
            "loc",
            "is_tournament",
            "num_ot",
        )
        apply_result(**{name: getattr(game, name) for name in forwarded})

start_season method · python · L64-L66 (3 LOC)

src/ncaa_eval/model/elo.py

    def start_season(self, season: int) -> None:
        """Hand the season transition over to the wrapped engine.

        Parameters
        ----------
        season : int
            Season that is about to begin; forwarded unchanged to the
            engine's ``start_new_season``.
        """
        engine = self._engine
        engine.start_new_season(season)

_predict_one method · python · L68-L72 (5 LOC)

src/ncaa_eval/model/elo.py

    def _predict_one(self, team_a_id: int, team_b_id: int) -> float:
        """Compute P(team_a wins) from the two teams' current ratings.

        Both ratings are read from the engine and passed to
        ``EloFeatureEngine.expected_score``, team_a first.
        """
        rating_of = self._engine.get_rating
        rating_a, rating_b = rating_of(team_a_id), rating_of(team_b_id)
        return EloFeatureEngine.expected_score(rating_a, rating_b)

get_state method · python · L74-L79 (6 LOC)

src/ncaa_eval/model/elo.py

    def get_state(self) -> dict[str, Any]:
        """Snapshot the engine's ratings and per-team game counts.

        Returns
        -------
        dict[str, Any]
            ``"ratings"`` holds whatever the engine's ``get_all_ratings()``
            returns; ``"game_counts"`` is a plain-``dict`` copy of the
            engine's game counters.
        """
        engine = self._engine
        snapshot: dict[str, Any] = {"ratings": engine.get_all_ratings()}
        # Copy into a plain dict so the snapshot does not alias the engine's
        # own counter mapping.
        snapshot["game_counts"] = dict(engine._game_counts)
        return snapshot

Want this analysis on your repo? https://repobility.com/scan/

set_state method · python · L81-L116 (36 LOC)

src/ncaa_eval/model/elo.py

    def set_state(self, state: dict[str, Any]) -> None:
        """Restore ratings and game counts from a snapshot.

        Parameters
        ----------
        state
            Must contain ``"ratings"`` (``dict[int, float]``) and
            ``"game_counts"`` (``dict[int, int]``) keys, as returned by
            :meth:`get_state`.  Keys may be ``int`` or ``str``; string keys
            are coerced to ``int`` so that JSON-decoded dicts (where all keys
            are strings) work correctly without silent rating loss.

        Raises
        ------
        KeyError
            If ``"ratings"`` or ``"game_counts"`` keys are absent.
        TypeError
            If either value is not a ``dict``.
        """
        if "ratings" not in state or "game_counts" not in state:
            missing = {"ratings", "game_counts"} - state.keys()
            msg = f"set_state() state dict missing required keys: {missing}"
            raise KeyError(msg)
        ratings = state["ratings"]

save method · python · L122-L132 (11 LOC)

src/ncaa_eval/model/elo.py

    def save(self, path: Path) -> None:
        """Write the model to the directory *path* as two JSON files.

        ``config.json`` receives the config's ``model_dump_json()`` output and
        ``state.json`` the snapshot from ``get_state()``.  The directory (and
        any missing parents) is created first.
        """
        path.mkdir(parents=True, exist_ok=True)
        config_file = path / "config.json"
        config_file.write_text(self._config.model_dump_json())
        snapshot = self.get_state()
        # JSON object keys must be strings, so stringify the keys of both
        # sections before dumping.
        payload = {
            section: {str(key): value for key, value in snapshot[section].items()}
            for section in ("ratings", "game_counts")
        }
        state_file = path / "state.json"
        state_file.write_text(json.dumps(payload))

load method · python · L135-L162 (28 LOC)

src/ncaa_eval/model/elo.py

    def load(cls, path: Path) -> Self:
        """Reconstruct an EloModel from a saved directory.

        Raises
        ------
        FileNotFoundError
            If either ``config.json`` or ``state.json`` is missing.  A missing
            file indicates an incomplete :meth:`save` (e.g., interrupted write).
        """
        config_path = path / "config.json"
        state_path = path / "state.json"
        missing = [p for p in (config_path, state_path) if not p.exists()]
        if missing:
            missing_names = ", ".join(p.name for p in missing)
            msg = (
                f"Incomplete save at {path!r}: missing {missing_names}. "
                "The save may have been interrupted."
            )
            raise FileNotFoundError(msg)
        config = EloModelConfig.model_validate_json(config_path.read_text())
        instance = cls(config)
        raw = json.loads(state_path.read_text())
        state = {
            "ratings": {int(k): v for k, v in raw["ra

get_config method · python · L168-L170 (3 LOC)

src/ncaa_eval/model/elo.py

    def get_config(self) -> EloModelConfig:
        """Expose the configuration this model was built with.

        Returns
        -------
        EloModelConfig
            The same config object held by the model (not a copy).
        """
        current = self._config
        return current

_to_elo_config method · python · L177-L187 (11 LOC)

src/ncaa_eval/model/elo.py

    def _to_elo_config(config: EloModelConfig) -> EloConfig:
        """Build the engine's ``EloConfig`` from the Pydantic model config.

        The set of accepted arguments is read from ``EloConfig`` itself via
        :func:`dataclasses.fields`, so a field added to ``EloConfig`` is
        picked up here without a manual edit -- provided ``EloModelConfig``
        carries a field of the same name.  Dumped values whose names are not
        ``EloConfig`` fields are dropped.
        """
        accepted = {spec.name for spec in dataclasses.fields(EloConfig)}
        dumped = config.model_dump()
        selected = {}
        for name, value in dumped.items():
            if name in accepted:
                selected[name] = value
        return EloConfig(**selected)

LogisticRegressionConfig class · python · L20-L25 (6 LOC)