# Source code for prefgraph.datasets._instacart_menu_v2
"""Instacart V2 menu-choice loader: aisle-level single-reorder with trailing-3 menus.

Construction spec:
    Observation = user × order × aisle
    Choice = sole reordered SKU in that (user, order, aisle) triple
    Menu = products bought in trailing-3 orders in same aisle, union {choice}

Filters:
    - menu_size >= min_menu_size (default 2)
    - (user, aisle) pair count >= min_pair_events (default 3)
    - user total events >= min_sessions (default 5)

This replaces the old department + first-in-cart construction.

Replaces: _instacart_menu.py (V1, deprecated for benchmarking)
Data: ~/.prefgraph/data/instacart/ (Kaggle Market Basket Analysis)
"""
from __future__ import annotations
import os
from collections import defaultdict
from pathlib import Path
import polars as pl
from prefgraph.core.session import MenuChoiceLog
def _find_data_dir(data_dir: str | Path | None) -> Path:
"""Search standard locations for the Instacart data directory."""
candidates = []
if data_dir is not None:
candidates.append(Path(data_dir))
env = os.environ.get("PYREVEALED_DATA_DIR") or os.environ.get("PREFGRAPH_DATA_DIR")
if env:
candidates.append(Path(env) / "instacart")
candidates.extend([
Path.home() / ".prefgraph" / "data" / "instacart",
Path.home() / ".pyrevealed" / "data" / "instacart",
])
for d in candidates:
if d.is_dir() and (d / "orders.csv").exists():
return d
searched = "\n ".join(str(c) for c in candidates)
raise FileNotFoundError(f"Instacart data not found. Searched:\n {searched}")