"""BehavioralAuditor: High-level API for behavioral consistency validation.
This module provides a user-friendly, tech-native interface for validating
user behavior consistency. Think of it as a "linter" for behavioral data.
Use this to:
- Measure behavioral consistency (integrity score)
- Measure exploitability of inconsistencies (confusion metric)
- Compare behavior across user segments or time periods
"""
from __future__ import annotations
from dataclasses import dataclass
from typing import TYPE_CHECKING
from prefgraph.core.mixins import ResultSummaryMixin
from prefgraph.algorithms.garp import validate_consistency
from prefgraph.algorithms.aei import compute_integrity_score
from prefgraph.algorithms.mpi import compute_confusion_metric
from prefgraph.algorithms.bronars import compute_test_power
from prefgraph.algorithms.harp import validate_proportional_scaling
from prefgraph.algorithms.vei import compute_granular_integrity
from prefgraph.algorithms.quasilinear import test_income_invariance
from prefgraph.algorithms.gross_substitutes import test_cross_price_effect
from prefgraph.algorithms.differentiable import validate_smooth_preferences
from prefgraph.algorithms.acyclical_p import validate_strict_consistency
from prefgraph.algorithms.gapp import validate_price_preferences
from prefgraph.algorithms.abstract_choice import (
validate_menu_warp,
validate_menu_sarp,
validate_menu_consistency,
compute_menu_efficiency,
fit_menu_preferences,
)
if TYPE_CHECKING:
from prefgraph.core.session import BehaviorLog, MenuChoiceLog
from prefgraph.core.result import (
ConsistencyResult,
IntegrityResult,
ConfusionResult,
TestPowerResult,
ProportionalScalingResult,
GranularIntegrityResult,
IncomeInvarianceResult,
CrossPriceResult,
SmoothPreferencesResult,
StrictConsistencyResult,
PricePreferencesResult,
CongruenceResult,
OrdinalUtilityResult,
)
from prefgraph.core.summary import BehavioralSummary, MenuChoiceSummary
[docs]
@dataclass
class AuditReport:
    """
    Headline results of a behavioral audit, bundled into a single record.

    Attributes:
        is_consistent: True if behavior passes GARP consistency check
        integrity_score: Afriat Efficiency Index (0-1, higher = more consistent)
        confusion_score: Money Pump Index (0-1, higher = more exploitable)
    """

    is_consistent: bool
    integrity_score: float
    confusion_score: float

    def summary(self) -> str:
        """Render the report as a multi-line, human-readable string.

        The text is assembled with the ``ResultSummaryMixin`` formatting
        helpers and contains, in order: a header, the overall PASS/FAIL
        status, the three core metrics, an interpretation of the integrity
        and confusion scores, and a recommendation.

        Returns:
            All report lines joined with newline characters.
        """
        fmt = ResultSummaryMixin
        passed = self.is_consistent
        integrity = self.integrity_score
        confusion = self.confusion_score

        # Header followed by the overall verdict.
        report = [fmt._format_header("BEHAVIORAL AUDIT REPORT")]
        verdict = fmt._format_status(passed, "PASS", "FAIL")
        report.append(f"\nOverall Status: {verdict}")

        # Raw values of the three metrics.
        report += [
            fmt._format_section("Core Metrics"),
            fmt._format_metric("Consistent (GARP)", passed),
            fmt._format_metric("Integrity Score (AEI)", integrity),
            fmt._format_metric("Confusion Score (MPI)", confusion),
        ]

        # Textual interpretation of the two numeric scores.
        report.append(fmt._format_section("Interpretation"))
        integrity_text = fmt._format_interpretation(integrity, "efficiency")
        report.append(f" Integrity: {integrity_text}")
        # The MPI is inverted before interpretation so that, like the
        # integrity score, a larger value means "better".
        # NOTE(review): score() clamps the MPI at 1.0 before inverting it,
        # this line does not - confirm whether the difference is intended.
        confusion_text = fmt._format_interpretation(1.0 - confusion, "mpi")
        report.append(f" Confusion: {confusion_text}")

        # Closing recommendation, selected by integrity tier.
        report.append(fmt._format_section("Recommendation"))
        report.extend(self._recommendation())

        return "\n".join(report)

    def _recommendation(self) -> tuple:
        """Return the two recommendation lines for the current scores.

        Tiers are keyed on ``integrity_score`` (0.95 / 0.85 / 0.70); the top
        tier additionally requires ``is_consistent`` to be true.
        """
        integrity = self.integrity_score
        if self.is_consistent and integrity >= 0.95:
            return (
                " Behavior is highly consistent with utility maximization.",
                " User signal is clean and reliable.",
            )
        if integrity >= 0.85:
            return (
                " Behavior shows minor inconsistencies.",
                " User signal is generally reliable with some noise.",
            )
        if integrity >= 0.70:
            return (
                " Behavior shows moderate inconsistencies.",
                " Consider investigating specific violations.",
            )
        return (
            " Behavior shows significant inconsistencies.",
            " User may be confused, a bot, or shared account.",
        )

    def score(self) -> float:
        """Return scikit-learn style score in [0, 1]. Higher is better.

        The value is the mean of the integrity score and the inverted
        confusion score; the confusion score is capped at 1.0 before it is
        inverted, so the inverted term is never negative.
        """
        capped_confusion = min(1.0, self.confusion_score)
        return (self.integrity_score + (1.0 - capped_confusion)) / 2.0

    def __repr__(self) -> str:
        """Return a compact one-line representation with two-decimal scores."""
        if self.is_consistent:
            verdict = "PASS"
        else:
            verdict = "FAIL"
        return (
            f"AuditReport({verdict}, "
            f"integrity={self.integrity_score:.2f}, "
            f"confusion={self.confusion_score:.2f})"
        )
[docs]
class BehavioralAuditor:
    """
    Validates behavioral consistency in user action logs.

    BehavioralAuditor is the "linter" for user behavior: it checks whether
    a user's historical actions are internally consistent with utility
    maximization. Every method is a thin wrapper that forwards the log to
    the matching algorithm, using ``self.precision`` as the tolerance.

    Example:
        >>> from prefgraph import BehavioralAuditor, BehaviorLog
        >>> import numpy as np
        >>> # Create behavior log
        >>> log = BehaviorLog(
        ...     cost_vectors=np.array([[1.0, 2.0], [2.0, 1.0]]),
        ...     action_vectors=np.array([[3.0, 1.0], [1.0, 3.0]]),
        ...     user_id="user_123"
        ... )
        >>> # Run audit
        >>> auditor = BehavioralAuditor()
        >>> if auditor.validate_history(log):
        ...     print("User behavior is consistent")
        ... else:
        ...     print("Inconsistent behavior detected")
        >>> # Get detailed scores
        >>> score = auditor.get_integrity_score(log)
        >>> print(f"Behavioral integrity: {score:.2f}")

    Attributes:
        precision: Numerical precision for consistency checks (default: 1e-6)
    """

    def __init__(self, precision: float = 1e-6) -> None:
        """
        Create an auditor with the given numerical tolerance.

        Args:
            precision: Numerical precision for floating-point comparisons.
                Smaller values are more strict.
        """
        self.precision = precision

    def validate_history(self, log: BehaviorLog) -> bool:
        """
        Report whether the user's behavior history is internally consistent.

        A consistent history means the user's choices don't contradict
        each other transitively. Inconsistent behavior suggests:
        - Bot (random choices)
        - Shared account (multiple users)
        - Confused user (bad UX)

        Args:
            log: BehaviorLog containing user's historical actions

        Returns:
            True if behavior is consistent, False otherwise

        Example:
            >>> if auditor.validate_history(user_log):
            ...     trust_level = "high"
            ... else:
            ...     trust_level = "low"
        """
        return validate_consistency(log, tolerance=self.precision).is_consistent

    def get_integrity_score(self, log: BehaviorLog) -> float:
        """
        Return the behavioral integrity score (0-1).

        The integrity score measures how "clean" the behavioral signal is:
        - 1.0 = Perfect integrity, fully consistent behavior
        - 0.9+ = High integrity, minor noise
        - 0.7-0.9 = Moderate integrity, some confusion
        - <0.7 = Low integrity, likely bot or multiple users

        Args:
            log: BehaviorLog containing user's historical actions

        Returns:
            Float between 0 (chaotic) and 1 (perfectly consistent); this is
            the ``efficiency_index`` of the computed integrity result.

        Example:
            >>> score = auditor.get_integrity_score(user_log)
            >>> if score < 0.85:
            ...     flag_for_manual_review(user_id)
        """
        return compute_integrity_score(log, tolerance=self.precision).efficiency_index

    def get_confusion_score(self, log: BehaviorLog) -> float:
        """
        Return the confusion/exploitability score (0-1).

        The confusion score measures how exploitable the user's
        inconsistencies are. High confusion indicates:
        - User not understanding the options
        - Bad UX causing irrational choices
        - Possible UI dark patterns

        Args:
            log: BehaviorLog containing user's historical actions

        Returns:
            Float between 0 (no confusion) and 1 (highly confused); this is
            the ``mpi_value`` of the computed confusion result.

        Example:
            >>> confusion = auditor.get_confusion_score(user_log)
            >>> if confusion > 0.15:
            ...     alert_ux_team("User showing high confusion")
        """
        return compute_confusion_metric(log, tolerance=self.precision).mpi_value

    def get_consistency_details(self, log: BehaviorLog) -> ConsistencyResult:
        """
        Return the complete result of the consistency check.

        Unlike ``validate_history``, which reduces the result to a boolean,
        this hands back the full ConsistencyResult with information about
        the specific inconsistencies found.

        Args:
            log: BehaviorLog containing user's historical actions

        Returns:
            ConsistencyResult with is_consistent, violations, etc.
        """
        details = validate_consistency(log, tolerance=self.precision)
        return details

    def get_integrity_details(self, log: BehaviorLog) -> IntegrityResult:
        """
        Return the complete result of the integrity score computation.

        Unlike ``get_integrity_score``, which extracts a single float, this
        hands back the full IntegrityResult with the underlying consistency
        check at the computed threshold.

        Args:
            log: BehaviorLog containing user's historical actions

        Returns:
            IntegrityResult with integrity_score, waste_fraction, etc.
        """
        details = compute_integrity_score(log, tolerance=self.precision)
        return details

    def get_confusion_details(self, log: BehaviorLog) -> ConfusionResult:
        """
        Return the complete result of the confusion metric computation.

        Unlike ``get_confusion_score``, which extracts a single float, this
        hands back the full ConfusionResult with information about the
        worst inconsistency cycles.

        Args:
            log: BehaviorLog containing user's historical actions

        Returns:
            ConfusionResult with confusion_score, worst_cycle, etc.
        """
        details = compute_confusion_metric(log, tolerance=self.precision)
        return details

    def full_audit(self, log: BehaviorLog) -> AuditReport:
        """
        Run the comprehensive behavioral audit.

        Computes the three core metrics (consistency, integrity, confusion),
        each through its own method on this auditor, and packs them into a
        single report.

        Args:
            log: BehaviorLog containing user's historical actions

        Returns:
            AuditReport with consistency, integrity, and confusion scores

        Example:
            >>> report = auditor.full_audit(user_log)
            >>> print(f"Consistent: {report.is_consistent}")
            >>> print(f"Integrity: {report.integrity_score:.2f}")
            >>> print(f"Confusion: {report.confusion_score:.2f}")
        """
        consistent = self.validate_history(log)
        integrity = self.get_integrity_score(log)
        confusion = self.get_confusion_score(log)
        return AuditReport(
            is_consistent=consistent,
            integrity_score=integrity,
            confusion_score=confusion,
        )

    # =========================================================================
    # NEW METHODS - Extended Analysis
    # =========================================================================

    def compute_test_power(
        self, log: BehaviorLog, n_simulations: int = 1000
    ) -> TestPowerResult:
        """
        Compute the statistical power of the consistency test.

        Bronars' Power Index measures whether passing the consistency test
        is statistically meaningful. Low power means even random behavior
        would pass, making the consistency result uninformative.

        Args:
            log: BehaviorLog containing user's historical actions
            n_simulations: Number of random simulations (default: 1000)

        Returns:
            TestPowerResult with power_index and is_significant

        Example:
            >>> result = auditor.compute_test_power(user_log)
            >>> if result.power_index < 0.5:
            ...     print("Warning: GARP test has low power for this data")
        """
        power = compute_test_power(
            log, n_simulations=n_simulations, tolerance=self.precision
        )
        return power

    def validate_proportional_scaling(
        self, log: BehaviorLog
    ) -> ProportionalScalingResult:
        """
        Test whether user preferences scale proportionally with budget.

        Homothetic preferences mean relative preferences don't change
        with budget - useful for demand prediction and user segmentation.

        Args:
            log: BehaviorLog containing user's historical actions

        Returns:
            ProportionalScalingResult with is_homothetic and violations

        Example:
            >>> result = auditor.validate_proportional_scaling(user_log)
            >>> if result.is_homothetic:
            ...     print("User preferences scale with budget")
        """
        scaling = validate_proportional_scaling(log, tolerance=self.precision)
        return scaling

    def compute_granular_integrity(
        self, log: BehaviorLog, efficiency_threshold: float = 0.9
    ) -> GranularIntegrityResult:
        """
        Compute per-observation integrity scores.

        Unlike get_integrity_score which gives one global score, this
        identifies which specific observations are problematic.

        Args:
            log: BehaviorLog containing user's historical actions
            efficiency_threshold: Threshold below which observations are
                flagged (default: 0.9)

        Returns:
            GranularIntegrityResult with efficiency_vector and
            problematic_observations

        Example:
            >>> result = auditor.compute_granular_integrity(user_log)
            >>> for idx in result.problematic_observations:
            ...     print(f"Investigate observation {idx}")
        """
        granular = compute_granular_integrity(
            log, tolerance=self.precision, efficiency_threshold=efficiency_threshold
        )
        return granular

    def test_income_invariance(self, log: BehaviorLog) -> IncomeInvarianceResult:
        """
        Test whether user behavior is invariant to income changes.

        Quasilinear preferences have constant marginal utility of money,
        meaning no income effects on goods choices.

        Args:
            log: BehaviorLog containing user's historical actions

        Returns:
            IncomeInvarianceResult with is_quasilinear and violations

        Example:
            >>> result = auditor.test_income_invariance(user_log)
            >>> if result.has_income_effects:
            ...     print("User behavior varies with income")
        """
        invariance = test_income_invariance(log, tolerance=self.precision)
        return invariance

    def test_cross_price_effect(
        self, log: BehaviorLog, good_g: int, good_h: int
    ) -> CrossPriceResult:
        """
        Test the cross-price relationship between two goods.

        Determines if goods are substitutes (price of A up → demand for B up)
        or complements (price of A up → demand for B down).

        Args:
            log: BehaviorLog containing user's historical actions
            good_g: Index of first good
            good_h: Index of second good

        Returns:
            CrossPriceResult with relationship classification

        Example:
            >>> result = auditor.test_cross_price_effect(user_log, 0, 1)
            >>> print(f"Goods 0 and 1 are {result.relationship}")
        """
        cross_price = test_cross_price_effect(
            log, good_g=good_g, good_h=good_h, tolerance=self.precision
        )
        return cross_price

    # =========================================================================
    # 2024 SURVEY ALGORITHMS
    # =========================================================================

    def validate_smooth_preferences(self, log: BehaviorLog) -> SmoothPreferencesResult:
        """
        Test whether user preferences are smooth (differentiable).

        Smooth preferences enable demand function derivatives for
        price sensitivity analysis. Requires both SARP (no indifferent
        cycles) and price-quantity uniqueness.

        Args:
            log: BehaviorLog containing user's historical actions

        Returns:
            SmoothPreferencesResult with differentiability status

        Example:
            >>> result = auditor.validate_smooth_preferences(user_log)
            >>> if result.is_differentiable:
            ...     print("Preferences are smooth - can compute price elasticities")
        """
        smoothness = validate_smooth_preferences(log, tolerance=self.precision)
        return smoothness

    def validate_strict_consistency(self, log: BehaviorLog) -> StrictConsistencyResult:
        """
        Test strict behavioral consistency (more lenient than full check).

        Tests only strict preference cycles. Passes if violations are
        only due to weak preferences (indifference). Useful for
        identifying "approximately rational" behavior.

        Args:
            log: BehaviorLog containing user's historical actions

        Returns:
            StrictConsistencyResult with consistency status

        Example:
            >>> result = auditor.validate_strict_consistency(user_log)
            >>> if result.strict_violations_only:
            ...     print("GARP fails but only due to indifference")
        """
        strict = validate_strict_consistency(log, tolerance=self.precision)
        return strict

    def validate_price_preferences(self, log: BehaviorLog) -> PricePreferencesResult:
        """
        Test whether the user has consistent price preferences.

        The dual of consistency validation - tests if the user prefers
        situations where their desired items are cheaper. Useful for
        understanding price sensitivity.

        Args:
            log: BehaviorLog containing user's historical actions

        Returns:
            PricePreferencesResult with price preference consistency

        Example:
            >>> result = auditor.validate_price_preferences(user_log)
            >>> if result.prefers_lower_prices:
            ...     print("User consistently seeks lower prices")
        """
        price_prefs = validate_price_preferences(log, tolerance=self.precision)
        return price_prefs

    # =========================================================================
    # MENU-BASED CHOICE ANALYSIS (Abstract Choice Theory)
    # =========================================================================

    # =========================================================================
    # UNIFIED SUMMARY
    # =========================================================================

    def summary(self, log: BehaviorLog) -> "BehavioralSummary":
        """
        Generate a unified summary of all behavioral tests.

        This method only delegates to ``BehavioralSummary.from_log``. Per the
        original documentation, that summary runs the GARP, WARP, SARP, AEI,
        MPI, and Houtman-Maks tests and combines the results into a single,
        statsmodels-style summary object.

        Args:
            log: BehaviorLog containing user's historical actions

        Returns:
            BehavioralSummary with all test results

        Example:
            >>> auditor = BehavioralAuditor()
            >>> summary = auditor.summary(user_log)
            >>> print(summary.summary())  # Print formatted report
            >>> summary.score()  # Get aggregate score
        """
        # Imported at call time rather than at module import time.
        from prefgraph.core.summary import BehavioralSummary

        # NOTE(review): self.precision is not forwarded to from_log, unlike
        # every other method of this class - confirm whether that is intended.
        unified = BehavioralSummary.from_log(log)
        return unified