Function bodies 261 total

CrossLaggedResultsAnalyzer._load_estimated_results method · python · L116-L129 (14 LOC)

src/analysis/analyze_crosslagged_results.py

    def _load_estimated_results(self, estimated_dir: Path) -> None:
        """Collect posterior summaries produced by the estimated lag models.

        Each immediate subdirectory of ``estimated_dir`` is taken as one
        variable. When it contains a ``posterior_summary.csv``, the parsed
        table is stored in ``self.estimated_results`` keyed by the
        subdirectory name. Plain files and subdirectories without that CSV are
        ignored; a CSV that fails to load is reported on stdout and skipped.

        Args:
            estimated_dir: Directory holding one subdirectory per variable.
        """
        for entry in estimated_dir.iterdir():
            if not entry.is_dir():
                continue

            # The posterior summary is the only artefact read from each folder.
            csv_file = entry / "posterior_summary.csv"
            if not csv_file.exists():
                continue

            try:
                self.estimated_results[entry.name] = pd.read_csv(csv_file)
            except Exception as err:
                print(f"  Error loading {csv_file}: {err}")

CrossLaggedResultsAnalyzer._load_cumulative_results method · python · L131-L144 (14 LOC)

src/analysis/analyze_crosslagged_results.py

    def _load_cumulative_results(self, cumulative_dir: Path) -> None:
        """Read the posterior summaries of the cumulative lag models.

        Walks the immediate children of ``cumulative_dir``; for every child
        directory that contains ``posterior_summary.csv`` the CSV is parsed and
        stored in ``self.cumulative_results`` under the directory's name.
        Loading problems are printed to stdout and do not stop the walk.

        Args:
            cumulative_dir: Directory holding one subdirectory per variable.
        """
        variable_dirs = (child for child in cumulative_dir.iterdir() if child.is_dir())

        for variable_dir in variable_dirs:
            table_path = variable_dir / "posterior_summary.csv"
            if table_path.exists():
                try:
                    loaded = pd.read_csv(table_path)
                    self.cumulative_results[variable_dir.name] = loaded
                except Exception as exc:
                    print(f"  Error loading {table_path}: {exc}")

CrossLaggedResultsAnalyzer.create_summary_report method · python · L146-L223 (78 LOC)

src/analysis/analyze_crosslagged_results.py

    def create_summary_report(self) -> pd.DataFrame:
        """Create comprehensive summary report."""
        print("\n" + "=" * 70)
        print("CREATING SUMMARY REPORT")
        print("=" * 70)

        if self.comparison_df is None or self.comparison_df.empty:
            print("No comparison data available.")
            return pd.DataFrame()

        # Create summary statistics
        summary_rows = []

        for var_name in self.comparison_df['variable'].unique():
            df_var = self.comparison_df[self.comparison_df['variable'] == var_name]

            # Basic statistics
            n_models = len(df_var)
            n_fixed = len(df_var[df_var['model_type'] == 'fixed'])
            n_estimated = len(df_var[df_var['model_type'] == 'estimated'])
            n_cumulative = len(df_var[df_var['model_type'] == 'cumulative'])

            # Effect sizes
            beta_means = df_var['beta_mean'].dropna()
            beta_positive = (beta_means > 0).sum()
            bet

CrossLaggedResultsAnalyzer.create_visualizations method · python · L225-L258 (34 LOC)

src/analysis/analyze_crosslagged_results.py

    def create_visualizations(self) -> None:
        """Create comprehensive visualizations."""
        print("\n" + "=" * 70)
        print("CREATING VISUALIZATIONS")
        print("=" * 70)

        if self.comparison_df is None or self.comparison_df.empty:
            print("No data available for visualizations.")
            return

        # Set style
        plt.style.use('seaborn-v0_8-whitegrid')
        sns.set_palette("husl")

        # Create visualization directory
        viz_dir = self.results_dir / "analysis_visualizations"
        viz_dir.mkdir(exist_ok=True)

        # 1. Effect size comparison across variables
        self._plot_effect_sizes(viz_dir)

        # 2. Lag-effect relationship for fixed models
        self._plot_lag_effects(viz_dir)

        # 3. Model comparison by WAIC
        self._plot_model_comparison(viz_dir)

        # 4. Credible interval widths
        self._plot_ci_widths(viz_dir)

        # 5. Posterior distributions for key parameters
        sel

CrossLaggedResultsAnalyzer._plot_effect_sizes method · python · L260-L322 (63 LOC)

src/analysis/analyze_crosslagged_results.py

    def _plot_effect_sizes(self, viz_dir: Path) -> None:
        """Plot effect sizes across variables and models."""
        df = self.comparison_df

        plt.figure(figsize=(14, 8))

        # Create grouped bar chart
        variables = df['variable'].unique()
        model_types = df['model_type'].unique()

        x_pos = np.arange(len(variables))
        bar_width = 0.8 / len(model_types)

        for i, model_type in enumerate(model_types):
            df_model = df[df['model_type'] == model_type]

            # Align by variable
            beta_means = []
            beta_errors_low = []
            beta_errors_high = []

            for var_name in variables:
                df_var = df_model[df_model['variable'] == var_name]
                if not df_var.empty:
                    # For fixed models, take average across lags
                    if model_type == 'fixed':
                        beta_mean = df_var['beta_mean'].mean()
                        beta_ci_low = df

CrossLaggedResultsAnalyzer._plot_lag_effects method · python · L324-L358 (35 LOC)

src/analysis/analyze_crosslagged_results.py

    def _plot_lag_effects(self, viz_dir: Path) -> None:
        """Plot lag-effect relationship for fixed models."""
        df_fixed = self.comparison_df[self.comparison_df['model_type'] == 'fixed']

        if df_fixed.empty:
            return

        plt.figure(figsize=(12, 8))

        # Plot each variable separately
        variables = df_fixed['variable'].unique()

        for var_name in variables:
            df_var = df_fixed[df_fixed['variable'] == var_name]

            # Sort by lag
            df_var = df_var.sort_values('lag_value')

            # Plot with error bars
            plt.errorbar(df_var['lag_value'], df_var['beta_mean'],
                        yerr=[df_var['beta_mean'] - df_var['beta_ci_low'],
                              df_var['beta_ci_high'] - df_var['beta_mean']],
                        fmt='o-', capsize=5, capthick=2, linewidth=2,
                        label=var_name, alpha=0.8, markersize=8)

        plt.axhline(y=0, color='black', linestyle='--'

CrossLaggedResultsAnalyzer._plot_model_comparison method · python · L360-L401 (42 LOC)

src/analysis/analyze_crosslagged_results.py

    def _plot_model_comparison(self, viz_dir: Path) -> None:
        """Plot model comparison by WAIC."""
        df = self.comparison_df

        if df['waic'].isna().all():
            return

        plt.figure(figsize=(12, 8))

        # Create pivot table for WAIC
        df_waic = df.pivot_table(
            index='variable',
            columns='model_type',
            values='waic',
            aggfunc='first'
        )

        # Plot WAIC as heatmap
        plt.imshow(df_waic.values, cmap='viridis', aspect='auto')
        plt.colorbar(label='WAIC (lower is better)')

        # Add labels
        plt.xticks(range(len(df_waic.columns)), df_waic.columns, rotation=45)
        plt.yticks(range(len(df_waic.index)), df_waic.index)
        plt.xlabel('Model Type', fontsize=12)
        plt.ylabel('Workout Variable', fontsize=12)
        plt.title('Model Comparison by WAIC', fontsize=14, fontweight='bold')

        # Add text values
        for i in range(len(df_waic.index)):

Source: Repobility analyzer · https://repobility.com

CrossLaggedResultsAnalyzer._plot_ci_widths method · python · L403-L426 (24 LOC)

src/analysis/analyze_crosslagged_results.py

    def _plot_ci_widths(self, viz_dir: Path) -> None:
        """Plot the mean credible-interval width per variable and model type.

        The width is ``beta_ci_high - beta_ci_low``. Widths are averaged within
        each (variable, model_type) group and drawn as a grouped bar chart that
        is saved to ``viz_dir / "ci_widths_comparison.png"``.

        Side effect: a ``ci_width`` column is written onto
        ``self.comparison_df``. This is kept unchanged because other code may
        read that column.

        Args:
            viz_dir: Directory that receives the PNG file (not created here).
        """
        df = self.comparison_df

        # Calculate CI widths (stored on the shared comparison table).
        df['ci_width'] = df['beta_ci_high'] - df['beta_ci_low']

        # Group by variable and model type: one row per variable, one column
        # per model type.
        df_grouped = df.groupby(['variable', 'model_type'])['ci_width'].mean().unstack()

        output_path = viz_dir / "ci_widths_comparison.png"

        # Draw on an explicitly created axes. DataFrame.plot() without ``ax``
        # opens a figure of its own, so a separate plt.figure() call would be
        # left behind as a blank figure that is never closed.
        fig, ax = plt.subplots(figsize=(12, 8))
        try:
            df_grouped.plot(kind='bar', ax=ax)

            ax.set_xlabel('Workout Variable', fontsize=12)
            ax.set_ylabel('95% Credible Interval Width', fontsize=12)
            ax.set_title('Uncertainty in Effect Estimates by Variable and Model',
                         fontsize=14, fontweight='bold')
            ax.legend(title='Model Type')
            ax.grid(True, alpha=0.3, axis='y')
            fig.tight_layout()
            fig.savefig(output_path, dpi=150)
        finally:
            # Release the figure even when plotting or saving fails.
            plt.close(fig)

        print(f"  Saved CI widths plot: {output_path}")

CrossLaggedResultsAnalyzer._plot_posterior_distributions method · python · L428-L437 (10 LOC)

src/analysis/analyze_crosslagged_results.py

    def _plot_posterior_distributions(self, viz_dir: Path) -> None:
        """Placeholder for posterior-distribution plots of key parameters.

        No figure is produced: drawing full posteriors needs the raw posterior
        samples, which this method does not load. When at least one of the
        fixed, estimated or cumulative result collections is non-empty, a
        two-line hint is printed; otherwise the method returns silently.

        Args:
            viz_dir: Visualization output directory (currently unused).
        """
        has_results = (
            self.fixed_results
            or self.estimated_results
            or self.cumulative_results
        )

        if has_results:
            print("  Note: Full posterior distribution plots require loading posterior samples.")
            print("  Consider running with --load-samples flag when samples are available.")

CrossLaggedResultsAnalyzer.generate_html_report method · python · L439-L547 (109 LOC)

src/analysis/analyze_crosslagged_results.py

    def generate_html_report(self) -> None:
        """Generate HTML report with interactive visualizations."""
        print("\n" + "=" * 70)
        print("GENERATING HTML REPORT")
        print("=" * 70)

        if self.comparison_df is None or self.comparison_df.empty:
            print("No data available for HTML report.")
            return

        # Create HTML report
        report_path = self.results_dir / "analysis_report.html"

        html_content = f"""
        <!DOCTYPE html>
        <html>
        <head>
            <title>Cross-Lagged Model Analysis Report</title>
            <style>
                body {{ font-family: Arial, sans-serif; margin: 40px; }}
                h1 {{ color: #333; border-bottom: 2px solid #333; padding-bottom: 10px; }}
                h2 {{ color: #555; margin-top: 30px; }}
                table {{ border-collapse: collapse; width: 100%; margin: 20px 0; }}
                th, td {{ border: 1px solid #ddd; padding: 12px; text-align: left; }}

CrossLaggedResultsAnalyzer.run_analysis method · python · L549-L581 (33 LOC)

src/analysis/analyze_crosslagged_results.py

    def run_analysis(self, load_individual: bool = False) -> None:
        """Execute the whole results-analysis workflow in order.

        Steps: load the comparison tables, optionally load the individual
        model results, build the summary report, render the visualizations and
        write the HTML report. Progress banners are printed to stdout.

        Args:
            load_individual: Whether to load individual model results (slower).
        """
        banner = "=" * 70

        print(banner)
        print("CROSS-LAGGED RESULTS ANALYSIS")
        print(banner)
        print(f"Results directory: {self.results_dir}")
        print(banner)

        # Comparison tables are always loaded; per-model results are opt-in.
        self.load_comparison_tables()
        if load_individual:
            self.load_individual_results()

        # Reporting stages. The frame returned by the summary step is not
        # used any further in this method.
        self.create_summary_report()
        self.create_visualizations()
        self.generate_html_report()

        print("\n" + banner)
        print("ANALYSIS COMPLETE")
        print(banner)
        print(f"Results saved to: {self.results_dir}")
        print(banner)

main function · python · L584-L607 (24 LOC)

src/analysis/analyze_crosslagged_results.py

def main():
    """Command-line entry point: parse the options and run the analyzer.

    Options:
        --results-dir: Directory containing cross-lagged model results.
        --load-individual: Also load the individual model results.
    """
    cli = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description="Analyze and visualize cross-lagged model results",
    )
    cli.add_argument(
        "--results-dir",
        default="output/full_comparison",
        type=str,
        help="Directory containing cross-lagged model results",
    )
    cli.add_argument(
        "--load-individual",
        help="Load individual model results (slower but more detailed)",
        action="store_true",
    )
    options = cli.parse_args()

    # Build the analyzer for the requested directory and run the full pipeline.
    CrossLaggedResultsAnalyzer(options.results_dir).run_analysis(
        load_individual=options.load_individual
    )

HealthCrossLaggedAnalyzer.__init__ method · python · L69-L144 (76 LOC)

src/analysis/analyze_health_crosslagged.py

    def __init__(
        self,
        workout_vars: List[str],
        health_metrics: Optional[List[str]] = None,
        categories: Optional[List[str]] = None,
        data_dir: str = "data",
        output_dir: str = "output/health_crosslagged",
        fixed_lags: List[float] = [0, 1, 2, 3, 7],
        estimated_lag_prior_mean: float = 3.0,
        estimated_lag_prior_sd: float = 2.0,
        cumulative_window: int = 7,
        cumulative_step: float = 1.0,
        chains: int = 4,
        iter_warmup: int = 500,
        iter_sampling: int = 500,
        use_sparse: bool = True,
        n_inducing_points: int = 50,
        skip_plots: bool = False,
        force_refit: bool = False,
        no_cache: bool = False,
        max_metrics: int = 10,
    ):
        """Initialize analyzer with configuration.

        Args:
            workout_vars: List of workout/activity variables to analyze.
            health_metrics: Specific health metrics to analyze (if None, uses categories).

HealthCrossLaggedAnalyzer.load_data method · python · L146-L198 (53 LOC)

src/analysis/analyze_health_crosslagged.py

    def load_data(self) -> None:
        """Load all required data."""
        print("\n" + "=" * 70)
        print("LOADING DATA")
        print("=" * 70)

        # Load weight data
        print("\nLoading weight data...")
        self.weight_df = load_weight_data(self.data_dir)
        print(f"  Loaded {len(self.weight_df)} weight measurements")

        # Load health metrics
        print("\nLoading health metrics...")
        self.health_df = load_combined_health_data(self.data_dir)
        print(f"  Loaded {len(self.health_df)} days of health data")
        print(f"  Available metrics: {len(self.health_df.columns)}")

        # Get available metrics
        self.available_metrics = get_available_health_metrics()

        # Select health metrics to analyze
        self.selected_metrics = self._select_health_metrics()
        print(f"\nSelected {len(self.selected_metrics)} health metrics for analysis:")
        for metric in self.selected_metrics:
            print(f"  - {metric}"

HealthCrossLaggedAnalyzer._select_health_metrics method · python · L200-L233 (34 LOC)

src/analysis/analyze_health_crosslagged.py

    def _select_health_metrics(self) -> List[str]:
        """Select health metrics to analyze based on user input."""
        all_metrics = []

        # Get all metrics from categories if specified
        if self.categories:
            for category in self.categories:
                if category in self.available_metrics.get('categories', {}):
                    all_metrics.extend(self.available_metrics['categories'][category])

        # Add specific metrics if provided
        if self.health_metrics:
            all_metrics.extend(self.health_metrics)

        # If neither categories nor specific metrics provided, use all
        if not all_metrics:
            for category_metrics in self.available_metrics.get('categories', {}).values():
                all_metrics.extend(category_metrics)

        # Remove duplicates and limit to max_metrics
        unique_metrics = list(dict.fromkeys(all_metrics))

        # Filter out non-numeric columns and date columns
        numeric_metr

Want this analysis on your repo? https://repobility.com/scan/

HealthCrossLaggedAnalyzer._merge_data method · python · L235-L280 (46 LOC)

src/analysis/analyze_health_crosslagged.py

    def _merge_data(self) -> None:
        """Merge weight, workout, and health data."""
        print("\nMerging data...")

        # Start with weight data
        self.merged_df = self.weight_df.copy()

        # Add workout aggregates for each variable
        for workout_var in self.workout_vars:
            if workout_var in self.workout_data:
                # Get workout data for this variable
                workout_df = self.workout_data[workout_var].copy()
                # Rename 'workout_count' to the variable name
                workout_df = workout_df.rename(columns={'workout_count': workout_var})

                # Merge workout data
                self.merged_df = pd.merge(
                    self.merged_df,
                    workout_df[['date', workout_var]],
                    on='date',
                    how='left'
                )
                # Fill missing with 0
                self.merged_df[workout_var] = self.merged_df[workout_var].fillna(0)

HealthCrossLaggedAnalyzer.run_analysis method · python · L282-L309 (28 LOC)

src/analysis/analyze_health_crosslagged.py

    def run_analysis(self) -> None:
        """Run cross-lagged analysis for all workout-health metric pairs."""
        print("\n" + "=" * 70)
        print("RUNNING CROSS-LAGGED ANALYSIS")
        print("=" * 70)

        total_analyses = len(self.workout_vars) * len(self.selected_metrics)
        print(f"\nTotal analyses to run: {total_analyses}")
        print(f"Workout variables: {self.workout_vars}")
        print(f"Health metrics: {self.selected_metrics}")

        analysis_count = 0

        for workout_var in self.workout_vars:
            for health_metric in self.selected_metrics:
                analysis_count += 1
                print(f"\n{'='*60}")
                print(f"Analysis {analysis_count}/{total_analyses}: {workout_var} → {health_metric}")
                print(f"{'='*60}")

                try:
                    result = self._analyze_pair(workout_var, health_metric)
                    self.results[f"{workout_var}_{health_metric}"] = result
                e

HealthCrossLaggedAnalyzer._analyze_pair method · python · L311-L406 (96 LOC)

src/analysis/analyze_health_crosslagged.py

    def _analyze_pair(self, workout_var: str, health_metric: str) -> Dict[str, Any]:
        """Analyze cross-lagged effect for a single workout-health metric pair."""

        # Prepare data for this pair
        pair_df = self.merged_df[['date', workout_var, health_metric]].copy()
        pair_df = pair_df.dropna(subset=[workout_var, health_metric])

        if len(pair_df) < 50:
            raise ValueError(f"Insufficient data: only {len(pair_df)} complete observations")

        print(f"  Data: {len(pair_df)} complete observations")
        print(f"  Workout days: {(pair_df[workout_var] > 0).sum()}")
        print(f"  Health metric range: [{pair_df[health_metric].min():.2f}, {pair_df[health_metric].max():.2f}]")

        # Create output directory for this pair
        pair_dir = self.output_dir / f"{workout_var}_{health_metric}"
        pair_dir.mkdir(parents=True, exist_ok=True)

        # Run fixed lag comparison
        print(f"  Running fixed lag comparison...")
        fixed_r

HealthCrossLaggedAnalyzer.generate_summary_report method · python · L408-L485 (78 LOC)

src/analysis/analyze_health_crosslagged.py

    def generate_summary_report(self) -> None:
        """Generate summary report of all analyses."""
        print("\n" + "=" * 70)
        print("GENERATING SUMMARY REPORT")
        print("=" * 70)

        if not self.results:
            print("No results to summarize")
            return

        # Create summary dataframe
        summary_rows = []

        for key, result in self.results.items():
            # Extract key metrics
            workout_var = result['workout_var']
            health_metric = result['health_metric']

            # Get effect sizes from different models
            fixed_betas = []
            if result['fixed_results']:
                for lag_result in result['fixed_results'].values():
                    if 'beta_mean' in lag_result:
                        fixed_betas.append(lag_result['beta_mean'])

            estimated_beta = None
            if result['estimated_results'] and 'beta_mean' in result['estimated_results']:
                estimated

HealthCrossLaggedAnalyzer._generate_html_report method · python · L487-L593 (107 LOC)

src/analysis/analyze_health_crosslagged.py

    def _generate_html_report(self, summary_df: pd.DataFrame) -> None:
        """Generate HTML report of findings."""
        html_content = f"""
        <!DOCTYPE html>
        <html>
        <head>
            <title>Health Cross-Lagged Analysis Report</title>
            <style>
                body {{ font-family: Arial, sans-serif; margin: 40px; }}
                h1 {{ color: #333; border-bottom: 2px solid #333; padding-bottom: 10px; }}
                h2 {{ color: #555; margin-top: 30px; }}
                table {{ border-collapse: collapse; width: 100%; margin: 20px 0; }}
                th, td {{ border: 1px solid #ddd; padding: 12px; text-align: left; }}
                th {{ background-color: #f2f2f2; font-weight: bold; }}
                tr:nth-child(even) {{ background-color: #f9f9f9; }}
                .summary {{ background-color: #e8f4f8; padding: 15px; border-radius: 5px; margin: 20px 0; }}
                .positive {{ color: green; font-weight: bold; }}

HealthCrossLaggedAnalyzer._print_top_findings method · python · L595-L635 (41 LOC)

src/analysis/analyze_health_crosslagged.py

    def _print_top_findings(self, summary_df: pd.DataFrame) -> None:
        """Print top findings from analysis."""
        print("\n" + "=" * 70)
        print("TOP FINDINGS")
        print("=" * 70)

        if summary_df.empty:
            print("No results to analyze")
            return

        # Find strongest positive effects
        positive_effects = summary_df[summary_df['beta_mean'] > 0.1].copy()
        if not positive_effects.empty:
            positive_effects = positive_effects.sort_values('beta_mean', ascending=False)
            print("\nStrongest Positive Effects (workouts increase health metric):")
            for _, row in positive_effects.head(5).iterrows():
                print(f"  {row['workout_var']} → {row['health_metric']}: β = {row['beta_mean']:.3f} ± {row['beta_std']:.3f}")

        # Find strongest negative effects
        negative_effects = summary_df[summary_df['beta_mean'] < -0.1].copy()
        if not negative_effects.empty:
            negative_effe

main function · python · L638-L805 (168 LOC)

src/analysis/analyze_health_crosslagged.py

def main():
    """Main entry point."""
    parser = argparse.ArgumentParser(
        description="Analyze cross-lagged effects of workouts on health metrics"
    )

    # Required arguments
    parser.add_argument(
        "--workout-vars",
        type=str,
        required=True,
        help="Comma-separated list of workout variables to analyze"
    )

    # Health metric selection
    parser.add_argument(
        "--health-metrics",
        type=str,
        default="",
        help="Comma-separated list of specific health metrics to analyze"
    )
    parser.add_argument(
        "--categories",
        type=str,
        default="",
        help="Comma-separated list of health metric categories (sleep,stress,heart,activity,respiration,body_battery)"
    )
    parser.add_argument(
        "--max-metrics",
        type=int,
        default=10,
        help="Maximum number of health metrics to analyze (default: 10)"
    )

    # Model configuration
    parser.add_argument(
        "-

main function · python · L13-L109 (97 LOC)

src/analysis/bivariate_evaluation.py

def main():
    output_dir = Path("output/bivariate")
    output_dir.mkdir(parents=True, exist_ok=True)

    print("Fitting bivariate model (weight + resting heart rate)...")
    fit, idata, df, stan_data = fit_bivariate_model(
        chains=2,
        iter_warmup=100,
        iter_sampling=100,
        cache=False,
        force_refit=True,
        use_sparse=True,
        n_inducing_points=30,
    )

    print("\n=== Model Summary ===")
    print(f"Observations: {len(df)}")
    print(f"Date range: {df['date'].min().date()} to {df['date'].max().date()}")

    # Extract posterior samples
    posterior = idata.posterior
    print("\nPosterior variables:", list(posterior.data_vars.keys()))

    # Correlation between latent processes
    if 'correlation' in posterior:
        corr_samples = posterior['correlation'].values.flatten()
        print("\nLatent correlation posterior:")
        print(f"  Mean: {np.mean(corr_samples):.3f}")
        print(f"  SD: {np.std(corr_samples):.3f}")

Citation: Repobility (2026). State of AI-Generated Code. https://repobility.com/research/

CrossLaggedModelComparison.__init__ method · python · L76-L149 (74 LOC)

src/analysis/compare_crosslagged_models.py

    def __init__(
        self,
        workout_vars: List[str],
        data_dir: str = "data",
        output_dir: str = "output/crosslagged_comparison",
        fixed_lags: List[float] = [0, 1, 2, 3, 7],
        estimated_lag_prior_mean: float = 3.0,
        estimated_lag_prior_sd: float = 2.0,
        cumulative_window: int = 7,
        cumulative_step: float = 1.0,
        chains: int = 4,
        iter_warmup: int = 500,
        iter_sampling: int = 500,
        use_sparse: bool = True,
        n_inducing_points: int = 50,
        skip_plots: bool = False,
        force_refit: bool = False,
        no_cache: bool = False,
    ):
        """Initialize comparison with configuration.

        Args:
            workout_vars: List of workout/activity variables to analyze.
            data_dir: Path to data directory.
            output_dir: Directory for output files.
            fixed_lags: List of lag values for fixed lag model (days).
            estimated_lag_prior_mean: Prior mean

CrossLaggedModelComparison.load_data method · python · L151-L184 (34 LOC)

src/analysis/compare_crosslagged_models.py

    def load_data(self) -> None:
        """Load weight and workout data for all variables."""
        print("\n" + "=" * 70)
        print("LOADING DATA")
        print("=" * 70)

        # Load weight data (common for all variables)
        print("\n1. Loading weight data...")
        self.df_weight = load_weight_data(self.data_dir)
        print(f"   Weight measurements: {len(self.df_weight)}")

        # Load workout data for each variable
        for var_name in self.workout_vars:
            print(f"\n2. Loading workout data for '{var_name}'...")
            df_workouts_raw = load_workout_data(
                data_dir=self.data_dir,
                activity_type=var_name,
                include_exercise_details=False,
            )
            print(f"   Raw workout records: {len(df_workouts_raw)}")

            # Aggregate workouts to daily count
            df_workouts_agg = prepare_workout_aggregates(
                df_workouts_raw,
                aggregation="daily",

CrossLaggedModelComparison.run_fixed_lag_analysis method · python · L186-L241 (56 LOC)

src/analysis/compare_crosslagged_models.py

    def run_fixed_lag_analysis(self) -> None:
        """Run fixed lag model comparison."""
        print("\n" + "=" * 70)
        print("FIXED LAG MODEL ANALYSIS")
        print("=" * 70)

        # Create args object for run_fixed_lag_comparison
        class Args:
            pass

        args = Args()
        args.data_dir = str(self.data_dir)
        args.no_sparse = not self.use_sparse
        args.n_inducing_points = self.n_inducing_points
        args.chains = self.chains
        args.iter_warmup = self.iter_warmup
        args.iter_sampling = self.iter_sampling
        args.no_cache = self.no_cache
        args.force_refit = self.force_refit
        args.include_prediction_grid = False
        args.prediction_step_days = 1.0
        args.skip_plots = self.skip_plots

        # Run fixed lag analysis using demo_bivariate function
        self.fixed_results = run_fixed_lag_comparison(
            args=args,
            workout_vars=self.workout_vars,
            lag_values=self

CrossLaggedModelComparison.run_estimated_lag_analysis method · python · L243-L301 (59 LOC)

src/analysis/compare_crosslagged_models.py

    def run_estimated_lag_analysis(self) -> None:
        """Run estimated lag model analysis."""
        print("\n" + "=" * 70)
        print("ESTIMATED LAG MODEL ANALYSIS")
        print("=" * 70)

        # Create args object for run_estimated_lag_analysis
        class Args:
            pass

        args = Args()
        args.data_dir = str(self.data_dir)
        args.no_sparse = not self.use_sparse
        args.n_inducing_points = self.n_inducing_points
        args.chains = self.chains
        args.iter_warmup = self.iter_warmup
        args.iter_sampling = self.iter_sampling
        args.no_cache = self.no_cache
        args.force_refit = self.force_refit
        args.include_prediction_grid = False
        args.prediction_step_days = 1.0
        args.skip_plots = self.skip_plots
        args.lag_prior_mean = self.estimated_lag_prior_mean
        args.lag_prior_sd = self.estimated_lag_prior_sd

        # Run estimated lag analysis using demo_bivariate function
        self.esti

CrossLaggedModelComparison.run_cumulative_lag_analysis method · python · L303-L359 (57 LOC)

src/analysis/compare_crosslagged_models.py

    def run_cumulative_lag_analysis(self) -> None:
        """Run cumulative lag model analysis."""
        print("\n" + "=" * 70)
        print("CUMULATIVE LAG MODEL ANALYSIS")
        print("=" * 70)

        # Create args object for run_cumulative_lag_analysis
        class Args:
            pass

        args = Args()
        args.data_dir = str(self.data_dir)
        args.no_sparse = not self.use_sparse
        args.n_inducing_points = self.n_inducing_points
        args.chains = self.chains
        args.iter_warmup = self.iter_warmup
        args.iter_sampling = self.iter_sampling
        args.no_cache = self.no_cache
        args.force_refit = self.force_refit
        args.include_prediction_grid = False
        args.prediction_step_days = 1.0
        args.skip_plots = self.skip_plots
        args.lag_window = self.cumulative_window
        args.lag_step = self.cumulative_step

        # Run cumulative lag analysis using demo_bivariate function
        self.cumulative_results =

CrossLaggedModelComparison.create_comparison_tables method · python · L361-L413 (53 LOC)

src/analysis/compare_crosslagged_models.py

    def create_comparison_tables(self) -> pd.DataFrame:
        """Create comparison tables from all model results."""
        print("\n" + "=" * 70)
        print("CREATING COMPARISON TABLES")
        print("=" * 70)

        if not self.comparison_results:
            print("No results to compare.")
            return pd.DataFrame()

        # Create DataFrame
        df = pd.DataFrame(self.comparison_results)

        # Save raw comparison table
        df.to_csv(self.output_dir / "model_comparison_raw.csv", index=False)
        print(f"Saved raw comparison table: {self.output_dir / 'model_comparison_raw.csv'}")

        # Create summary table (best model per variable by WAIC)
        summary_rows = []
        for var_name in df['variable'].unique():
            df_var = df[df['variable'] == var_name]

            # Find best model by WAIC (lowest)
            if df_var['waic'].notna().any():
                best_idx = df_var['waic'].idxmin()
                best_row = df_var.loc[be

CrossLaggedModelComparison.create_comparison_plots method · python · L415-L541 (127 LOC)

src/analysis/compare_crosslagged_models.py

    def create_comparison_plots(self, df: pd.DataFrame) -> None:
        """Create comparison visualizations."""
        if self.skip_plots or df.empty:
            return

        print("\n" + "=" * 70)
        print("CREATING COMPARISON VISUALIZATIONS")
        print("=" * 70)

        # Set style
        plt.style.use('seaborn-v0_8-whitegrid')
        sns.set_palette("husl")

        # 1. β comparison across models for each variable
        for var_name in df['variable'].unique():
            df_var = df[df['variable'] == var_name]

            plt.figure(figsize=(12, 8))

            # Separate fixed lag points
            df_fixed = df_var[df_var['model_type'] == 'fixed']
            df_other = df_var[df_var['model_type'] != 'fixed']

            # Plot fixed lag points with line
            if not df_fixed.empty:
                # Sort by lag
                df_fixed = df_fixed.sort_values('lag_value')
                plt.errorbar(
                    df_fixed['lag_value'],

CrossLaggedModelComparison.generate_report method · python · L543-L658 (116 LOC)

src/analysis/compare_crosslagged_models.py

    def generate_report(self, df: pd.DataFrame) -> None:
        """Generate comprehensive markdown report."""
        report_path = self.output_dir / "crosslagged_comparison_report.md"

        with open(report_path, 'w') as f:
            f.write("# Cross-Lagged Model Comparison Report\n\n")
            f.write(f"**Generated**: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
            f.write(f"**Workout variables**: {', '.join(self.workout_vars)}\n")
            f.write(f"**Weight observations**: {len(self.df_weight)}\n\n")

            f.write("## Model Configuration\n\n")
            f.write("### Fixed Lag Model\n")
            f.write(f"- Lag values: {self.fixed_lags} days\n")
            f.write(f"- MCMC: {self.chains} chains, {self.iter_warmup} warmup, {self.iter_sampling} sampling\n\n")

            f.write("### Estimated Lag Model\n")
            f.write(f"- Lag prior: N({self.estimated_lag_prior_mean}, {self.estimated_lag_prior_sd}²) days\n")
            f.write(f"- MCM

Generated by Repobility's multi-pass static-analysis pipeline (https://repobility.com)

CrossLaggedModelComparison.run_comparison method · python · L660-L707 (48 LOC)

src/analysis/compare_crosslagged_models.py

    def run_comparison(self) -> None:
        """Run full comparison pipeline."""
        print("\n" + "=" * 70)
        print("CROSS-LAGGED MODEL COMPARISON")
        print("=" * 70)
        print(f"Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
        print(f"Workout variables: {', '.join(self.workout_vars)}")
        print(f"Output directory: {self.output_dir}")
        print(f"MCMC: {self.chains} chains, {self.iter_warmup} warmup, {self.iter_sampling} sampling")
        print(f"Sparse GP: {self.use_sparse} ({self.n_inducing_points} inducing points)")
        print(f"Fixed lags: {self.fixed_lags}")
        print(f"Estimated lag prior: N({self.estimated_lag_prior_mean}, {self.estimated_lag_prior_sd}²) days")
        print(f"Cumulative window: {self.cumulative_window} days (step: {self.cumulative_step} days)")
        print("=" * 70)

        # Load data
        self.load_data()

        if not self.workout_data:
            print("ERROR: No workout data loaded. Exiting.")

main function · python · L710-L861 (152 LOC)

src/analysis/compare_crosslagged_models.py

def main():
    parser = argparse.ArgumentParser(
        description="Compare different cross-lagged modeling approaches",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )

    # Required arguments
    parser.add_argument(
        "--workout-vars",
        type=str,
        required=True,
        help="Comma-separated list of workout/activity variables to analyze. "
             "Examples: 'strength_training,walking,cycling'"
    )

    # Model configurations
    parser.add_argument(
        "--fixed-lags",
        type=str,
        default="0,1,2,3,7",
        help="Comma-separated list of lag values for fixed lag model (days)"
    )

    parser.add_argument(
        "--estimated-lag-prior-mean",
        type=float,
        default=3.0,
        help="Prior mean for estimated lag parameter (days)"
    )

    parser.add_argument(
        "--estimated-lag-prior-sd",
        type=float,
        default=2.0,
        help="Prior standard deviation for estimated lag par

prepare_weight_for_simple_gp function · python · L14-L25 (12 LOC)

src/analysis/correlate_independent_gps.py

def prepare_weight_for_simple_gp(df_weight: pd.DataFrame) -> pd.DataFrame:
    """Collapse raw weight readings into one row per calendar day.

    Args:
        df_weight: Frame with a datetime-like ``timestamp`` column and a
            ``weight_lbs`` column. It is not modified.

    Returns:
        A new frame with one row per day and the columns ``timestamp``
        (the day, as a datetime), ``weight`` (daily mean of ``weight_lbs``),
        ``std`` and ``count``.
    """
    # Truncate every reading to its calendar day; assign() returns a new
    # frame, so the caller's data is left untouched.
    per_reading = df_weight.assign(
        date=pd.to_datetime(df_weight["timestamp"].dt.date)
    )

    # One row of summary statistics per day.
    day_stats = per_reading.groupby("date")["weight_lbs"].agg(["mean", "std", "count"])

    return day_stats.reset_index().rename(
        columns={"date": "timestamp", "mean": "weight"}
    )

align_to_common_time function · python · L28-L48 (21 LOC)

src/analysis/correlate_independent_gps.py

def align_to_common_time(df1, time_col1, df2, time_col2):
    """Align two time series to a common global time zero.

    Args:
        df1: First DataFrame.
        time_col1: Name of the time column in ``df1``.
        df2: Second DataFrame.
        time_col2: Name of the time column in ``df2``.

    Returns:
        df1_aligned, df2_aligned, global_t_min, global_t_max
        Each aligned DataFrame is a copy of its input with a new column
        'days_since_global': the whole-day component of the difference
        between the row's time and ``global_t_min``.
        ``global_t_min`` / ``global_t_max`` are the earliest / latest
        time found across both inputs, or ``NaT`` when neither input
        contains a valid time.
    """
    # Convert to datetime if not already
    t1 = pd.to_datetime(df1[time_col1])
    t2 = pd.to_datetime(df2[time_col2])

    # Series.min()/max() return NaT for an empty or all-missing series, and
    # every comparison against NaT is False, so builtin min()/max() over the
    # two results would depend on argument order. Drop NaT candidates first
    # so an empty input never poisons the global bounds.
    starts = [t for t in (t1.min(), t2.min()) if not pd.isna(t)]
    ends = [t for t in (t1.max(), t2.max()) if not pd.isna(t)]
    global_t_min = min(starts) if starts else pd.NaT
    global_t_max = max(ends) if ends else pd.NaT

    # Work on copies so the callers' frames are left unmodified.
    df1_aligned = df1.copy()
    df2_aligned = df2.copy()

    df1_aligned['days_since_global'] = (t1 - global_t_min).dt.days
    df2_aligned['days_since_global'] = (t2 - global_t_min).dt.days

    return df1_aligned, df2_aligned, global_t_min, global_t_max

sample_latent_functions_on_grid function · python · L51-L120 (70 LOC)

src/analysis/correlate_independent_gps.py

def sample_latent_functions_on_grid(
    idata_weight,
    idata_other,
    stan_data_weight,
    stan_data_other,
    n_samples: int = 100,
) -> tuple:
    """Sample latent functions on common time grid using f_pred.

    Requires that both GPs were fitted with same prediction grid (t_pred).

    Returns:
        Tuple of (t_grid_days, f_weight_samples, f_other_samples)
        where each samples matrix is n_samples x len(t_grid_days)
        t_grid_days is absolute days (since global time zero).
    """
    # Check if f_pred exists in posterior_predictive or posterior
    f_pred_weight = None
    f_pred_other = None

    if "f_pred" in idata_weight.posterior_predictive:
        f_pred_weight = idata_weight.posterior_predictive["f_pred"]
    elif "f_pred" in idata_weight.posterior:
        f_pred_weight = idata_weight.posterior["f_pred"]

    if "f_pred" in idata_other.posterior_predictive:
        f_pred_other = idata_other.posterior_predictive["f_pred"]
    elif "f_pred" in idata_ot

compute_correlation_from_samples function · python · L123-L152 (30 LOC)

src/analysis/correlate_independent_gps.py

def compute_correlation_from_samples(
    f_weight_samples: np.ndarray,
    f_other_samples: np.ndarray,
) -> dict:
    """Compute correlation statistics from latent function samples."""
    n_samples = f_weight_samples.shape[0]
    correlations = np.zeros(n_samples)

    for i in range(n_samples):
        # Pearson correlation between two vectors
        if np.std(f_weight_samples[i]) > 1e-10 and np.std(f_other_samples[i]) > 1e-10:
            corr = np.corrcoef(f_weight_samples[i], f_other_samples[i])[0, 1]
            correlations[i] = corr
        else:
            correlations[i] = np.nan

    # Remove NaN
    correlations = correlations[~np.isnan(correlations)]

    if len(correlations) == 0:
        return None

    return {
        "mean": np.mean(correlations),
        "std": np.std(correlations),
        "2.5%": np.percentile(correlations, 2.5),
        "50%": np.percentile(correlations, 50),
        "97.5%": np.percentile(correlations, 97.5),
        "samples": correlations,

analyze_weight_vo2max function · python · L155-L306 (152 LOC)

src/analysis/correlate_independent_gps.py

def analyze_weight_vo2max():
    """Analyze correlation between weight and VO2 max using independent GPs with common prediction grid."""
    output_dir = Path("output/independent_gp_correlation")
    output_dir.mkdir(parents=True, exist_ok=True)

    print("=== Weight vs VO2 Max Correlation Analysis ===")

    # Load data
    print("\n1. Loading data...")
    df_weight_raw = load_weight_data()
    df_vo2max = load_vo2max_data()

    print(f"   Weight: {len(df_weight_raw)} measurements")
    print(f"   VO2 max: {len(df_vo2max)} measurements")

    # Prepare weight data (aggregate to daily)
    df_weight_daily = prepare_weight_for_simple_gp(df_weight_raw)
    print(f"   Weight (daily): {len(df_weight_daily)} days")

    # Align both datasets to common global time
    print("\n2. Aligning to common global time...")
    df_weight_aligned, df_vo2_aligned, global_t_min, global_t_max = align_to_common_time(
        df_weight_daily, "timestamp", df_vo2max, "date"
    )
    print(f"   Global ti

plot_correlation_matrix function · python · L15-L45 (31 LOC)

src/analysis/explore_correlations.py

def plot_correlation_matrix(df: pd.DataFrame, output_dir: Path = Path("output/correlations")):
    """Plot correlation matrix between weight and other variables."""
    # Select numeric columns of interest
    numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
    # Filter to relevant metrics (exclude derived columns)
    exclude = ["weight_day_of_week", "weight_day_of_year", "weight_variable"]
    metric_cols = [c for c in numeric_cols if c not in exclude and not c.startswith("_")]
    # Keep top 20 most relevant (prioritize weight columns and key metrics)
    weight_cols = [c for c in metric_cols if "weight" in c]
    other_cols = [c for c in metric_cols if "weight" not in c]
    # Select up to 15 other columns with most non-missing values
    other_cols = sorted(other_cols, key=lambda c: df[c].notnull().sum(), reverse=True)[:15]
    selected_cols = weight_cols + other_cols

    corr_df = df[selected_cols].corr()

    # Create figure
    plt.figure(figsize=(14, 12)

Source: Repobility analyzer · https://repobility.com

plot_scatter_pairs function · python · L48-L102 (55 LOC)

src/analysis/explore_correlations.py

def plot_scatter_pairs(df: pd.DataFrame, output_dir: Path = Path("output/correlations")):
    """Create scatter plots of weight vs key metrics."""
    key_metrics = [
        "resting_heart_rate",
        "total_steps",
        "active_kilocalories",
        "avg_stress_level",
        "moderate_intensity_minutes",
        "vigorous_intensity_minutes",
        "highly_active_seconds",
        "min_heart_rate",
        "max_heart_rate",
    ]
    # Filter to available columns
    key_metrics = [m for m in key_metrics if m in df.columns]

    # Create subplot grid
    n_cols = 3
    n_rows = (len(key_metrics) + n_cols - 1) // n_cols
    fig, axes = plt.subplots(n_rows, n_cols, figsize=(5 * n_cols, 4 * n_rows))
    axes = axes.flatten() if n_rows > 1 else [axes]

    for idx, metric in enumerate(key_metrics):
        ax = axes[idx]
        # Drop NA pairs
        subset = df[["weight_mean", metric]].dropna()
        if len(subset) < 2:
            ax.text(0.5, 0.5, f"No data for {metric}"

plot_time_series function · python · L105-L135 (31 LOC)

src/analysis/explore_correlations.py

def plot_time_series(df: pd.DataFrame, output_dir: Path = Path("output/correlations")):
    """Plot time series of weight and key metrics."""
    # Select a few key metrics to plot alongside weight
    key_metrics = ["resting_heart_rate", "total_steps", "active_kilocalories", "avg_stress_level"]
    key_metrics = [m for m in key_metrics if m in df.columns]

    fig, axes = plt.subplots(len(key_metrics) + 1, 1, figsize=(14, 3 * (len(key_metrics) + 1)), sharex=True)

    # Plot weight
    ax = axes[0]
    ax.plot(df["date"], df["weight_mean"], label="Weight (mean)", color="blue", linewidth=1.5)
    ax.fill_between(df["date"], df["weight_min"], df["weight_max"], alpha=0.2, color="blue", label="Weight range")
    ax.set_ylabel("Weight (lbs)")
    ax.legend(loc="upper left")
    ax.grid(True, alpha=0.3)

    # Plot each metric
    for idx, metric in enumerate(key_metrics, start=1):
        ax = axes[idx]
        ax.plot(df["date"], df[metric], label=metric, color=f"C{idx}", linewidth=1.5)

main function · python · L138-L172 (35 LOC)

src/analysis/explore_correlations.py

def main():
    """Run all exploration plots."""
    output_dir = Path("output/correlations")
    output_dir.mkdir(parents=True, exist_ok=True)

    print("Loading merged data...")
    df = merge_weight_with_daily_metrics()
    print(f"Dataset shape: {df.shape}")
    print(f"Date range: {df['date'].min()} to {df['date'].max()}")

    # Basic statistics
    print("\nWeight statistics:")
    print(df[["weight_mean", "weight_std", "weight_count"]].describe())

    print("\nKey metrics statistics:")
    key = ["resting_heart_rate", "total_steps", "active_kilocalories", "avg_stress_level"]
    for metric in key:
        if metric in df.columns:
            print(f"{metric}: mean={df[metric].mean():.2f}, sd={df[metric].std():.2f}")

    # Generate plots
    print("\nGenerating correlation matrix...")
    plot_correlation_matrix(df, output_dir)

    print("\nGenerating scatter plots...")
    plot_scatter_pairs(df, output_dir)

    print("\nGenerating time series plots...")
    plot_time_serie

HealthWorkoutCorrelationAnalyzer.__init__ method · python · L47-L83 (37 LOC)

src/analysis/explore_health_workout_correlations.py

    def __init__(
        self,
        workout_vars: List[str],
        max_lag: int = 7,
        data_dir: str = "data",
        output_dir: str = "output/health_correlations",
        max_metrics: int = 20,
        min_observations: int = 50,
    ):
        """Initialize analyzer.

        Args:
            workout_vars: List of workout variables to analyze.
            max_lag: Maximum lag to compute correlations for (0-7 days).
            data_dir: Path to data directory.
            output_dir: Directory for output files.
            max_metrics: Maximum number of health metrics to analyze.
            min_observations: Minimum number of observations required.
        """
        self.workout_vars = workout_vars
        self.max_lag = max_lag
        self.data_dir = Path(data_dir)
        self.output_dir = Path(output_dir)
        self.max_metrics = max_metrics
        self.min_observations = min_observations

        # Create output directory
        self.output_dir.mkdir(paren

HealthWorkoutCorrelationAnalyzer.load_data method · python · L85-L130 (46 LOC)

src/analysis/explore_health_workout_correlations.py

    def load_data(self) -> None:
        """Load all required data."""
        print("\n" + "=" * 70)
        print("LOADING DATA")
        print("=" * 70)

        # Load health metrics
        print("\nLoading health metrics...")
        self.health_df = load_combined_health_data(self.data_dir)
        print(f"  Loaded {len(self.health_df)} days of health data")
        print(f"  Available metrics: {len(self.health_df.columns)}")

        # Get available metrics
        self.available_metrics = get_available_health_metrics()

        # Select health metrics to analyze
        self._select_health_metrics()
        print(f"\nSelected {len(self.selected_metrics)} health metrics for analysis:")
        for metric in self.selected_metrics[:10]:  # Show first 10
            print(f"  - {metric}")
        if len(self.selected_metrics) > 10:
            print(f"  ... and {len(self.selected_metrics) - 10} more")

        # Load workout data for each variable
        for var_name in self.worko

HealthWorkoutCorrelationAnalyzer._select_health_metrics method · python · L132-L153 (22 LOC)

src/analysis/explore_health_workout_correlations.py

    def _select_health_metrics(self) -> None:
        """Select health metrics to analyze."""
        # Get all metrics from all categories
        all_metrics = []
        for category_metrics in self.available_metrics.get('categories', {}).values():
            all_metrics.extend(category_metrics)

        # Remove duplicates
        unique_metrics = list(dict.fromkeys(all_metrics))

        # Filter out non-numeric columns and date columns
        numeric_metrics = []
        for metric in unique_metrics:
            if metric in self.health_df.columns:
                if pd.api.types.is_numeric_dtype(self.health_df[metric]):
                    # Check for sufficient non-missing values
                    non_missing = self.health_df[metric].notna().sum()
                    if non_missing >= self.min_observations:
                        numeric_metrics.append(metric)

        # Limit to max_metrics
        self.selected_metrics = numeric_metrics[:self.max_metrics]

HealthWorkoutCorrelationAnalyzer.compute_correlations method · python · L155-L231 (77 LOC)

src/analysis/explore_health_workout_correlations.py

    def compute_correlations(self) -> None:
        """Compute lagged correlations between workouts and health metrics."""
        print("\n" + "=" * 70)
        print("COMPUTING CORRELATIONS")
        print("=" * 70)

        total_analyses = len(self.workout_vars) * len(self.selected_metrics)
        print(f"\nTotal analyses to compute: {total_analyses}")

        for workout_var in self.workout_vars:
            if workout_var not in self.workout_data:
                print(f"\nSkipping {workout_var}: no workout data")
                continue

            print(f"\n{'='*60}")
            print(f"Analyzing: {workout_var}")
            print(f"{'='*60}")

            # Get workout data
            workout_df = self.workout_data[workout_var].copy()
            workout_df = workout_df.rename(columns={'workout_count': 'workout'})

            # Merge with health data
            merged_df = pd.merge(
                self.health_df[['date'] + self.selected_metrics],
                worko

HealthWorkoutCorrelationAnalyzer._compute_correlation method · python · L233-L267 (35 LOC)

src/analysis/explore_health_workout_correlations.py

    def _compute_correlation(
        self,
        df: pd.DataFrame,
        x_col: str,
        y_col: str,
        lag: int = 0
    ) -> Tuple[Optional[float], Optional[float]]:
        """Compute correlation between x and y with optional lag.

        For lag > 0: x(t) correlated with y(t+lag)
        """
        # Create lagged series
        if lag == 0:
            x_series = df[x_col]
            y_series = df[y_col]
        else:
            # Shift y forward by lag days
            x_series = df[x_col].iloc[:-lag] if lag > 0 else df[x_col]
            y_series = df[y_col].iloc[lag:] if lag > 0 else df[y_col]

        # Align series
        aligned_df = pd.DataFrame({
            'x': x_series.reset_index(drop=True),
            'y': y_series.reset_index(drop=True)
        }).dropna()

        if len(aligned_df) < self.min_observations:
            return None, None

        # Compute Pearson correlation
        try:
            corr, p_value = stats.pearsonr(aligned_df['x'],

Want this analysis on your repo? https://repobility.com/scan/

HealthWorkoutCorrelationAnalyzer.generate_summary_report method · python · L269-L328 (60 LOC)

src/analysis/explore_health_workout_correlations.py

    def generate_summary_report(self) -> None:
        """Generate summary report of correlation findings."""
        print("\n" + "=" * 70)
        print("GENERATING SUMMARY REPORT")
        print("=" * 70)

        if not self.correlation_results:
            print("No correlation results to summarize")
            return

        # Create summary dataframe
        summary_rows = []

        for workout_var, health_results in self.correlation_results.items():
            for health_metric, lag_results in health_results.items():
                # Find strongest correlation (absolute value)
                if not lag_results:
                    continue

                strongest = max(
                    lag_results.items(),
                    key=lambda x: abs(x[1]['correlation'])
                )
                lag, result = strongest

                summary_rows.append({
                    'workout_var': workout_var,
                    'health_metric': health_metric,

HealthWorkoutCorrelationAnalyzer._generate_html_report method · python · L330-L479 (150 LOC)

src/analysis/explore_health_workout_correlations.py

    def _generate_html_report(self, summary_df: pd.DataFrame) -> None:
        """Generate HTML report of findings."""
        # Group by significance
        significant = summary_df[summary_df['significant']]
        not_significant = summary_df[~summary_df['significant']]

        html_content = f"""
        <!DOCTYPE html>
        <html>
        <head>
            <title>Health-Workout Correlation Analysis</title>
            <style>
                body {{ font-family: Arial, sans-serif; margin: 40px; }}
                h1 {{ color: #333; border-bottom: 2px solid #333; padding-bottom: 10px; }}
                h2 {{ color: #555; margin-top: 30px; }}
                h3 {{ color: #777; margin-top: 20px; }}
                table {{ border-collapse: collapse; width: 100%; margin: 20px 0; }}
                th, td {{ border: 1px solid #ddd; padding: 12px; text-align: left; }}
                th {{ background-color: #f2f2f2; font-weight: bold; }}
                tr:nth-child(even) {{ bac

HealthWorkoutCorrelationAnalyzer._print_top_findings method · python · L481-L514 (34 LOC)

src/analysis/explore_health_workout_correlations.py

    def _print_top_findings(self, summary_df: pd.DataFrame) -> None:
        """Print top findings from analysis."""
        print("\n" + "=" * 70)
        print("TOP FINDINGS")
        print("=" * 70)

        # Top positive correlations
        positive = summary_df[summary_df['correlation'] > 0].copy()
        if not positive.empty:
            positive = positive.sort_values('correlation', ascending=False)
            print("\nStrongest Positive Correlations:")
            for _, row in positive.head(5).iterrows():
                sig = "**" if row['significant'] else ""
                print(f"  {row['workout_var']} → {row['health_metric']} (lag {row['strongest_lag']}d): r = {row['correlation']:.3f}{sig}")

        # Top negative correlations
        negative = summary_df[summary_df['correlation'] < 0].copy()
        if not negative.empty:
            negative = negative.sort_values('correlation', ascending=True)
            print("\nStrongest Negative Correlations:")

‹ prev · page 3 / 6 · next ›