CausalRL Report

Off-Policy Evaluation Report

Generated 2026-01-24T00:07:16.891348+00:00 Version 0.2.0 Fingerprint 87253ebe7de8254bb453ce5442e70c252f3a603c32a906a05045e40a376bae0b

Primary estimate

-0.0272343

Estimator: IS

95% CI: [-0.15682, 0.102351]

Data health

  • Overlap violations: 0
  • ESS ratio (ESS / number of samples): 0.439807375042398
  • Tail fraction: 0.0

Decision

Proceed

No blocking warnings detected.

Estimates

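Sortable table of estimator outputs: point estimate (Value), standard error (StdErr), 95% confidence-interval bounds (Lower, Upper), and number of warnings raised.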

Estimator Value StdErr Lower Upper Warnings
IS -0.027234292692463326 0.06611637767190308 -0.1568200117176415 0.10235142633271485 0
WIS -0.025991998193132713 0.06242378995891721 -0.14834037829110347 0.09635638190483803 0

Diagnostics

Overlap, effective sample size (ESS), importance-weight tails, and shift metrics.

Overlap
{
  "fraction_behavior_below_threshold": 0.0,
  "fraction_target_below_threshold": 0.0,
  "min_behavior_prob": 0.07632959390848924,
  "min_target_prob": 0.026157231486371814,
  "ratio_max": 3.5684002030806057,
  "ratio_min": 0.04705472051726874,
  "ratio_q50": 0.18765879697154786,
  "ratio_q90": 2.6244420066388168,
  "ratio_q99": 3.5684002030806057,
  "support_violations": 0
}
Effective sample size
{
  "ess": 219.90368752119898,
  "ess_ratio": 0.439807375042398
}
Weight tails
{
  "kurtosis": -1.0233732664145163,
  "max": 3.5684002030806057,
  "mean": 1.0477952672241582,
  "min": 0.04705472051726874,
  "q95": 3.3050985744459047,
  "q99": 3.5684002030806057,
  "recommended_clip": 3.5684002030806057,
  "skew": 0.7313077102896869,
  "std": 1.1825346419140357,
  "tail_fraction": 0.0
}
Shift
{
  "cov_shift_fro": 0.0004537222094861404,
  "ess": 219.90368752119898,
  "mean_shift_norm": 0.008085046034013388,
  "mmd_rbf": 0.0017401005426647131
}

Metadata

Dataset, assumptions, configs, and environment.

Dataset summary
{
  "action_space_n": 4,
  "behavior_action_probs_present": true,
  "behavior_prob_max": 0.5821445995016145,
  "behavior_prob_min": 0.07632959390848924,
  "context_dim": 1,
  "num_samples": 500,
  "reward_max": 1.4403512335110324,
  "reward_mean": -0.2375694002833386,
  "reward_min": -1.551239008736725,
  "reward_std": 0.7335348450022354,
  "type": "bandit"
}
Configs
{
  "diagnostics": "default",
  "estimators": [
    "IS",
    "WIS"
  ],
  "inference": {},
  "sensitivity": {}
}
Environment
{
  "package_versions": {
    "causalrl": "0.2.0",
    "numpy": "2.4.1",
    "pandas": "2.3.3",
    "torch": "2.9.1"
  },
  "platform": "macOS-15.6.1-arm64-arm-64bit",
  "python_version": "3.11.9 | packaged by conda-forge | (main, Apr 19 2024, 18:34:54) [Clang 16.0.6 ]"
}

Payload

Complete JSON payload.

Show JSON
{
  "diagnostics": {
    "calibration": {
      "available": false
    },
    "ess": {
      "ess": 219.90368752119898,
      "ess_ratio": 0.439807375042398
    },
    "overlap": {
      "fraction_behavior_below_threshold": 0.0,
      "fraction_target_below_threshold": 0.0,
      "min_behavior_prob": 0.07632959390848924,
      "min_target_prob": 0.026157231486371814,
      "ratio_max": 3.5684002030806057,
      "ratio_min": 0.04705472051726874,
      "ratio_q50": 0.18765879697154786,
      "ratio_q90": 2.6244420066388168,
      "ratio_q99": 3.5684002030806057,
      "support_violations": 0
    },
    "shift": {
      "cov_shift_fro": 0.0004537222094861404,
      "ess": 219.90368752119898,
      "mean_shift_norm": 0.008085046034013388,
      "mmd_rbf": 0.0017401005426647131
    },
    "slices": {
      "actions": [
        {
          "action": 2,
          "behavior_prob_mean": 0.101620784880281,
          "behavior_prob_min": 0.07632959390848924,
          "count": 45,
          "ratio_mean": 3.2916943938539287
        },
        {
          "action": 3,
          "behavior_prob_mean": 0.16324723041179753,
          "behavior_prob_min": 0.08960169356760034,
          "count": 89,
          "ratio_mean": 2.4755966661413296
        },
        {
          "action": 0,
          "behavior_prob_mean": 0.49512616816592925,
          "behavior_prob_min": 0.13054535785315619,
          "count": 182,
          "ratio_mean": 0.11110709257037596
        },
        {
          "action": 1,
          "behavior_prob_mean": 0.45837060390429074,
          "behavior_prob_min": 0.24742608606524297,
          "count": 184,
          "ratio_mean": 0.7349010421427479
        }
      ]
    },
    "weights": {
      "kurtosis": -1.0233732664145163,
      "max": 3.5684002030806057,
      "mean": 1.0477952672241582,
      "min": 0.04705472051726874,
      "q95": 3.3050985744459047,
      "q99": 3.5684002030806057,
      "recommended_clip": 3.5684002030806057,
      "skew": 0.7313077102896869,
      "std": 1.1825346419140357,
      "tail_fraction": 0.0
    }
  },
  "estimates": [
    {
      "assumptions_checked": [
        "sequential_ignorability",
        "overlap",
        "behavior_policy_known"
      ],
      "assumptions_flagged": [],
      "ci": [
        -0.1568200117176415,
        0.10235142633271485
      ],
      "diagnostics": {
        "ess": {
          "ess": 219.90368752119898,
          "ess_ratio": 0.439807375042398
        },
        "max_weight": 3.5684002030806057,
        "model": {},
        "overlap": {
          "fraction_behavior_below_threshold": 0.0,
          "fraction_target_below_threshold": 0.0,
          "min_behavior_prob": 0.07632959390848924,
          "min_target_prob": 0.026157231486371814,
          "ratio_max": 3.5684002030806057,
          "ratio_min": 0.04705472051726874,
          "ratio_q50": 0.18765879697154786,
          "ratio_q90": 2.6244420066388168,
          "ratio_q99": 3.5684002030806057,
          "support_violations": 0
        },
        "weights": {
          "kurtosis": -1.0233732664145163,
          "max": 3.5684002030806057,
          "mean": 1.0477952672241582,
          "min": 0.04705472051726874,
          "q95": 3.3050985744459047,
          "q99": 3.5684002030806057,
          "recommended_clip": 3.5684002030806057,
          "skew": 0.7313077102896869,
          "std": 1.1825346419140357,
          "tail_fraction": 0.0
        }
      },
      "estimator": "IS",
      "lower_bound": -0.1568200117176415,
      "metadata": {
        "clip_rho": null,
        "diagnostics_keys": [
          "overlap",
          "ess",
          "weights",
          "max_weight",
          "model"
        ],
        "estimator": "IS",
        "normalize": false,
        "num_samples": 500,
        "required_fields": [
          "behavior_action_probs"
        ],
        "use_log_weights": true
      },
      "stderr": 0.06611637767190308,
      "upper_bound": 0.10235142633271485,
      "value": -0.027234292692463326,
      "warnings": []
    },
    {
      "assumptions_checked": [
        "sequential_ignorability",
        "overlap",
        "behavior_policy_known"
      ],
      "assumptions_flagged": [],
      "ci": [
        -0.14834037829110347,
        0.09635638190483803
      ],
      "diagnostics": {
        "ess": {
          "ess": 219.90368752119898,
          "ess_ratio": 0.439807375042398
        },
        "max_weight": 3.5684002030806057,
        "model": {},
        "overlap": {
          "fraction_behavior_below_threshold": 0.0,
          "fraction_target_below_threshold": 0.0,
          "min_behavior_prob": 0.07632959390848924,
          "min_target_prob": 0.026157231486371814,
          "ratio_max": 3.5684002030806057,
          "ratio_min": 0.04705472051726874,
          "ratio_q50": 0.18765879697154786,
          "ratio_q90": 2.6244420066388168,
          "ratio_q99": 3.5684002030806057,
          "support_violations": 0
        },
        "weights": {
          "kurtosis": -1.0233732664145163,
          "max": 3.5684002030806057,
          "mean": 1.0477952672241582,
          "min": 0.04705472051726874,
          "q95": 3.3050985744459047,
          "q99": 3.5684002030806057,
          "recommended_clip": 3.5684002030806057,
          "skew": 0.7313077102896869,
          "std": 1.1825346419140357,
          "tail_fraction": 0.0
        }
      },
      "estimator": "WIS",
      "lower_bound": -0.14834037829110347,
      "metadata": {
        "clip_rho": null,
        "diagnostics_keys": [
          "overlap",
          "ess",
          "weights",
          "max_weight",
          "model"
        ],
        "estimator": "WIS",
        "num_samples": 500,
        "required_fields": [
          "behavior_action_probs"
        ],
        "use_log_weights": true
      },
      "stderr": 0.06242378995891721,
      "upper_bound": 0.09635638190483803,
      "value": -0.025991998193132713,
      "warnings": []
    }
  ],
  "figures": [],
  "metadata": {
    "baseline_policy_name": null,
    "configs": {
      "diagnostics": "default",
      "estimators": [
        "IS",
        "WIS"
      ],
      "inference": {},
      "sensitivity": {}
    },
    "dataset_fingerprint": "87253ebe7de8254bb453ce5442e70c252f3a603c32a906a05045e40a376bae0b",
    "dataset_summary": {
      "action_space_n": 4,
      "behavior_action_probs_present": true,
      "behavior_prob_max": 0.5821445995016145,
      "behavior_prob_min": 0.07632959390848924,
      "context_dim": 1,
      "num_samples": 500,
      "reward_max": 1.4403512335110324,
      "reward_mean": -0.2375694002833386,
      "reward_min": -1.551239008736725,
      "reward_std": 0.7335348450022354,
      "type": "bandit"
    },
    "environment": {
      "package_versions": {
        "causalrl": "0.2.0",
        "numpy": "2.4.1",
        "pandas": "2.3.3",
        "torch": "2.9.1"
      },
      "platform": "macOS-15.6.1-arm64-arm-64bit",
      "python_version": "3.11.9 | packaged by conda-forge | (main, Apr 19 2024, 18:34:54) [Clang 16.0.6 ]"
    },
    "estimand": "PolicyValueEstimand(policy=TabularPolicy, discount=1.0, horizon=1, assumptions=[\u0027behavior_policy_known\u0027, \u0027overlap\u0027, \u0027sequential_ignorability\u0027])",
    "generated_at": "2026-01-24T00:07:16.891348+00:00",
    "git_sha": null,
    "package_version": "0.2.0",
    "policy_name": "TabularPolicy",
    "run_name": null,
    "seed": 0
  },
  "mode": "ope",
  "schema_version": "1.0",
  "sensitivity": null,
  "tables": {},
  "warnings": []
}