diff --git a/studies/study_013/README.md b/studies/study_013/README.md new file mode 100644 index 00000000..8286b31e --- /dev/null +++ b/studies/study_013/README.md @@ -0,0 +1,70 @@ +# Study 013: Opportunity Evaluation under Risky Conditions + +**Authors:** Hean Tat Keh, Maw Der Foo, Boon Chong Lim + +**Year:** 2002 + +**Journal:** *Entrepreneurship Theory and Practice*, 27(2), 125-148 + +## Description + +This study examines how cognitive biases affect entrepreneurs' opportunity evaluation under risky conditions. Using a survey of 77 founders of top SMEs in Singapore, the study measures four cognitive biases (overconfidence, illusion of control, belief in the law of small numbers, and planning fallacy) and tests how they influence risk perception and opportunity evaluation of a standardized business case vignette. The benchmark implementation focuses on the paper's calibration test and its reported regression findings. + +## Participants + +- **N = 77** founders and owners of the top 500 SMEs in Singapore +- 97% male, mean age 46.6 years +- 92.4% Chinese, 79% founded their business +- Business revenue: 48.6% between S$1M-S$25M, 44.4% between S$25M-S$50M + +## Key Findings Tested + +| Finding | Hypothesis | Human Result | +|---------|-----------|--------------| +| F1 | Entrepreneurs are overconfident (mean items outside 90% CI > 1) | Mean = 5.17, SD = 2.64 | +| F2 | Risk perception negatively predicts opportunity evaluation (H1) | beta = -0.50, t = -5.98, p < .001 | +| F3 | Illusion of control negatively predicts risk perception in Model 1 (H5) | beta = -0.76, t = -3.34, p < .01 | +| F4 | Illusion of control positively predicts opportunity evaluation in Model 2 | beta = 0.40, t = 2.23, p < .05 | +| F5 | Belief in the law of small numbers positively predicts opportunity evaluation in Model 2 | beta = 1.17, t = 1.91, p < .06 | + +## Questionnaire Structure + +- **Section A:** 5 forced-choice gamble items (risk propensity) +- **Section B:** 7 Likert items (2 filler, 2 
planning fallacy, 3 illusion of control) +- **Section C:** 10 confidence-interval estimation items (overconfidence) +- **Section D:** Business case vignette + 4 risk perception items + 3 opportunity evaluation items + 1 optional open-ended item coded for belief in the law of small numbers + +## File Structure + +``` +study_013/ +├── index.json +├── README.md +├── source/ +│ ├── Keh-Foo-Lim-2002-Opportunity-Evaluation.pdf +│ ├── metadata.json +│ ├── specification.json +│ ├── ground_truth.json +│ └── materials/ +│ ├── section_a_risk_propensity.json +│ ├── section_b_cognitive_biases.json +│ ├── section_c_overconfidence.json +│ └── section_d_case_vignette.json +└── scripts/ + ├── config.py + ├── evaluator.py + ├── study_utils.py + └── stats_lib.py +``` + +## Overconfidence Answer Key + +The 10 confidence-interval items reference Singapore statistics circa 1999-2000. Correct answers have been verified against: +- Yearbook of Statistics Singapore 2000 (Department of Statistics) +- Changi Airport Group corporate history +- LTA Vehicle Quota Tender Results 2000-2004 +- SingStat residential dwelling datasets + +## Contributor + +Guankai Zhai ([@zgk2003](https://github.com/zgk2003)) diff --git a/studies/study_013/index.json b/studies/study_013/index.json new file mode 100644 index 00000000..18c9b046 --- /dev/null +++ b/studies/study_013/index.json @@ -0,0 +1,16 @@ +{ + "title": "Opportunity Evaluation under Risky Conditions: The Cognitive Processes of Entrepreneurs", + "authors": [ + "Hean Tat Keh", + "Maw Der Foo", + "Boon Chong Lim" + ], + "year": 2002, + "description": "This study examines how cognitive biases affect entrepreneurs' opportunity evaluation under risky conditions. Using a survey of 77 founders of top SMEs in Singapore, the study measures overconfidence, illusion of control, belief in the law of small numbers, and planning fallacy, then relates those constructs to risk perception and opportunity evaluation for a standardized business vignette. 
The benchmark reproduces the paper's calibration test and its reported regression findings: entrepreneurs are overconfident, risk perception negatively predicts opportunity evaluation, illusion of control lowers risk perception and increases opportunity evaluation before mediation, and belief in the law of small numbers increases opportunity evaluation in the pre-mediation model.", + "contributors": [ + { + "name": "Guankai Zhai", + "github": "https://github.com/zgk2003" + } + ] +} diff --git a/studies/study_013/scripts/config.py b/studies/study_013/scripts/config.py new file mode 100644 index 00000000..6b3db1cf --- /dev/null +++ b/studies/study_013/scripts/config.py @@ -0,0 +1,273 @@ +import numpy as np + +import sys +sys.path.insert(0, str(__import__("pathlib").Path(__file__).resolve().parent)) +from study_utils import BaseStudyConfig, PromptBuilder, compute_construct_scores, iter_response_records + +import random + + +AGE_DISTRIBUTION = [ + (range(30, 40), 0.222), # Less than 40 + (range(40, 61), 0.715), # 40 to 60 + (range(61, 70), 0.063), # More than 60 +] + +SEX_OPTIONS = ["male", "female"] +SEX_WEIGHTS = [0.97, 0.03] + +RACE_OPTIONS = ["Chinese", "Indian", "Other"] +RACE_WEIGHTS = [0.924, 0.045, 0.031] + +EDUCATION_OPTIONS = ["secondary", "postsecondary", "primary/other"] +EDUCATION_WEIGHTS = [0.061, 0.864, 0.075] + +BUSINESS_SIZE_OPTIONS = [ + "Less than S$1m", + "Between S$1m and S$25m", + "Between S$25m and S$50m", + "More than S$50m", +] + +BUSINESS_SIZE_WEIGHTS = [0.028, 0.486, 0.444, 0.042] + + +def weighted_age_sample(): + """Sample an age from the Table 2 age distribution.""" + r = random.random() + cumulative = 0 + for age_range, prob in AGE_DISTRIBUTION: + cumulative += prob + if r < cumulative: + return random.choice(list(age_range)) + return random.randint(40, 60) + + +def weighted_choice(options, weights): + """Draw one option according to the reported sample proportions.""" + return random.choices(options, weights=weights, k=1)[0] + + +class 
CustomPromptBuilder(PromptBuilder): + """Builds the full Keh, Foo & Lim (2002) questionnaire prompt.""" + + def build_trial_prompt(self, trial_metadata): + profile = trial_metadata.get("profile") or trial_metadata.get("participant_profile", {}) + items_a = trial_metadata.get("items_a", []) + items_b = trial_metadata.get("items_b", []) + items_c = trial_metadata.get("items_c", []) + items_d = trial_metadata.get("items_d", []) + vignette_text = trial_metadata.get("vignette_text", "") + + lines = [] + optional_question_numbers = [] + + # --- Persona Introduction --- + age = profile.get("age", 47) + sex = profile.get("sex", "male") + race = profile.get("race", "Chinese") + education = profile.get("education", "postsecondary") + business_size = profile.get("business_size", "Between S$1m and S$25m") + founder = profile.get("is_founder", True) + + lines.append("You are participating in a research study on entrepreneurial decision-making.") + lines.append( + "Answer as one of the Singapore SME founders/owners described in the original paper." + ) + lines.append( + f"Imagine you are a {age}-year-old {sex} entrepreneur in Singapore, " + f"{race}, with {education} education, who {'founded' if founder else 'bought over'} " + f"the business you run (annual revenue: {business_size})." 
+ ) + lines.append("Please answer all questions honestly from that participant's perspective.\n") + + q_counter = 1 + + # --- Section A: Risk Propensity (5 forced-choice items) --- + lines.append("=" * 60) + lines.append("SECTION A: RISK PREFERENCES") + lines.append("=" * 60) + lines.append("Please answer the following five items by choosing the alternative (\"a\" or \"b\") you would feel most comfortable with.\n") + + for item in items_a: + options = item.get("options", []) + lines.append(f"Q{q_counter}: Which would you prefer?") + lines.append(f" a) {options[0]}") + lines.append(f" b) {options[1]}") + lines.append(f" (Answer Q{q_counter}=a or Q{q_counter}=b)\n") + item["q_idx"] = q_counter + q_counter += 1 + + # --- Section B: Cognitive Biases (7 Likert items) --- + lines.append("=" * 60) + lines.append("SECTION B: BUSINESS ATTITUDES") + lines.append("=" * 60) + lines.append("Please indicate how much you agree with each statement.") + lines.append("Scale: 1 = Strongly Disagree, 2 = Disagree, 3 = Slightly Disagree, 4 = Neutral, 5 = Slightly Agree, 6 = Agree, 7 = Strongly Agree\n") + + for item in items_b: + lines.append(f"Q{q_counter}: {item['question']}") + lines.append(f" (Answer Q{q_counter}=1 to Q{q_counter}=7)\n") + item["q_idx"] = q_counter + q_counter += 1 + + # --- Section C: Overconfidence (10 confidence-interval items) --- + lines.append("=" * 60) + lines.append("SECTION C: GENERAL KNOWLEDGE") + lines.append("=" * 60) + lines.append("For each question below, provide a LOWER LIMIT and UPPER LIMIT such that you are 90% confident the correct answer falls within your range.") + lines.append("If you have absolutely no idea, provide the widest reasonable range.\n") + + for item in items_c: + unit = item.get("unit", "") + lines.append(f"Q{q_counter} (Lower Limit) and Q{q_counter + 1} (Upper Limit): {item['question']}") + lines.append(f" Unit: {unit}") + lines.append(f" (Answer Q{q_counter}= Q{q_counter + 1}=)\n") + item["q_idx_lower"] = q_counter + 
item["q_idx_upper"] = q_counter + 1 + q_counter += 2 + + # --- Section D: Case Vignette + Risk Perception + Opportunity Evaluation --- + lines.append("=" * 60) + lines.append("SECTION D: BUSINESS CASE EVALUATION") + lines.append("=" * 60) + lines.append("Please read the following case study carefully, then answer the questions.\n") + lines.append(vignette_text) + lines.append("") + lines.append("Based on the case above, please indicate how much you agree with each statement.") + lines.append("Scale: 1 = Strongly Disagree, 2 = Disagree, 3 = Slightly Disagree, 4 = Neutral, 5 = Slightly Agree, 6 = Agree, 7 = Strongly Agree\n") + + for item in items_d: + if item["type"] == "likert_7": + lines.append(f"Q{q_counter}: {item['question']}") + lines.append(f" (Answer Q{q_counter}=1 to Q{q_counter}=7)\n") + item["q_idx"] = q_counter + q_counter += 1 + elif item["type"] == "open_ended": + lines.append(f"Q{q_counter}: {item['question']}") + lines.append(" Focus on the issues that actually drive your judgment from the case as written.") + lines.append(" Mention extra information only if you genuinely need it.") + lines.append(f" (Optional. Answer Q{q_counter}=, write Q{q_counter}=No additional information needed, or omit Q{q_counter} to skip.)\n") + item["q_idx"] = q_counter + optional_question_numbers.append(q_counter) + q_counter += 1 + + # --- Response format --- + lines.append("=" * 60) + lines.append("RESPONSE FORMAT (MANDATORY)") + lines.append("=" * 60) + lines.append("Output ONLY answer lines in the format: Qk=") + lines.append("One answer per line. 
Do not include explanations.") + if optional_question_numbers: + optional_labels = ", ".join(f"Q{idx}" for idx in optional_question_numbers) + required_answers = (q_counter - 1) - len(optional_question_numbers) + lines.append(f"All numbered items except {optional_labels} are required.") + lines.append( + f"For {optional_labels}, respond with the issues influencing your judgment, " + "or state that no additional information is needed." + ) + lines.append(f"Expected number of answer lines: {required_answers} to {q_counter - 1}") + else: + lines.append(f"Expected number of answer lines: {q_counter - 1}") + + return "\n".join(lines) + + +class StudyStudy013Config(BaseStudyConfig): + """Study config for Keh, Foo & Lim (2002) — Opportunity Evaluation under Risky Conditions.""" + + prompt_builder_class = CustomPromptBuilder + PROMPT_VARIANT = "v1" + + def create_trials(self, n_trials=None): + spec = self.load_specification() + n = n_trials if n_trials is not None else spec["participants"]["n"] + + # Load all materials + mat_a = self.load_material("section_a_risk_propensity") + mat_b = self.load_material("section_b_cognitive_biases") + mat_c = self.load_material("section_c_overconfidence") + mat_d = self.load_material("section_d_case_vignette") + + vignette_text = mat_d.get("vignette_text", "") + + trials = [] + for i in range(n): + # Generate entrepreneur profiles only from demographics reported in Table 2. 
+ age = weighted_age_sample() + sex = weighted_choice(SEX_OPTIONS, SEX_WEIGHTS) + race = weighted_choice(RACE_OPTIONS, RACE_WEIGHTS) + education = weighted_choice(EDUCATION_OPTIONS, EDUCATION_WEIGHTS) + business_size = random.choices(BUSINESS_SIZE_OPTIONS, weights=BUSINESS_SIZE_WEIGHTS, k=1)[0] + is_founder = random.random() < 0.79 + + profile = { + "age": age, + "sex": sex, + "race": race, + "education": education, + "business_size": business_size, + "is_founder": is_founder, + } + + # Deep copy items to avoid mutation across trials + import copy + trial = { + "sub_study_id": "keh_foo_lim_opportunity_evaluation", + "scenario_id": "mr_tan_vignette", + "scenario": "mr_tan_vignette", + "profile": profile, + "items_a": copy.deepcopy(mat_a["items"]), + "items_b": copy.deepcopy(mat_b["items"]), + "items_c": copy.deepcopy(mat_c["items"]), + "items_d": copy.deepcopy(mat_d["items"]), + "vignette_text": vignette_text, + "variant": self.PROMPT_VARIANT, + } + trials.append(trial) + + return trials + + def aggregate_results(self, raw_results): + """Parse Qk=value responses and compute per-participant construct scores.""" + participants = [] + + for record in iter_response_records(raw_results): + participant_scores = compute_construct_scores( + record.get("response_text", ""), + record.get("trial_info", {}), + ) + if participant_scores is not None: + participants.append(participant_scores) + + # Compute descriptive statistics + if not participants: + return {"participants": [], "descriptive_statistics": {}, "n_valid": 0} + + constructs = [ + "risk_propensity", + "planning_fallacy", + "illusion_of_control", + "overconfidence", + "risk_perception", + "opportunity_evaluation", + "small_numbers", + "age", + ] + + desc_stats = {} + for c in constructs: + values = [p[c] for p in participants if p.get(c) is not None] + if not values: + continue + desc_stats[c] = { + "mean": float(np.mean(values)), + "sd": float(np.std(values, ddof=1)) if len(values) > 1 else 0.0, + "n": len(values), + 
} + + return { + "participants": participants, + "descriptive_statistics": desc_stats, + "n_valid": len(participants), + } diff --git a/studies/study_013/scripts/evaluator.py b/studies/study_013/scripts/evaluator.py new file mode 100644 index 00000000..9d87a542 --- /dev/null +++ b/studies/study_013/scripts/evaluator.py @@ -0,0 +1,289 @@ +import json +import numpy as np +from scipy import stats +from pathlib import Path +from typing import Dict, Any, List, Optional, Sequence + +import sys +sys.path.insert(0, str(__import__("pathlib").Path(__file__).resolve().parent)) +from stats_lib import parse_p_value_from_reported +from study_utils import compute_construct_scores, iter_response_records + +# Module-level cache for ground truth and metadata +_ground_truth_cache = None +_metadata_cache = None + + +def _expected_direction_to_int(expected_dir_str: str) -> int: + """Convert expected_direction string to int: 1 = positive, -1 = negative, 0 = unknown.""" + if not expected_dir_str: + return 0 + s = str(expected_dir_str).lower() + if s in ("positive", "greater", ">"): + return 1 + if s in ("negative", "less", "<"): + return -1 + return 0 + + +def _select_complete_cases( + participant_scores: Sequence[Dict[str, Any]], + outcome: str, + predictors: Sequence[str], +) -> Optional[Dict[str, Any]]: + """Build complete-case outcome and predictor matrices for OLS.""" + rows: List[Dict[str, Any]] = [] + for participant in participant_scores: + required_values = [participant.get(outcome)] + required_values.extend(participant.get(name) for name in predictors) + if any(value is None for value in required_values): + continue + rows.append(participant) + + if not rows: + return None + + y = np.array([row[outcome] for row in rows], dtype=float) + x = np.array([[row[name] for name in predictors] for row in rows], dtype=float) + return {"rows": rows, "y": y, "x": x} + + +def _fit_ols(y: np.ndarray, x: np.ndarray, predictor_names: Sequence[str]) -> Optional[Dict[str, Any]]: + """Fit an 
ordinary least squares model and return coefficients and t-tests.""" + if y.ndim != 1 or x.ndim != 2: + return None + + n_obs, n_predictors = x.shape + if n_obs <= n_predictors + 1: + return None + + design = np.column_stack([np.ones(n_obs), x]) + rank = np.linalg.matrix_rank(design) + if rank < design.shape[1]: + return None + + coefficients, _, _, _ = np.linalg.lstsq(design, y, rcond=None) + residuals = y - design @ coefficients + dof = n_obs - design.shape[1] + if dof <= 0: + return None + + mse = float(np.sum(residuals ** 2) / dof) + covariance = mse * np.linalg.inv(design.T @ design) + standard_errors = np.sqrt(np.diag(covariance)) + with np.errstate(divide="ignore", invalid="ignore"): + t_values = coefficients / standard_errors + p_values = 2 * stats.t.sf(np.abs(t_values), dof) + + names = ["intercept", *predictor_names] + coefficient_map = {} + for index, name in enumerate(names): + coefficient_map[name] = { + "coefficient": float(coefficients[index]), + "standard_error": float(standard_errors[index]), + "t_value": float(t_values[index]), + "p_value": float(p_values[index]), + } + + return { + "n_obs": n_obs, + "degrees_of_freedom": dof, + "coefficients": coefficient_map, + } + + +def evaluate_study(results): + """ + Evaluates the agent's performance on Study 013 (Keh, Foo & Lim 2002). + Computes construct scores from agent responses and tests hypothesized relationships. + Returns test_results with raw stats; no BF/PAS aggregation. + """ + global _ground_truth_cache, _metadata_cache + + # 1. 
Load Ground Truth and Metadata (with caching) + study_dir = Path(__file__).resolve().parent.parent / "source" + + if _ground_truth_cache is None: + with open(study_dir / "ground_truth.json", "r") as f: + _ground_truth_cache = json.load(f) + + if _metadata_cache is None: + metadata_path = study_dir / "metadata.json" + if metadata_path.exists(): + with open(metadata_path, "r") as f: + _metadata_cache = json.load(f) + else: + _metadata_cache = {} + + ground_truth = _ground_truth_cache + # 2. Parse all agent responses into participant score vectors + participant_scores = [] + + for response_record in iter_response_records(results): + scores = compute_construct_scores( + response_record.get("response_text", ""), + response_record.get("trial_info", {}), + ) + if scores is not None: + participant_scores.append(scores) + + # 3. Build test results for each finding + test_results = [] + + if not participant_scores: + # Return empty results if no valid participants + for study_gt in ground_truth.get("studies", []): + for finding in study_gt.get("findings", []): + test_results.append({ + "study_id": "study_013", + "sub_study_id": "keh_foo_lim_opportunity_evaluation", + "finding_id": finding["finding_id"], + "n_agent": 0, + "error": "No valid participant data", + }) + return {"test_results": test_results} + + overconfidence_scores = np.array([p["overconfidence"] for p in participant_scores], dtype=float) + n_agent = len(participant_scores) + + for study_gt in ground_truth.get("studies", []): + for finding in study_gt.get("findings", []): + finding_id = finding["finding_id"] + stat_tests = finding.get("statistical_tests", []) + test_gt = stat_tests[0] if stat_tests else {} + expected_dir_str = test_gt.get("expected_direction", "") + h_expected = _expected_direction_to_int(expected_dir_str) + reported_stats = test_gt.get("reported_statistics", "") + sig_level = test_gt.get("significance_level") or 0.05 + + # Parse human p-value + human_p_value = None + human_significant = None + 
parsed_p, parsed_sig, _ = parse_p_value_from_reported(reported_stats, sig_level) + if parsed_p is not None: + human_p_value = parsed_p + human_significant = parsed_sig + + t_stat = None + r_stat = None + p_value = None + agent_significant = None + direction_match = None + mean_agent = None + sd_agent = None + coefficient_agent = None + standard_error_agent = None + model_n = n_agent + model_predictors = None + human_coefficient = test_gt.get("reported_coefficient") + human_t_value = test_gt.get("reported_t_value") + + if finding_id == "F1": + # Overconfidence: one-sample t-test against baseline of 1 + baseline = 1.0 + mean_agent = float(np.mean(overconfidence_scores)) + sd_agent = float(np.std(overconfidence_scores, ddof=1)) if n_agent > 1 else 0.0 + + # For F1, human significance must be computed from reported stats + # since the reported_statistics string has no p-value + human_mean = 5.17 + human_sd = 2.64 + human_n = 77 + human_t = (human_mean - baseline) / (human_sd / np.sqrt(human_n)) + human_p_value = float(stats.t.sf(human_t, human_n - 1)) + human_significant = human_p_value < sig_level + + if sd_agent > 0 and n_agent >= 2: + t_stat_val, p_value_val = stats.ttest_1samp(overconfidence_scores, baseline) + t_stat = float(t_stat_val) + # One-sided test: mean > baseline + p_value = float(p_value_val / 2) if t_stat > 0 else float(1 - p_value_val / 2) + agent_significant = p_value < sig_level + direction_match = (mean_agent > baseline) + + elif finding_id == "F2": + model_predictors = ["risk_perception"] + model_data = _select_complete_cases( + participant_scores, + outcome="opportunity_evaluation", + predictors=model_predictors, + ) + if model_data is not None: + model = _fit_ols(model_data["y"], model_data["x"], model_predictors) + if model is not None: + model_n = model["n_obs"] + coefficient = model["coefficients"]["risk_perception"] + coefficient_agent = coefficient["coefficient"] + standard_error_agent = coefficient["standard_error"] + t_stat = 
coefficient["t_value"] + p_value = coefficient["p_value"] + agent_significant = p_value < sig_level + direction_match = (coefficient_agent < 0) if h_expected == -1 else (coefficient_agent > 0) + + elif finding_id in ("F3", "F4", "F5"): + model_predictors = [ + "overconfidence", + "small_numbers", + "planning_fallacy", + "illusion_of_control", + "risk_propensity", + "age", + ] + target_variable = { + "F3": ("risk_perception", "illusion_of_control"), + "F4": ("opportunity_evaluation", "illusion_of_control"), + "F5": ("opportunity_evaluation", "small_numbers"), + } + outcome_name, predictor_of_interest = target_variable[finding_id] + model_data = _select_complete_cases( + participant_scores, + outcome=outcome_name, + predictors=model_predictors, + ) + if model_data is not None: + model = _fit_ols(model_data["y"], model_data["x"], model_predictors) + if model is not None: + model_n = model["n_obs"] + coefficient = model["coefficients"][predictor_of_interest] + coefficient_agent = coefficient["coefficient"] + standard_error_agent = coefficient["standard_error"] + t_stat = coefficient["t_value"] + p_value = coefficient["p_value"] + agent_significant = p_value < sig_level + if h_expected == -1: + direction_match = coefficient_agent < 0 + elif h_expected == 1: + direction_match = coefficient_agent > 0 + else: + direction_match = True + + # Compute replication metric + replication = None + if human_significant is not None and agent_significant is not None and direction_match is not None: + replication = human_significant and agent_significant and direction_match + + test_result = { + "study_id": "study_013", + "sub_study_id": "keh_foo_lim_opportunity_evaluation", + "finding_id": finding_id, + "n_agent": n_agent, + "model_n": model_n, + "model_predictors": model_predictors, + "mean_agent": mean_agent, + "sd_agent": sd_agent, + "coefficient_agent": coefficient_agent, + "standard_error_agent": standard_error_agent, + "human_coefficient": human_coefficient, + "human_t_value": 
human_t_value, + "t_stat": t_stat, + "r_stat": r_stat, + "p_value": float(p_value) if p_value is not None else None, + "significant": agent_significant, + "direction_match": direction_match, + "human_p_value": human_p_value, + "human_significant": human_significant, + "replication": replication, + } + test_results.append(test_result) + + return {"test_results": test_results} diff --git a/studies/study_013/scripts/stats_lib.py b/studies/study_013/scripts/stats_lib.py new file mode 100644 index 00000000..b4fbb0c4 --- /dev/null +++ b/studies/study_013/scripts/stats_lib.py @@ -0,0 +1,86 @@ +""" +Minimal stats utilities for evaluator. +Parse p-value from reported_statistics strings; no BF/PAS. +""" + +import re +from typing import Tuple, Optional + + +def parse_p_value_from_reported( + reported_statistics: str, significance_level: float = 0.05 +) -> Tuple[Optional[float], bool, str]: + """ + Parse p-value from reported_statistics string. + + Handles formats like: + - "p < .001" or "p < 0.001" + - "p < .05" or "p < 0.05" + - "p = 0.023" + - "F(1, 312) = 49.1, p < .001" + + Args: + reported_statistics: String containing reported statistics + significance_level: Default significance level (usually 0.05) + + Returns: + tuple: (p_value, is_significant, confidence) + - p_value: Parsed p-value (float) or None if not found + - is_significant: bool indicating if p < significance_level + - confidence: "high" if exact value, "medium" if inequality, "low" if inferred + """ + if significance_level is None: + significance_level = 0.05 + + if not reported_statistics: + return None, False, "low" + + text = reported_statistics.lower() + + # Try to extract exact p-value: "p = 0.023" or "p=0.023" + exact_match = re.search(r"p\s*[=:]\s*([0-9.]+)", text) + if exact_match: + p_val = float(exact_match.group(1)) + is_sig = p_val <= significance_level if significance_level is not None else False + return p_val, is_sig, "high" + + # Try to extract inequality: "p < .001" or "p < 0.05" + 
inequality_match = re.search(r"p\s*[<>]\s*([0-9.]+)", text) + if inequality_match: + threshold = float(inequality_match.group(1)) + is_less_than = "<" in text[ + inequality_match.start() : inequality_match.end() + ] + + if is_less_than: + p_val = threshold / 2.0 + is_sig = True + return p_val, is_sig, "medium" + else: + p_val = threshold + is_sig = threshold < significance_level if significance_level is not None else False + return p_val, is_sig, "medium" + + # If no p-value found, infer from test statistic + t_match = re.search(r"t\s*\([^)]+\)\s*=\s*([0-9.]+)", text) + if t_match: + t_val = abs(float(t_match.group(1))) + if t_val > 2.0: + return 0.025, True, "low" + return 0.10, False, "low" + + f_match = re.search(r"f\s*\([^)]+\)\s*=\s*([0-9.]+)", text) + if f_match: + f_val = float(f_match.group(1)) + if f_val > 4.0: + return 0.025, True, "low" + return 0.10, False, "low" + + r_match = re.search(r"r\s*=\s*([-0-9.]+)", text) + if r_match: + r_val = abs(float(r_match.group(1))) + if r_val > 0.3: + return 0.025, True, "low" + return 0.10, False, "low" + + return None, False, "low" diff --git a/studies/study_013/scripts/study_utils.py b/studies/study_013/scripts/study_utils.py new file mode 100644 index 00000000..06610ff8 --- /dev/null +++ b/studies/study_013/scripts/study_utils.py @@ -0,0 +1,381 @@ +""" +Standalone study utilities: BaseStudyConfig and PromptBuilder. +No dependency on src/; for use within each study's scripts/. 
+""" +import json +import re +from abc import ABC, abstractmethod +from pathlib import Path +from typing import Dict, Any, Iterable, List, Optional + + +RESPONSE_LINE_PATTERN = re.compile(r"Q(\d+)\s*[:=]\s*(.+)") +EXPLICIT_SKIP_RESPONSES = { + "", + "skip", + "n/a", + "na", +} +NO_ADDITIONAL_INFO_RESPONSES = { + "none", + "none needed", + "nothing else", + "nothing additional", + "no additional info", + "no additional information", + "no additional information needed", + "no more information", + "no more information needed", +} +NO_ADDITIONAL_INFO_PATTERNS = [ + re.compile(pattern) + for pattern in ( + r"\bno (?:additional|further|more) info(?:rmation)? (?:is )?needed\b", + r"\bno (?:additional|further|more) (?:data|research) (?:is )?needed\b", + r"\b(?:the|this) information given is enough\b", + r"\bthe case provides enough information\b", + r"\benough information (?:is )?(?:provided|given)\b", + ) +] +STATISTICAL_REASONING_PATTERNS = [ + re.compile(pattern) + for pattern in ( + r"\bmarket research\b", + r"\bmarket study\b", + r"\bsurvey\b", + r"\blarger sample\b", + r"\bsample size\b", + r"\brepresentative sample\b", + r"\bstatistical data\b", + r"\bindustry statistics?\b", + r"\bmarket (?:size|demand|growth) data\b", + r"\bcustomer (?:data|survey|research)\b", + r"\bdemand (?:data|research)\b", + r"\btrend data\b", + r"\bmore (?:data|research|surveys?|samples?)\b", + ) +] + + +def parse_question_responses(response_text: str) -> Dict[int, str]: + """Parse Qk=value or Qk: value lines into a question-number map.""" + responses: Dict[int, str] = {} + for line in str(response_text or "").splitlines(): + match = RESPONSE_LINE_PATTERN.match(line.strip()) + if match: + responses[int(match.group(1))] = match.group(2).strip() + return responses + + +def extract_numeric_value(text: Any, default: Optional[float] = None) -> Optional[float]: + """Extract the first numeric value from text, tolerating commas and units.""" + if text is None: + return default + clean_text = 
str(text).replace(",", "")
    match = re.search(r"(-?\d+(?:\.\d+)?)", clean_text)
    return float(match.group(1)) if match else default


def code_belief_in_small_numbers(text: Any) -> Optional[int]:
    """
    Code the open-ended vignette response.

    -1: respondent asks for broader evidence such as market research / survey / data
    +1: respondent relies on the vignette and anecdotal cues without that request,
        including statements that no additional information is needed
    None: blank / skipped response
    """
    if text is None:
        return None

    # Collapse internal whitespace and lowercase so regex matching is robust
    # to formatting; exact-phrase lookups also strip trailing punctuation.
    normalized = re.sub(r"\s+", " ", str(text).strip().lower())
    canonical = normalized.strip(" .,!?:;")
    if canonical in EXPLICIT_SKIP_RESPONSES:
        return None
    if canonical in NO_ADDITIONAL_INFO_RESPONSES:
        return 1

    # Statistical-reasoning cues are checked first, so a response that both
    # asks for data AND says the case looks fine is still coded -1.
    for pattern in STATISTICAL_REASONING_PATTERNS:
        if pattern.search(normalized):
            return -1

    for pattern in NO_ADDITIONAL_INFO_PATTERNS:
        if pattern.search(normalized):
            return 1

    # Default: any other substantive answer is treated as relying on the
    # vignette's limited evidence (belief in the law of small numbers).
    return 1


def iter_response_records(results: Dict[str, Any]) -> Iterable[Dict[str, Any]]:
    """
    Yield flat response records regardless of whether results are already flattened
    or grouped under participant summaries.
    """
    individual_data = results.get("individual_data") or []
    if individual_data:
        # Shape is sniffed from the first record only; records are assumed
        # homogeneous within one results payload.
        first = individual_data[0]
        if isinstance(first, dict) and "response_text" in first:
            # Shape 1: already-flat records.
            yield from individual_data
            return
        if isinstance(first, dict) and "responses" in first:
            # Shape 2: per-participant dicts each holding a "responses" list.
            for participant in individual_data:
                for response in participant.get("responses", []):
                    yield response
            return

    # Fallback shape: responses nested under "participant_summaries".
    # Note: also reached when individual_data is non-empty but matches neither
    # shape above.
    for participant in results.get("participant_summaries", []) or []:
        for response in participant.get("responses", []):
            yield response


def compute_construct_scores(response_text: str, trial_info: Dict[str, Any]) -> Optional[Dict[str, Any]]:
    """Compute participant-level construct scores from one completed questionnaire.

    Parameters
    ----------
    response_text : raw questionnaire answer text, parsed into a q_idx -> answer
        mapping by parse_question_responses (defined earlier in this module).
    trial_info : per-trial metadata with "items_a".."items_d" item lists
        (Sections A-D of the instrument) and a participant "profile".

    Returns
    -------
    A dict of construct scores, or None when the questionnaire is incomplete
    (any section under its full item count, or age missing).
    """
    responses = parse_question_responses(response_text)
    items_a = trial_info.get("items_a", [])
    items_b = trial_info.get("items_b", [])
    items_c = trial_info.get("items_c", [])
    items_d = trial_info.get("items_d", [])

    # Section A (5 forced-choice gambles): risk propensity = number of times
    # the risky alternative was chosen.
    risk_propensity = 0
    risk_answered = 0
    for item in items_a:
        q_idx = item.get("q_idx")
        if q_idx and q_idx in responses:
            choice_text = responses[q_idx].strip().lower()
            # First standalone "a" or "b" token counts as the choice.
            choice_match = re.search(r"\b([ab])\b", choice_text)
            if choice_match:
                choice = choice_match.group(1)
                risk_answered += 1
                if choice == item.get("metadata", {}).get("risky_option", "a"):
                    risk_propensity += 1

    # Section B (Likert 1-7): sum ratings per construct; out-of-scale or
    # non-numeric answers are skipped (and later fail the completeness gate).
    planning_fallacy = 0
    planning_count = 0
    illusion_of_control = 0
    ioc_count = 0
    for item in items_b:
        q_idx = item.get("q_idx")
        if not q_idx or q_idx not in responses:
            continue
        value = extract_numeric_value(responses[q_idx], default=None)
        if value is None or not 1 <= value <= 7:
            continue
        construct = item.get("metadata", {}).get("construct")
        if construct == "planning_fallacy":
            planning_fallacy += value
            planning_count += 1
        elif construct == "illusion_of_control":
            illusion_of_control += value
            ioc_count += 1

    # Section C (10 confidence-interval items): overconfidence = number of
    # items whose correct answer falls outside the stated 90% interval.
    overconfidence = 0
    oc_count = 0
    for item in items_c:
        q_lower = item.get("q_idx_lower")
        q_upper = item.get("q_idx_upper")
        correct_answer = item.get("correct_answer")
        if not q_lower or not q_upper or correct_answer is None:
            continue
        if q_lower not in responses or q_upper not in responses:
            continue
        lower = extract_numeric_value(responses[q_lower], default=None)
        upper = extract_numeric_value(responses[q_upper], default=None)
        if lower is None or upper is None:
            continue
        # Tolerate reversed bounds rather than discarding the item.
        if lower > upper:
            lower, upper = upper, lower
        oc_count += 1
        if correct_answer < lower or correct_answer > upper:
            overconfidence += 1

    # Section D (vignette): 4 risk-perception + 3 opportunity-evaluation
    # Likert items, plus one open-ended item coded into small_numbers.
    risk_perception = 0
    rp_count = 0
    opportunity_evaluation = 0
    oe_count = 0
    small_numbers = None
    for item in items_d:
        q_idx = item.get("q_idx")
        if not q_idx or q_idx not in responses:
            continue

        construct = item.get("metadata", {}).get("construct")
        if construct == "belief_in_small_numbers":
            # May stay None: the open-ended item is optional and blank /
            # explicitly skipped answers are treated as missing.
            small_numbers = code_belief_in_small_numbers(responses[q_idx])
            continue

        value = extract_numeric_value(responses[q_idx], default=None)
        if value is None or not 1 <= value <= 7:
            continue
        if construct == "risk_perception":
            risk_perception += value
            rp_count += 1
        elif construct == "opportunity_evaluation":
            opportunity_evaluation += value
            oe_count += 1

    profile = trial_info.get("profile", {}) or {}
    age = extract_numeric_value(profile.get("age"), default=None)

    # Completeness gate: thresholds mirror the full item counts of each
    # section (5 gambles, 2 planning-fallacy, 3 illusion-of-control, 10 CI
    # items, 4 risk-perception, 3 opportunity-evaluation) plus a usable age.
    # small_numbers is deliberately NOT required (the open-ended item is
    # optional).
    if (
        risk_answered < 5
        or planning_count < 2
        or ioc_count < 3
        or oc_count < 10
        or rp_count < 4
        or oe_count < 3
        or age is None
    ):
        return None

    return {
        "risk_propensity": risk_propensity,
        "planning_fallacy": planning_fallacy,
        "illusion_of_control": illusion_of_control,
        "overconfidence": overconfidence,
        "risk_perception": risk_perception,
        "opportunity_evaluation": opportunity_evaluation,
        "small_numbers": small_numbers,
        "age": age,
        "profile": profile,
    }


class PromptBuilder:
    """Build prompts from study specification and materials.
study_path = source directory."""

    def __init__(self, study_path: Path):
        # study_path is the study's *source* directory: it must contain
        # specification.json and a materials/ subdirectory with optional
        # instructions.txt / system_prompt.txt files.
        self.study_path = Path(study_path)
        self.materials_path = self.study_path / "materials"
        # errors="replace" tolerates mis-encoded bytes in hand-edited files.
        with open(self.study_path / "specification.json", "r", encoding="utf-8", errors="replace") as f:
            self.specification = json.load(f)
        # Both prompt materials are optional; missing files yield None.
        instructions_file = self.materials_path / "instructions.txt"
        self.instructions = instructions_file.read_text(encoding="utf-8", errors="replace") if instructions_file.exists() else None
        system_prompt_file = self.materials_path / "system_prompt.txt"
        self.system_prompt_template = system_prompt_file.read_text(encoding="utf-8", errors="replace") if system_prompt_file.exists() else None

    def build_system_prompt(self, participant_profile: Optional[Dict[str, Any]] = None) -> Optional[str]:
        # NOTE(review): participant_profile is currently unused — the raw
        # template is returned unchanged. Kept for subclass overrides that
        # personalize the prompt.
        return self.system_prompt_template

    def get_system_prompt_template(self) -> Optional[str]:
        # Raw (unfilled) template, or None when system_prompt.txt was absent.
        return self.system_prompt_template

    def build_trial_prompt(self, trial_data: Dict[str, Any]) -> str:
        # Default behavior delegates to the generic fallback; subclasses are
        # expected to override this for study-specific prompts.
        return self._build_generic_trial_prompt(trial_data)

    def get_instructions(self) -> str:
        # Falls back to a fixed placeholder when instructions.txt was absent.
        return self.instructions if self.instructions else "No instructions provided."

    def _fill_template(self, template: str, data: Dict[str, Any]) -> str:
        """Fill a Handlebars-like template from *data*.

        Supported constructs, processed in this order:
        1. ``{{a.b.c}}``  — dotted lookups into nested dicts (left intact
           when any path segment is missing, so later passes can see them);
        2. ``{{#if var}}...{{/if}}``  — kept when ``data[var]`` is truthy;
        3. ``{{#each var}}...{{/each}}`` — per-item expansion with
           ``{{@key}}``/``{{this}}`` for dicts and ``{{@index}}`` (1-based)
           /``{{this}}`` for lists;
        4. any remaining ``{{...}}`` placeholders are stripped.
        """
        result = template
        nested_pattern = r"\{\{([\w.]+)\}\}"
        def replace_nested(match):
            path = match.group(1)
            value = data
            for part in path.split("."):
                if isinstance(value, dict) and part in value:
                    value = value[part]
                else:
                    # Unresolvable path: leave the placeholder unchanged here;
                    # the final cleanup regex removes leftovers.
                    return match.group(0)
            return str(value)
        result = re.sub(nested_pattern, replace_nested, result)
        if_pattern = r"\{\{#if\s+(\w+)\}\}(.*?)\{\{/if\}\}"
        def replace_if(match):
            # Truthiness check: missing keys and falsy values both drop the block.
            if match.group(1) in data and data[match.group(1)]:
                return match.group(2)
            return ""
        result = re.sub(if_pattern, replace_if, result, flags=re.DOTALL)
        each_pattern = r"\{\{#each\s+(\w+)\}\}(.*?)\{\{/each\}\}"
        def replace_each(match):
            var_name, content = match.group(1), match.group(2)
            if var_name not in data:
                return ""
            items = data[var_name]
            if isinstance(items, dict):
                parts = [content.replace("{{@key}}", str(k)).replace("{{this}}", str(v)) for k, v in items.items()]
                return "\n".join(parts)
            if isinstance(items, list):
                # {{@index}} is deliberately 1-based for human-readable lists.
                parts = [content.replace("{{@index}}", str(i + 1)).replace("{{this}}", str(item)) for i, item in enumerate(items)]
                return "\n".join(parts)
            # Non-iterable values expand to nothing.
            return ""
        result = re.sub(each_pattern, replace_each, result, flags=re.DOTALL)
        # Strip any placeholders that survived all passes.
        result = re.sub(r"\{\{[^}]+\}\}", "", result)
        return result

    def _build_generic_trial_prompt(self, trial_data: Dict[str, Any]) -> str:
        # Minimal fallback prompt used when no study-specific builder exists.
        return f"Trial {trial_data.get('trial_number', '?')}: Please respond to the following stimulus."


class BaseStudyConfig(ABC):
    """Study config base. study_path = study root (e.g.
studies/study_001); data under source/."""

    # Subclasses may point this at a custom PromptBuilder subclass.
    prompt_builder_class = PromptBuilder

    def __init__(self, study_path: Path, specification: Dict[str, Any]):
        # study_path is the study *root*; all data files live under source/.
        self.study_path = Path(study_path)
        self.source_path = self.study_path / "source"
        self.specification = specification
        # KeyError here is intentional: a specification without study_id is invalid.
        self.study_id = specification["study_id"]
        # The prompt builder reads from the source directory, not the root.
        self.prompt_builder = self.prompt_builder_class(self.source_path)

    def load_material(self, sub_study_id: str) -> Dict[str, Any]:
        """Load source/materials/<sub_study_id>.json; raise FileNotFoundError if absent."""
        file_path = self.source_path / "materials" / f"{sub_study_id}.json"
        if not file_path.exists():
            raise FileNotFoundError(f"Material not found: {file_path}")
        with open(file_path, "r", encoding="utf-8") as f:
            return json.load(f)

    def load_metadata(self) -> Dict[str, Any]:
        # Raises the underlying OSError/JSONDecodeError if the file is missing/invalid.
        with open(self.source_path / "metadata.json", "r", encoding="utf-8") as f:
            return json.load(f)

    def load_specification(self) -> Dict[str, Any]:
        with open(self.source_path / "specification.json", "r", encoding="utf-8") as f:
            return json.load(f)

    def load_ground_truth(self) -> Dict[str, Any]:
        with open(self.source_path / "ground_truth.json", "r", encoding="utf-8") as f:
            return json.load(f)

    def extract_numeric(self, text: str, default: float = 0.0) -> float:
        """Return the first signed decimal number found in *text*, else *default*."""
        if text is None:
            return default
        match = re.search(r"(-?\d+\.?\d*)", str(text))
        return float(match.group(1)) if match else default

    def extract_choice(self, text: str, options: List[str] = None) -> Optional[int]:
        """Map a free-text answer to a 0-based option index.

        First tries a case-insensitive substring match against *options*;
        failing that, takes the first standalone capital letter (A -> 0, B -> 1, ...).
        NOTE(review): the letter fallback matches ANY standalone letter in the
        uppercased text (e.g. the article "A" in a sentence) — confirm callers
        only pass short letter-style answers here.
        """
        if text is None:
            return None
        text_s = str(text).strip()
        if options:
            for i, opt in enumerate(options):
                if opt.lower() in text_s.lower():
                    return i
        match = re.search(r"\b([A-Z])\b", text_s.upper())
        if match:
            return ord(match.group(1)) - ord("A")
        return None

    @abstractmethod
    def create_trials(self, n_trials: Optional[int] = None) -> List[Dict[str, Any]]:
        # Subclasses must materialize the study's trial list (optionally capped
        # at n_trials).
        raise NotImplementedError

    def get_prompt_builder(self) -> PromptBuilder:
        return self.prompt_builder

    def get_instructions(self) -> str:
        # Delegates to the prompt builder so instruction loading lives in one place.
        return
self.prompt_builder.get_instructions() + + def aggregate_results(self, raw_results: Dict[str, Any]) -> Dict[str, Any]: + return raw_results + + def custom_scoring(self, results: Dict[str, Any], ground_truth: Dict[str, Any]) -> Optional[Dict[str, float]]: + return None + + def get_n_participants(self) -> int: + return self.specification["participants"]["n"] + + def get_study_type(self) -> str: + return self.specification.get("study_type", self.study_id) + + def __repr__(self) -> str: + return f"{self.__class__.__name__}(study_id='{self.study_id}')" diff --git a/studies/study_013/source/Keh-Foo-Lim-2002-Opportunity-Evaluation.pdf b/studies/study_013/source/Keh-Foo-Lim-2002-Opportunity-Evaluation.pdf new file mode 100644 index 00000000..2ed46c23 Binary files /dev/null and b/studies/study_013/source/Keh-Foo-Lim-2002-Opportunity-Evaluation.pdf differ diff --git a/studies/study_013/source/ground_truth.json b/studies/study_013/source/ground_truth.json new file mode 100644 index 00000000..434717c5 --- /dev/null +++ b/studies/study_013/source/ground_truth.json @@ -0,0 +1,175 @@ +{ + "study_id": "study_013", + "title": "Opportunity Evaluation under Risky Conditions: The Cognitive Processes of Entrepreneurs", + "authors": ["Hean Tat Keh", "Maw Der Foo", "Boon Chong Lim"], + "year": 2002, + "studies": [ + { + "study_id": "keh_foo_lim_opportunity_evaluation", + "study_name": "Cognitive Biases and Opportunity Evaluation", + "findings": [ + { + "finding_id": "F1", + "main_hypothesis": "Entrepreneurs are overconfident: the mean number of confidence-interval items where the correct answer falls outside the stated 90% range is significantly greater than 1 (the calibrated baseline).", + "statistical_tests": [ + { + "test_name": "One-sample t-test (overconfidence score vs. 
calibrated baseline of 1)", + "statistical_hypothesis": "Mean overconfidence score (number of items outside 90% CI out of 10) > 1", + "reported_statistics": "Mean = 5.17, SD = 2.64, N = 77", + "significance_level": 0.05, + "expected_direction": "positive" + } + ], + "original_data_points": { + "description": "Overconfidence scores from Table 3.", + "data": { + "overconfidence": { + "mean": 5.17, + "sd": 2.64, + "n": 77, + "baseline": 1, + "note": "Number of items (out of 10) where the correct answer fell outside the respondent's stated 90% confidence interval." + } + } + } + }, + { + "finding_id": "F2", + "main_hypothesis": "Risk perception negatively predicts opportunity evaluation (H1).", + "statistical_tests": [ + { + "test_name": "OLS regression (opportunity_evaluation ~ risk_perception)", + "statistical_hypothesis": "The regression coefficient on risk_perception is negative.", + "reported_statistics": "beta = -0.50, t = -5.98, p < .001 (Table 4, H1)", + "reported_coefficient": -0.5, + "reported_t_value": -5.98, + "significance_level": 0.05, + "expected_direction": "negative" + } + ], + "original_data_points": { + "description": "Bivariate regression and descriptive statistics from Tables 3 and 4.", + "data": { + "risk_perception": { + "mean": 19.04, + "sd": 4.92, + "n": 77 + }, + "opportunity_evaluation": { + "mean": 12.97, + "sd": 4.1, + "n": 77 + }, + "correlation": -0.58, + "regression_beta": -0.5, + "regression_t": -5.98, + "regression_R2": 0.36 + } + } + }, + { + "finding_id": "F3", + "main_hypothesis": "Illusion of control negatively predicts risk perception when the other cognitive-bias measures, age, and risk propensity are controlled (H5 / Model 1).", + "statistical_tests": [ + { + "test_name": "OLS regression (risk_perception ~ overconfidence + small_numbers + planning_fallacy + illusion_of_control + risk_propensity + age)", + "statistical_hypothesis": "The regression coefficient on illusion_of_control is negative.", + "reported_statistics": "beta = 
-0.76, t = -3.34, p < .01 (Table 4, Model 1)", + "reported_coefficient": -0.76, + "reported_t_value": -3.34, + "significance_level": 0.05, + "expected_direction": "negative" + } + ], + "original_data_points": { + "description": "Model 1 coefficient plus Table 3 descriptives.", + "data": { + "illusion_of_control": { + "mean": 12.94, + "sd": 3.29, + "n": 77 + }, + "risk_perception": { + "mean": 19.04, + "sd": 4.92, + "n": 77 + }, + "correlation": -0.44, + "regression_beta": -0.76, + "regression_t": -3.34, + "model_controls": ["overconfidence", "small_numbers", "planning_fallacy", "risk_propensity", "age"] + } + } + }, + { + "finding_id": "F4", + "main_hypothesis": "Illusion of control positively predicts opportunity evaluation before the mediator is added, controlling for the other cognitive-bias measures, age, and risk propensity (Model 2).", + "statistical_tests": [ + { + "test_name": "OLS regression (opportunity_evaluation ~ overconfidence + small_numbers + planning_fallacy + illusion_of_control + risk_propensity + age)", + "statistical_hypothesis": "The regression coefficient on illusion_of_control is positive.", + "reported_statistics": "beta = 0.40, t = 2.23, p < .05 (Table 4, Model 2)", + "reported_coefficient": 0.4, + "reported_t_value": 2.23, + "significance_level": 0.05, + "expected_direction": "positive" + } + ], + "original_data_points": { + "description": "Model 2 coefficient plus Table 3 descriptives.", + "data": { + "illusion_of_control": { + "mean": 12.94, + "sd": 3.29, + "n": 77 + }, + "opportunity_evaluation": { + "mean": 12.97, + "sd": 4.1, + "n": 77 + }, + "correlation": 0.34, + "regression_beta": 0.4, + "regression_t": 2.23, + "model_controls": ["overconfidence", "small_numbers", "planning_fallacy", "risk_propensity", "age"] + } + } + }, + { + "finding_id": "F5", + "main_hypothesis": "Belief in the law of small numbers positively predicts opportunity evaluation before the mediator is added, controlling for the other cognitive-bias measures, age, 
and risk propensity (Model 2).", + "statistical_tests": [ + { + "test_name": "OLS regression (opportunity_evaluation ~ overconfidence + small_numbers + planning_fallacy + illusion_of_control + risk_propensity + age)", + "statistical_hypothesis": "The regression coefficient on small_numbers is positive.", + "reported_statistics": "beta = 1.17, t = 1.91, p < .06 (Table 4, Model 2)", + "reported_coefficient": 1.17, + "reported_t_value": 1.91, + "significance_level": 0.06, + "expected_direction": "positive" + } + ], + "original_data_points": { + "description": "Model 2 coefficient plus Table 3 descriptives.", + "data": { + "small_numbers": { + "mean": -0.08, + "sd": 0.99, + "n": 77 + }, + "opportunity_evaluation": { + "mean": 12.97, + "sd": 4.1, + "n": 77 + }, + "correlation": 0.32, + "regression_beta": 1.17, + "regression_t": 1.91, + "model_controls": ["overconfidence", "planning_fallacy", "illusion_of_control", "risk_propensity", "age"] + } + } + } + ] + } + ] +} diff --git a/studies/study_013/source/materials/section_a_risk_propensity.json b/studies/study_013/source/materials/section_a_risk_propensity.json new file mode 100644 index 00000000..484df9eb --- /dev/null +++ b/studies/study_013/source/materials/section_a_risk_propensity.json @@ -0,0 +1,86 @@ +{ + "sub_study_id": "section_a_risk_propensity", + "instructions": "Please answer the following five items by circling the alternative (\"a\" or \"b\") you would feel most comfortable with.", + "items": [ + { + "id": "A1", + "question": "Which would you prefer?", + "options": [ + "An 80% chance of getting $40,000", + "Receiving $32,000 for sure" + ], + "type": "forced_choice", + "metadata": { + "construct": "risk_propensity", + "risky_option": "a", + "expected_value_a": 32000, + "expected_value_b": 32000, + "note": "Equal expected values; choosing (a) indicates risk-seeking" + } + }, + { + "id": "A2", + "question": "Which would you prefer?", + "options": [ + "Receiving $30,000 for sure", + "A 20% chance of getting 
$150,000" + ], + "type": "forced_choice", + "metadata": { + "construct": "risk_propensity", + "risky_option": "b", + "expected_value_a": 30000, + "expected_value_b": 30000, + "note": "Equal expected values; choosing (b) indicates risk-seeking" + } + }, + { + "id": "A3", + "question": "Which would you prefer?", + "options": [ + "A 90% chance of winning $200,000", + "Receiving $180,000 for sure" + ], + "type": "forced_choice", + "metadata": { + "construct": "risk_propensity", + "risky_option": "a", + "expected_value_a": 180000, + "expected_value_b": 180000, + "note": "Equal expected values; choosing (a) indicates risk-seeking" + } + }, + { + "id": "A4", + "question": "Which would you prefer?", + "options": [ + "Receiving $16,000 for sure", + "10% chance of getting $160,000" + ], + "type": "forced_choice", + "metadata": { + "construct": "risk_propensity", + "risky_option": "b", + "expected_value_a": 16000, + "expected_value_b": 16000, + "note": "Equal expected values; choosing (b) indicates risk-seeking" + } + }, + { + "id": "A5", + "question": "Which would you prefer?", + "options": [ + "A 50% chance of getting $50,000", + "Receiving $25,000 for sure" + ], + "type": "forced_choice", + "metadata": { + "construct": "risk_propensity", + "risky_option": "a", + "expected_value_a": 25000, + "expected_value_b": 25000, + "note": "Equal expected values; choosing (a) indicates risk-seeking" + } + } + ] +} diff --git a/studies/study_013/source/materials/section_b_cognitive_biases.json b/studies/study_013/source/materials/section_b_cognitive_biases.json new file mode 100644 index 00000000..b833e3e5 --- /dev/null +++ b/studies/study_013/source/materials/section_b_cognitive_biases.json @@ -0,0 +1,91 @@ +{ + "sub_study_id": "section_b_cognitive_biases", + "instructions": "Please answer the following items by deciding how much you agree with the statements. 
(Circle the numbers that best reflect your opinions)", + "scale": { + "min": 1, + "max": 7, + "labels": { + "1": "Strongly Disagree", + "7": "Strongly Agree" + } + }, + "items": [ + { + "id": "B1", + "question": "I want to earn more than my current income level in the long run.", + "type": "likert_7", + "metadata": { + "construct": "filler_economic_aspiration", + "reverse_scored": false, + "used_in_analysis": false, + "note": "Filler item; not used in any analysis" + } + }, + { + "id": "B2", + "question": "I am looking for businesses or employment with higher income.", + "type": "likert_7", + "metadata": { + "construct": "filler_economic_aspiration", + "reverse_scored": false, + "used_in_analysis": false, + "note": "Filler item; not used in any analysis" + } + }, + { + "id": "B3", + "question": "I believe that past entrepreneurial experience helps in assessing riskiness of a new business.", + "type": "likert_7", + "metadata": { + "construct": "planning_fallacy", + "reverse_scored": false, + "used_in_analysis": true, + "note": "Higher scores indicate greater planning fallacy (belief that past experience predicts future outcomes)" + } + }, + { + "id": "B4", + "question": "I believe that the key issues of running different types of businesses are similar.", + "type": "likert_7", + "metadata": { + "construct": "planning_fallacy", + "reverse_scored": false, + "used_in_analysis": true, + "note": "Higher scores indicate greater planning fallacy (overgeneralizing from past experience)" + } + }, + { + "id": "B5", + "question": "I can accurately forecast the total demand for my business.", + "type": "likert_7", + "metadata": { + "construct": "illusion_of_control", + "reverse_scored": false, + "used_in_analysis": true, + "note": "Higher scores indicate greater illusion of control" + } + }, + { + "id": "B6", + "question": "I can accurately forecast when larger competitors will enter the market.", + "type": "likert_7", + "metadata": { + "construct": "illusion_of_control", + 
"reverse_scored": false, + "used_in_analysis": true, + "note": "Higher scores indicate greater illusion of control" + } + }, + { + "id": "B7", + "question": "I can make my business a success, even though others may fail.", + "type": "likert_7", + "metadata": { + "construct": "illusion_of_control", + "reverse_scored": false, + "used_in_analysis": true, + "note": "Higher scores indicate greater illusion of control" + } + } + ] +} diff --git a/studies/study_013/source/materials/section_c_overconfidence.json b/studies/study_013/source/materials/section_c_overconfidence.json new file mode 100644 index 00000000..6107e14e --- /dev/null +++ b/studies/study_013/source/materials/section_c_overconfidence.json @@ -0,0 +1,117 @@ +{ + "sub_study_id": "section_c_overconfidence", + "instructions": "Please answer the following items, by deciding the ranges, which the right answers may be in. You should be 90% certain that the correct answers are in these ranges. If you have absolutely no idea where the answer lies, please fill in the maximum range possible for the question (i.e., 0 to 1,000).", + "example": { + "question": "What is the total population of Singapore in 1999?", + "lower_limit": "3 million", + "upper_limit": "4 million" + }, + "items": [ + { + "id": "C1", + "question": "What is the number of tourist arrivals to Singapore in 1999 (excluding Malaysian arrival by land)?", + "unit": "million", + "correct_answer": 6.9582, + "type": "confidence_interval", + "metadata": { + "source": "Yearbook of Statistics Singapore 2000, Table 14.1", + "raw_value": 6958200, + "raw_unit": "persons" + } + }, + { + "id": "C2", + "question": "What is the total Gross Domestic Product of Singapore in 1999 (at current market price)?", + "unit": "S$ billion", + "correct_answer": 143.98, + "type": "confidence_interval", + "metadata": { + "source": "Yearbook of Statistics Singapore 2000, Table 5.2", + "raw_value": 143981300000, + "raw_unit": "S$" + } + }, + { + "id": "C3", + "question": "How many 
airlines stop over at Singapore Changi Airport?", + "unit": "airlines", + "correct_answer": 64, + "type": "confidence_interval", + "metadata": { + "source": "Changi Airport Group, Our Story — The 2000s" + } + }, + { + "id": "C4", + "question": "What is the number of private cars (per 1,000 people) here in Singapore in 1999?", + "unit": "per 1000", + "correct_answer": 114, + "type": "confidence_interval", + "metadata": { + "source": "Yearbook of Statistics Singapore 2000, Table 1.14" + } + }, + { + "id": "C5", + "question": "What is Singapore's unemployment rate in 1999?", + "unit": "%", + "correct_answer": 3.3, + "type": "confidence_interval", + "metadata": { + "source": "Yearbook of Statistics Singapore 2000, Table 1.10" + } + }, + { + "id": "C6", + "question": "What is the daily newspaper circulation (per 1,000 people) here in Singapore in 1999?", + "unit": "per 1000", + "correct_answer": 275, + "type": "confidence_interval", + "metadata": { + "source": "Yearbook of Statistics Singapore 2000, Table 1.14 and Table 22.4" + } + }, + { + "id": "C7", + "question": "What is the total vehicle quota (i.e. 
COE) for the year from May 2000 to April 2001?", + "unit": "thousand", + "correct_answer": 109.353, + "type": "confidence_interval", + "metadata": { + "source": "LTA Vehicle Quota Tender Results 2000-2004", + "raw_value": 109353, + "raw_unit": "COEs" + } + }, + { + "id": "C8", + "question": "What is the literacy rate of Singapore citizens aged 15 years and over in 1999?", + "unit": "%", + "correct_answer": 93.5, + "type": "confidence_interval", + "metadata": { + "source": "Yearbook of Statistics Singapore 2000, Table 1.12" + } + }, + { + "id": "C9", + "question": "What percentage of all residential units in Singapore are HDB flats?", + "unit": "%", + "correct_answer": 81.1, + "type": "confidence_interval", + "metadata": { + "source": "SingStat Table TS/M400751, end-June 2000" + } + }, + { + "id": "C10", + "question": "What is the prime lending rate (per annum) in Singapore in 1999?", + "unit": "%", + "correct_answer": 5.80, + "type": "confidence_interval", + "metadata": { + "source": "Yearbook of Statistics Singapore 2000, Table 15.13" + } + } + ] +} diff --git a/studies/study_013/source/materials/section_d_case_vignette.json b/studies/study_013/source/materials/section_d_case_vignette.json new file mode 100644 index 00000000..4e75013b --- /dev/null +++ b/studies/study_013/source/materials/section_d_case_vignette.json @@ -0,0 +1,81 @@ +{ + "sub_study_id": "section_d_case_vignette", + "instructions": "Please answer the following questions after reading the case study.", + "vignette_text": "Mr. Tan is a successful manager with four years of experience at a multi-national corporation (MNC). Before that he worked in a medium sized local company for five years. The idea of being his own boss, taking calculated risks, and making a fortune all appeal to him. Hence he is thinking of starting his own business.\n\nHe has an idea for a new business and decides to ask around to see if it is a good idea. 
He has some very positive feedback from some potential customers and some associates who know the industry well. Mr. Tan does not have the resources to do an in-depth market research to find out whether the business is going to work and published data are too general to be useful. However he feels that there is money to be made based on the positive feedback from potential customers and his associates. He is enthusiastic about starting the business even though he has no experience in this industry or starting his own business.\n\nThere are a few MNCs in the same industry but they have not targeted the market segment that Mr. Tan is aiming for. He feels that the MNCs are likely to move into the market as long as the new business is successful and he will not be able to fend off this major threat. He is unsure whether the market is still growing or matured. If the market has reached maturity, it is likely for a new business to be squeezed out of the market. If the market is still growing, the new business will be able to survive the entry of MNCs into this market segment. He finds out that there are only a few small businesses that are still surviving in the industry.\n\nMr. Tan estimates he will need at least S$150,000 to finance the new business. 
As he has only S$40,000 in savings, he has to borrow from the bank or find partners to get the rest of the investment funds needed.", + "scale": { + "min": 1, + "max": 7, + "labels": { + "1": "Strongly Disagree", + "7": "Strongly Agree" + } + }, + "items": [ + { + "id": "D1", + "question": "The overall risk of the business is high.", + "type": "likert_7", + "metadata": { + "construct": "risk_perception" + } + }, + { + "id": "D2", + "question": "The probability of failure is high.", + "type": "likert_7", + "metadata": { + "construct": "risk_perception" + } + }, + { + "id": "D3", + "question": "The founder stands to lose a lot financially.", + "type": "likert_7", + "metadata": { + "construct": "risk_perception" + } + }, + { + "id": "D4", + "question": "There is a lot uncertainty when predicting how well the business will do.", + "type": "likert_7", + "metadata": { + "construct": "risk_perception" + } + }, + { + "id": "D5", + "question": "I will consider this business an opportunity.", + "type": "likert_7", + "metadata": { + "construct": "opportunity_evaluation" + } + }, + { + "id": "D6", + "question": "This business is worth considering.", + "type": "likert_7", + "metadata": { + "construct": "opportunity_evaluation" + } + }, + { + "id": "D7", + "question": "This business is feasible given the situation.", + "type": "likert_7", + "metadata": { + "construct": "opportunity_evaluation" + } + }, + { + "id": "D8", + "question": "State issues that influenced your view on whether Mr. Tan should start the above business or not. (Include whatever additional information you may need to make a better decision)", + "type": "open_ended", + "metadata": { + "construct": "belief_in_small_numbers", + "coding_scheme": "Responses requesting broader evidence such as market research, larger samples, statistical data, or surveys indicate statistical reasoning (coded -1). 
Responses that rely on the vignette as sufficient, including statements that no additional information is needed, indicate belief in the law of small numbers (coded +1).", + "note": "This item is optional in the prompt. Blank or explicitly skipped responses are treated as missing during scoring; 'no additional information needed' is a substantive response and should not be treated as missing." + } + } + ] +} diff --git a/studies/study_013/source/metadata.json b/studies/study_013/source/metadata.json new file mode 100644 index 00000000..d0ba1514 --- /dev/null +++ b/studies/study_013/source/metadata.json @@ -0,0 +1,85 @@ +{ + "id": "study_013", + "title": "Opportunity Evaluation under Risky Conditions: The Cognitive Processes of Entrepreneurs", + "authors": [ + "Hean Tat Keh", + "Maw Der Foo", + "Boon Chong Lim" + ], + "year": 2002, + "domain": "entrepreneurship", + "subdomain": "entrepreneurial_cognition", + "keywords": [ + "overconfidence", + "illusion_of_control", + "planning_fallacy", + "belief_in_small_numbers", + "opportunity_evaluation", + "risk_perception", + "cognitive_biases", + "entrepreneurship", + "heuristics" + ], + "difficulty": "medium", + "description": "This study examines how cognitive biases affect entrepreneurs' opportunity evaluation under risky conditions. 
The benchmark reconstructs the paper's calibration test and its reported regression findings: entrepreneurs are overconfident, risk perception negatively predicts opportunity evaluation, illusion of control lowers risk perception and increases opportunity evaluation before the mediator is introduced, and belief in the law of small numbers increases opportunity evaluation in the pre-mediation model.", + "scenarios": [ + "keh_foo_lim_opportunity_evaluation" + ], + "findings": [ + { + "finding_id": "F1", + "main_hypothesis": "Entrepreneurs are overconfident: the mean number of confidence-interval items where the correct answer falls outside the stated 90% range is significantly greater than 1 (the calibrated baseline).", + "weight": 1.0, + "tests": [ + { + "test_name": "One-sample t-test (overconfidence score vs. calibrated baseline of 1)", + "weight": 1.0 + } + ] + }, + { + "finding_id": "F2", + "main_hypothesis": "Risk perception negatively predicts opportunity evaluation (H1).", + "weight": 1.0, + "tests": [ + { + "test_name": "OLS regression (opportunity_evaluation ~ risk_perception)", + "weight": 1.0 + } + ] + }, + { + "finding_id": "F3", + "main_hypothesis": "Illusion of control negatively predicts risk perception when the other cognitive-bias measures, age, and risk propensity are controlled (H5 / Model 1).", + "weight": 1.0, + "tests": [ + { + "test_name": "OLS regression (risk_perception ~ overconfidence + small_numbers + planning_fallacy + illusion_of_control + risk_propensity + age)", + "weight": 1.0 + } + ] + }, + { + "finding_id": "F4", + "main_hypothesis": "Illusion of control positively predicts opportunity evaluation before the mediator is added, controlling for the other cognitive-bias measures, age, and risk propensity (Model 2).", + "weight": 1.0, + "tests": [ + { + "test_name": "OLS regression (opportunity_evaluation ~ overconfidence + small_numbers + planning_fallacy + illusion_of_control + risk_propensity + age)", + "weight": 1.0 + } + ] + }, + { 
+ "finding_id": "F5", + "main_hypothesis": "Belief in the law of small numbers positively predicts opportunity evaluation before the mediator is added, controlling for the other cognitive-bias measures, age, and risk propensity (Model 2).", + "weight": 1.0, + "tests": [ + { + "test_name": "OLS regression (opportunity_evaluation ~ overconfidence + small_numbers + planning_fallacy + illusion_of_control + risk_propensity + age)", + "weight": 1.0 + } + ] + } + ] +} diff --git a/studies/study_013/source/specification.json b/studies/study_013/source/specification.json new file mode 100644 index 00000000..dd972519 --- /dev/null +++ b/studies/study_013/source/specification.json @@ -0,0 +1,61 @@ +{ + "study_id": "study_013", + "title": "Opportunity Evaluation under Risky Conditions: The Cognitive Processes of Entrepreneurs", + "participants": { + "n": 77, + "population": "Founders and owners of the top 500 small and medium-sized enterprises (SMEs) in Singapore, as identified by a local business publication", + "demographics": { + "sex": {"male": 0.97, "female": 0.03}, + "age": {"less_than_40": 0.222, "40_to_60": 0.715, "more_than_60": 0.063}, + "race": {"chinese": 0.924, "indian": 0.045, "others": 0.031}, + "education": {"secondary": 0.061, "postsecondary": 0.864, "primary_and_others": 0.075}, + "ownership": {"founded": 0.79, "bought_over": 0.21}, + "business_size": {"less_than_1m": 0.028, "1m_to_25m": 0.486, "25m_to_50m": 0.444, "more_than_50m": 0.042} + }, + "by_sub_study": { + "keh_foo_lim_opportunity_evaluation": { + "n": 77, + "description": "All participants completed the same questionnaire (Sections A-E)." 
+ } + } + }, + "design": { + "type": "Cross-Sectional Survey", + "factors": [ + { + "name": "Overconfidence", + "type": "measured", + "description": "Calibration-based overconfidence measured by 10 confidence-interval estimation items" + }, + { + "name": "Illusion of Control", + "type": "measured", + "description": "Three Likert items measuring perceived ability to forecast and control business outcomes" + }, + { + "name": "Planning Fallacy", + "type": "measured", + "description": "Two Likert items measuring belief that past experience predicts future outcomes" + }, + { + "name": "Belief in Small Numbers", + "type": "measured", + "description": "Coded from the open-ended vignette response: asking for broader market evidence indicates statistical reasoning, while relying on the vignette's limited anecdotal cues indicates belief in the law of small numbers" + }, + { + "name": "Risk Propensity", + "type": "measured", + "description": "Five forced-choice gamble items (control variable)" + } + ] + }, + "procedure": { + "steps": [ + "Section A: Participants make 5 forced-choice decisions between a risky gamble and a sure payoff (risk propensity measure)", + "Section B: Participants rate 7 statements on a 7-point Likert scale measuring cognitive biases (2 filler items, 2 planning fallacy items, 3 illusion of control items)", + "Section C: Participants provide lower and upper bounds for 10 factual questions about Singapore, with 90% confidence that the correct answer falls within their stated range (overconfidence measure)", + "Section D: Participants read a business case vignette about Mr. Tan considering starting a new business, then rate 4 risk perception items and 3 opportunity evaluation items on 7-point Likert scales, and respond to 1 open-ended question about factors influencing their assessment (belief in small numbers measure)", + "Section E: Participants provide demographic information (15 items)" + ] + } +}