From a854bb70cac4cf5971f53a17ab0456ae1befd5a6 Mon Sep 17 00:00:00 2001
From: Bissbert <43237892+Bissbert@users.noreply.github.com>
Date: Sat, 14 Feb 2026 23:50:55 +0700
Subject: [PATCH] feat!: add form grouping, labels, and named references (CDL
 v1.3)

Phase 3 of CDL v2 implementation. Adds FormGroup for parenthesized form
grouping with shared features, form labels, @name/$name definitions and
references, flat_forms() backwards-compatible flattening, and 43 new
tests (163 total).

BREAKING CHANGE: CrystalDescription.forms type changed from
list[CrystalForm] to list[FormNode]. Use flat_forms() for
backwards-compatible flat list access.
---
 pyproject.toml                      |   2 +-
 src/cdl_parser/__init__.py          |  19 +-
 src/cdl_parser/constants.py         |  27 ++
 src/cdl_parser/models.py            | 206 ++++++++-
 src/cdl_parser/parser.py            | 341 ++++++++++++++-
 tests/fixtures/v1.3-test-cases.json | 223 ++++++++++
 tests/test_parser.py                | 620 ++++++++++++++++++++++++++++
 7 files changed, 1413 insertions(+), 25 deletions(-)
 create mode 100644 tests/fixtures/v1.3-test-cases.json

diff --git a/pyproject.toml b/pyproject.toml
index b44ec28..c54f843 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "gemmology-cdl-parser"
-version = "1.0.1"
+version = "1.3.0"
 description = "Crystal Description Language (CDL) parser for crystallographic visualization"
 readme = "README.md"
 license = { text = "MIT" }
diff --git a/src/cdl_parser/__init__.py b/src/cdl_parser/__init__.py
index 184d711..8660701 100644
--- a/src/cdl_parser/__init__.py
+++ b/src/cdl_parser/__init__.py
@@ -22,7 +22,7 @@
         cubic[m3m]:{111} | twin(spinel)     # Spinel-law twin
 """
 
-__version__ = "1.0.0"
+__version__ = "1.3.0"
 __author__ = "Fabian Schuh"
 __email__ = "fabian@gemmology.dev"
 
@@ -32,8 +32,10 @@
     ALL_POINT_GROUPS,
     CRYSTAL_SYSTEMS,
     DEFAULT_POINT_GROUPS,
+    FEATURE_NAMES,
     MODIFICATION_TYPES,
     NAMED_FORMS,
+    PHENOMENON_TYPES,
     POINT_GROUPS,
     TWIN_LAWS,
     TWIN_TYPES,
@@ -46,13 +48,18 @@
 from .models import (
     CrystalDescription,
     CrystalForm,
+    Definition,
+    Feature,
+    FormGroup,
+    FormNode,
     MillerIndex,
     Modification,
+    PhenomenonSpec,
     TwinSpec,
 )
 
 # Lexer/Parser internals (for advanced use)
-from .parser import Lexer, Parser, Token, TokenType, parse_cdl, validate_cdl
+from .parser import Lexer, Parser, Token, TokenType, parse_cdl, strip_comments, validate_cdl
 
 __all__ = [
     # Version
@@ -63,8 +70,13 @@
     # Data classes
     "CrystalDescription",
     "CrystalForm",
+    "Definition",
+    "Feature",
+    "FormGroup",
+    "FormNode",
     "MillerIndex",
     "Modification",
+    "PhenomenonSpec",
     "TwinSpec",
     # Exceptions
     "CDLError",
@@ -74,8 +86,10 @@
     "ALL_POINT_GROUPS",
     "CRYSTAL_SYSTEMS",
     "DEFAULT_POINT_GROUPS",
+    "FEATURE_NAMES",
     "MODIFICATION_TYPES",
     "NAMED_FORMS",
+    "PHENOMENON_TYPES",
     "POINT_GROUPS",
     "TWIN_LAWS",
     "TWIN_TYPES",
@@ -84,4 +98,5 @@
     "Parser",
     "Token",
     "TokenType",
+    "strip_comments",
 ]
diff --git a/src/cdl_parser/constants.py b/src/cdl_parser/constants.py
index d324fae..6d66c1b 100644
--- a/src/cdl_parser/constants.py
+++ b/src/cdl_parser/constants.py
@@ -120,6 +120,7 @@
     "truncate",  # Cut off corners/edges
     "taper",  # Make narrower in one direction
     "bevel",  # Add beveled edges
+    "flatten",  # Compress along an axis
 }
 
 # =============================================================================
@@ -131,3 +132,29 @@
     "penetration",  # Penetration twin (interpenetrating)
     "cyclic",  # Cyclic twin (multiple individuals)
 }
+
+# =============================================================================
+# Feature Names (CDL v1.2)
+# =============================================================================
+
+FEATURE_NAMES: set[str] = {
+    # Growth features
+    "phantom", "sector", "zoning", "skeletal", "dendritic",
+    # Surface features
+    "striation", "trigon", "etch_pit", "growth_hillock",
+    # Inclusion features
+    "inclusion", "needle", "silk", "fluid", "bubble",
+    # Color features
+    "colour", "colour_zone", "pleochroism",
+    # Other
+    "lamellar", "banding",
+}
+
+# =============================================================================
+# Phenomenon Types (CDL v1.2)
+# =============================================================================
+
+PHENOMENON_TYPES: set[str] = {
+    "asterism", "chatoyancy", "adularescence", "labradorescence",
+    "play_of_color", "colour_change", "aventurescence", "iridescence",
+}
diff --git a/src/cdl_parser/models.py b/src/cdl_parser/models.py
index 3117971..2a5078c 100644
--- a/src/cdl_parser/models.py
+++ b/src/cdl_parser/models.py
@@ -4,8 +4,10 @@
 Data classes representing Crystal Description Language components.
 """
 
+from __future__ import annotations
+
 from dataclasses import dataclass, field
-from typing import Any
+from typing import Any, Union
 
 
 @dataclass
@@ -62,6 +64,27 @@ def __repr__(self) -> str:
         return f"MillerIndex({self.h}, {self.k}, {self.l})"
 
 
+@dataclass
+class Feature:
+    """A crystal feature annotation.
+
+    Describes growth patterns, surface markings, inclusions, or color properties.
+
+    Attributes:
+        name: Feature type ('phantom', 'trigon', 'silk', 'colour', etc.)
+        values: List of feature values (numbers, identifiers, color specs)
+    """
+
+    name: str
+    values: list[int | float | str] = field(default_factory=list)
+
+    def __str__(self) -> str:
+        if self.values:
+            val_str = ", ".join(str(v) for v in self.values)
+            return f"{self.name}:{val_str}"
+        return self.name
+
+
 @dataclass
 class CrystalForm:
     """A crystal form with Miller index and scale.
@@ -73,6 +96,8 @@ class CrystalForm:
         miller: The Miller index defining the form
         scale: Distance scale (default 1.0, larger = more truncated)
         name: Original name if using named form (e.g., 'octahedron')
+        features: Optional list of feature annotations
+        label: Optional label for the form (e.g., 'prism' in prism:{10-10})
 
     Examples:
         >>> CrystalForm(MillerIndex(1, 1, 1), scale=1.0)
@@ -82,16 +107,61 @@ class CrystalForm:
     miller: MillerIndex
     scale: float = 1.0
     name: str | None = None  # Original name if using named form
+    features: list[Feature] | None = None  # Per-form features [phantom:3]
+    label: str | None = None  # Form label (v1.3)
 
     def __str__(self) -> str:
         s = str(self.miller)
         if self.name:
             s = f"{self.name}={s}"
+        if self.label:
+            s = f"{self.label}:{s}"
         if self.scale != 1.0:
             s += f"@{self.scale}"
+        if self.features:
+            feat_str = ", ".join(str(f) for f in self.features)
+            s += f"[{feat_str}]"
+        return s
+
+
+@dataclass
+class FormGroup:
+    """A group of forms with optional shared features and label.
+
+    Represents parenthesized form groups: (form + form)[shared_features]
+    """
+
+    forms: list[FormNode]
+    features: list[Feature] | None = None
+    label: str | None = None
+
+    def __str__(self) -> str:
+        form_strs = [str(f) for f in self.forms]
+        s = "(" + " + ".join(form_strs) + ")"
+        if self.label:
+            s = f"{self.label}:{s}"
+        if self.features:
+            feat_str = ", ".join(str(f) for f in self.features)
+            s += f"[{feat_str}]"
         return s
 
 
+# Type alias for form tree nodes
+FormNode = Union[CrystalForm, FormGroup]
+
+
+@dataclass
+class Definition:
+    """A named definition: @name = expression"""
+
+    name: str
+    body: list[FormNode]
+
+    def __str__(self) -> str:
+        body_str = " + ".join(str(f) for f in self.body)
+        return f"@{self.name} = {body_str}"
+
+
 @dataclass
 class Modification:
     """A morphological modification.
@@ -99,7 +169,7 @@ class Modification:
     Represents transformations applied to the crystal shape.
 
     Attributes:
-        type: Modification type ('elongate', 'truncate', 'taper', 'bevel')
+        type: Modification type ('elongate', 'truncate', 'taper', 'bevel', 'flatten')
         params: Parameters specific to the modification type
 
     Examples:
@@ -107,7 +177,7 @@ class Modification:
         >>> Modification('truncate', {'form': MillerIndex(1,0,0), 'depth': 0.3})
     """
 
-    type: str  # elongate, truncate, taper, bevel
+    type: str  # elongate, truncate, taper, bevel, flatten
     params: dict[str, Any] = field(default_factory=dict)
 
     def __str__(self) -> str:
@@ -148,6 +218,52 @@ def __str__(self) -> str:
         return f"twin({self.axis},{self.angle},{self.twin_type})"
 
 
+@dataclass
+class PhenomenonSpec:
+    """Optical phenomenon specification.
+
+    Attributes:
+        type: Phenomenon type ('asterism', 'chatoyancy', 'adularescence', etc.)
+        params: Dict of parameters (e.g. {'rays': 6, 'intensity': 'strong'})
+    """
+
+    type: str
+    params: dict[str, int | float | str] = field(default_factory=dict)
+
+    def __str__(self) -> str:
+        parts = [self.type]
+        for k, v in self.params.items():
+            parts.append(f"{k}:{v}")
+        return "phenomenon[" + ", ".join(parts) + "]"
+
+
+def _form_node_to_dict(node: FormNode) -> dict[str, Any]:
+    """Convert a FormNode to dictionary representation."""
+    if isinstance(node, CrystalForm):
+        return {
+            "type": "form",
+            "miller": node.miller.as_tuple(),
+            "scale": node.scale,
+            "name": node.name,
+            "label": node.label,
+            "features": [
+                {"name": feat.name, "values": feat.values}
+                for feat in node.features
+            ] if node.features else None,
+        }
+    elif isinstance(node, FormGroup):
+        return {
+            "type": "group",
+            "forms": [_form_node_to_dict(f) for f in node.forms],
+            "label": node.label,
+            "features": [
+                {"name": feat.name, "values": feat.values}
+                for feat in node.features
+            ] if node.features else None,
+        }
+    return {}
+
+
 @dataclass
 class CrystalDescription:
     """Complete crystal description parsed from CDL.
@@ -158,9 +274,10 @@ class CrystalDescription:
     Attributes:
         system: Crystal system ('cubic', 'hexagonal', etc.)
         point_group: Hermann-Mauguin point group symbol ('m3m', '6/mmm', etc.)
-        forms: List of crystal forms with their scales
+        forms: List of form nodes (CrystalForm or FormGroup)
         modifications: List of morphological modifications
         twin: Optional twin specification
+        definitions: Optional list of named definitions
 
     Examples:
         >>> desc = parse_cdl("cubic[m3m]:{111}@1.0 + {100}@1.3")
@@ -172,15 +289,34 @@ class CrystalDescription:
 
     system: str
     point_group: str
-    forms: list[CrystalForm] = field(default_factory=list)
+    forms: list[FormNode] = field(default_factory=list)
     modifications: list[Modification] = field(default_factory=list)
     twin: TwinSpec | None = None
+    phenomenon: PhenomenonSpec | None = None
+    doc_comments: list[str] | None = None
+    definitions: list[Definition] | None = None
+
+    def flat_forms(self) -> list[CrystalForm]:
+        """Get a flat list of all CrystalForm objects (backwards compat).
+
+        Recursively traverses FormGroup nodes to extract all CrystalForm leaves.
+        Features from parent FormGroups are merged into child forms.
+        """
+        result: list[CrystalForm] = []
+        for node in self.forms:
+            result.extend(_flatten_node(node))
+        return result
 
     def __str__(self) -> str:
         parts = [f"{self.system}[{self.point_group}]"]
 
-        # Forms
-        form_strs = [str(f.miller) + (f"@{f.scale}" if f.scale != 1.0 else "") for f in self.forms]
+        # Definitions: one per line, since parsing extracts them line-by-line
+        if self.definitions:
+            def_strs = [f"{d}\n" for d in self.definitions]
+            parts = def_strs + parts
+
+        # Forms (including features)
+        form_strs = [str(f) for f in self.forms]
         parts.append(":" + " + ".join(form_strs))
 
         # Modifications
@@ -192,6 +328,10 @@ def __str__(self) -> str:
         if self.twin:
             parts.append(" | " + str(self.twin))
 
+        # Phenomenon
+        if self.phenomenon:
+            parts.append(" | " + str(self.phenomenon))
+
         return "".join(parts)
 
     def to_dict(self) -> dict[str, Any]:
@@ -199,9 +339,19 @@ def to_dict(self) -> dict[str, Any]:
         return {
             "system": self.system,
             "point_group": self.point_group,
-            "forms": [
-                {"miller": f.miller.as_tuple(), "scale": f.scale, "name": f.name}
-                for f in self.forms
+            "forms": [_form_node_to_dict(f) for f in self.forms],
+            "flat_forms": [
+                {
+                    "miller": f.miller.as_tuple(),
+                    "scale": f.scale,
+                    "name": f.name,
+                    "label": f.label,
+                    "features": [
+                        {"name": feat.name, "values": feat.values}
+                        for feat in f.features
+                    ] if f.features else None,
+                }
+                for f in self.flat_forms()
             ],
             "modifications": [{"type": m.type, "params": m.params} for m in self.modifications],
             "twin": {
@@ -213,4 +363,40 @@ def to_dict(self) -> dict[str, Any]:
             }
             if self.twin
             else None,
+            "phenomenon": {
+                "type": self.phenomenon.type,
+                "params": self.phenomenon.params,
+            }
+            if self.phenomenon
+            else None,
+            "doc_comments": self.doc_comments,
+            "definitions": [
+                {"name": d.name, "body": [_form_node_to_dict(f) for f in d.body]}
+                for d in self.definitions
+            ] if self.definitions else None,
         }
+
+
+def _flatten_node(
+    node: FormNode, parent_features: list[Feature] | None = None
+) -> list[CrystalForm]:
+    """Recursively flatten a FormNode into a list of CrystalForms."""
+    if isinstance(node, CrystalForm):
+        if parent_features:
+            merged = list(parent_features)
+            if node.features:
+                merged.extend(node.features)
+            return [CrystalForm(
+                miller=node.miller, scale=node.scale,
+                name=node.name, features=merged, label=node.label,
+            )]
+        return [node]
+    elif isinstance(node, FormGroup):
+        combined_features = list(parent_features) if parent_features else []
+        if node.features:
+            combined_features.extend(node.features)
+        result: list[CrystalForm] = []
+        for child in node.forms:
+            result.extend(_flatten_node(child, combined_features if combined_features else None))
+        return result
+    return []
diff --git a/src/cdl_parser/parser.py b/src/cdl_parser/parser.py
index 179e272..c7c935c 100644
--- a/src/cdl_parser/parser.py
+++ b/src/cdl_parser/parser.py
@@ -4,6 +4,7 @@
 Lexer and parser for Crystal Description Language strings.
 """
 
+import re
 from dataclasses import dataclass
 from enum import Enum
 from typing import Any
@@ -20,11 +21,55 @@
 from .models import (
     CrystalDescription,
     CrystalForm,
+    Definition,
+    Feature,
+    FormGroup,
+    FormNode,
     MillerIndex,
     Modification,
+    PhenomenonSpec,
     TwinSpec,
 )
 
+
+def strip_comments(text: str) -> tuple[str, list[str]]:
+    """Strip comments from CDL text before lexing.
+
+    Extracts doc comments (#! Key: Value) and removes block (/* ... */)
+    and line (# ...) comments.
+
+    Args:
+        text: Raw CDL string possibly containing comments.
+
+    Returns:
+        Tuple of (cleaned text with comments removed, list of doc comment strings).
+    """
+    doc_comments: list[str] = []
+
+    # Extract doc comments (#! ...) before stripping anything else.
+    # Process line-by-line so we can identify #! lines.
+    lines = text.split("\n")
+    processed_lines: list[str] = []
+    for line in lines:
+        stripped = line.lstrip()
+        if stripped.startswith("#!"):
+            # Doc comment — capture the content after "#! " or "#!"
+            content = stripped[2:].strip()
+            doc_comments.append(content)
+            # Don't include this line in the CDL text
+        else:
+            processed_lines.append(line)
+
+    text = "\n".join(processed_lines)
+
+    # Strip block comments (/* ... */), which may span multiple lines
+    text = re.sub(r"/\*.*?\*/", "", text, flags=re.DOTALL)
+
+    # Strip line comments (# to end of line)
+    text = re.sub(r"#[^\n]*", "", text)
+
+    return text, doc_comments
+
 # =============================================================================
 # Token Types
 # =============================================================================
@@ -49,6 +94,8 @@ class TokenType(Enum):
     INTEGER = "INTEGER"
     FLOAT = "FLOAT"
     IDENTIFIER = "IDENTIFIER"
+    DOLLAR = "DOLLAR"
+    EQUALS = "EQUALS"
     EOF = "EOF"
 
 
@@ -157,6 +204,8 @@ def next_token(self) -> Token:
             ",": TokenType.COMMA,
             "(": TokenType.LPAREN,
             ")": TokenType.RPAREN,
+            "$": TokenType.DOLLAR,
+            "=": TokenType.EQUALS,
         }
 
         if ch in single_char_tokens:
@@ -213,6 +262,86 @@ def tokenize(self) -> list[Token]:
         return tokens
 
 
+# =============================================================================
+# Definition Pre-processing
+# =============================================================================
+
+
+def _preprocess_definitions(text: str) -> tuple[str, list[tuple[str, str]]]:
+    """Extract @name = expression definitions and resolve $name references.
+
+    Args:
+        text: Comment-stripped CDL text (may be multi-line).
+
+    Returns:
+        Tuple of (resolved CDL body text, list of (name, raw_body) definition pairs).
+    """
+    lines = text.split("\n")
+    definitions: list[tuple[str, str]] = []  # (name, raw_body)
+    body_lines: list[str] = []
+
+    # First pass: extract definition lines
+    for line in lines:
+        stripped = line.strip()
+        if stripped.startswith("@"):
+            # Parse @name = expression
+            match = re.match(r"@(\w+)\s*=\s*(.+)", stripped)
+            if match:
+                name = match.group(1)
+                body = match.group(2).strip()
+                definitions.append((name, body))
+                continue
+        body_lines.append(line)
+
+    # Build name -> body mapping, resolving references to earlier definitions
+    resolved: dict[str, str] = {}
+    for name, body in definitions:
+        # Resolve $references within this definition body
+        resolved_body = body
+        for prev_name, prev_body in resolved.items():
+            resolved_body = re.sub(r"\$" + prev_name + r"(?!\w)", prev_body, resolved_body)
+        resolved[name] = resolved_body
+
+    # Second pass: resolve $references in the main body
+    body_text = "\n".join(body_lines)
+    for name, resolved_body in resolved.items():
+        body_text = re.sub(r"\$" + name + r"(?!\w)", resolved_body, body_text)
+
+    # Check for unresolved $references
+    unresolved = re.findall(r"\$(\w+)", body_text)
+    if unresolved:
+        raise ParseError(f"Undefined reference: ${unresolved[0]}", position=-1)
+
+    return body_text, definitions
+
+
+def _parse_definition_bodies(
+    definitions: list[tuple[str, str]],
+) -> list[Definition]:
+    """Parse raw definition bodies into Definition objects.
+
+    Each definition body is parsed as a form list.
+    """
+    result: list[Definition] = []
+    resolved_bodies: dict[str, str] = {}
+
+    for name, raw_body in definitions:
+        # Resolve references within this body
+        body = raw_body
+        for prev_name, prev_resolved in resolved_bodies.items():
+            body = re.sub(r"\$" + prev_name + r"(?!\w)", prev_resolved, body)
+        resolved_bodies[name] = body
+
+        # Parse the resolved body as a form list
+        lexer = Lexer(body)
+        tokens = lexer.tokenize()
+        parser = Parser(tokens)
+        forms = parser._parse_form_list()
+        result.append(Definition(name=name, body=forms))
+
+    return result
+
+
 # =============================================================================
 # Parser
 # =============================================================================
@@ -283,12 +412,14 @@ def parse(self) -> CrystalDescription:
         modifications = []
         if self._current().type == TokenType.PIPE:
             self._advance()  # consume |
-            # Check if it's modifications or twin
+            # Check if it's modifications, twin, or phenomenon
             if self._current().type == TokenType.IDENTIFIER:
                 ident = self._current().value.lower()
                 if ident == "twin":
                     pass  # It's a twin, not modifications
-                elif ident in {"elongate", "truncate", "taper", "bevel"}:
+                elif ident == "phenomenon":
+                    pass  # It's a phenomenon, not modifications
+                elif ident in {"elongate", "truncate", "taper", "bevel", "flatten"}:
                     modifications = self._parse_modifications()
 
         # Parse optional twin
@@ -298,26 +429,88 @@ def parse(self) -> CrystalDescription:
         if self._current().type == TokenType.IDENTIFIER and self._current().value.lower() == "twin":
             twin = self._parse_twin()
 
+        # Parse optional phenomenon
+        phenomenon = None
+        if self._current().type == TokenType.PIPE:
+            self._advance()  # consume |
+        if self._current().type == TokenType.IDENTIFIER and self._current().value.lower() == "phenomenon":
+            phenomenon = self._parse_phenomenon()
+
         return CrystalDescription(
             system=system,
             point_group=point_group,
             forms=forms,
             modifications=modifications,
             twin=twin,
+            phenomenon=phenomenon,
         )
 
-    def _parse_form_list(self) -> list[CrystalForm]:
-        """Parse form_list = form ('+' form)*"""
-        forms = [self._parse_form()]
+    def _parse_form_list(self) -> list[FormNode]:
+        """Parse form_list = form_or_group ('+' form_or_group)*"""
+        forms: list[FormNode] = [self._parse_form_or_group()]
 
         while self._current().type == TokenType.PLUS:
             self._advance()  # consume +
-            forms.append(self._parse_form())
+            forms.append(self._parse_form_or_group())
 
         return forms
 
-    def _parse_form(self) -> CrystalForm:
-        """Parse form = (form_name | miller_index) ['@' scale]"""
+    def _parse_form_or_group(self) -> FormNode:
+        """Parse either a parenthesized group or a single form.
+
+        Handles:
+        - (form + form)[features] - group
+        - label:(form + form)[features] - labeled group
+        - label:{hkl}@scale[features] - labeled form
+        - {hkl}@scale[features] - plain form
+        - named_form@scale[features] - named form
+        """
+        label = None
+
+        # Check for label: identifier followed by COLON, then LBRACE or LPAREN
+        if self._current().type == TokenType.IDENTIFIER:
+            ident = self._current().value
+            if self._peek().type == TokenType.COLON:
+                # Look at what follows the colon
+                after_colon = self._peek(2)
+                if after_colon.type == TokenType.LPAREN:
+                    # label:(group)
+                    label = ident
+                    self._advance()  # consume identifier
+                    self._advance()  # consume colon
+                elif after_colon.type == TokenType.LBRACE:
+                    # Could be label:{hkl} - but only if identifier is NOT a named form.
+                    # Named forms are written without a colon (e.g. octahedron), so an
+                    # identifier found in NAMED_FORMS is left for _parse_form to handle;
+                    # any other identifier before ':{' is consumed as a label.
+                    ident_lower = ident.lower()
+                    if ident_lower not in NAMED_FORMS:
+                        label = ident
+                        self._advance()  # consume identifier
+                        self._advance()  # consume colon
+
+        if self._current().type == TokenType.LPAREN:
+            return self._parse_group(label)
+        else:
+            return self._parse_form(label)
+
+    def _parse_group(self, label: str | None = None) -> FormGroup:
+        """Parse a parenthesized group: (form + form)[features]"""
+        self._advance()  # consume (
+
+        forms = self._parse_form_list()
+
+        self._expect(TokenType.RPAREN)
+
+        # Optional features
+        features = None
+        if self._current().type == TokenType.LBRACKET:
+            features = self._parse_features()
+
+        return FormGroup(forms=forms, features=features, label=label)
+
+    def _parse_form(self, label: str | None = None) -> CrystalForm:
+        """Parse form = (form_name | miller_index) ['@' scale] ['[' features ']']"""
         name = None
         miller = None
 
@@ -349,7 +542,12 @@ def _parse_form(self) -> CrystalForm:
             else:
                 raise ParseError("Expected scale value after @", position=scale_token.position)
 
-        return CrystalForm(miller=miller, scale=scale, name=name)
+        # Optional features [feature:value, ...]
+        features = None
+        if self._current().type == TokenType.LBRACKET:
+            features = self._parse_features()
+
+        return CrystalForm(miller=miller, scale=scale, name=name, features=features, label=label)
 
     def _parse_miller_index(self) -> MillerIndex:
         """Parse Miller index {hkl} or {hkil}.
@@ -415,7 +613,7 @@ def _parse_modification(self) -> Modification:
         mod_token = self._current()
         mod_type = self._expect(TokenType.IDENTIFIER).value.lower()
 
-        if mod_type not in {"elongate", "truncate", "taper", "bevel"}:
+        if mod_type not in {"elongate", "truncate", "taper", "bevel", "flatten"}:
             raise ParseError(f"Unknown modification type: {mod_type}", position=mod_token.position)
 
         self._expect(TokenType.LPAREN)
@@ -450,6 +648,12 @@ def _parse_modification(self) -> Modification:
             self._expect(TokenType.COLON)
             width = self._parse_number()
             params = {"edges": edges, "width": width}
+        elif mod_type == "flatten":
+            # flatten(axis:ratio)
+            axis = self._expect(TokenType.IDENTIFIER).value.lower()
+            self._expect(TokenType.COLON)
+            ratio = self._parse_number()
+            params = {"axis": axis, "ratio": ratio}
 
         self._expect(TokenType.RPAREN)
 
@@ -499,6 +703,102 @@ def _parse_twin(self) -> TwinSpec:
 
         return TwinSpec(law=law, axis=axis, angle=angle, twin_type=twin_type, count=count)
 
+    def _parse_features(self) -> list[Feature]:
+        """Parse feature list [name:value, name:value, ...]"""
+        self._advance()  # consume [
+        features = []
+
+        while self._current().type != TokenType.RBRACKET and self._current().type != TokenType.EOF:
+            # Parse feature name
+            name_token = self._expect(TokenType.IDENTIFIER)
+            name = name_token.value.lower()
+
+            # Expect colon
+            self._expect(TokenType.COLON)
+
+            # Parse values until comma or ]
+            values: list[int | float | str] = []
+            values.append(self._parse_feature_value())
+
+            # Check for more values separated by comma
+            # But distinguish "next value" from "next feature"
+            # Next feature = IDENTIFIER followed by COLON
+            while self._current().type == TokenType.COMMA:
+                next_tok = self._peek(1)
+                next_next = self._peek(2)
+                if next_tok.type == TokenType.IDENTIFIER and next_next.type == TokenType.COLON:
+                    break  # It's a new feature
+                self._advance()  # consume comma
+                values.append(self._parse_feature_value())
+
+            features.append(Feature(name=name, values=values))
+
+            # Consume comma between features
+            if self._current().type == TokenType.COMMA:
+                self._advance()
+
+        self._expect(TokenType.RBRACKET)
+        return features
+
+    def _parse_feature_value(self) -> int | float | str:
+        """Parse a single feature value (number or identifier)."""
+        token = self._current()
+        if token.type == TokenType.INTEGER:
+            return int(self._advance().value)
+        elif token.type == TokenType.FLOAT:
+            return float(self._advance().value)
+        elif token.type == TokenType.IDENTIFIER:
+            return self._advance().value.lower()
+        elif token.type == TokenType.POINT_GROUP:
+            # Handle numeric point groups like '1', '3' as values
+            value = token.value
+            try:
+                result = int(value)
+                self._advance()
+                return result
+            except ValueError:
+                pass
+            return self._advance().value
+        raise ParseError("Expected feature value", position=token.position)
+
+    def _parse_phenomenon(self) -> PhenomenonSpec:
+        """Parse phenomenon[type:value, param:value, ...]"""
+        self._expect(TokenType.IDENTIFIER)  # consume 'phenomenon'
+        self._expect(TokenType.LBRACKET)
+
+        # First token is the phenomenon type
+        phen_type = self._expect(TokenType.IDENTIFIER).value.lower()
+
+        params: dict[str, int | float | str] = {}
+
+        # Check for :value after type (e.g., asterism:6)
+        if self._current().type == TokenType.COLON:
+            self._advance()
+            val = self._parse_feature_value()
+            # Store as the primary value
+            if isinstance(val, (int, float)):
+                params["value"] = val
+            else:
+                params["intensity"] = val
+
+        # Parse additional comma-separated params
+        while self._current().type == TokenType.COMMA:
+            self._advance()
+            if self._current().type == TokenType.IDENTIFIER:
+                key = self._advance().value.lower()
+                if self._current().type == TokenType.COLON:
+                    self._advance()
+                    params[key] = self._parse_feature_value()
+                else:
+                    # Bare identifier value
+                    params[key] = True
+            elif self._current().type in (TokenType.INTEGER, TokenType.FLOAT, TokenType.POINT_GROUP):
+                val = self._parse_feature_value()
+                params["value"] = val
+
+        self._expect(TokenType.RBRACKET)
+        return PhenomenonSpec(type=phen_type, params=params)
+
     def _parse_number(self) -> float:
         """Parse a number (int or float).
 
@@ -571,10 +871,27 @@ def parse_cdl(text: str) -> CrystalDescription:
         >>> desc.forms[0].miller.i
         -1
     """
-    lexer = Lexer(text)
+    cleaned, doc_comments = strip_comments(text)
+    cleaned = cleaned.strip()
+    if not cleaned:
+        raise ParseError("Empty CDL string after stripping comments", position=0)
+
+    # Pre-process definitions (@name = expression) and resolve $references
+    body_text, raw_definitions = _preprocess_definitions(cleaned)
+    body_text = body_text.strip()
+    if not body_text:
+        raise ParseError("Empty CDL string after extracting definitions", position=0)
+
+    # Parse definition bodies into Definition objects
+    definitions = _parse_definition_bodies(raw_definitions) if raw_definitions else None
+
+    lexer = Lexer(body_text)
     tokens = lexer.tokenize()
     parser = Parser(tokens)
-    return parser.parse()
+    desc = parser.parse()
+    desc.doc_comments = doc_comments if doc_comments else None
+    desc.definitions = definitions
+    return desc
 
 
 def validate_cdl(text: str) -> tuple[bool, str | None]:
diff --git a/tests/fixtures/v1.3-test-cases.json b/tests/fixtures/v1.3-test-cases.json
new file mode 100644
index 0000000..405384d
--- /dev/null
+++ b/tests/fixtures/v1.3-test-cases.json
@@ -0,0 +1,223 @@
+{
+  "version": "1.3.0",
+  "description": "CDL v1.3 test cases for JS parser parity testing",
+  "test_cases": [
+    {
+      "name": "simple_group",
+      "cdl": "cubic[m3m]:({111} + {100})",
+      "expected": {
+        "system": "cubic",
+        "point_group": "m3m",
+        "form_count": 1,
+        "flat_form_count": 2,
+        "has_groups": true,
+        "definitions_count": 0
+      }
+    },
+    {
+      "name": "group_with_features",
+      "cdl": "cubic[m3m]:({111}@1.0 + {100}@1.3)[phantom:3]",
+      "expected": {
+        "system": "cubic",
+        "point_group": "m3m",
+        "form_count": 1,
+        "flat_form_count": 2,
+        "has_groups": true,
+        "definitions_count": 0,
+        "flat_forms_all_have_feature": "phantom"
+      }
+    },
+    {
+      "name": "group_plus_form",
+      "cdl": "cubic[m3m]:({111} + {100})[phantom:3] + {110}@0.8",
+      "expected": {
+        "system": "cubic",
+        "point_group": "m3m",
+        "form_count": 2,
+        "flat_form_count": 3,
+        "has_groups": true,
+        "definitions_count": 0
+      }
+    },
+    {
+      "name": "nested_group",
+      "cdl": "cubic[m3m]:(({111}) + {100})",
+      "expected": {
+        "system": "cubic",
+        "point_group": "m3m",
+        "form_count": 1,
+        "flat_form_count": 2,
+        "has_groups": true,
+        "definitions_count": 0
+      }
+    },
+    {
+      "name": "labeled_forms",
+      "cdl": "cubic[m3m]:core:{111}@1.0 + rim:{100}@1.3",
+      "expected": {
+        "system": "cubic",
+        "point_group": "m3m",
+        "form_count": 2,
+        "flat_form_count": 2,
+        "has_groups": false,
+        "definitions_count": 0,
+        "flat_forms": [
+          {"label": "core", "miller": [1, 1, 1], "scale": 1.0},
+          {"label": "rim", "miller": [1, 0, 0], "scale": 1.3}
+        ]
+      }
+    },
+    {
+      "name": "labeled_group",
+      "cdl": "cubic[m3m]:core:({111} + {100})[phantom:3]",
+      "expected": {
+        "system": "cubic",
+        "point_group": "m3m",
+        "form_count": 1,
+        "flat_form_count": 2,
+        "has_groups": true,
+        "group_label": "core",
+        "definitions_count": 0
+      }
+    },
+    {
+      "name": "simple_definition",
+      "cdl": "@oct = {111}@1.0\ncubic[m3m]:$oct + {100}@1.3",
+      "expected": {
+        "system": "cubic",
+        "point_group": "m3m",
+        "form_count": 2,
+        "flat_form_count": 2,
+        "has_groups": false,
+        "definitions_count": 1,
+        "flat_forms": [
+          {"miller": [1, 1, 1], "scale": 1.0},
+          {"miller": [1, 0, 0], "scale": 1.3}
+        ]
+      }
+    },
+    {
+      "name": "multiple_definitions",
+      "cdl": "@prism = {10-10}@1.0\n@rhomb = {10-11}@0.8\ntrigonal[-3m]:$prism + $rhomb",
+      "expected": {
+        "system": "trigonal",
+        "point_group": "-3m",
+        "form_count": 2,
+        "flat_form_count": 2,
+        "has_groups": false,
+        "definitions_count": 2
+      }
+    },
+    {
+      "name": "definition_referencing_definition",
+      "cdl": "@a = {111}@1.0\n@b = {100}@1.3\n@combo = $a + $b\ncubic[m3m]:$combo",
+      "expected": {
+        "system": "cubic",
+        "point_group": "m3m",
+        "form_count": 2,
+        "flat_form_count": 2,
+        "has_groups": false,
+        "definitions_count": 3
+      }
+    },
+    {
+      "name": "undefined_reference",
+      "cdl": "cubic[m3m]:$unknown",
+      "expected": {
+        "error": true,
+        "error_type": "ParseError"
+      }
+    },
+    {
+      "name": "v1_simple_octahedron",
+      "cdl": "cubic[m3m]:{111}",
+      "expected": {
+        "system": "cubic",
+        "point_group": "m3m",
+        "form_count": 1,
+        "flat_form_count": 1,
+        "has_groups": false,
+        "definitions_count": 0
+      }
+    },
+    {
+      "name": "v1_truncated_octahedron",
+      "cdl": "cubic[m3m]:{111}@1.0 + {100}@1.3",
+      "expected": {
+        "system": "cubic",
+        "point_group": "m3m",
+        "form_count": 2,
+        "flat_form_count": 2,
+        "has_groups": false,
+        "definitions_count": 0
+      }
+    },
+    {
+      "name": "v1_with_features",
+      "cdl": "cubic[m3m]:{111}@1.0[phantom:3]",
+      "expected": {
+        "system": "cubic",
+        "point_group": "m3m",
+        "form_count": 1,
+        "flat_form_count": 1,
+        "has_groups": false,
+        "definitions_count": 0,
+        "flat_forms": [
+          {"miller": [1, 1, 1], "scale": 1.0, "features": [{"name": "phantom", "values": [3]}]}
+        ]
+      }
+    },
+    {
+      "name": "v1_with_twin",
+      "cdl": "cubic[m3m]:{111} | twin(spinel)",
+      "expected": {
+        "system": "cubic",
+        "point_group": "m3m",
+        "form_count": 1,
+        "flat_form_count": 1,
+        "has_groups": false,
+        "definitions_count": 0,
+        "has_twin": true,
+        "twin_law": "spinel"
+      }
+    },
+    {
+      "name": "group_with_form_features_and_group_features",
+      "cdl": "cubic[m3m]:({111}[trigon:dense] + {100})[phantom:3]",
+      "expected": {
+        "system": "cubic",
+        "point_group": "m3m",
+        "form_count": 1,
+        "flat_form_count": 2,
+        "has_groups": true,
+        "definitions_count": 0,
+        "note": "First flat form gets phantom+trigon, second gets phantom only"
+      }
+    },
+    {
+      "name": "definitions_with_comments",
+      "cdl": "# Comment\n@oct = {111}@1.0\n# Another comment\ncubic[m3m]:$oct",
+      "expected": {
+        "system": "cubic",
+        "point_group": "m3m",
+        "form_count": 1,
+        "flat_form_count": 1,
+        "has_groups": false,
+        "definitions_count": 1
+      }
+    },
+    {
+      "name": "definition_with_doc_comments",
+      "cdl": "#! Mineral: Diamond\n@oct = {111}@1.0\ncubic[m3m]:$oct",
+      "expected": {
+        "system": "cubic",
+        "point_group": "m3m",
+        "form_count": 1,
+        "flat_form_count": 1,
+        "has_groups": false,
+        "definitions_count": 1,
+        "doc_comments": ["Mineral: Diamond"]
+      }
+    }
+  ]
+}
diff --git a/tests/test_parser.py b/tests/test_parser.py
index b95c739..843163a 100644
--- a/tests/test_parser.py
+++ b/tests/test_parser.py
@@ -12,8 +12,13 @@
     POINT_GROUPS,
     CrystalDescription,
     CrystalForm,
+    Definition,
+    Feature,
+    FormGroup,
+    FormNode,
     MillerIndex,
     ParseError,
+    PhenomenonSpec,
     ValidationError,
     parse_cdl,
     validate_cdl,
@@ -524,3 +529,618 @@ def test_fluorite_twin_cdl(self):
         desc = parse_cdl("cubic[m3m]:{111} | twin(fluorite)")
         assert desc.twin is not None
         assert desc.twin.law == "fluorite"
+
+
+# =============================================================================
+# Comment Tests
+# =============================================================================
+
+
+class TestComments:
+    """Test CDL comment stripping and doc comment extraction."""
+
+    def test_line_comment_at_start(self):
+        """Line comment before CDL is stripped."""
+        desc = parse_cdl("# comment\ncubic[m3m]:{111}")
+        assert desc.system == "cubic"
+        assert desc.doc_comments is None
+
+    def test_inline_comment(self):
+        """Inline comment after CDL is stripped."""
+        desc = parse_cdl("cubic[m3m]:{111} # octahedron")
+        assert desc.system == "cubic"
+        assert desc.forms[0].miller.as_tuple() == (1, 1, 1)
+
+    def test_block_comment(self):
+        """Block comment is stripped."""
+        desc = parse_cdl("/* block */cubic[m3m]:{111}")
+        assert desc.system == "cubic"
+
+    def test_multiline_block_comment(self):
+        """Multi-line block comment is stripped."""
+        desc = parse_cdl("/* multi\nline */\ncubic[m3m]:{111}")
+        assert desc.system == "cubic"
+
+    def test_doc_comment(self):
+        """Doc comment (#!) is extracted."""
+        desc = parse_cdl("#! Mineral: Diamond\ncubic[m3m]:{111}")
+        assert desc.doc_comments == ["Mineral: Diamond"]
+
+    def test_multiple_doc_comments(self):
+        """Multiple doc comments are preserved in order."""
+        cdl = "#! Mineral: Diamond\n#! Habit: Octahedral\ncubic[m3m]:{111}"
+        desc = parse_cdl(cdl)
+        assert desc.doc_comments == ["Mineral: Diamond", "Habit: Octahedral"]
+
+    def test_mixed_comments(self):
+        """Mix of line, block, and doc comments."""
+        cdl = (
+            "#! Mineral: Quartz\n"
+            "# A line comment\n"
+            "/* block */ trigonal[-3m]:{10-10} # inline"
+        )
+        desc = parse_cdl(cdl)
+        assert desc.system == "trigonal"
+        assert desc.doc_comments == ["Mineral: Quartz"]
+
+    def test_comment_only_raises(self):
+        """Comment-only input raises ParseError."""
+        with pytest.raises(ParseError):
+            parse_cdl("# just a comment\n/* block */")
+
+    def test_cdl_v1_regression(self):
+        """Existing CDL v1 strings without comments still work identically."""
+        for _name, cdl in CDL_TEST_CASES:
+            desc = parse_cdl(cdl)
+            assert isinstance(desc, CrystalDescription)
+            assert desc.doc_comments is None
+
+    def test_doc_comments_in_to_dict(self):
+        """Doc comments appear in to_dict() output."""
+        desc = parse_cdl("#! Mineral: Diamond\ncubic[m3m]:{111}")
+        d = desc.to_dict()
+        assert d["doc_comments"] == ["Mineral: Diamond"]
+
+    def test_no_doc_comments_in_to_dict(self):
+        """to_dict() has doc_comments=None when there are none."""
+        desc = parse_cdl("cubic[m3m]:{111}")
+        d = desc.to_dict()
+        assert d["doc_comments"] is None
+
+
+# =============================================================================
+# Flatten Modification Tests
+# =============================================================================
+
+
+class TestFlattenModification:
+    """Test flatten modification parsing."""
+
+    def test_flatten_basic(self):
+        """Flatten modification parses correctly."""
+        desc = parse_cdl("cubic[m3m]:{111} | flatten(a:0.5)")
+        assert len(desc.modifications) == 1
+        assert desc.modifications[0].type == "flatten"
+        assert desc.modifications[0].params["axis"] == "a"
+        assert desc.modifications[0].params["ratio"] == 0.5
+
+    def test_flatten_float_ratio(self):
+        """Flatten with float ratio."""
+        desc = parse_cdl("cubic[m3m]:{111} | flatten(c:0.75)")
+        assert desc.modifications[0].type == "flatten"
+        assert desc.modifications[0].params["axis"] == "c"
+        assert desc.modifications[0].params["ratio"] == 0.75
+
+
+# =============================================================================
+# Feature Tests (CDL v1.2)
+# =============================================================================
+
+
+class TestFeatures:
+    """Test CDL v1.2 feature parsing on crystal forms."""
+
+    def test_single_feature(self):
+        """Single feature annotation on a form."""
+        desc = parse_cdl("cubic[m3m]:{111}@1.0[trigon:dense]")
+        assert desc.forms[0].features is not None
+        assert len(desc.forms[0].features) == 1
+        assert desc.forms[0].features[0].name == "trigon"
+        assert desc.forms[0].features[0].values == ["dense"]
+
+    def test_multiple_feature_values(self):
+        """Feature with multiple values."""
+        desc = parse_cdl("cubic[m3m]:{111}[phantom:3, white]")
+        assert desc.forms[0].features is not None
+        assert len(desc.forms[0].features) == 1
+        feat = desc.forms[0].features[0]
+        assert feat.name == "phantom"
+        assert feat.values == [3, "white"]
+
+    def test_feature_numeric_value(self):
+        """Feature with numeric value."""
+        desc = parse_cdl("trigonal[32]:{10-10}@1.0[phantom:3]")
+        assert desc.forms[0].features is not None
+        feat = desc.forms[0].features[0]
+        assert feat.name == "phantom"
+        assert feat.values == [3]
+
+    def test_feature_on_second_form(self):
+        """Feature on second form only."""
+        desc = parse_cdl("cubic[m3m]:{111}@1.0 + {100}@1.3[trigon:sparse]")
+        assert desc.forms[0].features is None
+        assert desc.forms[1].features is not None
+        assert desc.forms[1].features[0].name == "trigon"
+        assert desc.forms[1].features[0].values == ["sparse"]
+
+    def test_multiple_feature_types(self):
+        """Multiple distinct features on one form."""
+        desc = parse_cdl("cubic[m3m]:{111}[trigon:dense, phantom:3]")
+        assert desc.forms[0].features is not None
+        assert len(desc.forms[0].features) == 2
+        assert desc.forms[0].features[0].name == "trigon"
+        assert desc.forms[0].features[0].values == ["dense"]
+        assert desc.forms[0].features[1].name == "phantom"
+        assert desc.forms[0].features[1].values == [3]
+
+    def test_features_with_scale(self):
+        """Features after scale value."""
+        desc = parse_cdl("cubic[m3m]:{111}@1.0[silk:dense]")
+        assert desc.forms[0].scale == 1.0
+        assert desc.forms[0].features is not None
+        assert desc.forms[0].features[0].name == "silk"
+
+    def test_no_features_backwards_compat(self):
+        """Existing CDL without features still works."""
+        desc = parse_cdl("cubic[m3m]:{111}@1.0")
+        assert desc.forms[0].features is None
+
+    def test_feature_str_representation(self):
+        """Feature __str__ method."""
+        feat = Feature("trigon", ["dense"])
+        assert str(feat) == "trigon:dense"
+        feat2 = Feature("phantom", [3, "white"])
+        assert str(feat2) == "phantom:3, white"
+
+    def test_form_str_with_features(self):
+        """CrystalForm __str__ includes features."""
+        desc = parse_cdl("cubic[m3m]:{111}@1.0[trigon:dense]")
+        form_str = str(desc.forms[0])
+        assert "[trigon:dense]" in form_str
+
+    def test_features_in_to_dict(self):
+        """Features appear in to_dict() output."""
+        desc = parse_cdl("cubic[m3m]:{111}[phantom:3]")
+        d = desc.to_dict()
+        assert d["forms"][0]["features"] is not None
+        assert d["forms"][0]["features"][0]["name"] == "phantom"
+        assert d["forms"][0]["features"][0]["values"] == [3]
+
+    def test_no_features_in_to_dict(self):
+        """to_dict() has features=None when there are none."""
+        desc = parse_cdl("cubic[m3m]:{111}")
+        d = desc.to_dict()
+        assert d["forms"][0]["features"] is None
+
+
+# =============================================================================
+# Phenomenon Tests (CDL v1.2)
+# =============================================================================
+
+
+class TestPhenomenon:
+    """Test CDL v1.2 phenomenon parsing."""
+
+    def test_asterism(self):
+        """Asterism phenomenon with numeric value."""
+        desc = parse_cdl("trigonal[-3m]:{10-11}@1.0 | phenomenon[asterism:6]")
+        assert desc.phenomenon is not None
+        assert desc.phenomenon.type == "asterism"
+        assert desc.phenomenon.params["value"] == 6
+
+    def test_chatoyancy(self):
+        """Chatoyancy phenomenon with string intensity."""
+        desc = parse_cdl("orthorhombic[mmm]:{110}@1.0 | phenomenon[chatoyancy:sharp]")
+        assert desc.phenomenon is not None
+        assert desc.phenomenon.type == "chatoyancy"
+        assert desc.phenomenon.params["intensity"] == "sharp"
+
+    def test_phenomenon_with_modifications(self):
+        """Phenomenon after modifications."""
+        desc = parse_cdl("cubic[m3m]:{111} | elongate(c:1.5) | phenomenon[asterism:6]")
+        assert len(desc.modifications) == 1
+        assert desc.phenomenon is not None
+        assert desc.phenomenon.type == "asterism"
+
+    def test_phenomenon_with_twin(self):
+        """Phenomenon after twin."""
+        desc = parse_cdl("cubic[m3m]:{111} | twin(spinel) | phenomenon[asterism:6]")
+        assert desc.twin is not None
+        assert desc.phenomenon is not None
+        assert desc.phenomenon.type == "asterism"
+
+    def test_phenomenon_multiple_params(self):
+        """Phenomenon with multiple parameters."""
+        desc = parse_cdl("trigonal[-3m]:{10-11} | phenomenon[asterism:6, intensity:strong]")
+        assert desc.phenomenon is not None
+        assert desc.phenomenon.type == "asterism"
+        assert desc.phenomenon.params["value"] == 6
+        assert desc.phenomenon.params["intensity"] == "strong"
+
+    def test_features_and_phenomenon(self):
+        """Features on form AND phenomenon on description."""
+        desc = parse_cdl("trigonal[-3m]:{10-11}@1.0[silk:dense] | phenomenon[asterism:6]")
+        assert desc.forms[0].features is not None
+        assert desc.forms[0].features[0].name == "silk"
+        assert desc.phenomenon is not None
+        assert desc.phenomenon.type == "asterism"
+
+    def test_no_phenomenon_backwards_compat(self):
+        """Existing CDL without phenomenon still works."""
+        desc = parse_cdl("cubic[m3m]:{111}@1.0")
+        assert desc.phenomenon is None
+
+    def test_phenomenon_str_representation(self):
+        """PhenomenonSpec __str__ method."""
+        phen = PhenomenonSpec("asterism", {"value": 6})
+        assert str(phen) == "phenomenon[asterism, value:6]"
+
+    def test_description_str_with_phenomenon(self):
+        """CrystalDescription __str__ includes phenomenon."""
+        desc = parse_cdl("trigonal[-3m]:{10-11}@1.0 | phenomenon[asterism:6]")
+        desc_str = str(desc)
+        assert "phenomenon[asterism" in desc_str
+
+    def test_phenomenon_in_to_dict(self):
+        """Phenomenon appears in to_dict() output."""
+        desc = parse_cdl("trigonal[-3m]:{10-11} | phenomenon[asterism:6]")
+        d = desc.to_dict()
+        assert d["phenomenon"] is not None
+        assert d["phenomenon"]["type"] == "asterism"
+        assert d["phenomenon"]["params"]["value"] == 6
+
+    def test_no_phenomenon_in_to_dict(self):
+        """to_dict() has phenomenon=None when there is none."""
+        desc = parse_cdl("cubic[m3m]:{111}")
+        d = desc.to_dict()
+        assert d["phenomenon"] is None
+
+
+# =============================================================================
+# Grouping Tests (CDL v1.3)
+# =============================================================================
+
+
+class TestGrouping:
+    """Test CDL v1.3 parenthesized form grouping."""
+
+    def test_simple_group(self):
+        """Parenthesized group of forms."""
+        desc = parse_cdl("cubic[m3m]:({111} + {100})")
+        assert len(desc.forms) == 1
+        group = desc.forms[0]
+        assert isinstance(group, FormGroup)
+        assert len(group.forms) == 2
+
+    def test_group_with_shared_features(self):
+        """Group with shared features applied to all forms."""
+        desc = parse_cdl("cubic[m3m]:({111}@1.0 + {100}@1.3)[phantom:3]")
+        assert len(desc.forms) == 1
+        group = desc.forms[0]
+        assert isinstance(group, FormGroup)
+        assert group.features is not None
+        assert group.features[0].name == "phantom"
+        # flat_forms() should merge phantom:3 into both forms
+        flat = desc.flat_forms()
+        assert len(flat) == 2
+        for f in flat:
+            assert f.features is not None
+            assert any(feat.name == "phantom" for feat in f.features)
+
+    def test_group_plus_form(self):
+        """Group combined with standalone form."""
+        desc = parse_cdl("cubic[m3m]:({111} + {100})[phantom:3] + {110}@0.8")
+        assert len(desc.forms) == 2
+        assert isinstance(desc.forms[0], FormGroup)
+        assert isinstance(desc.forms[1], CrystalForm)
+        flat = desc.flat_forms()
+        assert len(flat) == 3
+
+    def test_nested_group(self):
+        """Nested parenthesized groups."""
+        desc = parse_cdl("cubic[m3m]:(({111}) + {100})")
+        flat = desc.flat_forms()
+        assert len(flat) == 2
+        assert flat[0].miller.as_tuple() == (1, 1, 1)
+        assert flat[1].miller.as_tuple() == (1, 0, 0)
+
+    def test_group_str_representation(self):
+        """FormGroup __str__ method."""
+        desc = parse_cdl("cubic[m3m]:({111} + {100})[phantom:3]")
+        group = desc.forms[0]
+        s = str(group)
+        assert "(" in s and ")" in s
+        assert "phantom:3" in s
+
+    def test_group_in_to_dict(self):
+        """Groups appear in to_dict() output."""
+        desc = parse_cdl("cubic[m3m]:({111} + {100})[phantom:3]")
+        d = desc.to_dict()
+        assert d["forms"][0]["type"] == "group"
+        assert len(d["forms"][0]["forms"]) == 2
+        assert d["forms"][0]["features"][0]["name"] == "phantom"
+        # flat_forms in dict
+        assert len(d["flat_forms"]) == 2
+
+    def test_group_with_scales(self):
+        """Group with individually scaled forms."""
+        desc = parse_cdl("cubic[m3m]:({111}@1.0 + {100}@1.3)")
+        flat = desc.flat_forms()
+        assert flat[0].scale == 1.0
+        assert flat[1].scale == 1.3
+
+
+# =============================================================================
+# Form Label Tests (CDL v1.3)
+# =============================================================================
+
+
+class TestFormLabels:
+    """Test CDL v1.3 form labels."""
+
+    def test_labeled_miller_form(self):
+        """Form with label using Miller index."""
+        desc = parse_cdl("cubic[m3m]:core:{111}@1.0 + rim:{100}@1.3")
+        flat = desc.flat_forms()
+        assert len(flat) == 2
+        assert flat[0].label == "core"
+        assert flat[0].miller.as_tuple() == (1, 1, 1)
+        assert flat[1].label == "rim"
+        assert flat[1].miller.as_tuple() == (1, 0, 0)
+
+    def test_labeled_group(self):
+        """Group with label."""
+        desc = parse_cdl("cubic[m3m]:core:({111} + {100})[phantom:3]")
+        assert len(desc.forms) == 1
+        group = desc.forms[0]
+        assert isinstance(group, FormGroup)
+        assert group.label == "core"
+
+    def test_label_str_representation(self):
+        """Label appears in __str__ output."""
+        desc = parse_cdl("cubic[m3m]:core:{111}@1.0")
+        flat = desc.flat_forms()
+        assert "core:" in str(flat[0])
+
+    def test_unlabeled_forms_backwards_compat(self):
+        """Unlabeled forms still work identically."""
+        desc = parse_cdl("cubic[m3m]:{111}@1.0 + {100}@1.3")
+        flat = desc.flat_forms()
+        assert flat[0].label is None
+        assert flat[1].label is None
+
+    def test_label_in_to_dict(self):
+        """Labels appear in to_dict() output."""
+        desc = parse_cdl("cubic[m3m]:core:{111}@1.0")
+        d = desc.to_dict()
+        assert d["flat_forms"][0]["label"] == "core"
+
+    def test_named_form_not_treated_as_label(self):
+        """A bare named form (like 'octahedron') is NOT treated as a label."""
+        # 'octahedron' is expected to be a known NAMED_FORM, so on its own it
+        # must parse as a named form (name set) with no label attached
+        desc = parse_cdl("cubic[m3m]:octahedron@1.0")
+        flat = desc.flat_forms()
+        assert flat[0].name == "octahedron"
+        assert flat[0].label is None
+
+
+# =============================================================================
+# Named Reference Tests (CDL v1.3)
+# =============================================================================
+
+
+class TestNamedReferences:
+    """Test CDL v1.3 named definitions and $references."""
+
+    def test_simple_definition(self):
+        """Simple named definition and reference."""
+        desc = parse_cdl("@oct = {111}@1.0\ncubic[m3m]:$oct + {100}@1.3")
+        assert len(desc.flat_forms()) == 2
+        assert desc.flat_forms()[0].miller.as_tuple() == (1, 1, 1)
+        assert desc.flat_forms()[0].scale == 1.0
+
+    def test_multiple_definitions(self):
+        """Multiple definitions."""
+        cdl = "@prism = {10-10}@1.0\n@rhomb = {10-11}@0.8\ntrigonal[-3m]:$prism + $rhomb"
+        desc = parse_cdl(cdl)
+        assert len(desc.flat_forms()) == 2
+
+    def test_definition_referencing_definition(self):
+        """Definition that references another definition."""
+        cdl = "@a = {111}@1.0\n@b = {100}@1.3\n@combo = $a + $b\ncubic[m3m]:$combo"
+        desc = parse_cdl(cdl)
+        assert len(desc.flat_forms()) == 2
+
+    def test_definitions_stored(self):
+        """Definitions are stored on the CrystalDescription."""
+        desc = parse_cdl("@oct = {111}@1.0\ncubic[m3m]:$oct")
+        assert desc.definitions is not None
+        assert len(desc.definitions) == 1
+        assert desc.definitions[0].name == "oct"
+
+    def test_undefined_reference_error(self):
+        """Undefined reference raises ParseError."""
+        with pytest.raises(ParseError):
+            parse_cdl("cubic[m3m]:$unknown")
+
+    def test_definitions_with_comments(self):
+        """Definitions work with comments."""
+        cdl = "# Define forms\n@oct = {111}@1.0\n# Use them\ncubic[m3m]:$oct"
+        desc = parse_cdl(cdl)
+        assert len(desc.flat_forms()) == 1
+
+    def test_no_definitions_backwards_compat(self):
+        """CDL without definitions has definitions=None."""
+        desc = parse_cdl("cubic[m3m]:{111}")
+        assert desc.definitions is None
+
+    def test_definitions_in_to_dict(self):
+        """Definitions appear in to_dict() output."""
+        desc = parse_cdl("@oct = {111}@1.0\ncubic[m3m]:$oct")
+        d = desc.to_dict()
+        assert d["definitions"] is not None
+        assert len(d["definitions"]) == 1
+        assert d["definitions"][0]["name"] == "oct"
+
+    def test_no_definitions_in_to_dict(self):
+        """to_dict() has definitions=None when there are none."""
+        desc = parse_cdl("cubic[m3m]:{111}")
+        d = desc.to_dict()
+        assert d["definitions"] is None
+
+    def test_definition_with_doc_comments(self):
+        """Definitions work alongside doc comments."""
+        cdl = "#! Mineral: Diamond\n@oct = {111}@1.0\ncubic[m3m]:$oct"
+        desc = parse_cdl(cdl)
+        assert desc.doc_comments == ["Mineral: Diamond"]
+        assert desc.definitions is not None
+        assert len(desc.flat_forms()) == 1
+
+    def test_definition_with_features(self):
+        """Definition body with features resolves correctly."""
+        cdl = "@oct = {111}@1.0\ncubic[m3m]:$oct[phantom:3]"
+        desc = parse_cdl(cdl)
+        flat = desc.flat_forms()
+        assert len(flat) == 1
+        assert flat[0].features is not None
+        assert flat[0].features[0].name == "phantom"
+
+
+# =============================================================================
+# flat_forms() Tests (CDL v1.3)
+# =============================================================================
+
+
+class TestFlatForms:
+    """Test CDL v1.3 flat_forms() backwards compatibility method."""
+
+    def test_flat_forms_simple(self):
+        """flat_forms() on simple CDL returns same count as forms."""
+        desc = parse_cdl("cubic[m3m]:{111}@1.0 + {100}@1.3")
+        assert len(desc.flat_forms()) == 2
+
+    def test_flat_forms_group(self):
+        """flat_forms() flattens groups."""
+        desc = parse_cdl("cubic[m3m]:({111} + {100})[phantom:3]")
+        flat = desc.flat_forms()
+        assert len(flat) == 2
+        # Both should have phantom:3 feature
+        for f in flat:
+            assert f.features is not None
+            assert any(feat.name == "phantom" for feat in f.features)
+
+    def test_flat_forms_mixed(self):
+        """flat_forms() with mix of groups and plain forms."""
+        desc = parse_cdl("cubic[m3m]:({111} + {100})[phantom:3] + {110}@0.8")
+        flat = desc.flat_forms()
+        assert len(flat) == 3
+        # First two have phantom, third doesn't
+        assert flat[0].features is not None
+        assert flat[1].features is not None
+        assert flat[2].features is None
+
+    def test_flat_forms_backwards_compat(self):
+        """flat_forms() works identically for v1-style CDL."""
+        for _, cdl in [
+            ("simple", "cubic[m3m]:{111}"),
+            ("truncated", "cubic[m3m]:{111}@1.0 + {100}@1.3"),
+            ("triple", "cubic[m3m]:{111}@1.0 + {100}@0.5 + {110}@0.3"),
+        ]:
+            desc = parse_cdl(cdl)
+            flat = desc.flat_forms()
+            assert len(flat) == len(desc.forms)
+            for i, f in enumerate(flat):
+                assert f.miller == desc.forms[i].miller
+                assert f.scale == desc.forms[i].scale
+
+    def test_flat_forms_preserves_scale(self):
+        """flat_forms() preserves individual form scales."""
+        desc = parse_cdl("cubic[m3m]:({111}@1.0 + {100}@1.3)[phantom:3]")
+        flat = desc.flat_forms()
+        assert flat[0].scale == 1.0
+        assert flat[1].scale == 1.3
+
+    def test_flat_forms_nested_groups(self):
+        """flat_forms() handles nested groups."""
+        desc = parse_cdl("cubic[m3m]:(({111} + {100}) + {110})")
+        flat = desc.flat_forms()
+        assert len(flat) == 3
+
+    def test_flat_forms_feature_merge(self):
+        """flat_forms() merges parent and child features."""
+        desc = parse_cdl("cubic[m3m]:({111}[trigon:dense] + {100})[phantom:3]")
+        flat = desc.flat_forms()
+        assert len(flat) == 2
+        # First form should have both phantom (from group) and trigon (own)
+        f0_names = [feat.name for feat in flat[0].features]
+        assert "phantom" in f0_names
+        assert "trigon" in f0_names
+        # Second form should have only phantom (from group)
+        f1_names = [feat.name for feat in flat[1].features]
+        assert "phantom" in f1_names
+        assert len(f1_names) == 1
+
+
+# =============================================================================
+# Version Test (CDL v1.3)
+# =============================================================================
+
+
+class TestVersion:
+    """Test version is updated."""
+
+    def test_version_1_3(self):
+        """Version is 1.3.0."""
+        import cdl_parser
+        assert cdl_parser.__version__ == "1.3.0"
+
+
+# =============================================================================
+# v1 Regression (CDL v1.3)
+# =============================================================================
+
+
+class TestV1Regression:
+    """Ensure all v1/v1.2 CDL still works with v1.3 changes."""
+
+    @pytest.mark.parametrize("name,cdl", CDL_TEST_CASES)
+    def test_all_v1_cases_still_work(self, name, cdl):
+        """All CDL_TEST_CASES parse successfully with v1.3."""
+        desc = parse_cdl(cdl)
+        assert isinstance(desc, CrystalDescription)
+        # forms are still iterable and contain CrystalForm instances
+        for f in desc.forms:
+            assert isinstance(f, CrystalForm)
+        # flat_forms() returns as many forms as desc.forms for v1-style CDL
+        flat = desc.flat_forms()
+        assert len(flat) == len(desc.forms)
+
+    def test_v1_forms_are_crystal_form_instances(self):
+        """v1-style CDL forms are CrystalForm, not FormGroup."""
+        desc = parse_cdl("cubic[m3m]:{111}@1.0 + {100}@1.3")
+        for f in desc.forms:
+            assert isinstance(f, CrystalForm)
+            assert not isinstance(f, FormGroup)
+
+    def test_v1_definitions_none(self):
+        """v1-style CDL has no definitions."""
+        desc = parse_cdl("cubic[m3m]:{111}")
+        assert desc.definitions is None
+
+    def test_v1_to_dict_has_flat_forms(self):
+        """to_dict() includes flat_forms key."""
+        desc = parse_cdl("cubic[m3m]:{111}@1.0 + {100}@1.3")
+        d = desc.to_dict()
+        assert "flat_forms" in d
+        assert len(d["flat_forms"]) == 2