From a854bb70cac4cf5971f53a17ab0456ae1befd5a6 Mon Sep 17 00:00:00 2001 From: Bissbert <43237892+Bissbert@users.noreply.github.com> Date: Sat, 14 Feb 2026 23:50:55 +0700 Subject: [PATCH] feat!: add form grouping, labels, and named references (CDL v1.3) Phase 3 of CDL v2 implementation. Adds FormGroup for parenthesized form grouping with shared features, form labels, @name/$name definitions and references, flat_forms() backwards-compatible flattening, and 43 new tests (163 total). BREAKING CHANGE: CrystalDescription.forms type changed from list[CrystalForm] to list[FormNode]. Use flat_forms() for backwards-compatible flat list access. --- pyproject.toml | 2 +- src/cdl_parser/__init__.py | 19 +- src/cdl_parser/constants.py | 27 ++ src/cdl_parser/models.py | 206 ++++++++- src/cdl_parser/parser.py | 341 ++++++++++++++- tests/fixtures/v1.3-test-cases.json | 223 ++++++++++ tests/test_parser.py | 620 ++++++++++++++++++++++++++++ 7 files changed, 1413 insertions(+), 25 deletions(-) create mode 100644 tests/fixtures/v1.3-test-cases.json diff --git a/pyproject.toml b/pyproject.toml index b44ec28..c54f843 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "gemmology-cdl-parser" -version = "1.0.1" +version = "1.3.0" description = "Crystal Description Language (CDL) parser for crystallographic visualization" readme = "README.md" license = { text = "MIT" } diff --git a/src/cdl_parser/__init__.py b/src/cdl_parser/__init__.py index 184d711..8660701 100644 --- a/src/cdl_parser/__init__.py +++ b/src/cdl_parser/__init__.py @@ -22,7 +22,7 @@ cubic[m3m]:{111} | twin(spinel) # Spinel-law twin """ -__version__ = "1.0.0" +__version__ = "1.3.0" __author__ = "Fabian Schuh" __email__ = "fabian@gemmology.dev" @@ -32,8 +32,10 @@ ALL_POINT_GROUPS, CRYSTAL_SYSTEMS, DEFAULT_POINT_GROUPS, + FEATURE_NAMES, MODIFICATION_TYPES, NAMED_FORMS, + PHENOMENON_TYPES, POINT_GROUPS, TWIN_LAWS, TWIN_TYPES, @@ -46,13 +48,18 @@ from .models import ( CrystalDescription, CrystalForm, 
+ Definition, + Feature, + FormGroup, + FormNode, MillerIndex, Modification, + PhenomenonSpec, TwinSpec, ) # Lexer/Parser internals (for advanced use) -from .parser import Lexer, Parser, Token, TokenType, parse_cdl, validate_cdl +from .parser import Lexer, Parser, Token, TokenType, parse_cdl, strip_comments, validate_cdl __all__ = [ # Version @@ -63,8 +70,13 @@ # Data classes "CrystalDescription", "CrystalForm", + "Definition", + "Feature", + "FormGroup", + "FormNode", "MillerIndex", "Modification", + "PhenomenonSpec", "TwinSpec", # Exceptions "CDLError", @@ -74,8 +86,10 @@ "ALL_POINT_GROUPS", "CRYSTAL_SYSTEMS", "DEFAULT_POINT_GROUPS", + "FEATURE_NAMES", "MODIFICATION_TYPES", "NAMED_FORMS", + "PHENOMENON_TYPES", "POINT_GROUPS", "TWIN_LAWS", "TWIN_TYPES", @@ -84,4 +98,5 @@ "Parser", "Token", "TokenType", + "strip_comments", ] diff --git a/src/cdl_parser/constants.py b/src/cdl_parser/constants.py index d324fae..6d66c1b 100644 --- a/src/cdl_parser/constants.py +++ b/src/cdl_parser/constants.py @@ -120,6 +120,7 @@ "truncate", # Cut off corners/edges "taper", # Make narrower in one direction "bevel", # Add beveled edges + "flatten", # Compress along an axis } # ============================================================================= @@ -131,3 +132,29 @@ "penetration", # Penetration twin (interpenetrating) "cyclic", # Cyclic twin (multiple individuals) } + +# ============================================================================= +# Feature Names (CDL v1.2) +# ============================================================================= + +FEATURE_NAMES: set[str] = { + # Growth features + "phantom", "sector", "zoning", "skeletal", "dendritic", + # Surface features + "striation", "trigon", "etch_pit", "growth_hillock", + # Inclusion features + "inclusion", "needle", "silk", "fluid", "bubble", + # Color features + "colour", "colour_zone", "pleochroism", + # Other + "lamellar", "banding", +} + +# 
============================================================================= +# Phenomenon Types (CDL v1.2) +# ============================================================================= + +PHENOMENON_TYPES: set[str] = { + "asterism", "chatoyancy", "adularescence", "labradorescence", + "play_of_color", "colour_change", "aventurescence", "iridescence", +} diff --git a/src/cdl_parser/models.py b/src/cdl_parser/models.py index 3117971..2a5078c 100644 --- a/src/cdl_parser/models.py +++ b/src/cdl_parser/models.py @@ -4,8 +4,10 @@ Data classes representing Crystal Description Language components. """ +from __future__ import annotations + from dataclasses import dataclass, field -from typing import Any +from typing import Any, Union @dataclass @@ -62,6 +64,27 @@ def __repr__(self) -> str: return f"MillerIndex({self.h}, {self.k}, {self.l})" +@dataclass +class Feature: + """A crystal feature annotation. + + Describes growth patterns, surface markings, inclusions, or color properties. + + Attributes: + name: Feature type ('phantom', 'trigon', 'silk', 'colour', etc.) + values: List of feature values (numbers, identifiers, color specs) + """ + + name: str + values: list[int | float | str] = field(default_factory=list) + + def __str__(self) -> str: + if self.values: + val_str = ", ".join(str(v) for v in self.values) + return f"{self.name}:{val_str}" + return self.name + + @dataclass class CrystalForm: """A crystal form with Miller index and scale. 
@@ -73,6 +96,8 @@ class CrystalForm: miller: The Miller index defining the form scale: Distance scale (default 1.0, larger = more truncated) name: Original name if using named form (e.g., 'octahedron') + features: Optional list of feature annotations + label: Optional label for the form (e.g., 'prism' in prism:{10-10}) Examples: >>> CrystalForm(MillerIndex(1, 1, 1), scale=1.0) @@ -82,16 +107,61 @@ class CrystalForm: miller: MillerIndex scale: float = 1.0 name: str | None = None # Original name if using named form + features: list[Feature] | None = None # Per-form features [phantom:3] + label: str | None = None # Form label (v1.3) def __str__(self) -> str: s = str(self.miller) if self.name: s = f"{self.name}={s}" + if self.label: + s = f"{self.label}:{s}" if self.scale != 1.0: s += f"@{self.scale}" + if self.features: + feat_str = ", ".join(str(f) for f in self.features) + s += f"[{feat_str}]" + return s + + +@dataclass +class FormGroup: + """A group of forms with optional shared features and label. + + Represents parenthesized form groups: (form + form)[shared_features] + """ + + forms: list[FormNode] + features: list[Feature] | None = None + label: str | None = None + + def __str__(self) -> str: + form_strs = [str(f) for f in self.forms] + s = "(" + " + ".join(form_strs) + ")" + if self.label: + s = f"{self.label}:{s}" + if self.features: + feat_str = ", ".join(str(f) for f in self.features) + s += f"[{feat_str}]" return s +# Type alias for form tree nodes +FormNode = Union[CrystalForm, FormGroup] + + +@dataclass +class Definition: + """A named definition: @name = expression""" + + name: str + body: list[FormNode] + + def __str__(self) -> str: + body_str = " + ".join(str(f) for f in self.body) + return f"@{self.name} = {body_str}" + + @dataclass class Modification: """A morphological modification. @@ -99,7 +169,7 @@ class Modification: Represents transformations applied to the crystal shape. 
Attributes: - type: Modification type ('elongate', 'truncate', 'taper', 'bevel') + type: Modification type ('elongate', 'truncate', 'taper', 'bevel', 'flatten') params: Parameters specific to the modification type Examples: @@ -107,7 +177,7 @@ class Modification: >>> Modification('truncate', {'form': MillerIndex(1,0,0), 'depth': 0.3}) """ - type: str # elongate, truncate, taper, bevel + type: str # elongate, truncate, taper, bevel, flatten params: dict[str, Any] = field(default_factory=dict) def __str__(self) -> str: @@ -148,6 +218,52 @@ def __str__(self) -> str: return f"twin({self.axis},{self.angle},{self.twin_type})" +@dataclass +class PhenomenonSpec: + """Optical phenomenon specification. + + Attributes: + type: Phenomenon type ('asterism', 'chatoyancy', 'adularescence', etc.) + params: Dict of parameters (e.g. {'rays': 6, 'intensity': 'strong'}) + """ + + type: str + params: dict[str, int | float | str] = field(default_factory=dict) + + def __str__(self) -> str: + parts = [self.type] + for k, v in self.params.items(): + parts.append(f"{k}:{v}") + return "phenomenon[" + ", ".join(parts) + "]" + + +def _form_node_to_dict(node: FormNode) -> dict[str, Any]: + """Convert a FormNode to dictionary representation.""" + if isinstance(node, CrystalForm): + return { + "type": "form", + "miller": node.miller.as_tuple(), + "scale": node.scale, + "name": node.name, + "label": node.label, + "features": [ + {"name": feat.name, "values": feat.values} + for feat in node.features + ] if node.features else None, + } + elif isinstance(node, FormGroup): + return { + "type": "group", + "forms": [_form_node_to_dict(f) for f in node.forms], + "label": node.label, + "features": [ + {"name": feat.name, "values": feat.values} + for feat in node.features + ] if node.features else None, + } + return {} + + @dataclass class CrystalDescription: """Complete crystal description parsed from CDL. 
@@ -158,9 +274,10 @@ class CrystalDescription: Attributes: system: Crystal system ('cubic', 'hexagonal', etc.) point_group: Hermann-Mauguin point group symbol ('m3m', '6/mmm', etc.) - forms: List of crystal forms with their scales + forms: List of form nodes (CrystalForm or FormGroup) modifications: List of morphological modifications twin: Optional twin specification + definitions: Optional list of named definitions Examples: >>> desc = parse_cdl("cubic[m3m]:{111}@1.0 + {100}@1.3") @@ -172,15 +289,34 @@ class CrystalDescription: system: str point_group: str - forms: list[CrystalForm] = field(default_factory=list) + forms: list[FormNode] = field(default_factory=list) modifications: list[Modification] = field(default_factory=list) twin: TwinSpec | None = None + phenomenon: PhenomenonSpec | None = None + doc_comments: list[str] | None = None + definitions: list[Definition] | None = None + + def flat_forms(self) -> list[CrystalForm]: + """Get a flat list of all CrystalForm objects (backwards compat). + + Recursively traverses FormGroup nodes to extract all CrystalForm leaves. + Features from parent FormGroups are merged into child forms. 
+ """ + result: list[CrystalForm] = [] + for node in self.forms: + result.extend(_flatten_node(node)) + return result def __str__(self) -> str: parts = [f"{self.system}[{self.point_group}]"] - # Forms - form_strs = [str(f.miller) + (f"@{f.scale}" if f.scale != 1.0 else "") for f in self.forms] + # Definitions + if self.definitions: + def_strs = [str(d) for d in self.definitions] + parts = def_strs + parts + + # Forms (including features) + form_strs = [str(f) for f in self.forms] parts.append(":" + " + ".join(form_strs)) # Modifications @@ -192,6 +328,10 @@ def __str__(self) -> str: if self.twin: parts.append(" | " + str(self.twin)) + # Phenomenon + if self.phenomenon: + parts.append(" | " + str(self.phenomenon)) + return "".join(parts) def to_dict(self) -> dict[str, Any]: @@ -199,9 +339,19 @@ def to_dict(self) -> dict[str, Any]: return { "system": self.system, "point_group": self.point_group, - "forms": [ - {"miller": f.miller.as_tuple(), "scale": f.scale, "name": f.name} - for f in self.forms + "forms": [_form_node_to_dict(f) for f in self.forms], + "flat_forms": [ + { + "miller": f.miller.as_tuple(), + "scale": f.scale, + "name": f.name, + "label": f.label, + "features": [ + {"name": feat.name, "values": feat.values} + for feat in f.features + ] if f.features else None, + } + for f in self.flat_forms() ], "modifications": [{"type": m.type, "params": m.params} for m in self.modifications], "twin": { @@ -213,4 +363,40 @@ def to_dict(self) -> dict[str, Any]: } if self.twin else None, + "phenomenon": { + "type": self.phenomenon.type, + "params": self.phenomenon.params, + } + if self.phenomenon + else None, + "doc_comments": self.doc_comments, + "definitions": [ + {"name": d.name, "body": [_form_node_to_dict(f) for f in d.body]} + for d in self.definitions + ] if self.definitions else None, } + + +def _flatten_node( + node: FormNode, parent_features: list[Feature] | None = None +) -> list[CrystalForm]: + """Recursively flatten a FormNode into a list of 
CrystalForms.""" + if isinstance(node, CrystalForm): + if parent_features: + merged = list(parent_features) + if node.features: + merged.extend(node.features) + return [CrystalForm( + miller=node.miller, scale=node.scale, + name=node.name, features=merged, label=node.label, + )] + return [node] + elif isinstance(node, FormGroup): + combined_features = list(parent_features) if parent_features else [] + if node.features: + combined_features.extend(node.features) + result: list[CrystalForm] = [] + for child in node.forms: + result.extend(_flatten_node(child, combined_features if combined_features else None)) + return result + return [] diff --git a/src/cdl_parser/parser.py b/src/cdl_parser/parser.py index 179e272..c7c935c 100644 --- a/src/cdl_parser/parser.py +++ b/src/cdl_parser/parser.py @@ -4,6 +4,7 @@ Lexer and parser for Crystal Description Language strings. """ +import re from dataclasses import dataclass from enum import Enum from typing import Any @@ -20,11 +21,55 @@ from .models import ( CrystalDescription, CrystalForm, + Definition, + Feature, + FormGroup, + FormNode, MillerIndex, Modification, + PhenomenonSpec, TwinSpec, ) + +def strip_comments(text: str) -> tuple[str, list[str]]: + """Strip comments from CDL text before lexing. + + Extracts doc comments (#! Key: Value) and removes block (/* ... */) + and line (# ...) comments. + + Args: + text: Raw CDL string possibly containing comments. + + Returns: + Tuple of (cleaned text with comments removed, list of doc comment strings). + """ + doc_comments: list[str] = [] + + # Extract doc comments (#! ...) before stripping anything else. + # Process line-by-line so we can identify #! lines. + lines = text.split("\n") + processed_lines: list[str] = [] + for line in lines: + stripped = line.lstrip() + if stripped.startswith("#!"): + # Doc comment — capture the content after "#! " or "#!" 
+ content = stripped[2:].strip() + doc_comments.append(content) + # Don't include this line in the CDL text + else: + processed_lines.append(line) + + text = "\n".join(processed_lines) + + # Strip block comments (/* ... */), which may span multiple lines + text = re.sub(r"/\*.*?\*/", "", text, flags=re.DOTALL) + + # Strip line comments (# to end of line) + text = re.sub(r"#[^\n]*", "", text) + + return text, doc_comments + # ============================================================================= # Token Types # ============================================================================= @@ -49,6 +94,8 @@ class TokenType(Enum): INTEGER = "INTEGER" FLOAT = "FLOAT" IDENTIFIER = "IDENTIFIER" + DOLLAR = "DOLLAR" + EQUALS = "EQUALS" EOF = "EOF" @@ -157,6 +204,8 @@ def next_token(self) -> Token: ",": TokenType.COMMA, "(": TokenType.LPAREN, ")": TokenType.RPAREN, + "$": TokenType.DOLLAR, + "=": TokenType.EQUALS, } if ch in single_char_tokens: @@ -213,6 +262,86 @@ def tokenize(self) -> list[Token]: return tokens +# ============================================================================= +# Definition Pre-processing +# ============================================================================= + + +def _preprocess_definitions(text: str) -> tuple[str, list[tuple[str, str]]]: + """Extract @name = expression definitions and resolve $name references. + + Args: + text: Comment-stripped CDL text (may be multi-line). + + Returns: + Tuple of (resolved CDL body text, list of (name, raw_body) definition pairs). 
+ """ + lines = text.split("\n") + definitions: list[tuple[str, str]] = [] # (name, raw_body) + body_lines: list[str] = [] + + # First pass: extract definition lines + for line in lines: + stripped = line.strip() + if stripped.startswith("@"): + # Parse @name = expression + match = re.match(r"@(\w+)\s*=\s*(.+)", stripped) + if match: + name = match.group(1) + body = match.group(2).strip() + definitions.append((name, body)) + continue + body_lines.append(line) + + # Build name -> body mapping, resolving forward references within definitions + resolved: dict[str, str] = {} + for name, body in definitions: + # Resolve $references within this definition body + resolved_body = body + for prev_name, prev_body in resolved.items(): + resolved_body = re.sub(r"\$" + prev_name + r"(?!\w)", prev_body, resolved_body) + resolved[name] = resolved_body + + # Second pass: resolve $references in the main body + body_text = "\n".join(body_lines) + for name, resolved_body in resolved.items(): + body_text = re.sub(r"\$" + name + r"(?!\w)", resolved_body, body_text) + + # Check for unresolved $references + unresolved = re.findall(r"\$(\w+)", body_text) + if unresolved: + raise ParseError(f"Undefined reference: ${unresolved[0]}", position=-1) + + return body_text, definitions + + +def _parse_definition_bodies( + definitions: list[tuple[str, str]], +) -> list[Definition]: + """Parse raw definition bodies into Definition objects. + + Each definition body is parsed as a form list. 
+ """ + result: list[Definition] = [] + resolved_bodies: dict[str, str] = {} + + for name, raw_body in definitions: + # Resolve references within this body + body = raw_body + for prev_name, prev_resolved in resolved_bodies.items(): + body = re.sub(r"\$" + prev_name + r"(?!\w)", prev_resolved, body) + resolved_bodies[name] = body + + # Parse the resolved body as a form list + lexer = Lexer(body) + tokens = lexer.tokenize() + parser = Parser(tokens) + forms = parser._parse_form_list() + result.append(Definition(name=name, body=forms)) + + return result + + # ============================================================================= # Parser # ============================================================================= @@ -283,12 +412,14 @@ def parse(self) -> CrystalDescription: modifications = [] if self._current().type == TokenType.PIPE: self._advance() # consume | - # Check if it's modifications or twin + # Check if it's modifications, twin, or phenomenon if self._current().type == TokenType.IDENTIFIER: ident = self._current().value.lower() if ident == "twin": pass # It's a twin, not modifications - elif ident in {"elongate", "truncate", "taper", "bevel"}: + elif ident == "phenomenon": + pass # It's a phenomenon, not modifications + elif ident in {"elongate", "truncate", "taper", "bevel", "flatten"}: modifications = self._parse_modifications() # Parse optional twin @@ -298,26 +429,88 @@ def parse(self) -> CrystalDescription: if self._current().type == TokenType.IDENTIFIER and self._current().value.lower() == "twin": twin = self._parse_twin() + # Parse optional phenomenon + phenomenon = None + if self._current().type == TokenType.PIPE: + self._advance() # consume | + if self._current().type == TokenType.IDENTIFIER and self._current().value.lower() == "phenomenon": + phenomenon = self._parse_phenomenon() + return CrystalDescription( system=system, point_group=point_group, forms=forms, modifications=modifications, twin=twin, + phenomenon=phenomenon, ) - def 
_parse_form_list(self) -> list[CrystalForm]: - """Parse form_list = form ('+' form)*""" - forms = [self._parse_form()] + def _parse_form_list(self) -> list[FormNode]: + """Parse form_list = form_or_group ('+' form_or_group)*""" + forms: list[FormNode] = [self._parse_form_or_group()] while self._current().type == TokenType.PLUS: self._advance() # consume + - forms.append(self._parse_form()) + forms.append(self._parse_form_or_group()) return forms - def _parse_form(self) -> CrystalForm: - """Parse form = (form_name | miller_index) ['@' scale]""" + def _parse_form_or_group(self) -> FormNode: + """Parse either a parenthesized group or a single form. + + Handles: + - (form + form)[features] - group + - label:(form + form)[features] - labeled group + - label:{hkl}@scale[features] - labeled form + - {hkl}@scale[features] - plain form + - named_form@scale[features] - named form + """ + label = None + + # Check for label: identifier followed by COLON, then LBRACE or LPAREN + if self._current().type == TokenType.IDENTIFIER: + ident = self._current().value + if self._peek().type == TokenType.COLON: + # Look at what follows the colon + after_colon = self._peek(2) + if after_colon.type == TokenType.LPAREN: + # label:(group) + label = ident + self._advance() # consume identifier + self._advance() # consume colon + elif after_colon.type == TokenType.LBRACE: + # Could be label:{hkl} - but only if identifier is NOT a named form + # If it IS a named form, we'd need a different syntax. 
+ # Named forms use: octahedron (no colon) - so label:{hkl} is unambiguous + # when the identifier is NOT a known crystal system + ident_lower = ident.lower() + if ident_lower not in NAMED_FORMS: + label = ident + self._advance() # consume identifier + self._advance() # consume colon + + if self._current().type == TokenType.LPAREN: + return self._parse_group(label) + else: + return self._parse_form(label) + + def _parse_group(self, label: str | None = None) -> FormGroup: + """Parse a parenthesized group: (form + form)[features]""" + self._advance() # consume ( + + forms = self._parse_form_list() + + self._expect(TokenType.RPAREN) + + # Optional features + features = None + if self._current().type == TokenType.LBRACKET: + features = self._parse_features() + + return FormGroup(forms=forms, features=features, label=label) + + def _parse_form(self, label: str | None = None) -> CrystalForm: + """Parse form = (form_name | miller_index) ['@' scale] ['[' features ']']""" name = None miller = None @@ -349,7 +542,12 @@ def _parse_form(self) -> CrystalForm: else: raise ParseError("Expected scale value after @", position=scale_token.position) - return CrystalForm(miller=miller, scale=scale, name=name) + # Optional features [feature:value, ...] + features = None + if self._current().type == TokenType.LBRACKET: + features = self._parse_features() + + return CrystalForm(miller=miller, scale=scale, name=name, features=features, label=label) def _parse_miller_index(self) -> MillerIndex: """Parse Miller index {hkl} or {hkil}. 
@@ -415,7 +613,7 @@ def _parse_modification(self) -> Modification: mod_token = self._current() mod_type = self._expect(TokenType.IDENTIFIER).value.lower() - if mod_type not in {"elongate", "truncate", "taper", "bevel"}: + if mod_type not in {"elongate", "truncate", "taper", "bevel", "flatten"}: raise ParseError(f"Unknown modification type: {mod_type}", position=mod_token.position) self._expect(TokenType.LPAREN) @@ -450,6 +648,12 @@ def _parse_modification(self) -> Modification: self._expect(TokenType.COLON) width = self._parse_number() params = {"edges": edges, "width": width} + elif mod_type == "flatten": + # flatten(axis:ratio) + axis = self._expect(TokenType.IDENTIFIER).value.lower() + self._expect(TokenType.COLON) + ratio = self._parse_number() + params = {"axis": axis, "ratio": ratio} self._expect(TokenType.RPAREN) @@ -499,6 +703,102 @@ def _parse_twin(self) -> TwinSpec: return TwinSpec(law=law, axis=axis, angle=angle, twin_type=twin_type, count=count) + def _parse_features(self) -> list[Feature]: + """Parse feature list [name:value, name:value, ...]""" + self._advance() # consume [ + features = [] + + while self._current().type != TokenType.RBRACKET and self._current().type != TokenType.EOF: + # Parse feature name + name_token = self._expect(TokenType.IDENTIFIER) + name = name_token.value.lower() + + # Expect colon + self._expect(TokenType.COLON) + + # Parse values until comma or ] + values: list[int | float | str] = [] + values.append(self._parse_feature_value()) + + # Check for more values separated by comma + # But distinguish "next value" from "next feature" + # Next feature = IDENTIFIER followed by COLON + while self._current().type == TokenType.COMMA: + next_tok = self._peek(1) + next_next = self._peek(2) + if next_tok.type == TokenType.IDENTIFIER and next_next.type == TokenType.COLON: + break # It's a new feature + self._advance() # consume comma + values.append(self._parse_feature_value()) + + features.append(Feature(name=name, values=values)) + + # 
Consume comma between features + if self._current().type == TokenType.COMMA: + self._advance() + + self._expect(TokenType.RBRACKET) + return features + + def _parse_feature_value(self) -> int | float | str: + """Parse a single feature value (number or identifier).""" + token = self._current() + if token.type == TokenType.INTEGER: + return int(self._advance().value) + elif token.type == TokenType.FLOAT: + return float(self._advance().value) + elif token.type == TokenType.IDENTIFIER: + return self._advance().value.lower() + elif token.type == TokenType.POINT_GROUP: + # Handle numeric point groups like '1', '3' as values + value = token.value + try: + result = int(value) + self._advance() + return result + except ValueError: + pass + return self._advance().value + raise ParseError("Expected feature value", position=token.position) + + def _parse_phenomenon(self) -> PhenomenonSpec: + """Parse phenomenon[type:value, param:value, ...]""" + self._expect(TokenType.IDENTIFIER) # consume 'phenomenon' + self._expect(TokenType.LBRACKET) + + # First token is the phenomenon type + phen_type = self._expect(TokenType.IDENTIFIER).value.lower() + + params: dict[str, int | float | str] = {} + + # Check for :value after type (e.g., asterism:6) + if self._current().type == TokenType.COLON: + self._advance() + val = self._parse_feature_value() + # Store as the primary value + if isinstance(val, (int, float)): + params["value"] = val + else: + params["intensity"] = val + + # Parse additional comma-separated params + while self._current().type == TokenType.COMMA: + self._advance() + if self._current().type == TokenType.IDENTIFIER: + key = self._advance().value.lower() + if self._current().type == TokenType.COLON: + self._advance() + params[key] = self._parse_feature_value() + else: + # Bare identifier value + params[key] = True + elif self._current().type in (TokenType.INTEGER, TokenType.FLOAT, TokenType.POINT_GROUP): + val = self._parse_feature_value() + params["value"] = val + + 
self._expect(TokenType.RBRACKET) + return PhenomenonSpec(type=phen_type, params=params) + def _parse_number(self) -> float: """Parse a number (int or float). @@ -571,10 +871,27 @@ def parse_cdl(text: str) -> CrystalDescription: >>> desc.forms[0].miller.i -1 """ - lexer = Lexer(text) + cleaned, doc_comments = strip_comments(text) + cleaned = cleaned.strip() + if not cleaned: + raise ParseError("Empty CDL string after stripping comments", position=0) + + # Pre-process definitions (@name = expression) and resolve $references + body_text, raw_definitions = _preprocess_definitions(cleaned) + body_text = body_text.strip() + if not body_text: + raise ParseError("Empty CDL string after extracting definitions", position=0) + + # Parse definition bodies into Definition objects + definitions = _parse_definition_bodies(raw_definitions) if raw_definitions else None + + lexer = Lexer(body_text) tokens = lexer.tokenize() parser = Parser(tokens) - return parser.parse() + desc = parser.parse() + desc.doc_comments = doc_comments if doc_comments else None + desc.definitions = definitions + return desc def validate_cdl(text: str) -> tuple[bool, str | None]: diff --git a/tests/fixtures/v1.3-test-cases.json b/tests/fixtures/v1.3-test-cases.json new file mode 100644 index 0000000..405384d --- /dev/null +++ b/tests/fixtures/v1.3-test-cases.json @@ -0,0 +1,223 @@ +{ + "version": "1.3.0", + "description": "CDL v1.3 test cases for JS parser parity testing", + "test_cases": [ + { + "name": "simple_group", + "cdl": "cubic[m3m]:({111} + {100})", + "expected": { + "system": "cubic", + "point_group": "m3m", + "form_count": 1, + "flat_form_count": 2, + "has_groups": true, + "definitions_count": 0 + } + }, + { + "name": "group_with_features", + "cdl": "cubic[m3m]:({111}@1.0 + {100}@1.3)[phantom:3]", + "expected": { + "system": "cubic", + "point_group": "m3m", + "form_count": 1, + "flat_form_count": 2, + "has_groups": true, + "definitions_count": 0, + "flat_forms_all_have_feature": "phantom" + } + 
}, + { + "name": "group_plus_form", + "cdl": "cubic[m3m]:({111} + {100})[phantom:3] + {110}@0.8", + "expected": { + "system": "cubic", + "point_group": "m3m", + "form_count": 2, + "flat_form_count": 3, + "has_groups": true, + "definitions_count": 0 + } + }, + { + "name": "nested_group", + "cdl": "cubic[m3m]:(({111}) + {100})", + "expected": { + "system": "cubic", + "point_group": "m3m", + "form_count": 1, + "flat_form_count": 2, + "has_groups": true, + "definitions_count": 0 + } + }, + { + "name": "labeled_forms", + "cdl": "cubic[m3m]:core:{111}@1.0 + rim:{100}@1.3", + "expected": { + "system": "cubic", + "point_group": "m3m", + "form_count": 2, + "flat_form_count": 2, + "has_groups": false, + "definitions_count": 0, + "flat_forms": [ + {"label": "core", "miller": [1, 1, 1], "scale": 1.0}, + {"label": "rim", "miller": [1, 0, 0], "scale": 1.3} + ] + } + }, + { + "name": "labeled_group", + "cdl": "cubic[m3m]:core:({111} + {100})[phantom:3]", + "expected": { + "system": "cubic", + "point_group": "m3m", + "form_count": 1, + "flat_form_count": 2, + "has_groups": true, + "group_label": "core", + "definitions_count": 0 + } + }, + { + "name": "simple_definition", + "cdl": "@oct = {111}@1.0\ncubic[m3m]:$oct + {100}@1.3", + "expected": { + "system": "cubic", + "point_group": "m3m", + "form_count": 2, + "flat_form_count": 2, + "has_groups": false, + "definitions_count": 1, + "flat_forms": [ + {"miller": [1, 1, 1], "scale": 1.0}, + {"miller": [1, 0, 0], "scale": 1.3} + ] + } + }, + { + "name": "multiple_definitions", + "cdl": "@prism = {10-10}@1.0\n@rhomb = {10-11}@0.8\ntrigonal[-3m]:$prism + $rhomb", + "expected": { + "system": "trigonal", + "point_group": "-3m", + "form_count": 2, + "flat_form_count": 2, + "has_groups": false, + "definitions_count": 2 + } + }, + { + "name": "definition_referencing_definition", + "cdl": "@a = {111}@1.0\n@b = {100}@1.3\n@combo = $a + $b\ncubic[m3m]:$combo", + "expected": { + "system": "cubic", + "point_group": "m3m", + "form_count": 2, + 
"flat_form_count": 2, + "has_groups": false, + "definitions_count": 3 + } + }, + { + "name": "undefined_reference", + "cdl": "cubic[m3m]:$unknown", + "expected": { + "error": true, + "error_type": "ParseError" + } + }, + { + "name": "v1_simple_octahedron", + "cdl": "cubic[m3m]:{111}", + "expected": { + "system": "cubic", + "point_group": "m3m", + "form_count": 1, + "flat_form_count": 1, + "has_groups": false, + "definitions_count": 0 + } + }, + { + "name": "v1_truncated_octahedron", + "cdl": "cubic[m3m]:{111}@1.0 + {100}@1.3", + "expected": { + "system": "cubic", + "point_group": "m3m", + "form_count": 2, + "flat_form_count": 2, + "has_groups": false, + "definitions_count": 0 + } + }, + { + "name": "v1_with_features", + "cdl": "cubic[m3m]:{111}@1.0[phantom:3]", + "expected": { + "system": "cubic", + "point_group": "m3m", + "form_count": 1, + "flat_form_count": 1, + "has_groups": false, + "definitions_count": 0, + "flat_forms": [ + {"miller": [1, 1, 1], "scale": 1.0, "features": [{"name": "phantom", "values": [3]}]} + ] + } + }, + { + "name": "v1_with_twin", + "cdl": "cubic[m3m]:{111} | twin(spinel)", + "expected": { + "system": "cubic", + "point_group": "m3m", + "form_count": 1, + "flat_form_count": 1, + "has_groups": false, + "definitions_count": 0, + "has_twin": true, + "twin_law": "spinel" + } + }, + { + "name": "group_with_form_features_and_group_features", + "cdl": "cubic[m3m]:({111}[trigon:dense] + {100})[phantom:3]", + "expected": { + "system": "cubic", + "point_group": "m3m", + "form_count": 1, + "flat_form_count": 2, + "has_groups": true, + "definitions_count": 0, + "note": "First flat form gets phantom+trigon, second gets phantom only" + } + }, + { + "name": "definitions_with_comments", + "cdl": "# Comment\n@oct = {111}@1.0\n# Another comment\ncubic[m3m]:$oct", + "expected": { + "system": "cubic", + "point_group": "m3m", + "form_count": 1, + "flat_form_count": 1, + "has_groups": false, + "definitions_count": 1 + } + }, + { + "name": 
"definition_with_doc_comments", + "cdl": "#! Mineral: Diamond\n@oct = {111}@1.0\ncubic[m3m]:$oct", + "expected": { + "system": "cubic", + "point_group": "m3m", + "form_count": 1, + "flat_form_count": 1, + "has_groups": false, + "definitions_count": 1, + "doc_comments": ["Mineral: Diamond"] + } + } + ] +} diff --git a/tests/test_parser.py b/tests/test_parser.py index b95c739..843163a 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -12,8 +12,13 @@ POINT_GROUPS, CrystalDescription, CrystalForm, + Definition, + Feature, + FormGroup, + FormNode, MillerIndex, ParseError, + PhenomenonSpec, ValidationError, parse_cdl, validate_cdl, @@ -524,3 +529,618 @@ def test_fluorite_twin_cdl(self): desc = parse_cdl("cubic[m3m]:{111} | twin(fluorite)") assert desc.twin is not None assert desc.twin.law == "fluorite" + + +# ============================================================================= +# Comment Tests +# ============================================================================= + + +class TestComments: + """Test CDL comment stripping and doc comment extraction.""" + + def test_line_comment_at_start(self): + """Line comment before CDL is stripped.""" + desc = parse_cdl("# comment\ncubic[m3m]:{111}") + assert desc.system == "cubic" + assert desc.doc_comments is None + + def test_inline_comment(self): + """Inline comment after CDL is stripped.""" + desc = parse_cdl("cubic[m3m]:{111} # octahedron") + assert desc.system == "cubic" + assert desc.forms[0].miller.as_tuple() == (1, 1, 1) + + def test_block_comment(self): + """Block comment is stripped.""" + desc = parse_cdl("/* block */cubic[m3m]:{111}") + assert desc.system == "cubic" + + def test_multiline_block_comment(self): + """Multi-line block comment is stripped.""" + desc = parse_cdl("/* multi\nline */\ncubic[m3m]:{111}") + assert desc.system == "cubic" + + def test_doc_comment(self): + """Doc comment (#!) is extracted.""" + desc = parse_cdl("#! 
Mineral: Diamond\ncubic[m3m]:{111}") + assert desc.doc_comments == ["Mineral: Diamond"] + + def test_multiple_doc_comments(self): + """Multiple doc comments are preserved in order.""" + cdl = "#! Mineral: Diamond\n#! Habit: Octahedral\ncubic[m3m]:{111}" + desc = parse_cdl(cdl) + assert desc.doc_comments == ["Mineral: Diamond", "Habit: Octahedral"] + + def test_mixed_comments(self): + """Mix of line, block, and doc comments.""" + cdl = ( + "#! Mineral: Quartz\n" + "# A line comment\n" + "/* block */ trigonal[-3m]:{10-10} # inline" + ) + desc = parse_cdl(cdl) + assert desc.system == "trigonal" + assert desc.doc_comments == ["Mineral: Quartz"] + + def test_comment_only_raises(self): + """Comment-only input raises ParseError.""" + with pytest.raises(ParseError): + parse_cdl("# just a comment\n/* block */") + + def test_cdl_v1_regression(self): + """Existing CDL v1 strings without comments still work identically.""" + for _name, cdl in CDL_TEST_CASES: + desc = parse_cdl(cdl) + assert isinstance(desc, CrystalDescription) + assert desc.doc_comments is None + + def test_doc_comments_in_to_dict(self): + """Doc comments appear in to_dict() output.""" + desc = parse_cdl("#! 
Mineral: Diamond\ncubic[m3m]:{111}") + d = desc.to_dict() + assert d["doc_comments"] == ["Mineral: Diamond"] + + def test_no_doc_comments_in_to_dict(self): + """to_dict() has doc_comments=None when there are none.""" + desc = parse_cdl("cubic[m3m]:{111}") + d = desc.to_dict() + assert d["doc_comments"] is None + + +# ============================================================================= +# Flatten Modification Tests +# ============================================================================= + + +class TestFlattenModification: + """Test flatten modification parsing.""" + + def test_flatten_basic(self): + """Flatten modification parses correctly.""" + desc = parse_cdl("cubic[m3m]:{111} | flatten(a:0.5)") + assert len(desc.modifications) == 1 + assert desc.modifications[0].type == "flatten" + assert desc.modifications[0].params["axis"] == "a" + assert desc.modifications[0].params["ratio"] == 0.5 + + def test_flatten_float_ratio(self): + """Flatten with float ratio.""" + desc = parse_cdl("cubic[m3m]:{111} | flatten(c:0.75)") + assert desc.modifications[0].type == "flatten" + assert desc.modifications[0].params["axis"] == "c" + assert desc.modifications[0].params["ratio"] == 0.75 + + +# ============================================================================= +# Feature Tests (CDL v1.2) +# ============================================================================= + + +class TestFeatures: + """Test CDL v1.2 feature parsing on crystal forms.""" + + def test_single_feature(self): + """Single feature annotation on a form.""" + desc = parse_cdl("cubic[m3m]:{111}@1.0[trigon:dense]") + assert desc.forms[0].features is not None + assert len(desc.forms[0].features) == 1 + assert desc.forms[0].features[0].name == "trigon" + assert desc.forms[0].features[0].values == ["dense"] + + def test_multiple_feature_values(self): + """Feature with multiple values.""" + desc = parse_cdl("cubic[m3m]:{111}[phantom:3, white]") + assert desc.forms[0].features is not None + 
assert len(desc.forms[0].features) == 1 + feat = desc.forms[0].features[0] + assert feat.name == "phantom" + assert feat.values == [3, "white"] + + def test_feature_numeric_value(self): + """Feature with numeric value.""" + desc = parse_cdl("trigonal[32]:{10-10}@1.0[phantom:3]") + assert desc.forms[0].features is not None + feat = desc.forms[0].features[0] + assert feat.name == "phantom" + assert feat.values == [3] + + def test_feature_on_second_form(self): + """Feature on second form only.""" + desc = parse_cdl("cubic[m3m]:{111}@1.0 + {100}@1.3[trigon:sparse]") + assert desc.forms[0].features is None + assert desc.forms[1].features is not None + assert desc.forms[1].features[0].name == "trigon" + assert desc.forms[1].features[0].values == ["sparse"] + + def test_multiple_feature_types(self): + """Multiple distinct features on one form.""" + desc = parse_cdl("cubic[m3m]:{111}[trigon:dense, phantom:3]") + assert desc.forms[0].features is not None + assert len(desc.forms[0].features) == 2 + assert desc.forms[0].features[0].name == "trigon" + assert desc.forms[0].features[0].values == ["dense"] + assert desc.forms[0].features[1].name == "phantom" + assert desc.forms[0].features[1].values == [3] + + def test_features_with_scale(self): + """Features after scale value.""" + desc = parse_cdl("cubic[m3m]:{111}@1.0[silk:dense]") + assert desc.forms[0].scale == 1.0 + assert desc.forms[0].features is not None + assert desc.forms[0].features[0].name == "silk" + + def test_no_features_backwards_compat(self): + """Existing CDL without features still works.""" + desc = parse_cdl("cubic[m3m]:{111}@1.0") + assert desc.forms[0].features is None + + def test_feature_str_representation(self): + """Feature __str__ method.""" + feat = Feature("trigon", ["dense"]) + assert str(feat) == "trigon:dense" + feat2 = Feature("phantom", [3, "white"]) + assert str(feat2) == "phantom:3, white" + + def test_form_str_with_features(self): + """CrystalForm __str__ includes features.""" + desc = 
parse_cdl("cubic[m3m]:{111}@1.0[trigon:dense]") + form_str = str(desc.forms[0]) + assert "[trigon:dense]" in form_str + + def test_features_in_to_dict(self): + """Features appear in to_dict() output.""" + desc = parse_cdl("cubic[m3m]:{111}[phantom:3]") + d = desc.to_dict() + assert d["forms"][0]["features"] is not None + assert d["forms"][0]["features"][0]["name"] == "phantom" + assert d["forms"][0]["features"][0]["values"] == [3] + + def test_no_features_in_to_dict(self): + """to_dict() has features=None when there are none.""" + desc = parse_cdl("cubic[m3m]:{111}") + d = desc.to_dict() + assert d["forms"][0]["features"] is None + + +# ============================================================================= +# Phenomenon Tests (CDL v1.2) +# ============================================================================= + + +class TestPhenomenon: + """Test CDL v1.2 phenomenon parsing.""" + + def test_asterism(self): + """Asterism phenomenon with numeric value.""" + desc = parse_cdl("trigonal[-3m]:{10-11}@1.0 | phenomenon[asterism:6]") + assert desc.phenomenon is not None + assert desc.phenomenon.type == "asterism" + assert desc.phenomenon.params["value"] == 6 + + def test_chatoyancy(self): + """Chatoyancy phenomenon with string intensity.""" + desc = parse_cdl("orthorhombic[mmm]:{110}@1.0 | phenomenon[chatoyancy:sharp]") + assert desc.phenomenon is not None + assert desc.phenomenon.type == "chatoyancy" + assert desc.phenomenon.params["intensity"] == "sharp" + + def test_phenomenon_with_modifications(self): + """Phenomenon after modifications.""" + desc = parse_cdl("cubic[m3m]:{111} | elongate(c:1.5) | phenomenon[asterism:6]") + assert len(desc.modifications) == 1 + assert desc.phenomenon is not None + assert desc.phenomenon.type == "asterism" + + def test_phenomenon_with_twin(self): + """Phenomenon after twin.""" + desc = parse_cdl("cubic[m3m]:{111} | twin(spinel) | phenomenon[asterism:6]") + assert desc.twin is not None + assert desc.phenomenon is not None + 
assert desc.phenomenon.type == "asterism" + + def test_phenomenon_multiple_params(self): + """Phenomenon with multiple parameters.""" + desc = parse_cdl("trigonal[-3m]:{10-11} | phenomenon[asterism:6, intensity:strong]") + assert desc.phenomenon is not None + assert desc.phenomenon.type == "asterism" + assert desc.phenomenon.params["value"] == 6 + assert desc.phenomenon.params["intensity"] == "strong" + + def test_features_and_phenomenon(self): + """Features on form AND phenomenon on description.""" + desc = parse_cdl("trigonal[-3m]:{10-11}@1.0[silk:dense] | phenomenon[asterism:6]") + assert desc.forms[0].features is not None + assert desc.forms[0].features[0].name == "silk" + assert desc.phenomenon is not None + assert desc.phenomenon.type == "asterism" + + def test_no_phenomenon_backwards_compat(self): + """Existing CDL without phenomenon still works.""" + desc = parse_cdl("cubic[m3m]:{111}@1.0") + assert desc.phenomenon is None + + def test_phenomenon_str_representation(self): + """PhenomenonSpec __str__ method.""" + phen = PhenomenonSpec("asterism", {"value": 6}) + assert str(phen) == "phenomenon[asterism, value:6]" + + def test_description_str_with_phenomenon(self): + """CrystalDescription __str__ includes phenomenon.""" + desc = parse_cdl("trigonal[-3m]:{10-11}@1.0 | phenomenon[asterism:6]") + desc_str = str(desc) + assert "phenomenon[asterism" in desc_str + + def test_phenomenon_in_to_dict(self): + """Phenomenon appears in to_dict() output.""" + desc = parse_cdl("trigonal[-3m]:{10-11} | phenomenon[asterism:6]") + d = desc.to_dict() + assert d["phenomenon"] is not None + assert d["phenomenon"]["type"] == "asterism" + assert d["phenomenon"]["params"]["value"] == 6 + + def test_no_phenomenon_in_to_dict(self): + """to_dict() has phenomenon=None when there is none.""" + desc = parse_cdl("cubic[m3m]:{111}") + d = desc.to_dict() + assert d["phenomenon"] is None + + +# ============================================================================= +# Grouping Tests 
(CDL v1.3) +# ============================================================================= + + +class TestGrouping: + """Test CDL v1.3 parenthesized form grouping.""" + + def test_simple_group(self): + """Parenthesized group of forms.""" + desc = parse_cdl("cubic[m3m]:({111} + {100})") + assert len(desc.forms) == 1 + group = desc.forms[0] + assert isinstance(group, FormGroup) + assert len(group.forms) == 2 + + def test_group_with_shared_features(self): + """Group with shared features applied to all forms.""" + desc = parse_cdl("cubic[m3m]:({111}@1.0 + {100}@1.3)[phantom:3]") + assert len(desc.forms) == 1 + group = desc.forms[0] + assert isinstance(group, FormGroup) + assert group.features is not None + assert group.features[0].name == "phantom" + # flat_forms() should merge phantom:3 into both forms + flat = desc.flat_forms() + assert len(flat) == 2 + for f in flat: + assert f.features is not None + assert any(feat.name == "phantom" for feat in f.features) + + def test_group_plus_form(self): + """Group combined with standalone form.""" + desc = parse_cdl("cubic[m3m]:({111} + {100})[phantom:3] + {110}@0.8") + assert len(desc.forms) == 2 + assert isinstance(desc.forms[0], FormGroup) + assert isinstance(desc.forms[1], CrystalForm) + flat = desc.flat_forms() + assert len(flat) == 3 + + def test_nested_group(self): + """Nested parenthesized groups.""" + desc = parse_cdl("cubic[m3m]:(({111}) + {100})") + flat = desc.flat_forms() + assert len(flat) == 2 + assert flat[0].miller.as_tuple() == (1, 1, 1) + assert flat[1].miller.as_tuple() == (1, 0, 0) + + def test_group_str_representation(self): + """FormGroup __str__ method.""" + desc = parse_cdl("cubic[m3m]:({111} + {100})[phantom:3]") + group = desc.forms[0] + s = str(group) + assert "(" in s and ")" in s + assert "phantom:3" in s + + def test_group_in_to_dict(self): + """Groups appear in to_dict() output.""" + desc = parse_cdl("cubic[m3m]:({111} + {100})[phantom:3]") + d = desc.to_dict() + assert d["forms"][0]["type"] == 
"group" + assert len(d["forms"][0]["forms"]) == 2 + assert d["forms"][0]["features"][0]["name"] == "phantom" + # flat_forms in dict + assert len(d["flat_forms"]) == 2 + + def test_group_with_scales(self): + """Group with individually scaled forms.""" + desc = parse_cdl("cubic[m3m]:({111}@1.0 + {100}@1.3)") + flat = desc.flat_forms() + assert flat[0].scale == 1.0 + assert flat[1].scale == 1.3 + + +# ============================================================================= +# Form Label Tests (CDL v1.3) +# ============================================================================= + + +class TestFormLabels: + """Test CDL v1.3 form labels.""" + + def test_labeled_miller_form(self): + """Form with label using Miller index.""" + desc = parse_cdl("cubic[m3m]:core:{111}@1.0 + rim:{100}@1.3") + flat = desc.flat_forms() + assert len(flat) == 2 + assert flat[0].label == "core" + assert flat[0].miller.as_tuple() == (1, 1, 1) + assert flat[1].label == "rim" + assert flat[1].miller.as_tuple() == (1, 0, 0) + + def test_labeled_group(self): + """Group with label.""" + desc = parse_cdl("cubic[m3m]:core:({111} + {100})[phantom:3]") + assert len(desc.forms) == 1 + group = desc.forms[0] + assert isinstance(group, FormGroup) + assert group.label == "core" + + def test_label_str_representation(self): + """Label appears in __str__ output.""" + desc = parse_cdl("cubic[m3m]:core:{111}@1.0") + flat = desc.flat_forms() + assert "core:" in str(flat[0]) + + def test_unlabeled_forms_backwards_compat(self): + """Unlabeled forms still work identically.""" + desc = parse_cdl("cubic[m3m]:{111}@1.0 + {100}@1.3") + flat = desc.flat_forms() + assert flat[0].label is None + assert flat[1].label is None + + def test_label_in_to_dict(self): + """Labels appear in to_dict() output.""" + desc = parse_cdl("cubic[m3m]:core:{111}@1.0") + d = desc.to_dict() + assert d["flat_forms"][0]["label"] == "core" + + def test_named_form_not_treated_as_label(self): + """Named forms (like 'prism') are NOT treated as 
labels.""" + # 'octahedron' parses as a named form, so it must NOT be consumed as a 'label:' prefix + # Instead, it is stored as the form name and the label stays None + desc = parse_cdl("cubic[m3m]:octahedron@1.0") + flat = desc.flat_forms() + assert flat[0].name == "octahedron" + assert flat[0].label is None + + +# ============================================================================= +# Named Reference Tests (CDL v1.3) +# ============================================================================= + + +class TestNamedReferences: + """Test CDL v1.3 named definitions and $references.""" + + def test_simple_definition(self): + """Simple named definition and reference.""" + desc = parse_cdl("@oct = {111}@1.0\ncubic[m3m]:$oct + {100}@1.3") + assert len(desc.flat_forms()) == 2 + assert desc.flat_forms()[0].miller.as_tuple() == (1, 1, 1) + assert desc.flat_forms()[0].scale == 1.0 + + def test_multiple_definitions(self): + """Multiple definitions.""" + cdl = "@prism = {10-10}@1.0\n@rhomb = {10-11}@0.8\ntrigonal[-3m]:$prism + $rhomb" + desc = parse_cdl(cdl) + assert len(desc.flat_forms()) == 2 + + def test_definition_referencing_definition(self): + """Definition that references another definition.""" + cdl = "@a = {111}@1.0\n@b = {100}@1.3\n@combo = $a + $b\ncubic[m3m]:$combo" + desc = parse_cdl(cdl) + assert len(desc.flat_forms()) == 2 + + def test_definitions_stored(self): + """Definitions are stored on the CrystalDescription.""" + desc = parse_cdl("@oct = {111}@1.0\ncubic[m3m]:$oct") + assert desc.definitions is not None + assert len(desc.definitions) == 1 + assert desc.definitions[0].name == "oct" + + def test_undefined_reference_error(self): + """Undefined reference raises ParseError.""" + with pytest.raises(ParseError): + parse_cdl("cubic[m3m]:$unknown") + + def test_definitions_with_comments(self): + """Definitions work with comments.""" + cdl = "# Define forms\n@oct = {111}@1.0\n# Use them\ncubic[m3m]:$oct" + desc = parse_cdl(cdl) + assert len(desc.flat_forms()) == 1 + + def 
test_no_definitions_backwards_compat(self): + """CDL without definitions has definitions=None.""" + desc = parse_cdl("cubic[m3m]:{111}") + assert desc.definitions is None + + def test_definitions_in_to_dict(self): + """Definitions appear in to_dict() output.""" + desc = parse_cdl("@oct = {111}@1.0\ncubic[m3m]:$oct") + d = desc.to_dict() + assert d["definitions"] is not None + assert len(d["definitions"]) == 1 + assert d["definitions"][0]["name"] == "oct" + + def test_no_definitions_in_to_dict(self): + """to_dict() has definitions=None when there are none.""" + desc = parse_cdl("cubic[m3m]:{111}") + d = desc.to_dict() + assert d["definitions"] is None + + def test_definition_with_doc_comments(self): + """Definitions work alongside doc comments.""" + cdl = "#! Mineral: Diamond\n@oct = {111}@1.0\ncubic[m3m]:$oct" + desc = parse_cdl(cdl) + assert desc.doc_comments == ["Mineral: Diamond"] + assert desc.definitions is not None + assert len(desc.flat_forms()) == 1 + + def test_definition_with_features(self): + """Features attached to a $reference apply to the resolved form.""" + cdl = "@oct = {111}@1.0\ncubic[m3m]:$oct[phantom:3]" + desc = parse_cdl(cdl) + flat = desc.flat_forms() + assert len(flat) == 1 + assert flat[0].features is not None + assert flat[0].features[0].name == "phantom" + + +# ============================================================================= +# flat_forms() Tests (CDL v1.3) +# ============================================================================= + + +class TestFlatForms: + """Test CDL v1.3 flat_forms() backwards compatibility method.""" + + def test_flat_forms_simple(self): + """flat_forms() on simple CDL returns same count as forms.""" + desc = parse_cdl("cubic[m3m]:{111}@1.0 + {100}@1.3") + assert len(desc.flat_forms()) == 2 + + def test_flat_forms_group(self): + """flat_forms() flattens groups.""" + desc = parse_cdl("cubic[m3m]:({111} + {100})[phantom:3]") + flat = desc.flat_forms() + assert len(flat) == 2 + # Both should have phantom:3 
feature + for f in flat: + assert f.features is not None + assert any(feat.name == "phantom" for feat in f.features) + + def test_flat_forms_mixed(self): + """flat_forms() with mix of groups and plain forms.""" + desc = parse_cdl("cubic[m3m]:({111} + {100})[phantom:3] + {110}@0.8") + flat = desc.flat_forms() + assert len(flat) == 3 + # First two have phantom, third doesn't + assert flat[0].features is not None + assert flat[1].features is not None + assert flat[2].features is None + + def test_flat_forms_backwards_compat(self): + """flat_forms() works identically for v1-style CDL.""" + for _, cdl in [ + ("simple", "cubic[m3m]:{111}"), + ("truncated", "cubic[m3m]:{111}@1.0 + {100}@1.3"), + ("triple", "cubic[m3m]:{111}@1.0 + {100}@0.5 + {110}@0.3"), + ]: + desc = parse_cdl(cdl) + flat = desc.flat_forms() + assert len(flat) == len(desc.forms) + for i, f in enumerate(flat): + assert f.miller == desc.forms[i].miller + assert f.scale == desc.forms[i].scale + + def test_flat_forms_preserves_scale(self): + """flat_forms() preserves individual form scales.""" + desc = parse_cdl("cubic[m3m]:({111}@1.0 + {100}@1.3)[phantom:3]") + flat = desc.flat_forms() + assert flat[0].scale == 1.0 + assert flat[1].scale == 1.3 + + def test_flat_forms_nested_groups(self): + """flat_forms() handles nested groups.""" + desc = parse_cdl("cubic[m3m]:(({111} + {100}) + {110})") + flat = desc.flat_forms() + assert len(flat) == 3 + + def test_flat_forms_feature_merge(self): + """flat_forms() merges parent and child features.""" + desc = parse_cdl("cubic[m3m]:({111}[trigon:dense] + {100})[phantom:3]") + flat = desc.flat_forms() + assert len(flat) == 2 + # First form should have both phantom (from group) and trigon (own) + f0_names = [feat.name for feat in flat[0].features] + assert "phantom" in f0_names + assert "trigon" in f0_names + # Second form should have only phantom (from group) + f1_names = [feat.name for feat in flat[1].features] + assert "phantom" in f1_names + assert len(f1_names) == 1 + 
+ +# ============================================================================= +# Version Test (CDL v1.3) +# ============================================================================= + + +class TestVersion: + """Test version is updated.""" + + def test_version_1_3(self): + """Version is 1.3.0.""" + import cdl_parser + assert cdl_parser.__version__ == "1.3.0" + + +# ============================================================================= +# v1 Regression (CDL v1.3) +# ============================================================================= + + +class TestV1Regression: + """Ensure all v1/v1.2 CDL still works with v1.3 changes.""" + + @pytest.mark.parametrize("name,cdl", CDL_TEST_CASES) + def test_all_v1_cases_still_work(self, name, cdl): + """All CDL_TEST_CASES parse successfully with v1.3.""" + desc = parse_cdl(cdl) + assert isinstance(desc, CrystalDescription) + # forms are still iterable and contain CrystalForm instances + for f in desc.forms: + assert isinstance(f, CrystalForm) + # flat_forms() returns same as forms for v1-style CDL + flat = desc.flat_forms() + assert len(flat) == len(desc.forms) + + def test_v1_forms_are_crystal_form_instances(self): + """v1-style CDL forms are CrystalForm, not FormGroup.""" + desc = parse_cdl("cubic[m3m]:{111}@1.0 + {100}@1.3") + for f in desc.forms: + assert isinstance(f, CrystalForm) + assert not isinstance(f, FormGroup) + + def test_v1_definitions_none(self): + """v1-style CDL has no definitions.""" + desc = parse_cdl("cubic[m3m]:{111}") + assert desc.definitions is None + + def test_v1_to_dict_has_flat_forms(self): + """to_dict() includes flat_forms key.""" + desc = parse_cdl("cubic[m3m]:{111}@1.0 + {100}@1.3") + d = desc.to_dict() + assert "flat_forms" in d + assert len(d["flat_forms"]) == 2