# Source code for lambeq.bobcat.grammar

# Copyright 2021-2024 Cambridge Quantum Computing Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import annotations

from dataclasses import asdict, dataclass
import json
from pathlib import Path

from lambeq.typing import StrPathT


@dataclass
class Grammar:
    r"""The grammar dataclass.

    Attributes
    ----------
    categories : dict of str to str
        A mapping from a plain category string to a marked up category
        string, e.g. '(NP\NP)/NP' to '((NP{Y}\NP{Y}<1>){_}/NP{Z}<2>){_}'
    binary_rules: list of tuple of str
        The list of binary rules as tuple pairs of strings,
        e.g. ('(N/N)', 'N')
    type_changing_rules : list of tuple
        The list of type changing rules, which may occur as either unary
        rules or punctuation rules, as tuples of:
            - an integer denoting the rule ID
            - a string denoting the left category, or the sole
              category if unary
            - a string denoting the right category, or None if unary
            - a string denoting the resulting category
            - a boolean denoting whether to replace dependencies
              during parsing
        e.g. (1, 'N', None, 'NP', False)
             (50, 'S[dcl]/S[dcl]', ',', 'S/S', True)
    type_raising_rules : list of tuple
        The list of type raising rules as tuples of:
            - a string denoting the original category
            - a string denoting the resulting marked-up category
            - a character denoting the new variable
        e.g. ('NP', '(S[X]{Y}/(S[X]{Y}\NP{_}){Y}){Y}', '+')

    Notes
    -----
    Every rule is coerced to a tuple on construction, so rules may be
    passed in as any iterable (e.g. the lists produced by the JSON
    decoder).

    """

    categories: dict[str, str]
    binary_rules: list[tuple[str, str]]
    type_changing_rules: list[tuple[int, str, str | None, str, bool]]
    type_raising_rules: list[tuple[str, str, str]]

    def __post_init__(self) -> None:
        """Normalise every rule to a tuple.

        The JSON decoder used by :meth:`load` yields lists for arrays,
        so without this step a loaded grammar would hold lists where
        a directly constructed one holds tuples.
        """
        self.binary_rules = [tuple(item)  # type: ignore[misc]
                             for item in self.binary_rules]
        self.type_changing_rules = [tuple(item)  # type: ignore[misc]
                                    for item in self.type_changing_rules]
        self.type_raising_rules = [tuple(item)  # type: ignore[misc]
                                   for item in self.type_raising_rules]

    @classmethod
    def load(cls, filename: StrPathT) -> Grammar:
        """Load a grammar from a JSON file.

        Parameters
        ----------
        filename : str or os.PathLike
            Path to a JSON file holding an object whose keys are the
            field names of this dataclass.

        Returns
        -------
        Grammar
            The grammar built from the decoded JSON object.

        Raises
        ------
        TypeError
            If the JSON object has missing or unexpected keys.

        """
        # Decode explicitly as UTF-8 rather than relying on the
        # platform's locale encoding.
        with open(filename, encoding='utf-8') as f:
            data = json.load(f)
        return cls(**data)

    def save(self, filename: StrPathT) -> None:  # pragma: no cover
        """Save the grammar to a JSON file.

        Parameters
        ----------
        filename : str or os.PathLike
            Destination path; an existing file is overwritten.

        """
        # Same explicit encoding as `load`, so a saved grammar reads
        # back identically on any platform.
        with open(filename, 'w', encoding='utf-8') as f:
            json.dump(asdict(self), f, indent=1)


def _read_nonblank_lines(path: Path) -> list[str]:  # pragma: no cover
    """Return the non-blank lines of a UTF-8 text file, newline removed."""
    with open(path, encoding='utf-8') as f:
        # Strip only the line terminator: slicing off the last character
        # would truncate a final line that has no trailing newline.
        return [line.rstrip('\n') for line in f if line.strip()]


def read_grammar_dir(directory: StrPathT) -> Grammar:  # pragma: no cover
    """Read a grammar from a directory.

    The directory must contain four whitespace-delimited text files,
    which are decoded as UTF-8. Blank lines are ignored.

    - ``markedup``: a plain category followed by its marked up form
      (everything after the first run of whitespace).
    - ``all_rule_instances``: the two categories of a binary rule.
    - ``type_changing_rules``: rule ID, left category, right category
      (``_`` is read as None), resulting category, and a flag that is
      true only when the field is exactly ``replace``.
    - ``type_raising_rules``: three fields, stored in file order.

    Parameters
    ----------
    directory : str or os.PathLike
        The directory holding the four files above.

    Returns
    -------
    Grammar
        The grammar assembled from the files.

    Raises
    ------
    ValueError
        If a line has the wrong number of fields, or a rule ID is not
        an integer.

    """
    grammar_dir = Path(directory)

    categories: dict[str, str] = {}
    for line in _read_nonblank_lines(grammar_dir / 'markedup'):
        # Split once only, so everything after the first run of
        # whitespace is kept as the marked up category.
        plain, marked_up = line.split(maxsplit=1)
        categories[plain] = marked_up

    binary_rules: list[tuple[str, str]] = []
    for line in _read_nonblank_lines(grammar_dir / 'all_rule_instances'):
        left, right = line.split()
        binary_rules.append((left, right))

    type_changing_rules: list[tuple[int, str, str | None, str, bool]] = []
    for line in _read_nonblank_lines(grammar_dir / 'type_changing_rules'):
        rule_id, tc_left, right_str, res, replace_str = line.split()
        # '_' is the placeholder for "no right category".
        tc_right = right_str if right_str != '_' else None
        replace = replace_str == 'replace'
        type_changing_rules.append(
            (int(rule_id), tc_left, tc_right, res, replace)
        )

    type_raising_rules: list[tuple[str, str, str]] = []
    for line in _read_nonblank_lines(grammar_dir / 'type_raising_rules'):
        original, raised, variable = line.split()
        type_raising_rules.append((original, raised, variable))

    return Grammar(categories,
                   binary_rules,
                   type_changing_rules,
                   type_raising_rules)