# Copyright 2021-2024 Cambridge Quantum Computing Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations
from collections.abc import Callable
from enum import Enum
__all__ = ['TreeReader', 'TreeReaderMode']
from lambeq.backend.grammar import Box, Diagram, Id, Ty, Word
from lambeq.core.types import AtomicType
from lambeq.core.utils import SentenceType
from lambeq.text2diagram.base import Reader
from lambeq.text2diagram.bobcat_parser import BobcatParser
from lambeq.text2diagram.ccg_parser import CCGParser
from lambeq.text2diagram.ccg_rule import CCGRule
from lambeq.text2diagram.ccg_tree import CCGTree
S = AtomicType.SENTENCE
class TreeReaderMode(Enum):
    """An enumeration for :py:class:`TreeReader`.

    The words in the tree diagram can be combined using 4 modes:

    .. glossary::

        NO_TYPE
            The 'no type' mode names every rule box :py:obj:`UNIBOX`.

        RULE_ONLY
            The 'rule name' mode names every rule box based on the name
            of the original CCG rule. For example, for the forward
            application rule :py:obj:`FA(N << N)`, the rule box will be
            named :py:obj:`FA`.

        RULE_TYPE
            The 'rule type' mode names every rule box based on the name
            and type of the original CCG rule. For example, for the
            forward application rule :py:obj:`FA(N << N)`, the rule box
            will be named :py:obj:`FA(N << N)`.

        HEIGHT
            The 'height' mode names every rule box based on the
            tree height of its subtree. For example, a rule
            box directly combining two words will be named
            :py:obj:`layer_1`.

    """

    NO_TYPE = 0
    RULE_ONLY = 1
    RULE_TYPE = 2
    HEIGHT = 3
class TreeReader(Reader):
    """A reader that combines words according to a parse tree."""

    def __init__(
        self,
        ccg_parser: CCGParser | Callable[[], CCGParser] = BobcatParser,
        mode: TreeReaderMode = TreeReaderMode.NO_TYPE,
        word_type: Ty = S
    ) -> None:
        """Initialise a tree reader.

        Parameters
        ----------
        ccg_parser : CCGParser or callable, default: BobcatParser
            A :py:class:`CCGParser` object or a function that returns
            it. The parse tree produced by the parser is used to
            generate the tree diagram.
        mode : TreeReaderMode, default: TreeReaderMode.NO_TYPE
            Determines what boxes are used to combine the tree.
            See :py:class:`TreeReaderMode` for options.
        word_type : Ty, default: core.types.AtomicType.SENTENCE
            The type of each word box. By default, it uses the sentence
            type from :py:class:`.core.types.AtomicType`.

        Raises
        ------
        ValueError
            If `mode` is not a :py:class:`TreeReaderMode`, or if
            `ccg_parser` is neither a :py:class:`CCGParser` nor a
            callable that returns one.

        """
        if not isinstance(mode, TreeReaderMode):
            raise ValueError(f'Mode must be one of {self.available_modes()}.')

        if not isinstance(ccg_parser, CCGParser):
            if not callable(ccg_parser):
                raise ValueError(f'{ccg_parser} should be a CCGParser or a '
                                 'function that returns a CCGParser.')
            # A factory (e.g. the parser class itself) was given, so the
            # parser is only instantiated here, when a reader is built.
            ccg_parser = ccg_parser()

        # Re-check: the factory may have returned something else.
        if not isinstance(ccg_parser, CCGParser):
            raise ValueError(f'{ccg_parser} should be a CCGParser or a '
                             'function that returns a CCGParser.')

        self.ccg_parser = ccg_parser
        self.mode = mode
        self.word_type = word_type

    @classmethod
    def available_modes(cls) -> list[TreeReaderMode]:
        """The list of modes for initialising a tree reader."""
        return list(TreeReaderMode)

    @staticmethod
    def tree2diagram(tree: CCGTree,
                     mode: TreeReaderMode = TreeReaderMode.NO_TYPE,
                     word_type: Ty = S,
                     suppress_exceptions: bool = False) -> Diagram | None:
        """Convert a :py:class:`~.CCGTree` into a
        :py:class:`~lambeq.backend.grammar.Diagram` .

        This produces a tree-shaped diagram based on the output of the
        CCG parser.

        Parameters
        ----------
        tree : :py:class:`~.CCGTree`
            The CCG tree to be converted.
        mode : TreeReaderMode, default: TreeReaderMode.NO_TYPE
            Determines what boxes are used to combine the tree.
            See :py:class:`TreeReaderMode` for options.
        word_type : Ty, default: core.types.AtomicType.SENTENCE
            The type of each word box. By default, it uses the sentence
            type from :py:class:`.core.types.AtomicType`.
        suppress_exceptions : bool, default: False
            Whether to suppress exceptions. If :py:obj:`True`, then if
            the tree cannot be converted, instead of raising an
            exception, :py:obj:`None` is returned.

        Returns
        -------
        :py:class:`lambeq.backend.grammar.Diagram` or None
            The parsed diagram, or :py:obj:`None` on failure.

        """
        try:
            return TreeReader._tree2diagram(tree._resolved(), mode, word_type)
        except Exception:
            if suppress_exceptions:
                return None
            # Bare `raise` re-raises the active exception unchanged.
            raise

    @staticmethod
    def _tree2diagram(tree: CCGTree,
                      mode: TreeReaderMode = TreeReaderMode.NO_TYPE,
                      word_type: Ty = S) -> Diagram:
        """Recursively convert `tree` into a tree-shaped diagram.

        A lexical node becomes a single word of type `word_type`. Any
        other node becomes the tensor of its children's diagrams
        followed by one box, named according to `mode`, that maps one
        `word_type` wire per child onto a single `word_type` wire.

        Raises
        ------
        ValueError
            If a non-lexical node is reached and `mode` is not one of
            the :py:class:`TreeReaderMode` members.

        """
        if tree.rule == CCGRule.LEXICAL:
            return Word(tree.text, word_type).to_diagram()

        dom = word_type ** len(tree.children)
        cod = word_type

        if mode == TreeReaderMode.NO_TYPE:
            name = 'UNIBOX'
        elif mode == TreeReaderMode.HEIGHT:
            name = f'layer_{tree.height}'
        elif mode == TreeReaderMode.RULE_ONLY:
            name = tree.rule.value
        elif mode == TreeReaderMode.RULE_TYPE:
            types = ', '.join(str(child.biclosed_type)
                              for child in tree.children)
            name = f'{tree.rule.value}({types})'
        else:
            # Explicit check rather than `assert`: assertions are
            # stripped under `python -O`, which would let an invalid
            # mode silently fall through as RULE_TYPE.
            raise ValueError(
                f'Mode must be one of {TreeReader.available_modes()}.'
            )

        children = [TreeReader._tree2diagram(child, mode, word_type)
                    for child in tree.children]
        return Id().tensor(*children) >> Box(name, dom, cod)

    def sentence2diagram(self,
                         sentence: SentenceType,
                         tokenised: bool = False,
                         collapse_noun_phrases: bool = True,
                         suppress_exceptions: bool = False) -> Diagram | None:
        """Parse a sentence into a lambeq diagram.

        This produces a tree-shaped diagram based on the output of the
        CCG parser.

        Parameters
        ----------
        sentence : str or list of str
            The sentence to be parsed.
        tokenised : bool, default: False
            Whether the sentence has been passed as a list of tokens.
        collapse_noun_phrases : bool, default: True
            If set, then before converting each tree to a diagram, any
            noun phrase types in the tree are changed into nouns. This
            includes sub-types, e.g. `S/NP` becomes `S/N`.
        suppress_exceptions : bool, default: False
            Whether to suppress exceptions. If :py:obj:`True`, then if a
            sentence fails to parse, instead of raising an exception,
            its return entry is :py:obj:`None`.

        Returns
        -------
        :py:class:`lambeq.backend.grammar.Diagram` or None
            The parsed diagram, or :py:obj:`None` on failure.

        """
        tree = self.ccg_parser.sentence2tree(
            sentence=sentence,
            tokenised=tokenised,
            suppress_exceptions=suppress_exceptions)

        # The parser yields None when parsing failed and exceptions
        # were suppressed; there is nothing to convert in that case.
        if tree is None:
            return None

        if collapse_noun_phrases:
            tree = tree.collapse_noun_phrases()

        return self.tree2diagram(tree,
                                 mode=self.mode,
                                 word_type=self.word_type,
                                 suppress_exceptions=suppress_exceptions)