diff --git a/Cargo.lock b/Cargo.lock
index 9001283..1e7d801 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -341,7 +341,7 @@ checksum = "2687e6cf9c00f48e9284cf9fd15f2ef341d03cc7743abf9df4c5f07fdee50b18"
[[package]]
name = "minimalist-grammar-parser"
version = "0.1.0"
-source = "git+https://github.com/MichaelGoodale/minimalist-grammar-parser.git#342aac1c51c1c4125e2b7ca27e9d91cde3ac73ed"
+source = "git+https://github.com/MichaelGoodale/minimalist-grammar-parser.git#4876725b425835b5a205d94fe93aab8c68046e9b"
dependencies = [
"ahash 0.8.12",
"bitvec",
@@ -557,11 +557,13 @@ name = "python-mg"
version = "0.1.0"
dependencies = [
"anyhow",
+ "itertools",
"logprob",
"minimalist-grammar-parser",
"numpy",
"pyo3",
"rand",
+ "simple-semantics",
]
[[package]]
@@ -636,7 +638,7 @@ dependencies = [
"aho-corasick",
"memchr",
"regex-automata 0.4.14",
- "regex-syntax 0.8.9",
+ "regex-syntax 0.8.10",
]
[[package]]
@@ -658,7 +660,7 @@ checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f"
dependencies = [
"aho-corasick",
"memchr",
- "regex-syntax 0.8.9",
+ "regex-syntax 0.8.10",
]
[[package]]
@@ -669,9 +671,9 @@ checksum = "dbb5fb1acd8a1a18b3dd5be62d25485eb770e05afb408a9627d14d451bae12da"
[[package]]
name = "regex-syntax"
-version = "0.8.9"
+version = "0.8.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a96887878f22d7bad8a3b6dc5b7440e0ada9a245242924394987b21cf2210a4c"
+checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a"
[[package]]
name = "rustc-hash"
@@ -743,11 +745,12 @@ checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
[[package]]
name = "simple-semantics"
version = "0.1.0"
-source = "git+https://github.com/MichaelGoodale/simple-semantics.git#2cef7d759b37c4a466b068f984bb549ec040b218"
+source = "git+https://github.com/MichaelGoodale/simple-semantics.git#86afc64524a554b08ca693c7911671d919fb6413"
dependencies = [
"ahash 0.8.12",
"chumsky",
"itertools",
+ "rand",
"serde",
"serde_json",
"thiserror",
diff --git a/Cargo.toml b/Cargo.toml
index 59a5b41..6ecf8e0 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -9,6 +9,7 @@ name = "python_mg"
crate-type = ["cdylib"]
[dependencies]
+simple-semantics = { git = "https://github.com/MichaelGoodale/simple-semantics.git" }
pyo3 = { version = "0.28.0", features = [
"anyhow",
"extension-module",
@@ -22,3 +23,4 @@ anyhow = "1.0.98"
logprob = "0.2.1"
rand = "0.10.0"
numpy = "0.28.0"
+itertools = "0.14.0"
diff --git a/docs/source/index.rst b/docs/source/index.rst
index fb4db23..d56d406 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -15,6 +15,7 @@ It provides the tools necessary to generate strings from a Minimalist Grammar an
lexicon
syntax
+ semantics
metrics
diff --git a/docs/source/semantics.rst b/docs/source/semantics.rst
new file mode 100644
index 0000000..3d9b288
--- /dev/null
+++ b/docs/source/semantics.rst
@@ -0,0 +1,25 @@
+Semantics
+=========
+
+These are the classes that allow you to evaluate Language of Thought expressions generated by semantic lexica.
+
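+As a quick illustration (a sketch based on the test suite), you can enumerate every
+possible scenario over a fixed set of actors and properties:
+
+.. code-block:: python
+
+   from python_mg.semantics import Scenario
+
+   scenarios = list(Scenario.all_scenarios(["John", "Mary"], [], ["kind"]))
+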
+.. autoclass:: python_mg.semantics.Scenario
+ :members:
+ :undoc-members:
+
+.. autoclass:: python_mg.semantics.Actor
+ :members:
+ :undoc-members:
+
+.. autoclass:: python_mg.semantics.Event
+ :members:
+ :undoc-members:
+
+.. autoclass:: python_mg.semantics.PossibleEvent
+ :members:
+ :undoc-members:
+
+.. autoclass:: python_mg.semantics.ScenarioGenerator
+ :members:
+ :undoc-members:
+
diff --git a/docs/source/syntax.rst b/docs/source/syntax.rst
index 8344093..5dc30e9 100644
--- a/docs/source/syntax.rst
+++ b/docs/source/syntax.rst
@@ -1,5 +1,5 @@
-Syntax tree utilities
-=====================
+Syntax
+======
These are the classes that are useful for manipulating or plotting parse trees directly.
diff --git a/pyproject.toml b/pyproject.toml
index b9f03b4..c7a2072 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -12,10 +12,10 @@ classifiers = [
]
dynamic = ["version"]
dependencies = [
- "numpy>=1.24.4",
- "pillow>=10.4.0",
- "pydot>=4.0.1",
- "rustworkx>=0.15.1",
+ "numpy>=1.24.4",
+ "pillow>=10.4.0",
+ "pydot>=4.0.1",
+ "rustworkx>=0.15.1",
]
[tool.maturin]
features = ["pyo3/extension-module"]
@@ -24,16 +24,16 @@ module-name = "python_mg._lib_name"
[dependency-groups]
dev = [
- "patchelf>=0.17.2.2",
- "pytest>=8.3.5",
- "sphinx>=7.1.2",
- "sphinx-rtd-theme>=3.0.2",
+ "patchelf>=0.17.2.2",
+ "pytest>=8.3.5",
+ "sphinx>=7.1.2",
+ "sphinx-rtd-theme>=3.0.2",
]
examples = [
- "datasets>=3.1.0",
- "scipy>=1.10.1",
- "torch>=2.5.1",
- "transformers[torch]>=4.46.3",
+ "datasets>=3.1.0",
+ "scipy>=1.10.1",
+ "torch>=2.5.1",
+ "transformers[torch]>=4.46.3",
]
[tool.setuptools.package-data]
@@ -43,4 +43,12 @@ examples = [
where = ["src"]
[tool.uv]
-cache-keys = [{file = "pyproject.toml"}, {file = "Cargo.toml"}, {file = "**/*.rs"}]
+cache-keys = [
+ { file = "pyproject.toml" },
+ { file = "Cargo.toml" },
+ { file = "**/*.rs" },
+]
+
+[tool.ruff.lint]
+ignore = ["E501"]
+select = ["E", "F", "D", "ANN"]
diff --git a/python/python_mg/_lib_name.pyi b/python/python_mg/_lib_name.pyi
index aa96e0c..819fbf1 100644
--- a/python/python_mg/_lib_name.pyi
+++ b/python/python_mg/_lib_name.pyi
@@ -1,4 +1,5 @@
-from typing import Sequence
+import datetime
+from typing import Literal, Sequence
import numpy as np
import numpy.typing as npt
@@ -6,93 +7,57 @@ from python_mg.syntax import ParseTree
class MGNode:
def is_trace(self) -> bool: ...
- def trace_id(self) -> int:
- """Gets the trace id of traces and raises an error otherwise"""
-
- def lemma_string(self) -> str:
- """Format the node as a string in a tree if leaf or trace"""
-
- def is_stolen(self) -> str:
- """Checks if the head has been stolen by head-movement"""
+ def trace_id(self) -> int: ...
+ def lemma_string(self) -> str: ...
+    def is_stolen(self) -> bool: ...
class MGEdge:
- def is_move(self) -> bool:
- """Checks whether the edge is a movement edge"""
-
- def is_head_move(self) -> bool:
- """Checks whether the edge is a head-movement edge"""
-
- def is_merge(self) -> bool:
- """Checks whether the edge is a merge edge"""
+ def is_move(self) -> bool: ...
+ def is_head_move(self) -> bool: ...
+ def is_merge(self) -> bool: ...
class SyntacticStructure:
- """A parse tree for some string"""
+ """A parse tree for some string."""
def __init__(self) -> None: ...
- def log_prob(self) -> float:
- """Return the log probability."""
-
- def n_steps(self) -> int:
- """Returns the number of steps in the derivation of this structure"""
-
- def contains_lexical_entry(self, s: str) -> bool:
- """Check if this structure contains a specific lexical entry (formatted as an MG entry, will raise an error if unparseable)"""
-
- def contains_word(self, s: str | None) -> bool:
- """Check if this structure contains a specific word."""
-
- def prob(self) -> float:
- """Return the probability of this syntactic structure."""
-
- def latex(self) -> str:
- """Return a LaTeX representation of this syntactic structure."""
-
- def to_tree(self) -> ParseTree:
- """Converts a syntactic structure into a graph structure"""
-
- def max_memory_load(self) -> int:
- """Gets the largest amount of movers at a single point"""
-
- def tokens(self) -> npt.NDArray[np.uint]:
- """Converts the string of this SyntacticStructure into a tokenized numpy array"""
-
+ def pronunciation(self) -> list[str]: ...
+ def log_prob(self) -> float: ...
+ def n_steps(self) -> int: ...
+ def contains_lexical_entry(self, s: str) -> bool: ...
+ def contains_word(self, s: str | None) -> bool: ...
+ def prob(self) -> float: ...
+ def latex(self) -> str: ...
+ def to_tree(self) -> ParseTree: ...
+ def max_memory_load(self) -> int: ...
+ def tokens(self) -> npt.NDArray[np.uint]: ...
+ @property
+ def meaning(self) -> list[str] | None: ...
def __to_tree_inner(
self,
) -> tuple[list[tuple[int, MGNode]], list[tuple[int, int, MGEdge]], int]: ...
class Continuation:
- """A continuation of a prefix string"""
+ """A continuation of a prefix string."""
def __init__(self, word: str) -> None: ...
@staticmethod
def EOS() -> "Continuation": ...
- def is_end_of_string(self) -> bool:
- """Check if the continuation is a end of string marker"""
-
- def is_word(self) -> bool:
- """Check if the continuation is a word"""
-
- def is_multi_word(self) -> bool:
- """Check if the continuation is an affixed word"""
+ def is_end_of_string(self) -> bool: ...
+ def is_word(self) -> bool: ...
+ def is_multi_word(self) -> bool: ...
class GrammarIterator:
def __iter__(self) -> GrammarIterator: ...
def __next__(self) -> SyntacticStructure: ...
class Lexicon:
- """A Minimalist Grammar Lexicon"""
+ """A Minimalist Grammar Lexicon."""
def __init__(self, s: str) -> None: ...
@staticmethod
- def random_lexicon(lemmas: list[str]) -> "Lexicon":
- """Generate a random lexicon from the list of lemmas"""
-
- def mdl(self, n_phonemes: int) -> float:
- """Returns the model description length of the lexicon"""
-
- def is_semantic(self) -> bool:
- """Returns whether the lexicon has semantic interpretations"""
-
+ def random_lexicon(lemmas: list[str]) -> "Lexicon": ...
+ def mdl(self, n_phonemes: int) -> float: ...
+ def is_semantic(self) -> bool: ...
def continuations(
self,
prefix: str,
@@ -102,9 +67,7 @@ class Lexicon:
max_steps: int | None = 64,
n_beams: int | None = 256,
max_strings: int | None = None,
- ) -> set[Continuation]:
- """Returns a set of all valid continuations from this prefix"""
-
+ ) -> set[Continuation]: ...
def generate_unique_strings(
self,
category: str,
@@ -113,9 +76,7 @@ class Lexicon:
max_steps: int | None = 64,
n_beams: int | None = 256,
max_strings: int | None = None,
- ) -> list[tuple[list[str], float]]:
- """Returns a list of all unique strings and their probabilities"""
-
+ ) -> list[tuple[list[str], float]]: ...
def generate_grammar(
self,
category: str,
@@ -124,9 +85,7 @@ class Lexicon:
max_steps: int | None = 64,
n_beams: int | None = 256,
max_strings: int | None = None,
- ) -> GrammarIterator:
- """Returns an iterator over all possible parses"""
-
+ ) -> GrammarIterator: ...
def parse(
self,
s: str,
@@ -136,11 +95,7 @@ class Lexicon:
max_steps: int | None = 64,
n_beams: int | None = 256,
max_strings: int | None = None,
- ) -> list[SyntacticStructure]:
- """Returns a list of all possible parses of that string.
- The string, s, should be delimited by spaces for words and hyphens for multi-word expressions from head-movement
- """
-
+ ) -> list[SyntacticStructure]: ...
def parse_tokens(
self,
s: Sequence[int] | npt.NDArray[np.uint],
@@ -150,21 +105,13 @@ class Lexicon:
max_steps: int | None = 64,
n_beams: int | None = 256,
max_strings: int | None = None,
- ) -> list[SyntacticStructure]:
- """Returns a list of all possible parses of a string represented by tokens."""
-
- def tokens(self) -> dict[str, int]:
- pass
-
- def detokenize(self, s: Sequence[int] | npt.NDArray[np.uint]) -> list[str]:
- pass
-
+ ) -> list[SyntacticStructure]: ...
+ def tokens(self) -> dict[str, int]: ...
+ def detokenize(self, s: Sequence[int] | npt.NDArray[np.uint]) -> list[str]: ...
def detokenize_batch(
self,
s: Sequence[Sequence[int]] | list[npt.NDArray[np.uint]] | npt.NDArray[np.uint],
- ) -> list[list[str]]:
- pass
-
+ ) -> list[list[str]]: ...
def token_continuations(
self,
x: npt.NDArray[np.uint],
@@ -173,5 +120,68 @@ class Lexicon:
move_prob: float = 0.5,
max_steps: int | None = 64,
n_beams: int | None = 256,
- ) -> npt.NDArray[np.bool]:
- pass
+ ) -> npt.NDArray[np.bool]: ...
+
+class Actor:
+ name: str
+ properties: set[str]
+
+ def __init__(
+ self,
+ name: str,
+ properties: set[str] | None = None,
+ ) -> None: ...
+
+class Event:
+ agent: str | None
+ patient: str | None
+ properties: set[str]
+
+ def __init__(
+ self,
+ agent: str | None = None,
+ patient: str | None = None,
+ properties: set[str] | None = None,
+ ) -> None: ...
+
+class PossibleEvent:
+ has_agent: bool
+ has_patient: bool
+ is_reflexive: bool
+ name: str
+
+ def __init__(
+ self,
+ name: str,
+ has_agent: bool = True,
+ has_patient: bool = False,
+ is_reflexive: bool = True,
+ ) -> None: ...
+    def event_type(self) -> Literal[
+ "Transitive",
+ "TransitiveNonReflexive",
+ "Unergative",
+ "Unaccusative",
+ "Avalent",
+ ]: ...
+
+class Scenario:
+ actors: list[Actor]
+ events: list[Event]
+ questions: list[str]
+
+ def __init__(self, s: str) -> None: ...
+ def evaluate(
+ self,
+ expression: str,
+ max_steps: int | None = 256,
+ timeout: datetime.timedelta | None = None,
+ ) -> bool | Actor | Event | set[Actor] | set[Event]: ...
+ @staticmethod
+ def all_scenarios(
+ actors: list[str], event_kinds: list[PossibleEvent], actor_properties: list[str]
+ ) -> ScenarioGenerator: ...
+
+class ScenarioGenerator:
+ def __iter__(self) -> ScenarioGenerator: ...
+ def __next__(self) -> Scenario: ...
diff --git a/python/python_mg/metrics.py b/python/python_mg/metrics.py
index f1e1253..4249d07 100644
--- a/python/python_mg/metrics.py
+++ b/python/python_mg/metrics.py
@@ -10,16 +10,18 @@ def grammar_f1(
preds: npt.NDArray[np.float64],
correct: npt.NDArray[np.bool],
) -> dict[str, npt.NDArray[np.float64]]:
- """
- Compute grammar F1 scores from boolean arrays of valid next moves and predictions.
- The metric is described in `Meta-Learning Neural Mechanisms rather than Bayesian Priors `_ (Goodale et al., ACL 2025)
+ """Compute grammar F1 scores from boolean arrays of next moves and predictions.
+
+ The metric is described in `Meta-Learning Neural Mechanisms rather than Bayesian
+    Priors `_ (Goodale et al., ACL 2025).
Parameters
----------
preds : ndarray of float64
Predicted log probabilities for each token. Shape (..., seq_length, vocab_size).
correct: ndarray of int
- Boolean array for each valid token that can come next at that point in the sequence. Shape (..., seq_length, vocab_size).
+ Boolean array for each next valid token in the sequence.
+ Shape (..., seq_length, vocab_size).
Returns
-------
@@ -29,6 +31,7 @@ def grammar_f1(
- 'precision': Precision scores
- 'recall': Recall scores
- 'f1': F1 scores
+
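+    Examples
+    --------
+    A minimal sketch with one sequence of length 2 and a vocabulary of 3, where
+    ``preds`` holds log probabilities and ``correct`` flags every valid next token:
+
+    >>> preds = np.log(np.array([[[0.7, 0.2, 0.1], [0.1, 0.8, 0.1]]]))
+    >>> correct = np.array([[[True, False, False], [False, True, True]]])
+    >>> scores = grammar_f1(preds, correct)  # dict with 'precision', 'recall', 'f1'
+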
"""
if preds.shape != correct.shape:
raise ValueError("correct and preds must have matching shapes")
@@ -66,14 +69,16 @@ def grammar_f1_from_strings(
n_beams: int | None = 256,
reduction: Literal["none", "sentence_mean", "length_mean"] = "sentence_mean",
) -> dict[str, npt.NDArray[np.float64]]:
- """
- Compute grammar F1 scores from token sequences and predictions.
- The metric is described in `Meta-Learning Neural Mechanisms rather than Bayesian Priors `_ (Goodale et al., ACL 2025)
+ """Compute grammar F1 scores from token sequences and predictions.
+
+ The metric is described in `Meta-Learning Neural Mechanisms rather than Bayesian
+    Priors `_ (Goodale et al., ACL 2025).
Parameters
----------
lexicon : Lexicon
+        The lexicon to use as ground truth for the measurement.
tokens : ndarray of int
Token IDs representing the input sequences. Shape (..., seq_length).
preds : ndarray of float64
@@ -108,11 +113,11 @@ def grammar_f1_from_strings(
- 'precision': Precision scores
- 'recall': Recall scores
- 'f1': F1 scores
- """
+ """
if np.any(tokens < 0):
raise ValueError(
- "Some tokens are negative which means they will be cast to unsigned integers incorrectly"
+ "Some tokens are negative meaning they will be cast to unsigned integers incorrectly"
)
conts = lexicon.token_continuations(
diff --git a/python/python_mg/semantics.py b/python/python_mg/semantics.py
new file mode 100644
index 0000000..014a47d
--- /dev/null
+++ b/python/python_mg/semantics.py
@@ -0,0 +1,5 @@
+"""Defines tools related to semantics and interpretation of semantic grammars."""
+
+from python_mg._lib_name import Scenario, Actor, Event, PossibleEvent, ScenarioGenerator
+
+__all__ = ["Scenario", "Actor", "Event", "PossibleEvent", "ScenarioGenerator"]
diff --git a/python/python_mg/syntax.py b/python/python_mg/syntax.py
index d20e524..30790c4 100644
--- a/python/python_mg/syntax.py
+++ b/python/python_mg/syntax.py
@@ -1,3 +1,5 @@
+"""Defines tools related to syntax and viewing trees."""
+
from __future__ import annotations
from dataclasses import dataclass
from python_mg._lib_name import SyntacticStructure, MGNode, MGEdge
@@ -6,14 +8,17 @@
from rustworkx.visualization import graphviz_draw
-def sort_key(G: rx.PyDiGraph[MGNode, MGEdge], e: int) -> int:
- (n, _) = G.get_edge_endpoints_by_index(e)
+def _sort_key(G: rx.PyDiGraph[MGNode, MGEdge], e: int) -> int:
+ n, _ = G.get_edge_endpoints_by_index(e)
return G.get_node_data(n).trace_id()
@dataclass
class Mover:
- """A list of words used to indicate where movement has occurred. See :meth:`python_mg.ParseTree.base_string`"""
+ """A list of words used to indicate where movement has occurred.
+
+ See :meth:`python_mg.ParseTree.base_string`.
+ """
s: list[str | Mover | Trace]
""" The moved words """
@@ -24,13 +29,14 @@ class Mover:
@dataclass
class Trace:
- """A representation of a trace index left by movement"""
+ """A representation of a trace index left by movement."""
trace: int
""" the trace ID """
-def node_attrs(node: MGNode):
+def node_attrs(node: MGNode) -> dict[str, str]:
+ """Get the attributes that defines node styling."""
attrs = {"label": str(node), "ordering": "out"}
if node.is_stolen():
attrs["style"] = "dashed"
@@ -41,77 +47,89 @@ def node_attrs(node: MGNode):
def edge_attrs(edge: MGEdge) -> dict[str, str]:
+ """Get the attributes that defines edge styling."""
if edge.is_move() or edge.is_head_move():
return {"style": "dashed", "constraint": "false"}
return {}
class ParseTree:
- """A class used for ParseTree that is generated by :meth:`python_mg.SyntacticStructure.to_tree`.
- It can be used to get a GraphViz representation of the tree or to investigate the ParseTree as a graph.
+ """A class for ParseTree generated by :meth:`python_mg.SyntacticStructure.to_tree`.
+
+ It can be used to get a GraphViz representation of the tree or to investigate the
+ ParseTree as a graph.
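+
+    Example (a sketch, assuming ``parse`` is a ``SyntacticStructure``):
+
+    .. code-block:: python
+
+        tree = parse.to_tree()
+        dot = tree.to_dot()
+        img = tree.to_image()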
"""
def __init__(
self, G: rx.PyDiGraph[MGNode, MGEdge], root: int, structure: SyntacticStructure
- ):
+ ) -> None:
+ """Make a new ParseTree."""
self.root: int = root
self.structure: SyntacticStructure = structure
movement_edges = sorted(
[x for x in G.filter_edges(lambda x: x.is_move())],
- key=lambda x: sort_key(G, x),
+ key=lambda x: _sort_key(G, x),
reverse=True,
)
movements: dict[int, int] = {}
for e in movement_edges:
- (src, tgt) = G.get_edge_endpoints_by_index(e)
+ src, tgt = G.get_edge_endpoints_by_index(e)
trace_id = G.get_node_data(src).trace_id()
movements[trace_id] = tgt
self.__movement_sources: dict[int, int] = {m: i for i, m in movements.items()}
self.G: rx.PyDiGraph[MGNode, MGEdge] = G
- """PyDiGraph[MGNode, MGEdge]: A `RustworkX `_ PyDiGraph which contains the syntactice structure of a sentence"""
+ """PyDiGraph[MGNode, MGEdge]: A `RustworkX `
+ _ PyDiGraph which contains the syntactice structure of a sentence"""
def normal_string(self) -> str:
- """The string used by a ParseTree
+ """Get the string used by a ParseTree.
Returns
-------
str
the parsed sentence
+
"""
return str(self.structure)
def base_string(self) -> list[str | Mover | Trace]:
- """A richer representation of the parsed string, with traces where movement had occurred, and :meth:`python_mg.Mover` objects to indicated moved phrases.
+ """Get a richer representation of the parsed string.
+
+        This representation has traces where movement has occurred, and
+        :meth:`python_mg.Mover` objects to indicate moved phrases.
Returns
-------
str
the parsed sentence
+
"""
linear_order = self.__explore(self.root)
return linear_order
def to_dot(self) -> str | None:
- """Converts a tree to GraphViz DOT format
+ """Convert a tree to GraphViz DOT format.
Returns
-------
str
The dot file for this tree
+
"""
return self.G.to_dot(node_attr=node_attrs, edge_attr=edge_attrs)
def to_image(self) -> Image.Image:
- """Converts a tree to a PIL Image
+ """Convert a tree to a PIL Image.
Returns
-------
Image
An image representation of the tree
+
"""
return graphviz_draw(
self.G,
@@ -144,15 +162,17 @@ def __explore(self, n_i: int) -> list[str | Mover | Trace]:
def to_tree(self: SyntacticStructure) -> ParseTree:
- """Converts a SyntacticStructure to a ParseTree
+ """Convert a SyntacticStructure to a ParseTree.
Returns
-------
- The SyntacticStructure as a :meth:`python_mg.ParseTree`
+ :meth:`python_mg.ParseTree`
+ The SyntacticStructure as a :meth:`python_mg.ParseTree`
+
"""
- (nodes, edges, root) = self.__to_tree_inner() # pyright: ignore[reportPrivateUsage]
+ nodes, edges, root = self.__to_tree_inner() # pyright: ignore[reportPrivateUsage]
- # This will usually be the identity function, but on the off chance its not, we do this.
+ # This will usually be the identity function, but if not, we do this.
# Waste computation in exchange for not having a horrible headache
old2new: dict[int, int] = {}
diff --git a/python/tests/test_mg.py b/python/tests/test_mg.py
index ec795bd..b8a3172 100644
--- a/python/tests/test_mg.py
+++ b/python/tests/test_mg.py
@@ -1,11 +1,13 @@
-import pytest
+# ruff: noqa: D100, D103, E501
+
import pickle
from python_mg import Lexicon, Continuation
+from python_mg.semantics import Scenario, Actor, Event
from python_mg.syntax import Trace, Mover
-def test_lexicon():
+def test_lexicon() -> None:
x = Lexicon("a::b= a\nb::b")
assert [str(s) for s in x.generate_grammar("a")] == ["a b"]
parse = next(x.generate_grammar("a"))
@@ -15,13 +17,13 @@ def test_lexicon():
)
-def test_pickling():
+def test_pickling() -> None:
x = Lexicon("a::b= a\nb::b")
x_pickle = pickle.dumps(x)
assert pickle.loads(x_pickle) == x
-def test_memory_load():
+def test_memory_load() -> None:
grammar = Lexicon("a::b= c= +a +e C\nb::b -a\nc::c -e")
parse = grammar.parse("c b a", "C")[0]
assert parse.max_memory_load() == 2
@@ -30,16 +32,77 @@ def test_memory_load():
assert parse.max_memory_load() == 1
-def test_semantic_lexicon():
- grammar = """John::d::a_j
-run::=d v::lambda a x some_e(e, pe_run(e), AgentOf(x,e))
-Mary::d::a_m
+def test_generation() -> None:
+ grammar = """John::d::a_John
+runs::=d v::lambda a x some_e(e, pe_run(e), AgentOf(x,e))
+Mary::d::a_Mary
+likes::d= =d v::lambda a x lambda a y some_e(e, pe_likes(e), AgentOf(y,e) & PatientOf(x, e))"""
+ lexicon = Lexicon(grammar)
+ strings = [str(p) for p in lexicon.generate_grammar("v")]
+ assert strings == [
+ "John runs",
+ "Mary runs",
+ "Mary likes John",
+ "John likes John",
+ "John likes Mary",
+ "Mary likes Mary",
+ ]
+
+
+def test_semantic_lexicon() -> None:
+ grammar = """John::d::a_John
+runs::=d v::lambda a x some_e(e, pe_run(e), AgentOf(x,e))
+Mary::d::a_Mary
likes::d= =d v::lambda a x lambda a y some_e(e, pe_likes(e), AgentOf(y,e) & PatientOf(x, e))"""
semantic_lexicon = Lexicon(grammar)
assert semantic_lexicon.is_semantic()
+ s = semantic_lexicon.parse("John likes Mary", "v")
+ assert len(s) == 1
+ parse = s[0]
+ assert parse.meaning is not None
+ assert parse.meaning == [
+ "some_e(x, pe_likes(x), AgentOf(a_John, x) & PatientOf(a_Mary, x))"
+ ]
+ meaning: str = parse.meaning[0]
+
+ s = Scenario(
+ " lambda a x some_e(e, pe_likes(e), AgentOf(x, e)); lambda a x some_e(e, pe_likes(e), PatientOf(x, e))"
+ )
+ assert len(s.questions) == 2
+
+ assert s.evaluate(meaning)
+ answers = [
+ s.evaluate(f"({q})(a_{name})") for q, name in zip(s.questions, ["John", "Mary"])
+ ]
+ assert answers[0]
+ assert answers[1]
+
+
+def test_scenario() -> None:
+ s = Scenario("")
+ assert s.actors == [Actor("John", properties={"nice", "quick"})]
+ assert s.events == [Event(agent="John", properties={"run"})]
+
+ scenarios: list[Scenario] = [
+ x for x in Scenario.all_scenarios(["John", "Mary"], [], ["kind"])
+ ]
+ assert len(scenarios) == 9
+
+ phi = Scenario("").evaluate(
+ "(lambda a x some_e(e, pe_runs(e), AgentOf(x, e)))(a_John)"
+ )
+ assert isinstance(phi, bool)
+ assert phi
+
+ john = Scenario("").evaluate(
+ "iota(x, some_e(e, pe_runs(e), AgentOf(x, e)))"
+ )
+ assert isinstance(john, Actor)
+ assert john.name == "John"
+ assert john.properties == {"cool"}
-def test_trees():
+def test_trees() -> None:
grammar = """
::T= C
::T= +W C
@@ -116,7 +179,7 @@ def test_trees():
assert tree.to_dot() == digraph
-def test_continuations():
+def test_continuations() -> None:
x = Lexicon("a::b= S\nb::b")
assert x.continuations("a", "S") == {Continuation("b")}
x = Lexicon("a::S= b= S\n::S\nb::b")
diff --git a/rust-toolchain.toml b/rust-toolchain.toml
index 45ef49e..c4ea774 100644
--- a/rust-toolchain.toml
+++ b/rust-toolchain.toml
@@ -1,3 +1,3 @@
[toolchain]
profile = "default"
-channel = "1.88.0"
+channel = "1.93.0"
diff --git a/src/graphing.rs b/src/graphing.rs
index 525c3a2..90617c4 100644
--- a/src/graphing.rs
+++ b/src/graphing.rs
@@ -2,6 +2,7 @@ use minimalist_grammar_parser::parsing::rules::{TreeEdge, TreeNode};
use pyo3::{exceptions::PyValueError, prelude::*};
use std::fmt::Display;
+///A node on a tree.
#[pyclass(name = "MGNode", str, eq, frozen)]
#[derive(Debug, PartialEq, Eq)]
pub struct PyMgNode(pub TreeNode<'static, String, String>);
@@ -59,6 +60,7 @@ impl PyMgNode {
}
}
+///An edge in a tree, arising from either merging or movement.
#[pyclass(name = "MGEdge", str, eq, frozen)]
#[derive(Debug, PartialEq, PartialOrd, Ord, Eq)]
pub struct PyMgEdge(pub TreeEdge);
@@ -80,14 +82,32 @@ impl Display for PyMgEdge {
#[pymethods]
impl PyMgEdge {
+ ///Check if the edge is a movement edge.
+ ///
+ ///Returns
+ ///-------
+ ///bool
+ /// Whether it's a movement edge.
fn is_move(&self) -> bool {
matches!(self.0, TreeEdge::Move)
}
+ ///Check if the edge is a head-movement edge.
+ ///
+ ///Returns
+ ///-------
+ ///bool
+ /// Whether it's a head-movement edge.
fn is_head_move(&self) -> bool {
matches!(self.0, TreeEdge::MoveHead)
}
+ ///Check if the edge is a merge edge.
+ ///
+ ///Returns
+ ///-------
+ ///bool
+ /// Whether it's a merge edge.
fn is_merge(&self) -> bool {
matches!(self.0, TreeEdge::Merge(_))
}
diff --git a/src/lib.rs b/src/lib.rs
index 5cb8532..3439ecf 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -7,181 +7,28 @@ use std::{
use anyhow::anyhow;
use logprob::LogProb;
use minimalist_grammar_parser::{
- Generator, ParsingConfig, PhonContent, Pronounciation, RulePool,
+ Generator, ParsingConfig, PhonContent, Pronounciation,
lexicon::{LexemeId, LexicalEntry, Lexicon, SemanticLexicon},
parsing::beam::Continuation,
};
use pyo3::{exceptions::PyValueError, prelude::*};
-mod graphing;
+pub mod graphing;
use graphing::{PyMgEdge, PyMgNode};
-use crate::tokenizers::TokenMap;
-
-#[pyclass(name = "SyntacticStructure", str, eq, frozen)]
-#[derive(Debug)]
-///The representation of a syntactic structure generated by a grammar, or alternatively the result
-///of parsing a string.
-struct PySyntacticStructure {
- prob: LogProb,
- string: Vec>,
- rules: RulePool,
- lex: Py,
-}
-
-impl PartialEq for PySyntacticStructure {
- fn eq(&self, other: &Self) -> bool {
- self.prob == other.prob && self.string == other.string && self.rules == other.rules
- }
-}
-
-impl Display for PySyntacticStructure {
- fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
- let len = self.string.len();
- for (i, x) in self.string.iter().enumerate() {
- match x {
- PhonContent::Normal(s) => write!(f, "{s}")?,
- PhonContent::Affixed(items) => write!(f, "{}", items.join("-"))?,
- };
- if i != len - 1 {
- write!(f, " ")?;
- }
- }
- Ok(())
- }
-}
-
-#[pymethods]
-impl PySyntacticStructure {
- ///The log probability of generating this SyntacticStructure using its associated Lexicon.
- ///
- ///Returns
- ///-------
- ///float
- /// the log probability
- fn log_prob(&self) -> f64 {
- self.prob.into_inner()
- }
-
- fn contains_lexical_entry(&self, s: &str) -> PyResult {
- let lex = self.lex.get();
- let entry = LexicalEntry::parse(s).map_err(|e| PyValueError::new_err(e.to_string()))?;
- Ok(lex
- .lexeme_to_id
- .get(&entry)
- .is_some_and(|x| self.rules.used_lemmas().any(|y| &y == x)))
- }
-
- ///The probability of generating this SyntacticStructure using its associated Lexicon.
- ///
- ///Parameters
- ///----------
- ///s : str or None
- /// The word (or empty word) that may or may not be present
- ///
- ///Returns
- ///-------
- ///bool
- /// whether the word is present in the structure
- fn contains_word(&self, mut s: Option<&str>) -> bool {
- let lex = self.lex.get();
- if let Some(s_inner) = &s
- && s_inner.is_empty()
- {
- s = None;
- }
- lex.lemma_to_id
- .get(&s.into())
- .is_some_and(|x| self.rules.used_lemmas().any(|y| x.contains(&y)))
- }
-
- ///The probability of generating this SyntacticStructure using its associated Lexicon.
- ///
- ///Returns
- ///-------
- ///float
- /// the probability of the structure
- fn prob(&self) -> f64 {
- self.prob.into_inner().exp()
- }
-
- ///The number of derivational steps necessary to derive this SyntacticStructure using its Lexicon
- ///
- ///Returns
- ///-------
- ///int
- /// the number of steps
- fn n_steps(&self) -> usize {
- self.rules.n_steps()
- }
-
- ///Turns the SyntacticStructure into a tree that can be rendered with LaTeX.
- ///Requires including `latex-commands.tex `_) in the LaTeX preamble.
- ///
- ///Returns
- ///-------
- ///str
- /// A LaTeX representation of the parse tree
- fn latex(&self) -> String {
- let lex = self.lex.get();
- lex.lexicon
- .lexicon()
- .derivation(self.rules.clone())
- .tree()
- .latex()
- }
-
- ///The maximum number of moving elements stored in memory at one time.
- ///
- ///Returns
- ///-------
- ///int
- /// the maximum number of moved items held in memory in the derivation
- fn max_memory_load(&self) -> usize {
- self.rules.max_memory_load()
- }
-
- #[allow(clippy::type_complexity)]
- fn __to_tree_inner(&self) -> (Vec<(usize, PyMgNode)>, Vec<(usize, usize, PyMgEdge)>, usize) {
- let d = self
- .lex
- .get()
- .lexicon
- .lexicon()
- .derivation(self.rules.clone());
- let tree = d.tree();
- let (g, root) = tree.petgraph();
- let nodes = g
- .node_indices()
- .map(|n| {
- (
- n.index(),
- PyMgNode(
- g.node_weight(n)
- .unwrap()
- .clone()
- .map(|x| x.to_string(), |x| x.to_string()),
- ),
- )
- })
- .collect::>();
-
- let mut edges = g
- .edge_indices()
- .map(|e| {
- let (src, tgt) = g.edge_endpoints(e).unwrap();
- (
- src.index(),
- tgt.index(),
- PyMgEdge(*g.edge_weight(e).unwrap()),
- )
- })
- .collect::>();
-
- edges.sort_by_key(|(_, _, x)| x.0);
- (nodes, edges, root.index())
- }
-}
+mod semantics;
+mod syntax;
+mod tokenizers;
+use syntax::PySyntacticStructure;
+
+use crate::{
+ semantics::{
+ PyPossibleEvent, PyScenarioGenerator,
+ lot_types::{PyActor, PyEvent},
+ scenario::PyScenario,
+ },
+ tokenizers::TokenMap,
+};
#[derive(Debug, Clone, Eq, PartialEq)]
enum PossiblySemanticLexicon {
@@ -228,12 +75,20 @@ impl SelfOwningLexicon {
})
}
- fn lexicon(&self) -> &Lexicon<&'static str, &'static str> {
+ #[expect(clippy::needless_lifetimes)]
+ fn lexicon<'a>(&'a self) -> &'a Lexicon<&'a str, &'a str> {
match &self.lexicon {
PossiblySemanticLexicon::Normal(lexicon) => lexicon,
PossiblySemanticLexicon::Semantic(semantic_lexicon) => semantic_lexicon.lexicon(),
}
}
+
+ fn semantic_lexicon<'a>(&'a self) -> Option<&'a SemanticLexicon<'a, &'a str, &'a str>> {
+ match &self.lexicon {
+ PossiblySemanticLexicon::Normal(_) => None,
+ PossiblySemanticLexicon::Semantic(lex) => Some(lex),
+ }
+ }
}
impl Display for SelfOwningLexicon {
@@ -252,7 +107,58 @@ impl Display for SelfOwningLexicon {
)]
#[derive(Debug, Clone, Eq, PartialEq)]
///A MG grammar that can be used to generate SyntacticStructures or parse strings into
-///SyntacticStructures
+///SyntacticStructures.
+///
+///You may include semantic interpretations or not. You may also generate all valid sentences in the grammar.
+///
+///Parameters
+///----------
+///s : str
+///
+///Raises
+///------
+///ValueError
+/// If the string is not a valid lexicon.
+///
+///Examples
+///--------
+///Generating all sentences of a grammar.
+///
+///.. code-block:: python
+///
+/// grammar = """John::d
+/// runs::=d v
+/// Mary::d
+/// likes::d= =d v"""
+/// lexicon = Lexicon(grammar)
+/// strings = [str(p) for p in lexicon.generate_grammar("v")]
+/// assert strings == [
+/// "John runs",
+/// "Mary runs",
+/// "Mary likes John",
+/// "John likes John",
+/// "John likes Mary",
+/// "Mary likes Mary",
+/// ]
+///
+///Creating a lexicon with interpretations and getting the interpretation of a sentence.
+///
+///.. code-block:: python
+///
+/// grammar = """John::d::a_John
+/// run::=d v::lambda a x some_e(e, pe_run(e), AgentOf(x,e))
+/// Mary::d::a_Mary
+/// likes::d= =d v::lambda a x lambda a y some_e(e, pe_likes(e), AgentOf(y,e) & PatientOf(x, e))"""
+/// semantic_lexicon = Lexicon(grammar)
+/// assert semantic_lexicon.is_semantic()
+/// s = semantic_lexicon.parse("John likes Mary", "v")
+/// assert len(s) == 1
+/// parse = s[0]
+/// assert parse.meaning is not None
+/// assert parse.meaning == [
+/// "some_e(x, pe_likes(x), AgentOf(a_John, x) & PatientOf(a_Mary, x))"
+/// ]
+///
struct PyLexicon {
word_id: TokenMap,
lexeme_to_id: HashMap, LexemeId>,
@@ -262,14 +168,18 @@ struct PyLexicon {
lexicon: SelfOwningLexicon,
}
-mod tokenizers;
-
impl Display for PyLexicon {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "MGLexicon{{\n{}\n}}", self.lexicon)
}
}
+impl PyLexicon {
+ fn semantics<'a>(&'a self) -> Option<&'a SemanticLexicon<'a, &'a str, &'a str>> {
+ self.lexicon.semantic_lexicon()
+ }
+}
+
#[pyclass]
struct GrammarIterator {
generator: Generator, String, String>,
@@ -285,21 +195,21 @@ impl GrammarIterator {
}
fn __next__(mut slf: PyRefMut<'_, Self>) -> Option {
- if let Some(n) = slf.max_strings {
- if slf.n_strings >= n {
- return None;
- }
+ if let Some(n) = slf.max_strings
+ && slf.n_strings >= n
+ {
+ return None;
}
if let Some((prob, string, rules)) = slf.generator.next() {
slf.n_strings += 1;
let py = slf.py();
- Some(PySyntacticStructure {
+ Some(PySyntacticStructure::new(
+ slf.lexicon.clone_ref(py),
prob,
string,
rules,
- lex: slf.lexicon.clone_ref(py),
- })
+ ))
} else {
None
}
@@ -420,21 +330,31 @@ fn get_config(
impl PyLexicon {
fn from_lexicon(lexicon: SelfOwningLexicon) -> PyResult {
+        //unsafe here because these entries borrow from the SelfOwningLexicon;
+        //since we keep the backing data alive (in the Arc), the transmuted
+        //references remain valid for the lexicon's lifetime.
+
let lexeme_to_id: HashMap<_, LexemeId> = lexicon
.lexicon()
.lexemes_and_ids()
.map_err(|e| anyhow!(e))?
- .map(|(id, entry)| (entry, id))
+ .map(|(id, entry)| {
+ let entry: LexicalEntry<&'static str, &'static str> =
+ unsafe { std::mem::transmute(entry) };
+ (entry, id)
+ })
.collect();
let mut lemma_to_id = HashMap::default();
let mut word_id = TokenMap::default();
for leaf in lexicon.lexicon().leaves().iter().copied() {
- let lemma = *lexicon
+ let lemma = lexicon
.lexicon()
.leaf_to_lemma(leaf)
.expect("Invalid lexicon!");
+
+ let lemma: Pronounciation<&'static str> = unsafe { std::mem::transmute(*lemma) };
+
if let Pronounciation::Pronounced(word) = lemma.as_ref() {
word_id.add_word(word);
}
@@ -453,7 +373,7 @@ impl PyLexicon {
impl PyLexicon {
#[allow(clippy::too_many_arguments)]
fn inner_parse(
- slf: PyRef<'_, Self>,
+ slf: &Bound<'_, Self>,
s: &[PhonContent<&str>],
category: String,
min_log_prob: Option,
@@ -462,48 +382,27 @@ impl PyLexicon {
n_beams: Option,
max_parses: Option,
) -> PyResult> {
+ let lex = slf.borrow();
let config = get_config(min_log_prob, move_prob, max_steps, n_beams)?;
- let parser = slf
+ let parser = lex
.lexicon
.lexicon()
.parse(s, category.as_str(), &config)
.map_err(|e| PyValueError::new_err(e.to_string()))?;
- let py = slf.py();
- let self_ref: Py = slf.clone().into_pyobject(py).unwrap().into();
if let Some(max_parses) = max_parses {
Ok(parser
.take(max_parses)
- .map(|(prob, string, rules)| PySyntacticStructure {
- prob,
- rules,
- string: string
- .iter()
- .map(|x| match x {
- PhonContent::Normal(x) => PhonContent::Normal(x.to_string()),
- PhonContent::Affixed(items) => {
- PhonContent::Affixed(items.iter().map(|x| x.to_string()).collect())
- }
- })
- .collect(),
- lex: self_ref.clone_ref(py),
+ .map(|(prob, string, rules)| {
+ PySyntacticStructure::into_syntax_structure(slf, prob, string, rules)
})
.collect())
} else {
Ok(parser
- .map(|(prob, string, rules)| PySyntacticStructure {
- prob,
- rules,
- string: string
- .iter()
- .map(|x| match x {
- PhonContent::Normal(x) => PhonContent::Normal(x.to_string()),
- PhonContent::Affixed(items) => {
- PhonContent::Affixed(items.iter().map(|x| x.to_string()).collect())
- }
- })
- .collect(),
- lex: self_ref.clone_ref(py),
+ .map(|(prob, string, rules)| {
+ PySyntacticStructure::into_syntax_structure(slf, prob, string, rules)
})
.collect())
}
@@ -512,6 +411,7 @@ impl PyLexicon {
#[pymethods]
impl PyLexicon {
+ ///Check if this lexicon has semantics
fn is_semantic(&self) -> bool {
matches!(self.lexicon.lexicon, PossiblySemanticLexicon::Semantic(_))
}
@@ -652,10 +552,10 @@ impl PyLexicon {
})
.or_insert(prob);
- if let Some(max_strings) = max_strings {
- if hashmap.len() > max_strings {
- break;
- }
+ if let Some(max_strings) = max_strings
+ && hashmap.len() > max_strings
+ {
+ break;
}
}
@@ -725,7 +625,7 @@ impl PyLexicon {
})
}
- #[allow(clippy::too_many_arguments)]
+ #[expect(clippy::too_many_arguments)]
#[pyo3(signature = (s, category, min_log_prob=-128.0, move_prob=0.5, max_steps=64, n_beams=256, max_parses=None))]
///Parses a string and returns all found parses in a list
///The string, s, should be delimited by spaces for words and hyphens for multi-word expressions from head-movement
@@ -753,7 +653,7 @@ impl PyLexicon {
///list of SyntacticStructure
/// All found parses of the string.
fn parse(
- slf: PyRef<'_, Self>,
+ slf: &Bound<'_, Self>,
s: &str,
category: String,
min_log_prob: Option,
@@ -790,5 +690,10 @@ fn python_mg(m: &Bound<'_, PyModule>) -> PyResult<()> {
m.add_class::()?;
m.add_class::()?;
m.add_class::()?;
+    m.add_class::<PyActor>()?;
+    m.add_class::<PyEvent>()?;
+    m.add_class::<PyPossibleEvent>()?;
+    m.add_class::<PyScenario>()?;
+    m.add_class::<PyScenarioGenerator>()?;
Ok(())
}
diff --git a/src/semantics.rs b/src/semantics.rs
new file mode 100644
index 0000000..45f924d
--- /dev/null
+++ b/src/semantics.rs
@@ -0,0 +1,283 @@
+use std::{
+ collections::{BTreeMap, BTreeSet, HashSet},
+ fmt::Display,
+ hash::Hash,
+ sync::Arc,
+ time::Duration,
+};
+
+use itertools::Itertools;
+use pyo3::{IntoPyObjectExt, exceptions::PyValueError, prelude::*};
+use simple_semantics::{
+ Entity, EventType, LanguageResult, PossibleEvent, Scenario, ScenarioIterator, ThetaRoles,
+ lambda::RootedLambdaPool,
+ language::{ExecutionConfig, Expr},
+};
+
+pub mod lot_types;
+use lot_types::{PyActor, PyEvent, convert_to_py_actor, convert_to_py_event};
+pub mod scenario;
+use scenario::PyScenario;
+
+struct LanguageResultWrapper<'a>(LanguageResult<'a>, Scenario<'a>);
+
+impl<'py> IntoPyObject<'py> for LanguageResultWrapper<'_> {
+ type Target = PyAny;
+
+ type Output = Bound<'py, Self::Target>;
+
+ type Error = PyErr;
+
+    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
+ match self.0 {
+ LanguageResult::Bool(bool) => bool.into_bound_py_any(py),
+ LanguageResult::Actor(name) => convert_to_py_actor(name, &self.1).into_bound_py_any(py),
+ LanguageResult::Event(e_i) => convert_to_py_event(e_i, &self.1)?.into_bound_py_any(py),
+ LanguageResult::ActorSet(items) => items
+ .into_iter()
+ .map(|name| convert_to_py_actor(name, &self.1))
+                .collect::<HashSet<_>>()
+ .into_bound_py_any(py),
+ LanguageResult::EventSet(items) => items
+ .into_iter()
+ .map(|e_i| convert_to_py_event(e_i, &self.1))
+                .collect::<Result<HashSet<_>, _>>()?
+ .into_bound_py_any(py),
+ }
+ }
+}
+
+impl PyScenario {
+ fn execute<'a>(
+ &'a self,
+ mut expr: RootedLambdaPool<'a, Expr<'a>>,
+        config: Option<ExecutionConfig>,
+    ) -> PyResult<LanguageResultWrapper<'a>> {
+ let scenario = self.as_scenario();
+ expr.reduce()
+ .map_err(|e| PyValueError::new_err(e.to_string()))?;
+ expr.cleanup();
+
+ let pool = expr
+ .into_pool()
+ .map_err(|e| PyValueError::new_err(e.to_string()))?;
+
+ let language_result = pool
+ .run(&scenario, config)
+ .map_err(|e| PyValueError::new_err(e.to_string()))?;
+ Ok(LanguageResultWrapper(language_result, scenario))
+ }
+}
+
+#[pymethods]
+impl PyScenario {
+ #[new]
+    fn new(s: String) -> PyResult<Self> {
+ let scenario =
+ Scenario::parse(s.as_str()).map_err(|e| PyValueError::new_err(e.to_string()))?;
+ Ok(scenario.into())
+ }
+
+ fn __repr__(&self) -> String {
+ format!("Scenario({self})")
+ }
+
+    ///Executes a language of thought expression in this scenario. Will potentially throw a PresuppositionException if
+ ///something is referenced that isn't in the scenario. It will also reduce any lambda
+ ///expressions if possible, and then will only execute the expression if it is fully reducible.
+ ///
+ ///Parameters
+ ///----------
+ ///expression : str
+ /// The expression in the language of thought to execute.
+ ///max_steps : int or None, optional
+ /// The number of steps in the virtual machine to execute before giving up.
+ /// Default is 256.
+ ///timeout : datetime.timedelta or None, optional
+ /// The amount of time before the execution gives up.
+ /// Default is None
+ ///Returns
+ ///-------
+ ///bool or Actor or Event or set[Actor] or set[Event]
+ /// the value of the expression
+ ///Raises
+ ///------
+ ///ValueError
+ /// If the expression is incorrectly formatted or if there is a presupposition error.
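+    ///
+    ///Examples
+    ///--------
+    ///A sketch, assuming `scenario` is a Scenario that defines an actor `a_John` who runs:
+    ///
+    ///.. code-block:: python
+    ///
+    ///    phi = scenario.evaluate("(lambda a x some_e(e, pe_run(e), AgentOf(x, e)))(a_John)")
+    ///    assert isinstance(phi, bool)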
+    #[pyo3(signature = (expression, max_steps=256, timeout=None))]
+ fn evaluate<'a>(
+ &'a self,
+ expression: &'a str,
+        max_steps: Option<usize>,
+        timeout: Option<Duration>,
+    ) -> PyResult<LanguageResultWrapper<'a>> {
+ let expr = RootedLambdaPool::parse(expression)
+ .map_err(|e| PyValueError::new_err(e.to_string()))?;
+ self.execute(expr, Some(ExecutionConfig::new(max_steps, timeout)))
+ }
+
+ ///Creates a generator that goes over all possible scenarios that can be generated according to
+    ///its parameters. This gets very large very quickly.
+ ///
+ ///Parameters
+ ///----------
+ ///actors : list[str]
+ /// The actors who may or may not be present.
+    ///event_kinds : list[PossibleEvent]
+    ///    The possible kinds of events
+    ///actor_properties : list[str]
+    ///    The properties that actors may have
+ ///
+ ///Returns
+ ///-------
+ ///ScenarioGenerator
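+    ///
+    ///Examples
+    ///--------
+    ///A sketch based on the test suite: two actors, no event kinds, and one property
+    ///yield nine distinct scenarios.
+    ///
+    ///.. code-block:: python
+    ///
+    ///    scenarios = list(Scenario.all_scenarios(["John", "Mary"], [], ["kind"]))
+    ///    assert len(scenarios) == 9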
+ #[staticmethod]
+ fn all_scenarios(
+        actors: Vec<String>,
+        event_kinds: Vec<PyPossibleEvent>,
+        actor_properties: Vec<String>,
+ ) -> PyScenarioGenerator {
+ let parameter_holder = Arc::new(ParameterHolder {
+ actors,
+ event_kinds,
+ actor_properties,
+ });
+
+ let actors: Vec<&'static str> = parameter_holder
+ .actors
+ .iter()
+ .map(|x| {
+ let s: &'static str = unsafe { std::mem::transmute(x.as_str()) };
+ s
+ })
+            .collect::<Vec<_>>();
+ let properties: Vec<&'static str> = parameter_holder
+ .actor_properties
+ .iter()
+ .map(|x| {
+ let s: &'static str = unsafe { std::mem::transmute(x.as_str()) };
+ s
+ })
+            .collect::<Vec<_>>();
+
+        let event_kinds: Vec<PossibleEvent<'static>> = parameter_holder
+ .event_kinds
+ .iter()
+ .map(|x| {
+ let x = x.as_possible_event();
+ let x: PossibleEvent<'static> = unsafe { std::mem::transmute(x) };
+ x
+ })
+            .collect::<Vec<_>>();
+
+ PyScenarioGenerator {
+ generator: Scenario::all_scenarios(&actors, &event_kinds, &properties),
+ _parameter_holder: parameter_holder,
+ }
+ }
+}
+
+/// A possible linguistic event with theta role structure.
+///
+/// Parameters
+/// ----------
+/// name : str
+/// Identifier for the event.
+/// has_agent : bool, optional
+/// Whether the event has an agent participant. Default is ``True``.
+/// has_patient : bool, optional
+/// Whether the event has a patient participant. Default is ``False``.
+/// is_reflexive : bool, optional
+/// Whether the event allows reflexive construal. Default is ``True``.
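+///
+/// Examples
+/// --------
+/// A sketch: a transitive event kind (it takes both an agent and a patient, and is
+/// reflexive by default).
+///
+/// .. code-block:: python
+///
+///     like = PossibleEvent("like", has_agent=True, has_patient=True)
+///     assert like.event_type() == "Transitive"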
+#[pyclass(name = "PossibleEvent", eq, from_py_object)]
+#[derive(Debug, Clone, Eq, PartialEq, PartialOrd, Ord, Hash)]
+pub struct PyPossibleEvent {
+ ///Whether the event takes an agent
+ #[pyo3(get, set)]
+ pub has_agent: bool,
+ ///Whether the event takes a patient
+ #[pyo3(get, set)]
+ pub has_patient: bool,
+ ///Whether the event can have the same agent and patient
+ #[pyo3(get, set)]
+ pub is_reflexive: bool,
+    ///The name of this kind of event (e.g. `running` could be an unaccusative event)
+ #[pyo3(get, set)]
+ pub name: String,
+}
+
+#[pymethods]
+impl PyPossibleEvent {
+ #[new]
+ #[pyo3(signature = (name, has_agent=true, has_patient=false, is_reflexive=true))]
+ fn new(name: String, has_agent: bool, has_patient: bool, is_reflexive: bool) -> Self {
+ PyPossibleEvent {
+ name,
+ has_agent,
+ has_patient,
+ is_reflexive,
+ }
+ }
+
+ /// Classify the event based on its argument structure.
+ ///
+ /// Returns
+ /// -------
+ /// Literal['Transitive', 'TransitiveNonReflexive', 'Unergative', 'Unaccusative', 'Avalent'].
+ fn event_type(&self) -> &'static str {
+ match (self.has_agent, self.has_patient) {
+ (true, true) if self.is_reflexive => "Transitive",
+ (true, true) => "TransitiveNonReflexive",
+ (true, false) => "Unergative",
+ (false, true) => "Unaccusative",
+ (false, false) => "Avalent",
+ }
+ }
+}
+
+impl PyPossibleEvent {
+ fn as_event_type(&self) -> EventType {
+ match (self.has_agent, self.has_patient) {
+ (true, true) if self.is_reflexive => EventType::Transitive,
+ (true, true) => EventType::TransitiveNonReflexive,
+ (true, false) => EventType::Unergative,
+ (false, true) => EventType::Unaccusative,
+ (false, false) => EventType::Avalent,
+ }
+ }
+
+ fn as_possible_event<'a>(&'a self) -> PossibleEvent<'a> {
+ PossibleEvent {
+ label: self.name.as_str(),
+ event_type: self.as_event_type(),
+ }
+ }
+}
+
+///Yields
+///------
+///Scenario
+/// Another scenario that can be generated according to the parameters.
+///
+#[pyclass(name = "ScenarioGenerator", eq, from_py_object)]
+#[derive(Debug, Clone, Eq, PartialEq, PartialOrd, Ord, Hash)]
+pub struct PyScenarioGenerator {
+ generator: ScenarioIterator<'static>,
+    _parameter_holder: Arc<ParameterHolder>,
+}
+
+#[pymethods]
+impl PyScenarioGenerator {
+ fn __iter__(slf: PyRef<'_, Self>) -> PyRef<'_, Self> {
+ slf
+ }
+
+    fn __next__(mut slf: PyRefMut<'_, Self>) -> Option<PyScenario> {
+ slf.generator.next().map(|s| s.into())
+ }
+}
+
+#[derive(Debug, Clone, Eq, PartialEq, PartialOrd, Ord, Hash)]
+struct ParameterHolder {
+    actors: Vec<String>,
+    event_kinds: Vec<PyPossibleEvent>,
+    actor_properties: Vec<String>,
+}
diff --git a/src/semantics/lot_types.rs b/src/semantics/lot_types.rs
new file mode 100644
index 0000000..f07834d
--- /dev/null
+++ b/src/semantics/lot_types.rs
@@ -0,0 +1,198 @@
+use super::*;
+
+pub(super) fn convert_to_py_actor(name: &str, scenario: &Scenario<'_>) -> PyActor {
+ PyActor {
+ name: name.to_string(),
+ properties: scenario
+ .properties()
+ .iter()
+ .filter_map(|(prop, entries)| {
+ if entries.contains(&Entity::Actor(name)) {
+ Some(prop.to_string())
+ } else {
+ None
+ }
+ })
+ .collect(),
+ }
+}
+
+pub(super) fn convert_to_py_event(e_i: u8, scenario: &Scenario<'_>) -> Result<PyEvent, PyErr> {
+ let e = scenario
+ .thematic_relations()
+ .get(e_i as usize)
+ .ok_or_else(|| {
+ PyValueError::new_err(format!(
+ "Result is event {e_i}, but no such event exists in the scenario!"
+ ))
+ })?;
+
+ Ok(PyEvent {
+ agent: e.agent.map(|x| x.to_string()),
+ patient: e.patient.map(|x| x.to_string()),
+ properties: scenario
+ .properties()
+ .iter()
+ .filter_map(|(prop, entries)| {
+ if entries.contains(&Entity::Event(e_i)) {
+ Some(prop.to_string())
+ } else {
+ None
+ }
+ })
+ .collect(),
+ })
+}
+
+///Represents an actor with a name and a set of properties to be used in Scenarios.
+///
+///Parameters
+///----------
+///name : str
+/// The name of the actor.
+///properties : set[str], optional
+/// Any properties that apply to the actor. Defaults to an empty set.
+///
+///
+///Examples
+///--------
+///Creating an actor and modifying its properties:
+///
+///.. code-block:: python
+///
+/// actor = Actor("John", properties={"mean", "unfriendly"})
+/// actor.name = "Alice"
+/// actor.properties = {"nice", "friendly"}
+///
+#[pyclass(name = "Actor", eq, str, from_py_object)]
+#[derive(Debug, Clone, Eq, PartialEq, Hash, PartialOrd, Ord)]
+pub struct PyActor {
+ /// The name of the actor
+ #[pyo3(get, set)]
+ pub name: String,
+
+ /// An unordered set of properties that apply to this actor
+ #[pyo3(get, set)]
+    pub properties: BTreeSet<String>,
+}
+
+#[pymethods]
+impl PyActor {
+ #[new]
+ #[pyo3(signature = (name, properties=None))]
+    fn new(name: String, properties: Option<BTreeSet<String>>) -> Self {
+ PyActor {
+ name,
+ properties: properties.unwrap_or_default(),
+ }
+ }
+
+ fn __repr__(&self) -> String {
+ format!("Actor({self})")
+ }
+}
+
+impl Display for PyActor {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ write!(
+ f,
+ "{}{}{}{}",
+ self.name,
+ if self.properties.is_empty() { "" } else { " (" },
+ self.properties.iter().join(", "),
+ if self.properties.is_empty() { "" } else { ")" },
+ )
+ }
+}
+
+///Represents an event to be used in a Scenario.
+///
+///Parameters
+///----------
+///agent : str, optional
+/// The name of the agent (if there is one)
+///patient : str, optional
+/// The name of the patient (if there is one)
+///properties : set[str], optional
+/// Any properties that apply to the event. Defaults to an empty set.
+///
+///
+///Examples
+///--------
+///Creating an event
+///
+///.. code-block:: python
+///
+///    running = Event(agent="John", properties={"run", "quickly"})
+///
+#[pyclass(name = "Event", eq, str, from_py_object)]
+#[derive(Debug, Clone, Eq, PartialEq, PartialOrd, Ord, Hash)]
+pub struct PyEvent {
+ ///The agent of the event.
+ #[pyo3(get, set)]
+    pub agent: Option<String>,
+
+ ///The patient of the event.
+ #[pyo3(get, set)]
+    pub patient: Option<String>,
+
+ ///Any properties of the event.
+ #[pyo3(get, set)]
+    pub properties: BTreeSet<String>,
+}
+
+#[pymethods]
+impl PyEvent {
+ #[new]
+ #[pyo3(signature = (agent=None, patient=None, properties=None))]
+    fn new(
+        agent: Option<String>,
+        patient: Option<String>,
+        properties: Option<BTreeSet<String>>,
+ ) -> Self {
+ PyEvent {
+ agent,
+ patient,
+ properties: properties.unwrap_or_default(),
+ }
+ }
+
+ fn __repr__(&self) -> String {
+ format!("Event({self})")
+ }
+}
+
+impl Display for PyEvent {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ write!(
+ f,
+ "{{{}{}{}{}{}{}}}",
+ self.agent
+ .as_deref()
+ .map(|x| format!("A = {x}"))
+ .unwrap_or("".to_string()),
+ if self.patient.is_some() && self.agent.is_some() {
+ ", "
+ } else {
+ ""
+ },
+ self.patient
+ .as_deref()
+ .map(|x| format!("P = {x}"))
+ .unwrap_or("".to_string()),
+ if self.properties.is_empty() { "" } else { " (" },
+ self.properties.iter().join(" "),
+ if self.properties.is_empty() { "" } else { ")" },
+ )
+ }
+}
+
+impl PyEvent {
+ pub fn into_theta_roles<'a>(self: &'a PyEvent) -> ThetaRoles<'a> {
+ ThetaRoles {
+ agent: self.agent.as_deref(),
+ patient: self.patient.as_deref(),
+ }
+ }
+}
diff --git a/src/semantics/scenario.rs b/src/semantics/scenario.rs
new file mode 100644
index 0000000..94beea3
--- /dev/null
+++ b/src/semantics/scenario.rs
@@ -0,0 +1,125 @@
+use super::*;
+
+///Represents a Scenario, a model that meanings are evaluated in.
+///
+///Parameters
+///----------
+///s : str
+///    The scenario description to parse.
+///
+///Attributes
+///----------
+///actors : list[Actor]
+///    The actors present in the scenario
+///events : list[Event]
+///    The events happening in the scenario
+///questions : list[str]
+///    The questions in a scenario. (Setting this will raise a `ValueError` if a
+///    string is not a valid Language of Thought expression.)
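+///
+///Examples
+///--------
+///Inspecting a generated scenario (a sketch based on the test suite):
+///
+///.. code-block:: python
+///
+///    scenario = next(Scenario.all_scenarios(["John"], [], []))
+///    print(scenario.actors, scenario.events, scenario.questions)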
+#[pyclass(name = "Scenario", str, eq, from_py_object)]
+#[derive(Debug, Clone, Eq, PartialEq)]
+pub struct PyScenario {
+ ///A list of Actors in the scenario
+ #[pyo3(get, set)]
+    actors: Vec<PyActor>,
+ ///A list of Events in the scenario
+ #[pyo3(get, set)]
+    events: Vec<PyEvent>,
+
+ ///A list of questions to be asked in the scenario
+ #[pyo3(get)]
+    questions: Vec<String>,
+}
+
+impl From<Scenario<'_>> for PyScenario {
+ fn from(value: Scenario) -> Self {
+ let actors = value
+ .actors()
+ .iter()
+ .map(|x| PyActor {
+ name: x.to_string(),
+ properties: value
+ .properties()
+ .iter()
+ .filter_map(|(k, v)| {
+ if v.contains(&Entity::Actor(x)) {
+ Some(k.to_string())
+ } else {
+ None
+ }
+ })
+ .collect(),
+ })
+ .collect();
+
+ let events = value
+ .thematic_relations()
+ .iter()
+ .enumerate()
+ .map(|(i, x)| PyEvent {
+ agent: x.agent.map(|x| x.to_string()),
+ patient: x.patient.map(|x| x.to_string()),
+ properties: value
+ .properties()
+ .iter()
+ .filter_map(|(k, v)| {
+ if v.contains(&Entity::Event(u8::try_from(i).expect("Too many events!"))) {
+ Some(k.to_string())
+ } else {
+ None
+ }
+ })
+ .collect(),
+ })
+ .collect();
+
+ let questions = value.questions().iter().map(|x| x.to_string()).collect();
+
+ PyScenario {
+ actors,
+ events,
+ questions,
+ }
+ }
+}
+
+#[pymethods]
+impl PyScenario {
+ #[setter]
+    fn set_questions(&mut self, questions: Vec<String>) -> PyResult<()> {
+ for q in &questions {
+ let _ = RootedLambdaPool::parse(q).map_err(|e| PyValueError::new_err(e.to_string()))?;
+ }
+
+ self.questions = questions;
+ Ok(())
+ }
+}
+
+impl Display for PyScenario {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ write!(f, "{}", self.as_scenario())
+ }
+}
+
+impl PyScenario {
+ pub(super) fn as_scenario<'a>(&'a self) -> Scenario<'a> {
+ let actors = self.actors.iter().map(|x| x.name.as_str()).collect();
+ let thematic_relations = self.events.iter().map(|x| x.into_theta_roles()).collect();
+ let mut properties: BTreeMap<_, Vec<_>> = BTreeMap::new();
+
+ for a in &self.actors {
+ for p in &a.properties {
+ properties
+ .entry(p.as_str())
+ .or_default()
+ .push(Entity::Actor(a.name.as_str()));
+ }
+ }
+ for (i, e) in self.events.iter().enumerate() {
+ for p in &e.properties {
+ properties
+ .entry(p.as_str())
+ .or_default()
+ .push(Entity::Event(u8::try_from(i).expect("Too many events!")));
+ }
+ }
+
+ Scenario::new(actors, thematic_relations, properties)
+ }
+}
diff --git a/src/syntax.rs b/src/syntax.rs
new file mode 100644
index 0000000..b5d0f7d
--- /dev/null
+++ b/src/syntax.rs
@@ -0,0 +1,263 @@
+use std::fmt::Display;
+
+use crate::graphing::{PyMgEdge, PyMgNode};
+
+use super::PyLexicon;
+use logprob::LogProb;
+use minimalist_grammar_parser::{PhonContent, RulePool, lexicon::LexicalEntry};
+use pyo3::{exceptions::PyValueError, prelude::*};
+
+#[pyclass(name = "SyntacticStructure", str, eq, frozen)]
+#[derive(Debug)]
+///The representation of a syntactic structure generated by a grammar, or alternatively the result
+///of parsing a string.
+pub struct PySyntacticStructure {
+ prob: LogProb,
+    string: Vec<PhonContent<String>>,
+    rules: RulePool,
+    meaning: Option<Vec<String>>,
+    lex: Py<PyLexicon>,
+}
+
+impl PartialEq for PySyntacticStructure {
+ fn eq(&self, other: &Self) -> bool {
+ self.prob == other.prob && self.string == other.string && self.rules == other.rules
+ }
+}
+
+impl Display for PySyntacticStructure {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ let len = self.string.len();
+ for (i, x) in self.string.iter().enumerate() {
+ match x {
+ PhonContent::Normal(s) => write!(f, "{s}")?,
+ PhonContent::Affixed(items) => write!(f, "{}", items.join("-"))?,
+ };
+ if i != len - 1 {
+ write!(f, " ")?;
+ }
+ }
+ Ok(())
+ }
+}
+
+impl PySyntacticStructure {
+ pub fn new(
+        lex: Py<PyLexicon>,
+        prob: LogProb,
+        string: Vec<PhonContent<String>>,
+ rules: RulePool,
+ ) -> PySyntacticStructure {
+ PySyntacticStructure {
+ prob,
+ meaning: lex.get().semantics().map(|lex| {
+ rules
+ .to_interpretation(lex)
+ .map(|(a, _)| a.to_string())
+ .collect()
+ }),
+ rules,
+ string,
+ lex,
+ }
+ }
+
+ pub fn into_syntax_structure(
+ lexicon: &Bound<'_, PyLexicon>,
+ prob: LogProb,
+ string: &[PhonContent<&str>],
+ rules: RulePool,
+ ) -> PySyntacticStructure {
+ PySyntacticStructure {
+ prob,
+ meaning: lexicon.get().semantics().map(|lex| {
+ rules
+ .to_interpretation(lex)
+ .map(|(a, _)| a.to_string())
+ .collect()
+ }),
+ rules,
+ string: string
+ .iter()
+ .map(|x| match x {
+ PhonContent::Normal(x) => PhonContent::Normal(x.to_string()),
+ PhonContent::Affixed(items) => {
+ PhonContent::Affixed(items.iter().map(|x| x.to_string()).collect())
+ }
+ })
+ .collect(),
+ lex: lexicon.as_unbound().clone_ref(lexicon.py()),
+ }
+ }
+
+    pub fn lex(&self) -> &Py<PyLexicon> {
+ &self.lex
+ }
+
+    pub fn string(&self) -> &Vec<PhonContent<String>> {
+ &self.string
+ }
+}
+
+#[pymethods]
+impl PySyntacticStructure {
+ ///Returns the interpretation of this SyntacticStructure, provided that its associated Lexicon
+ ///has semantics
+ #[getter]
+    fn meaning(&self) -> &Option<Vec<String>> {
+ &self.meaning
+ }
+
+ ///The pronunciation of this SyntacticStructure.
+ ///
+ ///Returns
+ ///-------
+ ///list[str]
+    ///    A list of strings of each word. Multi-morphemic words are separated by `-`.
+    fn pronunciation(&self) -> Vec<String> {
+ self.string
+ .iter()
+ .map(|x| x.to_string())
+            .collect::<Vec<String>>()
+ }
+
+ ///The log probability of generating this SyntacticStructure using its associated Lexicon.
+ ///
+ ///Returns
+ ///-------
+ ///float
+ /// the log probability
+ fn log_prob(&self) -> f64 {
+ self.prob.into_inner()
+ }
+
+ ///Check whether this string (representing a lexical entry) is used in this tree.
+ ///
+ ///Returns
+ ///-------
+ ///bool
+ /// Whether the lexical entry is used
+ ///
+ ///Raises
+ ///------
+ ///ValueException
+ /// If the lexical entry is not parseable as a lexical entry.
+    fn contains_lexical_entry(&self, s: &str) -> PyResult<bool> {
+ let lex = self.lex.get();
+ let entry = LexicalEntry::parse(s).map_err(|e| PyValueError::new_err(e.to_string()))?;
+ Ok(lex
+ .lexeme_to_id
+ .get(&entry)
+ .is_some_and(|x| self.rules.used_lemmas().any(|y| &y == x)))
+ }
+
+    ///Check whether a word (or the empty word) is present in this structure.
+ ///
+ ///Parameters
+ ///----------
+ ///s : str or None
+ /// The word (or empty word) that may or may not be present
+ ///
+ ///Returns
+ ///-------
+ ///bool
+ /// whether the word is present in the structure
+ fn contains_word(&self, mut s: Option<&str>) -> bool {
+ let lex = self.lex.get();
+ if let Some(s_inner) = &s
+ && s_inner.is_empty()
+ {
+ s = None;
+ }
+ lex.lemma_to_id
+ .get(&s.into())
+ .is_some_and(|x| self.rules.used_lemmas().any(|y| x.contains(&y)))
+ }
+
+ ///The probability of generating this SyntacticStructure using its associated Lexicon.
+ ///
+ ///Returns
+ ///-------
+ ///float
+ /// the probability of the structure
+ fn prob(&self) -> f64 {
+ self.prob.into_inner().exp()
+ }
+
+ ///The number of derivational steps necessary to derive this SyntacticStructure using its Lexicon
+ ///
+ ///Returns
+ ///-------
+ ///int
+ /// the number of steps
+ fn n_steps(&self) -> usize {
+ self.rules.n_steps()
+ }
+
+ ///Turns the SyntacticStructure into a tree that can be rendered with LaTeX.
+    ///Requires including `latex-commands.tex `_ in the LaTeX preamble.
+ ///
+ ///Returns
+ ///-------
+ ///str
+ /// A LaTeX representation of the parse tree
+ fn latex(&self) -> String {
+ let lex = self.lex.get();
+ lex.lexicon
+ .lexicon()
+ .derivation(self.rules.clone())
+ .tree()
+ .latex()
+ }
+
+ ///The maximum number of moving elements stored in memory at one time.
+ ///
+ ///Returns
+ ///-------
+ ///int
+ /// the maximum number of moved items held in memory in the derivation
+ fn max_memory_load(&self) -> usize {
+ self.rules.max_memory_load()
+ }
+
+ #[allow(clippy::type_complexity)]
+ fn __to_tree_inner(&self) -> (Vec<(usize, PyMgNode)>, Vec<(usize, usize, PyMgEdge)>, usize) {
+ let d = self
+ .lex
+ .get()
+ .lexicon
+ .lexicon()
+ .derivation(self.rules.clone());
+ let tree = d.tree();
+ let (g, root) = tree.petgraph();
+ let nodes = g
+ .node_indices()
+ .map(|n| {
+ (
+ n.index(),
+ PyMgNode(
+ g.node_weight(n)
+ .unwrap()
+ .clone()
+ .map(|x| x.to_string(), |x| x.to_string()),
+ ),
+ )
+ })
+            .collect::<Vec<_>>();
+
+ let mut edges = g
+ .edge_indices()
+ .map(|e| {
+ let (src, tgt) = g.edge_endpoints(e).unwrap();
+ (
+ src.index(),
+ tgt.index(),
+ PyMgEdge(*g.edge_weight(e).unwrap()),
+ )
+ })
+            .collect::<Vec<_>>();
+
+ edges.sort_by_key(|(_, _, x)| x.0);
+ (nodes, edges, root.index())
+ }
+}
diff --git a/src/tokenizers.rs b/src/tokenizers.rs
index 4b93407..ab66e5d 100644
--- a/src/tokenizers.rs
+++ b/src/tokenizers.rs
@@ -371,7 +371,7 @@ impl PyLexicon {
/// list of :meth:`python_mg.SyntacticStructure`
/// List of all parses of the token string
fn parse_tokens(
- slf: PyRef<'_, Self>,
+ slf: &Bound<'_, Self>,
s: Vec,
category: String,
min_log_prob: Option,
@@ -380,7 +380,7 @@ impl PyLexicon {
n_beams: Option,
max_parses: Option,
) -> PyResult> {
- let v = to_phon_content(&s, &slf.word_id)?;
+ let v = to_phon_content(&s, &slf.borrow().word_id)?;
PyLexicon::inner_parse(
slf,
@@ -411,10 +411,10 @@ impl PySyntacticStructure {
///ndarray of uint
/// the tokenized string.
fn tokens<'py>(slf: PyRef<'py, Self>) -> Bound<'py, PyArray1> {
- let tokens = slf.lex.get().tokens();
+ let tokens = slf.lex().get().tokens();
let mut output = vec![SOS];
- for c in &slf.string {
+ for c in slf.string() {
match c {
PhonContent::Normal(w) => output.push(
*tokens