From 1e2422cf4369c9f9e0ca5a3f14f3c1f1d998744b Mon Sep 17 00:00:00 2001 From: Michael Goodale Date: Mon, 23 Feb 2026 16:38:36 +0100 Subject: [PATCH 01/11] Added way of representing scenarios in python --- Cargo.lock | 6 +- Cargo.toml | 1 + python/python_mg/semantics.py | 1 + python/tests/test_mg.py | 9 ++ src/lib.rs | 12 +- src/semantics.rs | 266 ++++++++++++++++++++++++++++++++++ 6 files changed, 292 insertions(+), 3 deletions(-) create mode 100644 python/python_mg/semantics.py create mode 100644 src/semantics.rs diff --git a/Cargo.lock b/Cargo.lock index 9001283..825db9e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -341,7 +341,7 @@ checksum = "2687e6cf9c00f48e9284cf9fd15f2ef341d03cc7743abf9df4c5f07fdee50b18" [[package]] name = "minimalist-grammar-parser" version = "0.1.0" -source = "git+https://github.com/MichaelGoodale/minimalist-grammar-parser.git#342aac1c51c1c4125e2b7ca27e9d91cde3ac73ed" +source = "git+https://github.com/MichaelGoodale/minimalist-grammar-parser.git#a732350111aa7f49015d9c1f0535f7a87f07e0fe" dependencies = [ "ahash 0.8.12", "bitvec", @@ -562,6 +562,7 @@ dependencies = [ "numpy", "pyo3", "rand", + "simple-semantics", ] [[package]] @@ -743,11 +744,12 @@ checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" [[package]] name = "simple-semantics" version = "0.1.0" -source = "git+https://github.com/MichaelGoodale/simple-semantics.git#2cef7d759b37c4a466b068f984bb549ec040b218" +source = "git+https://github.com/MichaelGoodale/simple-semantics.git#9342f11ad7e5c4817e11bbabc644067f5e73a916" dependencies = [ "ahash 0.8.12", "chumsky", "itertools", + "rand", "serde", "serde_json", "thiserror", diff --git a/Cargo.toml b/Cargo.toml index 59a5b41..903c207 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,6 +9,7 @@ name = "python_mg" crate-type = ["cdylib"] [dependencies] +simple-semantics = { git = "https://github.com/MichaelGoodale/simple-semantics.git" } pyo3 = { version = "0.28.0", features = [ "anyhow", "extension-module", diff --git 
a/python/python_mg/semantics.py b/python/python_mg/semantics.py new file mode 100644 index 0000000..b6639ec --- /dev/null +++ b/python/python_mg/semantics.py @@ -0,0 +1 @@ +from python_mg._lib_name import Scenario, Actor, Event, PossibleEvent, ScenarioGenerator diff --git a/python/tests/test_mg.py b/python/tests/test_mg.py index ec795bd..2d96be8 100644 --- a/python/tests/test_mg.py +++ b/python/tests/test_mg.py @@ -2,6 +2,7 @@ import pickle from python_mg import Lexicon, Continuation +from python_mg.semantics import Scenario from python_mg.syntax import Trace, Mover @@ -39,6 +40,14 @@ def test_semantic_lexicon(): assert semantic_lexicon.is_semantic() +def test_scenario(): + Scenario("") + scenarios: list[Scenario] = [ + x for x in Scenario.all_scenarios(["John", "Mary"], [], ["kind"]) + ] + assert len(scenarios) == 9 + + def test_trees(): grammar = """ ::T= C diff --git a/src/lib.rs b/src/lib.rs index 5cb8532..074c3de 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -16,7 +16,12 @@ use pyo3::{exceptions::PyValueError, prelude::*}; mod graphing; use graphing::{PyMgEdge, PyMgNode}; -use crate::tokenizers::TokenMap; +mod semantics; + +use crate::{ + semantics::{PyActor, PyEvent, PyPossibleEvent, PyScenario, PyScenarioIterator}, + tokenizers::TokenMap, +}; #[pyclass(name = "SyntacticStructure", str, eq, frozen)] #[derive(Debug)] @@ -790,5 +795,10 @@ fn python_mg(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_class::()?; m.add_class::()?; m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; Ok(()) } diff --git a/src/semantics.rs b/src/semantics.rs new file mode 100644 index 0000000..70212d2 --- /dev/null +++ b/src/semantics.rs @@ -0,0 +1,266 @@ +use std::{collections::BTreeMap, fmt::Display, sync::Arc}; + +use pyo3::{exceptions::PyValueError, prelude::*}; +use simple_semantics::{Entity, EventType, PossibleEvent, Scenario, ScenarioIterator, ThetaRoles}; + +#[pyclass(name = "Scenario", str, eq, from_py_object)] 
+#[derive(Debug, Clone, Eq, PartialEq)] +pub struct PyScenario { + actors: Vec, + events: Vec, +} + +impl From> for PyScenario { + fn from(value: Scenario) -> Self { + let actors = value + .actors() + .iter() + .map(|x| { + let properties = value + .properties() + .iter() + .filter_map(|(k, v)| { + if v.contains(&Entity::Actor(x)) { + Some(k.to_string()) + } else { + None + } + }) + .collect::>(); + PyActor { + name: x.to_string(), + properties, + } + }) + .collect(); + + let events = value + .thematic_relations() + .iter() + .enumerate() + .map(|(i, x)| { + let properties = value + .properties() + .iter() + .filter_map(|(k, v)| { + if v.contains(&Entity::Event(u8::try_from(i).expect("Too many events!"))) { + Some(k.to_string()) + } else { + None + } + }) + .collect::>(); + PyEvent { + agent: x.agent.map(|x| x.to_string()), + patient: x.patient.map(|x| x.to_string()), + properties, + } + }) + .collect(); + + PyScenario { actors, events } + } +} + +impl Display for PyScenario { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.as_scenario()) + } +} + +impl PyScenario { + fn as_scenario<'a>(&'a self) -> Scenario<'a> { + let actors = self.actors.iter().map(|x| x.name.as_str()).collect(); + let thematic_relations = self.events.iter().map(|x| x.into_theta_roles()).collect(); + let mut properties: BTreeMap<_, Vec<_>> = BTreeMap::new(); + + for a in &self.actors { + for p in &a.properties { + properties + .entry(p.as_str()) + .or_default() + .push(Entity::Actor(a.name.as_str())); + } + } + for (i, e) in self.events.iter().enumerate() { + for p in &e.properties { + properties + .entry(p.as_str()) + .or_default() + .push(Entity::Event(u8::try_from(i).expect("Too many events!"))); + } + } + + Scenario::new(actors, thematic_relations, properties) + } +} + +#[pymethods] +impl PyScenario { + #[new] + fn new(s: String) -> PyResult { + let scenario = + Scenario::parse(s.as_str()).map_err(|e| PyValueError::new_err(e.to_string()))?; + 
Ok(scenario.into()) + } + + fn __repr__(&self) -> String { + format!("Scenario({self})") + } + + #[staticmethod] + fn all_scenarios( + actors: Vec, + event_kinds: Vec, + actor_properties: Vec, + ) -> PyScenarioIterator { + let parameter_holder = Arc::new(ParameterHolder { + actors, + event_kinds, + actor_properties, + }); + + let actors: Vec<&'static str> = parameter_holder + .actors + .iter() + .map(|x| { + let s: &'static str = unsafe { std::mem::transmute(x.as_str()) }; + s + }) + .collect::>(); + let properties: Vec<&'static str> = parameter_holder + .actor_properties + .iter() + .map(|x| { + let s: &'static str = unsafe { std::mem::transmute(x.as_str()) }; + s + }) + .collect::>(); + + let event_kinds: Vec> = parameter_holder + .event_kinds + .iter() + .map(|x| { + let x = x.as_possible_event(); + let x: PossibleEvent<'static> = unsafe { std::mem::transmute(x) }; + x + }) + .collect::>(); + + PyScenarioIterator { + generator: Scenario::all_scenarios(&actors, &event_kinds, &properties), + _parameter_holder: parameter_holder, + } + } +} + +#[pyclass(name = "PossibleEvent", eq, from_py_object)] +#[derive(Debug, Clone, Eq, PartialEq)] +pub struct PyPossibleEvent { + #[pyo3(get, set)] + pub has_agent: bool, + #[pyo3(get, set)] + pub has_patient: bool, + pub is_reflexive: bool, + #[pyo3(get, set)] + pub name: String, +} + +impl PyPossibleEvent { + fn event_type(&self) -> EventType { + match (self.has_agent, self.has_patient) { + (true, true) if self.is_reflexive => EventType::Transitive, + (true, true) => EventType::TransitiveNonReflexive, + (true, false) => EventType::Unergative, + (false, true) => EventType::Unaccusative, + (false, false) => EventType::Avalent, + } + } + + fn as_possible_event<'a>(&'a self) -> PossibleEvent<'a> { + PossibleEvent { + label: self.name.as_str(), + event_type: self.event_type(), + } + } +} + +#[pyclass(name = "Actor", eq, str, from_py_object)] +#[derive(Debug, Clone, Eq, PartialEq)] +pub struct PyActor { + #[pyo3(get, set)] + pub 
name: String, + #[pyo3(get, set)] + pub properties: Vec, +} + +impl Display for PyActor { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "{}{}{}{}", + self.name, + if self.properties.is_empty() { "" } else { " (" }, + self.properties.join(", "), + if self.properties.is_empty() { "" } else { ")" }, + ) + } +} + +#[pyclass(name = "Event", eq, str, from_py_object)] +#[derive(Debug, Clone, Eq, PartialEq)] +pub struct PyEvent { + #[pyo3(get, set)] + pub agent: Option, + #[pyo3(get, set)] + pub patient: Option, + #[pyo3(get, set)] + pub properties: Vec, +} +impl Display for PyEvent { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "{{{}{}{}}}", + self.agent.as_deref().unwrap_or(""), + if self.patient.is_some() && self.agent.is_some() { + " " + } else { + "" + }, + self.patient.as_deref().unwrap_or("") + ) + } +} + +impl PyEvent { + pub fn into_theta_roles<'a>(self: &'a PyEvent) -> ThetaRoles<'a> { + ThetaRoles { + agent: self.agent.as_deref(), + patient: self.patient.as_deref(), + } + } +} + +#[pyclass(name = "ScenarioGenerator")] +pub struct PyScenarioIterator { + generator: ScenarioIterator<'static>, + _parameter_holder: Arc, +} + +#[pymethods] +impl PyScenarioIterator { + fn __iter__(slf: PyRef<'_, Self>) -> PyRef<'_, Self> { + slf + } + + fn __next__(mut slf: PyRefMut<'_, Self>) -> Option { + slf.generator.next().map(|s| s.into()) + } +} + +struct ParameterHolder { + actors: Vec, + event_kinds: Vec, + actor_properties: Vec, +} From 8fb36865cf5a4992b992203feefb4759df94fc2f Mon Sep 17 00:00:00 2001 From: Michael Goodale Date: Tue, 24 Feb 2026 12:54:01 +0100 Subject: [PATCH 02/11] Added execution of events --- Cargo.lock | 1 + Cargo.toml | 1 + python/python_mg/_lib_name.pyi | 49 +++++++ python/tests/test_mg.py | 15 +- src/lib.rs | 2 +- src/semantics.rs | 258 +++++++++++++++++++++++++++++---- 6 files changed, 296 insertions(+), 30 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 
825db9e..3f24510 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -557,6 +557,7 @@ name = "python-mg" version = "0.1.0" dependencies = [ "anyhow", + "itertools", "logprob", "minimalist-grammar-parser", "numpy", diff --git a/Cargo.toml b/Cargo.toml index 903c207..6ecf8e0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -23,3 +23,4 @@ anyhow = "1.0.98" logprob = "0.2.1" rand = "0.10.0" numpy = "0.28.0" +itertools = "0.14.0" diff --git a/python/python_mg/_lib_name.pyi b/python/python_mg/_lib_name.pyi index aa96e0c..7868d6c 100644 --- a/python/python_mg/_lib_name.pyi +++ b/python/python_mg/_lib_name.pyi @@ -1,3 +1,4 @@ +import datetime from typing import Sequence import numpy as np import numpy.typing as npt @@ -175,3 +176,51 @@ class Lexicon: n_beams: int | None = 256, ) -> npt.NDArray[np.bool]: pass + +class Actor: + name: str + properties: set[str] + + def __init__( + self, + name: str, + properties: set[str] | None = None, + ) -> None: ... + +class Event: + agent: str | None + patient: str | None + properties: set[str] + + def __init__( + self, + agent: str | None = None, + patient: str | None = None, + properties: set[str] | None = None, + ) -> None: ... + +class PossibleEvent: + has_agent: bool + has_patient: bool + is_reflexive: bool + name: str + +class Scenario: + actors: list[Actor] + events: list[Event] + + def __init__(self, s: str) -> None: ... + def evaluate( + self, + expression: str, + max_steps: int | None = 256, + timeout: datetime.timedelta | None = None, + ) -> bool | Actor | Event | set[Actor] | set[Event]: ... + @staticmethod + def all_scenarios( + actors: list[str], event_kinds: list[PossibleEvent], actor_properties: list[str] + ) -> ScenarioIterator: ... + +class ScenarioIterator: + def __iter__(self) -> ScenarioIterator: ... + def __next__(self) -> Scenario: ... 
diff --git a/python/tests/test_mg.py b/python/tests/test_mg.py index 2d96be8..4c0a499 100644 --- a/python/tests/test_mg.py +++ b/python/tests/test_mg.py @@ -2,7 +2,7 @@ import pickle from python_mg import Lexicon, Continuation -from python_mg.semantics import Scenario +from python_mg.semantics import Scenario, Actor, Event from python_mg.syntax import Trace, Mover @@ -47,6 +47,19 @@ def test_scenario(): ] assert len(scenarios) == 9 + phi = Scenario("").evaluate( + "(lambda a x some_e(e, pe_runs(e), AgentOf(x, e)))(a_John)" + ) + assert isinstance(phi, bool) + assert phi + + john = Scenario("").evaluate( + "iota(x, some_e(e, pe_runs(e), AgentOf(x, e)))" + ) + assert isinstance(john, Actor) + assert john.name == "John" + assert john.properties == {"cool"} + def test_trees(): grammar = """ diff --git a/src/lib.rs b/src/lib.rs index 074c3de..d97ab32 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -730,7 +730,7 @@ impl PyLexicon { }) } - #[allow(clippy::too_many_arguments)] + #[expect(clippy::too_many_arguments)] #[pyo3(signature = (s, category, min_log_prob=-128.0, move_prob=0.5, max_steps=64, n_beams=256, max_parses=None))] ///Parses a string and returns all found parses in a list ///The string, s, should be delimited by spaces for words and hyphens for multi-word expressions from head-movement diff --git a/src/semantics.rs b/src/semantics.rs index 70212d2..5f35e7d 100644 --- a/src/semantics.rs +++ b/src/semantics.rs @@ -1,12 +1,25 @@ -use std::{collections::BTreeMap, fmt::Display, sync::Arc}; +use std::{ + collections::{BTreeMap, BTreeSet, HashSet}, + fmt::Display, + hash::Hash, + sync::Arc, + time::Duration, +}; -use pyo3::{exceptions::PyValueError, prelude::*}; -use simple_semantics::{Entity, EventType, PossibleEvent, Scenario, ScenarioIterator, ThetaRoles}; +use itertools::Itertools; +use pyo3::{IntoPyObjectExt, exceptions::PyValueError, prelude::*}; +use simple_semantics::{ + Entity, EventType, LanguageResult, PossibleEvent, Scenario, ScenarioIterator, ThetaRoles, + 
lambda::RootedLambdaPool, + language::{ExecutionConfig, Expr}, +}; #[pyclass(name = "Scenario", str, eq, from_py_object)] #[derive(Debug, Clone, Eq, PartialEq)] pub struct PyScenario { + #[pyo3(get, set)] actors: Vec, + #[pyo3(get, set)] events: Vec, } @@ -15,8 +28,9 @@ impl From> for PyScenario { let actors = value .actors() .iter() - .map(|x| { - let properties = value + .map(|x| PyActor { + name: x.to_string(), + properties: value .properties() .iter() .filter_map(|(k, v)| { @@ -26,11 +40,7 @@ impl From> for PyScenario { None } }) - .collect::>(); - PyActor { - name: x.to_string(), - properties, - } + .collect(), }) .collect(); @@ -38,8 +48,10 @@ impl From> for PyScenario { .thematic_relations() .iter() .enumerate() - .map(|(i, x)| { - let properties = value + .map(|(i, x)| PyEvent { + agent: x.agent.map(|x| x.to_string()), + patient: x.patient.map(|x| x.to_string()), + properties: value .properties() .iter() .filter_map(|(k, v)| { @@ -49,12 +61,7 @@ impl From> for PyScenario { None } }) - .collect::>(); - PyEvent { - agent: x.agent.map(|x| x.to_string()), - patient: x.patient.map(|x| x.to_string()), - properties, - } + .collect(), }) .collect(); @@ -95,6 +102,100 @@ impl PyScenario { } } +struct LanguageResultWrapper<'a>(LanguageResult<'a>, Scenario<'a>); + +fn convert_to_py_actor(name: &str, scenario: &Scenario<'_>) -> PyActor { + PyActor { + name: name.to_string(), + properties: scenario + .properties() + .iter() + .filter_map(|(prop, entries)| { + if entries.contains(&Entity::Actor(name)) { + Some(prop.to_string()) + } else { + None + } + }) + .collect(), + } +} + +fn convert_to_py_event(e_i: u8, scenario: &Scenario<'_>) -> Result { + let e = scenario + .thematic_relations() + .get(e_i as usize) + .ok_or_else(|| { + PyValueError::new_err(format!( + "Result is event {e_i}, but no such event exists in the scenario!" 
+ )) + })?; + + Ok(PyEvent { + agent: e.agent.map(|x| x.to_string()), + patient: e.patient.map(|x| x.to_string()), + properties: scenario + .properties() + .iter() + .filter_map(|(prop, entries)| { + if entries.contains(&Entity::Event(e_i)) { + Some(prop.to_string()) + } else { + None + } + }) + .collect(), + }) +} + +impl<'py> IntoPyObject<'py> for LanguageResultWrapper<'_> { + type Target = PyAny; + + type Output = Bound<'py, Self::Target>; + + type Error = PyErr; + + fn into_pyobject(self, py: Python<'py>) -> Result { + match self.0 { + LanguageResult::Bool(bool) => bool.into_bound_py_any(py), + LanguageResult::Actor(name) => convert_to_py_actor(name, &self.1).into_bound_py_any(py), + LanguageResult::Event(e_i) => convert_to_py_event(e_i, &self.1)?.into_bound_py_any(py), + LanguageResult::ActorSet(items) => items + .into_iter() + .map(|name| convert_to_py_actor(name, &self.1)) + .collect::>() + .into_bound_py_any(py), + LanguageResult::EventSet(items) => items + .into_iter() + .map(|e_i| convert_to_py_event(e_i, &self.1)) + .collect::, _>>()? + .into_bound_py_any(py), + } + } +} + +impl PyScenario { + fn execute<'a>( + &'a self, + mut expr: RootedLambdaPool<'a, Expr<'a>>, + config: Option, + ) -> PyResult> { + let scenario = self.as_scenario(); + expr.reduce() + .map_err(|e| PyValueError::new_err(e.to_string()))?; + expr.cleanup(); + + let pool = expr + .into_pool() + .map_err(|e| PyValueError::new_err(e.to_string()))?; + + let language_result = pool + .run(&scenario, config) + .map_err(|e| PyValueError::new_err(e.to_string()))?; + Ok(LanguageResultWrapper(language_result, scenario)) + } +} + #[pymethods] impl PyScenario { #[new] @@ -108,6 +209,48 @@ impl PyScenario { format!("Scenario({self})") } + #[pyo3(signature = (expression, max_steps=64, timeout=None))] + ///Executes an language of thought expression in this scenario. Will potentially throw a PresuppositionException if + ///something is referenced that isn't in the scenario. 
It will also reduce any lambda + ///expressions if possible, and then will only execute the expression if it is fully reducible. + /// + ///Parameters + ///---------- + ///expression : str + /// The expression in the language of thought to execute. + ///max_steps : int or None, optional + /// The number of steps in the virtual machine to execute before giving up. + /// Default is 256. + ///timeout : datetime.timedelta or None, optional + /// The amount of time before the execution gives up. + /// Default is None + ///Returns + ///------- + ///bool or Actor or Event or set[Actor] or set[Event] + /// The result of the language evaluation, typed according to the + /// expression's return kind: + /// + /// - ``bool`` — a plain boolean value. + /// - ``Actor`` — a single actor resolved from the model. + /// - ``Event`` — a single event resolved from the model. + /// - ``set[Actor]`` — an unordered collection of actors. + /// - ``set[Event]`` — an unordered collection of events. + /// + ///Raises + ///------ + ///PyErr + /// If conversion of an ``Event`` or ``EventSet`` variant fails. 
+ fn evaluate<'a>( + &'a self, + expression: &'a str, + max_steps: Option, + timeout: Option, + ) -> PyResult> { + let expr = RootedLambdaPool::parse(expression) + .map_err(|e| PyValueError::new_err(e.to_string()))?; + self.execute(expr, Some(ExecutionConfig::new(max_steps, timeout))) + } + #[staticmethod] fn all_scenarios( actors: Vec, @@ -155,7 +298,7 @@ impl PyScenario { } #[pyclass(name = "PossibleEvent", eq, from_py_object)] -#[derive(Debug, Clone, Eq, PartialEq)] +#[derive(Debug, Clone, Eq, PartialEq, PartialOrd, Ord, Hash)] pub struct PyPossibleEvent { #[pyo3(get, set)] pub has_agent: bool, @@ -186,12 +329,33 @@ impl PyPossibleEvent { } #[pyclass(name = "Actor", eq, str, from_py_object)] -#[derive(Debug, Clone, Eq, PartialEq)] +#[derive(Debug, Clone, Eq, PartialEq, Hash, PartialOrd, Ord)] pub struct PyActor { #[pyo3(get, set)] pub name: String, #[pyo3(get, set)] - pub properties: Vec, + pub properties: BTreeSet, +} + +#[pymethods] +impl PyActor { + #[new] + #[pyo3(signature = (name, properties=None))] + ///Parameters + ///---------- + ///name : str + /// The name of the actor. + ///properties: set[str], optional + /// Any properties that apply to the actor. 
+ ///Returns + ///------- + ///Actor + fn new(name: String, properties: Option>) -> Self { + PyActor { + name, + properties: properties.unwrap_or_default(), + } + } } impl Display for PyActor { @@ -201,34 +365,72 @@ impl Display for PyActor { "{}{}{}{}", self.name, if self.properties.is_empty() { "" } else { " (" }, - self.properties.join(", "), + self.properties.iter().join(", "), if self.properties.is_empty() { "" } else { ")" }, ) } } #[pyclass(name = "Event", eq, str, from_py_object)] -#[derive(Debug, Clone, Eq, PartialEq)] +#[derive(Debug, Clone, Eq, PartialEq, PartialOrd, Ord, Hash)] pub struct PyEvent { #[pyo3(get, set)] pub agent: Option, #[pyo3(get, set)] pub patient: Option, #[pyo3(get, set)] - pub properties: Vec, + pub properties: BTreeSet, } + +#[pymethods] +impl PyEvent { + #[new] + #[pyo3(signature = (agent=None, patient=None, properties=None))] + ///Parameters + ///---------- + ///agent : str, optional + /// The name of the agent (if there is one) + ///patient : str, optional + /// The name of the patient (if there is one) + ///properties: set[str], optional + /// Any properties that apply to the actor. 
+ ///Returns + ///------- + ///Event + fn new( + agent: Option, + patient: Option, + properties: Option>, + ) -> Self { + PyEvent { + agent, + patient, + properties: properties.unwrap_or_default(), + } + } +} + impl Display for PyEvent { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!( f, - "{{{}{}{}}}", - self.agent.as_deref().unwrap_or(""), + "{{{}{}{}{}{}{}}}", + self.agent + .as_deref() + .map(|x| format!("A = {x}")) + .unwrap_or("".to_string()), if self.patient.is_some() && self.agent.is_some() { - " " + ", " } else { "" }, - self.patient.as_deref().unwrap_or("") + self.patient + .as_deref() + .map(|x| format!("P = {x}")) + .unwrap_or("".to_string()), + if self.properties.is_empty() { "" } else { " (" }, + self.properties.iter().join(" "), + if self.properties.is_empty() { "" } else { ")" }, ) } } From 79dd52e35b97b88b54c530de8480c42c95ee98b7 Mon Sep 17 00:00:00 2001 From: Michael Goodale Date: Tue, 24 Feb 2026 13:18:47 +0100 Subject: [PATCH 03/11] Restructed code a bit --- src/lib.rs | 6 +- src/semantics.rs | 251 +------------------------------------ src/semantics/lot_types.rs | 164 ++++++++++++++++++++++++ src/semantics/scenario.rs | 89 +++++++++++++ 4 files changed, 262 insertions(+), 248 deletions(-) create mode 100644 src/semantics/lot_types.rs create mode 100644 src/semantics/scenario.rs diff --git a/src/lib.rs b/src/lib.rs index d97ab32..3103ca1 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -19,7 +19,11 @@ use graphing::{PyMgEdge, PyMgNode}; mod semantics; use crate::{ - semantics::{PyActor, PyEvent, PyPossibleEvent, PyScenario, PyScenarioIterator}, + semantics::{ + PyPossibleEvent, PyScenarioIterator, + lot_types::{PyActor, PyEvent}, + scenario::PyScenario, + }, tokenizers::TokenMap, }; diff --git a/src/semantics.rs b/src/semantics.rs index 5f35e7d..c8d63fa 100644 --- a/src/semantics.rs +++ b/src/semantics.rs @@ -14,140 +14,13 @@ use simple_semantics::{ language::{ExecutionConfig, Expr}, }; -#[pyclass(name = "Scenario", str, 
eq, from_py_object)] -#[derive(Debug, Clone, Eq, PartialEq)] -pub struct PyScenario { - #[pyo3(get, set)] - actors: Vec, - #[pyo3(get, set)] - events: Vec, -} - -impl From> for PyScenario { - fn from(value: Scenario) -> Self { - let actors = value - .actors() - .iter() - .map(|x| PyActor { - name: x.to_string(), - properties: value - .properties() - .iter() - .filter_map(|(k, v)| { - if v.contains(&Entity::Actor(x)) { - Some(k.to_string()) - } else { - None - } - }) - .collect(), - }) - .collect(); - - let events = value - .thematic_relations() - .iter() - .enumerate() - .map(|(i, x)| PyEvent { - agent: x.agent.map(|x| x.to_string()), - patient: x.patient.map(|x| x.to_string()), - properties: value - .properties() - .iter() - .filter_map(|(k, v)| { - if v.contains(&Entity::Event(u8::try_from(i).expect("Too many events!"))) { - Some(k.to_string()) - } else { - None - } - }) - .collect(), - }) - .collect(); - - PyScenario { actors, events } - } -} - -impl Display for PyScenario { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{}", self.as_scenario()) - } -} - -impl PyScenario { - fn as_scenario<'a>(&'a self) -> Scenario<'a> { - let actors = self.actors.iter().map(|x| x.name.as_str()).collect(); - let thematic_relations = self.events.iter().map(|x| x.into_theta_roles()).collect(); - let mut properties: BTreeMap<_, Vec<_>> = BTreeMap::new(); - - for a in &self.actors { - for p in &a.properties { - properties - .entry(p.as_str()) - .or_default() - .push(Entity::Actor(a.name.as_str())); - } - } - for (i, e) in self.events.iter().enumerate() { - for p in &e.properties { - properties - .entry(p.as_str()) - .or_default() - .push(Entity::Event(u8::try_from(i).expect("Too many events!"))); - } - } - - Scenario::new(actors, thematic_relations, properties) - } -} +pub mod lot_types; +use lot_types::{PyActor, PyEvent, convert_to_py_actor, convert_to_py_event}; +pub mod scenario; +use scenario::PyScenario; struct 
LanguageResultWrapper<'a>(LanguageResult<'a>, Scenario<'a>); -fn convert_to_py_actor(name: &str, scenario: &Scenario<'_>) -> PyActor { - PyActor { - name: name.to_string(), - properties: scenario - .properties() - .iter() - .filter_map(|(prop, entries)| { - if entries.contains(&Entity::Actor(name)) { - Some(prop.to_string()) - } else { - None - } - }) - .collect(), - } -} - -fn convert_to_py_event(e_i: u8, scenario: &Scenario<'_>) -> Result { - let e = scenario - .thematic_relations() - .get(e_i as usize) - .ok_or_else(|| { - PyValueError::new_err(format!( - "Result is event {e_i}, but no such event exists in the scenario!" - )) - })?; - - Ok(PyEvent { - agent: e.agent.map(|x| x.to_string()), - patient: e.patient.map(|x| x.to_string()), - properties: scenario - .properties() - .iter() - .filter_map(|(prop, entries)| { - if entries.contains(&Entity::Event(e_i)) { - Some(prop.to_string()) - } else { - None - } - }) - .collect(), - }) -} - impl<'py> IntoPyObject<'py> for LanguageResultWrapper<'_> { type Target = PyAny; @@ -328,122 +201,6 @@ impl PyPossibleEvent { } } -#[pyclass(name = "Actor", eq, str, from_py_object)] -#[derive(Debug, Clone, Eq, PartialEq, Hash, PartialOrd, Ord)] -pub struct PyActor { - #[pyo3(get, set)] - pub name: String, - #[pyo3(get, set)] - pub properties: BTreeSet, -} - -#[pymethods] -impl PyActor { - #[new] - #[pyo3(signature = (name, properties=None))] - ///Parameters - ///---------- - ///name : str - /// The name of the actor. - ///properties: set[str], optional - /// Any properties that apply to the actor. 
- ///Returns - ///------- - ///Actor - fn new(name: String, properties: Option>) -> Self { - PyActor { - name, - properties: properties.unwrap_or_default(), - } - } -} - -impl Display for PyActor { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!( - f, - "{}{}{}{}", - self.name, - if self.properties.is_empty() { "" } else { " (" }, - self.properties.iter().join(", "), - if self.properties.is_empty() { "" } else { ")" }, - ) - } -} - -#[pyclass(name = "Event", eq, str, from_py_object)] -#[derive(Debug, Clone, Eq, PartialEq, PartialOrd, Ord, Hash)] -pub struct PyEvent { - #[pyo3(get, set)] - pub agent: Option, - #[pyo3(get, set)] - pub patient: Option, - #[pyo3(get, set)] - pub properties: BTreeSet, -} - -#[pymethods] -impl PyEvent { - #[new] - #[pyo3(signature = (agent=None, patient=None, properties=None))] - ///Parameters - ///---------- - ///agent : str, optional - /// The name of the agent (if there is one) - ///patient : str, optional - /// The name of the patient (if there is one) - ///properties: set[str], optional - /// Any properties that apply to the actor. 
- ///Returns - ///------- - ///Event - fn new( - agent: Option, - patient: Option, - properties: Option>, - ) -> Self { - PyEvent { - agent, - patient, - properties: properties.unwrap_or_default(), - } - } -} - -impl Display for PyEvent { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!( - f, - "{{{}{}{}{}{}{}}}", - self.agent - .as_deref() - .map(|x| format!("A = {x}")) - .unwrap_or("".to_string()), - if self.patient.is_some() && self.agent.is_some() { - ", " - } else { - "" - }, - self.patient - .as_deref() - .map(|x| format!("P = {x}")) - .unwrap_or("".to_string()), - if self.properties.is_empty() { "" } else { " (" }, - self.properties.iter().join(" "), - if self.properties.is_empty() { "" } else { ")" }, - ) - } -} - -impl PyEvent { - pub fn into_theta_roles<'a>(self: &'a PyEvent) -> ThetaRoles<'a> { - ThetaRoles { - agent: self.agent.as_deref(), - patient: self.patient.as_deref(), - } - } -} - #[pyclass(name = "ScenarioGenerator")] pub struct PyScenarioIterator { generator: ScenarioIterator<'static>, diff --git a/src/semantics/lot_types.rs b/src/semantics/lot_types.rs new file mode 100644 index 0000000..5081c1a --- /dev/null +++ b/src/semantics/lot_types.rs @@ -0,0 +1,164 @@ +use super::*; + + +pub(super) fn convert_to_py_actor(name: &str, scenario: &Scenario<'_>) -> PyActor { + PyActor { + name: name.to_string(), + properties: scenario + .properties() + .iter() + .filter_map(|(prop, entries)| { + if entries.contains(&Entity::Actor(name)) { + Some(prop.to_string()) + } else { + None + } + }) + .collect(), + } +} + +pub(super) fn convert_to_py_event(e_i: u8, scenario: &Scenario<'_>) -> Result { + let e = scenario + .thematic_relations() + .get(e_i as usize) + .ok_or_else(|| { + PyValueError::new_err(format!( + "Result is event {e_i}, but no such event exists in the scenario!" 
+ )) + })?; + + Ok(PyEvent { + agent: e.agent.map(|x| x.to_string()), + patient: e.patient.map(|x| x.to_string()), + properties: scenario + .properties() + .iter() + .filter_map(|(prop, entries)| { + if entries.contains(&Entity::Event(e_i)) { + Some(prop.to_string()) + } else { + None + } + }) + .collect(), + }) +} + + +#[pyclass(name = "Actor", eq, str, from_py_object)] +#[derive(Debug, Clone, Eq, PartialEq, Hash, PartialOrd, Ord)] +pub struct PyActor { + #[pyo3(get, set)] + pub name: String, + #[pyo3(get, set)] + pub properties: BTreeSet, +} + +#[pymethods] +impl PyActor { + #[new] + #[pyo3(signature = (name, properties=None))] + ///Parameters + ///---------- + ///name : str + /// The name of the actor. + ///properties: set[str], optional + /// Any properties that apply to the actor. + ///Returns + ///------- + ///Actor + fn new(name: String, properties: Option>) -> Self { + PyActor { + name, + properties: properties.unwrap_or_default(), + } + } +} + +impl Display for PyActor { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "{}{}{}{}", + self.name, + if self.properties.is_empty() { "" } else { " (" }, + self.properties.iter().join(", "), + if self.properties.is_empty() { "" } else { ")" }, + ) + } +} + + +#[pyclass(name = "Event", eq, str, from_py_object)] +#[derive(Debug, Clone, Eq, PartialEq, PartialOrd, Ord, Hash)] +pub struct PyEvent { + #[pyo3(get, set)] + pub agent: Option, + #[pyo3(get, set)] + pub patient: Option, + #[pyo3(get, set)] + pub properties: BTreeSet, +} + +#[pymethods] +impl PyEvent { + #[new] + #[pyo3(signature = (agent=None, patient=None, properties=None))] + ///Parameters + ///---------- + ///agent : str, optional + /// The name of the agent (if there is one) + ///patient : str, optional + /// The name of the patient (if there is one) + ///properties: set[str], optional + /// Any properties that apply to the actor. 
+ ///Returns + ///------- + ///Event + fn new( + agent: Option, + patient: Option, + properties: Option>, + ) -> Self { + PyEvent { + agent, + patient, + properties: properties.unwrap_or_default(), + } + } +} + +impl Display for PyEvent { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "{{{}{}{}{}{}{}}}", + self.agent + .as_deref() + .map(|x| format!("A = {x}")) + .unwrap_or("".to_string()), + if self.patient.is_some() && self.agent.is_some() { + ", " + } else { + "" + }, + self.patient + .as_deref() + .map(|x| format!("P = {x}")) + .unwrap_or("".to_string()), + if self.properties.is_empty() { "" } else { " (" }, + self.properties.iter().join(" "), + if self.properties.is_empty() { "" } else { ")" }, + ) + } +} + +impl PyEvent { + pub fn into_theta_roles<'a>(self: &'a PyEvent) -> ThetaRoles<'a> { + ThetaRoles { + agent: self.agent.as_deref(), + patient: self.patient.as_deref(), + } + } +} diff --git a/src/semantics/scenario.rs b/src/semantics/scenario.rs new file mode 100644 index 0000000..9497624 --- /dev/null +++ b/src/semantics/scenario.rs @@ -0,0 +1,89 @@ +use super::*; + +#[pyclass(name = "Scenario", str, eq, from_py_object)] +#[derive(Debug, Clone, Eq, PartialEq)] +pub struct PyScenario { + #[pyo3(get, set)] + actors: Vec, + #[pyo3(get, set)] + events: Vec, +} + +impl From> for PyScenario { + fn from(value: Scenario) -> Self { + let actors = value + .actors() + .iter() + .map(|x| PyActor { + name: x.to_string(), + properties: value + .properties() + .iter() + .filter_map(|(k, v)| { + if v.contains(&Entity::Actor(x)) { + Some(k.to_string()) + } else { + None + } + }) + .collect(), + }) + .collect(); + + let events = value + .thematic_relations() + .iter() + .enumerate() + .map(|(i, x)| PyEvent { + agent: x.agent.map(|x| x.to_string()), + patient: x.patient.map(|x| x.to_string()), + properties: value + .properties() + .iter() + .filter_map(|(k, v)| { + if v.contains(&Entity::Event(u8::try_from(i).expect("Too many 
events!"))) { + Some(k.to_string()) + } else { + None + } + }) + .collect(), + }) + .collect(); + + PyScenario { actors, events } + } +} + +impl Display for PyScenario { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.as_scenario()) + } +} + +impl PyScenario { + pub(super) fn as_scenario<'a>(&'a self) -> Scenario<'a> { + let actors = self.actors.iter().map(|x| x.name.as_str()).collect(); + let thematic_relations = self.events.iter().map(|x| x.into_theta_roles()).collect(); + let mut properties: BTreeMap<_, Vec<_>> = BTreeMap::new(); + + for a in &self.actors { + for p in &a.properties { + properties + .entry(p.as_str()) + .or_default() + .push(Entity::Actor(a.name.as_str())); + } + } + for (i, e) in self.events.iter().enumerate() { + for p in &e.properties { + properties + .entry(p.as_str()) + .or_default() + .push(Entity::Event(u8::try_from(i).expect("Too many events!"))); + } + } + + Scenario::new(actors, thematic_relations, properties) + } +} From c779a623438bf9f9db08e042bf7b8accf5530848 Mon Sep 17 00:00:00 2001 From: Michael Goodale Date: Tue, 24 Feb 2026 17:05:20 +0100 Subject: [PATCH 04/11] Reorganized code a bit more --- src/lib.rs | 221 +++---------------------------------- src/semantics/lot_types.rs | 3 - src/syntax.rs | 219 ++++++++++++++++++++++++++++++++++++ src/tokenizers.rs | 8 +- 4 files changed, 241 insertions(+), 210 deletions(-) create mode 100644 src/syntax.rs diff --git a/src/lib.rs b/src/lib.rs index 3103ca1..2c5b233 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -7,16 +7,19 @@ use std::{ use anyhow::anyhow; use logprob::LogProb; use minimalist_grammar_parser::{ - Generator, ParsingConfig, PhonContent, Pronounciation, RulePool, + Generator, ParsingConfig, PhonContent, Pronounciation, lexicon::{LexemeId, LexicalEntry, Lexicon, SemanticLexicon}, parsing::beam::Continuation, }; use pyo3::{exceptions::PyValueError, prelude::*}; -mod graphing; +pub mod graphing; use graphing::{PyMgEdge, PyMgNode}; mod 
semantics; +mod syntax; +mod tokenizers; +use syntax::PySyntacticStructure; use crate::{ semantics::{ @@ -27,171 +30,6 @@ use crate::{ tokenizers::TokenMap, }; -#[pyclass(name = "SyntacticStructure", str, eq, frozen)] -#[derive(Debug)] -///The representation of a syntactic structure generated by a grammar, or alternatively the result -///of parsing a string. -struct PySyntacticStructure { - prob: LogProb, - string: Vec>, - rules: RulePool, - lex: Py, -} - -impl PartialEq for PySyntacticStructure { - fn eq(&self, other: &Self) -> bool { - self.prob == other.prob && self.string == other.string && self.rules == other.rules - } -} - -impl Display for PySyntacticStructure { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let len = self.string.len(); - for (i, x) in self.string.iter().enumerate() { - match x { - PhonContent::Normal(s) => write!(f, "{s}")?, - PhonContent::Affixed(items) => write!(f, "{}", items.join("-"))?, - }; - if i != len - 1 { - write!(f, " ")?; - } - } - Ok(()) - } -} - -#[pymethods] -impl PySyntacticStructure { - ///The log probability of generating this SyntacticStructure using its associated Lexicon. - /// - ///Returns - ///------- - ///float - /// the log probability - fn log_prob(&self) -> f64 { - self.prob.into_inner() - } - - fn contains_lexical_entry(&self, s: &str) -> PyResult { - let lex = self.lex.get(); - let entry = LexicalEntry::parse(s).map_err(|e| PyValueError::new_err(e.to_string()))?; - Ok(lex - .lexeme_to_id - .get(&entry) - .is_some_and(|x| self.rules.used_lemmas().any(|y| &y == x))) - } - - ///The probability of generating this SyntacticStructure using its associated Lexicon. 
- /// - ///Parameters - ///---------- - ///s : str or None - /// The word (or empty word) that may or may not be present - /// - ///Returns - ///------- - ///bool - /// whether the word is present in the structure - fn contains_word(&self, mut s: Option<&str>) -> bool { - let lex = self.lex.get(); - if let Some(s_inner) = &s - && s_inner.is_empty() - { - s = None; - } - lex.lemma_to_id - .get(&s.into()) - .is_some_and(|x| self.rules.used_lemmas().any(|y| x.contains(&y))) - } - - ///The probability of generating this SyntacticStructure using its associated Lexicon. - /// - ///Returns - ///------- - ///float - /// the probability of the structure - fn prob(&self) -> f64 { - self.prob.into_inner().exp() - } - - ///The number of derivational steps necessary to derive this SyntacticStructure using its Lexicon - /// - ///Returns - ///------- - ///int - /// the number of steps - fn n_steps(&self) -> usize { - self.rules.n_steps() - } - - ///Turns the SyntacticStructure into a tree that can be rendered with LaTeX. - ///Requires including `latex-commands.tex `_) in the LaTeX preamble. - /// - ///Returns - ///------- - ///str - /// A LaTeX representation of the parse tree - fn latex(&self) -> String { - let lex = self.lex.get(); - lex.lexicon - .lexicon() - .derivation(self.rules.clone()) - .tree() - .latex() - } - - ///The maximum number of moving elements stored in memory at one time. 
- /// - ///Returns - ///------- - ///int - /// the maximum number of moved items held in memory in the derivation - fn max_memory_load(&self) -> usize { - self.rules.max_memory_load() - } - - #[allow(clippy::type_complexity)] - fn __to_tree_inner(&self) -> (Vec<(usize, PyMgNode)>, Vec<(usize, usize, PyMgEdge)>, usize) { - let d = self - .lex - .get() - .lexicon - .lexicon() - .derivation(self.rules.clone()); - let tree = d.tree(); - let (g, root) = tree.petgraph(); - let nodes = g - .node_indices() - .map(|n| { - ( - n.index(), - PyMgNode( - g.node_weight(n) - .unwrap() - .clone() - .map(|x| x.to_string(), |x| x.to_string()), - ), - ) - }) - .collect::>(); - - let mut edges = g - .edge_indices() - .map(|e| { - let (src, tgt) = g.edge_endpoints(e).unwrap(); - ( - src.index(), - tgt.index(), - PyMgEdge(*g.edge_weight(e).unwrap()), - ) - }) - .collect::>(); - - edges.sort_by_key(|(_, _, x)| x.0); - (nodes, edges, root.index()) - } -} - #[derive(Debug, Clone, Eq, PartialEq)] enum PossiblySemanticLexicon { Normal(Lexicon<&'static str, &'static str>), @@ -271,8 +109,6 @@ struct PyLexicon { lexicon: SelfOwningLexicon, } -mod tokenizers; - impl Display for PyLexicon { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "MGLexicon{{\n{}\n}}", self.lexicon) @@ -303,12 +139,12 @@ impl GrammarIterator { if let Some((prob, string, rules)) = slf.generator.next() { slf.n_strings += 1; let py = slf.py(); - Some(PySyntacticStructure { + Some(PySyntacticStructure::new( + slf.lexicon.clone_ref(py), prob, string, rules, - lex: slf.lexicon.clone_ref(py), - }) + )) } else { None } @@ -462,7 +298,7 @@ impl PyLexicon { impl PyLexicon { #[allow(clippy::too_many_arguments)] fn inner_parse( - slf: PyRef<'_, Self>, + slf: &Bound<'_, Self>, s: &[PhonContent<&str>], category: String, min_log_prob: Option, @@ -471,48 +307,27 @@ impl PyLexicon { n_beams: Option, max_parses: Option, ) -> PyResult> { + let lex = slf.borrow(); let config = get_config(min_log_prob, 
move_prob, max_steps, n_beams)?; - let parser = slf + let parser = lex .lexicon .lexicon() .parse(s, category.as_str(), &config) .map_err(|e| PyValueError::new_err(e.to_string()))?; - let py = slf.py(); - let self_ref: Py = slf.clone().into_pyobject(py).unwrap().into(); + // let self_ref: Py = slf.clone().into_pyobject(py).unwrap().into(); + if let Some(max_parses) = max_parses { Ok(parser .take(max_parses) - .map(|(prob, string, rules)| PySyntacticStructure { - prob, - rules, - string: string - .iter() - .map(|x| match x { - PhonContent::Normal(x) => PhonContent::Normal(x.to_string()), - PhonContent::Affixed(items) => { - PhonContent::Affixed(items.iter().map(|x| x.to_string()).collect()) - } - }) - .collect(), - lex: self_ref.clone_ref(py), + .map(|(prob, string, rules)| { + PySyntacticStructure::into_syntax_structure(slf, prob, string, rules) }) .collect()) } else { Ok(parser - .map(|(prob, string, rules)| PySyntacticStructure { - prob, - rules, - string: string - .iter() - .map(|x| match x { - PhonContent::Normal(x) => PhonContent::Normal(x.to_string()), - PhonContent::Affixed(items) => { - PhonContent::Affixed(items.iter().map(|x| x.to_string()).collect()) - } - }) - .collect(), - lex: self_ref.clone_ref(py), + .map(|(prob, string, rules)| { + PySyntacticStructure::into_syntax_structure(slf, prob, string, rules) }) .collect()) } @@ -762,7 +577,7 @@ impl PyLexicon { ///list of SyntacticStructure /// All found parses of the string. 
fn parse( - slf: PyRef<'_, Self>, + slf: &Bound<'_, Self>, s: &str, category: String, min_log_prob: Option, diff --git a/src/semantics/lot_types.rs b/src/semantics/lot_types.rs index 5081c1a..024c741 100644 --- a/src/semantics/lot_types.rs +++ b/src/semantics/lot_types.rs @@ -1,6 +1,5 @@ use super::*; - pub(super) fn convert_to_py_actor(name: &str, scenario: &Scenario<'_>) -> PyActor { PyActor { name: name.to_string(), @@ -45,7 +44,6 @@ pub(super) fn convert_to_py_event(e_i: u8, scenario: &Scenario<'_>) -> Result, + string: Vec>, + rules: RulePool, + lex: Py, +} + +impl PartialEq for PySyntacticStructure { + fn eq(&self, other: &Self) -> bool { + self.prob == other.prob && self.string == other.string && self.rules == other.rules + } +} + +impl Display for PySyntacticStructure { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let len = self.string.len(); + for (i, x) in self.string.iter().enumerate() { + match x { + PhonContent::Normal(s) => write!(f, "{s}")?, + PhonContent::Affixed(items) => write!(f, "{}", items.join("-"))?, + }; + if i != len - 1 { + write!(f, " ")?; + } + } + Ok(()) + } +} + +impl PySyntacticStructure { + pub fn new( + lex: Py, + prob: LogProb, + string: Vec>, + rules: RulePool, + ) -> PySyntacticStructure { + PySyntacticStructure { + prob, + rules, + string, + lex, + } + } + + pub fn into_syntax_structure( + lexicon: &Bound<'_, PyLexicon>, + prob: LogProb, + string: &[PhonContent<&str>], + rules: RulePool, + ) -> PySyntacticStructure { + PySyntacticStructure { + prob, + rules, + string: string + .iter() + .map(|x| match x { + PhonContent::Normal(x) => PhonContent::Normal(x.to_string()), + PhonContent::Affixed(items) => { + PhonContent::Affixed(items.iter().map(|x| x.to_string()).collect()) + } + }) + .collect(), + lex: lexicon.as_unbound().clone_ref(lexicon.py()), + } + } + + pub fn lex(&self) -> &Py { + &self.lex + } + + pub fn string(&self) -> &Vec> { + &self.string + } +} + +#[pymethods] +impl PySyntacticStructure { 
+ ///The log probability of generating this SyntacticStructure using its associated Lexicon. + /// + ///Returns + ///------- + ///float + /// the log probability + fn log_prob(&self) -> f64 { + self.prob.into_inner() + } + + fn contains_lexical_entry(&self, s: &str) -> PyResult { + let lex = self.lex.get(); + let entry = LexicalEntry::parse(s).map_err(|e| PyValueError::new_err(e.to_string()))?; + Ok(lex + .lexeme_to_id + .get(&entry) + .is_some_and(|x| self.rules.used_lemmas().any(|y| &y == x))) + } + + ///The probability of generating this SyntacticStructure using its associated Lexicon. + /// + ///Parameters + ///---------- + ///s : str or None + /// The word (or empty word) that may or may not be present + /// + ///Returns + ///------- + ///bool + /// whether the word is present in the structure + fn contains_word(&self, mut s: Option<&str>) -> bool { + let lex = self.lex.get(); + if let Some(s_inner) = &s + && s_inner.is_empty() + { + s = None; + } + lex.lemma_to_id + .get(&s.into()) + .is_some_and(|x| self.rules.used_lemmas().any(|y| x.contains(&y))) + } + + ///The probability of generating this SyntacticStructure using its associated Lexicon. + /// + ///Returns + ///------- + ///float + /// the probability of the structure + fn prob(&self) -> f64 { + self.prob.into_inner().exp() + } + + ///The number of derivational steps necessary to derive this SyntacticStructure using its Lexicon + /// + ///Returns + ///------- + ///int + /// the number of steps + fn n_steps(&self) -> usize { + self.rules.n_steps() + } + + ///Turns the SyntacticStructure into a tree that can be rendered with LaTeX. + ///Requires including `latex-commands.tex `_) in the LaTeX preamble. + /// + ///Returns + ///------- + ///str + /// A LaTeX representation of the parse tree + fn latex(&self) -> String { + let lex = self.lex.get(); + lex.lexicon + .lexicon() + .derivation(self.rules.clone()) + .tree() + .latex() + } + + ///The maximum number of moving elements stored in memory at one time. 
+ /// + ///Returns + ///------- + ///int + /// the maximum number of moved items held in memory in the derivation + fn max_memory_load(&self) -> usize { + self.rules.max_memory_load() + } + + #[allow(clippy::type_complexity)] + fn __to_tree_inner(&self) -> (Vec<(usize, PyMgNode)>, Vec<(usize, usize, PyMgEdge)>, usize) { + let d = self + .lex + .get() + .lexicon + .lexicon() + .derivation(self.rules.clone()); + let tree = d.tree(); + let (g, root) = tree.petgraph(); + let nodes = g + .node_indices() + .map(|n| { + ( + n.index(), + PyMgNode( + g.node_weight(n) + .unwrap() + .clone() + .map(|x| x.to_string(), |x| x.to_string()), + ), + ) + }) + .collect::>(); + + let mut edges = g + .edge_indices() + .map(|e| { + let (src, tgt) = g.edge_endpoints(e).unwrap(); + ( + src.index(), + tgt.index(), + PyMgEdge(*g.edge_weight(e).unwrap()), + ) + }) + .collect::>(); + + edges.sort_by_key(|(_, _, x)| x.0); + (nodes, edges, root.index()) + } +} diff --git a/src/tokenizers.rs b/src/tokenizers.rs index 4b93407..ab66e5d 100644 --- a/src/tokenizers.rs +++ b/src/tokenizers.rs @@ -371,7 +371,7 @@ impl PyLexicon { /// list of :meth:`python_mg.SyntacticStructure` /// List of all parses of the token string fn parse_tokens( - slf: PyRef<'_, Self>, + slf: &Bound<'_, Self>, s: Vec, category: String, min_log_prob: Option, @@ -380,7 +380,7 @@ impl PyLexicon { n_beams: Option, max_parses: Option, ) -> PyResult> { - let v = to_phon_content(&s, &slf.word_id)?; + let v = to_phon_content(&s, &slf.borrow().word_id)?; PyLexicon::inner_parse( slf, @@ -411,10 +411,10 @@ impl PySyntacticStructure { ///ndarray of uint /// the tokenized string. 
fn tokens<'py>(slf: PyRef<'py, Self>) -> Bound<'py, PyArray1> { - let tokens = slf.lex.get().tokens(); + let tokens = slf.lex().get().tokens(); let mut output = vec![SOS]; - for c in &slf.string { + for c in slf.string() { match c { PhonContent::Normal(w) => output.push( *tokens From f1fcce5d501bb09dc5252bd5205d8f27d44decb0 Mon Sep 17 00:00:00 2001 From: Michael Goodale Date: Wed, 25 Feb 2026 11:42:02 +0100 Subject: [PATCH 05/11] Added meaning getter and updated toolchain and tests --- Cargo.lock | 8 +++--- python/python_mg/_lib_name.pyi | 4 +++ python/python_mg/semantics.py | 2 ++ python/tests/test_mg.py | 11 +++++++- rust-toolchain.toml | 2 +- src/lib.rs | 46 ++++++++++++++++++++++++++-------- src/semantics/lot_types.rs | 8 ++++++ src/syntax.rs | 20 +++++++++++++++ 8 files changed, 84 insertions(+), 17 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3f24510..69f759c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -638,7 +638,7 @@ dependencies = [ "aho-corasick", "memchr", "regex-automata 0.4.14", - "regex-syntax 0.8.9", + "regex-syntax 0.8.10", ] [[package]] @@ -660,7 +660,7 @@ checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f" dependencies = [ "aho-corasick", "memchr", - "regex-syntax 0.8.9", + "regex-syntax 0.8.10", ] [[package]] @@ -671,9 +671,9 @@ checksum = "dbb5fb1acd8a1a18b3dd5be62d25485eb770e05afb408a9627d14d451bae12da" [[package]] name = "regex-syntax" -version = "0.8.9" +version = "0.8.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a96887878f22d7bad8a3b6dc5b7440e0ada9a245242924394987b21cf2210a4c" +checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a" [[package]] name = "rustc-hash" diff --git a/python/python_mg/_lib_name.pyi b/python/python_mg/_lib_name.pyi index 7868d6c..d64dceb 100644 --- a/python/python_mg/_lib_name.pyi +++ b/python/python_mg/_lib_name.pyi @@ -57,6 +57,10 @@ class SyntacticStructure: def tokens(self) -> npt.NDArray[np.uint]: """Converts the 
string of this SyntacticStructure into a tokenized numpy array""" + @property + def meaning(self) -> list[str] | None: + """Returns the associated meaning(s), if there is one""" + def __to_tree_inner( self, ) -> tuple[list[tuple[int, MGNode]], list[tuple[int, int, MGEdge]], int]: ... diff --git a/python/python_mg/semantics.py b/python/python_mg/semantics.py index b6639ec..e4fb100 100644 --- a/python/python_mg/semantics.py +++ b/python/python_mg/semantics.py @@ -1 +1,3 @@ from python_mg._lib_name import Scenario, Actor, Event, PossibleEvent, ScenarioGenerator + +__all__ = ["Scenario", "Actor", "Event", "PossibleEvent", "ScenarioGenerator"] diff --git a/python/tests/test_mg.py b/python/tests/test_mg.py index 4c0a499..e021e64 100644 --- a/python/tests/test_mg.py +++ b/python/tests/test_mg.py @@ -38,10 +38,19 @@ def test_semantic_lexicon(): likes::d= =d v::lambda a x lambda a y some_e(e, pe_likes(e), AgentOf(y,e) & PatientOf(x, e))""" semantic_lexicon = Lexicon(grammar) assert semantic_lexicon.is_semantic() + s = semantic_lexicon.parse("John likes Mary", "v") + assert len(s) == 1 + parse = s[0] + assert parse.meaning == [ + "some_e(x, pe_likes(x), AgentOf(a_j, x) & PatientOf(a_m, x))" + ] def test_scenario(): - Scenario("") + s = Scenario("") + assert s.actors == [Actor("John", properties={"nice", "quick"})] + assert s.events == [Event(agent="John", properties={"run"})] + scenarios: list[Scenario] = [ x for x in Scenario.all_scenarios(["John", "Mary"], [], ["kind"]) ] diff --git a/rust-toolchain.toml b/rust-toolchain.toml index 45ef49e..c4ea774 100644 --- a/rust-toolchain.toml +++ b/rust-toolchain.toml @@ -1,3 +1,3 @@ [toolchain] profile = "default" -channel = "1.88.0" +channel = "1.93.0" diff --git a/src/lib.rs b/src/lib.rs index 2c5b233..f443abc 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -75,12 +75,20 @@ impl SelfOwningLexicon { }) } - fn lexicon(&self) -> &Lexicon<&'static str, &'static str> { + #[expect(clippy::needless_lifetimes)] + fn lexicon<'a>(&'a self) -> 
&'a Lexicon<&'a str, &'a str> { match &self.lexicon { PossiblySemanticLexicon::Normal(lexicon) => lexicon, PossiblySemanticLexicon::Semantic(semantic_lexicon) => semantic_lexicon.lexicon(), } } + + fn semantic_lexicon<'a>(&'a self) -> Option<&'a SemanticLexicon<'a, &'a str, &'a str>> { + match &self.lexicon { + PossiblySemanticLexicon::Normal(_) => None, + PossiblySemanticLexicon::Semantic(lex) => Some(lex), + } + } } impl Display for SelfOwningLexicon { @@ -115,6 +123,12 @@ impl Display for PyLexicon { } } +impl PyLexicon { + fn semantics<'a>(&'a self) -> Option<&'a SemanticLexicon<'a, &'a str, &'a str>> { + self.lexicon.semantic_lexicon() + } +} + #[pyclass] struct GrammarIterator { generator: Generator, String, String>, @@ -130,10 +144,10 @@ impl GrammarIterator { } fn __next__(mut slf: PyRefMut<'_, Self>) -> Option { - if let Some(n) = slf.max_strings { - if slf.n_strings >= n { - return None; - } + if let Some(n) = slf.max_strings + && slf.n_strings >= n + { + return None; } if let Some((prob, string, rules)) = slf.generator.next() { @@ -265,21 +279,31 @@ fn get_config( impl PyLexicon { fn from_lexicon(lexicon: SelfOwningLexicon) -> PyResult { + //unsafe here because the lexicon has the lifetime of the reference of the SelfOwningLexicon. + //We are owning it in the arc, so we have to make sure we can refer to it. + let lexeme_to_id: HashMap<_, LexemeId> = lexicon .lexicon() .lexemes_and_ids() .map_err(|e| anyhow!(e))? 
- .map(|(id, entry)| (entry, id)) + .map(|(id, entry)| { + let entry: LexicalEntry<&'static str, &'static str> = + unsafe { std::mem::transmute(entry) }; + (entry, id) + }) .collect(); let mut lemma_to_id = HashMap::default(); let mut word_id = TokenMap::default(); for leaf in lexicon.lexicon().leaves().iter().copied() { - let lemma = *lexicon + let lemma = lexicon .lexicon() .leaf_to_lemma(leaf) .expect("Invalid lexicon!"); + + let lemma: Pronounciation<&'static str> = unsafe { std::mem::transmute(*lemma) }; + if let Pronounciation::Pronounced(word) = lemma.as_ref() { word_id.add_word(word); } @@ -476,10 +500,10 @@ impl PyLexicon { }) .or_insert(prob); - if let Some(max_strings) = max_strings { - if hashmap.len() > max_strings { - break; - } + if let Some(max_strings) = max_strings + && hashmap.len() > max_strings + { + break; } } diff --git a/src/semantics/lot_types.rs b/src/semantics/lot_types.rs index 024c741..2b8f634 100644 --- a/src/semantics/lot_types.rs +++ b/src/semantics/lot_types.rs @@ -72,6 +72,10 @@ impl PyActor { properties: properties.unwrap_or_default(), } } + + fn __repr__(&self) -> String { + format!("Actor({self})") + } } impl Display for PyActor { @@ -124,6 +128,10 @@ impl PyEvent { properties: properties.unwrap_or_default(), } } + + fn __repr__(&self) -> String { + format!("Event({self})") + } } impl Display for PyEvent { diff --git a/src/syntax.rs b/src/syntax.rs index 8a9d00d..73b8d2c 100644 --- a/src/syntax.rs +++ b/src/syntax.rs @@ -15,6 +15,7 @@ pub struct PySyntacticStructure { prob: LogProb, string: Vec>, rules: RulePool, + meaning: Option>, lex: Py, } @@ -49,6 +50,12 @@ impl PySyntacticStructure { ) -> PySyntacticStructure { PySyntacticStructure { prob, + meaning: lex.get().semantics().map(|lex| { + rules + .to_interpretation(lex) + .map(|(a, _)| a.to_string()) + .collect() + }), rules, string, lex, @@ -63,6 +70,12 @@ impl PySyntacticStructure { ) -> PySyntacticStructure { PySyntacticStructure { prob, + meaning: 
lexicon.get().semantics().map(|lex| { + rules + .to_interpretation(lex) + .map(|(a, _)| a.to_string()) + .collect() + }), rules, string: string .iter() @@ -88,6 +101,13 @@ impl PySyntacticStructure { #[pymethods] impl PySyntacticStructure { + ///Returns the interpretation of this SyntacticStructure, provided that its associated Lexicon + ///has semantics + #[getter] + fn meaning(&self) -> &Option> { + &self.meaning + } + ///The log probability of generating this SyntacticStructure using its associated Lexicon. /// ///Returns From fdd28033346adcca3bf059f41c1b4c31698c7fbd Mon Sep 17 00:00:00 2001 From: Michael Goodale Date: Wed, 25 Feb 2026 12:52:22 +0100 Subject: [PATCH 06/11] Added questions and some linting fixes --- Cargo.lock | 4 +- pyproject.toml | 34 +++++---- python/python_mg/_lib_name.pyi | 129 +++++++++------------------------ python/python_mg/metrics.py | 22 +++--- python/python_mg/semantics.py | 2 + python/python_mg/syntax.py | 58 ++++++++++----- python/tests/test_mg.py | 37 +++++++--- src/lib.rs | 4 +- src/semantics.rs | 12 +-- src/semantics/scenario.rs | 24 +++++- 10 files changed, 170 insertions(+), 156 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 69f759c..66d5ffc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -341,7 +341,7 @@ checksum = "2687e6cf9c00f48e9284cf9fd15f2ef341d03cc7743abf9df4c5f07fdee50b18" [[package]] name = "minimalist-grammar-parser" version = "0.1.0" -source = "git+https://github.com/MichaelGoodale/minimalist-grammar-parser.git#a732350111aa7f49015d9c1f0535f7a87f07e0fe" +source = "git+https://github.com/MichaelGoodale/minimalist-grammar-parser.git#23f151d41e8e06218626bf2a16a8a41248317562" dependencies = [ "ahash 0.8.12", "bitvec", @@ -745,7 +745,7 @@ checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" [[package]] name = "simple-semantics" version = "0.1.0" -source = "git+https://github.com/MichaelGoodale/simple-semantics.git#9342f11ad7e5c4817e11bbabc644067f5e73a916" +source = 
"git+https://github.com/MichaelGoodale/simple-semantics.git#86afc64524a554b08ca693c7911671d919fb6413" dependencies = [ "ahash 0.8.12", "chumsky", diff --git a/pyproject.toml b/pyproject.toml index b9f03b4..c7a2072 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,10 +12,10 @@ classifiers = [ ] dynamic = ["version"] dependencies = [ - "numpy>=1.24.4", - "pillow>=10.4.0", - "pydot>=4.0.1", - "rustworkx>=0.15.1", + "numpy>=1.24.4", + "pillow>=10.4.0", + "pydot>=4.0.1", + "rustworkx>=0.15.1", ] [tool.maturin] features = ["pyo3/extension-module"] @@ -24,16 +24,16 @@ module-name = "python_mg._lib_name" [dependency-groups] dev = [ - "patchelf>=0.17.2.2", - "pytest>=8.3.5", - "sphinx>=7.1.2", - "sphinx-rtd-theme>=3.0.2", + "patchelf>=0.17.2.2", + "pytest>=8.3.5", + "sphinx>=7.1.2", + "sphinx-rtd-theme>=3.0.2", ] examples = [ - "datasets>=3.1.0", - "scipy>=1.10.1", - "torch>=2.5.1", - "transformers[torch]>=4.46.3", + "datasets>=3.1.0", + "scipy>=1.10.1", + "torch>=2.5.1", + "transformers[torch]>=4.46.3", ] [tool.setuptools.package-data] @@ -43,4 +43,12 @@ examples = [ where = ["src"] [tool.uv] -cache-keys = [{file = "pyproject.toml"}, {file = "Cargo.toml"}, {file = "**/*.rs"}] +cache-keys = [ + { file = "pyproject.toml" }, + { file = "Cargo.toml" }, + { file = "**/*.rs" }, +] + +[tool.ruff.lint] +ignore = ["E501"] +select = ["E", "F", "D", "ANN"] diff --git a/python/python_mg/_lib_name.pyi b/python/python_mg/_lib_name.pyi index d64dceb..9370764 100644 --- a/python/python_mg/_lib_name.pyi +++ b/python/python_mg/_lib_name.pyi @@ -7,97 +7,56 @@ from python_mg.syntax import ParseTree class MGNode: def is_trace(self) -> bool: ... - def trace_id(self) -> int: - """Gets the trace id of traces and raises an error otherwise""" - - def lemma_string(self) -> str: - """Format the node as a string in a tree if leaf or trace""" - - def is_stolen(self) -> str: - """Checks if the head has been stolen by head-movement""" + def trace_id(self) -> int: ... 
+ def lemma_string(self) -> str: ... + def is_stolen(self) -> str: ... class MGEdge: - def is_move(self) -> bool: - """Checks whether the edge is a movement edge""" - - def is_head_move(self) -> bool: - """Checks whether the edge is a head-movement edge""" - - def is_merge(self) -> bool: - """Checks whether the edge is a merge edge""" + def is_move(self) -> bool: ... + def is_head_move(self) -> bool: ... + def is_merge(self) -> bool: ... class SyntacticStructure: - """A parse tree for some string""" + """A parse tree for some string.""" def __init__(self) -> None: ... - def log_prob(self) -> float: - """Return the log probability.""" - - def n_steps(self) -> int: - """Returns the number of steps in the derivation of this structure""" - - def contains_lexical_entry(self, s: str) -> bool: - """Check if this structure contains a specific lexical entry (formatted as an MG entry, will raise an error if unparseable)""" - - def contains_word(self, s: str | None) -> bool: - """Check if this structure contains a specific word.""" - - def prob(self) -> float: - """Return the probability of this syntactic structure.""" - - def latex(self) -> str: - """Return a LaTeX representation of this syntactic structure.""" - - def to_tree(self) -> ParseTree: - """Converts a syntactic structure into a graph structure""" - - def max_memory_load(self) -> int: - """Gets the largest amount of movers at a single point""" - - def tokens(self) -> npt.NDArray[np.uint]: - """Converts the string of this SyntacticStructure into a tokenized numpy array""" - + def log_prob(self) -> float: ... + def n_steps(self) -> int: ... + def contains_lexical_entry(self, s: str) -> bool: ... + def contains_word(self, s: str | None) -> bool: ... + def prob(self) -> float: ... + def latex(self) -> str: ... + def to_tree(self) -> ParseTree: ... + def max_memory_load(self) -> int: ... + def tokens(self) -> npt.NDArray[np.uint]: ... 
@property - def meaning(self) -> list[str] | None: - """Returns the associated meaning(s), if there is one""" - + def meaning(self) -> list[str] | None: ... def __to_tree_inner( self, ) -> tuple[list[tuple[int, MGNode]], list[tuple[int, int, MGEdge]], int]: ... class Continuation: - """A continuation of a prefix string""" + """A continuation of a prefix string.""" def __init__(self, word: str) -> None: ... @staticmethod def EOS() -> "Continuation": ... - def is_end_of_string(self) -> bool: - """Check if the continuation is a end of string marker""" - - def is_word(self) -> bool: - """Check if the continuation is a word""" - - def is_multi_word(self) -> bool: - """Check if the continuation is an affixed word""" + def is_end_of_string(self) -> bool: ... + def is_word(self) -> bool: ... + def is_multi_word(self) -> bool: ... class GrammarIterator: def __iter__(self) -> GrammarIterator: ... def __next__(self) -> SyntacticStructure: ... class Lexicon: - """A Minimalist Grammar Lexicon""" + """A Minimalist Grammar Lexicon.""" def __init__(self, s: str) -> None: ... @staticmethod - def random_lexicon(lemmas: list[str]) -> "Lexicon": - """Generate a random lexicon from the list of lemmas""" - - def mdl(self, n_phonemes: int) -> float: - """Returns the model description length of the lexicon""" - - def is_semantic(self) -> bool: - """Returns whether the lexicon has semantic interpretations""" - + def random_lexicon(lemmas: list[str]) -> "Lexicon": ... + def mdl(self, n_phonemes: int) -> float: ... + def is_semantic(self) -> bool: ... def continuations( self, prefix: str, @@ -107,9 +66,7 @@ class Lexicon: max_steps: int | None = 64, n_beams: int | None = 256, max_strings: int | None = None, - ) -> set[Continuation]: - """Returns a set of all valid continuations from this prefix""" - + ) -> set[Continuation]: ... 
def generate_unique_strings( self, category: str, @@ -118,9 +75,7 @@ class Lexicon: max_steps: int | None = 64, n_beams: int | None = 256, max_strings: int | None = None, - ) -> list[tuple[list[str], float]]: - """Returns a list of all unique strings and their probabilities""" - + ) -> list[tuple[list[str], float]]: ... def generate_grammar( self, category: str, @@ -129,9 +84,7 @@ class Lexicon: max_steps: int | None = 64, n_beams: int | None = 256, max_strings: int | None = None, - ) -> GrammarIterator: - """Returns an iterator over all possible parses""" - + ) -> GrammarIterator: ... def parse( self, s: str, @@ -141,11 +94,7 @@ class Lexicon: max_steps: int | None = 64, n_beams: int | None = 256, max_strings: int | None = None, - ) -> list[SyntacticStructure]: - """Returns a list of all possible parses of that string. - The string, s, should be delimited by spaces for words and hyphens for multi-word expressions from head-movement - """ - + ) -> list[SyntacticStructure]: ... def parse_tokens( self, s: Sequence[int] | npt.NDArray[np.uint], @@ -155,21 +104,13 @@ class Lexicon: max_steps: int | None = 64, n_beams: int | None = 256, max_strings: int | None = None, - ) -> list[SyntacticStructure]: - """Returns a list of all possible parses of a string represented by tokens.""" - - def tokens(self) -> dict[str, int]: - pass - - def detokenize(self, s: Sequence[int] | npt.NDArray[np.uint]) -> list[str]: - pass - + ) -> list[SyntacticStructure]: ... + def tokens(self) -> dict[str, int]: ... + def detokenize(self, s: Sequence[int] | npt.NDArray[np.uint]) -> list[str]: ... def detokenize_batch( self, s: Sequence[Sequence[int]] | list[npt.NDArray[np.uint]] | npt.NDArray[np.uint], - ) -> list[list[str]]: - pass - + ) -> list[list[str]]: ... def token_continuations( self, x: npt.NDArray[np.uint], @@ -178,8 +119,7 @@ class Lexicon: move_prob: float = 0.5, max_steps: int | None = 64, n_beams: int | None = 256, - ) -> npt.NDArray[np.bool]: - pass + ) -> npt.NDArray[np.bool]: ... 
class Actor: name: str @@ -212,6 +152,7 @@ class PossibleEvent: class Scenario: actors: list[Actor] events: list[Event] + questions: list[str] def __init__(self, s: str) -> None: ... def evaluate( diff --git a/python/python_mg/metrics.py b/python/python_mg/metrics.py index f1e1253..ba0608f 100644 --- a/python/python_mg/metrics.py +++ b/python/python_mg/metrics.py @@ -10,16 +10,18 @@ def grammar_f1( preds: npt.NDArray[np.float64], correct: npt.NDArray[np.bool], ) -> dict[str, npt.NDArray[np.float64]]: - """ - Compute grammar F1 scores from boolean arrays of valid next moves and predictions. - The metric is described in `Meta-Learning Neural Mechanisms rather than Bayesian Priors `_ (Goodale et al., ACL 2025) + """Compute grammar F1 scores from boolean arrays of next moves and predictions. + + The metric is described in `Meta-Learning Neural Mechanisms rather than Bayesian + Priors `_ (Goodale et al., ACL 2025) Parameters ---------- preds : ndarray of float64 Predicted log probabilities for each token. Shape (..., seq_length, vocab_size). correct: ndarray of int - Boolean array for each valid token that can come next at that point in the sequence. Shape (..., seq_length, vocab_size). + Boolean array for each next valid token in the sequence. + Shape (..., seq_length, vocab_size). Returns ------- @@ -29,6 +31,7 @@ def grammar_f1( - 'precision': Precision scores - 'recall': Recall scores - 'f1': F1 scores + """ if preds.shape != correct.shape: raise ValueError("correct and preds must have matching shapes") @@ -66,9 +69,10 @@ def grammar_f1_from_strings( n_beams: int | None = 256, reduction: Literal["none", "sentence_mean", "length_mean"] = "sentence_mean", ) -> dict[str, npt.NDArray[np.float64]]: - """ - Compute grammar F1 scores from token sequences and predictions. - The metric is described in `Meta-Learning Neural Mechanisms rather than Bayesian Priors `_ (Goodale et al., ACL 2025) + """Compute grammar F1 scores from token sequences and predictions. 
+ + The metric is described in `Meta-Learning Neural Mechanisms rather than Bayesian + Priors `_ (Goodale et al., ACL 2025) Parameters @@ -108,11 +112,11 @@ def grammar_f1_from_strings( - 'precision': Precision scores - 'recall': Recall scores - 'f1': F1 scores - """ + """ if np.any(tokens < 0): raise ValueError( - "Some tokens are negative which means they will be cast to unsigned integers incorrectly" + "Some tokens are negative meaning they will be cast to unsigned integers incorrectly" ) conts = lexicon.token_continuations( diff --git a/python/python_mg/semantics.py b/python/python_mg/semantics.py index e4fb100..014a47d 100644 --- a/python/python_mg/semantics.py +++ b/python/python_mg/semantics.py @@ -1,3 +1,5 @@ +"""Defines tools related to semantics and interpretation of semantic grammars.""" + from python_mg._lib_name import Scenario, Actor, Event, PossibleEvent, ScenarioGenerator __all__ = ["Scenario", "Actor", "Event", "PossibleEvent", "ScenarioGenerator"] diff --git a/python/python_mg/syntax.py b/python/python_mg/syntax.py index d20e524..30790c4 100644 --- a/python/python_mg/syntax.py +++ b/python/python_mg/syntax.py @@ -1,3 +1,5 @@ +"""Defines tools related to syntax and viewing trees.""" + from __future__ import annotations from dataclasses import dataclass from python_mg._lib_name import SyntacticStructure, MGNode, MGEdge @@ -6,14 +8,17 @@ from rustworkx.visualization import graphviz_draw -def sort_key(G: rx.PyDiGraph[MGNode, MGEdge], e: int) -> int: - (n, _) = G.get_edge_endpoints_by_index(e) +def _sort_key(G: rx.PyDiGraph[MGNode, MGEdge], e: int) -> int: + n, _ = G.get_edge_endpoints_by_index(e) return G.get_node_data(n).trace_id() @dataclass class Mover: - """A list of words used to indicate where movement has occurred. See :meth:`python_mg.ParseTree.base_string`""" + """A list of words used to indicate where movement has occurred. + + See :meth:`python_mg.ParseTree.base_string`. 
+ """ s: list[str | Mover | Trace] """ The moved words """ @@ -24,13 +29,14 @@ class Mover: @dataclass class Trace: - """A representation of a trace index left by movement""" + """A representation of a trace index left by movement.""" trace: int """ the trace ID """ -def node_attrs(node: MGNode): +def node_attrs(node: MGNode) -> dict[str, str]: + """Get the attributes that defines node styling.""" attrs = {"label": str(node), "ordering": "out"} if node.is_stolen(): attrs["style"] = "dashed" @@ -41,77 +47,89 @@ def node_attrs(node: MGNode): def edge_attrs(edge: MGEdge) -> dict[str, str]: + """Get the attributes that defines edge styling.""" if edge.is_move() or edge.is_head_move(): return {"style": "dashed", "constraint": "false"} return {} class ParseTree: - """A class used for ParseTree that is generated by :meth:`python_mg.SyntacticStructure.to_tree`. - It can be used to get a GraphViz representation of the tree or to investigate the ParseTree as a graph. + """A class for ParseTree generated by :meth:`python_mg.SyntacticStructure.to_tree`. + + It can be used to get a GraphViz representation of the tree or to investigate the + ParseTree as a graph. 
""" def __init__( self, G: rx.PyDiGraph[MGNode, MGEdge], root: int, structure: SyntacticStructure - ): + ) -> None: + """Make a new ParseTree.""" self.root: int = root self.structure: SyntacticStructure = structure movement_edges = sorted( [x for x in G.filter_edges(lambda x: x.is_move())], - key=lambda x: sort_key(G, x), + key=lambda x: _sort_key(G, x), reverse=True, ) movements: dict[int, int] = {} for e in movement_edges: - (src, tgt) = G.get_edge_endpoints_by_index(e) + src, tgt = G.get_edge_endpoints_by_index(e) trace_id = G.get_node_data(src).trace_id() movements[trace_id] = tgt self.__movement_sources: dict[int, int] = {m: i for i, m in movements.items()} self.G: rx.PyDiGraph[MGNode, MGEdge] = G - """PyDiGraph[MGNode, MGEdge]: A `RustworkX `_ PyDiGraph which contains the syntactice structure of a sentence""" + """PyDiGraph[MGNode, MGEdge]: A `RustworkX ` + _ PyDiGraph which contains the syntactice structure of a sentence""" def normal_string(self) -> str: - """The string used by a ParseTree + """Get the string used by a ParseTree. Returns ------- str the parsed sentence + """ return str(self.structure) def base_string(self) -> list[str | Mover | Trace]: - """A richer representation of the parsed string, with traces where movement had occurred, and :meth:`python_mg.Mover` objects to indicated moved phrases. + """Get a richer representation of the parsed string. + + This representation has traces where movement had occurred, and + :meth:`python_mg.Mover` objects to indicated moved phrases. Returns ------- str the parsed sentence + """ linear_order = self.__explore(self.root) return linear_order def to_dot(self) -> str | None: - """Converts a tree to GraphViz DOT format + """Convert a tree to GraphViz DOT format. Returns ------- str The dot file for this tree + """ return self.G.to_dot(node_attr=node_attrs, edge_attr=edge_attrs) def to_image(self) -> Image.Image: - """Converts a tree to a PIL Image + """Convert a tree to a PIL Image. 
Returns ------- Image An image representation of the tree + """ return graphviz_draw( self.G, @@ -144,15 +162,17 @@ def __explore(self, n_i: int) -> list[str | Mover | Trace]: def to_tree(self: SyntacticStructure) -> ParseTree: - """Converts a SyntacticStructure to a ParseTree + """Convert a SyntacticStructure to a ParseTree. Returns ------- - The SyntacticStructure as a :meth:`python_mg.ParseTree` + :meth:`python_mg.ParseTree` + The SyntacticStructure as a :meth:`python_mg.ParseTree` + """ - (nodes, edges, root) = self.__to_tree_inner() # pyright: ignore[reportPrivateUsage] + nodes, edges, root = self.__to_tree_inner() # pyright: ignore[reportPrivateUsage] - # This will usually be the identity function, but on the off chance its not, we do this. + # This will usually be the identity function, but if not, we do this. # Waste computation in exchange for not having a horrible headache old2new: dict[int, int] = {} diff --git a/python/tests/test_mg.py b/python/tests/test_mg.py index e021e64..f4436f7 100644 --- a/python/tests/test_mg.py +++ b/python/tests/test_mg.py @@ -1,4 +1,5 @@ -import pytest +# ruff: disable[D103,D100,E501] + import pickle from python_mg import Lexicon, Continuation @@ -6,7 +7,7 @@ from python_mg.syntax import Trace, Mover -def test_lexicon(): +def test_lexicon() -> None: x = Lexicon("a::b= a\nb::b") assert [str(s) for s in x.generate_grammar("a")] == ["a b"] parse = next(x.generate_grammar("a")) @@ -16,13 +17,13 @@ def test_lexicon(): ) -def test_pickling(): +def test_pickling() -> None: x = Lexicon("a::b= a\nb::b") x_pickle = pickle.dumps(x) assert pickle.loads(x_pickle) == x -def test_memory_load(): +def test_memory_load() -> None: grammar = Lexicon("a::b= c= +a +e C\nb::b -a\nc::c -e") parse = grammar.parse("c b a", "C")[0] assert parse.max_memory_load() == 2 @@ -31,22 +32,36 @@ def test_memory_load(): assert parse.max_memory_load() == 1 -def test_semantic_lexicon(): - grammar = """John::d::a_j +def test_semantic_lexicon() -> None: + grammar = 
"""John::d::a_John run::=d v::lambda a x some_e(e, pe_run(e), AgentOf(x,e)) -Mary::d::a_m +Mary::d::a_Mary likes::d= =d v::lambda a x lambda a y some_e(e, pe_likes(e), AgentOf(y,e) & PatientOf(x, e))""" semantic_lexicon = Lexicon(grammar) assert semantic_lexicon.is_semantic() s = semantic_lexicon.parse("John likes Mary", "v") assert len(s) == 1 parse = s[0] + assert parse.meaning is not None assert parse.meaning == [ - "some_e(x, pe_likes(x), AgentOf(a_j, x) & PatientOf(a_m, x))" + "some_e(x, pe_likes(x), AgentOf(a_John, x) & PatientOf(a_Mary, x))" + ] + meaning: str = parse.meaning[0] + + s = Scenario( + " lambda a x some_e(e, pe_likes(e), AgentOf(x, e)); lambda a x some_e(e, pe_likes(e), PatientOf(x, e))" + ) + assert len(s.questions) == 2 + + assert s.evaluate(meaning) + answers = [ + s.evaluate(f"({q})(a_{name})") for q, name in zip(s.questions, ["John", "Mary"]) ] + assert answers[0] + assert answers[1] -def test_scenario(): +def test_scenario() -> None: s = Scenario("") assert s.actors == [Actor("John", properties={"nice", "quick"})] assert s.events == [Event(agent="John", properties={"run"})] @@ -70,7 +85,7 @@ def test_scenario(): assert john.properties == {"cool"} -def test_trees(): +def test_trees() -> None: grammar = """ ::T= C ::T= +W C @@ -147,7 +162,7 @@ def test_trees(): assert tree.to_dot() == digraph -def test_continuations(): +def test_continuations() -> None: x = Lexicon("a::b= S\nb::b") assert x.continuations("a", "S") == {Continuation("b")} x = Lexicon("a::S= b= S\n::S\nb::b") diff --git a/src/lib.rs b/src/lib.rs index f443abc..b7e1a03 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -23,7 +23,7 @@ use syntax::PySyntacticStructure; use crate::{ semantics::{ - PyPossibleEvent, PyScenarioIterator, + PyPossibleEvent, PyScenarioGenerator, lot_types::{PyActor, PyEvent}, scenario::PyScenario, }, @@ -639,7 +639,7 @@ fn python_mg(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_class::()?; m.add_class::()?; m.add_class::()?; - m.add_class::()?; + 
m.add_class::()?; m.add_class::()?; m.add_class::()?; m.add_class::()?; diff --git a/src/semantics.rs b/src/semantics.rs index c8d63fa..469d5ea 100644 --- a/src/semantics.rs +++ b/src/semantics.rs @@ -129,7 +129,7 @@ impl PyScenario { actors: Vec, event_kinds: Vec, actor_properties: Vec, - ) -> PyScenarioIterator { + ) -> PyScenarioGenerator { let parameter_holder = Arc::new(ParameterHolder { actors, event_kinds, @@ -163,7 +163,7 @@ impl PyScenario { }) .collect::>(); - PyScenarioIterator { + PyScenarioGenerator { generator: Scenario::all_scenarios(&actors, &event_kinds, &properties), _parameter_holder: parameter_holder, } @@ -201,14 +201,15 @@ impl PyPossibleEvent { } } -#[pyclass(name = "ScenarioGenerator")] -pub struct PyScenarioIterator { +#[pyclass(name = "ScenarioGenerator", eq, from_py_object)] +#[derive(Debug, Clone, Eq, PartialEq, PartialOrd, Ord, Hash)] +pub struct PyScenarioGenerator { generator: ScenarioIterator<'static>, _parameter_holder: Arc, } #[pymethods] -impl PyScenarioIterator { +impl PyScenarioGenerator { fn __iter__(slf: PyRef<'_, Self>) -> PyRef<'_, Self> { slf } @@ -218,6 +219,7 @@ impl PyScenarioIterator { } } +#[derive(Debug, Clone, Eq, PartialEq, PartialOrd, Ord, Hash)] struct ParameterHolder { actors: Vec, event_kinds: Vec, diff --git a/src/semantics/scenario.rs b/src/semantics/scenario.rs index 9497624..60ba348 100644 --- a/src/semantics/scenario.rs +++ b/src/semantics/scenario.rs @@ -7,6 +7,9 @@ pub struct PyScenario { actors: Vec, #[pyo3(get, set)] events: Vec, + + #[pyo3(get)] + questions: Vec, } impl From> for PyScenario { @@ -51,7 +54,26 @@ impl From> for PyScenario { }) .collect(); - PyScenario { actors, events } + let questions = value.questions().iter().map(|x| x.to_string()).collect(); + + PyScenario { + actors, + events, + questions, + } + } +} + +#[pymethods] +impl PyScenario { + #[setter] + fn set_questions(&mut self, questions: Vec) -> PyResult<()> { + for q in &questions { + let _ = RootedLambdaPool::parse(q).map_err(|e| 
PyValueError::new_err(e.to_string()))?;
+        }
+
+        self.questions = questions;
+        Ok(())
    }
}

From 7067901b86a4e1a2e0dbb948b53229791f2b5c15 Mon Sep 17 00:00:00 2001
From: Michael Goodale
Date: Wed, 25 Feb 2026 12:58:43 +0100
Subject: [PATCH 07/11] Docs

---
 src/lib.rs    |  1 +
 src/syntax.rs | 11 +++++++++++
 2 files changed, 12 insertions(+)

diff --git a/src/lib.rs b/src/lib.rs
index b7e1a03..621316a 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -360,6 +360,7 @@ impl PyLexicon {

 #[pymethods]
 impl PyLexicon {
+    ///Check if this lexicon has semantics
     fn is_semantic(&self) -> bool {
         matches!(self.lexicon.lexicon, PossiblySemanticLexicon::Semantic(_))
     }
diff --git a/src/syntax.rs b/src/syntax.rs
index 73b8d2c..a1f4b9c 100644
--- a/src/syntax.rs
+++ b/src/syntax.rs
@@ -118,6 +118,17 @@ impl PySyntacticStructure {
         self.prob.into_inner()
     }

+    ///Check whether this string (representing a lexical entry) is used in this tree.
+    ///
+    ///Returns
+    ///-------
+    ///bool
+    ///    Whether the lexical entry is used
+    ///
+    ///Raises
+    ///------
+    ///ValueError
+    ///    If the lexical entry is not parseable as a lexical entry.
fn contains_lexical_entry(&self, s: &str) -> PyResult { let lex = self.lex.get(); let entry = LexicalEntry::parse(s).map_err(|e| PyValueError::new_err(e.to_string()))?; From 8ee005417750612583308a7c3f3e45a11054d0e2 Mon Sep 17 00:00:00 2001 From: Michael Goodale Date: Wed, 25 Feb 2026 13:20:29 +0100 Subject: [PATCH 08/11] Added docs for semantics --- docs/source/index.rst | 1 + docs/source/semantics.rst | 25 +++++++++++++++++++++++++ docs/source/syntax.rst | 4 ++-- python/python_mg/_lib_name.pyi | 6 +++--- src/semantics/lot_types.rs | 15 +++++++++++++++ 5 files changed, 46 insertions(+), 5 deletions(-) create mode 100644 docs/source/semantics.rst diff --git a/docs/source/index.rst b/docs/source/index.rst index fb4db23..d56d406 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -15,6 +15,7 @@ It provides the tools necessary to generate strings from a Minimalist Grammar an lexicon syntax + semantics metrics diff --git a/docs/source/semantics.rst b/docs/source/semantics.rst new file mode 100644 index 0000000..3d9b288 --- /dev/null +++ b/docs/source/semantics.rst @@ -0,0 +1,25 @@ +Semantics +========= + +These are the classes that allow you to evaluate Language of Thought expressions generated by Semantic lexica. + +.. autoclass:: python_mg.semantics.Scenario + :members: + :undoc-members: + +.. autoclass:: python_mg.semantics.Actor + :members: + :undoc-members: + +.. autoclass:: python_mg.semantics.Event + :members: + :undoc-members: + +.. autoclass:: python_mg.semantics.PossibleEvent + :members: + :undoc-members: + +.. autoclass:: python_mg.semantics.ScenarioGenerator + :members: + :undoc-members: + diff --git a/docs/source/syntax.rst b/docs/source/syntax.rst index 8344093..5dc30e9 100644 --- a/docs/source/syntax.rst +++ b/docs/source/syntax.rst @@ -1,5 +1,5 @@ -Syntax tree utilities -===================== +Syntax +====== These are the classes that are useful for manipulating or plotting parse trees directly. 
diff --git a/python/python_mg/_lib_name.pyi b/python/python_mg/_lib_name.pyi index 9370764..ff592dc 100644 --- a/python/python_mg/_lib_name.pyi +++ b/python/python_mg/_lib_name.pyi @@ -164,8 +164,8 @@ class Scenario: @staticmethod def all_scenarios( actors: list[str], event_kinds: list[PossibleEvent], actor_properties: list[str] - ) -> ScenarioIterator: ... + ) -> ScenarioGenerator: ... -class ScenarioIterator: - def __iter__(self) -> ScenarioIterator: ... +class ScenarioGenerator: + def __iter__(self) -> ScenarioGenerator: ... def __next__(self) -> Scenario: ... diff --git a/src/semantics/lot_types.rs b/src/semantics/lot_types.rs index 2b8f634..1a5cb0d 100644 --- a/src/semantics/lot_types.rs +++ b/src/semantics/lot_types.rs @@ -44,11 +44,26 @@ pub(super) fn convert_to_py_event(e_i: u8, scenario: &Scenario<'_>) -> Result, } From f47146fe74996b507394c1095d9c31dc03f223ed Mon Sep 17 00:00:00 2001 From: Michael Goodale Date: Wed, 25 Feb 2026 15:05:34 +0100 Subject: [PATCH 09/11] Bit more documentation! --- python/python_mg/_lib_name.pyi | 17 ++++++++- python/python_mg/metrics.py | 1 + src/semantics.rs | 70 ++++++++++++++++++++++++++++++++-- src/semantics/lot_types.rs | 65 +++++++++++++++++-------------- src/semantics/scenario.rs | 11 ++++++ 5 files changed, 132 insertions(+), 32 deletions(-) diff --git a/python/python_mg/_lib_name.pyi b/python/python_mg/_lib_name.pyi index ff592dc..1025acd 100644 --- a/python/python_mg/_lib_name.pyi +++ b/python/python_mg/_lib_name.pyi @@ -1,5 +1,5 @@ import datetime -from typing import Sequence +from typing import Literal, Sequence import numpy as np import numpy.typing as npt @@ -149,6 +149,21 @@ class PossibleEvent: is_reflexive: bool name: str + def __init__( + self, + name: str, + has_agent: bool = True, + has_patient: bool = False, + is_reflexive: bool = True, + ) -> None: ... + def event_kind(self) -> Literal[ + "Transitive", + "TransitiveNonReflexive", + "Unergative", + "Unaccusative", + "Avalent", + ]: ... 
+ class Scenario: actors: list[Actor] events: list[Event] diff --git a/python/python_mg/metrics.py b/python/python_mg/metrics.py index ba0608f..4249d07 100644 --- a/python/python_mg/metrics.py +++ b/python/python_mg/metrics.py @@ -78,6 +78,7 @@ def grammar_f1_from_strings( Parameters ---------- lexicon : Lexicon + the lexicon to use as ground truth for the measurement tokens : ndarray of int Token IDs representing the input sequences. Shape (..., seq_length). preds : ndarray of float64 diff --git a/src/semantics.rs b/src/semantics.rs index 469d5ea..b855bf5 100644 --- a/src/semantics.rs +++ b/src/semantics.rs @@ -82,7 +82,6 @@ impl PyScenario { format!("Scenario({self})") } - #[pyo3(signature = (expression, max_steps=64, timeout=None))] ///Executes an language of thought expression in this scenario. Will potentially throw a PresuppositionException if ///something is referenced that isn't in the scenario. It will also reduce any lambda ///expressions if possible, and then will only execute the expression if it is fully reducible. @@ -113,6 +112,7 @@ impl PyScenario { ///------ ///PyErr /// If conversion of an ``Event`` or ``EventSet`` variant fails. + #[pyo3(signature = (expression, max_steps=64, timeout=None))] fn evaluate<'a>( &'a self, expression: &'a str, @@ -124,6 +124,24 @@ impl PyScenario { self.execute(expr, Some(ExecutionConfig::new(max_steps, timeout))) } + ///Creates an iterator that goes over all possible scenarios that can be generated according to + ///the following parameters. This gets very large very quickly. + /// + ///Parameters + ///---------- + ///actors: list[str] + /// The actors who may or may not be present. + ///event_kinds: list[``PossibleEvent``] + /// The possible kinds of events + /// + ///Returns + ///------- + ///``ScenarioGenerator`` + /// + ///Raises + ///------ + ///PyErr + /// If conversion of an ``Event`` or ``EventSet`` variant fails. 
#[staticmethod] fn all_scenarios( actors: Vec, @@ -170,20 +188,66 @@ impl PyScenario { } } +/// A possible linguistic event with theta role structure. +/// +/// Parameters +/// ---------- +/// name : str +/// Identifier for the event. +/// has_agent : bool, optional +/// Whether the event has an agent participant. Default is ``True``. +/// has_patient : bool, optional +/// Whether the event has a patient participant. Default is ``False``. +/// is_reflexive : bool, optional +/// Whether the event allows reflexive construal. Default is ``True``. #[pyclass(name = "PossibleEvent", eq, from_py_object)] #[derive(Debug, Clone, Eq, PartialEq, PartialOrd, Ord, Hash)] pub struct PyPossibleEvent { + ///Whether the event takes an agent #[pyo3(get, set)] pub has_agent: bool, + ///Whether the event takes a patient #[pyo3(get, set)] pub has_patient: bool, + ///Whether the event can have the same agent and patient + #[pyo3(get, set)] pub is_reflexive: bool, + ///The name of this kind of event (e.g. `running` could be a unaccusative event) #[pyo3(get, set)] pub name: String, } +#[pymethods] +impl PyPossibleEvent { + #[new] + #[pyo3(signature = (name, has_agent=true, has_patient=false, is_reflexive=true))] + fn new(name: String, has_agent: bool, has_patient: bool, is_reflexive: bool) -> Self { + PyPossibleEvent { + name, + has_agent, + has_patient, + is_reflexive, + } + } + + /// Classify the event based on its argument structure. + /// + /// Returns + /// ------- + /// ``Literal['Transitive', 'TransitiveNonReflexive', 'Unergative', 'Unaccusative', 'Avalent']``. 
+ fn event_type(&self) -> &'static str { + match (self.has_agent, self.has_patient) { + (true, true) if self.is_reflexive => "Transitive", + (true, true) => "TransitiveNonReflexive", + (true, false) => "Unergative", + (false, true) => "Unaccusative", + (false, false) => "Avalent", + } + } +} + impl PyPossibleEvent { - fn event_type(&self) -> EventType { + fn as_event_type(&self) -> EventType { match (self.has_agent, self.has_patient) { (true, true) if self.is_reflexive => EventType::Transitive, (true, true) => EventType::TransitiveNonReflexive, @@ -196,7 +260,7 @@ impl PyPossibleEvent { fn as_possible_event<'a>(&'a self) -> PossibleEvent<'a> { PossibleEvent { label: self.name.as_str(), - event_type: self.event_type(), + event_type: self.as_event_type(), } } } diff --git a/src/semantics/lot_types.rs b/src/semantics/lot_types.rs index 1a5cb0d..243ced3 100644 --- a/src/semantics/lot_types.rs +++ b/src/semantics/lot_types.rs @@ -44,17 +44,25 @@ pub(super) fn convert_to_py_event(e_i: u8, scenario: &Scenario<'_>) -> Result>) -> Self { PyActor { name, @@ -106,6 +105,26 @@ impl Display for PyActor { } } +///Represents an event to be used in a Scenario. +/// +///Parameters +///---------- +///agent : str, optional +/// The name of the agent (if there is one) +///patient : str, optional +/// The name of the patient (if there is one) +///properties : set[str], optional +/// Any properties that apply to the event. Defaults to an empty set. +/// +/// +///Examples +///-------- +///Creating an event +/// +///.. 
code-block:: python
+///
+///    running = Event(agent="John", properties={"run", "quickly"})
+///
 #[pyclass(name = "Event", eq, str, from_py_object)]
 #[derive(Debug, Clone, Eq, PartialEq, PartialOrd, Ord, Hash)]
 pub struct PyEvent {
@@ -121,17 +140,7 @@ pub struct PyEvent {
 impl PyEvent {
     #[new]
     #[pyo3(signature = (agent=None, patient=None, properties=None))]
-    ///Parameters
-    ///----------
-    ///agent : str, optional
-    ///    The name of the agent (if there is one)
-    ///patient : str, optional
-    ///    The name of the patient (if there is one)
-    ///properties: set[str], optional
-    ///    Any properties that apply to the actor.
-    ///Returns
-    ///-------
-    ///Event
+
     fn new(
         agent: Option<String>,
         patient: Option<String>,
diff --git a/src/semantics/scenario.rs b/src/semantics/scenario.rs
index 60ba348..454f3e7 100644
--- a/src/semantics/scenario.rs
+++ b/src/semantics/scenario.rs
@@ -1,5 +1,16 @@
 use super::*;

+///Represents a Scenario, a model that meanings are evaluated in.
+///
+///Parameters
+///----------
+///actors : list[Actor]
+///    The actors present in the scenario
+///events: list[Event]
+///    The events happening in the scenario
+///questions: list[str]
+///    The questions in a scenario.
(Will raise a `ValueError` if set with a `str` which is not a +/// valid Language of Thought expression) #[pyclass(name = "Scenario", str, eq, from_py_object)] #[derive(Debug, Clone, Eq, PartialEq)] pub struct PyScenario { From 4a20806cc85f488699e67b5e6c1d2b48b2f0aa3a Mon Sep 17 00:00:00 2001 From: Michael Goodale Date: Wed, 25 Feb 2026 17:56:43 +0100 Subject: [PATCH 10/11] Lot more documentation + adding some more functions to syntactic structure --- Cargo.lock | 2 +- python/python_mg/_lib_name.pyi | 1 + python/tests/test_mg.py | 19 ++++++++++++++++- src/graphing.rs | 20 ++++++++++++++++++ src/lib.rs | 37 +++++++++++++++++++++++++++++++++- src/semantics.rs | 36 +++++++++++++-------------------- src/semantics/lot_types.rs | 5 +++++ src/semantics/scenario.rs | 3 +++ src/syntax.rs | 13 ++++++++++++ 9 files changed, 111 insertions(+), 25 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 66d5ffc..1e7d801 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -341,7 +341,7 @@ checksum = "2687e6cf9c00f48e9284cf9fd15f2ef341d03cc7743abf9df4c5f07fdee50b18" [[package]] name = "minimalist-grammar-parser" version = "0.1.0" -source = "git+https://github.com/MichaelGoodale/minimalist-grammar-parser.git#23f151d41e8e06218626bf2a16a8a41248317562" +source = "git+https://github.com/MichaelGoodale/minimalist-grammar-parser.git#4876725b425835b5a205d94fe93aab8c68046e9b" dependencies = [ "ahash 0.8.12", "bitvec", diff --git a/python/python_mg/_lib_name.pyi b/python/python_mg/_lib_name.pyi index 1025acd..819fbf1 100644 --- a/python/python_mg/_lib_name.pyi +++ b/python/python_mg/_lib_name.pyi @@ -20,6 +20,7 @@ class SyntacticStructure: """A parse tree for some string.""" def __init__(self) -> None: ... + def pronunciation(self) -> list[str]: ... def log_prob(self) -> float: ... def n_steps(self) -> int: ... def contains_lexical_entry(self, s: str) -> bool: ... 
diff --git a/python/tests/test_mg.py b/python/tests/test_mg.py index f4436f7..854c8eb 100644 --- a/python/tests/test_mg.py +++ b/python/tests/test_mg.py @@ -32,9 +32,26 @@ def test_memory_load() -> None: assert parse.max_memory_load() == 1 +def test_generation() -> None: + grammar = """John::d::a_John +runs::=d v::lambda a x some_e(e, pe_run(e), AgentOf(x,e)) +Mary::d::a_Mary +likes::d= =d v::lambda a x lambda a y some_e(e, pe_likes(e), AgentOf(y,e) & PatientOf(x, e))""" + lexicon = Lexicon(grammar) + strings = [" ".join(p.pronunciation()) for p in lexicon.generate_grammar("v")] + assert strings == [ + "John runs", + "Mary runs", + "Mary likes John", + "John likes John", + "John likes Mary", + "Mary likes Mary", + ] + + def test_semantic_lexicon() -> None: grammar = """John::d::a_John -run::=d v::lambda a x some_e(e, pe_run(e), AgentOf(x,e)) +runs::=d v::lambda a x some_e(e, pe_run(e), AgentOf(x,e)) Mary::d::a_Mary likes::d= =d v::lambda a x lambda a y some_e(e, pe_likes(e), AgentOf(y,e) & PatientOf(x, e))""" semantic_lexicon = Lexicon(grammar) diff --git a/src/graphing.rs b/src/graphing.rs index 525c3a2..90617c4 100644 --- a/src/graphing.rs +++ b/src/graphing.rs @@ -2,6 +2,7 @@ use minimalist_grammar_parser::parsing::rules::{TreeEdge, TreeNode}; use pyo3::{exceptions::PyValueError, prelude::*}; use std::fmt::Display; +///A node on a tree. #[pyclass(name = "MGNode", str, eq, frozen)] #[derive(Debug, PartialEq, Eq)] pub struct PyMgNode(pub TreeNode<'static, String, String>); @@ -59,6 +60,7 @@ impl PyMgNode { } } +///A node representing the edge in a tree, whether in merging or movement. #[pyclass(name = "MGEdge", str, eq, frozen)] #[derive(Debug, PartialEq, PartialOrd, Ord, Eq)] pub struct PyMgEdge(pub TreeEdge); @@ -80,14 +82,32 @@ impl Display for PyMgEdge { #[pymethods] impl PyMgEdge { + ///Check if the edge is a movement edge. + /// + ///Returns + ///------- + ///bool + /// Whether it's a movement edge. 
fn is_move(&self) -> bool { matches!(self.0, TreeEdge::Move) } + ///Check if the edge is a head-movement edge. + /// + ///Returns + ///------- + ///bool + /// Whether it's a head-movement edge. fn is_head_move(&self) -> bool { matches!(self.0, TreeEdge::MoveHead) } + ///Check if the edge is a merge edge. + /// + ///Returns + ///------- + ///bool + /// Whether it's a merge edge. fn is_merge(&self) -> bool { matches!(self.0, TreeEdge::Merge(_)) } diff --git a/src/lib.rs b/src/lib.rs index 621316a..4edd50e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -107,7 +107,42 @@ impl Display for SelfOwningLexicon { )] #[derive(Debug, Clone, Eq, PartialEq)] ///A MG grammar that can be used to generate SyntacticStructures or parse strings into -///SyntacticStructures +///SyntacticStructures. +/// +///You may include semantic interpretations or not. You may also generate all valid sentences in the grammar. +/// +///Parameters +///---------- +///s : str +/// +///Raises +///------ +///ValueError +/// If the string is not a valid lexicon. +/// +///Examples +///-------- +///Generating all sentences of a grammar. +/// +/// +///Creating a lexicon with interpretations and getting the interpretation of a sentence. +/// +///.. 
code-block:: python +/// +/// grammar = """John::d::a_John +/// run::=d v::lambda a x some_e(e, pe_run(e), AgentOf(x,e)) +/// Mary::d::a_Mary +/// likes::d= =d v::lambda a x lambda a y some_e(e, pe_likes(e), AgentOf(y,e) & PatientOf(x, e))""" +/// semantic_lexicon = Lexicon(grammar) +/// assert semantic_lexicon.is_semantic() +/// s = semantic_lexicon.parse("John likes Mary", "v") +/// assert len(s) == 1 +/// parse = s[0] +/// assert parse.meaning is not None +/// assert parse.meaning == [ +/// "some_e(x, pe_likes(x), AgentOf(a_John, x) & PatientOf(a_Mary, x))" +/// ] +/// struct PyLexicon { word_id: TokenMap, lexeme_to_id: HashMap, LexemeId>, diff --git a/src/semantics.rs b/src/semantics.rs index b855bf5..45f924d 100644 --- a/src/semantics.rs +++ b/src/semantics.rs @@ -99,19 +99,11 @@ impl PyScenario { ///Returns ///------- ///bool or Actor or Event or set[Actor] or set[Event] - /// The result of the language evaluation, typed according to the - /// expression's return kind: - /// - /// - ``bool`` — a plain boolean value. - /// - ``Actor`` — a single actor resolved from the model. - /// - ``Event`` — a single event resolved from the model. - /// - ``set[Actor]`` — an unordered collection of actors. - /// - ``set[Event]`` — an unordered collection of events. - /// + /// the value of the expression ///Raises ///------ - ///PyErr - /// If conversion of an ``Event`` or ``EventSet`` variant fails. + ///ValueError + /// If the expression is incorrectly formatted or if there is a presupposition error. #[pyo3(signature = (expression, max_steps=64, timeout=None))] fn evaluate<'a>( &'a self, @@ -124,24 +116,19 @@ impl PyScenario { self.execute(expr, Some(ExecutionConfig::new(max_steps, timeout))) } - ///Creates an iterator that goes over all possible scenarios that can be generated according to - ///the following parameters. This gets very large very quickly. 
+ ///Creates a generator that goes over all possible scenarios that can be generated according to + ///the its parameters. This gets very large very quickly. /// ///Parameters ///---------- - ///actors: list[str] + ///actors : list[str] /// The actors who may or may not be present. - ///event_kinds: list[``PossibleEvent``] + ///event_kinds : list[``PossibleEvent``] /// The possible kinds of events /// ///Returns ///------- - ///``ScenarioGenerator`` - /// - ///Raises - ///------ - ///PyErr - /// If conversion of an ``Event`` or ``EventSet`` variant fails. + ///ScenarioGenerator #[staticmethod] fn all_scenarios( actors: Vec, @@ -234,7 +221,7 @@ impl PyPossibleEvent { /// /// Returns /// ------- - /// ``Literal['Transitive', 'TransitiveNonReflexive', 'Unergative', 'Unaccusative', 'Avalent']``. + /// Literal['Transitive', 'TransitiveNonReflexive', 'Unergative', 'Unaccusative', 'Avalent']. fn event_type(&self) -> &'static str { match (self.has_agent, self.has_patient) { (true, true) if self.is_reflexive => "Transitive", @@ -265,6 +252,11 @@ impl PyPossibleEvent { } } +///Yields +///------ +///Scenario +/// Another scenario that can be generated according to the parameters. +/// #[pyclass(name = "ScenarioGenerator", eq, from_py_object)] #[derive(Debug, Clone, Eq, PartialEq, PartialOrd, Ord, Hash)] pub struct PyScenarioGenerator { diff --git a/src/semantics/lot_types.rs b/src/semantics/lot_types.rs index 243ced3..f07834d 100644 --- a/src/semantics/lot_types.rs +++ b/src/semantics/lot_types.rs @@ -128,10 +128,15 @@ impl Display for PyActor { #[pyclass(name = "Event", eq, str, from_py_object)] #[derive(Debug, Clone, Eq, PartialEq, PartialOrd, Ord, Hash)] pub struct PyEvent { + ///The agent of the event. #[pyo3(get, set)] pub agent: Option, + + ///The patient of the event. #[pyo3(get, set)] pub patient: Option, + + ///Any properties of the event. 
#[pyo3(get, set)] pub properties: BTreeSet, } diff --git a/src/semantics/scenario.rs b/src/semantics/scenario.rs index 454f3e7..94beea3 100644 --- a/src/semantics/scenario.rs +++ b/src/semantics/scenario.rs @@ -14,11 +14,14 @@ use super::*; #[pyclass(name = "Scenario", str, eq, from_py_object)] #[derive(Debug, Clone, Eq, PartialEq)] pub struct PyScenario { + ///A list of Actors in the scenario #[pyo3(get, set)] actors: Vec, + ///A list of Events in the scenario #[pyo3(get, set)] events: Vec, + ///A list of questions to be asked in the scenario #[pyo3(get)] questions: Vec, } diff --git a/src/syntax.rs b/src/syntax.rs index a1f4b9c..b5d0f7d 100644 --- a/src/syntax.rs +++ b/src/syntax.rs @@ -108,6 +108,19 @@ impl PySyntacticStructure { &self.meaning } + ///The pronunciation of this SyntacticStructure. + /// + ///Returns + ///------- + ///list[str] + /// A list of strings of each word. Multi-morphemic words are seperated by `-`. + fn pronunciation(&self) -> Vec { + self.string + .iter() + .map(|x| x.to_string()) + .collect::>() + } + ///The log probability of generating this SyntacticStructure using its associated Lexicon. /// ///Returns From cb91cd1ddfa7d30edb7ec81b84540423367687bc Mon Sep 17 00:00:00 2001 From: Michael Goodale Date: Wed, 25 Feb 2026 18:14:45 +0100 Subject: [PATCH 11/11] nicer example! 
--- python/tests/test_mg.py | 2 +- src/lib.rs | 16 ++++++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/python/tests/test_mg.py b/python/tests/test_mg.py index 854c8eb..b8a3172 100644 --- a/python/tests/test_mg.py +++ b/python/tests/test_mg.py @@ -38,7 +38,7 @@ def test_generation() -> None: Mary::d::a_Mary likes::d= =d v::lambda a x lambda a y some_e(e, pe_likes(e), AgentOf(y,e) & PatientOf(x, e))""" lexicon = Lexicon(grammar) - strings = [" ".join(p.pronunciation()) for p in lexicon.generate_grammar("v")] + strings = [str(p) for p in lexicon.generate_grammar("v")] assert strings == [ "John runs", "Mary runs", diff --git a/src/lib.rs b/src/lib.rs index 4edd50e..3439ecf 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -124,6 +124,22 @@ impl Display for SelfOwningLexicon { ///-------- ///Generating all sentences of a grammar. /// +///.. code-block:: python +/// +/// grammar = """John::d +/// runs::=d v +/// Mary::d +/// likes::d= =d v""" +/// lexicon = Lexicon(grammar) +/// strings = [str(p) for p in lexicon.generate_grammar("v")] +/// assert strings == [ +/// "John runs", +/// "Mary runs", +/// "Mary likes John", +/// "John likes John", +/// "John likes Mary", +/// "Mary likes Mary", +/// ] /// ///Creating a lexicon with interpretations and getting the interpretation of a sentence. ///