diff --git a/CHANGELOG.md b/CHANGELOG.md index f067f4c..636721d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,73 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [0.2.0] - 2026-02-09 + +### Added + +#### Span Labeling Data Model (`bead.items`) + +- **Span**, **SpanLabel**, **SpanSegment** models for stand-off token-level annotation +- **SpanSpec** for defining label vocabularies and relation types +- **SpanRelation** for directed labeled relations between spans +- `add_spans_to_item()` composability function for attaching spans to any item type +- Prompt span references: `[[label]]` and `[[label:text]]` template syntax + - Auto-fills span token text or uses explicit display text + - Colors match between stimulus highlighting and prompt highlighting + - Resolved Python-side at trial generation; plugins receive pre-rendered HTML + - Early validation warning in `add_spans_to_item()`, hard validation at trial generation + +#### Tokenization (`bead.tokenization`) + +- **Token** model with `text`, `whitespace`, `index`, `token_space_after` fields +- **TokenizedText** container with token-level access and reconstruction +- Tokenizer backends: whitespace (default), spaCy, Stanza +- Lazy imports for optional NLP dependencies + +#### jsPsych Plugins (`bead.deployment.jspsych`) + +- 8 new TypeScript plugins following the `JsPsychPlugin` pattern: + - **bead-binary-choice**: two-alternative forced choice with keyboard support + - **bead-categorical**: labeled category selection (radio buttons) + - **bead-free-text**: open-ended text input with optional word count + - **bead-magnitude**: numeric magnitude estimation with reference stimulus + - **bead-multi-select**: checkbox-based multi-selection with min/max constraints + - **bead-slider-rating**: continuous slider with labeled endpoints + - **bead-rating**: Likert-scale ordinal rating with keyboard shortcuts + - **bead-span-label**: interactive span highlighting with label assignment, relations, and search +- **span-renderer** library for token-level span highlighting with overlap support +- **gallery-bundle** IIFE build aggregating all plugins for standalone HTML demos +- Keyboard navigation support in forced-choice, rating, and binary-choice plugins +- Material Design styling with responsive layout + +#### Deployment Pipeline + +- `SpanDisplayConfig` with `color_palette` and `dark_color_palette` for consistent span coloring +- `SpanColorMap` dataclass for deterministic color assignment (same label = same color pair) +- `_assign_span_colors()` shared between stimulus and prompt renderers +- `_generate_span_stimulus_html()` for token-level highlighting in deployed experiments +- Prompt span reference resolution integrated into all 5 composite trial creators (likert, slider, binary, forced-choice, span-labeling) +- Deployment CSS for `.bead-q-highlight`, `.bead-q-chip`, `.bead-span-subscript` in experiment template + +#### Interactive Gallery + +- 17 demo pages using stimuli from MegaAcceptability, MegaVeridicality, and Semantic Proto-Roles +- Demos cover all plugin types and composite span+task combinations +- Gallery documentation with tabbed Demo / Python / Trial JSON views +- Standalone HTML demos with gallery-bundle.js (no build step required) + +#### Tests + +- 79 Python span-related tests (items, tokenization, deployment) +- 42 TypeScript tests (20 plugin + 22 span-renderer) +- Prompt span reference tests: parser, color assignment, resolver, integration + +### Changed + +- Trial 
generation now supports span-aware stimulus rendering for all task types +- Forced-choice and rating plugins updated with keyboard shortcut support +- Span-label plugin enhanced with searchable fixed labels, interactive relation creation, and relation cleanup on span deletion + ## [0.1.0] - 2026-02-04 ### Added @@ -115,5 +182,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - CI/CD: GitHub Actions for testing, docs, PyPI publishing - Read the Docs integration -[Unreleased]: https://github.com/FACTSlab/bead/compare/v0.1.0...HEAD +[Unreleased]: https://github.com/FACTSlab/bead/compare/v0.2.0...HEAD +[0.2.0]: https://github.com/FACTSlab/bead/compare/v0.1.0...v0.2.0 [0.1.0]: https://github.com/FACTSlab/bead/releases/tag/v0.1.0 diff --git a/README.md b/README.md index 5ba8337..2db3059 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,12 @@ # bead -A Python framework for constructing, deploying, and analyzing large-scale linguistic judgment experiments with active learning. - [![CI](https://github.com/FACTSlab/bead/actions/workflows/ci.yml/badge.svg)](https://github.com/FACTSlab/bead/actions/workflows/ci.yml) [![Python 3.13](https://img.shields.io/badge/python-3.13-blue.svg)](https://www.python.org/downloads/) [![License: MIT](https://img.shields.io/badge/license-MIT-green.svg)](LICENSE) [![Documentation](https://img.shields.io/badge/docs-readthedocs-blue.svg)](https://bead.readthedocs.io) +A Python framework for constructing, deploying, and analyzing large-scale linguistic judgment experiments with active learning. + ## Overview `bead` implements a complete pipeline for linguistic research: from lexical resource construction through experimental deployment to model training with active learning. It handles the combinatorial explosion of linguistic stimuli while maintaining full provenance tracking. 
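A quick orientation before the code: the release notes above reference several new models and the `[[label]]` prompt syntax. The sketch below shows how they are intended to compose, based on the field names that appear in the serialization code later in this diff; the exact constructor signatures (and the arguments to `add_spans_to_item()`) are not shown there, so treat the details as illustrative rather than authoritative.

```python
from bead.items import Span, SpanLabel, SpanSegment

# a span over tokens 1-2 of the element named "sentence", labeled
# "agent"; spans sharing a label also share a highlight color pair
span = Span(
    span_id="s1",
    segments=[SpanSegment(element_name="sentence", indices=[1, 2])],
    label=SpanLabel(label="agent"),
)

# prompt span references, resolved Python-side at trial generation:
# [[agent]]         -> replaced by the span's token text, highlighted
# [[agent:the dog]] -> same highlight, but with explicit display text
prompt = "How likely is it that [[agent]] acted deliberately?"
```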
diff --git a/bead/__init__.py b/bead/__init__.py index 2ad06c4..d16d8f1 100644 --- a/bead/__init__.py +++ b/bead/__init__.py @@ -6,6 +6,6 @@ from __future__ import annotations -__version__ = "0.1.0" +__version__ = "0.2.0" __author__ = "Aaron Steven White" __email__ = "aaron.white@rochester.edu" diff --git a/bead/deployment/jspsych/config.py b/bead/deployment/jspsych/config.py index f8ccd5f..b776d64 100644 --- a/bead/deployment/jspsych/config.py +++ b/bead/deployment/jspsych/config.py @@ -14,19 +14,20 @@ from bead.data.range import Range from bead.deployment.distribution import ListDistributionStrategy -# Type alias for experiment types +# type alias for experiment types type ExperimentType = Literal[ "likert_rating", "slider_rating", "binary_choice", "forced_choice", + "span_labeling", ] -# Type alias for UI themes +# type alias for UI themes type UITheme = Literal["light", "dark", "auto"] -# Factory functions for default lists +# factory functions for default lists def _empty_demographics_fields() -> list[DemographicsFieldConfig]: """Return empty demographics field list.""" return [] @@ -37,6 +38,67 @@ def _empty_instruction_pages() -> list[InstructionPage]: return [] +def _default_span_color_palette() -> list[str]: + """Return default span highlight color palette.""" + return [ + "#BBDEFB", + "#C8E6C9", + "#FFE0B2", + "#F8BBD0", + "#D1C4E9", + "#B2EBF2", + "#DCEDC8", + "#FFD54F", + ] + + +def _default_span_dark_palette() -> list[str]: + """Return default dark color palette for span subscript badges.""" + return [ + "#1565C0", + "#2E7D32", + "#E65100", + "#AD1457", + "#4527A0", + "#00838F", + "#558B2F", + "#F9A825", + ] + + +class SpanDisplayConfig(BaseModel): + """Visual configuration for span rendering in experiments. + + Attributes + ---------- + highlight_style : Literal["background", "underline", "border"] + How to visually indicate spans. + color_palette : list[str] + CSS color values for span highlighting (light backgrounds). + dark_color_palette : list[str] + CSS color values for subscript label badges (dark, index-aligned + with color_palette). + show_labels : bool + Whether to show span labels inline. + show_tooltips : bool + Whether to show tooltips on hover. + token_delimiter : str + Delimiter between tokens in display. + label_position : Literal["inline", "below", "tooltip"] + Where to display span labels. + """ + + model_config = ConfigDict(extra="forbid", frozen=True) + + highlight_style: Literal["background", "underline", "border"] = "background" + color_palette: list[str] = Field(default_factory=_default_span_color_palette) + dark_color_palette: list[str] = Field(default_factory=_default_span_dark_palette) + show_labels: bool = True + show_tooltips: bool = True + token_delimiter: str = " " + label_position: Literal["inline", "below", "tooltip"] = "inline" + + class DemographicsFieldConfig(BaseModel): """Configuration for a single demographics form field. @@ -241,7 +303,8 @@ class ExperimentConfig(BaseModel): Attributes ---------- experiment_type : ExperimentType - Type of experiment (likert_rating, slider_rating, binary_choice, forced_choice) + Type of experiment (likert_rating, slider_rating, binary_choice, + forced_choice, span_labeling). title : str Experiment title displayed to participants description : str @@ -281,6 +344,10 @@ class ExperimentConfig(BaseModel): Slopit behavioral capture integration configuration (default: disabled). 
When enabled, captures keystroke dynamics, focus patterns, and paste events during experiment trials for AI-assisted response detection. + span_display : SpanDisplayConfig | None + Span display configuration (default: None). Auto-enabled when items + contain span annotations. Controls highlight style, colors, and + label placement for span rendering. Examples -------- @@ -333,6 +400,10 @@ class ExperimentConfig(BaseModel): default_factory=SlopitIntegrationConfig, description="Slopit behavioral capture integration (opt-in, disabled)", ) + span_display: SpanDisplayConfig | None = Field( + default=None, + description="Span display config (auto-enabled when items have spans)", + ) class RatingScaleConfig(BaseModel): @@ -409,3 +480,4 @@ class ChoiceConfig(BaseModel): button_html: str | None = Field(default=None) required: bool = Field(default=True) randomize_choice_order: bool = Field(default=False) + layout: Literal["horizontal", "vertical"] = Field(default="horizontal") diff --git a/bead/deployment/jspsych/generator.py b/bead/deployment/jspsych/generator.py index 8e52d62..c5f2b7b 100644 --- a/bead/deployment/jspsych/generator.py +++ b/bead/deployment/jspsych/generator.py @@ -87,7 +87,7 @@ def __init__( self.rating_config = rating_config or RatingScaleConfig() self.choice_config = choice_config or ChoiceConfig() - # Setup Jinja2 environment + # setup Jinja2 environment template_dir = Path(__file__).parent / "templates" self.jinja_env = Environment(loader=FileSystemLoader(str(template_dir))) @@ -156,7 +156,7 @@ def generate( ... ) >>> # output_dir = generator.generate(lists, items, templates) """ - # Validate inputs (no fallbacks) + # validate inputs (no fallbacks) if not lists: raise ValueError( "generate() requires at least one ExperimentList. Got empty list." @@ -178,32 +178,40 @@ def generate( "provide an empty template: {item.item_template_id: ItemTemplate(...)}." 
) - # Validate all item references can be resolved + # validate all item references can be resolved self._validate_item_references(lists, items) - # Validate all template references can be resolved + # validate all template references can be resolved self._validate_template_references(items, templates) - # Create directory structure + # create directory structure self._create_directory_structure() - # Write batch data files (lists, items, distribution config, trials) + # write batch data files (lists, items, distribution config, trials) self._write_lists_jsonl(lists) self._write_items_jsonl(items) self._write_distribution_config() self._write_trials_json(lists, items, templates) - # Generate HTML/CSS/JS files - self._generate_html() + # detect span usage for HTML template + span_enabled = self._detect_span_usage(items, templates) + span_wikidata = self._detect_wikidata_usage(templates) + + # generate HTML/CSS/JS files + self._generate_html(span_enabled, span_wikidata) self._generate_css() self._generate_experiment_script() self._generate_config_file() self._copy_list_distributor_script() - # Copy slopit bundle if enabled + # copy slopit bundle if enabled if self.config.slopit.enabled: self._copy_slopit_bundle() + # copy span plugin scripts if needed + if span_enabled: + self._copy_span_plugin_scripts(span_wikidata) + return self.output_dir def _validate_item_references( @@ -305,7 +313,7 @@ def _write_items_jsonl(self, items: dict[UUID, Item]) -> None: """ output_path = self.output_dir / "data" / "items.jsonl" try: - # Convert dict values to list for serialization + # convert dict values to list for serialization items_list = list(items.values()) write_jsonlines(items_list, output_path) except SerializationError as e: @@ -377,7 +385,7 @@ def _write_distribution_config(self) -> None: """ output_path = self.output_dir / "data" / "distribution.json" try: - # Use model_dump_json() to handle UUID serialization + # use model_dump_json() to handle UUID serialization json_str = self.config.distribution_strategy.model_dump_json(indent=2) output_path.write_text(json_str) except (OSError, TypeError) as e: @@ -427,9 +435,15 @@ def _create_directory_structure(self) -> None: self.output_dir.mkdir(parents=True, exist_ok=True) (self.output_dir / "css").mkdir(exist_ok=True) (self.output_dir / "js").mkdir(exist_ok=True) + (self.output_dir / "js" / "plugins").mkdir(parents=True, exist_ok=True) + (self.output_dir / "js" / "lib").mkdir(parents=True, exist_ok=True) (self.output_dir / "data").mkdir(exist_ok=True) - def _generate_html(self) -> None: + def _generate_html( + self, + span_enabled: bool = False, + span_wikidata: bool = False, + ) -> None: """Generate index.html file.""" template = self.jinja_env.get_template("index.html") @@ -438,6 +452,8 @@ def _generate_html(self) -> None: ui_theme=self.config.ui_theme, use_jatos=self.config.use_jatos, slopit_enabled=self.config.slopit.enabled, + span_enabled=span_enabled, + span_wikidata=span_wikidata, ) output_file = self.output_dir / "index.html" @@ -448,14 +464,14 @@ def _generate_css(self) -> None: template_file = Path(__file__).parent / "templates" / "experiment.css" output_file = self.output_dir / "css" / "experiment.css" - # Copy CSS template directly (no rendering needed) + # copy CSS template directly (no rendering needed) output_file.write_text(template_file.read_text()) def _generate_experiment_script(self) -> None: """Generate experiment.js file.""" template = self.jinja_env.get_template("experiment.js.template") - # Auto-generate Prolific redirect URL 
if completion code is provided + # auto-generate Prolific redirect URL if completion code is provided on_finish_url = self.config.on_finish_url if self.config.prolific_completion_code: on_finish_url = ( @@ -463,7 +479,7 @@ def _generate_experiment_script(self) -> None: f"cc={self.config.prolific_completion_code}" ) - # Prepare slopit config for template + # prepare slopit config for template slopit_config = None if self.config.slopit.enabled: slopit_config = { @@ -473,7 +489,7 @@ def _generate_experiment_script(self) -> None: "target_selectors": self.config.slopit.target_selectors, } - # Prepare demographics config for template + # prepare demographics config for template demographics_enabled = False demographics_title = "Participant Information" demographics_fields: list[dict[str, JsonValue]] = [] @@ -499,7 +515,7 @@ def _generate_experiment_script(self) -> None: field_data["range_max"] = field.range.max demographics_fields.append(field_data) - # Prepare instructions config for template + # prepare instructions config for template instructions_is_multi_page = isinstance( self.config.instructions, InstructionsConfig ) @@ -524,7 +540,7 @@ def _generate_experiment_script(self) -> None: } ) else: - # Simple string instructions + # simple string instructions simple_instructions = ( self.config.instructions if isinstance(self.config.instructions, str) @@ -540,12 +556,12 @@ def _generate_experiment_script(self) -> None: on_finish_url=on_finish_url, slopit_enabled=self.config.slopit.enabled, slopit_config=slopit_config, - # Demographics variables + # demographics variables demographics_enabled=demographics_enabled, demographics_title=demographics_title, demographics_fields=demographics_fields, demographics_submit_text=demographics_submit_text, - # Instructions variables + # instructions variables instructions_is_multi_page=instructions_is_multi_page, instructions_pages=instructions_pages, instructions_show_page_numbers=instructions_show_page_numbers, @@ -576,7 +592,7 @@ def _copy_slopit_bundle(self) -> None: OSError If copying fails. """ - # Look for slopit bundle in dist directory + # look for slopit bundle in dist directory dist_dir = Path(__file__).parent / "dist" bundle_path = dist_dir / "slopit-bundle.js" @@ -596,3 +612,92 @@ def _copy_slopit_bundle(self) -> None: f"Failed to copy slopit bundle to {output_path}: {e}. " f"Check write permissions." ) from e + + def _detect_span_usage( + self, + items: dict[UUID, Item], + templates: dict[UUID, ItemTemplate], + ) -> bool: + """Detect whether any items or templates use span features. + + Parameters + ---------- + items : dict[UUID, Item] + Items dictionary. + templates : dict[UUID, ItemTemplate] + Templates dictionary. + + Returns + ------- + bool + True if spans are used. + """ + # check experiment type + if self.config.experiment_type == "span_labeling": + return True + + # check items for span data + for item in items.values(): + if item.spans or item.tokenized_elements: + return True + + # check templates for span_spec + for template in templates.values(): + if template.task_spec.span_spec is not None: + return True + + return False + + def _detect_wikidata_usage( + self, + templates: dict[UUID, ItemTemplate], + ) -> bool: + """Detect whether any templates use Wikidata label source. + + Parameters + ---------- + templates : dict[UUID, ItemTemplate] + Templates dictionary. + + Returns + ------- + bool + True if Wikidata is used. 
+        """
+        for template in templates.values():
+            if template.task_spec.span_spec is not None:
+                spec = template.task_spec.span_spec
+                if spec.label_source == "wikidata":
+                    return True
+                if spec.relation_label_source == "wikidata":
+                    return True
+        return False
+
+    def _copy_span_plugin_scripts(self, include_wikidata: bool = False) -> None:
+        """Copy span plugin scripts from compiled dist/ to js/ directory.
+
+        Parameters
+        ----------
+        include_wikidata : bool
+            Whether to include the Wikidata search script.
+        """
+        dist_dir = Path(__file__).parent / "dist"
+
+        # create subdirectories
+        (self.output_dir / "js" / "plugins").mkdir(parents=True, exist_ok=True)
+        (self.output_dir / "js" / "lib").mkdir(parents=True, exist_ok=True)
+
+        scripts = [
+            ("plugins/span-label.js", "js/plugins/span-label.js"),
+            ("lib/span-renderer.js", "js/lib/span-renderer.js"),
+        ]
+
+        if include_wikidata:
+            scripts.append(("lib/wikidata-search.js", "js/lib/wikidata-search.js"))
+
+        for src_name, dest_name in scripts:
+            src_path = dist_dir / src_name
+            dest_path = self.output_dir / dest_name
+            if src_path.exists():
+                dest_path.write_text(src_path.read_text())
+            # silently skip if not built yet (TypeScript may not be compiled)
diff --git a/bead/deployment/jspsych/package.json b/bead/deployment/jspsych/package.json
index 1523da5..ac2e8ae 100644
--- a/bead/deployment/jspsych/package.json
+++ b/bead/deployment/jspsych/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@bead/jspsych-deployment",
-  "version": "0.1.0",
+  "version": "0.2.0",
   "description": "TypeScript plugins and utilities for bead jsPsych experiment deployment",
   "private": true,
   "type": "module",
@@ -16,6 +16,7 @@
   },
   "scripts": {
     "build": "tsup",
+    "build:gallery": "tsup --config tsup.gallery.config.ts",
     "build:watch": "tsup --watch",
     "typecheck": "tsc --noEmit",
     "lint": "biome lint src",
diff --git a/bead/deployment/jspsych/src/gallery/gallery-bundle.ts b/bead/deployment/jspsych/src/gallery/gallery-bundle.ts
new file mode 100644
index 0000000..042de7e
--- /dev/null
+++ b/bead/deployment/jspsych/src/gallery/gallery-bundle.ts
@@ -0,0 +1,47 @@
+/**
+ * Gallery bundle entry point
+ *
+ * Registers all bead jsPsych plugins as window globals so they can be
+ * loaded via a single <script> tag in standalone HTML demos.
+ */
diff --git a/bead/deployment/jspsych/templates/index.html b/bead/deployment/jspsych/templates/index.html
     {% endif %}
 
+    {% if span_enabled %}
+    <script src="js/lib/span-renderer.js"></script>
+    <script src="js/plugins/span-label.js"></script>
+    {% if span_wikidata %}
+    <script src="js/lib/wikidata-search.js"></script>
+    {% endif %}
+    {% endif %}
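Putting the generator changes together: span plugin assets are copied, and the corresponding `<script>` tags emitted into `index.html`, only when `_detect_span_usage()` finds spans. A hedged sketch of the configuration side follows; the generator class name is not visible in this hunk, so `generator` stands in for an already-constructed instance.

```python
from bead.deployment.jspsych.config import ExperimentConfig, SpanDisplayConfig

config = ExperimentConfig(
    experiment_type="span_labeling",
    title="Span labeling demo",
    description="Select and label spans in each sentence",
    # optional override; when omitted, span display is auto-enabled
    # with the default palettes whenever items carry span annotations
    span_display=SpanDisplayConfig(highlight_style="background"),
)

# generate() then writes index.html with span_enabled=True and copies
# js/plugins/span-label.js and js/lib/span-renderer.js from dist/
# (plus js/lib/wikidata-search.js if any span_spec uses Wikidata):
# output_dir = generator.generate(lists, items, templates)
```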
diff --git a/bead/deployment/jspsych/trials.py b/bead/deployment/jspsych/trials.py index 15ae59f..a946e34 100644 --- a/bead/deployment/jspsych/trials.py +++ b/bead/deployment/jspsych/trials.py @@ -2,11 +2,15 @@ This module provides functions to generate jsPsych trial objects from Item models. It supports various trial types including rating scales, -forced choice, and binary choice trials. +forced choice, binary choice, and span labeling trials. Composite tasks +(e.g., rating with span highlights) are also supported. """ from __future__ import annotations +import re +from dataclasses import dataclass + from bead.data.base import JsonValue from bead.deployment.jspsych.config import ( ChoiceConfig, @@ -15,9 +19,11 @@ ExperimentConfig, InstructionsConfig, RatingScaleConfig, + SpanDisplayConfig, ) from bead.items.item import Item from bead.items.item_template import ItemTemplate +from bead.items.spans import Span def _serialize_item_metadata( @@ -38,19 +44,19 @@ def _serialize_item_metadata( Metadata dictionary containing all item and template fields. """ return { - # Item identification + # item identification "item_id": str(item.id), "item_created": item.created_at.isoformat(), "item_modified": item.modified_at.isoformat(), - # Item template reference + # item template reference "item_template_id": str(item.item_template_id), - # Filled template references + # filled template references "filled_template_refs": [str(ref) for ref in item.filled_template_refs], - # Options (for forced_choice/multi_select) + # options (for forced_choice/multi_select) "options": list(item.options), - # Rendered elements + # rendered elements "rendered_elements": dict(item.rendered_elements), - # Unfilled slots (for cloze tasks) + # unfilled slots (for cloze tasks) "unfilled_slots": [ { "slot_name": slot.slot_name, @@ -59,7 +65,7 @@ def _serialize_item_metadata( } for slot in item.unfilled_slots ], - # Model outputs + # model outputs "model_outputs": [ { "model_name": output.model_name, @@ -72,18 +78,18 @@ def _serialize_item_metadata( } for output in item.model_outputs ], - # Constraint satisfaction + # constraint satisfaction "constraint_satisfaction": { str(k): v for k, v in item.constraint_satisfaction.items() }, - # Item-specific metadata + # item-specific metadata "item_metadata": dict(item.item_metadata), - # Template information + # template information "template_name": template.name, "template_description": template.description, "judgment_type": template.judgment_type, "task_type": template.task_type, - # Template elements + # template elements "template_elements": [ { "element_type": elem.element_type, @@ -99,9 +105,9 @@ def _serialize_item_metadata( } for elem in template.elements ], - # Template constraints + # template constraints "template_constraints": [str(c) for c in template.constraints], - # Task specification + # task specification "task_spec": { "prompt": template.task_spec.prompt, "scale_bounds": template.task_spec.scale_bounds, @@ -112,7 +118,7 @@ def _serialize_item_metadata( "text_validation_pattern": template.task_spec.text_validation_pattern, "max_length": template.task_spec.max_length, }, - # Presentation specification + # presentation specification "presentation_spec": { "mode": template.presentation_spec.mode, "chunking": ( @@ -146,10 +152,86 @@ def _serialize_item_metadata( ), "display_format": template.presentation_spec.display_format, }, - # Presentation order + # presentation order "presentation_order": template.presentation_order, - # Template metadata + # template metadata 
"template_metadata": dict(template.template_metadata), + # span annotation data + "spans": [ + { + "span_id": span.span_id, + "segments": [ + { + "element_name": seg.element_name, + "indices": seg.indices, + } + for seg in span.segments + ], + "head_index": span.head_index, + "label": ( + { + "label": span.label.label, + "label_id": span.label.label_id, + "confidence": span.label.confidence, + } + if span.label + else None + ), + "span_type": span.span_type, + "span_metadata": dict(span.span_metadata), + } + for span in item.spans + ], + "span_relations": [ + { + "relation_id": rel.relation_id, + "source_span_id": rel.source_span_id, + "target_span_id": rel.target_span_id, + "label": ( + { + "label": rel.label.label, + "label_id": rel.label.label_id, + "confidence": rel.label.confidence, + } + if rel.label + else None + ), + "directed": rel.directed, + "relation_metadata": dict(rel.relation_metadata), + } + for rel in item.span_relations + ], + "tokenized_elements": dict(item.tokenized_elements), + "token_space_after": {k: list(v) for k, v in item.token_space_after.items()}, + "span_spec": ( + { + "index_mode": template.task_spec.span_spec.index_mode, + "interaction_mode": template.task_spec.span_spec.interaction_mode, + "label_source": template.task_spec.span_spec.label_source, + "labels": template.task_spec.span_spec.labels, + "label_colors": template.task_spec.span_spec.label_colors, + "allow_overlapping": template.task_spec.span_spec.allow_overlapping, + "min_spans": template.task_spec.span_spec.min_spans, + "max_spans": template.task_spec.span_spec.max_spans, + "enable_relations": template.task_spec.span_spec.enable_relations, + "relation_label_source": ( + template.task_spec.span_spec.relation_label_source + ), + "relation_labels": template.task_spec.span_spec.relation_labels, + "relation_directed": template.task_spec.span_spec.relation_directed, + "min_relations": template.task_spec.span_spec.min_relations, + "max_relations": template.task_spec.span_spec.max_relations, + "wikidata_language": template.task_spec.span_spec.wikidata_language, + "wikidata_entity_types": ( + template.task_spec.span_spec.wikidata_entity_types + ), + "wikidata_result_limit": ( + template.task_spec.span_spec.wikidata_result_limit + ), + } + if template.task_spec.span_spec + else None + ), } @@ -212,24 +294,65 @@ def create_trial( >>> rating_config = RatingScaleConfig() >>> trial = create_trial(item, template, config, 0, rating_config=rating_config) >>> trial["type"] - 'html-slider-response' + 'bead-slider-rating' """ + # standalone span_labeling experiment type + if experiment_config.experiment_type == "span_labeling": + span_display = experiment_config.span_display or SpanDisplayConfig() + return _create_span_labeling_trial(item, template, span_display, trial_number) + + # for composite tasks: detect spans and use span-enhanced stimulus HTML + has_spans = bool(item.spans) and bool( + template.task_spec.span_spec if template.task_spec else False + ) + + # resolve span display config for composite tasks with spans + span_display = experiment_config.span_display or SpanDisplayConfig() + if experiment_config.experiment_type == "likert_rating": if rating_config is None: raise ValueError("rating_config required for likert_rating experiments") - return _create_likert_trial(item, template, rating_config, trial_number) + return _create_likert_trial( + item, + template, + rating_config, + trial_number, + has_spans=has_spans, + span_display=span_display, + ) elif experiment_config.experiment_type == "slider_rating": 
if rating_config is None:
            raise ValueError("rating_config required for slider_rating experiments")
-        return _create_slider_trial(item, template, rating_config, trial_number)
+        return _create_slider_trial(
+            item,
+            template,
+            rating_config,
+            trial_number,
+            has_spans=has_spans,
+            span_display=span_display,
+        )
     elif experiment_config.experiment_type == "binary_choice":
         if choice_config is None:
             raise ValueError("choice_config required for binary_choice experiments")
-        return _create_binary_choice_trial(item, template, choice_config, trial_number)
+        return _create_binary_choice_trial(
+            item,
+            template,
+            choice_config,
+            trial_number,
+            has_spans=has_spans,
+            span_display=span_display,
+        )
     elif experiment_config.experiment_type == "forced_choice":
         if choice_config is None:
             raise ValueError("choice_config required for forced_choice experiments")
-        return _create_forced_choice_trial(item, template, choice_config, trial_number)
+        return _create_forced_choice_trial(
+            item,
+            template,
+            choice_config,
+            trial_number,
+            has_spans=has_spans,
+            span_display=span_display,
+        )
     else:
         raise ValueError(
             f"Unknown experiment type: {experiment_config.experiment_type}"
@@ -241,6 +364,8 @@ def _create_likert_trial(
     template: ItemTemplate,
     config: RatingScaleConfig,
     trial_number: int,
+    has_spans: bool = False,
+    span_display: SpanDisplayConfig | None = None,
 ) -> dict[str, JsonValue]:
     """Create a Likert rating trial.
 
     Parameters
     ----------
     item : Item
         The item to create a trial for.
     template : ItemTemplate
         The item template.
     config : RatingScaleConfig
         Rating scale configuration.
     trial_number : int
         The trial number.
+    has_spans : bool
+        Whether the item has span annotations.
+    span_display : SpanDisplayConfig | None
+        Span display configuration.
 
     Returns
     -------
     dict[str, JsonValue]
-        A jsPsych html-button-response trial object.
+        A jsPsych bead-rating trial object.
     """
-    # Generate stimulus HTML from rendered elements
-    stimulus_html = _generate_stimulus_html(item)
-
-    # Generate button labels for Likert scale
-    labels: list[str] = []
-    for i in range(config.scale.min, config.scale.max + 1, config.step):
-        if config.show_numeric_labels:
-            labels.append(str(i))
-        else:
-            labels.append("")
-
-    prompt_html = (
-        f'<div class="bead-scale-labels">'
-        f'{config.min_label}'
-        f'{config.max_label}'
-        f"</div>"
-    )
-
-    # Serialize complete metadata
+    # generate stimulus HTML from rendered elements
+    if has_spans and span_display:
+        stimulus_html = _generate_span_stimulus_html(item, span_display)
+    else:
+        stimulus_html = _generate_stimulus_html(item)
+
+    # build scale labels dict for endpoint labels
+    # keys are stringified ints (JSON object keys are always strings)
+    scale_labels: dict[str, JsonValue] = {}
+    if config.min_label:
+        scale_labels[str(config.scale.min)] = config.min_label
+    if config.max_label:
+        scale_labels[str(config.scale.max)] = config.max_label
+
+    # build prompt: stimulus HTML + task prompt if available
+    prompt = stimulus_html
+    if template.task_spec and template.task_spec.prompt:
+        task_prompt = template.task_spec.prompt
+        if has_spans and span_display:
+            color_map = _assign_span_colors(item.spans, span_display)
+            task_prompt = _resolve_prompt_references(task_prompt, item, color_map)
+        prompt += f'<div class="bead-prompt">{task_prompt}</div>'
+
+    # serialize complete metadata
     metadata = _serialize_item_metadata(item, template)
     metadata["trial_number"] = trial_number
     metadata["trial_type"] = "likert_rating"
 
     return {
-        "type": "html-button-response",
-        "stimulus": stimulus_html,
-        "choices": labels,
-        "prompt": prompt_html,
-        "data": metadata,
-        "button_html": '<button class="jspsych-btn">%choice%</button>',
+        "type": "bead-rating",
+        "prompt": prompt,
+        "scale_min": config.scale.min,
+        "scale_max": config.scale.max,
+        "scale_labels": scale_labels,
+        "require_response": config.required,
+        "button_label": "Continue",
+        "metadata": metadata,
     }
 
 
@@ -298,6 +434,8 @@ def _create_slider_trial(
     template: ItemTemplate,
     config: RatingScaleConfig,
     trial_number: int,
+    has_spans: bool = False,
+    span_display: SpanDisplayConfig | None = None,
 ) -> dict[str, JsonValue]:
     """Create a slider rating trial.
 
     Parameters
     ----------
     item : Item
         The item to create a trial for.
     template : ItemTemplate
         The item template.
     config : RatingScaleConfig
         Rating scale configuration.
     trial_number : int
         The trial number.
+    has_spans : bool
+        Whether the item has span annotations.
+    span_display : SpanDisplayConfig | None
+        Span display configuration.
 
     Returns
     -------
     dict[str, JsonValue]
-        A jsPsych html-slider-response trial object.
+        A jsPsych bead-slider-rating trial object.
     """
-    stimulus_html = _generate_stimulus_html(item)
-
-    # Serialize complete metadata
+    if has_spans and span_display:
+        stimulus_html = _generate_span_stimulus_html(item, span_display)
+    else:
+        stimulus_html = _generate_stimulus_html(item)
+
+    # build prompt: stimulus HTML + resolved task prompt
+    prompt_html = stimulus_html
+    if template.task_spec and template.task_spec.prompt:
+        task_prompt = template.task_spec.prompt
+        if has_spans and span_display:
+            color_map = _assign_span_colors(item.spans, span_display)
+            task_prompt = _resolve_prompt_references(task_prompt, item, color_map)
+        prompt_html += f'<div class="bead-prompt">{task_prompt}</div>'
+
+    # serialize complete metadata
     metadata = _serialize_item_metadata(item, template)
     metadata["trial_number"] = trial_number
     metadata["trial_type"] = "slider_rating"
 
     return {
-        "type": "html-slider-response",
-        "stimulus": stimulus_html,
+        "type": "bead-slider-rating",
+        "prompt": prompt_html,
         "labels": [config.min_label, config.max_label],
-        "min": config.scale.min,
-        "max": config.scale.max,
+        "slider_min": config.scale.min,
+        "slider_max": config.scale.max,
         "step": config.step,
         "slider_start": (config.scale.min + config.scale.max) // 2,
         "require_movement": config.required,
-        "data": metadata,
+        "button_label": "Continue",
+        "metadata": metadata,
     }
 
 
@@ -342,6 +497,8 @@ def _create_binary_choice_trial(
     template: ItemTemplate,
     config: ChoiceConfig,
     trial_number: int,
+    has_spans: bool = False,
+    span_display: SpanDisplayConfig | None = None,
 ) -> dict[str, JsonValue]:
     """Create a binary choice trial.
 
     Parameters
     ----------
     item : Item
         The item to create a trial for.
     template : ItemTemplate
         The item template.
     config : ChoiceConfig
         Choice configuration.
     trial_number : int
         The trial number.
+    has_spans : bool
+        Whether the item has span annotations.
+    span_display : SpanDisplayConfig | None
+        Span display configuration.
 
     Returns
     -------
     dict[str, JsonValue]
-        A jsPsych html-button-response trial object.
+        A jsPsych bead-binary-choice trial object.
     """
-    stimulus_html = _generate_stimulus_html(item)
+    if has_spans and span_display:
+        stimulus_html = _generate_span_stimulus_html(item, span_display)
+    else:
+        stimulus_html = _generate_stimulus_html(item)
 
-    # Serialize complete metadata
+    # serialize complete metadata
     metadata = _serialize_item_metadata(item, template)
     metadata["trial_number"] = trial_number
     metadata["trial_type"] = "binary_choice"
 
+    prompt = (
+        template.task_spec.prompt
+        if template.task_spec
+        else "Is this sentence acceptable?"
+    )
+
+    if has_spans and span_display:
+        color_map = _assign_span_colors(item.spans, span_display)
+        prompt = _resolve_prompt_references(prompt, item, color_map)
+
     return {
-        "type": "html-button-response",
+        "type": "bead-binary-choice",
+        "prompt": prompt,
         "stimulus": stimulus_html,
         "choices": ["Yes", "No"],
-        "data": metadata,
-        "button_html": config.button_html
-        or '<button class="jspsych-btn">%choice%</button>',
+        "require_response": config.required,
+        "metadata": metadata,
     }
 
 
@@ -383,6 +557,8 @@ def _create_forced_choice_trial(
     template: ItemTemplate,
     config: ChoiceConfig,
     trial_number: int,
+    has_spans: bool = False,
+    span_display: SpanDisplayConfig | None = None,
 ) -> dict[str, JsonValue]:
     """Create a forced choice trial.
 
     Parameters
     ----------
     item : Item
         The item to create a trial for.
     template : ItemTemplate
         The item template.
     config : ChoiceConfig
         Choice configuration.
     trial_number : int
         The trial number.
+    has_spans : bool
+        Whether the item has span annotations.
+    span_display : SpanDisplayConfig | None
+        Span display configuration.
 
     Returns
     -------
     dict[str, JsonValue]
-        A jsPsych html-button-response trial object.
+        A jsPsych bead-forced-choice trial object.
 
     Raises
     ------
     ValueError
         If item.options is empty or has fewer than 2 options.
     """
-    # For forced choice, use the prompt from the template as the stimulus
-    # (not the choices themselves)
     prompt = (
         template.task_spec.prompt
         if template.task_spec
         else "Which option do you choose?"
     )
-    stimulus_html = (
-        f'<div class="bead-prompt">{prompt}</div>'
-    )
 
-    # Extract choices from item.options
+    # extract alternatives from item.options (single source of truth)
     if not item.options:
         raise ValueError(
             f"Item {item.id} has no options. "
             f"Forced choice items require at least 2 options."
         )
     if len(item.options) < 2:
         raise ValueError(
             f"Item {item.id} has only {len(item.options)} option(s). "
             f"Forced choice items require at least 2 options."
         )
-    choices = list(item.options)
 
-    # Serialize complete metadata
+    # for composite span tasks, render span-highlighted HTML into each alternative
+    alternatives: list[str] = list(item.options)
+    if has_spans and span_display:
+        color_map = _assign_span_colors(item.spans, span_display)
+        prompt = _resolve_prompt_references(prompt, item, color_map)
+        stimulus_html = _generate_span_stimulus_html(item, span_display)
+        prompt = stimulus_html + f"<div class='bead-prompt'>{prompt}</div>"
+
+    # serialize complete metadata
     metadata = _serialize_item_metadata(item, template)
     metadata["trial_number"] = trial_number
     metadata["trial_type"] = "forced_choice"
 
     return {
-        "type": "html-button-response",
-        "stimulus": stimulus_html,
-        "choices": choices,
-        "data": metadata,
-        "button_html": config.button_html
-        or '<button class="jspsych-btn">%choice%</button>',
+        "type": "bead-forced-choice",
+        "prompt": prompt,
+        "alternatives": alternatives,
+        "layout": config.layout,
+        "randomize_position": config.randomize_choice_order,
+        "enable_keyboard": True,
+        "require_response": config.required,
+        "button_label": "Select",
+        "metadata": metadata,
     }
 
@@ -466,18 +651,18 @@
     if not item.rendered_elements:
         return "<p>No stimulus available</p>"
 
-    # Get rendered elements in a consistent order
+    # get rendered elements in a consistent order
     sorted_keys = sorted(item.rendered_elements.keys())
 
     if include_all:
-        # Include all rendered elements
+        # include all rendered elements
         elements = [
            f'<div class="bead-element">{item.rendered_elements[k]}</div>'
             for k in sorted_keys
         ]
         return '<div class="bead-stimulus">' + "".join(elements) + "</div>"
     else:
-        # Include only the first element (for forced choice where others are options)
+        # include only the first element (for forced choice where others are options)
         first_key = sorted_keys[0]
         element_html = item.rendered_elements[first_key]
         return f'<div class="bead-stimulus">{element_html}</div>'
@@ -676,7 +861,7 @@ def create_instructions_trial(
         2
     """
     if isinstance(instructions, str):
-        # Simple string: use html-keyboard-response (backward compatible)
+        # simple string: use html-keyboard-response (backward compatible)
         stimulus_html = (
            f'<div class="bead-instructions">'
            f"<h2>Instructions</h2>
" @@ -692,7 +877,7 @@ def create_instructions_trial( }, } - # InstructionsConfig: use jsPsych instructions plugin + # use jsPsych instructions plugin for InstructionsConfig (multi-page) pages: list[str] = [] for i, page in enumerate(instructions.pages): page_html = '
<div class="bead-instructions-page">'
         if page.title:
+            page_html += f"<h2>{page.title}</h2>"
         page_html += f"<div>{page.content}</div>"
 
-        # Add page numbers if enabled
+        # add page numbers if enabled
         if instructions.show_page_numbers and len(instructions.pages) > 1:
             page_html += (
                 f'<p class="bead-page-number">Page {i + 1} of {len(instructions.pages)}</p>
' @@ -721,3 +906,456 @@ def create_instructions_trial( "trial_type": "instructions", }, } + + +@dataclass(frozen=True) +class SpanColorMap: + """Light and dark color assignments for spans. + + Attributes + ---------- + light_by_span_id : dict[str, str] + Light (background) colors keyed by span_id. + dark_by_span_id : dict[str, str] + Dark (badge) colors keyed by span_id. + light_by_label : dict[str, str] + Light (background) colors keyed by label name. + dark_by_label : dict[str, str] + Dark (badge) colors keyed by label name. + """ + + light_by_span_id: dict[str, str] + dark_by_span_id: dict[str, str] + light_by_label: dict[str, str] + dark_by_label: dict[str, str] + + +def _assign_span_colors( + spans: list[Span], + span_display: SpanDisplayConfig, +) -> SpanColorMap: + """Assign light and dark colors to spans. + + Same label gets the same color pair. Unlabeled spans each get + their own color. Index-aligned light/dark palettes produce + matching background and badge colors. + + Parameters + ---------- + spans : list[Span] + Spans to assign colors to. + span_display : SpanDisplayConfig + Display configuration with light and dark palettes. + + Returns + ------- + SpanColorMap + Color assignments keyed by span_id and by label. + """ + light_palette = span_display.color_palette + dark_palette = span_display.dark_color_palette + + light_by_label: dict[str, str] = {} + dark_by_label: dict[str, str] = {} + light_by_span_id: dict[str, str] = {} + dark_by_span_id: dict[str, str] = {} + color_idx = 0 + + for span in spans: + if span.label and span.label.label: + label_name = span.label.label + if label_name not in light_by_label: + light_by_label[label_name] = light_palette[ + color_idx % len(light_palette) + ] + dark_by_label[label_name] = dark_palette[color_idx % len(dark_palette)] + color_idx += 1 + light_by_span_id[span.span_id] = light_by_label[label_name] + dark_by_span_id[span.span_id] = dark_by_label[label_name] + else: + light_by_span_id[span.span_id] = light_palette[ + color_idx % len(light_palette) + ] + dark_by_span_id[span.span_id] = dark_palette[color_idx % len(dark_palette)] + color_idx += 1 + + return SpanColorMap( + light_by_span_id=light_by_span_id, + dark_by_span_id=dark_by_span_id, + light_by_label=light_by_label, + dark_by_label=dark_by_label, + ) + + +def _generate_span_stimulus_html( + item: Item, + span_display: SpanDisplayConfig, +) -> str: + """Generate HTML with span-highlighted tokens for composite tasks. + + Renders tokens as individually wrapped ```` elements with + highlight classes and data attributes for span identification. + + Parameters + ---------- + item : Item + Item with spans and tokenized_elements. + span_display : SpanDisplayConfig + Visual configuration. + + Returns + ------- + str + HTML string with span-highlighted token elements. + """ + if not item.tokenized_elements: + return _generate_stimulus_html(item) + + html_parts: list[str] = ['
<div class="bead-stimulus">']
+
+    sorted_keys = sorted(item.tokenized_elements.keys())
+    for element_name in sorted_keys:
+        tokens = item.tokenized_elements[element_name]
+        space_flags = item.token_space_after.get(element_name, [])
+
+        # build token-to-span mapping
+        token_spans: dict[int, list[str]] = {}
+        for span in item.spans:
+            for segment in span.segments:
+                if segment.element_name == element_name:
+                    for idx in segment.indices:
+                        if idx not in token_spans:
+                            token_spans[idx] = []
+                        token_spans[idx].append(span.span_id)
+
+        # assign colors (shared with prompt reference resolution)
+        color_map = _assign_span_colors(item.spans, span_display)
+        span_colors = color_map.light_by_span_id
+
+        html_parts.append(
+            f'<div class="bead-element" data-element-name="{element_name}">'
+        )
+
+        for i, token_text in enumerate(tokens):
+            span_ids = token_spans.get(i, [])
+            n_spans = len(span_ids)
+
+            classes = ["bead-token"]
+            if n_spans > 0:
+                classes.append("highlighted")
+
+            fallback = span_display.color_palette[0]
+            style_parts: list[str] = []
+            if n_spans == 1:
+                color = span_colors.get(span_ids[0], fallback)
+                style_parts.append(f"background-color: {color}")
+            elif n_spans > 1:
+                # layer multiple spans
+                colors = [span_colors.get(sid, fallback) for sid in span_ids]
+                gradient = ", ".join(colors)
+                style_parts.append(f"background: linear-gradient({gradient})")
+
+            style_attr = f' style="{"; ".join(style_parts)}"' if style_parts else ""
+            span_id_attr = f' data-span-ids="{",".join(span_ids)}"' if span_ids else ""
+            count_attr = f' data-span-count="{n_spans}"' if n_spans > 0 else ""
+
+            html_parts.append(
+                f'<span class="{" ".join(classes)}"{style_attr}{span_id_attr}{count_attr}>'
+                f"{token_text}"
+                "</span>"
+            )
+
+            # add spacing
+            if i < len(space_flags) and space_flags[i]:
+                html_parts.append(" ")
+
+        html_parts.append("</div>")
+
+    html_parts.append("</div>")
+    return "".join(html_parts)
+
+
+# prompt span reference resolution
+
+_SPAN_REF_PATTERN = re.compile(r"\[\[([^\]:]+?)(?::([^\]]+?))?\]\]")
+
+
+@dataclass(frozen=True)
+class _SpanReference:
+    """A parsed ``[[label]]`` or ``[[label:text]]`` reference."""
+
+    label: str
+    display_text: str | None
+    match_start: int
+    match_end: int
+
+
+def _parse_prompt_references(prompt: str) -> list[_SpanReference]:
+    """Parse ``[[label]]`` and ``[[label:text]]`` references from a prompt.
+
+    Parameters
+    ----------
+    prompt : str
+        Prompt string potentially containing span references.
+
+    Returns
+    -------
+    list[_SpanReference]
+        Parsed references in order of appearance.
+    """
+    return [
+        _SpanReference(
+            label=m.group(1).strip(),
+            display_text=m.group(2).strip() if m.group(2) else None,
+            match_start=m.start(),
+            match_end=m.end(),
+        )
+        for m in _SPAN_REF_PATTERN.finditer(prompt)
+    ]
+
+
+def _auto_fill_span_text(label: str, item: Item) -> str:
+    """Reconstruct display text from a span's tokens.
+
+    Finds the first span whose label matches, collects its token
+    indices from the first segment's element, and joins them
+    respecting ``token_space_after``.
+
+    Parameters
+    ----------
+    label : str
+        Span label to look up.
+    item : Item
+        Item with spans, tokenized_elements, and token_space_after.
+
+    Returns
+    -------
+    str
+        Reconstructed text from the span's tokens.
+
+    Raises
+    ------
+    ValueError
+        If no span with the given label exists or tokens are unavailable.
+    """
+    target_span: Span | None = None
+    for span in item.spans:
+        if span.label and span.label.label == label:
+            target_span = span
+            break
+
+    if target_span is None:
+        available = [s.label.label for s in item.spans if s.label and s.label.label]
+        raise ValueError(
+            f"Prompt references span label '{label}' but no span with "
+            f"that label exists. Available labels: {available}"
+        )
+
+    parts: list[str] = []
+    for segment in target_span.segments:
+        element_name = segment.element_name
+        tokens = item.tokenized_elements.get(element_name, [])
+        space_flags = item.token_space_after.get(element_name, [])
+        sorted_indices = sorted(segment.indices)
+        for i, idx in enumerate(sorted_indices):
+            if idx < len(tokens):
+                parts.append(tokens[idx])
+                if (
+                    i < len(sorted_indices) - 1
+                    and idx < len(space_flags)
+                    and space_flags[idx]
+                ):
+                    parts.append(" ")
+
+    return "".join(parts)
+
+
+def _resolve_prompt_references(
+    prompt: str,
+    item: Item,
+    color_map: SpanColorMap,
+) -> str:
+    """Replace ``[[label]]`` references in a prompt with highlighted HTML.
+
+    Parameters
+    ----------
+    prompt : str
+        Prompt template with ``[[label]]`` or ``[[label:text]]`` refs.
+    item : Item
+        Item with spans and tokenized_elements.
+    color_map : SpanColorMap
+        Pre-computed color assignments from ``_assign_span_colors()``.
+
+    Returns
+    -------
+    str
+        Prompt with references replaced by highlighted HTML.
+
+    Raises
+    ------
+    ValueError
+        If a reference points to a nonexistent label.
+    """
+    refs = _parse_prompt_references(prompt)
+    if not refs:
+        return prompt
+
+    available = {s.label.label for s in item.spans if s.label and s.label.label}
+    for ref in refs:
+        if ref.label not in available:
+            raise ValueError(
+                f"Prompt references span label '{ref.label}' but no span "
+                f"with that label exists. Available labels: "
+                f"{sorted(available)}"
+            )
+
+    result = prompt
+    for ref in reversed(refs):
+        display = (
+            ref.display_text
+            if ref.display_text is not None
+            else _auto_fill_span_text(ref.label, item)
+        )
+        light = color_map.light_by_label.get(ref.label, "#BBDEFB")
+        dark = color_map.dark_by_label.get(ref.label, "#1565C0")
+        html = (
+            f'<span class="bead-q-highlight" style="background-color: {light}">'
+            f"{display}"
+            f'<span class="bead-q-chip" style="background-color: {dark}">'
+            f"{ref.label}"
+            "</span></span>"
+        )
+        result = result[: ref.match_start] + html + result[ref.match_end :]
+
+    return result
+
+
+def _create_span_labeling_trial(
+    item: Item,
+    template: ItemTemplate,
+    span_display: SpanDisplayConfig,
+    trial_number: int,
+) -> dict[str, JsonValue]:
+    """Create a standalone span labeling trial.
+
+    Uses the ``bead-span-label`` plugin for interactive or static span
+    annotation.
+
+    Parameters
+    ----------
+    item : Item
+        Item with span data.
+    template : ItemTemplate
+        Item template with span_spec.
+    span_display : SpanDisplayConfig
+        Visual configuration.
+    trial_number : int
+        Trial number.
+
+    Returns
+    -------
+    dict[str, JsonValue]
+        A jsPsych trial object using the bead-span-label plugin.
+    """
+    metadata = _serialize_item_metadata(item, template)
+    metadata["trial_number"] = trial_number
+    metadata["trial_type"] = "span_labeling"
+
+    prompt = (
+        template.task_spec.prompt if template.task_spec else "Select and label spans"
+    )
+
+    if item.spans:
+        color_map = _assign_span_colors(item.spans, span_display)
+        prompt = _resolve_prompt_references(prompt, item, color_map)
+
+    # serialize span data for the plugin
+    spans_data = [
+        {
+            "span_id": span.span_id,
+            "segments": [
+                {"element_name": seg.element_name, "indices": seg.indices}
+                for seg in span.segments
+            ],
+            "head_index": span.head_index,
+            "label": (
+                {
+                    "label": span.label.label,
+                    "label_id": span.label.label_id,
+                    "confidence": span.label.confidence,
+                }
+                if span.label
+                else None
+            ),
+            "span_type": span.span_type,
+        }
+        for span in item.spans
+    ]
+
+    relations_data = [
+        {
+            "relation_id": rel.relation_id,
+            "source_span_id": rel.source_span_id,
+            "target_span_id": rel.target_span_id,
+            "label": (
+                {
+                    "label": rel.label.label,
+                    "label_id": rel.label.label_id,
+                    "confidence": rel.label.confidence,
+                }
+                if rel.label
+                else None
+            ),
+            "directed": rel.directed,
+        }
+        for rel in item.span_relations
+    ]
+
+    # serialize span_spec
+    span_spec_data = None
+    if template.task_spec.span_spec:
+        ss = template.task_spec.span_spec
+        span_spec_data = {
+            "index_mode": ss.index_mode,
+            "interaction_mode": ss.interaction_mode,
+            "label_source": ss.label_source,
+            "labels": ss.labels,
+            "label_colors": ss.label_colors,
+            "allow_overlapping": ss.allow_overlapping,
+            "min_spans": ss.min_spans,
+            "max_spans": ss.max_spans,
+            "enable_relations": ss.enable_relations,
+            "relation_label_source": ss.relation_label_source,
+            "relation_labels": ss.relation_labels,
+            "relation_directed": ss.relation_directed,
+            "min_relations": ss.min_relations,
+            "max_relations": ss.max_relations,
+            "wikidata_language": ss.wikidata_language,
+            "wikidata_entity_types": ss.wikidata_entity_types,
+            "wikidata_result_limit": ss.wikidata_result_limit,
+        }
+
+    # serialize display config
+    display_config_data = {
+        "highlight_style": span_display.highlight_style,
+        "color_palette": span_display.color_palette,
+        "show_labels": span_display.show_labels,
+        "show_tooltips": span_display.show_tooltips,
+        "token_delimiter": span_display.token_delimiter,
+        "label_position": span_display.label_position,
+    }
+
+    return {
+        "type": "bead-span-label",
+        "tokens": dict(item.tokenized_elements),
+ "space_after": {k: list(v) for k, v in item.token_space_after.items()}, + "spans": spans_data, + "relations": relations_data, + "span_spec": span_spec_data, + "display_config": display_config_data, + "prompt": prompt, + "button_label": "Continue", + "require_response": True, + "metadata": metadata, + } diff --git a/bead/deployment/jspsych/tsup.config.ts b/bead/deployment/jspsych/tsup.config.ts index 411e47f..5b74f63 100644 --- a/bead/deployment/jspsych/tsup.config.ts +++ b/bead/deployment/jspsych/tsup.config.ts @@ -6,9 +6,13 @@ export default defineConfig({ "plugins/rating": "src/plugins/rating.ts", "plugins/forced-choice": "src/plugins/forced-choice.ts", "plugins/cloze-dropdown": "src/plugins/cloze-dropdown.ts", + // Span labeling + "plugins/span-label": "src/plugins/span-label.ts", // Library "lib/list-distributor": "src/lib/list-distributor.ts", "lib/randomizer": "src/lib/randomizer.ts", + "lib/span-renderer": "src/lib/span-renderer.ts", + "lib/wikidata-search": "src/lib/wikidata-search.ts", // Slopit bundle (behavioral capture) "slopit-bundle": "src/slopit/index.ts", }, diff --git a/bead/deployment/jspsych/tsup.gallery.config.ts b/bead/deployment/jspsych/tsup.gallery.config.ts new file mode 100644 index 0000000..9612b0b --- /dev/null +++ b/bead/deployment/jspsych/tsup.gallery.config.ts @@ -0,0 +1,24 @@ +import { defineConfig } from "tsup"; + +export default defineConfig({ + entry: { + "gallery-bundle": "src/gallery/gallery-bundle.ts", + }, + format: ["iife"], + globalName: "BeadGallery", + dts: false, + sourcemap: false, + clean: false, + target: "es2020", + splitting: false, + treeshake: true, + minify: false, + // jspsych is loaded from CDN as a global; keep it external + external: ["jspsych"], + outDir: "dist", + esbuildOptions(options) { + options.banner = { + js: "/* @bead/jspsych-gallery - Interactive demo bundle */", + }; + }, +}); diff --git a/bead/deployment/jspsych/ui/styles.py b/bead/deployment/jspsych/ui/styles.py index 6210492..65811a9 100644 --- a/bead/deployment/jspsych/ui/styles.py +++ b/bead/deployment/jspsych/ui/styles.py @@ -407,5 +407,30 @@ def generate_css( .bead-choice-button {{ width: 100%; }} + +/* Span-highlighted prompt references */ +.bead-q-highlight {{ + position: relative; + padding: 1px 4px; + border-radius: 3px; + font-weight: 500; + margin-bottom: 0.6rem; +}} + +.bead-q-chip {{ + position: absolute; + bottom: -0.6rem; + right: -2px; + display: inline-flex; + align-items: center; + padding: 0px 5px; + border-radius: 0.6rem; + font-size: 0.6rem; + font-weight: 500; + color: white; + white-space: nowrap; + box-shadow: 0 1px 3px rgba(0, 0, 0, 0.15); + line-height: 1.5; +}} """ return css diff --git a/bead/items/__init__.py b/bead/items/__init__.py index c44e02f..8c0ae9f 100644 --- a/bead/items/__init__.py +++ b/bead/items/__init__.py @@ -1,5 +1,7 @@ """Item models for experimental stimuli.""" +from __future__ import annotations + from bead.items.item import Item, ItemCollection, ModelOutput, UnfilledSlot from bead.items.item_template import ( ChunkingSpec, @@ -16,6 +18,16 @@ TaskType, TimingParams, ) +from bead.items.spans import ( + LabelSourceType, + Span, + SpanIndexMode, + SpanInteractionMode, + SpanLabel, + SpanRelation, + SpanSegment, + SpanSpec, +) __all__ = [ # Item template types @@ -37,4 +49,13 @@ "ItemCollection", "ModelOutput", "UnfilledSlot", + # Span types + "LabelSourceType", + "Span", + "SpanIndexMode", + "SpanInteractionMode", + "SpanLabel", + "SpanRelation", + "SpanSegment", + "SpanSpec", ] diff --git a/bead/items/item.py 
b/bead/items/item.py index 6e54a9f..f0faacc 100644 --- a/bead/items/item.py +++ b/bead/items/item.py @@ -4,17 +4,18 @@ from uuid import UUID -from pydantic import Field, field_validator +from pydantic import Field, field_validator, model_validator from bead.data.base import BeadBaseModel +from bead.items.spans import Span, SpanRelation -# Type aliases for JSON-serializable metadata values +# type aliases for JSON-serializable metadata values type MetadataValue = ( str | int | float | bool | None | dict[str, MetadataValue] | list[MetadataValue] ) -# Factory functions for default values with explicit types +# factory functions for default values with explicit types def _empty_uuid_list() -> list[UUID]: """Return empty UUID list.""" return [] @@ -55,6 +56,26 @@ def _empty_str_list() -> list[str]: return [] +def _empty_tokenized_dict() -> dict[str, list[str]]: + """Return empty tokenized elements dict.""" + return {} + + +def _empty_space_after_dict() -> dict[str, list[bool]]: + """Return empty space_after dict.""" + return {} + + +def _empty_span_list() -> list[Span]: + """Return empty Span list.""" + return [] + + +def _empty_span_relation_list() -> list[SpanRelation]: + """Return empty SpanRelation list.""" + return [] + + class UnfilledSlot(BeadBaseModel): """An unfilled slot in a cloze task item. @@ -212,6 +233,16 @@ class Item(BeadBaseModel): Constraint UUIDs mapped to satisfaction status. item_metadata : dict[str, MetadataValue] Additional metadata for this item. + spans : list[Span] + Span annotations for this item (default: empty). + span_relations : list[SpanRelation] + Relations between spans, directed or undirected (default: empty). + tokenized_elements : dict[str, list[str]] + Tokenized text for span indexing, keyed by element name + (default: empty). + token_space_after : dict[str, list[bool]] + Per-token space_after flags for artifact-free rendering + (default: empty). Examples -------- @@ -263,6 +294,59 @@ class Item(BeadBaseModel): item_metadata: dict[str, MetadataValue] = Field( default_factory=_empty_metadata_dict, description="Additional metadata" ) + # span annotation fields (all default empty, backward compatible) + spans: list[Span] = Field( + default_factory=_empty_span_list, + description="Span annotations for this item", + ) + span_relations: list[SpanRelation] = Field( + default_factory=_empty_span_relation_list, + description="Relations between spans (directed or undirected)", + ) + tokenized_elements: dict[str, list[str]] = Field( + default_factory=_empty_tokenized_dict, + description="Tokenized text for span indexing (element_name -> tokens)", + ) + token_space_after: dict[str, list[bool]] = Field( + default_factory=_empty_space_after_dict, + description="Per-token space_after flags for artifact-free rendering", + ) + + @model_validator(mode="after") + def validate_span_relations(self) -> Item: + """Validate all span_relations reference valid span_ids from spans. + + Returns + ------- + Item + Validated item. + + Raises + ------ + ValueError + If a relation references a span_id not present in spans. + """ + if self.span_relations: + if not self.spans: + raise ValueError( + "Item has span_relations but no spans. " + "All relations must reference existing spans." + ) + valid_ids = {s.span_id for s in self.spans} + for rel in self.span_relations: + if rel.source_span_id not in valid_ids: + raise ValueError( + f"SpanRelation '{rel.relation_id}' references " + f"source_span_id '{rel.source_span_id}' not found " + f"in item spans. 
Valid span_ids: {valid_ids}" + ) + if rel.target_span_id not in valid_ids: + raise ValueError( + f"SpanRelation '{rel.relation_id}' references " + f"target_span_id '{rel.target_span_id}' not found " + f"in item spans. Valid span_ids: {valid_ids}" + ) + return self def get_model_output( self, diff --git a/bead/items/item_template.py b/bead/items/item_template.py index 6f6f283..7864d6d 100644 --- a/bead/items/item_template.py +++ b/bead/items/item_template.py @@ -8,14 +8,16 @@ from pydantic import Field, ValidationInfo, field_validator from bead.data.base import BeadBaseModel +from bead.items.spans import SpanSpec +from bead.tokenization.config import TokenizerConfig -# Type aliases for JSON-serializable metadata values +# type aliases for JSON-serializable metadata values type MetadataValue = ( str | int | float | bool | None | dict[str, MetadataValue] | list[MetadataValue] ) -# Factory functions for default values with explicit types +# factory functions for default values with explicit types def _empty_item_element_list() -> list[ItemElement]: """Return empty ItemElement list.""" return [] @@ -41,7 +43,7 @@ def _empty_uuid_list() -> list[UUID]: return [] -# Type aliases for judgment and task types +# type aliases for judgment and task types JudgmentType = Literal[ "acceptability", # Linguistic acceptability/grammaticality/naturalness "inference", # Semantic relationship (NLI: entailment/neutral/contradiction) @@ -49,6 +51,7 @@ def _empty_uuid_list() -> list[UUID]: "plausibility", # Likelihood/plausibility of events or statements "comprehension", # Understanding/recall of content "preference", # Subjective preference between alternatives + "extraction", # Extracting structured info (labeled spans) from text ] TaskType = Literal[ @@ -60,6 +63,7 @@ def _empty_uuid_list() -> list[UUID]: "categorical", # Pick from unordered categories (UI: dropdown, radio) "free_text", # Open-ended text (UI: text input, textarea) "cloze", # Fill-in-the-blank with unfilled slots (UI: inferred) + "span_labeling", # Select and label text spans (UI: token selection) ] ElementRefType = Literal["text", "filled_template_ref"] @@ -216,6 +220,9 @@ class TaskSpec(BeadBaseModel): Regular expression pattern for validating free_text responses. max_length : int | None Maximum character length for free_text responses. + span_spec : SpanSpec | None + Span labeling specification (for span_labeling tasks or + composite tasks with span overlays). Examples -------- @@ -273,6 +280,9 @@ class TaskSpec(BeadBaseModel): default=None, description="Regex pattern for text validation" ) max_length: int | None = Field(default=None, description="Maximum text length") + span_spec: SpanSpec | None = Field( + default=None, description="Span labeling specification" + ) @field_validator("prompt") @classmethod @@ -320,6 +330,9 @@ class PresentationSpec(BeadBaseModel): display with no fixed durations. display_format : dict[str, str | int | float | bool] Additional display formatting options. + tokenizer_config : TokenizerConfig | None + Display tokenizer configuration for span annotation. When set, + controls how text is tokenized for span indexing and display. 
Examples -------- @@ -360,6 +373,10 @@ class PresentationSpec(BeadBaseModel): default_factory=_empty_display_format_dict, description="Display formatting options", ) + tokenizer_config: TokenizerConfig | None = Field( + default=None, + description="Display tokenizer config for span annotation", + ) class ItemElement(BeadBaseModel): @@ -655,7 +672,7 @@ def validate_presentation_order( if v is None: return v - # Get elements from validation info + # get elements from validation info elements = info.data.get("elements", []) if not elements: return v @@ -663,14 +680,14 @@ def validate_presentation_order( element_names = {e.element_name for e in elements} order_names = set(v) - # Check for names in order that aren't in elements + # check for names in order that aren't in elements extra = order_names - element_names if extra: raise ValueError( f"presentation_order contains element names not in elements: {extra}" ) - # Check for names in elements that aren't in order + # check for names in elements that aren't in order missing = element_names - order_names if missing: raise ValueError( diff --git a/bead/items/span_labeling.py b/bead/items/span_labeling.py new file mode 100644 index 0000000..796e283 --- /dev/null +++ b/bead/items/span_labeling.py @@ -0,0 +1,438 @@ +"""Utilities for creating span labeling experimental items. + +This module provides language-agnostic utilities for creating items with +span annotations. Spans can be added to any existing item type (composability) +or used as standalone span labeling tasks. + +Integration Points +------------------ +- Active Learning: bead/active_learning/ (via alignment module) +- Deployment: bead/deployment/jspsych/ (span-label plugin) +- Tokenization: bead/tokenization/ (display-level tokens) +""" + +from __future__ import annotations + +import re +import warnings +from collections.abc import Callable +from uuid import UUID, uuid4 + +from bead.items.item import Item, MetadataValue +from bead.items.spans import ( + LabelSourceType, + Span, + SpanSpec, +) +from bead.tokenization.config import TokenizerConfig +from bead.tokenization.tokenizers import TokenizedText, create_tokenizer + +_SPAN_REF_PATTERN = re.compile(r"\[\[([^\]:]+?)(?::([^\]]+?))?\]\]") + + +def tokenize_item( + item: Item, + tokenizer_config: TokenizerConfig | None = None, +) -> Item: + """Tokenize an item's rendered_elements. + + Populates ``tokenized_elements`` and ``token_space_after`` using the + configured tokenizer. Returns a new ``Item`` (does not mutate). + + Parameters + ---------- + item : Item + Item to tokenize. + tokenizer_config : TokenizerConfig | None + Tokenizer configuration. If None, uses default (spaCy English). + + Returns + ------- + Item + New item with populated tokenized_elements and token_space_after. 
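+
+    Examples
+    --------
+    A sketch with the dependency-free whitespace backend, assuming
+    ``item`` is an existing ``Item`` whose ``rendered_elements`` map
+    ``"text"`` to ``"The boy left."`` (whitespace tokenization keeps
+    punctuation attached to the preceding word):
+
+    >>> config = TokenizerConfig(backend="whitespace")
+    >>> tokenized = tokenize_item(item, config)  # doctest: +SKIP
+    >>> tokenized.tokenized_elements["text"]  # doctest: +SKIP
+    ['The', 'boy', 'left.']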
+ """ + if tokenizer_config is None: + tokenizer_config = TokenizerConfig() + + tokenize = create_tokenizer(tokenizer_config) + + tokenized_elements: dict[str, list[str]] = {} + token_space_after: dict[str, list[bool]] = {} + + for name, text in item.rendered_elements.items(): + result: TokenizedText = tokenize(text) + tokenized_elements[name] = result.token_texts + token_space_after[name] = result.space_after_flags + + # create new item with tokenization data + data = item.model_dump() + data["tokenized_elements"] = tokenized_elements + data["token_space_after"] = token_space_after + return Item(**data) + + +def _validate_span_indices( + spans: list[Span], + tokenized_elements: dict[str, list[str]], +) -> None: + """Validate span indices are within token bounds. + + Parameters + ---------- + spans : list[Span] + Spans to validate. + tokenized_elements : dict[str, list[str]] + Tokenized element data. + + Raises + ------ + ValueError + If any span index is out of bounds or references an unknown element. + """ + for span in spans: + for segment in span.segments: + if segment.element_name not in tokenized_elements: + raise ValueError( + f"Span '{span.span_id}' segment references element " + f"'{segment.element_name}' which is not in " + f"tokenized_elements. Available: " + f"{list(tokenized_elements.keys())}" + ) + n_tokens = len(tokenized_elements[segment.element_name]) + for idx in segment.indices: + if idx >= n_tokens: + raise ValueError( + f"Span '{span.span_id}' has index {idx} in element " + f"'{segment.element_name}' but element only has " + f"{n_tokens} tokens" + ) + + +def create_span_item( + text: str, + spans: list[Span], + prompt: str, + tokenizer_config: TokenizerConfig | None = None, + tokens: list[str] | None = None, + labels: list[str] | None = None, + span_spec: SpanSpec | None = None, + item_template_id: UUID | None = None, + metadata: dict[str, MetadataValue] | None = None, +) -> Item: + """Create a standalone span labeling item. + + Tokenizes text using config, validates span indices against tokens. + + Parameters + ---------- + text : str + The stimulus text. + spans : list[Span] + Pre-defined span annotations. + prompt : str + Question or instruction for the participant. + tokenizer_config : TokenizerConfig | None + Tokenizer configuration. Ignored if ``tokens`` is provided. + tokens : list[str] | None + Pre-tokenized text (overrides tokenizer). + labels : list[str] | None + Fixed label set for span labeling. + span_spec : SpanSpec | None + Span specification. If None, creates a default static spec. + item_template_id : UUID | None + Template ID. If None, generates a new UUID. + metadata : dict[str, MetadataValue] | None + Additional item metadata. + + Returns + ------- + Item + Span labeling item. + + Raises + ------ + ValueError + If text is empty or span indices are out of bounds. 
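+
+    Examples
+    --------
+    A static item with one pre-labeled span; passing ``tokens`` avoids
+    loading an NLP backend:
+
+    >>> from bead.items.spans import Span, SpanLabel, SpanSegment
+    >>> item = create_span_item(
+    ...     text="The boy left.",
+    ...     tokens=["The", "boy", "left", "."],
+    ...     spans=[
+    ...         Span(
+    ...             span_id="span_0",
+    ...             segments=[SpanSegment(element_name="text", indices=[0, 1])],
+    ...             label=SpanLabel(label="leaver"),
+    ...         )
+    ...     ],
+    ...     prompt="Who left?",
+    ... )
+    >>> item.tokenized_elements["text"]
+    ['The', 'boy', 'left', '.']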
+ """ + if not text or not text.strip(): + raise ValueError("text cannot be empty") + + if item_template_id is None: + item_template_id = uuid4() + + if span_spec is None: + span_spec = SpanSpec( + interaction_mode="static", + labels=labels, + ) + + # store span_spec in item metadata for downstream access + span_spec_data: dict[str, MetadataValue] = {} + for k, v in span_spec.model_dump(mode="json").items(): + span_spec_data[k] = v + + # tokenize + if tokens is not None: + tokenized_elements = {"text": tokens} + # infer space_after from text + token_space_after = {"text": _infer_space_after(tokens, text)} + else: + if tokenizer_config is None: + tokenizer_config = TokenizerConfig() + tokenize = create_tokenizer(tokenizer_config) + result = tokenize(text) + tokenized_elements = {"text": result.token_texts} + token_space_after = {"text": result.space_after_flags} + + # validate spans + _validate_span_indices(spans, tokenized_elements) + + item_metadata: dict[str, MetadataValue] = {"_span_spec": span_spec_data} + if metadata: + item_metadata.update(metadata) + + return Item( + item_template_id=item_template_id, + rendered_elements={"text": text, "prompt": prompt}, + spans=spans, + tokenized_elements=tokenized_elements, + token_space_after=token_space_after, + item_metadata=item_metadata, + ) + + +def create_interactive_span_item( + text: str, + prompt: str, + tokenizer_config: TokenizerConfig | None = None, + tokens: list[str] | None = None, + label_set: list[str] | None = None, + label_source: LabelSourceType = "fixed", + item_template_id: UUID | None = None, + metadata: dict[str, MetadataValue] | None = None, +) -> Item: + """Create an item for interactive span selection by participants. + + Parameters + ---------- + text : str + The stimulus text. + prompt : str + Instruction for the participant. + tokenizer_config : TokenizerConfig | None + Tokenizer configuration. + tokens : list[str] | None + Pre-tokenized text (overrides tokenizer). + label_set : list[str] | None + Fixed label set (when label_source is "fixed"). + label_source : LabelSourceType + Label source type ("fixed" or "wikidata"). + item_template_id : UUID | None + Template ID. If None, generates a new UUID. + metadata : dict[str, MetadataValue] | None + Additional item metadata. + + Returns + ------- + Item + Interactive span labeling item (no pre-defined spans). 
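+
+    Examples
+    --------
+    An interactive item with a fixed label set over pre-tokenized text
+    (participants create the spans, so ``item.spans`` starts empty):
+
+    >>> item = create_interactive_span_item(
+    ...     text="The chef cooked the meal.",
+    ...     tokens=["The", "chef", "cooked", "the", "meal", "."],
+    ...     prompt="Select and label semantic roles.",
+    ...     label_set=["Agent", "Patient"],
+    ... )
+    >>> item.spans
+    []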
+ """ + if not text or not text.strip(): + raise ValueError("text cannot be empty") + + if item_template_id is None: + item_template_id = uuid4() + + # build span spec from label parameters + span_spec = SpanSpec( + interaction_mode="interactive", + label_source=label_source, + labels=label_set, + ) + span_spec_data: dict[str, MetadataValue] = {} + for k, v in span_spec.model_dump(mode="json").items(): + span_spec_data[k] = v + + # tokenize + if tokens is not None: + tokenized_elements = {"text": tokens} + token_space_after = {"text": _infer_space_after(tokens, text)} + else: + if tokenizer_config is None: + tokenizer_config = TokenizerConfig() + tokenize = create_tokenizer(tokenizer_config) + result = tokenize(text) + tokenized_elements = {"text": result.token_texts} + token_space_after = {"text": result.space_after_flags} + + item_metadata: dict[str, MetadataValue] = {"_span_spec": span_spec_data} + if metadata: + item_metadata.update(metadata) + + return Item( + item_template_id=item_template_id, + rendered_elements={"text": text, "prompt": prompt}, + spans=[], + tokenized_elements=tokenized_elements, + token_space_after=token_space_after, + item_metadata=item_metadata, + ) + + +def add_spans_to_item( + item: Item, + spans: list[Span], + tokenizer_config: TokenizerConfig | None = None, + span_spec: SpanSpec | None = None, +) -> Item: + """Add span annotations to any existing item. + + This is the key composability function: any item (rating, forced choice, + binary, etc.) can have spans added as an overlay. Tokenizes + rendered_elements if not already tokenized. Returns a new Item. + + Parameters + ---------- + item : Item + Existing item to add spans to. + spans : list[Span] + Span annotations to add. + tokenizer_config : TokenizerConfig | None + Tokenizer configuration (used only if item lacks tokenization). + span_spec : SpanSpec | None + Span specification. + + Returns + ------- + Item + New item with spans added. + + Raises + ------ + ValueError + If span indices are out of bounds. + """ + # tokenize if needed + if not item.tokenized_elements: + item = tokenize_item(item, tokenizer_config) + + # validate spans + _validate_span_indices(spans, item.tokenized_elements) + + # warn if prompt contains [[label]] references to nonexistent span labels + prompt_text = item.rendered_elements.get("prompt", "") + if prompt_text: + all_spans = list(item.spans) + spans + span_labels = {s.label.label for s in all_spans if s.label is not None} + for match in _SPAN_REF_PATTERN.finditer(prompt_text): + ref_label = match.group(1) + if ref_label not in span_labels: + warnings.warn( + f"Prompt contains [[{ref_label}]] but no span with " + f"label '{ref_label}' exists. 
Available labels: " + f"{sorted(span_labels)}", + UserWarning, + stacklevel=2, + ) + + # build new item with spans + data = item.model_dump() + # merge existing spans with new ones + existing_spans = data.get("spans", []) + data["spans"] = existing_spans + [s.model_dump() for s in spans] + + # store span_spec in item metadata if provided + if span_spec is not None: + item_metadata = dict(data.get("item_metadata", {})) + span_spec_data: dict[str, MetadataValue] = {} + for k, v in span_spec.model_dump(mode="json").items(): + span_spec_data[k] = v + item_metadata["_span_spec"] = span_spec_data + data["item_metadata"] = item_metadata + + return Item(**data) + + +def create_span_items_from_texts( + texts: list[str], + span_extractor: Callable[[str, list[str]], list[Span]], + prompt: str, + tokenizer_config: TokenizerConfig | None = None, + labels: list[str] | None = None, + item_template_id: UUID | None = None, +) -> list[Item]: + """Batch create span items with automatic tokenization. + + Parameters + ---------- + texts : list[str] + List of stimulus texts. + span_extractor : Callable[[str, list[str]], list[Span]] + Function that takes (text, tokens) and returns spans. + prompt : str + Question or instruction for the participant. + tokenizer_config : TokenizerConfig | None + Tokenizer configuration. + labels : list[str] | None + Fixed label set. + item_template_id : UUID | None + Shared template ID. If None, generates one per item. + + Returns + ------- + list[Item] + Span labeling items. + """ + if tokenizer_config is None: + tokenizer_config = TokenizerConfig() + tokenize = create_tokenizer(tokenizer_config) + + items: list[Item] = [] + for text in texts: + result = tokenize(text) + tokens = result.token_texts + spans = span_extractor(text, tokens) + item = create_span_item( + text=text, + spans=spans, + prompt=prompt, + tokens=tokens, + labels=labels, + item_template_id=item_template_id, + ) + items.append(item) + + return items + + +def _infer_space_after(tokens: list[str], text: str) -> list[bool]: + """Infer space_after flags from pre-tokenized text. + + Attempts to locate each token in the original text and check if a + space follows. Falls back to True for all tokens if alignment fails. + + Parameters + ---------- + tokens : list[str] + Token strings. + text : str + Original text. + + Returns + ------- + list[bool] + Per-token space_after flags. + """ + flags: list[bool] = [] + offset = 0 + for token in tokens: + idx = text.find(token, offset) + if idx == -1: + # can't find token; assume space after + flags.append(True) + else: + end = idx + len(token) + space_after = end < len(text) and text[end] == " " + flags.append(space_after) + offset = end + return flags diff --git a/bead/items/spans.py b/bead/items/spans.py new file mode 100644 index 0000000..5de0a26 --- /dev/null +++ b/bead/items/spans.py @@ -0,0 +1,397 @@ +"""Core span annotation models. + +Provides data models for labeled spans, span segments, span labels, +span relations, and span specifications. Supports discontiguous spans, +overlapping spans (nested and intersecting), static and interactive modes, +and two label sources (fixed sets and Wikidata entity search). +""" + +from __future__ import annotations + +from typing import Literal + +from pydantic import Field, field_validator + +from bead.data.base import BeadBaseModel + +# same recursive type as in item.py and item_template.py; duplicated here +# to avoid circular imports (item.py imports Span from this module). 
+type MetadataValue = ( + str | int | float | bool | None | dict[str, MetadataValue] | list[MetadataValue] +) + +SpanIndexMode = Literal["token", "character"] +SpanInteractionMode = Literal["static", "interactive"] +LabelSourceType = Literal["fixed", "wikidata"] + + +# factory functions for default values +def _empty_span_segment_list() -> list[SpanSegment]: + """Return empty SpanSegment list.""" + return [] + + +def _empty_span_metadata() -> dict[str, MetadataValue]: + """Return empty metadata dict.""" + return {} + + +def _empty_relation_metadata() -> dict[str, MetadataValue]: + """Return empty metadata dict.""" + return {} + + +class SpanSegment(BeadBaseModel): + """Contiguous or discontiguous indices within a single element. + + Attributes + ---------- + element_name : str + Which rendered element this segment belongs to. + indices : list[int] + Token or character indices within the element. + """ + + element_name: str = Field(..., description="Rendered element name") + indices: list[int] = Field(..., description="Token or character indices") + + @field_validator("element_name") + @classmethod + def validate_element_name(cls, v: str) -> str: + """Validate element name is not empty. + + Parameters + ---------- + v : str + Element name to validate. + + Returns + ------- + str + Validated element name. + + Raises + ------ + ValueError + If element name is empty. + """ + if not v or not v.strip(): + raise ValueError("element_name cannot be empty") + return v.strip() + + @field_validator("indices") + @classmethod + def validate_indices(cls, v: list[int]) -> list[int]: + """Validate indices are not empty and non-negative. + + Parameters + ---------- + v : list[int] + Indices to validate. + + Returns + ------- + list[int] + Validated indices. + + Raises + ------ + ValueError + If indices are empty or contain negative values. + """ + if not v: + raise ValueError("indices cannot be empty") + if any(i < 0 for i in v): + raise ValueError("indices must be non-negative") + return v + + +class SpanLabel(BeadBaseModel): + """Label applied to a span or relation. + + Attributes + ---------- + label : str + Human-readable label text. + label_id : str | None + External identifier (e.g. Wikidata QID "Q5"). + confidence : float | None + Confidence score for model-assigned labels. + """ + + label: str = Field(..., description="Human-readable label text") + label_id: str | None = Field( + default=None, description="External ID (e.g. Wikidata QID)" + ) + confidence: float | None = Field( + default=None, description="Confidence for model-assigned labels" + ) + + @field_validator("label") + @classmethod + def validate_label(cls, v: str) -> str: + """Validate label is not empty. + + Parameters + ---------- + v : str + Label to validate. + + Returns + ------- + str + Validated label. + + Raises + ------ + ValueError + If label is empty. + """ + if not v or not v.strip(): + raise ValueError("label cannot be empty") + return v.strip() + + +class Span(BeadBaseModel): + """Labeled span across one or more elements. + + Supports discontiguous, overlapping, and nested spans. + + Attributes + ---------- + span_id : str + Unique identifier within the item. + segments : list[SpanSegment] + Index segments composing this span. + head_index : int | None + Syntactic head token index. + label : SpanLabel | None + Label applied to this span (None = to-be-labeled). + span_type : str | None + Semantic category (e.g. "entity", "event", "role"). + span_metadata : dict[str, MetadataValue] + Additional span-specific metadata. 
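+
+    Examples
+    --------
+    A discontiguous span covering tokens 0 and 2 of the ``"text"`` element:
+
+    >>> span = Span(
+    ...     span_id="span_0",
+    ...     segments=[SpanSegment(element_name="text", indices=[0, 2])],
+    ...     label=SpanLabel(label="Agent"),
+    ... )
+    >>> span.label.label
+    'Agent'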
+ """ + + span_id: str = Field(..., description="Unique span ID within item") + segments: list[SpanSegment] = Field( + default_factory=_empty_span_segment_list, description="Index segments" + ) + head_index: int | None = Field( + default=None, description="Syntactic head token index" + ) + label: SpanLabel | None = Field( + default=None, description="Span label (None = to-be-labeled)" + ) + span_type: str | None = Field(default=None, description="Semantic category") + span_metadata: dict[str, MetadataValue] = Field( + default_factory=_empty_span_metadata, description="Span metadata" + ) + + @field_validator("span_id") + @classmethod + def validate_span_id(cls, v: str) -> str: + """Validate span_id is not empty. + + Parameters + ---------- + v : str + Span ID to validate. + + Returns + ------- + str + Validated span ID. + + Raises + ------ + ValueError + If span_id is empty. + """ + if not v or not v.strip(): + raise ValueError("span_id cannot be empty") + return v.strip() + + +class SpanRelation(BeadBaseModel): + """A typed, directed relation between two spans. + + Used for semantic role labeling, relation extraction, entity linking, + coreference, and similar tasks. + + Attributes + ---------- + relation_id : str + Unique identifier within the item. + source_span_id : str + ``span_id`` of the source span. + target_span_id : str + ``span_id`` of the target span. + label : SpanLabel | None + Relation label (reuses SpanLabel for consistency). + directed : bool + Whether the relation is directed (A->B) or undirected (A--B). + relation_metadata : dict[str, MetadataValue] + Additional relation-specific metadata. + """ + + relation_id: str = Field(..., description="Unique relation ID within item") + source_span_id: str = Field(..., description="Source span ID") + target_span_id: str = Field(..., description="Target span ID") + label: SpanLabel | None = Field(default=None, description="Relation label") + directed: bool = Field(default=True, description="Whether relation is directed") + relation_metadata: dict[str, MetadataValue] = Field( + default_factory=_empty_relation_metadata, + description="Relation metadata", + ) + + @field_validator("relation_id") + @classmethod + def validate_relation_id(cls, v: str) -> str: + """Validate relation_id is not empty. + + Parameters + ---------- + v : str + Relation ID to validate. + + Returns + ------- + str + Validated relation ID. + + Raises + ------ + ValueError + If relation_id is empty. + """ + if not v or not v.strip(): + raise ValueError("relation_id cannot be empty") + return v.strip() + + @field_validator("source_span_id", "target_span_id") + @classmethod + def validate_span_ids(cls, v: str) -> str: + """Validate span IDs are not empty. + + Parameters + ---------- + v : str + Span ID to validate. + + Returns + ------- + str + Validated span ID. + + Raises + ------ + ValueError + If span ID is empty. + """ + if not v or not v.strip(): + raise ValueError("span ID cannot be empty") + return v.strip() + + +class SpanSpec(BeadBaseModel): + """Specification for span labeling behavior. + + Configures how spans are displayed, created, and labeled in an + experiment. Supports both fixed label sets and Wikidata entity search + for both span labels and relation labels. + + Attributes + ---------- + index_mode : SpanIndexMode + Whether spans index by token or character position. + interaction_mode : SpanInteractionMode + "static" for read-only highlights, "interactive" for participant + annotation. 
+ label_source : LabelSourceType + Source of span labels ("fixed" or "wikidata"). + labels : list[str] | None + Fixed span label set (when label_source is "fixed"). + label_colors : dict[str, str] | None + CSS colors keyed by label name. + allow_overlapping : bool + Whether overlapping spans are permitted. + min_spans : int | None + Minimum number of spans required (interactive mode). + max_spans : int | None + Maximum number of spans allowed (interactive mode). + enable_relations : bool + Whether relation annotation is enabled. + relation_label_source : LabelSourceType + Source of relation labels. + relation_labels : list[str] | None + Fixed relation label set. + relation_label_colors : dict[str, str] | None + CSS colors keyed by relation label name. + relation_directed : bool + Default directionality for new relations. + min_relations : int | None + Minimum number of relations required (interactive mode). + max_relations : int | None + Maximum number of relations allowed (interactive mode). + wikidata_language : str + Language for Wikidata entity search. + wikidata_entity_types : list[str] | None + Restrict Wikidata search to these entity types. + wikidata_result_limit : int + Maximum number of Wikidata search results. + """ + + index_mode: SpanIndexMode = Field(default="token", description="Span indexing mode") + interaction_mode: SpanInteractionMode = Field( + default="static", description="Span interaction mode" + ) + # span label config + label_source: LabelSourceType = Field( + default="fixed", description="Span label source" + ) + labels: list[str] | None = Field(default=None, description="Fixed span label set") + label_colors: dict[str, str] | None = Field( + default=None, description="CSS colors per span label" + ) + allow_overlapping: bool = Field( + default=True, description="Whether overlapping spans are allowed" + ) + min_spans: int | None = Field( + default=None, description="Minimum required spans (interactive)" + ) + max_spans: int | None = Field( + default=None, description="Maximum allowed spans (interactive)" + ) + # relation config + enable_relations: bool = Field( + default=False, description="Whether relation annotation is enabled" + ) + relation_label_source: LabelSourceType = Field( + default="fixed", description="Relation label source" + ) + relation_labels: list[str] | None = Field( + default=None, description="Fixed relation label set" + ) + relation_label_colors: dict[str, str] | None = Field( + default=None, description="CSS colors per relation label" + ) + relation_directed: bool = Field( + default=True, description="Default directionality for relations" + ) + min_relations: int | None = Field( + default=None, description="Minimum required relations (interactive)" + ) + max_relations: int | None = Field( + default=None, description="Maximum allowed relations (interactive)" + ) + # wikidata config (shared by span labels and relation labels) + wikidata_language: str = Field( + default="en", description="Language for Wikidata entity search" + ) + wikidata_entity_types: list[str] | None = Field( + default=None, description="Restrict Wikidata entity types" + ) + wikidata_result_limit: int = Field( + default=10, description="Max Wikidata search results" + ) diff --git a/bead/tokenization/__init__.py b/bead/tokenization/__init__.py new file mode 100644 index 0000000..da26859 --- /dev/null +++ b/bead/tokenization/__init__.py @@ -0,0 +1,32 @@ +"""Configurable multilingual tokenization for span annotation. 
+ +This package provides display-level tokenization that splits text into +word-level tokens for span annotation and UI display. Supports multiple +NLP backends (spaCy, Stanza, whitespace) for multilingual coverage. + +Display tokens are distinct from model (subword) tokens used in active +learning. The alignment module maps between the two. +""" + +from __future__ import annotations + +from bead.tokenization.config import TokenizerBackend, TokenizerConfig +from bead.tokenization.tokenizers import ( + DisplayToken, + SpacyTokenizer, + StanzaTokenizer, + TokenizedText, + WhitespaceTokenizer, + create_tokenizer, +) + +__all__ = [ + "DisplayToken", + "SpacyTokenizer", + "StanzaTokenizer", + "TokenizedText", + "TokenizerBackend", + "TokenizerConfig", + "WhitespaceTokenizer", + "create_tokenizer", +] diff --git a/bead/tokenization/alignment.py b/bead/tokenization/alignment.py new file mode 100644 index 0000000..e0524a9 --- /dev/null +++ b/bead/tokenization/alignment.py @@ -0,0 +1,108 @@ +"""Alignment between display tokens and subword model tokens. + +Maps display-token-level span indices to subword-token indices so that +active learning models can consume span annotations created in +display-token space. +""" + +from __future__ import annotations + +from typing import Protocol + + +def align_display_to_subword( + display_tokens: list[str], + subword_tokenizer: _PreTrainedTokenizerProtocol, +) -> list[list[int]]: + """Map each display token index to its corresponding subword token indices. + + Parameters + ---------- + display_tokens : list[str] + Display-level token strings (word-level). + subword_tokenizer : _PreTrainedTokenizerProtocol + A HuggingFace-compatible tokenizer with ``__call__`` and + ``convert_ids_to_tokens`` methods. + + Returns + ------- + list[list[int]] + A list where ``entry[i]`` is the list of subword token indices + for display token ``i``. Special tokens (CLS, SEP, etc.) are + excluded. + """ + alignment: list[list[int]] = [] + # tokenize each display token individually to get the mapping + subword_offset = 0 + + # first, tokenize the full text to get the complete subword sequence + full_text = " ".join(display_tokens) + full_encoding = subword_tokenizer(full_text, add_special_tokens=False) + full_ids: list[int] = full_encoding["input_ids"] + full_subword_tokens = subword_tokenizer.convert_ids_to_tokens(full_ids) + + # now align by tokenizing each display token + for display_token in display_tokens: + token_encoding = subword_tokenizer(display_token, add_special_tokens=False) + token_ids: list[int] = token_encoding["input_ids"] + n_subwords = len(token_ids) + + # map to indices in the full subword sequence + indices = list(range(subword_offset, subword_offset + n_subwords)) + # clamp to valid range + indices = [i for i in indices if i < len(full_subword_tokens)] + alignment.append(indices) + subword_offset += n_subwords + + return alignment + + +def convert_span_indices( + span_indices: list[int], + alignment: list[list[int]], +) -> list[int]: + """Convert display-token span indices to subword-token indices. + + Parameters + ---------- + span_indices : list[int] + Display-token indices forming the span. + alignment : list[list[int]] + Alignment from ``align_display_to_subword``. + + Returns + ------- + list[int] + Corresponding subword-token indices. + + Raises + ------ + IndexError + If any span index is out of range of the alignment. 
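+
+    Examples
+    --------
+    With display token 1 split into two subwords, a span over display
+    tokens [1, 2] maps to three subword indices:
+
+    >>> alignment = [[0], [1, 2], [3]]
+    >>> convert_span_indices([1, 2], alignment)
+    [1, 2, 3]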
+ """ + subword_indices: list[int] = [] + for idx in span_indices: + if idx < 0 or idx >= len(alignment): + raise IndexError( + f"Span index {idx} is out of range. " + f"Alignment covers {len(alignment)} display tokens." + ) + subword_indices.extend(alignment[idx]) + return sorted(set(subword_indices)) + + +class _PreTrainedTokenizerProtocol(Protocol): + """Structural typing protocol for HuggingFace tokenizers. + + Defines the minimal interface expected from a HuggingFace + ``PreTrainedTokenizerBase`` instance: callable tokenization + and ID-to-token conversion. + """ + + def __call__( + self, + text: str, + add_special_tokens: bool = True, + ) -> dict[str, list[int]]: ... + + def convert_ids_to_tokens(self, ids: list[int]) -> list[str]: ... diff --git a/bead/tokenization/config.py b/bead/tokenization/config.py new file mode 100644 index 0000000..c471f8d --- /dev/null +++ b/bead/tokenization/config.py @@ -0,0 +1,45 @@ +"""Tokenizer configuration model. + +Aligned with the existing ChunkingSpec pattern in bead.items.item_template, +which already supports ``parser: Literal["stanza", "spacy"]``. +""" + +from __future__ import annotations + +from typing import Literal + +from pydantic import BaseModel, ConfigDict, Field + +TokenizerBackend = Literal["spacy", "stanza", "whitespace"] + + +class TokenizerConfig(BaseModel): + """Configuration for display-level tokenization. + + Controls how text is split into word-level tokens for span annotation + and UI display. Supports multiple NLP backends for multilingual coverage. + + Attributes + ---------- + backend : TokenizerBackend + Tokenization backend to use. "spacy" (default) supports 49+ languages + and is fast and production-grade. "stanza" supports 80+ languages + with better coverage for low-resource and morphologically rich + languages. "whitespace" is a simple fallback for pre-tokenized text. + language : str + ISO 639 language code (e.g. "en", "zh", "de", "ar"). + model_name : str | None + Explicit model name (e.g. "en_core_web_sm", "zh_core_web_sm"). + When None, auto-resolved from language and backend. + """ + + model_config = ConfigDict(extra="forbid", frozen=True) + + backend: TokenizerBackend = Field( + default="spacy", description="Tokenization backend" + ) + language: str = Field(default="en", description="ISO 639 language code") + model_name: str | None = Field( + default=None, + description="Explicit model name; auto-resolved when None", + ) diff --git a/bead/tokenization/tokenizers.py b/bead/tokenization/tokenizers.py new file mode 100644 index 0000000..1c041c7 --- /dev/null +++ b/bead/tokenization/tokenizers.py @@ -0,0 +1,364 @@ +"""Concrete tokenizer implementations. + +Provides display-level tokenizers for span annotation. Each tokenizer +converts raw text into a sequence of ``DisplayToken`` objects that carry +rendering metadata (``space_after``) for artifact-free reconstruction. +""" + +from __future__ import annotations + +import re +from collections.abc import Callable, Iterator +from typing import Protocol + +from pydantic import BaseModel, ConfigDict + +from bead.tokenization.config import TokenizerConfig + + +class DisplayToken(BaseModel): + """A word-level token with rendering metadata. + + Attributes + ---------- + text : str + The token text. + space_after : bool + Whether whitespace follows this token in the original text. + start_char : int + Character offset of the token start in the original text. + end_char : int + Character offset of the token end in the original text. 
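+
+    Examples
+    --------
+    ``space_after=False`` on a final token lets ``TokenizedText.render``
+    reconstruct the original text without a trailing space:
+
+    >>> DisplayToken(text="left.", space_after=False, start_char=3, end_char=8)
+    DisplayToken(text='left.', space_after=False, start_char=3, end_char=8)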
+ """ + + model_config = ConfigDict(extra="forbid", frozen=True) + + text: str + space_after: bool = True + start_char: int + end_char: int + + +class TokenizedText(BaseModel): + """Result of display-level tokenization. + + Attributes + ---------- + tokens : list[DisplayToken] + The sequence of display tokens. + original_text : str + The original input text. + """ + + model_config = ConfigDict(extra="forbid", frozen=True) + + tokens: list[DisplayToken] + original_text: str + + @property + def token_texts(self) -> list[str]: + """Plain token strings (for ``Item.tokenized_elements``). + + Returns + ------- + list[str] + List of token text strings. + """ + return [t.text for t in self.tokens] + + @property + def space_after_flags(self) -> list[bool]: + """Per-token space_after flags (for ``Item.token_space_after``). + + Returns + ------- + list[bool] + List of boolean flags. + """ + return [t.space_after for t in self.tokens] + + def render(self) -> str: + """Reconstruct display text from tokens with correct spacing. + + Guarantees identical rendering to original when round-tripped. + + Returns + ------- + str + Reconstructed text. + """ + parts: list[str] = [] + for token in self.tokens: + parts.append(token.text) + if token.space_after: + parts.append(" ") + return "".join(parts).rstrip() + + +class WhitespaceTokenizer: + """Simple whitespace-split tokenizer. + + Fallback for pre-tokenized text or languages not supported by spaCy + or Stanza. Splits on whitespace boundaries and infers ``space_after`` + from the original character offsets. + """ + + def __call__(self, text: str) -> TokenizedText: + """Tokenize text by splitting on whitespace. + + Parameters + ---------- + text : str + Input text. + + Returns + ------- + TokenizedText + Tokenized result. + """ + tokens: list[DisplayToken] = [] + for match in re.finditer(r"\S+", text): + start = match.start() + end = match.end() + # space_after is True if there is whitespace after this token + space_after = end < len(text) and text[end] == " " + tokens.append( + DisplayToken( + text=match.group(), + space_after=space_after, + start_char=start, + end_char=end, + ) + ) + return TokenizedText(tokens=tokens, original_text=text) + + +class SpacyTokenizer: + """spaCy-based tokenizer. + + Supports 49+ languages. Auto-resolves model from language code if + ``model_name`` is not specified. Handles punctuation attachment and + multi-word token (MWT) expansion correctly. + + Parameters + ---------- + language : str + ISO 639 language code. + model_name : str | None + Explicit spaCy model name. When None, uses ``{language}_core_web_sm`` + for common languages, falling back to a blank model. + """ + + def __init__(self, language: str = "en", model_name: str | None = None) -> None: + self._language = language + self._model_name = model_name + self._nlp: Callable[..., _SpacyDocProtocol] | None = None + + def _load(self) -> Callable[..., _SpacyDocProtocol]: + if self._nlp is not None: + return self._nlp + + try: + import spacy # noqa: PLC0415 # type: ignore[reportMissingImports] + except ImportError as e: + raise ImportError( + "spaCy is required for SpacyTokenizer. 
" + "Install it with: pip install 'bead[tokenization]'" + ) from e + + model = self._model_name + if model is None: + model = f"{self._language}_core_web_sm" + + try: + nlp: Callable[..., _SpacyDocProtocol] = spacy.load(model) # type: ignore[assignment] + except OSError: + # fall back to blank model + nlp = spacy.blank(self._language) # type: ignore[assignment] + + self._nlp = nlp + return nlp + + def __call__(self, text: str) -> TokenizedText: + """Tokenize text using spaCy. + + Parameters + ---------- + text : str + Input text. + + Returns + ------- + TokenizedText + Tokenized result with correct ``space_after`` metadata. + """ + nlp = self._load() + doc = nlp(text) + tokens: list[DisplayToken] = [] + for token in doc: + tokens.append( + DisplayToken( + text=token.text, + space_after=token.whitespace_ != "", + start_char=token.idx, + end_char=token.idx + len(token.text), + ) + ) + return TokenizedText(tokens=tokens, original_text=text) + + +class StanzaTokenizer: + """Stanza-based tokenizer. + + Supports 80+ languages. Handles multi-word token (MWT) expansion for + languages like German, French, and Arabic. Better coverage for + low-resource and morphologically rich languages. + + Parameters + ---------- + language : str + ISO 639 language code. + model_name : str | None + Explicit Stanza model/package name. When None, uses the default + package for the language. + """ + + def __init__(self, language: str = "en", model_name: str | None = None) -> None: + self._language = language + self._model_name = model_name + self._nlp: _StanzaPipelineProtocol | None = None + + def _load(self) -> _StanzaPipelineProtocol: + if self._nlp is not None: + return self._nlp + + try: + import stanza # noqa: PLC0415 # type: ignore[reportMissingImports] + except ImportError as e: + raise ImportError( + "Stanza is required for StanzaTokenizer. " + "Install it with: pip install 'bead[tokenization]'" + ) from e + + pkg = self._model_name + pkg_kwarg = {"package": pkg} if pkg is not None else {} + + try: + nlp: _StanzaPipelineProtocol = stanza.Pipeline( # type: ignore[assignment] + lang=self._language, + processors="tokenize", + verbose=False, + **pkg_kwarg, # type: ignore[reportArgumentType] + ) + except Exception: + # download model and retry + stanza.download(self._language, verbose=False) + nlp = stanza.Pipeline( # type: ignore[assignment] + lang=self._language, + processors="tokenize", + verbose=False, + **pkg_kwarg, # type: ignore[reportArgumentType] + ) + + self._nlp = nlp + return nlp + + def __call__(self, text: str) -> TokenizedText: + """Tokenize text using Stanza. + + Parameters + ---------- + text : str + Input text. + + Returns + ------- + TokenizedText + Tokenized result with correct ``space_after`` metadata. + """ + nlp = self._load() + doc = nlp(text) + tokens: list[DisplayToken] = [] + for sentence in doc.sentences: + for token in sentence.tokens: + start_char = token.start_char + end_char = token.end_char + # stanza tokens have a misc field; space_after can be + # inferred from character offsets or the SpaceAfter=No + # annotation in the misc field. 
+ space_after = True + if hasattr(token, "misc") and token.misc: + if "SpaceAfter=No" in token.misc: + space_after = False + elif end_char < len(text): + space_after = text[end_char] == " " + + tokens.append( + DisplayToken( + text=token.text, + space_after=space_after, + start_char=start_char, + end_char=end_char, + ) + ) + return TokenizedText(tokens=tokens, original_text=text) + + +def create_tokenizer(config: TokenizerConfig) -> Callable[[str], TokenizedText]: + """Return a tokenization function for the given config. + + Lazy-loads the NLP backend (spaCy/Stanza) on first call. + + Parameters + ---------- + config : TokenizerConfig + Tokenizer configuration. + + Returns + ------- + Callable[[str], TokenizedText] + A callable that tokenizes text. + + Raises + ------ + ValueError + If the backend is not recognized. + """ + if config.backend == "whitespace": + return WhitespaceTokenizer() + elif config.backend == "spacy": + return SpacyTokenizer(language=config.language, model_name=config.model_name) + elif config.backend == "stanza": + return StanzaTokenizer(language=config.language, model_name=config.model_name) + else: + raise ValueError(f"Unknown tokenizer backend: {config.backend}") + + +# structural typing protocols for spaCy/Stanza (avoids hard imports) +class _SpacyTokenProtocol(Protocol): + text: str + whitespace_: str + idx: int + + +class _SpacyDocProtocol(Protocol): + def __iter__(self) -> Iterator[_SpacyTokenProtocol]: ... # noqa: D105 + + +class _StanzaTokenProtocol(Protocol): + text: str + start_char: int + end_char: int + misc: str | None + + +class _StanzaSentenceProtocol(Protocol): + tokens: list[_StanzaTokenProtocol] + + +class _StanzaDocProtocol(Protocol): + sentences: list[_StanzaSentenceProtocol] + + +class _StanzaPipelineProtocol(Protocol): + def __call__(self, text: str) -> _StanzaDocProtocol: ... # noqa: D102 diff --git a/docs/api/items.md b/docs/api/items.md index b73e894..182d784 100644 --- a/docs/api/items.md +++ b/docs/api/items.md @@ -1,6 +1,6 @@ # bead.items -Stage 3 of the bead pipeline: experimental item construction with 8 task types. +Stage 3 of the bead pipeline: experimental item construction with 9 task types. ## Core Classes @@ -56,6 +56,20 @@ Stage 3 of the bead pipeline: experimental item construction with 8 task types. show_root_heading: true show_source: false +## Span Annotation Models + +::: bead.items.spans + options: + show_root_heading: true + show_source: false + +## Span Labeling Utilities + +::: bead.items.span_labeling + options: + show_root_heading: true + show_source: false + ## Item Construction ::: bead.items.constructor diff --git a/docs/api/tokenization.md b/docs/api/tokenization.md new file mode 100644 index 0000000..eab394b --- /dev/null +++ b/docs/api/tokenization.md @@ -0,0 +1,24 @@ +# bead.tokenization + +Configurable multilingual tokenization for span annotation and UI display. 
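+
+A minimal usage sketch with the dependency-free whitespace backend
+(the spaCy and Stanza backends require the optional
+`bead[tokenization]` extra):
+
+```python
+from bead.tokenization import TokenizerConfig, create_tokenizer
+
+tokenize = create_tokenizer(TokenizerConfig(backend="whitespace"))
+
+result = tokenize("Jo confirmed that Bo left.")
+result.token_texts        # ['Jo', 'confirmed', 'that', 'Bo', 'left.']
+result.space_after_flags  # [True, True, True, True, False]
+result.render()           # 'Jo confirmed that Bo left.'
+```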
+ +## Configuration + +::: bead.tokenization.config + options: + show_root_heading: true + show_source: false + +## Tokenizers + +::: bead.tokenization.tokenizers + options: + show_root_heading: true + show_source: false + +## Display-to-Subword Alignment + +::: bead.tokenization.alignment + options: + show_root_heading: true + show_source: false diff --git a/docs/developer-guide/setup.md b/docs/developer-guide/setup.md index 06ecd76..2599ab5 100644 --- a/docs/developer-guide/setup.md +++ b/docs/developer-guide/setup.md @@ -123,7 +123,7 @@ This command: ```bash # Check bead CLI installed uv run bead --version -# Output: bead, version 0.1.0 +# Output: bead, version 0.2.0 # Check development tools uv run pytest --version @@ -509,7 +509,7 @@ Run these commands to verify your development environment is fully functional: ```bash uv run bead --version -# Expected: bead, version 0.1.0 +# Expected: bead, version 0.2.0 ``` ### 2. Run Quick Test diff --git a/docs/examples/gallery.md b/docs/examples/gallery.md index 7e9ee48..e1c93fc 100644 --- a/docs/examples/gallery.md +++ b/docs/examples/gallery.md @@ -1,13 +1,830 @@ -# Examples Gallery +# Interactive Task Gallery -## Overview +Try each bead task interface below. Every demo is a live jsPsych experiment running in your browser. Examples use stimuli from psycholinguistics research on acceptability, veridicality, semantic proto-roles, event typicality, and telicity. -This section contains complete example projects using bead. + -## eng/argument_structure +--- -[Content to be added: walkthrough of gallery/eng/argument_structure example] +## Judgment Tasks -## Adding Your Own Examples +### Likert Rating Scale -[Content to be added] +Rate a sentence on a discrete scale with labeled endpoints. This example asks about the naturalness of the verb *hope* in an NP-to-VP raising frame. + +=== "Demo" + + + +=== "Python" + + ```python + from bead.items.ordinal_scale import create_ordinal_scale_item + + item = create_ordinal_scale_item( + text="Someone hoped someone to leave.", + prompt="How natural is this sentence?", + scale_bounds=(1, 7), + scale_labels={ + 1: "Extremely unnatural", + 7: "Totally natural", + }, + ) + ``` + +=== "Trial JSON" + + ```json + { + "type": "bead-rating", + "prompt": "How natural is this sentence?", + "stimulus": "Someone hoped someone to leave.", + "scale_min": 1, + "scale_max": 7, + "scale_labels": { + "1": "Extremely unnatural", + "7": "Totally natural" + }, + "metadata": {"verb": "hope", "frame": "NP_to_VP"} + } + ``` + +### Slider Rating + +Continuous rating on a slider scale. This example asks how prototypical an event is. + +=== "Demo" + + + +=== "Python" + + ```python + from bead.items.ordinal_scale import create_ordinal_scale_item + + item = create_ordinal_scale_item( + text="The chef cooked the meal.", + prompt="How prototypical is this event?", + scale_bounds=(0, 100), + scale_labels={ + 0: "Very atypical", + 100: "Very prototypical", + }, + ) + ``` + +=== "Trial JSON" + + ```json + { + "type": "bead-slider-rating", + "prompt": "How prototypical is this event?", + "stimulus": "The chef cooked the meal.", + "slider_min": 0, + "slider_max": 100, + "slider_start": 50, + "labels": ["Very atypical", "Very prototypical"], + "metadata": {"verb": "cook"} + } + ``` + +### Forced Choice + +Choose between two alternatives. This example uses a classic syntactic ambiguity to demonstrate comparative judgment. 
+ +=== "Demo" + + + +=== "Python" + + ```python + from bead.items.forced_choice import create_forced_choice_item + + item = create_forced_choice_item( + alternatives=[ + "The turkey", + "Something else", + ], + prompt="The turkey was ready to eat. What planned to eat?", + ) + ``` + +=== "Trial JSON" + + ```json + { + "type": "bead-forced-choice", + "prompt": "The turkey was ready to eat. What planned to eat?", + "alternatives": [ + "The turkey", + "Something else" + ], + "layout": "vertical", + "metadata": {"sentence": "The turkey was ready to eat."} + } + ``` + +### Binary Judgment + +Yes/No acceptability judgment. This example tests the verb *persuade* in an NP-to-VP object-control frame. + +=== "Demo" + + + +=== "Python" + + ```python + from bead.items.binary import create_binary_item + + item = create_binary_item( + text="Someone persuaded someone to leave.", + prompt="Is this sentence acceptable?", + options=["Acceptable", "Unacceptable"], + ) + ``` + +=== "Trial JSON" + + ```json + { + "type": "bead-binary-choice", + "prompt": "Is this sentence acceptable?", + "stimulus": "Someone persuaded someone to leave.", + "choices": ["Acceptable", "Unacceptable"], + "metadata": {"verb": "persuade", "frame": "NP_to_VP"} + } + ``` + +### Categorical Classification + +Select one category from an unordered set. This example tests factivity using a natural inference task. + +=== "Demo" + + + +=== "Python" + + ```python + from bead.items.categorical import create_categorical_item + + item = create_categorical_item( + text=( + "Sentence 1: The doctor managed to treat the patient.\n" + "Sentence 2: The patient was treated." + ), + prompt="If the first sentence is true, is the second sentence true?", + categories=["Definitely not", "Maybe", "Definitely"], + ) + ``` + +=== "Trial JSON" + + ```json + { + "type": "bead-categorical", + "prompt": "If the first sentence is true, is the second sentence true?", + "categories": ["Definitely not", "Maybe", "Definitely"], + "metadata": {"recast_type": "factivity"} + } + ``` + +### Magnitude Estimation + +Rate a target stimulus relative to a reference using an exponential slider. The slider maps linear position to exponential values via `exp(x/100) - 1`, placing the reference value at 1/3 from the left. Arrow keys give proportional ~3% changes at any scale; the right end approaches infinity. + +=== "Demo" + + + +=== "Python" + + ```python + from bead.items.magnitude import create_magnitude_item + + item = create_magnitude_item( + text="The alien cooked the pencil.", + prompt="How typical is the target relative to the reference?", + reference_text="The chef cooked the meal.", + reference_value=100, + ) + ``` + +=== "Trial JSON" + + ```json + { + "type": "bead-magnitude", + "prompt": "How typical is the target relative to the reference?", + "prompt_position": "below", + "reference_stimulus": "The chef cooked the meal.", + "reference_value": 100, + "stimulus": "The alien cooked the pencil.", + "input_mode": "exp-slider", + "metadata": {"verb": "cook"} + } + ``` + +### Free Text Response + +Open-ended text response, single-line or multiline. This example asks for an event summarization of a historical passage. + +=== "Demo" + + + +=== "Python" + + ```python + from bead.items.free_text import create_free_text_item + + item = create_free_text_item( + text="The 1846 US occupation of Monterey put an end to any Mexican " + "military presence at the Presidio. 
The fort was abandoned in 1866.", + prompt="Summarize the key event described in this passage.", + multiline=True, + min_length=5, + max_length=200, + ) + ``` + +=== "Trial JSON" + + ```json + { + "type": "bead-free-text", + "prompt": "Summarize the key event described in this passage.", + "multiline": true, + "rows": 3, + "min_length": 5, + "max_length": 200, + "metadata": {"event_type": "Abandoning"} + } + ``` + +--- + +## Selection Tasks + +### Cloze (Fill-in-the-Blank) + +Dropdown selection for fill-in-the-blank gaps. This example tests temporal interpretation using a telicity cloze task with preposition, numeral, and duration unit fields. + +=== "Demo" + + + +=== "Python" + + ```python + from bead.items.cloze import create_cloze_item + + item = create_cloze_item( + text="The hurricane hit the coastline {{prep}} {{numeral}} {{unit}}.", + constraints={ + "prep": ["in", "for"], + "numeral": None, # free text + "unit": ["seconds", "minutes", "hours", "days", "weeks"], + }, + ) + ``` + +=== "Trial JSON" + + ```json + { + "type": "bead-cloze-multi", + "text": "The hurricane hit the coastline %% %% %%.", + "fields": [ + {"type": "dropdown", "options": ["in", "for"]}, + {"type": "text", "placeholder": "#"}, + {"type": "dropdown", "options": ["seconds", "minutes", "hours", "days", "weeks"]} + ], + "require_all": true + } + ``` + +### Multi-Select + +Select one or more options from a set using checkboxes. This example tests pronoun resolution in a discourse with multiple potential referents. + +=== "Demo" + + + +=== "Python" + + ```python + from bead.items.multi_select import create_multi_select_item + + item = create_multi_select_item( + text="Whenever anyone laughed, the magician scowled and their " + "assistant smirked. They were secretly pleased.", + prompt="Who was secretly pleased?", + options=[ + "The magician", + "The assistant", + "Neither", + ], + min_selections=1, + ) + ``` + +=== "Trial JSON" + + ```json + { + "type": "bead-multi-select", + "prompt": "Who was secretly pleased?", + "stimulus": "Whenever anyone laughed, the magician scowled and their assistant smirked. They were secretly pleased.", + "options": [ + "The magician", + "The assistant", + "Neither" + ], + "metadata": {"phenomenon": "pronoun_resolution"} + } + ``` + +--- + +## Span Annotation + +### Interactive Span Labeling (Fixed Labels) + +Select token ranges and assign labels from a searchable fixed set. Type to filter labels or use keyboard shortcuts 1-9. 
+ +=== "Demo" + + + +=== "Python" + + ```python + from bead.items.span_labeling import create_interactive_span_item + + item = create_interactive_span_item( + text="The committee unanimously approved the new budget proposal " + "after reviewing the evidence.", + prompt="Select and label semantic roles.", + label_set=[ + "Agent", "Patient", "Theme", "Experiencer", + "Instrument", "Beneficiary", "Location", "Time", + "Manner", "Cause", "Purpose", "Source", + "Goal", "Stimulus", "Result", "Predicate", + ], + ) + ``` + +=== "Trial JSON" + + ```json + { + "type": "bead-span-label", + "tokens": { + "text": ["The", "committee", "unanimously", "approved", "the", + "new", "budget", "proposal", "after", "reviewing", + "the", "evidence", "."] + }, + "span_spec": { + "interaction_mode": "interactive", + "label_source": "fixed", + "labels": ["Agent", "Patient", "Theme", "Experiencer", + "Instrument", "Beneficiary", "Location", "Time", + "Manner", "Cause", "Purpose", "Source", + "Goal", "Stimulus", "Result", "Predicate"] + } + } + ``` + +### Wikidata Entity Labeling + +Interactive span labeling with Wikidata autocomplete search for labels. Select entities and search Wikidata to link them. + +=== "Demo" + + + +=== "Python" + + ```python + from bead.items.span_labeling import create_interactive_span_item + + item = create_interactive_span_item( + text="Albert Einstein developed the theory of relativity " + "at the Institute for Advanced Study in Princeton.", + prompt="Select entities and search Wikidata to assign labels.", + label_source="wikidata", + ) + ``` + +=== "Trial JSON" + + ```json + { + "type": "bead-span-label", + "tokens": { + "text": ["Albert", "Einstein", "developed", "the", "theory", + "of", "relativity", "at", "the", "Institute", + "for", "Advanced", "Study", "in", "Princeton", "."] + }, + "span_spec": { + "interaction_mode": "interactive", + "label_source": "wikidata", + "wikidata_language": "en" + } + } + ``` + +--- + +## Composite Tasks + +Span highlights work as an orthogonal overlay on any existing task type. The same item can have both span annotations and a rating scale, forced choice, or binary judgment. + +**Prompt span references**: prompts use `[[label]]` syntax to reference span labels. `[[label]]` auto-fills with the span's token text; `[[label:custom text]]` uses a custom surface form. At deployment, references are replaced with color-highlighted HTML matching the span colors in the stimulus. See the [Items API guide](../user-guide/api/items.md#prompt-span-references) for details. + +### Span + Likert Rating + +Proto-role property rating with highlighted arguments using thematic role labels. Question text uses colored highlighting that matches the span colors. 
+ +=== "Demo" + + + +=== "Python" + + ```python + from bead.items.ordinal_scale import create_ordinal_scale_item + from bead.items.span_labeling import add_spans_to_item + from bead.items.spans import Span, SpanLabel, SpanSegment + + item = create_ordinal_scale_item( + text="The boy broke the vase.", + prompt="How likely is it that [[breaker]] existed after [[event:the breaking]]?", + scale_bounds=(1, 5), + scale_labels={1: "Very unlikely", 5: "Very likely"}, + ) + + item = add_spans_to_item( + item, + spans=[ + Span( + span_id="span_0", + segments=[SpanSegment(element_name="text", indices=[0, 1])], + label=SpanLabel(label="breaker"), + ), + Span( + span_id="span_1", + segments=[SpanSegment(element_name="text", indices=[2])], + label=SpanLabel(label="event"), + ), + Span( + span_id="span_2", + segments=[SpanSegment(element_name="text", indices=[3, 4])], + label=SpanLabel(label="breakee"), + ), + ], + ) + ``` + +=== "Trial JSON" + + ```json + { + "type": "bead-rating", + "prompt": "
...(span-highlighted tokens)... How likely is it that <span class=\"bead-q-highlight\">The boy<span class=\"bead-q-chip\">breaker</span></span> existed after <span class=\"bead-q-highlight\">the breaking<span class=\"bead-q-chip\">event</span></span>?
", + "scale_min": 1, + "scale_max": 5, + "scale_labels": {"1": "Very unlikely", "5": "Very likely"}, + "metadata": {"trial_type": "likert_rating"} + } + ``` + +### Span + Slider Rating + +Veridicality inference with highlighted spans but no labels (null labels). The highlighted regions draw attention to the predicate and embedded clause without adding subscript badges. + +=== "Demo" + + + +=== "Python" + + ```python + from bead.items.ordinal_scale import create_ordinal_scale_item + from bead.items.span_labeling import add_spans_to_item + from bead.items.spans import Span, SpanLabel, SpanSegment + + item = create_ordinal_scale_item( + text="Jo confirmed that Bo left.", + prompt="How likely is it that someone left?", + scale_bounds=(0, 100), + ) + + item = add_spans_to_item( + item, + spans=[ + Span( + span_id="span_0", + segments=[SpanSegment(element_name="text", indices=[1])], + label=None, + ), + Span( + span_id="span_1", + segments=[SpanSegment(element_name="text", indices=[3, 4])], + label=None, + ), + ], + ) + ``` + +=== "Trial JSON" + + ```json + { + "type": "bead-slider-rating", + "prompt": "
...(span-highlighted tokens, no subscript badges)... How likely is it that someone left?
", + "labels": ["Not at all", "Very much"], + "slider_min": 0, + "slider_max": 100, + "slider_start": 50, + "metadata": {"trial_type": "slider_rating"} + } + ``` + +### Span + Forced Choice + +Compare change-of-state across predicates with thematic role labels. Question text uses colored highlighting matching the span annotation colors. + +=== "Demo" + + + +=== "Python" + + ```python + from bead.items.forced_choice import create_forced_choice_item + + item = create_forced_choice_item( + alternatives=[ + "The boy tapped the vase.", + "The boy hit the vase.", + ], + prompt="In which event is it more likely that the vase broke?", + ) + ``` + +=== "Trial JSON" + + ```json + { + "type": "bead-forced-choice", + "prompt": "In which event is it more likely that the vase broke?", + "alternatives": [ + "The boy tapped the vase.", + "The boy hit the vase." + ], + "layout": "horizontal", + "metadata": {"trial_type": "forced_choice"} + } + ``` + +### Span + Binary Judgment + +Change-of-location property with four thematic role arguments. Question text uses colored highlighting matching the span colors. + +=== "Demo" + + + +=== "Python" + + ```python + from bead.items.binary import create_binary_item + from bead.items.span_labeling import add_spans_to_item + from bead.items.spans import Span, SpanLabel, SpanSegment + + item = create_binary_item( + text="The merchant traded the silk for the spices.", + prompt="Did [[traded-away:the silk]] change location as a result of [[event:the trading]]?", + options=["Yes", "No"], + ) + + item = add_spans_to_item( + item, + spans=[ + Span( + span_id="span_0", + segments=[SpanSegment(element_name="text", indices=[0, 1])], + label=SpanLabel(label="trader"), + ), + Span( + span_id="span_1", + segments=[SpanSegment(element_name="text", indices=[2])], + label=SpanLabel(label="event"), + ), + Span( + span_id="span_2", + segments=[SpanSegment(element_name="text", indices=[3, 4])], + label=SpanLabel(label="traded-away"), + ), + Span( + span_id="span_3", + segments=[SpanSegment(element_name="text", indices=[6, 7])], + label=SpanLabel(label="traded-for"), + ), + ], + ) + ``` + +=== "Trial JSON" + + ```json + { + "type": "bead-binary-choice", + "prompt": "Did the silktraded-away change location as a result of the tradingevent?", + "stimulus": "
...(span-highlighted tokens with subscript badges)...
", + "choices": ["Yes", "No"], + "metadata": {"trial_type": "binary_choice"} + } + ``` + +### Span + Free Text + +Event summarization with a highlighted event span. The annotated span draws attention to the target event in a longer passage. + +=== "Demo" + + + +=== "Python" + + ```python + from bead.items.free_text import create_free_text_item + from bead.items.span_labeling import add_spans_to_item + from bead.items.spans import Span, SpanLabel, SpanSegment + + item = create_free_text_item( + text="The 1846 US occupation of Monterey put an end to any Mexican " + "military presence at the Presidio. The fort was abandoned in 1866.", + prompt="Summarize [[event:the highlighted event]] in one sentence.", + multiline=True, + min_length=5, + max_length=200, + ) + + item = add_spans_to_item( + item, + spans=[ + Span( + span_id="span_0", + segments=[SpanSegment(element_name="text", indices=[21])], + label=SpanLabel(label="event"), + ), + ], + ) + ``` + +=== "Trial JSON" + + ```json + { + "type": "bead-free-text", + "prompt": "Summarize the highlighted eventevent in one sentence.", + "stimulus": "
...(span-highlighted tokens)...
", + "multiline": true, + "rows": 3, + "min_length": 5, + "max_length": 200, + "metadata": {"trial_type": "free_text"} + } + ``` + +--- + +## Relation Annotation + +### Span Relations (Fixed Labels) + +Interactive span and relation annotation with searchable fixed label sets. Create spans, then use "Add Relation" to draw directed relations between them using thematic role labels. + +=== "Demo" + + + +=== "Python" + + ```python + from bead.items.span_labeling import create_interactive_span_item + from bead.items.spans import SpanSpec + + item = create_interactive_span_item( + text="The scientist presented the findings to the committee " + "at the annual conference.", + prompt="Create spans and relations for semantic role labeling.", + label_set=[ + "Agent", "Patient", "Theme", "Recipient", + "Instrument", "Location", "Time", "Predicate", + "Stimulus", "Goal", + ], + span_spec=SpanSpec( + interaction_mode="interactive", + label_source="fixed", + enable_relations=True, + relation_label_source="fixed", + relation_labels=[ + "agent-of", "patient-of", "theme-of", + "recipient-of", "location-of", "time-of", + "predicate-of", + ], + relation_directed=True, + ), + ) + ``` + +=== "Trial JSON" + + ```json + { + "type": "bead-span-label", + "tokens": { + "text": ["The", "scientist", "presented", "the", "findings", + "to", "the", "committee", "at", "the", "annual", + "conference", "."] + }, + "span_spec": { + "interaction_mode": "interactive", + "label_source": "fixed", + "labels": ["Agent", "Patient", "Theme", "Recipient", + "Instrument", "Location", "Time", "Predicate", + "Stimulus", "Goal"], + "enable_relations": true, + "relation_label_source": "fixed", + "relation_labels": ["agent-of", "patient-of", "theme-of", + "recipient-of", "location-of", "time-of", + "predicate-of"], + "relation_directed": true + } + } + ``` + +### Span Relations (Wikidata) + +Interactive entity linking and relation annotation with Wikidata search for both entity and relation labels. Useful for knowledge graph construction. 
+ +=== "Demo" + + + +=== "Python" + + ```python + from bead.items.span_labeling import create_interactive_span_item + from bead.items.spans import SpanSpec + + item = create_interactive_span_item( + text="Marie Curie was born in Warsaw and later became " + "a professor at the University of Paris.", + prompt="Link entities via Wikidata and draw relations between them.", + label_source="wikidata", + span_spec=SpanSpec( + interaction_mode="interactive", + label_source="wikidata", + enable_relations=True, + relation_label_source="wikidata", + relation_directed=True, + ), + ) + ``` + +=== "Trial JSON" + + ```json + { + "type": "bead-span-label", + "tokens": { + "text": ["Marie", "Curie", "was", "born", "in", "Warsaw", + "and", "later", "became", "a", "professor", "at", + "the", "University", "of", "Paris", "."] + }, + "span_spec": { + "interaction_mode": "interactive", + "label_source": "wikidata", + "enable_relations": true, + "relation_label_source": "wikidata", + "relation_directed": true, + "wikidata_language": "en" + } + } + ``` diff --git a/docs/gallery/.DS_Store b/docs/gallery/.DS_Store new file mode 100644 index 0000000..443a178 Binary files /dev/null and b/docs/gallery/.DS_Store differ diff --git a/docs/gallery/css/gallery.css b/docs/gallery/css/gallery.css new file mode 100644 index 0000000..8bca016 --- /dev/null +++ b/docs/gallery/css/gallery.css @@ -0,0 +1,1323 @@ +/* Gallery demo styles - loaded by each standalone demo HTML page */ + +/* Override jsPsych's html,body { height: 100% } so that + document.documentElement.scrollHeight reflects actual content, + not the iframe viewport size (which causes a resize feedback loop). */ +html, body { + height: auto !important; + min-height: 0 !important; + overflow: hidden !important; +} + +/* Reset and base */ +*, *::before, *::after { + box-sizing: border-box; + margin: 0; + padding: 0; +} + +body { + font-family: "Roboto", -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif; + font-size: 15px; + line-height: 1.6; + color: #212121; + background: #fafafa; + padding: 24px; +} + +/* Demo container */ +.gallery-demo { + max-width: 720px; + margin: 0 auto; +} + +/* jspsych overrides for iframe embedding */ +#jspsych-target { + font-family: inherit; +} + +.jspsych-display-element { + font-family: inherit; + font-size: inherit; + min-height: 0 !important; +} + +.jspsych-content-wrapper { + min-height: 0 !important; +} + +.jspsych-content { + max-width: 100% !important; +} + +/* Ensure jsPsych standard plugin buttons are visible */ +.jspsych-html-button-response-btngroup { + display: flex; + justify-content: center; + gap: 8px; + margin-top: 20px; +} + +.jspsych-html-slider-response-container { + margin-top: 16px; +} + +/* ── Rating plugin ───────────────────────────────────── */ + +.bead-rating-container { + text-align: center; + padding: 20px 0; +} + +.bead-rating-prompt { + font-size: 1.1em; + margin-bottom: 24px; + color: #424242; +} + +.bead-rating-scale { + display: flex; + justify-content: center; + gap: 8px; + margin-bottom: 24px; + flex-wrap: wrap; +} + +.bead-rating-option { + display: flex; + flex-direction: column; + align-items: center; + gap: 4px; +} + +.bead-rating-button { + width: 44px; + height: 44px; + border-radius: 50%; + border: 2px solid #bdbdbd; + background: white; + font-size: 1em; + font-weight: 500; + cursor: pointer; + transition: all 0.15s ease; + color: #424242; +} + +.bead-rating-button:hover { + border-color: #5c6bc0; + background: #e8eaf6; +} + +.bead-rating-button.selected { + border-color: #3f51b5; + 
background: #3f51b5; + color: white; +} + +.bead-rating-label { + font-size: 0.75em; + color: #757575; + max-width: 64px; + text-align: center; +} + +/* ── Forced choice plugin ────────────────────────────── */ + +.bead-forced-choice-container { + padding: 20px 0; +} + +.bead-forced-choice-prompt { + text-align: center; + font-size: 1.1em; + margin-bottom: 24px; + color: #424242; +} + +.bead-forced-choice-alternatives { + display: grid; + gap: 12px; + margin: 0 auto; +} + +.bead-forced-choice-alternatives.bead-layout-horizontal { + grid-template-columns: 1fr 1fr; + max-width: 600px; +} + +.bead-forced-choice-alternatives.bead-layout-vertical { + grid-template-columns: 1fr; + max-width: 480px; +} + +.bead-card { + border: 2px solid #e0e0e0; + border-radius: 8px; + padding: 20px; + background: white; + transition: all 0.15s ease; +} + +.bead-alternative { + cursor: pointer; + text-align: center; +} + +.bead-alternative:hover { + border-color: #5c6bc0; + border-top: 3px solid #5c6bc0; + box-shadow: 0 2px 8px rgba(63, 81, 181, 0.12); +} + +.bead-alternative.selected { + border-color: #3f51b5; + background: #e8eaf6; +} + +.bead-alternative-label { + font-size: 0.8em; + font-weight: 500; + color: #9e9e9e; + text-transform: uppercase; + letter-spacing: 0.5px; + margin-bottom: 8px; +} + +.bead-alternative-content { + font-size: 1em; + color: #212121; + margin-bottom: 12px; + line-height: 1.5; +} + +.bead-choice-button { + display: none; +} + +/* ── Cloze plugin ────────────────────────────────────── */ + +.bead-cloze-container { + padding: 20px 0; + text-align: center; +} + +.bead-cloze-text { + font-size: 1.1em; + line-height: 2; + margin-bottom: 24px; + color: #424242; +} + +.bead-dropdown { + padding: 4px 8px; + border: 2px solid #5c6bc0; + border-radius: 4px; + font-size: 0.95em; + background: #e8eaf6; + cursor: pointer; + appearance: auto; +} + +.bead-text-field { + padding: 4px 8px; + border: 2px solid #5c6bc0; + border-radius: 4px; + font-size: 0.95em; + width: 120px; + text-align: center; +} + +/* ── Binary choice plugin ────────────────────────────── */ + +.bead-binary-choice-container { + text-align: center; + padding: 20px 0; +} + +.bead-binary-choice-prompt { + font-size: 1.1em; + margin-bottom: 16px; + color: #424242; +} + +.bead-binary-choice-stimulus { + font-size: 1.15em; + padding: 16px 24px; + background: white; + border: 1px solid #e0e0e0; + border-left: 3px solid #FFB74D; + border-radius: 6px; + display: inline-block; + margin-bottom: 20px; +} + +.bead-binary-choice-buttons { + display: flex; + justify-content: center; + gap: 12px; +} + +.bead-binary-button { + padding: 10px 32px; + border: 2px solid #e0e0e0; + border-radius: 4px; + background: white; + font-size: 0.95em; + cursor: pointer; + transition: all 0.15s ease; +} + +.bead-binary-button:hover { + border-color: #5c6bc0; + background: #e8eaf6; +} + +.bead-binary-button.selected { + border-color: #3f51b5; + background: #3f51b5; + color: white; +} + +/* ── Slider rating plugin ────────────────────────────── */ + +.bead-slider-container { + text-align: center; + padding: 20px 0; +} + +.bead-slider-prompt { + font-size: 1.1em; + margin-bottom: 24px; + color: #424242; +} + +.bead-slider-wrapper { + max-width: 480px; + margin: 0 auto 16px; +} + +.bead-slider-labels { + display: flex; + justify-content: space-between; + margin-bottom: 8px; + font-size: 0.85em; + color: #757575; +} + +.bead-slider-input { + width: 100%; + margin: 0; + cursor: pointer; + accent-color: #3f51b5; +} + +.bead-slider-value { + margin-top: 8px; + 
font-size: 1.1em; + font-weight: 500; + color: #3f51b5; +} + +.bead-slider-button-container { + margin-top: 16px; + text-align: center; +} + +/* ── Shared button styles ────────────────────────────── */ + +.bead-button { + padding: 10px 32px; + border: none; + border-radius: 4px; + font-size: 0.95em; + font-weight: 500; + cursor: pointer; + transition: all 0.15s ease; +} + +.bead-continue-button { + background: #3f51b5; + color: white; +} + +.bead-continue-button:hover:not(:disabled) { + background: #303f9f; +} + +.bead-continue-button:disabled { + background: #bdbdbd; + cursor: not-allowed; +} + +.bead-rating-button-container, +.bead-cloze-button-container { + margin-top: 16px; + text-align: center; +} + +/* ── Span labeling ───────────────────────────────────── */ + +.bead-span-label-container { + text-align: left; +} + +.bead-span-label-container > .bead-rating-prompt { + font-size: 0.9em; + color: #757575; + font-weight: 400; + margin-bottom: 16px; + padding-bottom: 12px; + border-bottom: 1px solid #e0e0e0; +} + +.bead-span-container { + display: inline; + line-height: 2.6; + font-size: 1.1em; +} + +.bead-token { + display: inline; + padding: 3px 1px; + border-radius: 4px; + cursor: default; + transition: background-color 0.15s; +} + +.bead-space { + display: inline; +} + +/* Contiguous span positions: first/middle/last get merged radius */ +.bead-token.highlighted.span-single { + border-radius: 4px; + padding: 3px 4px; +} + +.bead-token.highlighted.span-first { + border-radius: 4px 0 0 4px; + padding: 3px 0 3px 4px; +} + +.bead-token.highlighted.span-middle { + border-radius: 0; + padding: 3px 0; +} + +.bead-token.highlighted.span-last { + border-radius: 0 4px 4px 0; + padding: 3px 4px 3px 0; +} + +.bead-space.highlighted { + border-radius: 0; +} + +.bead-token.interactive { + cursor: pointer; +} + +.bead-token.interactive:hover { + background-color: rgba(0, 0, 0, 0.08); +} + +.bead-token.highlighted { + position: relative; +} + +.bead-token.selecting { + background-color: #c8e6c9; + border-radius: 4px; + padding: 3px 4px; +} + +.bead-token.selecting.invalid { + background-color: #ffcdd2; +} + +.bead-label-selector { + display: flex; + flex-wrap: wrap; + gap: 4px; + margin-top: 8px; +} + +.bead-label-button { + padding: 4px 12px; + border-radius: 16px; + border: 1px solid #ccc; + cursor: pointer; + background: white; + font-size: 0.9em; +} + +.bead-label-button:hover, +.bead-label-button.active { + border-color: #1976d2; + background: #e3f2fd; +} + +/* Subscript span labels (positioned below last token of each span) */ +.bead-span-subscript { + position: absolute; + bottom: -0.6rem; + right: -2px; + display: inline-flex; + align-items: center; + gap: 2px; + padding: 0px 5px; + border-radius: 0.6rem; + font-size: 0.6rem; + font-weight: 500; + color: white; + white-space: nowrap; + z-index: 1; + box-shadow: 0 1px 3px rgba(0, 0, 0, 0.15); + line-height: 1.5; + cursor: default; +} + +.bead-subscript-delete { + border: none; + background: none; + color: rgba(255, 255, 255, 0.6); + cursor: pointer; + font-size: 1.1em; + font-weight: bold; + padding: 0 1px; + line-height: 1; +} + +.bead-subscript-delete:hover { + color: white; +} + +/* Legacy span list (for composite demos) */ +.bead-span-list { + display: flex; + flex-wrap: wrap; + gap: 6px; + margin-top: 12px; +} + +.bead-span-badge { + display: inline-flex; + align-items: center; + gap: 4px; + padding: 2px 10px; + border-radius: 12px; + font-size: 0.72em; + font-weight: 500; + color: white; + box-shadow: 0 1px 3px rgba(0, 0, 0, 0.12); + 
line-height: 1.6; +} + +/* Relation arcs */ +.bead-relation-arc-area { + position: relative; + width: 100%; +} + +.bead-relation-layer { + width: 100%; + pointer-events: none; +} + +.bead-relation-arc { + fill: none; + stroke-width: 1.5; +} + +.bead-relation-label-text { + font-size: 0.75em; + fill: #424242; +} + +.bead-relation-list { + margin-top: 8px; +} + +.bead-relation-entry { + display: flex; + align-items: center; + gap: 4px; + padding: 2px 0; +} + +/* Wikidata autocomplete */ +.bead-wikidata-panel { + flex-direction: column; +} + +.bead-wikidata-search { + position: relative; + width: 100%; + max-width: 400px; +} + +.bead-wikidata-search input { + width: 100%; + padding: 8px 12px; + border: 2px solid #5c6bc0; + border-radius: 4px; + font-size: 0.95em; + outline: none; +} + +.bead-wikidata-search input:focus { + border-color: #3f51b5; + box-shadow: 0 0 0 2px rgba(63, 81, 181, 0.12); +} + +.bead-wikidata-results { + position: absolute; + z-index: 10; + width: 100%; + max-height: 200px; + overflow-y: auto; + border: 1px solid #e0e0e0; + border-radius: 4px; + background: white; + box-shadow: 0 2px 8px rgba(0, 0, 0, 0.15); +} + +.bead-wikidata-result { + padding: 6px 8px; + cursor: pointer; +} + +.bead-wikidata-result:hover { + background: #f5f5f5; +} + +.bead-wikidata-result .qid { + color: #9e9e9e; + font-size: 0.85em; +} + +.bead-wikidata-result .description { + color: #757575; + font-size: 0.85em; +} + +/* ── Response display ────────────────────────────────── */ + +.gallery-response { + margin-top: 16px; + padding: 12px 16px; + background: #263238; + color: #b2dfdb; + border-radius: 6px; + font-family: "JetBrains Mono", "Fira Code", monospace; + font-size: 0.82em; + white-space: pre-wrap; + word-break: break-word; + max-height: 200px; + overflow-y: auto; +} + +.gallery-response-label { + font-family: "Roboto", sans-serif; + font-size: 0.75em; + font-weight: 500; + text-transform: uppercase; + letter-spacing: 0.5px; + color: #78909c; + margin-bottom: 4px; +} + +/* Reset button */ +.gallery-reset { + display: inline-block; + margin-top: 12px; + padding: 6px 16px; + border: 1px solid #bdbdbd; + border-radius: 4px; + background: white; + color: #616161; + font-size: 0.85em; + cursor: pointer; + transition: all 0.15s ease; +} + +.gallery-reset:hover { + border-color: #3f51b5; + color: #3f51b5; +} + +/* ── Categorical plugin ─────────────────────────────── */ + +.bead-categorical-container { + text-align: center; + padding: 20px 0; +} + +.bead-categorical-prompt { + font-size: 1.1em; + margin-bottom: 16px; + color: #424242; +} + +.bead-categorical-stimulus { + font-size: 1.15em; + padding: 16px 24px; + background: white; + border: 1px solid #e0e0e0; + border-left: 3px solid #4DB6AC; + border-radius: 6px; + display: inline-block; + margin-bottom: 20px; +} + +.bead-categorical-options { + display: flex; + flex-wrap: wrap; + justify-content: center; + gap: 8px; + margin-bottom: 20px; +} + +.bead-categorical-button { + padding: 10px 24px; + border: 2px solid #e0e0e0; + border-radius: 4px; + background: white; + font-size: 0.95em; + cursor: pointer; + transition: all 0.15s ease; +} + +.bead-categorical-button:hover { + border-color: #5c6bc0; + background: #e8eaf6; +} + +.bead-categorical-button.selected { + border-color: #3f51b5; + background: #3f51b5; + color: white; +} + +.bead-categorical-button-container { + margin-top: 16px; + text-align: center; +} + +/* ── Magnitude plugin ──────────────────────────────── */ + +.bead-magnitude-container { + text-align: center; + padding: 20px 0; +} + 
+.bead-magnitude-prompt { + font-size: 1.1em; + margin-bottom: 16px; + color: #424242; +} + +.bead-magnitude-stimulus { + font-size: 1.15em; + padding: 16px 24px; + background: white; + border: 1px solid #e0e0e0; + border-left: 3px solid #FF8A65; + border-radius: 6px; + display: inline-block; + margin-bottom: 20px; +} + +.bead-magnitude-input-wrapper { + display: flex; + align-items: center; + justify-content: center; + gap: 8px; + margin-bottom: 20px; +} + +.bead-magnitude-input { + width: 120px; + padding: 8px 12px; + border: 2px solid #bdbdbd; + border-radius: 4px; + font-size: 1.1em; + text-align: center; + outline: none; + transition: border-color 0.15s ease; +} + +.bead-magnitude-input:focus { + border-color: #3f51b5; + box-shadow: 0 0 0 2px rgba(63, 81, 181, 0.12); +} + +.bead-magnitude-unit { + font-size: 1em; + color: #757575; +} + +.bead-magnitude-button-container { + margin-top: 16px; + text-align: center; +} + +/* ── Free text plugin ──────────────────────────────── */ + +.bead-free-text-container { + text-align: center; + padding: 20px 0; +} + +.bead-free-text-prompt { + font-size: 1.1em; + margin-bottom: 16px; + color: #424242; +} + +.bead-free-text-stimulus { + font-size: 1.15em; + padding: 16px 24px; + background: white; + border: 1px solid #e0e0e0; + border-left: 3px solid #81C784; + border-radius: 6px; + display: inline-block; + margin-bottom: 20px; +} + +.bead-free-text-input { + width: 100%; + max-width: 480px; + padding: 8px 12px; + border: 2px solid #bdbdbd; + border-radius: 4px; + font-size: 1em; + font-family: inherit; + outline: none; + transition: border-color 0.15s ease; + resize: vertical; +} + +.bead-free-text-input:focus { + border-color: #3f51b5; + box-shadow: 0 0 0 2px rgba(63, 81, 181, 0.12); +} + +.bead-free-text-counter { + font-size: 0.8em; + color: #9e9e9e; + margin-top: 4px; +} + +.bead-free-text-button-container { + margin-top: 16px; + text-align: center; +} + +/* ── Multi-select plugin ───────────────────────────── */ + +.bead-multi-select-container { + text-align: center; + padding: 20px 0; +} + +.bead-multi-select-prompt { + font-size: 1.1em; + margin-bottom: 16px; + color: #424242; +} + +.bead-multi-select-stimulus { + font-size: 1.15em; + padding: 16px 24px; + background: white; + border: 1px solid #e0e0e0; + border-left: 3px solid #BA68C8; + border-radius: 6px; + display: inline-block; + margin-bottom: 20px; +} + +.bead-multi-select-options { + display: flex; + flex-direction: column; + align-items: flex-start; + gap: 6px; + margin-bottom: 20px; + max-width: 400px; + margin-left: auto; + margin-right: auto; +} + +.bead-multi-select-option { + display: flex; + align-items: center; + gap: 8px; + padding: 8px 16px; + border: 1px solid #e0e0e0; + border-radius: 4px; + background: white; + cursor: pointer; + transition: all 0.15s ease; + min-width: 280px; + text-align: left; +} + +.bead-multi-select-option:hover { + border-color: #5c6bc0; + background: #f5f5f5; +} + +.bead-multi-select-checkbox { + accent-color: #3f51b5; + width: 18px; + height: 18px; + cursor: pointer; +} + +.bead-multi-select-checkbox:disabled { + opacity: 0.4; + cursor: not-allowed; +} + +.bead-multi-select-label { + font-size: 0.95em; + color: #424242; +} + +.bead-multi-select-button-container { + margin-top: 16px; + text-align: center; +} + +/* ── Searchable label selector ─────────────────────── */ + +.bead-label-search-panel { + flex-direction: column; +} + +.bead-label-search-wrapper { + position: relative; + width: 100%; + max-width: 400px; +} + +.bead-label-search-wrapper 
input { + width: 100%; + padding: 8px 12px; + border: 2px solid #5c6bc0; + border-radius: 4px; + font-size: 0.95em; + outline: none; +} + +.bead-label-search-wrapper input:focus { + border-color: #3f51b5; + box-shadow: 0 0 0 2px rgba(63, 81, 181, 0.12); +} + +.bead-label-search-results { + position: absolute; + z-index: 10; + width: 100%; + max-height: 200px; + overflow-y: auto; + border: 1px solid #e0e0e0; + border-radius: 4px; + background: white; + box-shadow: 0 2px 8px rgba(0, 0, 0, 0.15); +} + +.bead-label-search-result { + padding: 6px 12px; + cursor: pointer; + display: flex; + align-items: center; + gap: 8px; +} + +.bead-label-search-result:hover, +.bead-label-search-result.highlighted { + background: #f5f5f5; +} + +.bead-label-search-result .label-color { + width: 10px; + height: 10px; + border-radius: 50%; + flex-shrink: 0; +} + +.bead-label-search-result .label-name { + font-size: 0.95em; + color: #212121; +} + +.bead-label-search-result .label-shortcut { + font-size: 0.8em; + color: #9e9e9e; + margin-left: auto; +} + +/* ── Relation creation UI ──────────────────────────── */ + +.bead-relation-controls { + display: flex; + align-items: center; + gap: 8px; + margin-top: 12px; + flex-wrap: wrap; +} + +.bead-add-relation-button { + padding: 6px 16px; + border: 1px solid #5c6bc0; + border-radius: 4px; + background: white; + color: #5c6bc0; + font-size: 0.85em; + cursor: pointer; + transition: all 0.15s ease; +} + +.bead-add-relation-button:hover { + background: #e8eaf6; +} + +.bead-add-relation-button:disabled { + border-color: #bdbdbd; + color: #bdbdbd; + cursor: not-allowed; +} + +.bead-relation-status { + font-size: 0.85em; + color: #757575; + font-style: italic; +} + +.bead-relation-cancel { + padding: 4px 12px; + border: 1px solid #e57373; + border-radius: 4px; + background: white; + color: #e57373; + font-size: 0.8em; + cursor: pointer; +} + +.bead-relation-cancel:hover { + background: #ffebee; +} + +.bead-span-subscript.relation-source { + outline: 2px solid #ff9800; + outline-offset: 1px; +} + +.bead-span-subscript.relation-target-candidate:hover { + outline: 2px dashed #2196f3; + outline-offset: 1px; +} + +.bead-relation-entry { + font-size: 0.85em; +} + +.bead-relation-delete { + border: none; + background: none; + color: #e57373; + cursor: pointer; + font-size: 1em; + padding: 0 4px; +} + +.bead-relation-delete:hover { + color: #c62828; +} + +/* ── Stimulus display (for composite tasks) ──────────── */ + +.stimulus-container { + padding: 16px; + margin-bottom: 16px; + background: white; + border: 1px solid #e0e0e0; + border-radius: 6px; + font-size: 1.05em; + line-height: 1.8; +} + +.stimulus-container .element-label { + font-size: 0.75em; + font-weight: 500; + text-transform: uppercase; + letter-spacing: 0.5px; + color: #9e9e9e; + margin-bottom: 4px; +} + +/* ── Multi-select compact layout ─────────────────────── */ + +.bead-multi-select-compact { + flex-direction: row; + flex-wrap: wrap; + justify-content: center; + align-items: center; + gap: 8px; +} + +.bead-multi-select-compact .bead-multi-select-option { + min-width: auto; + padding: 8px 14px; +} + +/* ── Magnitude estimation reference + target ─────────── */ + +.bead-magnitude-reference-header { + display: flex; + flex-direction: column; + align-items: center; + margin-bottom: -8px; + position: relative; + z-index: 1; +} + +.bead-magnitude-section-label { + font-size: 0.7em; + text-transform: uppercase; + letter-spacing: 0.5px; + color: #78909C; + font-weight: 500; + margin-bottom: 2px; +} + 
+.bead-magnitude-reference-chip { + display: inline-block; + background: #1565C0; + color: white; + font-weight: 700; + padding: 2px 12px; + border-radius: 12px; + font-size: 0.85em; + line-height: 1.4; +} + +.bead-magnitude-reference { + border: 1.5px dashed #B0BEC5; + background: #ECEFF1; + font-size: 0.95em; + padding: 16px 24px 12px; + border-radius: 6px; + margin-bottom: 12px; + display: inline-block; +} + +.bead-magnitude-reference-text { + font-size: 1.05em; + color: #546E7A; +} + +/* ── Magnitude exponential slider ────────────────────── */ + +.bead-magnitude-slider-wrapper { + max-width: 520px; + margin: 0 auto 16px; +} + +.bead-magnitude-slider-value { + text-align: center; + font-size: 2em; + font-weight: 700; + color: #3f51b5; + margin-bottom: 12px; + min-height: 1.2em; + font-variant-numeric: tabular-nums; +} + +.bead-magnitude-slider-track-area { + display: flex; + align-items: center; + gap: 8px; +} + +.bead-magnitude-slider-endpoint { + font-size: 0.9em; + color: #757575; + flex-shrink: 0; + width: 24px; + text-align: center; + user-select: none; +} + +.bead-magnitude-slider-right { + font-size: 1.2em; +} + +.bead-magnitude-slider-track { + position: relative; + flex: 1; + height: 8px; + background: #e0e0e0; + border-radius: 4px; + cursor: pointer; + outline: none; +} + +.bead-magnitude-slider-track:focus-visible { + box-shadow: 0 0 0 3px rgba(63, 81, 181, 0.25); +} + +.bead-magnitude-slider-fill { + position: absolute; + top: 0; + left: 0; + height: 100%; + background: #3f51b5; + border-radius: 4px 0 0 4px; + pointer-events: none; +} + +.bead-magnitude-slider-ref-tick { + position: absolute; + top: -6px; + bottom: -6px; + width: 2px; + background: #1565C0; + transform: translateX(-50%); + pointer-events: none; +} + +.bead-magnitude-slider-ref-label { + position: absolute; + top: -18px; + left: 50%; + transform: translateX(-50%); + font-size: 0.7em; + color: #1565C0; + font-weight: 500; + white-space: nowrap; + user-select: none; +} + +.bead-magnitude-slider-handle { + position: absolute; + top: 50%; + width: 22px; + height: 22px; + background: #3f51b5; + border: 2px solid white; + border-radius: 50%; + transform: translate(-50%, -50%); + cursor: grab; + box-shadow: 0 1px 4px rgba(0, 0, 0, 0.3); +} + +.bead-magnitude-slider-handle:active { + cursor: grabbing; + box-shadow: 0 2px 8px rgba(0, 0, 0, 0.4); + transform: translate(-50%, -50%) scale(1.1); +} + +.bead-magnitude-slider-handle.hidden { + display: none; +} + +/* ── Rating / Slider stimulus ────────────────────────── */ + +.bead-rating-stimulus { + font-size: 1.15em; + padding: 16px 24px; + background: white; + border: 1px solid #e0e0e0; + border-left: 3px solid #7986CB; + border-radius: 6px; + display: inline-block; + margin-bottom: 20px; +} + +.bead-slider-stimulus { + font-size: 1.15em; + padding: 16px 24px; + background: white; + border: 1px solid #e0e0e0; + border-left: 3px solid #9FA8DA; + border-radius: 6px; + display: inline-block; + margin-bottom: 20px; +} + +/* ── Span label search: always-visible + cancel ──────── */ + +.bead-label-search-panel.bead-search-disabled { + opacity: 0.4; + pointer-events: none; +} + +.bead-label-search-panel.bead-search-disabled input { + cursor: not-allowed; +} + +.bead-search-cancel { + position: absolute; + right: 8px; + top: 50%; + transform: translateY(-50%); + border: none; + background: none; + color: #9e9e9e; + cursor: pointer; + font-size: 1.1em; + padding: 2px 4px; + line-height: 1; +} + +.bead-search-cancel:hover { + color: #e57373; +} + +/* ── Span bottom bar (search 
+ continue inline) ──────── */ + +.bead-span-bottom-bar { + display: flex; + align-items: center; + gap: 12px; + margin-top: 12px; +} + +.bead-span-bottom-bar .bead-label-search-panel { + flex: 1; + margin-top: 0; +} + +.bead-span-bottom-bar .bead-span-bottom-spacer { + flex: 1; +} + +/* ── Question highlighting (for span+task demos) ─────── */ + +.bead-q-highlight { + position: relative; + padding: 1px 4px; + border-radius: 3px; + font-weight: 500; + margin-bottom: 0.6rem; +} + +.bead-q-chip { + position: absolute; + bottom: -0.6rem; + right: -2px; + display: inline-flex; + align-items: center; + padding: 0px 5px; + border-radius: 0.6rem; + font-size: 0.6rem; + font-weight: 500; + color: white; + white-space: nowrap; + box-shadow: 0 1px 3px rgba(0, 0, 0, 0.15); + line-height: 1.5; +} diff --git a/docs/gallery/demos/binary-choice.html b/docs/gallery/demos/binary-choice.html new file mode 100644 index 0000000..1eaaeaa --- /dev/null +++ b/docs/gallery/demos/binary-choice.html @@ -0,0 +1,52 @@ + + + + + + Binary Judgment + + + + + + + + + + + + diff --git a/docs/gallery/demos/categorical.html b/docs/gallery/demos/categorical.html new file mode 100644 index 0000000..89c9fe6 --- /dev/null +++ b/docs/gallery/demos/categorical.html @@ -0,0 +1,52 @@ + + + + + + Categorical Classification + + + + + + + + + + + + diff --git a/docs/gallery/demos/cloze-dropdown.html b/docs/gallery/demos/cloze-dropdown.html new file mode 100644 index 0000000..be0e684 --- /dev/null +++ b/docs/gallery/demos/cloze-dropdown.html @@ -0,0 +1,55 @@ + + + + + + Cloze (Fill-in-the-Blank) + + + + + + + + + + + + diff --git a/docs/gallery/demos/forced-choice.html b/docs/gallery/demos/forced-choice.html new file mode 100644 index 0000000..ead9212 --- /dev/null +++ b/docs/gallery/demos/forced-choice.html @@ -0,0 +1,55 @@ + + + + + + Forced Choice + + + + + + + + + + + + diff --git a/docs/gallery/demos/free-text.html b/docs/gallery/demos/free-text.html new file mode 100644 index 0000000..e5a9b01 --- /dev/null +++ b/docs/gallery/demos/free-text.html @@ -0,0 +1,56 @@ + + + + + + Free Text Response + + + + + + + + + + + + diff --git a/docs/gallery/demos/magnitude.html b/docs/gallery/demos/magnitude.html new file mode 100644 index 0000000..8eb3bd9 --- /dev/null +++ b/docs/gallery/demos/magnitude.html @@ -0,0 +1,55 @@ + + + + + + Magnitude Estimation + + + + + + + + + + + + diff --git a/docs/gallery/demos/multi-select.html b/docs/gallery/demos/multi-select.html new file mode 100644 index 0000000..5fdd440 --- /dev/null +++ b/docs/gallery/demos/multi-select.html @@ -0,0 +1,58 @@ + + + + + + Multi-Select + + + + + + + + + + + + diff --git a/docs/gallery/demos/rating-likert.html b/docs/gallery/demos/rating-likert.html new file mode 100644 index 0000000..b33aa04 --- /dev/null +++ b/docs/gallery/demos/rating-likert.html @@ -0,0 +1,55 @@ + + + + + + Likert Rating Scale + + + + + + + + + + + + diff --git a/docs/gallery/demos/rating-slider.html b/docs/gallery/demos/rating-slider.html new file mode 100644 index 0000000..24708ae --- /dev/null +++ b/docs/gallery/demos/rating-slider.html @@ -0,0 +1,55 @@ + + + + + + Slider Rating + + + + + + + + + + + + diff --git a/docs/gallery/demos/span-interactive.html b/docs/gallery/demos/span-interactive.html new file mode 100644 index 0000000..e24da8b --- /dev/null +++ b/docs/gallery/demos/span-interactive.html @@ -0,0 +1,73 @@ + + + + + + Interactive Span Labeling + + + + + + + + + + + + diff --git a/docs/gallery/demos/span-relations-fixed.html b/docs/gallery/demos/span-relations-fixed.html new file mode 
100644 index 0000000..02ae805 --- /dev/null +++ b/docs/gallery/demos/span-relations-fixed.html @@ -0,0 +1,74 @@ + + + + + + Span Relations (Fixed Labels) + + + + + + + + + + + + diff --git a/docs/gallery/demos/span-relations-wikidata.html b/docs/gallery/demos/span-relations-wikidata.html new file mode 100644 index 0000000..ae86ff5 --- /dev/null +++ b/docs/gallery/demos/span-relations-wikidata.html @@ -0,0 +1,73 @@ + + + + + + Span Relations (Wikidata) + + + + + + + + + + + + diff --git a/docs/gallery/demos/span-wikidata.html b/docs/gallery/demos/span-wikidata.html new file mode 100644 index 0000000..5995ad5 --- /dev/null +++ b/docs/gallery/demos/span-wikidata.html @@ -0,0 +1,73 @@ + + + + + + Wikidata Entity Labeling + + + + + + + + + + + + diff --git a/docs/gallery/demos/span-with-binary.html b/docs/gallery/demos/span-with-binary.html new file mode 100644 index 0000000..de86902 --- /dev/null +++ b/docs/gallery/demos/span-with-binary.html @@ -0,0 +1,164 @@ + + + + + + Span + Binary Judgment + + + + + + + + + + + + diff --git a/docs/gallery/demos/span-with-choice.html b/docs/gallery/demos/span-with-choice.html new file mode 100644 index 0000000..c64d622 --- /dev/null +++ b/docs/gallery/demos/span-with-choice.html @@ -0,0 +1,168 @@ + + + + + + Span + Forced Choice + + + + + + + + + + + + diff --git a/docs/gallery/demos/span-with-freetext.html b/docs/gallery/demos/span-with-freetext.html new file mode 100644 index 0000000..6acec71 --- /dev/null +++ b/docs/gallery/demos/span-with-freetext.html @@ -0,0 +1,162 @@ + + + + + + Span + Free Text + + + + + + + + + + + + diff --git a/docs/gallery/demos/span-with-rating.html b/docs/gallery/demos/span-with-rating.html new file mode 100644 index 0000000..28abe3c --- /dev/null +++ b/docs/gallery/demos/span-with-rating.html @@ -0,0 +1,166 @@ + + + + + + Span + Likert Rating + + + + + + + + + + + + diff --git a/docs/gallery/demos/span-with-slider.html b/docs/gallery/demos/span-with-slider.html new file mode 100644 index 0000000..d7f853d --- /dev/null +++ b/docs/gallery/demos/span-with-slider.html @@ -0,0 +1,154 @@ + + + + + + Span + Slider Rating + + + + + + + + + + + + diff --git a/docs/gallery/js/gallery-bundle.js b/docs/gallery/js/gallery-bundle.js new file mode 100644 index 0000000..4a050cc --- /dev/null +++ b/docs/gallery/js/gallery-bundle.js @@ -0,0 +1,2684 @@ +(function () { + 'use strict'; + + /* @bead/jspsych-gallery - Interactive demo bundle */ + var __defProp = Object.defineProperty; + var __defNormalProp = (obj, key, value) => key in obj ? __defProp(obj, key, { enumerable: true, configurable: true, writable: true, value }) : obj[key] = value; + var __publicField = (obj, key, value) => __defNormalProp(obj, typeof key !== "symbol" ? key + "" : key, value); + + // src/plugins/binary-choice.ts + var info = { + name: "bead-binary-choice", + parameters: { + prompt: { + type: 8, + // ParameterType.HTML_STRING + default: "Is this sentence acceptable?" 
+ }, + stimulus: { + type: 8, + // ParameterType.HTML_STRING + default: "" + }, + choices: { + type: 1, + // ParameterType.STRING + default: ["Yes", "No"], + array: true + }, + prompt_position: { + type: 1, + // ParameterType.STRING + default: "above" + }, + require_response: { + type: 0, + // ParameterType.BOOL + default: true + }, + metadata: { + type: 12, + // ParameterType.OBJECT + default: {} + } + } + }; + var BeadBinaryChoicePlugin = class { + constructor(jsPsych) { + __publicField(this, "jsPsych"); + this.jsPsych = jsPsych; + } + trial(display_element, trial) { + let response_index = null; + let rt = null; + const start_time = performance.now(); + let html = '
<div class="bead-binary-choice-container">';
+ if (trial.prompt && trial.prompt_position === "above") {
+   html += `<div class="bead-binary-choice-prompt">${trial.prompt}</div>`;
+ }
+ if (trial.stimulus) {
+   html += `<div class="bead-binary-choice-stimulus">${trial.stimulus}</div>`;
+ }
+ if (trial.prompt && trial.prompt_position === "below") {
+   html += `<div class="bead-binary-choice-prompt">${trial.prompt}</div>`;
+ }
+ html += '<div class="bead-binary-choice-buttons">';
+ for (let i = 0; i < trial.choices.length; i++) {
+   html += `<button class="bead-binary-button" data-index="${i}">${trial.choices[i]}</button>`;
+ }
+ html += "</div>";
+ html += "</div>
"; + display_element.innerHTML = html; + const buttons = display_element.querySelectorAll(".bead-binary-button"); + for (const button of buttons) { + button.addEventListener("click", (e) => { + const target = e.target; + const indexAttr = target.getAttribute("data-index"); + if (indexAttr !== null) { + select_choice(Number.parseInt(indexAttr, 10)); + } + }); + } + const select_choice = (index) => { + response_index = index; + rt = performance.now() - start_time; + for (const btn of buttons) { + btn.classList.remove("selected"); + } + const selected = display_element.querySelector( + `.bead-binary-button[data-index="${index}"]` + ); + if (selected) { + selected.classList.add("selected"); + } + setTimeout(() => { + end_trial(); + }, 200); + }; + const end_trial = () => { + const trial_data = { + ...trial.metadata, + response: response_index, + response_label: response_index !== null ? trial.choices[response_index] : null, + rt + }; + display_element.innerHTML = ""; + this.jsPsych.finishTrial(trial_data); + }; + } + }; + __publicField(BeadBinaryChoicePlugin, "info", info); + + // src/plugins/categorical.ts + var info2 = { + name: "bead-categorical", + parameters: { + prompt: { + type: 8, + // ParameterType.HTML_STRING + default: "Select a category:" + }, + stimulus: { + type: 8, + // ParameterType.HTML_STRING + default: "" + }, + categories: { + type: 1, + // ParameterType.STRING + default: [], + array: true + }, + prompt_position: { + type: 1, + // ParameterType.STRING + default: "above" + }, + require_response: { + type: 0, + // ParameterType.BOOL + default: true + }, + button_label: { + type: 1, + // ParameterType.STRING + default: "Continue" + }, + metadata: { + type: 12, + // ParameterType.OBJECT + default: {} + } + } + }; + var BeadCategoricalPlugin = class { + constructor(jsPsych) { + __publicField(this, "jsPsych"); + this.jsPsych = jsPsych; + } + trial(display_element, trial) { + let selected_index = null; + const start_time = performance.now(); + let html = '
<div class="bead-categorical-container">';
+ if (trial.prompt && trial.prompt_position === "above") {
+   html += `<div class="bead-categorical-prompt">${trial.prompt}</div>`;
+ }
+ if (trial.stimulus) {
+   html += `<div class="bead-categorical-stimulus">${trial.stimulus}</div>`;
+ }
+ if (trial.prompt && trial.prompt_position === "below") {
+   html += `<div class="bead-categorical-prompt">${trial.prompt}</div>`;
+ }
+ html += '<div class="bead-categorical-options">';
+ for (let i = 0; i < trial.categories.length; i++) {
+   html += `<button class="bead-categorical-button" data-index="${i}">${trial.categories[i]}</button>`;
+ }
+ html += "</div>";
+ const disabled = trial.require_response ? "disabled" : "";
+ html += `
+   <div class="bead-categorical-button-container">
+     <button id="bead-categorical-continue" class="bead-button bead-continue-button" ${disabled}>${trial.button_label}</button>
+   </div>
+ `;
+ html += "</div>
"; + display_element.innerHTML = html; + const buttons = display_element.querySelectorAll(".bead-categorical-button"); + const continueBtn = display_element.querySelector( + "#bead-categorical-continue" + ); + for (const button of buttons) { + button.addEventListener("click", (e) => { + const target = e.currentTarget; + const indexAttr = target.getAttribute("data-index"); + if (indexAttr !== null) { + selected_index = Number.parseInt(indexAttr, 10); + for (const btn of buttons) { + btn.classList.remove("selected"); + } + target.classList.add("selected"); + if (continueBtn) { + continueBtn.disabled = false; + } + } + }); + } + if (continueBtn) { + continueBtn.addEventListener("click", () => { + if (!trial.require_response || selected_index !== null) { + end_trial(); + } + }); + } + const end_trial = () => { + const rt = performance.now() - start_time; + const trial_data = { + ...trial.metadata, + response: selected_index !== null ? trial.categories[selected_index] : null, + response_index: selected_index, + rt + }; + display_element.innerHTML = ""; + this.jsPsych.finishTrial(trial_data); + }; + } + }; + __publicField(BeadCategoricalPlugin, "info", info2); + + // src/plugins/cloze-dropdown.ts + var info3 = { + name: "bead-cloze-multi", + parameters: { + text: { + type: 8, + // ParameterType.HTML_STRING + default: null + }, + fields: { + type: 13, + // ParameterType.COMPLEX + default: [], + array: true + }, + require_all: { + type: 0, + // ParameterType.BOOL + default: true + }, + button_label: { + type: 1, + // ParameterType.STRING + default: "Continue" + }, + metadata: { + type: 12, + // ParameterType.OBJECT + default: {} + } + } + }; + var BeadClozeMultiPlugin = class { + constructor(jsPsych) { + __publicField(this, "jsPsych"); + this.jsPsych = jsPsych; + } + trial(display_element, trial) { + const responses = {}; + const response_times = {}; + const field_start_times = {}; + const start_time = performance.now(); + if (trial.fields.length === 0 && trial.metadata.unfilled_slots) { + trial.fields = trial.metadata.unfilled_slots.map((slot) => ({ + slot_name: slot.slot_name, + position: slot.position, + type: slot.constraint_ids.length > 0 ? "dropdown" : "text", + options: [], + // Would be populated from constraints in real implementation + placeholder: slot.slot_name + })); + } + let html = '
<div class="bead-cloze-container">';
+ if (trial.text) {
+   let processed_text = trial.text;
+   trial.fields.forEach((field, index) => {
+     const field_id = `bead-cloze-field-${index}`;
+     let field_html;
+     if (field.type === "dropdown" && field.options && field.options.length > 0) {
+       const optionsHtml = field.options.map((opt) => `<option value="${opt}">${opt}</option>`).join("");
+       field_html = `
+         <select id="${field_id}" class="bead-dropdown bead-cloze-field" data-field="${index}">
+           ${optionsHtml}
+         </select>
+       `;
+     } else {
+       field_html = `
+         <input type="text" id="${field_id}" class="bead-text-field bead-cloze-field" data-field="${index}" placeholder="${field.placeholder ?? ""}">
+       `;
+     }
+     const placeholder = field.slot_name ? `{{${field.slot_name}}}` : "%%";
+     processed_text = processed_text.replace(placeholder, field_html);
+   });
+   html += `<div class="bead-cloze-text">${processed_text}</div>`;
+ }
+ html += `
+   <div class="bead-cloze-button-container">
+     <button id="bead-cloze-continue" class="bead-button bead-continue-button" ${trial.require_all ? "disabled" : ""}>${trial.button_label}</button>
+   </div>
+ `;
+ html += "</div>
"; + display_element.innerHTML = html; + const input_fields = display_element.querySelectorAll( + ".bead-cloze-field" + ); + for (const field of input_fields) { + const field_index = field.getAttribute("data-field"); + if (field_index === null) continue; + field.addEventListener("focus", () => { + if (field_start_times[field_index] === void 0) { + field_start_times[field_index] = performance.now(); + } + }); + field.addEventListener("change", () => { + responses[field_index] = field.value; + const startTime = field_start_times[field_index]; + if (startTime !== void 0) { + response_times[field_index] = performance.now() - startTime; + } + check_completion(); + }); + field.addEventListener("input", () => { + responses[field_index] = field.value; + check_completion(); + }); + } + const continue_button = display_element.querySelector("#bead-cloze-continue"); + if (continue_button) { + continue_button.addEventListener("click", () => { + end_trial(); + }); + } + const check_completion = () => { + if (trial.require_all && continue_button) { + const all_filled = trial.fields.every((_field, index) => { + const response = responses[index.toString()]; + return response !== void 0 && response.trim() !== ""; + }); + continue_button.disabled = !all_filled; + } + }; + const end_trial = () => { + const trial_data = { + ...trial.metadata, + // Preserve all metadata + responses, + response_times, + total_rt: performance.now() - start_time + }; + display_element.innerHTML = ""; + this.jsPsych.finishTrial(trial_data); + }; + } + }; + __publicField(BeadClozeMultiPlugin, "info", info3); + + // src/plugins/forced-choice.ts + var info4 = { + name: "bead-forced-choice", + parameters: { + prompt: { + type: 8, + // ParameterType.HTML_STRING + default: "Which do you prefer?" + }, + alternatives: { + type: 1, + // ParameterType.STRING + default: [], + array: true + }, + layout: { + type: 1, + // ParameterType.STRING + default: "horizontal" + }, + randomize_position: { + type: 0, + // ParameterType.BOOL + default: true + }, + enable_keyboard: { + type: 0, + // ParameterType.BOOL + default: true + }, + require_response: { + type: 0, + // ParameterType.BOOL + default: true + }, + button_label: { + type: 1, + // ParameterType.STRING + default: "Continue" + }, + metadata: { + type: 12, + // ParameterType.OBJECT + default: {} + } + } + }; + var BeadForcedChoicePlugin = class { + constructor(jsPsych) { + __publicField(this, "jsPsych"); + this.jsPsych = jsPsych; + } + trial(display_element, trial) { + const response = { + choice: null, + choice_index: null, + position: null, + rt: null + }; + const start_time = performance.now(); + let left_index = 0; + let right_index = 1; + if (trial.randomize_position && Math.random() < 0.5) { + left_index = 1; + right_index = 0; + } + let html = '
<div class="bead-forced-choice-container">';
+ if (trial.prompt) {
+   html += `<div class="bead-forced-choice-prompt">${trial.prompt}</div>`;
+ }
+ html += `<div class="bead-forced-choice-alternatives bead-layout-${trial.layout}">`;
+ html += `
+   <div class="bead-card bead-alternative" data-index="${left_index}" data-position="left">
+     <div class="bead-alternative-label">Option 1</div>
+     <div class="bead-alternative-content">${trial.alternatives[left_index] ?? "Alternative A"}</div>
+     <button class="bead-choice-button">Select</button>
+   </div>
+ `;
+ html += `
+   <div class="bead-card bead-alternative" data-index="${right_index}" data-position="right">
+     <div class="bead-alternative-label">Option 2</div>
+     <div class="bead-alternative-content">${trial.alternatives[right_index] ?? "Alternative B"}</div>
+     <button class="bead-choice-button">Select</button>
+   </div>
+ `;
+ html += "</div>";
+ html += "</div>
"; + display_element.innerHTML = html; + const alternative_cards = display_element.querySelectorAll(".bead-alternative"); + for (const card of alternative_cards) { + card.addEventListener("click", () => { + const indexAttr = card.getAttribute("data-index"); + const positionAttr = card.getAttribute("data-position"); + if (indexAttr !== null && positionAttr !== null) { + const index = Number.parseInt(indexAttr, 10); + select_choice(index, positionAttr); + } + }); + } + let keyboard_listener = null; + if (trial.enable_keyboard) { + keyboard_listener = this.jsPsych.pluginAPI.getKeyboardResponse({ + callback_function: (info11) => { + const key = info11.key; + if (key === "1" || key === "ArrowLeft") { + select_choice(left_index, "left"); + } else if (key === "2" || key === "ArrowRight") { + select_choice(right_index, "right"); + } + }, + valid_responses: ["1", "2", "ArrowLeft", "ArrowRight"], + rt_method: "performance", + persist: false, + allow_held_key: false + }); + } + const select_choice = (index, position) => { + response.choice = trial.alternatives[index] ?? null; + response.choice_index = index; + response.position = position; + response.rt = performance.now() - start_time; + const alternative_cards2 = display_element.querySelectorAll(".bead-alternative"); + for (const card of alternative_cards2) { + card.classList.remove("selected"); + } + const selected_card = display_element.querySelector( + `.bead-alternative[data-position="${position}"]` + ); + if (selected_card) { + selected_card.classList.add("selected"); + } + setTimeout(() => { + end_trial(); + }, 300); + }; + const end_trial = () => { + if (keyboard_listener) { + this.jsPsych.pluginAPI.cancelKeyboardResponse(keyboard_listener); + } + const trial_data = { + ...trial.metadata, + // Spread all metadata + choice: response.choice, + choice_index: response.choice_index, + position_chosen: response.position, + left_index, + right_index, + rt: response.rt + }; + display_element.innerHTML = ""; + this.jsPsych.finishTrial(trial_data); + }; + } + }; + __publicField(BeadForcedChoicePlugin, "info", info4); + + // src/plugins/free-text.ts + var info5 = { + name: "bead-free-text", + parameters: { + prompt: { + type: 8, + // ParameterType.HTML_STRING + default: "Enter your response:" + }, + stimulus: { + type: 8, + // ParameterType.HTML_STRING + default: "" + }, + prompt_position: { + type: 1, + // ParameterType.STRING + default: "above" + }, + multiline: { + type: 0, + // ParameterType.BOOL + default: false + }, + min_length: { + type: 2, + // ParameterType.INT + default: 0 + }, + max_length: { + type: 2, + // ParameterType.INT + default: 0 + }, + placeholder: { + type: 1, + // ParameterType.STRING + default: "" + }, + rows: { + type: 2, + // ParameterType.INT + default: 4 + }, + require_response: { + type: 0, + // ParameterType.BOOL + default: true + }, + button_label: { + type: 1, + // ParameterType.STRING + default: "Continue" + }, + metadata: { + type: 12, + // ParameterType.OBJECT + default: {} + } + } + }; + var BeadFreeTextPlugin = class { + constructor(jsPsych) { + __publicField(this, "jsPsych"); + this.jsPsych = jsPsych; + } + trial(display_element, trial) { + const start_time = performance.now(); + let html = '
<div class="bead-free-text-container">';
+ if (trial.prompt && trial.prompt_position === "above") {
+   html += `<div class="bead-free-text-prompt">${trial.prompt}</div>`;
+ }
+ if (trial.stimulus) {
+   html += `<div class="bead-free-text-stimulus">${trial.stimulus}</div>`;
+ }
+ if (trial.prompt && trial.prompt_position === "below") {
+   html += `<div class="bead-free-text-prompt">${trial.prompt}</div>`;
+ }
+ const maxAttr = trial.max_length > 0 ? ` maxlength="${trial.max_length}"` : "";
+ const placeholderAttr = trial.placeholder ? ` placeholder="${trial.placeholder}"` : "";
+ if (trial.multiline) {
+   html += `<textarea id="bead-free-text-input" class="bead-free-text-input" rows="${trial.rows}"${maxAttr}${placeholderAttr}></textarea>`;
+ } else {
+   html += `<input type="text" id="bead-free-text-input" class="bead-free-text-input"${maxAttr}${placeholderAttr}>`;
+ }
+ if (trial.max_length > 0) {
+   html += `<div class="bead-free-text-counter"><span id="bead-char-count">0</span> / ${trial.max_length}</div>`;
+ }
+ const disabled = trial.require_response ? "disabled" : "";
+ html += `
+   <div class="bead-free-text-button-container">
+     <button id="bead-free-text-continue" class="bead-button bead-continue-button" ${disabled}>${trial.button_label}</button>
+   </div>
+ `;
+ html += "</div>
"; + display_element.innerHTML = html; + const input = display_element.querySelector( + "#bead-free-text-input" + ); + const continueBtn = display_element.querySelector( + "#bead-free-text-continue" + ); + const charCount = display_element.querySelector("#bead-char-count"); + if (input) { + input.addEventListener("input", () => { + const len = input.value.length; + if (charCount) charCount.textContent = String(len); + if (continueBtn) { + const meetsMin = len >= trial.min_length; + const hasContent = input.value.trim().length > 0; + continueBtn.disabled = trial.require_response && (!hasContent || !meetsMin); + } + }); + input.focus(); + } + if (continueBtn) { + continueBtn.addEventListener("click", () => { + end_trial(); + }); + } + const end_trial = () => { + const rt = performance.now() - start_time; + const trial_data = { + ...trial.metadata, + response: input ? input.value : "", + rt + }; + display_element.innerHTML = ""; + this.jsPsych.finishTrial(trial_data); + }; + } + }; + __publicField(BeadFreeTextPlugin, "info", info5); + + // src/plugins/magnitude.ts + function computeXMax(referenceValue) { + return 3 * 100 * Math.log(referenceValue + 1); + } + function xToValue(x) { + if (x <= 0) return 0; + return Math.exp(x / 100) - 1; + } + function formatValue(value) { + if (value >= 1e6) return "\u221E"; + if (value >= 1e4) return Math.round(value).toLocaleString(); + if (value >= 100) return Math.round(value).toString(); + if (value >= 10) return value.toFixed(1); + if (value >= 1) return value.toFixed(2); + if (value > 0) return value.toFixed(3); + return "0"; + } + var info6 = { + name: "bead-magnitude", + parameters: { + prompt: { + type: 8, + // ParameterType.HTML_STRING + default: "Enter a value:" + }, + stimulus: { + type: 8, + // ParameterType.HTML_STRING + default: "" + }, + prompt_position: { + type: 1, + // ParameterType.STRING + default: "above" + }, + reference_stimulus: { + type: 8, + // ParameterType.HTML_STRING + default: "" + }, + reference_value: { + type: 2, + // ParameterType.INT + default: 100 + }, + unit: { + type: 1, + // ParameterType.STRING + default: "" + }, + input_mode: { + type: 1, + // ParameterType.STRING + default: "number" + }, + arrow_step: { + type: 3, + // ParameterType.FLOAT + default: 3 + }, + slider_start: { + type: 3, + // ParameterType.FLOAT + default: null + }, + input_min: { + type: 3, + // ParameterType.FLOAT + default: null + }, + input_max: { + type: 3, + // ParameterType.FLOAT + default: null + }, + step: { + type: 3, + // ParameterType.FLOAT + default: null + }, + placeholder: { + type: 1, + // ParameterType.STRING + default: "" + }, + require_response: { + type: 0, + // ParameterType.BOOL + default: true + }, + button_label: { + type: 1, + // ParameterType.STRING + default: "Continue" + }, + metadata: { + type: 12, + // ParameterType.OBJECT + default: {} + } + } + }; + var BeadMagnitudePlugin = class { + constructor(jsPsych) { + __publicField(this, "jsPsych"); + this.jsPsych = jsPsych; + } + trial(display_element, trial) { + const start_time = performance.now(); + const hasReference = trial.reference_stimulus !== ""; + let html = '
<div class="bead-magnitude-container">';
+ if (trial.prompt && trial.prompt_position === "above") {
+   html += `<div class="bead-magnitude-prompt">${trial.prompt}</div>`;
+ }
+ if (hasReference) {
+   html += '<div class="bead-magnitude-reference-header">';
+   html += '<div class="bead-magnitude-section-label">Reference</div>';
+   html += `<div class="bead-magnitude-reference-chip">${trial.reference_value}</div>`;
+   html += "</div>";
+   html += '<div class="bead-magnitude-reference">';
+   html += `<span class="bead-magnitude-reference-text">${trial.reference_stimulus}</span>`;
+   html += "</div>";
+ }
+ if (trial.stimulus) {
+   if (hasReference) {
+     html += '<div class="bead-magnitude-section-label">Item</div>';
+   }
+   html += `<div class="bead-magnitude-stimulus">${trial.stimulus}</div>`;
+ }
+ if (trial.prompt && trial.prompt_position === "below") {
+   html += `<div class="bead-magnitude-prompt">${trial.prompt}</div>`;
+ }
+ if (trial.input_mode === "exp-slider") {
+   html += this.buildExpSliderHTML(trial);
+ } else {
+   html += this.buildNumberInputHTML(trial);
+ }
+ const disabled = trial.require_response ? "disabled" : "";
+ html += `
+   <div class="bead-magnitude-button-container">
+     <button id="bead-magnitude-continue" class="bead-button bead-continue-button" ${disabled}>${trial.button_label}</button>
+   </div>
+ `;
+ html += "</div>
"; + display_element.innerHTML = html; + if (trial.input_mode === "exp-slider") { + this.setupExpSlider(display_element, trial, start_time, hasReference); + } else { + this.setupNumberInput(display_element, trial, start_time, hasReference); + } + } + // ── Number input (existing behavior) ──────────────────────────── + buildNumberInputHTML(trial) { + let html = '
<div class="bead-magnitude-input-wrapper">';
+ html += '<input type="number" id="bead-magnitude-input" class="bead-magnitude-input">';
+ if (trial.unit) {
+   html += `<span class="bead-magnitude-unit">${trial.unit}</span>`;
+ }
+ html += "</div>
"; + return html; + } + setupNumberInput(display_element, trial, start_time, hasReference) { + const input = display_element.querySelector("#bead-magnitude-input"); + const continueBtn = display_element.querySelector( + "#bead-magnitude-continue" + ); + if (input) { + input.addEventListener("input", () => { + if (continueBtn) { + continueBtn.disabled = trial.require_response && input.value.trim() === ""; + } + }); + input.focus(); + } + if (continueBtn) { + continueBtn.addEventListener("click", () => { + if (!trial.require_response || input && input.value.trim() !== "") { + end_trial(); + } + }); + } + const end_trial = () => { + const rt = performance.now() - start_time; + const value = input ? Number.parseFloat(input.value) : null; + const trial_data = { + ...trial.metadata, + response: Number.isNaN(value ?? Number.NaN) ? null : value, + rt + }; + if (hasReference) { + trial_data["reference_value"] = trial.reference_value; + } + display_element.innerHTML = ""; + this.jsPsych.finishTrial(trial_data); + }; + } + // ── Exponential slider ────────────────────────────────────────── + buildExpSliderHTML(trial) { + let html = '
'; + html += '
'; + html += trial.slider_start !== null ? formatValue(xToValue(trial.slider_start)) : "--"; + html += "
"; + html += '
'; + html += '0'; + html += '
`; + html += '
'; + html += `${trial.reference_value}`; + html += "
"; + const handleClass = trial.slider_start !== null ? "bead-magnitude-slider-handle" : "bead-magnitude-slider-handle hidden"; + html += `
`; + html += "
"; + html += '\u221E'; + html += "
"; + html += "
"; + return html; + } + setupExpSlider(display_element, trial, start_time, hasReference) { + const xMax = computeXMax(trial.reference_value); + let currentX = trial.slider_start ?? -1; + let hasInteracted = currentX >= 0; + const track = display_element.querySelector("#bead-magnitude-slider-track"); + const handle = display_element.querySelector("#bead-magnitude-slider-handle"); + const fill = display_element.querySelector("#bead-magnitude-slider-fill"); + const valueDisplay = display_element.querySelector( + "#bead-magnitude-slider-value" + ); + const continueBtn = display_element.querySelector( + "#bead-magnitude-continue" + ); + if (!track || !handle || !fill || !valueDisplay) return; + const updateUI = () => { + if (currentX < 0) return; + const pct = currentX / xMax * 100; + handle.style.left = `${pct}%`; + fill.style.width = `${pct}%`; + const value = xToValue(currentX); + let displayText = formatValue(value); + if (trial.unit) { + displayText += ` ${trial.unit}`; + } + valueDisplay.textContent = displayText; + track.setAttribute("aria-valuenow", String(Math.round(value))); + if (continueBtn && trial.require_response) { + continueBtn.disabled = false; + } + }; + const setPosition = (x) => { + currentX = Math.max(0, Math.min(xMax, x)); + if (!hasInteracted) { + hasInteracted = true; + handle.classList.remove("hidden"); + } + updateUI(); + }; + if (hasInteracted) { + updateUI(); + } + const onMouseDown = (e) => { + e.preventDefault(); + const rect = track.getBoundingClientRect(); + const px = e.clientX - rect.left; + const x = px / rect.width * xMax; + setPosition(x); + track.focus(); + const onMouseMove = (ev) => { + const movePx = ev.clientX - rect.left; + setPosition(movePx / rect.width * xMax); + }; + const onMouseUp = () => { + document.removeEventListener("mousemove", onMouseMove); + document.removeEventListener("mouseup", onMouseUp); + }; + document.addEventListener("mousemove", onMouseMove); + document.addEventListener("mouseup", onMouseUp); + }; + track.addEventListener("mousedown", onMouseDown); + const onTouchStart = (e) => { + e.preventDefault(); + const rect = track.getBoundingClientRect(); + const touch = e.touches[0]; + if (!touch) return; + const px = touch.clientX - rect.left; + setPosition(px / rect.width * xMax); + track.focus(); + const onTouchMove = (ev) => { + const t = ev.touches[0]; + if (!t) return; + const movePx = t.clientX - rect.left; + setPosition(movePx / rect.width * xMax); + }; + const onTouchEnd = () => { + document.removeEventListener("touchmove", onTouchMove); + document.removeEventListener("touchend", onTouchEnd); + }; + document.addEventListener("touchmove", onTouchMove, { passive: false }); + document.addEventListener("touchend", onTouchEnd); + }; + track.addEventListener("touchstart", onTouchStart, { passive: false }); + track.addEventListener("keydown", (e) => { + if (e.key === "ArrowRight" || e.key === "ArrowUp") { + e.preventDefault(); + if (!hasInteracted) { + setPosition(xMax / 3); + } else { + setPosition(currentX + trial.arrow_step); + } + } else if (e.key === "ArrowLeft" || e.key === "ArrowDown") { + e.preventDefault(); + if (!hasInteracted) { + setPosition(xMax / 3); + } else { + setPosition(currentX - trial.arrow_step); + } + } else if (e.key === "Home") { + e.preventDefault(); + setPosition(0); + } else if (e.key === "End") { + e.preventDefault(); + setPosition(xMax); + } + }); + track.focus(); + if (continueBtn) { + continueBtn.addEventListener("click", () => { + if (!trial.require_response || hasInteracted) { + end_trial(); + } + }); 
+ } + const end_trial = () => { + const rt = performance.now() - start_time; + const value = hasInteracted ? xToValue(currentX) : null; + const trial_data = { + ...trial.metadata, + response: value !== null && Number.isFinite(value) ? Math.round(value * 1e3) / 1e3 : null, + response_x: hasInteracted ? Math.round(currentX * 100) / 100 : null, + rt + }; + if (hasReference) { + trial_data["reference_value"] = trial.reference_value; + } + display_element.innerHTML = ""; + this.jsPsych.finishTrial(trial_data); + }; + } + }; + __publicField(BeadMagnitudePlugin, "info", info6); + + // src/plugins/multi-select.ts + var info7 = { + name: "bead-multi-select", + parameters: { + prompt: { + type: 8, + // ParameterType.HTML_STRING + default: "Select all that apply:" + }, + stimulus: { + type: 8, + // ParameterType.HTML_STRING + default: "" + }, + prompt_position: { + type: 1, + // ParameterType.STRING + default: "above" + }, + options: { + type: 1, + // ParameterType.STRING + default: [], + array: true + }, + min_selections: { + type: 2, + // ParameterType.INT + default: 1 + }, + max_selections: { + type: 2, + // ParameterType.INT + default: 0 + }, + require_response: { + type: 0, + // ParameterType.BOOL + default: true + }, + button_label: { + type: 1, + // ParameterType.STRING + default: "Continue" + }, + metadata: { + type: 12, + // ParameterType.OBJECT + default: {} + } + } + }; + var BeadMultiSelectPlugin = class { + constructor(jsPsych) { + __publicField(this, "jsPsych"); + this.jsPsych = jsPsych; + } + trial(display_element, trial) { + const start_time = performance.now(); + const maxLen = Math.max(...trial.options.map((o) => o.length)); + const useCompact = maxLen < 25 && trial.options.length <= 6; + let html = '
'; + if (trial.prompt && trial.prompt_position === "above") { + html += `
${trial.prompt}
`; + } + if (trial.stimulus) { + html += `
${trial.stimulus}
`; + } + if (trial.prompt && trial.prompt_position === "below") { + html += `
${trial.prompt}
`; + } + const compactClass = useCompact ? " bead-multi-select-compact" : ""; + html += `
`; + for (let i = 0; i < trial.options.length; i++) { + const opt = trial.options[i] ?? ""; + html += ` + + `; + } + html += "
"; + const disabled = trial.require_response ? "disabled" : ""; + html += ` +
+      <button id="bead-multi-select-continue" ${disabled}>${trial.button_label}</button>
+    `;
+    html += "</div>
"; + display_element.innerHTML = html; + const checkboxes = display_element.querySelectorAll( + ".bead-multi-select-checkbox" + ); + const continueBtn = display_element.querySelector( + "#bead-multi-select-continue" + ); + const updateButton = () => { + const checked = display_element.querySelectorAll( + ".bead-multi-select-checkbox:checked" + ); + const count = checked.length; + if (trial.max_selections > 0 && count >= trial.max_selections) { + for (const cb of checkboxes) { + if (!cb.checked) cb.disabled = true; + } + } else { + for (const cb of checkboxes) { + cb.disabled = false; + } + } + if (continueBtn) { + continueBtn.disabled = trial.require_response && count < trial.min_selections; + } + }; + for (const cb of checkboxes) { + cb.addEventListener("change", updateButton); + } + if (continueBtn) { + continueBtn.addEventListener("click", () => { + end_trial(); + }); + } + const end_trial = () => { + const rt = performance.now() - start_time; + const checked = display_element.querySelectorAll( + ".bead-multi-select-checkbox:checked" + ); + const selected = []; + const selected_indices = []; + for (const cb of checked) { + selected.push(cb.value); + const idx = cb.getAttribute("data-index"); + if (idx !== null) selected_indices.push(Number.parseInt(idx, 10)); + } + const trial_data = { + ...trial.metadata, + selected, + selected_indices, + rt + }; + display_element.innerHTML = ""; + this.jsPsych.finishTrial(trial_data); + }; + } + }; + __publicField(BeadMultiSelectPlugin, "info", info7); + + // src/plugins/rating.ts + var info8 = { + name: "bead-rating", + parameters: { + prompt: { + type: 8, + // ParameterType.HTML_STRING + default: null + }, + stimulus: { + type: 8, + // ParameterType.HTML_STRING + default: "" + }, + prompt_position: { + type: 1, + // ParameterType.STRING + default: "above" + }, + scale_min: { + type: 2, + // ParameterType.INT + default: 1 + }, + scale_max: { + type: 2, + // ParameterType.INT + default: 7 + }, + scale_labels: { + type: 12, + // ParameterType.OBJECT + default: {} + }, + require_response: { + type: 0, + // ParameterType.BOOL + default: true + }, + button_label: { + type: 1, + // ParameterType.STRING + default: "Continue" + }, + metadata: { + type: 12, + // ParameterType.OBJECT + default: {} + } + } + }; + var BeadRatingPlugin = class { + constructor(jsPsych) { + __publicField(this, "jsPsych"); + this.jsPsych = jsPsych; + } + trial(display_element, trial) { + const response = { + rating: null, + rt: null + }; + const start_time = performance.now(); + let html = '
'; + if (trial.prompt !== null && trial.prompt_position === "above") { + html += `
${trial.prompt}
`; + } + if (trial.stimulus) { + html += `
${trial.stimulus}
`; + } + if (trial.prompt !== null && trial.prompt_position === "below") { + html += `
${trial.prompt}
`; + } + html += '
'; + for (let i = trial.scale_min; i <= trial.scale_max; i++) { + const label = trial.scale_labels[i] ?? i; + html += ` +
+ +
${label}
+
+ `; + } + html += "
"; + html += ` +
+ +
+ `; + html += "
"; + display_element.innerHTML = html; + const rating_buttons = display_element.querySelectorAll(".bead-rating-button"); + for (const button of rating_buttons) { + button.addEventListener("click", (e) => { + const target = e.target; + const valueAttr = target.getAttribute("data-value"); + if (valueAttr !== null) { + const value = Number.parseInt(valueAttr, 10); + select_rating(value); + } + }); + } + const keyboard_listener = this.jsPsych.pluginAPI.getKeyboardResponse({ + callback_function: (info11) => { + const key = info11.key; + const num = Number.parseInt(key, 10); + if (!Number.isNaN(num) && num >= trial.scale_min && num <= trial.scale_max) { + select_rating(num); + } + }, + valid_responses: "ALL_KEYS", + rt_method: "performance", + persist: true, + allow_held_key: false + }); + const continue_button = display_element.querySelector("#bead-rating-continue"); + if (continue_button) { + continue_button.addEventListener("click", () => { + if (response.rating !== null || !trial.require_response) { + end_trial(); + } + }); + } + const select_rating = (value) => { + response.rating = value; + response.rt = performance.now() - start_time; + for (const btn of rating_buttons) { + btn.classList.remove("selected"); + } + const selected_button = display_element.querySelector( + `[data-value="${value}"]` + ); + if (selected_button) { + selected_button.classList.add("selected"); + } + if (continue_button) { + continue_button.disabled = false; + } + }; + const end_trial = () => { + if (keyboard_listener) { + this.jsPsych.pluginAPI.cancelKeyboardResponse(keyboard_listener); + } + const trial_data = { + ...trial.metadata, + // Spread all metadata + rating: response.rating, + rt: response.rt + }; + display_element.innerHTML = ""; + this.jsPsych.finishTrial(trial_data); + }; + } + }; + __publicField(BeadRatingPlugin, "info", info8); + + // src/plugins/slider-rating.ts + var info9 = { + name: "bead-slider-rating", + parameters: { + prompt: { + type: 8, + // ParameterType.HTML_STRING + default: null + }, + stimulus: { + type: 8, + // ParameterType.HTML_STRING + default: "" + }, + prompt_position: { + type: 1, + // ParameterType.STRING + default: "above" + }, + slider_min: { + type: 2, + // ParameterType.INT + default: 0 + }, + slider_max: { + type: 2, + // ParameterType.INT + default: 100 + }, + step: { + type: 2, + // ParameterType.INT + default: 1 + }, + slider_start: { + type: 2, + // ParameterType.INT + default: 50 + }, + labels: { + type: 1, + // ParameterType.STRING + default: [], + array: true + }, + require_movement: { + type: 0, + // ParameterType.BOOL + default: true + }, + button_label: { + type: 1, + // ParameterType.STRING + default: "Continue" + }, + metadata: { + type: 12, + // ParameterType.OBJECT + default: {} + } + } + }; + var BeadSliderRatingPlugin = class { + constructor(jsPsych) { + __publicField(this, "jsPsych"); + this.jsPsych = jsPsych; + } + trial(display_element, trial) { + let slider_value = trial.slider_start; + let has_moved = false; + const start_time = performance.now(); + let html = '
'; + if (trial.prompt !== null && trial.prompt_position === "above") { + html += `
${trial.prompt}
`; + } + if (trial.stimulus) { + html += `
${trial.stimulus}
`; + } + if (trial.prompt !== null && trial.prompt_position === "below") { + html += `
${trial.prompt}
`; + } + html += '
'; + if (trial.labels.length > 0) { + html += '
'; + for (const label of trial.labels) { + html += `${label}`; + } + html += "
"; + } + html += ``; + html += `
${trial.slider_start}
`; + html += "
"; + const disabled = trial.require_movement ? "disabled" : ""; + html += ` +
+      <button id="bead-slider-continue" ${disabled}>${trial.button_label}</button>
+    `;
+    html += "</div>
"; + display_element.innerHTML = html; + const slider = display_element.querySelector(".bead-slider-input"); + const value_display = display_element.querySelector(".bead-slider-value"); + const continue_button = display_element.querySelector("#bead-slider-continue"); + if (slider) { + slider.addEventListener("input", () => { + slider_value = Number.parseFloat(slider.value); + has_moved = true; + if (value_display) { + value_display.textContent = String(slider_value); + } + if (continue_button && trial.require_movement) { + continue_button.disabled = false; + } + }); + } + if (continue_button) { + continue_button.addEventListener("click", () => { + if (!trial.require_movement || has_moved) { + end_trial(); + } + }); + } + const end_trial = () => { + const rt = performance.now() - start_time; + const trial_data = { + ...trial.metadata, + response: slider_value, + rt + }; + display_element.innerHTML = ""; + this.jsPsych.finishTrial(trial_data); + }; + } + }; + __publicField(BeadSliderRatingPlugin, "info", info9); + + // src/lib/wikidata-search.ts + var WIKIDATA_API = "https://www.wikidata.org/w/api.php"; + var CACHE_SIZE = 100; + var DEBOUNCE_MS = 300; + var cache = /* @__PURE__ */ new Map(); + function cacheKey(query, opts) { + return `${opts.language}:${query}:${opts.limit}:${(opts.entityTypes ?? []).join(",")}`; + } + function putCache(key, value) { + if (cache.size >= CACHE_SIZE) { + const firstKey = cache.keys().next().value; + if (firstKey !== void 0) { + cache.delete(firstKey); + } + } + cache.set(key, value); + } + async function searchWikidata(query, options) { + if (!query || query.trim().length === 0) { + return []; + } + const key = cacheKey(query, options); + const cached = cache.get(key); + if (cached) { + return cached; + } + const params = new URLSearchParams({ + action: "wbsearchentities", + search: query.trim(), + language: options.language, + limit: String(options.limit), + format: "json", + origin: "*" + }); + if (options.entityTypes && options.entityTypes.length > 0) { + params.set("type", options.entityTypes[0] ?? "item"); + } + const url = `${WIKIDATA_API}?${params.toString()}`; + try { + const response = await fetch(url); + if (!response.ok) { + return []; + } + const data = await response.json(); + const results = (data.search ?? []).map((item) => ({ + id: String(item["id"] ?? ""), + label: String(item["label"] ?? ""), + description: String(item["description"] ?? ""), + aliases: Array.isArray(item["aliases"]) ? 
item["aliases"].map(String) : [] + })); + putCache(key, results); + return results; + } catch { + return []; + } + } + var debounceTimer = null; + function debouncedSearchWikidata(query, options, callback) { + if (debounceTimer !== null) { + clearTimeout(debounceTimer); + } + debounceTimer = setTimeout(async () => { + const results = await searchWikidata(query, options); + callback(results); + }, DEBOUNCE_MS); + } + + // src/plugins/span-label.ts + var info10 = { + name: "bead-span-label", + parameters: { + tokens: { + type: 12, + // OBJECT + default: {} + }, + space_after: { + type: 12, + // OBJECT + default: {} + }, + spans: { + type: 12, + // OBJECT + default: [], + array: true + }, + relations: { + type: 12, + // OBJECT + default: [], + array: true + }, + span_spec: { + type: 12, + // OBJECT + default: null + }, + display_config: { + type: 12, + // OBJECT + default: null + }, + prompt: { + type: 8, + // HTML_STRING + default: "Select and label spans" + }, + button_label: { + type: 1, + // STRING + default: "Continue" + }, + require_response: { + type: 0, + // BOOL + default: true + }, + metadata: { + type: 12, + // OBJECT + default: {} + } + } + }; + var DEFAULT_PALETTE = [ + "#BBDEFB", + "#C8E6C9", + "#FFE0B2", + "#F8BBD0", + "#D1C4E9", + "#B2EBF2", + "#DCEDC8", + "#FFD54F" + ]; + var DARK_PALETTE = [ + "#1565C0", + "#2E7D32", + "#E65100", + "#AD1457", + "#4527A0", + "#00838F", + "#558B2F", + "#F9A825" + ]; + var BeadSpanLabelPlugin = class { + constructor(jsPsych) { + __publicField(this, "jsPsych"); + this.jsPsych = jsPsych; + } + trial(display_element, trial) { + const start_time = performance.now(); + const events = []; + const tokens = Object.keys(trial.tokens).length > 0 ? trial.tokens : trial.metadata.tokenized_elements ?? {}; + const spaceAfter = Object.keys(trial.space_after).length > 0 ? trial.space_after : trial.metadata.token_space_after ?? {}; + const spanSpec = trial.span_spec ?? trial.metadata.span_spec ?? null; + const preSpans = trial.spans.length > 0 ? trial.spans : trial.metadata.spans ?? []; + const preRelations = trial.relations.length > 0 ? trial.relations : trial.metadata.span_relations ?? []; + const palette = trial.display_config?.color_palette ?? DEFAULT_PALETTE; + const isInteractive = spanSpec?.interaction_mode === "interactive"; + const activeSpans = [...preSpans]; + const activeRelations = [...preRelations]; + let selectionStart = null; + let selectedIndices = []; + let nextSpanId = activeSpans.length; + let nextRelationId = activeRelations.length; + let relationState = "IDLE"; + let relationSource = null; + let relationTarget = null; + let html = '
'; + if (trial.prompt) { + html += `
${trial.prompt}
`; + } + const elementNames = Object.keys(tokens).sort(); + for (const elemName of elementNames) { + const elemTokens = tokens[elemName] ?? []; + const elemSpaceAfter = spaceAfter[elemName] ?? []; + html += `
`; + for (let i = 0; i < elemTokens.length; i++) { + const tokenText = elemTokens[i]; + const interactive = isInteractive ? " interactive" : ""; + html += `${tokenText}`; + if (i < elemSpaceAfter.length && elemSpaceAfter[i]) { + html += ` `; + } + } + html += "
"; + } + if (isInteractive && spanSpec?.label_source === "wikidata") { + html += '
'; + html += '
"; + } else if (isInteractive && spanSpec?.labels && spanSpec.labels.length > 0) { + html += '
'; + html += '
'; + html += ''; + html += ''; + html += ''; + html += "
"; + } + if (spanSpec?.enable_relations) { + if (isInteractive) { + html += '
'; + html += ''; + html += ''; + html += ''; + html += "
"; + if (spanSpec.relation_label_source === "wikidata") { + html += '"; + } else if (spanSpec.relation_labels && spanSpec.relation_labels.length > 0) { + html += '"; + } + } + html += '
'; + } + html += '
'; + html += `
`; + html += `"; + html += "
"; + html += "
"; + display_element.innerHTML = html; + applySpanHighlights(); + renderSpanList(); + if (isInteractive) { + setupInteractiveHandlers(); + if (spanSpec?.label_source === "wikidata") { + setupWikidataSearch(); + } else if (spanSpec?.labels && spanSpec.labels.length > 0) { + setupFixedLabelSearch(); + } + const searchCancelBtn = display_element.querySelector("#bead-search-cancel"); + if (searchCancelBtn) { + searchCancelBtn.addEventListener("click", () => { + cancelCurrentSelection(); + }); + } + if (spanSpec?.enable_relations) { + setupRelationHandlers(); + } + } + renderRelationArcsOverlay(); + renderRelationList(); + const continueBtn = display_element.querySelector("#bead-span-continue"); + if (continueBtn) { + continueBtn.addEventListener("click", () => { + endTrial(); + }); + } + function applySpanHighlights() { + const allTokens = display_element.querySelectorAll(".bead-token"); + for (const t of allTokens) { + t.classList.remove("highlighted", "span-first", "span-middle", "span-last", "span-single"); + t.removeAttribute("data-span-ids"); + t.removeAttribute("data-span-count"); + t.style.removeProperty("background-color"); + t.style.removeProperty("background"); + } + const allSpaces = display_element.querySelectorAll(".bead-space"); + for (const s of allSpaces) { + s.classList.remove("highlighted"); + s.style.removeProperty("background-color"); + s.style.removeProperty("background"); + } + const tokenSpanMap = /* @__PURE__ */ new Map(); + for (const span of activeSpans) { + for (const seg of span.segments) { + for (const idx of seg.indices) { + const key = `${seg.element_name}:${idx}`; + if (!tokenSpanMap.has(key)) { + tokenSpanMap.set(key, []); + } + tokenSpanMap.get(key)?.push(span.span_id); + } + } + } + const spanColorMap = assignColors(); + for (const t of allTokens) { + const elemName = t.getAttribute("data-element") ?? ""; + const idx = t.getAttribute("data-index") ?? ""; + const key = `${elemName}:${idx}`; + const spanIds = tokenSpanMap.get(key) ?? []; + if (spanIds.length > 0) { + t.classList.add("highlighted"); + t.setAttribute("data-span-ids", spanIds.join(",")); + t.setAttribute("data-span-count", String(spanIds.length)); + applySpanColor(t, spanIds, spanColorMap); + } + } + for (const elemName of elementNames) { + const elemTokens = tokens[elemName] ?? []; + for (let i = 0; i < elemTokens.length; i++) { + const key = `${elemName}:${i}`; + const spanIds = tokenSpanMap.get(key) ?? []; + if (spanIds.length === 0) continue; + const t = display_element.querySelector( + `.bead-token[data-element="${elemName}"][data-index="${i}"]` + ); + if (!t) continue; + const leftKey = `${elemName}:${i - 1}`; + const leftSpanIds = tokenSpanMap.get(leftKey) ?? []; + const hasLeftNeighbor = spanIds.some((id) => leftSpanIds.includes(id)); + const rightKey = `${elemName}:${i + 1}`; + const rightSpanIds = tokenSpanMap.get(rightKey) ?? []; + const hasRightNeighbor = spanIds.some((id) => rightSpanIds.includes(id)); + if (hasLeftNeighbor && hasRightNeighbor) { + t.classList.add("span-middle"); + } else if (hasLeftNeighbor) { + t.classList.add("span-last"); + } else if (hasRightNeighbor) { + t.classList.add("span-first"); + } else { + t.classList.add("span-single"); + } + if (hasRightNeighbor) { + const spaceEl = display_element.querySelector( + `.bead-space[data-element="${elemName}"][data-after="${i}"]` + ); + if (spaceEl) { + spaceEl.classList.add("highlighted"); + const sharedIds = spanIds.filter((id) => rightSpanIds.includes(id)); + applySpanColor(spaceEl, sharedIds.length > 0 ? 
sharedIds : spanIds, spanColorMap); + } + } + } + } + } + function applySpanColor(el, spanIds, colorMap) { + if (spanIds.length === 1) { + el.style.backgroundColor = colorMap.get(spanIds[0] ?? "") ?? palette[0] ?? "#BBDEFB"; + } else if (spanIds.length > 1) { + const colors = spanIds.map((id) => colorMap.get(id) ?? palette[0] ?? "#BBDEFB"); + const stripeWidth = 100 / colors.length; + const stops = colors.map((c, ci) => `${c} ${ci * stripeWidth}%, ${c} ${(ci + 1) * stripeWidth}%`).join(", "); + el.style.background = `linear-gradient(135deg, ${stops})`; + } + } + function assignColors() { + const colorMap = /* @__PURE__ */ new Map(); + const labelColors = spanSpec?.label_colors ?? {}; + const labelToColor = /* @__PURE__ */ new Map(); + let colorIdx = 0; + for (const span of activeSpans) { + const label = span.label?.label; + if (label && labelColors[label]) { + colorMap.set(span.span_id, labelColors[label] ?? "#BBDEFB"); + } else if (label && labelToColor.has(label)) { + colorMap.set(span.span_id, labelToColor.get(label) ?? "#BBDEFB"); + } else { + const color = palette[colorIdx % palette.length] ?? "#BBDEFB"; + colorMap.set(span.span_id, color); + if (label) labelToColor.set(label, color); + colorIdx++; + } + } + return colorMap; + } + function renderSpanList() { + const existing = display_element.querySelectorAll(".bead-span-subscript"); + for (const el of existing) el.remove(); + const darkColorMap = assignDarkColors(); + for (const span of activeSpans) { + if (!span.label?.label) continue; + const allIndices = []; + for (const seg of span.segments) { + for (const idx of seg.indices) { + allIndices.push({ elem: seg.element_name, idx }); + } + } + if (allIndices.length === 0) continue; + const lastToken = allIndices[allIndices.length - 1]; + if (!lastToken) continue; + const tokenEl = display_element.querySelector( + `.bead-token[data-element="${lastToken.elem}"][data-index="${lastToken.idx}"]` + ); + if (!tokenEl) continue; + tokenEl.style.position = "relative"; + const badge = document.createElement("span"); + badge.className = "bead-span-subscript"; + const darkColor = darkColorMap.get(span.span_id) ?? DARK_PALETTE[0] ?? 
"#1565C0"; + badge.style.backgroundColor = darkColor; + badge.setAttribute("data-span-id", span.span_id); + const labelSpan = document.createElement("span"); + labelSpan.textContent = span.label.label; + badge.appendChild(labelSpan); + if (isInteractive) { + const deleteBtn = document.createElement("button"); + deleteBtn.className = "bead-subscript-delete"; + deleteBtn.textContent = "\xD7"; + deleteBtn.addEventListener("click", (e) => { + e.stopPropagation(); + deleteSpan(span.span_id); + }); + badge.appendChild(deleteBtn); + } + tokenEl.appendChild(badge); + } + adjustSubscriptPositions(); + } + function adjustSubscriptPositions() { + const badges = Array.from( + display_element.querySelectorAll(".bead-span-subscript") + ); + if (badges.length < 2) return; + for (const b of badges) b.style.transform = ""; + badges.sort((a, b) => a.getBoundingClientRect().left - b.getBoundingClientRect().left); + const placed = []; + for (const badge of badges) { + let rect = badge.getBoundingClientRect(); + let shift = 0; + let hasOverlap = true; + let iterations = 0; + while (hasOverlap && iterations < 10) { + hasOverlap = false; + for (const p of placed) { + const hOverlap = rect.left < p.rect.right + 3 && rect.right > p.rect.left - 3; + const vOverlap = rect.top < p.rect.bottom + 1 && rect.bottom > p.rect.top - 1; + if (hOverlap && vOverlap) { + shift += p.rect.bottom - rect.top + 2; + badge.style.transform = `translateY(${shift}px)`; + rect = badge.getBoundingClientRect(); + hasOverlap = true; + break; + } + } + iterations++; + } + placed.push({ el: badge, rect: badge.getBoundingClientRect() }); + } + } + function assignDarkColors() { + const colorMap = /* @__PURE__ */ new Map(); + let colorIdx = 0; + const labelToColor = /* @__PURE__ */ new Map(); + for (const span of activeSpans) { + const label = span.label?.label; + if (label && labelToColor.has(label)) { + colorMap.set(span.span_id, labelToColor.get(label) ?? DARK_PALETTE[0] ?? "#1565C0"); + } else { + const color = DARK_PALETTE[colorIdx % DARK_PALETTE.length] ?? "#1565C0"; + colorMap.set(span.span_id, color); + if (label) labelToColor.set(label, color); + colorIdx++; + } + } + return colorMap; + } + function getSpanText(span) { + const parts = []; + for (const seg of span.segments) { + const elemTokens = tokens[seg.element_name] ?? []; + for (const idx of seg.indices) { + if (idx < elemTokens.length) { + parts.push(elemTokens[idx] ?? ""); + } + } + } + return parts.join(" "); + } + function setupInteractiveHandlers() { + const tokenEls = display_element.querySelectorAll(".bead-token.interactive"); + let isDragging = false; + let dragStartIdx = null; + let dragElemName = null; + for (const tokenEl of tokenEls) { + tokenEl.addEventListener("mousedown", (e) => { + e.preventDefault(); + const idx = Number.parseInt(tokenEl.getAttribute("data-index") ?? "0", 10); + const elemName = tokenEl.getAttribute("data-element") ?? ""; + isDragging = true; + dragStartIdx = idx; + dragElemName = elemName; + if (e.shiftKey && selectionStart !== null) { + const start = Math.min(selectionStart, idx); + const end = Math.max(selectionStart, idx); + selectedIndices = []; + for (let i = start; i <= end; i++) { + selectedIndices.push(i); + } + } else { + selectedIndices = [idx]; + selectionStart = idx; + } + updateSelectionUI(elemName); + showLabelPanel(); + }); + tokenEl.addEventListener("mouseover", () => { + if (!isDragging || dragStartIdx === null || dragElemName === null) return; + const idx = Number.parseInt(tokenEl.getAttribute("data-index") ?? 
"0", 10); + const elemName = tokenEl.getAttribute("data-element") ?? ""; + if (elemName !== dragElemName) return; + const start = Math.min(dragStartIdx, idx); + const end = Math.max(dragStartIdx, idx); + selectedIndices = []; + for (let i = start; i <= end; i++) { + selectedIndices.push(i); + } + updateSelectionUI(elemName); + }); + } + document.addEventListener("mouseup", () => { + if (isDragging) { + isDragging = false; + showLabelPanel(); + } + }); + const labelButtons = display_element.querySelectorAll(".bead-label-button"); + for (const btn of labelButtons) { + btn.addEventListener("click", () => { + const label = btn.getAttribute("data-label") ?? ""; + if (selectedIndices.length > 0 && label) { + createSpanFromSelection(label); + } + }); + } + document.addEventListener("keydown", handleKeyDown); + } + function cancelCurrentSelection() { + selectedIndices = []; + selectionStart = null; + const allTokens = display_element.querySelectorAll(".bead-token"); + for (const t of allTokens) { + t.classList.remove("selecting"); + } + const labelPanel = display_element.querySelector("#bead-label-panel"); + if (labelPanel) { + labelPanel.classList.add("bead-search-disabled"); + const searchInput = labelPanel.querySelector("input"); + if (searchInput) { + searchInput.disabled = true; + searchInput.value = ""; + searchInput.placeholder = "Select tokens to annotate..."; + } + const resultsDiv = labelPanel.querySelector( + ".bead-label-search-results, .bead-wikidata-results" + ); + if (resultsDiv) resultsDiv.style.display = "none"; + const cancelBtn = labelPanel.querySelector(".bead-search-cancel"); + if (cancelBtn) cancelBtn.style.display = "none"; + } + } + function showLabelPanel() { + const labelPanel = display_element.querySelector("#bead-label-panel"); + if (!labelPanel) return; + const hasSelection = selectedIndices.length > 0; + if (hasSelection) { + labelPanel.classList.remove("bead-search-disabled"); + const searchInput = labelPanel.querySelector("input"); + if (searchInput) { + searchInput.disabled = false; + searchInput.placeholder = "Search labels..."; + setTimeout(() => searchInput.focus(), 0); + } + const cancelBtn = labelPanel.querySelector(".bead-search-cancel"); + if (cancelBtn) cancelBtn.style.display = ""; + } else { + cancelCurrentSelection(); + } + } + function handleKeyDown(e) { + if (e.key === "Escape") { + if (selectedIndices.length > 0) { + cancelCurrentSelection(); + return; + } + } + const num = Number.parseInt(e.key, 10); + if (!Number.isNaN(num) && num >= 1 && num <= 9) { + const labels = spanSpec?.labels ?? []; + if (num <= labels.length && selectedIndices.length > 0) { + createSpanFromSelection(labels[num - 1] ?? ""); + } + } + } + function updateSelectionUI(elementName) { + const tokenEls = display_element.querySelectorAll( + `.bead-token[data-element="${elementName}"]` + ); + for (const t of tokenEls) { + const idx = Number.parseInt(t.getAttribute("data-index") ?? "0", 10); + if (selectedIndices.includes(idx)) { + t.classList.add("selecting"); + } else { + t.classList.remove("selecting"); + } + } + } + function createSpanFromSelection(label, labelId) { + const elemName = elementNames[0] ?? "text"; + const spanId = `span_${nextSpanId++}`; + const spanLabel = labelId ? 
{ label, label_id: labelId } : { label }; + const newSpan = { + span_id: spanId, + segments: [ + { + element_name: elemName, + indices: [...selectedIndices].sort((a, b) => a - b) + } + ], + label: spanLabel + }; + activeSpans.push(newSpan); + events.push({ + type: "select", + timestamp: performance.now() - start_time, + span_id: spanId, + indices: [...selectedIndices], + label + }); + selectedIndices = []; + selectionStart = null; + applySpanHighlights(); + renderSpanList(); + renderRelationList(); + updateContinueButton(); + cancelCurrentSelection(); + } + function deleteSpan(spanId) { + const idx = activeSpans.findIndex((s) => s.span_id === spanId); + if (idx >= 0) { + activeSpans.splice(idx, 1); + for (let ri = activeRelations.length - 1; ri >= 0; ri--) { + const rel = activeRelations[ri]; + if (rel && (rel.source_span_id === spanId || rel.target_span_id === spanId)) { + activeRelations.splice(ri, 1); + } + } + events.push({ + type: "delete", + timestamp: performance.now() - start_time, + span_id: spanId + }); + applySpanHighlights(); + renderSpanList(); + renderRelationList(); + updateContinueButton(); + } + } + function setupWikidataSearch() { + const input = display_element.querySelector("#bead-wikidata-input"); + const resultsDiv = display_element.querySelector("#bead-wikidata-results"); + if (!input || !resultsDiv) return; + const searchOptions = { + language: spanSpec?.wikidata_language ?? "en", + limit: spanSpec?.wikidata_result_limit ?? 10, + ...spanSpec?.wikidata_entity_types ? { entityTypes: spanSpec.wikidata_entity_types } : {} + }; + input.addEventListener("input", () => { + const query = input.value.trim(); + if (query.length === 0) { + resultsDiv.style.display = "none"; + resultsDiv.innerHTML = ""; + return; + } + debouncedSearchWikidata(query, searchOptions, (results) => { + resultsDiv.innerHTML = ""; + if (results.length === 0) { + resultsDiv.style.display = "none"; + return; + } + resultsDiv.style.display = "block"; + for (const entity of results) { + const item = document.createElement("div"); + item.className = "bead-wikidata-result"; + item.innerHTML = `
${entity.label} ${entity.id}
${entity.description ? `
${entity.description}
` : ""}`; + item.addEventListener("click", () => { + createSpanFromSelection(entity.label, entity.id); + input.value = ""; + resultsDiv.style.display = "none"; + resultsDiv.innerHTML = ""; + }); + resultsDiv.appendChild(item); + } + }); + }); + } + function setupFixedLabelSearch() { + const input = display_element.querySelector("#bead-label-search-input"); + const resultsDiv = display_element.querySelector( + "#bead-label-search-results" + ); + if (!input || !resultsDiv) return; + const allLabels = spanSpec?.labels ?? []; + let highlightedIdx = -1; + function fuzzyMatch(query, target) { + const q = query.toLowerCase(); + const t = target.toLowerCase(); + let qi = 0; + for (let ti = 0; ti < t.length && qi < q.length; ti++) { + if (t[ti] === q[qi]) qi++; + } + return qi === q.length; + } + const renderResults = (query) => { + resultsDiv.innerHTML = ""; + const lower = query.toLowerCase(); + const filtered = lower === "" ? allLabels : allLabels.filter((l) => fuzzyMatch(lower, l)).sort((a, b) => { + const aPrefix = a.toLowerCase().startsWith(lower); + const bPrefix = b.toLowerCase().startsWith(lower); + if (aPrefix && !bPrefix) return -1; + if (!aPrefix && bPrefix) return 1; + return 0; + }); + if (filtered.length === 0) { + resultsDiv.style.display = "none"; + return; + } + resultsDiv.style.display = "block"; + highlightedIdx = -1; + for (let fi = 0; fi < filtered.length; fi++) { + const label = filtered[fi] ?? ""; + const globalIdx = allLabels.indexOf(label); + palette[globalIdx % palette.length] ?? "#BBDEFB"; + const darkColor = DARK_PALETTE[globalIdx % DARK_PALETTE.length] ?? "#1565C0"; + const shortcut = globalIdx < 9 ? `${globalIdx + 1}` : ""; + const item = document.createElement("div"); + item.className = "bead-label-search-result"; + item.setAttribute("data-label", label); + item.setAttribute("data-fi", String(fi)); + item.innerHTML = `${label}${shortcut ? `${shortcut}` : ""}`; + item.addEventListener("click", () => { + if (selectedIndices.length > 0) { + createSpanFromSelection(label); + input.value = ""; + resultsDiv.style.display = "none"; + } + }); + resultsDiv.appendChild(item); + } + }; + input.addEventListener("focus", () => { + if (selectedIndices.length > 0) { + renderResults(input.value); + } + }); + input.addEventListener("input", () => { + renderResults(input.value); + }); + input.addEventListener("keydown", (e) => { + const items = resultsDiv.querySelectorAll(".bead-label-search-result"); + if (items.length === 0) return; + if (e.key === "ArrowDown") { + e.preventDefault(); + highlightedIdx = Math.min(highlightedIdx + 1, items.length - 1); + updateHighlight(items); + } else if (e.key === "ArrowUp") { + e.preventDefault(); + highlightedIdx = Math.max(highlightedIdx - 1, 0); + updateHighlight(items); + } else if (e.key === "Enter") { + e.preventDefault(); + if (highlightedIdx >= 0 && highlightedIdx < items.length) { + const label = items[highlightedIdx]?.getAttribute("data-label") ?? 
""; + if (label && selectedIndices.length > 0) { + createSpanFromSelection(label); + input.value = ""; + resultsDiv.style.display = "none"; + } + } + } else if (e.key === "Escape") { + resultsDiv.style.display = "none"; + } + }); + function updateHighlight(items) { + for (let i = 0; i < items.length; i++) { + items[i]?.classList.toggle("highlighted", i === highlightedIdx); + } + items[highlightedIdx]?.scrollIntoView({ block: "nearest" }); + } + document.addEventListener("click", (e) => { + if (!input.contains(e.target) && !resultsDiv.contains(e.target)) { + resultsDiv.style.display = "none"; + } + }); + } + function setupRelationHandlers() { + const addBtn = display_element.querySelector("#bead-add-relation"); + const cancelBtn = display_element.querySelector("#bead-relation-cancel"); + const statusEl = display_element.querySelector("#bead-relation-status"); + if (addBtn) { + addBtn.addEventListener("click", () => { + relationState = "WAITING_SOURCE"; + relationSource = null; + relationTarget = null; + updateRelationUI(); + }); + } + if (cancelBtn) { + cancelBtn.addEventListener("click", () => { + cancelRelationCreation(); + }); + } + function updateRelationUI() { + if (!addBtn || !cancelBtn || !statusEl) return; + addBtn.disabled = relationState !== "IDLE" || activeSpans.length < 2; + addBtn.style.display = relationState === "IDLE" ? "" : "none"; + cancelBtn.style.display = relationState !== "IDLE" ? "" : "none"; + if (relationState === "WAITING_SOURCE") { + statusEl.textContent = "Click a span label to select the source."; + } else if (relationState === "WAITING_TARGET") { + statusEl.textContent = "Click a span label to select the target."; + } else if (relationState === "WAITING_LABEL") { + statusEl.textContent = "Choose a relation label."; + } else { + statusEl.textContent = ""; + } + const badges = display_element.querySelectorAll(".bead-span-subscript"); + for (const badge of badges) { + badge.classList.remove("relation-source", "relation-target-candidate"); + const spanId = badge.getAttribute("data-span-id"); + if (relationState === "WAITING_SOURCE" || relationState === "WAITING_TARGET") { + badge.style.cursor = "pointer"; + if (spanId === relationSource) { + badge.classList.add("relation-source"); + } else if (relationState === "WAITING_TARGET") { + badge.classList.add("relation-target-candidate"); + } + } else { + badge.style.cursor = "default"; + } + } + const labelPanel = display_element.querySelector("#bead-relation-label-panel"); + if (labelPanel) { + labelPanel.style.display = relationState === "WAITING_LABEL" ? 
"flex" : "none"; + if (relationState === "WAITING_LABEL") { + const searchInput = labelPanel.querySelector("input"); + if (searchInput) setTimeout(() => searchInput.focus(), 0); + } + } + } + display_element["_updateRelationUI"] = updateRelationUI; + display_element.addEventListener("click", (e) => { + const badge = e.target.closest(".bead-span-subscript"); + if (!badge) return; + const spanId = badge.getAttribute("data-span-id"); + if (!spanId) return; + if (relationState === "WAITING_SOURCE") { + relationSource = spanId; + relationState = "WAITING_TARGET"; + updateRelationUI(); + } else if (relationState === "WAITING_TARGET") { + if (spanId === relationSource) return; + relationTarget = spanId; + relationState = "WAITING_LABEL"; + updateRelationUI(); + if (!spanSpec?.relation_labels?.length && spanSpec?.relation_label_source !== "wikidata") { + createRelation(void 0); + } + } + }); + if (spanSpec?.relation_labels && spanSpec.relation_labels.length > 0 && spanSpec.relation_label_source !== "wikidata") { + setupRelationLabelSearch(); + } + if (spanSpec?.relation_label_source === "wikidata") { + setupRelationWikidataSearch(); + } + function setupRelationLabelSearch() { + const input = display_element.querySelector("#bead-relation-label-input"); + const resultsDiv = display_element.querySelector( + "#bead-relation-label-results" + ); + if (!input || !resultsDiv) return; + const allLabels = spanSpec?.relation_labels ?? []; + let highlightedIdx = -1; + const renderResults = (query) => { + resultsDiv.innerHTML = ""; + const lower = query.toLowerCase(); + const filtered = lower === "" ? allLabels : allLabels.filter((l) => l.toLowerCase().includes(lower)); + if (filtered.length === 0) { + resultsDiv.style.display = "none"; + return; + } + resultsDiv.style.display = "block"; + highlightedIdx = -1; + for (const label of filtered) { + const item = document.createElement("div"); + item.className = "bead-label-search-result"; + item.setAttribute("data-label", label); + item.innerHTML = `${label}`; + item.addEventListener("click", () => { + createRelation({ label }); + input.value = ""; + resultsDiv.style.display = "none"; + }); + resultsDiv.appendChild(item); + } + }; + input.addEventListener("focus", () => renderResults(input.value)); + input.addEventListener("input", () => renderResults(input.value)); + input.addEventListener("keydown", (e) => { + const items = resultsDiv.querySelectorAll(".bead-label-search-result"); + if (items.length === 0) return; + if (e.key === "ArrowDown") { + e.preventDefault(); + highlightedIdx = Math.min(highlightedIdx + 1, items.length - 1); + for (let i = 0; i < items.length; i++) + items[i]?.classList.toggle("highlighted", i === highlightedIdx); + items[highlightedIdx]?.scrollIntoView({ block: "nearest" }); + } else if (e.key === "ArrowUp") { + e.preventDefault(); + highlightedIdx = Math.max(highlightedIdx - 1, 0); + for (let i = 0; i < items.length; i++) + items[i]?.classList.toggle("highlighted", i === highlightedIdx); + items[highlightedIdx]?.scrollIntoView({ block: "nearest" }); + } else if (e.key === "Enter") { + e.preventDefault(); + if (highlightedIdx >= 0 && highlightedIdx < items.length) { + const label = items[highlightedIdx]?.getAttribute("data-label") ?? 
""; + if (label) { + createRelation({ label }); + input.value = ""; + resultsDiv.style.display = "none"; + } + } + } else if (e.key === "Escape") { + cancelRelationCreation(); + } + }); + } + function setupRelationWikidataSearch() { + const input = display_element.querySelector( + "#bead-relation-wikidata-input" + ); + const resultsDiv = display_element.querySelector( + "#bead-relation-wikidata-results" + ); + if (!input || !resultsDiv) return; + const searchOptions = { + language: spanSpec?.wikidata_language ?? "en", + limit: spanSpec?.wikidata_result_limit ?? 10, + entityTypes: ["property"] + }; + input.addEventListener("input", () => { + const query = input.value.trim(); + if (query.length === 0) { + resultsDiv.style.display = "none"; + resultsDiv.innerHTML = ""; + return; + } + debouncedSearchWikidata(query, searchOptions, (results) => { + resultsDiv.innerHTML = ""; + if (results.length === 0) { + resultsDiv.style.display = "none"; + return; + } + resultsDiv.style.display = "block"; + for (const entity of results) { + const item = document.createElement("div"); + item.className = "bead-wikidata-result"; + item.innerHTML = `
${entity.label} ${entity.id}
${entity.description ? `
${entity.description}
` : ""}`; + item.addEventListener("click", () => { + createRelation({ label: entity.label, label_id: entity.id }); + input.value = ""; + resultsDiv.style.display = "none"; + resultsDiv.innerHTML = ""; + }); + resultsDiv.appendChild(item); + } + }); + }); + } + function createRelation(label) { + if (!relationSource || !relationTarget) return; + const relId = `rel_${nextRelationId++}`; + const newRelation = { + relation_id: relId, + source_span_id: relationSource, + target_span_id: relationTarget, + ...label !== void 0 ? { label } : {}, + directed: spanSpec?.relation_directed ?? true + }; + activeRelations.push(newRelation); + events.push({ + type: "relation_create", + timestamp: performance.now() - start_time, + relation_id: relId, + ...label?.label !== void 0 ? { label: label.label } : {} + }); + relationState = "IDLE"; + relationSource = null; + relationTarget = null; + renderRelationArcsOverlay(); + renderRelationList(); + updateRelationUI(); + updateContinueButton(); + } + function cancelRelationCreation() { + relationState = "IDLE"; + relationSource = null; + relationTarget = null; + updateRelationUI(); + } + } + function deleteRelation(relId) { + const idx = activeRelations.findIndex((r) => r.relation_id === relId); + if (idx >= 0) { + activeRelations.splice(idx, 1); + events.push({ + type: "relation_delete", + timestamp: performance.now() - start_time, + relation_id: relId + }); + renderRelationArcsOverlay(); + renderRelationList(); + updateContinueButton(); + } + } + function renderRelationList() { + const listEl = display_element.querySelector("#bead-relation-list"); + if (!listEl) return; + listEl.innerHTML = ""; + for (const rel of activeRelations) { + const sourceSpan = activeSpans.find((s) => s.span_id === rel.source_span_id); + const targetSpan = activeSpans.find((s) => s.span_id === rel.target_span_id); + if (!sourceSpan || !targetSpan) continue; + const entry = document.createElement("div"); + entry.className = "bead-relation-entry"; + const sourceText = getSpanText(sourceSpan); + const targetText = getSpanText(targetSpan); + const labelText = rel.label?.label ?? "(no label)"; + const arrow = rel.directed ? 
" \u2192 " : " \u2014 "; + entry.innerHTML = `${sourceText}${arrow}${labelText}${arrow}${targetText}`; + if (isInteractive) { + const delBtn = document.createElement("button"); + delBtn.className = "bead-relation-delete"; + delBtn.textContent = "\xD7"; + delBtn.addEventListener("click", () => deleteRelation(rel.relation_id)); + entry.appendChild(delBtn); + } + listEl.appendChild(entry); + } + const updateUI = display_element["_updateRelationUI"]; + if (typeof updateUI === "function") { + updateUI(); + } + } + function computeSpanPositions() { + const positions = /* @__PURE__ */ new Map(); + const container = display_element.querySelector(".bead-span-container"); + if (!container) return positions; + const containerRect = container.getBoundingClientRect(); + for (const span of activeSpans) { + let minLeft = Number.POSITIVE_INFINITY; + let minTop = Number.POSITIVE_INFINITY; + let maxRight = Number.NEGATIVE_INFINITY; + let maxBottom = Number.NEGATIVE_INFINITY; + for (const seg of span.segments) { + for (const idx of seg.indices) { + const tokenEl = display_element.querySelector( + `.bead-token[data-element="${seg.element_name}"][data-index="${idx}"]` + ); + if (tokenEl) { + const rect = tokenEl.getBoundingClientRect(); + minLeft = Math.min(minLeft, rect.left - containerRect.left); + minTop = Math.min(minTop, rect.top - containerRect.top); + maxRight = Math.max(maxRight, rect.right - containerRect.left); + maxBottom = Math.max(maxBottom, rect.bottom - containerRect.top); + } + } + } + if (minLeft !== Number.POSITIVE_INFINITY) { + positions.set( + span.span_id, + new DOMRect(minLeft, minTop, maxRight - minLeft, maxBottom - minTop) + ); + } + } + return positions; + } + function renderRelationArcsOverlay() { + if (activeRelations.length === 0) return; + const container = display_element.querySelector(".bead-span-container"); + if (!container) return; + const existingArcDiv = display_element.querySelector(".bead-relation-arc-area"); + if (existingArcDiv) existingArcDiv.remove(); + const spanPositions = computeSpanPositions(); + if (spanPositions.size === 0) return; + const arcArea = document.createElement("div"); + arcArea.className = "bead-relation-arc-area"; + arcArea.style.position = "relative"; + arcArea.style.width = "100%"; + const baseHeight = 28; + const levelSpacing = 28; + const totalHeight = baseHeight + (activeRelations.length - 1) * levelSpacing + 12; + arcArea.style.height = `${totalHeight}px`; + arcArea.style.marginBottom = "4px"; + const svg = document.createElementNS("http://www.w3.org/2000/svg", "svg"); + svg.classList.add("bead-relation-layer"); + svg.setAttribute("width", "100%"); + svg.setAttribute("height", String(totalHeight)); + svg.style.overflow = "visible"; + const defs = document.createElementNS("http://www.w3.org/2000/svg", "defs"); + const marker = document.createElementNS("http://www.w3.org/2000/svg", "marker"); + marker.setAttribute("id", "rel-arrow"); + marker.setAttribute("markerWidth", "8"); + marker.setAttribute("markerHeight", "6"); + marker.setAttribute("refX", "8"); + marker.setAttribute("refY", "3"); + marker.setAttribute("orient", "auto"); + const polygon = document.createElementNS("http://www.w3.org/2000/svg", "polygon"); + polygon.setAttribute("points", "0 0, 8 3, 0 6"); + polygon.setAttribute("fill", "#546e7a"); + marker.appendChild(polygon); + defs.appendChild(marker); + svg.appendChild(defs); + container.getBoundingClientRect(); + arcArea.getBoundingClientRect(); + for (let i = 0; i < activeRelations.length; i++) { + const rel = activeRelations[i]; + 
if (!rel) continue; + const sourceRect = spanPositions.get(rel.source_span_id); + const targetRect = spanPositions.get(rel.target_span_id); + if (!sourceRect || !targetRect) continue; + const x1 = sourceRect.x + sourceRect.width / 2; + const x2 = targetRect.x + targetRect.width / 2; + const bottomY = totalHeight; + const railY = totalHeight - baseHeight - i * levelSpacing; + const r = 5; + const strokeColor = "#546e7a"; + const dir = x2 > x1 ? 1 : -1; + const d = [ + `M ${x1} ${bottomY}`, + `L ${x1} ${railY + r}`, + `Q ${x1} ${railY} ${x1 + r * dir} ${railY}`, + `L ${x2 - r * dir} ${railY}`, + `Q ${x2} ${railY} ${x2} ${railY + r}`, + `L ${x2} ${bottomY}` + ].join(" "); + const path = document.createElementNS("http://www.w3.org/2000/svg", "path"); + path.setAttribute("d", d); + path.setAttribute("stroke", strokeColor); + path.setAttribute("fill", "none"); + path.setAttribute("stroke-width", "1.5"); + if (rel.directed) { + path.setAttribute("marker-end", "url(#rel-arrow)"); + } + svg.appendChild(path); + if (rel.label?.label) { + const midX = (x1 + x2) / 2; + const labelText = rel.label.label; + const fo = document.createElementNS("http://www.w3.org/2000/svg", "foreignObject"); + const labelWidth = labelText.length * 7 + 16; + fo.setAttribute("x", String(midX - labelWidth / 2)); + fo.setAttribute("y", String(railY - 10)); + fo.setAttribute("width", String(labelWidth)); + fo.setAttribute("height", "20"); + const labelDiv = document.createElement("div"); + labelDiv.style.cssText = ` + font-size: 11px; + font-family: inherit; + color: #455a64; + background: #fafafa; + padding: 1px 6px; + border-radius: 3px; + text-align: center; + line-height: 18px; + white-space: nowrap; + `; + labelDiv.textContent = labelText; + fo.appendChild(labelDiv); + svg.appendChild(fo); + } + } + arcArea.appendChild(svg); + container.parentNode?.insertBefore(arcArea, container); + } + function updateContinueButton() { + if (!continueBtn || !isInteractive) return; + const minSpans = spanSpec?.min_spans ?? 
0; + continueBtn.disabled = activeSpans.length < minSpans; + } + const endTrial = () => { + document.removeEventListener("keydown", handleKeyDown); + const trial_data = { + ...trial.metadata, + spans: activeSpans, + relations: activeRelations, + span_events: events, + rt: performance.now() - start_time + }; + display_element.innerHTML = ""; + this.jsPsych.finishTrial(trial_data); + }; + } + }; + __publicField(BeadSpanLabelPlugin, "info", info10); + + // src/gallery/gallery-bundle.ts + window.BeadRatingPlugin = BeadRatingPlugin; + window.BeadForcedChoicePlugin = BeadForcedChoicePlugin; + window.BeadBinaryChoicePlugin = BeadBinaryChoicePlugin; + window.BeadSliderRatingPlugin = BeadSliderRatingPlugin; + window.BeadClozeMultiPlugin = BeadClozeMultiPlugin; + window.BeadSpanLabelPlugin = BeadSpanLabelPlugin; + window.BeadCategoricalPlugin = BeadCategoricalPlugin; + window.BeadMagnitudePlugin = BeadMagnitudePlugin; + window.BeadFreeTextPlugin = BeadFreeTextPlugin; + window.BeadMultiSelectPlugin = BeadMultiSelectPlugin; + +})(); diff --git a/docs/index.md b/docs/index.md index 1bbecd4..0df7531 100644 --- a/docs/index.md +++ b/docs/index.md @@ -60,7 +60,7 @@ If you use bead in your research, please cite: title = {Bead: A python framework for linguistic judgment experiments with active learning}, year = {2026}, url = {https://github.com/FACTSlab/bead}, - version = {0.1.0} + version = {0.2.0} } ``` diff --git a/docs/user-guide/api/deployment.md b/docs/user-guide/api/deployment.md index aae4739..1152bca 100644 --- a/docs/user-guide/api/deployment.md +++ b/docs/user-guide/api/deployment.md @@ -272,6 +272,117 @@ When slopit is enabled, behavioral data is included in the trial results: } ``` +## Span Labeling Experiments + +Generate span labeling experiments where participants annotate text spans. + +**Basic span labeling experiment**: + +```python +from bead.deployment.distribution import ( + DistributionStrategyType, + ListDistributionStrategy, +) +from bead.deployment.jspsych.config import ExperimentConfig, SpanDisplayConfig + +# configure a span labeling experiment +config = ExperimentConfig( + experiment_type="span_labeling", + title="Named Entity Annotation", + description="Annotate named entities in text", + instructions="Select spans of text and assign entity labels.", + distribution_strategy=ListDistributionStrategy( + strategy_type=DistributionStrategyType.BALANCED + ), + randomize_trial_order=True, + show_progress_bar=True, + use_jatos=True, + span_display=SpanDisplayConfig( + highlight_style="background", + show_labels=True, + label_position="inline", + ), +) +``` + +**Customizing span display**: + +```python +from bead.deployment.jspsych.config import SpanDisplayConfig + +# configure visual appearance for span highlights +span_display = SpanDisplayConfig( + highlight_style="underline", + color_palette=["#BBDEFB", "#C8E6C9", "#FFE0B2", "#F8BBD0"], + show_labels=True, + show_tooltips=True, + label_position="tooltip", +) +``` + +**Composing spans with other task types**: span annotations can be added to any experiment type. When items contain span data, the span display renders automatically as an overlay on the existing task. 
For example, a rating experiment can show highlighted spans while participants rate sentences: + +```python +from bead.deployment.distribution import ( + DistributionStrategyType, + ListDistributionStrategy, +) +from bead.deployment.jspsych.config import ExperimentConfig, SpanDisplayConfig + +# rating experiment with span highlights +config = ExperimentConfig( + experiment_type="likert_rating", + title="Acceptability with Entity Highlights", + description="Rate sentences with highlighted entities", + instructions="Rate how natural each sentence sounds. Entities are highlighted.", + distribution_strategy=ListDistributionStrategy( + strategy_type=DistributionStrategyType.BALANCED + ), + use_jatos=True, + span_display=SpanDisplayConfig( + highlight_style="background", + show_labels=True, + ), +) +``` + +**Prompt span references**: prompts can reference span labels using `[[label]]` or `[[label:text]]` syntax. At trial generation time, these references are replaced with color-highlighted HTML where the colors match the corresponding span highlights in the stimulus: + +```python +from bead.items.ordinal_scale import create_ordinal_scale_item +from bead.items.span_labeling import add_spans_to_item +from bead.items.spans import Span, SpanLabel, SpanSegment + +# [[breaker]] auto-fills with the span's token text ("The boy") +# [[event:the breaking]] uses custom display text +item = create_ordinal_scale_item( + text="The boy broke the vase.", + prompt="How likely is it that [[breaker]] existed after [[event:the breaking]]?", + scale_bounds=(1, 5), + scale_labels={1: "Very unlikely", 5: "Very likely"}, +) + +item = add_spans_to_item( + item, + spans=[ + Span( + span_id="span_0", + segments=[SpanSegment(element_name="text", indices=[0, 1])], + label=SpanLabel(label="breaker"), + ), + Span( + span_id="span_1", + segments=[SpanSegment(element_name="text", indices=[2])], + label=SpanLabel(label="event"), + ), + ], +) +``` + +Color consistency is guaranteed: the same `_assign_span_colors()` function assigns deterministic light/dark color pairs to each unique label. Both the stimulus renderer and the prompt resolver use these assignments, so a span labeled "event" always gets the same background color in the target text and the same highlight color in the question text. The `SpanDisplayConfig.color_palette` (light backgrounds) and `SpanDisplayConfig.dark_color_palette` (subscript badge colors) are index-aligned, producing visually matched pairs. + +Prompts without `[[...]]` references pass through unchanged, so existing experiments are unaffected. + ## Experiment Configuration **ExperimentConfig** parameters: @@ -420,7 +531,11 @@ output_dir/ ├── index.html ├── js/ │ ├── experiment.js -│ └── list_distributor.js +│ ├── list_distributor.js +│ ├── plugins/ # span plugin (when spans are used) +│ │ └── span-label.js +│ └── lib/ # shared libraries +│ └── span-renderer.js ├── css/ │ └── experiment.css └── data/ @@ -428,7 +543,7 @@ output_dir/ ├── lists.jsonl ├── items.jsonl ├── distribution.json - └── templates.json + └── trials.json ``` ## Complete Example diff --git a/docs/user-guide/api/items.md b/docs/user-guide/api/items.md index 0e1c29a..a89d9ef 100644 --- a/docs/user-guide/api/items.md +++ b/docs/user-guide/api/items.md @@ -4,7 +4,7 @@ The `bead.items` module provides task-type-specific utilities for creating exper ## Task-Type Utilities -The items module provides 8 task-type-specific utilities for programmatic item creation. All utilities follow a consistent API pattern. 
+The items module provides 9 task-type-specific utilities for programmatic item creation. All utilities follow a consistent API pattern. ### Forced Choice @@ -200,6 +200,207 @@ item = create_magnitude_item( print(f"Created magnitude item with unit: {item.item_metadata.get('unit')}") ``` +### Span Labeling + +Create items with span annotations for entity labeling, relation extraction, and similar tasks. Spans can be added as standalone items or composed onto any existing task type. + +**Standalone span item with pre-defined spans**: + +```python +from bead.items.span_labeling import create_span_item +from bead.items.spans import Span, SpanSegment, SpanLabel +from bead.tokenization.config import TokenizerConfig + +# create a span item with pre-tokenized text and labeled spans +item = create_span_item( + text="The quick brown fox jumps over the lazy dog", + spans=[ + Span( + span_id="s1", + segments=[SpanSegment(element_name="text", indices=[1, 2])], + label=SpanLabel(label="ADJ"), + ), + Span( + span_id="s2", + segments=[SpanSegment(element_name="text", indices=[3])], + label=SpanLabel(label="NOUN"), + ), + ], + prompt="Review the highlighted spans:", + tokenizer_config=TokenizerConfig(backend="whitespace"), +) + +print(f"Created span item with {len(item.spans)} spans") +print(f"Tokens: {item.tokenized_elements['text']}") +``` + +**Interactive span item for participant annotation**: + +```python +from bead.items.span_labeling import create_interactive_span_item +from bead.tokenization.config import TokenizerConfig + +# create an interactive item where participants select and label spans +item = create_interactive_span_item( + text="Marie Curie discovered radium in Paris.", + prompt="Select all named entities and assign a label:", + tokenizer_config=TokenizerConfig(backend="whitespace"), + label_set=["PERSON", "LOCATION", "SUBSTANCE"], + label_source="fixed", +) + +print("Created interactive span item") +print(f"Tokens: {item.tokenized_elements['text']}") +``` + +**Composing spans onto an existing item** (any task type): + +```python +from bead.items.ordinal_scale import create_ordinal_scale_item +from bead.items.span_labeling import add_spans_to_item +from bead.items.spans import Span, SpanSegment, SpanLabel +from bead.tokenization.config import TokenizerConfig + +# start with a rating item +rating_item = create_ordinal_scale_item( + text="The scientist discovered a new element.", + scale_bounds=(1, 7), + prompt="Rate the naturalness of this sentence:", +) + +# add span annotations as an overlay +item_with_spans = add_spans_to_item( + item=rating_item, + spans=[ + Span( + span_id="agent", + segments=[SpanSegment(element_name="text", indices=[0, 1])], + label=SpanLabel(label="AGENT"), + ), + ], + tokenizer_config=TokenizerConfig(backend="whitespace"), +) + +print(f"Original spans: {len(rating_item.spans)}") +print(f"After adding: {len(item_with_spans.spans)}") +``` + +### Prompt Span References + +When composing spans with other task types, prompts can reference span labels using `[[label]]` syntax. At deployment time, these references are replaced with color-highlighted HTML that matches the span colors in the stimulus text. 
+ +**Syntax**: + +| Pattern | Behavior | +|---------|----------| +| `[[label]]` | Auto-fills with the span's token text (e.g., "The boy") | +| `[[label:custom text]]` | Uses the provided text instead (e.g., "the breaking") | + +**Example**: a rating item with highlighted prompt references: + +```python +from bead.items.ordinal_scale import create_ordinal_scale_item +from bead.items.span_labeling import add_spans_to_item +from bead.items.spans import Span, SpanLabel, SpanSegment + +item = create_ordinal_scale_item( + text="The boy broke the vase.", + prompt="How likely is it that [[breaker]] existed after [[event:the breaking]]?", + scale_bounds=(1, 5), + scale_labels={1: "Very unlikely", 5: "Very likely"}, +) + +item = add_spans_to_item( + item, + spans=[ + Span( + span_id="span_0", + segments=[SpanSegment(element_name="text", indices=[0, 1])], + label=SpanLabel(label="breaker"), + ), + Span( + span_id="span_1", + segments=[SpanSegment(element_name="text", indices=[2])], + label=SpanLabel(label="event"), + ), + ], +) +``` + +When this item is deployed, the prompt renders as: + +> How likely is it that The boy existed after the breaking? + +Colors are assigned deterministically: the same label always gets the same color pair in both the stimulus and the prompt. Auto-fill (`[[breaker]]`) reconstructs the span's token text by joining tokens from `tokenized_elements` and respecting `token_space_after` flags. Custom text (`[[event:the breaking]]`) lets you use a different surface form when the prompt needs a morphological variant of the span text (e.g., "ran" in the target vs. "the running" in the prompt). + +If a prompt references a label that doesn't exist among the item's spans, `add_spans_to_item()` issues a warning at item construction time, and trial generation raises a `ValueError`. 
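+For intuition, the sketch below walks through both mechanics described above: parsing `[[...]]` references and reconstructing auto-fill text from tokens. It is illustrative only; the names `PROMPT_REF`, `span_text`, and `span_indices` are hypothetical and do not belong to bead's private resolver:
+
+```python
+import re
+
+# matches [[label]] and [[label:display text]]
+PROMPT_REF = re.compile(r"\[\[([^\]:]+)(?::([^\]]+))?\]\]")
+
+
+def span_text(tokens: list[str], space_after: list[bool], indices: list[int]) -> str:
+    """Join a span's tokens, honoring token_space_after flags."""
+    parts: list[str] = []
+    for pos, i in enumerate(indices):
+        parts.append(tokens[i])
+        if space_after[i] and pos < len(indices) - 1:
+            parts.append(" ")
+    return "".join(parts)
+
+
+# tokens for "The boy broke the vase."
+tokens = ["The", "boy", "broke", "the", "vase", "."]
+space_after = [True, True, True, True, False, False]
+span_indices = {"breaker": [0, 1], "event": [2]}
+
+prompt = "How likely is it that [[breaker]] existed after [[event:the breaking]]?"
+for match in PROMPT_REF.finditer(prompt):
+    label, display = match.group(1), match.group(2)
+    print(label, "->", display or span_text(tokens, space_after, span_indices[label]))
+# breaker -> The boy
+# event -> the breaking
+```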
+ +**Adding tokenization to an existing item**: + +```python +from bead.items.binary import create_binary_item +from bead.items.span_labeling import tokenize_item +from bead.tokenization.config import TokenizerConfig + +# create a binary item without tokenization +binary_item = create_binary_item( + text="The cat sat on the mat.", + prompt="Is this sentence grammatical?", +) + +# add tokenization data +tokenized = tokenize_item( + binary_item, + tokenizer_config=TokenizerConfig(backend="whitespace"), +) + +print(f"Tokenized elements: {list(tokenized.tokenized_elements.keys())}") +print(f"Tokens for 'text': {tokenized.tokenized_elements.get('text')}") +``` + +**Batch creation with a span extractor**: + +```python +from bead.items.span_labeling import create_span_items_from_texts +from bead.items.spans import Span, SpanSegment, SpanLabel +from bead.tokenization.config import TokenizerConfig + + +# define a span extractor function +def find_capitalized_spans(text: str, tokens: list[str]) -> list[Span]: + """Extract spans for capitalized words (simple NER heuristic).""" + spans: list[Span] = [] + for i, token in enumerate(tokens): + if token[0].isupper() and i > 0: + spans.append( + Span( + span_id=f"cap_{i}", + segments=[SpanSegment(element_name="text", indices=[i])], + label=SpanLabel(label="ENTITY"), + ) + ) + return spans + + +sentences = [ + "Marie Curie was born in Warsaw.", + "Albert Einstein developed relativity in Berlin.", + "Ada Lovelace wrote the first algorithm.", +] + +items = create_span_items_from_texts( + texts=sentences, + span_extractor=find_capitalized_spans, + prompt="Review the detected entities:", + tokenizer_config=TokenizerConfig(backend="whitespace"), + labels=["ENTITY"], +) + +print(f"Created {len(items)} span items") +for item in items: + print(f" {item.rendered_elements['text']}: {len(item.spans)} spans") +``` + ## Language Model Scoring Score items with language models: @@ -319,7 +520,7 @@ print(f"Created {len(afc_items)} 2AFC items") 1. **NO Silent Fallbacks**: All errors raise `ValueError` with descriptive messages 2. **Strict Validation**: Use `zip(..., strict=True)`, explicit parameter checks -3. **Consistent API**: Same pattern across all 8 task types +3. **Consistent API**: Same pattern across all 9 task types 4. **Automatic Metadata**: Utilities populate task-specific metadata (n_options, scale_min/max, etc.) 
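+
+For illustration, a minimal sketch of the fail-fast style these principles prescribe; the helper `pair_scale_labels` is hypothetical, not part of `bead.items`:
+
+```python
+def pair_scale_labels(labels: list[str], values: list[int]) -> list[tuple[str, int]]:
+    """Pair scale labels with values, failing loudly on bad input."""
+    if not labels:
+        raise ValueError("labels cannot be empty")
+    # strict=True raises ValueError on a length mismatch instead of truncating
+    return list(zip(labels, values, strict=True))
+```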
## Task Type Summary @@ -334,6 +535,7 @@ print(f"Created {len(afc_items)} 2AFC items") | `cloze` | Fill-in-blank | `create_cloze_item()` | | `multi_select` | Checkboxes | `create_multi_select_item()` | | `magnitude` | Numeric | `create_magnitude_item()` | +| `span_labeling` | Entity/span annotation | `create_span_item()` | ## Next Steps diff --git a/mkdocs.yml b/mkdocs.yml index c6803fa..f31d346 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -93,6 +93,7 @@ nav: - bead.items: api/items.md - bead.lists: api/lists.md - bead.deployment: api/deployment.md + - bead.tokenization: api/tokenization.md - bead.active_learning: api/active_learning.md - bead.config: api/config.md - bead.data: api/data.md @@ -101,4 +102,4 @@ nav: - Setup: developer-guide/setup.md - Testing: developer-guide/testing.md - Contributing: developer-guide/contributing.md - - Examples: examples/gallery.md + - Task Gallery: examples/gallery.md diff --git a/pyproject.toml b/pyproject.toml index cde3739..edeb938 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "bead" -version = "0.1.0" +version = "0.2.0" description = "Lexicon and Template Collection Construction Pipeline for Acceptability and Inference Judgment Data" authors = [{name = "Aaron Steven White", email = "aaron.white@rochester.edu"}] readme = "README.md" @@ -88,6 +88,10 @@ ui = [ behavioral-analysis = [ "slopit>=0.1.0", ] +tokenization = [ + "spacy>=3.7", + "stanza>=1.8", +] [project.scripts] bead = "bead.cli.main:cli" diff --git a/tests/deployment/jspsych/test_plugins.py b/tests/deployment/jspsych/test_plugins.py index 750d06e..2a141b7 100644 --- a/tests/deployment/jspsych/test_plugins.py +++ b/tests/deployment/jspsych/test_plugins.py @@ -92,16 +92,16 @@ def test_forced_choice_plugin_preserves_metadata() -> None: def test_all_plugins_have_version() -> None: - """Test that all plugins have version 0.1.0.""" + """Test that all plugins have version 0.2.0.""" plugin_dir = Path("bead/deployment/jspsych/src/plugins") # exclude test files plugins = [p for p in plugin_dir.glob("*.ts") if not p.name.endswith(".test.ts")] - assert len(plugins) == 3, "Expected 3 plugins" + assert len(plugins) == 10, f"Expected 10 plugins, found {len(plugins)}" for plugin_path in plugins: content = plugin_path.read_text() - assert "0.1.0" in content, f"Plugin {plugin_path.name} missing version" + assert "0.2.0" in content, f"Plugin {plugin_path.name} missing version" def test_all_plugins_have_author() -> None: @@ -120,7 +120,12 @@ def test_compiled_plugins_exist() -> None: dist_dir = Path("bead/deployment/jspsych/dist/plugins") assert dist_dir.exists(), "dist/plugins directory should exist after build" - expected_plugins = ["rating.js", "forced-choice.js", "cloze-dropdown.js"] + expected_plugins = [ + "rating.js", + "forced-choice.js", + "cloze-dropdown.js", + "span-label.js", + ] for plugin in expected_plugins: plugin_path = dist_dir / plugin assert plugin_path.exists(), f"Compiled plugin {plugin} should exist" diff --git a/tests/deployment/jspsych/test_span_trials.py b/tests/deployment/jspsych/test_span_trials.py new file mode 100644 index 0000000..7ce97e4 --- /dev/null +++ b/tests/deployment/jspsych/test_span_trials.py @@ -0,0 +1,353 @@ +"""Tests for span-aware trial generation.""" + +from __future__ import annotations + +from uuid import uuid4 + +from bead.deployment.distribution import ( + DistributionStrategyType, + ListDistributionStrategy, +) +from bead.deployment.jspsych.config import ( + ExperimentConfig, + 
SpanDisplayConfig, +) +from bead.deployment.jspsych.trials import ( + _create_span_labeling_trial, + _generate_span_stimulus_html, + _serialize_item_metadata, + create_trial, +) +from bead.items.item import Item +from bead.items.item_template import ItemTemplate, PresentationSpec, TaskSpec +from bead.items.spans import ( + Span, + SpanLabel, + SpanRelation, + SpanSegment, + SpanSpec, +) + + +def _make_strategy() -> ListDistributionStrategy: + """Create a test distribution strategy.""" + return ListDistributionStrategy(strategy_type=DistributionStrategyType.BALANCED) + + +class TestSpanMetadataSerialization: + """Test span data in _serialize_item_metadata.""" + + def test_spans_serialized(self) -> None: + """Test that spans are included in metadata.""" + span = Span( + span_id="span_0", + segments=[SpanSegment(element_name="text", indices=[0, 1])], + label=SpanLabel(label="Person"), + ) + + item = Item( + item_template_id=uuid4(), + rendered_elements={"text": "John Smith"}, + spans=[span], + tokenized_elements={"text": ["John", "Smith"]}, + token_space_after={"text": [True, False]}, + ) + + template = ItemTemplate( + name="test", + judgment_type="extraction", + task_type="span_labeling", + task_spec=TaskSpec(prompt="Label entities"), + presentation_spec=PresentationSpec(mode="static"), + ) + + metadata = _serialize_item_metadata(item, template) + + assert "spans" in metadata + assert len(metadata["spans"]) == 1 + assert metadata["spans"][0]["span_id"] == "span_0" + assert metadata["spans"][0]["label"]["label"] == "Person" + + def test_tokenized_elements_serialized(self) -> None: + """Test that tokenized_elements are included.""" + item = Item( + item_template_id=uuid4(), + tokenized_elements={"text": ["Hello", "world"]}, + token_space_after={"text": [True, False]}, + ) + + template = ItemTemplate( + name="test", + judgment_type="acceptability", + task_type="ordinal_scale", + task_spec=TaskSpec(prompt="Rate this"), + presentation_spec=PresentationSpec(mode="static"), + ) + + metadata = _serialize_item_metadata(item, template) + + assert metadata["tokenized_elements"] == {"text": ["Hello", "world"]} + assert metadata["token_space_after"] == {"text": [True, False]} + + def test_span_relations_serialized(self) -> None: + """Test that span_relations are serialized.""" + spans = [ + Span( + span_id="span_0", + segments=[SpanSegment(element_name="text", indices=[0])], + ), + Span( + span_id="span_1", + segments=[SpanSegment(element_name="text", indices=[2])], + ), + ] + + rel = SpanRelation( + relation_id="rel_0", + source_span_id="span_0", + target_span_id="span_1", + label=SpanLabel(label="agent-of"), + directed=True, + ) + + item = Item( + item_template_id=uuid4(), + spans=spans, + span_relations=[rel], + ) + + template = ItemTemplate( + name="test", + judgment_type="extraction", + task_type="span_labeling", + task_spec=TaskSpec(prompt="Label"), + presentation_spec=PresentationSpec(mode="static"), + ) + + metadata = _serialize_item_metadata(item, template) + + assert len(metadata["span_relations"]) == 1 + assert metadata["span_relations"][0]["directed"] is True + assert metadata["span_relations"][0]["source_span_id"] == "span_0" + + def test_span_spec_serialized(self) -> None: + """Test that span_spec from template is serialized.""" + item = Item(item_template_id=uuid4()) + + span_spec = SpanSpec( + interaction_mode="interactive", + labels=["PER", "ORG"], + min_spans=1, + ) + + template = ItemTemplate( + name="test", + judgment_type="extraction", + task_type="span_labeling", + 
task_spec=TaskSpec(prompt="Label", span_spec=span_spec), + presentation_spec=PresentationSpec(mode="static"), + ) + + metadata = _serialize_item_metadata(item, template) + + assert metadata["span_spec"] is not None + assert metadata["span_spec"]["interaction_mode"] == "interactive" + assert metadata["span_spec"]["labels"] == ["PER", "ORG"] + + def test_no_span_spec_is_none(self) -> None: + """Test that span_spec is None when not set.""" + item = Item(item_template_id=uuid4()) + + template = ItemTemplate( + name="test", + judgment_type="acceptability", + task_type="ordinal_scale", + task_spec=TaskSpec(prompt="Rate"), + presentation_spec=PresentationSpec(mode="static"), + ) + + metadata = _serialize_item_metadata(item, template) + assert metadata["span_spec"] is None + + +class TestSpanStimulusHtml: + """Test span-highlighted stimulus HTML generation.""" + + def test_static_spans_markup(self) -> None: + """Test that static spans produce highlighted tokens.""" + span = Span( + span_id="span_0", + segments=[SpanSegment(element_name="text", indices=[0, 1])], + label=SpanLabel(label="Person"), + ) + + item = Item( + item_template_id=uuid4(), + spans=[span], + tokenized_elements={"text": ["John", "Smith", "is", "here"]}, + token_space_after={"text": [True, True, True, False]}, + ) + + config = SpanDisplayConfig() + html = _generate_span_stimulus_html(item, config) + + assert "bead-token" in html + assert "highlighted" in html + assert 'data-index="0"' in html + assert "John" in html + + def test_no_tokenization_fallback(self) -> None: + """Test fallback when no tokenized_elements.""" + item = Item( + item_template_id=uuid4(), + rendered_elements={"text": "Hello world"}, + ) + + config = SpanDisplayConfig() + html = _generate_span_stimulus_html(item, config) + + assert "stimulus-container" in html + + def test_space_after_rendering(self) -> None: + """Test that space_after controls spacing in output.""" + item = Item( + item_template_id=uuid4(), + spans=[], + tokenized_elements={"text": ["don", "'t"]}, + token_space_after={"text": [False, False]}, + ) + + config = SpanDisplayConfig() + html = _generate_span_stimulus_html(item, config) + + # Tokens should be adjacent (no space between don and 't) + assert ( + "don't" in html or "don" in html + ) + + +class TestSpanLabelingTrial: + """Test standalone span labeling trial creation.""" + + def test_trial_structure(self) -> None: + """Test span labeling trial has correct structure.""" + item = Item( + item_template_id=uuid4(), + tokenized_elements={"text": ["The", "cat"]}, + token_space_after={"text": [True, False]}, + ) + + template = ItemTemplate( + name="test", + judgment_type="extraction", + task_type="span_labeling", + task_spec=TaskSpec(prompt="Select entities"), + presentation_spec=PresentationSpec(mode="static"), + ) + + config = SpanDisplayConfig() + trial = _create_span_labeling_trial(item, template, config, 0) + + assert trial["type"] == "bead-span-label" + assert trial["prompt"] == "Select entities" + assert trial["button_label"] == "Continue" + assert trial["metadata"]["trial_type"] == "span_labeling" + + def test_trial_metadata(self) -> None: + """Test span labeling trial includes metadata.""" + item = Item( + item_template_id=uuid4(), + rendered_elements={"text": "Hello"}, + tokenized_elements={"text": ["Hello"]}, + ) + + template = ItemTemplate( + name="test", + judgment_type="extraction", + task_type="span_labeling", + task_spec=TaskSpec(prompt="Label"), + presentation_spec=PresentationSpec(mode="static"), + ) + + config = 
SpanDisplayConfig() + trial = _create_span_labeling_trial(item, template, config, 5) + + assert trial["metadata"]["trial_number"] == 5 + assert trial["metadata"]["item_id"] == str(item.id) + + def test_trial_includes_span_data(self) -> None: + """Test span labeling trial includes spans, relations, spec, config.""" + span = Span( + span_id="s0", + segments=[SpanSegment(element_name="text", indices=[0])], + label=SpanLabel(label="PER", confidence=0.95), + ) + item = Item( + item_template_id=uuid4(), + spans=[span], + tokenized_elements={"text": ["Alice", "ran"]}, + token_space_after={"text": [True, False]}, + ) + + span_spec = SpanSpec( + interaction_mode="interactive", + labels=["PER", "ORG"], + ) + template = ItemTemplate( + name="test", + judgment_type="extraction", + task_type="span_labeling", + task_spec=TaskSpec(prompt="Label", span_spec=span_spec), + presentation_spec=PresentationSpec(mode="static"), + ) + + config = SpanDisplayConfig() + trial = _create_span_labeling_trial(item, template, config, 0) + + # Span data + assert len(trial["spans"]) == 1 + assert trial["spans"][0]["span_id"] == "s0" + assert trial["spans"][0]["label"]["confidence"] == 0.95 + + # Relations (empty) + assert trial["relations"] == [] + + # Span spec + assert trial["span_spec"] is not None + assert trial["span_spec"]["interaction_mode"] == "interactive" + assert trial["span_spec"]["labels"] == ["PER", "ORG"] + + # Display config + assert trial["display_config"] is not None + assert trial["display_config"]["highlight_style"] == "background" + + +class TestSpanCompositeTrial: + """Test composite trials (e.g., rating + spans).""" + + def test_span_labeling_experiment_type(self) -> None: + """Test create_trial routes to span labeling.""" + item = Item( + item_template_id=uuid4(), + tokenized_elements={"text": ["Hello"]}, + ) + + template = ItemTemplate( + name="test", + judgment_type="extraction", + task_type="span_labeling", + task_spec=TaskSpec(prompt="Label"), + presentation_spec=PresentationSpec(mode="static"), + ) + + config = ExperimentConfig( + experiment_type="span_labeling", + title="Test", + description="Test", + instructions="Test", + distribution_strategy=_make_strategy(), + ) + + trial = create_trial(item, template, config, 0) + + assert trial["type"] == "bead-span-label" diff --git a/tests/deployment/jspsych/test_trials.py b/tests/deployment/jspsych/test_trials.py index c194393..dc85f92 100644 --- a/tests/deployment/jspsych/test_trials.py +++ b/tests/deployment/jspsych/test_trials.py @@ -20,9 +20,14 @@ InstructionPage, InstructionsConfig, RatingScaleConfig, + SpanDisplayConfig, ) from bead.deployment.jspsych.trials import ( + SpanColorMap, + _assign_span_colors, _generate_stimulus_html, + _parse_prompt_references, + _resolve_prompt_references, create_completion_trial, create_consent_trial, create_demographics_trial, @@ -31,6 +36,7 @@ ) from bead.items.item import Item from bead.items.item_template import ItemTemplate, PresentationSpec, TaskSpec +from bead.items.spans import Span, SpanLabel, SpanSegment class TestCreateTrial: @@ -52,10 +58,11 @@ def test_likert_rating( rating_config=sample_rating_config, ) - assert trial["type"] == "html-button-response" - assert len(trial["choices"]) == 7 - assert trial["data"]["item_id"] == str(sample_item.id) - assert trial["data"]["trial_type"] == "likert_rating" + assert trial["type"] == "bead-rating" + assert trial["scale_min"] == 1 + assert trial["scale_max"] == 7 + assert trial["metadata"]["item_id"] == str(sample_item.id) + assert 
trial["metadata"]["trial_type"] == "likert_rating" def test_slider_rating( self, sample_item: Item, sample_item_template: ItemTemplate @@ -80,10 +87,10 @@ def test_slider_rating( rating_config=rating_config, ) - assert trial["type"] == "html-slider-response" - assert trial["min"] == 1 - assert trial["max"] == 7 - assert trial["data"]["trial_type"] == "slider_rating" + assert trial["type"] == "bead-slider-rating" + assert trial["slider_min"] == 1 + assert trial["slider_max"] == 7 + assert trial["metadata"]["trial_type"] == "slider_rating" def test_binary_choice( self, sample_item: Item, sample_item_template: ItemTemplate @@ -108,9 +115,9 @@ def test_binary_choice( choice_config=choice_config, ) - assert trial["type"] == "html-button-response" + assert trial["type"] == "bead-binary-choice" assert trial["choices"] == ["Yes", "No"] - assert trial["data"]["trial_type"] == "binary_choice" + assert trial["metadata"]["trial_type"] == "binary_choice" def test_forced_choice(self) -> None: """Test forced choice trial creation.""" @@ -152,9 +159,9 @@ def test_forced_choice(self) -> None: choice_config=choice_config, ) - assert trial["type"] == "html-button-response" - assert len(trial["choices"]) == 2 - assert trial["data"]["trial_type"] == "forced_choice" + assert trial["type"] == "bead-forced-choice" + assert len(trial["alternatives"]) == 2 + assert trial["metadata"]["trial_type"] == "forced_choice" def test_missing_config_raises_error(self) -> None: """Test trial creation with missing required config.""" @@ -235,8 +242,8 @@ def test_metadata_inclusion( rating_config=rating_config, ) - assert trial["data"]["trial_number"] == 5 - assert trial["data"]["item_metadata"] == sample_item.item_metadata + assert trial["metadata"]["trial_number"] == 5 + assert trial["metadata"]["item_metadata"] == sample_item.item_metadata class TestLikertConfiguration: @@ -285,9 +292,10 @@ def test_custom_labels(self) -> None: rating_config=rating_config, ) - assert "Strongly disagree" in trial["prompt"] - assert "Strongly agree" in trial["prompt"] - assert len(trial["choices"]) == 5 + assert trial["scale_labels"]["1"] == "Strongly disagree" + assert trial["scale_labels"]["5"] == "Strongly agree" + assert trial["scale_min"] == 1 + assert trial["scale_max"] == 5 class TestSliderConfiguration: @@ -483,3 +491,206 @@ def test_completion_trial_custom_message(self) -> None: trial = create_completion_trial(completion_message=custom_message) assert custom_message in trial["stimulus"] + + +class TestParsePromptReferences: + """Tests for _parse_prompt_references().""" + + def test_no_references(self) -> None: + """Plain text without references returns an empty list.""" + refs = _parse_prompt_references("How natural is this sentence?") + + assert refs == [] + + def test_auto_fill_reference(self) -> None: + """Single auto-fill reference is parsed with label and no display_text.""" + refs = _parse_prompt_references("How natural is [[agent]]?") + + assert len(refs) == 1 + assert refs[0].label == "agent" + assert refs[0].display_text is None + + def test_explicit_text_reference(self) -> None: + """Explicit text reference is parsed with both label and display_text.""" + refs = _parse_prompt_references("Did [[event:the breaking]] happen?") + + assert len(refs) == 1 + assert refs[0].label == "event" + assert refs[0].display_text == "the breaking" + + def test_multiple_references(self) -> None: + """Multiple references are parsed in order of appearance.""" + refs = _parse_prompt_references("Did [[agent]] cause [[event:the breaking]]?") + + 
assert len(refs) == 2 + assert refs[0].label == "agent" + assert refs[0].display_text is None + assert refs[1].label == "event" + assert refs[1].display_text == "the breaking" + + +class TestAssignSpanColors: + """Tests for _assign_span_colors() and SpanColorMap.""" + + def test_same_label_same_color(self) -> None: + """Two spans with the same label receive identical colors.""" + spans = [ + Span( + span_id="s0", + segments=[SpanSegment(element_name="text", indices=[0])], + label=SpanLabel(label="agent"), + ), + Span( + span_id="s1", + segments=[SpanSegment(element_name="text", indices=[1])], + label=SpanLabel(label="agent"), + ), + ] + span_display = SpanDisplayConfig() + + color_map = _assign_span_colors(spans, span_display) + + assert color_map.light_by_span_id["s0"] == color_map.light_by_span_id["s1"] + assert color_map.dark_by_span_id["s0"] == color_map.dark_by_span_id["s1"] + + def test_different_labels_different_colors(self) -> None: + """Two spans with different labels receive different light colors.""" + spans = [ + Span( + span_id="s0", + segments=[SpanSegment(element_name="text", indices=[0])], + label=SpanLabel(label="agent"), + ), + Span( + span_id="s1", + segments=[SpanSegment(element_name="text", indices=[1])], + label=SpanLabel(label="patient"), + ), + ] + span_display = SpanDisplayConfig() + + color_map = _assign_span_colors(spans, span_display) + + assert color_map.light_by_span_id["s0"] != color_map.light_by_span_id["s1"] + + def test_unlabeled_span_gets_own_color(self) -> None: + """An unlabeled span receives its own unique color.""" + spans = [ + Span( + span_id="s0", + segments=[SpanSegment(element_name="text", indices=[0])], + label=SpanLabel(label="agent"), + ), + Span( + span_id="s1", + segments=[SpanSegment(element_name="text", indices=[1])], + label=None, + ), + ] + span_display = SpanDisplayConfig() + + color_map = _assign_span_colors(spans, span_display) + + assert "s1" in color_map.light_by_span_id + assert color_map.light_by_span_id["s1"] != color_map.light_by_span_id["s0"] + + +class TestResolvePromptReferences: + """Tests for _resolve_prompt_references().""" + + @pytest.fixture + def span_item(self) -> Item: + """Create an item with tokenized elements and spans.""" + return Item( + item_template_id=uuid4(), + rendered_elements={"text": "The boy broke the vase."}, + tokenized_elements={ + "text": ["The", "boy", "broke", "the", "vase", "."], + }, + token_space_after={"text": [True, True, True, True, False, False]}, + spans=[ + Span( + span_id="span_0", + segments=[ + SpanSegment(element_name="text", indices=[0, 1]), + ], + label=SpanLabel(label="breaker"), + ), + Span( + span_id="span_1", + segments=[ + SpanSegment(element_name="text", indices=[2]), + ], + label=SpanLabel(label="event"), + ), + ], + ) + + @pytest.fixture + def color_map(self, span_item: Item) -> SpanColorMap: + """Assign colors to the span_item's spans.""" + span_display = SpanDisplayConfig() + return _assign_span_colors(span_item.spans, span_display) + + def test_no_refs_backward_compat( + self, span_item: Item, color_map: SpanColorMap + ) -> None: + """Prompt without references is returned unchanged.""" + result = _resolve_prompt_references("How natural?", span_item, color_map) + + assert result == "How natural?" 
+ + def test_auto_fill_produces_html( + self, span_item: Item, color_map: SpanColorMap + ) -> None: + """Auto-fill reference produces highlighted HTML with span text.""" + result = _resolve_prompt_references( + "Did [[breaker]] do it?", span_item, color_map + ) + + assert "bead-q-highlight" in result + assert "bead-q-chip" in result + assert "breaker" in result + assert "The boy" in result + + def test_explicit_text_produces_html( + self, span_item: Item, color_map: SpanColorMap + ) -> None: + """Explicit text reference renders the specified text with label.""" + result = _resolve_prompt_references( + "Did [[event:the breaking]] happen?", span_item, color_map + ) + + assert "the breaking" in result + assert "event" in result + assert "bead-q-highlight" in result + + def test_nonexistent_label_raises_value_error( + self, span_item: Item, color_map: SpanColorMap + ) -> None: + """Reference to a nonexistent label raises ValueError.""" + with pytest.raises(ValueError, match="nonexistent"): + _resolve_prompt_references( + "Did [[nonexistent]] do it?", span_item, color_map + ) + + def test_color_consistency(self, span_item: Item, color_map: SpanColorMap) -> None: + """Resolved HTML uses the same colors as the color map.""" + result = _resolve_prompt_references( + "Did [[breaker]] do it?", span_item, color_map + ) + + expected_light = color_map.light_by_label["breaker"] + expected_dark = color_map.dark_by_label["breaker"] + + assert expected_light in result + assert expected_dark in result + + def test_same_label_twice(self, span_item: Item, color_map: SpanColorMap) -> None: + """Two references to the same label use the same background color.""" + result = _resolve_prompt_references( + "Did [[breaker]] meet [[breaker:him]]?", span_item, color_map + ) + + expected_light = color_map.light_by_label["breaker"] + assert result.count(expected_light) == 2 diff --git a/tests/items/test_span_labeling.py b/tests/items/test_span_labeling.py new file mode 100644 index 0000000..98ccc6f --- /dev/null +++ b/tests/items/test_span_labeling.py @@ -0,0 +1,310 @@ +"""Tests for span labeling item creation utilities.""" + +from __future__ import annotations + +from uuid import uuid4 + +import pytest + +from bead.items.item import Item +from bead.items.span_labeling import ( + add_spans_to_item, + create_interactive_span_item, + create_span_item, + create_span_items_from_texts, + tokenize_item, +) +from bead.items.spans import ( + Span, + SpanLabel, + SpanSegment, +) +from bead.tokenization.config import TokenizerConfig + + +class TestCreateSpanItem: + """Test create_span_item() function.""" + + def test_create_basic(self) -> None: + """Test creating a basic span item.""" + spans = [ + Span( + span_id="span_0", + segments=[SpanSegment(element_name="text", indices=[0, 1])], + label=SpanLabel(label="Person"), + ), + ] + + item = create_span_item( + text="John Smith is here.", + spans=spans, + prompt="Identify the entities.", + tokenizer_config=TokenizerConfig(backend="whitespace"), + ) + + assert isinstance(item, Item) + assert item.rendered_elements["text"] == "John Smith is here." + assert item.rendered_elements["prompt"] == "Identify the entities." 
+ assert len(item.spans) == 1 + assert item.tokenized_elements["text"] == ["John", "Smith", "is", "here."] + + def test_with_pre_tokenized(self) -> None: + """Test creating span item with pre-tokenized text.""" + tokens = ["John", "Smith", "is", "here", "."] + spans = [ + Span( + span_id="span_0", + segments=[SpanSegment(element_name="text", indices=[0, 1])], + label=SpanLabel(label="Person"), + ), + ] + + item = create_span_item( + text="John Smith is here.", + spans=spans, + prompt="Identify the entities.", + tokens=tokens, + ) + + assert item.tokenized_elements["text"] == tokens + + def test_empty_text_raises(self) -> None: + """Test that empty text raises error.""" + with pytest.raises(ValueError, match="text cannot be empty"): + create_span_item(text="", spans=[], prompt="Test") + + def test_invalid_span_index_raises(self) -> None: + """Test that out-of-bounds span index raises error.""" + spans = [ + Span( + span_id="span_0", + segments=[SpanSegment(element_name="text", indices=[99])], + ), + ] + + with pytest.raises(ValueError, match="index 99"): + create_span_item( + text="Short text.", + spans=spans, + prompt="Test", + tokenizer_config=TokenizerConfig(backend="whitespace"), + ) + + def test_with_labels(self) -> None: + """Test creating span item with label set.""" + item = create_span_item( + text="The cat sat.", + spans=[], + prompt="Label spans.", + labels=["Person", "Location"], + tokenizer_config=TokenizerConfig(backend="whitespace"), + ) + + assert isinstance(item, Item) + + def test_with_metadata(self) -> None: + """Test creating span item with metadata.""" + item = create_span_item( + text="Hello world.", + spans=[], + prompt="Test", + metadata={"source": "test"}, + tokenizer_config=TokenizerConfig(backend="whitespace"), + ) + + assert item.item_metadata["source"] == "test" + + +class TestCreateInteractiveSpanItem: + """Test create_interactive_span_item() function.""" + + def test_create_basic(self) -> None: + """Test creating interactive span item.""" + item = create_interactive_span_item( + text="The cat sat on the mat.", + prompt="Select all entities.", + tokenizer_config=TokenizerConfig(backend="whitespace"), + ) + + assert isinstance(item, Item) + assert item.spans == [] # No pre-defined spans + assert "text" in item.tokenized_elements + + def test_with_label_set(self) -> None: + """Test interactive item with fixed label set.""" + item = create_interactive_span_item( + text="Hello world.", + prompt="Select spans.", + label_set=["PER", "ORG", "LOC"], + tokenizer_config=TokenizerConfig(backend="whitespace"), + ) + + assert isinstance(item, Item) + + def test_empty_text_raises(self) -> None: + """Test that empty text raises error.""" + with pytest.raises(ValueError, match="text cannot be empty"): + create_interactive_span_item(text="", prompt="Test") + + +class TestAddSpansToItem: + """Test add_spans_to_item() function.""" + + def test_add_to_ordinal_item(self) -> None: + """Test adding spans to an ordinal scale item.""" + # Create base ordinal item + item = Item( + item_template_id=uuid4(), + rendered_elements={"text": "The cat sat.", "prompt": "Rate this."}, + item_metadata={"scale_min": 1, "scale_max": 7}, + ) + + spans = [ + Span( + span_id="span_0", + segments=[SpanSegment(element_name="text", indices=[1])], + label=SpanLabel(label="Entity"), + ), + ] + + result = add_spans_to_item( + item, + spans, + tokenizer_config=TokenizerConfig(backend="whitespace"), + ) + + assert len(result.spans) == 1 + assert result.item_metadata["scale_min"] == 1 # preserved + assert 
result.rendered_elements["text"] == "The cat sat." # preserved + + def test_add_to_already_tokenized(self) -> None: + """Test adding spans to already tokenized item.""" + item = Item( + item_template_id=uuid4(), + rendered_elements={"text": "Hello world"}, + tokenized_elements={"text": ["Hello", "world"]}, + token_space_after={"text": [True, False]}, + ) + + spans = [ + Span( + span_id="span_0", + segments=[SpanSegment(element_name="text", indices=[0])], + ), + ] + + result = add_spans_to_item(item, spans) + + assert len(result.spans) == 1 + # Token data preserved + assert result.tokenized_elements["text"] == ["Hello", "world"] + + def test_preserves_existing_fields(self) -> None: + """Test that adding spans preserves all existing fields.""" + template_id = uuid4() + item = Item( + item_template_id=template_id, + rendered_elements={"text": "Test text"}, + options=["A", "B"], + item_metadata={"key": "value"}, + ) + + result = add_spans_to_item( + item, + spans=[], + tokenizer_config=TokenizerConfig(backend="whitespace"), + ) + + assert result.item_template_id == template_id + assert result.options == ["A", "B"] + assert result.item_metadata["key"] == "value" + + def test_invalid_span_raises(self) -> None: + """Test that invalid span index raises error.""" + item = Item( + item_template_id=uuid4(), + rendered_elements={"text": "Hi"}, + tokenized_elements={"text": ["Hi"]}, + ) + + spans = [ + Span( + span_id="span_0", + segments=[SpanSegment(element_name="text", indices=[99])], + ), + ] + + with pytest.raises(ValueError, match="index 99"): + add_spans_to_item(item, spans) + + +class TestTokenizeItem: + """Test tokenize_item() function.""" + + def test_whitespace_tokenizer(self) -> None: + """Test tokenizing with whitespace backend.""" + item = Item( + item_template_id=uuid4(), + rendered_elements={"text": "Hello world"}, + ) + + result = tokenize_item(item, TokenizerConfig(backend="whitespace")) + + assert result.tokenized_elements["text"] == ["Hello", "world"] + assert result.token_space_after["text"] == [True, False] + + def test_multiple_elements(self) -> None: + """Test tokenizing item with multiple rendered elements.""" + item = Item( + item_template_id=uuid4(), + rendered_elements={ + "context": "The cat sat.", + "target": "The dog ran.", + }, + ) + + result = tokenize_item(item, TokenizerConfig(backend="whitespace")) + + assert "context" in result.tokenized_elements + assert "target" in result.tokenized_elements + assert result.tokenized_elements["context"] == ["The", "cat", "sat."] + assert result.tokenized_elements["target"] == ["The", "dog", "ran."] + + def test_default_config(self) -> None: + """Test tokenizing with default config.""" + item = Item( + item_template_id=uuid4(), + rendered_elements={"text": "Hello"}, + ) + + # Should not raise (uses spacy by default, or falls back) + result = tokenize_item(item) + assert "text" in result.tokenized_elements + + +class TestCreateSpanItemsFromTexts: + """Test create_span_items_from_texts() function.""" + + def test_batch_create(self) -> None: + """Test batch creating span items.""" + + def extractor(text: str, tokens: list[str]) -> list[Span]: + return [ + Span( + span_id="span_0", + segments=[SpanSegment(element_name="text", indices=[0])], + label=SpanLabel(label="First"), + ), + ] + + items = create_span_items_from_texts( + texts=["Hello world.", "Goodbye world."], + span_extractor=extractor, + prompt="Label first word.", + tokenizer_config=TokenizerConfig(backend="whitespace"), + ) + + assert len(items) == 2 + assert 
all(len(item.spans) == 1 for item in items) + assert items[0].rendered_elements["text"] == "Hello world." + assert items[1].rendered_elements["text"] == "Goodbye world." diff --git a/tests/items/test_spans.py b/tests/items/test_spans.py new file mode 100644 index 0000000..90706e2 --- /dev/null +++ b/tests/items/test_spans.py @@ -0,0 +1,400 @@ +"""Tests for span annotation models.""" + +from __future__ import annotations + +from uuid import uuid4 + +import pytest + +from bead.items.item import Item +from bead.items.spans import ( + Span, + SpanLabel, + SpanRelation, + SpanSegment, + SpanSpec, +) + + +class TestSpanSegment: + """Test SpanSegment model.""" + + def test_create(self) -> None: + """Test creating a SpanSegment.""" + segment = SpanSegment(element_name="text", indices=[0, 1, 2]) + + assert segment.element_name == "text" + assert segment.indices == [0, 1, 2] + + def test_empty_element_name_raises(self) -> None: + """Test that empty element_name raises error.""" + with pytest.raises(ValueError, match="element_name cannot be empty"): + SpanSegment(element_name="", indices=[0]) + + def test_empty_indices_raises(self) -> None: + """Test that empty indices raises error.""" + with pytest.raises(ValueError, match="indices cannot be empty"): + SpanSegment(element_name="text", indices=[]) + + def test_negative_indices_raises(self) -> None: + """Test that negative indices raises error.""" + with pytest.raises(ValueError, match="indices must be non-negative"): + SpanSegment(element_name="text", indices=[-1, 0]) + + def test_discontiguous_indices(self) -> None: + """Test discontiguous indices are valid.""" + segment = SpanSegment(element_name="text", indices=[0, 2, 5]) + assert segment.indices == [0, 2, 5] + + +class TestSpanLabel: + """Test SpanLabel model.""" + + def test_create_basic(self) -> None: + """Test creating a basic SpanLabel.""" + label = SpanLabel(label="Person") + + assert label.label == "Person" + assert label.label_id is None + assert label.confidence is None + + def test_create_with_id(self) -> None: + """Test creating SpanLabel with external ID.""" + label = SpanLabel(label="human", label_id="Q5") + + assert label.label == "human" + assert label.label_id == "Q5" + + def test_create_with_confidence(self) -> None: + """Test creating SpanLabel with confidence.""" + label = SpanLabel(label="Person", confidence=0.95) + + assert label.confidence == 0.95 + + def test_empty_label_raises(self) -> None: + """Test that empty label raises error.""" + with pytest.raises(ValueError, match="label cannot be empty"): + SpanLabel(label="") + + +class TestSpan: + """Test Span model.""" + + def test_create_basic(self) -> None: + """Test creating a basic Span.""" + span = Span( + span_id="span_0", + segments=[SpanSegment(element_name="text", indices=[0, 1])], + ) + + assert span.span_id == "span_0" + assert len(span.segments) == 1 + assert span.label is None + assert span.head_index is None + + def test_create_with_label(self) -> None: + """Test creating Span with label.""" + span = Span( + span_id="span_0", + segments=[SpanSegment(element_name="text", indices=[0, 1])], + label=SpanLabel(label="Person"), + ) + + assert span.label is not None + assert span.label.label == "Person" + + def test_discontiguous_segments(self) -> None: + """Test span with discontiguous segments.""" + span = Span( + span_id="span_0", + segments=[ + SpanSegment(element_name="text", indices=[0, 1]), + SpanSegment(element_name="text", indices=[5, 6]), + ], + ) + + assert len(span.segments) == 2 + + def 
test_cross_element_segments(self) -> None: + """Test span with segments across elements.""" + span = Span( + span_id="span_0", + segments=[ + SpanSegment(element_name="context", indices=[0, 1]), + SpanSegment(element_name="target", indices=[2, 3]), + ], + ) + + assert span.segments[0].element_name == "context" + assert span.segments[1].element_name == "target" + + def test_with_metadata(self) -> None: + """Test span with metadata.""" + span = Span( + span_id="span_0", + segments=[SpanSegment(element_name="text", indices=[0])], + span_metadata={"source": "manual"}, + ) + + assert span.span_metadata["source"] == "manual" + + def test_empty_span_id_raises(self) -> None: + """Test that empty span_id raises error.""" + with pytest.raises(ValueError, match="span_id cannot be empty"): + Span(span_id="") + + +class TestSpanRelation: + """Test SpanRelation model.""" + + def test_create_directed(self) -> None: + """Test creating a directed relation.""" + rel = SpanRelation( + relation_id="rel_0", + source_span_id="span_0", + target_span_id="span_1", + label=SpanLabel(label="agent-of"), + ) + + assert rel.relation_id == "rel_0" + assert rel.directed is True + assert rel.label is not None + assert rel.label.label == "agent-of" + + def test_create_undirected(self) -> None: + """Test creating an undirected relation.""" + rel = SpanRelation( + relation_id="rel_0", + source_span_id="span_0", + target_span_id="span_1", + directed=False, + ) + + assert rel.directed is False + + def test_with_wikidata_label(self) -> None: + """Test relation with Wikidata label_id.""" + rel = SpanRelation( + relation_id="rel_0", + source_span_id="span_0", + target_span_id="span_1", + label=SpanLabel(label="instance of", label_id="P31"), + ) + + assert rel.label is not None + assert rel.label.label_id == "P31" + + def test_empty_relation_id_raises(self) -> None: + """Test that empty relation_id raises error.""" + with pytest.raises(ValueError, match="relation_id cannot be empty"): + SpanRelation( + relation_id="", + source_span_id="span_0", + target_span_id="span_1", + ) + + def test_empty_span_id_raises(self) -> None: + """Test that empty source/target span_id raises error.""" + with pytest.raises(ValueError, match="span ID cannot be empty"): + SpanRelation( + relation_id="rel_0", + source_span_id="", + target_span_id="span_1", + ) + + +class TestSpanOnItem: + """Test span fields on Item model.""" + + def test_item_with_no_spans(self) -> None: + """Test item defaults have empty span fields.""" + item = Item(item_template_id=uuid4()) + + assert item.spans == [] + assert item.span_relations == [] + assert item.tokenized_elements == {} + assert item.token_space_after == {} + + def test_item_with_spans(self) -> None: + """Test item with span annotations.""" + span = Span( + span_id="span_0", + segments=[SpanSegment(element_name="text", indices=[0, 1])], + label=SpanLabel(label="Person"), + ) + + item = Item( + item_template_id=uuid4(), + rendered_elements={"text": "The cat"}, + spans=[span], + tokenized_elements={"text": ["The", "cat"]}, + token_space_after={"text": [True, False]}, + ) + + assert len(item.spans) == 1 + assert item.spans[0].span_id == "span_0" + assert item.tokenized_elements["text"] == ["The", "cat"] + + def test_item_with_relations(self) -> None: + """Test item with span relations.""" + spans = [ + Span( + span_id="span_0", + segments=[SpanSegment(element_name="text", indices=[0])], + ), + Span( + span_id="span_1", + segments=[SpanSegment(element_name="text", indices=[2])], + ), + ] + + rel = SpanRelation( + 
relation_id="rel_0", + source_span_id="span_0", + target_span_id="span_1", + label=SpanLabel(label="agent-of"), + ) + + item = Item( + item_template_id=uuid4(), + rendered_elements={"text": "The cat chased the mouse"}, + spans=spans, + span_relations=[rel], + tokenized_elements={"text": ["The", "cat", "chased", "the", "mouse"]}, + ) + + assert len(item.span_relations) == 1 + assert item.span_relations[0].source_span_id == "span_0" + + def test_relation_invalid_span_id_raises(self) -> None: + """Test that relation referencing invalid span_id raises error.""" + spans = [ + Span( + span_id="span_0", + segments=[SpanSegment(element_name="text", indices=[0])], + ), + ] + + rel = SpanRelation( + relation_id="rel_0", + source_span_id="span_0", + target_span_id="span_99", # does not exist + ) + + with pytest.raises(ValueError, match="target_span_id 'span_99' not found"): + Item( + item_template_id=uuid4(), + spans=spans, + span_relations=[rel], + ) + + def test_relations_without_spans_raises(self) -> None: + """Test that relations without any spans raises error.""" + rel = SpanRelation( + relation_id="rel_0", + source_span_id="span_0", + target_span_id="span_1", + ) + + with pytest.raises(ValueError, match="has span_relations but no spans"): + Item( + item_template_id=uuid4(), + span_relations=[rel], + ) + + def test_serialization_round_trip(self) -> None: + """Test Item with spans serializes and deserializes correctly.""" + span = Span( + span_id="span_0", + segments=[SpanSegment(element_name="text", indices=[0, 1])], + label=SpanLabel(label="Person", label_id="Q5"), + ) + + item = Item( + item_template_id=uuid4(), + rendered_elements={"text": "John Smith"}, + spans=[span], + tokenized_elements={"text": ["John", "Smith"]}, + token_space_after={"text": [True, False]}, + ) + + # Serialize and deserialize + data = item.model_dump() + restored = Item(**data) + + assert len(restored.spans) == 1 + assert restored.spans[0].span_id == "span_0" + assert restored.spans[0].label is not None + assert restored.spans[0].label.label == "Person" + assert restored.spans[0].label.label_id == "Q5" + assert restored.tokenized_elements == {"text": ["John", "Smith"]} + assert restored.token_space_after == {"text": [True, False]} + + +class TestSpanSpec: + """Test SpanSpec model.""" + + def test_default_values(self) -> None: + """Test SpanSpec default values.""" + spec = SpanSpec() + + assert spec.index_mode == "token" + assert spec.interaction_mode == "static" + assert spec.label_source == "fixed" + assert spec.labels is None + assert spec.allow_overlapping is True + assert spec.enable_relations is False + assert spec.wikidata_language == "en" + assert spec.wikidata_result_limit == 10 + + def test_interactive_with_labels(self) -> None: + """Test interactive span spec with fixed labels.""" + spec = SpanSpec( + interaction_mode="interactive", + label_source="fixed", + labels=["Person", "Organization", "Location"], + min_spans=1, + max_spans=10, + ) + + assert spec.interaction_mode == "interactive" + assert spec.labels == ["Person", "Organization", "Location"] + assert spec.min_spans == 1 + assert spec.max_spans == 10 + + def test_wikidata_config(self) -> None: + """Test Wikidata label source configuration.""" + spec = SpanSpec( + label_source="wikidata", + wikidata_language="de", + wikidata_entity_types=["item"], + wikidata_result_limit=20, + ) + + assert spec.label_source == "wikidata" + assert spec.wikidata_language == "de" + assert spec.wikidata_entity_types == ["item"] + + def test_relation_config(self) -> None: + 
"""Test relation annotation configuration.""" + spec = SpanSpec( + enable_relations=True, + relation_label_source="fixed", + relation_labels=["agent-of", "patient-of"], + relation_directed=True, + min_relations=0, + max_relations=5, + ) + + assert spec.enable_relations is True + assert spec.relation_labels == ["agent-of", "patient-of"] + assert spec.relation_directed is True + + def test_label_colors(self) -> None: + """Test label color configuration.""" + spec = SpanSpec( + labels=["PER", "ORG"], + label_colors={"PER": "#FF0000", "ORG": "#00FF00"}, + ) + + assert spec.label_colors == {"PER": "#FF0000", "ORG": "#00FF00"} diff --git a/tests/tokenization/__init__.py b/tests/tokenization/__init__.py new file mode 100644 index 0000000..e1cf4f4 --- /dev/null +++ b/tests/tokenization/__init__.py @@ -0,0 +1 @@ +"""Tokenization test package.""" diff --git a/tests/tokenization/test_tokenizers.py b/tests/tokenization/test_tokenizers.py new file mode 100644 index 0000000..30ff1ab --- /dev/null +++ b/tests/tokenization/test_tokenizers.py @@ -0,0 +1,198 @@ +"""Tests for tokenizer implementations.""" + +from __future__ import annotations + +import pytest +from pydantic import ValidationError + +from bead.tokenization.config import TokenizerConfig +from bead.tokenization.tokenizers import ( + DisplayToken, + TokenizedText, + WhitespaceTokenizer, + create_tokenizer, +) + + +class TestWhitespaceTokenizer: + """Test WhitespaceTokenizer.""" + + def test_simple_sentence(self) -> None: + """Test tokenizing a simple English sentence.""" + tokenizer = WhitespaceTokenizer() + result = tokenizer("The cat sat on the mat.") + + assert isinstance(result, TokenizedText) + assert result.token_texts == ["The", "cat", "sat", "on", "the", "mat."] + + def test_empty_string(self) -> None: + """Test tokenizing empty string.""" + tokenizer = WhitespaceTokenizer() + result = tokenizer("") + + assert result.tokens == [] + assert result.token_texts == [] + + def test_single_word(self) -> None: + """Test tokenizing single word.""" + tokenizer = WhitespaceTokenizer() + result = tokenizer("Hello") + + assert result.token_texts == ["Hello"] + assert result.tokens[0].space_after is False + + def test_space_after_flags(self) -> None: + """Test space_after flags are correct.""" + tokenizer = WhitespaceTokenizer() + result = tokenizer("The cat sat.") + + assert result.tokens[0].space_after is True # "The " + assert result.tokens[1].space_after is True # "cat " + assert result.tokens[2].space_after is False # "sat." (end) + + def test_multiple_spaces(self) -> None: + """Test handling of multiple spaces.""" + tokenizer = WhitespaceTokenizer() + result = tokenizer("The cat") + + # Whitespace tokenizer treats any whitespace as delimiter + assert len(result.tokens) == 2 + + def test_character_offsets(self) -> None: + """Test character offsets are correct.""" + tokenizer = WhitespaceTokenizer() + result = tokenizer("The cat") + + assert result.tokens[0].start_char == 0 + assert result.tokens[0].end_char == 3 + assert result.tokens[1].start_char == 4 + assert result.tokens[1].end_char == 7 + + def test_round_trip(self) -> None: + """Test that render() reproduces the original text.""" + tokenizer = WhitespaceTokenizer() + text = "The cat sat on the mat." 
+ result = tokenizer(text) + + assert result.render() == text + + def test_round_trip_trailing_space(self) -> None: + """Test round trip strips trailing space.""" + tokenizer = WhitespaceTokenizer() + result = tokenizer("Hello world") + + assert result.render() == "Hello world" + + def test_pre_tokenized(self) -> None: + """Test with pre-tokenized text (tab-separated).""" + tokenizer = WhitespaceTokenizer() + result = tokenizer("word1\tword2\tword3") + + assert len(result.tokens) == 3 + + +class TestDisplayToken: + """Test DisplayToken model.""" + + def test_create(self) -> None: + """Test creating a DisplayToken.""" + token = DisplayToken( + text="hello", + space_after=True, + start_char=0, + end_char=5, + ) + + assert token.text == "hello" + assert token.space_after is True + assert token.start_char == 0 + assert token.end_char == 5 + + def test_default_space_after(self) -> None: + """Test default space_after is True.""" + token = DisplayToken(text="hello", start_char=0, end_char=5) + assert token.space_after is True + + +class TestTokenizedText: + """Test TokenizedText model.""" + + def test_token_texts(self) -> None: + """Test token_texts property.""" + result = TokenizedText( + tokens=[ + DisplayToken(text="The", start_char=0, end_char=3), + DisplayToken(text="cat", start_char=4, end_char=7), + ], + original_text="The cat", + ) + + assert result.token_texts == ["The", "cat"] + + def test_space_after_flags(self) -> None: + """Test space_after_flags property.""" + result = TokenizedText( + tokens=[ + DisplayToken(text="The", space_after=True, start_char=0, end_char=3), + DisplayToken(text="cat", space_after=False, start_char=4, end_char=7), + ], + original_text="The cat", + ) + + assert result.space_after_flags == [True, False] + + def test_render(self) -> None: + """Test render reconstructs text.""" + result = TokenizedText( + tokens=[ + DisplayToken(text="The", space_after=True, start_char=0, end_char=3), + DisplayToken(text="cat", space_after=True, start_char=4, end_char=7), + DisplayToken(text="sat.", space_after=False, start_char=8, end_char=12), + ], + original_text="The cat sat.", + ) + + assert result.render() == "The cat sat." 
+ + def test_render_no_trailing_space(self) -> None: + """Test render strips trailing spaces.""" + result = TokenizedText( + tokens=[ + DisplayToken(text="hello", space_after=True, start_char=0, end_char=5), + ], + original_text="hello ", + ) + + assert result.render() == "hello" + + +class TestCreateTokenizer: + """Test create_tokenizer factory.""" + + def test_whitespace_backend(self) -> None: + """Test creating whitespace tokenizer.""" + config = TokenizerConfig(backend="whitespace") + tokenizer = create_tokenizer(config) + + result = tokenizer("Hello world") + assert result.token_texts == ["Hello", "world"] + + def test_unknown_backend_raises(self) -> None: + """Test that unknown backend raises ValueError.""" + # Pydantic validation will reject invalid Literal values + with pytest.raises(ValidationError): + TokenizerConfig(backend="unknown") + + def test_spacy_backend_without_install(self) -> None: + """Test that spaCy backend works or raises ImportError gracefully.""" + config = TokenizerConfig(backend="spacy", language="en") + tokenizer = create_tokenizer(config) + # Just test that the factory returns something callable + assert callable(tokenizer) + + def test_default_config(self) -> None: + """Test default config uses spacy.""" + config = TokenizerConfig() + assert config.backend == "spacy" + assert config.language == "en" + assert config.model_name is None diff --git a/uv.lock b/uv.lock index f4dbba9..6dde50f 100644 --- a/uv.lock +++ b/uv.lock @@ -177,7 +177,7 @@ wheels = [ [[package]] name = "bead" -version = "0.1.0" +version = "0.2.0" source = { editable = "." } dependencies = [ { name = "accelerate" }, @@ -231,6 +231,10 @@ dev = [ stats = [ { name = "statsmodels" }, ] +tokenization = [ + { name = "spacy" }, + { name = "stanza" }, +] training = [ { name = "pytorch-lightning" }, { name = "tensorboard" }, @@ -275,6 +279,8 @@ requires-dist = [ { name = "scipy", specifier = ">=1.11.0" }, { name = "sentence-transformers", specifier = ">=2.0.0" }, { name = "slopit", marker = "extra == 'behavioral-analysis'", specifier = ">=0.1.0" }, + { name = "spacy", marker = "extra == 'tokenization'", specifier = ">=3.7" }, + { name = "stanza", marker = "extra == 'tokenization'", specifier = ">=1.8" }, { name = "statsmodels", specifier = ">=0.14.6" }, { name = "statsmodels", marker = "extra == 'stats'", specifier = ">=0.14.0" }, { name = "tensorboard", marker = "extra == 'training'", specifier = ">=2.13.0" }, @@ -285,7 +291,7 @@ requires-dist = [ { name = "unimorph", specifier = ">=0.0.4" }, { name = "uuid-utils", specifier = ">=0.7.0" }, ] -provides-extras = ["dev", "api", "training", "stats", "ui", "behavioral-analysis"] +provides-extras = ["dev", "api", "training", "stats", "ui", "behavioral-analysis", "tokenization"] [[package]] name = "black" @@ -616,6 +622,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/55/e2/2537ebcff11c1ee1ff17d8d0b6f4db75873e3b0fb32c2d4a2ee31ecb310a/docstring_parser-0.17.0-py3-none-any.whl", hash = "sha256:cf2569abd23dce8099b300f9b4fa8191e9582dda731fd533daf54c4551658708", size = 36896, upload-time = "2025-07-21T07:35:00.684Z" }, ] +[[package]] +name = "emoji" +version = "2.15.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a2/78/0d2db9382c92a163d7095fc08efff7800880f830a152cfced40161e7638d/emoji-2.15.0.tar.gz", hash = "sha256:eae4ab7d86456a70a00a985125a03263a5eac54cd55e51d7e184b1ed3b6757e4", size = 615483, upload-time = "2025-09-21T12:13:02.755Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/e1/5e/4b5aaaabddfacfe36ba7768817bd1f71a7a810a43705e531f3ae4c690767/emoji-2.15.0-py3-none-any.whl", hash = "sha256:205296793d66a89d88af4688fa57fd6496732eb48917a87175a023c8138995eb", size = 608433, upload-time = "2025-09-21T12:13:01.197Z" }, +] + [[package]] name = "evaluate" version = "0.4.6" @@ -2946,6 +2961,24 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/95/81/6ea10ef6228ce4438a240c803639f7ccf5eae3469fbc015f33bd84aa8df1/srsly-2.5.2-cp314-cp314t-win_amd64.whl", hash = "sha256:8e2b9058623c44b07441eb0d711dfdf6302f917f0634d0a294cae37578dcf899", size = 676105, upload-time = "2025-11-17T14:10:43.633Z" }, ] +[[package]] +name = "stanza" +version = "1.11.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "emoji" }, + { name = "networkx" }, + { name = "numpy" }, + { name = "protobuf" }, + { name = "requests" }, + { name = "torch" }, + { name = "tqdm" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/07/e5/acd22862a75f424d98bb690fec9ab292da6e797cab367fa8fa451c547637/stanza-1.11.0.tar.gz", hash = "sha256:42ba9d4752e74c4e1e6fc2ca96e98bb8fa194049782cc35fde2a5118fd5f75ab", size = 1484551, upload-time = "2025-10-05T06:44:03.665Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e0/47/c6f8dd24ca100f6c260209b27be4d2e0ae68f13d4b2b4b1b343876c9e765/stanza-1.11.0-py3-none-any.whl", hash = "sha256:3a0bcf24830e32e88f6d0cff1e757661e53ed1b60149fa7f72211d61c6dab063", size = 1706081, upload-time = "2025-10-05T06:43:59.247Z" }, +] + [[package]] name = "statsmodels" version = "0.14.6" @@ -3136,6 +3169,7 @@ dependencies = [ { name = "typing-extensions" }, ] wheels = [ + { url = "https://files.pythonhosted.org/packages/a4/1b/af5fccb50c341bd69dc016769503cb0857c1423fbe9343410dfeb65240f2/torch-2.10.0-1-cp313-none-macosx_11_0_arm64.whl", hash = "sha256:7350f6652dfd761f11f9ecb590bfe95b573e2961f7a242eccb3c8e78348d26fe", size = 79498248, upload-time = "2026-02-06T17:37:31.982Z" }, { url = "https://files.pythonhosted.org/packages/c9/6f/f2e91e34e3fcba2e3fc8d8f74e7d6c22e74e480bbd1db7bc8900fdf3e95c/torch-2.10.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:5c4d217b14741e40776dd7074d9006fd28b8a97ef5654db959d8635b2fe5f29b", size = 146004247, upload-time = "2026-01-21T16:24:29.335Z" }, { url = "https://files.pythonhosted.org/packages/98/fb/5160261aeb5e1ee12ee95fe599d0541f7c976c3701d607d8fc29e623229f/torch-2.10.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:6b71486353fce0f9714ca0c9ef1c850a2ae766b409808acd58e9678a3edb7738", size = 915716445, upload-time = "2026-01-21T16:22:45.353Z" }, { url = "https://files.pythonhosted.org/packages/6a/16/502fb1b41e6d868e8deb5b0e3ae926bbb36dab8ceb0d1b769b266ad7b0c3/torch-2.10.0-cp313-cp313-win_amd64.whl", hash = "sha256:c2ee399c644dc92ef7bc0d4f7e74b5360c37cdbe7c5ba11318dda49ffac2bc57", size = 113757050, upload-time = "2026-01-21T16:24:19.204Z" },