Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 48 additions & 1 deletion .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,57 @@ permissions:
contents: write

jobs:
release:
build:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4

- uses: actions/setup-python@v5
with:
python-version: "3.13"

- name: Install build dependencies
run: pip install build

- name: Build package
run: python -m build

- name: Upload dist artifacts
uses: actions/upload-artifact@v4
with:
name: dist
path: dist/

publish:
needs: build
runs-on: ubuntu-latest
environment: pypi
permissions:
id-token: write
steps:
- name: Download dist artifacts
uses: actions/download-artifact@v4
with:
name: dist
path: dist/

- name: Publish to PyPI
uses: pypa/gh-action-pypi-publish@release/v1

github-release:
needs: build
runs-on: ubuntu-latest
permissions:
contents: write
steps:
- uses: actions/checkout@v4

- name: Download dist artifacts
uses: actions/download-artifact@v4
with:
name: dist
path: dist/

- name: Extract changelog for this version
id: changelog
run: |
Expand All @@ -26,3 +72,4 @@ jobs:
uses: softprops/action-gh-release@v2
with:
body_path: release_notes.md
files: dist/*
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,4 @@ build/
.mypy_cache/
.ruff_cache/
.coverage
uv.lock
12 changes: 12 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,17 @@
# Changelog

## Unreleased

### Added

- `[[tool.check-unicode.overrides]]` per-file config: apply different
allow-lists, severity, and confusable settings per file pattern
- Per-file severity: override `severity` to `"warning"` for specific file
patterns so findings don't affect exit code
- Per-file confusable toggle: enable or disable `check-confusables` per file
pattern
- `uv.lock` added to `.gitignore`

## 0.3.3 - 2026-02-23

### Fixed
Expand Down
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,14 @@ classifiers = [
]
dynamic = [ "version" ]
optional-dependencies.dev = [ "bump-my-version", "mypy", "pytest", "pytest-cov", "ruff" ]
urls.Changelog = "https://github.com/mit-d/check-unicode/blob/main/CHANGELOG.md"
urls.Issues = "https://github.com/mit-d/check-unicode/issues"
urls.Repository = "https://github.com/mit-d/check-unicode"
scripts.check-unicode = "check_unicode.main:main"

[tool.hatch]
build.targets.wheel.packages = [ "src/check_unicode" ]
build.targets.sdist.include = [ "docs/check-unicode.1" ]
build.targets.sdist.include = [ "src/", "docs/check-unicode.1" ]
version.path = "src/check_unicode/__init__.py"

[tool.ruff]
Expand Down
148 changes: 139 additions & 9 deletions src/check_unicode/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import sys
import textwrap
import tomllib
from dataclasses import dataclass
from pathlib import Path
from typing import Any

Expand Down Expand Up @@ -54,6 +55,20 @@
}


@dataclass(frozen=True)
class Override:
    """One ``[[tool.check-unicode.overrides]]`` entry, parsed and immutable.

    The allow-list fields (``codepoints``, ``ranges``, ``categories``,
    ``scripts``) are added on top of the global allow-list for every file
    that matches one of ``patterns``.  The three optional fields use
    ``None`` to mean "not set in this entry, inherit the global value":

    * ``printable`` -- per-file value of the printable allowance.
    * ``severity`` -- per-file severity string.
    * ``check_confusables`` -- per-file toggle for the confusables check.
    """

    # File patterns this override applies to.
    patterns: tuple[str, ...]
    # Additional allowed codepoints.
    codepoints: frozenset[int]
    # Additional allowed codepoint ranges, as pairs of ints.
    ranges: tuple[tuple[int, int], ...]
    # Additional allowed categories.
    categories: frozenset[str]
    # None = inherit global
    printable: bool | None
    # Additional allowed scripts.
    scripts: frozenset[str]
    # None = inherit global
    severity: str | None
    # None = inherit global
    check_confusables: bool | None


def _parse_codepoint(s: str) -> int:
"""Parse 'U+XXXX' or '0xXXXX' into an integer codepoint."""
s = s.strip()
Expand Down Expand Up @@ -427,19 +442,129 @@ def _build_exclude_patterns(
return patterns


def _build_overrides(config: dict[str, Any]) -> tuple[Override, ...]:
"""Parse [[overrides]] entries from the config into Override objects."""
raw = config.get("overrides", [])
overrides: list[Override] = []
for entry in raw:
if "files" not in entry:
msg = "Each [[overrides]] entry must have a 'files' key"
raise ValueError(msg)
patterns = tuple(entry["files"])
codepoints, ranges, categories, printable_val, scripts = _allow_from_config(
entry
)
# For override, printable is None when not set (inherit global)
printable: bool | None = (
True if printable_val else None if "allow-printable" not in entry else False
)
severity: str | None = entry.get("severity")
check_confusables: bool | None = entry.get("check-confusables")
overrides.append(
Override(
patterns=patterns,
codepoints=frozenset(codepoints),
ranges=tuple(ranges),
categories=frozenset(categories),
printable=printable,
scripts=frozenset(scripts),
severity=severity,
check_confusables=check_confusables,
)
)
return tuple(overrides)


def _file_matches_override(filepath: str, override: Override) -> bool:
"""Check whether a filepath matches any pattern in an override."""
name = Path(filepath).name
return any(
fnmatch.fnmatch(filepath, pat) or fnmatch.fnmatch(name, pat)
for pat in override.patterns
)


def _resolve_allow_for_file(
    filepath: str,
    base_allow: AllowConfig,
    overrides: tuple[Override, ...],
) -> AllowConfig:
    """Build the effective AllowConfig for one file.

    Starts from *base_allow* and layers on every override whose patterns
    match *filepath*, in order.  Codepoints, ranges, categories and scripts
    accumulate; ``printable`` is replaced by each matching override that
    sets it (not ``None``), so the last such override wins.
    """
    applicable = [ovr for ovr in overrides if _file_matches_override(filepath, ovr)]

    # Only overrides that explicitly set printable may replace it.
    printable = base_allow.printable
    for ovr in applicable:
        if ovr.printable is not None:
            printable = ovr.printable

    return AllowConfig(
        codepoints=frozenset(base_allow.codepoints).union(
            *(ovr.codepoints for ovr in applicable)
        ),
        ranges=(
            *base_allow.ranges,
            *(rng for ovr in applicable for rng in ovr.ranges),
        ),
        categories=frozenset(base_allow.categories).union(
            *(ovr.categories for ovr in applicable)
        ),
        printable=printable,
        scripts=frozenset(base_allow.scripts).union(
            *(ovr.scripts for ovr in applicable)
        ),
    )


def _resolve_file_settings(
filepath: str,
global_severity: str,
*,
global_confusables: bool,
overrides: tuple[Override, ...],
) -> tuple[str, bool]:
"""Return (severity, do_confusables) for a file after applying overrides."""
severity = global_severity
do_confusables = global_confusables
for ovr in overrides:
if not _file_matches_override(filepath, ovr):
continue
if ovr.severity is not None:
severity = ovr.severity
if ovr.check_confusables is not None:
do_confusables = ovr.check_confusables
return severity, do_confusables


def _scan_files(
files: list[str],
allow: AllowConfig,
overrides: tuple[Override, ...],
*,
do_confusables: bool,
) -> list[Finding]:
"""Scan files for non-ASCII and (optionally) confusable characters."""
severity: str,
) -> tuple[list[Finding], bool]:
"""Scan files for non-ASCII and (optionally) confusable characters.

Returns (findings, has_errors) where has_errors is True if any finding
came from a file whose effective severity is "error".
"""
findings: list[Finding] = []
has_errors = False
for filepath in files:
findings.extend(check_file(filepath, allow))
if do_confusables:
findings.extend(check_confusables(filepath))
return findings
file_allow = _resolve_allow_for_file(filepath, allow, overrides)
file_severity, file_confusables = _resolve_file_settings(
filepath,
severity,
global_confusables=do_confusables,
overrides=overrides,
)
file_findings = check_file(filepath, file_allow)
if file_confusables:
file_findings.extend(check_confusables(filepath))
if file_findings and file_severity == "error":
has_errors = True
findings.extend(file_findings)
return findings, has_errors


def main(argv: list[str] | None = None) -> int:
Expand All @@ -462,6 +587,7 @@ def main(argv: list[str] | None = None) -> int:
severity = args.severity or config.get("severity", "error")
allow = _build_allow_config(args, config)
do_confusables = args.check_confusables or config.get("check-confusables", False)
overrides = _build_overrides(config)

# Filter out excluded files
exclude_patterns = _build_exclude_patterns(args, config)
Expand All @@ -474,16 +600,20 @@ def main(argv: list[str] | None = None) -> int:
if args.fix:
fixed = [fix_file(filepath) for filepath in files]
any_fixed = any(fixed)
all_findings = _scan_files(files, allow, do_confusables=do_confusables)
all_findings, has_errors = _scan_files(
files, allow, overrides, do_confusables=do_confusables, severity=severity
)
if all_findings:
print_findings(all_findings, no_color=args.no_color, quiet=args.quiet)
return 1 if any_fixed or all_findings else 0

# Check mode
all_findings = _scan_files(files, allow, do_confusables=do_confusables)
all_findings, has_errors = _scan_files(
files, allow, overrides, do_confusables=do_confusables, severity=severity
)
if all_findings:
print_findings(all_findings, no_color=args.no_color, quiet=args.quiet)
return 0 if severity == "warning" else 1
return 1 if has_errors else 0

return 0

Expand Down
Loading