Reviewer notes (git apply and patch skip text before the first "diff --git"):
- Line breaks were restored and checked against each hunk's line counts.
  The depth of leading indentation on context and removed lines is a best
  guess -- confirm against the target tree before applying.
- CHANGELOG.md: removed a second, empty "### Fixes and improvements" heading
  that was added below the v1.38.0 bullets (hunk is now +26,13) and tidied
  the bullet wording.
- prepare_build_environment_macos.sh: removed the blank line at end of file
  (hunk is now +1,42).
- The post-image blob ids on the "index" lines of those two files predate
  these adjustments.

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 09027305..05e67938 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -57,10 +57,10 @@ jobs:
     steps:
       - uses: actions/checkout@v3
 
-      - name: Set up Python 3.8
+      - name: Set up Python
         uses: actions/setup-python@v4
         with:
-          python-version: 3.8
+          python-version: "3.11"
 
       - name: Install dependencies
         run: |
@@ -112,17 +112,22 @@ jobs:
           package-dir: bindings/python
           output-dir: wheelhouse
         env:
-          CIBW_ENVIRONMENT_WINDOWS: TOKENIZER_ROOT='${{ github.workspace }}\install'
-          CIBW_BEFORE_ALL: bindings/python/tools/prepare_build_environment.sh
+          CIBW_ENVIRONMENT_LINUX: TOKENIZER_ROOT=/project/build/install ICU_ROOT=/project/icu
+          CIBW_ENVIRONMENT_MACOS: TOKENIZER_ROOT=${GITHUB_WORKSPACE}/build/install
+          CIBW_ENVIRONMENT_WINDOWS: TOKENIZER_ROOT=${GITHUB_WORKSPACE}/build/install
+          CIBW_BEFORE_ALL_LINUX: bindings/python/tools/prepare_build_environment_linux.sh
+          CIBW_BEFORE_ALL_MACOS: bindings/python/tools/prepare_build_environment_macos.sh
           CIBW_BEFORE_ALL_WINDOWS: bash bindings/python/tools/prepare_build_environment_windows.sh
           CIBW_BEFORE_BUILD: pip install pybind11==2.10.1
           CIBW_MANYLINUX_X86_64_IMAGE: manylinux2014
           CIBW_MANYLINUX_AARCH64_IMAGE: manylinux2014
+          CIBW_BUILD: "cp310-* cp311-* cp312-*"
           CIBW_TEST_COMMAND: pytest {project}/bindings/python/test/test.py
           CIBW_TEST_REQUIRES: pytest
           CIBW_ARCHS: ${{ matrix.arch }}
           CIBW_SKIP: pp* *-musllinux_*
           CIBW_TEST_SKIP: "*-macosx_arm64"
+          CIBW_REPAIR_WHEEL_COMMAND_MACOS: ""
 
       - name: Upload Python wheels
         uses: actions/upload-artifact@v4
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 157e34fc..62aa903a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -26,6 +26,13 @@ The project follows [semantic versioning 2.0.0](https://semver.org/). The API co
 
 ### Fixes and improvements
 
+## [v1.38.0](https://github.com/OpenNMT/Tokenizer/releases/tag/v1.38.0) (2025-12-30)
+
+### Fixes and improvements
+
+* Drop support for Python 3.9 and earlier
+* Add support for Python 3.12
+
 ## [v1.37.1](https://github.com/OpenNMT/Tokenizer/releases/tag/v1.37.1) (2023-03-01)
 
 ### Fixes and improvements
diff --git a/bindings/python/pyonmttok/version.py b/bindings/python/pyonmttok/version.py
index 4769d763..46362d72 100644
--- a/bindings/python/pyonmttok/version.py
+++ b/bindings/python/pyonmttok/version.py
@@ -1,3 +1,3 @@
 """Version information."""
 
-__version__ = "1.37.1"
+__version__ = "1.38.0"
diff --git a/bindings/python/setup.py b/bindings/python/setup.py
index 5c2b441a..178eda86 100644
--- a/bindings/python/setup.py
+++ b/bindings/python/setup.py
@@ -78,12 +78,9 @@ def _maybe_add_library_root(lib_name, header_only=False):
         "License :: OSI Approved :: MIT License",
         "Programming Language :: Python :: 3",
         "Programming Language :: Python :: 3 :: Only",
-        "Programming Language :: Python :: 3.6",
-        "Programming Language :: Python :: 3.7",
-        "Programming Language :: Python :: 3.8",
-        "Programming Language :: Python :: 3.9",
         "Programming Language :: Python :: 3.10",
         "Programming Language :: Python :: 3.11",
+        "Programming Language :: Python :: 3.12",
         "Topic :: Text Processing :: Linguistic",
         "Topic :: Software Development :: Libraries :: Python Modules",
     ],
@@ -94,7 +91,7 @@ def _maybe_add_library_root(lib_name, header_only=False):
     keywords="tokenization opennmt unicode bpe sentencepiece subword",
     packages=find_packages(),
     package_data=package_data,
-    python_requires=">=3.6",
+    python_requires=">=3.10",
     setup_requires=["pytest-runner"],
     tests_require=["pytest"],
     ext_modules=[tokenizer_module],
diff --git a/bindings/python/tools/prepare_build_environment.sh b/bindings/python/tools/prepare_build_environment.sh
deleted file mode 100755
index 7ec955d3..00000000
--- a/bindings/python/tools/prepare_build_environment.sh
+++ /dev/null
@@ -1,48 +0,0 @@
-#! /bin/bash
-
-set -e
-set -x
-
-ROOT_DIR=$PWD
-ICU_ROOT=$ROOT_DIR/icu
-CMAKE_EXTRA_ARGS=""
-
-if [ "$CIBW_ARCHS" == "arm64" ]; then
-
-    # Download ICU ARM64 binaries from Homebrew.
-    brew fetch --force --bottle-tag=arm64_big_sur icu4c \
-        | grep "Downloaded to" \
-        | awk '{ print $3 }' \
-        | xargs -I{} tar xf {} -C $ROOT_DIR
-
-    mv icu4c/*.* $ICU_ROOT
-
-    # Remove dynamic libraries to force static link.
-    rm $ICU_ROOT/lib/*.dylib
-
-    CMAKE_EXTRA_ARGS="-DCMAKE_OSX_ARCHITECTURES=arm64"
-
-else
-
-    # Download and compile ICU from sources.
-    ICU_VERSION=${ICU_VERSION:-73.2}
-    curl -L -O https://github.com/unicode-org/icu/releases/download/release-${ICU_VERSION/./-}/icu4c-${ICU_VERSION/./_}-src.tgz
-    tar xf icu4c-*-src.tgz
-    cd icu/source
-    CFLAGS="-O3 -fPIC" CXXFLAGS="-O3 -fPIC" ./configure --disable-shared --enable-static --prefix=$ICU_ROOT
-    make -j2 install
-
-fi
-
-cd $ROOT_DIR
-
-# Install cmake.
-pip install "cmake==3.18.*"
-
-# Build Tokenizer.
-rm -rf build
-mkdir build
-cd build
-cmake -DLIB_ONLY=ON -DICU_ROOT=$ICU_ROOT $CMAKE_EXTRA_ARGS ..
-VERBOSE=1 make -j2 install
-cd $ROOT_DIR
diff --git a/bindings/python/tools/prepare_build_environment_linux.sh b/bindings/python/tools/prepare_build_environment_linux.sh
new file mode 100755
index 00000000..6495c328
--- /dev/null
+++ b/bindings/python/tools/prepare_build_environment_linux.sh
@@ -0,0 +1,29 @@
+#! /bin/bash
+
+set -e
+set -x
+
+ROOT_DIR=$PWD
+ICU_ROOT=$ROOT_DIR/icu
+CMAKE_EXTRA_ARGS=""
+
+# Download and compile ICU from sources.
+ICU_VERSION=${ICU_VERSION:-73.2}
+curl -L -O https://github.com/unicode-org/icu/releases/download/release-${ICU_VERSION/./-}/icu4c-${ICU_VERSION/./_}-src.tgz
+tar xf icu4c-*-src.tgz
+cd icu/source
+CFLAGS="-O3 -fPIC" CXXFLAGS="-O3 -fPIC" ./configure --disable-shared --enable-static --prefix=$ICU_ROOT
+make -j2 install
+
+cd $ROOT_DIR
+
+# Install cmake.
+pip install cmake
+
+# Build Tokenizer.
+rm -rf build
+mkdir build
+cd build
+cmake -DLIB_ONLY=ON -DICU_ROOT=$ICU_ROOT $CMAKE_EXTRA_ARGS ..
+VERBOSE=1 make -j2 install
+cd $ROOT_DIR
diff --git a/bindings/python/tools/prepare_build_environment_macos.sh b/bindings/python/tools/prepare_build_environment_macos.sh
new file mode 100755
index 00000000..773783ef
--- /dev/null
+++ b/bindings/python/tools/prepare_build_environment_macos.sh
@@ -0,0 +1,42 @@
+#! /bin/bash
+
+set -e
+set -x
+
+ROOT_DIR="$PWD"
+ICU_ROOT="$ROOT_DIR/icu"
+CMAKE_EXTRA_ARGS=""
+
+mkdir -p "$ICU_ROOT"
+
+# Install ICU via Homebrew
+brew install icu4c
+ICU_PREFIX="$(brew --prefix icu4c)"
+
+# Copy ICU into local prefix
+rsync -a "$ICU_PREFIX/" "$ICU_ROOT/"
+
+# Remove dynamic libraries to force static linking
+rm -f "$ICU_ROOT/lib/"*.dylib || true
+
+# Explicit Apple Silicon handling
+if [[ "$(uname -m)" == "arm64" ]]; then
+    CMAKE_EXTRA_ARGS="-DCMAKE_OSX_ARCHITECTURES=arm64"
+fi
+
+# Install cmake
+pip install cmake
+
+# Build Tokenizer
+rm -rf build
+mkdir build
+cd build
+cmake \
+    -DLIB_ONLY=ON \
+    -DICU_ROOT="$ICU_ROOT" \
+    -DCMAKE_INSTALL_PREFIX="$ROOT_DIR/build/install" \
+    $CMAKE_EXTRA_ARGS \
+    ..
+
+VERBOSE=1 make -j2 install
+cd "$ROOT_DIR"