diff --git a/README.md b/README.md deleted file mode 100644 index ecfeecd..0000000 --- a/README.md +++ /dev/null @@ -1,65 +0,0 @@ -# kernels - -
-kernel-builder logo -

- PyPI - Version - GitHub tag - Test kernels - -

-
-
- -The Kernel Hub allows Python libraries and applications to load compute -kernels directly from the [Hub](https://hf.co/). To support this kind -of dynamic loading, Hub kernels differ from traditional Python kernel -packages in that they are made to be: - -- Portable: a kernel can be loaded from paths outside `PYTHONPATH`. -- Unique: multiple versions of the same kernel can be loaded in the - same Python process. -- Compatible: kernels must support all recent versions of Python and - the different PyTorch build configurations (various CUDA versions - and C++ ABIs). Furthermore, older C library versions must be supported. - -## Components - -- You can load kernels from the Hub using the [`kernels`](kernels/) Python package. -- If you are a kernel author, you can build your kernels with [kernel-builder](builder/). -- Hugging Face maintains a set of kernels in [kernels-community](https://huggingface.co/kernels-community). - -## 🚀 Quick Start - -Install the `kernels` Python package with `pip` (requires `torch>=2.5` and CUDA): - -```bash -pip install kernels -``` - -Here is how you would use the [activation](https://huggingface.co/kernels-community/activation) kernels from the Hugging Face Hub: - -```python -import torch - -from kernels import get_kernel - -# Download optimized kernels from the Hugging Face hub -activation = get_kernel("kernels-community/activation", version=1) - -# Random tensor -x = torch.randn((10, 10), dtype=torch.float16, device="cuda") - -# Run the kernel -y = torch.empty_like(x) -activation.gelu_fast(y, x) - -print(y) -``` - -You can [search for kernels](https://huggingface.co/models?other=kernels) on -the Hub. - -## 📚 Documentation - -Read the [documentation of kernels and kernel-builder](https://huggingface.co/docs/kernels/). diff --git a/docs/source/cli.md b/docs/source/cli.md index c7588c5..8b104e6 100644 --- a/docs/source/cli.md +++ b/docs/source/cli.md @@ -31,3 +31,9 @@ your kernel builds to the Hub. 
To know the supported arguments run: `kernels upl - If a repo with the `repo_id` already exists and if it contains a `build` with the build variant being uploaded, it will attempt to delete the files existing under it. - Make sure to be authenticated (run `hf auth login` if not) to be able to perform uploads to the Hub. + +### kernels create-and-upload-card + +Use `kernels create-and-upload-card <kernel_dir> --card-path README.md` to generate a basic homepage +for the kernel. Find an example [here](https://hf.co/kernels-community/kernel-card-template). You can +optionally push it to the Hub by specifying a `--repo-id`. diff --git a/kernels/pyproject.toml b/kernels/pyproject.toml index b97a68f..9458a9c 100644 --- a/kernels/pyproject.toml +++ b/kernels/pyproject.toml @@ -54,7 +54,7 @@ kernels = "kernels.cli:main" "kernels.lock" = "kernels.lockfile:write_egg_lockfile" [tool.setuptools.package-data] -kernels = ["python_depends.json"] +kernels = ["python_depends.json", "card_template.md"] [tool.isort] profile = "black" diff --git a/kernels/src/kernels/card_template.md b/kernels/src/kernels/card_template.md new file mode 100644 index 0000000..ddffe02 --- /dev/null +++ b/kernels/src/kernels/card_template.md @@ -0,0 +1,34 @@ +--- +{{ card_data }} +--- + + + +{{ model_description }} + +## How to use + +```python +# TODO: add an example code snippet for running this kernel +``` + +## Available functions + +[TODO: add the functions available through this kernel] + +## Supported backends + +[TODO: add the backends this kernel supports] + +## Benchmarks + +[TODO: provide benchmarks if available] + +## Code source + +[TODO: provide original code source and other relevant citations if available] + +## Notes + +[TODO: provide additional notes about this kernel if needed] diff --git a/kernels/src/kernels/cli.py b/kernels/src/kernels/cli.py index 3d7f54e..25bd342 100644 --- a/kernels/src/kernels/cli.py +++ b/kernels/src/kernels/cli.py @@ -14,6 +14,14 @@ ) from kernels.versions_cli import
print_kernel_versions from kernels.init import run_init, parse_kernel_name +from kernels.kernel_card_utils import ( + _load_or_create_kernel_card, + _update_benchmark, + _update_kernel_card_available_funcs, + _update_kernel_card_license, + _update_kernel_card_backends, + _update_kernel_card_usage, +) from .doc import generate_readme_for_kernel @@ -181,6 +189,37 @@ def main(): ) init_parser.set_defaults(func=run_init) + repocard_parser = subparsers.add_parser( + "create-and-upload-card", + help="Create and optionally upload a kernel card.", + ) + repocard_parser.add_argument( + "kernel_dir", + type=str, + help="Path to the kernels source.", + ) + repocard_parser.add_argument( + "--card-path", type=str, required=True, help="Path to save the card to." + ) + repocard_parser.add_argument( + "--description", + type=str, + default=None, + help="Description to introduce the kernel.", + ) + repocard_parser.add_argument( + "--repo-id", + type=str, + default=None, + help="If specified it will be pushed to a repository on the Hub.", + ) + repocard_parser.add_argument( + "--create-pr", + action="store_true", + help="If specified it will create a PR on the `repo_id`.", + ) + repocard_parser.set_defaults(func=create_and_upload_card) + args = parser.parse_args() args.func(args) @@ -249,6 +288,36 @@ def upload_kernels(args): ) +def create_and_upload_card(args): + if not args.repo_id and args.create_pr: + raise ValueError("`create_pr` cannot be True when `repo_id` is None.") + + kernel_dir = Path(args.kernel_dir).resolve() + kernel_card = _load_or_create_kernel_card( + kernel_description=args.description, license="apache-2.0" + ) + + updated_card = _update_kernel_card_usage( + kernel_card=kernel_card, local_path=kernel_dir + ) + updated_card = _update_kernel_card_available_funcs( + kernel_card=kernel_card, local_path=kernel_dir + ) + updated_card = _update_kernel_card_backends( + kernel_card=kernel_card, local_path=kernel_dir + ) + updated_card = 
_update_benchmark(kernel_card=kernel_card, local_path=kernel_dir) + updated_card = _update_kernel_card_license( + kernel_card=kernel_card, local_path=kernel_dir + ) + + card_path = args.card_path + updated_card.save(card_path) + + if args.repo_id: + updated_card.push_to_hub(repo_id=args.repo_id, create_pr=args.create_pr) + + class _JSONEncoder(json.JSONEncoder): def default(self, o): if dataclasses.is_dataclass(o): diff --git a/kernels/src/kernels/kernel_card_utils.py b/kernels/src/kernels/kernel_card_utils.py new file mode 100644 index 0000000..b36973e --- /dev/null +++ b/kernels/src/kernels/kernel_card_utils.py @@ -0,0 +1,246 @@ +import ast +import re +from pathlib import Path + +from .compat import tomllib +from typing import Any +from huggingface_hub import ModelCard, ModelCardData +from huggingface_hub.errors import EntryNotFoundError, RepositoryNotFoundError + +KERNEL_CARD_TEMPLATE_PATH = Path(__file__).parent / "card_template.md" +DESCRIPTION = """ +This is the repository card of {repo_id} that has been pushed on the Hub. It was built to be used with the [`kernels` library](https://github.com/huggingface/kernels). This card was automatically generated. +""" +EXAMPLE_CODE = """```python +# make sure `kernels` is installed: `pip install -U kernels` +from kernels import get_kernel + +kernel_module = get_kernel("{repo_id}") # <- change the ID if needed +{func_name} = kernel_module.{func_name} + +{func_name}(...) +```""" +LIBRARY_NAME = "kernels" + +is_jinja_available = False +try: + import jinja2 # noqa + + is_jinja_available = True +except ImportError: + pass + + +def _load_or_create_kernel_card( + repo_id_or_path: str = "REPO_ID", + token: str | None = None, + kernel_description: str | None = None, + license: str | None = None, + force_update_content: bool = False, +) -> ModelCard: + if not is_jinja_available: + raise ValueError( + "Modelcard rendering is based on Jinja templates." 
+ " Please make sure to have `jinja` installed before using `load_or_create_model_card`." + " To install it, please run `pip install Jinja2`." + ) + + kernel_card = None + + if not force_update_content: + try: + kernel_card = ModelCard.load(repo_id_or_path, token=token) + except (EntryNotFoundError, RepositoryNotFoundError): + pass # Will create from template below + + if kernel_card is None: + kernel_description = kernel_description or DESCRIPTION + kernel_card = ModelCard.from_template( + card_data=ModelCardData(license=license, library_name=LIBRARY_NAME), + template_path=str(KERNEL_CARD_TEMPLATE_PATH), + model_description=kernel_description, + ) + + return kernel_card + + +def _parse_build_toml(local_path: str | Path) -> dict | None: + local_path = Path(local_path) + build_toml_path = local_path / "build.toml" + + if not build_toml_path.exists(): + return None + + try: + with open(build_toml_path, "rb") as f: + return tomllib.load(f) + except Exception: + return None + + +def _find_torch_ext_init(local_path: str | Path) -> Path | None: + local_path = Path(local_path) + + config = _parse_build_toml(local_path) + if not config: + return None + + try: + kernel_name = config.get("general", {}).get("name") + if not kernel_name: + return None + + module_name = kernel_name.replace("-", "_") + init_file = local_path / "torch-ext" / module_name / "__init__.py" + + if init_file.exists(): + return init_file + + return None + except Exception: + return None + + +def _extract_functions_from_all(init_file_path: Path) -> list[str] | None: + try: + content = init_file_path.read_text() + + tree = ast.parse(content) + + for node in ast.walk(tree): + if isinstance(node, ast.Assign): + for target in node.targets: + if isinstance(target, ast.Name) and target.id == "__all__": + if isinstance(node.value, ast.List): + functions = [] + for elt in node.value.elts: + if isinstance(elt, ast.Constant): + func_name = str(elt.value) + functions.append(func_name) + return functions if 
functions else None + return None + except Exception: + return None + + +def _update_kernel_card_usage( + kernel_card: ModelCard, + local_path: str | Path, + repo_id: str = "REPO_ID", +) -> ModelCard: + init_file = _find_torch_ext_init(local_path) + + if not init_file: + return kernel_card + + func_names = _extract_functions_from_all(init_file) + + if not func_names: + return kernel_card + + func_name = func_names[0] + example_code = EXAMPLE_CODE.format(repo_id=repo_id, func_name=func_name) + + card_content = str(kernel_card.content) + pattern = r"(## How to use\s*\n\n)```python\n# TODO: add an example code snippet for running this kernel\n```" + + if re.search(pattern, card_content): + updated_content = re.sub(pattern, r"\1" + example_code, card_content) + kernel_card.content = updated_content + + return kernel_card + + +def _update_kernel_card_available_funcs( + kernel_card: ModelCard, local_path: str | Path +) -> ModelCard: + init_file = _find_torch_ext_init(local_path) + + if not init_file: + return kernel_card + + func_names = _extract_functions_from_all(init_file) + + if not func_names: + return kernel_card + + functions_list = "\n".join(f"- `{func}`" for func in func_names) + + card_content = str(kernel_card.content) + pattern = r"(## Available functions\s*\n\n)\[TODO: add the functions available through this kernel\]" + + if re.search(pattern, card_content): + updated_content = re.sub(pattern, r"\1" + functions_list, card_content) + kernel_card.content = updated_content + + return kernel_card + + +def _update_kernel_card_backends( + kernel_card: ModelCard, local_path: str | Path +) -> ModelCard: + config = _parse_build_toml(local_path) + if not config: + return kernel_card + + general_config = config.get("general", {}) + + card_content = str(kernel_card.content) + + backends = general_config.get("backends") + if backends: + backends_list = "\n".join(f"- {backend}" for backend in backends) + pattern = r"(## Supported backends\s*\n\n)\[TODO: add the backends 
this kernel supports\]" + if re.search(pattern, card_content): + card_content = re.sub(pattern, r"\1" + backends_list, card_content) + + # TODO: should we consider making it a separate utility? + kernel_configs = config.get("kernel", {}) + cuda_capabilities = [] + if kernel_configs: + for k in kernel_configs: + cuda_cap_for_config = kernel_configs[k].get("cuda-capabilities") + if cuda_cap_for_config: + cuda_capabilities.extend(cuda_cap_for_config) + cuda_capabilities: set[Any] = set(cuda_capabilities) # type: ignore[no-redef] + if cuda_capabilities: + cuda_list = "\n".join(f"- {cap}" for cap in cuda_capabilities) + cuda_section = f"## CUDA Capabilities\n\n{cuda_list}\n\n" + pattern = r"(## Benchmarks)" + if re.search(pattern, card_content): + card_content = re.sub(pattern, cuda_section + r"\1", card_content) + + kernel_card.content = card_content + return kernel_card + + +def _update_kernel_card_license( + kernel_card: ModelCard, local_path: str | Path +) -> ModelCard: + config = _parse_build_toml(local_path) + if not config: + return kernel_card + + existing_license = kernel_card.data.get("license", None) + license_from_config = config.get("general", {}).get("license", None) + final_license = license_from_config or existing_license + kernel_card.data["license"] = final_license + return kernel_card + + +def _update_benchmark(kernel_card: ModelCard, local_path: str | Path): + local_path = Path(local_path) + + benchmark_file = local_path / "benchmarks" / "benchmark.py" + if not benchmark_file.exists(): + return kernel_card + + card_content = str(kernel_card.content) + benchmark_text = '\n\nBenchmarking script is available for this kernel. Make sure to run `kernels benchmark org-id/repo-id` (replace "org-id" and "repo-id" with actual values).' 
+ + pattern = r"(## Benchmarks)" + if re.search(pattern, card_content): + updated_content = re.sub(pattern, r"\1" + benchmark_text, card_content) + kernel_card.content = updated_content + + return kernel_card diff --git a/kernels/tests/test_kernel_card.py b/kernels/tests/test_kernel_card.py new file mode 100644 index 0000000..397da44 --- /dev/null +++ b/kernels/tests/test_kernel_card.py @@ -0,0 +1,275 @@ +import tempfile +from pathlib import Path +from dataclasses import dataclass + +import pytest + +from kernels.cli import create_and_upload_card + + +@dataclass +class CardArgs: + kernel_dir: str + card_path: str + description: str | None = None + repo_id: str | None = None + create_pr: bool = False + + +@pytest.fixture +def mock_kernel_dir(): + with tempfile.TemporaryDirectory() as tmpdir: + kernel_dir = Path(tmpdir) + + build_toml = kernel_dir / "build.toml" + build_toml.write_text( + """[general] +name = "test_kernel" +backends = ["cuda", "metal"] +license = "apache-2.0" +version = 1 + +[general.hub] +repo-id = "test-org/test-kernel" + +[kernel._test] +backend = "cuda" +cuda-capabilities = ["8.0", "8.9"] +""" + ) + + torch_ext_dir = kernel_dir / "torch-ext" / "test_kernel" + torch_ext_dir.mkdir(parents=True) + + init_file = torch_ext_dir / "__init__.py" + init_file.write_text( + """from .core import func1, func2 + +__all__ = ["func1", "func2", "func3"] +""" + ) + + core_file = torch_ext_dir / "core.py" + core_file.write_text( + """def func1(): + pass + +def func2(): + pass + +def func3(): + pass +""" + ) + + yield kernel_dir + + +@pytest.fixture +def mock_kernel_dir_with_benchmark(mock_kernel_dir): + benchmarks_dir = mock_kernel_dir / "benchmarks" + benchmarks_dir.mkdir() + + benchmark_file = benchmarks_dir / "benchmark.py" + benchmark_file.write_text( + """import time + +def benchmark(): + # Simple benchmark + start = time.time() + # ... benchmark code ... 
+ end = time.time() + return end - start +""" + ) + + return mock_kernel_dir + + +@pytest.fixture +def mock_kernel_dir_minimal(): + with tempfile.TemporaryDirectory() as tmpdir: + kernel_dir = Path(tmpdir) + + build_toml = kernel_dir / "build.toml" + build_toml.write_text( + """[general] +name = "minimal_kernel" +backends = ["cuda"] +""" + ) + + yield kernel_dir + + +def test_create_and_upload_card_basic(mock_kernel_dir): + with tempfile.TemporaryDirectory() as tmpdir: + card_path = Path(tmpdir) / "README.md" + + args = CardArgs( + kernel_dir=str(mock_kernel_dir), + card_path=str(card_path), + description="This is a test kernel for testing purposes.", + ) + + create_and_upload_card(args) + + assert card_path.exists() + + card_content = card_path.read_text() + + assert "---" in card_content + assert "This is a test kernel for testing purposes." in card_content + + +def test_create_and_upload_card_updates_usage(mock_kernel_dir): + """Test that usage code snippet is properly generated.""" + with tempfile.TemporaryDirectory() as tmpdir: + card_path = Path(tmpdir) / "README.md" + + args = CardArgs( + kernel_dir=str(mock_kernel_dir), + card_path=str(card_path), + ) + + create_and_upload_card(args) + + card_content = card_path.read_text() + + assert "## How to use" in card_content + assert "from kernels import get_kernel" in card_content + assert "func1" in card_content + assert "TODO: add an example code snippet" not in card_content + + +def test_create_and_upload_card_updates_available_functions(mock_kernel_dir): + with tempfile.TemporaryDirectory() as tmpdir: + card_path = Path(tmpdir) / "README.md" + + args = CardArgs( + kernel_dir=str(mock_kernel_dir), + card_path=str(card_path), + ) + + create_and_upload_card(args) + + card_content = card_path.read_text() + + assert "## Available functions" in card_content + assert "- `func1`" in card_content + assert "- `func2`" in card_content + assert "- `func3`" in card_content + assert ( + "[TODO: add the functions available 
through this kernel]" + not in card_content + ) + + +def test_create_and_upload_card_updates_backends(mock_kernel_dir): + with tempfile.TemporaryDirectory() as tmpdir: + card_path = Path(tmpdir) / "README.md" + + args = CardArgs( + kernel_dir=str(mock_kernel_dir), + card_path=str(card_path), + ) + + create_and_upload_card(args) + + card_content = card_path.read_text() + + assert "## Supported backends" in card_content + assert "- cuda" in card_content + assert "- metal" in card_content + assert "[TODO: add the backends this kernel supports]" not in card_content + + +def test_create_and_upload_card_updates_cuda_capabilities(mock_kernel_dir): + with tempfile.TemporaryDirectory() as tmpdir: + card_path = Path(tmpdir) / "README.md" + + args = CardArgs( + kernel_dir=str(mock_kernel_dir), + card_path=str(card_path), + ) + + create_and_upload_card(args) + + card_content = card_path.read_text() + + assert "## CUDA Capabilities" in card_content + assert "- 8.0" in card_content or "- 8.9" in card_content + + +def test_create_and_upload_card_updates_license(mock_kernel_dir): + with tempfile.TemporaryDirectory() as tmpdir: + card_path = Path(tmpdir) / "README.md" + + args = CardArgs( + kernel_dir=str(mock_kernel_dir), + card_path=str(card_path), + ) + + create_and_upload_card(args) + + card_content = card_path.read_text() + + assert "license: apache-2.0" in card_content + + +def test_create_and_upload_card_with_benchmark(mock_kernel_dir_with_benchmark): + with tempfile.TemporaryDirectory() as tmpdir: + card_path = Path(tmpdir) / "README.md" + + args = CardArgs( + kernel_dir=str(mock_kernel_dir_with_benchmark), + card_path=str(card_path), + ) + + create_and_upload_card(args) + + card_content = card_path.read_text() + + assert "## Benchmarks" in card_content + assert "Benchmarking script is available for this kernel" in card_content + assert "kernels benchmark" in card_content + + +def test_create_and_upload_card_minimal_structure(mock_kernel_dir_minimal): + with 
tempfile.TemporaryDirectory() as tmpdir: + card_path = Path(tmpdir) / "README.md" + + args = CardArgs( + kernel_dir=str(mock_kernel_dir_minimal), + card_path=str(card_path), + ) + + create_and_upload_card(args) + + assert card_path.exists() + + card_content = card_path.read_text() + + assert "---" in card_content + assert "## How to use" in card_content + assert "## Available functions" in card_content + assert "## Supported backends" in card_content + + +def test_create_and_upload_card_custom_description(mock_kernel_dir): + with tempfile.TemporaryDirectory() as tmpdir: + card_path = Path(tmpdir) / "README.md" + + custom_desc = "My custom kernel description with special features." + + args = CardArgs( + kernel_dir=str(mock_kernel_dir), + card_path=str(card_path), + description=custom_desc, + ) + + create_and_upload_card(args) + + card_content = card_path.read_text() + + assert custom_desc in card_content