From 049e7ea00b4662f4501c2bc20f02422d9a6a1884 Mon Sep 17 00:00:00 2001
From: Spartan322
Date: Thu, 5 Jun 2025 18:59:02 -0400
Subject: [PATCH] Add license_builder SCons builder function

Add env.to_raw_cstring helper
Add env.to_escaped_cstring helper
Add env.Run helper
Add env.CommandNoCache helper
---
 SConstruct            |  94 +++++++++++++++++++++
 build/license_info.py | 191 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 285 insertions(+)
 create mode 100644 build/license_info.py

diff --git a/SConstruct b/SConstruct
index 3c9f396..af6e6b4 100644
--- a/SConstruct
+++ b/SConstruct
@@ -4,6 +4,7 @@
 import os
 import platform
 import sys
+from typing import List, Union
 
 import SCons
 
@@ -11,6 +12,7 @@ import SCons
 from build.option_handler import OptionsClass
 from build.glob_recursive import GlobRecursive
 from build.git_info import get_git_info
+from build.license_info import license_builder
 from build.cache import show_progress
 
 def normalize_path(val, env):
@@ -273,5 +275,97 @@ env.SetupOptions = SetupOptions
 env.FinalizeOptions = FinalizeOptions
 env.GlobRecursive = GlobRecursive
 env.get_git_info = get_git_info
+env.license_builder = license_builder
+
+def to_raw_cstring(value: Union[str, List[str]]) -> str:
+    """Convert text into one or more C++ raw string literals.
+
+    A list is joined with newlines (plus a trailing newline) first. The UTF-8
+    encoding is then cut into segments of at most MAX_LITERAL bytes, each
+    emitted as its own raw literal; multiple literals are parenthesized.
+
+    NOTE: the default raw string delimiter is used, so the text must not
+    contain a closing parenthesis immediately followed by a double quote.
+    """
+    MAX_LITERAL = 35 * 1024
+
+    if isinstance(value, list):
+        value = "\n".join(value) + "\n"
+
+    split: List[bytes] = []
+    offset = 0
+    encoded = value.encode()
+
+    # `<=` (not `<`) so that empty input still yields one (empty) literal.
+    while offset <= len(encoded):
+        segment = encoded[offset : offset + MAX_LITERAL]
+        offset += MAX_LITERAL
+        if len(segment) == MAX_LITERAL:
+            # Try to segment raw strings at double newlines to keep readable.
+            pretty_break = segment.rfind(b"\n\n")
+            if pretty_break != -1:
+                segment = segment[: pretty_break + 1]
+                offset -= MAX_LITERAL - pretty_break - 1
+            # If none found, ensure we end with valid utf8.
+            # https://github.com/halloleo/unicut/blob/master/truncate.py
+            elif segment[-1] & 0b10000000:
+                last_11xxxxxx_index = [i for i in range(-1, -5, -1) if segment[i] & 0b11000000 == 0b11000000][0]
+                last_11xxxxxx = segment[last_11xxxxxx_index]
+                if not last_11xxxxxx & 0b00100000:
+                    last_char_length = 2
+                elif not last_11xxxxxx & 0b00010000:
+                    last_char_length = 3
+                elif not last_11xxxxxx & 0b00001000:
+                    last_char_length = 4
+
+                if last_char_length > -last_11xxxxxx_index:
+                    segment = segment[:last_11xxxxxx_index]
+                    offset += last_11xxxxxx_index
+
+        split += [segment]
+
+    if len(split) == 1:
+        return f'R"({split[0].decode()})"'
+    else:
+        # Wrap multiple segments in parentheses to suppress `string-concatenation` warnings on clang.
+        return "({})".format(" ".join(f'R"({segment.decode()})"' for segment in split))
+
+
+C_ESCAPABLES = [
+    ("\\", "\\\\"),
+    ("\a", "\\a"),
+    ("\b", "\\b"),
+    ("\f", "\\f"),
+    ("\n", "\\n"),
+    ("\r", "\\r"),
+    ("\t", "\\t"),
+    ("\v", "\\v"),
+    # ("'", "\\'"),  # Skip, as we're only dealing with full strings.
+    ('"', '\\"'),
+]
+C_ESCAPE_TABLE = str.maketrans(dict(C_ESCAPABLES))
+
+def to_escaped_cstring(value: str) -> str:
+    """Escape backslashes, double quotes and control characters for a C string literal."""
+    return value.translate(C_ESCAPE_TABLE)
+
+def Run(env, function, **kwargs):
+    """Wrap `function` in an SCons Action that is displayed using $GENCOMSTR."""
+    return SCons.Action.Action(function, "$GENCOMSTR", **kwargs)
+
+def CommandNoCache(env, target, sources, command, **kwargs):
+    """Like env.Command, but the result is never cached and depends on every keyword argument."""
+    result = env.Command(target, sources, command, **kwargs)
+    env.NoCache(result)
+    # Register each override as a Value node so changing it triggers a rebuild.
+    for key, val in kwargs.items():
+        env.Depends(result, env.Value({ key: val }))
+    return result
+
+env.to_raw_cstring = to_raw_cstring
+env.to_escaped_cstring = to_escaped_cstring
+
+env.__class__.Run = Run
+env.__class__.CommandNoCache = CommandNoCache
 
 Return("env")
\ No newline at end of file
diff --git a/build/license_info.py b/build/license_info.py
new file mode 100644
index 0000000..7652328
--- /dev/null
+++ b/build/license_info.py
@@ -0,0 +1,191 @@
+"""Parse a tagged copyright file and generate a C++ header describing it."""
+
+from collections import OrderedDict
+from io import TextIOWrapper
+
+
+def get_license_info(src_copyright):
+    """Read the copyright file at `src_copyright` and return its contents as a dict.
+
+    Keys of the result:
+      "data": flat list holding every project part's "Files" and "Copyright" lines.
+      "projects": OrderedDict mapping a "Comment" value to its list of parts.
+      "parts": whatever the local `part` last referred to (kept for compatibility).
+      "licenses": content of each paragraph with a "License" but no "Files" tag.
+    Every part also gets "file_index" and "copyright_index", its offsets into "data".
+    """
+
+    class LicenseReader:
+        """Line reader that skips lines starting with `#`."""
+
+        def __init__(self, license_file: TextIOWrapper):
+            self._license_file = license_file
+            self.line_num = 0
+            self.current = self.next_line()
+
+        def next_line(self):
+            line = self._license_file.readline()
+            self.line_num += 1
+            while line.startswith("#"):
+                line = self._license_file.readline()
+                self.line_num += 1
+            self.current = line
+            return line
+
+        def next_tag(self):
+            """Return (tag, lines) for the field at the current line, or an empty tag and list if it has no colon."""
+            if ":" not in self.current:
+                return ("", [])
+            tag, line = self.current.split(":", 1)
+            lines = [line.strip()]
+            # Lines starting with a space continue the current field.
+            while self.next_line() and self.current.startswith(" "):
+                lines.append(self.current.strip())
+            return (tag, lines)
+
+    projects = OrderedDict()
+    license_list = []
+
+    with open(src_copyright, "r", encoding="utf-8") as copyright_file:
+        reader = LicenseReader(copyright_file)
+        part = {}
+        while reader.current:
+            tag, content = reader.next_tag()
+            if tag in ("Files", "Copyright", "License"):
+                part[tag] = content[:]
+            elif tag == "Comment" and part:
+                # attach non-empty part to named project
+                projects[content[0]] = projects.get(content[0], []) + [part]
+
+            if not tag or not reader.current:
+                # end of a paragraph start a new part
+                if "License" in part and "Files" not in part:
+                    # no Files tag in this one, so assume standalone license
+                    license_list.append(part["License"])
+                part = {}
+                reader.next_line()
+
+    # Flatten all Files/Copyright lines and remember where each part starts.
+    data_list: list = []
+    for project in projects.values():
+        for part in project:
+            part["file_index"] = len(data_list)
+            data_list += part["Files"]
+            part["copyright_index"] = len(data_list)
+            data_list += part["Copyright"]
+
+    return {"data": data_list, "projects": projects, "parts": part, "licenses": license_list}
+
+
+def license_builder(target, source, env):
+    """SCons builder action that writes a C++ header with license and copyright data.
+
+    `source[0]` is the copyright file, `source[1]` the license text file and
+    `target[0]` the header to generate. Identifiers in the header are prefixed
+    using the `name_prefix` construction variable (default "project").
+    """
+    name_prefix = env.get("name_prefix", "project")
+    prefix_upper = name_prefix.upper()
+    prefix_capital = name_prefix.capitalize()
+
+    license_text_name = f"{prefix_upper}_LICENSE_TEXT"
+    component_copyright_part_name = f"{prefix_capital}ComponentCopyrightPart"
+    component_copyright_name = f"{prefix_capital}ComponentCopyright"
+    copyright_data_name = f"{prefix_upper}_COPYRIGHT_DATA"
+    copyright_parts_name = f"{prefix_upper}_COPYRIGHT_PARTS"
+    copyright_info_name = f"{prefix_upper}_COPYRIGHT_INFO"
+    license_name = f"{prefix_capital}License"
+    licenses_name = f"{prefix_upper}_LICENSES"
+
+    src_copyright = get_license_info(str(source[0]))
+    src_license = str(source[1])
+
+    with open(src_license, "r", encoding="utf-8") as file:
+        license_text = file.read()
+
+    def copyright_data_str() -> str:
+        result = ""
+        for line in src_copyright["data"]:
+            result += f'\t\t"{env.to_escaped_cstring(line)}",\n'
+        return result
+
+    # Filled by copyright_part_str(); read afterwards by copyright_info_str().
+    part_indexes = {}
+
+    def copyright_part_str() -> str:
+        part_index = 0
+        result = ""
+        for project_name, project in iter(src_copyright["projects"].items()):
+            part_indexes[project_name] = part_index
+            for part in project:
+                result += (
+                    f'\t\t{{ "{env.to_escaped_cstring(part["License"][0])}", '
+                    + f"{{ &{copyright_data_name}[{part['file_index']}], {len(part['Files'])} }}, "
+                    + f"{{ &{copyright_data_name}[{part['copyright_index']}], {len(part['Copyright'])} }} }},\n"
+                )
+                part_index += 1
+        return result
+
+    def copyright_info_str() -> str:
+        result = ""
+        for project_name, project in iter(src_copyright["projects"].items()):
+            result += (
+                f'\t\t{{ "{env.to_escaped_cstring(project_name)}", '
+                + f"{{ &{copyright_parts_name}[{part_indexes[project_name]}], {len(project)} }} }},\n"
+            )
+        return result
+
+    def license_list_str() -> str:
+        result = ""
+        for license_entry in iter(src_copyright["licenses"]):
+            result += (
+                f'\t\t{{ "{env.to_escaped_cstring(license_entry[0])}",'
+                + f'\n\t\t {env.to_raw_cstring([line if line != "." else "" for line in license_entry[1:]])} }}, \n'
+            )
+        return result
+
+    with open(str(target[0]), "wt", encoding="utf-8", newline="\n") as file:
+        file.write("/* THIS FILE IS GENERATED. EDITS WILL BE LOST. */\n\n")
+        file.write(
+            f"""\
+#pragma once
+
+#include <array>
+#include <span>
+#include <string_view>
+
+namespace OpenVic {{
+	static constexpr std::string_view {license_text_name} = {{
+		{env.to_raw_cstring(license_text)}
+	}};
+
+	struct {component_copyright_part_name} {{
+		std::string_view license;
+		std::span<const std::string_view> files;
+		std::span<const std::string_view> copyright_statements;
+	}};
+
+	struct {component_copyright_name} {{
+		std::string_view name;
+		std::span<const {component_copyright_part_name}> parts;
+	}};
+
+	static constexpr std::array {copyright_data_name} = std::to_array<std::string_view>({{
+{copyright_data_str()}\t}});
+
+	static constexpr std::array {copyright_parts_name} = std::to_array<{component_copyright_part_name}>({{
+{copyright_part_str()}\t}});
+
+	static constexpr std::array {copyright_info_name} = std::to_array<{component_copyright_name}>({{
+{copyright_info_str()}\t}});
+
+	struct {license_name} {{
+		std::string_view license_name;
+		std::string_view license_body;
+	}};
+
+	static constexpr std::array {licenses_name} = std::to_array<{license_name}>({{
+{license_list_str()}\t}});
+}}
+"""
+        )