diff --git a/document_services.py b/document_services.py index a694ac94..cc194a13 100644 --- a/document_services.py +++ b/document_services.py @@ -2,14 +2,10 @@ import re import sys from os import path +from abc import ABC, abstractmethod import traceback as tb -from typing import Any - -# https://regex101.com/r/kgCV7K -MOODLESTRUCT_REGEX = r"(?:['\"](\w+)['\"]\s*=>|return)\s*new\s*" \ - r"external_value\s*\(\s*(PARAM_\w+)\s*,\s*((?:(['\"]).+?\4)(?:\s*\.\s*(?:\w+::format\(\))|'.*?')*)" \ - r"(?:,\s*(\w+)(?:,\s*([^,]+?)(?:,\s*(\w+),?)?)?)?\s*\)" +from typing import Any, Iterable HAS_WARNED = False CURRENT_SERVICE: str | None = None @@ -93,6 +89,238 @@ def parse_nullable(inpot: str) -> bool | None: warn(f"found weird value for nullable: {inpot}") return None +class PHPExpression(ABC): + @abstractmethod + def __str__(self) -> str: + raise NotImplementedError() + +class PHPString(PHPExpression, ABC): + @abstractmethod + def get_value(self) -> str: + raise NotImplementedError() + +class PHPStringLiteral(PHPString): + __slots__ = ('value') + value: str + + def __init__(self, val: str): + self.value = val + + def __str__(self) -> str: + return f"'{self.value.replace('\'', '\\\'')}'" + + def get_value(self) -> str: + return self.value + +class PHPConcat(PHPString): + __slots__ = ('left', 'right') + left: PHPString + right: PHPString + + def __init__(self, left: PHPString, right: PHPString): + self.left = left + self.right = right + + def __str__(self) -> str: + return f"{self.left}.{self.right}" + + def get_value(self) -> str: + return self.left.get_value() + self.right.get_value() + +class PHPUserID(PHPExpression): + def __str__(self) -> str: + return "$USER->id" + +class PHPArray(PHPExpression): + __slots__ = ('keys', 'values') + + keys: list[PHPString] | None + values: list[PHPExpression] + + def __init__(self, vals: list[PHPExpression], keys: list[PHPString] | None = None): + self.keys = keys + self.values = vals + + def __str__(self) -> str: + inner: Iterable[str] + if self.keys is not None: + inner = (f"{k} => {v}" for k, v in zip(self.keys, self.values)) + else: + inner = (str(v) for v in self.values) + return '[' + ", ".join(inner) + ']' + +class PHPClassMemberFunction(PHPExpression): + __slots__ = ('classname', 'funcname', 'fp') + classname: str + funcname: str + fp: str | None + + def __init__(self, classname: str, funcname: str, fp: str | None): + self.classname = classname + self.funcname = funcname + self.fp = fp + + def resolve(self) -> PHPExpression: + meth_pattern = rf"public static function {self.funcname}\(\)(?: ?: ?\w+)? ?{{(?P.*?)}}" + + if self.fp is None: + # already warned in parse_imports, we don't need to warn again + return PHPConstant('null') + + with open(self.fp, "r") as f: + new_file_content = f.read() + meth_matches: list[str] = re.findall(meth_pattern, new_file_content, re.DOTALL) + if len(meth_matches) == 0: + warn(f"couldn't find {self} inside {self.fp}") + return PHPConstant('null') + elif len(meth_matches) > 1: + raise Exception(f"Found multiple definitions for {self} inside {self.fp}") + else: + imports = extract_imports(new_file_content) + result = parse_code(meth_matches[0], imports) + + return result + + def __str__(self) -> str: + return f"{self.classname}::{self.funcname}()" + +class PHPEnumFormat(PHPClassMemberFunction, PHPString): + def resolve(self) -> PHPString: + # https://regex101.com/r/p5FzCh + casepattern = r"const (\w+) = (\d+|true|false|(['\"]).*?\3)" + + fullbody_pattern = f"class {self.classname} extends Enum {{.*?}}" + + fp = f"lbplanner/classes/enums/{self.classname}.php" + if not path.exists(fp): + warn(f"Couldn't find enum file {fp}") + return PHPStringLiteral("") + with open(fp, "r") as f: + matches: list[str] = re.findall(fullbody_pattern, f.read(), re.DOTALL) + if len(matches) == 1: + body = matches[0] + else: + warn(f"couldn't parse enum {self.classname}", matches) + + cases = {} + matches = re.findall(casepattern, body) + for match in matches: + # capitalizing first letter, if exists + name = "".join([match[0][0].upper(), match[0][1:].lower()]) + cases[name] = match[1].replace("'", '"') + + return PHPStringLiteral("{ " + ", ".join([f"{name} = {value}" for name, value in cases.items()]) + " }") + + def get_value(self) -> str: + return self.resolve().get_value() + +class PHPConstructor(PHPExpression): + __slots__ = ('name', 'parameters') + + name: str + parameters: list[PHPExpression] + + def __init__(self, name: str, params: list[PHPExpression]): + self.name = name + self.parameters = params + + def __str__(self) -> str: + return f"new {self.name}(" + ", ".join(str(p) for p in self.parameters) + ")" + + def toIR(self) -> 'IRElement': + match self.name: + case 'external_function_parameters' | 'external_single_structure': + assert isinstance(self.parameters[0], PHPArray) + arr = self.parameters[0] + fields = {} + if len(arr.values) != 0: + assert arr.keys is not None + for k, v in zip(arr.keys, arr.values): + assert isinstance(v, PHPConstructor) + fields[k.get_value()] = v.toIR() + + desc = "" + if len(self.parameters) >= 2: + assert isinstance(self.parameters[1], PHPString) + desc = self.parameters[1].get_value() + + required = True + if len(self.parameters) >= 3: + assert isinstance(self.parameters[2], PHPConstant) + _required = parse_isrequired(self.parameters[2].name) + if _required is not None: + required = _required + + return IRObject(fields, description=desc, required=required) + case 'external_multiple_structure': + assert isinstance(self.parameters[0], PHPConstructor) + con = self.parameters[0] + + desc = "" + if len(self.parameters) >= 2: + assert isinstance(self.parameters[1], PHPString) + desc = self.parameters[1].get_value() + + required = True + if len(self.parameters) >= 3: + assert isinstance(self.parameters[2], PHPConstant) + _required = parse_isrequired(self.parameters[2].name) + if _required is not None: + required = _required + + return IRArray(con.toIR(), description=desc, required=required) + case 'external_value': + assert isinstance(self.parameters[0], PHPConstant) + assert isinstance(self.parameters[1], PHPString) + type = convert_php_type_to_normal_type(self.parameters[0].name) + desc = self.parameters[1].get_value() + + required = True + if len(self.parameters) >= 3: + assert isinstance(self.parameters[2], PHPConstant) + _required = parse_isrequired(self.parameters[2].name) + if _required is not None: + required = _required + + default: None | bool | str = None + if len(self.parameters) >= 4: + if isinstance(self.parameters[3], PHPConstant): + match self.parameters[3].name: + case 'null': + default = None + case 'false': + default = False + case 'true': + default = True + case _: + warn("unknown PHPConstant as default", self.parameters[3]) + default = None + elif isinstance(self.parameters[3], PHPUserID): + default = "derived from token" + + nullable = False + if len(self.parameters) >= 5: + assert isinstance(self.parameters[4], PHPConstant) + _nullable = parse_nullable(self.parameters[4].name) + if _nullable is not None: + nullable = _nullable + + return IRValue(type, default_value=default, nullable=nullable, description=desc, required=required) + case _: + warn(f"unkown constructor name: {self.name}") + return IRValue(None, None, nullable=True) + +class PHPConstant(PHPExpression): + __slots__ = ('name') + + name: str + + def __init__(self, name: str): + self.name = name + + def __str__(self) -> str: + return self.name + class SlotsDict: @property def __dict__(self): @@ -103,35 +331,6 @@ def __dict__(self): slots = cls.__slots__ + slots return {name: self.__getattribute__(name) for name in slots} -class ReturnInfo(SlotsDict): - __slots__ = ('type', 'description', 'nullable') - - def __init__(self, type: str, description: str, nullable: str): - self.type = convert_php_type_to_normal_type(type) - self.description = description - self.nullable = parse_nullable(nullable) - -class ParamInfo(SlotsDict): - __slots__ = ('type', 'description', 'required', 'default_value', 'nullable') - - def __init__(self, - type: str, - description: str, - required: str, - default_value: str, - nullable: str): - - self.type = convert_php_type_to_normal_type(type) - - defval, deftype = explain_php_value(default_value) - if defval is not None and deftype != self.type: - warn(f"Type of default value does not match parameter type - {deftype} != {self.type}", description) - - self.description = description - self.required = parse_isrequired(required) - self.default_value = defval - self.nullable = parse_nullable(nullable) - class FunctionInfo(SlotsDict): __slots__ = ('name', 'group', 'capabilities', 'description', 'path') @@ -143,18 +342,256 @@ def __init__(self, name: str, group: str, capabilities: list[str], description: self.path = path class FunctionInfoEx(FunctionInfo): - __slots__ = ('parameters', 'returns', 'returns_multiple') + __slots__ = ('parameters', 'returns') def __init__(self, parent: FunctionInfo, - parameters: dict[str, ParamInfo], - returns: dict[str, ReturnInfo], - returns_multiple: bool): + parameters: 'IRElement | None', + returns: 'IRElement | None'): super().__init__(**parent.__dict__) self.parameters = parameters self.returns = returns - self.returns_multiple = returns_multiple + +class IRElement(SlotsDict, ABC): + __slots__ = ('description', 'required', 'type') + + def __init__(self, description: str, required: bool): + self.description = description + self.required = required + +class IRValue(IRElement): + __slots__ = ('default_value', 'type', 'nullable') + + def __init__(self, type, default_value, nullable: bool, **kwargs): + self.type = type + self.default_value = default_value + self.nullable = nullable + super().__init__(**kwargs) + +class IRObject(IRElement): + __slots__ = ('fields',) + fields: dict[str, IRElement] + + def __init__(self, fields: dict[str, IRElement], **kwargs): + self.fields = fields + self.type = 'ObjectValue' + super().__init__(**kwargs) + +class IRArray(IRElement): + __slots__ = ('value',) + value: IRElement + + def __init__(self, value: IRElement, **kwargs): + self.value = value + self.type = 'ArrayValue' + super().__init__(**kwargs) + +def parse_code(code: str, imports: list[str]) -> PHPExpression: + code = code.strip() + while True: + i, expr = parse_statement(code, imports) + if expr is not None: + return expr + code = code[i:].strip() + +def parse_statement(code: str, imports: list[str]) -> tuple[int, PHPExpression | None]: + buf = [] + i = 0 + while True: + c = code[i] + if c.isalpha() or c == '_': + buf.append(c) + i += 1 + elif c.isspace(): + i += 1 + if len(buf) == 0: + continue + + word = "".join(buf) + buf = [] + + if word == 'global': + # just skip this statement; we're not interested in globals + return i + code[i:].index(';') + 1, None + elif word == 'return': + iplus, expr = parse_expression(code[i:], imports) + i += iplus + + return i + 1, expr + else: + raise ValueError(f"unknown keyword: {word}") + elif c == ';': + return i + 1, None + elif code[i:i + 2] == '//': + i += code[i:].index('\n') + else: + raise ValueError(f"unknown char: {c}") + +def parse_expression(code: str, imports: list[str]) -> tuple[int, PHPExpression | None]: + expr: PHPExpression | None = None + + buf: list[str] = [] + i = 0 + while True: + if len(buf) == 0 and code[i:].startswith('$USER->id'): + assert expr is None + i += len('$USER->id') + expr = PHPUserID() + + c = code[i] + if c.isalpha() or c == '_': + assert expr is None + buf.append(c) + i += 1 + elif c.isspace(): + i += 1 + if len(buf) == 0: + continue + + word = "".join(buf) + buf = [] + + if word == 'new': + iplus, expr = parse_constructor(code[i:], imports) + i += iplus + else: + # just assume this is a constant + assert expr is None + expr = PHPConstant(word) + elif c == '[': + assert expr is None + i += 1 + if len(buf) > 0: + raise NotImplementedError("map access not implemented") + + iplus, expr = parse_array(code[i:]) + i += iplus + elif c in '\'"': + assert len(buf) == 0 + assert expr is None + + iplus, expr = parse_string(code[i:]) + i += iplus + elif c == '.': + assert isinstance(expr, PHPString) + i += 1 + iplus, after = parse_expression(code[i:], imports) + i += iplus + assert isinstance(after, PHPString) + expr = PHPConcat(expr, after) + elif code[i:i + 2] == '::': + # remote value + assert len(buf) > 0 + assert expr is None + i += 2 + iplus = code[i:].index('(') + funcname = code[i:i + iplus] + classname = "".join(buf) + i += iplus + assert code[i:i + 2] == '()' + i += 2 + C: type[PHPClassMemberFunction] + fp_import: str | None + if funcname == 'format': + C = PHPEnumFormat + fp_import = path.join(path.dirname(__file__), "lbplanner", "enums", f"{classname}.php") + else: + C = PHPClassMemberFunction + fp_import = find_import(imports, classname) + expr = C(classname, funcname, fp_import).resolve() + buf = [] + else: + # unkown character? simply bail + if len(buf) > 0: + # assume we have a constant on our hands + word = "".join(buf) + assert expr is None + expr = PHPConstant(word) + return i, expr + +def parse_constructor(code: str, imports: list[str]) -> tuple[int, PHPConstructor]: + paramlist: list[PHPExpression] = [] + fnname, parenth, params = code.partition('(') + assert fnname.replace('_', '').isalpha() + assert parenth == '(' # if parenthesis not found, parenth is an empty string + offset = len(fnname) + 1 + i = 0 + while True: + iplus, expr = parse_expression(params[i:], imports) + i += iplus + + if expr is not None: + paramlist.append(expr) + + if params[i] == ',': + i += 1 + elif params[i] == ')': + return i + offset + 1, PHPConstructor(fnname, paramlist) + else: + raise ValueError(f"unknown char: {params[i]}") + +def parse_array(code: str) -> tuple[int, PHPArray]: + associative: bool | None = None + keys: list[PHPString] = [] + vals: list[PHPExpression] = [] + + i = 0 + while True: + iplus, expr = parse_expression(code[i:], imports) + i += iplus + + if code[i] == ',': + i += 1 + assert expr is not None + vals.append(expr) + if associative is None: + associative = False + elif code[i:i + 2] == '=>': + i += 2 + assert isinstance(expr, PHPString) + keys.append(expr) + associative = True + elif code[i] == ']': + if expr is not None: + vals.append(expr) + if associative is True: + assert len(keys) == len(vals) + return i + 1, PHPArray(vals, keys if associative else None) + else: + raise ValueError(f"unknown char: {code[i]}") + +def parse_string(code: str) -> tuple[int, PHPStringLiteral]: + quotetype = code[0] + assert quotetype in '\'"' + simple = quotetype == '\'' + if not simple: + raise NotImplementedError() # TODO + result: list[str] = [] + i = 1 + while True: + c = code[i] + i += 1 + if c == quotetype: + return i, PHPStringLiteral("".join(result)) + elif c == '\\': + i += 1 + if code[i] == quotetype: + result.append(quotetype) + elif code[i] == '\\': + result.append('\\') + elif simple: + result.append('\\') + result.append(code[i]) + elif code[i] == 'n': + result.append('\n') + elif code[i] == 'r': + result.append('\r') + else: + raise NotImplementedError(f"can't escape \"{code[i]}\" in double-quoted string") + else: + if simple: + result.append(c) def extract_function_info(file_content: str) -> list[FunctionInfo]: function_info = [] @@ -229,233 +666,118 @@ def extract_php_functions(php_code: str, name: str) -> tuple[str | None, str | N return parameters_function, returns_function -def parse_imports(input_str: str, symbol: str) -> str | None: - use_pattern = fr"use ((?:\w+\\)+){symbol};" - uses: list[str] = re.findall(use_pattern, input_str) +def find_import(uses: list[str], symbol: str) -> str | None: namespaces = { # it's technically possible to import from outside /classes/ - "local_lbplanner\\helpers": "classes/helpers", - "local_lbplanner\\enums": "classes/enums", - "local_lbplanner\\polyfill": "classes/polyfill", - "local_lbplanner\\model": "classes/model", + "helpers": "classes/helpers", + "enums": "classes/enums", + "polyfill": "classes/polyfill", + "model": "classes/model", } fp_l: list[str] = [] for use in uses: + im_symbol = use.split('\\')[-1].replace(';', '') + found = False + if im_symbol.startswith('{'): + for subsymbol in im_symbol.split(','): + if subsymbol.strip() == symbol: + found = True + break + else: + found = symbol == im_symbol + if not found: + continue for namespace, p in namespaces.items(): if use.startswith(namespace): fp_l.append(path.join(path.dirname(__file__), "lbplanner", p, f"{symbol}.php")) if len(fp_l) > 1: - warn(f"found potential import collision for {symbol}", input_str) + warn(f"found potential import collision for {symbol}", uses) return None elif len(fp_l) == 0: - warn(f"Couldn't find symbol: {symbol}", input_str) + warn(f"couldn't find symbol: {symbol}", uses) return None else: return fp_l[0] -def parse_phpstuff(inpot: str) -> str: - # https://regex101.com/r/p5FzCh - casepattern = r"const (\w+) = (\d+|true|false|(['\"]).*?\3)" - - if inpot.endswith('::format()'): - enum_name = inpot[:-10] - fullbody_pattern = f"class {enum_name} extends Enum {{.*?}}" - - fp = f"lbplanner/classes/enums/{enum_name}.php" - if not path.exists(fp): - warn(f"Couldn't find enum file {fp}") - return "" - with open(fp, "r") as f: - matches: list[str] = re.findall(fullbody_pattern, f.read(), re.DOTALL) - if len(matches) == 1: - body = matches[0] - else: - warn(f"couldn't parse enum {enum_name}", matches) - - cases = {} - matches = re.findall(casepattern, body) - for match in matches: - # capitalizing first letter, if exists - name = "".join([match[0][0].upper(), match[0][1:].lower()]) - cases[name] = match[1].replace("'", '"') - - return "{ " + ", ".join([f"{name} = {value}" for name, value in cases.items()]) + " }" - else: - warn('unknown phpstuff', inpot) - return "" - -def parse_phpstring(inpot: str) -> str: - WHITESPACE = '. \t\n\r' # the . is for string concat in php - - out = [] - strlit = False - quotetype = '' - tmp_refarr: list[str] = [] - for char in inpot: - if char in '\'"': - if not strlit: - strlit = True - quotetype = char - if len(tmp_refarr) > 0: - out.append(parse_phpstuff("".join(tmp_refarr))) - tmp_refarr = [] - else: - if char == quotetype: - strlit = False - else: - out.append(char) - else: - if strlit: - out.append(char) - else: - if char in WHITESPACE: - continue - else: - tmp_refarr.append(char) - - if len(tmp_refarr) > 0: - out.append(parse_phpstuff("".join(tmp_refarr))) - tmp_refarr = [] - - return "".join(out) - -def parse_returns(input_str: str, file_content: str, name: str) -> tuple[dict[str, ReturnInfo], bool]: - # https://regex101.com/r/gUtsX3/ - redir_pattern = r"(\w+)::(\w+)(? list[str]: + prefix = "use local_lbplanner\\" + imports = [] - # Check for the presence of 'external_multiple_structure' - is_multiple_structure = "external_multiple_structure" in input_str + for line in input_str.splitlines(False): + if line.startswith(prefix): + imports.append(line.removeprefix(prefix)) - redir_matches: list[list[str]] = re.findall(redir_pattern, input_str) - if len(redir_matches) > 1: - warn(f"Couldn't parse return values in {name}", input_str) - return ({}, False) + return imports - if len(redir_matches) == 1: - match = redir_matches[0] - meth_pattern = rf"public static function {match[1]}\(\)(?: ?: ?\w+)? ?{{(?P.*?)}}" - - fp = parse_imports(file_content, match[0]) - if fp is None: - # already warned in parse_imports, we don't need to warn again - return {}, is_multiple_structure - - with open(fp, "r") as f: - new_file_content = f.read() - meth_matches: list[str] = re.findall(meth_pattern, new_file_content, re.DOTALL) - if len(meth_matches) == 0: - warn(f"Couldn't find {match[0]}::{match[1]}() inside {fp} for {name}") - return ({}, False) - elif len(meth_matches) > 1: - raise Exception(f"Found multiple definitions for {match[0]}::{match[1]}() inside {fp}") - else: - result = parse_returns(meth_matches[0], new_file_content, fp) - - # if multiple_structure is detected here, add it - if is_multiple_structure: - return (result[0], True) - else: - return result - - matches: list[list[str]] = re.findall(MOODLESTRUCT_REGEX, input_str) - - output_dict = {} - for match in matches: - key = match[0] - if key is None: - if len(matches) > 1: - warn("got empty return key name in a structure larger than 1", matches) - else: - key = '' +def parse_function(input_text: str, imports: list[str]) -> IRElement | None: + ss = input_text.index('{') + se = input_text.rindex('}') + func_body = input_text[ss + 1:se] - if not parse_isrequired(match[4]): - warn(f"found optional value in returns structure for {name}", input_str) + expr = parse_code(func_body, imports) - default_str = match[5] - if default_str not in ('null', ''): - warn(f"found non-null 'default value' in returns structure for {name}: {default_str}", input_str) - - output_dict[key] = ReturnInfo(match[1], parse_phpstring(match[2]), match[6]) - - if len(output_dict) == 0: - if re.match(nullensure_pattern, input_str) is None: - warn(f"could not find any returns in non-empty {name}", input_str) - - return output_dict, is_multiple_structure - -def parse_params(input_text: str) -> dict[str, ParamInfo]: - # Regular expression to match the parameters inside the 'new external_value()' function - - # Find all matches of the pattern in the input text - matches: list[list[str]] = re.findall(MOODLESTRUCT_REGEX, input_text) - - if len(matches) == 0: - nullensure_pattern = r".*return new external_function_parameters(\s*\[\]\s*);.*" - if re.match(nullensure_pattern, input_text) is not None: - warn("could not parse params", input_text) - return {} - - result = {} - for match in matches: - param_name = match[0] - result[param_name] = ParamInfo( - match[1], - parse_phpstring(match[2]), - match[4], - match[5], - match[6], - ) + if isinstance(expr, PHPConstant) and expr.name == 'null': + return None + elif not isinstance(expr, PHPConstructor): + warn("non-constructor at top level", expr) + return None - return result + topelement = expr.toIR() + if isinstance(topelement, IRObject) and len(topelement.fields) == 0: + return None + else: + return topelement if __name__ == "__main__": with open("lbplanner/db/services.php", "r") as file: content = file.read() - infos = extract_function_info(content) - complete_info = [] + infos = extract_function_info(content) - for i, info in enumerate(infos): + complete_info = [] - CURRENT_SERVICE = info.name + for i, info in enumerate(infos): - with open(info.path, "r") as func_file: - func_content = func_file.read() - params_func, returns_func = extract_php_functions(func_content, info.path) + CURRENT_SERVICE = info.name - if returns_func is None or params_func is None: - continue + with open(info.path, "r") as func_file: + func_content = func_file.read() + imports = extract_imports(func_content) + params_func, returns_func = extract_php_functions(func_content, info.path) - returns, returns_multiple = parse_returns(returns_func, func_content, info.path) + if returns_func is None or params_func is None: + continue - params = parse_params(params_func) + returns = parse_function(returns_func, imports) - complete_info.append(FunctionInfoEx(info, params, returns, returns_multiple)) + params = parse_function(params_func, imports) - CURRENT_SERVICE = None + complete_info.append(FunctionInfoEx(info, params, returns)) - data = json.dumps(complete_info, default=lambda x: x.__dict__) + CURRENT_SERVICE = None - if sys.argv[1] == "-": - print(data) - else: - declaration = f"const funcs = {data}" - - script: str - with open(f"{sys.argv[1]}/script.js", "r") as f: - script = f.read() - lines = script.splitlines() - for i in range(len(lines)): - if lines[i].startswith('const funcs = '): - lines[i] = declaration - script = "\n".join(lines) - - with open(f"{sys.argv[1]}/script.js", "w") as f: - f.write(script) + # TODO: intermediary step + + data = json.dumps(complete_info, default=lambda x: x.__dict__) + + if sys.argv[1] == "-": + print(data) + else: + declaration = f"const funcs = {data}" + + script: str + with open(f"{sys.argv[1]}/script.js", "r") as f: + script = f.read() + lines = script.splitlines() + for i in range(len(lines)): + if lines[i].startswith('const funcs = '): + lines[i] = declaration + script = "\n".join(lines) + + with open(f"{sys.argv[1]}/script.js", "w") as f: + f.write(script) if HAS_WARNED: sys.exit(1)