Skip to content

Commit b2e328d

Browse files
committed
vendor and update commentjson since it is abandoned
also applied vaidik/commentjson#52 and added type annotations. This gets rid of the deprecation warnings we see in pytest: https://github.com/Skyvern-AI/skyvern/actions/runs/15564515234/job/43825105717#step:12:31-36 ``` .venv/lib/python3.11/site-packages/ddtrace/internal/module.py:295 /home/runner/work/skyvern/skyvern/.venv/lib/python3.11/site-packages/ddtrace/internal/module.py:295: DeprecationWarning: module 'sre_parse' is deprecated self.loader.exec_module(module) .venv/lib/python3.11/site-packages/ddtrace/internal/module.py:295 /home/runner/work/skyvern/skyvern/.venv/lib/python3.11/site-packages/ddtrace/internal/module.py:295: DeprecationWarning: module 'sre_constants' is deprecated self.loader.exec_module(module) ```
1 parent c1e19d2 commit b2e328d

File tree

4 files changed

+180
-20
lines changed

4 files changed

+180
-20
lines changed

poetry.lock

Lines changed: 11 additions & 18 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,6 @@ alembic = "^1.12.1"
3939
python-jose = {extras = ["cryptography"], version = "^3.3.0"}
4040
cachetools = "^5.3.2"
4141
aioboto3 = "^14.3.0"
42-
commentjson = "^0.9.0"
4342
asyncache = "^0.3.1"
4443
orjson = "^3.9.10"
4544
structlog = "^23.2.0"
@@ -74,6 +73,7 @@ google-cloud-aiplatform = "^1.90.0"
7473
alive-progress = "^3.2.0"
7574
colorama = "^0.4.6"
7675
types-boto3 = {extras = ["full"], version = "^1.38.31"}
76+
lark = "^1.2.2"
7777

7878
[tool.poetry.group.dev.dependencies]
7979
isort = "^5.13.2"
Lines changed: 167 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,167 @@
1+
# vendored from https://github.com/vaidik/commentjson/blob/master/commentjson/commentjson.py since that project seems to be abandoned.
2+
3+
import codecs
4+
import json
5+
import traceback
6+
from typing import Any, TypeVar
7+
8+
import lark
9+
from lark import Lark
10+
from lark.lexer import Token
11+
from lark.reconstruct import Reconstructor
12+
from lark.tree import Tree
13+
14+
# Lark grammar for JSON extended with two relaxations:
#   * ``#`` and ``//`` line comments (the COMMENT terminal, which is %ignore'd
#     together with whitespace and therefore never reaches the parse tree);
#   * an optional trailing comma before the closing bracket of an array or
#     object, captured as a TRAILING_COMMA token so it can be filtered out of
#     the tree afterwards.
# The grammar is a regular (non-raw) Python string, so the doubled backslashes
# in COMMENT reach lark as the regex ``(#|\/\/)[^\n]*``.
parser = Lark(
    """
?start: value
?value: object
| array
| string
| SIGNED_NUMBER -> number
| "true" -> true
| "false" -> false
| "null" -> null
array : "[" [value ("," value)*] TRAILING_COMMA? "]"
object : "{" [pair ("," pair)*] TRAILING_COMMA? "}"
pair : string ":" value
string : ESCAPED_STRING

COMMENT: /(#|\\/\\/)[^\\n]*/
TRAILING_COMMA: ","

%import common.ESCAPED_STRING
%import common.SIGNED_NUMBER
%import common.WS
%ignore WS
%ignore COMMENT
""",
    maybe_placeholders=False,
    parser="lalr",
)

# Rebuilds text from a parse tree produced by ``parser``.
serializer = Reconstructor(parser)
43+
44+
45+
def detect_encoding(b: bytes) -> str:
46+
"""
47+
Taken from `json` package in CPython 3.7.
48+
49+
Source can be found at https://bit.ly/2OHqCIK.
50+
"""
51+
52+
bstartswith = b.startswith
53+
if bstartswith((codecs.BOM_UTF32_BE, codecs.BOM_UTF32_LE)):
54+
return "utf-32"
55+
if bstartswith((codecs.BOM_UTF16_BE, codecs.BOM_UTF16_LE)):
56+
return "utf-16"
57+
if bstartswith(codecs.BOM_UTF8):
58+
return "utf-8-sig"
59+
60+
if len(b) >= 4:
61+
if not b[0]:
62+
# 00 00 -- -- - utf-32-be
63+
# 00 XX -- -- - utf-16-be
64+
return "utf-16-be" if b[1] else "utf-32-be"
65+
if not b[1]:
66+
# XX 00 00 00 - utf-32-le
67+
# XX 00 00 XX - utf-16-le
68+
# XX 00 XX -- - utf-16-le
69+
return "utf-16-le" if b[2] or b[3] else "utf-32-le"
70+
elif len(b) == 2:
71+
if not b[0]:
72+
# 00 XX - utf-16-be
73+
return "utf-16-be"
74+
if not b[1]:
75+
# XX 00 - utf-16-le
76+
return "utf-16-le"
77+
# default
78+
return "utf-8"
79+
80+
81+
class BaseException(Exception):
82+
"""Base exception to be implemented and raised while handling exceptions
83+
raised by libraries used in `commentjson`.
84+
85+
Sets message of self in a way that it clearly calls out that the exception
86+
was raised by another library, along with the entire stacktrace of the
87+
exception raised by the other library.
88+
"""
89+
90+
library: str | None = None
91+
message: str
92+
93+
def __init__(self, exc: Exception) -> None:
94+
if self.library is None:
95+
raise NotImplementedError("Value of library must be set in the inherited exception class.")
96+
97+
tb = traceback.format_exc()
98+
tb = "\n".join(" " * 4 + line_ for line_ in tb.split("\n"))
99+
100+
error = getattr(exc, "msg", None) or getattr(exc, "message", None) or str(exc)
101+
self.message = "\n".join(
102+
[
103+
"JSON Library Exception\n",
104+
("Exception thrown by library ({}): \033[4;37m{}\033[0m\n".format(self.library, error)),
105+
"%s" % tb,
106+
]
107+
)
108+
Exception.__init__(self, self.message)
109+
110+
111+
class ParserException(BaseException):
    """Wrapper for an exception that originated in `lark`.

    Signals that the failure was raised by the `lark` parser as used by
    `commentjson`, not by `commentjson` itself.
    """

    library = "lark"
118+
119+
120+
class JSONLibraryException(BaseException):
    """Wrapper for an exception that originated in `json`.

    Signals that the failure was raised by the `json` module as used by
    `commentjson`, not by `commentjson` itself.

    .. note::

        Only the standard library's ``json`` module is supported at present.
        Support for other widely-used JSON libraries may be added in the
        future.
    """

    library = "json"
133+
134+
135+
T = TypeVar("T", Tree, Token)


def _remove_trailing_commas(tree: T) -> T:
    """Strip every ``TRAILING_COMMA`` token from a parse tree, in place.

    Anything that is not a ``Tree`` is returned untouched. For a ``Tree``,
    the children list is rebuilt without trailing-comma tokens and each
    remaining child is processed recursively.

    :param tree: a parse tree node or a token.
    :returns: the same object that was passed in.
    """
    if not isinstance(tree, Tree):
        return tree

    kept = []
    for child in tree.children:
        if isinstance(child, Token) and child.type == "TRAILING_COMMA":
            continue
        kept.append(_remove_trailing_commas(child))
    tree.children = kept
    return tree
146+
147+
148+
def loads(text: str | bytes | bytearray, *args: Any, **kwargs: Any) -> Any:
    """Deserialize a JSON document that may contain comments or trailing commas.

    The text is parsed with the module's lark grammar, trailing commas are
    removed from the parse tree, the tree is turned back into text and that
    text is handed to ``json.loads``.

    :param text: the document as ``str``, or as encoded ``bytes`` /
        ``bytearray`` (the encoding is guessed with ``detect_encoding``).
    :param args: extra positional arguments forwarded to ``json.loads``.
    :param kwargs: extra keyword arguments forwarded to ``json.loads``.
    :returns: whatever ``json.loads`` returns for the cleaned-up text.
    :raises ValueError: if lark rejects the text with
        ``UnexpectedCharacters``; the lark error is attached as the cause.
    """
    if isinstance(text, (bytes, bytearray)):
        text = text.decode(detect_encoding(text), "surrogatepass")

    try:
        parsed = _remove_trailing_commas(parser.parse(text))
        final_text = serializer.reconstruct(parsed)
    except lark.exceptions.UnexpectedCharacters as exc:
        # NOTE(review): only UnexpectedCharacters is translated; any other
        # lark error raised while parsing propagates unchanged.
        raise ValueError("Unable to parse text", text) from exc

    return json.loads(final_text, *args, **kwargs)

skyvern/forge/sdk/api/llm/utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,12 @@
44
import re
55
from typing import Any
66

7-
import commentjson
87
import json_repair
98
import litellm
109
import structlog
1110

1211
from skyvern.constants import MAX_IMAGE_MESSAGES
12+
from skyvern.forge.sdk.api.llm import commentjson
1313
from skyvern.forge.sdk.api.llm.exceptions import EmptyLLMResponseError, InvalidLLMResponseFormat
1414

1515
LOG = structlog.get_logger()

0 commit comments

Comments
 (0)