Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 24 additions & 10 deletions src/encoder/records/text.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,32 @@
import base64
import datetime
import re
import struct
from html.entities import codepoint2name
from typing import Self
from xml.sax.saxutils import escape as xml_escape

from .constants import DICTIONARY
from .datatypes import Decimal, MultiByteInt31
from .record import record


# Characters forbidden by the XML 1.0 ``Char`` production:
#   * C0 control characters other than tab (0x09), newline (0x0A) and
#     carriage return (0x0D)
#   * UTF-16 surrogate code points (U+D800-U+DFFF), which a Python ``str``
#     can hold but which are never legal in an XML document
#   * the non-characters U+FFFE and U+FFFF
_INVALID_XML_CHARS = re.compile(
    r'[\x00-\x08\x0b\x0c\x0e-\x1f\ud800-\udfff\ufffe\uffff]'
)


def _sanitize_xml_text(value: str) -> str:
    """Sanitize a string for safe inclusion in XML.

    1. Removes characters that are not legal in XML 1.0 (control characters
       other than tab/newline/carriage return, surrogate code points, and
       the non-characters U+FFFE / U+FFFF).
    2. Escapes XML special characters (<, >, &, ", ').

    Args:
        value: The raw text to sanitize.

    Returns:
        A copy of ``value`` with illegal characters dropped and the five
        XML special characters replaced by their predefined entities.
    """
    # Strip illegal characters first so they never reach the escaper.
    clean = _INVALID_XML_CHARS.sub('', value)
    # xml_escape handles &, < and > itself ('&' first, so the entities added
    # below are not double-escaped); quotes are supplied explicitly.
    return xml_escape(clean, entities={'"': '&quot;', "'": '&apos;'})


# Declares no members of its own; everything is inherited from `record`.
# NOTE(review): presumably the common base for the text record classes in
# this module -- confirm against the subclasses.
class Text(record): ...


Expand Down Expand Up @@ -150,7 +168,7 @@ def to_bytes(self) -> bytes:
return bytes

def __str__(self):
    """Return the record's value sanitized for XML output.

    The raw, unescaped text remains available as ``self.value``.
    """
    return _sanitize_xml_text(self.value)

@classmethod
def parse(cls, fp) -> Self:
Expand All @@ -170,7 +188,7 @@ def to_bytes(self) -> bytes:
return bytes

def __str__(self):
    """Return the record's value sanitized for XML output.

    The raw, unescaped text remains available as ``self.value``.
    """
    return _sanitize_xml_text(self.value)

@classmethod
def parse(cls, fp) -> Self:
Expand All @@ -190,7 +208,7 @@ def to_bytes(self) -> bytes:
return bytes

def __str__(self):
    """Return the record's value sanitized for XML output.

    The raw, unescaped text remains available as ``self.value``.
    """
    return _sanitize_xml_text(self.value)

@classmethod
def parse(cls, fp) -> Self:
Expand Down Expand Up @@ -332,11 +350,7 @@ def __init__(self, value: str):
self.value = value

def __str__(self):
    """Return the record's value sanitized for XML output.

    The raw, unescaped text remains available as ``self.value``.
    """
    # NOTE(review): escaping was previously dropped here because str() was
    # used by print_records, which then displayed entity-escaped text.
    # Confirm that any caller needing the raw text reads ``self.value``
    # instead of calling str().
    return _sanitize_xml_text(self.value)

def to_bytes(self) -> bytes:
data = self.value.encode("utf-8")
Expand All @@ -360,7 +374,7 @@ def __init__(self, value: str):
self.value = value

def __str__(self):
    """Return the record's value sanitized for XML output.

    The raw, unescaped text remains available as ``self.value``.
    """
    return _sanitize_xml_text(self.value)

def to_bytes(self) -> bytes:
data = self.value.encode("utf-8")
Expand All @@ -384,7 +398,7 @@ def __init__(self, value: str):
self.value = value

def __str__(self):
    """Return the record's value sanitized for XML output.

    The raw, unescaped text remains available as ``self.value``.
    """
    return _sanitize_xml_text(self.value)

def to_bytes(self) -> bytes:
data = self.value.encode("utf-8")
Expand Down