From 9cfada586b17ccd61a8c5b97ff3018c3c704f449 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Wed, 30 Jul 2025 11:00:57 +0100 Subject: [PATCH 1/8] Collect f.write(pack( in for_loops into a single f.write(pack, and keep num bytes written --- src/shapefile.py | 96 ++++++++++++++++++++++++++---------------------- 1 file changed, 52 insertions(+), 44 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index b5e6ed4..046443c 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -859,7 +859,7 @@ def from_byte_stream(cls, b_io, next_shape, oid=None, bbox=None): # pylint: dis @staticmethod def write_to_byte_stream(b_io, s, i, bbox, mbox, zbox): # pylint: disable=unused-argument - pass + return 0 class _CanHaveBBox(Shape): @@ -892,7 +892,7 @@ def _set_bbox_from_byte_stream(self, b_io): @staticmethod def _write_bbox_to_byte_stream(b_io, i, bbox): try: - b_io.write(pack("<4d", *bbox)) + return b_io.write(pack("<4d", *bbox)) except error: raise ShapefileException( f"Failed to write bounding box for record {i}. Expected floats." @@ -904,7 +904,7 @@ def _get_npoints_from_byte_stream(b_io): @staticmethod def _write_npoints_to_byte_stream(b_io, s): - b_io.write(pack(" mpos and p[mpos] is not None - else NODATA, - ) - ) + if len(p) > mpos and p[mpos] is not None: + ms.append(p[mpos]) + else: + ms.append(NODATA) + + num_bytes_written += b_io.write(pack(f"<{len(ms)} 2 else 0)) + zs = [p[2] if len(p) > 2 else 0 + for p in s.points + ] + + num_bytes_written += b_io.write(pack(f"<{len(zs)}d", *zs)) except error: raise ShapefileException( f"Failed to write elevation values for record {i}. Expected floats." ) + return num_bytes_written + class MultiPatch(_HasM, _HasZ, _CanHaveParts): shapeType = MULTIPATCH @@ -1223,8 +1230,7 @@ def _set_part_types_from_byte_stream(self, b_io, nParts): @staticmethod def _write_part_types_to_byte_stream(b_io, s): - for partType in s.partTypes: - b_io.write(pack("2i", self.shpNum, 0)) start = f.tell() + n = 0 # Shape Type if self.shapeType is None and s.shapeType != NULL: self.shapeType = s.shapeType @@ -2986,10 +2993,10 @@ def __shpRecord(self, s): self.__zbox(s) if s.shapeType in {POINTZ} | _HasZ._shapeTypes else None ) - f.write(pack(" Date: Wed, 30 Jul 2025 11:09:28 +0100 Subject: [PATCH 2/8] Fix typo in struct format str for ms --- src/shapefile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/shapefile.py b/src/shapefile.py index 046443c..eb4f5d6 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -1167,7 +1167,7 @@ def _write_ms_to_byte_stream(b_io, s, i, mbox): else: ms.append(NODATA) - num_bytes_written += b_io.write(pack(f"<{len(ms)} Date: Wed, 30 Jul 2025 11:21:55 +0100 Subject: [PATCH 3/8] Write to BytesIO buffer, then write that to .shp file once its length is known --- src/shapefile.py | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index eb4f5d6..18cf0ef 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -2969,8 +2969,9 @@ def __shpRecord(self, s): offset = f.tell() # Record number, Content length place holder self.shpNum += 1 - f.write(pack(">2i", self.shpNum, 0)) - start = f.tell() + + # f.write(pack(">2i", self.shpNum, 0)) + # start = f.tell() n = 0 # Shape Type if self.shapeType is None and s.shapeType != NULL: @@ -2992,12 +2993,14 @@ def __shpRecord(self, s): new_zbox = ( self.__zbox(s) if s.shapeType in {POINTZ} | _HasZ._shapeTypes else None ) + + b_io = io.BytesIO() - n += f.write(pack("i", length)) + # f.seek(start - 4) + # f.write(pack(">i", length)) + f.write(pack(">2i", self.shpNum, length)) + b_io.seek(0) + f.write(b_io.read()) - f.seek(finish) + # f.seek(finish) return offset, length From 402e00f64d3e9a5ec6c274990684a912fcf6465e Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Wed, 30 Jul 2025 11:24:30 +0100 Subject: [PATCH 4/8] Reformat --- src/shapefile.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 18cf0ef..8b9dcec 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -1002,6 +1002,7 @@ def write_to_byte_stream(b_io, s, i, bbox, mbox, zbox): return n + class _CanHaveParts(_CanHaveBBox): # The parts attribute is initialised by # the base class Shape's __init__, to parts or []. @@ -1099,6 +1100,7 @@ def write_to_byte_stream(b_io, s, i, bbox, mbox, zbox): # pylint: disable=unuse return n + class Polyline(_CanHaveParts): shapeType = POLYLINE @@ -1166,7 +1168,7 @@ def _write_ms_to_byte_stream(b_io, s, i, mbox): ms.append(p[mpos]) else: ms.append(NODATA) - + num_bytes_written += b_io.write(pack(f"<{len(ms)}d", *ms)) except error: @@ -1209,9 +1211,7 @@ def _write_zs_to_byte_stream(b_io, s, i, zbox): zs = s.z else: # if z values are stored as 3rd dimension - zs = [p[2] if len(p) > 2 else 0 - for p in s.points - ] + zs = [p[2] if len(p) > 2 else 0 for p in s.points] num_bytes_written += b_io.write(pack(f"<{len(zs)}d", *zs)) except error: @@ -2969,7 +2969,7 @@ def __shpRecord(self, s): offset = f.tell() # Record number, Content length place holder self.shpNum += 1 - + # f.write(pack(">2i", self.shpNum, 0)) # start = f.tell() n = 0 @@ -2993,7 +2993,7 @@ def __shpRecord(self, s): new_zbox = ( self.__zbox(s) if s.shapeType in {POINTZ} | _HasZ._shapeTypes else None ) - + b_io = io.BytesIO() n += b_io.write(pack(" Date: Wed, 30 Jul 2025 12:26:48 +0100 Subject: [PATCH 5/8] Avoid file seeks. Write shape to in memory buffer first, in Writer.__shape. --- src/shapefile.py | 35 ++++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 8b9dcec..c554080 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -2967,12 +2967,8 @@ def shape( def __shpRecord(self, s): f = self.__getFileObj(self.shp) offset = f.tell() - # Record number, Content length place holder self.shpNum += 1 - # f.write(pack(">2i", self.shpNum, 0)) - # start = f.tell() - n = 0 # Shape Type if self.shapeType is None and s.shapeType != NULL: self.shapeType = s.shapeType @@ -2994,8 +2990,20 @@ def __shpRecord(self, s): self.__zbox(s) if s.shapeType in {POINTZ} | _HasZ._shapeTypes else None ) + # Create an in-memory binary buffer to avoid + # unnecessary seeks to files on disk + # (other ops are already buffered until .seek + # or .flush is called if not using RawIOBase). + # https://docs.python.org/3/library/io.html#id2 + # https://docs.python.org/3/library/io.html#io.BufferedWriter b_io = io.BytesIO() + # Record number, Content length place holder + b_io.write(pack(">2i", self.shpNum, -1)) + + # Track number of content bytes written. Excluding self.shpNum and length t.b.c. + n = 0 + n += b_io.write(pack("i", length)) - f.write(pack(">2i", self.shpNum, length)) - b_io.seek(0) - f.write(b_io.read()) - # f.seek(finish) + # 4 bytes in is the content length field + b_io.seek(4) + b_io.write(pack(">i", length)) + # Flush to file. + b_io.seek(0) + f.write(b_io.read()) return offset, length def __shxRecord(self, offset, length): From 581edf31d54d6a67be87de1d6e61b91f36060e18 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Wed, 30 Jul 2025 12:28:12 +0100 Subject: [PATCH 6/8] Reformat --- src/shapefile.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/shapefile.py b/src/shapefile.py index c554080..2efaf30 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -3016,7 +3016,6 @@ def __shpRecord(self, s): zbox=new_zbox, ) - # Finalize record length as 16-bit words length = n // 2 From da2df6320db11774c8ddb08508dd8dec8b1b246e Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Wed, 30 Jul 2025 12:38:38 +0100 Subject: [PATCH 7/8] Read recLength_bytes into in memory buffer in Reader.__shape --- src/shapefile.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 2efaf30..b604f31 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -2074,17 +2074,21 @@ def __shape( # Convert from num of 16 bit words, to 8 bit bytes recLength_bytes = 2 * recLength - next_shape = f.tell() + recLength_bytes + # next_shape = f.tell() + recLength_bytes - shapeType = unpack(" Date: Wed, 30 Jul 2025 13:45:11 +0100 Subject: [PATCH 8/8] Replace index look up with enumerate, tuple with set & simplify bool expression --- src/shapefile.py | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index b604f31..d0ec177 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -653,10 +653,8 @@ def __geo_interface__(self) -> GeoJSONHomogeneousGeometryObject: # the geojson spec does not define a proper null-geometry type # however, it does allow geometry types with 'empty' coordinates to be interpreted as null-geometries return {"type": "Point", "coordinates": ()} - # return {"type": "Point", "coordinates": tuple()} #type: ignore return {"type": "Point", "coordinates": self.points[0]} - # return {"type": "Point", "coordinates": tuple(self.points[0])} # type: ignore if self.shapeType in [MULTIPOINT, MULTIPOINTM, MULTIPOINTZ]: if len(self.points) == 0: @@ -669,7 +667,6 @@ def __geo_interface__(self) -> GeoJSONHomogeneousGeometryObject: return { "type": "MultiPoint", "coordinates": self.points, - # "coordinates": [tuple(p) for p in self.points], #type: ignore } if self.shapeType in [POLYLINE, POLYLINEM, POLYLINEZ]: @@ -684,7 +681,6 @@ def __geo_interface__(self) -> GeoJSONHomogeneousGeometryObject: return { "type": "LineString", "coordinates": self.points, - # "coordinates": [tuple(p) for p in self.points], #type: ignore } # multilinestring @@ -695,11 +691,9 @@ def __geo_interface__(self) -> GeoJSONHomogeneousGeometryObject: ps = part continue - # coordinates.append([tuple(p) for p in self.points[ps:part]]) coordinates.append(list(self.points[ps:part])) ps = part - # coordinates.append([tuple(p) for p in self.points[part:]]) # assert len(self.parts) >1 # so disable pylint rule coordinates.append(list(self.points[part:])) # pylint: disable=undefined-loop-variable return {"type": "MultiLineString", "coordinates": coordinates} @@ -713,16 +707,14 @@ def __geo_interface__(self) -> GeoJSONHomogeneousGeometryObject: # get all polygon rings rings = [] - for i in range(len(self.parts)): + for i, start in enumerate(self.parts): # get indexes of start and end points of the ring - start = self.parts[i] try: end = self.parts[i + 1] except IndexError: end = len(self.points) # extract the points that make up the ring - # ring = [tuple(p) for p in self.points[start:end]] ring = list(self.points[start:end]) rings.append(ring) @@ -2076,7 +2068,8 @@ def __shape( # next_shape = f.tell() + recLength_bytes - # Read entire record into memory + # Read entire record into memory to avoid having to call + # seek on the file afterwards b_io = io.BytesIO(f.read(recLength_bytes)) b_io.seek(0) @@ -2363,7 +2356,7 @@ def __record( # parse each value record = [] for (__name, typ, __size, deci), value in zip(fieldTuples, recordContents): - if typ in ("N", "F"): + if typ in {"N", "F"}: # numeric or float: number stored as a string, right justified, and padded with blanks to the width of the field. value = value.split(b"\0")[0] value = value.replace(b"*", b"") # QGIS NULL is all '*' chars @@ -2687,9 +2680,7 @@ def close(self): # Flush files for attribute in (self.shp, self.shx, self.dbf): - if hasattr(attribute, "flush") and not ( - hasattr(attribute, "closed") and attribute.closed - ): + if hasattr(attribute, "flush") and not getattr(attribute, "closed", False): try: attribute.flush() except OSError: