From 6454a3f5bd31bc2db7c554ae71d36e7cbac115a3 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Mon, 28 Jul 2025 20:08:48 +0100 Subject: [PATCH 01/15] Move creation of shape record to use shapeType read for that shape., not the whole file --- src/shapefile.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 14da4bf..9356ed7 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -1638,7 +1638,6 @@ def __shape( # pylint: disable=attribute-defined-outside-init f = self.__getFileObj(self.shp) - record = SHAPE_CLASS_FROM_SHAPETYPE[self.shapeType](oid=oid) # record = Shape(oid=oid) # Previously, we also set __zmin = __zmax = __mmin = __mmax = None nParts: Optional[int] = None @@ -1647,7 +1646,7 @@ def __shape( # Determine the start of the next record next_shape = f.tell() + (2 * recLength) shapeType = unpack(" Date: Mon, 28 Jul 2025 20:09:58 +0100 Subject: [PATCH 02/15] Use the shapeType variable we just read --- src/shapefile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/shapefile.py b/src/shapefile.py index 9356ed7..b6e0dea 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -1646,7 +1646,7 @@ def __shape( # Determine the start of the next record next_shape = f.tell() + (2 * recLength) shapeType = unpack(" Date: Mon, 28 Jul 2025 20:17:03 +0100 Subject: [PATCH 03/15] Refactor Reader.__shape into __read_shape_from_shp_file --- src/shapefile.py | 237 +++++++++++++++++++++++++---------------------- 1 file changed, 125 insertions(+), 112 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index b6e0dea..6cdcbcb 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -940,6 +940,129 @@ class MultiPointZ(MultiPointM, _HasZ): } +def __read_shape_from_shp_file(f): + """ Constructs a Shape from an open .shp file. Something else + is required to have first read the .shp file's header. + Leaves the shp file's .tell() in the correct position for + a subsequent call to this, to build the next shape. + """ + # record = Shape(oid=oid) + # Previously, we also set __zmin = __zmax = __mmin = __mmax = None + nParts: Optional[int] = None + nPoints: Optional[int] = None + (__recNum, recLength) = unpack(">2i", f.read(8)) + # Determine the start of the next record + next_shape = f.tell() + (2 * recLength) + shapeType = unpack("= 16: + __mmin, __mmax = unpack("<2d", f.read(16)) + # Measure values less than -10e38 are nodata values according to the spec + if next_shape - f.tell() >= nPoints * 8: + record.m = [] + for m in _Array[float]( + "d", unpack(f"<{nPoints}d", f.read(nPoints * 8)) + ): + if m > NODATA: + record.m.append(m) + else: + record.m.append(None) + else: + record.m = [None for _ in range(nPoints)] + + # Read a single point + # if shapeType in (1, 11, 21): + if isinstance(record, Point): + x, y = _Array[float]("d", unpack("<2d", f.read(16))) + + record.points = [(x, y)] + if bbox is not None: + # create bounding box for Point by duplicating coordinates + # skip shape if no overlap with bounding box + if not bbox_overlap(bbox, (x, y, x, y)): + f.seek(next_shape) + return None + + # Read a single Z value + # if shapeType == 11: + if isinstance(record, PointZ): + record.z = tuple(unpack("= 8: + (m,) = unpack(" NODATA: + record.m = (m,) + else: + record.m = (None,) + + # pylint: enable=attribute-defined-outside-init + # Seek to the end of this record as defined by the record header because + # the shapefile spec doesn't require the actual content to meet the header + # definition. Probably allowed for lazy feature deletion. + + f.seek(next_shape) + + return record + + + class _Record(list): """ A class to hold a record. Subclasses list to ensure compatibility with @@ -1638,120 +1761,10 @@ def __shape( # pylint: disable=attribute-defined-outside-init f = self.__getFileObj(self.shp) - # record = Shape(oid=oid) - # Previously, we also set __zmin = __zmax = __mmin = __mmax = None - nParts: Optional[int] = None - nPoints: Optional[int] = None - (__recNum, recLength) = unpack(">2i", f.read(8)) - # Determine the start of the next record - next_shape = f.tell() + (2 * recLength) - shapeType = unpack("= 16: - __mmin, __mmax = unpack("<2d", f.read(16)) - # Measure values less than -10e38 are nodata values according to the spec - if next_shape - f.tell() >= nPoints * 8: - record.m = [] - for m in _Array[float]( - "d", unpack(f"<{nPoints}d", f.read(nPoints * 8)) - ): - if m > NODATA: - record.m.append(m) - else: - record.m.append(None) - else: - record.m = [None for _ in range(nPoints)] - - # Read a single point - # if shapeType in (1, 11, 21): - if isinstance(record, Point): - x, y = _Array[float]("d", unpack("<2d", f.read(16))) - - record.points = [(x, y)] - if bbox is not None: - # create bounding box for Point by duplicating coordinates - # skip shape if no overlap with bounding box - if not bbox_overlap(bbox, (x, y, x, y)): - f.seek(next_shape) - return None - - # Read a single Z value - # if shapeType == 11: - if isinstance(record, PointZ): - record.z = tuple(unpack("= 8: - (m,) = unpack(" NODATA: - record.m = (m,) - else: - record.m = (None,) - - # pylint: enable=attribute-defined-outside-init - # Seek to the end of this record as defined by the record header because - # the shapefile spec doesn't require the actual content to meet the header - # definition. Probably allowed for lazy feature deletion. - - f.seek(next_shape) - - return record + return shape def __shxHeader(self): """Reads the header information from a .shx file.""" From bb75c01a9c31fffc202ad5643566b01d52861577 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Mon, 28 Jul 2025 20:27:06 +0100 Subject: [PATCH 04/15] Pass args of __shape to _read_shape_from_shp_file --- src/shapefile.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 6cdcbcb..ea1da9f 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -940,11 +940,13 @@ class MultiPointZ(MultiPointM, _HasZ): } -def __read_shape_from_shp_file(f): - """ Constructs a Shape from an open .shp file. Something else - is required to have first read the .shp file's header. - Leaves the shp file's .tell() in the correct position for - a subsequent call to this, to build the next shape. +def _read_shape_from_shp_file( + f, oid=None, bbox=None +): # oid: Optional[int] = None, bbox: Optional[BBox] = None): + """Constructs a Shape from an open .shp file. Something else + is required to have first read the .shp file's header. + Leaves the shp file's .tell() in the correct position for + a subsequent call to this, to build the next shape. """ # record = Shape(oid=oid) # Previously, we also set __zmin = __zmax = __mmin = __mmax = None @@ -999,9 +1001,7 @@ def __read_shape_from_shp_file(f): # if shapeType in (13, 15, 18, 31): if isinstance(record, _HasZ): __zmin, __zmax = unpack("<2d", f.read(16)) - record.z = _Array[float]( - "d", unpack(f"<{nPoints}d", f.read(nPoints * 8)) - ) + record.z = _Array[float]("d", unpack(f"<{nPoints}d", f.read(nPoints * 8))) # Read m extremes and values # if shapeType in (13, 23, 15, 25, 18, 28, 31): @@ -1062,7 +1062,6 @@ def __read_shape_from_shp_file(f): return record - class _Record(list): """ A class to hold a record. Subclasses list to ensure compatibility with @@ -1762,7 +1761,7 @@ def __shape( # pylint: disable=attribute-defined-outside-init f = self.__getFileObj(self.shp) - shape = __read_shape_from_shp_file(f) + shape = _read_shape_from_shp_file(f, oid, bbox) return shape From 91db4c2ed73c547011043cd517cfb97b1d90eb6f Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Mon, 28 Jul 2025 20:34:11 +0100 Subject: [PATCH 05/15] Try to make fewer isinstance calls --- src/shapefile.py | 53 ++++++++++++++++++++++++------------------------ 1 file changed, 27 insertions(+), 26 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index ea1da9f..89ab02f 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -948,7 +948,7 @@ def _read_shape_from_shp_file( Leaves the shp file's .tell() in the correct position for a subsequent call to this, to build the next shape. """ - # record = Shape(oid=oid) + # shape = Shape(oid=oid) # Previously, we also set __zmin = __zmax = __mmin = __mmax = None nParts: Optional[int] = None nPoints: Optional[int] = None @@ -956,77 +956,78 @@ def _read_shape_from_shp_file( # Determine the start of the next record next_shape = f.tell() + (2 * recLength) shapeType = unpack("= 16: __mmin, __mmax = unpack("<2d", f.read(16)) # Measure values less than -10e38 are nodata values according to the spec if next_shape - f.tell() >= nPoints * 8: - record.m = [] + shape.m = [] for m in _Array[float]( "d", unpack(f"<{nPoints}d", f.read(nPoints * 8)) ): if m > NODATA: - record.m.append(m) + shape.m.append(m) else: - record.m.append(None) + shape.m.append(None) else: - record.m = [None for _ in range(nPoints)] + shape.m = [None for _ in range(nPoints)] # Read a single point # if shapeType in (1, 11, 21): - if isinstance(record, Point): + if ShapeClass is Point: x, y = _Array[float]("d", unpack("<2d", f.read(16))) - record.points = [(x, y)] + shape.points = [(x, y)] if bbox is not None: # create bounding box for Point by duplicating coordinates # skip shape if no overlap with bounding box @@ -1036,21 +1037,21 @@ def _read_shape_from_shp_file( # Read a single Z value # if shapeType == 11: - if isinstance(record, PointZ): - record.z = tuple(unpack("= 8: (m,) = unpack(" NODATA: - record.m = (m,) + shape.m = (m,) else: - record.m = (None,) + shape.m = (None,) # pylint: enable=attribute-defined-outside-init # Seek to the end of this record as defined by the record header because From 092d2cd62f5df7b7a891aee2e3fb23e918af3066 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Mon, 28 Jul 2025 20:36:07 +0100 Subject: [PATCH 06/15] Correct retvar name --- src/shapefile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/shapefile.py b/src/shapefile.py index 89ab02f..6c3086f 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -1060,7 +1060,7 @@ def _read_shape_from_shp_file( f.seek(next_shape) - return record + return shape class _Record(list): From aec4f52d6f32fe8c858399f173aff3f1a2b0bcd1 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Mon, 28 Jul 2025 20:42:11 +0100 Subject: [PATCH 07/15] Replace isinstance test with if ShapeClass is MultiPatch --- src/shapefile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/shapefile.py b/src/shapefile.py index 6c3086f..ff11679 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -989,7 +989,7 @@ def _read_shape_from_shp_file( # Read part types for Multipatch - 31 # if shapeType == 31: - if isinstance(shape, MultiPatch): + if ShapeClass is MultiPatch: shape.partTypes = _Array[int]( "i", unpack(f"<{nParts}i", f.read(nParts * 4)) ) From b3cf1b8ce124d0d0f759b5cccc957cae7012b3e5 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Mon, 28 Jul 2025 20:47:24 +0100 Subject: [PATCH 08/15] Refactor into Shape class method --- src/shapefile.py | 251 ++++++++++++++++++++++++----------------------- 1 file changed, 128 insertions(+), 123 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index ff11679..81ac867 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -844,6 +844,134 @@ def shapeTypeName(self) -> str: def __repr__(self): return f"Shape #{self.__oid}: {self.shapeTypeName}" + @classmethod + def _from_shp_file(cls, oid=None, bbox=None): + # For Null shapes create an empty points list for consistency + # if shapeType == 0: + if cls is NullShape: + record.points = [] + # All shape types capable of having a bounding box + # elif shapeType in (3, 13, 23, 5, 15, 25, 8, 18, 28, 31): + elif isinstance(shape, _CanHaveBBox): + # record.bbox = tuple(_Array[float]("d", unpack("<4d", f.read(32)))) + shape.bbox = _Array[float]("d", unpack("<4d", f.read(32))) + # if bbox specified and no overlap, skip this shape + if bbox is not None and not bbox_overlap(bbox, tuple(shape.bbox)): + # because we stop parsing this shape, skip to beginning of + # next shape before we return + f.seek(next_shape) + return None + # Shape types with parts + # if shapeType in (3, 13, 23, 5, 15, 25, 31): + if any(cls is class_ for class_ in (shape, (Polyline, Polygon, MultiPatch))): + nParts = unpack("= 16: + __mmin, __mmax = unpack("<2d", f.read(16)) + # Measure values less than -10e38 are nodata values according to the spec + if next_shape - f.tell() >= nPoints * 8: + shape.m = [] + for m in _Array[float]( + "d", unpack(f"<{nPoints}d", f.read(nPoints * 8)) + ): + if m > NODATA: + shape.m.append(m) + else: + shape.m.append(None) + else: + shape.m = [None for _ in range(nPoints)] + + # Read a single point + # if shapeType in (1, 11, 21): + if cls is Point: + x, y = _Array[float]("d", unpack("<2d", f.read(16))) + + shape.points = [(x, y)] + if bbox is not None: + # create bounding box for Point by duplicating coordinates + # skip shape if no overlap with bounding box + if not bbox_overlap(bbox, (x, y, x, y)): + f.seek(next_shape) + return None + + # Read a single Z value + # if shapeType == 11: + if cls is PointZ: + shape.z = tuple(unpack("= 8: + (m,) = unpack(" NODATA: + shape.m = (m,) + else: + shape.m = (None,) + + # pylint: enable=attribute-defined-outside-init + # Seek to the end of this record as defined by the record header because + # the shapefile spec doesn't require the actual content to meet the header + # definition. Probably allowed for lazy feature deletion. + + +def _read_shape_from_shp_file( + f, oid=None, bbox=None +): # oid: Optional[int] = None, bbox: Optional[BBox] = None): + """Constructs a Shape from an open .shp file. Something else + is required to have first read the .shp file's header. + Leaves the shp file's .tell() in the correct position for + a subsequent call to this, to build the next shape. + """ + # shape = Shape(oid=oid) + # Previously, we also set __zmin = __zmax = __mmin = __mmax = None + nParts: Optional[int] = None + nPoints: Optional[int] = None + (__recNum, recLength) = unpack(">2i", f.read(8)) + # Determine the start of the next record + next_shape = f.tell() + (2 * recLength) + shapeType = unpack("2i", f.read(8)) - # Determine the start of the next record - next_shape = f.tell() + (2 * recLength) - shapeType = unpack("= 16: - __mmin, __mmax = unpack("<2d", f.read(16)) - # Measure values less than -10e38 are nodata values according to the spec - if next_shape - f.tell() >= nPoints * 8: - shape.m = [] - for m in _Array[float]( - "d", unpack(f"<{nPoints}d", f.read(nPoints * 8)) - ): - if m > NODATA: - shape.m.append(m) - else: - shape.m.append(None) - else: - shape.m = [None for _ in range(nPoints)] - - # Read a single point - # if shapeType in (1, 11, 21): - if ShapeClass is Point: - x, y = _Array[float]("d", unpack("<2d", f.read(16))) - - shape.points = [(x, y)] - if bbox is not None: - # create bounding box for Point by duplicating coordinates - # skip shape if no overlap with bounding box - if not bbox_overlap(bbox, (x, y, x, y)): - f.seek(next_shape) - return None - - # Read a single Z value - # if shapeType == 11: - if ShapeClass is PointZ: - shape.z = tuple(unpack("= 8: - (m,) = unpack(" NODATA: - shape.m = (m,) - else: - shape.m = (None,) - - # pylint: enable=attribute-defined-outside-init - # Seek to the end of this record as defined by the record header because - # the shapefile spec doesn't require the actual content to meet the header - # definition. Probably allowed for lazy feature deletion. - - f.seek(next_shape) - - return shape - - class _Record(list): """ A class to hold a record. Subclasses list to ensure compatibility with From f425fe2080d52be8bf555de287fa6fac5f24c1fe Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Mon, 28 Jul 2025 21:13:45 +0100 Subject: [PATCH 09/15] Restore necessary isinstance check --- src/shapefile.py | 47 ++++++++++++++++++++++++++--------------------- 1 file changed, 26 insertions(+), 21 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 81ac867..5961b2a 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -845,11 +845,17 @@ def __repr__(self): return f"Shape #{self.__oid}: {self.shapeTypeName}" @classmethod - def _from_shp_file(cls, oid=None, bbox=None): + def _from_shp_file(cls, f, recLength, next_shape, oid=None, bbox=None): + # Previously, we also set __zmin = __zmax = __mmin = __mmax = None + nParts: Optional[int] = None + nPoints: Optional[int] = None + + shape = cls(oid=oid) + # For Null shapes create an empty points list for consistency # if shapeType == 0: if cls is NullShape: - record.points = [] + shape.points = [] # All shape types capable of having a bounding box # elif shapeType in (3, 13, 23, 5, 15, 25, 8, 18, 28, 31): elif isinstance(shape, _CanHaveBBox): @@ -863,7 +869,7 @@ def _from_shp_file(cls, oid=None, bbox=None): return None # Shape types with parts # if shapeType in (3, 13, 23, 5, 15, 25, 31): - if any(cls is class_ for class_ in (shape, (Polyline, Polygon, MultiPatch))): + if issubclass(cls, (Polyline, Polygon, MultiPatch)): nParts = unpack("2i", f.read(8)) + (__recNum, recLength) = unpack_2_int32_be(f.read(8)) # Determine the start of the next record next_shape = f.tell() + (2 * recLength) shapeType = unpack(" Date: Mon, 28 Jul 2025 21:18:50 +0100 Subject: [PATCH 10/15] Satisfy Pylint --- pyproject.toml | 3 ++- src/shapefile.py | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index aa11da4..a8e14c4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -114,6 +114,7 @@ load-plugins=[ # Silence warning: shapefile.py:2076:20: W0212: Access to a protected # member _from_geojson of a client class (protected-access) +# shapefile.py:950:16: W0201: Attribute 'm' defined outside __init__ (attribute-defined-outside-init) # Silence remarks: # src\shapefile.py:338:0: R0914: Too many local variables (21/15) (too-many-locals) # src\shapefile.py:338:0: R0912: Too many branches (24/12) (too-many-branches) @@ -133,6 +134,6 @@ load-plugins=[ # https://github.com/christopherpickering/pylint-per-file-ignores/issues/160 [tool.pylint.'messages control'] per-file-ignores = [ - "/src/shapefile.py:W0212,R0902,R0903,R0904,R0911,R0912,R0913,R0914,R0915,R0917,R1732", + "/src/shapefile.py:W0212,W0201,R0902,R0903,R0904,R0911,R0912,R0913,R0914,R0915,R0917,R1732", "test_shapefile.py:W0212,R1732", ] diff --git a/src/shapefile.py b/src/shapefile.py index 5961b2a..10f91f8 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -845,7 +845,7 @@ def __repr__(self): return f"Shape #{self.__oid}: {self.shapeTypeName}" @classmethod - def _from_shp_file(cls, f, recLength, next_shape, oid=None, bbox=None): + def _from_shp_file(cls, f, next_shape, oid=None, bbox=None): # Previously, we also set __zmin = __zmax = __mmin = __mmax = None nParts: Optional[int] = None nPoints: Optional[int] = None @@ -971,7 +971,7 @@ def _read_shape_from_shp_file( next_shape = f.tell() + (2 * recLength) shapeType = unpack(" Date: Mon, 28 Jul 2025 21:48:32 +0100 Subject: [PATCH 11/15] Only call f.seek(next_shape) in _read_shape_from_shp_file --- src/shapefile.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 10f91f8..02d3519 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -865,7 +865,6 @@ def _from_shp_file(cls, f, next_shape, oid=None, bbox=None): if bbox is not None and not bbox_overlap(bbox, tuple(shape.bbox)): # because we stop parsing this shape, skip to beginning of # next shape before we return - f.seek(next_shape) return None # Shape types with parts # if shapeType in (3, 13, 23, 5, 15, 25, 31): @@ -928,7 +927,6 @@ def _from_shp_file(cls, f, next_shape, oid=None, bbox=None): # create bounding box for Point by duplicating coordinates # skip shape if no overlap with bounding box if not bbox_overlap(bbox, (x, y, x, y)): - f.seek(next_shape) return None # Read a single Z value From 2073cb151b97415c7ef1181293149ee4090834d4 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Mon, 28 Jul 2025 21:51:08 +0100 Subject: [PATCH 12/15] Don't need to set NullShape().points = [], it already is. --- src/shapefile.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 02d3519..02e5250 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -852,10 +852,6 @@ def _from_shp_file(cls, f, next_shape, oid=None, bbox=None): shape = cls(oid=oid) - # For Null shapes create an empty points list for consistency - # if shapeType == 0: - if cls is NullShape: - shape.points = [] # All shape types capable of having a bounding box # elif shapeType in (3, 13, 23, 5, 15, 25, 8, 18, 28, 31): elif isinstance(shape, _CanHaveBBox): From 9201e480f52ca4a6e78a3f19d53559ed940828cf Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Mon, 28 Jul 2025 21:52:31 +0100 Subject: [PATCH 13/15] Replace elif with if --- src/shapefile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/shapefile.py b/src/shapefile.py index 02e5250..64c8fe5 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -854,7 +854,7 @@ def _from_shp_file(cls, f, next_shape, oid=None, bbox=None): # All shape types capable of having a bounding box # elif shapeType in (3, 13, 23, 5, 15, 25, 8, 18, 28, 31): - elif isinstance(shape, _CanHaveBBox): + if isinstance(shape, _CanHaveBBox): # record.bbox = tuple(_Array[float]("d", unpack("<4d", f.read(32)))) shape.bbox = _Array[float]("d", unpack("<4d", f.read(32))) # if bbox specified and no overlap, skip this shape From b7302b64a36bbb95d8f9b55b509b9799cf055ab7 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Mon, 28 Jul 2025 22:47:25 +0100 Subject: [PATCH 14/15] Polymorphic! (Errors) --- src/shapefile.py | 234 +++++++++++++++++++++++++++-------------------- 1 file changed, 134 insertions(+), 100 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 64c8fe5..d56bcb0 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -844,111 +844,87 @@ def shapeTypeName(self) -> str: def __repr__(self): return f"Shape #{self.__oid}: {self.shapeTypeName}" + # pylint: disable=unused-argument + def _get_and_set_bbox_from_shp_file(self, f): + return None + + @staticmethod + def _get_nparts_from_shp_file(f): + return None + + @staticmethod + def _get_npoints_from_shp_file(f): + return None + + def _set_parts_from_shp_file(self, f, nParts): + pass + + def _set_part_types_from_shp_file(self, f, nParts): + pass + + def _set_points_from_shp_file(self, f, nPoints): + pass + + def _set_z_from_shp_file(self, f, nPoints): + pass + + def _set_m_from_shp_file(self, f, nPoints, next_shape): + pass + + def _get_and_set_2D_point_from_shp_file(self, f): + return None + + def _set_single_point_z_from_shp_file(self, f): + pass + + def _set_single_point_m_from_shp_file(self, f, next_shape): + pass + + # pylint: enable=unused-argument + @classmethod def _from_shp_file(cls, f, next_shape, oid=None, bbox=None): - # Previously, we also set __zmin = __zmax = __mmin = __mmax = None - nParts: Optional[int] = None - nPoints: Optional[int] = None - shape = cls(oid=oid) - # All shape types capable of having a bounding box - # elif shapeType in (3, 13, 23, 5, 15, 25, 8, 18, 28, 31): - if isinstance(shape, _CanHaveBBox): - # record.bbox = tuple(_Array[float]("d", unpack("<4d", f.read(32)))) - shape.bbox = _Array[float]("d", unpack("<4d", f.read(32))) - # if bbox specified and no overlap, skip this shape - if bbox is not None and not bbox_overlap(bbox, tuple(shape.bbox)): - # because we stop parsing this shape, skip to beginning of - # next shape before we return - return None - # Shape types with parts - # if shapeType in (3, 13, 23, 5, 15, 25, 31): - if issubclass(cls, (Polyline, Polygon, MultiPatch)): - nParts = unpack("= 16: - __mmin, __mmax = unpack("<2d", f.read(16)) - # Measure values less than -10e38 are nodata values according to the spec - if next_shape - f.tell() >= nPoints * 8: - shape.m = [] - for m in _Array[float]( - "d", unpack(f"<{nPoints}d", f.read(nPoints * 8)) - ): - if m > NODATA: - shape.m.append(m) - else: - shape.m.append(None) - else: - shape.m = [None for _ in range(nPoints)] + shape._set_z_from_shp_file(f, nPoints) + + shape._set_m_from_shp_file(f, nPoints, next_shape) # Read a single point # if shapeType in (1, 11, 21): - if cls is Point: - x, y = _Array[float]("d", unpack("<2d", f.read(16))) - - shape.points = [(x, y)] - if bbox is not None: - # create bounding box for Point by duplicating coordinates - # skip shape if no overlap with bounding box - if not bbox_overlap(bbox, (x, y, x, y)): - return None - - # Read a single Z value - # if shapeType == 11: - if cls is PointZ: - shape.z = tuple(unpack("= 8: - (m,) = unpack(" NODATA: - shape.m = (m,) - else: - shape.m = (None,) + point_2D = shape._get_and_set_2D_point_from_shp_file(f) # pylint: disable=assignment-from-none - return shape + if bbox is not None and point_2D is not None: + x, y = point_2D # pylint: disable=unpacking-non-sequence + # create bounding box for Point by duplicating coordinates + # skip shape if no overlap with bounding box + if not bbox_overlap(bbox, (x, y, x, y)): + return None - # pylint: enable=attribute-defined-outside-init - # Seek to the end of this record as defined by the record header because - # the shapefile spec doesn't require the actual content to meet the header - # definition. Probably allowed for lazy feature deletion. + shape._set_single_point_z_from_shp_file(f) + + shape._set_single_point_m_from_shp_file(f, next_shape) + + return shape def _read_shape_from_shp_file( @@ -967,6 +943,9 @@ def _read_shape_from_shp_file( ShapeClass = SHAPE_CLASS_FROM_SHAPETYPE[shapeType] shape = ShapeClass._from_shp_file(f, next_shape, oid=oid, bbox=bbox) + # Seek to the end of this record as defined by the record header because + # the shapefile spec doesn't require the actual content to meet the header + # definition. Probably allowed for lazy feature deletion. f.seek(next_shape) return shape @@ -983,16 +962,43 @@ class _CanHaveBBox(Shape): # Not a BBox because the legacy implementation was a list, not a 4-tuple. bbox: Optional[Sequence[float]] = None + def _get_and_set_bbox_from_shp_file(self, f): + # record.bbox = tuple(_Array[float]("d", unpack("<4d", f.read(32)))) + self.bbox = _Array[float]("d", unpack("<4d", f.read(32))) + return self.bbox + + @staticmethod + def _get_npoints_from_shp_file(f): + return unpack("= 16: + __mmin, __mmax = unpack("<2d", f.read(16)) + # Measure values less than -10e38 are nodata values according to the spec + if next_shape - f.tell() >= nPoints * 8: + self.m = [] + for m in _Array[float]("d", unpack(f"<{nPoints}d", f.read(nPoints * 8))): + if m > NODATA: + self.m.append(m) + else: + self.m.append(None) + else: + self.m = [None for _ in range(nPoints)] + class _HasZ(Shape): z: Sequence[float] + def _set_z_from_shp_file(self, f, nPoints): + __zmin, __zmax = unpack("<2d", f.read(16)) # pylint: disable=unused-private-member + self.z = _Array[float]("d", unpack(f"<{nPoints}d", f.read(nPoints * 8))) + -class MultiPatch(_HasM, _HasZ, _CanHaveBBox): +class MultiPatch(_HasM, _HasZ, _CanHaveParts): shapeType = MULTIPATCH + def _set_part_types_from_shp_file(self, f, nParts): + self.partTypes = _Array[int]("i", unpack(f"<{nParts}i", f.read(nParts * 4))) + class PointM(Point, _HasM): shapeType = POINTM @@ -1018,6 +1045,17 @@ class PointM(Point, _HasM): # PyShp encodes None m values as NODATA m = (None,) + def _set_single_point_m_from_shp_file(self, f, next_shape): + if next_shape - f.tell() >= 8: + (m,) = unpack(" NODATA: + self.m = (m,) + else: + self.m = (None,) + class PolylineM(Polyline, _HasM): shapeType = POLYLINEM @@ -1036,6 +1074,9 @@ class PointZ(PointM, _HasZ): # same default as in Writer.__shpRecord (if s.shapeType == 11:) z: Sequence[float] = (0.0,) + def _set_single_point_z_from_shp_file(self, f): + self.z = tuple(unpack(" Optional[Shape]: """Returns the header info and geometry for a single shape.""" - # pylint: disable=attribute-defined-outside-init f = self.__getFileObj(self.shp) shape = _read_shape_from_shp_file(f, oid, bbox) @@ -1901,7 +1938,6 @@ def iterShapes(self, bbox: Optional[BBox] = None) -> Iterator[Optional[Shape]]: def __dbfHeader(self): """Reads a dbf header. Xbase-related code borrows heavily from ActiveState Python Cookbook Recipe 362715 by Raymond Hettinger""" - # pylint: disable=attribute-defined-outside-init if not self.dbf: raise ShapefileException( "Shapefile Reader requires a shapefile or file-like object. (no dbf file found)" @@ -1948,8 +1984,6 @@ def __dbfHeader(self): self.__fullRecStruct = recStruct self.__fullRecLookup = recLookup - # pylint: enable=attribute-defined-outside-init - def __recordFmt(self, fields: Optional[Container[str]] = None) -> tuple[str, int]: """Calculates the format and size of a .dbf record. Optional 'fields' arg specifies which fieldnames to unpack and which to ignore. Note that this From 408c7d17bfe7e1789e25e1adaa47d44b7def6d53 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Mon, 28 Jul 2025 22:59:25 +0100 Subject: [PATCH 15/15] Don't overwrite user specified bbox with the one read from shp file --- src/shapefile.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index d56bcb0..321b215 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -845,8 +845,8 @@ def __repr__(self): return f"Shape #{self.__oid}: {self.shapeTypeName}" # pylint: disable=unused-argument - def _get_and_set_bbox_from_shp_file(self, f): - return None + def _set_bbox_from_shp_file(self, f): + pass @staticmethod def _get_nparts_from_shp_file(f): @@ -886,7 +886,7 @@ def _set_single_point_m_from_shp_file(self, f, next_shape): def _from_shp_file(cls, f, next_shape, oid=None, bbox=None): shape = cls(oid=oid) - bbox = shape._get_and_set_bbox_from_shp_file(f) # pylint: disable=assignment-from-none + shape._set_bbox_from_shp_file(f) # pylint: disable=assignment-from-none # if bbox specified and no overlap, skip this shape if bbox is not None and not bbox_overlap(bbox, tuple(shape.bbox)): # pylint: disable=no-member @@ -962,10 +962,9 @@ class _CanHaveBBox(Shape): # Not a BBox because the legacy implementation was a list, not a 4-tuple. bbox: Optional[Sequence[float]] = None - def _get_and_set_bbox_from_shp_file(self, f): + def _set_bbox_from_shp_file(self, f): # record.bbox = tuple(_Array[float]("d", unpack("<4d", f.read(32)))) self.bbox = _Array[float]("d", unpack("<4d", f.read(32))) - return self.bbox @staticmethod def _get_npoints_from_shp_file(f):