From ce8b059bca6f078107ed1b52ab741dd61705e5a7 Mon Sep 17 00:00:00 2001 From: Kurt Schwehr Date: Sun, 6 Jul 2025 15:09:31 +0000 Subject: [PATCH 001/220] Update to python 3.9. - Some hand editing - `pyupgrade --py39-plus` --- pyproject.toml | 3 +- setup.cfg | 3 +- shapefile.py | 243 ++++++++++++++++------------------------------ test_shapefile.py | 6 +- 4 files changed, 89 insertions(+), 166 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index df8e737f..fed78f76 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -39,7 +39,7 @@ line-length = 88 indent-width = 4 # Assume Python 3.9 -target-version = "py37" +target-version = "py39" [tool.ruff.lint] # Enable Pyflakes (`F`) and a subset of the pycodestyle (`E`) codes by default. @@ -67,7 +67,6 @@ skip-magic-trailing-comma = false line-ending = "auto" - [tool.pylint.MASTER] load-plugins=[ "pylint_per_file_ignores", diff --git a/setup.cfg b/setup.cfg index 906abd3a..d13d43bb 100644 --- a/setup.cfg +++ b/setup.cfg @@ -16,7 +16,6 @@ keywords = gis, geospatial, geographic, shapefile, shapefiles classifiers = Development Status :: 5 - Production/Stable Programming Language :: Python - Programming Language :: Python :: 2.7 Programming Language :: Python :: 3 Topic :: Scientific/Engineering :: GIS Topic :: Software Development :: Libraries @@ -24,7 +23,7 @@ classifiers = [options] py_modules = shapefile -python_requires = >=2.7 +python_requires = >=3.9 [bdist_wheel] universal=1 diff --git a/shapefile.py b/shapefile.py index 211fd48f..fdd49cd7 100644 --- a/shapefile.py +++ b/shapefile.py @@ -3,21 +3,25 @@ Provides read and write support for ESRI Shapefiles. 
authors: jlawheadgeospatialpython.com maintainer: karim.bahgat.norwaygmail.com -Compatible with Python versions 2.7-3.x +Compatible with Python versions >=3.9 """ __version__ = "2.4.0" import array +from datetime import date import io import logging import os import sys import tempfile import time -import zipfile -from datetime import date from struct import Struct, calcsize, error, pack, unpack +import zipfile + +from urllib.error import HTTPError +from urllib.parse import urlparse, urlunparse +from urllib.request import Request, urlopen # Create named logger logger = logging.getLogger(__name__) @@ -79,118 +83,48 @@ 5: "RING", } - -# Python 2-3 handling - -PYTHON3 = sys.version_info[0] == 3 - -if PYTHON3: - xrange = range - izip = zip - - from urllib.error import HTTPError - from urllib.parse import urlparse, urlunparse - from urllib.request import Request, urlopen - -else: - from itertools import izip - - from urllib2 import HTTPError, Request, urlopen - from urlparse import urlparse, urlunparse - - # Helpers MISSING = [None, ""] NODATA = -10e38 # as per the ESRI shapefile spec, only used for m-values. -if PYTHON3: - - def b(v, encoding="utf-8", encodingErrors="strict"): - if isinstance(v, str): - # For python 3 encode str to bytes. - return v.encode(encoding, encodingErrors) - elif isinstance(v, bytes): - # Already bytes. - return v - elif v is None: - # Since we're dealing with text, interpret None as "" - return b"" - else: - # Force string representation. - return str(v).encode(encoding, encodingErrors) - - def u(v, encoding="utf-8", encodingErrors="strict"): - if isinstance(v, bytes): - # For python 3 decode bytes to str. - return v.decode(encoding, encodingErrors) - elif isinstance(v, str): - # Already str. - return v - elif v is None: - # Since we're dealing with text, interpret None as "" - return "" - else: - # Force string representation. 
- return bytes(v).decode(encoding, encodingErrors) - - def is_string(v): - return isinstance(v, str) - -else: - - def b(v, encoding="utf-8", encodingErrors="strict"): - if isinstance(v, unicode): - # For python 2 encode unicode to bytes. - return v.encode(encoding, encodingErrors) - elif isinstance(v, bytes): - # Already bytes. - return v - elif v is None: - # Since we're dealing with text, interpret None as "" - return "" - else: - # Force string representation. - return unicode(v).encode(encoding, encodingErrors) - - def u(v, encoding="utf-8", encodingErrors="strict"): - if isinstance(v, bytes): - # For python 2 decode bytes to unicode. - return v.decode(encoding, encodingErrors) - elif isinstance(v, unicode): - # Already unicode. - return v - elif v is None: - # Since we're dealing with text, interpret None as "" - return "" - else: - # Force string representation. - return bytes(v).decode(encoding, encodingErrors) - - def is_string(v): - return isinstance(v, basestring) - -if sys.version_info[0:2] >= (3, 6): +def b(v, encoding="utf-8", encodingErrors="strict"): + if isinstance(v, str): + # For python 3 encode str to bytes. + return v.encode(encoding, encodingErrors) + elif isinstance(v, bytes): + # Already bytes. + return v + elif v is None: + # Since we're dealing with text, interpret None as "" + return b"" + else: + # Force string representation. + return str(v).encode(encoding, encodingErrors) + +def u(v, encoding="utf-8", encodingErrors="strict"): + if isinstance(v, bytes): + # For python 3 decode bytes to str. + return v.decode(encoding, encodingErrors) + elif isinstance(v, str): + # Already str. + return v + elif v is None: + # Since we're dealing with text, interpret None as "" + return "" + else: + # Force string representation. 
+ return bytes(v).decode(encoding, encodingErrors) - def pathlike_obj(path): - if isinstance(path, os.PathLike): - return os.fsdecode(path) - else: - return path -else: - - def pathlike_obj(path): - if is_string(path): - return path - elif hasattr(path, "__fspath__"): - return path.__fspath__() - else: - try: - return str(path) - except: - return path +def is_string(v): + return isinstance(v, str) +def pathlike_obj(path): + if isinstance(path, os.PathLike): + return os.fsdecode(path) + else: + return path # Begin @@ -311,8 +245,7 @@ def ring_sample(coords, ccw=False): def itercoords(): # iterate full closed ring - for p in coords: - yield p + yield from coords # finally, yield the second coordinate to the end to allow checking the last triplet yield coords[1] @@ -350,7 +283,7 @@ def itercoords(): def ring_contains_ring(coords1, coords2): """Returns True if all vertexes in coords2 are fully inside coords1.""" - return all((ring_contains_point(coords1, p2) for p2 in coords2)) + return all(ring_contains_point(coords1, p2) for p2 in coords2) def organize_polygon_rings(rings, return_errors=None): @@ -398,7 +331,7 @@ def organize_polygon_rings(rings, return_errors=None): return polys # first determine each hole's candidate exteriors based on simple bbox contains test - hole_exteriors = dict([(hole_i, []) for hole_i in xrange(len(holes))]) + hole_exteriors = {hole_i: [] for hole_i in range(len(holes))} exterior_bboxes = [ring_bbox(ring) for ring in exteriors] for hole_i in hole_exteriors.keys(): hole_bbox = ring_bbox(holes[hole_i]) @@ -478,7 +411,7 @@ def organize_polygon_rings(rings, return_errors=None): return polys -class Shape(object): +class Shape: def __init__( self, shapeType=NULL, points=None, parts=None, partTypes=None, oid=None ): @@ -566,7 +499,7 @@ def __geo_interface__(self): else: # get all polygon rings rings = [] - for i in xrange(len(self.parts)): + for i in range(len(self.parts)): # get indexes of start and end points of the ring start = self.parts[i] 
try: @@ -712,7 +645,7 @@ def shapeTypeName(self): return SHAPETYPE_LOOKUP[self.shapeType] def __repr__(self): - return "Shape #{}: {}".format(self.__oid, self.shapeTypeName) + return f"Shape #{self.__oid}: {self.shapeTypeName}" class _Record(list): @@ -763,10 +696,10 @@ def __getattr__(self, item): index = self.__field_positions[item] return list.__getitem__(self, index) except KeyError: - raise AttributeError("{} is not a field name".format(item)) + raise AttributeError(f"{item} is not a field name") except IndexError: raise IndexError( - "{} found as a field but not enough values available.".format(item) + f"{item} found as a field but not enough values available." ) def __setattr__(self, key, value): @@ -783,7 +716,7 @@ def __setattr__(self, key, value): index = self.__field_positions[key] return list.__setitem__(self, index, value) except KeyError: - raise AttributeError("{} is not a field name".format(key)) + raise AttributeError(f"{key} is not a field name") def __getitem__(self, item): """ @@ -804,7 +737,7 @@ def __getitem__(self, item): if index is not None: return list.__getitem__(self, index) else: - raise IndexError('"{}" is not a field name and not an int'.format(item)) + raise IndexError(f'"{item}" is not a field name and not an int') def __setitem__(self, key, value): """ @@ -822,7 +755,7 @@ def __setitem__(self, key, value): if index is not None: return list.__setitem__(self, index, value) else: - raise IndexError("{} is not a field name and not an int".format(key)) + raise IndexError(f"{key} is not a field name and not an int") @property def oid(self): @@ -834,15 +767,15 @@ def as_dict(self, date_strings=False): Returns this Record as a dictionary using the field names as keys :return: dict """ - dct = dict((f, self[i]) for f, i in self.__field_positions.items()) + dct = {f: self[i] for f, i in self.__field_positions.items()} if date_strings: for k, v in dct.items(): if isinstance(v, date): - dct[k] = "{:04d}{:02d}{:02d}".format(v.year, v.month, 
v.day) + dct[k] = f"{v.year:04d}{v.month:02d}{v.day:02d}" return dct def __repr__(self): - return "Record #{}: {}".format(self.__oid, list(self)) + return f"Record #{self.__oid}: {list(self)}" def __dir__(self): """ @@ -866,7 +799,7 @@ def __eq__(self, other): return list.__eq__(self, other) -class ShapeRecord(object): +class ShapeRecord: """A ShapeRecord object containing a shape along with its attributes. Provides the GeoJSON __geo_interface__ to return a Feature dictionary.""" @@ -892,7 +825,7 @@ class Shapes(list): to return a GeometryCollection dictionary.""" def __repr__(self): - return "Shapes: {}".format(list(self)) + return f"Shapes: {list(self)}" @property def __geo_interface__(self): @@ -912,7 +845,7 @@ class ShapeRecords(list): to return a FeatureCollection dictionary.""" def __repr__(self): - return "ShapeRecords: {}".format(list(self)) + return f"ShapeRecords: {list(self)}" @property def __geo_interface__(self): @@ -929,7 +862,7 @@ class ShapefileException(Exception): pass -class Reader(object): +class Reader: """Reads the three files of a shapefile as a unit or separately. If one of the three files (.shp, .shx, .dbf) is missing no exception is thrown until you try @@ -1157,7 +1090,7 @@ def __str__(self): ) if self.dbf: info.append( - " {} records ({} fields)".format(len(self), len(self.fields)) + f" {len(self)} records ({len(self.fields)} fields)" ) return "\n".join(info) @@ -1224,8 +1157,7 @@ def __len__(self): def __iter__(self): """Iterates through the shapes/records in the shapefile.""" - for shaperec in self.iterShapeRecords(): - yield shaperec + yield from self.iterShapeRecords() @property def __geo_interface__(self): @@ -1250,7 +1182,7 @@ def load(self, shapefile=None): self.load_dbf(shapeName) if not (self.shp or self.dbf): raise ShapefileException( - "Unable to open %s.dbf or %s.shp." % (shapeName, shapeName) + f"Unable to open {shapeName}.dbf or {shapeName}.shp." 
) if self.shp: self.__shpHeader() @@ -1265,13 +1197,13 @@ def load_shp(self, shapefile_name): """ shp_ext = "shp" try: - self.shp = open("%s.%s" % (shapefile_name, shp_ext), "rb") + self.shp = open(f"{shapefile_name}.{shp_ext}", "rb") self._files_to_close.append(self.shp) - except IOError: + except OSError: try: - self.shp = open("%s.%s" % (shapefile_name, shp_ext.upper()), "rb") + self.shp = open(f"{shapefile_name}.{shp_ext.upper()}", "rb") self._files_to_close.append(self.shp) - except IOError: + except OSError: pass def load_shx(self, shapefile_name): @@ -1280,13 +1212,13 @@ def load_shx(self, shapefile_name): """ shx_ext = "shx" try: - self.shx = open("%s.%s" % (shapefile_name, shx_ext), "rb") + self.shx = open(f"{shapefile_name}.{shx_ext}", "rb") self._files_to_close.append(self.shx) - except IOError: + except OSError: try: - self.shx = open("%s.%s" % (shapefile_name, shx_ext.upper()), "rb") + self.shx = open(f"{shapefile_name}.{shx_ext.upper()}", "rb") self._files_to_close.append(self.shx) - except IOError: + except OSError: pass def load_dbf(self, shapefile_name): @@ -1295,13 +1227,13 @@ def load_dbf(self, shapefile_name): """ dbf_ext = "dbf" try: - self.dbf = open("%s.%s" % (shapefile_name, dbf_ext), "rb") + self.dbf = open(f"{shapefile_name}.{dbf_ext}", "rb") self._files_to_close.append(self.dbf) - except IOError: + except OSError: try: - self.dbf = open("%s.%s" % (shapefile_name, dbf_ext.upper()), "rb") + self.dbf = open(f"{shapefile_name}.{dbf_ext.upper()}", "rb") self._files_to_close.append(self.dbf) - except IOError: + except OSError: pass def __del__(self): @@ -1313,7 +1245,7 @@ def close(self): if hasattr(attribute, "close"): try: attribute.close() - except IOError: + except OSError: pass self._files_to_close = [] @@ -1337,7 +1269,7 @@ def __restrictIndex(self, i): rmax = self.numRecords - 1 if abs(i) > rmax: raise IndexError( - "Shape or Record index: %s out of range. Max index: %s" % (i, rmax) + f"Shape or Record index: {i} out of range. 
Max index: {rmax}" ) if i < 0: i = range(self.numRecords)[i] @@ -1561,7 +1493,7 @@ def iterShapes(self, bbox=None): if self.numShapes: # Iterate exactly the number of shapes from shx header - for i in xrange(self.numShapes): + for i in range(self.numShapes): # MAYBE: check if more left of file or exit early? shape = self.__shape(oid=i, bbox=bbox) if shape: @@ -1624,7 +1556,7 @@ def __dbfHeader(self): # store all field positions for easy lookups # note: fieldLookup gives the index position of a field inside Reader.fields - self.__fieldLookup = dict((f[0], i) for i, f in enumerate(self.fields)) + self.__fieldLookup = {f[0]: i for i, f in enumerate(self.fields)} # by default, read all fields except the deletion flag, hence "[1:]" # note: recLookup gives the index position of a field inside a _Record list @@ -1676,7 +1608,7 @@ def __recordFields(self, fields=None): # make sure the given fieldnames exist for name in fields: if name not in self.__fieldLookup or name == "DeletionFlag": - raise ValueError('"{}" is not a valid field name'.format(name)) + raise ValueError(f'"{name}" is not a valid field name') # fetch relevant field info tuples fieldTuples = [] for fieldinfo in self.fields[1:]: @@ -1684,7 +1616,7 @@ def __recordFields(self, fields=None): if name in fields: fieldTuples.append(fieldinfo) # store the field positions - recLookup = dict((f[0], i) for i, f in enumerate(fieldTuples)) + recLookup = {f[0]: i for i, f in enumerate(fieldTuples)} else: # use all the dbf fields fieldTuples = self.fields[1:] # sans deletion flag @@ -1850,7 +1782,7 @@ def iterRecords(self, fields=None, start=0, stop=None): recSize = self.__recordLength f.seek(self.__dbfHdrLength + (start * recSize)) fieldTuples, recLookup, recStruct = self.__recordFields(fields) - for i in xrange(start, stop): + for i in range(start, stop): r = self.__record( oid=i, fieldTuples=fieldTuples, recLookup=recLookup, recStruct=recStruct ) @@ -1891,7 +1823,7 @@ def iterShapeRecords(self, fields=None, bbox=None): 
""" if bbox is None: # iterate through all shapes and records - for shape, record in izip( + for shape, record in zip( self.iterShapes(), self.iterRecords(fields=fields) ): yield ShapeRecord(shape=shape, record=record) @@ -1908,7 +1840,7 @@ def iterShapeRecords(self, fields=None, bbox=None): yield ShapeRecord(shape=shape, record=record) -class Writer(object): +class Writer: """Provides write support for ESRI Shapefiles.""" def __init__(self, target=None, shapeType=None, autoBalance=False, **kwargs): @@ -2015,7 +1947,7 @@ def close(self): ): try: attribute.flush() - except IOError: + except OSError: pass # Close any files that the writer opened (but not those given by user) @@ -2023,7 +1955,7 @@ def close(self): if hasattr(attribute, "close"): try: attribute.close() - except IOError: + except OSError: pass self._files_to_close = [] @@ -2494,7 +2426,7 @@ def record(self, *recordList, **recordDict): if self.autoBalance and self.recNum > self.shpNum: self.balance() - fieldCount = sum((1 for field in self.fields if field[0] != "DeletionFlag")) + fieldCount = sum(1 for field in self.fields if field[0] != "DeletionFlag") if recordList: record = list(recordList) while len(record) < fieldCount: @@ -2909,9 +2841,6 @@ def _test(args=sys.argv[1:], verbosity=0): class Py23DocChecker(doctest.OutputChecker): def check_output(self, want, got, optionflags): - if sys.version_info[0] == 2: - got = re.sub("u'(.*?)'", "'\\1'", got) - got = re.sub('u"(.*?)"', '"\\1"', got) res = doctest.OutputChecker.check_output(self, want, got, optionflags) return res diff --git a/test_shapefile.py b/test_shapefile.py index 1b7182f9..b55c1f7b 100644 --- a/test_shapefile.py +++ b/test_shapefile.py @@ -6,11 +6,7 @@ import json import os.path -try: - from pathlib import Path -except ImportError: - # pathlib2 is a dependency of pytest >= 3.7 - from pathlib2 import Path +from pathlib import Path # third party imports import pytest From 3e3462089712f8d4f49415ac5baee0eb360cf0d3 Mon Sep 17 00:00:00 2001 From: 
Kurt Schwehr Date: Sun, 6 Jul 2025 15:12:52 +0000 Subject: [PATCH 002/220] =?UTF-8?q?izip=20=E2=86=92=20zip?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- shapefile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/shapefile.py b/shapefile.py index fdd49cd7..57f3630c 100644 --- a/shapefile.py +++ b/shapefile.py @@ -1338,7 +1338,7 @@ def __shape(self, oid=None, bbox=None): # Read points - produces a list of [x,y] values if nPoints: flat = unpack("<%sd" % (2 * nPoints), f.read(16 * nPoints)) - record.points = list(izip(*(iter(flat),) * 2)) + record.points = list(zip(*(iter(flat),) * 2)) # Read z extremes and values if shapeType in (13, 15, 18, 31): (zmin, zmax) = unpack("<2d", f.read(16)) From 61505d121d2f7a0aee2e9c7ca7c87b3438d32faf Mon Sep 17 00:00:00 2001 From: Kurt Schwehr Date: Sun, 6 Jul 2025 16:05:39 +0000 Subject: [PATCH 003/220] pyproject.toml: Set target-version to py39 to match comment. --- pyproject.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index fed78f76..697b6e67 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,7 +2,6 @@ requires = ["setuptools"] build-backend = "setuptools.build_meta" - [tool.ruff] # Exclude a variety of commonly ignored directories. 
exclude = [ From 2c9aeaa5f1bbbc419d5dc0282bcee75951adc577 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Mon, 21 Jul 2025 15:47:48 +0100 Subject: [PATCH 004/220] Run ruff format and pre-commit hooks --- shapefile.py | 13 +++++++------ test_shapefile.py | 1 - 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/shapefile.py b/shapefile.py index 57f3630c..9e62edca 100644 --- a/shapefile.py +++ b/shapefile.py @@ -9,16 +9,15 @@ __version__ = "2.4.0" import array -from datetime import date import io import logging import os import sys import tempfile import time -from struct import Struct, calcsize, error, pack, unpack import zipfile - +from datetime import date +from struct import Struct, calcsize, error, pack, unpack from urllib.error import HTTPError from urllib.parse import urlparse, urlunparse from urllib.request import Request, urlopen @@ -103,6 +102,7 @@ def b(v, encoding="utf-8", encodingErrors="strict"): # Force string representation. return str(v).encode(encoding, encodingErrors) + def u(v, encoding="utf-8", encodingErrors="strict"): if isinstance(v, bytes): # For python 3 decode bytes to str. @@ -117,15 +117,18 @@ def u(v, encoding="utf-8", encodingErrors="strict"): # Force string representation. 
return bytes(v).decode(encoding, encodingErrors) + def is_string(v): return isinstance(v, str) + def pathlike_obj(path): if isinstance(path, os.PathLike): return os.fsdecode(path) else: return path + # Begin @@ -1089,9 +1092,7 @@ def __str__(self): ) ) if self.dbf: - info.append( - f" {len(self)} records ({len(self.fields)} fields)" - ) + info.append(f" {len(self)} records ({len(self.fields)} fields)") return "\n".join(info) def __enter__(self): diff --git a/test_shapefile.py b/test_shapefile.py index b55c1f7b..5f9b855d 100644 --- a/test_shapefile.py +++ b/test_shapefile.py @@ -5,7 +5,6 @@ import datetime import json import os.path - from pathlib import Path # third party imports From 48a6a47b44189b9a5f0f6968876906ff870c34a6 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Mon, 21 Jul 2025 15:57:58 +0100 Subject: [PATCH 005/220] Remove reference to removed constant --- shapefile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/shapefile.py b/shapefile.py index 9e62edca..5dff293f 100644 --- a/shapefile.py +++ b/shapefile.py @@ -2802,7 +2802,7 @@ def _replace_remote_url( fragment=fragment, ) - new_url = urlunparse(new_parsed) if PYTHON3 else urlunparse(list(new_parsed)) + new_url = urlunparse(new_parsed) return new_url From 8954a9766729531066328d5e23a53caf144d7249 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Mon, 21 Jul 2025 16:04:49 +0100 Subject: [PATCH 006/220] Set continue-on-error: true on tests of Python 2.7, ..., 3.8 --- .github/workflows/run_tests_hooks_and_tools.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/run_tests_hooks_and_tools.yml b/.github/workflows/run_tests_hooks_and_tools.yml index 468b2e2b..548d9dc7 100644 --- a/.github/workflows/run_tests_hooks_and_tools.yml +++ b/.github/workflows/run_tests_hooks_and_tools.yml @@ -31,6 +31,7 @@ jobs: pylint --disable=R,C test_shapefile.py test_on_EOL_Pythons: + 
continue-on-error: true strategy: fail-fast: false matrix: From 5e1beec08f8d24e11e6bf38a5cecbd6cbb8d913f Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Mon, 21 Jul 2025 16:34:31 +0100 Subject: [PATCH 007/220] Update .pre-commit-config.yaml --- .pre-commit-config.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ffe59bf6..85e04eba 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -13,3 +13,7 @@ repos: hooks: - id: check-yaml - id: trailing-whitespace +- repo: https://github.com/pre-commit/mirrors-mypy + rev: v1.17.0 + hooks: + - id: mypy \ No newline at end of file From cb0c527dee4993e426fe0929e4e280e612ca22d3 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Mon, 21 Jul 2025 16:56:03 +0100 Subject: [PATCH 008/220] Add type hints to doctest runner and filter --- shapefile.py | 39 ++++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/shapefile.py b/shapefile.py index 5dff293f..ba2f4fac 100644 --- a/shapefile.py +++ b/shapefile.py @@ -9,6 +9,7 @@ __version__ = "2.4.0" import array +import doctest import io import logging import os @@ -18,6 +19,7 @@ import zipfile from datetime import date from struct import Struct, calcsize, error, pack, unpack +from typing import Iterable, Iterator from urllib.error import HTTPError from urllib.parse import urlparse, urlunparse from urllib.request import Request, urlopen @@ -25,6 +27,8 @@ # Create named logger logger = logging.getLogger(__name__) +doctest.NORMALIZE_WHITESPACE = 1 + # Module settings VERBOSE = True @@ -2714,11 +2718,7 @@ def field(self, name, fieldType="C", size="50", decimal=0): # Begin Testing -def _get_doctests(): - import doctest - - doctest.NORMALIZE_WHITESPACE = 1 - +def _get_doctests() -> doctest.DocTest: # run tests with open("README.md", "rb") as fobj: tests = 
doctest.DocTestParser().get_doctest( @@ -2732,7 +2732,11 @@ def _get_doctests(): return tests -def _filter_network_doctests(examples, include_network=False, include_non_network=True): +def _filter_network_doctests( + examples: Iterable[doctest.Example], + include_network: bool = False, + include_non_network: bool = True, +) -> Iterator[doctest.Example]: globals_from_network_doctests = set() if not (include_network or include_non_network): @@ -2773,16 +2777,16 @@ def _filter_network_doctests(examples, include_network=False, include_non_networ def _replace_remote_url( - old_url, + old_url: str, # Default port of Python http.server and Python 2's SimpleHttpServer - port=8000, - scheme="http", - netloc="localhost", - path=None, - params="", - query="", - fragment="", -): + port: int = 8000, + scheme: str = "http", + netloc: str = "localhost", + path: str | None = None, + params: str = "", + query: str = "", + fragment: str = "", +) -> str: old_parsed = urlparse(old_url) # Strip subpaths, so an artefacts @@ -2806,15 +2810,12 @@ def _replace_remote_url( return new_url -def _test(args=sys.argv[1:], verbosity=0): +def _test(args: list[str] = sys.argv[1:], verbosity: bool = False) -> int: if verbosity == 0: print("Getting doctests...") - import doctest import re - doctest.NORMALIZE_WHITESPACE = 1 - tests = _get_doctests() if len(args) >= 2 and args[0] == "-m": From 2457a953f5a4e26ac7607a8f31bda7ce60b74818 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Mon, 21 Jul 2025 17:02:10 +0100 Subject: [PATCH 009/220] Use typing.Optional instead of T | None 3.10 syntax --- .github/workflows/run_tests_hooks_and_tools.yml | 2 -- shapefile.py | 4 ++-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/.github/workflows/run_tests_hooks_and_tools.yml b/.github/workflows/run_tests_hooks_and_tools.yml index 548d9dc7..b6dd3330 100644 --- a/.github/workflows/run_tests_hooks_and_tools.yml +++ 
b/.github/workflows/run_tests_hooks_and_tools.yml @@ -36,8 +36,6 @@ jobs: fail-fast: false matrix: python-version: [ - "2.7", - "3.5", "3.6", "3.7", "3.8", diff --git a/shapefile.py b/shapefile.py index ba2f4fac..1399aa35 100644 --- a/shapefile.py +++ b/shapefile.py @@ -19,7 +19,7 @@ import zipfile from datetime import date from struct import Struct, calcsize, error, pack, unpack -from typing import Iterable, Iterator +from typing import Iterable, Iterator, Optional from urllib.error import HTTPError from urllib.parse import urlparse, urlunparse from urllib.request import Request, urlopen @@ -2782,7 +2782,7 @@ def _replace_remote_url( port: int = 8000, scheme: str = "http", netloc: str = "localhost", - path: str | None = None, + path: Optional[str] = None, params: str = "", query: str = "", fragment: str = "", From 72bdfeb7cb10031a8385255e183b0dcbdc19d88c Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Mon, 21 Jul 2025 17:05:09 +0100 Subject: [PATCH 010/220] Remove EOL Python tests, so that we can use list[] 3.9 type syntax --- .../workflows/run_tests_hooks_and_tools.yml | 34 ------------------- 1 file changed, 34 deletions(-) diff --git a/.github/workflows/run_tests_hooks_and_tools.yml b/.github/workflows/run_tests_hooks_and_tools.yml index b6dd3330..42c981e1 100644 --- a/.github/workflows/run_tests_hooks_and_tools.yml +++ b/.github/workflows/run_tests_hooks_and_tools.yml @@ -30,40 +30,6 @@ jobs: run: | pylint --disable=R,C test_shapefile.py - test_on_EOL_Pythons: - continue-on-error: true - strategy: - fail-fast: false - matrix: - python-version: [ - "3.6", - "3.7", - "3.8", - ] - - runs-on: ubuntu-latest - container: - image: python:${{ matrix.python-version }} - - steps: - - uses: actions/checkout@v4 - with: - path: ./Pyshp - - - name: Non-network tests - uses: ./Pyshp/.github/actions/test - with: - pyshp_repo_directory: ./Pyshp - python-version: ${{ matrix.python-version }} - - - name: Network tests - uses: 
./Pyshp/.github/actions/test - with: - extra_args: '-m network' - replace_remote_urls_with_localhost: 'yes' - pyshp_repo_directory: ./Pyshp - python-version: ${{ matrix.python-version }} - test_on_supported_Pythons: strategy: fail-fast: false From d199fa34691551643038aee32b2f7981f5ecd95b Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Mon, 21 Jul 2025 17:15:00 +0100 Subject: [PATCH 011/220] Type hint b and u helper and other functions --- shapefile.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/shapefile.py b/shapefile.py index 1399aa35..271d7382 100644 --- a/shapefile.py +++ b/shapefile.py @@ -19,7 +19,7 @@ import zipfile from datetime import date from struct import Struct, calcsize, error, pack, unpack -from typing import Iterable, Iterator, Optional +from typing import Any, Iterable, Iterator, Optional, Union from urllib.error import HTTPError from urllib.parse import urlparse, urlunparse from urllib.request import Request, urlopen @@ -92,7 +92,9 @@ NODATA = -10e38 # as per the ESRI shapefile spec, only used for m-values. -def b(v, encoding="utf-8", encodingErrors="strict"): +def b( + v: Union[str, bytes], encoding: str = "utf-8", encodingErrors: str = "strict" +) -> bytes: if isinstance(v, str): # For python 3 encode str to bytes. return v.encode(encoding, encodingErrors) @@ -107,7 +109,9 @@ def b(v, encoding="utf-8", encodingErrors="strict"): return str(v).encode(encoding, encodingErrors) -def u(v, encoding="utf-8", encodingErrors="strict"): +def u( + v: Union[str, bytes], encoding: str = "utf-8", encodingErrors: str = "strict" +) -> str: if isinstance(v, bytes): # For python 3 decode bytes to str. 
return v.decode(encoding, encodingErrors) @@ -122,11 +126,11 @@ def u(v, encoding="utf-8", encodingErrors="strict"): return bytes(v).decode(encoding, encodingErrors) -def is_string(v): +def is_string(v: Any) -> bool: return isinstance(v, str) -def pathlike_obj(path): +def pathlike_obj(path: Any) -> Any: if isinstance(path, os.PathLike): return os.fsdecode(path) else: From 7a895ce4b95ddfa855367d240b3f003fc6012cd7 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Mon, 21 Jul 2025 17:24:08 +0100 Subject: [PATCH 012/220] Type hint signed_area --- shapefile.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/shapefile.py b/shapefile.py index 271d7382..ddf53fcc 100644 --- a/shapefile.py +++ b/shapefile.py @@ -17,6 +17,7 @@ import tempfile import time import zipfile +from collections.abc import Collection from datetime import date from struct import Struct, calcsize, error, pack, unpack from typing import Any, Iterable, Iterator, Optional, Union @@ -148,7 +149,16 @@ def __repr__(self): return str(self.tolist()) -def signed_area(coords, fast=False): +def signed_area( + coords: Collection[ + Union[ + tuple[float, float], + tuple[float, float, float], + tuple[float, float, float, float], + ] + ], + fast: bool = False, +) -> float: """Return the signed area enclosed by a ring using the linear time algorithm. A value >= 0 indicates a counter-clockwise oriented ring. 
A faster version is possible by setting 'fast' to True, which returns From 0f1e006b518b5180ccace0cd01fda2e615db01c4 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Mon, 21 Jul 2025 17:47:03 +0100 Subject: [PATCH 013/220] Add more type hints --- shapefile.py | 41 +++++++++++++++++++++++------------------ 1 file changed, 23 insertions(+), 18 deletions(-) diff --git a/shapefile.py b/shapefile.py index ddf53fcc..3c6ebc9d 100644 --- a/shapefile.py +++ b/shapefile.py @@ -20,7 +20,7 @@ from collections.abc import Collection from datetime import date from struct import Struct, calcsize, error, pack, unpack -from typing import Any, Iterable, Iterator, Optional, Union +from typing import Any, Iterable, Iterator, Optional, Reversible, Union from urllib.error import HTTPError from urllib.parse import urlparse, urlunparse from urllib.request import Request, urlopen @@ -149,14 +149,16 @@ def __repr__(self): return str(self.tolist()) +Point2D = tuple[float, float] +PointZ = tuple[float, float, float] +PointZM = tuple[float, float, float, float] + +Coord = Union[Point2D, PointZ, PointZM] +Coords = Collection[Coord] + + def signed_area( - coords: Collection[ - Union[ - tuple[float, float], - tuple[float, float, float], - tuple[float, float, float, float], - ] - ], + coords: Coords, fast: bool = False, ) -> float: """Return the signed area enclosed by a ring using the linear time @@ -174,7 +176,7 @@ def signed_area( return area2 / 2.0 -def is_cw(coords): +def is_cw(coords: Coords) -> bool: """Returns True if a polygon ring has clockwise orientation, determined by a negatively signed area. 
""" @@ -182,35 +184,38 @@ def is_cw(coords): return area2 < 0 -def rewind(coords): +def rewind(coords: Reversible[Coord]) -> list[Coord]: """Returns the input coords in reversed order.""" return list(reversed(coords)) -def ring_bbox(coords): +BBox = tuple[float, float, float, float] + + +def ring_bbox(coords: Coords) -> BBox: """Calculates and returns the bounding box of a ring.""" xs, ys = zip(*coords) bbox = min(xs), min(ys), max(xs), max(ys) return bbox -def bbox_overlap(bbox1, bbox2): - """Tests whether two bounding boxes overlap, returning a boolean""" +def bbox_overlap(bbox1: BBox, bbox2: BBox) -> bool: + """Tests whether two bounding boxes overlap.""" xmin1, ymin1, xmax1, ymax1 = bbox1 xmin2, ymin2, xmax2, ymax2 = bbox2 overlap = xmin1 <= xmax2 and xmax1 >= xmin2 and ymin1 <= ymax2 and ymax1 >= ymin2 return overlap -def bbox_contains(bbox1, bbox2): - """Tests whether bbox1 fully contains bbox2, returning a boolean""" +def bbox_contains(bbox1: BBox, bbox2: BBox) -> bool: + """Tests whether bbox1 fully contains bbox2.""" xmin1, ymin1, xmax1, ymax1 = bbox1 xmin2, ymin2, xmax2, ymax2 = bbox2 contains = xmin1 < xmin2 and xmax1 > xmax2 and ymin1 < ymin2 and ymax1 > ymax2 return contains -def ring_contains_point(coords, p): +def ring_contains_point(coords: list[Coord], p: Point2D) -> bool: """Fast point-in-polygon crossings algorithm, MacMartin optimization. Adapted from code by Eric Haynes @@ -255,7 +260,7 @@ def ring_contains_point(coords, p): return inside_flag -def ring_sample(coords, ccw=False): +def ring_sample(coords: list[Coord], ccw: bool = False) -> Coord: """Return a sample point guaranteed to be within a ring, by efficiently finding the first centroid of a coordinate triplet whose orientation matches the orientation of the ring and passes the point-in-ring test. 
@@ -302,7 +307,7 @@ def itercoords(): raise Exception("Unexpected error: Unable to find a ring sample point.") -def ring_contains_ring(coords1, coords2): +def ring_contains_ring(coords1: list[Coord], coords2: list[Point2D]) -> bool: """Returns True if all vertexes in coords2 are fully inside coords1.""" return all(ring_contains_point(coords1, p2) for p2 in coords2) From 165d99d3242df35364dba66704930b2c7e9a0176 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Mon, 21 Jul 2025 18:15:49 +0100 Subject: [PATCH 014/220] Annotate organize_polygon_rings --- shapefile.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/shapefile.py b/shapefile.py index 3c6ebc9d..8a22c9f5 100644 --- a/shapefile.py +++ b/shapefile.py @@ -260,7 +260,7 @@ def ring_contains_point(coords: list[Coord], p: Point2D) -> bool: return inside_flag -def ring_sample(coords: list[Coord], ccw: bool = False) -> Coord: +def ring_sample(coords: list[Coord], ccw: bool = False) -> Point2D: """Return a sample point guaranteed to be within a ring, by efficiently finding the first centroid of a coordinate triplet whose orientation matches the orientation of the ring and passes the point-in-ring test. @@ -312,7 +312,9 @@ def ring_contains_ring(coords1: list[Coord], coords2: list[Point2D]) -> bool: return all(ring_contains_point(coords1, p2) for p2 in coords2) -def organize_polygon_rings(rings, return_errors=None): +def organize_polygon_rings( + rings: Iterable[list[Coord]], return_errors: Optional[dict[str, int]] = None +) -> list[list[list[Coord]]]: """Organize a list of coordinate rings into one or more polygons with holes. Returns a list of polygons, where each polygon is composed of a single exterior ring, and one or more interior holes. 
If a return_errors dict is provided (optional), @@ -357,7 +359,9 @@ def organize_polygon_rings(rings, return_errors=None): return polys # first determine each hole's candidate exteriors based on simple bbox contains test - hole_exteriors = {hole_i: [] for hole_i in range(len(holes))} + hole_exteriors: dict[int, list[int]] = { + hole_i: [] for hole_i in range(len(holes)) + } exterior_bboxes = [ring_bbox(ring) for ring in exteriors] for hole_i in hole_exteriors.keys(): hole_bbox = ring_bbox(holes[hole_i]) From c5c07eb953c46bec945601b250f29d2ae046906f Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Mon, 21 Jul 2025 18:30:34 +0100 Subject: [PATCH 015/220] Begin type annotations of Shape --- shapefile.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/shapefile.py b/shapefile.py index 8a22c9f5..d8498e2d 100644 --- a/shapefile.py +++ b/shapefile.py @@ -443,7 +443,12 @@ def organize_polygon_rings( class Shape: def __init__( - self, shapeType=NULL, points=None, parts=None, partTypes=None, oid=None + self, + shapeType: int = NULL, + points: Optional[Coords] = None, + parts: Optional[list[int]] = None, + partTypes: Optional[list[int]] = None, + oid: Optional[int] = None, ): """Stores the geometry of the different shape types specified in the Shapefile spec. 
Shape types are @@ -463,7 +468,7 @@ def __init__( self.partTypes = partTypes # and a dict to silently record any errors encountered - self._errors = {} + self._errors: dict[str, int] = {} # add oid if oid is not None: @@ -666,12 +671,12 @@ def _from_geojson(geoj): return shape @property - def oid(self): + def oid(self) -> int: """The index position of the shape in the original shapefile""" return self.__oid @property - def shapeTypeName(self): + def shapeTypeName(self) -> str: return SHAPETYPE_LOOKUP[self.shapeType] def __repr__(self): From c3fc7f6666cefd9aa58f8a46bdf75d438c4ca073 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Mon, 21 Jul 2025 19:15:32 +0100 Subject: [PATCH 016/220] Replace kwargs.pop with actual key word args --- .pre-commit-config.yaml | 1 + shapefile.py | 77 ++++++++++++++++++++++++++--------------- 2 files changed, 51 insertions(+), 27 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 85e04eba..3849c557 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -8,6 +8,7 @@ repos: hooks: - id: isort name: isort (python) + args: ["--profile", "black"] - repo: https://github.com/pre-commit/pre-commit-hooks rev: v2.3.0 hooks: diff --git a/shapefile.py b/shapefile.py index d8498e2d..e56b3b27 100644 --- a/shapefile.py +++ b/shapefile.py @@ -20,7 +20,7 @@ from collections.abc import Collection from datetime import date from struct import Struct, calcsize, error, pack, unpack -from typing import Any, Iterable, Iterator, Optional, Reversible, Union +from typing import Any, Iterable, Iterator, Optional, Reversible, TypedDict, Union from urllib.error import HTTPError from urllib.parse import urlparse, urlunparse from urllib.request import Request, urlopen @@ -87,6 +87,25 @@ 5: "RING", } +# Custom type variables + +Point2D = tuple[float, float] +PointZ = tuple[float, float, float] +PointZM = tuple[float, float, float, float] + +Coord = Union[Point2D, PointZ, 
PointZM] +Coords = list[Coord] + +BBox = tuple[float, float, float, float] + + +class GeoJSONT(TypedDict): + type: str + coordinates: Union[ + tuple[()], Point2D, PointZ, PointZM, Coords, list[Coords], list[list[Coords]] + ] + + # Helpers MISSING = [None, ""] @@ -149,14 +168,6 @@ def __repr__(self): return str(self.tolist()) -Point2D = tuple[float, float] -PointZ = tuple[float, float, float] -PointZM = tuple[float, float, float, float] - -Coord = Union[Point2D, PointZ, PointZM] -Coords = Collection[Coord] - - def signed_area( coords: Coords, fast: bool = False, @@ -189,9 +200,6 @@ def rewind(coords: Reversible[Coord]) -> list[Coord]: return list(reversed(coords)) -BBox = tuple[float, float, float, float] - - def ring_bbox(coords: Coords) -> BBox: """Calculates and returns the bounding box of a ring.""" xs, ys = zip(*coords) @@ -445,7 +453,7 @@ class Shape: def __init__( self, shapeType: int = NULL, - points: Optional[Coords] = None, + points: Optional[list[Coord]] = None, parts: Optional[list[int]] = None, partTypes: Optional[list[int]] = None, oid: Optional[int] = None, @@ -477,16 +485,18 @@ def __init__( self.__oid = -1 @property - def __geo_interface__(self): + def __geo_interface__(self) -> GeoJSONT: if self.shapeType in [POINT, POINTM, POINTZ]: # point if len(self.points) == 0: # the shape has no coordinate information, i.e. 
is 'empty' # the geojson spec does not define a proper null-geometry type # however, it does allow geometry types with 'empty' coordinates to be interpreted as null-geometries - return {"type": "Point", "coordinates": tuple()} + return {"type": "Point", "coordinates": ()} + # return {"type": "Point", "coordinates": tuple()} #type: ignore else: - return {"type": "Point", "coordinates": tuple(self.points[0])} + return {"type": "Point", "coordinates": self.points[0]} + # return {"type": "Point", "coordinates": tuple(self.points[0])} # type: ignore elif self.shapeType in [MULTIPOINT, MULTIPOINTM, MULTIPOINTZ]: if len(self.points) == 0: # the shape has no coordinate information, i.e. is 'empty' @@ -497,7 +507,8 @@ def __geo_interface__(self): # multipoint return { "type": "MultiPoint", - "coordinates": [tuple(p) for p in self.points], + "coordinates": self.points, + # "coordinates": [tuple(p) for p in self.points], #type: ignore } elif self.shapeType in [POLYLINE, POLYLINEM, POLYLINEZ]: if len(self.parts) == 0: @@ -509,7 +520,8 @@ def __geo_interface__(self): # linestring return { "type": "LineString", - "coordinates": [tuple(p) for p in self.points], + "coordinates": self.points, + # "coordinates": [tuple(p) for p in self.points], #type: ignore } else: # multilinestring @@ -520,10 +532,12 @@ def __geo_interface__(self): ps = part continue else: - coordinates.append([tuple(p) for p in self.points[ps:part]]) + # coordinates.append([tuple(p) for p in self.points[ps:part]]) + coordinates.append([p for p in self.points[ps:part]]) ps = part else: - coordinates.append([tuple(p) for p in self.points[part:]]) + # coordinates.append([tuple(p) for p in self.points[part:]]) + coordinates.append([p for p in self.points[part:]]) return {"type": "MultiLineString", "coordinates": coordinates} elif self.shapeType in [POLYGON, POLYGONM, POLYGONZ]: if len(self.parts) == 0: @@ -543,7 +557,8 @@ def __geo_interface__(self): end = len(self.points) # extract the points that make up the ring - 
ring = [tuple(p) for p in self.points[start:end]] + # ring = [tuple(p) for p in self.points[start:end]] + ring = [p for p in self.points[start:end]] rings.append(ring) # organize rings into list of polygons, where each polygon is defined as list of rings. @@ -918,7 +933,7 @@ class Reader: but they can be. """ - def __init__(self, *args, **kwargs): + def __init__(self, *args, encoding="utf-8", encodingErrors="strict", **kwargs): self.shp = None self.shx = None self.dbf = None @@ -931,8 +946,8 @@ def __init__(self, *args, **kwargs): self.fields = [] self.__dbfHdrLength = 0 self.__fieldLookup = {} - self.encoding = kwargs.pop("encoding", "utf-8") - self.encodingErrors = kwargs.pop("encodingErrors", "strict") + self.encoding = encoding + self.encodingErrors = encodingErrors # See if a shapefile name was passed as the first argument if len(args) > 0: path = pathlike_obj(args[0]) @@ -1876,7 +1891,15 @@ def iterShapeRecords(self, fields=None, bbox=None): class Writer: """Provides write support for ESRI Shapefiles.""" - def __init__(self, target=None, shapeType=None, autoBalance=False, **kwargs): + def __init__( + self, + target=None, + shapeType=None, + autoBalance=False, + encoding="utf-8", + encodingErrors="strict", + **kwargs, + ): self.target = target self.autoBalance = autoBalance self.fields = [] @@ -1920,8 +1943,8 @@ def __init__(self, target=None, shapeType=None, autoBalance=False, **kwargs): # Use deletion flags in dbf? Default is false (0). Note: Currently has no effect, records should NOT contain deletion flags. self.deletionFlag = 0 # Encoding - self.encoding = kwargs.pop("encoding", "utf-8") - self.encodingErrors = kwargs.pop("encodingErrors", "strict") + self.encoding = encoding + self.encodingErrors = encodingErrors def __len__(self): """Returns the current number of features written to the shapefile. 
From 209421c1846bd406dd164ea37426b2ccf3fd1224 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Mon, 21 Jul 2025 20:46:31 +0100 Subject: [PATCH 017/220] Remove code duplication in constituent file loading (more kwargs) --- shapefile.py | 189 +++++++++++++++++++++++++++++---------------------- 1 file changed, 107 insertions(+), 82 deletions(-) diff --git a/shapefile.py b/shapefile.py index e56b3b27..0a99fbe2 100644 --- a/shapefile.py +++ b/shapefile.py @@ -20,7 +20,7 @@ from collections.abc import Collection from datetime import date from struct import Struct, calcsize, error, pack, unpack -from typing import Any, Iterable, Iterator, Optional, Reversible, TypedDict, Union +from typing import IO, Any, Iterable, Iterator, Optional, Reversible, TypedDict, Union from urllib.error import HTTPError from urllib.parse import urlparse, urlunparse from urllib.request import Request, urlopen @@ -912,6 +912,10 @@ class ShapefileException(Exception): pass +class _NoShpSentinel(object): + pass + + class Reader: """Reads the three files of a shapefile as a unit or separately. If one of the three files (.shp, .shx, @@ -933,10 +937,25 @@ class Reader: but they can be. 
""" - def __init__(self, *args, encoding="utf-8", encodingErrors="strict", **kwargs): - self.shp = None - self.shx = None - self.dbf = None + CONSTITUENT_FILE_EXTS = ["shp", "shx", "dbf"] + assert all(ext.islower() for ext in CONSTITUENT_FILE_EXTS) + + def _assert_ext_is_supported(self, ext: str): + assert ext in self.CONSTITUENT_FILE_EXTS + + def __init__( + self, + *args, + encoding="utf-8", + encodingErrors="strict", + shp=_NoShpSentinel, + shx=None, + dbf=None, + **kwargs, + ): + # self.shp = None + # self.shx = None + # self.dbf = None self._files_to_close = [] self.shapeName = "Not specified" self._offsets = [] @@ -1014,19 +1033,20 @@ def __init__(self, *args, encoding="utf-8", encodingErrors="strict", **kwargs): shapefile = os.path.splitext(shapefile)[ 0 ] # root shapefile name - for ext in ["SHP", "SHX", "DBF", "shp", "shx", "dbf"]: - try: - member = archive.open(shapefile + "." + ext) - # write zipfile member data to a read+write tempfile and use as source, gets deleted on close() - fileobj = tempfile.NamedTemporaryFile( - mode="w+b", delete=True - ) - fileobj.write(member.read()) - fileobj.seek(0) - setattr(self, ext.lower(), fileobj) - self._files_to_close.append(fileobj) - except: - pass + for lower_ext in self.CONSTITUENT_FILE_EXTS: + for cased_ext in [lower_ext, lower_ext.upper()]: + try: + member = archive.open(f"{shapefile}.{cased_ext}") + # write zipfile member data to a read+write tempfile and use as source, gets deleted on close() + fileobj = tempfile.NamedTemporaryFile( + mode="w+b", delete=True + ) + fileobj.write(member.read()) + fileobj.seek(0) + setattr(self, lower_ext, fileobj) + self._files_to_close.append(fileobj) + except: + pass # Close and delete the temporary zipfile try: zipfileobj.close() @@ -1086,46 +1106,47 @@ def __init__(self, *args, encoding="utf-8", encodingErrors="strict", **kwargs): self.load(path) return - # Otherwise, load from separate shp/shx/dbf args (must be path or file-like) - if "shp" in kwargs: - if 
hasattr(kwargs["shp"], "read"): - self.shp = kwargs["shp"] - # Copy if required - try: - self.shp.seek(0) - except (NameError, io.UnsupportedOperation): - self.shp = io.BytesIO(self.shp.read()) - else: - (baseName, ext) = os.path.splitext(kwargs["shp"]) - self.load_shp(baseName) - - if "shx" in kwargs: - if hasattr(kwargs["shx"], "read"): - self.shx = kwargs["shx"] - # Copy if required - try: - self.shx.seek(0) - except (NameError, io.UnsupportedOperation): - self.shx = io.BytesIO(self.shx.read()) - else: - (baseName, ext) = os.path.splitext(kwargs["shx"]) - self.load_shx(baseName) + if shp is _NoShpSentinel: + self.shp = None + self.shx = None + else: + self.shp = self._seek_0_on_file_obj_wrap_or_open_from_name("shp", shp) + self.shx = self._seek_0_on_file_obj_wrap_or_open_from_name("shx", shx) - if "dbf" in kwargs: - if hasattr(kwargs["dbf"], "read"): - self.dbf = kwargs["dbf"] - # Copy if required - try: - self.dbf.seek(0) - except (NameError, io.UnsupportedOperation): - self.dbf = io.BytesIO(self.dbf.read()) - else: - (baseName, ext) = os.path.splitext(kwargs["dbf"]) - self.load_dbf(baseName) + self.dbf = self._seek_0_on_file_obj_wrap_or_open_from_name("dbf", dbf) # Load the files if self.shp or self.dbf: - self.load() + self._try_to_set_constituent_file_headers() + + def _seek_0_on_file_obj_wrap_or_open_from_name( + self, + ext: str, + # File name, file object or anything with a read() method that returns bytes. 
+    # TODO: Create simple Protocol with a read() method +    file_: Optional[Union[str, IO[bytes]]], +    ) -> Union[None, io.BytesIO, IO[bytes]]: +        # assert ext in {'shp', 'dbf', 'shx'} +        self._assert_ext_is_supported(ext) + +        if file_ is None: +            return None + +        if isinstance(file_, str): +            baseName, __ = os.path.splitext(file_) +            return self._load_constituent_file(baseName, ext) + +        if hasattr(file_, "read"): +            # Copy if required +            try: +                file_.seek(0)  # type: ignore +                return file_ +            except (NameError, io.UnsupportedOperation): +                return io.BytesIO(file_.read()) + +        raise ShapefileException( +            f"Could not load shapefile constituent file from: {file_}" +        ) def __str__(self): """ @@ -1232,6 +1253,9 @@ def load(self, shapefile=None): raise ShapefileException( f"Unable to open {shapeName}.dbf or {shapeName}.shp." ) +        self._try_to_set_constituent_file_headers() + +    def _try_to_set_constituent_file_headers(self): if self.shp: self.__shpHeader() if self.dbf: @@ -1239,50 +1263,51 @@ def load(self, shapefile=None): if self.shx: self.__shxHeader() -    def load_shp(self, shapefile_name): +    def _try_get_open_constituent_file(self, shapefile_name: str, ext: str): """ -        Attempts to load file with .shp extension as both lower and upper case +        Attempts to open a .shp, .dbf or .shx file, +        with both lower case and upper case file extensions, +        and return it. If it was not possible to open the file, None is returned. """ -        shp_ext = "shp" +        # typing.LiteralString is only available from Python 3.11 onwards. 
+ # https://docs.python.org/3/library/typing.html#typing.LiteralString + self._assert_ext_is_supported(ext) try: - self.shp = open(f"{shapefile_name}.{shp_ext}", "rb") - self._files_to_close.append(self.shp) + return open(f"{shapefile_name}.{ext}", "rb") except OSError: try: - self.shp = open(f"{shapefile_name}.{shp_ext.upper()}", "rb") - self._files_to_close.append(self.shp) + return open(f"{shapefile_name}.{ext.upper()}", "rb") except OSError: - pass + return None + + def _load_constituent_file(self, shapefile_name: str, ext: str): + """ + Attempts to open a .shp, .dbf or .shx file, with the extension + as both lower and upper case, and if successful append it to + self._files_to_close. + """ + shp_dbf_or_dhx_file = self._try_get_open_constituent_file(shapefile_name, ext) + if shp_dbf_or_dhx_file is not None: + self._files_to_close.append(shp_dbf_or_dhx_file) + return shp_dbf_or_dhx_file + + def load_shp(self, shapefile_name): + """ + Attempts to load file with .shp extension as both lower and upper case + """ + self.shp = self._load_constituent_file(shapefile_name, "shp") def load_shx(self, shapefile_name): """ Attempts to load file with .shx extension as both lower and upper case """ - shx_ext = "shx" - try: - self.shx = open(f"{shapefile_name}.{shx_ext}", "rb") - self._files_to_close.append(self.shx) - except OSError: - try: - self.shx = open(f"{shapefile_name}.{shx_ext.upper()}", "rb") - self._files_to_close.append(self.shx) - except OSError: - pass + self.shx = self._load_constituent_file(shapefile_name, "shx") def load_dbf(self, shapefile_name): """ Attempts to load file with .dbf extension as both lower and upper case """ - dbf_ext = "dbf" - try: - self.dbf = open(f"{shapefile_name}.{dbf_ext}", "rb") - self._files_to_close.append(self.dbf) - except OSError: - try: - self.dbf = open(f"{shapefile_name}.{dbf_ext.upper()}", "rb") - self._files_to_close.append(self.dbf) - except OSError: - pass + self.dbf = self._load_constituent_file(shapefile_name, "dbf") def 
__del__(self): self.close() From f4fdf2ccbdfdbaeeb656c92d890366bbe11dc2ae Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Mon, 21 Jul 2025 21:36:36 +0100 Subject: [PATCH 018/220] Add docstring to sentinel. --- shapefile.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/shapefile.py b/shapefile.py index 0a99fbe2..12f94d1a 100644 --- a/shapefile.py +++ b/shapefile.py @@ -913,6 +913,12 @@ class ShapefileException(Exception): class _NoShpSentinel(object): +    """For use as a default value for shp to preserve the +    behaviour (from when all keyword args were gathered +    in the **kwargs dict) in case someone explicitly +    called Reader(shp=None) to load self.shx. +    """ + pass @@ -1106,15 +1112,14 @@ def __init__( self.load(path) return +        self.shp = self._seek_0_on_file_obj_wrap_or_open_from_name("shp", shp) +        self.dbf = self._seek_0_on_file_obj_wrap_or_open_from_name("dbf", dbf) + if shp is _NoShpSentinel: -            self.shp = None self.shx = None else: -            self.shp = self._seek_0_on_file_obj_wrap_or_open_from_name("shp", shp) self.shx = self._seek_0_on_file_obj_wrap_or_open_from_name("shx", shx) -        self.dbf = self._seek_0_on_file_obj_wrap_or_open_from_name("dbf", dbf) - # Load the files if self.shp or self.dbf: self._try_to_set_constituent_file_headers() From 1bb2e380e70f98ddae160baeccbc6aa4f1cfeb41 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Mon, 21 Jul 2025 21:59:20 +0100 Subject: [PATCH 019/220] Restore self.shp = None etc. 
Replace *args with a kwarg: shapefile_path --- shapefile.py | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/shapefile.py b/shapefile.py index 12f94d1a..866eb316 100644 --- a/shapefile.py +++ b/shapefile.py @@ -951,7 +951,8 @@ def _assert_ext_is_supported(self, ext: str): def __init__( self, - *args, + shapefile_path: str = "", + *, encoding="utf-8", encodingErrors="strict", shp=_NoShpSentinel, @@ -959,23 +960,23 @@ def __init__( dbf=None, **kwargs, ): - # self.shp = None - # self.shx = None - # self.dbf = None + self.shp = None + self.shx = None + self.dbf = None self._files_to_close = [] self.shapeName = "Not specified" - self._offsets = [] + self._offsets: list[int] = [] self.shpLength = None self.numRecords = None self.numShapes = None - self.fields = [] + self.fields: list[list[str]] = [] self.__dbfHdrLength = 0 - self.__fieldLookup = {} + self.__fieldLookup: dict[str, int] = {} self.encoding = encoding self.encodingErrors = encodingErrors # See if a shapefile name was passed as the first argument - if len(args) > 0: - path = pathlike_obj(args[0]) + if shapefile_path: + path = pathlike_obj(shapefile_path) if is_string(path): if ".zip" in path: # Shapefile is inside a zipfile @@ -992,6 +993,8 @@ def __init__( else: zpath = path[: path.find(".zip") + 4] shapefile = path[path.find(".zip") + 4 + 1 :] + + zipfileobj: Union[tempfile._TemporaryFileWrapper, io.BufferedReader] # Create a zip file handle if zpath.startswith("http"): # Zipfile is from a url From 2b1aa2f10b960cf86d25f334a4d3a516e58aa47a Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Mon, 21 Jul 2025 22:05:31 +0100 Subject: [PATCH 020/220] Don't load shp from sentinel --- shapefile.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/shapefile.py b/shapefile.py index 866eb316..c1a2d76b 100644 --- a/shapefile.py +++ b/shapefile.py @@ -1115,14 +1115,12 @@ def __init__( self.load(path) 
return -        self.shp = self._seek_0_on_file_obj_wrap_or_open_from_name("shp", shp) -        self.dbf = self._seek_0_on_file_obj_wrap_or_open_from_name("dbf", dbf) - -        if shp is _NoShpSentinel: -            self.shx = None -        else: +        if shp is not _NoShpSentinel: +            self.shp = self._seek_0_on_file_obj_wrap_or_open_from_name("shp", shp) self.shx = self._seek_0_on_file_obj_wrap_or_open_from_name("shx", shx) +        self.dbf = self._seek_0_on_file_obj_wrap_or_open_from_name("dbf", dbf) + # Load the files if self.shp or self.dbf: self._try_to_set_constituent_file_headers() From 9e157d6132f0422c3ffd9c0310f762468046ce83 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Tue, 22 Jul 2025 23:29:06 +0100 Subject: [PATCH 021/220] Replace kwargs.get with key word args and defaults. --- shapefile.py | 35 +++++++++++++++++++++++++---------- 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/shapefile.py b/shapefile.py index c1a2d76b..e5632981 100644 --- a/shapefile.py +++ b/shapefile.py @@ -963,7 +963,7 @@ def __init__( self.shp = None self.shx = None self.dbf = None -        self._files_to_close = [] +        self._files_to_close: list[IO[bytes]] = [] self.shapeName = "Not specified" self._offsets: list[int] = [] self.shpLength = None @@ -1269,7 +1269,11 @@ def _try_to_set_constituent_file_headers(self): if self.shx: self.__shxHeader() -    def _try_get_open_constituent_file(self, shapefile_name: str, ext: str): +    def _try_get_open_constituent_file( +        self, +        shapefile_name: str, +        ext: str, +    ) -> Union[IO[bytes], None]: """ Attempts to open a .shp, .dbf or .shx file, with both lower case and upper case file extensions, @@ -1277,7 +1281,9 @@ def _try_get_open_constituent_file(self, shapefile_name: str, ext: str): """ # typing.LiteralString is only available from Python 3.11 onwards. 
# https://docs.python.org/3/library/typing.html#typing.LiteralString + # assert ext in {'shp', 'dbf', 'shx'} self._assert_ext_is_supported(ext) + try: return open(f"{shapefile_name}.{ext}", "rb") except OSError: @@ -1286,7 +1292,11 @@ def _try_get_open_constituent_file(self, shapefile_name: str, ext: str): except OSError: return None - def _load_constituent_file(self, shapefile_name: str, ext: str): + def _load_constituent_file( + self, + shapefile_name: str, + ext: str, + ) -> Union[IO[bytes], None]: """ Attempts to open a .shp, .dbf or .shx file, with the extension as both lower and upper case, and if successful append it to @@ -1341,7 +1351,7 @@ def __getFileObj(self, f): self.load() return f - def __restrictIndex(self, i): + def __restrictIndex(self, i: int) -> int: """Provides list-like handling of a record index with a clearer error message if the index is out of bounds.""" if self.numRecords: @@ -1929,6 +1939,10 @@ def __init__( autoBalance=False, encoding="utf-8", encodingErrors="strict", + *, + shp=None, + shx=None, + dbf=None, **kwargs, ): self.target = target @@ -1948,8 +1962,7 @@ def __init__( self.shp = self.__getFileObj(os.path.splitext(target)[0] + ".shp") self.shx = self.__getFileObj(os.path.splitext(target)[0] + ".shx") self.dbf = self.__getFileObj(os.path.splitext(target)[0] + ".dbf") - elif kwargs.get("shp") or kwargs.get("shx") or kwargs.get("dbf"): - shp, shx, dbf = kwargs.get("shp"), kwargs.get("shx"), kwargs.get("dbf") + elif shp or shx or dbf: if shp: self.shp = self.__getFileObj(shp) if shx: @@ -2046,13 +2059,11 @@ def close(self): pass self._files_to_close = [] - def __getFileObj(self, f): + def __getFileObj(self, f: Union[IO[bytes], str]) -> IO[bytes]: """Safety handler to verify file-like objects""" if not f: raise ShapefileException("No file-like object available.") - elif hasattr(f, "write"): - return f - else: + if isinstance(f, str): pth = os.path.split(f)[0] if pth and not os.path.exists(pth): os.makedirs(pth) @@ -2060,6 +2071,10 @@ 
def __getFileObj(self, f): self._files_to_close.append(fp) return fp + if hasattr(f, "write"): + return f + raise Exception(f"Unsupported file-like: {f}") + def __shpFileLength(self): """Calculates the file length of the shp file.""" # Remember starting position From ec43361f609f316811c45b33508c3d06c7fc9b92 Mon Sep 17 00:00:00 2001 From: Mike Taves Date: Tue, 22 Jul 2025 09:33:17 +1200 Subject: [PATCH 022/220] Set minimum Python 3.9, move project metadata to pyproject.toml --- .github/actions/test/action.yml | 2 +- README.md | 2 -- pyproject.toml | 36 ++++++++++++++++++++++++++++++++- requirements.test.txt | 2 -- setup.cfg | 29 -------------------------- setup.py | 3 --- 6 files changed, 36 insertions(+), 38 deletions(-) delete mode 100644 requirements.test.txt delete mode 100644 setup.cfg delete mode 100644 setup.py diff --git a/.github/actions/test/action.yml b/.github/actions/test/action.yml index c6ca65a4..0184dfe3 100644 --- a/.github/actions/test/action.yml +++ b/.github/actions/test/action.yml @@ -87,7 +87,7 @@ runs: working-directory: ${{ inputs.pyshp_repo_directory }} run: | python -m pip install --upgrade pip - pip install -r requirements.test.txt + pip install -e .[test] - name: Pytest shell: bash diff --git a/README.md b/README.md index c55e2043..caf5f339 100644 --- a/README.md +++ b/README.md @@ -74,8 +74,6 @@ Both the Esri and XBase file-formats are very simple in design and memory efficient which is part of the reason the shapefile format remains popular despite the numerous ways to store and exchange GIS data available today. -Pyshp is compatible with Python 2.7-3.x. - This document provides examples for using PyShp to read and write shapefiles. However many more examples are continually added to the blog [http://GeospatialPython.com](http://GeospatialPython.com), and by searching for PyShp on [https://gis.stackexchange.com](https://gis.stackexchange.com). 
diff --git a/pyproject.toml b/pyproject.toml index 697b6e67..945c86c0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,6 +2,40 @@ requires = ["setuptools"] build-backend = "setuptools.build_meta" +[project] +name = "pyshp" +authors = [ + {name = "Joel Lawhead", email = "jlawhead@geospatialpython.com"}, +] +maintainers = [ + {name = "Karim Bahgat", email = "karim.bahgat.norway@gmail.com"} +] +readme = "README.md" +keywords = ["gis", "geospatial", "geographic", "shapefile", "shapefiles"] +description = "Pure Python read/write support for ESRI Shapefile format" +license = "MIT" +license-files = ["LICENSE.TXT"] +dynamic = ["version"] +requires-python = ">=3.9" +classifiers = [ + "Development Status :: 5 - Production/Stable", + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3 :: Only", + "Topic :: Scientific/Engineering :: GIS", + "Topic :: Software Development :: Libraries", + "Topic :: Software Development :: Libraries :: Python Modules", +] + +[project.optional-dependencies] +test = ["pytest"] + +[project.urls] +Repository = "https://github.com/GeospatialPython/pyshp" + +[tool.setuptools.dynamic] +version = {attr = "shapefile.__version__"} + [tool.ruff] # Exclude a variety of commonly ignored directories. 
exclude = [ @@ -84,4 +118,4 @@ load-plugins=[ per-file-ignores = """ shapefile.py:W0212 test_shapefile.py:W0212 -""" \ No newline at end of file +""" diff --git a/requirements.test.txt b/requirements.test.txt deleted file mode 100644 index 11141738..00000000 --- a/requirements.test.txt +++ /dev/null @@ -1,2 +0,0 @@ -pytest >= 3.7 -setuptools diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index d13d43bb..00000000 --- a/setup.cfg +++ /dev/null @@ -1,29 +0,0 @@ -[metadata] -name = pyshp -version = attr: shapefile.__version__ -description = Pure Python read/write support for ESRI Shapefile format -long_description = file: README.md -long_description_content_type = text/markdown -author = Joel Lawhead -author_email = jlawhead@geospatialpython.com -maintainer = Karim Bahgat -maintainer_email = karim.bahgat.norway@gmail.com -url = https://github.com/GeospatialPython/pyshp -download_url = https://pypi.org/project/pyshp/ -license = MIT -license_files = LICENSE.TXT -keywords = gis, geospatial, geographic, shapefile, shapefiles -classifiers = - Development Status :: 5 - Production/Stable - Programming Language :: Python - Programming Language :: Python :: 3 - Topic :: Scientific/Engineering :: GIS - Topic :: Software Development :: Libraries - Topic :: Software Development :: Libraries :: Python Modules - -[options] -py_modules = shapefile -python_requires = >=3.9 - -[bdist_wheel] -universal=1 diff --git a/setup.py b/setup.py deleted file mode 100644 index 60684932..00000000 --- a/setup.py +++ /dev/null @@ -1,3 +0,0 @@ -from setuptools import setup - -setup() From f23a08c3b48562fa6a014a8e7ef1db5bb8716185 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Mon, 21 Jul 2025 23:08:39 +0100 Subject: [PATCH 023/220] Update changelog.txt --- changelog.txt | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/changelog.txt b/changelog.txt index 533d704e..be977915 100644 --- a/changelog.txt +++ 
b/changelog.txt @@ -1,3 +1,26 @@ +VERSION 2.4.0 + +2025-07-21 +	Forthcoming Breaking Change.  Support for Python 2 and Pythons <= 3.8 to be dropped. +	* PyShp 2.4.0 is the latest (and likely last) version of PyShp to support Python 2.7 and Pythons <= 3.8. +	  These CPython versions have reached [end of life](https://devguide.python.org/versions/#versions). +	* Future development will focus on PyShp v3.0.0 onwards (currently intended to support Pythons >= 3.9). +	* This will not break any projects, as pip and other package managers should not install PyShp 3.0.0 +	  (after its release) in unsupported Pythons.  But we no longer promise such projects will get PyShp's latest +	  bug fixes and features. +	* If this negatively impacts your project, all feedback about this decision is welcome +	  on [the discussion page](https://github.com/GeospatialPython/pyshp/discussions/290). + + +	New Features: +	* Reader.iterRecords now allows start and stop to be specified, to look up smaller ranges of records. +	* Equality comparisons between Records now also require the fields to be the same (and in the same order). + +	Development: +	* Code quality tools (Ruff format) run on PyShp +	* Network, non-network, or all doctests selectable via command line args +	* Network tests made runnable on localhost. 
+ VERSION 2.3.1 From 38321121935924cd655c78e22f5e50525acefb19 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Mon, 21 Jul 2025 23:33:12 +0100 Subject: [PATCH 024/220] Require geojType to be equal to string literals, not substrings --- shapefile.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/shapefile.py b/shapefile.py index e5632981..f5eb76c6 100644 --- a/shapefile.py +++ b/shapefile.py @@ -604,7 +604,7 @@ def __geo_interface__(self) -> GeoJSONT: ) @staticmethod - def _from_geojson(geoj): + def _from_geojson(geoj) -> Shape: # create empty shape shape = Shape() # set shapeType @@ -634,7 +634,7 @@ def _from_geojson(geoj): elif geojType in ("MultiPoint", "LineString"): shape.points = geoj["coordinates"] shape.parts = [0] - elif geojType in ("Polygon"): + elif geojType in ("Polygon",): points = [] parts = [] index = 0 @@ -653,7 +653,7 @@ def _from_geojson(geoj): index += len(ext_or_hole) shape.points = points shape.parts = parts - elif geojType in ("MultiLineString"): + elif geojType in ("MultiLineString",): points = [] parts = [] index = 0 @@ -663,7 +663,7 @@ def _from_geojson(geoj): index += len(linestring) shape.points = points shape.parts = parts - elif geojType in ("MultiPolygon"): + elif geojType in ("MultiPolygon",): points = [] parts = [] index = 0 From 024deedb1ed745fb898c6bcf209f41e65ad26970 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Mon, 21 Jul 2025 23:42:20 +0100 Subject: [PATCH 025/220] Add from __future__ import annotations --- shapefile.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/shapefile.py b/shapefile.py index f5eb76c6..b365895d 100644 --- a/shapefile.py +++ b/shapefile.py @@ -6,6 +6,8 @@ Compatible with Python versions >=3.9 """ +from __future__ import annotations + __version__ = "2.4.0" import array From 577dcef3ed4de37c0acec29a60656c898549f6fd Mon Sep 17 00:00:00 2001 From: James Parrott 
<80779630+JamesParrott@users.noreply.github.com> Date: Tue, 22 Jul 2025 23:06:20 +0100 Subject: [PATCH 026/220] Make shapefile_path (in Reader.__init__) positional only (with default) to preserve old API --- shapefile.py | 1 + 1 file changed, 1 insertion(+) diff --git a/shapefile.py b/shapefile.py index b365895d..1f43402a 100644 --- a/shapefile.py +++ b/shapefile.py @@ -954,6 +954,7 @@ def _assert_ext_is_supported(self, ext: str): def __init__( self, shapefile_path: str = "", + /, *, encoding="utf-8", encodingErrors="strict", From 6dccd8f7eae3a9cb041cf1531e4c0800e1ca47df Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Tue, 22 Jul 2025 23:29:06 +0100 Subject: [PATCH 027/220] Type shp, dbf and shx to Optional[BinaryFileT] (shp also Sentinel) --- shapefile.py | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/shapefile.py b/shapefile.py index 1f43402a..eca5a7d1 100644 --- a/shapefile.py +++ b/shapefile.py @@ -100,8 +100,12 @@ BBox = tuple[float, float, float, float] +# File name, file object or anything with a read() method that returns bytes. 
+# TODO: Create simple Protocol with a read() method +BinaryFileT = Union[str, IO[bytes]] -class GeoJSONT(TypedDict): + +class GeoJsonShapeT(TypedDict): type: str coordinates: Union[ tuple[()], Point2D, PointZ, PointZM, Coords, list[Coords], list[list[Coords]] @@ -487,7 +491,7 @@ def __init__( self.__oid = -1 @property - def __geo_interface__(self) -> GeoJSONT: + def __geo_interface__(self) -> GeoJsonShapeT: if self.shapeType in [POINT, POINTM, POINTZ]: # point if len(self.points) == 0: @@ -914,7 +918,7 @@ class ShapefileException(Exception): pass -class _NoShpSentinel(object): +class __NoShpSentinel(object): """For use as a default value for shp to preserve the behaviour (from when all keyword args were gathered in the **kwargs dict) in case someone explictly @@ -956,11 +960,11 @@ def __init__( shapefile_path: str = "", /, *, - encoding="utf-8", - encodingErrors="strict", - shp=_NoShpSentinel, - shx=None, - dbf=None, + encoding: str = "utf-8", + encodingErrors: str = "strict", + shp: Union[__NoShpSentinel, Optional[BinaryFileT]] = __NoShpSentinel(), + shx: Optional[BinaryFileT] = None, + dbf: Optional[BinaryFileT] = None, **kwargs, ): self.shp = None @@ -1118,22 +1122,20 @@ def __init__( self.load(path) return - if shp is not _NoShpSentinel: - self.shp = self._seek_0_on_file_obj_wrap_or_open_from_name("shp", shp) - self.shx = self._seek_0_on_file_obj_wrap_or_open_from_name("shx", shx) + if not isinstance(shp, __NoShpSentinel): + self.shp = self.__seek_0_on_file_obj_wrap_or_open_from_name("shp", shp) + self.shx = self.__seek_0_on_file_obj_wrap_or_open_from_name("shx", shx) - self.dbf = self._seek_0_on_file_obj_wrap_or_open_from_name("dbf", dbf) + self.dbf = self.__seek_0_on_file_obj_wrap_or_open_from_name("dbf", dbf) # Load the files if self.shp or self.dbf: self._try_to_set_constituent_file_headers() - def _seek_0_on_file_obj_wrap_or_open_from_name( + def __seek_0_on_file_obj_wrap_or_open_from_name( self, ext: str, - # File name, file object or anything with a 
read() method that returns bytes. - # TODO: Create simple Protocol with a read() method - file_: Optional[Union[str, IO[bytes]]], + file_: Optional[BinaryFileT], ) -> Union[None, io.BytesIO, IO[bytes]]: # assert ext in {'shp', 'dbf', 'shx'} self._assert_ext_is_supported(ext) @@ -1245,7 +1247,7 @@ def __geo_interface__(self): return fcollection @property - def shapeTypeName(self): + def shapeTypeName(self) -> str: return SHAPETYPE_LOOKUP[self.shapeType] def load(self, shapefile=None): From 58b5e69b7f8dfaffe912dec5f282b76c3969f07c Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Tue, 22 Jul 2025 23:32:26 +0100 Subject: [PATCH 028/220] Make shp sentinel a sunder not a dunder (to avoid name mangling) --- shapefile.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/shapefile.py b/shapefile.py index eca5a7d1..be7d1e74 100644 --- a/shapefile.py +++ b/shapefile.py @@ -918,7 +918,7 @@ class ShapefileException(Exception): pass -class __NoShpSentinel(object): +class _NoShpSentinel(object): """For use as a default value for shp to preserve the behaviour (from when all keyword args were gathered in the **kwargs dict) in case someone explictly @@ -962,7 +962,7 @@ def __init__( *, encoding: str = "utf-8", encodingErrors: str = "strict", - shp: Union[__NoShpSentinel, Optional[BinaryFileT]] = __NoShpSentinel(), + shp: Union[_NoShpSentinel, Optional[BinaryFileT]] = _NoShpSentinel(), shx: Optional[BinaryFileT] = None, dbf: Optional[BinaryFileT] = None, **kwargs, @@ -1122,7 +1122,7 @@ def __init__( self.load(path) return - if not isinstance(shp, __NoShpSentinel): + if not isinstance(shp, _NoShpSentinel): self.shp = self.__seek_0_on_file_obj_wrap_or_open_from_name("shp", shp) self.shx = self.__seek_0_on_file_obj_wrap_or_open_from_name("shx", shx) From 701e9b0a853e22abb67954588ae5926ead0c7852 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Tue, 22 Jul 2025 
23:56:51 +0100 Subject: [PATCH 029/220] Replace most "%s" % (,...) and "{}".format with f-strings Some %d date formatting strings remain --- shapefile.py | 114 ++++++++++++++++++++------------------------------- 1 file changed, 44 insertions(+), 70 deletions(-) diff --git a/shapefile.py b/shapefile.py index be7d1e74..fe584689 100644 --- a/shapefile.py +++ b/shapefile.py @@ -574,9 +574,7 @@ def __geo_interface__(self) -> GeoJsonShapeT: # if VERBOSE is True, issue detailed warning about any shape errors # encountered during the Shapefile to GeoJSON conversion if VERBOSE and self._errors: - header = "Possible issue encountered when converting Shape #{} to GeoJSON: ".format( - self.oid - ) + header = f"Possible issue encountered when converting Shape #{self.oid} to GeoJSON: " orphans = self._errors.get("polygon_orphaned_holes", None) if orphans: msg = ( @@ -605,8 +603,7 @@ def __geo_interface__(self) -> GeoJsonShapeT: else: raise Exception( - 'Shape type "%s" cannot be represented as GeoJSON.' - % SHAPETYPE_LOOKUP[self.shapeType] + f'Shape type "{SHAPETYPE_LOOKUP[self.shapeType]}" cannot be represented as GeoJSON.' ) @staticmethod @@ -630,7 +627,7 @@ def _from_geojson(geoj) -> Shape: elif geojType == "MultiPolygon": shapeType = POLYGON else: - raise Exception("Cannot create Shape from GeoJSON type '%s'" % geojType) + raise Exception(f"Cannot create Shape from GeoJSON type '{geojType}'") shape.shapeType = shapeType # set points and parts @@ -990,8 +987,7 @@ def __init__( if path.count(".zip") > 1: # Multiple nested zipfiles raise ShapefileException( - "Reading from multiple nested zipfiles is not supported: %s" - % path + f"Reading from multiple nested zipfiles is not supported: {path}" ) # Split into zipfile and shapefile paths if path.endswith(".zip"): @@ -1041,9 +1037,8 @@ def __init__( shapefile = shapefiles[0] else: raise ShapefileException( - "Zipfile contains more than one shapefile: %s. Please specify the full \ - path to the shapefile you would like to open." 
- % shapefiles + f"Zipfile contains more than one shapefile: {shapefiles}. " + "Please specify the full path to the shapefile you would like to open." ) # Try to extract file-like objects from zipfile shapefile = os.path.splitext(shapefile)[ @@ -1075,7 +1070,7 @@ def __init__( return else: raise ShapefileException( - "No shp or dbf file found in zipfile: %s" % path + f"No shp or dbf file found in zipfile: {path}" ) elif path.startswith("http"): @@ -1113,7 +1108,7 @@ def __init__( return else: raise ShapefileException( - "No shp or dbf file found at url: %s" % path + f"No shp or dbf file found at url: {path}" ) else: @@ -1166,9 +1161,7 @@ def __str__(self): info = ["shapefile Reader"] if self.shp: info.append( - " {} shapes (type '{}')".format( - len(self), SHAPETYPE_LOOKUP[self.shapeType] - ) + f" {len(self)} shapes (type '{SHAPETYPE_LOOKUP[self.shapeType]}')" ) if self.dbf: info.append(f" {len(self)} records ({len(self.fields)} fields)") @@ -1425,18 +1418,18 @@ def __shape(self, oid=None, bbox=None): nPoints = unpack("= 16: @@ -1444,7 +1437,7 @@ def __shape(self, oid=None, bbox=None): # Measure values less than -10e38 are nodata values according to the spec if next - f.tell() >= nPoints * 8: record.m = [] - for m in _Array("d", unpack("<%sd" % nPoints, f.read(nPoints * 8))): + for m in _Array("d", unpack(f"<{nPoints}d", f.read(nPoints * 8))): if m > NODATA: record.m.append(m) else: @@ -1552,9 +1545,7 @@ def shape(self, i=0, bbox=None): # If the index was not found, it likely means the .shp file is incomplete if _i != i: raise ShapefileException( - "Shape index {} is out of bounds; the .shp file only contains {} shapes".format( - i, _i - ) + f"Shape index {i} is out of bounds; the .shp file only contains {_i} shapes" ) # Seek to the offset and read the shape @@ -1666,14 +1657,14 @@ def __recordFmt(self, fields=None): """ if self.numRecords is None: self.__dbfHeader() - structcodes = ["%ds" % fieldinfo[2] for fieldinfo in self.fields] + structcodes = 
[f"{fieldinfo[2]}s" for fieldinfo in self.fields] if fields is not None: # only unpack specified fields, ignore others using padbytes (x) structcodes = [ code if fieldinfo[0] in fields or fieldinfo[0] == "DeletionFlag" # always unpack delflag - else "%dx" % fieldinfo[2] + else f"{fieldinfo[2]}x" for fieldinfo, code in zip(self.fields, structcodes) ] fmt = "".join(structcodes) @@ -1738,10 +1729,8 @@ def __record(self, fieldTuples, recLookup, recStruct, oid=None): # check that values match fields if len(fieldTuples) != len(recordContents): raise ShapefileException( - "Number of record values ({}) is different from the requested \ - number of fields ({})".format( - len(recordContents), len(fieldTuples) - ) + f"Number of record values ({len(recordContents)}) is different from the requested " + f"number of fields ({len(fieldTuples)})" ) # parse each value @@ -1868,8 +1857,7 @@ def iterRecords(self, fields=None, start=0, stop=None): stop = self.numRecords elif abs(stop) > self.numRecords: raise IndexError( - "abs(stop): %s exceeds number of records: %s." - % (abs(stop), self.numRecords) + f"abs(stop): {abs(stop)} exceeds number of records: {self.numRecords}." ) elif stop < 0: stop = range(self.numRecords)[stop] @@ -1960,9 +1948,7 @@ def __init__( target = pathlike_obj(target) if not is_string(target): raise Exception( - "The target filepath {} must be of type str/unicode or path-like, not {}.".format( - repr(target), type(target) - ) + f"The target filepath {target!r} must be of type str/unicode or path-like, not {type(target)}." 
) self.shp = self.__getFileObj(os.path.splitext(target)[0] + ".shp") self.shx = self.__getFileObj(os.path.splitext(target)[0] + ".shx") @@ -2032,8 +2018,8 @@ def close(self): if self.recNum != self.shpNum: raise ShapefileException( "When saving both the dbf and shp file, " - "the number of records (%s) must correspond " - "with the number of shapes (%s)" % (self.recNum, self.shpNum) + f"the number of records ({self.recNum}) must correspond " + f"with the number of shapes ({self.shpNum})" ) # Fill in the blank headers if self.shp and shp_open: @@ -2105,8 +2091,8 @@ def __bbox(self, s): # any shape that is not null should have at least one point, and only those should be sent here. # could also mean that earlier code failed to add points to a non-null shape. raise Exception( - "Cannot create bbox. Expected a valid shape with at least one point. Got a shape of type '%s' and 0 points." - % s.shapeType + "Cannot create bbox. Expected a valid shape with at least one point. " + f"Got a shape of type '{s.shapeType}' and 0 points." ) bbox = [min(x), min(y), max(x), max(y)] # update global @@ -2326,8 +2312,8 @@ def __shpRecord(self, s): self.shapeType = s.shapeType if s.shapeType != NULL and s.shapeType != self.shapeType: raise Exception( - "The shape's type (%s) must match the type of the shapefile (%s)." - % (s.shapeType, self.shapeType) + f"The shape's type ({s.shapeType}) must match " + f"the type of the shapefile ({self.shapeType})." ) f.write(pack(" 2 else 0)) for p in s.points] except error: raise ShapefileException( - "Failed to write elevation values for record %s. Expected floats." - % self.shpNum + f"Failed to write elevation values for record {self.shpNum}. Expected floats." 
) # Write m extremes and values # When reading a file, pyshp converts NODATA m values to None, so here we make sure to convert them back to NODATA @@ -2398,8 +2380,7 @@ def __shpRecord(self, s): f.write(pack("<2d", *self.__mbox(s))) except error: raise ShapefileException( - "Failed to write measure extremes for record %s. Expected floats" - % self.shpNum + f"Failed to write measure extremes for record {self.shpNum}. Expected floats" ) try: if hasattr(s, "m"): @@ -2407,7 +2388,7 @@ def __shpRecord(self, s): # fmt: off f.write( pack( - "<%sd" % len(s.m), + f"<{len(s.m)}d", *[m if m is not None else NODATA for m in s.m] ) ) @@ -2429,8 +2410,7 @@ def __shpRecord(self, s): ] except error: raise ShapefileException( - "Failed to write measure values for record %s. Expected floats" - % self.shpNum + f"Failed to write measure values for record {self.shpNum}. Expected floats" ) # Write a single point if s.shapeType in (1, 11, 21): @@ -2438,8 +2418,7 @@ def __shpRecord(self, s): f.write(pack("<2d", s.points[0][0], s.points[0][1])) except error: raise ShapefileException( - "Failed to write point for record %s. Expected floats." - % self.shpNum + f"Failed to write point for record {self.shpNum}. Expected floats." ) # Write a single Z value # Note: missing z values are autoset to 0, but not sure if this is ideal. @@ -2455,8 +2434,7 @@ def __shpRecord(self, s): f.write(pack(" Date: Wed, 23 Jul 2025 01:25:54 +0100 Subject: [PATCH 030/220] Update shapefile.py --- shapefile.py | 78 ++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 58 insertions(+), 20 deletions(-) diff --git a/shapefile.py b/shapefile.py index fe584689..c27b21de 100644 --- a/shapefile.py +++ b/shapefile.py @@ -103,6 +103,9 @@ # File name, file object or anything with a read() method that returns bytes. 
# TODO: Create simple Protocol with a read() method BinaryFileT = Union[str, IO[bytes]] +BinaryFileStreamT = Union[IO[bytes], io.BytesIO] + +RecordValue = Union[float, str, date] class GeoJsonShapeT(TypedDict): @@ -717,7 +720,12 @@ class _Record(list): >>> print(r.ID) """ - def __init__(self, field_positions, values, oid=None): + def __init__( + self, + field_positions: dict[str, int], + values: Iterable[RecordValue], + oid: Optional[int] = None, + ): """ A Record should be created by the Reader class @@ -732,7 +740,7 @@ def __init__(self, field_positions, values, oid=None): self.__oid = -1 list.__init__(self, values) - def __getattr__(self, item): + def __getattr__(self, item: str) -> RecordValue: """ __getattr__ is called if an attribute is used that does not exist in the normal sense. For example r=Record(...), r.ID @@ -755,7 +763,7 @@ def __getattr__(self, item): f"{item} found as a field but not enough values available." ) - def __setattr__(self, key, value): + def __setattr__(self, key: str, value: RecordValue): """ Sets a value of a field attribute :param key: The field name @@ -811,11 +819,11 @@ def __setitem__(self, key, value): raise IndexError(f"{key} is not a field name and not an int") @property - def oid(self): + def oid(self) -> int: """The index position of the record in the original shapefile""" return self.__oid - def as_dict(self, date_strings=False): + def as_dict(self, date_strings: bool = False) -> dict[str, RecordValue]: """ Returns this Record as a dictionary using the field names as keys :return: dict @@ -830,7 +838,7 @@ def as_dict(self, date_strings=False): def __repr__(self): return f"Record #{self.__oid}: {list(self)}" - def __dir__(self): + def __dir__(self) -> list[str]: """ Helps to show the field names in an interactive environment like IPython. See: http://ipython.readthedocs.io/en/stable/config/integrating.html @@ -856,7 +864,7 @@ class ShapeRecord: """A ShapeRecord object containing a shape along with its attributes. 
Provides the GeoJSON __geo_interface__ to return a Feature dictionary.""" - def __init__(self, shape=None, record=None): + def __init__(self, shape: Optional[Shape] = None, record: Optional[_Record] = None): self.shape = shape self.record = record @@ -967,12 +975,12 @@ def __init__( self.shp = None self.shx = None self.dbf = None - self._files_to_close: list[IO[bytes]] = [] + self._files_to_close: list[BinaryFileStreamT] = [] self.shapeName = "Not specified" self._offsets: list[int] = [] - self.shpLength = None - self.numRecords = None - self.numShapes = None + self.shpLength: Optional[int] = None + self.numRecords: Optional[int] = None + self.numShapes: Optional[int] = None self.fields: list[list[str]] = [] self.__dbfHdrLength = 0 self.__fieldLookup: dict[str, int] = {} @@ -1131,7 +1139,7 @@ def __seek_0_on_file_obj_wrap_or_open_from_name( self, ext: str, file_: Optional[BinaryFileT], - ) -> Union[None, io.BytesIO, IO[bytes]]: + ) -> Union[None, IO[bytes]]: # assert ext in {'shp', 'dbf', 'shx'} self._assert_ext_is_supported(ext) @@ -1615,6 +1623,7 @@ def __dbfHeader(self): self.numRecords, self.__dbfHdrLength, self.__recordLength = unpack( " Optional[_Record]: """Reads and returns a dbf record row as a list of values. Requires specifying a list of field info tuples 'fieldTuples', a record name-index dict 'recLookup', and a Struct instance 'recStruct' for unpacking these fields. @@ -1801,7 +1816,9 @@ def __record(self, fieldTuples, recLookup, recStruct, oid=None): return _Record(recLookup, record, oid) - def record(self, i=0, fields=None): + def record( + self, i: int = 0, fields: Optional[list[str]] = None + ) -> Optional[_Record]: """Returns a specific dbf record based on the supplied index. To only read some of the fields, specify the 'fields' arg as a list of one or more fieldnames. 
@@ -1818,7 +1835,7 @@ def record(self, i=0, fields=None): oid=i, fieldTuples=fieldTuples, recLookup=recLookup, recStruct=recStruct ) - def records(self, fields=None): + def records(self, fields: Optional[list[str]] = None) -> list[_Record]: """Returns all records in a dbf file. To only read some of the fields, specify the 'fields' arg as a list of one or more fieldnames. @@ -1829,7 +1846,7 @@ def records(self, fields=None): f = self.__getFileObj(self.dbf) f.seek(self.__dbfHdrLength) fieldTuples, recLookup, recStruct = self.__recordFields(fields) - for i in range(self.numRecords): + for i in range(self.numRecords): # type: ignore r = self.__record( oid=i, fieldTuples=fieldTuples, recLookup=recLookup, recStruct=recStruct ) @@ -1837,7 +1854,12 @@ def records(self, fields=None): records.append(r) return records - def iterRecords(self, fields=None, start=0, stop=None): + def iterRecords( + self, + fields=Optional[list[str]], + start: int = 0, + stop: Optional[int] = None, + ) -> Iterator[Optional[_Record]]: """Returns a generator of records in a dbf file. Useful for large shapefiles or dbf files. To only read some of the fields, specify the 'fields' arg as a @@ -1851,6 +1873,8 @@ def iterRecords(self, fields=None, start=0, stop=None): """ if self.numRecords is None: self.__dbfHeader() + if not isinstance(self.numRecords, int): + raise Exception("Error when reading number of Records in dbf file header") f = self.__getFileObj(self.dbf) start = self.__restrictIndex(start) if stop is None: @@ -1871,7 +1895,12 @@ def iterRecords(self, fields=None, start=0, stop=None): if r: yield r - def shapeRecord(self, i=0, fields=None, bbox=None): + def shapeRecord( + self, + i: int = 0, + fields: Optional[list[str]] = None, + bbox: Optional[BBox] = None, + ) -> Optional[ShapeRecord]: """Returns a combination geometry and attribute record for the supplied record index. 
To only read some of the fields, specify the 'fields' arg as a @@ -1884,8 +1913,13 @@ def shapeRecord(self, i=0, fields=None, bbox=None): if shape: record = self.record(i, fields=fields) return ShapeRecord(shape=shape, record=record) + return None - def shapeRecords(self, fields=None, bbox=None): + def shapeRecords( + self, + fields: Optional[list[str]] = None, + bbox: Optional[BBox] = None, + ) -> ShapeRecords: """Returns a list of combination geometry/attribute records for all records in a shapefile. To only read some of the fields, specify the 'fields' arg as a @@ -1895,7 +1929,11 @@ def shapeRecords(self, fields=None, bbox=None): """ return ShapeRecords(self.iterShapeRecords(fields=fields, bbox=bbox)) - def iterShapeRecords(self, fields=None, bbox=None): + def iterShapeRecords( + self, + fields: Optional[list[str]] = None, + bbox: Optional[BBox] = None, + ) -> Iterator[ShapeRecord]: """Returns a generator of combination geometry/attribute records for all records in a shapefile. To only read some of the fields, specify the 'fields' arg as a From bab4f5fe0272bebbc57520fb9f871f15fb2a0b24 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Wed, 23 Jul 2025 01:31:08 +0100 Subject: [PATCH 031/220] Restore type annotation back to one, instead of default value --- shapefile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/shapefile.py b/shapefile.py index c27b21de..2ebca9a7 100644 --- a/shapefile.py +++ b/shapefile.py @@ -1856,7 +1856,7 @@ def records(self, fields: Optional[list[str]] = None) -> list[_Record]: def iterRecords( self, - fields=Optional[list[str]], + fields: Optional[list[str]] = None, start: int = 0, stop: Optional[int] = None, ) -> Iterator[Optional[_Record]]: From bd8735ca2df3e9c97764bc42738120a75e073649 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Wed, 23 Jul 2025 12:50:40 +0100 Subject: [PATCH 032/220] Remove Python 2 specific 
steps and python-version input variable --- .github/actions/test/action.yml | 22 +------------------ .../workflows/run_tests_hooks_and_tools.yml | 1 - 2 files changed, 1 insertion(+), 22 deletions(-) diff --git a/.github/actions/test/action.yml b/.github/actions/test/action.yml index 0184dfe3..fb3a8365 100644 --- a/.github/actions/test/action.yml +++ b/.github/actions/test/action.yml @@ -17,9 +17,6 @@ inputs: description: Path to where the PyShp repo was checked out to (to keep separate from Shapefiles & artefacts repo). required: false default: '.' - python-version: - description: Set to "2.7" to use caddy instead of python -m SimpleHTTPServer - required: true @@ -50,7 +47,7 @@ runs: path: ./PyShp_test_shapefile - name: Serve shapefiles and zip file artefacts on localhost - if: ${{ inputs.replace_remote_urls_with_localhost == 'yes' && inputs.python-version != '2.7'}} + if: ${{ inputs.replace_remote_urls_with_localhost == 'yes' }} shell: bash working-directory: ./PyShp_test_shapefile run: | @@ -58,23 +55,6 @@ runs: echo "HTTP_SERVER_PID=$!" >> $GITHUB_ENV sleep 4 # give server time to start - - name: Download and unzip Caddy binary - if: ${{ inputs.replace_remote_urls_with_localhost == 'yes' && inputs.python-version == '2.7'}} - working-directory: . - shell: bash - run: | - curl -L https://github.com/caddyserver/caddy/releases/download/v2.10.0/caddy_2.10.0_linux_amd64.tar.gz --output caddy.tar.gz - tar -xzf caddy.tar.gz - - - name: Serve shapefiles and zip file artefacts on localhost using Caddy - if: ${{ inputs.replace_remote_urls_with_localhost == 'yes' && inputs.python-version == '2.7'}} - shell: bash - working-directory: . - run: | - ./caddy file-server --root ./PyShp_test_shapefile --listen :8000 & - echo "HTTP_SERVER_PID=$!" 
>> $GITHUB_ENV - sleep 2 # give server time to start - - name: Doctests shell: bash working-directory: ${{ inputs.pyshp_repo_directory }} diff --git a/.github/workflows/run_tests_hooks_and_tools.yml b/.github/workflows/run_tests_hooks_and_tools.yml index 42c981e1..cf3f223c 100644 --- a/.github/workflows/run_tests_hooks_and_tools.yml +++ b/.github/workflows/run_tests_hooks_and_tools.yml @@ -71,4 +71,3 @@ jobs: extra_args: '-m network' replace_remote_urls_with_localhost: 'yes' pyshp_repo_directory: ./Pyshp - python-version: ${{ matrix.python-version }} From bf16858cfbb8a6ba49f3ad7157d8f815a1ddfbf2 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Wed, 23 Jul 2025 14:23:31 +0100 Subject: [PATCH 033/220] Add and use custom build action --- .../actions/build_wheel_and_sdist/action.yml | 48 +++++++++++++++++++ .github/actions/test/action.yml | 45 ++++++++++++----- .github/workflows/deploy.yml | 26 +++++----- ...ools.yml => run_checks_build_and_test.yml} | 7 +++ 4 files changed, 101 insertions(+), 25 deletions(-) create mode 100644 .github/actions/build_wheel_and_sdist/action.yml rename .github/workflows/{run_tests_hooks_and_tools.yml => run_checks_build_and_test.yml} (89%) diff --git a/.github/actions/build_wheel_and_sdist/action.yml b/.github/actions/build_wheel_and_sdist/action.yml new file mode 100644 index 00000000..e01260d9 --- /dev/null +++ b/.github/actions/build_wheel_and_sdist/action.yml @@ -0,0 +1,48 @@ +name: + Build a Wheel and source distribution. + +description: + Run pyproject-build on the repo + + +runs: + using: "composite" + steps: + # The PyShp repo is required to already be checked out into ., + # e.g. 
by the calling workflow using: + # steps: + # - uses: actions/checkout@v4 + # + # and then calling this Action with: + # - name: Run tests + # uses: .github/actions/build_wheel_and_sdist + + # The Python to be tested with is required to already be setup, + # with "python" and "pip" on the system Path + # (so that this custom Action can be used with both reproducible + # Pythons from Python docker images, and more frequently deprecated Pythons + # from - uses: actions/setup-python@v5) + + - name: Install build (PyPA's pyproject-build) + shell: bash + run: | + python -m pip install --upgrade pip + python -m pip install build + - name: Build package + + - name: Show versions for logs. + shell: bash + run: | + python --version + python -m build --version + + - name: Build the default distributions (wheel & sdist, in dist/). + shell: bash + working-directory: ${{ inputs.pyshp_repo_directory }} + run: python -m build + + - name: Upload built distributions + uses: actions/upload-artifact@v4 + with: + name: PyShp_wheel_and_sdist + path: dist \ No newline at end of file diff --git a/.github/actions/test/action.yml b/.github/actions/test/action.yml index fb3a8365..276bccd5 100644 --- a/.github/actions/test/action.yml +++ b/.github/actions/test/action.yml @@ -24,12 +24,20 @@ runs: using: "composite" steps: # The PyShp repo is required to already be checked out into pyshp_repo_directory, + # and the wheel to be tested in ./dist within it. # e.g. 
by the calling workflow using: # steps: # - uses: actions/checkout@v4 # with: # path: ./Pyshp + # + # - name: Build distribution artefacts (wheel and sdist) + # uses: ./Pyshp/.github/actions/build_wheel_and_sdist + # with: + # pyshp_repo_directory: ./Pyshp + # # and then calling this Action with: + # # - name: Run tests # uses: ./Pyshp/.github/actions/test # with: @@ -37,7 +45,12 @@ runs: # replace_remote_urls_with_localhost: 'yes' # pyshp_repo_directory: ./Pyshp - # The Python to be tested with is required to already be setup, with "python" and "pip" on the system Path + # The Python to be tested with is required to already be setup, + # with "python" and "pip" on the system Path + # (so that this custom Action can be used with both reproducible + # Pythons from Python docker images, and more frequently deprecated Pythons + # from - uses: actions/setup-python@v5) + - name: Checkout shapefiles and zip file artefacts repo if: ${{ inputs.replace_remote_urls_with_localhost == 'yes' }} @@ -62,26 +75,36 @@ runs: REPLACE_REMOTE_URLS_WITH_LOCALHOST: ${{ inputs.replace_remote_urls_with_localhost }} run: python shapefile.py ${{ inputs.extra_args }} - - name: Install test dependencies. + + - name: Download wheel and sdist (built in previous jobs) + uses: actions/download-artifact@v4 + with: + name: python_wheel_gcc_linux + path: dist + + - name: Install PyShp from the wheel (built in prev step) + shell: bash + working-directory: ${{ inputs.pyshp_repo_directory }}/dist + run: | + WHEEL_NAME=$(ls pyshp-*-py3-none-any.whl) + python -m pip install $WHEEL_NAME[test] + + - name: Show Python and Pytest versions for logs. 
shell: bash - working-directory: ${{ inputs.pyshp_repo_directory }} run: | - python -m pip install --upgrade pip - pip install -e .[test] + python --version + python -m pytest --version - - name: Pytest + - name: Run Pytest shell: bash working-directory: ${{ inputs.pyshp_repo_directory }} env: REPLACE_REMOTE_URLS_WITH_LOCALHOST: ${{ inputs.replace_remote_urls_with_localhost }} run: | + echo "Ensure the tests import the installed wheel" + mv shapefile.py shapefile_repo.py pytest -rA --tb=short ${{ inputs.extra_args }} - - name: Show versions for logs. - shell: bash - run: | - python --version - python -m pytest --version # - name: Test http server diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index c66adc89..0ddd8b71 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -13,16 +13,13 @@ on: types: [published] jobs: - test: - # In general, tests should be run after building a distribution, to test that distribution. - # However as long as PyShp is a pure Python library, with pure Python deps (or no deps) - # then this would only test the packaging process, not so much the code as there are - # no binaries. - uses: ./.github/workflows/run_tests_hooks_and_tools.yml + format_type_check_lint_build_and_test: + # Builds and uploads wheel and sdist + uses: ./.github/workflows/run_checks_build_and_test.yml deploy: - # Prevent deployment of releases that fail any hooks (e.g. linting) or that fail any tests. - needs: test + # Prevent deployment of releases that fail any tests or checks (e.g. linting). 
+ needs: format_type_check_lint_build_and_test runs-on: ubuntu-latest steps: @@ -33,13 +30,13 @@ jobs: python-version: '3.x' + - name: Download wheel and sdist (built in previous job) + uses: actions/download-artifact@v4 + with: + name: PyShp_wheel_and_sdist + path: dist + - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install build - - name: Build package - run: python -m build - name: Publish package if: github.repository == 'GeospatialPython/pyshp' @@ -47,3 +44,4 @@ jobs: with: user: __token__ password: ${{ secrets.PYPI_INTEGRATION }} + packages-dir: dist/ diff --git a/.github/workflows/run_tests_hooks_and_tools.yml b/.github/workflows/run_checks_build_and_test.yml similarity index 89% rename from .github/workflows/run_tests_hooks_and_tools.yml rename to .github/workflows/run_checks_build_and_test.yml index cf3f223c..8d1b7ddb 100644 --- a/.github/workflows/run_tests_hooks_and_tools.yml +++ b/.github/workflows/run_checks_build_and_test.yml @@ -30,7 +30,14 @@ jobs: run: | pylint --disable=R,C test_shapefile.py + build_wheel_and_sdist: + runs-on: ubuntu-latest + steps: + - name: Build wheel from the project repo + uses: ./Pyshp/.github/actions/build_wheel_and_sdist + test_on_supported_Pythons: + needs: build_wheel_and_sdist strategy: fail-fast: false matrix: From 83ea1a46eeb3c28648516aa81a28a8d6aa8d3fbe Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Wed, 23 Jul 2025 14:28:30 +0100 Subject: [PATCH 034/220] Check out repo and setup Python before build --- .github/actions/test/action.yml | 4 +--- .github/workflows/run_checks_build_and_test.yml | 3 +++ 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/actions/test/action.yml b/.github/actions/test/action.yml index 276bccd5..8b08b60e 100644 --- a/.github/actions/test/action.yml +++ b/.github/actions/test/action.yml @@ -33,8 +33,6 @@ runs: # # - name: Build distribution artefacts (wheel and sdist) # uses: 
./Pyshp/.github/actions/build_wheel_and_sdist - # with: - # pyshp_repo_directory: ./Pyshp # # and then calling this Action with: # @@ -84,7 +82,7 @@ runs: - name: Install PyShp from the wheel (built in prev step) shell: bash - working-directory: ${{ inputs.pyshp_repo_directory }}/dist + working-directory: dist/ run: | WHEEL_NAME=$(ls pyshp-*-py3-none-any.whl) python -m pip install $WHEEL_NAME[test] diff --git a/.github/workflows/run_checks_build_and_test.yml b/.github/workflows/run_checks_build_and_test.yml index 8d1b7ddb..f4c9dc1e 100644 --- a/.github/workflows/run_checks_build_and_test.yml +++ b/.github/workflows/run_checks_build_and_test.yml @@ -33,6 +33,8 @@ jobs: build_wheel_and_sdist: runs-on: ubuntu-latest steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 - name: Build wheel from the project repo uses: ./Pyshp/.github/actions/build_wheel_and_sdist @@ -77,4 +79,5 @@ jobs: with: extra_args: '-m network' replace_remote_urls_with_localhost: 'yes' + # Checkout to ./PyShp, as the test job also needs to check out the artefact repo pyshp_repo_directory: ./Pyshp From 9f14db5f72a506671ef24fcf61e95d1f1612e068 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Wed, 23 Jul 2025 14:35:48 +0100 Subject: [PATCH 035/220] Fix path to custom build action --- .github/workflows/run_checks_build_and_test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/run_checks_build_and_test.yml b/.github/workflows/run_checks_build_and_test.yml index f4c9dc1e..022fecb6 100644 --- a/.github/workflows/run_checks_build_and_test.yml +++ b/.github/workflows/run_checks_build_and_test.yml @@ -36,7 +36,7 @@ jobs: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 - name: Build wheel from the project repo - uses: ./Pyshp/.github/actions/build_wheel_and_sdist + uses: .github/actions/build_wheel_and_sdist test_on_supported_Pythons: needs: build_wheel_and_sdist From 
d97ca10736229ebbfced02d68a8642204462a26c Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Wed, 23 Jul 2025 14:36:48 +0100 Subject: [PATCH 036/220] Refix path to custom build action --- .github/workflows/run_checks_build_and_test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/run_checks_build_and_test.yml b/.github/workflows/run_checks_build_and_test.yml index 022fecb6..50adf282 100644 --- a/.github/workflows/run_checks_build_and_test.yml +++ b/.github/workflows/run_checks_build_and_test.yml @@ -36,7 +36,7 @@ jobs: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 - name: Build wheel from the project repo - uses: .github/actions/build_wheel_and_sdist + uses: ./.github/actions/build_wheel_and_sdist test_on_supported_Pythons: needs: build_wheel_and_sdist From ef5dba1cebb850500a6e2875b80aa413a56368f0 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Wed, 23 Jul 2025 14:38:51 +0100 Subject: [PATCH 037/220] Remove empty step --- .github/actions/build_wheel_and_sdist/action.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/actions/build_wheel_and_sdist/action.yml b/.github/actions/build_wheel_and_sdist/action.yml index e01260d9..8c2b3b8e 100644 --- a/.github/actions/build_wheel_and_sdist/action.yml +++ b/.github/actions/build_wheel_and_sdist/action.yml @@ -28,7 +28,6 @@ runs: run: | python -m pip install --upgrade pip python -m pip install build - - name: Build package - name: Show versions for logs. 
shell: bash From 2651884c7e7921775338f8e3163b4df3a488d156 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Wed, 23 Jul 2025 14:50:49 +0100 Subject: [PATCH 038/220] Create setup.cfg --- setup.cfg | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 setup.cfg diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 00000000..906abd3a --- /dev/null +++ b/setup.cfg @@ -0,0 +1,30 @@ +[metadata] +name = pyshp +version = attr: shapefile.__version__ +description = Pure Python read/write support for ESRI Shapefile format +long_description = file: README.md +long_description_content_type = text/markdown +author = Joel Lawhead +author_email = jlawhead@geospatialpython.com +maintainer = Karim Bahgat +maintainer_email = karim.bahgat.norway@gmail.com +url = https://github.com/GeospatialPython/pyshp +download_url = https://pypi.org/project/pyshp/ +license = MIT +license_files = LICENSE.TXT +keywords = gis, geospatial, geographic, shapefile, shapefiles +classifiers = + Development Status :: 5 - Production/Stable + Programming Language :: Python + Programming Language :: Python :: 2.7 + Programming Language :: Python :: 3 + Topic :: Scientific/Engineering :: GIS + Topic :: Software Development :: Libraries + Topic :: Software Development :: Libraries :: Python Modules + +[options] +py_modules = shapefile +python_requires = >=2.7 + +[bdist_wheel] +universal=1 From 24ae08e8f6d0d8927c02ba996ea96115b30b6203 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Wed, 23 Jul 2025 14:56:38 +0100 Subject: [PATCH 039/220] Correct name of upload artefact (to PyShp_wheel_and_sdist) and remove unsupported input variable --- .github/actions/test/action.yml | 2 +- .github/workflows/run_checks_build_and_test.yml | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/actions/test/action.yml b/.github/actions/test/action.yml index 
8b08b60e..a65d902e 100644 --- a/.github/actions/test/action.yml +++ b/.github/actions/test/action.yml @@ -77,7 +77,7 @@ runs: - name: Download wheel and sdist (built in previous jobs) uses: actions/download-artifact@v4 with: - name: python_wheel_gcc_linux + name: PyShp_wheel_and_sdist path: dist - name: Install PyShp from the wheel (built in prev step) diff --git a/.github/workflows/run_checks_build_and_test.yml b/.github/workflows/run_checks_build_and_test.yml index 50adf282..80d80ee1 100644 --- a/.github/workflows/run_checks_build_and_test.yml +++ b/.github/workflows/run_checks_build_and_test.yml @@ -72,7 +72,6 @@ jobs: uses: ./Pyshp/.github/actions/test with: pyshp_repo_directory: ./Pyshp - python-version: ${{ matrix.python-version }} - name: Network tests uses: ./Pyshp/.github/actions/test From 5d91192b67f77e82f9e9b163a0801614ea93a07c Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Wed, 23 Jul 2025 15:03:24 +0100 Subject: [PATCH 040/220] Relax glob used to find wheel --- .github/actions/test/action.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/actions/test/action.yml b/.github/actions/test/action.yml index a65d902e..46777582 100644 --- a/.github/actions/test/action.yml +++ b/.github/actions/test/action.yml @@ -84,7 +84,7 @@ runs: shell: bash working-directory: dist/ run: | - WHEEL_NAME=$(ls pyshp-*-py3-none-any.whl) + WHEEL_NAME=$(ls pyshp-*py3-none-any.whl) python -m pip install $WHEEL_NAME[test] - name: Show Python and Pytest versions for logs. 
From d73866d80a472a2bf945b720c06f58c655dd0057 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Wed, 23 Jul 2025 15:09:44 +0100 Subject: [PATCH 041/220] Restore shapefile.py after renaming it, to ensure subsequent doctests in next step can find it --- .github/actions/test/action.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/actions/test/action.yml b/.github/actions/test/action.yml index 46777582..622b9340 100644 --- a/.github/actions/test/action.yml +++ b/.github/actions/test/action.yml @@ -100,8 +100,9 @@ runs: REPLACE_REMOTE_URLS_WITH_LOCALHOST: ${{ inputs.replace_remote_urls_with_localhost }} run: | echo "Ensure the tests import the installed wheel" - mv shapefile.py shapefile_repo.py + mv shapefile.py __tmp.py pytest -rA --tb=short ${{ inputs.extra_args }} + mv __tmp.py shapefile.py From ed32cf6efc37141d55bc3ae08f3f71f5ee38fbe7 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Wed, 23 Jul 2025 15:16:17 +0100 Subject: [PATCH 042/220] Bump to 3.0.0 and supported versions to Python 3.9 --- setup.cfg | 5 ++--- shapefile.py | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/setup.cfg b/setup.cfg index 906abd3a..1a2d80cc 100644 --- a/setup.cfg +++ b/setup.cfg @@ -16,15 +16,14 @@ keywords = gis, geospatial, geographic, shapefile, shapefiles classifiers = Development Status :: 5 - Production/Stable Programming Language :: Python - Programming Language :: Python :: 2.7 - Programming Language :: Python :: 3 + Programming Language :: Python :: 3.9 Topic :: Scientific/Engineering :: GIS Topic :: Software Development :: Libraries Topic :: Software Development :: Libraries :: Python Modules [options] py_modules = shapefile -python_requires = >=2.7 +python_requires = >=3.9 [bdist_wheel] universal=1 diff --git a/shapefile.py b/shapefile.py index 2ebca9a7..fe48f3e7 100644 --- a/shapefile.py +++ b/shapefile.py @@ -8,7 +8,7 @@ 
from __future__ import annotations -__version__ = "2.4.0" +__version__ = "3.0.0-alpha" import array import doctest From 13ba0b0782d7130014fd4e47dc2f1c13538c367c Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Wed, 23 Jul 2025 15:25:41 +0100 Subject: [PATCH 043/220] Run on PRs to all branches. --- .github/workflows/run_checks_build_and_test.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/run_checks_build_and_test.yml b/.github/workflows/run_checks_build_and_test.yml index 80d80ee1..3e9cadb3 100644 --- a/.github/workflows/run_checks_build_and_test.yml +++ b/.github/workflows/run_checks_build_and_test.yml @@ -5,7 +5,6 @@ name: Run pre-commit hooks and tests on: push: pull_request: - branches: [ master, ] workflow_call: workflow_dispatch: From cecb3c1b24f99e13c99adcb07c6f3d42fd3eb926 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Wed, 23 Jul 2025 18:48:36 +0100 Subject: [PATCH 044/220] Switch date field "{}".format()s for f-strings --- shapefile.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/shapefile.py b/shapefile.py index fe48f3e7..1f8bbf87 100644 --- a/shapefile.py +++ b/shapefile.py @@ -2613,11 +2613,9 @@ def __dbfRecord(self, record): elif fieldType == "D": # date: 8 bytes - date stored as a string in the format YYYYMMDD. 
if isinstance(value, date): - value = "{:04d}{:02d}{:02d}".format( - value.year, value.month, value.day - ) + value = f"{value.year:04d}{value.month:02d}{value.day:02d}" elif isinstance(value, list) and len(value) == 3: - value = "{:04d}{:02d}{:02d}".format(*value) + value = f"{value[0]:04d}{value[1]:02d}{value[2]:02d}" elif value in MISSING: value = b"0" * 8 # QGIS NULL for date type elif is_string(value) and len(value) == 8: From d8198a01f528117d14cb4c5ffada6bd57ba00b13 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 25 Jul 2025 10:40:30 +0100 Subject: [PATCH 045/220] Mark slow test --- pytest.ini | 1 + test_shapefile.py | 1 + 2 files changed, 2 insertions(+) diff --git a/pytest.ini b/pytest.ini index dbc031ba..39fbfaed 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,3 +1,4 @@ [pytest] markers = network: marks tests requiring network access + slow: marks other tests that cause bottlenecks diff --git a/test_shapefile.py b/test_shapefile.py index 5f9b855d..55793c4a 100644 --- a/test_shapefile.py +++ b/test_shapefile.py @@ -982,6 +982,7 @@ def test_record_oid(): assert shaperec.record.oid == i +@pytest.mark.slow def test_iterRecords_start_stop(): """ Assert that Reader.iterRecords(start, stop) From 0825aeb62f8e8bd2df43ee31c1df5554105f87d3 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 25 Jul 2025 10:52:33 +0100 Subject: [PATCH 046/220] Allow doctests to also be run from test_shapefile.py --- shapefile.py | 6 +++++- test_shapefile.py | 6 ++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/shapefile.py b/shapefile.py index 1f8bbf87..2582e56d 100644 --- a/shapefile.py +++ b/shapefile.py @@ -2979,10 +2979,14 @@ def summarize(self): return failure_count -if __name__ == "__main__": +def main(): """ Doctests are contained in the file 'README.md', and are tested using the built-in testing libraries. 
""" failure_count = _test() sys.exit(failure_count) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/test_shapefile.py b/test_shapefile.py index 55793c4a..5db487d6 100644 --- a/test_shapefile.py +++ b/test_shapefile.py @@ -1851,3 +1851,9 @@ def test_write_empty_shapefile(tmpdir, shape_type): assert len(r.records()) == 0 # test shapes are empty assert len(r.shapes()) == 0 + + +# This allows a PyShp wheel installed in the env to be tested +# against the doctests. +if __name__ == "__main__": + shapefile.main() \ No newline at end of file From 62519c05667cb0728cc7b976f321dc50b99ea9cf Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 25 Jul 2025 10:54:34 +0100 Subject: [PATCH 047/220] Use the test_shapefile.py doctests entrypoint to test wheels in CI --- .github/actions/test/action.yml | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/.github/actions/test/action.yml b/.github/actions/test/action.yml index 622b9340..61dfe13c 100644 --- a/.github/actions/test/action.yml +++ b/.github/actions/test/action.yml @@ -66,14 +66,6 @@ runs: echo "HTTP_SERVER_PID=$!" >> $GITHUB_ENV sleep 4 # give server time to start - - name: Doctests - shell: bash - working-directory: ${{ inputs.pyshp_repo_directory }} - env: - REPLACE_REMOTE_URLS_WITH_LOCALHOST: ${{ inputs.replace_remote_urls_with_localhost }} - run: python shapefile.py ${{ inputs.extra_args }} - - - name: Download wheel and sdist (built in previous jobs) uses: actions/download-artifact@v4 with: @@ -87,6 +79,13 @@ runs: WHEEL_NAME=$(ls pyshp-*py3-none-any.whl) python -m pip install $WHEEL_NAME[test] + - name: Doctests + shell: bash + working-directory: ${{ inputs.pyshp_repo_directory }} + env: + REPLACE_REMOTE_URLS_WITH_LOCALHOST: ${{ inputs.replace_remote_urls_with_localhost }} + run: python test_shapefile.py ${{ inputs.extra_args }} + - name: Show Python and Pytest versions for logs. 
shell: bash run: | From e4567ee107006f390fe822391accf047bc6ce688 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 25 Jul 2025 10:56:57 +0100 Subject: [PATCH 048/220] Add new line at end of files --- shapefile.py | 2 +- test_shapefile.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/shapefile.py b/shapefile.py index 2582e56d..4d5ad68f 100644 --- a/shapefile.py +++ b/shapefile.py @@ -2989,4 +2989,4 @@ def main(): if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/test_shapefile.py b/test_shapefile.py index 5db487d6..04994af8 100644 --- a/test_shapefile.py +++ b/test_shapefile.py @@ -1856,4 +1856,4 @@ def test_write_empty_shapefile(tmpdir, shape_type): # This allows a PyShp wheel installed in the env to be tested # against the doctests. if __name__ == "__main__": - shapefile.main() \ No newline at end of file + shapefile.main() From 2ba7f42b7dd0e25c7c75b6f8052c975d801e5547 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 25 Jul 2025 11:00:27 +0100 Subject: [PATCH 049/220] Update README.md --- README.md | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index caf5f339..a8ed822e 100644 --- a/README.md +++ b/README.md @@ -1446,7 +1446,7 @@ ESRI White Paper](http://downloads.esri.com/support/whitepapers/ao_/J9749_MultiP The testing framework is pytest, and the tests are located in test_shapefile.py. This includes an extensive set of unit tests of the various pyshp features, and tests against various input data. 
-In the same folder as README.md and shapefile.py, from the command line run +In the same folder as README.md and shapefile.py, from the command line run: ```shell python -m pytest @@ -1455,12 +1455,21 @@ python -m pytest Additionally, all the code and examples located in this file, README.md, is tested and verified with the builtin doctest framework. A special routine for invoking the doctest is run when calling directly on shapefile.py. -In the same folder as README.md and shapefile.py, from the command line run +In the same folder as README.md and shapefile.py, from the command line run: ```shell python shapefile.py ``` +This tests the code inside shapefile.py itself. To test an installed PyShp wheel against +the doctests, the same special routine can be invoked (in an env with the wheel and pytest +installed) from the test file: + + +```shell +python test_shapefile.py +``` + Linux/Mac and similar platforms may need to run `$ dos2unix README.md` in order to correct line endings in README.md, if Git has not automatically changed them. @@ -1497,8 +1506,6 @@ REPLACE_REMOTE_URLS_WITH_LOCALHOST=yes && python shapefile.py The network tests alone can also be run (without also running all the tests that don't make network requests) using: `pytest -m network` (or the doctests using: `python shapefile.py -m network`). -(*) The steps to host the files using Caddy for PYthon 2 are in ./actions/test/action.yml. For reasons as -yet unknown, shapefile.py's Reader class in Python 2 Pytest, can't connect to a Python 2 SimpleHTTPServer. 
# Contributors From d17387df8cfc3ef12a9264e00e53a3a9a51eb58d Mon Sep 17 00:00:00 2001 From: Mike Taves Date: Tue, 22 Jul 2025 21:51:18 +1200 Subject: [PATCH 050/220] Change to hatchling build backend, and move to src layout --- .github/workflows/run_checks_build_and_test.yml | 1 + MANIFEST.in | 2 -- pyproject.toml | 15 +++++++++++---- shapefile.py => src/shapefile.py | 0 4 files changed, 12 insertions(+), 6 deletions(-) delete mode 100644 MANIFEST.in rename shapefile.py => src/shapefile.py (100%) diff --git a/.github/workflows/run_checks_build_and_test.yml b/.github/workflows/run_checks_build_and_test.yml index 3e9cadb3..bbafa088 100644 --- a/.github/workflows/run_checks_build_and_test.yml +++ b/.github/workflows/run_checks_build_and_test.yml @@ -25,6 +25,7 @@ jobs: run: | python -m pip install --upgrade pip pip install pytest pylint pylint-per-file-ignores + pip install -e . - name: run Pylint for errors and warnings only, on test_shapefile.py run: | pylint --disable=R,C test_shapefile.py diff --git a/MANIFEST.in b/MANIFEST.in deleted file mode 100644 index 19d54f29..00000000 --- a/MANIFEST.in +++ /dev/null @@ -1,2 +0,0 @@ -include *.md *.txt *.TXT -recursive-include shapefiles *.dbf *.sbn *.sbx *.shp *.shx diff --git a/pyproject.toml b/pyproject.toml index 945c86c0..af799f78 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [build-system] -requires = ["setuptools"] -build-backend = "setuptools.build_meta" +requires = ["hatchling"] +build-backend = "hatchling.build" [project] name = "pyshp" @@ -33,8 +33,15 @@ test = ["pytest"] [project.urls] Repository = "https://github.com/GeospatialPython/pyshp" -[tool.setuptools.dynamic] -version = {attr = "shapefile.__version__"} +[tool.hatch.build.targets.sdist] +only-include = ["src", "shapefiles", "test_shapefile.py"] + +[tool.hatch.build.targets.wheel] +only-include = ["src"] +sources = {"src" = ""} # move from "src" directory for wheel + +[tool.hatch.version] +path = "src/shapefile.py" [tool.ruff] # Exclude 
a variety of commonly ignored directories. diff --git a/shapefile.py b/src/shapefile.py similarity index 100% rename from shapefile.py rename to src/shapefile.py From f9834623dc6b49b1b677de3bae4518802fd670e0 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Wed, 23 Jul 2025 15:34:34 +0100 Subject: [PATCH 051/220] Don't rename shapefile.py. It can't be imported from src now anyway. --- .github/actions/test/action.yml | 6 +----- setup.cfg | 29 ----------------------------- 2 files changed, 1 insertion(+), 34 deletions(-) delete mode 100644 setup.cfg diff --git a/.github/actions/test/action.yml b/.github/actions/test/action.yml index 61dfe13c..4a2fd996 100644 --- a/.github/actions/test/action.yml +++ b/.github/actions/test/action.yml @@ -97,11 +97,7 @@ runs: working-directory: ${{ inputs.pyshp_repo_directory }} env: REPLACE_REMOTE_URLS_WITH_LOCALHOST: ${{ inputs.replace_remote_urls_with_localhost }} - run: | - echo "Ensure the tests import the installed wheel" - mv shapefile.py __tmp.py - pytest -rA --tb=short ${{ inputs.extra_args }} - mv __tmp.py shapefile.py + run: pytest -rA --tb=short ${{ inputs.extra_args }} diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index 1a2d80cc..00000000 --- a/setup.cfg +++ /dev/null @@ -1,29 +0,0 @@ -[metadata] -name = pyshp -version = attr: shapefile.__version__ -description = Pure Python read/write support for ESRI Shapefile format -long_description = file: README.md -long_description_content_type = text/markdown -author = Joel Lawhead -author_email = jlawhead@geospatialpython.com -maintainer = Karim Bahgat -maintainer_email = karim.bahgat.norway@gmail.com -url = https://github.com/GeospatialPython/pyshp -download_url = https://pypi.org/project/pyshp/ -license = MIT -license_files = LICENSE.TXT -keywords = gis, geospatial, geographic, shapefile, shapefiles -classifiers = - Development Status :: 5 - Production/Stable - Programming Language :: Python - Programming Language 
:: Python :: 3.9 - Topic :: Scientific/Engineering :: GIS - Topic :: Software Development :: Libraries - Topic :: Software Development :: Libraries :: Python Modules - -[options] -py_modules = shapefile -python_requires = >=3.9 - -[bdist_wheel] -universal=1 From 8f01e97b766122214fafea7546d2d1b75ad88253 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 25 Jul 2025 12:35:08 +0100 Subject: [PATCH 052/220] Update .gitignore --- .gitignore | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index d1734202..41085890 100644 --- a/.gitignore +++ b/.gitignore @@ -22,5 +22,8 @@ dist/ .vscode .dmypy.json .python-version -.venv -venv +.venv/ +venv/ +.mypy_cache/ +.pytest_cache/ +.ruff_cache/ From e2fb8c8d90fadea9babaaa63c5c50ee24a14acd4 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 25 Jul 2025 12:35:12 +0100 Subject: [PATCH 053/220] Update changelog.txt --- changelog.txt | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/changelog.txt b/changelog.txt index be977915..c15a4141 100644 --- a/changelog.txt +++ b/changelog.txt @@ -1,3 +1,18 @@ +VERSION 3.0.0-alpha + +Python 2 and Python 3.8 support dropped + +2025-07-22 + Code quality + * Type hints + * f-strings + * Remove Python 2 specific functions. + * Run doctests against wheels. + * Testing of wheels before publishing them + * pyproject.toml src layout + * Slow test marked. 
+ + VERSION 2.4.0 2025-07-21 From e77950189796655a41028d6cd0d581eb34012cfb Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 25 Jul 2025 12:35:30 +0100 Subject: [PATCH 054/220] Update run_checks_build_and_test.yml --- .github/workflows/run_checks_build_and_test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/run_checks_build_and_test.yml b/.github/workflows/run_checks_build_and_test.yml index bbafa088..931a625f 100644 --- a/.github/workflows/run_checks_build_and_test.yml +++ b/.github/workflows/run_checks_build_and_test.yml @@ -28,7 +28,7 @@ jobs: pip install -e . - name: run Pylint for errors and warnings only, on test_shapefile.py run: | - pylint --disable=R,C test_shapefile.py + pylint --disable=R,C test_shapefile.py src/shapefile.py build_wheel_and_sdist: runs-on: ubuntu-latest From d7823715372e2b2fb573553c3da880154307d463 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 25 Jul 2025 12:57:57 +0100 Subject: [PATCH 055/220] Delete unreachable else clause --- src/shapefile.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 4d5ad68f..923a4c77 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -277,6 +277,10 @@ def ring_contains_point(coords: list[Coord], p: Point2D) -> bool: return inside_flag +class RingSamplingError(Exception): + pass + + def ring_sample(coords: list[Coord], ccw: bool = False) -> Point2D: """Return a sample point guaranteed to be within a ring, by efficiently finding the first centroid of a coordinate triplet whose orientation @@ -320,8 +324,11 @@ def itercoords(): # remove oldest triplet coord to allow iterating to next triplet triplet.pop(0) - else: - raise Exception("Unexpected error: Unable to find a ring sample point.") + raise RingSamplingError( + f"Unexpected error: Unable to find a ring sample point in: 
{coords}." + "Ensure the ring's coordinates are oriented clockwise, " + "and ensure the area enclosed is non-zero. " + ) def ring_contains_ring(coords1: list[Coord], coords2: list[Point2D]) -> bool: @@ -544,9 +551,7 @@ def __geo_interface__(self) -> GeoJsonShapeT: # coordinates.append([tuple(p) for p in self.points[ps:part]]) coordinates.append([p for p in self.points[ps:part]]) ps = part - else: - # coordinates.append([tuple(p) for p in self.points[part:]]) - coordinates.append([p for p in self.points[part:]]) + return {"type": "MultiLineString", "coordinates": coordinates} elif self.shapeType in [POLYGON, POLYGONM, POLYGONZ]: if len(self.parts) == 0: From 78684e9d5675379da13091fcfa1d38948e2ec590 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 25 Jul 2025 13:01:02 +0100 Subject: [PATCH 056/220] Add (and use) specific GeoJSON Exception --- src/shapefile.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 923a4c77..3a960b4f 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -19,7 +19,6 @@ import tempfile import time import zipfile -from collections.abc import Collection from datetime import date from struct import Struct, calcsize, error, pack, unpack from typing import IO, Any, Iterable, Iterator, Optional, Reversible, TypedDict, Union @@ -465,6 +464,10 @@ def organize_polygon_rings( return polys +class GeoJSON_Error(Exception): + pass + + class Shape: def __init__( self, @@ -610,7 +613,7 @@ def __geo_interface__(self) -> GeoJsonShapeT: return {"type": "MultiPolygon", "coordinates": polys} else: - raise Exception( + raise GeoJSON_Error( f'Shape type "{SHAPETYPE_LOOKUP[self.shapeType]}" cannot be represented as GeoJSON.' 
) @@ -635,7 +638,7 @@ def _from_geojson(geoj) -> Shape: elif geojType == "MultiPolygon": shapeType = POLYGON else: - raise Exception(f"Cannot create Shape from GeoJSON type '{geojType}'") + raise GeoJSON_Error(f"Cannot create Shape from GeoJSON type '{geojType}'") shape.shapeType = shapeType # set points and parts From b98a82a5b8c5549a2f88b40b85a1695522989be6 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 25 Jul 2025 14:10:02 +0100 Subject: [PATCH 057/220] Reformat --- src/shapefile.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 3a960b4f..d295e261 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -325,8 +325,8 @@ def itercoords(): raise RingSamplingError( f"Unexpected error: Unable to find a ring sample point in: {coords}." - "Ensure the ring's coordinates are oriented clockwise, " - "and ensure the area enclosed is non-zero. " + "Ensure the ring's coordinates are oriented clockwise, " + "and ensure the area enclosed is non-zero. " ) From 4f1a850498198e166fb6b9b4f96f98d768b5d35d Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 25 Jul 2025 14:24:27 +0100 Subject: [PATCH 058/220] Restore code in else clause in Shape.__ geo_interface__ --- src/shapefile.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/shapefile.py b/src/shapefile.py index d295e261..367b0b5c 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -326,7 +326,7 @@ def itercoords(): raise RingSamplingError( f"Unexpected error: Unable to find a ring sample point in: {coords}." "Ensure the ring's coordinates are oriented clockwise, " - "and ensure the area enclosed is non-zero. " + "and ensure the area enclosed is non-zero. 
" ) @@ -555,6 +555,9 @@ def __geo_interface__(self) -> GeoJsonShapeT: coordinates.append([p for p in self.points[ps:part]]) ps = part + # coordinates.append([tuple(p) for p in self.points[part:]]) + coordinates.append([p for p in self.points[part:]]) + return {"type": "MultiLineString", "coordinates": coordinates} elif self.shapeType in [POLYGON, POLYGONM, POLYGONZ]: if len(self.parts) == 0: From e365093ef5da30dca6d48a2464931488b5784149 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 25 Jul 2025 15:41:29 +0100 Subject: [PATCH 059/220] Suppress Pylint undefined loop variable and consider raise from warnings --- .../workflows/run_checks_build_and_test.yml | 1 + src/shapefile.py | 31 +++++++++++++------ 2 files changed, 22 insertions(+), 10 deletions(-) diff --git a/.github/workflows/run_checks_build_and_test.yml b/.github/workflows/run_checks_build_and_test.yml index 931a625f..88ffe5a3 100644 --- a/.github/workflows/run_checks_build_and_test.yml +++ b/.github/workflows/run_checks_build_and_test.yml @@ -27,6 +27,7 @@ jobs: pip install pytest pylint pylint-per-file-ignores pip install -e . 
- name: run Pylint for errors and warnings only, on test_shapefile.py + continue-on-error: true run: | pylint --disable=R,C test_shapefile.py src/shapefile.py diff --git a/src/shapefile.py b/src/shapefile.py index 367b0b5c..3fd192bc 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -556,8 +556,8 @@ def __geo_interface__(self) -> GeoJsonShapeT: ps = part # coordinates.append([tuple(p) for p in self.points[part:]]) - coordinates.append([p for p in self.points[part:]]) - + coordinates.append([p for p in self.points[part:]]) # pylint: disable=undefined-loop-variable + return {"type": "MultiLineString", "coordinates": coordinates} elif self.shapeType in [POLYGON, POLYGONM, POLYGONZ]: if len(self.parts) == 0: @@ -762,17 +762,19 @@ def __getattr__(self, item: str) -> RecordValue: and IndexError, if the field exists but the field's corresponding value in the Record does not exist """ + # pylint: disable=raise-missing-from try: if item == "__setstate__": # Prevent infinite loop from copy.deepcopy() raise AttributeError("_Record does not implement __setstate__") index = self.__field_positions[item] return list.__getitem__(self, index) except KeyError: - raise AttributeError(f"{item} is not a field name") + raise AttributeError(f"{item} is not a field name") except IndexError: raise IndexError( f"{item} found as a field but not enough values available." 
) + # pylint: enable=raise-missing-from def __setattr__(self, key: str, value: RecordValue): """ @@ -788,7 +790,7 @@ def __setattr__(self, key: str, value: RecordValue): index = self.__field_positions[key] return list.__setitem__(self, index, value) except KeyError: - raise AttributeError(f"{key} is not a field name") + raise AttributeError(f"{key} is not a field name") # pylint: disable=raise-missing-from def __getitem__(self, item): """ @@ -827,7 +829,7 @@ def __setitem__(self, key, value): if index is not None: return list.__setitem__(self, index, value) else: - raise IndexError(f"{key} is not a field name and not an int") + raise IndexError(f"{key} is not a field name and not an int") # pylint: disable=raise-missing-from @property def oid(self) -> int: @@ -931,8 +933,6 @@ def __geo_interface__(self): class ShapefileException(Exception): """An exception to handle shapefile specific problems.""" - pass - class _NoShpSentinel(object): """For use as a default value for shp to preserve the @@ -941,9 +941,6 @@ class _NoShpSentinel(object): called Reader(shp=None) to load self.shx. """ - pass - - class Reader: """Reads the three files of a shapefile as a unit or separately. If one of the three files (.shp, .shx, @@ -1409,6 +1406,8 @@ def __shpHeader(self): def __shape(self, oid=None, bbox=None): """Returns the header info and geometry for a single shape.""" + + # pylint: disable=attribute-defined-outside-init f = self.__getFileObj(self.shp) record = Shape(oid=oid) nParts = nPoints = zmin = zmax = mmin = mmax = None @@ -1487,6 +1486,7 @@ def __shape(self, oid=None, bbox=None): record.m = [m] else: record.m = [None] + # pylint: enable=attribute-defined-outside-init # Seek to the end of this record as defined by the record header because # the shapefile spec doesn't require the actual content to meet the header # definition. Probably allowed for lazy feature deletion. 
@@ -2224,6 +2224,8 @@ def __shapefileHeader(self, fileObj, headerType="shp"): """Writes the specified header type to the specified file-like object. Several of the shapefile formats are so similar that a single generic method to read or write them is warranted.""" + + # pylint: disable=raise-missing-from f = self.__getFileObj(fileObj) f.seek(0) # File code, Unused bytes @@ -2281,6 +2283,8 @@ def __shapefileHeader(self, fileObj, headerType="shp"): raise ShapefileException( "Failed to write shapefile elevation and measure values. Floats required." ) + + # pylint: enable=raise-missing-from def __dbfHeader(self): """Writes the dbf header and field descriptors.""" @@ -2350,6 +2354,8 @@ def shape(self, s): self.__shxRecord(offset, length) def __shpRecord(self, s): + + # pylint: disable=raise-missing-from f = self.__getFileObj(self.shp) offset = f.tell() # Record number, Content length place holder @@ -2532,10 +2538,13 @@ def __shpRecord(self, s): f.seek(start - 4) f.write(pack(">i", length)) f.seek(finish) + # pylint: enable=raise-missing-from return offset, length def __shxRecord(self, offset, length): """Writes the shx records.""" + + # pylint: disable=raise-missing-from f = self.__getFileObj(self.shx) try: f.write(pack(">i", offset // 2)) @@ -2544,6 +2553,8 @@ def __shxRecord(self, offset, length): "The .shp file has reached its file size limit > 4294967294 bytes (4.29 GB). To fix this, break up your file into multiple smaller ones." ) f.write(pack(">i", length)) + + # pylint: enable=raise-missing-from def record(self, *recordList, **recordDict): """Creates a dbf attribute record. 
You can submit either a sequence of From 90c31c0acd96376d49ad49807f2aa7ef4c0904f7 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 25 Jul 2025 15:53:59 +0100 Subject: [PATCH 060/220] Make some exceptions more specific, and unpack unassigned list comprehensions --- src/shapefile.py | 43 +++++++++++++++++++++---------------------- 1 file changed, 21 insertions(+), 22 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 3fd192bc..5ff2ea47 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -326,7 +326,7 @@ def itercoords(): raise RingSamplingError( f"Unexpected error: Unable to find a ring sample point in: {coords}." "Ensure the ring's coordinates are oriented clockwise, " - "and ensure the area enclosed is non-zero. " + "and ensure the area enclosed is non-zero. " ) @@ -556,7 +556,7 @@ def __geo_interface__(self) -> GeoJsonShapeT: ps = part # coordinates.append([tuple(p) for p in self.points[part:]]) - coordinates.append([p for p in self.points[part:]]) # pylint: disable=undefined-loop-variable + coordinates.append([p for p in self.points[part:]]) # pylint: disable=undefined-loop-variable return {"type": "MultiLineString", "coordinates": coordinates} elif self.shapeType in [POLYGON, POLYGONM, POLYGONZ]: @@ -769,7 +769,7 @@ def __getattr__(self, item: str) -> RecordValue: index = self.__field_positions[item] return list.__getitem__(self, index) except KeyError: - raise AttributeError(f"{item} is not a field name") + raise AttributeError(f"{item} is not a field name") except IndexError: raise IndexError( f"{item} found as a field but not enough values available." 
@@ -790,7 +790,7 @@ def __setattr__(self, key: str, value: RecordValue): index = self.__field_positions[key] return list.__setitem__(self, index, value) except KeyError: - raise AttributeError(f"{key} is not a field name") # pylint: disable=raise-missing-from + raise AttributeError(f"{key} is not a field name") # pylint: disable=raise-missing-from def __getitem__(self, item): """ @@ -829,7 +829,7 @@ def __setitem__(self, key, value): if index is not None: return list.__setitem__(self, index, value) else: - raise IndexError(f"{key} is not a field name and not an int") # pylint: disable=raise-missing-from + raise IndexError(f"{key} is not a field name and not an int") # pylint: disable=raise-missing-from @property def oid(self) -> int: @@ -941,6 +941,7 @@ class _NoShpSentinel(object): called Reader(shp=None) to load self.shx. """ + class Reader: """Reads the three files of a shapefile as a unit or separately. If one of the three files (.shp, .shx, @@ -1406,7 +1407,7 @@ def __shpHeader(self): def __shape(self, oid=None, bbox=None): """Returns the header info and geometry for a single shape.""" - + # pylint: disable=attribute-defined-outside-init f = self.__getFileObj(self.shp) record = Shape(oid=oid) @@ -1996,7 +1997,7 @@ def __init__( if target: target = pathlike_obj(target) if not is_string(target): - raise Exception( + raise TypeError( f"The target filepath {target!r} must be of type str/unicode or path-like, not {type(target)}." ) self.shp = self.__getFileObj(os.path.splitext(target)[0] + ".shp") @@ -2010,7 +2011,7 @@ def __init__( if dbf: self.dbf = self.__getFileObj(dbf) else: - raise Exception( + raise TypeError( "Either the target filepath, or any of shp, shx, or dbf must be set to create a shapefile." 
) # Initiate with empty headers, to be finalized upon closing @@ -2113,7 +2114,7 @@ def __getFileObj(self, f: Union[IO[bytes], str]) -> IO[bytes]: if hasattr(f, "write"): return f - raise Exception(f"Unsupported file-like: {f}") + raise ShapefileException(f"Unsupported file-like object: {f}") def __shpFileLength(self): """Calculates the file length of the shp file.""" @@ -2139,7 +2140,7 @@ def __bbox(self, s): # this should not happen. # any shape that is not null should have at least one point, and only those should be sent here. # could also mean that earlier code failed to add points to a non-null shape. - raise Exception( + raise ValueError( "Cannot create bbox. Expected a valid shape with at least one point. " f"Got a shape of type '{s.shapeType}' and 0 points." ) @@ -2224,7 +2225,7 @@ def __shapefileHeader(self, fileObj, headerType="shp"): """Writes the specified header type to the specified file-like object. Several of the shapefile formats are so similar that a single generic method to read or write them is warranted.""" - + # pylint: disable=raise-missing-from f = self.__getFileObj(fileObj) f.seek(0) @@ -2283,7 +2284,7 @@ def __shapefileHeader(self, fileObj, headerType="shp"): raise ShapefileException( "Failed to write shapefile elevation and measure values. Floats required." 
) - + # pylint: enable=raise-missing-from def __dbfHeader(self): @@ -2343,10 +2344,10 @@ def shape(self, s): if isinstance(s, dict): s = Shape._from_geojson(s) else: - raise Exception( + raise TypeError( "Can only write Shape objects, GeoJSON dictionaries, " "or objects with the __geo_interface__, " - "not: %r" % s + f"not: {s}" ) # Write to file offset, length = self.__shpRecord(s) @@ -2354,7 +2355,6 @@ def shape(self, s): self.__shxRecord(offset, length) def __shpRecord(self, s): - # pylint: disable=raise-missing-from f = self.__getFileObj(self.shp) offset = f.tell() @@ -2366,7 +2366,7 @@ def __shpRecord(self, s): if self.shapeType is None and s.shapeType != NULL: self.shapeType = s.shapeType if s.shapeType != NULL and s.shapeType != self.shapeType: - raise Exception( + raise ShapefileException( f"The shape's type ({s.shapeType}) must match " f"the type of the shapefile ({self.shapeType})." ) @@ -2422,7 +2422,8 @@ def __shpRecord(self, s): f.write(pack(f"<{len(s.z)}d", *s.z)) else: # if z values are stored as 3rd dimension - [f.write(pack(" 2 else 0)) for p in s.points] + for p in s.points: + f.write(pack(" 2 else 0)) except error: raise ShapefileException( f"Failed to write elevation values for record {self.shpNum}. Expected floats." @@ -2452,7 +2453,7 @@ def __shpRecord(self, s): # if m values are stored as 3rd/4th dimension # 0-index position of m value is 3 if z type (x,y,z,m), or 2 if m type (x,y,m) mpos = 3 if s.shapeType in (13, 15, 18, 31) else 2 - [ + for p in s.points: f.write( pack( " 4294967294 bytes (4.29 GB). To fix this, break up your file into multiple smaller ones." 
) f.write(pack(">i", length)) - + # pylint: enable=raise-missing-from def record(self, *recordList, **recordDict): From f791dae59a3ebd322c9e50ac4853a9937b13ddfe Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 25 Jul 2025 16:00:33 +0100 Subject: [PATCH 061/220] Update changelog.txt --- changelog.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/changelog.txt b/changelog.txt index c15a4141..48a534a6 100644 --- a/changelog.txt +++ b/changelog.txt @@ -11,7 +11,7 @@ Python 2 and Python 3.8 support dropped * Testing of wheels before publishing them * pyproject.toml src layout * Slow test marked. - + VERSION 2.4.0 From 3c58ae9064dff3280ecab61210bbd43ac9a43ec1 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 25 Jul 2025 16:00:58 +0100 Subject: [PATCH 062/220] Make except only catch specific exceptions --- src/shapefile.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 5ff2ea47..9b587888 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -979,7 +979,7 @@ def __init__( shp: Union[_NoShpSentinel, Optional[BinaryFileT]] = _NoShpSentinel(), shx: Optional[BinaryFileT] = None, dbf: Optional[BinaryFileT] = None, - **kwargs, + **kwargs, # pylint: disable=unused-argument ): self.shp = None self.shx = None @@ -1073,7 +1073,7 @@ def __init__( fileobj.seek(0) setattr(self, lower_ext, fileobj) self._files_to_close.append(fileobj) - except: + except (OSError, AttributeError): pass # Close and delete the temporary zipfile try: From 88ebe035ad49cfa265678c555d344a4f52910eb2 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 25 Jul 2025 16:05:48 +0100 Subject: [PATCH 063/220] Replace dbf Exception with ShapefileException --- src/shapefile.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/shapefile.py 
b/src/shapefile.py index 9b587888..4dd82011 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -979,7 +979,7 @@ def __init__( shp: Union[_NoShpSentinel, Optional[BinaryFileT]] = _NoShpSentinel(), shx: Optional[BinaryFileT] = None, dbf: Optional[BinaryFileT] = None, - **kwargs, # pylint: disable=unused-argument + **kwargs, # pylint: disable=unused-argument ): self.shp = None self.shx = None @@ -1078,7 +1078,7 @@ def __init__( # Close and delete the temporary zipfile try: zipfileobj.close() - except: + except Exception: pass # Try to load shapefile if self.shp or self.dbf: @@ -1886,7 +1886,9 @@ def iterRecords( if self.numRecords is None: self.__dbfHeader() if not isinstance(self.numRecords, int): - raise Exception("Error when reading number of Records in dbf file header") + raise ShapefileException( + "Error when reading number of Records in dbf file header" + ) f = self.__getFileObj(self.dbf) start = self.__restrictIndex(start) if stop is None: From be1f5d2b8e1afd1e94839043d073ecc7fab0cff3 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 25 Jul 2025 16:08:02 +0100 Subject: [PATCH 064/220] Suppress KeyErrors from opening Zip archives --- src/shapefile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/shapefile.py b/src/shapefile.py index 4dd82011..aefaaec3 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -1073,7 +1073,7 @@ def __init__( fileobj.seek(0) setattr(self, lower_ext, fileobj) self._files_to_close.append(fileobj) - except (OSError, AttributeError): + except (OSError, AttributeError, KeyError): pass # Close and delete the temporary zipfile try: From edbd735214616b7b14c42c14f01257171a42adda Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 25 Jul 2025 16:21:11 +0100 Subject: [PATCH 065/220] Delete Py23DocChecker --- src/shapefile.py | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git 
a/src/shapefile.py b/src/shapefile.py index aefaaec3..7173fed1 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -1078,7 +1078,7 @@ def __init__( # Close and delete the temporary zipfile try: zipfileobj.close() - except Exception: + except: # pylint disable=broad-exception-caught pass # Try to load shapefile if self.shp or self.dbf: @@ -1988,7 +1988,7 @@ def __init__( shp=None, shx=None, dbf=None, - **kwargs, + **kwargs, # pylint: disable=unused-argument ): self.target = target self.autoBalance = autoBalance @@ -2976,15 +2976,7 @@ def _test(args: list[str] = sys.argv[1:], verbosity: bool = False) -> int: new_url = _replace_remote_url(old_url) example.source = example.source.replace(old_url, new_url) - class Py23DocChecker(doctest.OutputChecker): - def check_output(self, want, got, optionflags): - res = doctest.OutputChecker.check_output(self, want, got, optionflags) - return res - - def summarize(self): - doctest.OutputChecker.summarize(True) - - runner = doctest.DocTestRunner(checker=Py23DocChecker(), verbose=verbosity) + runner = doctest.DocTestRunner(verbose=verbosity) if verbosity == 0: print(f"Running {len(tests.examples)} doctests...") From 87d97eb8fb82df7df9d3c793edab73e8687e2c85 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 25 Jul 2025 16:27:28 +0100 Subject: [PATCH 066/220] Rename unpack helper --- src/shapefile.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 7173fed1..88db4930 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -1223,13 +1223,13 @@ def __len__(self): shpLength = shp.tell() shp.seek(100) # Do a fast shape iteration until end of file. 
- unpack = Struct(">2i").unpack + unpack_2_int32_be = Struct(">2i").unpack offsets = [] pos = shp.tell() while pos < shpLength: offsets.append(pos) # Unpack the shape header only - (recNum, recLength) = unpack(shp.read(8)) + (recNum, recLength) = unpack_2_int32_be(shp.read(8)) # Jump to next shape position pos += 8 + (2 * recLength) shp.seek(pos) From f8abdf2e5e1d6f02428a4ef42fbf7d8f2e45163e Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 25 Jul 2025 16:40:42 +0100 Subject: [PATCH 067/220] Catch Type and ValueErrors only when forming date --- src/shapefile.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 88db4930..9f9e143f 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -119,6 +119,8 @@ class GeoJsonShapeT(TypedDict): MISSING = [None, ""] NODATA = -10e38 # as per the ESRI shapefile spec, only used for m-values. +unpack_2_int32_be = Struct(">2i").unpack + def b( v: Union[str, bytes], encoding: str = "utf-8", encodingErrors: str = "strict" @@ -1078,7 +1080,7 @@ def __init__( # Close and delete the temporary zipfile try: zipfileobj.close() - except: # pylint disable=broad-exception-caught + except: # pylint: disable=broad-exception-caught pass # Try to load shapefile if self.shp or self.dbf: @@ -1223,7 +1225,6 @@ def __len__(self): shpLength = shp.tell() shp.seek(100) # Do a fast shape iteration until end of file. 
- unpack_2_int32_be = Struct(">2i").unpack offsets = [] pos = shp.tell() while pos < shpLength: @@ -1414,7 +1415,7 @@ def __shape(self, oid=None, bbox=None): nParts = nPoints = zmin = zmax = mmin = mmax = None (recNum, recLength) = unpack(">2i", f.read(8)) # Determine the start of the next record - next = f.tell() + (2 * recLength) + next_shape = f.tell() + (2 * recLength) shapeType = unpack("= 16: + if next_shape - f.tell() >= 16: (mmin, mmax) = unpack("<2d", f.read(16)) # Measure values less than -10e38 are nodata values according to the spec - if next - f.tell() >= nPoints * 8: + if next_shape - f.tell() >= nPoints * 8: record.m = [] for m in _Array("d", unpack(f"<{nPoints}d", f.read(nPoints * 8))): if m > NODATA: @@ -1471,14 +1472,14 @@ def __shape(self, oid=None, bbox=None): point_bbox = list(record.points[0] + record.points[0]) # skip shape if no overlap with bounding box if not bbox_overlap(bbox, point_bbox): - f.seek(next) + f.seek(next_shape) return None # Read a single Z value if shapeType == 11: record.z = list(unpack("= 8: + if next_shape - f.tell() >= 8: (m,) = unpack("2i").unpack _i = 0 offset = shp.tell() while offset < shpLength: @@ -1557,7 +1557,7 @@ def shape(self, i=0, bbox=None): # Reached the requested index, exit loop with the offset value break # Unpack the shape header only - (recNum, recLength) = unpack(shp.read(8)) + (recNum, recLength) = unpack_2_int32_be(shp.read(8)) # Jump to next shape position offset += 8 + (2 * recLength) shp.seek(offset) @@ -1804,7 +1804,7 @@ def __record( # return as python date object y, m, d = int(value[:4]), int(value[4:6]), int(value[6:8]) value = date(y, m, d) - except: + except (TypeError, ValueError): # if invalid date, just return as unicode string so user can decide value = u(value.strip()) elif typ == "L": From 98077e5855048168758f43d6d709a2b4aee613b0 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 25 Jul 2025 16:54:24 +0100 Subject: [PATCH 068/220] 
Prefix names of unused variables with __ --- src/shapefile.py | 35 ++++++++++++++++++++++------------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 9f9e143f..b255dba0 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -1080,7 +1080,7 @@ def __init__( # Close and delete the temporary zipfile try: zipfileobj.close() - except: # pylint: disable=broad-exception-caught + except: # pylint: disable=bare-except pass # Try to load shapefile if self.shp or self.dbf: @@ -1230,7 +1230,7 @@ def __len__(self): while pos < shpLength: offsets.append(pos) # Unpack the shape header only - (recNum, recLength) = unpack_2_int32_be(shp.read(8)) + (__recNum, recLength) = unpack_2_int32_be(shp.read(8)) # Jump to next shape position pos += 8 + (2 * recLength) shp.seek(pos) @@ -1266,7 +1266,7 @@ def load(self, shapefile=None): object. Normally this method would be called by the constructor with the file name as an argument.""" if shapefile: - (shapeName, ext) = os.path.splitext(shapefile) + (shapeName, __ext) = os.path.splitext(shapefile) self.shapeName = shapeName self.load_shp(shapeName) self.load_shx(shapeName) @@ -1386,6 +1386,8 @@ def __shpHeader(self): raise ShapefileException( "Shapefile Reader requires a shapefile or file-like object. 
(no shp file found" ) + + # pylint: disable=attribute-defined-outside-init shp = self.shp # File length (16-bit word * 2 = bytes) shp.seek(24) @@ -1406,14 +1408,17 @@ def __shpHeader(self): else: self.mbox.append(None) + # pylint: enable=attribute-defined-outside-init + def __shape(self, oid=None, bbox=None): """Returns the header info and geometry for a single shape.""" # pylint: disable=attribute-defined-outside-init f = self.__getFileObj(self.shp) record = Shape(oid=oid) - nParts = nPoints = zmin = zmax = mmin = mmax = None - (recNum, recLength) = unpack(">2i", f.read(8)) + # Formerly we also set __zmin = __zmax = __mmin = __mmax = None + nParts = nPoints = None + (__recNum, recLength) = unpack(">2i", f.read(8)) # Determine the start of the next record next_shape = f.tell() + (2 * recLength) shapeType = unpack("= 16: - (mmin, mmax) = unpack("<2d", f.read(16)) + __mmin, __mmax = unpack("<2d", f.read(16)) # Measure values less than -10e38 are nodata values according to the spec if next_shape - f.tell() >= nPoints * 8: record.m = [] @@ -1557,7 +1562,7 @@ def shape(self, i=0, bbox=None): # Reached the requested index, exit loop with the offset value break # Unpack the shape header only - (recNum, recLength) = unpack_2_int32_be(shp.read(8)) + (__recNum, recLength) = unpack_2_int32_be(shp.read(8)) # Jump to next shape position offset += 8 + (2 * recLength) shp.seek(offset) @@ -1625,6 +1630,8 @@ def iterShapes(self, bbox=None): def __dbfHeader(self): """Reads a dbf header. Xbase-related code borrows heavily from ActiveState Python Cookbook Recipe 362715 by Raymond Hettinger""" + + # pylint: disable=attribute-defined-outside-init if not self.dbf: raise ShapefileException( "Shapefile Reader requires a shapefile or file-like object. 
(no dbf file found)" @@ -1638,7 +1645,7 @@ def __dbfHeader(self): # read fields numFields = (self.__dbfHdrLength - 33) // 32 - for field in range(numFields): + for __field in range(numFields): fieldDesc = list(unpack("<11sc4xBB14x", dbf.read(32))) name = 0 idx = 0 @@ -1667,10 +1674,12 @@ def __dbfHeader(self): # by default, read all fields except the deletion flag, hence "[1:]" # note: recLookup gives the index position of a field inside a _Record list fieldnames = [f[0] for f in self.fields[1:]] - fieldTuples, recLookup, recStruct = self.__recordFields(fieldnames) + __fieldTuples, recLookup, recStruct = self.__recordFields(fieldnames) self.__fullRecStruct = recStruct self.__fullRecLookup = recLookup + # pylint: enable=attribute-defined-outside-init + def __recordFmt(self, fields=None): """Calculates the format and size of a .dbf record. Optional 'fields' arg specifies which fieldnames to unpack and which to ignore. Note that this @@ -1709,7 +1718,7 @@ def __recordFields(self, fields=None): # first ignore repeated field names (order doesn't matter) fields = list(set(fields)) # get the struct - fmt, fmtSize = self.__recordFmt(fields=fields) + fmt, __fmtSize = self.__recordFmt(fields=fields) recStruct = Struct(fmt) # make sure the given fieldnames exist for name in fields: @@ -1762,7 +1771,7 @@ def __record( # parse each value record = [] - for (name, typ, size, deci), value in zip(fieldTuples, recordContents): + for (__name, typ, __size, deci), value in zip(fieldTuples, recordContents): if typ in ("N", "F"): # numeric or float: number stored as a string, right justified, and padded with blanks to the width of the field. 
value = value.split(b"\0")[0] @@ -2980,7 +2989,7 @@ def _test(args: list[str] = sys.argv[1:], verbosity: bool = False) -> int: if verbosity == 0: print(f"Running {len(tests.examples)} doctests...") - failure_count, test_count = runner.run(tests) + failure_count, __test_count = runner.run(tests) # print results if verbosity: From 3e0a7ca06be37fc8af2cafe93e40fe6534fbce5d Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 25 Jul 2025 17:27:51 +0100 Subject: [PATCH 069/220] Rename Pylint workflow step --- .github/workflows/run_checks_build_and_test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/run_checks_build_and_test.yml b/.github/workflows/run_checks_build_and_test.yml index 88ffe5a3..f965e509 100644 --- a/.github/workflows/run_checks_build_and_test.yml +++ b/.github/workflows/run_checks_build_and_test.yml @@ -26,7 +26,7 @@ jobs: python -m pip install --upgrade pip pip install pytest pylint pylint-per-file-ignores pip install -e . - - name: run Pylint for errors and warnings only, on test_shapefile.py + - name: run Pylint for errors and warnings only continue-on-error: true run: | pylint --disable=R,C test_shapefile.py src/shapefile.py From dc7115d7770d689a9b76951e3ca57cd02daac0de Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 25 Jul 2025 17:33:14 +0100 Subject: [PATCH 070/220] Suppress TODO warnings --- src/shapefile.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index b255dba0..330096b3 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -100,7 +100,7 @@ BBox = tuple[float, float, float, float] # File name, file object or anything with a read() method that returns bytes. 
-# TODO: Create simple Protocol with a read() method +# TODO: Create simple Protocol with a read() method pylint: disable=fixme BinaryFileT = Union[str, IO[bytes]] BinaryFileStreamT = Union[IO[bytes], io.BytesIO] @@ -1972,7 +1972,7 @@ def iterShapeRecords( yield ShapeRecord(shape=shape, record=record) else: # only iterate where shape.bbox overlaps with the given bbox - # TODO: internal __record method should be faster but would have to + # TODO: internal __record method should be faster but would have to pylint: disable=fixme # make sure to seek to correct file location... # fieldTuples,recLookup,recStruct = self.__recordFields(fields) From 0eb6a68cfd72cf9acfbb4358bc63083713d9b6bc Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 25 Jul 2025 17:43:33 +0100 Subject: [PATCH 071/220] Remove some unnecessary elses, and replace elif with ifs; dedent returns therein --- src/shapefile.py | 189 ++++++++++++++++++++++++----------------------- 1 file changed, 95 insertions(+), 94 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 330096b3..6d46417c 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -128,15 +128,14 @@ def b( if isinstance(v, str): # For python 3 encode str to bytes. return v.encode(encoding, encodingErrors) - elif isinstance(v, bytes): + if isinstance(v, bytes): # Already bytes. return v - elif v is None: + if v is None: # Since we're dealing with text, interpret None as "" return b"" - else: - # Force string representation. - return str(v).encode(encoding, encodingErrors) + # Force string representation. + return str(v).encode(encoding, encodingErrors) def u( @@ -145,15 +144,14 @@ def u( if isinstance(v, bytes): # For python 3 decode bytes to str. return v.decode(encoding, encodingErrors) - elif isinstance(v, str): + if isinstance(v, str): # Already str. 
return v - elif v is None: + if v is None: # Since we're dealing with text, interpret None as "" return "" - else: - # Force string representation. - return bytes(v).decode(encoding, encodingErrors) + # Force string representation. + return bytes(v).decode(encoding, encodingErrors) def is_string(v: Any) -> bool: @@ -163,8 +161,8 @@ def is_string(v: Any) -> bool: def pathlike_obj(path: Any) -> Any: if isinstance(path, os.PathLike): return os.fsdecode(path) - else: - return path + + return path # Begin @@ -193,8 +191,8 @@ def signed_area( area2 = sum(xs[i] * (ys[i + 1] - ys[i - 1]) for i in range(1, len(coords))) if fast: return area2 - else: - return area2 / 2.0 + + return area2 / 2.0 def is_cw(coords: Coords) -> bool: @@ -374,7 +372,7 @@ def organize_polygon_rings( # multiple exteriors, ie multi-polygon, have to group holes with correct exterior # shapefile format does not specify which holes belong to which exteriors # so have to do efficient multi-stage checking of hole-to-exterior containment - elif len(exteriors) > 1: + if len(exteriors) > 1: # exit early if no holes if not holes: polys = [] @@ -457,13 +455,12 @@ def organize_polygon_rings( return polys # no exteriors, be nice and assume due to incorrect winding order - else: - if return_errors is not None: - return_errors["polygon_only_holes"] = len(holes) - exteriors = holes - # add as single exterior without any holes - polys = [[ext] for ext in exteriors] - return polys + if return_errors is not None: + return_errors["polygon_only_holes"] = len(holes) + exteriors = holes + # add as single exterior without any holes + polys = [[ext] for ext in exteriors] + return polys class GeoJSON_Error(Exception): @@ -515,107 +512,111 @@ def __geo_interface__(self) -> GeoJsonShapeT: # however, it does allow geometry types with 'empty' coordinates to be interpreted as null-geometries return {"type": "Point", "coordinates": ()} # return {"type": "Point", "coordinates": tuple()} #type: ignore - else: - return {"type": 
"Point", "coordinates": self.points[0]} - # return {"type": "Point", "coordinates": tuple(self.points[0])} # type: ignore - elif self.shapeType in [MULTIPOINT, MULTIPOINTM, MULTIPOINTZ]: + + return {"type": "Point", "coordinates": self.points[0]} + # return {"type": "Point", "coordinates": tuple(self.points[0])} # type: ignore + + if self.shapeType in [MULTIPOINT, MULTIPOINTM, MULTIPOINTZ]: if len(self.points) == 0: # the shape has no coordinate information, i.e. is 'empty' # the geojson spec does not define a proper null-geometry type # however, it does allow geometry types with 'empty' coordinates to be interpreted as null-geometries return {"type": "MultiPoint", "coordinates": []} - else: - # multipoint - return { - "type": "MultiPoint", - "coordinates": self.points, - # "coordinates": [tuple(p) for p in self.points], #type: ignore - } - elif self.shapeType in [POLYLINE, POLYLINEM, POLYLINEZ]: + + # multipoint + return { + "type": "MultiPoint", + "coordinates": self.points, + # "coordinates": [tuple(p) for p in self.points], #type: ignore + } + + if self.shapeType in [POLYLINE, POLYLINEM, POLYLINEZ]: if len(self.parts) == 0: # the shape has no coordinate information, i.e. 
is 'empty' # the geojson spec does not define a proper null-geometry type # however, it does allow geometry types with 'empty' coordinates to be interpreted as null-geometries return {"type": "LineString", "coordinates": []} - elif len(self.parts) == 1: + + if len(self.parts) == 1: # linestring return { "type": "LineString", "coordinates": self.points, # "coordinates": [tuple(p) for p in self.points], #type: ignore } - else: - # multilinestring - ps = None - coordinates = [] - for part in self.parts: - if ps is None: - ps = part - continue - else: - # coordinates.append([tuple(p) for p in self.points[ps:part]]) - coordinates.append([p for p in self.points[ps:part]]) - ps = part - # coordinates.append([tuple(p) for p in self.points[part:]]) - coordinates.append([p for p in self.points[part:]]) # pylint: disable=undefined-loop-variable + # multilinestring + ps = None + coordinates = [] + for part in self.parts: + if ps is None: + ps = part + continue + else: + # coordinates.append([tuple(p) for p in self.points[ps:part]]) + coordinates.append([p for p in self.points[ps:part]]) + ps = part + + # coordinates.append([tuple(p) for p in self.points[part:]]) + coordinates.append([p for p in self.points[part:]]) # pylint: disable=undefined-loop-variable - return {"type": "MultiLineString", "coordinates": coordinates} - elif self.shapeType in [POLYGON, POLYGONM, POLYGONZ]: + return {"type": "MultiLineString", "coordinates": coordinates} + + if self.shapeType in [POLYGON, POLYGONM, POLYGONZ]: if len(self.parts) == 0: # the shape has no coordinate information, i.e. 
is 'empty' # the geojson spec does not define a proper null-geometry type # however, it does allow geometry types with 'empty' coordinates to be interpreted as null-geometries return {"type": "Polygon", "coordinates": []} - else: - # get all polygon rings - rings = [] - for i in range(len(self.parts)): - # get indexes of start and end points of the ring - start = self.parts[i] - try: - end = self.parts[i + 1] - except IndexError: - end = len(self.points) - - # extract the points that make up the ring - # ring = [tuple(p) for p in self.points[start:end]] - ring = [p for p in self.points[start:end]] - rings.append(ring) - - # organize rings into list of polygons, where each polygon is defined as list of rings. - # the first ring is the exterior and any remaining rings are holes (same as GeoJSON). - polys = organize_polygon_rings(rings, self._errors) - - # if VERBOSE is True, issue detailed warning about any shape errors - # encountered during the Shapefile to GeoJSON conversion - if VERBOSE and self._errors: - header = f"Possible issue encountered when converting Shape #{self.oid} to GeoJSON: " - orphans = self._errors.get("polygon_orphaned_holes", None) - if orphans: - msg = ( - header - + "Shapefile format requires that all polygon interior holes be contained by an exterior ring, \ + + # get all polygon rings + rings = [] + for i in range(len(self.parts)): + # get indexes of start and end points of the ring + start = self.parts[i] + try: + end = self.parts[i + 1] + except IndexError: + end = len(self.points) + + # extract the points that make up the ring + # ring = [tuple(p) for p in self.points[start:end]] + ring = [p for p in self.points[start:end]] + rings.append(ring) + + # organize rings into list of polygons, where each polygon is defined as list of rings. + # the first ring is the exterior and any remaining rings are holes (same as GeoJSON). 
+ polys = organize_polygon_rings(rings, self._errors) + + # if VERBOSE is True, issue detailed warning about any shape errors + # encountered during the Shapefile to GeoJSON conversion + if VERBOSE and self._errors: + header = f"Possible issue encountered when converting Shape #{self.oid} to GeoJSON: " + orphans = self._errors.get("polygon_orphaned_holes", None) + if orphans: + msg = ( + header + + "Shapefile format requires that all polygon interior holes be contained by an exterior ring, \ but the Shape contained interior holes (defined by counter-clockwise orientation in the shapefile format) that were \ orphaned, i.e. not contained by any exterior rings. The rings were still included but were \ encoded as GeoJSON exterior rings instead of holes." - ) - logger.warning(msg) - only_holes = self._errors.get("polygon_only_holes", None) - if only_holes: - msg = ( - header - + "Shapefile format requires that polygons contain at least one exterior ring, \ + ) + logger.warning(msg) + only_holes = self._errors.get("polygon_only_holes", None) + if only_holes: + msg = ( + header + + "Shapefile format requires that polygons contain at least one exterior ring, \ but the Shape was entirely made up of interior holes (defined by counter-clockwise orientation in the shapefile format). The rings were \ still included but were encoded as GeoJSON exterior rings instead of holes." 
-                    )
-                    logger.warning(msg)
+                )
+                logger.warning(msg)
 
-            # return as geojson
-            if len(polys) == 1:
-                return {"type": "Polygon", "coordinates": polys[0]}
-            else:
-                return {"type": "MultiPolygon", "coordinates": polys}
+        # return as geojson
+        if len(polys) == 1:
+            return {"type": "Polygon", "coordinates": polys[0]}
+
+        return {"type": "MultiPolygon", "coordinates": polys}
 
         else:
             raise GeoJSON_Error(

From f5752a00233db9b46ec1122605a88822c6704443 Mon Sep 17 00:00:00 2001
From: James Parrott <80779630+JamesParrott@users.noreply.github.com>
Date: Fri, 25 Jul 2025 18:50:43 +0100
Subject: [PATCH 072/220] Replace trivial list comps inside .append calls with
 list(). Remove more elses.

---
 src/shapefile.py | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/src/shapefile.py b/src/shapefile.py
index 6d46417c..30a1482c 100644
--- a/src/shapefile.py
+++ b/src/shapefile.py
@@ -552,13 +552,13 @@ def __geo_interface__(self) -> GeoJsonShapeT:
                 if ps is None:
                     ps = part
                     continue
-                else:
-                    # coordinates.append([tuple(p) for p in self.points[ps:part]])
-                    coordinates.append([p for p in self.points[ps:part]])
-                    ps = part
+
+                # coordinates.append([tuple(p) for p in self.points[ps:part]])
+                coordinates.append(list(self.points[ps:part]))
+                ps = part
 
             # coordinates.append([tuple(p) for p in self.points[part:]])
-            coordinates.append([p for p in self.points[part:]])  # pylint: disable=undefined-loop-variable
+            coordinates.append(list(self.points[part:]))  # pylint: disable=undefined-loop-variable
 
             return {"type": "MultiLineString", "coordinates": coordinates}
 
@@ -581,7 +581,7 @@ def __geo_interface__(self) -> GeoJsonShapeT:
 
                 # extract the points that make up the ring
                 # ring = [tuple(p) for p in self.points[start:end]]
-                ring = [p for p in self.points[start:end]]
+                ring = list(self.points[start:end])
                 rings.append(ring)
 
             # organize rings into list of polygons, where each polygon is defined as list of rings.
@@ -813,8 +813,8 @@ def __getitem__(self, item): index = None if index is not None: return list.__getitem__(self, index) - else: - raise IndexError(f'"{item}" is not a field name and not an int') + + raise IndexError(f'"{item}" is not a field name and not an int') def __setitem__(self, key, value): """ @@ -831,8 +831,8 @@ def __setitem__(self, key, value): index = self.__field_positions.get(key) if index is not None: return list.__setitem__(self, index, value) - else: - raise IndexError(f"{key} is not a field name and not an int") # pylint: disable=raise-missing-from + + raise IndexError(f"{key} is not a field name and not an int") # pylint: disable=raise-missing-from @property def oid(self) -> int: @@ -937,7 +937,7 @@ class ShapefileException(Exception): """An exception to handle shapefile specific problems.""" -class _NoShpSentinel(object): +class _NoShpSentinel: """For use as a default value for shp to preserve the behaviour (from when all keyword args were gathered in the **kwargs dict) in case someone explictly From 7b07b5627b301b65b5032d7cc0a5357b8969918f Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 25 Jul 2025 18:58:09 +0100 Subject: [PATCH 073/220] Remove last of unnecessary else: s --- src/shapefile.py | 94 +++++++++++++++++++++++------------------------- 1 file changed, 44 insertions(+), 50 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 30a1482c..f02c6a3c 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -618,10 +618,9 @@ def __geo_interface__(self) -> GeoJsonShapeT: return {"type": "MultiPolygon", "coordinates": polys} - else: - raise GeoJSON_Error( - f'Shape type "{SHAPETYPE_LOOKUP[self.shapeType]}" cannot be represented as GeoJSON.' - ) + raise GeoJSON_Error( + f'Shape type "{SHAPETYPE_LOOKUP[self.shapeType]}" cannot be represented as GeoJSON.' 
+ ) @staticmethod def _from_geojson(geoj) -> Shape: @@ -1053,7 +1052,7 @@ def __init__( raise ShapefileException( "Zipfile does not contain any shapefiles" ) - elif len(shapefiles) == 1: + if len(shapefiles) == 1: shapefile = shapefiles[0] else: raise ShapefileException( @@ -1088,12 +1087,12 @@ def __init__( # Load and exit early self.load() return - else: - raise ShapefileException( - f"No shp or dbf file found in zipfile: {path}" - ) - elif path.startswith("http"): + raise ShapefileException( + f"No shp or dbf file found in zipfile: {path}" + ) + + if path.startswith("http"): # Shapefile is from a url # Download each file to temporary path and treat as normal shapefile path urlinfo = urlparse(path) @@ -1126,16 +1125,13 @@ def __init__( # Load and exit early self.load() return - else: - raise ShapefileException( - f"No shp or dbf file found at url: {path}" - ) - else: - # Local file path to a shapefile - # Load and exit early - self.load(path) - return + raise ShapefileException(f"No shp or dbf file found at url: {path}") + + # Local file path to a shapefile + # Load and exit early + self.load(path) + return if not isinstance(shp, _NoShpSentinel): self.shp = self.__seek_0_on_file_obj_wrap_or_open_from_name("shp", shp) @@ -1208,7 +1204,7 @@ def __len__(self): return self.numRecords - elif self.shp: + if self.shp: # Otherwise use shape count if self.shx: if self.numShapes is None: @@ -1216,36 +1212,34 @@ def __len__(self): return self.numShapes - else: - # Index file not available, iterate all shapes to get total count - if self.numShapes is None: - # Determine length of shp file - shp = self.shp - checkpoint = shp.tell() - shp.seek(0, 2) - shpLength = shp.tell() - shp.seek(100) - # Do a fast shape iteration until end of file. 
- offsets = [] - pos = shp.tell() - while pos < shpLength: - offsets.append(pos) - # Unpack the shape header only - (__recNum, recLength) = unpack_2_int32_be(shp.read(8)) - # Jump to next shape position - pos += 8 + (2 * recLength) - shp.seek(pos) - # Set numShapes and offset indices - self.numShapes = len(offsets) - self._offsets = offsets - # Return to previous file position - shp.seek(checkpoint) - - return self.numShapes - - else: - # No file loaded yet, treat as 'empty' shapefile - return 0 + # Index file not available, iterate all shapes to get total count + if self.numShapes is None: + # Determine length of shp file + shp = self.shp + checkpoint = shp.tell() + shp.seek(0, 2) + shpLength = shp.tell() + shp.seek(100) + # Do a fast shape iteration until end of file. + offsets = [] + pos = shp.tell() + while pos < shpLength: + offsets.append(pos) + # Unpack the shape header only + (__recNum, recLength) = unpack_2_int32_be(shp.read(8)) + # Jump to next shape position + pos += 8 + (2 * recLength) + shp.seek(pos) + # Set numShapes and offset indices + self.numShapes = len(offsets) + self._offsets = offsets + # Return to previous file position + shp.seek(checkpoint) + + return self.numShapes + + # No file loaded yet, treat as 'empty' shapefile + return 0 def __iter__(self): """Iterates through the shapes/records in the shapefile.""" From c8dce92419b820f3f76be79bf3a604a259f784bd Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 25 Jul 2025 19:20:10 +0100 Subject: [PATCH 074/220] Add a gen exp and a contains test, and suppress weird yield next() issue --- pyproject.toml | 25 ++++++++++++++++++++----- src/shapefile.py | 6 +++--- 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index af799f78..ac039593 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -115,14 +115,29 @@ load-plugins=[ [tool.pylint.'MESSAGES CONTROL'] # Silence warning: shapefile.py:2076:20: W0212: 
Access to a protected # member _from_geojson of a client class (protected-access) -# +# Silence remarks: +# src\shapefile.py:338:0: R0914: Too many local variables (21/15) (too-many-locals) +# src\shapefile.py:338:0: R0912: Too many branches (24/12) (too-many-branches) +# src\shapefile.py:338:0: R0915: Too many statements (52/50) (too-many-statements) +# src\shapefile.py:470:0: R0902: Too many instance attributes (9/7) (too-many-instance-attributes) +# src\shapefile.py:471:4: R0913: Too many arguments (6/5) (too-many-arguments) +# src\shapefile.py:471:4: R0917: Too many positional arguments (6/5) (too-many-positional-arguments) +# src\shapefile.py:506:4: R0911: Too many return statements (10/6) (too-many-return-statements) +# src\shapefile.py:878:0: R0903: Too few public methods (0/2) (too-few-public-methods) # Silence warnings: test_shapefile.py:{783,786,799,803,06,1195}:19: # W0212: Access to a protected member _offsets of a # client class (protected-access) # # Toml multi-line string used instead of array due to: # https://github.com/christopherpickering/pylint-per-file-ignores/issues/160 -per-file-ignores = """ - shapefile.py:W0212 - test_shapefile.py:W0212 -""" +per-file-ignores = [ + "src/shapefile.py:W0212", + "src/shapefile.py:R0902", + "src/shapefile.py:R0903", + "src/shapefile.py:R0911", + "src/shapefile.py:R0912", + "src/shapefile.py:R0914", + "src/shapefile.py:R0915", + "src/shapefile.py:R0917", + "test_shapefile.py:W0212", +] diff --git a/src/shapefile.py b/src/shapefile.py index f02c6a3c..d60e9ed3 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -2314,7 +2314,7 @@ def __dbfHeader(self): raise ShapefileException( "Shapefile dbf header length exceeds maximum length." 
) - recordLength = sum([int(field[2]) for field in fields]) + 1 + recordLength = sum(int(field[2]) for field in fields) + 1 header = pack( " Date: Fri, 25 Jul 2025 19:28:22 +0100 Subject: [PATCH 075/220] Enable R level Pylint warnings --- .github/workflows/run_checks_build_and_test.yml | 2 +- pyproject.toml | 15 +++++---------- 2 files changed, 6 insertions(+), 11 deletions(-) diff --git a/.github/workflows/run_checks_build_and_test.yml b/.github/workflows/run_checks_build_and_test.yml index f965e509..419b45bd 100644 --- a/.github/workflows/run_checks_build_and_test.yml +++ b/.github/workflows/run_checks_build_and_test.yml @@ -29,7 +29,7 @@ jobs: - name: run Pylint for errors and warnings only continue-on-error: true run: | - pylint --disable=R,C test_shapefile.py src/shapefile.py + pylint --disable=C test_shapefile.py src/shapefile.py build_wheel_and_sdist: runs-on: ubuntu-latest diff --git a/pyproject.toml b/pyproject.toml index ac039593..aa11da45 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -112,7 +112,6 @@ load-plugins=[ "pylint_per_file_ignores", ] -[tool.pylint.'MESSAGES CONTROL'] # Silence warning: shapefile.py:2076:20: W0212: Access to a protected # member _from_geojson of a client class (protected-access) # Silence remarks: @@ -124,20 +123,16 @@ load-plugins=[ # src\shapefile.py:471:4: R0917: Too many positional arguments (6/5) (too-many-positional-arguments) # src\shapefile.py:506:4: R0911: Too many return statements (10/6) (too-many-return-statements) # src\shapefile.py:878:0: R0903: Too few public methods (0/2) (too-few-public-methods) +# src\shapefile.py:1981:0: R0904: Too many public methods (23/20) (too-many-public-methods) +# src\shapefile.py:2117:17: R1732: Consider using 'with' for resource-allocating operations (consider-using-with) # Silence warnings: test_shapefile.py:{783,786,799,803,06,1195}:19: # W0212: Access to a protected member _offsets of a # client class (protected-access) # # Toml multi-line string used instead of array due to: 
# https://github.com/christopherpickering/pylint-per-file-ignores/issues/160 +[tool.pylint.'messages control'] per-file-ignores = [ - "src/shapefile.py:W0212", - "src/shapefile.py:R0902", - "src/shapefile.py:R0903", - "src/shapefile.py:R0911", - "src/shapefile.py:R0912", - "src/shapefile.py:R0914", - "src/shapefile.py:R0915", - "src/shapefile.py:R0917", - "test_shapefile.py:W0212", + "/src/shapefile.py:W0212,R0902,R0903,R0904,R0911,R0912,R0913,R0914,R0915,R0917,R1732", + "test_shapefile.py:W0212,R1732", ] From f51f0f24464c6bd255c8ea8fa06619545e8a6a0b Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 25 Jul 2025 19:33:33 +0100 Subject: [PATCH 076/220] Fix bug --- src/shapefile.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index d60e9ed3..3ca0e41f 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -554,11 +554,11 @@ def __geo_interface__(self) -> GeoJsonShapeT: continue # coordinates.append([tuple(p) for p in self.points[ps:part]]) - coordinates.extend(self.points[ps:part]) + coordinates.append(list(self.points[ps:part])) ps = part # coordinates.append([tuple(p) for p in self.points[part:]]) - coordinates.extend(self.points[part:]) # pylint: disable=undefined-loop-variable + coordinates.append(list(self.points[part:])) return {"type": "MultiLineString", "coordinates": coordinates} From ac0142a0823efb36261d2e4033dd22d9387e52ab Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 25 Jul 2025 21:22:03 +0100 Subject: [PATCH 077/220] Allow pylint step to fail CI. 
Suppress undefined-loop-variable --- .github/workflows/run_checks_build_and_test.yml | 1 - src/shapefile.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/run_checks_build_and_test.yml b/.github/workflows/run_checks_build_and_test.yml index 419b45bd..f9e4b2e0 100644 --- a/.github/workflows/run_checks_build_and_test.yml +++ b/.github/workflows/run_checks_build_and_test.yml @@ -27,7 +27,6 @@ jobs: pip install pytest pylint pylint-per-file-ignores pip install -e . - name: run Pylint for errors and warnings only - continue-on-error: true run: | pylint --disable=C test_shapefile.py src/shapefile.py diff --git a/src/shapefile.py b/src/shapefile.py index 3ca0e41f..ee4ebe76 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -558,7 +558,7 @@ def __geo_interface__(self) -> GeoJsonShapeT: ps = part # coordinates.append([tuple(p) for p in self.points[part:]]) - coordinates.append(list(self.points[part:])) + coordinates.append(list(self.points[part:])) # pylint: disable=undefined-loop-variable (assert len(self.parts) >1) return {"type": "MultiLineString", "coordinates": coordinates} From f442a6fb8a5d6eb1e052ebf85ce62b68f0b74c06 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 25 Jul 2025 21:28:01 +0100 Subject: [PATCH 078/220] Remove extra comments after pylint directive --- src/shapefile.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index ee4ebe76..5ba3c61c 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -558,8 +558,8 @@ def __geo_interface__(self) -> GeoJsonShapeT: ps = part # coordinates.append([tuple(p) for p in self.points[part:]]) - coordinates.append(list(self.points[part:])) # pylint: disable=undefined-loop-variable (assert len(self.parts) >1) - + # assert len(self.parts) >1 # so disable pylint rule + coordinates.append(list(self.points[part:])) # pylint: disable=undefined-loop-variable return {"type": 
"MultiLineString", "coordinates": coordinates} if self.shapeType in [POLYGON, POLYGONM, POLYGONZ]: From 54a49a306402a9cb1207a5f6493308786f709858 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Sat, 26 Jul 2025 14:12:01 +0100 Subject: [PATCH 079/220] Rename pathlike_obj and define specific and generic overloads for it --- src/shapefile.py | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 5ba3c61c..27e1c65a 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -21,7 +21,18 @@ import zipfile from datetime import date from struct import Struct, calcsize, error, pack, unpack -from typing import IO, Any, Iterable, Iterator, Optional, Reversible, TypedDict, Union +from typing import ( + IO, + Any, + Iterable, + Iterator, + Optional, + Reversible, + TypedDict, + TypeVar, + Union, + overload, +) from urllib.error import HTTPError from urllib.parse import urlparse, urlunparse from urllib.request import Request, urlopen @@ -158,9 +169,16 @@ def is_string(v: Any) -> bool: return isinstance(v, str) -def pathlike_obj(path: Any) -> Any: +T = TypeVar("T") + + +@overload +def fsdecode_if_pathlike(path: os.PathLike) -> str: ... +@overload +def fsdecode_if_pathlike(path: T) -> T: ... 
+def fsdecode_if_pathlike(path): if isinstance(path, os.PathLike): - return os.fsdecode(path) + return os.fsdecode(path) # str return path @@ -999,7 +1017,7 @@ def __init__( self.encodingErrors = encodingErrors # See if a shapefile name was passed as the first argument if shapefile_path: - path = pathlike_obj(shapefile_path) + path = fsdecode_if_pathlike(shapefile_path) if is_string(path): if ".zip" in path: # Shapefile is inside a zipfile @@ -2001,7 +2019,7 @@ def __init__( self.shp = self.shx = self.dbf = None self._files_to_close = [] if target: - target = pathlike_obj(target) + target = fsdecode_if_pathlike(target) if not is_string(target): raise TypeError( f"The target filepath {target!r} must be of type str/unicode or path-like, not {type(target)}." From 4c1498d1a7ec58ce595e73e26c81180a4481f01c Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Sat, 26 Jul 2025 14:16:44 +0100 Subject: [PATCH 080/220] Add int and bool to RecordValue --- src/shapefile.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/shapefile.py b/src/shapefile.py index 27e1c65a..344da232 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -115,7 +115,9 @@ BinaryFileT = Union[str, IO[bytes]] BinaryFileStreamT = Union[IO[bytes], io.BytesIO] -RecordValue = Union[float, str, date] +RecordValue = Union[ + bool, int, float, str, date +] # A Possible value in a Shapefile record, e.g. 
L, N, F, C, D types class GeoJsonShapeT(TypedDict): From c8334c5a4b9424a126ab44f612d56272a912b28e Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Sat, 26 Jul 2025 14:47:51 +0100 Subject: [PATCH 081/220] Make Reader.__getFileObj generic --- src/shapefile.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 344da232..117643f6 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -993,7 +993,7 @@ def _assert_ext_is_supported(self, ext: str): def __init__( self, - shapefile_path: str = "", + shapefile_path: Union[str, os.PathLike] = "", /, *, encoding: str = "utf-8", @@ -1369,7 +1369,7 @@ def close(self): pass self._files_to_close = [] - def __getFileObj(self, f): + def __getFileObj(self, f: Optional[T]) -> T: """Checks to see if the requested shapefile file object is available. If not a ShapefileException is raised.""" if not f: From 350752ebebb1a86cf163bef74f89a9239e19ee6e Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Sat, 26 Jul 2025 14:56:37 +0100 Subject: [PATCH 082/220] Type hint Reader.__shapeIndex --- src/shapefile.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 117643f6..df06cf22 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -1425,6 +1425,7 @@ def __shpHeader(self): # pylint: enable=attribute-defined-outside-init + # def __shape(self, oid: Optional[int] = None, bbox: Optional[BBox] = None) -> Shape: def __shape(self, oid=None, bbox=None): """Returns the header info and geometry for a single shape.""" @@ -1540,9 +1541,9 @@ def __shxOffsets(self): shxRecords = _Array("i", shx.read(2 * self.numShapes * 4)) if sys.byteorder != "big": shxRecords.byteswap() - self._offsets = [2 * el for el in shxRecords[::2]] + self._offsets: list[int] = [2 * el for el in shxRecords[::2]] - def __shapeIndex(self, i=None): + def 
__shapeIndex(self, i: Optional[int] = None) -> Optional[int]: """Returns the offset in a .shp file for a shape based on information in the .shx index file.""" shx = self.shx @@ -1554,7 +1555,7 @@ def __shapeIndex(self, i=None): self.__shxOffsets() return self._offsets[i] - def shape(self, i=0, bbox=None): + def shape(self, i: int = 0, bbox: Optional[BBox] = None): """Returns a shape object for a shape in the geometry record file. If the 'bbox' arg is given (list or tuple of xmin,ymin,xmax,ymax), From dc94a2cfc8cccb37891482cdf0fd226cbeb5f420 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Sat, 26 Jul 2025 15:36:39 +0100 Subject: [PATCH 083/220] Type hint Reader.__shape --- src/shapefile.py | 106 +++++++++++++++++++++++++++++------------------ 1 file changed, 65 insertions(+), 41 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index df06cf22..eb750ae3 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -24,10 +24,13 @@ from typing import ( IO, Any, + Collection, + Generic, Iterable, Iterator, Optional, Reversible, + Sequence, TypedDict, TypeVar, Union, @@ -99,8 +102,9 @@ 5: "RING", } -# Custom type variables +## Custom type variables +T = TypeVar("T") Point2D = tuple[float, float] PointZ = tuple[float, float, float] PointZM = tuple[float, float, float, float] @@ -171,9 +175,6 @@ def is_string(v: Any) -> bool: return isinstance(v, str) -T = TypeVar("T") - - @overload def fsdecode_if_pathlike(path: os.PathLike) -> str: ... @overload @@ -188,7 +189,7 @@ def fsdecode_if_pathlike(path): # Begin -class _Array(array.array): +class _Array(array.array, Generic[T]): """Converts python tuples to lists of the appropriate type. 
Used to unpack different shapefile header parts.""" @@ -235,7 +236,7 @@ def ring_bbox(coords: Coords) -> BBox: return bbox -def bbox_overlap(bbox1: BBox, bbox2: BBox) -> bool: +def bbox_overlap(bbox1: BBox, bbox2: Collection[float]) -> bool: """Tests whether two bounding boxes overlap.""" xmin1, ymin1, xmax1, ymax1 = bbox1 xmin2, ymin2, xmax2, ymax2 = bbox2 @@ -492,8 +493,8 @@ def __init__( self, shapeType: int = NULL, points: Optional[list[Coord]] = None, - parts: Optional[list[int]] = None, - partTypes: Optional[list[int]] = None, + parts: Optional[Sequence[int]] = None, + partTypes: Optional[Sequence[int]] = None, oid: Optional[int] = None, ): """Stores the geometry of the different shape types @@ -522,6 +523,10 @@ def __init__( else: self.__oid = -1 + self.z: Optional[Union[list[Optional[float]], _Array[float]]] = None + self.m: Optional[list[Optional[float]]] = None + self.bbox: Optional[_Array[float]] = None + @property def __geo_interface__(self) -> GeoJsonShapeT: if self.shapeType in [POINT, POINTM, POINTZ]: @@ -1425,15 +1430,17 @@ def __shpHeader(self): # pylint: enable=attribute-defined-outside-init - # def __shape(self, oid: Optional[int] = None, bbox: Optional[BBox] = None) -> Shape: - def __shape(self, oid=None, bbox=None): + def __shape( + self, oid: Optional[int] = None, bbox: Optional[BBox] = None + ) -> Optional[Shape]: """Returns the header info and geometry for a single shape.""" # pylint: disable=attribute-defined-outside-init f = self.__getFileObj(self.shp) record = Shape(oid=oid) - # Formerly we also set __zmin = __zmax = __mmin = __mmax = None - nParts = nPoints = None + # Previously, we also set __zmin = __zmax = __mmin = __mmax = None + nParts: Optional[int] = None + nPoints: Optional[int] = None (__recNum, recLength) = unpack(">2i", f.read(8)) # Determine the start of the next record next_shape = f.tell() + (2 * recLength) @@ -1444,7 +1451,7 @@ def __shape(self, oid=None, bbox=None): record.points = [] # All shape types capable of having a 
bounding box elif shapeType in (3, 5, 8, 13, 15, 18, 23, 25, 28, 31): - record.bbox = _Array("d", unpack("<4d", f.read(32))) + record.bbox = _Array[float]("d", unpack("<4d", f.read(32))) # if bbox specified and no overlap, skip this shape if bbox is not None and not bbox_overlap(bbox, record.bbox): # because we stop parsing this shape, skip to beginning of @@ -1454,40 +1461,52 @@ def __shape(self, oid=None, bbox=None): # Shape types with parts if shapeType in (3, 5, 13, 15, 23, 25, 31): nParts = unpack("= 16: - __mmin, __mmax = unpack("<2d", f.read(16)) - # Measure values less than -10e38 are nodata values according to the spec - if next_shape - f.tell() >= nPoints * 8: - record.m = [] - for m in _Array("d", unpack(f"<{nPoints}d", f.read(nPoints * 8))): - if m > NODATA: - record.m.append(m) - else: - record.m.append(None) - else: - record.m = [None for _ in range(nPoints)] + + # Read z extremes and values + if shapeType in (13, 15, 18, 31): + __zmin, __zmax = unpack("<2d", f.read(16)) + record.z = _Array[float]( + "d", unpack(f"<{nPoints}d", f.read(nPoints * 8)) + ) + + # Read m extremes and values + if shapeType in (13, 15, 18, 23, 25, 28, 31): + if next_shape - f.tell() >= 16: + __mmin, __mmax = unpack("<2d", f.read(16)) + # Measure values less than -10e38 are nodata values according to the spec + if next_shape - f.tell() >= nPoints * 8: + record.m = [] + for m in _Array[float]( + "d", unpack(f"<{nPoints}d", f.read(nPoints * 8)) + ): + if m > NODATA: + record.m.append(m) + else: + record.m.append(None) + else: + record.m = [None for _ in range(nPoints)] + # Read a single point if shapeType in (1, 11, 21): - record.points = [_Array("d", unpack("<2d", f.read(16)))] + array_2D = _Array[float]("d", unpack("<2d", f.read(16))) + + record.points = [tuple(array_2D)] if bbox is not None: # create bounding box for Point by duplicating coordinates point_bbox = list(record.points[0] + record.points[0]) @@ -1495,9 +1514,11 @@ def __shape(self, oid=None, bbox=None): if not 
bbox_overlap(bbox, point_bbox): f.seek(next_shape) return None + # Read a single Z value if shapeType == 11: record.z = list(unpack("= 8: @@ -1509,11 +1530,14 @@ def __shape(self, oid=None, bbox=None): record.m = [m] else: record.m = [None] + # pylint: enable=attribute-defined-outside-init # Seek to the end of this record as defined by the record header because # the shapefile spec doesn't require the actual content to meet the header # definition. Probably allowed for lazy feature deletion. + f.seek(next_shape) + return record def __shxHeader(self): @@ -1538,7 +1562,7 @@ def __shxOffsets(self): # Jump to the first record. shx.seek(100) # Each index record consists of two nrs, we only want the first one - shxRecords = _Array("i", shx.read(2 * self.numShapes * 4)) + shxRecords = _Array[int]("i", shx.read(2 * self.numShapes * 4)) if sys.byteorder != "big": shxRecords.byteswap() self._offsets: list[int] = [2 * el for el in shxRecords[::2]] From cfcc376f8c0414130b7b69f10c6093a23844ee53 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Sat, 26 Jul 2025 17:01:23 +0100 Subject: [PATCH 084/220] Suppress ..bbox, z, .m attr-defined mypy errors on Shape --- src/shapefile.py | 38 ++++++++++++++++++++------------------ 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index eb750ae3..9cf86223 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -523,9 +523,9 @@ def __init__( else: self.__oid = -1 - self.z: Optional[Union[list[Optional[float]], _Array[float]]] = None - self.m: Optional[list[Optional[float]]] = None - self.bbox: Optional[_Array[float]] = None + # self.z: Optional[Union[list[Optional[float]], _Array[float]]] = None + # self.m: Optional[list[Optional[float]]] = None + # self.bbox: Optional[_Array[float]] = None @property def __geo_interface__(self) -> GeoJsonShapeT: @@ -1451,9 +1451,9 @@ def __shape( record.points = [] # All shape types capable of having a bounding 
box elif shapeType in (3, 5, 8, 13, 15, 18, 23, 25, 28, 31): - record.bbox = _Array[float]("d", unpack("<4d", f.read(32))) + record.bbox = _Array[float]("d", unpack("<4d", f.read(32))) # type: ignore [attr-defined] # if bbox specified and no overlap, skip this shape - if bbox is not None and not bbox_overlap(bbox, record.bbox): + if bbox is not None and not bbox_overlap(bbox, record.bbox): # type: ignore [attr-defined] # because we stop parsing this shape, skip to beginning of # next shape before we return f.seek(next_shape) @@ -1462,6 +1462,12 @@ def __shape( if shapeType in (3, 5, 13, 15, 23, 25, 31): nParts = unpack("= nPoints * 8: - record.m = [] + record.m = [] # type: ignore [attr-defined] for m in _Array[float]( "d", unpack(f"<{nPoints}d", f.read(nPoints * 8)) ): if m > NODATA: - record.m.append(m) + record.m.append(m) # type: ignore [attr-defined] else: - record.m.append(None) + record.m.append(None) # type: ignore [attr-defined] else: - record.m = [None for _ in range(nPoints)] + record.m = [None for _ in range(nPoints)] # type: ignore [attr-defined] # Read a single point if shapeType in (1, 11, 21): @@ -1517,7 +1519,7 @@ def __shape( # Read a single Z value if shapeType == 11: - record.z = list(unpack(" NODATA: - record.m = [m] + record.m = [m] # type: ignore [attr-defined] else: - record.m = [None] + record.m = [None] # type: ignore [attr-defined] # pylint: enable=attribute-defined-outside-init # Seek to the end of this record as defined by the record header because From 8c0187532d236d369668e7c5d0e10a81a59e6648 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Sat, 26 Jul 2025 17:08:12 +0100 Subject: [PATCH 085/220] Reorder shapetype codes to group according to attributes. 
--- src/shapefile.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 9cf86223..55437399 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -1450,7 +1450,7 @@ def __shape( if shapeType == 0: record.points = [] # All shape types capable of having a bounding box - elif shapeType in (3, 5, 8, 13, 15, 18, 23, 25, 28, 31): + elif shapeType in (3, 13, 23, 5, 15, 25, 8, 18, 28, 31): record.bbox = _Array[float]("d", unpack("<4d", f.read(32))) # type: ignore [attr-defined] # if bbox specified and no overlap, skip this shape if bbox is not None and not bbox_overlap(bbox, record.bbox): # type: ignore [attr-defined] @@ -1459,11 +1459,11 @@ def __shape( f.seek(next_shape) return None # Shape types with parts - if shapeType in (3, 5, 13, 15, 23, 25, 31): + if shapeType in (3, 13, 23, 5, 15, 25, 31): nParts = unpack("= 16: __mmin, __mmax = unpack("<2d", f.read(16)) # Measure values less than -10e38 are nodata values according to the spec From aa5560c058c47fe800766b5ae49b9a980f5bd278 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Sat, 26 Jul 2025 17:28:16 +0100 Subject: [PATCH 086/220] Type hint FieldTuples --- src/shapefile.py | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 55437399..3c70d2b7 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -25,6 +25,7 @@ IO, Any, Collection, + Container, Generic, Iterable, Iterator, @@ -119,6 +120,7 @@ BinaryFileT = Union[str, IO[bytes]] BinaryFileStreamT = Union[IO[bytes], io.BytesIO] +FieldTuple = tuple[str, str, int, bool] RecordValue = Union[ bool, int, float, str, date ] # A Possible value in a Shapefile record, e.g. 
L, N, F, C, D types @@ -1017,7 +1019,7 @@ def __init__( self.shpLength: Optional[int] = None self.numRecords: Optional[int] = None self.numShapes: Optional[int] = None - self.fields: list[list[str]] = [] + self.fields: list[FieldTuple] = [] self.__dbfHdrLength = 0 self.__fieldLookup: dict[str, int] = {} self.encoding = encoding @@ -1581,7 +1583,7 @@ def __shapeIndex(self, i: Optional[int] = None) -> Optional[int]: self.__shxOffsets() return self._offsets[i] - def shape(self, i: int = 0, bbox: Optional[BBox] = None): + def shape(self, i: int = 0, bbox: Optional[BBox] = None) -> Optional[Shape]: """Returns a shape object for a shape in the geometry record file. If the 'bbox' arg is given (list or tuple of xmin,ymin,xmax,ymax), @@ -1619,7 +1621,7 @@ def shape(self, i: int = 0, bbox: Optional[BBox] = None): shp.seek(offset) return self.__shape(oid=i, bbox=bbox) - def shapes(self, bbox=None): + def shapes(self, bbox: Optional[BBox] = None) -> Shapes: """Returns all shapes in a shapefile. To only read shapes within a given spatial region, specify the 'bbox' arg as a list or tuple of xmin,ymin,xmax,ymax. @@ -1628,7 +1630,7 @@ def shapes(self, bbox=None): shapes.extend(self.iterShapes(bbox=bbox)) return shapes - def iterShapes(self, bbox=None): + def iterShapes(self, bbox: Optional[BBox] = None) -> Iterator[Optional[Shape]]: """Returns a generator of shapes in a shapefile. Useful for handling large shapefiles. To only read shapes within a given spatial region, specify the 'bbox' @@ -1722,7 +1724,7 @@ def __dbfHeader(self): # pylint: enable=attribute-defined-outside-init - def __recordFmt(self, fields=None): + def __recordFmt(self, fields: Optional[Container[str]] = None) -> tuple[str, int]: """Calculates the format and size of a .dbf record. Optional 'fields' arg specifies which fieldnames to unpack and which to ignore. Note that this always includes the DeletionFlag at index 0, regardless of the 'fields' arg. 
@@ -1748,7 +1750,9 @@ def __recordFmt(self, fields=None): fmtSize += 1 return (fmt, fmtSize) - def __recordFields(self, fields=None): + def __recordFields( + self, fields: Optional[Iterable[str]] = None + ) -> tuple[list[FieldTuple], dict[str, int], Struct]: """Returns the necessary info required to unpack a record's fields, restricted to a subset of fieldnames 'fields' if specified. Returns a list of field info tuples, a name-index lookup dict, @@ -1758,19 +1762,19 @@ def __recordFields(self, fields=None): if fields is not None: # restrict info to the specified fields # first ignore repeated field names (order doesn't matter) - fields = list(set(fields)) + unique_fields = list(set(fields)) # get the struct - fmt, __fmtSize = self.__recordFmt(fields=fields) + fmt, __fmtSize = self.__recordFmt(fields=unique_fields) recStruct = Struct(fmt) # make sure the given fieldnames exist - for name in fields: + for name in unique_fields: if name not in self.__fieldLookup or name == "DeletionFlag": raise ValueError(f'"{name}" is not a valid field name') # fetch relevant field info tuples fieldTuples = [] for fieldinfo in self.fields[1:]: name = fieldinfo[0] - if name in fields: + if name in unique_fields: fieldTuples.append(fieldinfo) # store the field positions recLookup = {f[0]: i for i, f in enumerate(fieldTuples)} @@ -1783,7 +1787,7 @@ def __recordFields(self, fields=None): def __record( self, - fieldTuples: list[tuple[str, str, int, bool]], + fieldTuples: list[FieldTuple], recLookup: dict[str, int], recStruct: Struct, oid: Optional[int] = None, From b8c84551a09a4e44f926be5a5eb7519a4e9369e3 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Sat, 26 Jul 2025 17:45:03 +0100 Subject: [PATCH 087/220] Type hint Writer.__init__ --- src/shapefile.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 3c70d2b7..2a0b65a4 100644 --- a/src/shapefile.py +++ 
b/src/shapefile.py @@ -2034,23 +2034,23 @@ class Writer: def __init__( self, - target=None, - shapeType=None, - autoBalance=False, - encoding="utf-8", - encodingErrors="strict", + target: Union[str, os.PathLike, None] = None, + shapeType: Optional[int] = None, + autoBalance: bool = False, *, - shp=None, - shx=None, - dbf=None, + encoding: str = "utf-8", + encodingErrors: str = "strict", + shp: Optional[BinaryFileT] = None, + shx: Optional[BinaryFileT] = None, + dbf: Optional[BinaryFileT] = None, **kwargs, # pylint: disable=unused-argument ): self.target = target self.autoBalance = autoBalance - self.fields = [] + self.fields: list[FieldTuple] = [] self.shapeType = shapeType self.shp = self.shx = self.dbf = None - self._files_to_close = [] + self._files_to_close: list[BinaryFileStreamT] = [] if target: target = fsdecode_if_pathlike(target) if not is_string(target): From b91d6d7b4b5f635c8c3ce3081f683c2d40e8308a Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Sat, 26 Jul 2025 18:11:18 +0100 Subject: [PATCH 088/220] Type Writer.__getFileObj as generic bounded to Protocol. I couldn't make my Readable Protocol idea work for the particular implementation of __seek_0_on_file_obj_wrap_or_open_from_name - the generic for Seekable absorbs it, so it requires type negations / algebra. --- src/shapefile.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 2a0b65a4..e6392cc9 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -29,7 +29,9 @@ Generic, Iterable, Iterator, + NoReturn, Optional, + Protocol, Reversible, Sequence, TypedDict, @@ -115,8 +117,12 @@ BBox = tuple[float, float, float, float] + +class BinaryWritable(Protocol): + def write(self, data: bytes): ... + + # File name, file object or anything with a read() method that returns bytes. 
-# TODO: Create simple Protocol with a read() method pylint: disable=fixme BinaryFileT = Union[str, IO[bytes]] BinaryFileStreamT = Union[IO[bytes], io.BytesIO] @@ -2157,7 +2163,15 @@ def close(self): pass self._files_to_close = [] - def __getFileObj(self, f: Union[IO[bytes], str]) -> IO[bytes]: + W = TypeVar("W", bound=BinaryWritable) + + @overload + def __getFileObj(self, f: str) -> IO[bytes]: ... + @overload + def __getFileObj(self, f: None) -> NoReturn: ... + @overload + def __getFileObj(self, f: W) -> W: ... + def __getFileObj(self, f): """Safety handler to verify file-like objects""" if not f: raise ShapefileException("No file-like object available.") From 98b094b5b819fc4bef52b8209ca1c83c001a6ce3 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Sat, 26 Jul 2025 18:19:44 +0100 Subject: [PATCH 089/220] Type Writer.shapeTypeName, and make return "NULL" if self.shapeType is None --- src/shapefile.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index e6392cc9..1641dcbe 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -2275,8 +2275,8 @@ def __mbox(self, s): return mbox @property - def shapeTypeName(self): - return SHAPETYPE_LOOKUP[self.shapeType] + def shapeTypeName(self) -> str: + return SHAPETYPE_LOOKUP[self.shapeType or 0] def bbox(self): """Returns the current bounding box for the shapefile which is From b999d8ab7a54b55fdee25fd3af6017067a7df692 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Sat, 26 Jul 2025 20:17:17 +0100 Subject: [PATCH 090/220] Type hint Writer.shp, .dbf & .shx --- src/shapefile.py | 34 ++++++++++++++++++++++++++-------- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 1641dcbe..0f0529b5 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -122,6 +122,10 @@ class BinaryWritable(Protocol): def write(self, data: bytes): 
... +class BinaryWritableSeekable(BinaryWritable): + def seek(self, i: int): ... + + # File name, file object or anything with a read() method that returns bytes. BinaryFileT = Union[str, IO[bytes]] BinaryFileStreamT = Union[IO[bytes], io.BytesIO] @@ -139,6 +143,11 @@ class GeoJsonShapeT(TypedDict): ] +class HasGeoInterface(Protocol): + @property + def __geo_interface__(self) -> Any: ... + + # Helpers MISSING = [None, ""] @@ -2046,16 +2055,18 @@ def __init__( *, encoding: str = "utf-8", encodingErrors: str = "strict", - shp: Optional[BinaryFileT] = None, - shx: Optional[BinaryFileT] = None, - dbf: Optional[BinaryFileT] = None, + shp: Optional[BinaryWritableSeekable] = None, + shx: Optional[BinaryWritableSeekable] = None, + dbf: Optional[BinaryWritableSeekable] = None, **kwargs, # pylint: disable=unused-argument ): self.target = target self.autoBalance = autoBalance self.fields: list[FieldTuple] = [] self.shapeType = shapeType - self.shp = self.shx = self.dbf = None + self.shp: Optional[Union[BinaryFileStreamT, BinaryWritableSeekable]] = None + self.shx: Optional[Union[BinaryFileStreamT, BinaryWritableSeekable]] = None + self.dbf: Optional[Union[BinaryFileStreamT, BinaryWritableSeekable]] = None self._files_to_close: list[BinaryFileStreamT] = [] if target: target = fsdecode_if_pathlike(target) @@ -2163,7 +2174,7 @@ def close(self): pass self._files_to_close = [] - W = TypeVar("W", bound=BinaryWritable) + W = TypeVar("W", bound=BinaryWritableSeekable) @overload def __getFileObj(self, f: str) -> IO[bytes]: ... @@ -2292,7 +2303,11 @@ def mbox(self): """Returns the current m extremes for the shapefile.""" return self._mbox - def __shapefileHeader(self, fileObj, headerType="shp"): + def __shapefileHeader( + self, + fileObj: Union[str, BinaryWritableSeekable], + headerType: str = "shp", + ): """Writes the specified header type to the specified file-like object. 
Several of the shapefile formats are so similar that a single generic method to read or write them is warranted.""" @@ -2404,14 +2419,17 @@ def __dbfHeader(self): # Terminator f.write(b"\r") - def shape(self, s): + def shape( + self, + s: Union[Shape, HasGeoInterface, dict], + ): # Balance if already not balanced if self.autoBalance and self.recNum < self.shpNum: self.balance() # Check is shape or import from geojson if not isinstance(s, Shape): if hasattr(s, "__geo_interface__"): - s = s.__geo_interface__ + s = s.__geo_interface__ # type: ignore [assignment] if isinstance(s, dict): s = Shape._from_geojson(s) else: From c0e88605d5729184d448f2235aa43acc07df86df Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Sat, 26 Jul 2025 20:36:22 +0100 Subject: [PATCH 091/220] Simplify type hints. --- src/shapefile.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 0f0529b5..88a76809 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -123,12 +123,13 @@ def write(self, data: bytes): ... class BinaryWritableSeekable(BinaryWritable): - def seek(self, i: int): ... + def seek(self, i: int): ... # pylint: disable=unused-argument + def tell(self): ... # File name, file object or anything with a read() method that returns bytes. 
BinaryFileT = Union[str, IO[bytes]] -BinaryFileStreamT = Union[IO[bytes], io.BytesIO] +BinaryFileStreamT = Union[IO[bytes], io.BytesIO, BinaryWritableSeekable] FieldTuple = tuple[str, str, int, bool] RecordValue = Union[ @@ -2064,9 +2065,9 @@ def __init__( self.autoBalance = autoBalance self.fields: list[FieldTuple] = [] self.shapeType = shapeType - self.shp: Optional[Union[BinaryFileStreamT, BinaryWritableSeekable]] = None - self.shx: Optional[Union[BinaryFileStreamT, BinaryWritableSeekable]] = None - self.dbf: Optional[Union[BinaryFileStreamT, BinaryWritableSeekable]] = None + self.shp: Optional[BinaryFileStreamT] = None + self.shx: Optional[BinaryFileStreamT] = None + self.dbf: Optional[BinaryFileStreamT] = None self._files_to_close: list[BinaryFileStreamT] = [] if target: target = fsdecode_if_pathlike(target) @@ -2305,7 +2306,7 @@ def mbox(self): def __shapefileHeader( self, - fileObj: Union[str, BinaryWritableSeekable], + fileObj: Optional[BinaryWritableSeekable], headerType: str = "shp", ): """Writes the specified header type to the specified file-like object. From 9d089596c0eafbefff5fba14c3d8622481a7e593 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Sat, 26 Jul 2025 20:40:04 +0100 Subject: [PATCH 092/220] Type hint Writer.record --- src/shapefile.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/shapefile.py b/src/shapefile.py index 88a76809..1cc7ea62 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -2645,7 +2645,9 @@ def __shxRecord(self, offset, length): # pylint: enable=raise-missing-from - def record(self, *recordList, **recordDict): + def record( + self, *recordList: Iterable[RecordValue], **recordDict: dict[str, RecordValue] + ): """Creates a dbf attribute record. You can submit either a sequence of field values or keyword arguments of field names and values. 
Before adding records you must add fields for the record values using the From df75bd023664580ce547af0a6f31a10ff500a068 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Sat, 26 Jul 2025 20:45:36 +0100 Subject: [PATCH 093/220] Type hint Writer.point --- src/shapefile.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 1cc7ea62..9b586d77 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -2775,11 +2775,11 @@ def null(self): """Creates a null shape.""" self.shape(Shape(NULL)) - def point(self, x, y): + def point(self, x: float, y: float): """Creates a POINT shape.""" shapeType = POINT pointShape = Shape(shapeType) - pointShape.points.append([x, y]) + pointShape.points.append((x, y)) self.shape(pointShape) def pointm(self, x, y, m=None): From a0ba5839dcd1e54761d6c28070a60ec59f7c5393 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Sat, 26 Jul 2025 20:56:36 +0100 Subject: [PATCH 094/220] Type hint Writer.multipoint. Simplify Writer.multipointm --- src/shapefile.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 9b586d77..41f63592 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -2799,14 +2799,12 @@ def pointz(self, x, y, z=0, m=None): pointShape.points.append([x, y, z, m]) self.shape(pointShape) - def multipoint(self, points): + def multipoint(self, points: Coords): """Creates a MULTIPOINT shape. Points is a list of xy values.""" shapeType = MULTIPOINT - points = [ - points - ] # nest the points inside a list to be compatible with the generic shapeparts method - self._shapeparts(parts=points, shapeType=shapeType) + # nest the points inside a list to be compatible with the generic shapeparts method + self._shapeparts(parts=[points], shapeType=shapeType) def multipointm(self, points): """Creates a MULTIPOINTM shape. 
@@ -2921,9 +2919,8 @@ def _shapeparts(self, parts, shapeType): # add points for point in part: # Ensure point is list - if not isinstance(point, list): - point = list(point) - polyShape.points.append(point) + point_list = list(point) + polyShape.points.append(point_list) # write the shape self.shape(polyShape) From 41366bb63bc172eeaabbd21f143087e326343f87 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Sat, 26 Jul 2025 22:51:23 +0100 Subject: [PATCH 095/220] Type hint Writer.field. Correct type of FieldTuple --- src/shapefile.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 41f63592..3bf86a45 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -131,7 +131,7 @@ def tell(self): ... BinaryFileT = Union[str, IO[bytes]] BinaryFileStreamT = Union[IO[bytes], io.BytesIO, BinaryWritableSeekable] -FieldTuple = tuple[str, str, int, bool] +FieldTuple = tuple[str, str, int, int] RecordValue = Union[ bool, int, float, str, date ] # A Possible value in a Shapefile record, e.g. 
L, N, F, C, D types @@ -2924,13 +2924,20 @@ def _shapeparts(self, parts, shapeType): # write the shape self.shape(polyShape) - def field(self, name, fieldType="C", size="50", decimal=0): + def field( + # Types of args should match *FieldTuple + self, + name: str, + fieldType: str = "C", + size: int = 50, + decimal: int = 0, + ): """Adds a dbf field descriptor to the shapefile.""" if fieldType == "D": - size = "8" + size = 8 decimal = 0 elif fieldType == "L": - size = "1" + size = 1 decimal = 0 if len(self.fields) >= 2046: raise ShapefileException( From 0f3c56f20585c68b7cb99dbe63dafe3db5d3a4e1 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Sat, 26 Jul 2025 22:54:59 +0100 Subject: [PATCH 096/220] Type hint Writer.line & Writer.poly --- src/shapefile.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 3bf86a45..5d00458c 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -2827,7 +2827,7 @@ def multipointz(self, points): ] # nest the points inside a list to be compatible with the generic shapeparts method self._shapeparts(parts=points, shapeType=shapeType) - def line(self, lines): + def line(self, lines: Collection[Coords]): """Creates a POLYLINE shape. Lines is a collection of lines, each made up of a list of xy values.""" shapeType = POLYLINE @@ -2848,7 +2848,7 @@ def linez(self, lines): shapeType = POLYLINEZ self._shapeparts(parts=lines, shapeType=shapeType) - def poly(self, polys): + def poly(self, polys: Collection[Coords]): """Creates a POLYGON shape. Polys is a collection of polygons, each made up of a list of xy values. Note that for ordinary polygons the coordinates must run in a clockwise direction. 
From 3edbc72bfcf7e0aafab1ee3e472dc01bc614c1c0 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Sun, 27 Jul 2025 15:15:56 +0100 Subject: [PATCH 097/220] list[Coord] -> Coords --- src/shapefile.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 5d00458c..3762ff0c 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -242,7 +242,7 @@ def is_cw(coords: Coords) -> bool: return area2 < 0 -def rewind(coords: Reversible[Coord]) -> list[Coord]: +def rewind(coords: Reversible[Coord]) -> Coords: """Returns the input coords in reversed order.""" return list(reversed(coords)) @@ -270,7 +270,7 @@ def bbox_contains(bbox1: BBox, bbox2: BBox) -> bool: return contains -def ring_contains_point(coords: list[Coord], p: Point2D) -> bool: +def ring_contains_point(coords: Coords, p: Point2D) -> bool: """Fast point-in-polygon crossings algorithm, MacMartin optimization. Adapted from code by Eric Haynes @@ -319,7 +319,7 @@ class RingSamplingError(Exception): pass -def ring_sample(coords: list[Coord], ccw: bool = False) -> Point2D: +def ring_sample(coords: Coords, ccw: bool = False) -> Point2D: """Return a sample point guaranteed to be within a ring, by efficiently finding the first centroid of a coordinate triplet whose orientation matches the orientation of the ring and passes the point-in-ring test. 
@@ -369,14 +369,14 @@ def itercoords(): ) -def ring_contains_ring(coords1: list[Coord], coords2: list[Point2D]) -> bool: +def ring_contains_ring(coords1: Coords, coords2: list[Point2D]) -> bool: """Returns True if all vertexes in coords2 are fully inside coords1.""" return all(ring_contains_point(coords1, p2) for p2 in coords2) def organize_polygon_rings( - rings: Iterable[list[Coord]], return_errors: Optional[dict[str, int]] = None -) -> list[list[list[Coord]]]: + rings: Iterable[Coords], return_errors: Optional[dict[str, int]] = None +) -> list[list[Coords]]: """Organize a list of coordinate rings into one or more polygons with holes. Returns a list of polygons, where each polygon is composed of a single exterior ring, and one or more interior holes. If a return_errors dict is provided (optional), @@ -510,7 +510,7 @@ class Shape: def __init__( self, shapeType: int = NULL, - points: Optional[list[Coord]] = None, + points: Optional[Coords] = None, parts: Optional[Sequence[int]] = None, partTypes: Optional[Sequence[int]] = None, oid: Optional[int] = None, From bde8771a55dcdc2be2006db98133bb074967a4c4 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Sun, 27 Jul 2025 15:58:59 +0100 Subject: [PATCH 098/220] Distinguish between PointM and Point3D, and allow PointZ[3] to be None --- src/shapefile.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 3762ff0c..a1e423d6 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -109,10 +109,11 @@ T = TypeVar("T") Point2D = tuple[float, float] -PointZ = tuple[float, float, float] -PointZM = tuple[float, float, float, float] +Point3D = tuple[float, float, float] +PointM = tuple[float, float, Optional[float]] +PointZ = tuple[float, float, float, Optional[float]] -Coord = Union[Point2D, PointZ, PointZM] +Coord = Union[Point2D, Point2D, Point3D] Coords = list[Coord] BBox = tuple[float, float, float, float] @@ -140,7 
+141,7 @@ def tell(self): ... class GeoJsonShapeT(TypedDict): type: str coordinates: Union[ - tuple[()], Point2D, PointZ, PointZM, Coords, list[Coords], list[list[Coords]] + tuple[()], Point2D, PointM, PointZ, Coords, list[Coords], list[list[Coords]] ] From 928ed2d27755f4ece1fa944288c93935bea5370c Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Sun, 27 Jul 2025 16:56:44 +0100 Subject: [PATCH 099/220] Relax GeoJsonShapeT for now --- src/shapefile.py | 37 +++++++++++++++++-------------------- 1 file changed, 17 insertions(+), 20 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index a1e423d6..abee5dc2 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -116,6 +116,9 @@ Coord = Union[Point2D, Point2D, Point3D] Coords = list[Coord] +Point = Union[Point2D, PointM, PointZ] +Points = list[Point] + BBox = tuple[float, float, float, float] @@ -138,11 +141,6 @@ def tell(self): ... ] # A Possible value in a Shapefile record, e.g. 
L, N, F, C, D types -class GeoJsonShapeT(TypedDict): - type: str - coordinates: Union[ - tuple[()], Point2D, PointM, PointZ, Coords, list[Coords], list[list[Coords]] - ] class HasGeoInterface(Protocol): @@ -255,11 +253,11 @@ def ring_bbox(coords: Coords) -> BBox: return bbox -def bbox_overlap(bbox1: BBox, bbox2: Collection[float]) -> bool: +def bbox_overlap(bbox1: BBox, bbox2: BBox) -> bool: """Tests whether two bounding boxes overlap.""" xmin1, ymin1, xmax1, ymax1 = bbox1 xmin2, ymin2, xmax2, ymax2 = bbox2 - overlap = xmin1 <= xmax2 and xmax1 >= xmin2 and ymin1 <= ymax2 and ymax1 >= ymin2 + overlap = xmin1 <= xmax2 and xmin2 <= xmax1 and ymin1 <= ymax2 and ymin2 <= ymax1 return overlap @@ -267,7 +265,7 @@ def bbox_contains(bbox1: BBox, bbox2: BBox) -> bool: """Tests whether bbox1 fully contains bbox2.""" xmin1, ymin1, xmax1, ymax1 = bbox1 xmin2, ymin2, xmax2, ymax2 = bbox2 - contains = xmin1 < xmin2 and xmax1 > xmax2 and ymin1 < ymin2 and ymax1 > ymax2 + contains = xmin1 < xmin2 and xmax2 < xmax1 and ymin1 < ymin2 and ymax2 < ymax1 return contains @@ -511,7 +509,7 @@ class Shape: def __init__( self, shapeType: int = NULL, - points: Optional[Coords] = None, + points: Optional[Points] = None, parts: Optional[Sequence[int]] = None, partTypes: Optional[Sequence[int]] = None, oid: Optional[int] = None, @@ -547,7 +545,7 @@ def __init__( # self.bbox: Optional[_Array[float]] = None @property - def __geo_interface__(self) -> GeoJsonShapeT: + def __geo_interface__(self): if self.shapeType in [POINT, POINTM, POINTZ]: # point if len(self.points) == 0: @@ -1435,7 +1433,7 @@ def __shpHeader(self): shp.seek(32) self.shapeType = unpack(" Date: Sun, 27 Jul 2025 16:57:30 +0100 Subject: [PATCH 100/220] Reformat --- src/shapefile.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index abee5dc2..2152ee7a 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -141,8 +141,6 @@ def tell(self): ... ] # A Possible value in a Shapefile record, e.g. 
L, N, F, C, D types - - class HasGeoInterface(Protocol): @property def __geo_interface__(self) -> Any: ... From e481bcde944e8c08fe2682b658af90b29fcf2970 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Sun, 27 Jul 2025 16:59:00 +0100 Subject: [PATCH 101/220] Change bbox in doctests to a tuple --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index a8ed822e..52b8de78 100644 --- a/README.md +++ b/README.md @@ -406,7 +406,7 @@ and the bounding box area the shapefile covers: >>> len(sf) 663 >>> sf.bbox - [-122.515048, 37.652916, -122.327622, 37.863433] + (-122.515048, 37.652916, -122.327622, 37.863433) Finally, if you would prefer to work with the entire shapefile in a different format, you can convert all of it to a GeoJSON dictionary, although you may lose From 243a3acb16253d3f0b5c9a8d39f34041aa2d6bd8 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Sun, 27 Jul 2025 17:00:29 +0100 Subject: [PATCH 102/220] Remove unused import --- src/shapefile.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/shapefile.py b/src/shapefile.py index 2152ee7a..e3a84b13 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -34,7 +34,6 @@ Protocol, Reversible, Sequence, - TypedDict, TypeVar, Union, overload, From 4da8203d019af995233449a36cf080ce7aec19f7 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Sun, 27 Jul 2025 18:10:05 +0100 Subject: [PATCH 103/220] Type hint Writer."shape" methods --- src/shapefile.py | 39 ++++++++++++++++++--------------------- 1 file changed, 18 insertions(+), 21 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index e3a84b13..2c7d291e 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -2801,15 +2801,13 @@ def multipoint(self, points: Coords): # nest the points inside a list to be compatible with the generic shapeparts method 
self._shapeparts(parts=[points], shapeType=shapeType) - def multipointm(self, points): + def multipointm(self, points: list[PointM]): """Creates a MULTIPOINTM shape. Points is a list of xym values. If the m (measure) value is not included, it defaults to None (NoData).""" shapeType = MULTIPOINTM - points = [ - points - ] # nest the points inside a list to be compatible with the generic shapeparts method - self._shapeparts(parts=points, shapeType=shapeType) + # nest the points inside a list to be compatible with the generic shapeparts method + self._shapeparts(parts=[points], shapeType=shapeType) def multipointz(self, points): """Creates a MULTIPOINTZ shape. @@ -2817,25 +2815,23 @@ def multipointz(self, points): If the z (elevation) value is not included, it defaults to 0. If the m (measure) value is not included, it defaults to None (NoData).""" shapeType = MULTIPOINTZ - points = [ - points - ] # nest the points inside a list to be compatible with the generic shapeparts method - self._shapeparts(parts=points, shapeType=shapeType) + # nest the points inside a list to be compatible with the generic shapeparts method + self._shapeparts(parts=[points], shapeType=shapeType) - def line(self, lines: Collection[Coords]): + def line(self, lines: list[Coords]): """Creates a POLYLINE shape. Lines is a collection of lines, each made up of a list of xy values.""" shapeType = POLYLINE self._shapeparts(parts=lines, shapeType=shapeType) - def linem(self, lines): + def linem(self, lines: list[Points]): """Creates a POLYLINEM shape. Lines is a collection of lines, each made up of a list of xym values. If the m (measure) value is not included, it defaults to None (NoData).""" shapeType = POLYLINEM self._shapeparts(parts=lines, shapeType=shapeType) - def linez(self, lines): + def linez(self, lines: list[Points]): """Creates a POLYLINEZ shape. Lines is a collection of lines, each made up of a list of xyzm values. If the z (elevation) value is not included, it defaults to 0. 
@@ -2843,7 +2839,7 @@ def linez(self, lines): shapeType = POLYLINEZ self._shapeparts(parts=lines, shapeType=shapeType) - def poly(self, polys: Collection[Coords]): + def poly(self, polys: list[Coords]): """Creates a POLYGON shape. Polys is a collection of polygons, each made up of a list of xy values. Note that for ordinary polygons the coordinates must run in a clockwise direction. @@ -2851,7 +2847,7 @@ def poly(self, polys: Collection[Coords]): shapeType = POLYGON self._shapeparts(parts=polys, shapeType=shapeType) - def polym(self, polys): + def polym(self, polys: list[Points]): """Creates a POLYGONM shape. Polys is a collection of polygons, each made up of a list of xym values. Note that for ordinary polygons the coordinates must run in a clockwise direction. @@ -2860,7 +2856,7 @@ def polym(self, polys): shapeType = POLYGONM self._shapeparts(parts=polys, shapeType=shapeType) - def polyz(self, polys): + def polyz(self, polys: list[Points]): """Creates a POLYGONZ shape. Polys is a collection of polygons, each made up of a list of xyzm values. Note that for ordinary polygons the coordinates must run in a clockwise direction. @@ -2870,7 +2866,7 @@ def polyz(self, polys): shapeType = POLYGONZ self._shapeparts(parts=polys, shapeType=shapeType) - def multipatch(self, parts, partTypes): + def multipatch(self, parts: list[list[PointZ]], partTypes: list[int]): """Creates a MULTIPATCH shape. Parts is a collection of 3D surface patches, each made up of a list of xyzm values. PartTypes is a list of types that define each of the surface patches. 
@@ -2886,11 +2882,12 @@ def multipatch(self, parts, partTypes): # set part index position polyShape.parts.append(len(polyShape.points)) # add points - for point in part: - # Ensure point is list - if not isinstance(point, list): - point = list(point) - polyShape.points.append(point) + # for point in part: + # # Ensure point is list + # if not isinstance(point, list): + # point = list(point) + # polyShape.points.append(point) + polyShape.points.extend(part) polyShape.partTypes = partTypes # write the shape self.shape(polyShape) From b012280a2e41610dabb91b6acbefdd2da09208a0 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Sun, 27 Jul 2025 18:13:27 +0100 Subject: [PATCH 104/220] Remove unused import --- src/shapefile.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/shapefile.py b/src/shapefile.py index 2c7d291e..0c1b4016 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -24,7 +24,6 @@ from typing import ( IO, Any, - Collection, Container, Generic, Iterable, From 519e09d3a34ca46452cee98d779a0fd7ce8a0f18 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Sun, 27 Jul 2025 18:58:53 +0100 Subject: [PATCH 105/220] Simplify Writer.multipatch type sig, and make more general --- src/shapefile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/shapefile.py b/src/shapefile.py index 0c1b4016..4dd1aee6 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -2865,7 +2865,7 @@ def polyz(self, polys: list[Points]): shapeType = POLYGONZ self._shapeparts(parts=polys, shapeType=shapeType) - def multipatch(self, parts: list[list[PointZ]], partTypes: list[int]): + def multipatch(self, parts: list[Points], partTypes: list[int]): """Creates a MULTIPATCH shape. Parts is a collection of 3D surface patches, each made up of a list of xyzm values. PartTypes is a list of types that define each of the surface patches. 
From cf217363b565bfd398904a442066c55324e4f199 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Sun, 27 Jul 2025 19:19:54 +0100 Subject: [PATCH 106/220] Type hint Writer._shapeparts --- src/shapefile.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 4dd1aee6..2aedbe4f 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -2793,14 +2793,14 @@ def pointz(self, x: float, y: float, z: float = 0.0, m: Optional[float] = None): pointShape.points.append((x, y, z, m)) self.shape(pointShape) - def multipoint(self, points: Coords): + def multipoint(self, points: Points): """Creates a MULTIPOINT shape. Points is a list of xy values.""" shapeType = MULTIPOINT # nest the points inside a list to be compatible with the generic shapeparts method self._shapeparts(parts=[points], shapeType=shapeType) - def multipointm(self, points: list[PointM]): + def multipointm(self, points: Points): """Creates a MULTIPOINTM shape. Points is a list of xym values. If the m (measure) value is not included, it defaults to None (NoData).""" @@ -2808,7 +2808,7 @@ def multipointm(self, points: list[PointM]): # nest the points inside a list to be compatible with the generic shapeparts method self._shapeparts(parts=[points], shapeType=shapeType) - def multipointz(self, points): + def multipointz(self, points: Points): """Creates a MULTIPOINTZ shape. Points is a list of xyzm values. If the z (elevation) value is not included, it defaults to 0. @@ -2817,7 +2817,7 @@ def multipointz(self, points): # nest the points inside a list to be compatible with the generic shapeparts method self._shapeparts(parts=[points], shapeType=shapeType) - def line(self, lines: list[Coords]): + def line(self, lines: list[Points]): """Creates a POLYLINE shape. 
Lines is a collection of lines, each made up of a list of xy values.""" shapeType = POLYLINE @@ -2838,7 +2838,7 @@ def linez(self, lines: list[Points]): shapeType = POLYLINEZ self._shapeparts(parts=lines, shapeType=shapeType) - def poly(self, polys: list[Coords]): + def poly(self, polys: list[Points]): """Creates a POLYGON shape. Polys is a collection of polygons, each made up of a list of xy values. Note that for ordinary polygons the coordinates must run in a clockwise direction. @@ -2891,7 +2891,7 @@ def multipatch(self, parts: list[Points], partTypes: list[int]): # write the shape self.shape(polyShape) - def _shapeparts(self, parts, shapeType): + def _shapeparts(self, parts: list[Points], shapeType: int): """Internal method for adding a shape that has multiple collections of points (parts): lines, polygons, and multipoint shapes. """ @@ -2908,10 +2908,11 @@ def _shapeparts(self, parts, shapeType): # set part index position polyShape.parts.append(len(polyShape.points)) # add points - for point in part: - # Ensure point is list - point_list = list(point) - polyShape.points.append(point_list) + # for point in part: + # # Ensure point is list + # point_list = list(point) + # polyShape.points.append(point_list) + polyShape.points.extend(part) # write the shape self.shape(polyShape) From b8aa53997b089a1bf8cb8abb09c87be66031fad4 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Mon, 28 Jul 2025 09:31:05 +0100 Subject: [PATCH 107/220] Update shapefile.py --- src/shapefile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/shapefile.py b/src/shapefile.py index 2aedbe4f..6ce0cc7e 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -111,7 +111,7 @@ PointM = tuple[float, float, Optional[float]] PointZ = tuple[float, float, float, Optional[float]] -Coord = Union[Point2D, Point2D, Point3D] +Coord = Union[Point2D, Point3D] Coords = list[Coord] Point = Union[Point2D, PointM, PointZ] From 
8051e48335fa7653e4c6385960df7984efa6150d Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Mon, 28 Jul 2025 10:41:40 +0100 Subject: [PATCH 108/220] Attempt 2 at typing GeoJSON --- src/shapefile.py | 75 ++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 70 insertions(+), 5 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 6ce0cc7e..02e8ab66 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -28,11 +28,13 @@ Generic, Iterable, Iterator, + Literal, NoReturn, Optional, Protocol, Reversible, Sequence, + TypedDict, TypeVar, Union, overload, @@ -143,6 +145,69 @@ class HasGeoInterface(Protocol): @property def __geo_interface__(self) -> Any: ... +class GeoJSONPoint(TypedDict): + type: Literal["Point"] + # We fix to a tuple (to statically check the length is 2, 3 or 4) but + # RFC7946 only requires: "A position is an array of numbers. There MUST be two or more + # elements. " + # RFC7946 also requires long/lat easting/northing which we do not enforce, + # and despite the SHOULD NOT, we may use a 4th element for Shapefile M Measures. 
+ coordinates: Point + +class GeoJSONMultiPoint(TypedDict): + type: Literal["MultiPoint"] + coordinates: Points + +class GeoJSONLineString(TypedDict): + type: Literal["LineString"] + # "Two or more positions" not enforced by type checker + # https://datatracker.ietf.org/doc/html/rfc7946#section-3.1.4 + coordinates: Points + +class GeoJSONMultiLineString(TypedDict): + type: Literal["MultiLineString"] + coordinates: list[Points] + +class GeoJSONPolygon(TypedDict): + type: Literal["Polygon"] + # Other requirements for Polygon not enforced by type checker + # https://datatracker.ietf.org/doc/html/rfc7946#section-3.1.6 + coordinates: list[Points] + +class GeoJSONMultiPolygon(TypedDict): + type: Literal["MultiPolygon"] + coordinates: list[list[Points]] + +GeoJSONHomogeneousGeometryObject = Union[ + GeoJSONPoint, GeoJSONMultiPoint, + GeoJSONLineString, GeoJSONMultiLineString, + GeoJSONPolygon, GeoJSONMultiPolygon, +] + +class GeoJSONGeometryCollection(TypedDict): + type: Literal["GeometryCollection"] + geometries: list[GeoJSONHomogeneousGeometryObject] + +# RFC7946 3.1 +GeoJSONObject = Union[GeoJSONHomogeneousGeometryObject, GeoJSONGeometryCollection] + +class GeoJSONFeature(TypedDict): + type: Literal["Feature"] + properties: Optional[dict[str, Any]] # RFC7946 3.2 "(any JSON object or a JSON null value)" + geometry: Optional[GeoJSONObject] + + +class GeoJSONFeatureCollection(TypedDict, total= False): + type: Literal["FeatureCollection"] + features: list[GeoJSONFeature] + # bbox is optional + # typing.NotRequired requires Python 3.11 + # and we must support 3.9 (at least until October) + # https://docs.python.org/3/library/typing.html#typing.Required + # Is there a backport? 
+ bbox: list[float] + + # Helpers @@ -541,7 +606,7 @@ def __init__( # self.bbox: Optional[_Array[float]] = None @property - def __geo_interface__(self): + def __geo_interface__(self) -> GeoJSONHomogeneousGeometryObject: if self.shapeType in [POINT, POINTM, POINTZ]: # point if len(self.points) == 0: @@ -922,7 +987,7 @@ def __init__(self, shape: Optional[Shape] = None, record: Optional[_Record] = No self.record = record @property - def __geo_interface__(self): + def __geo_interface__(self) -> GeoJSONFeature: return { "type": "Feature", "properties": self.record.as_dict(date_strings=True), @@ -942,7 +1007,7 @@ def __repr__(self): return f"Shapes: {list(self)}" @property - def __geo_interface__(self): + def __geo_interface__(self) -> GeoJSONGeometryCollection: # Note: currently this will fail if any of the shapes are null-geometries # could be fixed by storing the shapefile shapeType upon init, returning geojson type with empty coords collection = { @@ -962,7 +1027,7 @@ def __repr__(self): return f"ShapeRecords: {list(self)}" @property - def __geo_interface__(self): + def __geo_interface__(self) -> GeoJSONFeatureCollection: collection = { "type": "FeatureCollection", "features": [shaperec.__geo_interface__ for shaperec in self], @@ -1284,7 +1349,7 @@ def __iter__(self): yield from self.iterShapeRecords() @property - def __geo_interface__(self): + def __geo_interface__(self) -> GeoJSONFeatureCollection: shaperecords = self.shapeRecords() fcollection = shaperecords.__geo_interface__ fcollection["bbox"] = list(self.bbox) From 2c66018b9a7dce3b9088b7b8783a85c90c9b06eb Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Mon, 28 Jul 2025 11:21:32 +0100 Subject: [PATCH 109/220] Refine and relax typed dicts as needs arise --- src/shapefile.py | 42 +++++++++++++++++++++++++++--------------- 1 file changed, 27 insertions(+), 15 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 02e8ab66..9488e695 100644 --- 
a/src/shapefile.py +++ b/src/shapefile.py @@ -152,7 +152,7 @@ class GeoJSONPoint(TypedDict): # elements. " # RFC7946 also requires long/lat easting/northing which we do not enforce, # and despite the SHOULD NOT, we may use a 4th element for Shapefile M Measures. - coordinates: Point + coordinates: Union[Point, tuple[()]] class GeoJSONMultiPoint(TypedDict): type: Literal["MultiPoint"] @@ -197,9 +197,11 @@ class GeoJSONFeature(TypedDict): geometry: Optional[GeoJSONObject] -class GeoJSONFeatureCollection(TypedDict, total= False): +class GeoJSONFeatureCollection(TypedDict): type: Literal["FeatureCollection"] features: list[GeoJSONFeature] + +class GeoJSONFeatureCollectionWithBBox(GeoJSONFeatureCollection, total=False): # bbox is optional # typing.NotRequired requires Python 3.11 # and we must support 3.9 (at least until October) @@ -990,14 +992,16 @@ def __init__(self, shape: Optional[Shape] = None, record: Optional[_Record] = No def __geo_interface__(self) -> GeoJSONFeature: return { "type": "Feature", - "properties": self.record.as_dict(date_strings=True), + "properties": None + if self.record is None + else self.record.as_dict(date_strings=True), "geometry": None - if self.shape.shapeType == NULL + if self.shape is None or self.shape.shapeType == NULL else self.shape.__geo_interface__, } -class Shapes(list): +class Shapes(list[Optional[Shape]]): """A class to hold a list of Shape objects. Subclasses list to ensure compatibility with former work and to reuse all the optimizations of the builtin list. 
In addition to the list interface, this also provides the GeoJSON __geo_interface__ @@ -1010,14 +1014,17 @@ def __repr__(self): def __geo_interface__(self) -> GeoJSONGeometryCollection: # Note: currently this will fail if any of the shapes are null-geometries # could be fixed by storing the shapefile shapeType upon init, returning geojson type with empty coords - collection = { - "type": "GeometryCollection", - "geometries": [shape.__geo_interface__ for shape in self], - } + collection = GeoJSONGeometryCollection( + type= "GeometryCollection", + geometries = [shape.__geo_interface__ + for shape in self + if shape is not None + ], + ) return collection -class ShapeRecords(list): +class ShapeRecords(list[ShapeRecord]): """A class to hold a list of ShapeRecord objects. Subclasses list to ensure compatibility with former work and to reuse all the optimizations of the builtin list. In addition to the list interface, this also provides the GeoJSON __geo_interface__ @@ -1030,9 +1037,12 @@ def __repr__(self): def __geo_interface__(self) -> GeoJSONFeatureCollection: collection = { "type": "FeatureCollection", - "features": [shaperec.__geo_interface__ for shaperec in self], + "features": [] #shaperec.__geo_interface__ for shaperec in self], } - return collection + return GeoJSONFeatureCollection( + type="FeatureCollection", + features=[shaperec.__geo_interface__ for shaperec in self], + ) class ShapefileException(Exception): @@ -1349,10 +1359,12 @@ def __iter__(self): yield from self.iterShapeRecords() @property - def __geo_interface__(self) -> GeoJSONFeatureCollection: + def __geo_interface__(self) -> GeoJSONFeatureCollectionWithBBox: shaperecords = self.shapeRecords() - fcollection = shaperecords.__geo_interface__ - fcollection["bbox"] = list(self.bbox) + fcollection = GeoJSONFeatureCollectionWithBBox( + bbox = list(self.bbox), + **shaperecords.__geo_interface__, + ) return fcollection @property From aca60cf50bc83d70747b21e1bcd2e40a60796651 Mon Sep 17 00:00:00 2001 From: 
James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Mon, 28 Jul 2025 11:40:03 +0100 Subject: [PATCH 110/220] Typehint all __geo_interfaces__ (mostly, but not entirely GeoJSON compliant) --- .../workflows/run_checks_build_and_test.yml | 2 +- src/shapefile.py | 77 +++++++++++-------- 2 files changed, 44 insertions(+), 35 deletions(-) diff --git a/.github/workflows/run_checks_build_and_test.yml b/.github/workflows/run_checks_build_and_test.yml index f9e4b2e0..89d8c207 100644 --- a/.github/workflows/run_checks_build_and_test.yml +++ b/.github/workflows/run_checks_build_and_test.yml @@ -26,7 +26,7 @@ jobs: python -m pip install --upgrade pip pip install pytest pylint pylint-per-file-ignores pip install -e . - - name: run Pylint for errors and warnings only + - name: run Pylint for errors, warnings and remarks only (ignore Comments/ Code style) run: | pylint --disable=C test_shapefile.py src/shapefile.py diff --git a/src/shapefile.py b/src/shapefile.py index 9488e695..8008fe10 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -145,55 +145,70 @@ class HasGeoInterface(Protocol): @property def __geo_interface__(self) -> Any: ... + class GeoJSONPoint(TypedDict): type: Literal["Point"] - # We fix to a tuple (to statically check the length is 2, 3 or 4) but + # We fix to a tuple (to statically check the length is 2, 3 or 4) but # RFC7946 only requires: "A position is an array of numbers. There MUST be two or more # elements. " # RFC7946 also requires long/lat easting/northing which we do not enforce, # and despite the SHOULD NOT, we may use a 4th element for Shapefile M Measures. 
- coordinates: Union[Point, tuple[()]] - + coordinates: Union[Point, tuple[()]] + + class GeoJSONMultiPoint(TypedDict): type: Literal["MultiPoint"] coordinates: Points + class GeoJSONLineString(TypedDict): type: Literal["LineString"] # "Two or more positions" not enforced by type checker # https://datatracker.ietf.org/doc/html/rfc7946#section-3.1.4 coordinates: Points - + + class GeoJSONMultiLineString(TypedDict): type: Literal["MultiLineString"] coordinates: list[Points] + class GeoJSONPolygon(TypedDict): type: Literal["Polygon"] - # Other requirements for Polygon not enforced by type checker + # Other requirements for Polygon not enforced by type checker # https://datatracker.ietf.org/doc/html/rfc7946#section-3.1.6 coordinates: list[Points] + class GeoJSONMultiPolygon(TypedDict): type: Literal["MultiPolygon"] coordinates: list[list[Points]] + GeoJSONHomogeneousGeometryObject = Union[ - GeoJSONPoint, GeoJSONMultiPoint, - GeoJSONLineString, GeoJSONMultiLineString, - GeoJSONPolygon, GeoJSONMultiPolygon, + GeoJSONPoint, + GeoJSONMultiPoint, + GeoJSONLineString, + GeoJSONMultiLineString, + GeoJSONPolygon, + GeoJSONMultiPolygon, ] + class GeoJSONGeometryCollection(TypedDict): type: Literal["GeometryCollection"] geometries: list[GeoJSONHomogeneousGeometryObject] + # RFC7946 3.1 GeoJSONObject = Union[GeoJSONHomogeneousGeometryObject, GeoJSONGeometryCollection] + class GeoJSONFeature(TypedDict): type: Literal["Feature"] - properties: Optional[dict[str, Any]] # RFC7946 3.2 "(any JSON object or a JSON null value)" + properties: Optional[ + dict[str, Any] + ] # RFC7946 3.2 "(any JSON object or a JSON null value)" geometry: Optional[GeoJSONObject] @@ -201,16 +216,16 @@ class GeoJSONFeatureCollection(TypedDict): type: Literal["FeatureCollection"] features: list[GeoJSONFeature] + class GeoJSONFeatureCollectionWithBBox(GeoJSONFeatureCollection, total=False): # bbox is optional - # typing.NotRequired requires Python 3.11 + # typing.NotRequired requires Python 3.11 # and we must 
support 3.9 (at least until October) # https://docs.python.org/3/library/typing.html#typing.Required # Is there a backport? bbox: list[float] - # Helpers MISSING = [None, ""] @@ -278,7 +293,7 @@ def __repr__(self): def signed_area( - coords: Coords, + coords: Points, fast: bool = False, ) -> float: """Return the signed area enclosed by a ring using the linear time @@ -296,7 +311,7 @@ def signed_area( return area2 / 2.0 -def is_cw(coords: Coords) -> bool: +def is_cw(coords: Points) -> bool: """Returns True if a polygon ring has clockwise orientation, determined by a negatively signed area. """ @@ -304,14 +319,14 @@ def is_cw(coords: Coords) -> bool: return area2 < 0 -def rewind(coords: Reversible[Coord]) -> Coords: +def rewind(coords: Reversible[Point]) -> Points: """Returns the input coords in reversed order.""" return list(reversed(coords)) -def ring_bbox(coords: Coords) -> BBox: +def ring_bbox(coords: Points) -> BBox: """Calculates and returns the bounding box of a ring.""" - xs, ys = zip(*coords) + xs, ys = map(list, list(zip(*coords))[:2]) # ignore any z or m values bbox = min(xs), min(ys), max(xs), max(ys) return bbox @@ -332,7 +347,7 @@ def bbox_contains(bbox1: BBox, bbox2: BBox) -> bool: return contains -def ring_contains_point(coords: Coords, p: Point2D) -> bool: +def ring_contains_point(coords: Points, p: Point2D) -> bool: """Fast point-in-polygon crossings algorithm, MacMartin optimization. Adapted from code by Eric Haynes @@ -381,7 +396,7 @@ class RingSamplingError(Exception): pass -def ring_sample(coords: Coords, ccw: bool = False) -> Point2D: +def ring_sample(coords: Points, ccw: bool = False) -> Point2D: """Return a sample point guaranteed to be within a ring, by efficiently finding the first centroid of a coordinate triplet whose orientation matches the orientation of the ring and passes the point-in-ring test. 
@@ -431,14 +446,15 @@ def itercoords(): ) -def ring_contains_ring(coords1: Coords, coords2: list[Point2D]) -> bool: +def ring_contains_ring(coords1: Points, coords2: list[Point]) -> bool: """Returns True if all vertexes in coords2 are fully inside coords1.""" - return all(ring_contains_point(coords1, p2) for p2 in coords2) + # Ignore Z and M values in coords2 + return all(ring_contains_point(coords1, p2[:2]) for p2 in coords2) def organize_polygon_rings( - rings: Iterable[Coords], return_errors: Optional[dict[str, int]] = None -) -> list[list[Coords]]: + rings: Iterable[Points], return_errors: Optional[dict[str, int]] = None +) -> list[list[Points]]: """Organize a list of coordinate rings into one or more polygons with holes. Returns a list of polygons, where each polygon is composed of a single exterior ring, and one or more interior holes. If a return_errors dict is provided (optional), @@ -992,8 +1008,8 @@ def __init__(self, shape: Optional[Shape] = None, record: Optional[_Record] = No def __geo_interface__(self) -> GeoJSONFeature: return { "type": "Feature", - "properties": None - if self.record is None + "properties": None + if self.record is None else self.record.as_dict(date_strings=True), "geometry": None if self.shape is None or self.shape.shapeType == NULL @@ -1015,11 +1031,8 @@ def __geo_interface__(self) -> GeoJSONGeometryCollection: # Note: currently this will fail if any of the shapes are null-geometries # could be fixed by storing the shapefile shapeType upon init, returning geojson type with empty coords collection = GeoJSONGeometryCollection( - type= "GeometryCollection", - geometries = [shape.__geo_interface__ - for shape in self - if shape is not None - ], + type="GeometryCollection", + geometries=[shape.__geo_interface__ for shape in self if shape is not None], ) return collection @@ -1035,10 +1048,6 @@ def __repr__(self): @property def __geo_interface__(self) -> GeoJSONFeatureCollection: - collection = { - "type": "FeatureCollection", - 
"features": [] #shaperec.__geo_interface__ for shaperec in self], - } return GeoJSONFeatureCollection( type="FeatureCollection", features=[shaperec.__geo_interface__ for shaperec in self], @@ -1362,7 +1371,7 @@ def __iter__(self): def __geo_interface__(self) -> GeoJSONFeatureCollectionWithBBox: shaperecords = self.shapeRecords() fcollection = GeoJSONFeatureCollectionWithBBox( - bbox = list(self.bbox), + bbox=list(self.bbox), **shaperecords.__geo_interface__, ) return fcollection From b35482c60c7a96edef76225565495b7a4ed185cd Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Mon, 28 Jul 2025 14:10:19 +0100 Subject: [PATCH 111/220] Define Shape subclasses for each supported Shape type --- src/shapefile.py | 143 ++++++++++++++++++++++++++++++++++++----------- 1 file changed, 111 insertions(+), 32 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 8008fe10..60ca3157 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -110,14 +110,14 @@ T = TypeVar("T") Point2D = tuple[float, float] Point3D = tuple[float, float, float] -PointM = tuple[float, float, Optional[float]] -PointZ = tuple[float, float, float, Optional[float]] +PointMT = tuple[float, float, Optional[float]] +PointZT = tuple[float, float, float, Optional[float]] Coord = Union[Point2D, Point3D] Coords = list[Coord] -Point = Union[Point2D, PointM, PointZ] -Points = list[Point] +PointT = Union[Point2D, PointMT, PointZT] +PointsT = list[PointT] BBox = tuple[float, float, float, float] @@ -153,36 +153,36 @@ class GeoJSONPoint(TypedDict): # elements. " # RFC7946 also requires long/lat easting/northing which we do not enforce, # and despite the SHOULD NOT, we may use a 4th element for Shapefile M Measures. 
- coordinates: Union[Point, tuple[()]] + coordinates: Union[PointT, tuple[()]] class GeoJSONMultiPoint(TypedDict): type: Literal["MultiPoint"] - coordinates: Points + coordinates: PointsT class GeoJSONLineString(TypedDict): type: Literal["LineString"] # "Two or more positions" not enforced by type checker # https://datatracker.ietf.org/doc/html/rfc7946#section-3.1.4 - coordinates: Points + coordinates: PointsT class GeoJSONMultiLineString(TypedDict): type: Literal["MultiLineString"] - coordinates: list[Points] + coordinates: list[PointsT] class GeoJSONPolygon(TypedDict): type: Literal["Polygon"] # Other requirements for Polygon not enforced by type checker # https://datatracker.ietf.org/doc/html/rfc7946#section-3.1.6 - coordinates: list[Points] + coordinates: list[PointsT] class GeoJSONMultiPolygon(TypedDict): type: Literal["MultiPolygon"] - coordinates: list[list[Points]] + coordinates: list[list[PointsT]] GeoJSONHomogeneousGeometryObject = Union[ @@ -293,7 +293,7 @@ def __repr__(self): def signed_area( - coords: Points, + coords: PointsT, fast: bool = False, ) -> float: """Return the signed area enclosed by a ring using the linear time @@ -311,7 +311,7 @@ def signed_area( return area2 / 2.0 -def is_cw(coords: Points) -> bool: +def is_cw(coords: PointsT) -> bool: """Returns True if a polygon ring has clockwise orientation, determined by a negatively signed area. 
""" @@ -319,12 +319,12 @@ def is_cw(coords: Points) -> bool: return area2 < 0 -def rewind(coords: Reversible[Point]) -> Points: +def rewind(coords: Reversible[PointT]) -> PointsT: """Returns the input coords in reversed order.""" return list(reversed(coords)) -def ring_bbox(coords: Points) -> BBox: +def ring_bbox(coords: PointsT) -> BBox: """Calculates and returns the bounding box of a ring.""" xs, ys = map(list, list(zip(*coords))[:2]) # ignore any z or m values bbox = min(xs), min(ys), max(xs), max(ys) @@ -347,7 +347,7 @@ def bbox_contains(bbox1: BBox, bbox2: BBox) -> bool: return contains -def ring_contains_point(coords: Points, p: Point2D) -> bool: +def ring_contains_point(coords: PointsT, p: Point2D) -> bool: """Fast point-in-polygon crossings algorithm, MacMartin optimization. Adapted from code by Eric Haynes @@ -396,7 +396,7 @@ class RingSamplingError(Exception): pass -def ring_sample(coords: Points, ccw: bool = False) -> Point2D: +def ring_sample(coords: PointsT, ccw: bool = False) -> Point2D: """Return a sample point guaranteed to be within a ring, by efficiently finding the first centroid of a coordinate triplet whose orientation matches the orientation of the ring and passes the point-in-ring test. @@ -446,15 +446,15 @@ def itercoords(): ) -def ring_contains_ring(coords1: Points, coords2: list[Point]) -> bool: +def ring_contains_ring(coords1: PointsT, coords2: list[PointT]) -> bool: """Returns True if all vertexes in coords2 are fully inside coords1.""" # Ignore Z and M values in coords2 return all(ring_contains_point(coords1, p2[:2]) for p2 in coords2) def organize_polygon_rings( - rings: Iterable[Points], return_errors: Optional[dict[str, int]] = None -) -> list[list[Points]]: + rings: Iterable[PointsT], return_errors: Optional[dict[str, int]] = None +) -> list[list[PointsT]]: """Organize a list of coordinate rings into one or more polygons with holes. 
Returns a list of polygons, where each polygon is composed of a single exterior ring, and one or more interior holes. If a return_errors dict is provided (optional), @@ -585,10 +585,12 @@ class GeoJSON_Error(Exception): class Shape: + shapeType = NULL + def __init__( self, shapeType: int = NULL, - points: Optional[Points] = None, + points: Optional[PointsT] = None, parts: Optional[Sequence[int]] = None, partTypes: Optional[Sequence[int]] = None, oid: Optional[int] = None, @@ -604,7 +606,8 @@ def __init__( list of shapes. For MultiPatch geometry, partTypes designates the patch type of each of the parts. """ - self.shapeType = shapeType + if self.shapeType != shapeType: + self.shapeType = shapeType self.points = points or [] self.parts = parts or [] if partTypes: @@ -838,6 +841,82 @@ def __repr__(self): return f"Shape #{self.__oid}: {self.shapeTypeName}" +class NullShape(Shape): + # Shape.shapeType = NULL already, + # to preserve handling of default args in Shape.__init__ + # Repeated for clarity. + shapeType = NULL + + +class _CanHaveBBox(Shape): + # Not a BBox because the legacy implementation was a list, not a 4-tuple. 
+ bbox: Optional[list[float]] = None + + +class Point(Shape): + shapeType = 1 + + +class Polyline(_CanHaveBBox): + shapeType = 3 + + +class Polygon(_CanHaveBBox): + shapeType = 5 + + +class MultiPoint(_CanHaveBBox): + shapeType = 8 + + +class _HasM(Shape): + m: Sequence[Optional[float]] + + +class _HasZ(Shape): + z: Sequence[float] + + +class MultiPatch(_HasM, _HasZ, _CanHaveBBox): + shapeType = 31 + + +class PointM(Point, _HasM): + # same default as in Writer.__shpRecord (if s.shapeType in (11, 21):) + # PyShp encodes None m values as NODATA + m = (None,) + shapeType = 21 + + +class PolylineM(Polyline, _HasM): + shapeType = 23 + + +class PolygonM(Polygon, _HasM): + shapeType = 25 + + +class MultiPointM(MultiPoint, _HasM): + shapeType = 28 + + +class PointZ(PointM, _HasZ): + shapeType = 11 + z = (0,) # same default as in Writer.__shpRecord (if s.shapeType == 11:) + + +class PolylineZ(PolylineM, _HasZ): + shapeType = 13 + + +class PolygonZ(PolygonM, _HasZ): + shapeType = 15 + + +class MultiPointZ(MultiPointM, _HasZ): + shapeType = 18 + + class _Record(list): """ A class to hold a record. Subclasses list to ensure compatibility with @@ -2879,14 +2958,14 @@ def pointz(self, x: float, y: float, z: float = 0.0, m: Optional[float] = None): pointShape.points.append((x, y, z, m)) self.shape(pointShape) - def multipoint(self, points: Points): + def multipoint(self, points: PointsT): """Creates a MULTIPOINT shape. Points is a list of xy values.""" shapeType = MULTIPOINT # nest the points inside a list to be compatible with the generic shapeparts method self._shapeparts(parts=[points], shapeType=shapeType) - def multipointm(self, points: Points): + def multipointm(self, points: PointsT): """Creates a MULTIPOINTM shape. Points is a list of xym values. 
If the m (measure) value is not included, it defaults to None (NoData).""" @@ -2894,7 +2973,7 @@ def multipointm(self, points: Points): # nest the points inside a list to be compatible with the generic shapeparts method self._shapeparts(parts=[points], shapeType=shapeType) - def multipointz(self, points: Points): + def multipointz(self, points: PointsT): """Creates a MULTIPOINTZ shape. Points is a list of xyzm values. If the z (elevation) value is not included, it defaults to 0. @@ -2903,20 +2982,20 @@ def multipointz(self, points: Points): # nest the points inside a list to be compatible with the generic shapeparts method self._shapeparts(parts=[points], shapeType=shapeType) - def line(self, lines: list[Points]): + def line(self, lines: list[PointsT]): """Creates a POLYLINE shape. Lines is a collection of lines, each made up of a list of xy values.""" shapeType = POLYLINE self._shapeparts(parts=lines, shapeType=shapeType) - def linem(self, lines: list[Points]): + def linem(self, lines: list[PointsT]): """Creates a POLYLINEM shape. Lines is a collection of lines, each made up of a list of xym values. If the m (measure) value is not included, it defaults to None (NoData).""" shapeType = POLYLINEM self._shapeparts(parts=lines, shapeType=shapeType) - def linez(self, lines: list[Points]): + def linez(self, lines: list[PointsT]): """Creates a POLYLINEZ shape. Lines is a collection of lines, each made up of a list of xyzm values. If the z (elevation) value is not included, it defaults to 0. @@ -2924,7 +3003,7 @@ def linez(self, lines: list[Points]): shapeType = POLYLINEZ self._shapeparts(parts=lines, shapeType=shapeType) - def poly(self, polys: list[Points]): + def poly(self, polys: list[PointsT]): """Creates a POLYGON shape. Polys is a collection of polygons, each made up of a list of xy values. Note that for ordinary polygons the coordinates must run in a clockwise direction. 
@@ -2932,7 +3011,7 @@ def poly(self, polys: list[Points]): shapeType = POLYGON self._shapeparts(parts=polys, shapeType=shapeType) - def polym(self, polys: list[Points]): + def polym(self, polys: list[PointsT]): """Creates a POLYGONM shape. Polys is a collection of polygons, each made up of a list of xym values. Note that for ordinary polygons the coordinates must run in a clockwise direction. @@ -2941,7 +3020,7 @@ def polym(self, polys: list[Points]): shapeType = POLYGONM self._shapeparts(parts=polys, shapeType=shapeType) - def polyz(self, polys: list[Points]): + def polyz(self, polys: list[PointsT]): """Creates a POLYGONZ shape. Polys is a collection of polygons, each made up of a list of xyzm values. Note that for ordinary polygons the coordinates must run in a clockwise direction. @@ -2951,7 +3030,7 @@ def polyz(self, polys: list[Points]): shapeType = POLYGONZ self._shapeparts(parts=polys, shapeType=shapeType) - def multipatch(self, parts: list[Points], partTypes: list[int]): + def multipatch(self, parts: list[PointsT], partTypes: list[int]): """Creates a MULTIPATCH shape. Parts is a collection of 3D surface patches, each made up of a list of xyzm values. PartTypes is a list of types that define each of the surface patches. @@ -2977,7 +3056,7 @@ def multipatch(self, parts: list[Points], partTypes: list[int]): # write the shape self.shape(polyShape) - def _shapeparts(self, parts: list[Points], shapeType: int): + def _shapeparts(self, parts: list[PointsT], shapeType: int): """Internal method for adding a shape that has multiple collections of points (parts): lines, polygons, and multipoint shapes. 
""" From c5a186f6e939a724ef3a67c65f94d0cf7e9de4e6 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Mon, 28 Jul 2025 15:56:53 +0100 Subject: [PATCH 112/220] Use Shape subclasses to avoid the need for # type: ignore [attr-defined] --- src/shapefile.py | 142 ++++++++++++++++++++++++++++------------------- 1 file changed, 85 insertions(+), 57 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 60ca3157..c8ebf24c 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -584,12 +584,19 @@ class GeoJSON_Error(Exception): pass +class _NoShapeTypeSentinel: + """For use as a default value for Shape.__init__ to + preserve old behaviour for anyone who explictly + called Shape(shapeType=None). + """ + + class Shape: - shapeType = NULL + shapeType: int = NULL def __init__( self, - shapeType: int = NULL, + shapeType: Union[int, _NoShapeTypeSentinel] = _NoShapeTypeSentinel(), points: Optional[PointsT] = None, parts: Optional[Sequence[int]] = None, partTypes: Optional[Sequence[int]] = None, @@ -606,7 +613,7 @@ def __init__( list of shapes. For MultiPatch geometry, partTypes designates the patch type of each of the parts. """ - if self.shapeType != shapeType: + if not isinstance(shapeType, _NoShapeTypeSentinel): self.shapeType = shapeType self.points = points or [] self.parts = parts or [] @@ -850,7 +857,7 @@ class NullShape(Shape): class _CanHaveBBox(Shape): # Not a BBox because the legacy implementation was a list, not a 4-tuple. 
- bbox: Optional[list[float]] = None + bbox: Optional[Sequence[float]] = None class Point(Shape): @@ -882,10 +889,10 @@ class MultiPatch(_HasM, _HasZ, _CanHaveBBox): class PointM(Point, _HasM): + shapeType = 21 # same default as in Writer.__shpRecord (if s.shapeType in (11, 21):) # PyShp encodes None m values as NODATA m = (None,) - shapeType = 21 class PolylineM(Polyline, _HasM): @@ -902,7 +909,8 @@ class MultiPointM(MultiPoint, _HasM): class PointZ(PointM, _HasZ): shapeType = 11 - z = (0,) # same default as in Writer.__shpRecord (if s.shapeType == 11:) + # same default as in Writer.__shpRecord (if s.shapeType == 11:) + z: Sequence[float] = (0.0,) class PolylineZ(PolylineM, _HasZ): @@ -917,6 +925,24 @@ class MultiPointZ(MultiPointM, _HasZ): shapeType = 18 +SHAPE_CLASS_FROM_SHAPETYPE: dict[int, type[Shape]] = { + NULL: NullShape, + POINT: Point, + POLYLINE: Polyline, + POLYGON: Polygon, + MULTIPOINT: MultiPoint, + POINTZ: PointZ, + POLYLINEZ: PolylineZ, + POLYGONZ: PolygonZ, + MULTIPOINTZ: MultiPointZ, + POINTM: PointM, + POLYLINEM: PolylineM, + POLYGONM: PolygonM, + MULTIPOINTM: MultiPointM, + MULTIPATCH: MultiPatch, +} + + class _Record(list): """ A class to hold a record. 
Subclasses list to ensure compatibility with @@ -1615,7 +1641,8 @@ def __shape( # pylint: disable=attribute-defined-outside-init f = self.__getFileObj(self.shp) - record = Shape(oid=oid) + record = SHAPE_CLASS_FROM_SHAPETYPE[self.shapeType](oid=oid) + # record = Shape(oid=oid) # Previously, we also set __zmin = __zmax = __mmin = __mmax = None nParts: Optional[int] = None nPoints: Optional[int] = None @@ -1625,23 +1652,28 @@ def __shape( shapeType = unpack("= 16: __mmin, __mmax = unpack("<2d", f.read(16)) # Measure values less than -10e38 are nodata values according to the spec if next_shape - f.tell() >= nPoints * 8: - record.m = [] # type: ignore [attr-defined] + record.m = [] for m in _Array[float]( "d", unpack(f"<{nPoints}d", f.read(nPoints * 8)) ): if m > NODATA: - record.m.append(m) # type: ignore [attr-defined] + record.m.append(m) else: - record.m.append(None) # type: ignore [attr-defined] + record.m.append(None) else: - record.m = [None for _ in range(nPoints)] # type: ignore [attr-defined] + record.m = [None for _ in range(nPoints)] # Read a single point - if shapeType in (1, 11, 21): + # if shapeType in (1, 11, 21): + if isinstance(record, Point): x, y = _Array[float]("d", unpack("<2d", f.read(16))) record.points = [(x, y)] @@ -1695,20 +1731,22 @@ def __shape( return None # Read a single Z value - if shapeType == 11: - record.z = list(unpack("= 8: (m,) = unpack(" NODATA: - record.m = [m] # type: ignore [attr-defined] + record.m = (m,) else: - record.m = [None] # type: ignore [attr-defined] + record.m = (None,) # pylint: enable=attribute-defined-outside-init # Seek to the end of this record as defined by the record header because @@ -2932,20 +2970,18 @@ def balance(self): def null(self): """Creates a null shape.""" - self.shape(Shape(NULL)) + self.shape(NullShape()) def point(self, x: float, y: float): """Creates a POINT shape.""" - shapeType = POINT - pointShape = Shape(shapeType) + pointShape = Point() pointShape.points.append((x, y)) 
self.shape(pointShape) def pointm(self, x: float, y: float, m: Optional[float] = None): """Creates a POINTM shape. If the m (measure) value is not set, it defaults to NoData.""" - shapeType = POINTM - pointShape = Shape(shapeType) + pointShape = PointM() pointShape.points.append((x, y, m)) self.shape(pointShape) @@ -2953,63 +2989,55 @@ def pointz(self, x: float, y: float, z: float = 0.0, m: Optional[float] = None): """Creates a POINTZ shape. If the z (elevation) value is not set, it defaults to 0. If the m (measure) value is not set, it defaults to NoData.""" - shapeType = POINTZ - pointShape = Shape(shapeType) + pointShape = PointZ() pointShape.points.append((x, y, z, m)) self.shape(pointShape) def multipoint(self, points: PointsT): """Creates a MULTIPOINT shape. Points is a list of xy values.""" - shapeType = MULTIPOINT # nest the points inside a list to be compatible with the generic shapeparts method - self._shapeparts(parts=[points], shapeType=shapeType) + self._shapeparts(parts=[points], polyShape=MultiPoint()) def multipointm(self, points: PointsT): """Creates a MULTIPOINTM shape. Points is a list of xym values. If the m (measure) value is not included, it defaults to None (NoData).""" - shapeType = MULTIPOINTM # nest the points inside a list to be compatible with the generic shapeparts method - self._shapeparts(parts=[points], shapeType=shapeType) + self._shapeparts(parts=[points], polyShape=MultiPointM()) def multipointz(self, points: PointsT): """Creates a MULTIPOINTZ shape. Points is a list of xyzm values. If the z (elevation) value is not included, it defaults to 0. If the m (measure) value is not included, it defaults to None (NoData).""" - shapeType = MULTIPOINTZ # nest the points inside a list to be compatible with the generic shapeparts method - self._shapeparts(parts=[points], shapeType=shapeType) + self._shapeparts(parts=[points], polyShape=MultiPointZ()) def line(self, lines: list[PointsT]): """Creates a POLYLINE shape. 
Lines is a collection of lines, each made up of a list of xy values.""" - shapeType = POLYLINE - self._shapeparts(parts=lines, shapeType=shapeType) + self._shapeparts(parts=lines, polyShape=Polyline()) def linem(self, lines: list[PointsT]): """Creates a POLYLINEM shape. Lines is a collection of lines, each made up of a list of xym values. If the m (measure) value is not included, it defaults to None (NoData).""" - shapeType = POLYLINEM - self._shapeparts(parts=lines, shapeType=shapeType) + self._shapeparts(parts=lines, polyShape=PolylineM()) def linez(self, lines: list[PointsT]): """Creates a POLYLINEZ shape. Lines is a collection of lines, each made up of a list of xyzm values. If the z (elevation) value is not included, it defaults to 0. If the m (measure) value is not included, it defaults to None (NoData).""" - shapeType = POLYLINEZ - self._shapeparts(parts=lines, shapeType=shapeType) + self._shapeparts(parts=lines, polyShape=PolylineZ()) def poly(self, polys: list[PointsT]): """Creates a POLYGON shape. Polys is a collection of polygons, each made up of a list of xy values. Note that for ordinary polygons the coordinates must run in a clockwise direction. If some of the polygons are holes, these must run in a counterclockwise direction.""" - shapeType = POLYGON - self._shapeparts(parts=polys, shapeType=shapeType) + self._shapeparts(parts=polys, polyShape=Polygon()) def polym(self, polys: list[PointsT]): """Creates a POLYGONM shape. @@ -3017,8 +3045,7 @@ def polym(self, polys: list[PointsT]): Note that for ordinary polygons the coordinates must run in a clockwise direction. If some of the polygons are holes, these must run in a counterclockwise direction. If the m (measure) value is not included, it defaults to None (NoData).""" - shapeType = POLYGONM - self._shapeparts(parts=polys, shapeType=shapeType) + self._shapeparts(parts=polys, polyShape=PolygonM()) def polyz(self, polys: list[PointsT]): """Creates a POLYGONZ shape. 
@@ -3027,8 +3054,7 @@ def polyz(self, polys: list[PointsT]): If some of the polygons are holes, these must run in a counterclockwise direction. If the z (elevation) value is not included, it defaults to 0. If the m (measure) value is not included, it defaults to None (NoData).""" - shapeType = POLYGONZ - self._shapeparts(parts=polys, shapeType=shapeType) + self._shapeparts(parts=polys, polyShape=PolygonZ()) def multipatch(self, parts: list[PointsT], partTypes: list[int]): """Creates a MULTIPATCH shape. @@ -3038,8 +3064,7 @@ def multipatch(self, parts: list[PointsT], partTypes: list[int]): TRIANGLE_FAN, OUTER_RING, INNER_RING, FIRST_RING, or RING. If the z (elevation) value is not included, it defaults to 0. If the m (measure) value is not included, it defaults to None (NoData).""" - shapeType = MULTIPATCH - polyShape = Shape(shapeType) + polyShape = MultiPatch() polyShape.parts = [] polyShape.points = [] for part in parts: @@ -3056,15 +3081,18 @@ def multipatch(self, parts: list[PointsT], partTypes: list[int]): # write the shape self.shape(polyShape) - def _shapeparts(self, parts: list[PointsT], shapeType: int): + def _shapeparts( + self, parts: list[PointsT], polyShape: Union[Polyline, Polygon, MultiPoint] + ): """Internal method for adding a shape that has multiple collections of points (parts): lines, polygons, and multipoint shapes. 
""" - polyShape = Shape(shapeType) polyShape.parts = [] polyShape.points = [] # Make sure polygon rings (parts) are closed - if shapeType in (5, 15, 25, 31): + + # if shapeType in (5, 15, 25, 31): + if isinstance(polyShape, Polygon): for part in parts: if part[0] != part[-1]: part.append(part[0]) From fe4e0c7efbc11b17c4a4a65285c11687166876c5 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Mon, 28 Jul 2025 16:23:27 +0100 Subject: [PATCH 113/220] Replace big if elif block with dict --- src/shapefile.py | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index c8ebf24c..14da4bf3 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -194,6 +194,16 @@ class GeoJSONMultiPolygon(TypedDict): GeoJSONMultiPolygon, ] +GEOJSON_TO_SHAPETYPE: dict[str, int] = { + "Null": NULL, + "Point": POINT, + "LineString": POLYLINE, + "Polygon": POLYGON, + "MultiPoint": MULTIPOINT, + "MultiLineString": POLYLINE, + "MultiPolygon": POLYGON, +} + class GeoJSONGeometryCollection(TypedDict): type: Literal["GeometryCollection"] @@ -759,23 +769,10 @@ def _from_geojson(geoj) -> Shape: shape = Shape() # set shapeType geojType = geoj["type"] if geoj else "Null" - if geojType == "Null": - shapeType = NULL - elif geojType == "Point": - shapeType = POINT - elif geojType == "LineString": - shapeType = POLYLINE - elif geojType == "Polygon": - shapeType = POLYGON - elif geojType == "MultiPoint": - shapeType = MULTIPOINT - elif geojType == "MultiLineString": - shapeType = POLYLINE - elif geojType == "MultiPolygon": - shapeType = POLYGON + if geojType in GEOJSON_TO_SHAPETYPE: + shape.shapeType = GEOJSON_TO_SHAPETYPE[geojType] else: raise GeoJSON_Error(f"Cannot create Shape from GeoJSON type '{geojType}'") - shape.shapeType = shapeType # set points and parts if geojType == "Point": From b90d8f117c17ba2df2ab40c5c50385c7a8ba9850 Mon Sep 17 00:00:00 2001 From: James Parrott 
<80779630+JamesParrott@users.noreply.github.com> Date: Mon, 28 Jul 2025 18:01:37 +0100 Subject: [PATCH 114/220] Add speed test --- .github/actions/test/action.yml | 2 +- .github/workflows/speed_test.yml | 70 +++++++++++++++++++++++++++ speed_test.py | 82 ++++++++++++++++++++++++++++++++ 3 files changed, 153 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/speed_test.yml create mode 100644 speed_test.py diff --git a/.github/actions/test/action.yml b/.github/actions/test/action.yml index 4a2fd996..e1dc3ca9 100644 --- a/.github/actions/test/action.yml +++ b/.github/actions/test/action.yml @@ -72,7 +72,7 @@ runs: name: PyShp_wheel_and_sdist path: dist - - name: Install PyShp from the wheel (built in prev step) + - name: Install PyShp from the wheel (downloaded in prev step) shell: bash working-directory: dist/ run: | diff --git a/.github/workflows/speed_test.yml b/.github/workflows/speed_test.yml new file mode 100644 index 00000000..822982b5 --- /dev/null +++ b/.github/workflows/speed_test.yml @@ -0,0 +1,70 @@ + +name: Run the speed tests + +on: + push: + pull_request: + workflow_dispatch: + +jobs: + + + build_wheel_and_sdist: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + - name: Build wheel from the project repo + uses: ./.github/actions/build_wheel_and_sdist + + run_speed_tests: + needs: build_wheel_and_sdist + strategy: + fail-fast: false + matrix: + python-version: [ + "3.9", + "3.13", + ] + os: [ + "windows-latest", + "ubuntu-24.04", + ] + + + runs-on: ${{ matrix.os }} + steps: + - uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - name: Download wheel and sdist (built in previous job) + uses: actions/download-artifact@v4 + with: + name: PyShp_wheel_and_sdist + path: dist + + - name: Install PyShp + test deps from the wheel (downloaded in prev step) + shell: bash + working-directory: dist/ + run: | + WHEEL_NAME=$(ls pyshp-*py3-none-any.whl) + python -m 
pip install $WHEEL_NAME[test] + + - uses: actions/checkout@v4 + with: + path: ./Pyshp + + - name: Checkout shapefiles and zip file artefacts repo + uses: actions/checkout@v4 + with: + repository: JamesParrott/PyShp_test_shapefile + path: ./PyShp_test_shapefile + + - name: Run Benchmarks + env: + PYSHP_TEST_REPO: ./PyShp_test_shapefile + run: python speed_test.py + + + diff --git a/speed_test.py b/speed_test.py new file mode 100644 index 00000000..951ee598 --- /dev/null +++ b/speed_test.py @@ -0,0 +1,82 @@ +# Based on Taneli Hukkinen's https://github.com/hukkin/tomli-w/blob/master/benchmark/run.py + +from __future__ import annotations + +from collections.abc import Callable +import functools +import os +from pathlib import Path +import timeit + +import shapefile as shp + +import test_shapefile + +DEFAULT_PYSHP_TEST_REPO = fr"{os.getenv('USERPROFILE')}\Coding\repos\PyShp_test_shapefile" +PYSHP_TEST_REPO = Path(os.getenv("PYSHP_TEST_REPO", DEFAULT_PYSHP_TEST_REPO)) +REPO_ROOT = Path(__file__).parent + + +blockgroups_file = REPO_ROOT / "shapefiles" / "blockgroups.shp" +edit_file = REPO_ROOT / "shapefiles" / "test" / "edit.shp" +merge_file = REPO_ROOT / "shapefiles" / "test" / "merge.shp" +states_provinces_file = PYSHP_TEST_REPO / "ne_10m_admin_1_states_provinces.shp" +tiny_countries_file = PYSHP_TEST_REPO / "ne_110m_admin_0_tiny_countries.shp" +gis_osm_natural_file = PYSHP_TEST_REPO / "gis_osm_natural_a_free_1.zip" + + +def benchmark( + name: str, + run_count: int, + func: Callable, + col_width: tuple, + compare_to: float | None = None, +) -> float: + placeholder = "Running..." 
+ print(f"{name:>{col_width[0]}} | {placeholder}", end="", flush=True) + time_taken = timeit.timeit(func, number=run_count) + print("\b" * len(placeholder), end="") + time_suffix = " s" + print(f"{time_taken:{col_width[1]-len(time_suffix)}.3g}{time_suffix}", end="") + print() + return time_taken + + + +def open_shapefile_with_PyShp(target: Union[str, os.PathLike]): + with shp.Reader(target) as r: + for shapeRecord in r.iterShapeRecords(): + pass + + +READER_TESTS = { + "Blockgroups": blockgroups_file, + "Edit": edit_file, + "Merge": merge_file, + "States_35MB": states_provinces_file, + "Tiny Countries": tiny_countries_file, + "GIS_OSM_zip_10MB": gis_osm_natural_file, +} + + +def run(run_count: int) -> None: + col_width = (21, 10) + col_head = ("parser", "exec time", "performance (more is better)") + # Load files to avoid one off delays that only affect first disk seek + for file_path in READER_TESTS.values(): + file_path.read_bytes() + print(f"Running benchmarks {run_count} times:") + print("-" * col_width[0] + "---" + "-" * col_width[1]) + print( + f"{col_head[0]:>{col_width[0]}} | {col_head[1]:>{col_width[1]}}" + ) + print("-" * col_width[0] + "-+-" + "-" * col_width[1]) + for test_name, target in READER_TESTS.items(): + benchmark(f"Read {test_name}", run_count, functools.partial(open_shapefile_with_PyShp, target=target), col_width) + + benchmark(f"Slow test", 1, test_shapefile.test_iterRecords_start_stop, col_width) + + + +if __name__ == "__main__": + run(1) \ No newline at end of file From b827fbea426014c6b7932e72b72ef0722ad51e2b Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Mon, 28 Jul 2025 18:08:01 +0100 Subject: [PATCH 115/220] Correct type checking and path to speed_test.py in custom repo location --- .github/workflows/speed_test.yml | 4 ++-- speed_test.py | 27 +++++++++++++++------------ 2 files changed, 17 insertions(+), 14 deletions(-) diff --git a/.github/workflows/speed_test.yml 
b/.github/workflows/speed_test.yml index 822982b5..818f3471 100644 --- a/.github/workflows/speed_test.yml +++ b/.github/workflows/speed_test.yml @@ -64,7 +64,7 @@ jobs: - name: Run Benchmarks env: PYSHP_TEST_REPO: ./PyShp_test_shapefile - run: python speed_test.py - + run: python ./Pyshp/speed_test.py + diff --git a/speed_test.py b/speed_test.py index 951ee598..9befbfe7 100644 --- a/speed_test.py +++ b/speed_test.py @@ -2,17 +2,19 @@ from __future__ import annotations -from collections.abc import Callable import functools import os -from pathlib import Path import timeit +from collections.abc import Callable +from pathlib import Path +from typing import Union import shapefile as shp - import test_shapefile -DEFAULT_PYSHP_TEST_REPO = fr"{os.getenv('USERPROFILE')}\Coding\repos\PyShp_test_shapefile" +DEFAULT_PYSHP_TEST_REPO = ( + rf"{os.getenv('USERPROFILE')}\Coding\repos\PyShp_test_shapefile" +) PYSHP_TEST_REPO = Path(os.getenv("PYSHP_TEST_REPO", DEFAULT_PYSHP_TEST_REPO)) REPO_ROOT = Path(__file__).parent @@ -42,7 +44,6 @@ def benchmark( return time_taken - def open_shapefile_with_PyShp(target: Union[str, os.PathLike]): with shp.Reader(target) as r: for shapeRecord in r.iterShapeRecords(): @@ -55,7 +56,7 @@ def open_shapefile_with_PyShp(target: Union[str, os.PathLike]): "Merge": merge_file, "States_35MB": states_provinces_file, "Tiny Countries": tiny_countries_file, - "GIS_OSM_zip_10MB": gis_osm_natural_file, + "GIS_OSM_zip_10MB": gis_osm_natural_file, } @@ -67,16 +68,18 @@ def run(run_count: int) -> None: file_path.read_bytes() print(f"Running benchmarks {run_count} times:") print("-" * col_width[0] + "---" + "-" * col_width[1]) - print( - f"{col_head[0]:>{col_width[0]}} | {col_head[1]:>{col_width[1]}}" - ) + print(f"{col_head[0]:>{col_width[0]}} | {col_head[1]:>{col_width[1]}}") print("-" * col_width[0] + "-+-" + "-" * col_width[1]) for test_name, target in READER_TESTS.items(): - benchmark(f"Read {test_name}", run_count, 
functools.partial(open_shapefile_with_PyShp, target=target), col_width) + benchmark( + f"Read {test_name}", + run_count, + functools.partial(open_shapefile_with_PyShp, target=target), + col_width, + ) benchmark(f"Slow test", 1, test_shapefile.test_iterRecords_start_stop, col_width) - if __name__ == "__main__": - run(1) \ No newline at end of file + run(1) From dd2da7467948bbb582ddc43b316aaa386e06006e Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Mon, 28 Jul 2025 18:09:51 +0100 Subject: [PATCH 116/220] Remove slow test from speed tests --- speed_test.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/speed_test.py b/speed_test.py index 9befbfe7..10b7fd0d 100644 --- a/speed_test.py +++ b/speed_test.py @@ -10,7 +10,6 @@ from typing import Union import shapefile as shp -import test_shapefile DEFAULT_PYSHP_TEST_REPO = ( rf"{os.getenv('USERPROFILE')}\Coding\repos\PyShp_test_shapefile" @@ -78,8 +77,5 @@ def run(run_count: int) -> None: col_width, ) - benchmark(f"Slow test", 1, test_shapefile.test_iterRecords_start_stop, col_width) - - if __name__ == "__main__": run(1) From 1958afae1e72d28328f0a228712cf670af2bbe5e Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Mon, 28 Jul 2025 18:12:03 +0100 Subject: [PATCH 117/220] Update speed_test.py --- speed_test.py | 1 + 1 file changed, 1 insertion(+) diff --git a/speed_test.py b/speed_test.py index 10b7fd0d..3374c639 100644 --- a/speed_test.py +++ b/speed_test.py @@ -11,6 +11,7 @@ import shapefile as shp +# For shapefiles from https://github.com/JamesParrott/PyShp_test_shapefile DEFAULT_PYSHP_TEST_REPO = ( rf"{os.getenv('USERPROFILE')}\Coding\repos\PyShp_test_shapefile" ) From 1af47c335855402588bd5806155b5135e34a2cf5 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Mon, 28 Jul 2025 18:13:56 +0100 Subject: [PATCH 118/220] Update speed_test.yml --- 
.github/workflows/speed_test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/speed_test.yml b/.github/workflows/speed_test.yml index 818f3471..7f95020c 100644 --- a/.github/workflows/speed_test.yml +++ b/.github/workflows/speed_test.yml @@ -61,7 +61,7 @@ jobs: repository: JamesParrott/PyShp_test_shapefile path: ./PyShp_test_shapefile - - name: Run Benchmarks + - name: Run Speed tests. env: PYSHP_TEST_REPO: ./PyShp_test_shapefile run: python ./Pyshp/speed_test.py From 92eb277fedbdce0b76e0f207452038e6cdb768ad Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Mon, 28 Jul 2025 18:15:17 +0100 Subject: [PATCH 119/220] Reformat --- speed_test.py | 1 + 1 file changed, 1 insertion(+) diff --git a/speed_test.py b/speed_test.py index 3374c639..0b8b8288 100644 --- a/speed_test.py +++ b/speed_test.py @@ -78,5 +78,6 @@ def run(run_count: int) -> None: col_width, ) + if __name__ == "__main__": run(1) From 5e40f2e590c6b80d0814894657919dac52f80de0 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Mon, 28 Jul 2025 18:32:59 +0100 Subject: [PATCH 120/220] Rename speed_test.py to match that of PyShp 2.4.0 --- .github/workflows/speed_test.yml | 2 +- speed_test.py => run_benchmarks.py | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename speed_test.py => run_benchmarks.py (100%) diff --git a/.github/workflows/speed_test.yml b/.github/workflows/speed_test.yml index 7f95020c..5a037229 100644 --- a/.github/workflows/speed_test.yml +++ b/.github/workflows/speed_test.yml @@ -64,7 +64,7 @@ jobs: - name: Run Speed tests. 
env: PYSHP_TEST_REPO: ./PyShp_test_shapefile - run: python ./Pyshp/speed_test.py + run: python ./Pyshp/run_benchmarks.py diff --git a/speed_test.py b/run_benchmarks.py similarity index 100% rename from speed_test.py rename to run_benchmarks.py From 6454a3f5bd31bc2db7c554ae71d36e7cbac115a3 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Mon, 28 Jul 2025 20:08:48 +0100 Subject: [PATCH 121/220] Move creation of shape record to use shapeType read for that shape., not the whole file --- src/shapefile.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 14da4bf3..9356ed70 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -1638,7 +1638,6 @@ def __shape( # pylint: disable=attribute-defined-outside-init f = self.__getFileObj(self.shp) - record = SHAPE_CLASS_FROM_SHAPETYPE[self.shapeType](oid=oid) # record = Shape(oid=oid) # Previously, we also set __zmin = __zmax = __mmin = __mmax = None nParts: Optional[int] = None @@ -1647,7 +1646,7 @@ def __shape( # Determine the start of the next record next_shape = f.tell() + (2 * recLength) shapeType = unpack(" Date: Mon, 28 Jul 2025 20:09:58 +0100 Subject: [PATCH 122/220] Use the shapeType variable we just read --- src/shapefile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/shapefile.py b/src/shapefile.py index 9356ed70..b6e0dea6 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -1646,7 +1646,7 @@ def __shape( # Determine the start of the next record next_shape = f.tell() + (2 * recLength) shapeType = unpack(" Date: Mon, 28 Jul 2025 20:17:03 +0100 Subject: [PATCH 123/220] Refactor Reader.__shape into __read_shape_from_shp_file --- src/shapefile.py | 237 +++++++++++++++++++++++++---------------------- 1 file changed, 125 insertions(+), 112 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index b6e0dea6..6cdcbcba 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -940,6 
+940,129 @@ class MultiPointZ(MultiPointM, _HasZ): } +def __read_shape_from_shp_file(f): + """ Constructs a Shape from an open .shp file. Something else + is required to have first read the .shp file's header. + Leaves the shp file's .tell() in the correct position for + a subsequent call to this, to build the next shape. + """ + # record = Shape(oid=oid) + # Previously, we also set __zmin = __zmax = __mmin = __mmax = None + nParts: Optional[int] = None + nPoints: Optional[int] = None + (__recNum, recLength) = unpack(">2i", f.read(8)) + # Determine the start of the next record + next_shape = f.tell() + (2 * recLength) + shapeType = unpack("= 16: + __mmin, __mmax = unpack("<2d", f.read(16)) + # Measure values less than -10e38 are nodata values according to the spec + if next_shape - f.tell() >= nPoints * 8: + record.m = [] + for m in _Array[float]( + "d", unpack(f"<{nPoints}d", f.read(nPoints * 8)) + ): + if m > NODATA: + record.m.append(m) + else: + record.m.append(None) + else: + record.m = [None for _ in range(nPoints)] + + # Read a single point + # if shapeType in (1, 11, 21): + if isinstance(record, Point): + x, y = _Array[float]("d", unpack("<2d", f.read(16))) + + record.points = [(x, y)] + if bbox is not None: + # create bounding box for Point by duplicating coordinates + # skip shape if no overlap with bounding box + if not bbox_overlap(bbox, (x, y, x, y)): + f.seek(next_shape) + return None + + # Read a single Z value + # if shapeType == 11: + if isinstance(record, PointZ): + record.z = tuple(unpack("= 8: + (m,) = unpack(" NODATA: + record.m = (m,) + else: + record.m = (None,) + + # pylint: enable=attribute-defined-outside-init + # Seek to the end of this record as defined by the record header because + # the shapefile spec doesn't require the actual content to meet the header + # definition. Probably allowed for lazy feature deletion. + + f.seek(next_shape) + + return record + + + class _Record(list): """ A class to hold a record. 
Subclasses list to ensure compatibility with @@ -1638,120 +1761,10 @@ def __shape( # pylint: disable=attribute-defined-outside-init f = self.__getFileObj(self.shp) - # record = Shape(oid=oid) - # Previously, we also set __zmin = __zmax = __mmin = __mmax = None - nParts: Optional[int] = None - nPoints: Optional[int] = None - (__recNum, recLength) = unpack(">2i", f.read(8)) - # Determine the start of the next record - next_shape = f.tell() + (2 * recLength) - shapeType = unpack("= 16: - __mmin, __mmax = unpack("<2d", f.read(16)) - # Measure values less than -10e38 are nodata values according to the spec - if next_shape - f.tell() >= nPoints * 8: - record.m = [] - for m in _Array[float]( - "d", unpack(f"<{nPoints}d", f.read(nPoints * 8)) - ): - if m > NODATA: - record.m.append(m) - else: - record.m.append(None) - else: - record.m = [None for _ in range(nPoints)] - - # Read a single point - # if shapeType in (1, 11, 21): - if isinstance(record, Point): - x, y = _Array[float]("d", unpack("<2d", f.read(16))) - - record.points = [(x, y)] - if bbox is not None: - # create bounding box for Point by duplicating coordinates - # skip shape if no overlap with bounding box - if not bbox_overlap(bbox, (x, y, x, y)): - f.seek(next_shape) - return None - - # Read a single Z value - # if shapeType == 11: - if isinstance(record, PointZ): - record.z = tuple(unpack("= 8: - (m,) = unpack(" NODATA: - record.m = (m,) - else: - record.m = (None,) - - # pylint: enable=attribute-defined-outside-init - # Seek to the end of this record as defined by the record header because - # the shapefile spec doesn't require the actual content to meet the header - # definition. Probably allowed for lazy feature deletion. 
- - f.seek(next_shape) - - return record + return shape def __shxHeader(self): """Reads the header information from a .shx file.""" From bb75c01a9c31fffc202ad5643566b01d52861577 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Mon, 28 Jul 2025 20:27:06 +0100 Subject: [PATCH 124/220] Pass args of __shape to _read_shape_from_shp_file --- src/shapefile.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 6cdcbcba..ea1da9fb 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -940,11 +940,13 @@ class MultiPointZ(MultiPointM, _HasZ): } -def __read_shape_from_shp_file(f): - """ Constructs a Shape from an open .shp file. Something else - is required to have first read the .shp file's header. - Leaves the shp file's .tell() in the correct position for - a subsequent call to this, to build the next shape. +def _read_shape_from_shp_file( + f, oid=None, bbox=None +): # oid: Optional[int] = None, bbox: Optional[BBox] = None): + """Constructs a Shape from an open .shp file. Something else + is required to have first read the .shp file's header. + Leaves the shp file's .tell() in the correct position for + a subsequent call to this, to build the next shape. """ # record = Shape(oid=oid) # Previously, we also set __zmin = __zmax = __mmin = __mmax = None @@ -999,9 +1001,7 @@ def __read_shape_from_shp_file(f): # if shapeType in (13, 15, 18, 31): if isinstance(record, _HasZ): __zmin, __zmax = unpack("<2d", f.read(16)) - record.z = _Array[float]( - "d", unpack(f"<{nPoints}d", f.read(nPoints * 8)) - ) + record.z = _Array[float]("d", unpack(f"<{nPoints}d", f.read(nPoints * 8))) # Read m extremes and values # if shapeType in (13, 23, 15, 25, 18, 28, 31): @@ -1062,7 +1062,6 @@ def __read_shape_from_shp_file(f): return record - class _Record(list): """ A class to hold a record. 
Subclasses list to ensure compatibility with @@ -1762,7 +1761,7 @@ def __shape( # pylint: disable=attribute-defined-outside-init f = self.__getFileObj(self.shp) - shape = __read_shape_from_shp_file(f) + shape = _read_shape_from_shp_file(f, oid, bbox) return shape From 91db4c2ed73c547011043cd517cfb97b1d90eb6f Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Mon, 28 Jul 2025 20:34:11 +0100 Subject: [PATCH 125/220] Try to make fewer isinstance calls --- src/shapefile.py | 53 ++++++++++++++++++++++++------------------------ 1 file changed, 27 insertions(+), 26 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index ea1da9fb..89ab02fd 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -948,7 +948,7 @@ def _read_shape_from_shp_file( Leaves the shp file's .tell() in the correct position for a subsequent call to this, to build the next shape. """ - # record = Shape(oid=oid) + # shape = Shape(oid=oid) # Previously, we also set __zmin = __zmax = __mmin = __mmax = None nParts: Optional[int] = None nPoints: Optional[int] = None @@ -956,77 +956,78 @@ def _read_shape_from_shp_file( # Determine the start of the next record next_shape = f.tell() + (2 * recLength) shapeType = unpack("= 16: __mmin, __mmax = unpack("<2d", f.read(16)) # Measure values less than -10e38 are nodata values according to the spec if next_shape - f.tell() >= nPoints * 8: - record.m = [] + shape.m = [] for m in _Array[float]( "d", unpack(f"<{nPoints}d", f.read(nPoints * 8)) ): if m > NODATA: - record.m.append(m) + shape.m.append(m) else: - record.m.append(None) + shape.m.append(None) else: - record.m = [None for _ in range(nPoints)] + shape.m = [None for _ in range(nPoints)] # Read a single point # if shapeType in (1, 11, 21): - if isinstance(record, Point): + if ShapeClass is Point: x, y = _Array[float]("d", unpack("<2d", f.read(16))) - record.points = [(x, y)] + shape.points = [(x, y)] if bbox is not None: # create bounding box for Point by 
duplicating coordinates # skip shape if no overlap with bounding box @@ -1036,21 +1037,21 @@ def _read_shape_from_shp_file( # Read a single Z value # if shapeType == 11: - if isinstance(record, PointZ): - record.z = tuple(unpack("= 8: (m,) = unpack(" NODATA: - record.m = (m,) + shape.m = (m,) else: - record.m = (None,) + shape.m = (None,) # pylint: enable=attribute-defined-outside-init # Seek to the end of this record as defined by the record header because From 092d2cd62f5df7b7a891aee2e3fb23e918af3066 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Mon, 28 Jul 2025 20:36:07 +0100 Subject: [PATCH 126/220] Correct retvar name --- src/shapefile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/shapefile.py b/src/shapefile.py index 89ab02fd..6c3086fb 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -1060,7 +1060,7 @@ def _read_shape_from_shp_file( f.seek(next_shape) - return record + return shape class _Record(list): From aec4f52d6f32fe8c858399f173aff3f1a2b0bcd1 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Mon, 28 Jul 2025 20:42:11 +0100 Subject: [PATCH 127/220] Replace isinstance test with if ShapeClass is MultiPatch --- src/shapefile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/shapefile.py b/src/shapefile.py index 6c3086fb..ff116795 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -989,7 +989,7 @@ def _read_shape_from_shp_file( # Read part types for Multipatch - 31 # if shapeType == 31: - if isinstance(shape, MultiPatch): + if ShapeClass is MultiPatch: shape.partTypes = _Array[int]( "i", unpack(f"<{nParts}i", f.read(nParts * 4)) ) From b3cf1b8ce124d0d0f759b5cccc957cae7012b3e5 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Mon, 28 Jul 2025 20:47:24 +0100 Subject: [PATCH 128/220] Refactor into Shape class method --- src/shapefile.py | 251 
++++++++++++++++++++++++----------------------- 1 file changed, 128 insertions(+), 123 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index ff116795..81ac8671 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -844,6 +844,134 @@ def shapeTypeName(self) -> str: def __repr__(self): return f"Shape #{self.__oid}: {self.shapeTypeName}" + @classmethod + def _from_shp_file(cls, oid=None, bbox=None): + # For Null shapes create an empty points list for consistency + # if shapeType == 0: + if cls is NullShape: + record.points = [] + # All shape types capable of having a bounding box + # elif shapeType in (3, 13, 23, 5, 15, 25, 8, 18, 28, 31): + elif isinstance(shape, _CanHaveBBox): + # record.bbox = tuple(_Array[float]("d", unpack("<4d", f.read(32)))) + shape.bbox = _Array[float]("d", unpack("<4d", f.read(32))) + # if bbox specified and no overlap, skip this shape + if bbox is not None and not bbox_overlap(bbox, tuple(shape.bbox)): + # because we stop parsing this shape, skip to beginning of + # next shape before we return + f.seek(next_shape) + return None + # Shape types with parts + # if shapeType in (3, 13, 23, 5, 15, 25, 31): + if any(cls is class_ for class_ in (shape, (Polyline, Polygon, MultiPatch))): + nParts = unpack("= 16: + __mmin, __mmax = unpack("<2d", f.read(16)) + # Measure values less than -10e38 are nodata values according to the spec + if next_shape - f.tell() >= nPoints * 8: + shape.m = [] + for m in _Array[float]( + "d", unpack(f"<{nPoints}d", f.read(nPoints * 8)) + ): + if m > NODATA: + shape.m.append(m) + else: + shape.m.append(None) + else: + shape.m = [None for _ in range(nPoints)] + + # Read a single point + # if shapeType in (1, 11, 21): + if cls is Point: + x, y = _Array[float]("d", unpack("<2d", f.read(16))) + + shape.points = [(x, y)] + if bbox is not None: + # create bounding box for Point by duplicating coordinates + # skip shape if no overlap with bounding box + if not bbox_overlap(bbox, (x, y, x, y)): + f.seek(next_shape) 
+ return None + + # Read a single Z value + # if shapeType == 11: + if cls is PointZ: + shape.z = tuple(unpack("= 8: + (m,) = unpack(" NODATA: + shape.m = (m,) + else: + shape.m = (None,) + + # pylint: enable=attribute-defined-outside-init + # Seek to the end of this record as defined by the record header because + # the shapefile spec doesn't require the actual content to meet the header + # definition. Probably allowed for lazy feature deletion. + + +def _read_shape_from_shp_file( + f, oid=None, bbox=None +): # oid: Optional[int] = None, bbox: Optional[BBox] = None): + """Constructs a Shape from an open .shp file. Something else + is required to have first read the .shp file's header. + Leaves the shp file's .tell() in the correct position for + a subsequent call to this, to build the next shape. + """ + # shape = Shape(oid=oid) + # Previously, we also set __zmin = __zmax = __mmin = __mmax = None + nParts: Optional[int] = None + nPoints: Optional[int] = None + (__recNum, recLength) = unpack(">2i", f.read(8)) + # Determine the start of the next record + next_shape = f.tell() + (2 * recLength) + shapeType = unpack("2i", f.read(8)) - # Determine the start of the next record - next_shape = f.tell() + (2 * recLength) - shapeType = unpack("= 16: - __mmin, __mmax = unpack("<2d", f.read(16)) - # Measure values less than -10e38 are nodata values according to the spec - if next_shape - f.tell() >= nPoints * 8: - shape.m = [] - for m in _Array[float]( - "d", unpack(f"<{nPoints}d", f.read(nPoints * 8)) - ): - if m > NODATA: - shape.m.append(m) - else: - shape.m.append(None) - else: - shape.m = [None for _ in range(nPoints)] - - # Read a single point - # if shapeType in (1, 11, 21): - if ShapeClass is Point: - x, y = _Array[float]("d", unpack("<2d", f.read(16))) - - shape.points = [(x, y)] - if bbox is not None: - # create bounding box for Point by duplicating coordinates - # skip shape if no overlap with bounding box - if not bbox_overlap(bbox, (x, y, x, y)): - 
f.seek(next_shape) - return None - - # Read a single Z value - # if shapeType == 11: - if ShapeClass is PointZ: - shape.z = tuple(unpack("= 8: - (m,) = unpack(" NODATA: - shape.m = (m,) - else: - shape.m = (None,) - - # pylint: enable=attribute-defined-outside-init - # Seek to the end of this record as defined by the record header because - # the shapefile spec doesn't require the actual content to meet the header - # definition. Probably allowed for lazy feature deletion. - - f.seek(next_shape) - - return shape - - class _Record(list): """ A class to hold a record. Subclasses list to ensure compatibility with From f425fe2080d52be8bf555de287fa6fac5f24c1fe Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Mon, 28 Jul 2025 21:13:45 +0100 Subject: [PATCH 129/220] Restore necessary isinstance check --- src/shapefile.py | 47 ++++++++++++++++++++++++++--------------------- 1 file changed, 26 insertions(+), 21 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 81ac8671..5961b2af 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -845,11 +845,17 @@ def __repr__(self): return f"Shape #{self.__oid}: {self.shapeTypeName}" @classmethod - def _from_shp_file(cls, oid=None, bbox=None): + def _from_shp_file(cls, f, recLength, next_shape, oid=None, bbox=None): + # Previously, we also set __zmin = __zmax = __mmin = __mmax = None + nParts: Optional[int] = None + nPoints: Optional[int] = None + + shape = cls(oid=oid) + # For Null shapes create an empty points list for consistency # if shapeType == 0: if cls is NullShape: - record.points = [] + shape.points = [] # All shape types capable of having a bounding box # elif shapeType in (3, 13, 23, 5, 15, 25, 8, 18, 28, 31): elif isinstance(shape, _CanHaveBBox): @@ -863,7 +869,7 @@ def _from_shp_file(cls, oid=None, bbox=None): return None # Shape types with parts # if shapeType in (3, 13, 23, 5, 15, 25, 31): - if any(cls is class_ for class_ in (shape, (Polyline, 
Polygon, MultiPatch))): + if issubclass(cls, (Polyline, Polygon, MultiPatch)): nParts = unpack("2i", f.read(8)) + (__recNum, recLength) = unpack_2_int32_be(f.read(8)) # Determine the start of the next record next_shape = f.tell() + (2 * recLength) shapeType = unpack(" Date: Mon, 28 Jul 2025 21:18:50 +0100 Subject: [PATCH 130/220] Satisfy Pylint --- pyproject.toml | 3 ++- src/shapefile.py | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index aa11da45..a8e14c4a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -114,6 +114,7 @@ load-plugins=[ # Silence warning: shapefile.py:2076:20: W0212: Access to a protected # member _from_geojson of a client class (protected-access) +# shapefile.py:950:16: W0201: Attribute 'm' defined outside __init__ (attribute-defined-outside-init) # Silence remarks: # src\shapefile.py:338:0: R0914: Too many local variables (21/15) (too-many-locals) # src\shapefile.py:338:0: R0912: Too many branches (24/12) (too-many-branches) @@ -133,6 +134,6 @@ load-plugins=[ # https://github.com/christopherpickering/pylint-per-file-ignores/issues/160 [tool.pylint.'messages control'] per-file-ignores = [ - "/src/shapefile.py:W0212,R0902,R0903,R0904,R0911,R0912,R0913,R0914,R0915,R0917,R1732", + "/src/shapefile.py:W0212,W0201,R0902,R0903,R0904,R0911,R0912,R0913,R0914,R0915,R0917,R1732", "test_shapefile.py:W0212,R1732", ] diff --git a/src/shapefile.py b/src/shapefile.py index 5961b2af..10f91f89 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -845,7 +845,7 @@ def __repr__(self): return f"Shape #{self.__oid}: {self.shapeTypeName}" @classmethod - def _from_shp_file(cls, f, recLength, next_shape, oid=None, bbox=None): + def _from_shp_file(cls, f, next_shape, oid=None, bbox=None): # Previously, we also set __zmin = __zmax = __mmin = __mmax = None nParts: Optional[int] = None nPoints: Optional[int] = None @@ -971,7 +971,7 @@ def _read_shape_from_shp_file( next_shape = f.tell() + (2 * recLength) shapeType 
= unpack(" Date: Mon, 28 Jul 2025 21:48:32 +0100 Subject: [PATCH 131/220] Only call f.seek(next_shape) in _read_shape_from_shp_file --- src/shapefile.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 10f91f89..02d35192 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -865,7 +865,6 @@ def _from_shp_file(cls, f, next_shape, oid=None, bbox=None): if bbox is not None and not bbox_overlap(bbox, tuple(shape.bbox)): # because we stop parsing this shape, skip to beginning of # next shape before we return - f.seek(next_shape) return None # Shape types with parts # if shapeType in (3, 13, 23, 5, 15, 25, 31): @@ -928,7 +927,6 @@ def _from_shp_file(cls, f, next_shape, oid=None, bbox=None): # create bounding box for Point by duplicating coordinates # skip shape if no overlap with bounding box if not bbox_overlap(bbox, (x, y, x, y)): - f.seek(next_shape) return None # Read a single Z value From 2073cb151b97415c7ef1181293149ee4090834d4 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Mon, 28 Jul 2025 21:51:08 +0100 Subject: [PATCH 132/220] Don't need to set NullShape().points = [], it already is. 
--- src/shapefile.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 02d35192..02e52509 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -852,10 +852,6 @@ def _from_shp_file(cls, f, next_shape, oid=None, bbox=None): shape = cls(oid=oid) - # For Null shapes create an empty points list for consistency - # if shapeType == 0: - if cls is NullShape: - shape.points = [] # All shape types capable of having a bounding box # elif shapeType in (3, 13, 23, 5, 15, 25, 8, 18, 28, 31): elif isinstance(shape, _CanHaveBBox): From 9201e480f52ca4a6e78a3f19d53559ed940828cf Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Mon, 28 Jul 2025 21:52:31 +0100 Subject: [PATCH 133/220] Replace elif with if --- src/shapefile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/shapefile.py b/src/shapefile.py index 02e52509..64c8fe55 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -854,7 +854,7 @@ def _from_shp_file(cls, f, next_shape, oid=None, bbox=None): # All shape types capable of having a bounding box # elif shapeType in (3, 13, 23, 5, 15, 25, 8, 18, 28, 31): - elif isinstance(shape, _CanHaveBBox): + if isinstance(shape, _CanHaveBBox): # record.bbox = tuple(_Array[float]("d", unpack("<4d", f.read(32)))) shape.bbox = _Array[float]("d", unpack("<4d", f.read(32))) # if bbox specified and no overlap, skip this shape From b7302b64a36bbb95d8f9b55b509b9799cf055ab7 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Mon, 28 Jul 2025 22:47:25 +0100 Subject: [PATCH 134/220] Polymorphic! 
(Errors) --- src/shapefile.py | 234 +++++++++++++++++++++++++++-------------------- 1 file changed, 134 insertions(+), 100 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 64c8fe55..d56bcb05 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -844,111 +844,87 @@ def shapeTypeName(self) -> str: def __repr__(self): return f"Shape #{self.__oid}: {self.shapeTypeName}" + # pylint: disable=unused-argument + def _get_and_set_bbox_from_shp_file(self, f): + return None + + @staticmethod + def _get_nparts_from_shp_file(f): + return None + + @staticmethod + def _get_npoints_from_shp_file(f): + return None + + def _set_parts_from_shp_file(self, f, nParts): + pass + + def _set_part_types_from_shp_file(self, f, nParts): + pass + + def _set_points_from_shp_file(self, f, nPoints): + pass + + def _set_z_from_shp_file(self, f, nPoints): + pass + + def _set_m_from_shp_file(self, f, nPoints, next_shape): + pass + + def _get_and_set_2D_point_from_shp_file(self, f): + return None + + def _set_single_point_z_from_shp_file(self, f): + pass + + def _set_single_point_m_from_shp_file(self, f, next_shape): + pass + + # pylint: enable=unused-argument + @classmethod def _from_shp_file(cls, f, next_shape, oid=None, bbox=None): - # Previously, we also set __zmin = __zmax = __mmin = __mmax = None - nParts: Optional[int] = None - nPoints: Optional[int] = None - shape = cls(oid=oid) - # All shape types capable of having a bounding box - # elif shapeType in (3, 13, 23, 5, 15, 25, 8, 18, 28, 31): - if isinstance(shape, _CanHaveBBox): - # record.bbox = tuple(_Array[float]("d", unpack("<4d", f.read(32)))) - shape.bbox = _Array[float]("d", unpack("<4d", f.read(32))) - # if bbox specified and no overlap, skip this shape - if bbox is not None and not bbox_overlap(bbox, tuple(shape.bbox)): - # because we stop parsing this shape, skip to beginning of - # next shape before we return - return None - # Shape types with parts - # if shapeType in (3, 13, 23, 5, 15, 25, 31): - if 
issubclass(cls, (Polyline, Polygon, MultiPatch)): - nParts = unpack("= 16: - __mmin, __mmax = unpack("<2d", f.read(16)) - # Measure values less than -10e38 are nodata values according to the spec - if next_shape - f.tell() >= nPoints * 8: - shape.m = [] - for m in _Array[float]( - "d", unpack(f"<{nPoints}d", f.read(nPoints * 8)) - ): - if m > NODATA: - shape.m.append(m) - else: - shape.m.append(None) - else: - shape.m = [None for _ in range(nPoints)] + shape._set_z_from_shp_file(f, nPoints) + + shape._set_m_from_shp_file(f, nPoints, next_shape) # Read a single point # if shapeType in (1, 11, 21): - if cls is Point: - x, y = _Array[float]("d", unpack("<2d", f.read(16))) - - shape.points = [(x, y)] - if bbox is not None: - # create bounding box for Point by duplicating coordinates - # skip shape if no overlap with bounding box - if not bbox_overlap(bbox, (x, y, x, y)): - return None - - # Read a single Z value - # if shapeType == 11: - if cls is PointZ: - shape.z = tuple(unpack("= 8: - (m,) = unpack(" NODATA: - shape.m = (m,) - else: - shape.m = (None,) + point_2D = shape._get_and_set_2D_point_from_shp_file(f) # pylint: disable=assignment-from-none - return shape + if bbox is not None and point_2D is not None: + x, y = point_2D # pylint: disable=unpacking-non-sequence + # create bounding box for Point by duplicating coordinates + # skip shape if no overlap with bounding box + if not bbox_overlap(bbox, (x, y, x, y)): + return None - # pylint: enable=attribute-defined-outside-init - # Seek to the end of this record as defined by the record header because - # the shapefile spec doesn't require the actual content to meet the header - # definition. Probably allowed for lazy feature deletion. 
+ shape._set_single_point_z_from_shp_file(f) + + shape._set_single_point_m_from_shp_file(f, next_shape) + + return shape def _read_shape_from_shp_file( @@ -967,6 +943,9 @@ def _read_shape_from_shp_file( ShapeClass = SHAPE_CLASS_FROM_SHAPETYPE[shapeType] shape = ShapeClass._from_shp_file(f, next_shape, oid=oid, bbox=bbox) + # Seek to the end of this record as defined by the record header because + # the shapefile spec doesn't require the actual content to meet the header + # definition. Probably allowed for lazy feature deletion. f.seek(next_shape) return shape @@ -983,16 +962,43 @@ class _CanHaveBBox(Shape): # Not a BBox because the legacy implementation was a list, not a 4-tuple. bbox: Optional[Sequence[float]] = None + def _get_and_set_bbox_from_shp_file(self, f): + # record.bbox = tuple(_Array[float]("d", unpack("<4d", f.read(32)))) + self.bbox = _Array[float]("d", unpack("<4d", f.read(32))) + return self.bbox + + @staticmethod + def _get_npoints_from_shp_file(f): + return unpack("= 16: + __mmin, __mmax = unpack("<2d", f.read(16)) + # Measure values less than -10e38 are nodata values according to the spec + if next_shape - f.tell() >= nPoints * 8: + self.m = [] + for m in _Array[float]("d", unpack(f"<{nPoints}d", f.read(nPoints * 8))): + if m > NODATA: + self.m.append(m) + else: + self.m.append(None) + else: + self.m = [None for _ in range(nPoints)] + class _HasZ(Shape): z: Sequence[float] + def _set_z_from_shp_file(self, f, nPoints): + __zmin, __zmax = unpack("<2d", f.read(16)) # pylint: disable=unused-private-member + self.z = _Array[float]("d", unpack(f"<{nPoints}d", f.read(nPoints * 8))) + -class MultiPatch(_HasM, _HasZ, _CanHaveBBox): +class MultiPatch(_HasM, _HasZ, _CanHaveParts): shapeType = MULTIPATCH + def _set_part_types_from_shp_file(self, f, nParts): + self.partTypes = _Array[int]("i", unpack(f"<{nParts}i", f.read(nParts * 4))) + class PointM(Point, _HasM): shapeType = POINTM @@ -1018,6 +1045,17 @@ class PointM(Point, _HasM): # PyShp encodes None m 
values as NODATA m = (None,) + def _set_single_point_m_from_shp_file(self, f, next_shape): + if next_shape - f.tell() >= 8: + (m,) = unpack(" NODATA: + self.m = (m,) + else: + self.m = (None,) + class PolylineM(Polyline, _HasM): shapeType = POLYLINEM @@ -1036,6 +1074,9 @@ class PointZ(PointM, _HasZ): # same default as in Writer.__shpRecord (if s.shapeType == 11:) z: Sequence[float] = (0.0,) + def _set_single_point_z_from_shp_file(self, f): + self.z = tuple(unpack(" Optional[Shape]: """Returns the header info and geometry for a single shape.""" - # pylint: disable=attribute-defined-outside-init f = self.__getFileObj(self.shp) shape = _read_shape_from_shp_file(f, oid, bbox) @@ -1901,7 +1938,6 @@ def iterShapes(self, bbox: Optional[BBox] = None) -> Iterator[Optional[Shape]]: def __dbfHeader(self): """Reads a dbf header. Xbase-related code borrows heavily from ActiveState Python Cookbook Recipe 362715 by Raymond Hettinger""" - # pylint: disable=attribute-defined-outside-init if not self.dbf: raise ShapefileException( "Shapefile Reader requires a shapefile or file-like object. (no dbf file found)" @@ -1948,8 +1984,6 @@ def __dbfHeader(self): self.__fullRecStruct = recStruct self.__fullRecLookup = recLookup - # pylint: enable=attribute-defined-outside-init - def __recordFmt(self, fields: Optional[Container[str]] = None) -> tuple[str, int]: """Calculates the format and size of a .dbf record. Optional 'fields' arg specifies which fieldnames to unpack and which to ignore. 
Note that this From 408c7d17bfe7e1789e25e1adaa47d44b7def6d53 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Mon, 28 Jul 2025 22:59:25 +0100 Subject: [PATCH 135/220] Don't overwrite user specified bbox with the one read from shp file --- src/shapefile.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index d56bcb05..321b215e 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -845,8 +845,8 @@ def __repr__(self): return f"Shape #{self.__oid}: {self.shapeTypeName}" # pylint: disable=unused-argument - def _get_and_set_bbox_from_shp_file(self, f): - return None + def _set_bbox_from_shp_file(self, f): + pass @staticmethod def _get_nparts_from_shp_file(f): @@ -886,7 +886,7 @@ def _set_single_point_m_from_shp_file(self, f, next_shape): def _from_shp_file(cls, f, next_shape, oid=None, bbox=None): shape = cls(oid=oid) - bbox = shape._get_and_set_bbox_from_shp_file(f) # pylint: disable=assignment-from-none + shape._set_bbox_from_shp_file(f) # pylint: disable=assignment-from-none # if bbox specified and no overlap, skip this shape if bbox is not None and not bbox_overlap(bbox, tuple(shape.bbox)): # pylint: disable=no-member @@ -962,10 +962,9 @@ class _CanHaveBBox(Shape): # Not a BBox because the legacy implementation was a list, not a 4-tuple. 
bbox: Optional[Sequence[float]] = None - def _get_and_set_bbox_from_shp_file(self, f): + def _set_bbox_from_shp_file(self, f): # record.bbox = tuple(_Array[float]("d", unpack("<4d", f.read(32)))) self.bbox = _Array[float]("d", unpack("<4d", f.read(32))) - return self.bbox @staticmethod def _get_npoints_from_shp_file(f): From 2ee7532771bae554a5ecd1c2ba1a6d8a05fda0a1 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Tue, 29 Jul 2025 09:18:07 +0100 Subject: [PATCH 136/220] Create py.typed --- src/py.typed | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 src/py.typed diff --git a/src/py.typed b/src/py.typed new file mode 100644 index 00000000..e69de29b From 5a77065a0e7fe4474b6b4443d26284826f1b6e5a Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Tue, 29 Jul 2025 10:07:30 +0100 Subject: [PATCH 137/220] Update test_shapefile.py --- test_shapefile.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/test_shapefile.py b/test_shapefile.py index 04994af8..d7c76360 100644 --- a/test_shapefile.py +++ b/test_shapefile.py @@ -995,7 +995,7 @@ def test_iterRecords_start_stop(): # Arbitrary selection of record indices # (there are 663 records in blockgroups.dbf). 
- for i in [ + indices = [ 0, 1, 2, @@ -1013,18 +1013,20 @@ def test_iterRecords_start_stop(): N - 3, N - 2, N - 1, - ]: - for record in sf.iterRecords(start=i): + ] + for i, index in enumerate(indices): + for record in sf.iterRecords(start=index): assert record == sf.record(record.oid) - for record in sf.iterRecords(stop=i): + for record in sf.iterRecords(stop=index): assert record == sf.record(record.oid) - for stop in range(i, len(sf)): + for j in range(i+1, len(indices)): + stop = indices[j] # test negative indexing from end, as well as # positive values of stop, and its default - for stop_arg in (stop, stop - len(sf)): - for record in sf.iterRecords(start=i, stop=stop_arg): + for stop_arg in (stop, stop - N): + for record in sf.iterRecords(start=index, stop=stop_arg): assert record == sf.record(record.oid) From d8d635742f35a39bf46ed8379444bf051c8626bc Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Tue, 29 Jul 2025 10:08:45 +0100 Subject: [PATCH 138/220] Reformat --- test_shapefile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test_shapefile.py b/test_shapefile.py index d7c76360..b43d2470 100644 --- a/test_shapefile.py +++ b/test_shapefile.py @@ -1021,7 +1021,7 @@ def test_iterRecords_start_stop(): for record in sf.iterRecords(stop=index): assert record == sf.record(record.oid) - for j in range(i+1, len(indices)): + for j in range(i + 1, len(indices)): stop = indices[j] # test negative indexing from end, as well as # positive values of stop, and its default From a5ecbf3ed3aa8d91c9fd039e7fedd21c3be1af2a Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Tue, 29 Jul 2025 10:25:05 +0100 Subject: [PATCH 139/220] Update shapefile.py --- src/shapefile.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/shapefile.py b/src/shapefile.py index 321b215e..d52c730f 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -623,6 +623,7 @@ def 
__init__( list of shapes. For MultiPatch geometry, partTypes designates the patch type of each of the parts. """ + # Preserve previous behaviour for anyone who set self.shapeType = None if not isinstance(shapeType, _NoShapeTypeSentinel): self.shapeType = shapeType self.points = points or [] @@ -3138,6 +3139,8 @@ def _shapeparts( # Make sure polygon rings (parts) are closed # if shapeType in (5, 15, 25, 31): + # This method is never actually called on a MultiPatch + # so we omit its shapeType (31) for efficiency if isinstance(polyShape, Polygon): for part in parts: if part[0] != part[-1]: From 3591be48f94fe0157c7a47d116614d9d5469dd51 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Tue, 29 Jul 2025 10:25:48 +0100 Subject: [PATCH 140/220] Reformat --- src/shapefile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/shapefile.py b/src/shapefile.py index d52c730f..537e2bcb 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -3139,7 +3139,7 @@ def _shapeparts( # Make sure polygon rings (parts) are closed # if shapeType in (5, 15, 25, 31): - # This method is never actually called on a MultiPatch + # This method is never actually called on a MultiPatch # so we omit its shapeType (31) for efficiency if isinstance(polyShape, Polygon): for part in parts: From efe077833a2844b9d9899ea3c60a106d814ad2a4 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Tue, 29 Jul 2025 10:48:11 +0100 Subject: [PATCH 141/220] Refactor some of Writer.__shpRecord into _write_shape_to_shp_file( --- shapefiles/test/balancing.dbf | Bin 804 -> 804 bytes shapefiles/test/contextwriter.dbf | Bin 65 -> 65 bytes shapefiles/test/corrupt_too_long.dbf | Bin 580 -> 580 bytes shapefiles/test/dtype.dbf | Bin 259 -> 259 bytes shapefiles/test/line.dbf | Bin 116 -> 116 bytes shapefiles/test/multipoint.dbf | Bin 116 -> 116 bytes shapefiles/test/onlydbf.dbf | Bin 65 -> 65 bytes 
shapefiles/test/point.dbf | Bin 116 -> 116 bytes shapefiles/test/polygon.dbf | Bin 116 -> 116 bytes shapefiles/test/shapetype.dbf | Bin 65 -> 65 bytes shapefiles/test/testfile.dbf | Bin 65 -> 65 bytes src/shapefile.py | 338 ++++++++++++++------------- 12 files changed, 179 insertions(+), 159 deletions(-) diff --git a/shapefiles/test/balancing.dbf b/shapefiles/test/balancing.dbf index c77d63b3fa175a32a667bb61f9db31c0d220001a..8272cf33374d841d1876aa95146ca6b115a13d4f 100644 GIT binary patch delta 13 UcmZ3&wuFs^xt3jaBZ~qv02gKg)c^nh delta 13 UcmZ3&wuFs^xr&W(BZ~qv02aRjwEzGB diff --git a/shapefiles/test/contextwriter.dbf b/shapefiles/test/contextwriter.dbf index e030c2a3ba3517fb1bee24c6ae015968e20da29b..327fd49366a1e7061490a1e98bc06594129f958f 100644 GIT binary patch delta 10 RcmZ>CWMQslmz~IB3jhj_0sQ~~ delta 10 RcmZ>CWMQsiW1Prh3jhiT0o?!q diff --git a/shapefiles/test/corrupt_too_long.dbf b/shapefiles/test/corrupt_too_long.dbf index 57230c5dae273d13cf95675350da98b122facb83..e1bb1a55d413830e508ae125e7caff320e39f377 100644 GIT binary patch delta 13 UcmX@Ya)gD2xt3jaBZ~tQ02+A%FaQ7m delta 13 UcmX@Ya)gD2xr&W(BZ~tQ02$H)5C8xG diff --git a/shapefiles/test/dtype.dbf b/shapefiles/test/dtype.dbf index 1ddda01fdf467e0303fc64318e5b01aff29b4fee..2939da4791a864894ed53683d9d18574c69f3d05 100644 GIT binary patch delta 12 TcmZo>YGz_#u4R{<$nqZm5$Xd+ delta 12 TcmZo>YGz_#u3}@H$nqZm5n}@< diff --git a/shapefiles/test/line.dbf b/shapefiles/test/line.dbf index 24f529e800a2a5da5d96e8fb44ce5268fb410555..9e43d68b8aedd698aac5f61e600bbb63a4e42b81 100644 GIT binary patch delta 10 RcmXRZVPURimz~H`000h)0+s*( delta 10 RcmXRZVPURfW1PrR000gI0(JlZ diff --git a/shapefiles/test/multipoint.dbf b/shapefiles/test/multipoint.dbf index 4d7d4f1777867f1dded56cbda3f31aea44a25824..74ed8b14883b8194290b563eb207108beb938418 100644 GIT binary patch delta 10 RcmXRZVPURimz~H`000h)0+s*( delta 10 RcmXRZVPURfW1PrR000gI0(JlZ diff --git a/shapefiles/test/onlydbf.dbf b/shapefiles/test/onlydbf.dbf index 
e030c2a3ba3517fb1bee24c6ae015968e20da29b..327fd49366a1e7061490a1e98bc06594129f958f 100644 GIT binary patch delta 10 RcmZ>CWMQslmz~IB3jhj_0sQ~~ delta 10 RcmZ>CWMQsiW1Prh3jhiT0o?!q diff --git a/shapefiles/test/point.dbf b/shapefiles/test/point.dbf index e29d0859b8c872db6f0f71237bfb88ce21568e61..5a881b870c1a5f904c98e765fabd80fb94feb0d6 100644 GIT binary patch delta 10 RcmXRZVPURimz~H`000h)0+s*( delta 10 RcmXRZVPURfW1PrR000gI0(JlZ diff --git a/shapefiles/test/polygon.dbf b/shapefiles/test/polygon.dbf index b116dc4692b34fdb7c114ee60214e15f734ab3aa..1cc8920a0b6da92323732a36b2e17a0faccab5f3 100644 GIT binary patch delta 10 RcmXRZVPURimz~H`000h)0+s*( delta 10 RcmXRZVPURfW1PrR000gI0(JlZ diff --git a/shapefiles/test/shapetype.dbf b/shapefiles/test/shapetype.dbf index e030c2a3ba3517fb1bee24c6ae015968e20da29b..327fd49366a1e7061490a1e98bc06594129f958f 100644 GIT binary patch delta 10 RcmZ>CWMQslmz~IB3jhj_0sQ~~ delta 10 RcmZ>CWMQsiW1Prh3jhiT0o?!q diff --git a/shapefiles/test/testfile.dbf b/shapefiles/test/testfile.dbf index e030c2a3ba3517fb1bee24c6ae015968e20da29b..327fd49366a1e7061490a1e98bc06594129f958f 100644 GIT binary patch delta 10 RcmZ>CWMQslmz~IB3jhj_0sQ~~ delta 10 RcmZ>CWMQsiW1Prh3jhiT0o?!q diff --git a/src/shapefile.py b/src/shapefile.py index 537e2bcb..20054a0d 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -952,6 +952,176 @@ def _read_shape_from_shp_file( return shape +def _write_shape_to_shp_file( + f, + s, + i, + update_bbox, + update_mbox, + update_zbox, +): + f.write(pack(" 2 else 0)) + except error: + raise ShapefileException( + f"Failed to write elevation values for record {i}. Expected floats." + ) + # Write m extremes and values + # When reading a file, pyshp converts NODATA m values to None, so here we make sure to convert them back to NODATA + # Note: missing m values are autoset to NODATA. 
+ if s.shapeType in (13, 15, 18, 23, 25, 28, 31): + try: + f.write(pack("<2d", *update_mbox(s))) + except error: + raise ShapefileException( + f"Failed to write measure extremes for record {i}. Expected floats" + ) + try: + if hasattr(s, "m"): + # if m values are stored in attribute + # fmt: off + f.write( + pack( + f"<{len(s.m)}d", + *[m if m is not None else NODATA for m in s.m] + ) + ) + # fmt: on + else: + # if m values are stored as 3rd/4th dimension + # 0-index position of m value is 3 if z type (x,y,z,m), or 2 if m type (x,y,m) + mpos = 3 if s.shapeType in (13, 15, 18, 31) else 2 + for p in s.points: + f.write( + pack( + " mpos and p[mpos] is not None + else NODATA, + ) + ) + except error: + raise ShapefileException( + f"Failed to write measure values for record {i}. Expected floats" + ) + # Write a single point + if s.shapeType in (1, 11, 21): + try: + f.write(pack("<2d", s.points[0][0], s.points[0][1])) + except error: + raise ShapefileException( + f"Failed to write point for record {i}. Expected floats." + ) + # Write a single Z value + # Note: missing z values are autoset to 0, but not sure if this is ideal. + if s.shapeType == 11: + # update the global z box + update_zbox(s) + # then write value + if hasattr(s, "z"): + # if z values are stored in attribute + try: + if not s.z: + s.z = (0,) + f.write(pack(" 2 else 0)) - except error: - raise ShapefileException( - f"Failed to write elevation values for record {self.shpNum}. Expected floats." - ) - # Write m extremes and values - # When reading a file, pyshp converts NODATA m values to None, so here we make sure to convert them back to NODATA - # Note: missing m values are autoset to NODATA. - if s.shapeType in (13, 15, 18, 23, 25, 28, 31): - try: - f.write(pack("<2d", *self.__mbox(s))) - except error: - raise ShapefileException( - f"Failed to write measure extremes for record {self.shpNum}. 
Expected floats" - ) - try: - if hasattr(s, "m"): - # if m values are stored in attribute - # fmt: off - f.write( - pack( - f"<{len(s.m)}d", - *[m if m is not None else NODATA for m in s.m] - ) - ) - # fmt: on - else: - # if m values are stored as 3rd/4th dimension - # 0-index position of m value is 3 if z type (x,y,z,m), or 2 if m type (x,y,m) - mpos = 3 if s.shapeType in (13, 15, 18, 31) else 2 - for p in s.points: - f.write( - pack( - " mpos and p[mpos] is not None - else NODATA, - ) - ) - except error: - raise ShapefileException( - f"Failed to write measure values for record {self.shpNum}. Expected floats" - ) - # Write a single point - if s.shapeType in (1, 11, 21): - try: - f.write(pack("<2d", s.points[0][0], s.points[0][1])) - except error: - raise ShapefileException( - f"Failed to write point for record {self.shpNum}. Expected floats." - ) - # Write a single Z value - # Note: missing z values are autoset to 0, but not sure if this is ideal. - if s.shapeType == 11: - # update the global z box - self.__zbox(s) - # then write value - if hasattr(s, "z"): - # if z values are stored in attribute - try: - if not s.z: - s.z = (0,) - f.write(pack(" Date: Tue, 29 Jul 2025 11:18:26 +0100 Subject: [PATCH 142/220] Ignore Pylint W0707 --- pyproject.toml | 9 ++++++--- src/shapefile.py | 14 ++------------ 2 files changed, 8 insertions(+), 15 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index a8e14c4a..73883b60 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -112,9 +112,12 @@ load-plugins=[ "pylint_per_file_ignores", ] -# Silence warning: shapefile.py:2076:20: W0212: Access to a protected +# Silence warning: src/shapefile.py:2076:20: W0212: Access to a protected # member _from_geojson of a client class (protected-access) -# shapefile.py:950:16: W0201: Attribute 'm' defined outside __init__ (attribute-defined-outside-init) +# src/shapefile.py:950:16: W0201: Attribute 'm' defined outside __init__ (attribute-defined-outside-init) +# src/shapefile.py:973:12: 
W0707: Consider explicitly re-raising using 'except error as exc' and +# 'raise ShapefileException(f'Failed to write bounding box for record {i}. +# Expected floats.') from exc' (raise-missing-from) # Silence remarks: # src\shapefile.py:338:0: R0914: Too many local variables (21/15) (too-many-locals) # src\shapefile.py:338:0: R0912: Too many branches (24/12) (too-many-branches) @@ -134,6 +137,6 @@ load-plugins=[ # https://github.com/christopherpickering/pylint-per-file-ignores/issues/160 [tool.pylint.'messages control'] per-file-ignores = [ - "/src/shapefile.py:W0212,W0201,R0902,R0903,R0904,R0911,R0912,R0913,R0914,R0915,R0917,R1732", + "/src/shapefile.py:W0707,W0212,W0201,R0902,R0903,R0904,R0911,R0912,R0913,R0914,R0915,R0917,R1732", "test_shapefile.py:W0212,R1732", ] diff --git a/src/shapefile.py b/src/shapefile.py index 20054a0d..d26a3337 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -1325,7 +1325,6 @@ def __getattr__(self, item: str) -> RecordValue: and IndexError, if the field exists but the field's corresponding value in the Record does not exist """ - # pylint: disable=raise-missing-from try: if item == "__setstate__": # Prevent infinite loop from copy.deepcopy() raise AttributeError("_Record does not implement __setstate__") @@ -1337,7 +1336,6 @@ def __getattr__(self, item: str) -> RecordValue: raise IndexError( f"{item} found as a field but not enough values available." 
) - # pylint: enable=raise-missing-from def __setattr__(self, key: str, value: RecordValue): """ @@ -1353,7 +1351,7 @@ def __setattr__(self, key: str, value: RecordValue): index = self.__field_positions[key] return list.__setitem__(self, index, value) except KeyError: - raise AttributeError(f"{key} is not a field name") # pylint: disable=raise-missing-from + raise AttributeError(f"{key} is not a field name") def __getitem__(self, item): """ @@ -1392,7 +1390,7 @@ def __setitem__(self, key, value): if index is not None: return list.__setitem__(self, index, value) - raise IndexError(f"{key} is not a field name and not an int") # pylint: disable=raise-missing-from + raise IndexError(f"{key} is not a field name and not an int") @property def oid(self) -> int: @@ -2727,7 +2725,6 @@ def __shapefileHeader( Several of the shapefile formats are so similar that a single generic method to read or write them is warranted.""" - # pylint: disable=raise-missing-from f = self.__getFileObj(fileObj) f.seek(0) # File code, Unused bytes @@ -2786,8 +2783,6 @@ def __shapefileHeader( "Failed to write shapefile elevation and measure values. Floats required." 
) - # pylint: enable=raise-missing-from - def __dbfHeader(self): """Writes the dbf header and field descriptors.""" f = self.__getFileObj(self.dbf) @@ -2859,7 +2854,6 @@ def shape( self.__shxRecord(offset, length) def __shpRecord(self, s): - # pylint: disable=raise-missing-from f = self.__getFileObj(self.shp) offset = f.tell() # Record number, Content length place holder @@ -2891,13 +2885,11 @@ def __shpRecord(self, s): f.seek(start - 4) f.write(pack(">i", length)) f.seek(finish) - # pylint: enable=raise-missing-from return offset, length def __shxRecord(self, offset, length): """Writes the shx records.""" - # pylint: disable=raise-missing-from f = self.__getFileObj(self.shx) try: f.write(pack(">i", offset // 2)) @@ -2907,8 +2899,6 @@ def __shxRecord(self, offset, length): ) f.write(pack(">i", length)) - # pylint: enable=raise-missing-from - def record( self, *recordList: Iterable[RecordValue], **recordDict: dict[str, RecordValue] ): From 2c4a32f8d1104143ec4d993ce06b0c96efcafdb0 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Tue, 29 Jul 2025 11:48:09 +0100 Subject: [PATCH 143/220] Further reduce number of index selector tests --- test_shapefile.py | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/test_shapefile.py b/test_shapefile.py index b43d2470..2a10d3ee 100644 --- a/test_shapefile.py +++ b/test_shapefile.py @@ -999,19 +999,12 @@ def test_iterRecords_start_stop(): 0, 1, 2, - 3, 5, 11, - 17, - 33, - 51, - 103, - 170, - 234, - 435, - 543, + 41, + 310, + 513, N - 3, - N - 2, N - 1, ] for i, index in enumerate(indices): From b628577dc8ba1097664eb741d32f4f938dc38696 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Tue, 29 Jul 2025 14:23:30 +0100 Subject: [PATCH 144/220] Refactor _from_shp_file methods onto new subclasses of Shape --- src/shapefile.py | 255 ++++++++++++++++++++++++++++------------------- 1 file changed, 152 
insertions(+), 103 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index d26a3337..7ded3c71 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -845,88 +845,6 @@ def shapeTypeName(self) -> str: def __repr__(self): return f"Shape #{self.__oid}: {self.shapeTypeName}" - # pylint: disable=unused-argument - def _set_bbox_from_shp_file(self, f): - pass - - @staticmethod - def _get_nparts_from_shp_file(f): - return None - - @staticmethod - def _get_npoints_from_shp_file(f): - return None - - def _set_parts_from_shp_file(self, f, nParts): - pass - - def _set_part_types_from_shp_file(self, f, nParts): - pass - - def _set_points_from_shp_file(self, f, nPoints): - pass - - def _set_z_from_shp_file(self, f, nPoints): - pass - - def _set_m_from_shp_file(self, f, nPoints, next_shape): - pass - - def _get_and_set_2D_point_from_shp_file(self, f): - return None - - def _set_single_point_z_from_shp_file(self, f): - pass - - def _set_single_point_m_from_shp_file(self, f, next_shape): - pass - - # pylint: enable=unused-argument - - @classmethod - def _from_shp_file(cls, f, next_shape, oid=None, bbox=None): - shape = cls(oid=oid) - - shape._set_bbox_from_shp_file(f) # pylint: disable=assignment-from-none - - # if bbox specified and no overlap, skip this shape - if bbox is not None and not bbox_overlap(bbox, tuple(shape.bbox)): # pylint: disable=no-member - # because we stop parsing this shape, skip to beginning of - # next shape before we return - return None - - nParts: Optional[int] = shape._get_nparts_from_shp_file(f) - nPoints: Optional[int] = shape._get_npoints_from_shp_file(f) - # Previously, we also set __zmin = __zmax = __mmin = __mmax = None - - if nParts: - shape._set_parts_from_shp_file(f, nParts) - shape._set_part_types_from_shp_file(f, nParts) - - if nPoints: - shape._set_points_from_shp_file(f, nPoints) - - shape._set_z_from_shp_file(f, nPoints) - - shape._set_m_from_shp_file(f, nPoints, next_shape) - - # Read a single point - # if shapeType in (1, 11, 
21): - point_2D = shape._get_and_set_2D_point_from_shp_file(f) # pylint: disable=assignment-from-none - - if bbox is not None and point_2D is not None: - x, y = point_2D # pylint: disable=unpacking-non-sequence - # create bounding box for Point by duplicating coordinates - # skip shape if no overlap with bounding box - if not bbox_overlap(bbox, (x, y, x, y)): - return None - - shape._set_single_point_z_from_shp_file(f) - - shape._set_single_point_m_from_shp_file(f, next_shape) - - return shape - def _read_shape_from_shp_file( f, oid=None, bbox=None @@ -963,10 +881,10 @@ def _write_shape_to_shp_file( f.write(pack("= 16: __mmin, __mmax = unpack("<2d", f.read(16)) # Measure values less than -10e38 are nodata values according to the spec @@ -1194,10 +1234,19 @@ def _set_m_from_shp_file(self, f, nPoints, next_shape): self.m = [None for _ in range(nPoints)] -class _HasZ(Shape): +class _HasZ(_CanHaveBBox): + # Not a Point + _shapeTypes = frozenset( + [ + POLYLINEZ, + POLYGONZ, + MULTIPOINTZ, + MULTIPATCH, + ] + ) z: Sequence[float] - def _set_z_from_shp_file(self, f, nPoints): + def _set_zs_from_shp_file(self, f, nPoints): __zmin, __zmax = unpack("<2d", f.read(16)) # pylint: disable=unused-private-member self.z = _Array[float]("d", unpack(f"<{nPoints}d", f.read(nPoints * 8))) @@ -1209,7 +1258,7 @@ def _set_part_types_from_shp_file(self, f, nParts): self.partTypes = _Array[int]("i", unpack(f"<{nParts}i", f.read(nParts * 4))) -class PointM(Point, _HasM): +class PointM(Point): shapeType = POINTM # same default as in Writer.__shpRecord (if s.shapeType in (11, 21):) # PyShp encodes None m values as NODATA @@ -1239,7 +1288,7 @@ class MultiPointM(MultiPoint, _HasM): shapeType = MULTIPOINTM -class PointZ(PointM, _HasZ): +class PointZ(PointM): shapeType = POINTZ # same default as in Writer.__shpRecord (if s.shapeType == 11:) z: Sequence[float] = (0.0,) From 48a2b96532c6d3548633b154997a8b25b1111a18 Mon Sep 17 00:00:00 2001 From: James Parrott 
<80779630+JamesParrott@users.noreply.github.com> Date: Tue, 29 Jul 2025 15:08:48 +0100 Subject: [PATCH 145/220] Refactor _write_shape_to_shp_file into static methods on Shape subclasses --- src/shapefile.py | 334 +++++++++++++++++++++++++++-------------------- 1 file changed, 194 insertions(+), 140 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 7ded3c71..64722b9d 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -883,161 +883,43 @@ def _write_shape_to_shp_file( # For point just update bbox of the whole shapefile if s.shapeType in Point._shapeTypes: update_bbox(s) - # All shape types capable of having a bounding box elif s.shapeType in _CanHaveBBox._shapeTypes: - try: - f.write(pack("<4d", *update_bbox(s))) - except error: - raise ShapefileException( - f"Failed to write bounding box for record {i}. Expected floats." - ) - # Shape types with parts + # We use static methods here and below, + # to support s a Shape base class, with shapeType set, + # not one of our newer shape specific sub classes. + _CanHaveBBox._try_write_bbox_to_shp_file(f, s, i, update_bbox) + if s.shapeType in _CanHaveParts._shapeTypes: - # Number of parts - f.write(pack(" 2 else 0)) - except error: - raise ShapefileException( - f"Failed to write elevation values for record {i}. Expected floats." - ) - # Write m extremes and values - # When reading a file, pyshp converts NODATA m values to None, so here we make sure to convert them back to NODATA - # Note: missing m values are autoset to NODATA. + _HasZ._try_write_zs_to_shp_file(f, s, i, update_zbox) + if s.shapeType in _HasM._shapeTypes: - try: - f.write(pack("<2d", *update_mbox(s))) - except error: - raise ShapefileException( - f"Failed to write measure extremes for record {i}. 
Expected floats" - ) - try: - if hasattr(s, "m"): - # if m values are stored in attribute - # fmt: off - f.write( - pack( - f"<{len(s.m)}d", - *[m if m is not None else NODATA for m in s.m] - ) - ) - # fmt: on - else: - # if m values are stored as 3rd/4th dimension - # 0-index position of m value is 3 if z type (x,y,z,m), or 2 if m type (x,y,m) - mpos = 3 if s.shapeType in {13, 15, 18, 31} else 2 - for p in s.points: - f.write( - pack( - " mpos and p[mpos] is not None - else NODATA, - ) - ) - except error: - raise ShapefileException( - f"Failed to write measure values for record {i}. Expected floats" - ) + _HasM._try_write_ms_to_shp_file(f, s, i, update_mbox) + # Write a single point if s.shapeType in Point._shapeTypes: - try: - f.write(pack("<2d", s.points[0][0], s.points[0][1])) - except error: - raise ShapefileException( - f"Failed to write point for record {i}. Expected floats." - ) + Point._try_write_to_shp(f, s, i) + # Write a single Z value - # Note: missing z values are autoset to 0, but not sure if this is ideal. if s.shapeType == POINTZ: - # update the global z box - update_zbox(s) - # then write value - if hasattr(s, "z"): - # if z values are stored in attribute - try: - if not s.z: - s.z = (0,) - f.write(pack(" mpos and p[mpos] is not None + else NODATA, + ) + ) + except error: + raise ShapefileException( + f"Failed to write measure values for record {i}. Expected floats" + ) + class _HasZ(_CanHaveBBox): # Not a Point @@ -1250,6 +1216,29 @@ def _set_zs_from_shp_file(self, f, nPoints): __zmin, __zmax = unpack("<2d", f.read(16)) # pylint: disable=unused-private-member self.z = _Array[float]("d", unpack(f"<{nPoints}d", f.read(nPoints * 8))) + @staticmethod + def _try_write_zs_to_shp_file(f, s, i, update_zbox): + # Write z extremes and values + # Note: missing z values are autoset to 0, but not sure if this is ideal. 
+ try: + f.write(pack("<2d", *update_zbox(s))) + except error: + raise ShapefileException( + f"Failed to write elevation extremes for record {i}. Expected floats." + ) + try: + if hasattr(s, "z"): + # if z values are stored in attribute + f.write(pack(f"<{len(s.z)}d", *s.z)) + else: + # if z values are stored as 3rd dimension + for p in s.points: + f.write(pack(" 2 else 0)) + except error: + raise ShapefileException( + f"Failed to write elevation values for record {i}. Expected floats." + ) + class MultiPatch(_HasM, _HasZ, _CanHaveParts): shapeType = MULTIPATCH @@ -1257,6 +1246,11 @@ class MultiPatch(_HasM, _HasZ, _CanHaveParts): def _set_part_types_from_shp_file(self, f, nParts): self.partTypes = _Array[int]("i", unpack(f"<{nParts}i", f.read(nParts * 4))) + @staticmethod + def _write_part_types_to_shp_file(f, s): + for partType in s.partTypes: + f.write(pack(" Date: Tue, 29 Jul 2025 16:09:58 +0100 Subject: [PATCH 146/220] Add shapefile Writer benchmark --- run_benchmarks.py | 84 ++++++++++++++++++++++++++++++++++------------- 1 file changed, 62 insertions(+), 22 deletions(-) diff --git a/run_benchmarks.py b/run_benchmarks.py index 0b8b8288..27b03d00 100644 --- a/run_benchmarks.py +++ b/run_benchmarks.py @@ -2,14 +2,16 @@ from __future__ import annotations +import collections import functools import os import timeit from collections.abc import Callable from pathlib import Path +from tempfile import TemporaryFile as TempF from typing import Union -import shapefile as shp +import shapefile # For shapefiles from https://github.com/JamesParrott/PyShp_test_shapefile DEFAULT_PYSHP_TEST_REPO = ( @@ -31,26 +33,41 @@ def benchmark( name: str, run_count: int, func: Callable, - col_width: tuple, + col_widths: tuple, compare_to: float | None = None, ) -> float: placeholder = "Running..." 
- print(f"{name:>{col_width[0]}} | {placeholder}", end="", flush=True) + print(f"{name:>{col_widths[0]}} | {placeholder}", end="", flush=True) time_taken = timeit.timeit(func, number=run_count) print("\b" * len(placeholder), end="") time_suffix = " s" - print(f"{time_taken:{col_width[1]-len(time_suffix)}.3g}{time_suffix}", end="") + print(f"{time_taken:{col_widths[1]-len(time_suffix)}.3g}{time_suffix}", end="") print() return time_taken +fields = {} +shapeRecords = collections.defaultdict(list) + + def open_shapefile_with_PyShp(target: Union[str, os.PathLike]): - with shp.Reader(target) as r: + with shapefile.Reader(target) as r: + fields[target] = r.fields for shapeRecord in r.iterShapeRecords(): - pass + shapeRecords[target].append(shapeRecord) + + +def write_shapefile_with_PyShp(target: Union[str, os.PathLike]): + with TempF("wb") as shp, TempF("wb") as dbf, TempF("wb") as shx: + with shapefile.Writer(shp=shp, dbf=dbf, shx=shx) as w: # type: ignore [arg-type] + for field_info_tuple in fields[target]: + w.field(*field_info_tuple) + for shapeRecord in shapeRecords[target]: + w.shape(shapeRecord.shape) + w.record(*shapeRecord.record) -READER_TESTS = { +SHAPEFILES = { "Blockgroups": blockgroups_file, "Edit": edit_file, "Merge": merge_file, @@ -60,24 +77,47 @@ def open_shapefile_with_PyShp(target: Union[str, os.PathLike]): } -def run(run_count: int) -> None: - col_width = (21, 10) +# Load files to avoid one off delays that only affect first disk seek +for file_path in SHAPEFILES.values(): + file_path.read_bytes() + +reader_benchmarks = [ + functools.partial( + benchmark, + name=f"Read {test_name}", + func=functools.partial(open_shapefile_with_PyShp, target=target), + ) + for test_name, target in SHAPEFILES.items() +] + +# Require fields and shapeRecords to first have been populated +# from data from previouly running the reader_benchmarks +writer_benchmarks = [ + functools.partial( + benchmark, + name=f"Write {test_name}", + 
func=functools.partial(write_shapefile_with_PyShp, target=target), + ) + for test_name, target in SHAPEFILES.items() +] + + +def run(run_count: int, benchmarks: list[Callable[[], None]]) -> None: + col_widths = (22, 10) col_head = ("parser", "exec time", "performance (more is better)") - # Load files to avoid one off delays that only affect first disk seek - for file_path in READER_TESTS.values(): - file_path.read_bytes() print(f"Running benchmarks {run_count} times:") - print("-" * col_width[0] + "---" + "-" * col_width[1]) - print(f"{col_head[0]:>{col_width[0]}} | {col_head[1]:>{col_width[1]}}") - print("-" * col_width[0] + "-+-" + "-" * col_width[1]) - for test_name, target in READER_TESTS.items(): - benchmark( - f"Read {test_name}", - run_count, - functools.partial(open_shapefile_with_PyShp, target=target), - col_width, + print("-" * col_widths[0] + "---" + "-" * col_widths[1]) + print(f"{col_head[0]:>{col_widths[0]}} | {col_head[1]:>{col_widths[1]}}") + print("-" * col_widths[0] + "-+-" + "-" * col_widths[1]) + for benchmark in benchmarks: + benchmark( # type: ignore [call-arg] + run_count=run_count, + col_widths=col_widths, ) if __name__ == "__main__": - run(1) + print("Reader tests:") + run(1, reader_benchmarks) # type: ignore [arg-type] + print("\n\nWrite tests:") + run(1, writer_benchmarks) # type: ignore [arg-type] From 9aac92c25d0eb3b52b19992e48f9802bccd730ff Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Tue, 29 Jul 2025 16:17:47 +0100 Subject: [PATCH 147/220] Update run_benchmarks.py --- run_benchmarks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/run_benchmarks.py b/run_benchmarks.py index 27b03d00..edc2119a 100644 --- a/run_benchmarks.py +++ b/run_benchmarks.py @@ -119,5 +119,5 @@ def run(run_count: int, benchmarks: list[Callable[[], None]]) -> None: if __name__ == "__main__": print("Reader tests:") run(1, reader_benchmarks) # type: ignore [arg-type] - print("\n\nWrite 
tests:") + print("\n\nWriter tests:") run(1, writer_benchmarks) # type: ignore [arg-type] From fbbae0a56a0eb109c926fef81e33d8cfe8750916 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Tue, 29 Jul 2025 17:57:13 +0100 Subject: [PATCH 148/220] Split _write_shape_to_shp_file into _try_write methods on Point and _CanHaveBBox --- pyproject.toml | 7 +- src/shapefile.py | 198 ++++++++++++++++++++++++++--------------------- 2 files changed, 113 insertions(+), 92 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 73883b60..d3e0e894 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -112,10 +112,9 @@ load-plugins=[ "pylint_per_file_ignores", ] -# Silence warning: src/shapefile.py:2076:20: W0212: Access to a protected -# member _from_geojson of a client class (protected-access) -# src/shapefile.py:950:16: W0201: Attribute 'm' defined outside __init__ (attribute-defined-outside-init) -# src/shapefile.py:973:12: W0707: Consider explicitly re-raising using 'except error as exc' and +# Silence warnings: src/shapefile.py:2076:20: W0212: Access to a protected member _from_geojson of a client class (protected-access) +# src/shapefile.py:950:16: W0201: Attribute 'm' defined outside __init__ (attribute-defined-outside-init) +# src/shapefile.py:973:12: W0707: Consider explicitly re-raising using 'except error as exc' and # 'raise ShapefileException(f'Failed to write bounding box for record {i}. # Expected floats.') from exc' (raise-missing-from) # Silence remarks: diff --git a/src/shapefile.py b/src/shapefile.py index 64722b9d..d19e7b7b 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -874,52 +874,19 @@ def _write_shape_to_shp_file( f, s, i, - update_bbox, - update_mbox, - update_zbox, + bbox, + mbox, + zbox, ): f.write(pack(" str: @@ -2972,13 +2982,25 @@ def __shpRecord(self, s): f"the type of the shapefile ({self.shapeType})." 
) + # For both single point and multiple-points non-null shapes, + # update bbox, mbox and zbox of the whole shapefile + new_bbox = self.__bbox(s) if s.shapeType != NULL else None + new_mbox = ( + self.__mbox(s) + if s.shapeType in {POINTM, POINTZ} | _HasM._shapeTypes + else None + ) + new_zbox = ( + self.__zbox(s) if s.shapeType in {POINTZ} | _HasZ._shapeTypes else None + ) + _write_shape_to_shp_file( f=f, s=s, i=self.shpNum, - update_bbox=self.__bbox, - update_mbox=self.__mbox, - update_zbox=self.__zbox, + bbox=new_bbox, + mbox=new_mbox, + zbox=new_zbox, ) # Finalize record length as 16-bit words From 5b65b40f87ed961e145d3191fb7e9a8ee1c4a381 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Tue, 29 Jul 2025 18:07:17 +0100 Subject: [PATCH 149/220] Move _write_shape_to_shp_file back into Writer.__shpRecord --- src/shapefile.py | 35 +++++++++++++++++++---------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index d19e7b7b..3f3d8c94 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -870,23 +870,20 @@ def _read_shape_from_shp_file( return shape -def _write_shape_to_shp_file( - f, - s, - i, - bbox, - mbox, - zbox, -): - f.write(pack(" Date: Tue, 29 Jul 2025 19:35:23 +0100 Subject: [PATCH 150/220] Use tmp io.BytesIO, and write to file at end --- src/shapefile.py | 84 ++++++++++++++++++++++++++---------------------- 1 file changed, 45 insertions(+), 39 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 3f3d8c94..3fdb9f40 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -2967,52 +2967,58 @@ def shape( self.__shxRecord(offset, length) def __shpRecord(self, s): - f = self.__getFileObj(self.shp) - offset = f.tell() + shp = self.__getFileObj(self.shp) + offset = shp.tell() # Record number, Content length place holder self.shpNum += 1 - f.write(pack(">2i", self.shpNum, 0)) - start = f.tell() - # Shape Type - if self.shapeType is None and 
s.shapeType != NULL: - self.shapeType = s.shapeType - if not s.shapeType in {NULL, self.shapeType}: - raise ShapefileException( - f"The shape's type ({s.shapeType}) must match " - f"the type of the shapefile ({self.shapeType})." + with io.BytesIO() as f: + f.write(pack(">2i", self.shpNum, 0)) + start = f.tell() + # Shape Type + if self.shapeType is None and s.shapeType != NULL: + self.shapeType = s.shapeType + if not s.shapeType in {NULL, self.shapeType}: + raise ShapefileException( + f"The shape's type ({s.shapeType}) must match " + f"the type of the shapefile ({self.shapeType})." + ) + + # For both single point and multiple-points non-null shapes, + # update bbox, mbox and zbox of the whole shapefile + new_bbox = self.__bbox(s) if s.shapeType != NULL else None + new_mbox = ( + self.__mbox(s) + if s.shapeType in {POINTM, POINTZ} | _HasM._shapeTypes + else None + ) + new_zbox = ( + self.__zbox(s) + if s.shapeType in {POINTZ} | _HasZ._shapeTypes + else None ) - # For both single point and multiple-points non-null shapes, - # update bbox, mbox and zbox of the whole shapefile - new_bbox = self.__bbox(s) if s.shapeType != NULL else None - new_mbox = ( - self.__mbox(s) - if s.shapeType in {POINTM, POINTZ} | _HasM._shapeTypes - else None - ) - new_zbox = ( - self.__zbox(s) if s.shapeType in {POINTZ} | _HasZ._shapeTypes else None - ) + f.write(pack("i", length)) + # f.seek(finish) - # Finalize record length as 16-bit words - finish = f.tell() - length = (finish - start) // 2 - # start - 4 bytes is the content length field - f.seek(start - 4) - f.write(pack(">i", length)) - f.seek(finish) + f.seek(0) + shp.write(f.read()) return offset, length def __shxRecord(self, offset, length): From c65662bd953cf901a4e195c8ae1cae9a8874700a Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Tue, 29 Jul 2025 19:46:40 +0100 Subject: [PATCH 151/220] Update shapefile.py --- src/shapefile.py | 88 
+++++++++++++++++++++++------------------------- 1 file changed, 43 insertions(+), 45 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 3fdb9f40..40d29d3c 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -2967,58 +2967,56 @@ def shape( self.__shxRecord(offset, length) def __shpRecord(self, s): - shp = self.__getFileObj(self.shp) - offset = shp.tell() + f = self.__getFileObj(self.shp) + offset = f.tell() # Record number, Content length place holder self.shpNum += 1 - with io.BytesIO() as f: - f.write(pack(">2i", self.shpNum, 0)) - start = f.tell() - # Shape Type - if self.shapeType is None and s.shapeType != NULL: - self.shapeType = s.shapeType - if not s.shapeType in {NULL, self.shapeType}: - raise ShapefileException( - f"The shape's type ({s.shapeType}) must match " - f"the type of the shapefile ({self.shapeType})." - ) - - # For both single point and multiple-points non-null shapes, - # update bbox, mbox and zbox of the whole shapefile - new_bbox = self.__bbox(s) if s.shapeType != NULL else None - new_mbox = ( - self.__mbox(s) - if s.shapeType in {POINTM, POINTZ} | _HasM._shapeTypes - else None - ) - new_zbox = ( - self.__zbox(s) - if s.shapeType in {POINTZ} | _HasZ._shapeTypes - else None + f.write(pack(">2i", self.shpNum, 0)) + start = f.tell() + # Shape Type + if self.shapeType is None and s.shapeType != NULL: + self.shapeType = s.shapeType + if not s.shapeType in {NULL, self.shapeType}: + raise ShapefileException( + f"The shape's type ({s.shapeType}) must match " + f"the type of the shapefile ({self.shapeType})." 
) - f.write(pack("i", length)) + f.seek(finish) - # Finalize record length as 16-bit words - finish = f.tell() - length = (finish - start) // 2 - # start - 4 bytes is the content length field - f.seek(start - 4) - f.write(pack(">i", length)) - # f.seek(finish) - f.seek(0) - shp.write(f.read()) return offset, length def __shxRecord(self, offset, length): From 46249efd2c3e599a834338c6a8ff9eb20588b0a0 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Tue, 29 Jul 2025 19:50:34 +0100 Subject: [PATCH 152/220] Reformat --- src/shapefile.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 40d29d3c..231d938d 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -901,6 +901,7 @@ def _from_shp_file(cls, f, next_shape, oid=None, bbox=None): # pylint: disable= def _try_write_to_shp_file(f, s, i, bbox, mbox, zbox): # pylint: disable=unused-argument pass + class _CanHaveBBox(Shape): """As well as setting bounding boxes, we also utilize the fact that this mixin applies to all the shapes that are @@ -2991,9 +2992,7 @@ def __shpRecord(self, s): else None ) new_zbox = ( - self.__zbox(s) - if s.shapeType in {POINTZ} | _HasZ._shapeTypes - else None + self.__zbox(s) if s.shapeType in {POINTZ} | _HasZ._shapeTypes else None ) f.write(pack("i", length)) f.seek(finish) - return offset, length def __shxRecord(self, offset, length): From b3b7593cf3987e3700b2579688105c6137ef0810 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Tue, 29 Jul 2025 21:35:51 +0100 Subject: [PATCH 153/220] Serialize shapes to bytes, and deserialize them from byte streams --- README.md | 2 + src/shapefile.py | 394 +++++++++++++++++++++++++---------------------- 2 files changed, 208 insertions(+), 188 deletions(-) diff --git a/README.md b/README.md index 52b8de78..de34f6c4 100644 --- a/README.md +++ b/README.md @@ -458,11 +458,13 @@ shapeType Point do 
not have a bounding box 'bbox'. ... if not name.startswith('_'): ... name 'bbox' + 'from_bytes' 'oid' 'parts' 'points' 'shapeType' 'shapeTypeName' + 'to_bytes' * `oid`: The shape's index position in the original shapefile. diff --git a/src/shapefile.py b/src/shapefile.py index 231d938d..fc3bf597 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -846,46 +846,6 @@ def __repr__(self): return f"Shape #{self.__oid}: {self.shapeTypeName}" -def _read_shape_from_shp_file( - f, oid=None, bbox=None -): # oid: Optional[int] = None, bbox: Optional[BBox] = None): - """Constructs a Shape from an open .shp file. Something else - is required to have first read the .shp file's header. - Leaves the shp file's .tell() in the correct position for - a subsequent call to this, to build the next shape. - """ - # shape = Shape(oid=oid) - (__recNum, recLength) = unpack_2_int32_be(f.read(8)) - # Determine the start of the next record - next_shape = f.tell() + (2 * recLength) - shapeType = unpack("= 16: - __mmin, __mmax = unpack("<2d", f.read(16)) + def _set_ms_from_byte_stream(self, b_io, nPoints, next_shape): + if next_shape - b_io.tell() >= 16: + __mmin, __mmax = unpack("<2d", b_io.read(16)) # Measure values less than -10e38 are nodata values according to the spec - if next_shape - f.tell() >= nPoints * 8: + if next_shape - b_io.tell() >= nPoints * 8: self.m = [] - for m in _Array[float]("d", unpack(f"<{nPoints}d", f.read(nPoints * 8))): + for m in _Array[float]("d", unpack(f"<{nPoints}d", b_io.read(nPoints * 8))): if m > NODATA: self.m.append(m) else: @@ -1176,12 +1152,14 @@ def _set_ms_from_shp_file(self, f, nPoints, next_shape): self.m = [None for _ in range(nPoints)] @staticmethod - def _try_write_ms_to_shp_file(f, s, i, mbox): + def ms_to_bytes(s, i, mbox): # Write m extremes and values # When reading a file, pyshp converts NODATA m values to None, so here we make sure to convert them back to NODATA # Note: missing m values are autoset to NODATA. 
+ bytes_ = b"" + try: - f.write(pack("<2d", *mbox)) + bytes_ += pack("<2d", *mbox) except error: raise ShapefileException( f"Failed to write measure extremes for record {i}. Expected floats" @@ -1189,32 +1167,25 @@ def _try_write_ms_to_shp_file(f, s, i, mbox): try: if hasattr(s, "m"): # if m values are stored in attribute - # fmt: off - f.write( - pack( - f"<{len(s.m)}d", - *[m if m is not None else NODATA for m in s.m] - ) + bytes_ += pack( + f"<{len(s.m)}d", *[m if m is not None else NODATA for m in s.m] ) - # fmt: on else: # if m values are stored as 3rd/4th dimension # 0-index position of m value is 3 if z type (x,y,z,m), or 2 if m type (x,y,m) mpos = 3 if s.shapeType in _HasZ._shapeTypes else 2 for p in s.points: - f.write( - pack( - " mpos and p[mpos] is not None - else NODATA, - ) + bytes_ += pack( + " mpos and p[mpos] is not None else NODATA, ) except error: raise ShapefileException( f"Failed to write measure values for record {i}. Expected floats" ) + return bytes_ + class _HasZ(_CanHaveBBox): # Not a Point @@ -1228,16 +1199,17 @@ class _HasZ(_CanHaveBBox): ) z: Sequence[float] - def _set_zs_from_shp_file(self, f, nPoints): - __zmin, __zmax = unpack("<2d", f.read(16)) # pylint: disable=unused-private-member - self.z = _Array[float]("d", unpack(f"<{nPoints}d", f.read(nPoints * 8))) + def _set_zs_from_byte_stream(self, b_io, nPoints): + __zmin, __zmax = unpack("<2d", b_io.read(16)) # pylint: disable=unused-private-member + self.z = _Array[float]("d", unpack(f"<{nPoints}d", b_io.read(nPoints * 8))) @staticmethod - def _try_write_zs_to_shp_file(f, s, i, zbox): + def zs_to_bytes(s, i, zbox): # Write z extremes and values # Note: missing z values are autoset to 0, but not sure if this is ideal. + bytes_ = b"" try: - f.write(pack("<2d", *zbox)) + bytes_ += pack("<2d", *zbox) except error: raise ShapefileException( f"Failed to write elevation extremes for record {i}. Expected floats." 
@@ -1245,27 +1217,31 @@ def _try_write_zs_to_shp_file(f, s, i, zbox): try: if hasattr(s, "z"): # if z values are stored in attribute - f.write(pack(f"<{len(s.z)}d", *s.z)) + bytes_ += pack(f"<{len(s.z)}d", *s.z) else: # if z values are stored as 3rd dimension for p in s.points: - f.write(pack(" 2 else 0)) + bytes_ += pack(" 2 else 0) except error: raise ShapefileException( f"Failed to write elevation values for record {i}. Expected floats." ) + return bytes_ + class MultiPatch(_HasM, _HasZ, _CanHaveParts): shapeType = MULTIPATCH - def _set_part_types_from_shp_file(self, f, nParts): - self.partTypes = _Array[int]("i", unpack(f"<{nParts}i", f.read(nParts * 4))) + def _set_part_types_from_byte_stream(self, b_io, nParts): + self.partTypes = _Array[int]("i", unpack(f"<{nParts}i", b_io.read(nParts * 4))) @staticmethod - def _write_part_types_to_shp_file(f, s): + def _part_types_to_bytes(s): + bytes_ = b"" for partType in s.partTypes: - f.write(pack("= 8: - (m,) = unpack("= 8: + (m,) = unpack("2i", self.shpNum, 0)) - start = f.tell() + # f.write(pack(">2i", self.shpNum, 0)) + # start = f.tell() # Shape Type if self.shapeType is None and s.shapeType != NULL: self.shapeType = s.shapeType @@ -2995,11 +3007,10 @@ def __shpRecord(self, s): self.__zbox(s) if s.shapeType in {POINTZ} | _HasZ._shapeTypes else None ) - f.write(pack("i", length)) + + record_bytes = pack("i", length)) - f.seek(finish) + length = len(record_bytes) // 2 + header_bytes = pack(">2i", self.shpNum, length) + f.write(header_bytes + record_bytes) + + # f.seek(finish) return offset, length From 312892d93ea94e6b3af5074aa70ef26ab5f84858 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Tue, 29 Jul 2025 22:16:24 +0100 Subject: [PATCH 154/220] Use byte streams instead of bytes as it's faster --- README.md | 4 +- src/shapefile.py | 170 ++++++++++++++++++++--------------------------- 2 files changed, 74 insertions(+), 100 deletions(-) diff --git a/README.md 
b/README.md index de34f6c4..a0abc6a1 100644 --- a/README.md +++ b/README.md @@ -458,13 +458,13 @@ shapeType Point do not have a bounding box 'bbox'. ... if not name.startswith('_'): ... name 'bbox' - 'from_bytes' + 'from_byte_stream' 'oid' 'parts' 'points' 'shapeType' 'shapeTypeName' - 'to_bytes' + 'write_to_byte_stream' * `oid`: The shape's index position in the original shapefile. diff --git a/src/shapefile.py b/src/shapefile.py index fc3bf597..b5e6ed4c 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -853,13 +853,13 @@ class NullShape(Shape): shapeType = NULL @classmethod - def from_bytes(cls, bytes_, next_shape, oid=None, bbox=None): # pylint: disable=unused-argument + def from_byte_stream(cls, b_io, next_shape, oid=None, bbox=None): # pylint: disable=unused-argument # Shape.__init__ sets self.points = points or [] return cls(oid=oid) @staticmethod - def to_bytes(s, i, bbox, mbox, zbox): # pylint: disable=unused-argument - return b"" + def write_to_byte_stream(b_io, s, i, bbox, mbox, zbox): # pylint: disable=unused-argument + pass class _CanHaveBBox(Shape): @@ -890,9 +890,9 @@ def _set_bbox_from_byte_stream(self, b_io): self.bbox = _Array[float]("d", unpack("<4d", b_io.read(32))) @staticmethod - def _bbox_to_bytes(i, bbox): + def _write_bbox_to_byte_stream(b_io, i, bbox): try: - return pack("<4d", *bbox) + b_io.write(pack("<4d", *bbox)) except error: raise ShapefileException( f"Failed to write bounding box for record {i}. Expected floats." @@ -903,26 +903,23 @@ def _get_npoints_from_byte_stream(b_io): return unpack(" mpos and p[mpos] is not None else NODATA, + b_io.write( + pack( + " mpos and p[mpos] is not None + else NODATA, + ) ) except error: raise ShapefileException( f"Failed to write measure values for record {i}. 
Expected floats" ) - return bytes_ - class _HasZ(_CanHaveBBox): # Not a Point @@ -1204,12 +1192,11 @@ def _set_zs_from_byte_stream(self, b_io, nPoints): self.z = _Array[float]("d", unpack(f"<{nPoints}d", b_io.read(nPoints * 8))) @staticmethod - def zs_to_bytes(s, i, zbox): + def _write_zs_to_byte_stream(b_io, s, i, zbox): # Write z extremes and values # Note: missing z values are autoset to 0, but not sure if this is ideal. - bytes_ = b"" try: - bytes_ += pack("<2d", *zbox) + b_io.write(pack("<2d", *zbox)) except error: raise ShapefileException( f"Failed to write elevation extremes for record {i}. Expected floats." @@ -1217,18 +1204,16 @@ def zs_to_bytes(s, i, zbox): try: if hasattr(s, "z"): # if z values are stored in attribute - bytes_ += pack(f"<{len(s.z)}d", *s.z) + b_io.write(pack(f"<{len(s.z)}d", *s.z)) else: # if z values are stored as 3rd dimension for p in s.points: - bytes_ += pack(" 2 else 0) + b_io.write(pack(" 2 else 0)) except error: raise ShapefileException( f"Failed to write elevation values for record {i}. Expected floats." 
) - return bytes_ - class MultiPatch(_HasM, _HasZ, _CanHaveParts): shapeType = MULTIPATCH @@ -1237,11 +1222,9 @@ def _set_part_types_from_byte_stream(self, b_io, nParts): self.partTypes = _Array[int]("i", unpack(f"<{nParts}i", b_io.read(nParts * 4))) @staticmethod - def _part_types_to_bytes(s): - bytes_ = b"" + def _write_part_types_to_byte_stream(b_io, s): for partType in s.partTypes: - bytes_ += pack("2i", self.shpNum, 0)) - # start = f.tell() + f.write(pack(">2i", self.shpNum, 0)) + start = f.tell() # Shape Type if self.shapeType is None and s.shapeType != NULL: self.shapeType = s.shapeType @@ -3007,10 +2986,11 @@ def __shpRecord(self, s): self.__zbox(s) if s.shapeType in {POINTZ} | _HasZ._shapeTypes else None ) - # f.write(pack("i", length)) - - record_bytes = pack("2i", self.shpNum, length) - f.write(header_bytes + record_bytes) - - # f.seek(finish) + finish = f.tell() + length = (finish - start) // 2 + # start - 4 bytes is the content length field + f.seek(start - 4) + f.write(pack(">i", length)) + + f.seek(finish) return offset, length From 9cfada586b17ccd61a8c5b97ff3018c3c704f449 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Wed, 30 Jul 2025 11:00:57 +0100 Subject: [PATCH 155/220] Collect f.write(pack( in for_loops into a single f.write(pack, and keep num bytes written --- src/shapefile.py | 96 ++++++++++++++++++++++++++---------------------- 1 file changed, 52 insertions(+), 44 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index b5e6ed4c..046443ce 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -859,7 +859,7 @@ def from_byte_stream(cls, b_io, next_shape, oid=None, bbox=None): # pylint: dis @staticmethod def write_to_byte_stream(b_io, s, i, bbox, mbox, zbox): # pylint: disable=unused-argument - pass + return 0 class _CanHaveBBox(Shape): @@ -892,7 +892,7 @@ def _set_bbox_from_byte_stream(self, b_io): @staticmethod def _write_bbox_to_byte_stream(b_io, i, bbox): try: - 
b_io.write(pack("<4d", *bbox)) + return b_io.write(pack("<4d", *bbox)) except error: raise ShapefileException( f"Failed to write bounding box for record {i}. Expected floats." @@ -904,7 +904,7 @@ def _get_npoints_from_byte_stream(b_io): @staticmethod def _write_npoints_to_byte_stream(b_io, s): - b_io.write(pack(" mpos and p[mpos] is not None - else NODATA, - ) - ) + if len(p) > mpos and p[mpos] is not None: + ms.append(p[mpos]) + else: + ms.append(NODATA) + + num_bytes_written += b_io.write(pack(f"<{len(ms)} 2 else 0)) + zs = [p[2] if len(p) > 2 else 0 + for p in s.points + ] + + num_bytes_written += b_io.write(pack(f"<{len(zs)}d", *zs)) except error: raise ShapefileException( f"Failed to write elevation values for record {i}. Expected floats." ) + return num_bytes_written + class MultiPatch(_HasM, _HasZ, _CanHaveParts): shapeType = MULTIPATCH @@ -1223,8 +1230,7 @@ def _set_part_types_from_byte_stream(self, b_io, nParts): @staticmethod def _write_part_types_to_byte_stream(b_io, s): - for partType in s.partTypes: - b_io.write(pack("2i", self.shpNum, 0)) start = f.tell() + n = 0 # Shape Type if self.shapeType is None and s.shapeType != NULL: self.shapeType = s.shapeType @@ -2986,10 +2993,10 @@ def __shpRecord(self, s): self.__zbox(s) if s.shapeType in {POINTZ} | _HasZ._shapeTypes else None ) - f.write(pack(" Date: Wed, 30 Jul 2025 11:09:28 +0100 Subject: [PATCH 156/220] Fix typo in struct format str for ms --- src/shapefile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/shapefile.py b/src/shapefile.py index 046443ce..eb4f5d6a 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -1167,7 +1167,7 @@ def _write_ms_to_byte_stream(b_io, s, i, mbox): else: ms.append(NODATA) - num_bytes_written += b_io.write(pack(f"<{len(ms)} Date: Wed, 30 Jul 2025 11:21:55 +0100 Subject: [PATCH 157/220] Write to BytesIO buffer, then write that to .shp file once its length is known --- src/shapefile.py | 29 +++++++++++++++++++---------- 1 file changed, 19 
insertions(+), 10 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index eb4f5d6a..18cf0efa 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -2969,8 +2969,9 @@ def __shpRecord(self, s): offset = f.tell() # Record number, Content length place holder self.shpNum += 1 - f.write(pack(">2i", self.shpNum, 0)) - start = f.tell() + + # f.write(pack(">2i", self.shpNum, 0)) + # start = f.tell() n = 0 # Shape Type if self.shapeType is None and s.shapeType != NULL: @@ -2992,12 +2993,14 @@ def __shpRecord(self, s): new_zbox = ( self.__zbox(s) if s.shapeType in {POINTZ} | _HasZ._shapeTypes else None ) + + b_io = io.BytesIO() - n += f.write(pack("i", length)) + # f.seek(start - 4) + # f.write(pack(">i", length)) + f.write(pack(">2i", self.shpNum, length)) + b_io.seek(0) + f.write(b_io.read()) - f.seek(finish) + # f.seek(finish) return offset, length From 402e00f64d3e9a5ec6c274990684a912fcf6465e Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Wed, 30 Jul 2025 11:24:30 +0100 Subject: [PATCH 158/220] Reformat --- src/shapefile.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 18cf0efa..8b9dcec0 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -1002,6 +1002,7 @@ def write_to_byte_stream(b_io, s, i, bbox, mbox, zbox): return n + class _CanHaveParts(_CanHaveBBox): # The parts attribute is initialised by # the base class Shape's __init__, to parts or []. 
@@ -1099,6 +1100,7 @@ def write_to_byte_stream(b_io, s, i, bbox, mbox, zbox): # pylint: disable=unuse return n + class Polyline(_CanHaveParts): shapeType = POLYLINE @@ -1166,7 +1168,7 @@ def _write_ms_to_byte_stream(b_io, s, i, mbox): ms.append(p[mpos]) else: ms.append(NODATA) - + num_bytes_written += b_io.write(pack(f"<{len(ms)}d", *ms)) except error: @@ -1209,9 +1211,7 @@ def _write_zs_to_byte_stream(b_io, s, i, zbox): zs = s.z else: # if z values are stored as 3rd dimension - zs = [p[2] if len(p) > 2 else 0 - for p in s.points - ] + zs = [p[2] if len(p) > 2 else 0 for p in s.points] num_bytes_written += b_io.write(pack(f"<{len(zs)}d", *zs)) except error: @@ -2969,7 +2969,7 @@ def __shpRecord(self, s): offset = f.tell() # Record number, Content length place holder self.shpNum += 1 - + # f.write(pack(">2i", self.shpNum, 0)) # start = f.tell() n = 0 @@ -2993,7 +2993,7 @@ def __shpRecord(self, s): new_zbox = ( self.__zbox(s) if s.shapeType in {POINTZ} | _HasZ._shapeTypes else None ) - + b_io = io.BytesIO() n += b_io.write(pack(" Date: Wed, 30 Jul 2025 12:26:48 +0100 Subject: [PATCH 159/220] Avoid file seeks. Write shape to in memory buffer first, in Writer.__shape. 
--- src/shapefile.py | 35 ++++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 8b9dcec0..c554080c 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -2967,12 +2967,8 @@ def shape( def __shpRecord(self, s): f = self.__getFileObj(self.shp) offset = f.tell() - # Record number, Content length place holder self.shpNum += 1 - # f.write(pack(">2i", self.shpNum, 0)) - # start = f.tell() - n = 0 # Shape Type if self.shapeType is None and s.shapeType != NULL: self.shapeType = s.shapeType @@ -2994,8 +2990,20 @@ def __shpRecord(self, s): self.__zbox(s) if s.shapeType in {POINTZ} | _HasZ._shapeTypes else None ) + # Create an in-memory binary buffer to avoid + # unnecessary seeks to files on disk + # (other ops are already buffered until .seek + # or .flush is called if not using RawIOBase). + # https://docs.python.org/3/library/io.html#id2 + # https://docs.python.org/3/library/io.html#io.BufferedWriter b_io = io.BytesIO() + # Record number, Content length place holder + b_io.write(pack(">2i", self.shpNum, -1)) + + # Track number of content bytes written. Excluding self.shpNum and length t.b.c. + n = 0 + n += b_io.write(pack("i", length)) - f.write(pack(">2i", self.shpNum, length)) - b_io.seek(0) - f.write(b_io.read()) - # f.seek(finish) + # 4 bytes in is the content length field + b_io.seek(4) + b_io.write(pack(">i", length)) + # Flush to file. 
+ b_io.seek(0) + f.write(b_io.read()) return offset, length def __shxRecord(self, offset, length): From 581edf31d54d6a67be87de1d6e61b91f36060e18 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Wed, 30 Jul 2025 12:28:12 +0100 Subject: [PATCH 160/220] Reformat --- src/shapefile.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/shapefile.py b/src/shapefile.py index c554080c..2efaf302 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -3016,7 +3016,6 @@ def __shpRecord(self, s): zbox=new_zbox, ) - # Finalize record length as 16-bit words length = n // 2 From da2df6320db11774c8ddb08508dd8dec8b1b246e Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Wed, 30 Jul 2025 12:38:38 +0100 Subject: [PATCH 161/220] Read recLength_bytes into in memory buffer in Reader.__shape --- src/shapefile.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 2efaf302..b604f316 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -2074,17 +2074,21 @@ def __shape( # Convert from num of 16 bit words, to 8 bit bytes recLength_bytes = 2 * recLength - next_shape = f.tell() + recLength_bytes + # next_shape = f.tell() + recLength_bytes - shapeType = unpack(" Date: Wed, 30 Jul 2025 13:45:11 +0100 Subject: [PATCH 162/220] Replace index look up with enumerate, tuple with set & simplify bool expression --- src/shapefile.py | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index b604f316..d0ec1776 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -653,10 +653,8 @@ def __geo_interface__(self) -> GeoJSONHomogeneousGeometryObject: # the geojson spec does not define a proper null-geometry type # however, it does allow geometry types with 'empty' coordinates to be interpreted as null-geometries return {"type": "Point", "coordinates": ()} - # return 
{"type": "Point", "coordinates": tuple()} #type: ignore return {"type": "Point", "coordinates": self.points[0]} - # return {"type": "Point", "coordinates": tuple(self.points[0])} # type: ignore if self.shapeType in [MULTIPOINT, MULTIPOINTM, MULTIPOINTZ]: if len(self.points) == 0: @@ -669,7 +667,6 @@ def __geo_interface__(self) -> GeoJSONHomogeneousGeometryObject: return { "type": "MultiPoint", "coordinates": self.points, - # "coordinates": [tuple(p) for p in self.points], #type: ignore } if self.shapeType in [POLYLINE, POLYLINEM, POLYLINEZ]: @@ -684,7 +681,6 @@ def __geo_interface__(self) -> GeoJSONHomogeneousGeometryObject: return { "type": "LineString", "coordinates": self.points, - # "coordinates": [tuple(p) for p in self.points], #type: ignore } # multilinestring @@ -695,11 +691,9 @@ def __geo_interface__(self) -> GeoJSONHomogeneousGeometryObject: ps = part continue - # coordinates.append([tuple(p) for p in self.points[ps:part]]) coordinates.append(list(self.points[ps:part])) ps = part - # coordinates.append([tuple(p) for p in self.points[part:]]) # assert len(self.parts) >1 # so disable pylint rule coordinates.append(list(self.points[part:])) # pylint: disable=undefined-loop-variable return {"type": "MultiLineString", "coordinates": coordinates} @@ -713,16 +707,14 @@ def __geo_interface__(self) -> GeoJSONHomogeneousGeometryObject: # get all polygon rings rings = [] - for i in range(len(self.parts)): + for i, start in enumerate(self.parts): # get indexes of start and end points of the ring - start = self.parts[i] try: end = self.parts[i + 1] except IndexError: end = len(self.points) # extract the points that make up the ring - # ring = [tuple(p) for p in self.points[start:end]] ring = list(self.points[start:end]) rings.append(ring) @@ -2076,7 +2068,8 @@ def __shape( # next_shape = f.tell() + recLength_bytes - # Read entire record into memory + # Read entire record into memory to avoid having to call + # seek on the file afterwards b_io = 
io.BytesIO(f.read(recLength_bytes)) b_io.seek(0) @@ -2363,7 +2356,7 @@ def __record( # parse each value record = [] for (__name, typ, __size, deci), value in zip(fieldTuples, recordContents): - if typ in ("N", "F"): + if typ in {"N", "F"}: # numeric or float: number stored as a string, right justified, and padded with blanks to the width of the field. value = value.split(b"\0")[0] value = value.replace(b"*", b"") # QGIS NULL is all '*' chars @@ -2687,9 +2680,7 @@ def close(self): # Flush files for attribute in (self.shp, self.shx, self.dbf): - if hasattr(attribute, "flush") and not ( - hasattr(attribute, "closed") and attribute.closed - ): + if hasattr(attribute, "flush") and not getattr(attribute, "closed", False): try: attribute.flush() except OSError: From dfe6ae073fb1f34d3120e6efdffd52874fe4e704 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Wed, 30 Jul 2025 18:09:36 +0100 Subject: [PATCH 163/220] Add in dependency on typing-extensions for Python 3.9 backports of TypeIs and NotRequired --- pyproject.toml | 3 + src/shapefile.py | 225 ++++++++++++++++++++++++++++++++++------------- 2 files changed, 168 insertions(+), 60 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index d3e0e894..ca8f667c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,6 +26,9 @@ classifiers = [ "Topic :: Software Development :: Libraries", "Topic :: Software Development :: Libraries :: Python Modules", ] +dependencies = [ + "typing_extensions", +] [project.optional-dependencies] test = ["pytest"] diff --git a/src/shapefile.py b/src/shapefile.py index d0ec1776..bb159a26 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -43,6 +43,8 @@ from urllib.parse import urlparse, urlunparse from urllib.request import Request, urlopen +from typing_extensions import NotRequired, TypeIs + # Create named logger logger = logging.getLogger(__name__) @@ -120,15 +122,27 @@ PointsT = list[PointT] BBox = tuple[float, float, float, float] 
+MBox = tuple[float, float] +ZBox = tuple[float, float] + + +class BinaryWritableSeekable(Protocol): + def write(self, bbox: bytes): ... + def seek(self, offset: int, whence: int = 0): ... # pylint: disable=unused-argument + def tell(self): ... -class BinaryWritable(Protocol): - def write(self, data: bytes): ... +class BinaryReadableSeekable(Protocol): + def seek(self, offset: int, whence: int = 0): ... # pylint: disable=unused-argument + def tell(self): ... + def read(self, size: int = -1): ... -class BinaryWritableSeekable(BinaryWritable): - def seek(self, i: int): ... # pylint: disable=unused-argument +class BinaryReadableWritableSeekable(Protocol): + def write(self, bbox: bytes): ... + def seek(self, offset: int, whence: int = 0): ... # pylint: disable=unused-argument def tell(self): ... + def read(self, size: int = -1): ... # File name, file object or anything with a read() method that returns bytes. @@ -227,13 +241,13 @@ class GeoJSONFeatureCollection(TypedDict): features: list[GeoJSONFeature] -class GeoJSONFeatureCollectionWithBBox(GeoJSONFeatureCollection, total=False): +class GeoJSONFeatureCollectionWithBBox(GeoJSONFeatureCollection): # bbox is optional # typing.NotRequired requires Python 3.11 # and we must support 3.9 (at least until October) # https://docs.python.org/3/library/typing.html#typing.Required # Is there a backport? 
- bbox: list[float] + bbox: NotRequired[list[float]] # Helpers @@ -603,6 +617,24 @@ class _NoShapeTypeSentinel: class Shape: shapeType: int = NULL + _shapeTypes = frozenset( + [ + NULL, + POINT, + POINTM, + POINTZ, + POLYLINE, + POLYLINEM, + POLYLINEZ, + POLYGON, + POLYGONM, + POLYGONZ, + MULTIPOINT, + MULTIPOINTM, + MULTIPOINTZ, + MULTIPATCH, + ] + ) def __init__( self, @@ -838,19 +870,40 @@ def __repr__(self): return f"Shape #{self.__oid}: {self.shapeTypeName}" +S = TypeVar("S", bound=Shape) + + +def compatible_with(s: Shape, cls: type[S]) -> TypeIs[S]: + return s.shapeType in cls._shapeTypes + + class NullShape(Shape): # Shape.shapeType = NULL already, # to preserve handling of default args in Shape.__init__ # Repeated for clarity. shapeType = NULL + _shapeTypes = frozenset([NULL]) @classmethod - def from_byte_stream(cls, b_io, next_shape, oid=None, bbox=None): # pylint: disable=unused-argument + def from_byte_stream( + cls, + b_io: BinaryReadableSeekable, + next_shape: int, + oid: Optional[int] = None, + bbox: Optional[BBox] = None, + ) -> NullShape: # pylint: disable=unused-argument # Shape.__init__ sets self.points = points or [] return cls(oid=oid) @staticmethod - def write_to_byte_stream(b_io, s, i, bbox, mbox, zbox): # pylint: disable=unused-argument + def write_to_byte_stream( + b_io: BinaryWritableSeekable, + s: Shape, + i: int, + bbox: Optional[BBox], + mbox: Optional[MBox], + zbox: Optional[ZBox], + ) -> int: # pylint: disable=unused-argument return 0 @@ -876,13 +929,21 @@ class _CanHaveBBox(Shape): ) # Not a BBox because the legacy implementation was a list, not a 4-tuple. 
- bbox: Optional[Sequence[float]] = None + # bbox: Optional[Sequence[float]] = None + bbox: Optional[BBox] = None - def _set_bbox_from_byte_stream(self, b_io): - self.bbox = _Array[float]("d", unpack("<4d", b_io.read(32))) + def _get_set_bbox_from_byte_stream(self, b_io: BinaryReadableSeekable) -> BBox: + self.bbox: BBox = tuple(_Array[float]("d", unpack("<4d", b_io.read(32)))) + return self.bbox @staticmethod - def _write_bbox_to_byte_stream(b_io, i, bbox): + def _write_bbox_to_byte_stream( + b_io: BinaryWritableSeekable, i: int, bbox: Optional[BBox] + ) -> int: + if not bbox or len(bbox) != 4: + raise ShapefileException( + f"Four numbers required. Got: {bbox=}" + ) try: return b_io.write(pack("<4d", *bbox)) except error: @@ -891,20 +952,24 @@ def _write_bbox_to_byte_stream(b_io, i, bbox): ) @staticmethod - def _get_npoints_from_byte_stream(b_io): + def _get_npoints_from_byte_stream(b_io: BinaryReadableSeekable) -> int: return unpack(" int: return b_io.write(pack(" int: + x_ys: list[float] = [] for point in s.points: x_ys.extend(point[:2]) try: @@ -916,31 +981,41 @@ def _write_points_to_byte_stream(b_io, s, i): # pylint: disable=unused-argument @staticmethod - def _get_nparts_from_byte_stream(b_io): - return None + def _get_nparts_from_byte_stream(b_io: BinaryReadableSeekable) -> int: + return 0 - def _set_parts_from_byte_stream(self, b_io, nParts): + def _set_parts_from_byte_stream(self, b_io: BinaryReadableSeekable, nParts: int): pass - def _set_part_types_from_byte_stream(self, b_io, nParts): + def _set_part_types_from_byte_stream( + self, b_io: BinaryReadableSeekable, nParts: int + ): pass - def _set_zs_from_byte_stream(self, b_io, nPoints): + def _set_zs_from_byte_stream(self, b_io: BinaryReadableSeekable, nPoints: int): pass - def _set_ms_from_byte_stream(self, b_io, nPoints, next_shape): + def _set_ms_from_byte_stream( + self, b_io: BinaryReadableSeekable, nPoints: int, next_shape: int + ): pass # pylint: enable=unused-argument @classmethod - def 
from_byte_stream(cls, b_io, next_shape, oid=None, bbox=None): + def from_byte_stream( + cls, + b_io: BinaryReadableSeekable, + next_shape: int, + oid: Optional[int] = None, + bbox: Optional[BBox] = None, + ) -> Optional[_CanHaveBBox]: # pylint: disable=unused-argument shape = cls(oid=oid) - shape._set_bbox_from_byte_stream(b_io) # pylint: disable=assignment-from-none + shape_bbox = shape._get_set_bbox_from_byte_stream(b_io) # pylint: disable=assignment-from-none # if bbox specified and no overlap, skip this shape - if bbox is not None and not bbox_overlap(bbox, tuple(shape.bbox)): # pylint: disable=no-member + if bbox is not None and not bbox_overlap(bbox, shape_bbox): # pylint: disable=no-member #type: ignore [index] # because we stop parsing this shape, caller must skip to beginning of # next shape after we return (as done in f.seek(next_shape)) return None @@ -963,33 +1038,43 @@ def from_byte_stream(cls, b_io, next_shape, oid=None, bbox=None): return shape @staticmethod - def write_to_byte_stream(b_io, s, i, bbox, mbox, zbox): + def write_to_byte_stream( + b_io: BinaryWritableSeekable, + s: Shape, + i: int, + bbox: Optional[BBox], + mbox: Optional[MBox], + zbox: Optional[ZBox], + ) -> int: # We use static methods here and below, # to support s only being an instance of a the # Shape base class (with shapeType set) # i.e. not necessarily one of our newer shape specific # sub classes. - n = _CanHaveBBox._write_bbox_to_byte_stream(b_io, i, bbox) + n = 0 + + if compatible_with(s, _CanHaveBBox): + n += _CanHaveBBox._write_bbox_to_byte_stream(b_io, i, bbox) - if s.shapeType in _CanHaveParts._shapeTypes: + if compatible_with(s, _CanHaveParts): n += _CanHaveParts._write_nparts_to_byte_stream(b_io, s) # Shape types with multiple points per record - if s.shapeType in _CanHaveBBox._shapeTypes: + if compatible_with(s, _CanHaveBBox): n += _CanHaveBBox._write_npoints_to_byte_stream(b_io, s) # Write part indexes. 
Includes MultiPatch - if s.shapeType in _CanHaveParts._shapeTypes: + if compatible_with(s, _CanHaveParts): n += _CanHaveParts._write_part_indices_to_byte_stream(b_io, s) - if s.shapeType == MULTIPATCH: + if compatible_with(s, MultiPatch): n += MultiPatch._write_part_types_to_byte_stream(b_io, s) # Write points for multiple-point records - if s.shapeType in _CanHaveBBox._shapeTypes: + if compatible_with(s, _CanHaveBBox): n += _CanHaveBBox._write_points_to_byte_stream(b_io, s, i) - if s.shapeType in _HasZ._shapeTypes: + if compatible_with(s, _HasZ): n += _HasZ._write_zs_to_byte_stream(b_io, s, i, zbox) - if s.shapeType in _HasM._shapeTypes: + if compatible_with(s, _HasM): n += _HasM._write_ms_to_byte_stream(b_io, s, i, mbox) return n @@ -1012,18 +1097,20 @@ class _CanHaveParts(_CanHaveBBox): ) @staticmethod - def _get_nparts_from_byte_stream(b_io): + def _get_nparts_from_byte_stream(b_io: BinaryReadableSeekable) -> int: return unpack(" int: return b_io.write(pack(f"<{len(s.parts)}i", *s.parts)) @@ -1057,7 +1144,13 @@ def _write_x_y_to_byte_stream(b_io, x, y, i): ) @classmethod - def from_byte_stream(cls, b_io, next_shape, oid=None, bbox=None): + def from_byte_stream( + cls, + b_io: BinaryReadableSeekable, + next_shape: int, + oid: Optional[int] = None, + bbox: Optional[BBox] = None, + ): # pylint: disable=unused-argument shape = cls(oid=oid) x, y = cls._x_y_from_byte_stream(b_io) @@ -1077,7 +1170,14 @@ def from_byte_stream(cls, b_io, next_shape, oid=None, bbox=None): return shape @staticmethod - def write_to_byte_stream(b_io, s, i, bbox, mbox, zbox): # pylint: disable=unused-argument + def write_to_byte_stream( + b_io: BinaryWritableSeekable, + s: Shape, + i: int, + bbox: Optional[BBox], + mbox: Optional[MBox], + zbox: Optional[ZBox], + ) -> int: # pylint: disable=unused-argument # Serialize a single point x, y = s.points[0][0], s.points[0][1] n = Point._write_x_y_to_byte_stream(b_io, x, y, i) @@ -1216,6 +1316,7 @@ def _write_zs_to_byte_stream(b_io, s, i, zbox): 
class MultiPatch(_HasM, _HasZ, _CanHaveParts): shapeType = MULTIPATCH + _shapeTypes = frozenset([MULTIPATCH]) def _set_part_types_from_byte_stream(self, b_io, nParts): self.partTypes = _Array[int]("i", unpack(f"<{nParts}i", b_io.read(nParts * 4))) @@ -1227,6 +1328,8 @@ def _write_part_types_to_byte_stream(b_io, s): class PointM(Point): shapeType = POINTM + _shapeTypes = frozenset([POINTM, POINTZ]) + # same default as in Writer.__shpRecord (if s.shapeType in (11, 21):) # PyShp encodes None m values as NODATA m = (None,) @@ -1297,6 +1400,8 @@ class MultiPointM(MultiPoint, _HasM): class PointZ(PointM): shapeType = POINTZ + _shapeTypes = frozenset([POINTZ]) + # same default as in Writer.__shpRecord (if s.shapeType == 11:) z: Sequence[float] = (0.0,) @@ -2070,7 +2175,7 @@ def __shape( # Read entire record into memory to avoid having to call # seek on the file afterwards - b_io = io.BytesIO(f.read(recLength_bytes)) + b_io: BinaryReadableSeekable = io.BytesIO(f.read(recLength_bytes)) b_io.seek(0) shapeType = unpack(" IO[bytes]: ... + def __getFileObj(self, f: str) -> BinaryWritableSeekable: ... @overload def __getFileObj(self, f: None) -> NoReturn: ... @overload - def __getFileObj(self, f: W) -> W: ... + def __getFileObj(self, f: BinaryWritableSeekable) -> BinaryWritableSeekable: ... def __getFileObj(self, f): """Safety handler to verify file-like objects""" if not f: @@ -2762,8 +2867,8 @@ def __bbox(self, s): self._bbox = bbox return bbox - def __zbox(self, s): - z = [] + def __zbox(self, s) -> ZBox: + z: list[float] = [] if self._zbox: z.extend(self._zbox) @@ -2777,12 +2882,12 @@ def __zbox(self, s): # Original self._zbox bounds (if any) are the first two entries. 
# Set zbox for the first, and all later times - self._zbox = [min(z), max(z)] + self._zbox = (min(z), max(z)) return self._zbox - def __mbox(self, s): + def __mbox(self, s) -> MBox: mpos = 3 if s.shapeType in _HasZ._shapeTypes else 2 - m = [] + m: list[float] = [] if self._mbox: m.extend(self._mbox) @@ -2801,7 +2906,7 @@ def __mbox(self, s): # Original self._mbox bounds (if any) are the first two entries. # Set mbox for the first, and all later times - self._mbox = [min(m), max(m)] + self._mbox = (min(m), max(m)) return self._mbox @property @@ -2959,8 +3064,8 @@ def shape( if self.shx: self.__shxRecord(offset, length) - def __shpRecord(self, s): - f = self.__getFileObj(self.shp) + def __shpRecord(self, s: Shape) -> tuple[int, int]: + f: BinaryWritableSeekable = self.__getFileObj(self.shp) offset = f.tell() self.shpNum += 1 @@ -2991,7 +3096,7 @@ def __shpRecord(self, s): # or .flush is called if not using RawIOBase). # https://docs.python.org/3/library/io.html#id2 # https://docs.python.org/3/library/io.html#io.BufferedWriter - b_io = io.BytesIO() + b_io: BinaryReadableWritableSeekable = io.BytesIO() # Record number, Content length place holder b_io.write(pack(">2i", self.shpNum, -1)) From ebc859df52473c8b23557f107833e85a7c337eb4 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Wed, 30 Jul 2025 18:42:29 +0100 Subject: [PATCH 164/220] Satisfy Pylint --- src/shapefile.py | 39 ++++++++++++++++++++++----------------- 1 file changed, 22 insertions(+), 17 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index bb159a26..5ead7145 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -877,6 +877,9 @@ def compatible_with(s: Shape, cls: type[S]) -> TypeIs[S]: return s.shapeType in cls._shapeTypes +# pylint: disable=unused-argument +# Need unused arguments to keep the same call signature for +# different implementations of from_byte_stream and write_to_byte_stream class NullShape(Shape): # Shape.shapeType = NULL 
already, # to preserve handling of default args in Shape.__init__ @@ -891,7 +894,7 @@ def from_byte_stream( next_shape: int, oid: Optional[int] = None, bbox: Optional[BBox] = None, - ) -> NullShape: # pylint: disable=unused-argument + ) -> NullShape: # Shape.__init__ sets self.points = points or [] return cls(oid=oid) @@ -903,7 +906,7 @@ def write_to_byte_stream( bbox: Optional[BBox], mbox: Optional[MBox], zbox: Optional[ZBox], - ) -> int: # pylint: disable=unused-argument + ) -> int: return 0 @@ -928,8 +931,6 @@ class _CanHaveBBox(Shape): ] ) - # Not a BBox because the legacy implementation was a list, not a 4-tuple. - # bbox: Optional[Sequence[float]] = None bbox: Optional[BBox] = None def _get_set_bbox_from_byte_stream(self, b_io: BinaryReadableSeekable) -> BBox: @@ -941,9 +942,7 @@ def _write_bbox_to_byte_stream( b_io: BinaryWritableSeekable, i: int, bbox: Optional[BBox] ) -> int: if not bbox or len(bbox) != 4: - raise ShapefileException( - f"Four numbers required. Got: {bbox=}" - ) + raise ShapefileException(f"Four numbers required. Got: {bbox=}") try: return b_io.write(pack("<4d", *bbox)) except error: @@ -979,7 +978,6 @@ def _write_points_to_byte_stream( f"Failed to write points for record {i}. Expected floats." 
) - # pylint: disable=unused-argument @staticmethod def _get_nparts_from_byte_stream(b_io: BinaryReadableSeekable) -> int: return 0 @@ -1000,8 +998,6 @@ def _set_ms_from_byte_stream( ): pass - # pylint: enable=unused-argument - @classmethod def from_byte_stream( cls, @@ -1009,13 +1005,13 @@ def from_byte_stream( next_shape: int, oid: Optional[int] = None, bbox: Optional[BBox] = None, - ) -> Optional[_CanHaveBBox]: # pylint: disable=unused-argument + ) -> Optional[_CanHaveBBox]: shape = cls(oid=oid) - shape_bbox = shape._get_set_bbox_from_byte_stream(b_io) # pylint: disable=assignment-from-none + shape_bbox = shape._get_set_bbox_from_byte_stream(b_io) # if bbox specified and no overlap, skip this shape - if bbox is not None and not bbox_overlap(bbox, shape_bbox): # pylint: disable=no-member #type: ignore [index] + if bbox is not None and not bbox_overlap(bbox, shape_bbox): # because we stop parsing this shape, caller must skip to beginning of # next shape after we return (as done in f.seek(next_shape)) return None @@ -1150,7 +1146,7 @@ def from_byte_stream( next_shape: int, oid: Optional[int] = None, bbox: Optional[BBox] = None, - ): # pylint: disable=unused-argument + ): shape = cls(oid=oid) x, y = cls._x_y_from_byte_stream(b_io) @@ -1177,7 +1173,7 @@ def write_to_byte_stream( bbox: Optional[BBox], mbox: Optional[MBox], zbox: Optional[ZBox], - ) -> int: # pylint: disable=unused-argument + ) -> int: # Serialize a single point x, y = s.points[0][0], s.points[0][1] n = Point._write_x_y_to_byte_stream(b_io, x, y, i) @@ -1193,6 +1189,9 @@ def write_to_byte_stream( return n +# pylint: enable=unused-argument + + class Polyline(_CanHaveParts): shapeType = POLYLINE @@ -1725,6 +1724,7 @@ def _assert_ext_is_supported(self, ext: str): assert ext in self.CONSTITUENT_FILE_EXTS def __init__( + # pylint: disable=unused-argument self, shapefile_path: Union[str, os.PathLike] = "", /, @@ -1734,7 +1734,9 @@ def __init__( shp: Union[_NoShpSentinel, Optional[BinaryFileT]] = 
_NoShpSentinel(), shx: Optional[BinaryFileT] = None, dbf: Optional[BinaryFileT] = None, - **kwargs, # pylint: disable=unused-argument + # Keep kwargs even though unused, to preserve PyShp 2.4 API + **kwargs, + # pylint: enable=unused-argument ): self.shp = None self.shx = None @@ -2677,6 +2679,7 @@ class Writer: W = TypeVar("W", bound=BinaryWritableSeekable) + # pylint: disable=unused-argument def __init__( self, target: Union[str, os.PathLike, None] = None, @@ -2688,7 +2691,9 @@ def __init__( shp: Optional[BinaryWritableSeekable] = None, shx: Optional[BinaryWritableSeekable] = None, dbf: Optional[BinaryWritableSeekable] = None, - **kwargs, # pylint: disable=unused-argument + # Keep kwargs even though unused, to preserve PyShp 2.4 API + **kwargs, + # pylint: enable=unused-argument ): self.target = target self.autoBalance = autoBalance From 174f589d7963e401285a27fbf7d4fab8abde079d Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Wed, 30 Jul 2025 18:57:06 +0100 Subject: [PATCH 165/220] Rename Binary Stream Protocols --- src/shapefile.py | 80 ++++++++++++++++++++++++------------------------ 1 file changed, 40 insertions(+), 40 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 5ead7145..7597d224 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -126,20 +126,20 @@ ZBox = tuple[float, float] -class BinaryWritableSeekable(Protocol): - def write(self, bbox: bytes): ... +class WriteSeekableBinStream(Protocol): + def write(self, b: bytes): ... # pylint: disable=redefined-outer-name def seek(self, offset: int, whence: int = 0): ... # pylint: disable=unused-argument def tell(self): ... -class BinaryReadableSeekable(Protocol): +class ReadSeekableBinStream(Protocol): def seek(self, offset: int, whence: int = 0): ... # pylint: disable=unused-argument def tell(self): ... def read(self, size: int = -1): ... -class BinaryReadableWritableSeekable(Protocol): - def write(self, bbox: bytes): ... 
+class ReadWriteSeekableBinStream(Protocol): + def write(self, b: bytes): ... # pylint: disable=redefined-outer-name def seek(self, offset: int, whence: int = 0): ... # pylint: disable=unused-argument def tell(self): ... def read(self, size: int = -1): ... @@ -147,7 +147,7 @@ def read(self, size: int = -1): ... # File name, file object or anything with a read() method that returns bytes. BinaryFileT = Union[str, IO[bytes]] -BinaryFileStreamT = Union[IO[bytes], io.BytesIO, BinaryWritableSeekable] +BinaryFileStreamT = Union[IO[bytes], io.BytesIO, WriteSeekableBinStream] FieldTuple = tuple[str, str, int, int] RecordValue = Union[ @@ -890,7 +890,7 @@ class NullShape(Shape): @classmethod def from_byte_stream( cls, - b_io: BinaryReadableSeekable, + b_io: ReadSeekableBinStream, next_shape: int, oid: Optional[int] = None, bbox: Optional[BBox] = None, @@ -900,7 +900,7 @@ def from_byte_stream( @staticmethod def write_to_byte_stream( - b_io: BinaryWritableSeekable, + b_io: WriteSeekableBinStream, s: Shape, i: int, bbox: Optional[BBox], @@ -933,13 +933,13 @@ class _CanHaveBBox(Shape): bbox: Optional[BBox] = None - def _get_set_bbox_from_byte_stream(self, b_io: BinaryReadableSeekable) -> BBox: + def _get_set_bbox_from_byte_stream(self, b_io: ReadSeekableBinStream) -> BBox: self.bbox: BBox = tuple(_Array[float]("d", unpack("<4d", b_io.read(32)))) return self.bbox @staticmethod def _write_bbox_to_byte_stream( - b_io: BinaryWritableSeekable, i: int, bbox: Optional[BBox] + b_io: WriteSeekableBinStream, i: int, bbox: Optional[BBox] ) -> int: if not bbox or len(bbox) != 4: raise ShapefileException(f"Four numbers required. 
Got: {bbox=}") @@ -951,22 +951,22 @@ def _write_bbox_to_byte_stream( ) @staticmethod - def _get_npoints_from_byte_stream(b_io: BinaryReadableSeekable) -> int: + def _get_npoints_from_byte_stream(b_io: ReadSeekableBinStream) -> int: return unpack(" int: return b_io.write(pack(" int: x_ys: list[float] = [] for point in s.points: @@ -979,29 +979,29 @@ def _write_points_to_byte_stream( ) @staticmethod - def _get_nparts_from_byte_stream(b_io: BinaryReadableSeekable) -> int: + def _get_nparts_from_byte_stream(b_io: ReadSeekableBinStream) -> int: return 0 - def _set_parts_from_byte_stream(self, b_io: BinaryReadableSeekable, nParts: int): + def _set_parts_from_byte_stream(self, b_io: ReadSeekableBinStream, nParts: int): pass def _set_part_types_from_byte_stream( - self, b_io: BinaryReadableSeekable, nParts: int + self, b_io: ReadSeekableBinStream, nParts: int ): pass - def _set_zs_from_byte_stream(self, b_io: BinaryReadableSeekable, nPoints: int): + def _set_zs_from_byte_stream(self, b_io: ReadSeekableBinStream, nPoints: int): pass def _set_ms_from_byte_stream( - self, b_io: BinaryReadableSeekable, nPoints: int, next_shape: int + self, b_io: ReadSeekableBinStream, nPoints: int, next_shape: int ): pass @classmethod def from_byte_stream( cls, - b_io: BinaryReadableSeekable, + b_io: ReadSeekableBinStream, next_shape: int, oid: Optional[int] = None, bbox: Optional[BBox] = None, @@ -1035,7 +1035,7 @@ def from_byte_stream( @staticmethod def write_to_byte_stream( - b_io: BinaryWritableSeekable, + b_io: WriteSeekableBinStream, s: Shape, i: int, bbox: Optional[BBox], @@ -1093,19 +1093,19 @@ class _CanHaveParts(_CanHaveBBox): ) @staticmethod - def _get_nparts_from_byte_stream(b_io: BinaryReadableSeekable) -> int: + def _get_nparts_from_byte_stream(b_io: ReadSeekableBinStream) -> int: return unpack(" int: return b_io.write(pack(" int: return b_io.write(pack(f"<{len(s.parts)}i", *s.parts)) @@ -1142,7 +1142,7 @@ def _write_x_y_to_byte_stream(b_io, x, y, i): @classmethod def 
from_byte_stream( cls, - b_io: BinaryReadableSeekable, + b_io: ReadSeekableBinStream, next_shape: int, oid: Optional[int] = None, bbox: Optional[BBox] = None, @@ -1167,7 +1167,7 @@ def from_byte_stream( @staticmethod def write_to_byte_stream( - b_io: BinaryWritableSeekable, + b_io: WriteSeekableBinStream, s: Shape, i: int, bbox: Optional[BBox], @@ -2177,7 +2177,7 @@ def __shape( # Read entire record into memory to avoid having to call # seek on the file afterwards - b_io: BinaryReadableSeekable = io.BytesIO(f.read(recLength_bytes)) + b_io: ReadSeekableBinStream = io.BytesIO(f.read(recLength_bytes)) b_io.seek(0) shapeType = unpack(" BinaryWritableSeekable: ... + def __getFileObj(self, f: str) -> WriteSeekableBinStream: ... @overload def __getFileObj(self, f: None) -> NoReturn: ... @overload - def __getFileObj(self, f: BinaryWritableSeekable) -> BinaryWritableSeekable: ... + def __getFileObj(self, f: WriteSeekableBinStream) -> WriteSeekableBinStream: ... def __getFileObj(self, f): """Safety handler to verify file-like objects""" if not f: @@ -2934,7 +2934,7 @@ def mbox(self): def __shapefileHeader( self, - fileObj: Optional[BinaryWritableSeekable], + fileObj: Optional[WriteSeekableBinStream], headerType: str = "shp", ): """Writes the specified header type to the specified file-like object. @@ -3070,7 +3070,7 @@ def shape( self.__shxRecord(offset, length) def __shpRecord(self, s: Shape) -> tuple[int, int]: - f: BinaryWritableSeekable = self.__getFileObj(self.shp) + f: WriteSeekableBinStream = self.__getFileObj(self.shp) offset = f.tell() self.shpNum += 1 @@ -3101,7 +3101,7 @@ def __shpRecord(self, s: Shape) -> tuple[int, int]: # or .flush is called if not using RawIOBase). 
# https://docs.python.org/3/library/io.html#id2 # https://docs.python.org/3/library/io.html#io.BufferedWriter - b_io: BinaryReadableWritableSeekable = io.BytesIO() + b_io: ReadWriteSeekableBinStream = io.BytesIO() # Record number, Content length place holder b_io.write(pack(">2i", self.shpNum, -1)) From 28e2e59c202b980f836cda152295138f6a31018e Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Wed, 30 Jul 2025 19:09:47 +0100 Subject: [PATCH 166/220] Replace NoReturn with Never --- src/shapefile.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 7597d224..ce59bfb2 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -29,7 +29,6 @@ Iterable, Iterator, Literal, - NoReturn, Optional, Protocol, Reversible, @@ -43,7 +42,7 @@ from urllib.parse import urlparse, urlunparse from urllib.request import Request, urlopen -from typing_extensions import NotRequired, TypeIs +from typing_extensions import Never, NotRequired, TypeIs, # Create named logger logger = logging.getLogger(__name__) @@ -2810,7 +2809,7 @@ def close(self): @overload def __getFileObj(self, f: str) -> WriteSeekableBinStream: ... @overload - def __getFileObj(self, f: None) -> NoReturn: ... + def __getFileObj(self, f: None) -> Never: ... @overload def __getFileObj(self, f: WriteSeekableBinStream) -> WriteSeekableBinStream: ... 
def __getFileObj(self, f): From 3deb12c658752190ed7a1bbf767952faffc94e55 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Wed, 30 Jul 2025 19:37:51 +0100 Subject: [PATCH 167/220] Type hint rest of shape subclass methods --- src/shapefile.py | 84 ++++++++++++++++++++++++++++++++---------------- 1 file changed, 56 insertions(+), 28 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index ce59bfb2..ac85b94a 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -42,7 +42,7 @@ from urllib.parse import urlparse, urlunparse from urllib.request import Request, urlopen -from typing_extensions import Never, NotRequired, TypeIs, +from typing_extensions import Never, NotRequired, Self, TypeIs # Create named logger logger = logging.getLogger(__name__) @@ -893,7 +893,7 @@ def from_byte_stream( next_shape: int, oid: Optional[int] = None, bbox: Optional[BBox] = None, - ) -> NullShape: + ) -> Self: # Shape.__init__ sets self.points = points or [] return cls(oid=oid) @@ -941,7 +941,7 @@ def _write_bbox_to_byte_stream( b_io: WriteSeekableBinStream, i: int, bbox: Optional[BBox] ) -> int: if not bbox or len(bbox) != 4: - raise ShapefileException(f"Four numbers required. Got: {bbox=}") + raise ShapefileException(f"Four numbers required for bbox. 
Got: {bbox}") try: return b_io.write(pack("<4d", *bbox)) except error: @@ -1004,7 +1004,7 @@ def from_byte_stream( next_shape: int, oid: Optional[int] = None, bbox: Optional[BBox] = None, - ) -> Optional[_CanHaveBBox]: + ) -> Optional[Self]: shape = cls(oid=oid) shape_bbox = shape._get_set_bbox_from_byte_stream(b_io) @@ -1116,21 +1116,25 @@ class Point(Shape): shapeType = POINT _shapeTypes = frozenset([POINT, POINTM, POINTZ]) - def _set_single_point_z_from_byte_stream(self, b_io): + def _set_single_point_z_from_byte_stream(self, b_io: ReadSeekableBinStream): pass - def _set_single_point_m_from_byte_stream(self, b_io, next_shape): + def _set_single_point_m_from_byte_stream( + self, b_io: ReadSeekableBinStream, next_shape: int + ): pass @staticmethod - def _x_y_from_byte_stream(b_io): + def _x_y_from_byte_stream(b_io: ReadSeekableBinStream): # Unpack _Array too x, y = _Array[float]("d", unpack("<2d", b_io.read(16))) # Convert to tuple return x, y @staticmethod - def _write_x_y_to_byte_stream(b_io, x, y, i): + def _write_x_y_to_byte_stream( + b_io: WriteSeekableBinStream, x: float, y: float, i: int + ) -> int: try: return b_io.write(pack("<2d", x, y)) except error: @@ -1145,7 +1149,7 @@ def from_byte_stream( next_shape: int, oid: Optional[int] = None, bbox: Optional[BBox] = None, - ): + ) -> Optional[Self]: shape = cls(oid=oid) x, y = cls._x_y_from_byte_stream(b_io) @@ -1193,14 +1197,17 @@ def write_to_byte_stream( class Polyline(_CanHaveParts): shapeType = POLYLINE + _shapeTypes = frozenset([POLYLINE, POLYLINEM, POLYLINEZ]) class Polygon(_CanHaveParts): shapeType = POLYGON + _shapeTypes = frozenset([POLYGON, POLYGONM, POLYGONZ]) class MultiPoint(_CanHaveBBox): shapeType = MULTIPOINT + _shapeTypes = frozenset([MULTIPOINT, MULTIPOINTM, MULTIPOINTZ]) class _HasM(_CanHaveBBox): @@ -1218,7 +1225,9 @@ class _HasM(_CanHaveBBox): ) m: Sequence[Optional[float]] - def _set_ms_from_byte_stream(self, b_io, nPoints, next_shape): + def _set_ms_from_byte_stream( + self, b_io: 
ReadSeekableBinStream, nPoints: int, next_shape: int + ): if next_shape - b_io.tell() >= 16: __mmin, __mmax = unpack("<2d", b_io.read(16)) # Measure values less than -10e38 are nodata values according to the spec @@ -1233,7 +1242,11 @@ def _set_ms_from_byte_stream(self, b_io, nPoints, next_shape): self.m = [None for _ in range(nPoints)] @staticmethod - def _write_ms_to_byte_stream(b_io, s, i, mbox): + def _write_ms_to_byte_stream( + b_io: WriteSeekableBinStream, s: Shape, i: int, mbox: Optional[MBox] + ) -> int: + if not mbox or len(mbox) != 2: + raise ShapefileException(f"Two numbers required for mbox. Got: {mbox}") # Write m extremes and values # When reading a file, pyshp converts NODATA m values to None, so here we make sure to convert them back to NODATA # Note: missing m values are autoset to NODATA. @@ -1281,12 +1294,17 @@ class _HasZ(_CanHaveBBox): ) z: Sequence[float] - def _set_zs_from_byte_stream(self, b_io, nPoints): + def _set_zs_from_byte_stream(self, b_io: ReadSeekableBinStream, nPoints: int): __zmin, __zmax = unpack("<2d", b_io.read(16)) # pylint: disable=unused-private-member self.z = _Array[float]("d", unpack(f"<{nPoints}d", b_io.read(nPoints * 8))) @staticmethod - def _write_zs_to_byte_stream(b_io, s, i, zbox): + def _write_zs_to_byte_stream( + b_io: WriteSeekableBinStream, s: Shape, i: int, zbox: Optional[ZBox] + ) -> int: + if not zbox or len(zbox) != 2: + raise ShapefileException(f"Two numbers required for zbox. Got: {zbox}") + # Write z extremes and values # Note: missing z values are autoset to 0, but not sure if this is ideal. 
try: @@ -1316,11 +1334,13 @@ class MultiPatch(_HasM, _HasZ, _CanHaveParts): shapeType = MULTIPATCH _shapeTypes = frozenset([MULTIPATCH]) - def _set_part_types_from_byte_stream(self, b_io, nParts): + def _set_part_types_from_byte_stream( + self, b_io: ReadSeekableBinStream, nParts: int + ): self.partTypes = _Array[int]("i", unpack(f"<{nParts}i", b_io.read(nParts * 4))) @staticmethod - def _write_part_types_to_byte_stream(b_io, s): + def _write_part_types_to_byte_stream(b_io: WriteSeekableBinStream, s: Shape) -> int: return b_io.write(pack(f"<{len(s.partTypes)}i", *s.partTypes)) @@ -1332,7 +1352,9 @@ class PointM(Point): # PyShp encodes None m values as NODATA m = (None,) - def _set_single_point_m_from_byte_stream(self, b_io, next_shape): + def _set_single_point_m_from_byte_stream( + self, b_io: ReadSeekableBinStream, next_shape: int + ): if next_shape - b_io.tell() >= 8: (m,) = unpack(" int: # Write a single M value # Note: missing m values are autoset to NODATA. @@ -1386,15 +1410,19 @@ def _write_single_point_m_to_byte_stream(b_io, s, i): class PolylineM(Polyline, _HasM): shapeType = POLYLINEM + _shapeTypes = frozenset([POLYLINEM, POLYLINEZ]) class PolygonM(Polygon, _HasM): shapeType = POLYGONM + _shapeTypes = frozenset([POLYGONM, POLYGONZ]) class MultiPointM(MultiPoint, _HasM): shapeType = MULTIPOINTM + _shapeTypes = frozenset([MULTIPOINTM, MULTIPOINTZ]) + class PointZ(PointM): shapeType = POINTZ @@ -1403,21 +1431,20 @@ class PointZ(PointM): # same default as in Writer.__shpRecord (if s.shapeType == 11:) z: Sequence[float] = (0.0,) - def _set_single_point_z_from_byte_stream(self, b_io): + def _set_single_point_z_from_byte_stream(self, b_io: ReadSeekableBinStream): self.z = tuple(unpack(" int: # Note: missing z values are autoset to 0, but not sure if this is ideal. 
- + z: float = 0.0 # then write value if hasattr(s, "z"): # if z values are stored in attribute try: - if not s.z: - # s.z = (0,) - z = 0 - else: + if s.z: z = s.z[0] except error: raise ShapefileException( @@ -1426,10 +1453,7 @@ def _write_single_point_z_to_byte_stream(b_io, s, i): else: # if z values are stored as 3rd dimension try: - if len(s.points[0]) < 3: - # s.points[0].append(0) - z = 0 - else: + if len(s.points[0]) >= 3 and s.points[0][2] is not None: z = s.points[0][2] except error: raise ShapefileException( @@ -1441,14 +1465,18 @@ def _write_single_point_z_to_byte_stream(b_io, s, i): class PolylineZ(PolylineM, _HasZ): shapeType = POLYLINEZ + _shapeTypes = frozenset([POLYLINEZ]) class PolygonZ(PolygonM, _HasZ): shapeType = POLYGONZ + _shapeTypes = frozenset([POLYGONZ]) + class MultiPointZ(MultiPointM, _HasZ): shapeType = MULTIPOINTZ + _shapeTypes = frozenset([MULTIPOINTZ]) SHAPE_CLASS_FROM_SHAPETYPE: dict[int, type[Union[NullShape, Point, _CanHaveBBox]]] = { From a1059f383c4f480ec4762f6a9d08b70123214b53 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Wed, 30 Jul 2025 19:48:56 +0100 Subject: [PATCH 168/220] Relax Read Write BinStream Protocols for readability --- src/shapefile.py | 70 +++++++++++++++++++++++++----------------------- 1 file changed, 36 insertions(+), 34 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index ac85b94a..66669d6a 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -125,6 +125,14 @@ ZBox = tuple[float, float] +class WriteableBinStream(Protocol): + def write(self, b: bytes): ... # pylint: disable=redefined-outer-name + + +class ReadableBinStream(Protocol): + def read(self, size: int = -1): ... + + class WriteSeekableBinStream(Protocol): def write(self, b: bytes): ... # pylint: disable=redefined-outer-name def seek(self, offset: int, whence: int = 0): ... 
# pylint: disable=unused-argument @@ -899,7 +907,7 @@ def from_byte_stream( @staticmethod def write_to_byte_stream( - b_io: WriteSeekableBinStream, + b_io: WriteableBinStream, s: Shape, i: int, bbox: Optional[BBox], @@ -932,13 +940,13 @@ class _CanHaveBBox(Shape): bbox: Optional[BBox] = None - def _get_set_bbox_from_byte_stream(self, b_io: ReadSeekableBinStream) -> BBox: + def _get_set_bbox_from_byte_stream(self, b_io: ReadableBinStream) -> BBox: self.bbox: BBox = tuple(_Array[float]("d", unpack("<4d", b_io.read(32)))) return self.bbox @staticmethod def _write_bbox_to_byte_stream( - b_io: WriteSeekableBinStream, i: int, bbox: Optional[BBox] + b_io: WriteableBinStream, i: int, bbox: Optional[BBox] ) -> int: if not bbox or len(bbox) != 4: raise ShapefileException(f"Four numbers required for bbox. Got: {bbox}") @@ -950,22 +958,20 @@ def _write_bbox_to_byte_stream( ) @staticmethod - def _get_npoints_from_byte_stream(b_io: ReadSeekableBinStream) -> int: + def _get_npoints_from_byte_stream(b_io: ReadableBinStream) -> int: return unpack(" int: + def _write_npoints_to_byte_stream(b_io: WriteableBinStream, s: _CanHaveBBox) -> int: return b_io.write(pack(" int: x_ys: list[float] = [] for point in s.points: @@ -978,18 +984,16 @@ def _write_points_to_byte_stream( ) @staticmethod - def _get_nparts_from_byte_stream(b_io: ReadSeekableBinStream) -> int: + def _get_nparts_from_byte_stream(b_io: ReadableBinStream) -> int: return 0 - def _set_parts_from_byte_stream(self, b_io: ReadSeekableBinStream, nParts: int): + def _set_parts_from_byte_stream(self, b_io: ReadableBinStream, nParts: int): pass - def _set_part_types_from_byte_stream( - self, b_io: ReadSeekableBinStream, nParts: int - ): + def _set_part_types_from_byte_stream(self, b_io: ReadableBinStream, nParts: int): pass - def _set_zs_from_byte_stream(self, b_io: ReadSeekableBinStream, nPoints: int): + def _set_zs_from_byte_stream(self, b_io: ReadableBinStream, nPoints: int): pass def _set_ms_from_byte_stream( @@ -1034,7 +1038,7 
@@ def from_byte_stream( @staticmethod def write_to_byte_stream( - b_io: WriteSeekableBinStream, + b_io: WriteableBinStream, s: Shape, i: int, bbox: Optional[BBox], @@ -1092,19 +1096,19 @@ class _CanHaveParts(_CanHaveBBox): ) @staticmethod - def _get_nparts_from_byte_stream(b_io: ReadSeekableBinStream) -> int: + def _get_nparts_from_byte_stream(b_io: ReadableBinStream) -> int: return unpack(" int: + def _write_nparts_to_byte_stream(b_io: WriteableBinStream, s) -> int: return b_io.write(pack(" int: return b_io.write(pack(f"<{len(s.parts)}i", *s.parts)) @@ -1116,7 +1120,7 @@ class Point(Shape): shapeType = POINT _shapeTypes = frozenset([POINT, POINTM, POINTZ]) - def _set_single_point_z_from_byte_stream(self, b_io: ReadSeekableBinStream): + def _set_single_point_z_from_byte_stream(self, b_io: ReadableBinStream): pass def _set_single_point_m_from_byte_stream( @@ -1125,7 +1129,7 @@ def _set_single_point_m_from_byte_stream( pass @staticmethod - def _x_y_from_byte_stream(b_io: ReadSeekableBinStream): + def _x_y_from_byte_stream(b_io: ReadableBinStream): # Unpack _Array too x, y = _Array[float]("d", unpack("<2d", b_io.read(16))) # Convert to tuple @@ -1133,7 +1137,7 @@ def _x_y_from_byte_stream(b_io: ReadSeekableBinStream): @staticmethod def _write_x_y_to_byte_stream( - b_io: WriteSeekableBinStream, x: float, y: float, i: int + b_io: WriteableBinStream, x: float, y: float, i: int ) -> int: try: return b_io.write(pack("<2d", x, y)) @@ -1170,7 +1174,7 @@ def from_byte_stream( @staticmethod def write_to_byte_stream( - b_io: WriteSeekableBinStream, + b_io: WriteableBinStream, s: Shape, i: int, bbox: Optional[BBox], @@ -1243,7 +1247,7 @@ def _set_ms_from_byte_stream( @staticmethod def _write_ms_to_byte_stream( - b_io: WriteSeekableBinStream, s: Shape, i: int, mbox: Optional[MBox] + b_io: WriteableBinStream, s: Shape, i: int, mbox: Optional[MBox] ) -> int: if not mbox or len(mbox) != 2: raise ShapefileException(f"Two numbers required for mbox. 
Got: {mbox}") @@ -1294,13 +1298,13 @@ class _HasZ(_CanHaveBBox): ) z: Sequence[float] - def _set_zs_from_byte_stream(self, b_io: ReadSeekableBinStream, nPoints: int): + def _set_zs_from_byte_stream(self, b_io: ReadableBinStream, nPoints: int): __zmin, __zmax = unpack("<2d", b_io.read(16)) # pylint: disable=unused-private-member self.z = _Array[float]("d", unpack(f"<{nPoints}d", b_io.read(nPoints * 8))) @staticmethod def _write_zs_to_byte_stream( - b_io: WriteSeekableBinStream, s: Shape, i: int, zbox: Optional[ZBox] + b_io: WriteableBinStream, s: Shape, i: int, zbox: Optional[ZBox] ) -> int: if not zbox or len(zbox) != 2: raise ShapefileException(f"Two numbers required for zbox. Got: {zbox}") @@ -1334,13 +1338,11 @@ class MultiPatch(_HasM, _HasZ, _CanHaveParts): shapeType = MULTIPATCH _shapeTypes = frozenset([MULTIPATCH]) - def _set_part_types_from_byte_stream( - self, b_io: ReadSeekableBinStream, nParts: int - ): + def _set_part_types_from_byte_stream(self, b_io: ReadableBinStream, nParts: int): self.partTypes = _Array[int]("i", unpack(f"<{nParts}i", b_io.read(nParts * 4))) @staticmethod - def _write_part_types_to_byte_stream(b_io: WriteSeekableBinStream, s: Shape) -> int: + def _write_part_types_to_byte_stream(b_io: WriteableBinStream, s: Shape) -> int: return b_io.write(pack(f"<{len(s.partTypes)}i", *s.partTypes)) @@ -1367,7 +1369,7 @@ def _set_single_point_m_from_byte_stream( @staticmethod def _write_single_point_m_to_byte_stream( - b_io: WriteSeekableBinStream, s: Shape, i: int + b_io: WriteableBinStream, s: Shape, i: int ) -> int: # Write a single M value # Note: missing m values are autoset to NODATA. 
@@ -1431,12 +1433,12 @@ class PointZ(PointM): # same default as in Writer.__shpRecord (if s.shapeType == 11:) z: Sequence[float] = (0.0,) - def _set_single_point_z_from_byte_stream(self, b_io: ReadSeekableBinStream): + def _set_single_point_z_from_byte_stream(self, b_io: ReadableBinStream): self.z = tuple(unpack(" int: # Note: missing z values are autoset to 0, but not sure if this is ideal. z: float = 0.0 From e1a11a72a50e7422d114376eba585931f45373f6 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Wed, 30 Jul 2025 20:41:52 +0100 Subject: [PATCH 169/220] Type hint Reader.mbox and dbf field header code --- src/shapefile.py | 50 ++++++++++++++++++++++++++++-------------------- 1 file changed, 29 insertions(+), 21 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 66669d6a..ab482102 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -2176,17 +2176,17 @@ def __shpHeader(self): shp.seek(32) self.shapeType = unpack(" NODATA: - self.mbox.append(m) - else: - self.mbox.append(None) + # Measure values less than -10e38 are nodata values according to the spec + + self.mbox: tuple[Optional[float], Optional[float]] + for i, m_bound in enumerate(unpack("<2d", shp.read(16))): + self.mbox[i] = m_bound if m_bound < NODATA else None + + def __shape( self, oid: Optional[int] = None, bbox: Optional[BBox] = None @@ -2240,13 +2240,19 @@ def __shxOffsets(self): raise ShapefileException( "Shapefile Reader requires a shapefile or file-like object. (no shx file found" ) + if self.numShapes is None: + raise ShapefileException( + "numShapes must not be None. " + " Was there a problem with .__shxHeader() ?" + f"Got: {self.numShapes=}" + ) # Jump to the first record. 
shx.seek(100) # Each index record consists of two nrs, we only want the first one shxRecords = _Array[int]("i", shx.read(2 * self.numShapes * 4)) if sys.byteorder != "big": shxRecords.byteswap() - self._offsets: list[int] = [2 * el for el in shxRecords[::2]] + self._offsets = [2 * el for el in shxRecords[::2]] def __shapeIndex(self, i: Optional[int] = None) -> Optional[int]: """Returns the offset in a .shp file for a shape based on information @@ -2366,18 +2372,20 @@ def __dbfHeader(self): # read fields numFields = (self.__dbfHdrLength - 33) // 32 for __field in range(numFields): - fieldDesc = list(unpack("<11sc4xBB14x", dbf.read(32))) - name = 0 - idx = 0 - if b"\x00" in fieldDesc[name]: - idx = fieldDesc[name].index(b"\x00") + encoded_field_tuple: tuple[bytes,bytes,int,int] = unpack("<11sc4xBB14x", dbf.read(32)) + encoded_name, encoded_field_type_char, size, decimal = encoded_field_tuple + + if b"\x00" in encoded_name: + idx = encoded_name.index(b"\x00") else: - idx = len(fieldDesc[name]) - 1 - fieldDesc[name] = fieldDesc[name][:idx] - fieldDesc[name] = u(fieldDesc[name], self.encoding, self.encodingErrors) - fieldDesc[name] = fieldDesc[name].lstrip() - fieldDesc[1] = u(fieldDesc[1], "ascii") - self.fields.append(fieldDesc) + idx = len(encoded_name) - 1 + encoded_name = encoded_name[:idx] + field_name = u(encoded_name, self.encoding, self.encodingErrors) + field_name = field_name.lstrip() + + field_type_char = u(encoded_field_type_char, "ascii") + + self.fields.append((field_name, field_type_char, size, decimal)) terminator = dbf.read(1) if terminator != b"\r": raise ShapefileException( From 9661cf028c4e7d634f5a4fe19d6f0906374cd968 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Wed, 30 Jul 2025 20:55:24 +0100 Subject: [PATCH 170/220] Change output in Readme to reflect field data is now a list of tuples --- README.md | 38 +++++++++++++++++++------------------- src/shapefile.py | 10 +++++----- 2 files changed, 24 
insertions(+), 24 deletions(-) diff --git a/README.md b/README.md index a0abc6a1..ff7da81a 100644 --- a/README.md +++ b/README.md @@ -553,7 +553,7 @@ in the shp geometry file and the dbf attribute file. The field names of a shapefile are available as soon as you read a shapefile. You can call the "fields" attribute of the shapefile as a Python list. Each -field is a Python list with the following information: +field is a Python tuple with the following information: * Field name: the name describing the data at this column index. * Field type: the type of data at this column index. Types can be: @@ -574,24 +574,24 @@ attribute: >>> fields = sf.fields - >>> assert fields == [("DeletionFlag", "C", 1, 0), ["AREA", "N", 18, 5], - ... ["BKG_KEY", "C", 12, 0], ["POP1990", "N", 9, 0], ["POP90_SQMI", "N", 10, 1], - ... ["HOUSEHOLDS", "N", 9, 0], - ... ["MALES", "N", 9, 0], ["FEMALES", "N", 9, 0], ["WHITE", "N", 9, 0], - ... ["BLACK", "N", 8, 0], ["AMERI_ES", "N", 7, 0], ["ASIAN_PI", "N", 8, 0], - ... ["OTHER", "N", 8, 0], ["HISPANIC", "N", 8, 0], ["AGE_UNDER5", "N", 8, 0], - ... ["AGE_5_17", "N", 8, 0], ["AGE_18_29", "N", 8, 0], ["AGE_30_49", "N", 8, 0], - ... ["AGE_50_64", "N", 8, 0], ["AGE_65_UP", "N", 8, 0], - ... ["NEVERMARRY", "N", 8, 0], ["MARRIED", "N", 9, 0], ["SEPARATED", "N", 7, 0], - ... ["WIDOWED", "N", 8, 0], ["DIVORCED", "N", 8, 0], ["HSEHLD_1_M", "N", 8, 0], - ... ["HSEHLD_1_F", "N", 8, 0], ["MARHH_CHD", "N", 8, 0], - ... ["MARHH_NO_C", "N", 8, 0], ["MHH_CHILD", "N", 7, 0], - ... ["FHH_CHILD", "N", 7, 0], ["HSE_UNITS", "N", 9, 0], ["VACANT", "N", 7, 0], - ... ["OWNER_OCC", "N", 8, 0], ["RENTER_OCC", "N", 8, 0], - ... ["MEDIAN_VAL", "N", 7, 0], ["MEDIANRENT", "N", 4, 0], - ... ["UNITS_1DET", "N", 8, 0], ["UNITS_1ATT", "N", 7, 0], ["UNITS2", "N", 7, 0], - ... ["UNITS3_9", "N", 8, 0], ["UNITS10_49", "N", 8, 0], - ... ["UNITS50_UP", "N", 8, 0], ["MOBILEHOME", "N", 7, 0]] + >>> assert fields == [("DeletionFlag", "C", 1, 0), ("AREA", "N", 18, 5), + ... 
("BKG_KEY", "C", 12, 0), ("POP1990", "N", 9, 0), ("POP90_SQMI", "N", 10, 1), + ... ("HOUSEHOLDS", "N", 9, 0), + ... ("MALES", "N", 9, 0), ("FEMALES", "N", 9, 0), ("WHITE", "N", 9, 0), + ... ("BLACK", "N", 8, 0), ("AMERI_ES", "N", 7, 0), ("ASIAN_PI", "N", 8, 0), + ... ("OTHER", "N", 8, 0), ("HISPANIC", "N", 8, 0), ("AGE_UNDER5", "N", 8, 0), + ... ("AGE_5_17", "N", 8, 0), ("AGE_18_29", "N", 8, 0), ("AGE_30_49", "N", 8, 0), + ... ("AGE_50_64", "N", 8, 0), ("AGE_65_UP", "N", 8, 0), + ... ("NEVERMARRY", "N", 8, 0), ("MARRIED", "N", 9, 0), ("SEPARATED", "N", 7, 0), + ... ("WIDOWED", "N", 8, 0), ("DIVORCED", "N", 8, 0), ("HSEHLD_1_M", "N", 8, 0), + ... ("HSEHLD_1_F", "N", 8, 0), ("MARHH_CHD", "N", 8, 0), + ... ("MARHH_NO_C", "N", 8, 0), ("MHH_CHILD", "N", 7, 0), + ... ("FHH_CHILD", "N", 7, 0), ("HSE_UNITS", "N", 9, 0), ("VACANT", "N", 7, 0), + ... ("OWNER_OCC", "N", 8, 0), ("RENTER_OCC", "N", 8, 0), + ... ("MEDIAN_VAL", "N", 7, 0), ("MEDIANRENT", "N", 4, 0), + ... ("UNITS_1DET", "N", 8, 0), ("UNITS_1ATT", "N", 7, 0), ("UNITS2", "N", 7, 0), + ... ("UNITS3_9", "N", 8, 0), ("UNITS10_49", "N", 8, 0), + ... ("UNITS50_UP", "N", 8, 0), ("MOBILEHOME", "N", 7, 0)] The first field of a dbf file is always a 1-byte field called "DeletionFlag", which indicates records that have been deleted but not removed. 
However, diff --git a/src/shapefile.py b/src/shapefile.py index ab482102..2ee5cdf3 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -2182,10 +2182,10 @@ def __shpHeader(self): # Measure # Measure values less than -10e38 are nodata values according to the spec - self.mbox: tuple[Optional[float], Optional[float]] - for i, m_bound in enumerate(unpack("<2d", shp.read(16))): - self.mbox[i] = m_bound if m_bound < NODATA else None - + self.mbox: tuple[Optional[float], Optional[float]] = tuple( + m_bound if m_bound >= NODATA else None + for m_bound in unpack("<2d", shp.read(16)) + ) def __shape( @@ -2374,7 +2374,7 @@ def __dbfHeader(self): for __field in range(numFields): encoded_field_tuple: tuple[bytes,bytes,int,int] = unpack("<11sc4xBB14x", dbf.read(32)) encoded_name, encoded_field_type_char, size, decimal = encoded_field_tuple - + if b"\x00" in encoded_name: idx = encoded_name.index(b"\x00") else: From db0e99c5e7f634d781d8f9a64ea0c66419d49315 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Wed, 30 Jul 2025 20:57:49 +0100 Subject: [PATCH 171/220] Change rbox and mbox in Readme doctests to tuples --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index ff7da81a..67ecb4cd 100644 --- a/README.md +++ b/README.md @@ -1375,7 +1375,7 @@ Shapefiles containing M-values can be examined in several ways: >>> r = shapefile.Reader('shapefiles/test/linem') >>> r.mbox # the lower and upper bound of M-values in the shapefile - [0.0, 3.0] + (0.0, 3.0) >>> r.shape(0).m # flat list of M-values [0.0, None, 3.0, None, 0.0, None, None] @@ -1408,7 +1408,7 @@ To examine a Z-type shapefile you can do: >>> r = shapefile.Reader('shapefiles/test/linez') >>> r.zbox # the lower and upper bound of Z-values in the shapefile - [0.0, 22.0] + (0.0, 22.0) >>> r.shape(0).z # flat list of Z-values [18.0, 20.0, 22.0, 0.0, 0.0, 0.0, 0.0, 15.0, 13.0, 14.0] From 
3ae51cc9a4b6492cde12a62d6184b0872666190a Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Wed, 30 Jul 2025 21:40:55 +0100 Subject: [PATCH 172/220] Make FieldData a NamedTuple --- README.md | 4 +- src/shapefile.py | 106 ++++++++++++++++++++++++++--------------------- 2 files changed, 61 insertions(+), 49 deletions(-) diff --git a/README.md b/README.md index 67ecb4cd..a0e8f448 100644 --- a/README.md +++ b/README.md @@ -919,8 +919,8 @@ You can also add attributes using keyword arguments where the keys are field nam >>> w = shapefile.Writer('shapefiles/test/dtype') - >>> w.field('FIRST_FLD','C','40') - >>> w.field('SECOND_FLD','C','40') + >>> w.field('FIRST_FLD','C', 40) + >>> w.field('SECOND_FLD','C', 40) >>> w.null() >>> w.null() >>> w.record('First', 'Line') diff --git a/src/shapefile.py b/src/shapefile.py index 2ee5cdf3..08f0f058 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -29,6 +29,7 @@ Iterable, Iterator, Literal, + NamedTuple, Optional, Protocol, Reversible, @@ -156,7 +157,14 @@ def read(self, size: int = -1): ... BinaryFileT = Union[str, IO[bytes]] BinaryFileStreamT = Union[IO[bytes], io.BytesIO, WriteSeekableBinStream] -FieldTuple = tuple[str, str, int, int] + +class FieldData(NamedTuple): + name: str + fieldType: str + size: int + decimal: int + + RecordValue = Union[ bool, int, float, str, date ] # A Possible value in a Shapefile record, e.g. 
L, N, F, C, D types @@ -1776,7 +1784,7 @@ def __init__( self.shpLength: Optional[int] = None self.numRecords: Optional[int] = None self.numShapes: Optional[int] = None - self.fields: list[FieldTuple] = [] + self.fields: list[FieldData] = [] self.__dbfHdrLength = 0 self.__fieldLookup: dict[str, int] = {} self.encoding = encoding @@ -2181,12 +2189,11 @@ def __shpHeader(self): self.zbox: ZBox = tuple(unpack("<2d", shp.read(16))) # Measure # Measure values less than -10e38 are nodata values according to the spec - - self.mbox: tuple[Optional[float], Optional[float]] = tuple( - m_bound if m_bound >= NODATA else None + m_bounds = [ + float(m_bound) if m_bound >= NODATA else None for m_bound in unpack("<2d", shp.read(16)) - ) - + ] + self.mbox = tuple(m_bounds[:2]) def __shape( self, oid: Optional[int] = None, bbox: Optional[BBox] = None @@ -2372,20 +2379,22 @@ def __dbfHeader(self): # read fields numFields = (self.__dbfHdrLength - 33) // 32 for __field in range(numFields): - encoded_field_tuple: tuple[bytes,bytes,int,int] = unpack("<11sc4xBB14x", dbf.read(32)) - encoded_name, encoded_field_type_char, size, decimal = encoded_field_tuple + encoded_field_tuple: tuple[bytes, bytes, int, int] = unpack( + "<11sc4xBB14x", dbf.read(32) + ) + encoded_name, encoded_type_char, size, decimal = encoded_field_tuple if b"\x00" in encoded_name: idx = encoded_name.index(b"\x00") else: idx = len(encoded_name) - 1 encoded_name = encoded_name[:idx] - field_name = u(encoded_name, self.encoding, self.encodingErrors) - field_name = field_name.lstrip() + name = u(encoded_name, self.encoding, self.encodingErrors) + name = name.lstrip() - field_type_char = u(encoded_field_type_char, "ascii") + type_char = u(encoded_type_char, "ascii") - self.fields.append((field_name, field_type_char, size, decimal)) + self.fields.append(FieldData(name, type_char, size, decimal)) terminator = dbf.read(1) if terminator != b"\r": raise ShapefileException( @@ -2393,7 +2402,7 @@ def __dbfHeader(self): ) # insert 
deletion field at start - self.fields.insert(0, ("DeletionFlag", "C", 1, 0)) + self.fields.insert(0, FieldData("DeletionFlag", "C", 1, 0)) # store all field positions for easy lookups # note: fieldLookup gives the index position of a field inside Reader.fields @@ -2434,7 +2443,7 @@ def __recordFmt(self, fields: Optional[Container[str]] = None) -> tuple[str, int def __recordFields( self, fields: Optional[Iterable[str]] = None - ) -> tuple[list[FieldTuple], dict[str, int], Struct]: + ) -> tuple[list[FieldData], dict[str, int], Struct]: """Returns the necessary info required to unpack a record's fields, restricted to a subset of fieldnames 'fields' if specified. Returns a list of field info tuples, a name-index lookup dict, @@ -2469,7 +2478,7 @@ def __recordFields( def __record( self, - fieldTuples: list[FieldTuple], + fieldTuples: list[FieldData], recLookup: dict[str, int], recStruct: Struct, oid: Optional[int] = None, @@ -2734,7 +2743,7 @@ def __init__( ): self.target = target self.autoBalance = autoBalance - self.fields: list[FieldTuple] = [] + self.fields: list[FieldData] = [] self.shapeType = shapeType self.shp: Optional[WriteSeekableBinStream] = None self.shx: Optional[WriteSeekableBinStream] = None @@ -2829,6 +2838,8 @@ def close(self): # Flush files for attribute in (self.shp, self.shx, self.dbf): + if attribute is None: + continue if hasattr(attribute, "flush") and not getattr(attribute, "closed", False): try: attribute.flush() @@ -2868,20 +2879,30 @@ def __getFileObj(self, f): def __shpFileLength(self): """Calculates the file length of the shp file.""" + shp = self.__getFileObj(self.shp) + # Remember starting position - start = self.shp.tell() + + start = shp.tell() # Calculate size of all shapes - self.shp.seek(0, 2) - size = self.shp.tell() + shp.seek(0, 2) + size = shp.tell() # Calculate size as 16-bit words size //= 2 # Return to start - self.shp.seek(start) + shp.seek(start) return size - def __bbox(self, s): - x = [] - y = [] + def __bbox(self, s: 
Shape): + x: list[float] = [] + y: list[float] = [] + + if self._bbox: + x.append(self._bbox[0]) + y.append(self._bbox[1]) + x.append(self._bbox[2]) + y.append(self._bbox[3]) + if len(s.points) > 0: px, py = list(zip(*s.points))[:2] x.extend(px) @@ -2894,20 +2915,8 @@ def __bbox(self, s): "Cannot create bbox. Expected a valid shape with at least one point. " f"Got a shape of type '{s.shapeType}' and 0 points." ) - bbox = [min(x), min(y), max(x), max(y)] - # update global - if self._bbox: - # compare with existing - self._bbox = [ - min(bbox[0], self._bbox[0]), - min(bbox[1], self._bbox[1]), - max(bbox[2], self._bbox[2]), - max(bbox[3], self._bbox[3]), - ] - else: - # first time bbox is being set - self._bbox = bbox - return bbox + self._bbox = (min(x), min(y), max(x), max(y)) + return self._bbox def __zbox(self, s) -> ZBox: z: list[float] = [] @@ -3057,7 +3066,7 @@ def __dbfHeader(self): raise ShapefileException( "Shapefile dbf header length exceeds maximum length." ) - recordLength = sum(int(field[2]) for field in fields) + 1 + recordLength = sum(field.size for field in fields) + 1 header = pack( " Date: Wed, 30 Jul 2025 23:08:23 +0100 Subject: [PATCH 173/220] Tackle dbf! Delete u() --- src/shapefile.py | 192 +++++++++++++++++++++++------------------------ 1 file changed, 92 insertions(+), 100 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 08f0f058..ed91d64f 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -43,7 +43,7 @@ from urllib.parse import urlparse, urlunparse from urllib.request import Request, urlopen -from typing_extensions import Never, NotRequired, Self, TypeIs +from typing_extensions import Never, NotRequired, Self, TypeIs, Unpack # Create named logger logger = logging.getLogger(__name__) @@ -267,28 +267,12 @@ class GeoJSONFeatureCollectionWithBBox(GeoJSONFeatureCollection): # Helpers -MISSING = [None, ""] +MISSING = {None, ""} NODATA = -10e38 # as per the ESRI shapefile spec, only used for m-values. 
unpack_2_int32_be = Struct(">2i").unpack -def b( - v: Union[str, bytes], encoding: str = "utf-8", encodingErrors: str = "strict" -) -> bytes: - if isinstance(v, str): - # For python 3 encode str to bytes. - return v.encode(encoding, encodingErrors) - if isinstance(v, bytes): - # Already bytes. - return v - if v is None: - # Since we're dealing with text, interpret None as "" - return b"" - # Force string representation. - return str(v).encode(encoding, encodingErrors) - - def u( v: Union[str, bytes], encoding: str = "utf-8", encodingErrors: str = "strict" ) -> str: @@ -2169,7 +2153,7 @@ def __restrictIndex(self, i: int) -> int: i = range(self.numRecords)[i] return i - def __shpHeader(self): + def __shpHeader(self) -> None: """Reads the header information from a .shp file.""" if not self.shp: raise ShapefileException( @@ -2362,7 +2346,7 @@ def iterShapes(self, bbox: Optional[BBox] = None) -> Iterator[Optional[Shape]]: self.numShapes = i self._offsets = offsets - def __dbfHeader(self): + def __dbfHeader(self) -> None: """Reads a dbf header. 
Xbase-related code borrows heavily from ActiveState Python Cookbook Recipe 362715 by Raymond Hettinger""" if not self.dbf: @@ -2389,10 +2373,10 @@ def __dbfHeader(self): else: idx = len(encoded_name) - 1 encoded_name = encoded_name[:idx] - name = u(encoded_name, self.encoding, self.encodingErrors) + name = encoded_name.decode(self.encoding, self.encodingErrors) name = name.lstrip() - type_char = u(encoded_type_char, "ascii") + type_char = encoded_type_char.decode("ascii") self.fields.append(FieldData(name, type_char, size, decimal)) terminator = dbf.read(1) @@ -2422,14 +2406,14 @@ def __recordFmt(self, fields: Optional[Container[str]] = None) -> tuple[str, int """ if self.numRecords is None: self.__dbfHeader() - structcodes = [f"{fieldinfo[2]}s" for fieldinfo in self.fields] + structcodes = [f"{fieldinfo.size}s" for fieldinfo in self.fields] if fields is not None: # only unpack specified fields, ignore others using padbytes (x) structcodes = [ code - if fieldinfo[0] in fields - or fieldinfo[0] == "DeletionFlag" # always unpack delflag - else f"{fieldinfo[2]}x" + if fieldinfo.name in fields + or fieldinfo.name == "DeletionFlag" # always unpack delflag + else f"{fieldinfo.size}x" for fieldinfo, code in zip(self.fields, structcodes) ] fmt = "".join(structcodes) @@ -2484,11 +2468,14 @@ def __record( oid: Optional[int] = None, ) -> Optional[_Record]: """Reads and returns a dbf record row as a list of values. Requires specifying - a list of field info tuples 'fieldTuples', a record name-index dict 'recLookup', + a list of field info FieldData namedtuples 'fieldTuples', a record name-index dict 'recLookup', and a Struct instance 'recStruct' for unpacking these fields. 
""" f = self.__getFileObj(self.dbf) + # The only format chars in from self.__recordFmt, in recStruct from __recordFields, + # are s and x (ascii encoded str and pad byte) so everything in recordContents is bytes + # https://docs.python.org/3/library/struct.html#format-characters recordContents = recStruct.unpack(f.read(recStruct.size)) # deletion flag field is always unpacked as first value (see __recordFmt) @@ -2552,7 +2539,7 @@ def __record( value = date(y, m, d) except (TypeError, ValueError): # if invalid date, just return as unicode string so user can decide - value = u(value.strip()) + value = str(value.strip()) elif typ == "L": # logical: 1 byte - initialized to 0x20 (space) otherwise T or F. if value == b" ": @@ -2877,7 +2864,7 @@ def __getFileObj(self, f): return f raise ShapefileException(f"Unsupported file-like object: {f}") - def __shpFileLength(self): + def __shpFileLength(self) -> int: """Calculates the file length of the shp file.""" shp = self.__getFileObj(self.shp) @@ -2893,7 +2880,7 @@ def __shpFileLength(self): shp.seek(start) return size - def __bbox(self, s: Shape): + def __bbox(self, s: Shape) -> BBox: x: list[float] = [] y: list[float] = [] @@ -2964,25 +2951,25 @@ def __mbox(self, s) -> MBox: def shapeTypeName(self) -> str: return SHAPETYPE_LOOKUP[self.shapeType or 0] - def bbox(self): + def bbox(self) -> Optional[BBox]: """Returns the current bounding box for the shapefile which is the lower-left and upper-right corners. 
It does not contain the elevation or measure extremes.""" return self._bbox - def zbox(self): + def zbox(self) -> Optional[ZBox]: """Returns the current z extremes for the shapefile.""" return self._zbox - def mbox(self): + def mbox(self) -> Optional[MBox]: """Returns the current m extremes for the shapefile.""" return self._mbox def __shapefileHeader( self, fileObj: Optional[WriteSeekableBinStream], - headerType: str = "shp", - ): + headerType: Literal["shp", "dbf", "shx"] = "shp", + ) -> None: """Writes the specified header type to the specified file-like object. Several of the shapefile formats are so similar that a single generic method to read or write them is warranted.""" @@ -3009,7 +2996,7 @@ def __shapefileHeader( # In such cases of empty shapefiles, ESRI spec says the bbox values are 'unspecified'. # Not sure what that means, so for now just setting to 0s, which is the same behavior as in previous versions. # This would also make sense since the Z and M bounds are similarly set to 0 for non-Z/M type shapefiles. 
- bbox = [0, 0, 0, 0] + bbox = (0, 0, 0, 0) f.write(pack("<4d", *bbox)) except error: raise ShapefileException( @@ -3018,25 +3005,25 @@ def __shapefileHeader( else: f.write(pack("<4d", 0, 0, 0, 0)) # Elevation - if self.shapeType in (11, 13, 15, 18): + if self.shapeType in {POINTZ} | _HasZ._shapeTypes: # Z values are present in Z type zbox = self.zbox() if zbox is None: # means we have empty shapefile/only null geoms (see commentary on bbox above) - zbox = [0, 0] + zbox = (0, 0) else: # As per the ESRI shapefile spec, the zbox for non-Z type shapefiles are set to 0s - zbox = [0, 0] + zbox = (0, 0) # Measure - if self.shapeType in (11, 13, 15, 18, 21, 23, 25, 28, 31): + if self.shapeType in {POINTM, POINTZ} | _HasM._shapeTypes: # M values are present in M or Z type mbox = self.mbox() if mbox is None: # means we have empty shapefile/only null geoms (see commentary on bbox above) - mbox = [0, 0] + mbox = (0, 0) else: # As per the ESRI shapefile spec, the mbox for non-M type shapefiles are set to 0s - mbox = [0, 0] + mbox = (0, 0) # Try writing try: f.write(pack("<4d", zbox[0], zbox[1], mbox[0], mbox[1])) @@ -3045,7 +3032,7 @@ def __shapefileHeader( "Failed to write shapefile elevation and measure values. Floats required." 
) - def __dbfHeader(self): + def __dbfHeader(self) -> None: """Writes the dbf header and field descriptors.""" f = self.__getFileObj(self.dbf) f.seek(0) @@ -3081,10 +3068,10 @@ def __dbfHeader(self): # Field descriptors for field in fields: name, fieldType, size, decimal = field - encoded_name = b(name, self.encoding, self.encodingErrors) + encoded_name = name.encode(self.encoding, self.encodingErrors) encoded_name = encoded_name.replace(b" ", b"_") encoded_name = encoded_name[:10].ljust(11).replace(b" ", b"\x00") - encodedFieldType = b(fieldType, "ascii") + encodedFieldType = fieldType.encode("ascii") fld = pack("<11sc4xBB14x", encoded_name, encodedFieldType, size, decimal) f.write(fld) # Terminator @@ -3093,7 +3080,7 @@ def __dbfHeader(self): def shape( self, s: Union[Shape, HasGeoInterface, dict], - ): + ) -> None: # Balance if already not balanced if self.autoBalance and self.recNum < self.shpNum: self.balance() @@ -3178,7 +3165,7 @@ def __shpRecord(self, s: Shape) -> tuple[int, int]: f.write(b_io.read()) return offset, length - def __shxRecord(self, offset, length): + def __shxRecord(self, offset: int, length: int) -> None: """Writes the shx records.""" f = self.__getFileObj(self.shx) @@ -3191,8 +3178,10 @@ def __shxRecord(self, offset, length): f.write(pack(">i", length)) def record( - self, *recordList: Iterable[RecordValue], **recordDict: dict[str, RecordValue] - ): + self, + *recordList: list[RecordValue], + **recordDict: RecordValue, + ) -> None: """Creates a dbf attribute record. You can submit either a sequence of field values or keyword arguments of field names and values. 
Before adding records you must add fields for the record values using the @@ -3203,10 +3192,10 @@ def record( # Balance if already not balanced if self.autoBalance and self.recNum > self.shpNum: self.balance() - + record: list[RecordValue] fieldCount = sum(1 for field in self.fields if field[0] != "DeletionFlag") if recordList: - record = list(recordList) + record = list(*recordList) while len(record) < fieldCount: record.append("") elif recordDict: @@ -3227,7 +3216,7 @@ def record( record = ["" for _ in range(fieldCount)] self.__dbfRecord(record) - def __dbfRecord(self, record): + def __dbfRecord(self, record: list[RecordValue]) -> None: """Writes the dbf records.""" f = self.__getFileObj(self.dbf) if self.recNum == 0: @@ -3246,68 +3235,69 @@ def __dbfRecord(self, record): # write fieldType = fieldType.upper() size = int(size) - if fieldType in ("N", "F"): + str_val: str + if fieldType in {"N", "F"}: # numeric or float: number stored as a string, right justified, and padded with blanks to the width of the field. if value in MISSING: - value = b"*" * size # QGIS NULL + str_val = "*" * size # QGIS NULL elif not deci: # force to int try: # first try to force directly to int. # forcing a large int to float and back to int # will lose information and result in wrong nr. - value = int(value) + int_val = int(value) except ValueError: # forcing directly to int failed, so was probably a float. - value = int(float(value)) - value = format(value, "d")[:size].rjust( + int_val = int(float(value)) + str_val = format(int_val, "d")[:size].rjust( size ) # caps the size if exceeds the field size else: - value = float(value) - value = format(value, f".{deci}f")[:size].rjust( + f_val = float(value) + str_val = format(f_val, f".{deci}f")[:size].rjust( size ) # caps the size if exceeds the field size elif fieldType == "D": # date: 8 bytes - date stored as a string in the format YYYYMMDD. 
- if isinstance(value, date): - value = f"{value.year:04d}{value.month:02d}{value.day:02d}" - elif isinstance(value, list) and len(value) == 3: - value = f"{value[0]:04d}{value[1]:02d}{value[2]:02d}" - elif value in MISSING: - value = b"0" * 8 # QGIS NULL for date type + if value in MISSING: + str_val = "0" * 8 # QGIS NULL for date type + elif isinstance(value, date): + str_val = f"{value.year:04d}{value.month:02d}{value.day:02d}" + elif isinstance(value, (list, tuple)) and len(value) == 3: + str_val = f"{value[0]:04d}{value[1]:02d}{value[2]:02d}" elif is_string(value) and len(value) == 8: - pass # value is already a date string + str_val = value # value is already a date string else: raise ShapefileException( - "Date values must be either a datetime.date object, a list, a YYYYMMDD string, or a missing value." + "Date values must be either a datetime.date object, a list/tuple, a YYYYMMDD string, or a missing value." ) elif fieldType == "L": # logical: 1 byte - initialized to 0x20 (space) otherwise T or F. 
if value in MISSING: - value = b" " # missing is set to space - elif value in [True, 1]: - value = b"T" - elif value in [False, 0]: - value = b"F" + str_val = " " # missing is set to space + elif value in {True, 1}: + str_val = "T" + elif value in {False, 0}: + str_val = "F" else: - value = b" " # unknown is set to space + str_val = " " # unknown is set to space else: # anything else is forced to string, truncated to the length of the field - value = b(value, self.encoding, self.encodingErrors)[:size].ljust(size) - if not isinstance(value, bytes): - # just in case some of the numeric format() and date strftime() results are still in unicode (Python 3 only) - value = b( - value, "ascii", self.encodingErrors - ) # should be default ascii encoding - if len(value) != size: + str_val = u(value, self.encoding, self.encodingErrors)[:size].ljust( + size + ) + + # should be default ascii encoding + encoded_val = str_val.encode("ascii", self.encodingErrors) + if len(encoded_val) != size: raise ShapefileException( "Shapefile Writer unable to pack incorrect sized value" - f" (size {len(value)}) into field '{fieldName}' (size {size})." + f" (size {len(encoded_val)}) into field '{fieldName}' (size {size})." 
) - f.write(value) + f.write(encoded_val) - def balance(self): + def balance(self) -> None: """Adds corresponding empty attributes or null geometry records depending on which type of record was created to make sure all three files are in synch.""" @@ -3316,24 +3306,26 @@ def balance(self): while self.recNum < self.shpNum: self.record() - def null(self): + def null(self) -> None: """Creates a null shape.""" self.shape(NullShape()) - def point(self, x: float, y: float): + def point(self, x: float, y: float) -> None: """Creates a POINT shape.""" pointShape = Point() pointShape.points.append((x, y)) self.shape(pointShape) - def pointm(self, x: float, y: float, m: Optional[float] = None): + def pointm(self, x: float, y: float, m: Optional[float] = None) -> None: """Creates a POINTM shape. If the m (measure) value is not set, it defaults to NoData.""" pointShape = PointM() pointShape.points.append((x, y, m)) self.shape(pointShape) - def pointz(self, x: float, y: float, z: float = 0.0, m: Optional[float] = None): + def pointz( + self, x: float, y: float, z: float = 0.0, m: Optional[float] = None + ) -> None: """Creates a POINTZ shape. If the z (elevation) value is not set, it defaults to 0. If the m (measure) value is not set, it defaults to NoData.""" @@ -3341,20 +3333,20 @@ def pointz(self, x: float, y: float, z: float = 0.0, m: Optional[float] = None): pointShape.points.append((x, y, z, m)) self.shape(pointShape) - def multipoint(self, points: PointsT): + def multipoint(self, points: PointsT) -> None: """Creates a MULTIPOINT shape. Points is a list of xy values.""" # nest the points inside a list to be compatible with the generic shapeparts method self._shapeparts(parts=[points], polyShape=MultiPoint()) - def multipointm(self, points: PointsT): + def multipointm(self, points: PointsT) -> None: """Creates a MULTIPOINTM shape. Points is a list of xym values. 
If the m (measure) value is not included, it defaults to None (NoData).""" # nest the points inside a list to be compatible with the generic shapeparts method self._shapeparts(parts=[points], polyShape=MultiPointM()) - def multipointz(self, points: PointsT): + def multipointz(self, points: PointsT) -> None: """Creates a MULTIPOINTZ shape. Points is a list of xyzm values. If the z (elevation) value is not included, it defaults to 0. @@ -3362,32 +3354,32 @@ def multipointz(self, points: PointsT): # nest the points inside a list to be compatible with the generic shapeparts method self._shapeparts(parts=[points], polyShape=MultiPointZ()) - def line(self, lines: list[PointsT]): + def line(self, lines: list[PointsT]) -> None: """Creates a POLYLINE shape. Lines is a collection of lines, each made up of a list of xy values.""" self._shapeparts(parts=lines, polyShape=Polyline()) - def linem(self, lines: list[PointsT]): + def linem(self, lines: list[PointsT]) -> None: """Creates a POLYLINEM shape. Lines is a collection of lines, each made up of a list of xym values. If the m (measure) value is not included, it defaults to None (NoData).""" self._shapeparts(parts=lines, polyShape=PolylineM()) - def linez(self, lines: list[PointsT]): + def linez(self, lines: list[PointsT]) -> None: """Creates a POLYLINEZ shape. Lines is a collection of lines, each made up of a list of xyzm values. If the z (elevation) value is not included, it defaults to 0. If the m (measure) value is not included, it defaults to None (NoData).""" self._shapeparts(parts=lines, polyShape=PolylineZ()) - def poly(self, polys: list[PointsT]): + def poly(self, polys: list[PointsT]) -> None: """Creates a POLYGON shape. Polys is a collection of polygons, each made up of a list of xy values. Note that for ordinary polygons the coordinates must run in a clockwise direction. 
If some of the polygons are holes, these must run in a counterclockwise direction.""" self._shapeparts(parts=polys, polyShape=Polygon()) - def polym(self, polys: list[PointsT]): + def polym(self, polys: list[PointsT]) -> None: """Creates a POLYGONM shape. Polys is a collection of polygons, each made up of a list of xym values. Note that for ordinary polygons the coordinates must run in a clockwise direction. @@ -3395,7 +3387,7 @@ def polym(self, polys: list[PointsT]): If the m (measure) value is not included, it defaults to None (NoData).""" self._shapeparts(parts=polys, polyShape=PolygonM()) - def polyz(self, polys: list[PointsT]): + def polyz(self, polys: list[PointsT]) -> None: """Creates a POLYGONZ shape. Polys is a collection of polygons, each made up of a list of xyzm values. Note that for ordinary polygons the coordinates must run in a clockwise direction. @@ -3404,7 +3396,7 @@ def polyz(self, polys: list[PointsT]): If the m (measure) value is not included, it defaults to None (NoData).""" self._shapeparts(parts=polys, polyShape=PolygonZ()) - def multipatch(self, parts: list[PointsT], partTypes: list[int]): + def multipatch(self, parts: list[PointsT], partTypes: list[int]) -> None: """Creates a MULTIPATCH shape. Parts is a collection of 3D surface patches, each made up of a list of xyzm values. PartTypes is a list of types that define each of the surface patches. @@ -3431,7 +3423,7 @@ def multipatch(self, parts: list[PointsT], partTypes: list[int]): def _shapeparts( self, parts: list[PointsT], polyShape: Union[Polyline, Polygon, MultiPoint] - ): + ) -> None: """Internal method for adding a shape that has multiple collections of points (parts): lines, polygons, and multipoint shapes. 
""" @@ -3442,7 +3434,7 @@ def _shapeparts( # if shapeType in (5, 15, 25, 31): # This method is never actually called on a MultiPatch # so we omit its shapeType (31) for efficiency - if isinstance(polyShape, Polygon): + if compatible_with(polyShape, Polygon): for part in parts: if part[0] != part[-1]: part.append(part[0]) @@ -3466,7 +3458,7 @@ def field( fieldType: str = "C", size: int = 50, decimal: int = 0, - ): + ) -> None: """Adds a dbf field descriptor to the shapefile.""" if fieldType == "D": size = 8 From 532c02585954d951820abab3bec09c3ec0880a53 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Thu, 31 Jul 2025 00:21:46 +0100 Subject: [PATCH 174/220] Need to type dbf coercer methods --- src/shapefile.py | 167 +++++++++++++++++++++++++++++------------------ 1 file changed, 102 insertions(+), 65 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index ed91d64f..d2609536 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -157,10 +157,20 @@ def read(self, size: int = -1): ... BinaryFileT = Union[str, IO[bytes]] BinaryFileStreamT = Union[IO[bytes], io.BytesIO, WriteSeekableBinStream] +FieldTypeCode = Literal["N", "F", "L", "D", "C", "M"] +FIELD_TYPE_CODES: dict[str, FieldTypeCode] = { + "N": "N", + "F": "F", + "L": "L", + "D": "D", + "C": "C", + "M": "M", +} + class FieldData(NamedTuple): name: str - fieldType: str + fieldType: FieldTypeCode size: int decimal: int @@ -289,10 +299,6 @@ def u( return bytes(v).decode(encoding, encodingErrors) -def is_string(v: Any) -> bool: - return isinstance(v, str) - - @overload def fsdecode_if_pathlike(path: os.PathLike) -> str: ... 
@overload @@ -1776,7 +1782,7 @@ def __init__( # See if a shapefile name was passed as the first argument if shapefile_path: path = fsdecode_if_pathlike(shapefile_path) - if is_string(path): + if isinstance(path, str): if ".zip" in path: # Shapefile is inside a zipfile if path.count(".zip") > 1: @@ -2376,7 +2382,9 @@ def __dbfHeader(self) -> None: name = encoded_name.decode(self.encoding, self.encodingErrors) name = name.lstrip() - type_char = encoded_type_char.decode("ascii") + type_char: FieldTypeCode = FIELD_TYPE_CODES.get( + encoded_type_char.decode("ascii").upper(), "C" + ) self.fields.append(FieldData(name, type_char, size, decimal)) terminator = dbf.read(1) @@ -2473,8 +2481,8 @@ def __record( """ f = self.__getFileObj(self.dbf) - # The only format chars in from self.__recordFmt, in recStruct from __recordFields, - # are s and x (ascii encoded str and pad byte) so everything in recordContents is bytes + # The only format chars in from self.__recordFmt, in recStruct from __recordFields, + # are s and x (ascii encoded str and pad byte) so everything in recordContents is bytes # https://docs.python.org/3/library/struct.html#format-characters recordContents = recStruct.unpack(f.read(recStruct.size)) @@ -2552,8 +2560,7 @@ def __record( else: value = None # unknown value is set to missing else: - # anything else is forced to string/unicode - value = u(value, self.encoding, self.encodingErrors) + value = value.decode(self.encoding, self.encodingErrors) value = value.strip().rstrip( "\x00" ) # remove null-padding at end of strings @@ -2738,7 +2745,7 @@ def __init__( self._files_to_close: list[BinaryFileStreamT] = [] if target: target = fsdecode_if_pathlike(target) - if not is_string(target): + if not isinstance(target, str): raise TypeError( f"The target filepath {target!r} must be of type str/unicode or path-like, not {type(target)}." 
) @@ -3216,6 +3223,65 @@ def record( record = ["" for _ in range(fieldCount)] self.__dbfRecord(record) + @staticmethod + def _dbf_missing_placeholder( + value: RecordValue, fieldType: FieldTypeCode, size: int + ) -> str: + if fieldType in {"N", "F"}: + return "*" * size # QGIS NULL + if fieldType == "D": + return "0" * 8 # QGIS NULL for date type + if fieldType == "L": + return " " + return str(value) + + @staticmethod + def _try_coerce_to_numeric_str(value: RecordValue, size: int, deci: int) -> str: + # numeric or float: number stored as a string, + # right justified, and padded with blanks + # to the width of the field. + if not deci: + # force to int + try: + # first try to force directly to int. + # forcing a large int to float and back to int + # will lose information and result in wrong nr. + int_val = int(value) + except ValueError: + # forcing directly to int failed, so was probably a float. + int_val = int(float(value)) + + str_val = format(int_val, "d") + else: + f_val = float(value) + str_val = format(f_val, f".{deci}f") + + # caps the size if exceeds the field size + return str_val[:size].rjust(size) + + @staticmethod + def _try_coerce_to_date_str(value: RecordValue) -> str: + # date: 8 bytes - date stored as a string in the format YYYYMMDD. + if isinstance(value, date): + return f"{value.year:04d}{value.month:02d}{value.day:02d}" + if isinstance(value, (list, tuple)) and len(value) == 3: + return f"{value[0]:04d}{value[1]:02d}{value[2]:02d}" + if isinstance(value, str) and len(value) == 8: + return value # value is already a date string + + raise ShapefileException( + "Date values must be either a datetime.date object, a list/tuple, a YYYYMMDD string, or a missing value." + ) + + @staticmethod + def _try_coerce_to_logical_str(value: RecordValue) -> str: + # logical: 1 byte - initialized to 0x20 (space) otherwise T or F. 
+ if value == 1: # True == 1 + return "T" + if value == 0: # False == 0 + return "F" + return " " # unknown is set to space + def __dbfRecord(self, record: list[RecordValue]) -> None: """Writes the dbf records.""" f = self.__getFileObj(self.dbf) @@ -3233,63 +3299,31 @@ def __dbfRecord(self, record: list[RecordValue]) -> None: ) # ignore deletionflag field in case it was specified for (fieldName, fieldType, size, deci), value in zip(fields, record): # write - fieldType = fieldType.upper() + fieldType = FIELD_TYPE_CODES.get(fieldType.upper(), fieldType) size = int(size) str_val: str + + if value in MISSING: + str_val = self._dbf_missing_placeholder(value, fieldType, size) if fieldType in {"N", "F"}: - # numeric or float: number stored as a string, right justified, and padded with blanks to the width of the field. - if value in MISSING: - str_val = "*" * size # QGIS NULL - elif not deci: - # force to int - try: - # first try to force directly to int. - # forcing a large int to float and back to int - # will lose information and result in wrong nr. - int_val = int(value) - except ValueError: - # forcing directly to int failed, so was probably a float. - int_val = int(float(value)) - str_val = format(int_val, "d")[:size].rjust( - size - ) # caps the size if exceeds the field size - else: - f_val = float(value) - str_val = format(f_val, f".{deci}f")[:size].rjust( - size - ) # caps the size if exceeds the field size + str_val = self._try_coerce_to_numeric_str(value, size, deci) elif fieldType == "D": - # date: 8 bytes - date stored as a string in the format YYYYMMDD. 
- if value in MISSING: - str_val = "0" * 8 # QGIS NULL for date type - elif isinstance(value, date): - str_val = f"{value.year:04d}{value.month:02d}{value.day:02d}" - elif isinstance(value, (list, tuple)) and len(value) == 3: - str_val = f"{value[0]:04d}{value[1]:02d}{value[2]:02d}" - elif is_string(value) and len(value) == 8: - str_val = value # value is already a date string - else: - raise ShapefileException( - "Date values must be either a datetime.date object, a list/tuple, a YYYYMMDD string, or a missing value." - ) + str_val = self._try_coerce_to_date_str(value) elif fieldType == "L": - # logical: 1 byte - initialized to 0x20 (space) otherwise T or F. - if value in MISSING: - str_val = " " # missing is set to space - elif value in {True, 1}: - str_val = "T" - elif value in {False, 0}: - str_val = "F" - else: - str_val = " " # unknown is set to space + str_val = self._try_coerce_to_logical_str(value) else: - # anything else is forced to string, truncated to the length of the field - str_val = u(value, self.encoding, self.encodingErrors)[:size].ljust( - size - ) - + # + if isinstance(value, bytes): + decoded_val = value.decode(self.encoding, self.encodingErrors) + else: + # anything else is forced to string. + decoded_val = str(value) + # Truncate to the length of the field + str_val = decoded_val[:size].ljust(size) + # should be default ascii encoding encoded_val = str_val.encode("ascii", self.encodingErrors) + if len(encoded_val) != size: raise ShapefileException( "Shapefile Writer unable to pack incorrect sized value" @@ -3455,7 +3489,7 @@ def field( # Types of args should match *FieldData self, name: str, - fieldType: str = "C", + fieldType: FieldTypeCode = "C", size: int = 50, decimal: int = 0, ) -> None: @@ -3466,15 +3500,18 @@ def field( elif fieldType == "L": size = 1 decimal = 0 + elif fieldType not in {"C", "N", "F", "M"}: + raise ShapefileException( + "fieldType must be C,N,F,M,L or D. " f"Got: {fieldType=}. 
" + ) if len(self.fields) >= 2046: raise ShapefileException( "Shapefile Writer reached maximum number of fields: 2046." ) # A doctest in README.md used to pass in a string ('40') for size, so # try to be robust for incorrect types. - self.fields.append( - FieldData(str(name), str(fieldType), int(size), int(decimal)) - ) + fieldType = FIELD_TYPE_CODES.get(str(fieldType)[0].upper(), fieldType) + self.fields.append(FieldData(str(name), fieldType, int(size), int(decimal))) # Begin Testing From bd729ff9fee84bded53e56f1bfd0c10ff8aad48b Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Thu, 31 Jul 2025 15:22:40 +0100 Subject: [PATCH 175/220] Make field code an enum, and BBox, ZBox & MBox named tuples --- src/shapefile.py | 188 +++++++++++++++++++++++++++++------------------ 1 file changed, 115 insertions(+), 73 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index d2609536..92fe3a74 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -12,6 +12,7 @@ import array import doctest +import enum import io import logging import os @@ -37,6 +38,7 @@ TypedDict, TypeVar, Union, + cast, overload, ) from urllib.error import HTTPError @@ -121,9 +123,22 @@ PointT = Union[Point2D, PointMT, PointZT] PointsT = list[PointT] -BBox = tuple[float, float, float, float] -MBox = tuple[float, float] -ZBox = tuple[float, float] + +class BBox(NamedTuple): + xmin: float + ymin: float + xmax: float + ymax: float + + +class MBox(NamedTuple): + mmin: Optional[float] + mmax: Optional[float] + + +class ZBox(NamedTuple): + zmin: float + zmax: float class WriteableBinStream(Protocol): @@ -157,27 +172,57 @@ def read(self, size: int = -1): ... 
BinaryFileT = Union[str, IO[bytes]] BinaryFileStreamT = Union[IO[bytes], io.BytesIO, WriteSeekableBinStream] -FieldTypeCode = Literal["N", "F", "L", "D", "C", "M"] -FIELD_TYPE_CODES: dict[str, FieldTypeCode] = { - "N": "N", - "F": "F", - "L": "L", - "D": "D", - "C": "C", - "M": "M", -} + +# https://en.wikipedia.org/wiki/.dbf#Database_records +class FieldType(enum.Enum): + # Use an ascii-encoded byte of the name, to save a decoding step. + C = "Character" # (str) + D = "Date" + F = "Float" + L = "Logical" # (bool) + M = "Memo" # Legacy. (10 digit str, starting block in an .dbt file) + N = "Numeric" # (int) class FieldData(NamedTuple): name: str - fieldType: FieldTypeCode + fieldType: FieldType size: int decimal: int + @classmethod + def from_unchecked( + cls, + name: str, + fieldType: Union[str, FieldType] = FieldType.C, + size: int = 50, + decimal: int = 0, + ) -> Self: + if isinstance(fieldType, str): + try: + fieldType = FieldType[fieldType.upper()] + except: + raise ShapefileException( + "fieldType must be C,N,F,M,L,D or a FieldType enum member. " + f"Got: {fieldType=}. " + ) -RecordValue = Union[ - bool, int, float, str, date -] # A Possible value in a Shapefile record, e.g. L, N, F, C, D types + if fieldType is FieldType.D: + size = 8 + decimal = 0 + elif fieldType is FieldType.L: + size = 1 + decimal = 0 + + # A doctest in README.md previously passed in a string ('40') for size, + # so explictly convert name to str, and size and decimal to ints. + return cls(str(name), fieldType, int(size), int(decimal)) + + +RecordValueNotDate = Union[bool, int, float, str, date] + +# A Possible value in a Shapefile dbf record, i.e. 
L, N, M, F, C, or D types +RecordValue = Union[RecordValueNotDate, date] class HasGeoInterface(Protocol): @@ -356,7 +401,7 @@ def rewind(coords: Reversible[PointT]) -> PointsT: def ring_bbox(coords: PointsT) -> BBox: """Calculates and returns the bounding box of a ring.""" xs, ys = map(list, list(zip(*coords))[:2]) # ignore any z or m values - bbox = min(xs), min(ys), max(xs), max(ys) + bbox = BBox(xmin=min(xs), ymin=min(ys), xmax=max(xs), ymax=max(ys)) return bbox @@ -939,7 +984,7 @@ class _CanHaveBBox(Shape): bbox: Optional[BBox] = None def _get_set_bbox_from_byte_stream(self, b_io: ReadableBinStream) -> BBox: - self.bbox: BBox = tuple(_Array[float]("d", unpack("<4d", b_io.read(32)))) + self.bbox: BBox = BBox(*_Array[float]("d", unpack("<4d", b_io.read(32)))) return self.bbox @staticmethod @@ -1159,7 +1204,7 @@ def from_byte_stream( if bbox is not None: # create bounding box for Point by duplicating coordinates # skip shape if no overlap with bounding box - if not bbox_overlap(bbox, (x, y, x, y)): + if not bbox_overlap(bbox, BBox(x, y, x, y)): return None shape.points = [(x, y)] @@ -2174,16 +2219,18 @@ def __shpHeader(self) -> None: shp.seek(32) self.shapeType = unpack("= NODATA else None for m_bound in unpack("<2d", shp.read(16)) ] - self.mbox = tuple(m_bounds[:2]) + self.mbox = MBox(mmin=m_bounds[0], mmax=m_bounds[1]) def __shape( self, oid: Optional[int] = None, bbox: Optional[BBox] = None @@ -2382,11 +2429,9 @@ def __dbfHeader(self) -> None: name = encoded_name.decode(self.encoding, self.encodingErrors) name = name.lstrip() - type_char: FieldTypeCode = FIELD_TYPE_CODES.get( - encoded_type_char.decode("ascii").upper(), "C" - ) + field_type = FieldType[encoded_type_char.decode("ascii").upper()] - self.fields.append(FieldData(name, type_char, size, decimal)) + self.fields.append(FieldData(name, field_type, size, decimal)) terminator = dbf.read(1) if terminator != b"\r": raise ShapefileException( @@ -2394,7 +2439,7 @@ def __dbfHeader(self) -> None: ) # insert 
deletion field at start - self.fields.insert(0, FieldData("DeletionFlag", "C", 1, 0)) + self.fields.insert(0, FieldData("DeletionFlag", FieldType.C, 1, 0)) # store all field positions for easy lookups # note: fieldLookup gives the index position of a field inside Reader.fields @@ -2892,10 +2937,10 @@ def __bbox(self, s: Shape) -> BBox: y: list[float] = [] if self._bbox: - x.append(self._bbox[0]) - y.append(self._bbox[1]) - x.append(self._bbox[2]) - y.append(self._bbox[3]) + x.append(self._bbox.xmin) + y.append(self._bbox.ymin) + x.append(self._bbox.xmax) + y.append(self._bbox.ymax) if len(s.points) > 0: px, py = list(zip(*s.points))[:2] @@ -2909,7 +2954,7 @@ def __bbox(self, s: Shape) -> BBox: "Cannot create bbox. Expected a valid shape with at least one point. " f"Got a shape of type '{s.shapeType}' and 0 points." ) - self._bbox = (min(x), min(y), max(x), max(y)) + self._bbox = BBox(xmin=min(x), ymin=min(y), xmax=max(x), ymax=max(y)) return self._bbox def __zbox(self, s) -> ZBox: @@ -2927,14 +2972,14 @@ def __zbox(self, s) -> ZBox: # Original self._zbox bounds (if any) are the first two entries. # Set zbox for the first, and all later times - self._zbox = (min(z), max(z)) + self._zbox = ZBox(zmin=min(z), zmax=max(z)) return self._zbox def __mbox(self, s) -> MBox: mpos = 3 if s.shapeType in _HasZ._shapeTypes else 2 m: list[float] = [] if self._mbox: - m.extend(self._mbox) + m.extend(m_bound for m_bound in self._mbox if m_bound is not None) for p in s.points: try: @@ -2951,7 +2996,7 @@ def __mbox(self, s) -> MBox: # Original self._mbox bounds (if any) are the first two entries. # Set mbox for the first, and all later times - self._mbox = (min(m), max(m)) + self._mbox = MBox(mmin=min(m), mmax=max(m)) return self._mbox @property @@ -3003,7 +3048,7 @@ def __shapefileHeader( # In such cases of empty shapefiles, ESRI spec says the bbox values are 'unspecified'. # Not sure what that means, so for now just setting to 0s, which is the same behavior as in previous versions. 
# This would also make sense since the Z and M bounds are similarly set to 0 for non-Z/M type shapefiles. - bbox = (0, 0, 0, 0) + bbox = BBox(0, 0, 0, 0) f.write(pack("<4d", *bbox)) except error: raise ShapefileException( @@ -3017,20 +3062,20 @@ def __shapefileHeader( zbox = self.zbox() if zbox is None: # means we have empty shapefile/only null geoms (see commentary on bbox above) - zbox = (0, 0) + zbox = ZBox(0, 0) else: # As per the ESRI shapefile spec, the zbox for non-Z type shapefiles are set to 0s - zbox = (0, 0) + zbox = ZBox(0, 0) # Measure if self.shapeType in {POINTM, POINTZ} | _HasM._shapeTypes: # M values are present in M or Z type mbox = self.mbox() if mbox is None: # means we have empty shapefile/only null geoms (see commentary on bbox above) - mbox = (0, 0) + mbox = MBox(0, 0) else: # As per the ESRI shapefile spec, the mbox for non-M type shapefiles are set to 0s - mbox = (0, 0) + mbox = MBox(0, 0) # Try writing try: f.write(pack("<4d", zbox[0], zbox[1], mbox[0], mbox[1])) @@ -3078,7 +3123,7 @@ def __dbfHeader(self) -> None: encoded_name = name.encode(self.encoding, self.encodingErrors) encoded_name = encoded_name.replace(b" ", b"_") encoded_name = encoded_name[:10].ljust(11).replace(b" ", b"\x00") - encodedFieldType = fieldType.encode("ascii") + encodedFieldType = fieldType.name.encode("ascii") fld = pack("<11sc4xBB14x", encoded_name, encodedFieldType, size, decimal) f.write(fld) # Terminator @@ -3225,18 +3270,26 @@ def record( @staticmethod def _dbf_missing_placeholder( - value: RecordValue, fieldType: FieldTypeCode, size: int + value: RecordValue, fieldType: FieldType, size: int ) -> str: - if fieldType in {"N", "F"}: + if fieldType in {FieldType.N, FieldType.F}: return "*" * size # QGIS NULL - if fieldType == "D": + if fieldType is FieldType.D: return "0" * 8 # QGIS NULL for date type - if fieldType == "L": + if fieldType is FieldType.L: return " " return str(value) + @overload + @staticmethod + def _try_coerce_to_numeric_str(value: date, size: 
int, deci: int) -> Never: ... + @overload @staticmethod - def _try_coerce_to_numeric_str(value: RecordValue, size: int, deci: int) -> str: + def _try_coerce_to_numeric_str( + value: RecordValueNotDate, size: int, deci: int + ) -> str: ... + @staticmethod + def _try_coerce_to_numeric_str(value, size, deci): # numeric or float: number stored as a string, # right justified, and padded with blanks # to the width of the field. @@ -3247,17 +3300,20 @@ def _try_coerce_to_numeric_str(value: RecordValue, size: int, deci: int) -> str: # forcing a large int to float and back to int # will lose information and result in wrong nr. int_val = int(value) - except ValueError: + except (ValueError, TypeError): # forcing directly to int failed, so was probably a float. int_val = int(float(value)) + except TypeError: + raise ShapefileException(f"Could not form int from: {value}") + # length capped to the field size + return format(int_val, "d")[:size].rjust(size) - str_val = format(int_val, "d") - else: + try: f_val = float(value) - str_val = format(f_val, f".{deci}f") - - # caps the size if exceeds the field size - return str_val[:size].rjust(size) + except ValueError: + raise ShapefileException(f"Could not form float from: {value}") + # length capped to the field size + return format(f_val, f".{deci}f")[:size].rjust(size) @staticmethod def _try_coerce_to_date_str(value: RecordValue) -> str: @@ -3299,20 +3355,18 @@ def __dbfRecord(self, record: list[RecordValue]) -> None: ) # ignore deletionflag field in case it was specified for (fieldName, fieldType, size, deci), value in zip(fields, record): # write - fieldType = FIELD_TYPE_CODES.get(fieldType.upper(), fieldType) size = int(size) str_val: str if value in MISSING: str_val = self._dbf_missing_placeholder(value, fieldType, size) - if fieldType in {"N", "F"}: + elif fieldType in {FieldType.N, FieldType.F}: str_val = self._try_coerce_to_numeric_str(value, size, deci) - elif fieldType == "D": + elif fieldType is FieldType.D: str_val = 
self._try_coerce_to_date_str(value) - elif fieldType == "L": + elif fieldType is FieldType.L: str_val = self._try_coerce_to_logical_str(value) else: - # if isinstance(value, bytes): decoded_val = value.decode(self.encoding, self.encodingErrors) else: @@ -3489,29 +3543,17 @@ def field( # Types of args should match *FieldData self, name: str, - fieldType: FieldTypeCode = "C", + fieldType: Union[str, FieldType] = FieldType.C, size: int = 50, decimal: int = 0, ) -> None: """Adds a dbf field descriptor to the shapefile.""" - if fieldType == "D": - size = 8 - decimal = 0 - elif fieldType == "L": - size = 1 - decimal = 0 - elif fieldType not in {"C", "N", "F", "M"}: - raise ShapefileException( - "fieldType must be C,N,F,M,L or D. " f"Got: {fieldType=}. " - ) if len(self.fields) >= 2046: raise ShapefileException( "Shapefile Writer reached maximum number of fields: 2046." ) - # A doctest in README.md used to pass in a string ('40') for size, so - # try to be robust for incorrect types. - fieldType = FIELD_TYPE_CODES.get(str(fieldType)[0].upper(), fieldType) - self.fields.append(FieldData(str(name), fieldType, int(size), int(decimal))) + + self.fields.append(FieldData.from_unchecked(name, fieldType, size, decimal)) # Begin Testing From 4355660699de0c2fc1a12ecd3a4374d39d6010bd Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Thu, 31 Jul 2025 15:30:07 +0100 Subject: [PATCH 176/220] Remove unused imports and dedupe except TypeError: --- src/shapefile.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 92fe3a74..3e89bc68 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -38,14 +38,13 @@ TypedDict, TypeVar, Union, - cast, overload, ) from urllib.error import HTTPError from urllib.parse import urlparse, urlunparse from urllib.request import Request, urlopen -from typing_extensions import Never, NotRequired, Self, TypeIs, Unpack +from 
typing_extensions import Never, NotRequired, Self, TypeIs # Create named logger logger = logging.getLogger(__name__) @@ -3119,12 +3118,17 @@ def __dbfHeader(self) -> None: f.write(header) # Field descriptors for field in fields: - name, fieldType, size, decimal = field - encoded_name = name.encode(self.encoding, self.encodingErrors) + encoded_name = field.name.encode(self.encoding, self.encodingErrors) encoded_name = encoded_name.replace(b" ", b"_") encoded_name = encoded_name[:10].ljust(11).replace(b" ", b"\x00") - encodedFieldType = fieldType.name.encode("ascii") - fld = pack("<11sc4xBB14x", encoded_name, encodedFieldType, size, decimal) + encodedFieldType = field.fieldType.name.encode("ascii") + fld = pack( + "<11sc4xBB14x", + encoded_name, + encodedFieldType, + field.size, + field.decimal, + ) f.write(fld) # Terminator f.write(b"\r") @@ -3300,7 +3304,7 @@ def _try_coerce_to_numeric_str(value, size, deci): # forcing a large int to float and back to int # will lose information and result in wrong nr. int_val = int(value) - except (ValueError, TypeError): + except ValueError: # forcing directly to int failed, so was probably a float. int_val = int(float(value)) except TypeError: From 0c7dfc3be7ba2534d77baf7f63045077e836a095 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Thu, 31 Jul 2025 20:36:37 +0100 Subject: [PATCH 177/220] Define briefer Field.__repr__. Delete u() --- src/shapefile.py | 110 +++++++++++++++++++++-------------------------- 1 file changed, 49 insertions(+), 61 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 3e89bc68..0c470072 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -177,15 +177,16 @@ class FieldType(enum.Enum): # Use an ascii-encoded byte of the name, to save a decoding step. C = "Character" # (str) D = "Date" - F = "Float" + F = "Floating point" L = "Logical" # (bool) M = "Memo" # Legacy. 
(10 digit str, starting block in an .dbt file) N = "Numeric" # (int) -class FieldData(NamedTuple): +# Use functional syntax to have an attribute named type, a Python keyword +class Field(NamedTuple): name: str - fieldType: FieldType + field_type: FieldType size: int decimal: int @@ -193,29 +194,34 @@ class FieldData(NamedTuple): def from_unchecked( cls, name: str, - fieldType: Union[str, FieldType] = FieldType.C, + field_type: Union[str, FieldType] = FieldType.C, size: int = 50, decimal: int = 0, ) -> Self: - if isinstance(fieldType, str): - try: - fieldType = FieldType[fieldType.upper()] - except: + if isinstance(field_type, str): + if field_type.upper() in FieldType.__members__: + field_type = FieldType[field_type.upper()] + else: raise ShapefileException( - "fieldType must be C,N,F,M,L,D or a FieldType enum member. " - f"Got: {fieldType=}. " + "type must be C,D,F,L,M,N, or a FieldType enum member. " + f"Got: {field_type=}. " ) - if fieldType is FieldType.D: + if field_type is FieldType.D: size = 8 decimal = 0 - elif fieldType is FieldType.L: + elif field_type is FieldType.L: size = 1 decimal = 0 # A doctest in README.md previously passed in a string ('40') for size, # so explictly convert name to str, and size and decimal to ints. - return cls(str(name), fieldType, int(size), int(decimal)) + return cls( + name=str(name), field_type=field_type, size=int(size), decimal=int(decimal) + ) + + def __repr__(self) -> str: + return f'Field(name="{self.name}", field_type=FieldType.{self.field_type.name}, size={self.size}, decimal={self.decimal})' RecordValueNotDate = Union[bool, int, float, str, date] @@ -327,22 +333,6 @@ class GeoJSONFeatureCollectionWithBBox(GeoJSONFeatureCollection): unpack_2_int32_be = Struct(">2i").unpack -def u( - v: Union[str, bytes], encoding: str = "utf-8", encodingErrors: str = "strict" -) -> str: - if isinstance(v, bytes): - # For python 3 decode bytes to str. - return v.decode(encoding, encodingErrors) - if isinstance(v, str): - # Already str. 
- return v - if v is None: - # Since we're dealing with text, interpret None as "" - return "" - # Force string representation. - return bytes(v).decode(encoding, encodingErrors) - - @overload def fsdecode_if_pathlike(path: os.PathLike) -> str: ... @overload @@ -1818,7 +1808,7 @@ def __init__( self.shpLength: Optional[int] = None self.numRecords: Optional[int] = None self.numShapes: Optional[int] = None - self.fields: list[FieldData] = [] + self.fields: list[Field] = [] self.__dbfHdrLength = 0 self.__fieldLookup: dict[str, int] = {} self.encoding = encoding @@ -2430,7 +2420,7 @@ def __dbfHeader(self) -> None: field_type = FieldType[encoded_type_char.decode("ascii").upper()] - self.fields.append(FieldData(name, field_type, size, decimal)) + self.fields.append(Field(name, field_type, size, decimal)) terminator = dbf.read(1) if terminator != b"\r": raise ShapefileException( @@ -2438,7 +2428,7 @@ def __dbfHeader(self) -> None: ) # insert deletion field at start - self.fields.insert(0, FieldData("DeletionFlag", FieldType.C, 1, 0)) + self.fields.insert(0, Field("DeletionFlag", FieldType.C, 1, 0)) # store all field positions for easy lookups # note: fieldLookup gives the index position of a field inside Reader.fields @@ -2479,7 +2469,7 @@ def __recordFmt(self, fields: Optional[Container[str]] = None) -> tuple[str, int def __recordFields( self, fields: Optional[Iterable[str]] = None - ) -> tuple[list[FieldData], dict[str, int], Struct]: + ) -> tuple[list[Field], dict[str, int], Struct]: """Returns the necessary info required to unpack a record's fields, restricted to a subset of fieldnames 'fields' if specified. Returns a list of field info tuples, a name-index lookup dict, @@ -2514,13 +2504,13 @@ def __recordFields( def __record( self, - fieldTuples: list[FieldData], + fieldTuples: list[Field], recLookup: dict[str, int], recStruct: Struct, oid: Optional[int] = None, ) -> Optional[_Record]: """Reads and returns a dbf record row as a list of values. 
Requires specifying
-        a list of field info FieldData namedtuples 'fieldTuples', a record name-index dict 'recLookup',
+        a list of field info Field namedtuples 'fieldTuples', a record name-index dict 'recLookup',
         and a Struct instance 'recStruct' for unpacking these fields.
         """
         f = self.__getFileObj(self.dbf)
@@ -2547,14 +2537,14 @@ def __record(

         # parse each value
         record = []
-        for (__name, typ, __size, deci), value in zip(fieldTuples, recordContents):
+        for (__name, typ, __size, decimal), value in zip(fieldTuples, recordContents):
             if typ in {"N", "F"}:
                 # numeric or float: number stored as a string, right justified, and padded with blanks to the width of the field.
                 value = value.split(b"\0")[0]
                 value = value.replace(b"*", b"")  # QGIS NULL is all '*' chars
                 if value == b"":
                     value = None
-                elif deci:
+                elif decimal:
                     try:
                         value = float(value)
                     except ValueError:
@@ -2590,7 +2580,7 @@ def __record(
                     y, m, d = int(value[:4]), int(value[4:6]), int(value[6:8])
                     value = date(y, m, d)
                 except (TypeError, ValueError):
-                    # if invalid date, just return as unicode string so user can decide
+                    # if invalid date, just return as unicode string so user can decide
                     value = str(value.strip())
                 elif typ == "L":
                     # logical: 1 byte - initialized to 0x20 (space) otherwise T or F. 
@@ -2781,7 +2771,7 @@ def __init__( ): self.target = target self.autoBalance = autoBalance - self.fields: list[FieldData] = [] + self.fields: list[Field] = [] self.shapeType = shapeType self.shp: Optional[WriteSeekableBinStream] = None self.shx: Optional[WriteSeekableBinStream] = None @@ -3121,7 +3111,7 @@ def __dbfHeader(self) -> None: encoded_name = field.name.encode(self.encoding, self.encodingErrors) encoded_name = encoded_name.replace(b" ", b"_") encoded_name = encoded_name[:10].ljust(11).replace(b" ", b"\x00") - encodedFieldType = field.fieldType.name.encode("ascii") + encodedFieldType = field.field_type.name.encode("ascii") fld = pack( "<11sc4xBB14x", encoded_name, @@ -3273,31 +3263,29 @@ def record( self.__dbfRecord(record) @staticmethod - def _dbf_missing_placeholder( - value: RecordValue, fieldType: FieldType, size: int - ) -> str: - if fieldType in {FieldType.N, FieldType.F}: + def _dbf_missing_placeholder(value: RecordValue, type: FieldType, size: int) -> str: + if type in {FieldType.N, FieldType.F}: return "*" * size # QGIS NULL - if fieldType is FieldType.D: + if type is FieldType.D: return "0" * 8 # QGIS NULL for date type - if fieldType is FieldType.L: + if type is FieldType.L: return " " return str(value) @overload @staticmethod - def _try_coerce_to_numeric_str(value: date, size: int, deci: int) -> Never: ... + def _try_coerce_to_numeric_str(value: date, size: int, decimal: int) -> Never: ... @overload @staticmethod def _try_coerce_to_numeric_str( - value: RecordValueNotDate, size: int, deci: int + value: RecordValueNotDate, size: int, decimal: int ) -> str: ... @staticmethod - def _try_coerce_to_numeric_str(value, size, deci): + def _try_coerce_to_numeric_str(value, size, decimal): # numeric or float: number stored as a string, # right justified, and padded with blanks # to the width of the field. - if not deci: + if not decimal: # force to int try: # first try to force directly to int. 
@@ -3317,7 +3305,7 @@ def _try_coerce_to_numeric_str(value, size, deci): except ValueError: raise ShapefileException(f"Could not form float from: {value}") # length capped to the field size - return format(f_val, f".{deci}f")[:size].rjust(size) + return format(f_val, f".{decimal}f")[:size].rjust(size) @staticmethod def _try_coerce_to_date_str(value: RecordValue) -> str: @@ -3357,18 +3345,18 @@ def __dbfRecord(self, record: list[RecordValue]) -> None: fields = ( field for field in self.fields if field[0] != "DeletionFlag" ) # ignore deletionflag field in case it was specified - for (fieldName, fieldType, size, deci), value in zip(fields, record): + for (fieldName, type, size, decimal), value in zip(fields, record): # write size = int(size) str_val: str if value in MISSING: - str_val = self._dbf_missing_placeholder(value, fieldType, size) - elif fieldType in {FieldType.N, FieldType.F}: - str_val = self._try_coerce_to_numeric_str(value, size, deci) - elif fieldType is FieldType.D: + str_val = self._dbf_missing_placeholder(value, type, size) + elif type in {FieldType.N, FieldType.F}: + str_val = self._try_coerce_to_numeric_str(value, size, decimal) + elif type is FieldType.D: str_val = self._try_coerce_to_date_str(value) - elif fieldType is FieldType.L: + elif type is FieldType.L: str_val = self._try_coerce_to_logical_str(value) else: if isinstance(value, bytes): @@ -3544,10 +3532,10 @@ def _shapeparts( self.shape(polyShape) def field( - # Types of args should match *FieldData + # Types of args should match *Field self, name: str, - fieldType: Union[str, FieldType] = FieldType.C, + field_type: Union[str, FieldType] = FieldType.C, size: int = 50, decimal: int = 0, ) -> None: @@ -3556,8 +3544,8 @@ def field( raise ShapefileException( "Shapefile Writer reached maximum number of fields: 2046." 
) - - self.fields.append(FieldData.from_unchecked(name, fieldType, size, decimal)) + field_ = Field.from_unchecked(name, field_type, size, decimal) + self.fields.append(field_) # Begin Testing From 5f3f0d27ab8a97aff38c3f47b86684a8f8f8c441 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Thu, 31 Jul 2025 20:42:54 +0100 Subject: [PATCH 178/220] Don't shadow builtin type --- src/shapefile.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 0c470072..29fc76e2 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -3263,12 +3263,14 @@ def record( self.__dbfRecord(record) @staticmethod - def _dbf_missing_placeholder(value: RecordValue, type: FieldType, size: int) -> str: - if type in {FieldType.N, FieldType.F}: + def _dbf_missing_placeholder( + value: RecordValue, field_type: FieldType, size: int + ) -> str: + if field_type in {FieldType.N, FieldType.F}: return "*" * size # QGIS NULL - if type is FieldType.D: + if field_type is FieldType.D: return "0" * 8 # QGIS NULL for date type - if type is FieldType.L: + if field_type is FieldType.L: return " " return str(value) @@ -3345,18 +3347,18 @@ def __dbfRecord(self, record: list[RecordValue]) -> None: fields = ( field for field in self.fields if field[0] != "DeletionFlag" ) # ignore deletionflag field in case it was specified - for (fieldName, type, size, decimal), value in zip(fields, record): + for (fieldName, type_, size, decimal), value in zip(fields, record): # write size = int(size) str_val: str if value in MISSING: - str_val = self._dbf_missing_placeholder(value, type, size) - elif type in {FieldType.N, FieldType.F}: + str_val = self._dbf_missing_placeholder(value, type_, size) + elif type_ in {FieldType.N, FieldType.F}: str_val = self._try_coerce_to_numeric_str(value, size, decimal) - elif type is FieldType.D: + elif type_ is FieldType.D: str_val = self._try_coerce_to_date_str(value) - 
elif type is FieldType.L: + elif type_ is FieldType.L: str_val = self._try_coerce_to_logical_str(value) else: if isinstance(value, bytes): From 87a1ff53bb022d54e0123b2c407cacaa098055ce Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Thu, 31 Jul 2025 21:32:17 +0100 Subject: [PATCH 179/220] Passes doctests (adjusted to reflect new API: Field, BBox, ZBox & MBox) --- README.md | 45 +++++++++++++++++---------------------------- src/shapefile.py | 32 ++++++++++++++++---------------- 2 files changed, 33 insertions(+), 44 deletions(-) diff --git a/README.md b/README.md index a0e8f448..80f04314 100644 --- a/README.md +++ b/README.md @@ -406,7 +406,7 @@ and the bounding box area the shapefile covers: >>> len(sf) 663 >>> sf.bbox - (-122.515048, 37.652916, -122.327622, 37.863433) + BBox(xmin=-122.515048, ymin=37.652916, xmax=-122.327622, ymax=37.863433) Finally, if you would prefer to work with the entire shapefile in a different format, you can convert all of it to a GeoJSON dictionary, although you may lose @@ -553,45 +553,34 @@ in the shp geometry file and the dbf attribute file. The field names of a shapefile are available as soon as you read a shapefile. You can call the "fields" attribute of the shapefile as a Python list. Each -field is a Python tuple with the following information: +field is a Python namedtuple (Field) with the following information: - * Field name: the name describing the data at this column index. - * Field type: the type of data at this column index. Types can be: + * name: the name describing the data at this column index (a string). + * field_type: a FieldType enum member determining the type of data at this column index. Names can be: * "C": Characters, text. * "N": Numbers, with or without decimals. * "F": Floats (same as "N"). * "L": Logical, for boolean True/False values. * "D": Dates. * "M": Memo, has no meaning within a GIS and is part of the xbase spec instead. 
- * Field length: the length of the data found at this column index. Older GIS + * size: Field length: the length of the data found at this column index. Older GIS software may truncate this length to 8 or 11 characters for "Character" fields. - * Decimal length: the number of decimal places found in "Number" fields. + * decimal: Decimal length. The number of decimal places found in "Number" fields. + +A new field can be created directly from the type enum member etc., or as follows: + + >>> shapefile.Field.from_unchecked("Population", "N", 10, 0) + Field(name="Population", field_type=FieldType.N, size=10, decimal=0) + +Using this method the conversion from string to enum is done automatically. To see the fields for the Reader object above (sf) call the "fields" attribute: - >>> fields = sf.fields - - >>> assert fields == [("DeletionFlag", "C", 1, 0), ("AREA", "N", 18, 5), - ... ("BKG_KEY", "C", 12, 0), ("POP1990", "N", 9, 0), ("POP90_SQMI", "N", 10, 1), - ... ("HOUSEHOLDS", "N", 9, 0), - ... ("MALES", "N", 9, 0), ("FEMALES", "N", 9, 0), ("WHITE", "N", 9, 0), - ... ("BLACK", "N", 8, 0), ("AMERI_ES", "N", 7, 0), ("ASIAN_PI", "N", 8, 0), - ... ("OTHER", "N", 8, 0), ("HISPANIC", "N", 8, 0), ("AGE_UNDER5", "N", 8, 0), - ... ("AGE_5_17", "N", 8, 0), ("AGE_18_29", "N", 8, 0), ("AGE_30_49", "N", 8, 0), - ... ("AGE_50_64", "N", 8, 0), ("AGE_65_UP", "N", 8, 0), - ... ("NEVERMARRY", "N", 8, 0), ("MARRIED", "N", 9, 0), ("SEPARATED", "N", 7, 0), - ... ("WIDOWED", "N", 8, 0), ("DIVORCED", "N", 8, 0), ("HSEHLD_1_M", "N", 8, 0), - ... ("HSEHLD_1_F", "N", 8, 0), ("MARHH_CHD", "N", 8, 0), - ... ("MARHH_NO_C", "N", 8, 0), ("MHH_CHILD", "N", 7, 0), - ... ("FHH_CHILD", "N", 7, 0), ("HSE_UNITS", "N", 9, 0), ("VACANT", "N", 7, 0), - ... ("OWNER_OCC", "N", 8, 0), ("RENTER_OCC", "N", 8, 0), - ... ("MEDIAN_VAL", "N", 7, 0), ("MEDIANRENT", "N", 4, 0), - ... ("UNITS_1DET", "N", 8, 0), ("UNITS_1ATT", "N", 7, 0), ("UNITS2", "N", 7, 0), - ... 
("UNITS3_9", "N", 8, 0), ("UNITS10_49", "N", 8, 0), - ... ("UNITS50_UP", "N", 8, 0), ("MOBILEHOME", "N", 7, 0)] + >>> sf.fields + [Field(name="DeletionFlag", field_type=FieldType.C, size=1, decimal=0), Field(name="AREA", field_type=FieldType.N, size=18, decimal=5), Field(name="BKG_KEY", field_type=FieldType.C, size=12, decimal=0), Field(name="POP1990", field_type=FieldType.N, size=9, decimal=0), Field(name="POP90_SQMI", field_type=FieldType.N, size=10, decimal=1), Field(name="HOUSEHOLDS", field_type=FieldType.N, size=9, decimal=0), Field(name="MALES", field_type=FieldType.N, size=9, decimal=0), Field(name="FEMALES", field_type=FieldType.N, size=9, decimal=0), Field(name="WHITE", field_type=FieldType.N, size=9, decimal=0), Field(name="BLACK", field_type=FieldType.N, size=8, decimal=0), Field(name="AMERI_ES", field_type=FieldType.N, size=7, decimal=0), Field(name="ASIAN_PI", field_type=FieldType.N, size=8, decimal=0), Field(name="OTHER", field_type=FieldType.N, size=8, decimal=0), Field(name="HISPANIC", field_type=FieldType.N, size=8, decimal=0), Field(name="AGE_UNDER5", field_type=FieldType.N, size=8, decimal=0), Field(name="AGE_5_17", field_type=FieldType.N, size=8, decimal=0), Field(name="AGE_18_29", field_type=FieldType.N, size=8, decimal=0), Field(name="AGE_30_49", field_type=FieldType.N, size=8, decimal=0), Field(name="AGE_50_64", field_type=FieldType.N, size=8, decimal=0), Field(name="AGE_65_UP", field_type=FieldType.N, size=8, decimal=0), Field(name="NEVERMARRY", field_type=FieldType.N, size=8, decimal=0), Field(name="MARRIED", field_type=FieldType.N, size=9, decimal=0), Field(name="SEPARATED", field_type=FieldType.N, size=7, decimal=0), Field(name="WIDOWED", field_type=FieldType.N, size=8, decimal=0), Field(name="DIVORCED", field_type=FieldType.N, size=8, decimal=0), Field(name="HSEHLD_1_M", field_type=FieldType.N, size=8, decimal=0), Field(name="HSEHLD_1_F", field_type=FieldType.N, size=8, decimal=0), Field(name="MARHH_CHD", field_type=FieldType.N, size=8, 
decimal=0), Field(name="MARHH_NO_C", field_type=FieldType.N, size=8, decimal=0), Field(name="MHH_CHILD", field_type=FieldType.N, size=7, decimal=0), Field(name="FHH_CHILD", field_type=FieldType.N, size=7, decimal=0), Field(name="HSE_UNITS", field_type=FieldType.N, size=9, decimal=0), Field(name="VACANT", field_type=FieldType.N, size=7, decimal=0), Field(name="OWNER_OCC", field_type=FieldType.N, size=8, decimal=0), Field(name="RENTER_OCC", field_type=FieldType.N, size=8, decimal=0), Field(name="MEDIAN_VAL", field_type=FieldType.N, size=7, decimal=0), Field(name="MEDIANRENT", field_type=FieldType.N, size=4, decimal=0), Field(name="UNITS_1DET", field_type=FieldType.N, size=8, decimal=0), Field(name="UNITS_1ATT", field_type=FieldType.N, size=7, decimal=0), Field(name="UNITS2", field_type=FieldType.N, size=7, decimal=0), Field(name="UNITS3_9", field_type=FieldType.N, size=8, decimal=0), Field(name="UNITS10_49", field_type=FieldType.N, size=8, decimal=0), Field(name="UNITS50_UP", field_type=FieldType.N, size=8, decimal=0), Field(name="MOBILEHOME", field_type=FieldType.N, size=7, decimal=0)] The first field of a dbf file is always a 1-byte field called "DeletionFlag", which indicates records that have been deleted but not removed. 
However, @@ -1375,7 +1364,7 @@ Shapefiles containing M-values can be examined in several ways: >>> r = shapefile.Reader('shapefiles/test/linem') >>> r.mbox # the lower and upper bound of M-values in the shapefile - (0.0, 3.0) + MBox(mmin=0.0, mmax=3.0) >>> r.shape(0).m # flat list of M-values [0.0, None, 3.0, None, 0.0, None, None] @@ -1408,7 +1397,7 @@ To examine a Z-type shapefile you can do: >>> r = shapefile.Reader('shapefiles/test/linez') >>> r.zbox # the lower and upper bound of Z-values in the shapefile - (0.0, 22.0) + ZBox(zmin=0.0, zmax=22.0) >>> r.shape(0).z # flat list of Z-values [18.0, 20.0, 22.0, 0.0, 0.0, 0.0, 0.0, 15.0, 13.0, 14.0] diff --git a/src/shapefile.py b/src/shapefile.py index 29fc76e2..f002bddc 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -327,7 +327,7 @@ class GeoJSONFeatureCollectionWithBBox(GeoJSONFeatureCollection): # Helpers -MISSING = {None, ""} +MISSING = (None, "") # Don't make a set, as user input may not be Hashable NODATA = -10e38 # as per the ESRI shapefile spec, only used for m-values. unpack_2_int32_be = Struct(">2i").unpack @@ -2538,7 +2538,7 @@ def __record( # parse each value record = [] for (__name, typ, __size, decimal), value in zip(fieldTuples, recordContents): - if typ in {"N", "F"}: + if typ in {FieldType.N, FieldType.F}: # numeric or float: number stored as a string, right justified, and padded with blanks to the width of the field. value = value.split(b"\0")[0] value = value.replace(b"*", b"") # QGIS NULL is all '*' chars @@ -2564,7 +2564,7 @@ def __record( except ValueError: # not parseable as int, set to None value = None - elif typ == "D": + elif typ is FieldType.D: # date: 8 bytes - date stored as a string in the format YYYYMMDD. 
if ( not value.replace(b"\x00", b"") @@ -2582,7 +2582,7 @@ def __record( except (TypeError, ValueError): # if invalid date, just return as unicode string so user can decide value = str(value.strip()) - elif typ == "L": + elif typ is FieldType.L: # logical: 1 byte - initialized to 0x20 (space) otherwise T or F. if value == b" ": value = None # space means missing or not yet set @@ -3225,7 +3225,7 @@ def __shxRecord(self, offset: int, length: int) -> None: def record( self, - *recordList: list[RecordValue], + *recordList: RecordValue, **recordDict: RecordValue, ) -> None: """Creates a dbf attribute record. You can submit either a sequence of @@ -3241,7 +3241,7 @@ def record( record: list[RecordValue] fieldCount = sum(1 for field in self.fields if field[0] != "DeletionFlag") if recordList: - record = list(*recordList) + record = list(recordList) while len(record) < fieldCount: record.append("") elif recordDict: @@ -3272,7 +3272,7 @@ def _dbf_missing_placeholder( return "0" * 8 # QGIS NULL for date type if field_type is FieldType.L: return " " - return str(value) + return str(value)[:size].ljust(size) @overload @staticmethod @@ -3362,20 +3362,20 @@ def __dbfRecord(self, record: list[RecordValue]) -> None: str_val = self._try_coerce_to_logical_str(value) else: if isinstance(value, bytes): - decoded_val = value.decode(self.encoding, self.encodingErrors) + str_val = value.decode(self.encoding, self.encodingErrors) else: # anything else is forced to string. 
- decoded_val = str(value) - # Truncate to the length of the field - str_val = decoded_val[:size].ljust(size) + str_val = str(value) - # should be default ascii encoding - encoded_val = str_val.encode("ascii", self.encodingErrors) + # Truncate or right pad to the length of the field + encoded_val = str_val.encode(self.encoding, self.encodingErrors)[ + :size + ].ljust(size) if len(encoded_val) != size: raise ShapefileException( - "Shapefile Writer unable to pack incorrect sized value" - f" (size {len(encoded_val)}) into field '{fieldName}' (size {size})." + f"Shapefile Writer unable to pack incorrect sized {value=!r} " + f"(size {len(encoded_val)}) into field '{fieldName}' (size {size})." ) f.write(encoded_val) @@ -3674,7 +3674,7 @@ def _test(args: list[str] = sys.argv[1:], verbosity: bool = False) -> int: new_url = _replace_remote_url(old_url) example.source = example.source.replace(old_url, new_url) - runner = doctest.DocTestRunner(verbose=verbosity) + runner = doctest.DocTestRunner(verbose=verbosity, optionflags=doctest.FAIL_FAST) if verbosity == 0: print(f"Running {len(tests.examples)} doctests...") From 5dc6257258d758ffb8045b1c66cf7f147b1e0ee1 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Thu, 31 Jul 2025 21:35:06 +0100 Subject: [PATCH 180/220] Adjust unit test to allow for new field type enum --- test_shapefile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test_shapefile.py b/test_shapefile.py index 2a10d3ee..a2ffbff6 100644 --- a/test_shapefile.py +++ b/test_shapefile.py @@ -695,7 +695,7 @@ def test_reader_fields(): field = fields[0] assert isinstance(field[0], str) # field name - assert field[1] in ["C", "N", "F", "L", "D", "M"] # field type + assert field[1].name in ["C", "N", "F", "L", "D", "M"] # field type assert isinstance(field[2], int) # field length assert isinstance(field[3], int) # decimal length From 435dd3a610aaefa2eb44beaba1bd3a8695e16e0b Mon Sep 17 00:00:00 2001 From: 
James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Thu, 31 Jul 2025 21:46:58 +0100 Subject: [PATCH 181/220] Update docs --- README.md | 28 ++++++++++++++++++++++++++-- changelog.txt | 15 +++++++++++++-- 2 files changed, 39 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 80f04314..1fb969f9 100644 --- a/README.md +++ b/README.md @@ -8,8 +8,8 @@ The Python Shapefile Library (PyShp) reads and writes ESRI Shapefiles in pure Py - **Author**: [Joel Lawhead](https://github.com/GeospatialPython) - **Maintainers**: [Karim Bahgat](https://github.com/karimbahgat) -- **Version**: 2.3.1 -- **Date**: 28 July, 2022 +- **Version**: 3.0.0-alpha +- **Date**: 31 July, 2025 - **License**: [MIT](https://github.com/GeospatialPython/pyshp/blob/master/LICENSE.TXT) ## Contents @@ -93,6 +93,30 @@ part of your geospatial project. # Version Changes +## 3.0.0-alpha + +### Breaking Changes: +- Python 2 and Python 3.8 support dropped. +- Field info tuple is now a namedtuple (Field) instead of a list. +- Field type codes are now FieldType enum members. +- bbox, mbox and zbox attributes are all new Namedtuples. +- Writer does not mutate shapes. +- New custom subclasses for each shape type: Null, Multipatch, Point, Polyline, + Multipoint, and Polygon, plus the latter 4's M and Z variants (Reader and + Writer are still compatible with their base class, Shape, as before). +- Shape sub classes are creatable from, and serializable to bytes streams, + as per the shapefile spec. + +### Code quality +- Statically typed, and checked with Mypy +- Checked with Ruff. +- f-strings +- Remove Python 2 specific functions. +- Run doctests against wheels. +- Testing of wheels before publishing them +- pyproject.toml src layout +- Slow test marked. + ## 2.4.0 ### Breaking Change. Support for Python 2 and Pythons <= 3.8 to be dropped. 
diff --git a/changelog.txt b/changelog.txt index 48a534a6..d2c5f510 100644 --- a/changelog.txt +++ b/changelog.txt @@ -1,9 +1,20 @@ VERSION 3.0.0-alpha -Python 2 and Python 3.8 support dropped + Breaking Changes: + * Python 2 and Python 3.8 support dropped. + * Field info tuple is now a namedtuple (Field) instead of a list. + * Field type codes are now FieldType enum members. + * bbox, mbox and zbox attributes are all new Namedtuples. + * Writer does not mutate shapes. + * New custom subclasses for each shape type: Null, Multipatch, Point, Polyline, + Multipoint, and Polygon, plus the latter 4's M and Z variants (Reader and + Writer are still compatible with their base class, Shape, as before). + * Shape sub classes are creatable from, and serializable to bytes streams, + as per the shapefile spec. -2025-07-22 Code quality + * Statically typed and checked with Mypy + * Checked with Ruff. * Type hints * f-strings * Remove Python 2 specific functions. From c9203040c8c7455f6818d1dc41b239a33d010cea Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Thu, 31 Jul 2025 21:50:09 +0100 Subject: [PATCH 182/220] Trim trailing whitespace --- README.md | 6 +++--- changelog.txt | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 1fb969f9..cd4758ca 100644 --- a/README.md +++ b/README.md @@ -101,10 +101,10 @@ part of your geospatial project. - Field type codes are now FieldType enum members. - bbox, mbox and zbox attributes are all new Namedtuples. - Writer does not mutate shapes. -- New custom subclasses for each shape type: Null, Multipatch, Point, Polyline, - Multipoint, and Polygon, plus the latter 4's M and Z variants (Reader and +- New custom subclasses for each shape type: Null, Multipatch, Point, Polyline, + Multipoint, and Polygon, plus the latter 4's M and Z variants (Reader and Writer are still compatible with their base class, Shape, as before). 
-- Shape sub classes are creatable from, and serializable to bytes streams, +- Shape sub classes are creatable from, and serializable to bytes streams, as per the shapefile spec. ### Code quality diff --git a/changelog.txt b/changelog.txt index d2c5f510..45bfd76a 100644 --- a/changelog.txt +++ b/changelog.txt @@ -6,10 +6,10 @@ VERSION 3.0.0-alpha * Field type codes are now FieldType enum members. * bbox, mbox and zbox attributes are all new Namedtuples. * Writer does not mutate shapes. - * New custom subclasses for each shape type: Null, Multipatch, Point, Polyline, - Multipoint, and Polygon, plus the latter 4's M and Z variants (Reader and + * New custom subclasses for each shape type: Null, Multipatch, Point, Polyline, + Multipoint, and Polygon, plus the latter 4's M and Z variants (Reader and Writer are still compatible with their base class, Shape, as before). - * Shape sub classes are creatable from, and serializable to bytes streams, + * Shape sub classes are creatable from, and serializable to bytes streams, as per the shapefile spec. 
Code quality From 556c7cb4ebc587d95e52929be7504b8bf9415f6f Mon Sep 17 00:00:00 2001 From: Mike Taves Date: Thu, 31 Jul 2025 13:00:18 +1200 Subject: [PATCH 183/220] Upgrade pre-commit repos and add ruff-check; add "dev" optional --- .pre-commit-config.yaml | 10 +++++++--- pyproject.toml | 1 + run_benchmarks.py | 2 +- src/shapefile.py | 4 ++-- 4 files changed, 11 insertions(+), 6 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 3849c557..1ef33138 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,16 +1,20 @@ repos: - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.6.4 + rev: v0.12.7 hooks: + # Run the linter + - id: ruff-check + args: [ --fix ] + # Run the formatter - id: ruff-format - repo: https://github.com/pycqa/isort - rev: 5.13.2 + rev: 6.0.1 hooks: - id: isort name: isort (python) args: ["--profile", "black"] - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v2.3.0 + rev: v5.0.0 hooks: - id: check-yaml - id: trailing-whitespace diff --git a/pyproject.toml b/pyproject.toml index ca8f667c..7d043ae5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,6 +31,7 @@ dependencies = [ ] [project.optional-dependencies] +dev = ["pyshp[test]", "pre-commit", "ruff"] test = ["pytest"] [project.urls] diff --git a/run_benchmarks.py b/run_benchmarks.py index edc2119a..ac31e1cb 100644 --- a/run_benchmarks.py +++ b/run_benchmarks.py @@ -41,7 +41,7 @@ def benchmark( time_taken = timeit.timeit(func, number=run_count) print("\b" * len(placeholder), end="") time_suffix = " s" - print(f"{time_taken:{col_widths[1]-len(time_suffix)}.3g}{time_suffix}", end="") + print(f"{time_taken:{col_widths[1] - len(time_suffix)}.3g}{time_suffix}", end="") print() return time_taken diff --git a/src/shapefile.py b/src/shapefile.py index f002bddc..65ffce23 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -1896,7 +1896,7 @@ def __init__( # Close and delete the temporary zipfile try: zipfileobj.close() - except: # 
pylint: disable=bare-except + except: # pylint: disable=bare-except # noqa: E722 pass # Try to load shapefile if self.shp or self.dbf: @@ -3155,7 +3155,7 @@ def __shpRecord(self, s: Shape) -> tuple[int, int]: # Shape Type if self.shapeType is None and s.shapeType != NULL: self.shapeType = s.shapeType - if not s.shapeType in {NULL, self.shapeType}: + if s.shapeType not in {NULL, self.shapeType}: raise ShapefileException( f"The shape's type ({s.shapeType}) must match " f"the type of the shapefile ({self.shapeType})." From 1942a590b4ede949a12d4e8fcd76966a3dc4a105 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 1 Aug 2025 12:17:37 +0100 Subject: [PATCH 184/220] replace == POINTZ and in {} with compatible_with(s, Point[ZM]) --- src/shapefile.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 65ffce23..1179cbc0 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -1218,11 +1218,11 @@ def write_to_byte_stream( n = Point._write_x_y_to_byte_stream(b_io, x, y, i) # Write a single Z value - if s.shapeType == POINTZ: + if compatible_with(s, PointZ): n += PointZ._write_single_point_z_to_byte_stream(b_io, s, i) # Write a single M value - if s.shapeType in {POINTM, POINTZ}: + if compatible_with(s, PointM): n += PointM._write_single_point_m_to_byte_stream(b_io, s, i) return n From a460c1ca9a2cb735bc690fae5a2eed7f29116b1e Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 1 Aug 2025 12:24:08 +0100 Subject: [PATCH 185/220] Remove sets of two enum members with identity checked against both --- src/shapefile.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 1179cbc0..0694b0e3 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -2538,7 +2538,7 @@ def __record( # parse each value record = [] for (__name, typ, __size, decimal),
value in zip(fieldTuples, recordContents): - if typ in {FieldType.N, FieldType.F}: + if typ is FieldType.N or typ is FieldType.F: # numeric or float: number stored as a string, right justified, and padded with blanks to the width of the field. value = value.split(b"\0")[0] value = value.replace(b"*", b"") # QGIS NULL is all '*' chars @@ -3046,7 +3046,7 @@ def __shapefileHeader( else: f.write(pack("<4d", 0, 0, 0, 0)) # Elevation - if self.shapeType in {POINTZ} | _HasZ._shapeTypes: + if self.shapeType in PointZ._shapeTypes | _HasZ._shapeTypes: # Z values are present in Z type zbox = self.zbox() if zbox is None: @@ -3056,7 +3056,7 @@ def __shapefileHeader( # As per the ESRI shapefile spec, the zbox for non-Z type shapefiles are set to 0s zbox = ZBox(0, 0) # Measure - if self.shapeType in {POINTM, POINTZ} | _HasM._shapeTypes: + if self.shapeType in PointM._shapeTypes | _HasM._shapeTypes: # M values are present in M or Z type mbox = self.mbox() if mbox is None: @@ -3155,7 +3155,7 @@ def __shpRecord(self, s: Shape) -> tuple[int, int]: # Shape Type if self.shapeType is None and s.shapeType != NULL: self.shapeType = s.shapeType - if s.shapeType not in {NULL, self.shapeType}: + if s.shapeType != NULL and s.shapeType != self.shapeType: raise ShapefileException( f"The shape's type ({s.shapeType}) must match " f"the type of the shapefile ({self.shapeType})." 
@@ -3166,11 +3166,11 @@ def __shpRecord(self, s: Shape) -> tuple[int, int]: new_bbox = self.__bbox(s) if s.shapeType != NULL else None new_mbox = ( self.__mbox(s) - if s.shapeType in {POINTM, POINTZ} | _HasM._shapeTypes + if s.shapeType in _PointM._shapeTypes | _HasM._shapeTypes else None ) new_zbox = ( - self.__zbox(s) if s.shapeType in {POINTZ} | _HasZ._shapeTypes else None + self.__zbox(s) if s.shapeType in _PointZ._shapeTypes | _HasZ._shapeTypes else None ) # Create an in-memory binary buffer to avoid @@ -3266,7 +3266,7 @@ def record( def _dbf_missing_placeholder( value: RecordValue, field_type: FieldType, size: int ) -> str: - if field_type in {FieldType.N, FieldType.F}: + if field_type is FieldType.N or field_type is FieldType.F: return "*" * size # QGIS NULL if field_type is FieldType.D: return "0" * 8 # QGIS NULL for date type @@ -3354,7 +3354,7 @@ def __dbfRecord(self, record: list[RecordValue]) -> None: if value in MISSING: str_val = self._dbf_missing_placeholder(value, type_, size) - elif type_ in {FieldType.N, FieldType.F}: + elif type_ is FieldType.N or type_ is FieldType.F: str_val = self._try_coerce_to_numeric_str(value, size, decimal) elif type_ is FieldType.D: str_val = self._try_coerce_to_date_str(value) From d0a0d5607b5ddb21cec47228b7427dc0ce5c3f9b Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 1 Aug 2025 12:41:57 +0100 Subject: [PATCH 186/220] Correct variable names and reformat --- src/shapefile.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 0694b0e3..2d3a440d 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -3166,11 +3166,13 @@ def __shpRecord(self, s: Shape) -> tuple[int, int]: new_bbox = self.__bbox(s) if s.shapeType != NULL else None new_mbox = ( self.__mbox(s) - if s.shapeType in _PointM._shapeTypes | _HasM._shapeTypes + if s.shapeType in PointM._shapeTypes | _HasM._shapeTypes else None ) new_zbox = ( - 
self.__zbox(s) if s.shapeType in _PointZ._shapeTypes | _HasZ._shapeTypes else None + self.__zbox(s) + if s.shapeType in PointZ._shapeTypes | _HasZ._shapeTypes + else None ) # Create an in-memory binary buffer to avoid From 9be9b0245b3456062407c300b329bcf398643097 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 1 Aug 2025 12:44:03 +0100 Subject: [PATCH 187/220] Reinstate a 2-tuple --- src/shapefile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/shapefile.py b/src/shapefile.py index 2d3a440d..7119d394 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -3155,7 +3155,7 @@ def __shpRecord(self, s: Shape) -> tuple[int, int]: # Shape Type if self.shapeType is None and s.shapeType != NULL: self.shapeType = s.shapeType - if s.shapeType != NULL and s.shapeType != self.shapeType: + if s.shapeType not in (NULL, self.shapeType): raise ShapefileException( f"The shape's type ({s.shapeType}) must match " f"the type of the shapefile ({self.shapeType})." From 6bc458057a81eeb558ab41482e05c172b3275bc3 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 1 Aug 2025 14:25:28 +0100 Subject: [PATCH 188/220] Enum free since 1st August 2025 --- src/shapefile.py | 63 ++++++++++++++++++++++++++++-------------------- 1 file changed, 37 insertions(+), 26 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 7119d394..53360a76 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -12,7 +12,6 @@ import array import doctest -import enum import io import logging import os @@ -26,7 +25,9 @@ IO, Any, Container, + Final, Generic, + Hashable, Iterable, Iterator, Literal, @@ -38,6 +39,7 @@ TypedDict, TypeVar, Union, + cast, overload, ) from urllib.error import HTTPError @@ -171,22 +173,34 @@ def read(self, size: int = -1): ... 
BinaryFileT = Union[str, IO[bytes]] BinaryFileStreamT = Union[IO[bytes], io.BytesIO, WriteSeekableBinStream] +FieldTypeT = Literal["C", "D", "F", "L", "M", "N"] + # https://en.wikipedia.org/wiki/.dbf#Database_records -class FieldType(enum.Enum): - # Use an ascii-encoded byte of the name, to save a decoding step. - C = "Character" # (str) - D = "Date" - F = "Floating point" - L = "Logical" # (bool) - M = "Memo" # Legacy. (10 digit str, starting block in an .dbt file) - N = "Numeric" # (int) +class FieldType: + """A bare bones 'enum', as the enum library noticeably slows performance.""" + + # __slots__ = ["C", "D", "F", "L", "M", "N", "__members__"] + + C: Final = "C" # Character" # (str) + D: Final = "D" # "Date" + F: Final = "F" # "Floating point" + L: Final = "L" # "Logical" # (bool) + M: Final = "M" # "Memo" # Legacy. (10 digit str, starting block in an .dbt file) + N: Final = "N" # "Numeric" # (int) + __members__ = {"C", "D", "F", "L", "M", "N"} # set(__slots__) - {"__members__"} + + def raise_if_invalid(field_type: Hashable): + if field_type not in FieldType.__members__: + raise ShapefileException( + f"field_type must be in {{FieldType.__members__}}. Got: {field_type=}. " + ) # Use functional syntax to have an attribute named type, a Python keyword class Field(NamedTuple): name: str - field_type: FieldType + field_type: FieldTypeT size: int decimal: int @@ -194,18 +208,12 @@ class Field(NamedTuple): def from_unchecked( cls, name: str, - field_type: Union[str, FieldType] = FieldType.C, + field_type: FieldTypeT = "C", size: int = 50, decimal: int = 0, ) -> Self: - if isinstance(field_type, str): - if field_type.upper() in FieldType.__members__: - field_type = FieldType[field_type.upper()] - else: - raise ShapefileException( - "type must be C,D,F,L,M,N, or a FieldType enum member. " - f"Got: {field_type=}. 
" - ) + field_type = cast(FieldTypeT, field_type.upper()) + FieldType.raise_if_invalid(field_type) if field_type is FieldType.D: size = 8 @@ -221,7 +229,7 @@ def from_unchecked( ) def __repr__(self) -> str: - return f'Field(name="{self.name}", field_type=FieldType.{self.field_type.name}, size={self.size}, decimal={self.decimal})' + return f'Field(name="{self.name}", field_type=FieldType.{self.field_type}, size={self.size}, decimal={self.decimal})' RecordValueNotDate = Union[bool, int, float, str, date] @@ -1977,7 +1985,7 @@ def __seek_0_on_file_obj_wrap_or_open_from_name( if hasattr(file_, "read"): # Copy if required try: - file_.seek(0) # type: ignore + file_.seek(0) return file_ except (NameError, io.UnsupportedOperation): return io.BytesIO(file_.read()) @@ -2418,7 +2426,8 @@ def __dbfHeader(self) -> None: name = encoded_name.decode(self.encoding, self.encodingErrors) name = name.lstrip() - field_type = FieldType[encoded_type_char.decode("ascii").upper()] + field_type = cast(FieldTypeT, encoded_type_char.decode("ascii").upper()) + FieldType.raise_if_invalid(field_type) self.fields.append(Field(name, field_type, size, decimal)) terminator = dbf.read(1) @@ -2632,7 +2641,9 @@ def records(self, fields: Optional[list[str]] = None) -> list[_Record]: f = self.__getFileObj(self.dbf) f.seek(self.__dbfHdrLength) fieldTuples, recLookup, recStruct = self.__recordFields(fields) - for i in range(self.numRecords): # type: ignore + # self.__dbfHeader() sets self.numRecords, so it's fine to cast it to int + # (to tell mypy it's not None). 
+ for i in range(cast(int, self.numRecords)): r = self.__record( oid=i, fieldTuples=fieldTuples, recLookup=recLookup, recStruct=recStruct ) @@ -3111,7 +3122,7 @@ def __dbfHeader(self) -> None: encoded_name = field.name.encode(self.encoding, self.encodingErrors) encoded_name = encoded_name.replace(b" ", b"_") encoded_name = encoded_name[:10].ljust(11).replace(b" ", b"\x00") - encodedFieldType = field.field_type.name.encode("ascii") + encodedFieldType = field.field_type.encode("ascii") fld = pack( "<11sc4xBB14x", encoded_name, @@ -3266,7 +3277,7 @@ def record( @staticmethod def _dbf_missing_placeholder( - value: RecordValue, field_type: FieldType, size: int + value: RecordValue, field_type: FieldTypeT, size: int ) -> str: if field_type is FieldType.N or field_type is FieldType.F: return "*" * size # QGIS NULL @@ -3539,7 +3550,7 @@ def field( # Types of args should match *Field self, name: str, - field_type: Union[str, FieldType] = FieldType.C, + field_type: FieldTypeT = "C", size: int = 50, decimal: int = 0, ) -> None: From 17a89c4fae943382327eec219bb1e753d7ff0497 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 1 Aug 2025 15:00:10 +0100 Subject: [PATCH 189/220] Use equality instead of identity tests with single character strings. --- src/shapefile.py | 34 ++++++++++++++++++++-------------- test_shapefile.py | 2 +- 2 files changed, 21 insertions(+), 15 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 53360a76..c11da9f7 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -19,6 +19,7 @@ import tempfile import time import zipfile +from collections.abc import Hashable from datetime import date from struct import Struct, calcsize, error, pack, unpack from typing import ( @@ -27,7 +28,6 @@ Container, Final, Generic, - Hashable, Iterable, Iterator, Literal, @@ -180,7 +180,7 @@ def read(self, size: int = -1): ... 
class FieldType: """A bare bones 'enum', as the enum library noticeably slows performance.""" - # __slots__ = ["C", "D", "F", "L", "M", "N", "__members__"] + # __slots__ = ["C", "D", "F", "L", "M", "N", "__members__", "raise_if_invalid", "is_numeric"] C: Final = "C" # Character" # (str) D: Final = "D" # "Date" @@ -190,12 +190,17 @@ class FieldType: N: Final = "N" # "Numeric" # (int) __members__ = {"C", "D", "F", "L", "M", "N"} # set(__slots__) - {"__members__"} + @staticmethod def raise_if_invalid(field_type: Hashable): if field_type not in FieldType.__members__: raise ShapefileException( f"field_type must be in {{FieldType.__members__}}. Got: {field_type=}. " ) + @classmethod + def is_numeric(cls, member: FieldTypeT): + return member in (cls.F, cls.N) + # Use functional syntax to have an attribute named type, a Python keyword class Field(NamedTuple): @@ -215,10 +220,10 @@ def from_unchecked( field_type = cast(FieldTypeT, field_type.upper()) FieldType.raise_if_invalid(field_type) - if field_type is FieldType.D: + if field_type == FieldType.D: size = 8 decimal = 0 - elif field_type is FieldType.L: + elif field_type == FieldType.L: size = 1 decimal = 0 @@ -2426,7 +2431,8 @@ def __dbfHeader(self) -> None: name = encoded_name.decode(self.encoding, self.encodingErrors) name = name.lstrip() - field_type = cast(FieldTypeT, encoded_type_char.decode("ascii").upper()) + decoded_type_char = encoded_type_char.upper().decode("ascii") + field_type: FieldTypeT = getattr(FieldType, decoded_type_char) FieldType.raise_if_invalid(field_type) self.fields.append(Field(name, field_type, size, decimal)) @@ -2547,7 +2553,7 @@ def __record( # parse each value record = [] for (__name, typ, __size, decimal), value in zip(fieldTuples, recordContents): - if typ is FieldType.N or typ is FieldType.F: + if FieldType.is_numeric(typ): # numeric or float: number stored as a string, right justified, and padded with blanks to the width of the field. 
value = value.split(b"\0")[0] value = value.replace(b"*", b"") # QGIS NULL is all '*' chars @@ -2573,7 +2579,7 @@ def __record( except ValueError: # not parseable as int, set to None value = None - elif typ is FieldType.D: + elif typ == FieldType.D: # date: 8 bytes - date stored as a string in the format YYYYMMDD. if ( not value.replace(b"\x00", b"") @@ -2591,7 +2597,7 @@ def __record( except (TypeError, ValueError): # if invalid date, just return as unicode string so user can decimalde value = str(value.strip()) - elif typ is FieldType.L: + elif typ == FieldType.L: # logical: 1 byte - initialized to 0x20 (space) otherwise T or F. if value == b" ": value = None # space means missing or not yet set @@ -3279,11 +3285,11 @@ def record( def _dbf_missing_placeholder( value: RecordValue, field_type: FieldTypeT, size: int ) -> str: - if field_type is FieldType.N or field_type is FieldType.F: + if FieldType.is_numeric(field_type): return "*" * size # QGIS NULL - if field_type is FieldType.D: + if field_type == FieldType.D: return "0" * 8 # QGIS NULL for date type - if field_type is FieldType.L: + if field_type == FieldType.L: return " " return str(value)[:size].ljust(size) @@ -3367,11 +3373,11 @@ def __dbfRecord(self, record: list[RecordValue]) -> None: if value in MISSING: str_val = self._dbf_missing_placeholder(value, type_, size) - elif type_ is FieldType.N or type_ is FieldType.F: + elif FieldType.is_numeric(type_): str_val = self._try_coerce_to_numeric_str(value, size, decimal) - elif type_ is FieldType.D: + elif type_ == FieldType.D: str_val = self._try_coerce_to_date_str(value) - elif type_ is FieldType.L: + elif type_ == FieldType.L: str_val = self._try_coerce_to_logical_str(value) else: if isinstance(value, bytes): diff --git a/test_shapefile.py b/test_shapefile.py index a2ffbff6..2a10d3ee 100644 --- a/test_shapefile.py +++ b/test_shapefile.py @@ -695,7 +695,7 @@ def test_reader_fields(): field = fields[0] assert isinstance(field[0], str) # field name - assert 
field[1].name in ["C", "N", "F", "L", "D", "M"] # field type + assert field[1] in ["C", "N", "F", "L", "D", "M"] # field type assert isinstance(field[2], int) # field length assert isinstance(field[3], int) # decimal length From 4846dd8daf5b86114444b1f8d46eabd7fd57594f Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 1 Aug 2025 15:36:27 +0100 Subject: [PATCH 190/220] Use a single FieldType mapping --- src/shapefile.py | 50 +++++++++++++++++++++++++++++++---------------- test_shapefile.py | 2 +- 2 files changed, 34 insertions(+), 18 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 53360a76..f4bef2e1 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -27,7 +27,6 @@ Container, Final, Generic, - Hashable, Iterable, Iterator, Literal, @@ -182,19 +181,34 @@ class FieldType: # __slots__ = ["C", "D", "F", "L", "M", "N", "__members__"] - C: Final = "C" # Character" # (str) + C: Final = "C" # "Character" # (str) D: Final = "D" # "Date" F: Final = "F" # "Floating point" L: Final = "L" # "Logical" # (bool) M: Final = "M" # "Memo" # Legacy. (10 digit str, starting block in an .dbt file) N: Final = "N" # "Numeric" # (int) - __members__ = {"C", "D", "F", "L", "M", "N"} # set(__slots__) - {"__members__"} - - def raise_if_invalid(field_type: Hashable): - if field_type not in FieldType.__members__: - raise ShapefileException( - f"field_type must be in {{FieldType.__members__}}. Got: {field_type=}. " - ) + __members__: set[FieldTypeT] = { + "C", + "D", + "F", + "L", + "M", + "N", + } # set(__slots__) - {"__members__"} + + # def raise_if_invalid(field_type: Hashable): + # if field_type not in FieldType.__members__: + # raise ShapefileException( + # f"field_type must be in {{FieldType.__members__}}. Got: {field_type=}. 
" + # ) + + +FIELD_TYPE_ALIASES: dict[Union[str, bytes], FieldTypeT] = {} +for c in FieldType.__members__: + FIELD_TYPE_ALIASES[c.upper()] = c + FIELD_TYPE_ALIASES[c.lower()] = c + FIELD_TYPE_ALIASES[c.encode("ascii").lower()] = c + FIELD_TYPE_ALIASES[c.encode("ascii").upper()] = c # Use functional syntax to have an attribute named type, a Python keyword @@ -208,24 +222,27 @@ class Field(NamedTuple): def from_unchecked( cls, name: str, - field_type: FieldTypeT = "C", + field_type: Union[str, bytes, FieldTypeT] = "C", size: int = 50, decimal: int = 0, ) -> Self: - field_type = cast(FieldTypeT, field_type.upper()) - FieldType.raise_if_invalid(field_type) + if field_type not in FIELD_TYPE_ALIASES: + raise ShapefileException( + f"field_type must be in {{FieldType.__members__}}. Got: {field_type=}. " + ) + type_ = FIELD_TYPE_ALIASES[field_type] - if field_type is FieldType.D: + if type_ is FieldType.D: size = 8 decimal = 0 - elif field_type is FieldType.L: + elif type_ is FieldType.L: size = 1 decimal = 0 # A doctest in README.md previously passed in a string ('40') for size, # so explictly convert name to str, and size and decimal to ints. 
return cls( - name=str(name), field_type=field_type, size=int(size), decimal=int(decimal) + name=str(name), field_type=type_, size=int(size), decimal=int(decimal) ) def __repr__(self) -> str: @@ -2426,8 +2443,7 @@ def __dbfHeader(self) -> None: name = encoded_name.decode(self.encoding, self.encodingErrors) name = name.lstrip() - field_type = cast(FieldTypeT, encoded_type_char.decode("ascii").upper()) - FieldType.raise_if_invalid(field_type) + field_type = FIELD_TYPE_ALIASES[encoded_type_char] self.fields.append(Field(name, field_type, size, decimal)) terminator = dbf.read(1) diff --git a/test_shapefile.py b/test_shapefile.py index a2ffbff6..2a10d3ee 100644 --- a/test_shapefile.py +++ b/test_shapefile.py @@ -695,7 +695,7 @@ def test_reader_fields(): field = fields[0] assert isinstance(field[0], str) # field name - assert field[1].name in ["C", "N", "F", "L", "D", "M"] # field type + assert field[1] in ["C", "N", "F", "L", "D", "M"] # field type assert isinstance(field[2], int) # field length assert isinstance(field[3], int) # decimal length From 9dd687c68a5e0f93414f2bbc6c5fce5cc015c5ae Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 1 Aug 2025 16:32:55 +0100 Subject: [PATCH 191/220] Ditch BBox, ZBox and MBox namedtuples --- src/shapefile.py | 79 ++++++++++++++++++++++++++++-------------------- 1 file changed, 47 insertions(+), 32 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index f4bef2e1..4c3aa80d 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -123,22 +123,25 @@ PointT = Union[Point2D, PointMT, PointZT] PointsT = list[PointT] +BBox = tuple[float, float, float, float] +MBox = tuple[float, float] +ZBox = tuple[float, float] -class BBox(NamedTuple): - xmin: float - ymin: float - xmax: float - ymax: float +# class BBox(NamedTuple): +# xmin: float +# ymin: float +# xmax: float +# ymax: float -class MBox(NamedTuple): - mmin: Optional[float] - mmax: Optional[float] +# class 
MBox(NamedTuple): +# mmin: Optional[float] +# mmax: Optional[float] -class ZBox(NamedTuple): - zmin: float - zmax: float +# class ZBox(NamedTuple): +# zmin: float +# zmax: float class WriteableBinStream(Protocol): @@ -415,8 +418,9 @@ def rewind(coords: Reversible[PointT]) -> PointsT: def ring_bbox(coords: PointsT) -> BBox: """Calculates and returns the bounding box of a ring.""" xs, ys = map(list, list(zip(*coords))[:2]) # ignore any z or m values - bbox = BBox(xmin=min(xs), ymin=min(ys), xmax=max(xs), ymax=max(ys)) - return bbox + # bbox = BBox(xmin=min(xs), ymin=min(ys), xmax=max(xs), ymax=max(ys)) + # return bbox + return min(xs), min(ys), max(xs), max(ys) def bbox_overlap(bbox1: BBox, bbox2: BBox) -> bool: @@ -998,7 +1002,7 @@ class _CanHaveBBox(Shape): bbox: Optional[BBox] = None def _get_set_bbox_from_byte_stream(self, b_io: ReadableBinStream) -> BBox: - self.bbox: BBox = BBox(*_Array[float]("d", unpack("<4d", b_io.read(32)))) + self.bbox: BBox = tuple(*_Array[float]("d", unpack("<4d", b_io.read(32)))) return self.bbox @staticmethod @@ -1218,7 +1222,7 @@ def from_byte_stream( if bbox is not None: # create bounding box for Point by duplicating coordinates # skip shape if no overlap with bounding box - if not bbox_overlap(bbox, BBox(x, y, x, y)): + if not bbox_overlap(bbox, (x, y, x, y)): return None shape.points = [(x, y)] @@ -2233,18 +2237,21 @@ def __shpHeader(self) -> None: shp.seek(32) self.shapeType = unpack("= NODATA else None for m_bound in unpack("<2d", shp.read(16)) ] - self.mbox = MBox(mmin=m_bounds[0], mmax=m_bounds[1]) + # self.mbox = MBox(mmin=m_bounds[0], mmax=m_bounds[1]) + self.mbox = (m_bounds[0], m_bounds[1]) def __shape( self, oid: Optional[int] = None, bbox: Optional[BBox] = None @@ -2953,10 +2960,10 @@ def __bbox(self, s: Shape) -> BBox: y: list[float] = [] if self._bbox: - x.append(self._bbox.xmin) - y.append(self._bbox.ymin) - x.append(self._bbox.xmax) - y.append(self._bbox.ymax) + x.append(self._bbox[0]) + y.append(self._bbox.[1]) + 
x.append(self._bbox.[2]) + y.append(self._bbox.[3]) if len(s.points) > 0: px, py = list(zip(*s.points))[:2] @@ -2970,7 +2977,8 @@ def __bbox(self, s: Shape) -> BBox: "Cannot create bbox. Expected a valid shape with at least one point. " f"Got a shape of type '{s.shapeType}' and 0 points." ) - self._bbox = BBox(xmin=min(x), ymin=min(y), xmax=max(x), ymax=max(y)) + # self._bbox = BBox(xmin=min(x), ymin=min(y), xmax=max(x), ymax=max(y)) + self._bbox = (min(x), min(y), max(x), max(y)) return self._bbox def __zbox(self, s) -> ZBox: @@ -2988,7 +2996,8 @@ def __zbox(self, s) -> ZBox: # Original self._zbox bounds (if any) are the first two entries. # Set zbox for the first, and all later times - self._zbox = ZBox(zmin=min(z), zmax=max(z)) + # self._zbox = ZBox(zmin=min(z), zmax=max(z)) + self._zbox = (min(z), max(z)) return self._zbox def __mbox(self, s) -> MBox: @@ -3012,7 +3021,8 @@ def __mbox(self, s) -> MBox: # Original self._mbox bounds (if any) are the first two entries. # Set mbox for the first, and all later times - self._mbox = MBox(mmin=min(m), mmax=max(m)) + # self._mbox = MBox(mmin=min(m), mmax=max(m)) + self._mbox = (min(m), max(m)) return self._mbox @property @@ -3064,7 +3074,8 @@ def __shapefileHeader( # In such cases of empty shapefiles, ESRI spec says the bbox values are 'unspecified'. # Not sure what that means, so for now just setting to 0s, which is the same behavior as in previous versions. # This would also make sense since the Z and M bounds are similarly set to 0 for non-Z/M type shapefiles. 
- bbox = BBox(0, 0, 0, 0) + # bbox = BBox(0, 0, 0, 0) + bbox = (0, 0, 0, 0) f.write(pack("<4d", *bbox)) except error: raise ShapefileException( @@ -3078,20 +3089,24 @@ def __shapefileHeader( zbox = self.zbox() if zbox is None: # means we have empty shapefile/only null geoms (see commentary on bbox above) - zbox = ZBox(0, 0) + # zbox = ZBox(0, 0) + zbox = (0, 0) else: # As per the ESRI shapefile spec, the zbox for non-Z type shapefiles are set to 0s - zbox = ZBox(0, 0) + # zbox = ZBox(0, 0) + zbox = (0, 0) # Measure if self.shapeType in PointM._shapeTypes | _HasM._shapeTypes: # M values are present in M or Z type mbox = self.mbox() if mbox is None: # means we have empty shapefile/only null geoms (see commentary on bbox above) - mbox = MBox(0, 0) + # mbox = MBox(0, 0) + mbox = (0, 0) else: # As per the ESRI shapefile spec, the mbox for non-M type shapefiles are set to 0s - mbox = MBox(0, 0) + # mbox = MBox(0, 0) + mbox = (0, 0) # Try writing try: f.write(pack("<4d", zbox[0], zbox[1], mbox[0], mbox[1])) From dc4511a145f788c4b9a0322e428c7326eeaab38e Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 1 Aug 2025 16:34:54 +0100 Subject: [PATCH 192/220] Remove errant dot --- src/shapefile.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 4c3aa80d..54f7c212 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -2961,9 +2961,9 @@ def __bbox(self, s: Shape) -> BBox: if self._bbox: x.append(self._bbox[0]) - y.append(self._bbox.[1]) - x.append(self._bbox.[2]) - y.append(self._bbox.[3]) + y.append(self._bbox[1]) + x.append(self._bbox[2]) + y.append(self._bbox[3]) if len(s.points) > 0: px, py = list(zip(*s.points))[:2] From 60265b609f3a30f6cc7071823adfdc4797820c7a Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 1 Aug 2025 16:37:13 +0100 Subject: [PATCH 193/220] Remove errant * --- src/shapefile.py | 2 
+- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/shapefile.py b/src/shapefile.py index 54f7c212..83b54e0f 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -1002,7 +1002,7 @@ class _CanHaveBBox(Shape): bbox: Optional[BBox] = None def _get_set_bbox_from_byte_stream(self, b_io: ReadableBinStream) -> BBox: - self.bbox: BBox = tuple(*_Array[float]("d", unpack("<4d", b_io.read(32)))) + self.bbox: BBox = tuple(_Array[float]("d", unpack("<4d", b_io.read(32)))) return self.bbox @staticmethod From 3e286003fbfae77fd13d9a3187d61972cc3f3537 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 1 Aug 2025 17:02:30 +0100 Subject: [PATCH 194/220] Fix type checking. Lose unneccessary conversions to _Array --- src/shapefile.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 83b54e0f..1ddd4920 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -124,7 +124,7 @@ PointsT = list[PointT] BBox = tuple[float, float, float, float] -MBox = tuple[float, float] +MBox = tuple[Optional[float], Optional[float]] ZBox = tuple[float, float] # class BBox(NamedTuple): @@ -1002,7 +1002,7 @@ class _CanHaveBBox(Shape): bbox: Optional[BBox] = None def _get_set_bbox_from_byte_stream(self, b_io: ReadableBinStream) -> BBox: - self.bbox: BBox = tuple(_Array[float]("d", unpack("<4d", b_io.read(32)))) + self.bbox: BBox = unpack("<4d", b_io.read(32)) return self.bbox @staticmethod @@ -1192,7 +1192,7 @@ def _set_single_point_m_from_byte_stream( @staticmethod def _x_y_from_byte_stream(b_io: ReadableBinStream): # Unpack _Array too - x, y = _Array[float]("d", unpack("<2d", b_io.read(16))) + x, y = unpack("<2d", b_io.read(16)) # Convert to tuple return x, y @@ -1298,7 +1298,7 @@ def _set_ms_from_byte_stream( # Measure values less than -10e38 are nodata values according to the spec if next_shape - b_io.tell() >= nPoints * 8: self.m = [] - for m in 
_Array[float]("d", unpack(f"<{nPoints}d", b_io.read(nPoints * 8))): + for m in unpack(f"<{nPoints}d", b_io.read(nPoints * 8)): if m > NODATA: self.m.append(m) else: @@ -2237,11 +2237,13 @@ def __shpHeader(self) -> None: shp.seek(32) self.shapeType = unpack(" None: for m_bound in unpack("<2d", shp.read(16)) ] # self.mbox = MBox(mmin=m_bounds[0], mmax=m_bounds[1]) - self.mbox = (m_bounds[0], m_bounds[1]) + self.mbox: MBox = (m_bounds[0], m_bounds[1]) def __shape( self, oid: Optional[int] = None, bbox: Optional[BBox] = None From 10703eef413cf4358e3b0b44db545eb77502397d Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 1 Aug 2025 17:18:27 +0100 Subject: [PATCH 195/220] Get rid of the Faux-enum Don't rely on FIELD_TYPE_ALIASES --- src/shapefile.py | 103 ++++++++++++++++++++++++----------------------- 1 file changed, 52 insertions(+), 51 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 1ddd4920..e66313ff 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -175,43 +175,43 @@ def read(self, size: int = -1): ... BinaryFileT = Union[str, IO[bytes]] BinaryFileStreamT = Union[IO[bytes], io.BytesIO, WriteSeekableBinStream] +# https://en.wikipedia.org/wiki/.dbf#Database_records FieldTypeT = Literal["C", "D", "F", "L", "M", "N"] -# https://en.wikipedia.org/wiki/.dbf#Database_records -class FieldType: - """A bare bones 'enum', as the enum library noticeably slows performance.""" - - # __slots__ = ["C", "D", "F", "L", "M", "N", "__members__"] - - C: Final = "C" # "Character" # (str) - D: Final = "D" # "Date" - F: Final = "F" # "Floating point" - L: Final = "L" # "Logical" # (bool) - M: Final = "M" # "Memo" # Legacy. 
(10 digit str, starting block in an .dbt file) - N: Final = "N" # "Numeric" # (int) - __members__: set[FieldTypeT] = { - "C", - "D", - "F", - "L", - "M", - "N", - } # set(__slots__) - {"__members__"} - - # def raise_if_invalid(field_type: Hashable): - # if field_type not in FieldType.__members__: - # raise ShapefileException( - # f"field_type must be in {{FieldType.__members__}}. Got: {field_type=}. " - # ) - - -FIELD_TYPE_ALIASES: dict[Union[str, bytes], FieldTypeT] = {} -for c in FieldType.__members__: - FIELD_TYPE_ALIASES[c.upper()] = c - FIELD_TYPE_ALIASES[c.lower()] = c - FIELD_TYPE_ALIASES[c.encode("ascii").lower()] = c - FIELD_TYPE_ALIASES[c.encode("ascii").upper()] = c +# class FieldType: +# """A bare bones 'enum', as the enum library noticeably slows performance.""" + +# # __slots__ = ["C", "D", "F", "L", "M", "N", "__members__"] + +# C: Final = "C" # "Character" # (str) +# D: Final = "D" # "Date" +# F: Final = "F" # "Floating point" +# L: Final = "L" # "Logical" # (bool) +# M: Final = "M" # "Memo" # Legacy. (10 digit str, starting block in an .dbt file) +# N: Final = "N" # "Numeric" # (int) +# __members__: set[FieldTypeT] = { +# "C", +# "D", +# "F", +# "L", +# "M", +# "N", +# } # set(__slots__) - {"__members__"} + +# # def raise_if_invalid(field_type: Hashable): +# # if field_type not in FieldType.__members__: +# # raise ShapefileException( +# # f"field_type must be in {{FieldType.__members__}}. Got: {field_type=}. 
" +# # ) + +FIELD_TYPE_ALIASES = dict.fromkeys("CDFLMN") +# FIELD_TYPE_ALIASES: dict[Union[str, bytes], FieldTypeT] = {} +# for c in FieldType.__members__: +# FIELD_TYPE_ALIASES[c.upper()] = c +# FIELD_TYPE_ALIASES[c.lower()] = c +# FIELD_TYPE_ALIASES[c.encode("ascii").lower()] = c +# FIELD_TYPE_ALIASES[c.encode("ascii").upper()] = c # Use functional syntax to have an attribute named type, a Python keyword @@ -231,25 +231,25 @@ def from_unchecked( ) -> Self: if field_type not in FIELD_TYPE_ALIASES: raise ShapefileException( - f"field_type must be in {{FieldType.__members__}}. Got: {field_type=}. " + f"field_type must be in {FIELD_TYPE_ALIASES}. Got: {field_type=}. " ) - type_ = FIELD_TYPE_ALIASES[field_type] + # type_ = FIELD_TYPE_ALIASES[field_type] - if type_ is FieldType.D: + if field_type == "D": size = 8 decimal = 0 - elif type_ is FieldType.L: + elif field_type == "L": size = 1 decimal = 0 # A doctest in README.md previously passed in a string ('40') for size, # so explictly convert name to str, and size and decimal to ints. 
return cls( - name=str(name), field_type=type_, size=int(size), decimal=int(decimal) + name=str(name), field_type=field_type, size=int(size), decimal=int(decimal) ) def __repr__(self) -> str: - return f'Field(name="{self.name}", field_type=FieldType.{self.field_type}, size={self.size}, decimal={self.decimal})' + return f'Field(name="{self.name}", field_type="{self.field_type}", size={self.size}, decimal={self.decimal})' RecordValueNotDate = Union[bool, int, float, str, date] @@ -2452,7 +2452,8 @@ def __dbfHeader(self) -> None: name = encoded_name.decode(self.encoding, self.encodingErrors) name = name.lstrip() - field_type = FIELD_TYPE_ALIASES[encoded_type_char] + # field_type = FIELD_TYPE_ALIASES[encoded_type_char] + field_type = encoded_type_char.decode("ascii") self.fields.append(Field(name, field_type, size, decimal)) terminator = dbf.read(1) @@ -2462,7 +2463,7 @@ def __dbfHeader(self) -> None: ) # insert deletion field at start - self.fields.insert(0, Field("DeletionFlag", FieldType.C, 1, 0)) + self.fields.insert(0, Field("DeletionFlag", "C", 1, 0)) # store all field positions for easy lookups # note: fieldLookup gives the index position of a field inside Reader.fields @@ -2572,7 +2573,7 @@ def __record( # parse each value record = [] for (__name, typ, __size, decimal), value in zip(fieldTuples, recordContents): - if typ is FieldType.N or typ is FieldType.F: + if typ in ("N", "F"): #typ is FieldType.F: # numeric or float: number stored as a string, right justified, and padded with blanks to the width of the field. value = value.split(b"\0")[0] value = value.replace(b"*", b"") # QGIS NULL is all '*' chars @@ -2598,7 +2599,7 @@ def __record( except ValueError: # not parseable as int, set to None value = None - elif typ is FieldType.D: + elif typ == "D": # date: 8 bytes - date stored as a string in the format YYYYMMDD. 
if ( not value.replace(b"\x00", b"") @@ -2616,7 +2617,7 @@ def __record( except (TypeError, ValueError): # if invalid date, just return as unicode string so user can decimalde value = str(value.strip()) - elif typ is FieldType.L: + elif typ == "L": # logical: 1 byte - initialized to 0x20 (space) otherwise T or F. if value == b" ": value = None # space means missing or not yet set @@ -3312,11 +3313,11 @@ def record( def _dbf_missing_placeholder( value: RecordValue, field_type: FieldTypeT, size: int ) -> str: - if field_type is FieldType.N or field_type is FieldType.F: + if field_type in ("N", "F"): #field_type is FieldType.F: return "*" * size # QGIS NULL - if field_type is FieldType.D: + if field_type == "D": return "0" * 8 # QGIS NULL for date type - if field_type is FieldType.L: + if field_type == "L": return " " return str(value)[:size].ljust(size) @@ -3400,11 +3401,11 @@ def __dbfRecord(self, record: list[RecordValue]) -> None: if value in MISSING: str_val = self._dbf_missing_placeholder(value, type_, size) - elif type_ is FieldType.N or type_ is FieldType.F: + elif type_ in ("N", "F"): #type_ is FieldType.F: str_val = self._try_coerce_to_numeric_str(value, size, decimal) - elif type_ is FieldType.D: + elif type_ == "D": str_val = self._try_coerce_to_date_str(value) - elif type_ is FieldType.L: + elif type_ == "L": str_val = self._try_coerce_to_logical_str(value) else: if isinstance(value, bytes): From 5a77db3d553ab6b941bae0a33cbb6945e2446d51 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 1 Aug 2025 17:26:14 +0100 Subject: [PATCH 196/220] Revert "Get rid of the Faux-enum" This reverts commit 10703eef413cf4358e3b0b44db545eb77502397d. 
--- src/shapefile.py | 103 +++++++++++++++++++++++------------------------ 1 file changed, 51 insertions(+), 52 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index e66313ff..1ddd4920 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -175,43 +175,43 @@ def read(self, size: int = -1): ... BinaryFileT = Union[str, IO[bytes]] BinaryFileStreamT = Union[IO[bytes], io.BytesIO, WriteSeekableBinStream] -# https://en.wikipedia.org/wiki/.dbf#Database_records FieldTypeT = Literal["C", "D", "F", "L", "M", "N"] -# class FieldType: -# """A bare bones 'enum', as the enum library noticeably slows performance.""" - -# # __slots__ = ["C", "D", "F", "L", "M", "N", "__members__"] - -# C: Final = "C" # "Character" # (str) -# D: Final = "D" # "Date" -# F: Final = "F" # "Floating point" -# L: Final = "L" # "Logical" # (bool) -# M: Final = "M" # "Memo" # Legacy. (10 digit str, starting block in an .dbt file) -# N: Final = "N" # "Numeric" # (int) -# __members__: set[FieldTypeT] = { -# "C", -# "D", -# "F", -# "L", -# "M", -# "N", -# } # set(__slots__) - {"__members__"} - -# # def raise_if_invalid(field_type: Hashable): -# # if field_type not in FieldType.__members__: -# # raise ShapefileException( -# # f"field_type must be in {{FieldType.__members__}}. Got: {field_type=}. 
" -# # ) - -FIELD_TYPE_ALIASES = dict.fromkeys("CDFLMN") -# FIELD_TYPE_ALIASES: dict[Union[str, bytes], FieldTypeT] = {} -# for c in FieldType.__members__: -# FIELD_TYPE_ALIASES[c.upper()] = c -# FIELD_TYPE_ALIASES[c.lower()] = c -# FIELD_TYPE_ALIASES[c.encode("ascii").lower()] = c -# FIELD_TYPE_ALIASES[c.encode("ascii").upper()] = c +# https://en.wikipedia.org/wiki/.dbf#Database_records +class FieldType: + """A bare bones 'enum', as the enum library noticeably slows performance.""" + + # __slots__ = ["C", "D", "F", "L", "M", "N", "__members__"] + + C: Final = "C" # "Character" # (str) + D: Final = "D" # "Date" + F: Final = "F" # "Floating point" + L: Final = "L" # "Logical" # (bool) + M: Final = "M" # "Memo" # Legacy. (10 digit str, starting block in an .dbt file) + N: Final = "N" # "Numeric" # (int) + __members__: set[FieldTypeT] = { + "C", + "D", + "F", + "L", + "M", + "N", + } # set(__slots__) - {"__members__"} + + # def raise_if_invalid(field_type: Hashable): + # if field_type not in FieldType.__members__: + # raise ShapefileException( + # f"field_type must be in {{FieldType.__members__}}. Got: {field_type=}. " + # ) + + +FIELD_TYPE_ALIASES: dict[Union[str, bytes], FieldTypeT] = {} +for c in FieldType.__members__: + FIELD_TYPE_ALIASES[c.upper()] = c + FIELD_TYPE_ALIASES[c.lower()] = c + FIELD_TYPE_ALIASES[c.encode("ascii").lower()] = c + FIELD_TYPE_ALIASES[c.encode("ascii").upper()] = c # Use functional syntax to have an attribute named type, a Python keyword @@ -231,25 +231,25 @@ def from_unchecked( ) -> Self: if field_type not in FIELD_TYPE_ALIASES: raise ShapefileException( - f"field_type must be in {FIELD_TYPE_ALIASES}. Got: {field_type=}. " + f"field_type must be in {{FieldType.__members__}}. Got: {field_type=}. 
" ) - # type_ = FIELD_TYPE_ALIASES[field_type] + type_ = FIELD_TYPE_ALIASES[field_type] - if field_type == "D": + if type_ is FieldType.D: size = 8 decimal = 0 - elif field_type == "L": + elif type_ is FieldType.L: size = 1 decimal = 0 # A doctest in README.md previously passed in a string ('40') for size, # so explictly convert name to str, and size and decimal to ints. return cls( - name=str(name), field_type=field_type, size=int(size), decimal=int(decimal) + name=str(name), field_type=type_, size=int(size), decimal=int(decimal) ) def __repr__(self) -> str: - return f'Field(name="{self.name}", field_type="{self.field_type}", size={self.size}, decimal={self.decimal})' + return f'Field(name="{self.name}", field_type=FieldType.{self.field_type}, size={self.size}, decimal={self.decimal})' RecordValueNotDate = Union[bool, int, float, str, date] @@ -2452,8 +2452,7 @@ def __dbfHeader(self) -> None: name = encoded_name.decode(self.encoding, self.encodingErrors) name = name.lstrip() - # field_type = FIELD_TYPE_ALIASES[encoded_type_char] - field_type = encoded_type_char.decode("ascii") + field_type = FIELD_TYPE_ALIASES[encoded_type_char] self.fields.append(Field(name, field_type, size, decimal)) terminator = dbf.read(1) @@ -2463,7 +2462,7 @@ def __dbfHeader(self) -> None: ) # insert deletion field at start - self.fields.insert(0, Field("DeletionFlag", "C", 1, 0)) + self.fields.insert(0, Field("DeletionFlag", FieldType.C, 1, 0)) # store all field positions for easy lookups # note: fieldLookup gives the index position of a field inside Reader.fields @@ -2573,7 +2572,7 @@ def __record( # parse each value record = [] for (__name, typ, __size, decimal), value in zip(fieldTuples, recordContents): - if typ in ("N", "F"): #typ is FieldType.F: + if typ is FieldType.N or typ is FieldType.F: # numeric or float: number stored as a string, right justified, and padded with blanks to the width of the field. 
value = value.split(b"\0")[0] value = value.replace(b"*", b"") # QGIS NULL is all '*' chars @@ -2599,7 +2598,7 @@ def __record( except ValueError: # not parseable as int, set to None value = None - elif typ == "D": + elif typ is FieldType.D: # date: 8 bytes - date stored as a string in the format YYYYMMDD. if ( not value.replace(b"\x00", b"") @@ -2617,7 +2616,7 @@ def __record( except (TypeError, ValueError): # if invalid date, just return as unicode string so user can decimalde value = str(value.strip()) - elif typ == "L": + elif typ is FieldType.L: # logical: 1 byte - initialized to 0x20 (space) otherwise T or F. if value == b" ": value = None # space means missing or not yet set @@ -3313,11 +3312,11 @@ def record( def _dbf_missing_placeholder( value: RecordValue, field_type: FieldTypeT, size: int ) -> str: - if field_type in ("N", "F"): #field_type is FieldType.F: + if field_type is FieldType.N or field_type is FieldType.F: return "*" * size # QGIS NULL - if field_type == "D": + if field_type is FieldType.D: return "0" * 8 # QGIS NULL for date type - if field_type == "L": + if field_type is FieldType.L: return " " return str(value)[:size].ljust(size) @@ -3401,11 +3400,11 @@ def __dbfRecord(self, record: list[RecordValue]) -> None: if value in MISSING: str_val = self._dbf_missing_placeholder(value, type_, size) - elif type_ in ("N", "F"): #type_ is FieldType.F: + elif type_ is FieldType.N or type_ is FieldType.F: str_val = self._try_coerce_to_numeric_str(value, size, decimal) - elif type_ == "D": + elif type_ is FieldType.D: str_val = self._try_coerce_to_date_str(value) - elif type_ == "L": + elif type_ is FieldType.L: str_val = self._try_coerce_to_logical_str(value) else: if isinstance(value, bytes): From 8de848db0bcfae16316b7000d22892739670e23c Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 1 Aug 2025 17:33:35 +0100 Subject: [PATCH 197/220] Remove old namedtuple names from doctests --- README.md | 6 
+++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index cd4758ca..66951be1 100644 --- a/README.md +++ b/README.md @@ -430,7 +430,7 @@ and the bounding box area the shapefile covers: >>> len(sf) 663 >>> sf.bbox - BBox(xmin=-122.515048, ymin=37.652916, xmax=-122.327622, ymax=37.863433) + (-122.515048, 37.652916, -122.327622, 37.863433) Finally, if you would prefer to work with the entire shapefile in a different format, you can convert all of it to a GeoJSON dictionary, although you may lose @@ -1388,7 +1388,7 @@ Shapefiles containing M-values can be examined in several ways: >>> r = shapefile.Reader('shapefiles/test/linem') >>> r.mbox # the lower and upper bound of M-values in the shapefile - MBox(mmin=0.0, mmax=3.0) + (0.0, 3.0) >>> r.shape(0).m # flat list of M-values [0.0, None, 3.0, None, 0.0, None, None] @@ -1421,7 +1421,7 @@ To examine a Z-type shapefile you can do: >>> r = shapefile.Reader('shapefiles/test/linez') >>> r.zbox # the lower and upper bound of Z-values in the shapefile - ZBox(zmin=0.0, zmax=22.0) + (0.0, 22.0) >>> r.shape(0).z # flat list of Z-values [18.0, 20.0, 22.0, 0.0, 0.0, 0.0, 0.0, 15.0, 13.0, 14.0] From 960a7b28fa048285d169110a3713214caa829d3b Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 1 Aug 2025 17:41:58 +0100 Subject: [PATCH 198/220] Restore original numerical coercion code --- src/shapefile.py | 46 +++++++++++++++++++++++++++++++++------------- 1 file changed, 33 insertions(+), 13 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 1ddd4920..90edcec1 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -3333,27 +3333,47 @@ def _try_coerce_to_numeric_str(value, size, decimal): # numeric or float: number stored as a string, # right justified, and padded with blanks # to the width of the field. + if not decimal: # force to int try: # first try to force directly to int. 
# forcing a large int to float and back to int # will lose information and result in wrong nr. - int_val = int(value) + value = int(value) except ValueError: # forcing directly to int failed, so was probably a float. - int_val = int(float(value)) - except TypeError: - raise ShapefileException(f"Could not form int from: {value}") - # length capped to the field size - return format(int_val, "d")[:size].rjust(size) - - try: - f_val = float(value) - except ValueError: - raise ShapefileException(f"Could not form float from: {value}") - # length capped to the field size - return format(f_val, f".{decimal}f")[:size].rjust(size) + value = int(float(value)) + return = format(value, "d")[:size].rjust( + size + ) # caps the size if exceeds the field size + else: + value = float(value) + return format(value, f".{deci}f")[:size].rjust( + size + ) # caps the size if exceeds the field size + + # if not decimal: + # # force to int + # try: + # # first try to force directly to int. + # # forcing a large int to float and back to int + # # will lose information and result in wrong nr. + # int_val = int(value) + # except ValueError: + # # forcing directly to int failed, so was probably a float. 
+ # int_val = int(float(value)) + # except TypeError: + # raise ShapefileException(f"Could not form int from: {value}") + # # length capped to the field size + # return format(int_val, "d")[:size].rjust(size) + + # try: + # f_val = float(value) + # except ValueError: + # raise ShapefileException(f"Could not form float from: {value}") + # # length capped to the field size + # return format(f_val, f".{decimal}f")[:size].rjust(size) @staticmethod def _try_coerce_to_date_str(value: RecordValue) -> str: From 730da43e40156aa6e51c16ec91deb9de4af3866c Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 1 Aug 2025 17:43:07 +0100 Subject: [PATCH 199/220] Fix return statement --- src/shapefile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/shapefile.py b/src/shapefile.py index 90edcec1..fe980017 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -3344,7 +3344,7 @@ def _try_coerce_to_numeric_str(value, size, decimal): except ValueError: # forcing directly to int failed, so was probably a float. 
value = int(float(value)) - return = format(value, "d")[:size].rjust( + return format(value, "d")[:size].rjust( size ) # caps the size if exceeds the field size else: From d55f76caef6e6cbabbcff59565ad8f7e02b19a39 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 1 Aug 2025 17:43:48 +0100 Subject: [PATCH 200/220] Correct arg name --- src/shapefile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/shapefile.py b/src/shapefile.py index fe980017..685362c7 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -3349,7 +3349,7 @@ def _try_coerce_to_numeric_str(value, size, decimal): ) # caps the size if exceeds the field size else: value = float(value) - return format(value, f".{deci}f")[:size].rjust( + return format(value, f".{decimal}f")[:size].rjust( size ) # caps the size if exceeds the field size From 3b057cd49a1819d637de8193a8c35431581c25a2 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 1 Aug 2025 17:54:44 +0100 Subject: [PATCH 201/220] Add in entire old __dbfRecord method --- src/shapefile.py | 91 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 90 insertions(+), 1 deletion(-) diff --git a/src/shapefile.py b/src/shapefile.py index 685362c7..9d1019f3 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -3398,7 +3398,11 @@ def _try_coerce_to_logical_str(value: RecordValue) -> str: return "F" return " " # unknown is set to space - def __dbfRecord(self, record: list[RecordValue]) -> None: + + + + + def __newdbfRecord(self, record: list[RecordValue]) -> None: """Writes the dbf records.""" f = self.__getFileObj(self.dbf) if self.recNum == 0: @@ -3445,6 +3449,91 @@ def __dbfRecord(self, record: list[RecordValue]) -> None: ) f.write(encoded_val) + + + def __dbfRecord(self, record): + """Writes the dbf records.""" + f = self.__getFileObj(self.dbf) + if self.recNum == 0: + # first records, so all fields should be set + # 
allowing us to write the dbf header + # cannot change the fields after this point + self.__dbfHeader() + # first byte of the record is deletion flag, always disabled + f.write(b" ") + # begin + self.recNum += 1 + fields = ( + field for field in self.fields if field[0] != "DeletionFlag" + ) # ignore deletionflag field in case it was specified + for (fieldName, fieldType, size, deci), value in zip(fields, record): + # write + fieldType = fieldType.upper() + size = int(size) + if fieldType in ("N", "F"): + # numeric or float: number stored as a string, right justified, and padded with blanks to the width of the field. + if value in MISSING: + value = b"*" * size # QGIS NULL + elif not deci: + # force to int + try: + # first try to force directly to int. + # forcing a large int to float and back to int + # will lose information and result in wrong nr. + value = int(value) + except ValueError: + # forcing directly to int failed, so was probably a float. + value = int(float(value)) + value = format(value, "d")[:size].rjust( + size + ) # caps the size if exceeds the field size + else: + value = float(value) + value = format(value, f".{deci}f")[:size].rjust( + size + ) # caps the size if exceeds the field size + elif fieldType == "D": + # date: 8 bytes - date stored as a string in the format YYYYMMDD. + if isinstance(value, date): + value = f"{value.year:04d}{value.month:02d}{value.day:02d}" + elif isinstance(value, list) and len(value) == 3: + value = f"{value[0]:04d}{value[1]:02d}{value[2]:02d}" + elif value in MISSING: + value = b"0" * 8 # QGIS NULL for date type + elif is_string(value) and len(value) == 8: + pass # value is already a date string + else: + raise ShapefileException( + "Date values must be either a datetime.date object, a list, a YYYYMMDD string, or a missing value." + ) + elif fieldType == "L": + # logical: 1 byte - initialized to 0x20 (space) otherwise T or F. 
+ if value in MISSING: + value = b" " # missing is set to space + elif value in [True, 1]: + value = b"T" + elif value in [False, 0]: + value = b"F" + else: + value = b" " # unknown is set to space + else: + # anything else is forced to string, truncated to the length of the field + # value = b(value, self.encoding, self.encodingErrors)[:size].ljust(size) + value = str(value).encode(self.encoding, self.encodingErrors)[:size].ljust(size) + if not isinstance(value, bytes): + # just in case some of the numeric format() and date strftime() results are still in unicode (Python 3 only) + # value = b( + # value, "ascii", self.encodingErrors + # ) # should be default ascii encoding + value = value.encode('ascii', self.encodingErrors) + if len(value) != size: + raise ShapefileException( + "Shapefile Writer unable to pack incorrect sized value" + f" (size {len(value)}) into field '{fieldName}' (size {size})." + ) + f.write(value) + + def balance(self) -> None: """Adds corresponding empty attributes or null geometry records depending on which type of record was created to make sure all three files From 38a3f41d1f02c3f3f8daae83d90bf640c90d9e9c Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 1 Aug 2025 19:22:01 +0100 Subject: [PATCH 202/220] Refactor coercer into method. 
Remove call to deleted helper function --- src/shapefile.py | 90 +++++++++++++++++++++++++++++------------------- 1 file changed, 55 insertions(+), 35 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 9d1019f3..536e8fbc 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -3347,11 +3347,10 @@ def _try_coerce_to_numeric_str(value, size, decimal): return format(value, "d")[:size].rjust( size ) # caps the size if exceeds the field size - else: - value = float(value) - return format(value, f".{decimal}f")[:size].rjust( - size - ) # caps the size if exceeds the field size + value = float(value) + return format(value, f".{decimal}f")[:size].rjust( + size + ) # caps the size if exceeds the field size # if not decimal: # # force to int @@ -3398,10 +3397,6 @@ def _try_coerce_to_logical_str(value: RecordValue) -> str: return "F" return " " # unknown is set to space - - - - def __newdbfRecord(self, record: list[RecordValue]) -> None: """Writes the dbf records.""" f = self.__getFileObj(self.dbf) @@ -3449,7 +3444,28 @@ def __newdbfRecord(self, record: list[RecordValue]) -> None: ) f.write(encoded_val) - + def _original_coerce_to_numeric_str(self, value, size, deci): + # numeric or float: number stored as a string, right justified, and padded with blanks to the width of the field. + if value in MISSING: + return b"*" * size # QGIS NULL + if not deci: + # force to int + try: + # first try to force directly to int. + # forcing a large int to float and back to int + # will lose information and result in wrong nr. + value = int(value) + except ValueError: + # forcing directly to int failed, so was probably a float. 
+ value = int(float(value)) + return format(value, "d")[:size].rjust( + size + ) # caps the size if exceeds the field size + + value = float(value) + return format(value, f".{deci}f")[:size].rjust( + size + ) # caps the size if exceeds the field size def __dbfRecord(self, record): """Writes the dbf records.""" @@ -3471,27 +3487,28 @@ def __dbfRecord(self, record): fieldType = fieldType.upper() size = int(size) if fieldType in ("N", "F"): - # numeric or float: number stored as a string, right justified, and padded with blanks to the width of the field. - if value in MISSING: - value = b"*" * size # QGIS NULL - elif not deci: - # force to int - try: - # first try to force directly to int. - # forcing a large int to float and back to int - # will lose information and result in wrong nr. - value = int(value) - except ValueError: - # forcing directly to int failed, so was probably a float. - value = int(float(value)) - value = format(value, "d")[:size].rjust( - size - ) # caps the size if exceeds the field size - else: - value = float(value) - value = format(value, f".{deci}f")[:size].rjust( - size - ) # caps the size if exceeds the field size + value = self._original_coerce_to_numeric_str(value, size, deci) + # # numeric or float: number stored as a string, right justified, and padded with blanks to the width of the field. + # if value in MISSING: + # value = b"*" * size # QGIS NULL + # elif not deci: + # # force to int + # try: + # # first try to force directly to int. + # # forcing a large int to float and back to int + # # will lose information and result in wrong nr. + # value = int(value) + # except ValueError: + # # forcing directly to int failed, so was probably a float. 
+ # value = int(float(value)) + # value = format(value, "d")[:size].rjust( + # size + # ) # caps the size if exceeds the field size + # else: + # value = float(value) + # value = format(value, f".{deci}f")[:size].rjust( + # size + # ) # caps the size if exceeds the field size elif fieldType == "D": # date: 8 bytes - date stored as a string in the format YYYYMMDD. if isinstance(value, date): @@ -3500,7 +3517,7 @@ def __dbfRecord(self, record): value = f"{value[0]:04d}{value[1]:02d}{value[2]:02d}" elif value in MISSING: value = b"0" * 8 # QGIS NULL for date type - elif is_string(value) and len(value) == 8: + elif isinstance(value, str) and len(value) == 8: pass # value is already a date string else: raise ShapefileException( @@ -3519,13 +3536,17 @@ def __dbfRecord(self, record): else: # anything else is forced to string, truncated to the length of the field # value = b(value, self.encoding, self.encodingErrors)[:size].ljust(size) - value = str(value).encode(self.encoding, self.encodingErrors)[:size].ljust(size) + value = ( + str(value) + .encode(self.encoding, self.encodingErrors)[:size] + .ljust(size) + ) if not isinstance(value, bytes): # just in case some of the numeric format() and date strftime() results are still in unicode (Python 3 only) # value = b( # value, "ascii", self.encodingErrors # ) # should be default ascii encoding - value = value.encode('ascii', self.encodingErrors) + value = value.encode("ascii", self.encodingErrors) if len(value) != size: raise ShapefileException( "Shapefile Writer unable to pack incorrect sized value" @@ -3533,7 +3554,6 @@ def __dbfRecord(self, record): ) f.write(value) - def balance(self) -> None: """Adds corresponding empty attributes or null geometry records depending on which type of record was created to make sure all three files From 141f7583586e28c35dc83db11724ab54788c237a Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 1 Aug 2025 20:20:07 +0100 Subject: [PATCH 
203/220] Only coerce if not already int or float. Check isinstance( ,int/float) first. --- src/shapefile.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 536e8fbc..e9f13ece 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -3450,19 +3450,20 @@ def _original_coerce_to_numeric_str(self, value, size, deci): return b"*" * size # QGIS NULL if not deci: # force to int - try: - # first try to force directly to int. - # forcing a large int to float and back to int - # will lose information and result in wrong nr. - value = int(value) - except ValueError: - # forcing directly to int failed, so was probably a float. - value = int(float(value)) + if not isinstance(value, int): + try: + # first try to force directly to int. + # forcing a large int to float and back to int + # will lose information and result in wrong nr. + value = int(value) + except ValueError: + # forcing directly to int failed, so was probably a float. 
+ value = int(float(value)) return format(value, "d")[:size].rjust( size ) # caps the size if exceeds the field size - - value = float(value) + if not isinstance(value, float): + value = float(value) return format(value, f".{deci}f")[:size].rjust( size ) # caps the size if exceeds the field size From af9f09d857804c6db350335bb8fdf57ac9b43c83 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 1 Aug 2025 20:57:07 +0100 Subject: [PATCH 204/220] Make existing dbf record code statically typable --- src/shapefile.py | 125 ++++++++++++++++++++--------------------------- 1 file changed, 54 insertions(+), 71 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index e9f13ece..1207e84b 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -229,11 +229,12 @@ def from_unchecked( size: int = 50, decimal: int = 0, ) -> Self: - if field_type not in FIELD_TYPE_ALIASES: + try: + type_ = FIELD_TYPE_ALIASES[field_type] + except KeyError: raise ShapefileException( f"field_type must be in {{FieldType.__members__}}. Got: {field_type=}. " ) - type_ = FIELD_TYPE_ALIASES[field_type] if type_ is FieldType.D: size = 8 @@ -3444,30 +3445,6 @@ def __newdbfRecord(self, record: list[RecordValue]) -> None: ) f.write(encoded_val) - def _original_coerce_to_numeric_str(self, value, size, deci): - # numeric or float: number stored as a string, right justified, and padded with blanks to the width of the field. - if value in MISSING: - return b"*" * size # QGIS NULL - if not deci: - # force to int - if not isinstance(value, int): - try: - # first try to force directly to int. - # forcing a large int to float and back to int - # will lose information and result in wrong nr. - value = int(value) - except ValueError: - # forcing directly to int failed, so was probably a float. 
- value = int(float(value)) - return format(value, "d")[:size].rjust( - size - ) # caps the size if exceeds the field size - if not isinstance(value, float): - value = float(value) - return format(value, f".{deci}f")[:size].rjust( - size - ) # caps the size if exceeds the field size - def __dbfRecord(self, record): """Writes the dbf records.""" f = self.__getFileObj(self.dbf) @@ -3485,39 +3462,40 @@ def __dbfRecord(self, record): ) # ignore deletionflag field in case it was specified for (fieldName, fieldType, size, deci), value in zip(fields, record): # write - fieldType = fieldType.upper() - size = int(size) + # fieldName, fieldType, size and deci were already checked + # when their Field instance was created and added to self.fields + str_val: Optional[str] = None + if fieldType in ("N", "F"): - value = self._original_coerce_to_numeric_str(value, size, deci) - # # numeric or float: number stored as a string, right justified, and padded with blanks to the width of the field. - # if value in MISSING: - # value = b"*" * size # QGIS NULL - # elif not deci: - # # force to int - # try: - # # first try to force directly to int. - # # forcing a large int to float and back to int - # # will lose information and result in wrong nr. - # value = int(value) - # except ValueError: - # # forcing directly to int failed, so was probably a float. - # value = int(float(value)) - # value = format(value, "d")[:size].rjust( - # size - # ) # caps the size if exceeds the field size - # else: - # value = float(value) - # value = format(value, f".{deci}f")[:size].rjust( - # size - # ) # caps the size if exceeds the field size + # numeric or float: number stored as a string, right justified, and padded with blanks to the width of the field. + if value in MISSING: + str_val = "*" * size # QGIS NULL + elif not deci: + # force to int + try: + # first try to force directly to int. + # forcing a large int to float and back to int + # will lose information and result in wrong nr. 
+ num_val = int(value) + except ValueError: + # forcing directly to int failed, so was probably a float. + num_val = int(float(value)) + str_val = format(num_val, "d")[:size].rjust( + size + ) # caps the size if exceeds the field size + else: + f_val = float(value) + str_val = format(f_val, f".{deci}f")[:size].rjust( + size + ) # caps the size if exceeds the field size elif fieldType == "D": # date: 8 bytes - date stored as a string in the format YYYYMMDD. if isinstance(value, date): - value = f"{value.year:04d}{value.month:02d}{value.day:02d}" + str_val = f"{value.year:04d}{value.month:02d}{value.day:02d}" elif isinstance(value, list) and len(value) == 3: - value = f"{value[0]:04d}{value[1]:02d}{value[2]:02d}" + str_val = f"{value[0]:04d}{value[1]:02d}{value[2]:02d}" elif value in MISSING: - value = b"0" * 8 # QGIS NULL for date type + str_val = "0" * 8 # QGIS NULL for date type elif isinstance(value, str) and len(value) == 8: pass # value is already a date string else: @@ -3527,33 +3505,38 @@ def __dbfRecord(self, record): elif fieldType == "L": # logical: 1 byte - initialized to 0x20 (space) otherwise T or F. 
if value in MISSING: - value = b" " # missing is set to space + str_val = " " # missing is set to space elif value in [True, 1]: - value = b"T" + str_val = "T" elif value in [False, 0]: - value = b"F" + str_val = "F" else: - value = b" " # unknown is set to space - else: - # anything else is forced to string, truncated to the length of the field - # value = b(value, self.encoding, self.encodingErrors)[:size].ljust(size) - value = ( + str_val = " " # unknown is set to space + + if str_val is None: + # Types C and M, and anything else, value is forced to string, + # encoded by the codec specified to the Writer (utf-8 by default), + # then the resulting bytes are padded and truncated to the length + # of the field + encoded = ( str(value) .encode(self.encoding, self.encodingErrors)[:size] .ljust(size) ) - if not isinstance(value, bytes): - # just in case some of the numeric format() and date strftime() results are still in unicode (Python 3 only) - # value = b( - # value, "ascii", self.encodingErrors - # ) # should be default ascii encoding - value = value.encode("ascii", self.encodingErrors) - if len(value) != size: + else: + # str_val was given a not-None string value + # under the checks for fieldTypes "N", "F", "D", or "L" above + # Numeric, logical, and date numeric types are ascii already, but + # for Shapefile or dbf spec reasons + # "should be default ascii encoding" + encoded = str_val.encode("ascii", self.encodingErrors) + + if len(encoded) != size: raise ShapefileException( - "Shapefile Writer unable to pack incorrect sized value" - f" (size {len(value)}) into field '{fieldName}' (size {size})." + f"Shapefile Writer unable to pack incorrect sized {value=}" + f" (encoded as {len(encoded)}B) into field '{fieldName}' ({size}B)." 
) - f.write(value) + f.write(encoded) def balance(self) -> None: """Adds corresponding empty attributes or null geometry records depending From 68fa45f62a18c6923d67cb8a336d08feed42f755 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 1 Aug 2025 21:00:50 +0100 Subject: [PATCH 205/220] Delete unused methods --- src/shapefile.py | 134 ----------------------------------------------- 1 file changed, 134 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 1207e84b..b97e22ce 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -3309,141 +3309,7 @@ def record( record = ["" for _ in range(fieldCount)] self.__dbfRecord(record) - @staticmethod - def _dbf_missing_placeholder( - value: RecordValue, field_type: FieldTypeT, size: int - ) -> str: - if field_type is FieldType.N or field_type is FieldType.F: - return "*" * size # QGIS NULL - if field_type is FieldType.D: - return "0" * 8 # QGIS NULL for date type - if field_type is FieldType.L: - return " " - return str(value)[:size].ljust(size) - - @overload - @staticmethod - def _try_coerce_to_numeric_str(value: date, size: int, decimal: int) -> Never: ... - @overload - @staticmethod - def _try_coerce_to_numeric_str( - value: RecordValueNotDate, size: int, decimal: int - ) -> str: ... - @staticmethod - def _try_coerce_to_numeric_str(value, size, decimal): - # numeric or float: number stored as a string, - # right justified, and padded with blanks - # to the width of the field. - - if not decimal: - # force to int - try: - # first try to force directly to int. - # forcing a large int to float and back to int - # will lose information and result in wrong nr. - value = int(value) - except ValueError: - # forcing directly to int failed, so was probably a float. 
- value = int(float(value)) - return format(value, "d")[:size].rjust( - size - ) # caps the size if exceeds the field size - value = float(value) - return format(value, f".{decimal}f")[:size].rjust( - size - ) # caps the size if exceeds the field size - - # if not decimal: - # # force to int - # try: - # # first try to force directly to int. - # # forcing a large int to float and back to int - # # will lose information and result in wrong nr. - # int_val = int(value) - # except ValueError: - # # forcing directly to int failed, so was probably a float. - # int_val = int(float(value)) - # except TypeError: - # raise ShapefileException(f"Could not form int from: {value}") - # # length capped to the field size - # return format(int_val, "d")[:size].rjust(size) - - # try: - # f_val = float(value) - # except ValueError: - # raise ShapefileException(f"Could not form float from: {value}") - # # length capped to the field size - # return format(f_val, f".{decimal}f")[:size].rjust(size) - - @staticmethod - def _try_coerce_to_date_str(value: RecordValue) -> str: - # date: 8 bytes - date stored as a string in the format YYYYMMDD. - if isinstance(value, date): - return f"{value.year:04d}{value.month:02d}{value.day:02d}" - if isinstance(value, (list, tuple)) and len(value) == 3: - return f"{value[0]:04d}{value[1]:02d}{value[2]:02d}" - if isinstance(value, str) and len(value) == 8: - return value # value is already a date string - - raise ShapefileException( - "Date values must be either a datetime.date object, a list/tuple, a YYYYMMDD string, or a missing value." - ) - - @staticmethod - def _try_coerce_to_logical_str(value: RecordValue) -> str: - # logical: 1 byte - initialized to 0x20 (space) otherwise T or F. 
- if value == 1: # True == 1 - return "T" - if value == 0: # False == 0 - return "F" - return " " # unknown is set to space - - def __newdbfRecord(self, record: list[RecordValue]) -> None: - """Writes the dbf records.""" - f = self.__getFileObj(self.dbf) - if self.recNum == 0: - # first records, so all fields should be set - # allowing us to write the dbf header - # cannot change the fields after this point - self.__dbfHeader() - # first byte of the record is deletion flag, always disabled - f.write(b" ") - # begin - self.recNum += 1 - fields = ( - field for field in self.fields if field[0] != "DeletionFlag" - ) # ignore deletionflag field in case it was specified - for (fieldName, type_, size, decimal), value in zip(fields, record): - # write - size = int(size) - str_val: str - - if value in MISSING: - str_val = self._dbf_missing_placeholder(value, type_, size) - elif type_ is FieldType.N or type_ is FieldType.F: - str_val = self._try_coerce_to_numeric_str(value, size, decimal) - elif type_ is FieldType.D: - str_val = self._try_coerce_to_date_str(value) - elif type_ is FieldType.L: - str_val = self._try_coerce_to_logical_str(value) - else: - if isinstance(value, bytes): - str_val = value.decode(self.encoding, self.encodingErrors) - else: - # anything else is forced to string. - str_val = str(value) - - # Truncate or right pad to the length of the field - encoded_val = str_val.encode(self.encoding, self.encodingErrors)[ - :size - ].ljust(size) - if len(encoded_val) != size: - raise ShapefileException( - f"Shapefile Writer unable to pack incorrect sized {value=!r} " - f"(size {len(encoded_val)}) into field '{fieldName}' (size {size})." 
- ) - f.write(encoded_val) def __dbfRecord(self, record): """Writes the dbf records.""" From 17431a244f82761290c8ef1f513207ab16d59c9b Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 1 Aug 2025 21:02:06 +0100 Subject: [PATCH 206/220] Reformat --- src/shapefile.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index b97e22ce..ca761a48 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -3309,8 +3309,6 @@ def record( record = ["" for _ in range(fieldCount)] self.__dbfRecord(record) - - def __dbfRecord(self, record): """Writes the dbf records.""" f = self.__getFileObj(self.dbf) From c5ba356a67b0ecd5ebd51466dab1311214310e81 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 1 Aug 2025 21:19:53 +0100 Subject: [PATCH 207/220] Copy in changes from Use-identity-not-equality that PR/Merge conflicts messed up --- src/shapefile.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index ea1f8c18..ca761a48 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -19,7 +19,6 @@ import tempfile import time import zipfile -from collections.abc import Hashable from datetime import date from struct import Struct, calcsize, error, pack, unpack from typing import ( @@ -183,7 +182,7 @@ def read(self, size: int = -1): ... 
class FieldType: """A bare bones 'enum', as the enum library noticeably slows performance.""" - # __slots__ = ["C", "D", "F", "L", "M", "N", "__members__", "raise_if_invalid", "is_numeric"] + # __slots__ = ["C", "D", "F", "L", "M", "N", "__members__"] C: Final = "C" # "Character" # (str) D: Final = "D" # "Date" @@ -200,6 +199,11 @@ class FieldType: "N", } # set(__slots__) - {"__members__"} + # def raise_if_invalid(field_type: Hashable): + # if field_type not in FieldType.__members__: + # raise ShapefileException( + # f"field_type must be in {{FieldType.__members__}}. Got: {field_type=}. " + # ) FIELD_TYPE_ALIASES: dict[Union[str, bytes], FieldTypeT] = {} @@ -210,7 +214,7 @@ class FieldType: FIELD_TYPE_ALIASES[c.encode("ascii").upper()] = c - +# Use functional syntax to have an attribute named type, a Python keyword class Field(NamedTuple): name: str field_type: FieldTypeT @@ -2569,7 +2573,7 @@ def __record( # parse each value record = [] for (__name, typ, __size, decimal), value in zip(fieldTuples, recordContents): - if FieldType.is_numeric(typ): + if typ is FieldType.N or typ is FieldType.F: # numeric or float: number stored as a string, right justified, and padded with blanks to the width of the field. value = value.split(b"\0")[0] value = value.replace(b"*", b"") # QGIS NULL is all '*' chars @@ -2595,7 +2599,7 @@ def __record( except ValueError: # not parseable as int, set to None value = None - elif typ == FieldType.D: + elif typ is FieldType.D: # date: 8 bytes - date stored as a string in the format YYYYMMDD. if ( not value.replace(b"\x00", b"") @@ -2613,7 +2617,7 @@ def __record( except (TypeError, ValueError): # if invalid date, just return as unicode string so user can decimalde value = str(value.strip()) - elif typ == FieldType.L: + elif typ is FieldType.L: # logical: 1 byte - initialized to 0x20 (space) otherwise T or F. 
if value == b" ": value = None # space means missing or not yet set @@ -3326,8 +3330,6 @@ def __dbfRecord(self, record): # when their Field instance was created and added to self.fields str_val: Optional[str] = None - - if fieldType in ("N", "F"): # numeric or float: number stored as a string, right justified, and padded with blanks to the width of the field. if value in MISSING: From 5c1ca4504d3cceb69416688612f5724cc953a5ff Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 1 Aug 2025 22:07:56 +0100 Subject: [PATCH 208/220] REstore original (correct) __zbox and __mbox implementations. --- src/shapefile.py | 42 ++++++++++++++++++++++-------------------- 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index ca761a48..b1cb504c 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -1162,7 +1162,7 @@ def _get_nparts_from_byte_stream(b_io: ReadableBinStream) -> int: return unpack(" int: + def _write_nparts_to_byte_stream(b_io: WriteableBinStream, s: _CanHaveParts) -> int: return b_io.write(pack(" BBox: self._bbox = (min(x), min(y), max(x), max(y)) return self._bbox - def __zbox(self, s) -> ZBox: + def __zbox(self, s: Union[_HasZ, PointZ]) -> ZBox: z: list[float] = [] - if self._zbox: - z.extend(self._zbox) - for p in s.points: try: z.append(p[2]) @@ -2996,18 +2993,19 @@ def __zbox(self, s) -> ZBox: # point did not have z value # setting it to 0 is probably ok, since it means all are on the same elevation z.append(0) + zbox = (min(z), max(z)) + # update global + if self._zbox: + # compare with existing + self._zbox = (min(zbox[0], self._zbox[0]), max(zbox[1], self._zbox[1])) + else: + # first time zbox is being set + self._zbox = zbox + return zbox - # Original self._zbox bounds (if any) are the first two entries. 
- # Set zbox for the first, and all later times - # self._zbox = ZBox(zmin=min(z), zmax=max(z)) - self._zbox = (min(z), max(z)) - return self._zbox - - def __mbox(self, s) -> MBox: - mpos = 3 if s.shapeType in _HasZ._shapeTypes else 2 + def __mbox(self, s: Union[_HasM, PointM]) -> MBox: + mpos = 3 if s.shapeType in _HasZ._shapeTypes | PointZ.shapeTypes else 2 m: list[float] = [] - if self._mbox: - m.extend(m_bound for m_bound in self._mbox if m_bound is not None) for p in s.points: try: @@ -3021,12 +3019,16 @@ def __mbox(self, s) -> MBox: if not m: # only if none of the shapes had m values, should mbox be set to missing m values m.append(NODATA) + mbox = (min(m), max(m)) + # update global + if self._mbox: + # compare with existing + self._mbox = (min(mbox[0], self._mbox[0]), max(mbox[1], self._mbox[1])) + else: + # first time mbox is being set + self._mbox = mbox + return mbox - # Original self._mbox bounds (if any) are the first two entries. - # Set mbox for the first, and all later times - # self._mbox = MBox(mmin=min(m), mmax=max(m)) - self._mbox = (min(m), max(m)) - return self._mbox @property def shapeTypeName(self) -> str: From b7f4f7dabbfbf95a244e6e2d6234d02814ab1ab2 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 1 Aug 2025 23:11:06 +0100 Subject: [PATCH 209/220] Restore old __?box method implementations, and type them. 
--- src/shapefile.py | 123 ++++++++++++++++++++++++----------------------- 1 file changed, 63 insertions(+), 60 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index b1cb504c..5049653b 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -124,7 +124,7 @@ PointsT = list[PointT] BBox = tuple[float, float, float, float] -MBox = tuple[Optional[float], Optional[float]] +MBox = tuple[float, float] ZBox = tuple[float, float] # class BBox(NamedTuple): @@ -727,8 +727,8 @@ def __init__( # Preserve previous behaviour for anyone who set self.shapeType = None if not isinstance(shapeType, _NoShapeTypeSentinel): self.shapeType = shapeType - self.points = points or [] - self.parts = parts or [] + self.points: PointsT = points or [] + self.parts: Sequence[int] = parts or [] if partTypes: self.partTypes = partTypes @@ -2254,7 +2254,7 @@ def __shpHeader(self) -> None: for m_bound in unpack("<2d", shp.read(16)) ] # self.mbox = MBox(mmin=m_bounds[0], mmax=m_bounds[1]) - self.mbox: MBox = (m_bounds[0], m_bounds[1]) + self.mbox: tuple[Optional[float], Optional[float]] = (m_bounds[0], m_bounds[1]) def __shape( self, oid: Optional[int] = None, bbox: Optional[BBox] = None @@ -2959,41 +2959,47 @@ def __shpFileLength(self) -> int: return size def __bbox(self, s: Shape) -> BBox: - x: list[float] = [] - y: list[float] = [] + xs: list[float] = [] + ys: list[float] = [] - if self._bbox: - x.append(self._bbox[0]) - y.append(self._bbox[1]) - x.append(self._bbox[2]) - y.append(self._bbox[3]) - - if len(s.points) > 0: - px, py = list(zip(*s.points))[:2] - x.extend(px) - y.extend(py) - else: + if not s.points: # this should not happen. # any shape that is not null should have at least one point, and only those should be sent here. # could also mean that earlier code failed to add points to a non-null shape. - raise ValueError( + raise ShapefileException( "Cannot create bbox. Expected a valid shape with at least one point. " - f"Got a shape of type '{s.shapeType}' and 0 points." 
+ f"Got a shape of type {s.shapeType=} and 0 points." ) - # self._bbox = BBox(xmin=min(x), ymin=min(y), xmax=max(x), ymax=max(y)) - self._bbox = (min(x), min(y), max(x), max(y)) - return self._bbox + + for point in s.points: + xs.append(point[0]) + ys.append(point[1]) + + shape_bbox = (min(xs), min(ys), max(xs), max(ys)) + # update global + if self._bbox: + # compare with existing + self._bbox = ( + min(shape_bbox[0], self._bbox[0]), + min(shape_bbox[1], self._bbox[1]), + max(shape_bbox[2], self._bbox[2]), + max(shape_bbox[3], self._bbox[3]), + ) + else: + # first time bbox is being set + self._bbox = shape_bbox + return shape_bbox def __zbox(self, s: Union[_HasZ, PointZ]) -> ZBox: - z: list[float] = [] - for p in s.points: - try: - z.append(p[2]) - except IndexError: - # point did not have z value - # setting it to 0 is probably ok, since it means all are on the same elevation - z.append(0) - zbox = (min(z), max(z)) + shape_zs: list[float] = [] + if s.z: + shape_zs.extend(s.z) + else: + for p in s.points: + # On a ShapeZ type, M is at index 4, and the point can be a 3-tuple or 4-tuple. 
+ z = p[2] if len(p) >= 3 and p[2] is not None else 0 + shape_zs.append(z) + zbox = (min(shape_zs), max(shape_zs)) # update global if self._zbox: # compare with existing @@ -3004,22 +3010,20 @@ def __zbox(self, s: Union[_HasZ, PointZ]) -> ZBox: return zbox def __mbox(self, s: Union[_HasM, PointM]) -> MBox: - mpos = 3 if s.shapeType in _HasZ._shapeTypes | PointZ.shapeTypes else 2 - m: list[float] = [] + mpos = 3 if s.shapeType in _HasZ._shapeTypes | PointZ._shapeTypes else 2 + shape_ms: list[float] = [] + if s.m: + shape_ms.extend(m for m in s.m if m is not None) + else: + for p in s.points: + m = p[mpos] if len(p) >= mpos + 1 else None + if m is not None: + shape_ms.append(m) - for p in s.points: - try: - if p[mpos] is not None: - # mbox should only be calculated on valid m values - m.append(p[mpos]) - except IndexError: - # point did not have m value so is missing - # mbox should only be calculated on valid m values - pass - if not m: + if not shape_ms: # only if none of the shapes had m values, should mbox be set to missing m values - m.append(NODATA) - mbox = (min(m), max(m)) + shape_ms.append(NODATA) + mbox = (min(shape_ms), max(shape_ms)) # update global if self._mbox: # compare with existing @@ -3029,7 +3033,6 @@ def __mbox(self, s: Union[_HasM, PointM]) -> MBox: self._mbox = mbox return mbox - @property def shapeTypeName(self) -> str: return SHAPETYPE_LOOKUP[self.shapeType or 0] @@ -3210,17 +3213,17 @@ def __shpRecord(self, s: Shape) -> tuple[int, int]: # For both single point and multiple-points non-null shapes, # update bbox, mbox and zbox of the whole shapefile - new_bbox = self.__bbox(s) if s.shapeType != NULL else None - new_mbox = ( - self.__mbox(s) - if s.shapeType in PointM._shapeTypes | _HasM._shapeTypes - else None - ) - new_zbox = ( - self.__zbox(s) - if s.shapeType in PointZ._shapeTypes | _HasZ._shapeTypes - else None - ) + shape_bbox = self.__bbox(s) if s.shapeType != NULL else None + + if s.shapeType in PointM._shapeTypes | _HasM._shapeTypes: + 
shape_mbox = self.__mbox(cast(Union[_HasM, PointM], s)) + else: + shape_mbox = None + + if s.shapeType in PointZ._shapeTypes | _HasZ._shapeTypes: + shape_zbox = self.__zbox(cast(Union[_HasZ, PointZ], s)) + else: + shape_zbox = None # Create an in-memory binary buffer to avoid # unnecessary seeks to files on disk @@ -3243,9 +3246,9 @@ def __shpRecord(self, s: Shape) -> tuple[int, int]: b_io=b_io, s=s, i=self.shpNum, - bbox=new_bbox, - mbox=new_mbox, - zbox=new_zbox, + bbox=shape_bbox, + mbox=shape_mbox, + zbox=shape_zbox, ) # Finalize record length as 16-bit words From 9deff14472bddfca4d992a83afd0ed48bcf31357 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 1 Aug 2025 23:18:20 +0100 Subject: [PATCH 210/220] Initialise .m and .z on multi-point shapes not read from .shp files --- src/shapefile.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/shapefile.py b/src/shapefile.py index 5049653b..a1f5c81c 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -1291,6 +1291,10 @@ class _HasM(_CanHaveBBox): ) m: Sequence[Optional[float]] + def __init__(self, *args, **kwargs): + self.z = [] + super().__init__(*args, **kwargs) + def _set_ms_from_byte_stream( self, b_io: ReadSeekableBinStream, nPoints: int, next_shape: int ): @@ -1360,6 +1364,10 @@ class _HasZ(_CanHaveBBox): ) z: Sequence[float] + def __init__(self, *args, **kwargs): + self.z = [] + super().__init__(*args, **kwargs) + def _set_zs_from_byte_stream(self, b_io: ReadableBinStream, nPoints: int): __zmin, __zmax = unpack("<2d", b_io.read(16)) # pylint: disable=unused-private-member self.z = _Array[float]("d", unpack(f"<{nPoints}d", b_io.read(nPoints * 8))) From 5dfa9fe675537fb376f62d6b307f5588210ab0ee Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 1 Aug 2025 23:30:38 +0100 Subject: [PATCH 211/220] Specify .m on PolylineM etc. 
--- src/shapefile.py | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index a1f5c81c..150d0ce1 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -1327,7 +1327,7 @@ def _write_ms_to_byte_stream( f"Failed to write measure extremes for record {i}. Expected floats" ) try: - if hasattr(s, "m"): + if getattr(s, "m", False): # if m values are stored in attribute ms = [m if m is not None else NODATA for m in s.m] @@ -1335,12 +1335,9 @@ def _write_ms_to_byte_stream( # if m values are stored as 3rd/4th dimension # 0-index position of m value is 3 if z type (x,y,z,m), or 2 if m type (x,y,m) mpos = 3 if s.shapeType in _HasZ._shapeTypes else 2 - ms = [] - for p in s.points: - if len(p) > mpos and p[mpos] is not None: - ms.append(p[mpos]) - else: - ms.append(NODATA) + ms = [p[mpos] if len(p) > mpos and p[mpos] is not None else NODATA + for p in s.points + ] num_bytes_written += b_io.write(pack(f"<{len(ms)}d", *ms)) @@ -1388,7 +1385,7 @@ def _write_zs_to_byte_stream( f"Failed to write elevation extremes for record {i}. Expected floats." ) try: - if hasattr(s, "z"): + if getattr(s, "z", False): # if z values are stored in attribute zs = s.z else: @@ -1444,16 +1441,13 @@ def _write_single_point_m_to_byte_stream( # Write a single M value # Note: missing m values are autoset to NODATA. - if hasattr(s, "m"): + if hasattr(s, "m", False): # if m values are stored in attribute try: # if not s.m or s.m[0] is None: # s.m = (NODATA,) # m = s.m[0] - if s.m and s.m[0] is not None: - m = s.m[0] - else: - m = NODATA + m = s.m[0] if s.m and s.m[0] is not None else NODATA except error: raise ShapefileException( f"Failed to write measure value for record {i}. Expected floats." 
From b456d681b2e0d5bb87bf0d57fb2bfbcfa837b496 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 1 Aug 2025 23:41:20 +0100 Subject: [PATCH 212/220] Initialise .m on _HasM instances, not .z --- src/shapefile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/shapefile.py b/src/shapefile.py index 150d0ce1..287ab603 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -1292,7 +1292,7 @@ class _HasM(_CanHaveBBox): m: Sequence[Optional[float]] def __init__(self, *args, **kwargs): - self.z = [] + self.m = [] super().__init__(*args, **kwargs) def _set_ms_from_byte_stream( From 6d17738c43c29888e387f39b3257da08f489c793 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 1 Aug 2025 23:56:05 +0100 Subject: [PATCH 213/220] Fix type checking --- src/shapefile.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 287ab603..2c741d02 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -1329,15 +1329,18 @@ def _write_ms_to_byte_stream( try: if getattr(s, "m", False): # if m values are stored in attribute - ms = [m if m is not None else NODATA for m in s.m] + ms = [m if m is not None else NODATA for m in cast(_HasM, s).m] else: # if m values are stored as 3rd/4th dimension # 0-index position of m value is 3 if z type (x,y,z,m), or 2 if m type (x,y,m) mpos = 3 if s.shapeType in _HasZ._shapeTypes else 2 - ms = [p[mpos] if len(p) > mpos and p[mpos] is not None else NODATA - for p in s.points - ] + ms = [ + cast(float, p[mpos]) + if len(p) > mpos and p[mpos] is not None + else NODATA + for p in s.points + ] num_bytes_written += b_io.write(pack(f"<{len(ms)}d", *ms)) @@ -1387,10 +1390,10 @@ def _write_zs_to_byte_stream( try: if getattr(s, "z", False): # if z values are stored in attribute - zs = s.z + zs = cast(_HasZ, s).z else: # if z values are stored as 3rd dimension - zs 
= [p[2] if len(p) > 2 else 0 for p in s.points] + zs = [cast(float, p[2]) if len(p) > 2 else 0 for p in s.points] num_bytes_written += b_io.write(pack(f"<{len(zs)}d", *zs)) except error: @@ -1441,13 +1444,14 @@ def _write_single_point_m_to_byte_stream( # Write a single M value # Note: missing m values are autoset to NODATA. - if hasattr(s, "m", False): + if getattr(s, "m", False): # if m values are stored in attribute try: # if not s.m or s.m[0] is None: # s.m = (NODATA,) # m = s.m[0] - m = s.m[0] if s.m and s.m[0] is not None else NODATA + s = cast(_HasM, s) + m = s.m[0] if s.m and s.m[0] is not None else NODATA except error: raise ShapefileException( f"Failed to write measure value for record {i}. Expected floats." @@ -1464,7 +1468,7 @@ def _write_single_point_m_to_byte_stream( # s.points[0][mpos] = NODATA m = NODATA else: - m = s.points[0][mpos] + m = cast(float, s.points[0][mpos]) except error: raise ShapefileException( From d28290c8e0ff54dbf4fd0762380b70f9dbb0f1e0 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Sun, 3 Aug 2025 15:43:20 +0100 Subject: [PATCH 214/220] Type kwargs to Shape as TypedDict --- .pre-commit-config.yaml | 2 +- README.md | 5 +- pyproject.toml | 39 +- pytest.ini | 4 - src/shapefile.py | 1359 ++++++++++++++++++++++++--------------- 5 files changed, 847 insertions(+), 562 deletions(-) delete mode 100644 pytest.ini diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 1ef33138..e12617ac 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -19,6 +19,6 @@ repos: - id: check-yaml - id: trailing-whitespace - repo: https://github.com/pre-commit/mirrors-mypy - rev: v1.17.0 + rev: v1.17.1 hooks: - id: mypy \ No newline at end of file diff --git a/README.md b/README.md index 66951be1..2b25fbd8 100644 --- a/README.md +++ b/README.md @@ -466,8 +466,7 @@ index which is 7. 
>>> s = sf.shape(7) - >>> s - Shape #7: POLYGON + >>> # Read the bbox of the 8th shape to verify >>> # Round coordinates to 3 decimal places @@ -476,7 +475,7 @@ index which is 7. Each shape record (except Points) contains the following attributes. Records of shapeType Point do not have a bounding box 'bbox'. - +# TODO!! Fix attributes >>> for name in dir(shapes[3]): ... if not name.startswith('_'): diff --git a/pyproject.toml b/pyproject.toml index 7d043ae5..00a755fd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,7 +27,6 @@ classifiers = [ "Topic :: Software Development :: Libraries :: Python Modules", ] dependencies = [ - "typing_extensions", ] [project.optional-dependencies] @@ -47,6 +46,12 @@ sources = {"src" = ""} # move from "src" directory for wheel [tool.hatch.version] path = "src/shapefile.py" +[tool.pytest.ini_options] +markers = [ + "network: marks tests requiring network access", + "slow: marks other tests that cause bottlenecks", +] + [tool.ruff] # Exclude a variety of commonly ignored directories. exclude = [ @@ -111,35 +116,3 @@ skip-magic-trailing-comma = false line-ending = "auto" -[tool.pylint.MASTER] -load-plugins=[ - "pylint_per_file_ignores", -] - -# Silence warnings: src/shapefile.py:2076:20: W0212: Access to a protected member _from_geojson of a client class (protected-access) -# src/shapefile.py:950:16: W0201: Attribute 'm' defined outside __init__ (attribute-defined-outside-init) -# src/shapefile.py:973:12: W0707: Consider explicitly re-raising using 'except error as exc' and -# 'raise ShapefileException(f'Failed to write bounding box for record {i}. 
-# Expected floats.') from exc' (raise-missing-from) -# Silence remarks: -# src\shapefile.py:338:0: R0914: Too many local variables (21/15) (too-many-locals) -# src\shapefile.py:338:0: R0912: Too many branches (24/12) (too-many-branches) -# src\shapefile.py:338:0: R0915: Too many statements (52/50) (too-many-statements) -# src\shapefile.py:470:0: R0902: Too many instance attributes (9/7) (too-many-instance-attributes) -# src\shapefile.py:471:4: R0913: Too many arguments (6/5) (too-many-arguments) -# src\shapefile.py:471:4: R0917: Too many positional arguments (6/5) (too-many-positional-arguments) -# src\shapefile.py:506:4: R0911: Too many return statements (10/6) (too-many-return-statements) -# src\shapefile.py:878:0: R0903: Too few public methods (0/2) (too-few-public-methods) -# src\shapefile.py:1981:0: R0904: Too many public methods (23/20) (too-many-public-methods) -# src\shapefile.py:2117:17: R1732: Consider using 'with' for resource-allocating operations (consider-using-with) -# Silence warnings: test_shapefile.py:{783,786,799,803,06,1195}:19: -# W0212: Access to a protected member _offsets of a -# client class (protected-access) -# -# Toml multi-line string used instead of array due to: -# https://github.com/christopherpickering/pylint-per-file-ignores/issues/160 -[tool.pylint.'messages control'] -per-file-ignores = [ - "/src/shapefile.py:W0707,W0212,W0201,R0902,R0903,R0904,R0911,R0912,R0913,R0914,R0915,R0917,R1732", - "test_shapefile.py:W0212,R1732", -] diff --git a/pytest.ini b/pytest.ini deleted file mode 100644 index 39fbfaed..00000000 --- a/pytest.ini +++ /dev/null @@ -1,4 +0,0 @@ -[pytest] -markers = - network: marks tests requiring network access - slow: marks other tests that cause bottlenecks diff --git a/src/shapefile.py b/src/shapefile.py index 2c741d02..316db0cd 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -31,6 +31,7 @@ Iterator, Literal, NamedTuple, + NoReturn, Optional, Protocol, Reversible, @@ -45,8 +46,6 @@ from urllib.parse 
import urlparse, urlunparse from urllib.request import Request, urlopen -from typing_extensions import Never, NotRequired, Self, TypeIs - # Create named logger logger = logging.getLogger(__name__) @@ -77,22 +76,24 @@ MULTIPATCH = 31 SHAPETYPE_LOOKUP = { - 0: "NULL", - 1: "POINT", - 3: "POLYLINE", - 5: "POLYGON", - 8: "MULTIPOINT", - 11: "POINTZ", - 13: "POLYLINEZ", - 15: "POLYGONZ", - 18: "MULTIPOINTZ", - 21: "POINTM", - 23: "POLYLINEM", - 25: "POLYGONM", - 28: "MULTIPOINTM", - 31: "MULTIPATCH", + NULL: "NULL", + POINT: "POINT", + POLYLINE: "POLYLINE", + POLYGON: "POLYGON", + MULTIPOINT: "MULTIPOINT", + POINTZ: "POINTZ", + POLYLINEZ: "POLYLINEZ", + POLYGONZ: "POLYGONZ", + MULTIPOINTZ: "MULTIPOINTZ", + POINTM: "POINTM", + POLYLINEM: "POLYLINEM", + POLYGONM: "POLYGONM", + MULTIPOINTM: "MULTIPOINTM", + MULTIPATCH: "MULTIPATCH", } +SHAPETYPENUM_LOOKUP = {name: code for code, name in SHAPETYPE_LOOKUP.items()} + TRIANGLE_STRIP = 0 TRIANGLE_FAN = 1 OUTER_RING = 2 @@ -127,25 +128,9 @@ MBox = tuple[float, float] ZBox = tuple[float, float] -# class BBox(NamedTuple): -# xmin: float -# ymin: float -# xmax: float -# ymax: float - - -# class MBox(NamedTuple): -# mmin: Optional[float] -# mmax: Optional[float] - - -# class ZBox(NamedTuple): -# zmin: float -# zmax: float - class WriteableBinStream(Protocol): - def write(self, b: bytes): ... # pylint: disable=redefined-outer-name + def write(self, b: bytes): ... class ReadableBinStream(Protocol): @@ -153,20 +138,20 @@ def read(self, size: int = -1): ... class WriteSeekableBinStream(Protocol): - def write(self, b: bytes): ... # pylint: disable=redefined-outer-name - def seek(self, offset: int, whence: int = 0): ... # pylint: disable=unused-argument + def write(self, b: bytes): ... + def seek(self, offset: int, whence: int = 0): ... def tell(self): ... class ReadSeekableBinStream(Protocol): - def seek(self, offset: int, whence: int = 0): ... # pylint: disable=unused-argument + def seek(self, offset: int, whence: int = 0): ... 
def tell(self): ... def read(self, size: int = -1): ... class ReadWriteSeekableBinStream(Protocol): - def write(self, b: bytes): ... # pylint: disable=redefined-outer-name - def seek(self, offset: int, whence: int = 0): ... # pylint: disable=unused-argument + def write(self, b: bytes): ... + def seek(self, offset: int, whence: int = 0): ... def tell(self): ... def read(self, size: int = -1): ... @@ -182,8 +167,6 @@ def read(self, size: int = -1): ... class FieldType: """A bare bones 'enum', as the enum library noticeably slows performance.""" - # __slots__ = ["C", "D", "F", "L", "M", "N", "__members__"] - C: Final = "C" # "Character" # (str) D: Final = "D" # "Date" F: Final = "F" # "Floating point" @@ -197,13 +180,7 @@ class FieldType: "L", "M", "N", - } # set(__slots__) - {"__members__"} - - # def raise_if_invalid(field_type: Hashable): - # if field_type not in FieldType.__members__: - # raise ShapefileException( - # f"field_type must be in {{FieldType.__members__}}. Got: {field_type=}. " - # ) + } FIELD_TYPE_ALIASES: dict[Union[str, bytes], FieldTypeT] = {} @@ -228,7 +205,7 @@ def from_unchecked( field_type: Union[str, bytes, FieldTypeT] = "C", size: int = 50, decimal: int = 0, - ) -> Self: + ) -> Field: try: type_ = FIELD_TYPE_ALIASES[field_type] except KeyError: @@ -346,12 +323,8 @@ class GeoJSONFeatureCollection(TypedDict): class GeoJSONFeatureCollectionWithBBox(GeoJSONFeatureCollection): - # bbox is optional - # typing.NotRequired requires Python 3.11 - # and we must support 3.9 (at least until October) - # https://docs.python.org/3/library/typing.html#typing.Required - # Is there a backport? 
- bbox: NotRequired[list[float]] + # bbox is technically optional under the spec + bbox: list[float] # Helpers @@ -684,34 +657,55 @@ class _NoShapeTypeSentinel: """ -class Shape: - shapeType: int = NULL - _shapeTypes = frozenset( - [ - NULL, - POINT, - POINTM, - POINTZ, - POLYLINE, - POLYLINEM, - POLYLINEZ, - POLYGON, - POLYGONM, - POLYGONZ, - MULTIPOINT, - MULTIPOINTM, - MULTIPOINTZ, - MULTIPATCH, - ] - ) +def _m_from_point(point: Union[PointMT, PointZT], mpos: int) -> Optional[float]: + if len(point) > mpos and point[mpos] is not None: + return cast(float, point[mpos]) + return None + + +def _ms_from_points( + points: Union[list[PointMT], list[PointZT]], mpos: int +) -> Iterator[Optional[float]]: + return (_m_from_point(p, mpos) for p in points) + + +def _z_from_point(point: PointZT) -> float: + if len(point) >= 3 and point[2] is not None: + return point[2] + return 0.0 + + +def _zs_from_points(points: Iterable[PointZT]) -> Iterator[float]: + return (_z_from_point(p) for p in points) + + +class CanHaveBboxNoLinesKwargs(TypedDict, total=False): + oid: Optional[int] + points: Optional[PointsT] + parts: Optional[Sequence[int]] # index of start point of each part + partTypes: Optional[Sequence[int]] + bbox: Optional[BBox] + m: Optional[Sequence[Optional[float]]] + z: Optional[Sequence[float]] + mbox: Optional[MBox] + zbox: Optional[ZBox] + +class Shape: def __init__( self, shapeType: Union[int, _NoShapeTypeSentinel] = _NoShapeTypeSentinel(), points: Optional[PointsT] = None, - parts: Optional[Sequence[int]] = None, + parts: Optional[Sequence[int]] = None, # index of start point of each part + lines: Optional[list[PointsT]] = None, partTypes: Optional[Sequence[int]] = None, oid: Optional[int] = None, + *, + m: Optional[Sequence[Optional[float]]] = None, + z: Optional[Sequence[float]] = None, + bbox: Optional[BBox] = None, + mbox: Optional[MBox] = None, + zbox: Optional[ZBox] = None, ): """Stores the geometry of the different shape types specified in the Shapefile 
spec. Shape types are @@ -724,30 +718,150 @@ def __init__( list of shapes. For MultiPatch geometry, partTypes designates the patch type of each of the parts. """ + # Preserve previous behaviour for anyone who set self.shapeType = None if not isinstance(shapeType, _NoShapeTypeSentinel): self.shapeType = shapeType - self.points: PointsT = points or [] - self.parts: Sequence[int] = parts or [] - if partTypes: + else: + class_name = self.__class__.__name__ + self.shapeType = SHAPETYPENUM_LOOKUP.get(class_name.upper(), NULL) + + if partTypes is not None: self.partTypes = partTypes - # and a dict to silently record any errors encountered + default_points: PointsT = [] + default_parts: list[int] = [] + + # Make sure polygon rings (parts) are closed + if lines is not None: + if self.shapeType in Polygon_shapeTypes: + lines = list(lines) + self._ensure_polygon_rings_closed(lines) + + default_points, default_parts = self._points_and_parts_indexes_from_lines( + lines + ) + elif points and self.shapeType in _CanHaveBBox_shapeTypes: + # TODO: Raise issue. + # This ensures Polylines, Polygons and Multipatches with no part information are a single + # Polyline, Polygon or Multipatch respectively. + # + # However this also allows MultiPoints shapes to have a single part index 0 as + # documented in README.md,also when set from points + # (even though this is just an artefact of initialising them as a length-1 nested + # list of points via _points_and_parts_indexes_from_lines). + # + # Alternatively single points could be given parts = [0] too, as they do if formed + # _from_geojson. 
+ default_parts = [0] + + self.points: PointsT = points or default_points + + self.parts: Sequence[int] = parts or default_parts + + # and a dict to silently record any errors encountered in GeoJSON self._errors: dict[str, int] = {} # add oid - if oid is not None: - self.__oid = oid + self.__oid: int = -1 if oid is None else oid + + if bbox is not None: + self.bbox: BBox = bbox + elif len(self.points) >= 2: + self.bbox = self._bbox_from_points() + + ms_found = True + if m: + self.m: Sequence[Optional[float]] = m + elif self.shapeType in _HasM_shapeTypes: + mpos = 3 if self.shapeType in _HasZ_shapeTypes | PointZ_shapeTypes else 2 + points_m_z = cast(Union[list[PointMT], list[PointZT]], self.points) + self.m = list(_ms_from_points(points_m_z, mpos)) + elif self.shapeType in PointM_shapeTypes: + mpos = 3 if self.shapeType == POINTZ else 2 + point_m_z = cast(Union[PointMT, PointZT], self.points[0]) + self.m = (_m_from_point(point_m_z, mpos),) else: - self.__oid = -1 + ms_found = False + + zs_found = True + if z: + self.z: Sequence[float] = z + elif self.shapeType in _HasZ_shapeTypes: + points_z = cast(list[PointZT], self.points) + self.z = list(_zs_from_points(points_z)) + elif self.shapeType == POINTZ: + point_z = cast(PointZT, self.points[0]) + self.z = (_z_from_point(point_z),) + else: + zs_found = False + + if mbox is not None: + self.mbox: MBox = mbox + elif ms_found: + self.mbox = self._mbox_from_ms() + + if zbox is not None: + self.zbox: ZBox = zbox + elif zs_found: + self.zbox = self._zbox_from_zs() - # self.z: Optional[Union[list[Optional[float]], _Array[float]]] = None - # self.m: Optional[list[Optional[float]]] = None - # self.bbox: Optional[_Array[float]] = None + @staticmethod + def _ensure_polygon_rings_closed( + parts: list[PointsT], # Mutated + ) -> None: + for part in parts: + if part[0] != part[-1]: + part.append(part[0]) + + @staticmethod + def _points_and_parts_indexes_from_lines( + parts: list[PointsT], + ) -> tuple[PointsT, list[int]]: + # 
Intended for Union[Polyline, Polygon, MultiPoint, MultiPatch] + """From a list of parts (each part a list of points) return + a flattened list of points, and a list of indexes into that + flattened list corresponding to the start of each part. + + Internal method for both multipoints (formed entirely by a single part), + and shapes that have multiple collections of points (each one + a part): (poly)lines, polygons, and multipatchs. + """ + part_indexes: list[int] = [] + points: PointsT = [] + + for part in parts: + # set part index position + part_indexes.append(len(points)) + points.extend(part) + + return points, part_indexes + + def _bbox_from_points(self) -> BBox: + xs: list[float] = [] + ys: list[float] = [] + + for point in self.points: + xs.append(point[0]) + ys.append(point[1]) + + return min(xs), min(ys), max(xs), max(ys) + + def _mbox_from_ms(self) -> MBox: + ms: list[float] = [m for m in self.m if m is not None] + + if not ms: + # only if none of the shapes had m values, should mbox be set to missing m values + ms.append(NODATA) + + return min(ms), max(ms) + + def _zbox_from_zs(self) -> ZBox: + return min(self.z), max(self.z) @property def __geo_interface__(self) -> GeoJSONHomogeneousGeometryObject: - if self.shapeType in [POINT, POINTM, POINTZ]: + if self.shapeType in {POINT, POINTM, POINTZ}: # point if len(self.points) == 0: # the shape has no coordinate information, i.e. is 'empty' @@ -757,7 +871,7 @@ def __geo_interface__(self) -> GeoJSONHomogeneousGeometryObject: return {"type": "Point", "coordinates": self.points[0]} - if self.shapeType in [MULTIPOINT, MULTIPOINTM, MULTIPOINTZ]: + if self.shapeType in {MULTIPOINT, MULTIPOINTM, MULTIPOINTZ}: if len(self.points) == 0: # the shape has no coordinate information, i.e. 
is 'empty' # the geojson spec does not define a proper null-geometry type @@ -770,7 +884,7 @@ def __geo_interface__(self) -> GeoJSONHomogeneousGeometryObject: "coordinates": self.points, } - if self.shapeType in [POLYLINE, POLYLINEM, POLYLINEZ]: + if self.shapeType in {POLYLINE, POLYLINEM, POLYLINEZ}: if len(self.parts) == 0: # the shape has no coordinate information, i.e. is 'empty' # the geojson spec does not define a proper null-geometry type @@ -795,11 +909,12 @@ def __geo_interface__(self) -> GeoJSONHomogeneousGeometryObject: coordinates.append(list(self.points[ps:part])) ps = part - # assert len(self.parts) >1 # so disable pylint rule - coordinates.append(list(self.points[part:])) # pylint: disable=undefined-loop-variable + # assert len(self.parts) > 1 + # from previous if len(self.parts) checks so part is defined + coordinates.append(list(self.points[part:])) return {"type": "MultiLineString", "coordinates": coordinates} - if self.shapeType in [POLYGON, POLYGONM, POLYGONZ]: + if self.shapeType in {POLYGON, POLYGONM, POLYGONZ}: if len(self.parts) == 0: # the shape has no coordinate information, i.e. 
is 'empty' # the geojson spec does not define a proper null-geometry type @@ -860,22 +975,21 @@ def __geo_interface__(self) -> GeoJSONHomogeneousGeometryObject: @staticmethod def _from_geojson(geoj) -> Shape: # create empty shape - shape = Shape() # set shapeType geojType = geoj["type"] if geoj else "Null" if geojType in GEOJSON_TO_SHAPETYPE: - shape.shapeType = GEOJSON_TO_SHAPETYPE[geojType] + shapeType = GEOJSON_TO_SHAPETYPE[geojType] else: raise GeoJSON_Error(f"Cannot create Shape from GeoJSON type '{geojType}'") # set points and parts if geojType == "Point": - shape.points = [geoj["coordinates"]] - shape.parts = [0] + points = [geoj["coordinates"]] + parts = [0] elif geojType in ("MultiPoint", "LineString"): - shape.points = geoj["coordinates"] - shape.parts = [0] - elif geojType in ("Polygon",): + points = geoj["coordinates"] + parts = [0] + elif geojType == "Polygon": points = [] parts = [] index = 0 @@ -892,9 +1006,7 @@ def _from_geojson(geoj) -> Shape: points.extend(ext_or_hole) parts.append(index) index += len(ext_or_hole) - shape.points = points - shape.parts = parts - elif geojType in ("MultiLineString",): + elif geojType == "MultiLineString": points = [] parts = [] index = 0 @@ -902,9 +1014,7 @@ def _from_geojson(geoj) -> Shape: points.extend(linestring) parts.append(index) index += len(linestring) - shape.points = points - shape.parts = parts - elif geojType in ("MultiPolygon",): + elif geojType == "MultiPolygon": points = [] parts = [] index = 0 @@ -922,9 +1032,7 @@ def _from_geojson(geoj) -> Shape: points.extend(ext_or_hole) parts.append(index) index += len(ext_or_hole) - shape.points = points - shape.parts = parts - return shape + return Shape(shapeType=shapeType, points=points, parts=parts) @property def oid(self) -> int: @@ -936,75 +1044,69 @@ def shapeTypeName(self) -> str: return SHAPETYPE_LOOKUP[self.shapeType] def __repr__(self): - return f"Shape #{self.__oid}: {self.shapeTypeName}" - - -S = TypeVar("S", bound=Shape) + class_name = 
self.__class__.__name__ + if class_name == "Shape": + return f"Shape #{self.__oid}: {self.shapeTypeName}" + return f"{class_name} #{self.__oid}" -def compatible_with(s: Shape, cls: type[S]) -> TypeIs[S]: - return s.shapeType in cls._shapeTypes - - -# pylint: disable=unused-argument # Need unused arguments to keep the same call signature for # different implementations of from_byte_stream and write_to_byte_stream class NullShape(Shape): # Shape.shapeType = NULL already, # to preserve handling of default args in Shape.__init__ # Repeated for clarity. - shapeType = NULL - _shapeTypes = frozenset([NULL]) + def __init__( + self, + oid: Optional[int] = None, + ): + Shape.__init__(self, shapeType=NULL, oid=oid) - @classmethod + @staticmethod def from_byte_stream( - cls, + shapeType: int, b_io: ReadSeekableBinStream, next_shape: int, oid: Optional[int] = None, bbox: Optional[BBox] = None, - ) -> Self: + ) -> NullShape: # Shape.__init__ sets self.points = points or [] - return cls(oid=oid) + return NullShape(oid=oid) @staticmethod def write_to_byte_stream( b_io: WriteableBinStream, s: Shape, i: int, - bbox: Optional[BBox], - mbox: Optional[MBox], - zbox: Optional[ZBox], ) -> int: return 0 +_CanHaveBBox_shapeTypes = frozenset( + [ + POLYLINE, + POLYLINEM, + POLYLINEZ, + MULTIPOINT, + MULTIPOINTM, + MULTIPOINTZ, + POLYGON, + POLYGONM, + POLYGONZ, + MULTIPATCH, + ] +) + + class _CanHaveBBox(Shape): """As well as setting bounding boxes, we also utilize the fact that this mixin applies to all the shapes that are not a single point. 
""" - _shapeTypes = frozenset( - [ - POLYLINE, - POLYLINEM, - POLYLINEZ, - POLYGON, - POLYGONM, - POLYGONZ, - MULTIPOINT, - MULTIPOINTM, - MULTIPOINTZ, - MULTIPATCH, - ] - ) - - bbox: Optional[BBox] = None - - def _get_set_bbox_from_byte_stream(self, b_io: ReadableBinStream) -> BBox: - self.bbox: BBox = unpack("<4d", b_io.read(32)) - return self.bbox + @staticmethod + def _read_bbox_from_byte_stream(b_io: ReadableBinStream) -> BBox: + return unpack("<4d", b_io.read(32)) @staticmethod def _write_bbox_to_byte_stream( @@ -1020,16 +1122,19 @@ def _write_bbox_to_byte_stream( ) @staticmethod - def _get_npoints_from_byte_stream(b_io: ReadableBinStream) -> int: + def _read_npoints_from_byte_stream(b_io: ReadableBinStream) -> int: return unpack(" int: return b_io.write(pack(" list[Point2D]: flat = unpack(f"<{2 * nPoints}d", b_io.read(16 * nPoints)) - self.points = list(zip(*(iter(flat),) * 2)) + return list(zip(*(iter(flat),) * 2)) @staticmethod def _write_points_to_byte_stream( @@ -1045,35 +1150,19 @@ def _write_points_to_byte_stream( f"Failed to write points for record {i}. Expected floats." 
) - @staticmethod - def _get_nparts_from_byte_stream(b_io: ReadableBinStream) -> int: - return 0 - - def _set_parts_from_byte_stream(self, b_io: ReadableBinStream, nParts: int): - pass - - def _set_part_types_from_byte_stream(self, b_io: ReadableBinStream, nParts: int): - pass - - def _set_zs_from_byte_stream(self, b_io: ReadableBinStream, nPoints: int): - pass - - def _set_ms_from_byte_stream( - self, b_io: ReadSeekableBinStream, nPoints: int, next_shape: int - ): - pass - @classmethod def from_byte_stream( cls, + shapeType: int, b_io: ReadSeekableBinStream, next_shape: int, oid: Optional[int] = None, bbox: Optional[BBox] = None, - ) -> Optional[Self]: - shape = cls(oid=oid) + ) -> Optional[Shape]: + ShapeClass = cast(type[_CanHaveBBox], SHAPE_CLASS_FROM_SHAPETYPE[shapeType]) - shape_bbox = shape._get_set_bbox_from_byte_stream(b_io) + kwargs: CanHaveBboxNoLinesKwargs = {"oid": oid} # "shapeType": shapeType} + kwargs["bbox"] = shape_bbox = cls._read_bbox_from_byte_stream(b_io) # if bbox specified and no overlap, skip this shape if bbox is not None and not bbox_overlap(bbox, shape_bbox): @@ -1081,31 +1170,65 @@ def from_byte_stream( # next shape after we return (as done in f.seek(next_shape)) return None - nParts: Optional[int] = shape._get_nparts_from_byte_stream(b_io) - nPoints: int = shape._get_npoints_from_byte_stream(b_io) + nParts: Optional[int] = ( + _CanHaveParts._read_nparts_from_byte_stream(b_io) + if shapeType in _CanHaveParts_shapeTypes + else None + ) + nPoints: int = cls._read_npoints_from_byte_stream(b_io) # Previously, we also set __zmin = __zmax = __mmin = __mmax = None if nParts: - shape._set_parts_from_byte_stream(b_io, nParts) - shape._set_part_types_from_byte_stream(b_io, nParts) + kwargs["parts"] = _CanHaveParts._read_parts_from_byte_stream(b_io, nParts) + if shapeType == MULTIPATCH: + kwargs["partTypes"] = MultiPatch._read_part_types_from_byte_stream( + b_io, nParts + ) + + # else: + # parts = None + # partTypes = None if nPoints: - 
shape._set_points_from_byte_stream(b_io, nPoints) + kwargs["points"] = cast( + PointsT, cls._read_points_from_byte_stream(b_io, nPoints) + ) - shape._set_zs_from_byte_stream(b_io, nPoints) + if shapeType in _HasZ_shapeTypes: + kwargs["zbox"], kwargs["z"] = _HasZ._read_zs_from_byte_stream( + b_io, nPoints + ) - shape._set_ms_from_byte_stream(b_io, nPoints, next_shape) + if shapeType in _HasM_shapeTypes: + kwargs["mbox"], kwargs["m"] = _HasM._read_ms_from_byte_stream( + b_io, nPoints, next_shape + ) - return shape + # else: + # points = None + # zbox, zs = None, None + # mbox, ms = None, None + + return ShapeClass(**kwargs) + # return ShapeClass( + # shapeType=shapeType, + # # Mypy 1.17.1 doesn't figure out that an Optional[list[Point2D]] is an Optional[list[PointT]] + # points=cast(Optional[PointsT], points), + # parts=parts, + # partTypes=partTypes, + # oid=oid, + # m=ms, + # z=zs, + # bbox=shape_bbox, + # mbox=mbox, + # zbox=zbox, + # ) @staticmethod def write_to_byte_stream( b_io: WriteableBinStream, s: Shape, i: int, - bbox: Optional[BBox], - mbox: Optional[MBox], - zbox: Optional[ZBox], ) -> int: # We use static methods here and below, # to support s only being an instance of a the @@ -1115,58 +1238,69 @@ def write_to_byte_stream( n = 0 - if compatible_with(s, _CanHaveBBox): - n += _CanHaveBBox._write_bbox_to_byte_stream(b_io, i, bbox) + if s.shapeType in _CanHaveBBox_shapeTypes: + n += _CanHaveBBox._write_bbox_to_byte_stream(b_io, i, s.bbox) - if compatible_with(s, _CanHaveParts): - n += _CanHaveParts._write_nparts_to_byte_stream(b_io, s) + if s.shapeType in _CanHaveParts_shapeTypes: + n += _CanHaveParts._write_nparts_to_byte_stream( + b_io, cast(_CanHaveParts, s) + ) # Shape types with multiple points per record - if compatible_with(s, _CanHaveBBox): - n += _CanHaveBBox._write_npoints_to_byte_stream(b_io, s) + if s.shapeType in _CanHaveBBox_shapeTypes: + n += _CanHaveBBox._write_npoints_to_byte_stream(b_io, cast(_CanHaveBBox, s)) # Write part indexes. 
Includes MultiPatch - if compatible_with(s, _CanHaveParts): - n += _CanHaveParts._write_part_indices_to_byte_stream(b_io, s) + if s.shapeType in _CanHaveParts_shapeTypes: + n += _CanHaveParts._write_part_indices_to_byte_stream( + b_io, cast(_CanHaveParts, s) + ) - if compatible_with(s, MultiPatch): - n += MultiPatch._write_part_types_to_byte_stream(b_io, s) + if s.shapeType in MultiPatch_shapeTypes: + n += MultiPatch._write_part_types_to_byte_stream(b_io, cast(MultiPatch, s)) # Write points for multiple-point records - if compatible_with(s, _CanHaveBBox): - n += _CanHaveBBox._write_points_to_byte_stream(b_io, s, i) - if compatible_with(s, _HasZ): - n += _HasZ._write_zs_to_byte_stream(b_io, s, i, zbox) + if s.shapeType in _CanHaveBBox_shapeTypes: + n += _CanHaveBBox._write_points_to_byte_stream( + b_io, cast(_CanHaveBBox, s), i + ) + if s.shapeType in _HasZ_shapeTypes: + n += _HasZ._write_zs_to_byte_stream(b_io, cast(_HasZ, s), i, s.zbox) - if compatible_with(s, _HasM): - n += _HasM._write_ms_to_byte_stream(b_io, s, i, mbox) + if s.shapeType in _HasM_shapeTypes: + n += _HasM._write_ms_to_byte_stream(b_io, cast(_HasM, s), i, s.mbox) return n +_CanHaveParts_shapeTypes = frozenset( + [ + POLYLINE, + POLYLINEM, + POLYLINEZ, + POLYGON, + POLYGONM, + POLYGONZ, + MULTIPATCH, + ] +) + + class _CanHaveParts(_CanHaveBBox): # The parts attribute is initialised by # the base class Shape's __init__, to parts or []. # "Can Have Parts" should be read as "Can Have non-empty parts". 
- _shapeTypes = frozenset( - [ - POLYLINE, - POLYLINEM, - POLYLINEZ, - POLYGON, - POLYGONM, - POLYGONZ, - MULTIPATCH, - ] - ) @staticmethod - def _get_nparts_from_byte_stream(b_io: ReadableBinStream) -> int: + def _read_nparts_from_byte_stream(b_io: ReadableBinStream) -> int: return unpack(" int: return b_io.write(pack(" _Array[int]: + return _Array[int]("i", unpack(f"<{nParts}i", b_io.read(nParts * 4))) @staticmethod def _write_part_indices_to_byte_stream( @@ -1175,20 +1309,20 @@ def _write_part_indices_to_byte_stream( return b_io.write(pack(f"<{len(s.parts)}i", *s.parts)) +Point_shapeTypes = frozenset([POINT, POINTM, POINTZ]) + + class Point(Shape): # We also use the fact that the single Point types are the only # shapes that cannot have their own bounding box (a user supplied # bbox is still used to filter out points). - shapeType = POINT - _shapeTypes = frozenset([POINT, POINTM, POINTZ]) - - def _set_single_point_z_from_byte_stream(self, b_io: ReadableBinStream): - pass - - def _set_single_point_m_from_byte_stream( - self, b_io: ReadSeekableBinStream, next_shape: int + def __init__( + self, + x: float, + y: float, + oid: Optional[int] = None, ): - pass + Shape.__init__(self, points=[(x, y)], oid=oid) @staticmethod def _x_y_from_byte_stream(b_io: ReadableBinStream): @@ -1211,13 +1345,12 @@ def _write_x_y_to_byte_stream( @classmethod def from_byte_stream( cls, + shapeType: int, b_io: ReadSeekableBinStream, next_shape: int, oid: Optional[int] = None, bbox: Optional[BBox] = None, - ) -> Optional[Self]: - shape = cls(oid=oid) - + ) -> Optional[Shape]: x, y = cls._x_y_from_byte_stream(b_io) if bbox is not None: @@ -1225,91 +1358,156 @@ def from_byte_stream( # skip shape if no overlap with bounding box if not bbox_overlap(bbox, (x, y, x, y)): return None + elif shapeType == POINT: + return Point(x=x, y=y, oid=oid) - shape.points = [(x, y)] + if shapeType == POINTZ: + z = PointZ._read_single_point_zs_from_byte_stream(b_io)[0] - 
shape._set_single_point_z_from_byte_stream(b_io) + m = PointM._read_single_point_ms_from_byte_stream(b_io, next_shape)[0] - shape._set_single_point_m_from_byte_stream(b_io, next_shape) + if shapeType == POINTZ: + return PointZ(x=x, y=y, z=z, m=m, oid=oid) - return shape + return PointM(x=x, y=y, m=m, oid=oid) + # return Shape(shapeType=shapeType, points=[(x, y)], z=zs, m=ms, oid=oid) @staticmethod - def write_to_byte_stream( - b_io: WriteableBinStream, - s: Shape, - i: int, - bbox: Optional[BBox], - mbox: Optional[MBox], - zbox: Optional[ZBox], - ) -> int: + def write_to_byte_stream(b_io: WriteableBinStream, s: Shape, i: int) -> int: # Serialize a single point x, y = s.points[0][0], s.points[0][1] n = Point._write_x_y_to_byte_stream(b_io, x, y, i) # Write a single Z value - if compatible_with(s, PointZ): + if s.shapeType in PointZ_shapeTypes: n += PointZ._write_single_point_z_to_byte_stream(b_io, s, i) # Write a single M value - if compatible_with(s, PointM): + if s.shapeType in PointM_shapeTypes: n += PointM._write_single_point_m_to_byte_stream(b_io, s, i) return n -# pylint: enable=unused-argument +Polyline_shapeTypes = frozenset([POLYLINE, POLYLINEM, POLYLINEZ]) class Polyline(_CanHaveParts): - shapeType = POLYLINE - _shapeTypes = frozenset([POLYLINE, POLYLINEM, POLYLINEZ]) + def __init__( + self, + *args: PointsT, + lines: Optional[list[PointsT]] = None, + points: Optional[PointsT] = None, + parts: Optional[list[int]] = None, + bbox: Optional[BBox] = None, + oid: Optional[int] = None, + ): + if args: + if lines: + raise ShapefileException( + "Specify Either: a) positional args, or: b) the keyword arg lines. " + f"Not both. Got both: {args} and {lines=}. 
" + "If this was intentional, after the other positional args, " + "the arg passed to lines can be unpacked (arg1, arg2, *more_args, *lines, oid=oid,...)" + ) + lines = list(args) + Shape.__init__( + self, + lines=lines, + points=points, + parts=parts, + bbox=bbox, + oid=oid, + ) + + +Polygon_shapeTypes = frozenset([POLYGON, POLYGONM, POLYGONZ]) class Polygon(_CanHaveParts): - shapeType = POLYGON - _shapeTypes = frozenset([POLYGON, POLYGONM, POLYGONZ]) + def __init__( + self, + *args: PointsT, + lines: Optional[list[PointsT]] = None, + parts: Optional[list[int]] = None, + points: Optional[PointsT] = None, + bbox: Optional[BBox] = None, + oid: Optional[int] = None, + ): + lines = list(args) if args else lines + Shape.__init__( + self, + lines=lines, + points=points, + parts=parts, + bbox=bbox, + oid=oid, + ) + + +MultiPoint_shapeTypes = frozenset([MULTIPOINT, MULTIPOINTM, MULTIPOINTZ]) class MultiPoint(_CanHaveBBox): - shapeType = MULTIPOINT - _shapeTypes = frozenset([MULTIPOINT, MULTIPOINTM, MULTIPOINTZ]) + def __init__( + self, + *args: PointT, + points: Optional[PointsT] = None, + bbox: Optional[BBox] = None, + oid: Optional[int] = None, + ): + if args: + if points: + raise ShapefileException( + "Specify Either: a) positional args, or: b) the keyword arg points. " + f"Not both. Got both: {args} and {points=}. " + "If this was intentional, after the other positional args, " + "the arg passed to points can be unpacked, e.g. 
" + " (arg1, arg2, *more_args, *points, oid=oid,...)" + ) + points = list(args) + Shape.__init__( + self, + points=points, + bbox=bbox, + oid=oid, + ) + + +# Not a PointM or a PointZ +_HasM_shapeTypes = frozenset( + [ + POLYLINEM, + POLYLINEZ, + POLYGONM, + POLYGONZ, + MULTIPOINTM, + MULTIPOINTZ, + MULTIPATCH, + ] +) class _HasM(_CanHaveBBox): - # Not a Point - _shapeTypes = frozenset( - [ - POLYLINEM, - POLYLINEZ, - POLYGONM, - POLYGONZ, - MULTIPOINTM, - MULTIPOINTZ, - MULTIPATCH, - ] - ) m: Sequence[Optional[float]] - def __init__(self, *args, **kwargs): - self.m = [] - super().__init__(*args, **kwargs) - - def _set_ms_from_byte_stream( - self, b_io: ReadSeekableBinStream, nPoints: int, next_shape: int - ): + @staticmethod + def _read_ms_from_byte_stream( + b_io: ReadSeekableBinStream, nPoints: int, next_shape: int + ) -> tuple[MBox, list[Optional[float]]]: if next_shape - b_io.tell() >= 16: - __mmin, __mmax = unpack("<2d", b_io.read(16)) + mbox = unpack("<2d", b_io.read(16)) # Measure values less than -10e38 are nodata values according to the spec if next_shape - b_io.tell() >= nPoints * 8: - self.m = [] + ms = [] for m in unpack(f"<{nPoints}d", b_io.read(nPoints * 8)): if m > NODATA: - self.m.append(m) + ms.append(m) else: - self.m.append(None) + ms.append(None) else: - self.m = [None for _ in range(nPoints)] + ms = [None for _ in range(nPoints)] + return mbox, ms @staticmethod def _write_ms_to_byte_stream( @@ -1327,23 +1525,11 @@ def _write_ms_to_byte_stream( f"Failed to write measure extremes for record {i}. 
Expected floats" ) try: - if getattr(s, "m", False): - # if m values are stored in attribute - ms = [m if m is not None else NODATA for m in cast(_HasM, s).m] + ms = cast(_HasM, s).m - else: - # if m values are stored as 3rd/4th dimension - # 0-index position of m value is 3 if z type (x,y,z,m), or 2 if m type (x,y,m) - mpos = 3 if s.shapeType in _HasZ._shapeTypes else 2 - ms = [ - cast(float, p[mpos]) - if len(p) > mpos and p[mpos] is not None - else NODATA - for p in s.points - ] - - num_bytes_written += b_io.write(pack(f"<{len(ms)}d", *ms)) + ms_to_encode = [m if m is not None else NODATA for m in ms] + num_bytes_written += b_io.write(pack(f"<{len(ms)}d", *ms_to_encode)) except error: raise ShapefileException( f"Failed to write measure values for record {i}. Expected floats" @@ -1352,25 +1538,26 @@ def _write_ms_to_byte_stream( return num_bytes_written +# Not a PointZ +_HasZ_shapeTypes = frozenset( + [ + POLYLINEZ, + POLYGONZ, + MULTIPOINTZ, + MULTIPATCH, + ] +) + + class _HasZ(_CanHaveBBox): - # Not a Point - _shapeTypes = frozenset( - [ - POLYLINEZ, - POLYGONZ, - MULTIPOINTZ, - MULTIPATCH, - ] - ) z: Sequence[float] - def __init__(self, *args, **kwargs): - self.z = [] - super().__init__(*args, **kwargs) - - def _set_zs_from_byte_stream(self, b_io: ReadableBinStream, nPoints: int): - __zmin, __zmax = unpack("<2d", b_io.read(16)) # pylint: disable=unused-private-member - self.z = _Array[float]("d", unpack(f"<{nPoints}d", b_io.read(nPoints * 8))) + @staticmethod + def _read_zs_from_byte_stream( + b_io: ReadableBinStream, nPoints: int + ) -> tuple[ZBox, Sequence[float]]: + zbox = unpack("<2d", b_io.read(16)) + return zbox, _Array[float]("d", unpack(f"<{nPoints}d", b_io.read(nPoints * 8))) @staticmethod def _write_zs_to_byte_stream( @@ -1388,13 +1575,7 @@ def _write_zs_to_byte_stream( f"Failed to write elevation extremes for record {i}. Expected floats." 
) try: - if getattr(s, "z", False): - # if z values are stored in attribute - zs = cast(_HasZ, s).z - else: - # if z values are stored as 3rd dimension - zs = [cast(float, p[2]) if len(p) > 2 else 0 for p in s.points] - + zs = cast(_HasZ, s).z num_bytes_written += b_io.write(pack(f"<{len(zs)}d", *zs)) except error: raise ShapefileException( @@ -1404,105 +1585,230 @@ def _write_zs_to_byte_stream( return num_bytes_written +MultiPatch_shapeTypes = frozenset([MULTIPATCH]) + + class MultiPatch(_HasM, _HasZ, _CanHaveParts): - shapeType = MULTIPATCH - _shapeTypes = frozenset([MULTIPATCH]) + def __init__( + self, + *args: PointsT, + lines: Optional[list[PointsT]] = None, + partTypes: Optional[list[int]] = None, + z: Optional[list[float]] = None, + m: Optional[list[Optional[float]]] = None, + points: Optional[PointsT] = None, + parts: Optional[list[int]] = None, + bbox: Optional[BBox] = None, + mbox: Optional[MBox] = None, + zbox: Optional[ZBox] = None, + oid: Optional[int] = None, + ): + if args: + if lines: + raise ShapefileException( + "Specify Either: a) positional args, or: b) the keyword arg lines. " + f"Not both. Got both: {args} and {lines=}. 
" + "If this was intentional, after the other positional args, " + "the arg passed to lines can be unpacked (arg1, arg2, *more_args, *lines, oid=oid,...)" + ) + lines = list(args) + Shape.__init__( + self, + lines=lines, + points=points, + parts=parts, + partTypes=partTypes, + z=z, + m=m, + bbox=bbox, + zbox=zbox, + mbox=mbox, + oid=oid, + ) - def _set_part_types_from_byte_stream(self, b_io: ReadableBinStream, nParts: int): - self.partTypes = _Array[int]("i", unpack(f"<{nParts}i", b_io.read(nParts * 4))) + @staticmethod + def _read_part_types_from_byte_stream( + b_io: ReadableBinStream, nParts: int + ) -> Sequence[int]: + return _Array[int]("i", unpack(f"<{nParts}i", b_io.read(nParts * 4))) @staticmethod def _write_part_types_to_byte_stream(b_io: WriteableBinStream, s: Shape) -> int: return b_io.write(pack(f"<{len(s.partTypes)}i", *s.partTypes)) -class PointM(Point): - shapeType = POINTM - _shapeTypes = frozenset([POINTM, POINTZ]) +PointM_shapeTypes = frozenset([POINTM, POINTZ]) - # same default as in Writer.__shpRecord (if s.shapeType in (11, 21):) - # PyShp encodes None m values as NODATA - m = (None,) - def _set_single_point_m_from_byte_stream( - self, b_io: ReadSeekableBinStream, next_shape: int +class PointM(Point): + def __init__( + self, + x: float, + y: float, + # same default as in Writer.__shpRecord (if s.shapeType in (11, 21):) + # PyShp encodes None m values as NODATA + m: Optional[float] = None, + oid: Optional[int] = None, ): + Shape.__init__(self, points=[(x, y)], m=(m,), oid=oid) + + @staticmethod + def _read_single_point_ms_from_byte_stream( + b_io: ReadSeekableBinStream, next_shape: int + ) -> tuple[Optional[float]]: if next_shape - b_io.tell() >= 8: (m,) = unpack(" NODATA: - self.m = (m,) + return (m,) else: - self.m = (None,) + return (None,) @staticmethod def _write_single_point_m_to_byte_stream( b_io: WriteableBinStream, s: Shape, i: int ) -> int: - # Write a single M value + try: + s = cast(_HasM, s) + m = s.m[0] if s.m else None + except 
error: + raise ShapefileException( + f"Failed to write measure value for record {i}. Expected floats." + ) + # Note: missing m values are autoset to NODATA. + m_to_encode = m if m is not None else NODATA - if getattr(s, "m", False): - # if m values are stored in attribute - try: - # if not s.m or s.m[0] is None: - # s.m = (NODATA,) - # m = s.m[0] - s = cast(_HasM, s) - m = s.m[0] if s.m and s.m[0] is not None else NODATA - except error: - raise ShapefileException( - f"Failed to write measure value for record {i}. Expected floats." - ) - else: - # if m values are stored as 3rd/4th dimension - # 0-index position of m value is 3 if z type (x,y,z,m), or 2 if m type (x,y,m) - try: - mpos = 3 if s.shapeType == POINTZ else 2 - if len(s.points[0]) < mpos + 1: - # s.points[0].append(NODATA) - m = NODATA - elif s.points[0][mpos] is None: - # s.points[0][mpos] = NODATA - m = NODATA - else: - m = cast(float, s.points[0][mpos]) + return b_io.write(pack("<1d", m_to_encode)) - except error: - raise ShapefileException( - f"Failed to write measure value for record {i}. Expected floats." - ) - return b_io.write(pack("<1d", m)) +PolylineM_shapeTypes = frozenset([POLYLINEM, POLYLINEZ]) class PolylineM(Polyline, _HasM): - shapeType = POLYLINEM - _shapeTypes = frozenset([POLYLINEM, POLYLINEZ]) + def __init__( + self, + *args: PointsT, + lines: Optional[list[PointsT]] = None, + parts: Optional[list[int]] = None, + m: Optional[Sequence[Optional[float]]] = None, + points: Optional[PointsT] = None, + bbox: Optional[BBox] = None, + mbox: Optional[MBox] = None, + oid: Optional[int] = None, + ): + if args: + if lines: + raise ShapefileException( + "Specify Either: a) positional args, or: b) the keyword arg lines. " + f"Not both. Got both: {args} and {lines=}. 
" + "If this was intentional, after the other positional args, " + "the arg passed to lines can be unpacked (arg1, arg2, *more_args, *lines, oid=oid,...)" + ) + lines = list(args) + Shape.__init__( + self, + lines=lines, + points=points, + parts=parts, + m=m, + bbox=bbox, + mbox=mbox, + oid=oid, + ) + + +PolygonM_shapeTypes = frozenset([POLYGONM, POLYGONZ]) class PolygonM(Polygon, _HasM): - shapeType = POLYGONM - _shapeTypes = frozenset([POLYGONM, POLYGONZ]) + def __init__( + self, + *args: PointsT, + lines: Optional[list[PointsT]] = None, + parts: Optional[list[int]] = None, + m: Optional[list[Optional[float]]] = None, + points: Optional[PointsT] = None, + bbox: Optional[BBox] = None, + mbox: Optional[MBox] = None, + oid: Optional[int] = None, + ): + if args: + if lines: + raise ShapefileException( + "Specify Either: a) positional args, or: b) the keyword arg lines. " + f"Not both. Got both: {args} and {lines=}. " + "If this was intentional, after the other positional args, " + "the arg passed to lines can be unpacked (arg1, arg2, *more_args, *lines, oid=oid,...)" + ) + lines = list(args) + Shape.__init__( + self, + lines=lines, + points=points, + parts=parts, + m=m, + bbox=bbox, + mbox=mbox, + oid=oid, + ) + + +MultiPointM_shapeTypes = frozenset([MULTIPOINTM, MULTIPOINTZ]) class MultiPointM(MultiPoint, _HasM): - shapeType = MULTIPOINTM + def __init__( + self, + *args: PointT, + points: Optional[PointsT] = None, + m: Optional[Sequence[Optional[float]]] = None, + bbox: Optional[BBox] = None, + mbox: Optional[MBox] = None, + oid: Optional[int] = None, + ): + if args: + if points: + raise ShapefileException( + "Specify Either: a) positional args, or: b) the keyword arg points. " + f"Not both. Got both: {args} and {points=}. " + "If this was intentional, after the other positional args, " + "the arg passed to points can be unpacked, e.g. 
" + " (arg1, arg2, *more_args, *points, oid=oid,...)" + ) + points = list(args) + Shape.__init__( + self, + points=points, + m=m, + bbox=bbox, + mbox=mbox, + oid=oid, + ) + - _shapeTypes = frozenset([MULTIPOINTM, MULTIPOINTZ]) +PointZ_shapeTypes = frozenset([POINTZ]) class PointZ(PointM): - shapeType = POINTZ - _shapeTypes = frozenset([POINTZ]) + def __init__( + self, + x: float, + y: float, + z: float = 0.0, + m: Optional[float] = None, + oid: Optional[int] = None, + ): + Shape.__init__(self, points=[(x, y)], z=(z,), m=(m,), oid=oid) # same default as in Writer.__shpRecord (if s.shapeType == 11:) z: Sequence[float] = (0.0,) - def _set_single_point_z_from_byte_stream(self, b_io: ReadableBinStream): - self.z = tuple(unpack(" tuple[float]: + return unpack("= 3 and s.points[0][2] is not None: - z = s.points[0][2] - except error: - raise ShapefileException( - f"Failed to write elevation value for record {i}. Expected floats." - ) + + try: + if s.z: + z = s.z[0] + except error: + raise ShapefileException( + f"Failed to write elevation value for record {i}. Expected floats." + ) return b_io.write(pack(" WriteSeekableBinStream: ... @overload - def __getFileObj(self, f: None) -> Never: ... + def __getFileObj(self, f: None) -> NoReturn: ... @overload def __getFileObj(self, f: WriteSeekableBinStream) -> WriteSeekableBinStream: ... def __getFileObj(self, f): @@ -2964,25 +3359,18 @@ def __shpFileLength(self) -> int: shp.seek(start) return size - def __bbox(self, s: Shape) -> BBox: - xs: list[float] = [] - ys: list[float] = [] - - if not s.points: - # this should not happen. - # any shape that is not null should have at least one point, and only those should be sent here. - # could also mean that earlier code failed to add points to a non-null shape. - raise ShapefileException( - "Cannot create bbox. Expected a valid shape with at least one point. " - f"Got a shape of type {s.shapeType=} and 0 points." 
- ) + def _update_file_bbox(self, s: Shape): + if s.shapeType == NULL: + shape_bbox = None + elif s.shapeType in _CanHaveBBox_shapeTypes: + shape_bbox = s.bbox + else: + x, y = s.points[0][:2] + shape_bbox = (x, y, x, y) - for point in s.points: - xs.append(point[0]) - ys.append(point[1]) + if shape_bbox is None: + return - shape_bbox = (min(xs), min(ys), max(xs), max(ys)) - # update global if self._bbox: # compare with existing self._bbox = ( @@ -2994,50 +3382,23 @@ def __bbox(self, s: Shape) -> BBox: else: # first time bbox is being set self._bbox = shape_bbox - return shape_bbox - def __zbox(self, s: Union[_HasZ, PointZ]) -> ZBox: - shape_zs: list[float] = [] - if s.z: - shape_zs.extend(s.z) - else: - for p in s.points: - # On a ShapeZ type, M is at index 4, and the point can be a 3-tuple or 4-tuple. - z = p[2] if len(p) >= 3 and p[2] is not None else 0 - shape_zs.append(z) - zbox = (min(shape_zs), max(shape_zs)) - # update global + def _update_file_zbox(self, s: Union[_HasZ, PointZ]): if self._zbox: # compare with existing - self._zbox = (min(zbox[0], self._zbox[0]), max(zbox[1], self._zbox[1])) + self._zbox = (min(s.zbox[0], self._zbox[0]), max(s.zbox[1], self._zbox[1])) else: # first time zbox is being set - self._zbox = zbox - return zbox - - def __mbox(self, s: Union[_HasM, PointM]) -> MBox: - mpos = 3 if s.shapeType in _HasZ._shapeTypes | PointZ._shapeTypes else 2 - shape_ms: list[float] = [] - if s.m: - shape_ms.extend(m for m in s.m if m is not None) - else: - for p in s.points: - m = p[mpos] if len(p) >= mpos + 1 else None - if m is not None: - shape_ms.append(m) + self._zbox = s.zbox - if not shape_ms: - # only if none of the shapes had m values, should mbox be set to missing m values - shape_ms.append(NODATA) - mbox = (min(shape_ms), max(shape_ms)) - # update global + def _update_file_mbox(self, s: Union[_HasM, PointM]): + mbox = s.mbox if self._mbox: # compare with existing self._mbox = (min(mbox[0], self._mbox[0]), max(mbox[1], self._mbox[1])) else: 
# first time mbox is being set self._mbox = mbox - return mbox @property def shapeTypeName(self) -> str: @@ -3098,7 +3459,7 @@ def __shapefileHeader( else: f.write(pack("<4d", 0, 0, 0, 0)) # Elevation - if self.shapeType in PointZ._shapeTypes | _HasZ._shapeTypes: + if self.shapeType in PointZ_shapeTypes | _HasZ_shapeTypes: # Z values are present in Z type zbox = self.zbox() if zbox is None: @@ -3110,7 +3471,7 @@ def __shapefileHeader( # zbox = ZBox(0, 0) zbox = (0, 0) # Measure - if self.shapeType in PointM._shapeTypes | _HasM._shapeTypes: + if self.shapeType in PointM_shapeTypes | _HasM_shapeTypes: # M values are present in M or Z type mbox = self.mbox() if mbox is None: @@ -3219,17 +3580,14 @@ def __shpRecord(self, s: Shape) -> tuple[int, int]: # For both single point and multiple-points non-null shapes, # update bbox, mbox and zbox of the whole shapefile - shape_bbox = self.__bbox(s) if s.shapeType != NULL else None + if s.shapeType != NULL: + self._update_file_bbox(s) - if s.shapeType in PointM._shapeTypes | _HasM._shapeTypes: - shape_mbox = self.__mbox(cast(Union[_HasM, PointM], s)) - else: - shape_mbox = None + if s.shapeType in PointM_shapeTypes | _HasM_shapeTypes: + self._update_file_mbox(cast(Union[_HasM, PointM], s)) - if s.shapeType in PointZ._shapeTypes | _HasZ._shapeTypes: - shape_zbox = self.__zbox(cast(Union[_HasZ, PointZ], s)) - else: - shape_zbox = None + if s.shapeType in PointZ_shapeTypes | _HasZ_shapeTypes: + self._update_file_zbox(cast(Union[_HasZ, PointZ], s)) # Create an in-memory binary buffer to avoid # unnecessary seeks to files on disk @@ -3252,9 +3610,6 @@ def __shpRecord(self, s: Shape) -> tuple[int, int]: b_io=b_io, s=s, i=self.shpNum, - bbox=shape_bbox, - mbox=shape_mbox, - zbox=shape_zbox, ) # Finalize record length as 16-bit words @@ -3320,7 +3675,7 @@ def record( record = ["" for _ in range(fieldCount)] self.__dbfRecord(record) - def __dbfRecord(self, record): + def __dbfRecord(self, record: list[RecordValue]) -> None: """Writes 
the dbf records.""" f = self.__getFileObj(self.dbf) if self.recNum == 0: @@ -3351,15 +3706,15 @@ def __dbfRecord(self, record): # first try to force directly to int. # forcing a large int to float and back to int # will lose information and result in wrong nr. - num_val = int(value) + num_val = int(cast(int, value)) except ValueError: # forcing directly to int failed, so was probably a float. - num_val = int(float(value)) + num_val = int(float(cast(float, value))) str_val = format(num_val, "d")[:size].rjust( size ) # caps the size if exceeds the field size else: - f_val = float(value) + f_val = float(cast(float, value)) str_val = format(f_val, f".{deci}f")[:size].rjust( size ) # caps the size if exceeds the field size @@ -3428,15 +3783,13 @@ def null(self) -> None: def point(self, x: float, y: float) -> None: """Creates a POINT shape.""" - pointShape = Point() - pointShape.points.append((x, y)) + pointShape = Point(x, y) self.shape(pointShape) def pointm(self, x: float, y: float, m: Optional[float] = None) -> None: """Creates a POINTM shape. If the m (measure) value is not set, it defaults to NoData.""" - pointShape = PointM() - pointShape.points.append((x, y, m)) + pointShape = PointM(x, y, m) self.shape(pointShape) def pointz( @@ -3445,22 +3798,23 @@ def pointz( """Creates a POINTZ shape. If the z (elevation) value is not set, it defaults to 0. If the m (measure) value is not set, it defaults to NoData.""" - pointShape = PointZ() - pointShape.points.append((x, y, z, m)) + pointShape = PointZ(x, y, z, m) self.shape(pointShape) def multipoint(self, points: PointsT) -> None: """Creates a MULTIPOINT shape. Points is a list of xy values.""" # nest the points inside a list to be compatible with the generic shapeparts method - self._shapeparts(parts=[points], polyShape=MultiPoint()) + shape = MultiPoint(points=points) + self.shape(shape) def multipointm(self, points: PointsT) -> None: """Creates a MULTIPOINTM shape. Points is a list of xym values. 
If the m (measure) value is not included, it defaults to None (NoData).""" # nest the points inside a list to be compatible with the generic shapeparts method - self._shapeparts(parts=[points], polyShape=MultiPointM()) + shape = MultiPointM(points=points) + self.shape(shape) def multipointz(self, points: PointsT) -> None: """Creates a MULTIPOINTZ shape. @@ -3468,32 +3822,37 @@ def multipointz(self, points: PointsT) -> None: If the z (elevation) value is not included, it defaults to 0. If the m (measure) value is not included, it defaults to None (NoData).""" # nest the points inside a list to be compatible with the generic shapeparts method - self._shapeparts(parts=[points], polyShape=MultiPointZ()) + shape = MultiPointZ(points=points) + self.shape(shape) def line(self, lines: list[PointsT]) -> None: """Creates a POLYLINE shape. Lines is a collection of lines, each made up of a list of xy values.""" - self._shapeparts(parts=lines, polyShape=Polyline()) + shape = Polyline(lines=lines) + self.shape(shape) def linem(self, lines: list[PointsT]) -> None: """Creates a POLYLINEM shape. Lines is a collection of lines, each made up of a list of xym values. If the m (measure) value is not included, it defaults to None (NoData).""" - self._shapeparts(parts=lines, polyShape=PolylineM()) + shape = PolylineM(lines=lines) + self.shape(shape) def linez(self, lines: list[PointsT]) -> None: """Creates a POLYLINEZ shape. Lines is a collection of lines, each made up of a list of xyzm values. If the z (elevation) value is not included, it defaults to 0. If the m (measure) value is not included, it defaults to None (NoData).""" - self._shapeparts(parts=lines, polyShape=PolylineZ()) + shape = PolylineZ(lines=lines) + self.shape(shape) def poly(self, polys: list[PointsT]) -> None: """Creates a POLYGON shape. Polys is a collection of polygons, each made up of a list of xy values. Note that for ordinary polygons the coordinates must run in a clockwise direction. 
If some of the polygons are holes, these must run in a counterclockwise direction.""" - self._shapeparts(parts=polys, polyShape=Polygon()) + shape = Polygon(lines=polys) + self.shape(shape) def polym(self, polys: list[PointsT]) -> None: """Creates a POLYGONM shape. @@ -3501,7 +3860,8 @@ def polym(self, polys: list[PointsT]) -> None: Note that for ordinary polygons the coordinates must run in a clockwise direction. If some of the polygons are holes, these must run in a counterclockwise direction. If the m (measure) value is not included, it defaults to None (NoData).""" - self._shapeparts(parts=polys, polyShape=PolygonM()) + shape = PolygonM(lines=polys) + self.shape(shape) def polyz(self, polys: list[PointsT]) -> None: """Creates a POLYGONZ shape. @@ -3510,7 +3870,8 @@ def polyz(self, polys: list[PointsT]) -> None: If some of the polygons are holes, these must run in a counterclockwise direction. If the z (elevation) value is not included, it defaults to 0. If the m (measure) value is not included, it defaults to None (NoData).""" - self._shapeparts(parts=polys, polyShape=PolygonZ()) + shape = PolygonZ(lines=polys) + self.shape(shape) def multipatch(self, parts: list[PointsT], partTypes: list[int]) -> None: """Creates a MULTIPATCH shape. @@ -3520,52 +3881,8 @@ def multipatch(self, parts: list[PointsT], partTypes: list[int]) -> None: TRIANGLE_FAN, OUTER_RING, INNER_RING, FIRST_RING, or RING. If the z (elevation) value is not included, it defaults to 0. 
If the m (measure) value is not included, it defaults to None (NoData).""" - polyShape = MultiPatch() - polyShape.parts = [] - polyShape.points = [] - for part in parts: - # set part index position - polyShape.parts.append(len(polyShape.points)) - # add points - # for point in part: - # # Ensure point is list - # if not isinstance(point, list): - # point = list(point) - # polyShape.points.append(point) - polyShape.points.extend(part) - polyShape.partTypes = partTypes - # write the shape - self.shape(polyShape) - - def _shapeparts( - self, parts: list[PointsT], polyShape: Union[Polyline, Polygon, MultiPoint] - ) -> None: - """Internal method for adding a shape that has multiple collections of points (parts): - lines, polygons, and multipoint shapes. - """ - polyShape.parts = [] - polyShape.points = [] - # Make sure polygon rings (parts) are closed - - # if shapeType in (5, 15, 25, 31): - # This method is never actually called on a MultiPatch - # so we omit its shapeType (31) for efficiency - if compatible_with(polyShape, Polygon): - for part in parts: - if part[0] != part[-1]: - part.append(part[0]) - # Add points and part indexes - for part in parts: - # set part index position - polyShape.parts.append(len(polyShape.points)) - # add points - # for point in part: - # # Ensure point is list - # point_list = list(point) - # polyShape.points.append(point_list) - polyShape.points.extend(part) - # write the shape - self.shape(polyShape) + shape = MultiPatch(lines=parts, partTypes=partTypes) + self.shape(shape) def field( # Types of args should match *Field @@ -3611,7 +3928,7 @@ def _filter_network_doctests( examples_it = iter(examples) - yield next(examples_it) # pylint: disable=stop-iteration-return + yield next(examples_it) for example in examples_it: # Track variables in doctest shell sessions defined from commands From 7189bcfc0fac3de8daf463b9170e87a777d3a4ad Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Sun, 3 
Aug 2025 15:44:56 +0100 Subject: [PATCH 215/220] Restore Shape repr doctest --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 2b25fbd8..0c1e848e 100644 --- a/README.md +++ b/README.md @@ -466,6 +466,8 @@ index which is 7. >>> s = sf.shape(7) + >>> s + Polygon #7 >>> # Read the bbox of the 8th shape to verify From afa7b5f74e8d21b9d1f785d1d619884250593091 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Sun, 3 Aug 2025 15:46:45 +0100 Subject: [PATCH 216/220] Remove Pylint workflow job --- .github/workflows/run_checks_build_and_test.yml | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/.github/workflows/run_checks_build_and_test.yml b/.github/workflows/run_checks_build_and_test.yml index 89d8c207..0d471263 100644 --- a/.github/workflows/run_checks_build_and_test.yml +++ b/.github/workflows/run_checks_build_and_test.yml @@ -16,20 +16,6 @@ jobs: - uses: actions/setup-python@v5 - uses: pre-commit/action@v3.0.1 - pylint: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - uses: actions/setup-python@v5 - - name: install Pylint and plugin - run: | - python -m pip install --upgrade pip - pip install pytest pylint pylint-per-file-ignores - pip install -e . 
- - name: run Pylint for errors, warnings and remarks only (ignore Comments/ Code style) - run: | - pylint --disable=C test_shapefile.py src/shapefile.py - build_wheel_and_sdist: runs-on: ubuntu-latest steps: From 7a6a71705bb6caf25c6b8722af762979216083e1 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Sun, 3 Aug 2025 16:02:13 +0100 Subject: [PATCH 217/220] Update Changelogs --- README.md | 10 +++++++--- changelog.txt | 15 ++++++++++++--- 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 0c1e848e..fdf6489a 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,7 @@ The Python Shapefile Library (PyShp) reads and writes ESRI Shapefiles in pure Py - **Author**: [Joel Lawhead](https://github.com/GeospatialPython) - **Maintainers**: [Karim Bahgat](https://github.com/karimbahgat) - **Version**: 3.0.0-alpha -- **Date**: 31 July, 2025 +- **Date**: 3rd August, 2025 - **License**: [MIT](https://github.com/GeospatialPython/pyshp/blob/master/LICENSE.TXT) ## Contents @@ -117,10 +117,13 @@ part of your geospatial project. - pyproject.toml src layout - Slow test marked. -## 2.4.0 +## 2.4.1 + +### Improvements: +- Speed up writing shapefiles by up to ~39%. Combined for loops of calls to f.write(pack(...)), into single calls. ### Breaking Change. Support for Python 2 and Pythons <= 3.8 to be dropped. -- PyShp 2.4.0 is the latest (and likely last) version of PyShp to support Python 2.7 and Pythons <= 3.8. +- PyShp 2.4.1 is the latest (and likely last) version of PyShp to support Python 2.7 and Pythons <= 3.8. These CPython versions have reached [end of life](https://devguide.python.org/versions/#versions). - Future development will focus on PyShp v3.0.0 onwards (currently intended to supporting Pythons >= 3.9). - This will not break any projects, as pip and other package managers should not install PyShp 3.0.0 @@ -129,6 +132,7 @@ bug fixes and features. 
- If this negatively impacts your project, all feedback about this decision is welcome on our [the discussion page](https://github.com/GeospatialPython/pyshp/discussions/290). +## 2.4.0 ### New Features: - Reader.iterRecords now allows start and stop to be specified, to lookup smaller ranges of records. diff --git a/changelog.txt b/changelog.txt index 45bfd76a..c73718aa 100644 --- a/changelog.txt +++ b/changelog.txt @@ -1,5 +1,6 @@ VERSION 3.0.0-alpha +2025-08-03 Breaking Changes: * Python 2 and Python 3.8 support dropped. * Field info tuple is now a namedtuple (Field) instead of a list. @@ -24,11 +25,14 @@ VERSION 3.0.0-alpha * Slow test marked. -VERSION 2.4.0 +VERSION 2.4.1 + +2025-07-30 + Improvements: + * Speed up writing shapefiles by up to ~39%. Combined for loops of calls to f.write(pack(...)), into single calls. -2025-07-21 Forthcoming Breaking Change. Support for Python 2 and Pythons <= 3.8 to be dropped. - * PyShp 2.4.0 is the latest (and likely last) version of PyShp to support Python 2.7 and Pythons <= 3.8. + * PyShp 2.4.1 is the latest (and likely last) version of PyShp to support Python 2.7 and Pythons <= 3.8. These CPython versions have reached [end of life](https://devguide.python.org/versions/#versions). * Future development will focus on PyShp v3.0.0 onwards (currently intended to supporting Pythons >= 3.9). * This will not break any projects, as pip and other package managers should not install PyShp 3.0.0 @@ -38,6 +42,11 @@ VERSION 2.4.0 on our [the discussion page](https://github.com/GeospatialPython/pyshp/discussions/290). +VERSION 2.4.0 + +2025-07-21 + + New Features: * Reader.iterRecords now allows start and stop to be specified, to lookup smaller ranges of records. * Equality comparisons between Records now also require the fields to be the same (and in the same order). 
From aaedc3f47dc289a8d1eee90a61de859659c98030 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Sun, 3 Aug 2025 16:12:46 +0100 Subject: [PATCH 218/220] Describe performance improvements. --- README.md | 7 +++++-- changelog.txt | 7 +++++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index fdf6489a..4e83f581 100644 --- a/README.md +++ b/README.md @@ -98,15 +98,18 @@ part of your geospatial project. ### Breaking Changes: - Python 2 and Python 3.8 support dropped. - Field info tuple is now a namedtuple (Field) instead of a list. -- Field type codes are now FieldType enum members. +- Field type codes are now FieldType 'enum' members. - bbox, mbox and zbox attributes are all new Namedtuples. -- Writer does not mutate shapes. +- Writer does not mutate Shapes. - New custom subclasses for each shape type: Null, Multipatch, Point, Polyline, Multipoint, and Polygon, plus the latter 4's M and Z variants (Reader and Writer are still compatible with their base class, Shape, as before). - Shape sub classes are creatable from, and serializable to bytes streams, as per the shapefile spec. +### Improvements: +- Speeded up writing shapefiles by up to another ~27% (on top of the recent ~39% improvement in 2.4.1). + ### Code quality - Statically typed, and checked with Mypy - Checked with Ruff. diff --git a/changelog.txt b/changelog.txt index c73718aa..5cabf292 100644 --- a/changelog.txt +++ b/changelog.txt @@ -4,9 +4,9 @@ VERSION 3.0.0-alpha Breaking Changes: * Python 2 and Python 3.8 support dropped. * Field info tuple is now a namedtuple (Field) instead of a list. - * Field type codes are now FieldType enum members. + * Field type codes are now FieldType 'enum' members. * bbox, mbox and zbox attributes are all new Namedtuples. - * Writer does not mutate shapes. + * Writer does not mutate Shapes. 
* New custom subclasses for each shape type: Null, Multipatch, Point, Polyline, Multipoint, and Polygon, plus the latter 4's M and Z variants (Reader and Writer are still compatible with their base class, Shape, as before). @@ -24,6 +24,9 @@ VERSION 3.0.0-alpha * pyproject.toml src layout * Slow test marked. + Improvements: + * Speeded up writing shapefiles by up to another ~27% (on top of the recent ~39% improvement in 2.4.1). + VERSION 2.4.1 From 1b8661ec3e0842d20a7ea289b962c6cadb762e69 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Sun, 3 Aug 2025 17:08:28 +0100 Subject: [PATCH 219/220] Delete test files, that are created in, but not tested against, in doctests --- shapefiles/test/balancing.dbf | Bin 804 -> 0 bytes shapefiles/test/balancing.shp | Bin 264 -> 0 bytes shapefiles/test/balancing.shx | Bin 156 -> 0 bytes shapefiles/test/contextwriter.dbf | Bin 65 -> 0 bytes shapefiles/test/contextwriter.shp | Bin 100 -> 0 bytes shapefiles/test/contextwriter.shx | Bin 100 -> 0 bytes shapefiles/test/dtype.dbf | Bin 259 -> 0 bytes shapefiles/test/dtype.shp | Bin 124 -> 0 bytes shapefiles/test/dtype.shx | Bin 116 -> 0 bytes shapefiles/test/line.dbf | Bin 116 -> 0 bytes shapefiles/test/line.shp | Bin 272 -> 0 bytes shapefiles/test/line.shx | Bin 108 -> 0 bytes shapefiles/test/linem.dbf | Bin 116 -> 0 bytes shapefiles/test/linem.shp | Bin 344 -> 0 bytes shapefiles/test/linem.shx | Bin 108 -> 0 bytes shapefiles/test/linez.dbf | Bin 116 -> 0 bytes shapefiles/test/linez.shp | Bin 516 -> 0 bytes shapefiles/test/linez.shx | Bin 108 -> 0 bytes shapefiles/test/multipatch.dbf | Bin 116 -> 0 bytes shapefiles/test/multipatch.shp | Bin 712 -> 0 bytes shapefiles/test/multipatch.shx | Bin 108 -> 0 bytes shapefiles/test/multipoint.dbf | Bin 116 -> 0 bytes shapefiles/test/multipoint.shp | Bin 180 -> 0 bytes shapefiles/test/multipoint.shx | Bin 108 -> 0 bytes shapefiles/test/onlydbf.dbf | Bin 65 -> 0 bytes shapefiles/test/point.dbf | Bin 
116 -> 0 bytes shapefiles/test/point.shp | Bin 128 -> 0 bytes shapefiles/test/point.shx | Bin 108 -> 0 bytes shapefiles/test/polygon.dbf | Bin 116 -> 0 bytes shapefiles/test/polygon.shp | Bin 404 -> 0 bytes shapefiles/test/polygon.shx | Bin 108 -> 0 bytes shapefiles/test/testfile.dbf | Bin 65 -> 0 bytes shapefiles/test/testfile.shp | Bin 100 -> 0 bytes shapefiles/test/testfile.shx | Bin 100 -> 0 bytes 34 files changed, 0 insertions(+), 0 deletions(-) delete mode 100644 shapefiles/test/balancing.dbf delete mode 100644 shapefiles/test/balancing.shp delete mode 100644 shapefiles/test/balancing.shx delete mode 100644 shapefiles/test/contextwriter.dbf delete mode 100644 shapefiles/test/contextwriter.shp delete mode 100644 shapefiles/test/contextwriter.shx delete mode 100644 shapefiles/test/dtype.dbf delete mode 100644 shapefiles/test/dtype.shp delete mode 100644 shapefiles/test/dtype.shx delete mode 100644 shapefiles/test/line.dbf delete mode 100644 shapefiles/test/line.shp delete mode 100644 shapefiles/test/line.shx delete mode 100644 shapefiles/test/linem.dbf delete mode 100644 shapefiles/test/linem.shp delete mode 100644 shapefiles/test/linem.shx delete mode 100644 shapefiles/test/linez.dbf delete mode 100644 shapefiles/test/linez.shp delete mode 100644 shapefiles/test/linez.shx delete mode 100644 shapefiles/test/multipatch.dbf delete mode 100644 shapefiles/test/multipatch.shp delete mode 100644 shapefiles/test/multipatch.shx delete mode 100644 shapefiles/test/multipoint.dbf delete mode 100644 shapefiles/test/multipoint.shp delete mode 100644 shapefiles/test/multipoint.shx delete mode 100644 shapefiles/test/onlydbf.dbf delete mode 100644 shapefiles/test/point.dbf delete mode 100644 shapefiles/test/point.shp delete mode 100644 shapefiles/test/point.shx delete mode 100644 shapefiles/test/polygon.dbf delete mode 100644 shapefiles/test/polygon.shp delete mode 100644 shapefiles/test/polygon.shx delete mode 100644 shapefiles/test/testfile.dbf delete mode 100644 
shapefiles/test/testfile.shp delete mode 100644 shapefiles/test/testfile.shx diff --git a/shapefiles/test/balancing.dbf b/shapefiles/test/balancing.dbf deleted file mode 100644 index 8272cf33374d841d1876aa95146ca6b115a13d4f..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 804 zcmZRsWtU}VU|>jONClFZAT2XBC&dsf?hGQ0(3F7XA%a-tc@>KC%N0li`FW|NX;2`; yl9KX#I#`lXluDvsNDr>G{L&&4y+Vd1nPnt;gp4>T&a5EOBji|;T9%qek~IL=6K*R2 diff --git a/shapefiles/test/balancing.shp b/shapefiles/test/balancing.shp deleted file mode 100644 index 66af077e161aaf6e36834125306769684f63ba75..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 264 zcmZQzQ0HR64q9F?GcYj1h0 JbRCdp0sueJ2NVDR diff --git a/shapefiles/test/contextwriter.dbf b/shapefiles/test/contextwriter.dbf deleted file mode 100644 index 327fd49366a1e7061490a1e98bc06594129f958f..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 65 icmZRsWtU|D0!Id85QPEKGE;L>48g+AAkqj;1up=PYXi^# diff --git a/shapefiles/test/contextwriter.shp b/shapefiles/test/contextwriter.shp deleted file mode 100644 index 45d2404b76511c9f1f79d1c451ee7bf5b545d875..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 100 VcmZQzQ0HR64vbzfW0xRR6aZs$0Z#w` diff --git a/shapefiles/test/contextwriter.shx b/shapefiles/test/contextwriter.shx deleted file mode 100644 index 45d2404b76511c9f1f79d1c451ee7bf5b545d875..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 100 VcmZQzQ0HR64vbzfW0xRR6aZs$0Z#w` diff --git a/shapefiles/test/dtype.dbf b/shapefiles/test/dtype.dbf deleted file mode 100644 index 2939da4791a864894ed53683d9d18574c69f3d05..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 259 zcmZRsWtU}QU|>jO2n3Rtz|Au#I3(W9$Ay8x86>NLrX<+a+279vtN^P#uYy}~q2*hQ?n8gHC$PUCHzzU&& zB)Xk2yU}TwI3=U8$pftK);OZD;z2pjH?S6Nk|-aRvvd1yEW7N)wWYnWF?%&j1B5{~JNYO`tSP 
l9HtHztp`=F1*LVMG)z5*gRIw_FYQh2CrG7XCO~L}{{XkrG6Dbq diff --git a/shapefiles/test/linez.shx b/shapefiles/test/linez.shx deleted file mode 100644 index 4c59fef7a7823320e9a6c0737dbcc29a9cbcab74..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 108 ucmZQzQ0HR64$NLKGcfSNZU!&_ diff --git a/shapefiles/test/multipatch.dbf b/shapefiles/test/multipatch.dbf deleted file mode 100644 index bc9fe23f0ae043707d2fb0f40e5047ab2ced804d..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 116 rcmZRsVq;`vU|?`$Fb0yCATKdD6)NfsqK(j$@G4~FmlmfQDv$sG2g3^z diff --git a/shapefiles/test/multipatch.shp b/shapefiles/test/multipatch.shp deleted file mode 100644 index f3d10408f1747f03cce76562fb4cbf1b0365a5e5..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 712 zcmZQzQ0HR63K&yfFf%a7W0eC5ia0=|U^IjWB4xeid}(iDKLMkGLX1G9M~qoaK*a(e zLx2G0Fra240lHgo@nPcVG|U>9I4(X+9GzxyfQZ0oba`Cn;u1&K596b&=Wu|@KxlIL PFmqx0anrE49V|2eCVW?U diff --git a/shapefiles/test/multipatch.shx b/shapefiles/test/multipatch.shx deleted file mode 100644 index 05984a7cf061e9af9752386095f3220e8f07cd09..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 108 wcmZQzQ0HR64$NLKGcd?wmjjB5I6$OeG(w@Q*PJiyP3$LNG*HHffq_vE01{XV82|tP diff --git a/shapefiles/test/multipoint.dbf b/shapefiles/test/multipoint.dbf deleted file mode 100644 index 74ed8b14883b8194290b563eb207108beb938418..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 116 wcmZRsWtU}SU|?`$Fb0yCATKdD6)NfsqK(j$@G9h%=9FX>48g+AAkqj;1up=PYXi^# diff --git a/shapefiles/test/point.dbf b/shapefiles/test/point.dbf deleted file mode 100644 index 5a881b870c1a5f904c98e765fabd80fb94feb0d6..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 116 rcmZRsWtU}SU|?`$Fb0yCATKdD6)NfsqK(j$@G2DKXXcd{Dv$sG7Xb@6 diff --git a/shapefiles/test/point.shp b/shapefiles/test/point.shp deleted file mode 
100644 index 842954268ab2c1bea768242be2ee8662ac36d84c..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 128 pcmZQzQ0HR64jf)EGcYj1JkOX4Iz#&BUX3jo<02g(2d diff --git a/shapefiles/test/point.shx b/shapefiles/test/point.shx deleted file mode 100644 index fda6cae29a492c16bd59255f36cb0e10f5ef5a61..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 108 ncmZQzQ0HR64$NLKGcYj1JkOX4JqfHGVFSMCMS diff --git a/shapefiles/test/polygon.dbf b/shapefiles/test/polygon.dbf deleted file mode 100644 index 1cc8920a0b6da92323732a36b2e17a0faccab5f3..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 116 tcmZRsWtU}SU|?`$Fb0yCATKdD6)NfsqK(j$@G2DK=TxTW=NT#x4FED}3)TPt diff --git a/shapefiles/test/polygon.shp b/shapefiles/test/polygon.shp deleted file mode 100644 index c7654ebccaa29fb19c05d8c330ac699b1588016b..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 404 zcmZQzQ0HR64oB15hy!_7_)#@GVnu1eBcu0RKe<>;M1& diff --git a/shapefiles/test/testfile.dbf b/shapefiles/test/testfile.dbf deleted file mode 100644 index 327fd49366a1e7061490a1e98bc06594129f958f..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 65 icmZRsWtU|D0!Id85QPEKGE;L>48g+AAkqj;1up=PYXi^# diff --git a/shapefiles/test/testfile.shp b/shapefiles/test/testfile.shp deleted file mode 100644 index 45d2404b76511c9f1f79d1c451ee7bf5b545d875..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 100 VcmZQzQ0HR64vbzfW0xRR6aZs$0Z#w` diff --git a/shapefiles/test/testfile.shx b/shapefiles/test/testfile.shx deleted file mode 100644 index 45d2404b76511c9f1f79d1c451ee7bf5b545d875..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 100 VcmZQzQ0HR64vbzfW0xRR6aZs$0Z#w` From 3e3b50cc1ed6468b0fe836fb74a52c542c30f4ab Mon Sep 17 00:00:00 2001 From: James Parrott 
<80779630+JamesParrott@users.noreply.github.com> Date: Sun, 3 Aug 2025 17:52:00 +0100 Subject: [PATCH 220/220] Delete more test artefacts that aren't tested against --- shapefiles/test/MyPolyZ.dbf | Bin 116 -> 0 bytes shapefiles/test/MyPolyZ.shp | Bin 316 -> 0 bytes shapefiles/test/MyPolyZ.shx | Bin 108 -> 0 bytes shapefiles/test/NullTest.dbf | Bin 550 -> 0 bytes shapefiles/test/NullTest.shp | Bin 184 -> 0 bytes shapefiles/test/NullTest.shx | Bin 124 -> 0 bytes shapefiles/test/corrupt_too_long.dbf | Bin 580 -> 0 bytes shapefiles/test/corrupt_too_long.shp | Bin 1145 -> 0 bytes shapefiles/test/corrupt_too_long.shx | Bin 180 -> 0 bytes shapefiles/test/shapetype.dbf | Bin 65 -> 0 bytes shapefiles/test/shapetype.shp | Bin 100 -> 0 bytes shapefiles/test/shapetype.shx | Bin 100 -> 0 bytes 12 files changed, 0 insertions(+), 0 deletions(-) delete mode 100644 shapefiles/test/MyPolyZ.dbf delete mode 100644 shapefiles/test/MyPolyZ.shp delete mode 100644 shapefiles/test/MyPolyZ.shx delete mode 100644 shapefiles/test/NullTest.dbf delete mode 100644 shapefiles/test/NullTest.shp delete mode 100644 shapefiles/test/NullTest.shx delete mode 100644 shapefiles/test/corrupt_too_long.dbf delete mode 100644 shapefiles/test/corrupt_too_long.shp delete mode 100644 shapefiles/test/corrupt_too_long.shx delete mode 100644 shapefiles/test/shapetype.dbf delete mode 100644 shapefiles/test/shapetype.shp delete mode 100644 shapefiles/test/shapetype.shx diff --git a/shapefiles/test/MyPolyZ.dbf b/shapefiles/test/MyPolyZ.dbf deleted file mode 100644 index 54a50dbb0ed1c36d4775c42bd38d9c7cc94e3736..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 116 ucmZQBpN{_B6oUVebG|MzXtL?n8GkOdM<~Q3L>S`WMmw diff --git a/shapefiles/test/MyPolyZ.shx b/shapefiles/test/MyPolyZ.shx deleted file mode 100644 index 62de3220a734ab09a5e7aa7c4fa6eacb1b875b0a..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 108 ycmZQzQ0HR64$NLKGcfSORP 
Q3rf#}(w87K6B~#I07&!-R{#J2 diff --git a/shapefiles/test/shapetype.dbf b/shapefiles/test/shapetype.dbf deleted file mode 100644 index 327fd49366a1e7061490a1e98bc06594129f958f..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 65 icmZRsWtU|D0!Id85QPEKGE;L>48g+AAkqj;1up=PYXi^# diff --git a/shapefiles/test/shapetype.shp b/shapefiles/test/shapetype.shp deleted file mode 100644 index 29de63a12be6f0016d03b25f33ccd616c262cd9b..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 100 YcmZQzQ0HR64vbzfGcYh>mm^6O0AsfSPyhe` diff --git a/shapefiles/test/shapetype.shx b/shapefiles/test/shapetype.shx deleted file mode 100644 index 29de63a12be6f0016d03b25f33ccd616c262cd9b..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 100 YcmZQzQ0HR64vbzfGcYh>mm^6O0AsfSPyhe`