diff --git a/mapillary_tools/constants.py b/mapillary_tools/constants.py
index cc842b51..1deee80c 100644
--- a/mapillary_tools/constants.py
+++ b/mapillary_tools/constants.py
@@ -120,8 +120,8 @@ def _parse_scaled_integers(
 CUTOFF_TIME = float(os.getenv(_ENV_PREFIX + "CUTOFF_TIME", 60))
 DUPLICATE_DISTANCE = float(os.getenv(_ENV_PREFIX + "DUPLICATE_DISTANCE", 0.1))
 DUPLICATE_ANGLE = float(os.getenv(_ENV_PREFIX + "DUPLICATE_ANGLE", 5))
-MAX_AVG_SPEED = float(
-    os.getenv(_ENV_PREFIX + "MAX_AVG_SPEED", 400_000 / 3600)
+MAX_CAPTURE_SPEED_KMH = float(
+    os.getenv(_ENV_PREFIX + "MAX_CAPTURE_SPEED_KMH", 400)
 )  # 400 KM/h
 # WARNING: Changing the following envvars might result in failed uploads
 # Max number of images per sequence
diff --git a/mapillary_tools/exceptions.py b/mapillary_tools/exceptions.py
index cd730d9f..bc3ccf94 100644
--- a/mapillary_tools/exceptions.py
+++ b/mapillary_tools/exceptions.py
@@ -51,6 +51,10 @@ class MapillaryVideoGPSNotFoundError(MapillaryDescriptionError):
     pass
 
 
+class MapillaryInvalidVideoError(MapillaryDescriptionError):
+    pass
+
+
 class MapillaryGPXEmptyError(MapillaryDescriptionError):
     pass
 
diff --git a/mapillary_tools/geotag/factory.py b/mapillary_tools/geotag/factory.py
index a1474d05..c9976663 100644
--- a/mapillary_tools/geotag/factory.py
+++ b/mapillary_tools/geotag/factory.py
@@ -67,7 +67,14 @@ def process(
     reprocessable_paths = set(paths)
 
     for idx, option in enumerate(options):
-        LOG.debug("Processing %d files with %s", len(reprocessable_paths), option)
+        if LOG.getEffectiveLevel() <= logging.DEBUG:
+            LOG.info(
+                f"==> Processing {len(reprocessable_paths)} files with source {option}..."
+            )
+        else:
+            LOG.info(
+                f"==> Processing {len(reprocessable_paths)} files with source {option.source.value}..."
+            )
 
         image_videos, video_paths = _filter_images_and_videos(
             reprocessable_paths, option.filetypes
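Note: a minimal sketch (not part of the patch) of how the renamed constant resolves. It assumes `_ENV_PREFIX` is `"MAPILLARY_TOOLS_"`, which the integration-test envvar names below suggest:

import os

os.environ["MAPILLARY_TOOLS_MAX_CAPTURE_SPEED_KMH"] = "120"

_ENV_PREFIX = "MAPILLARY_TOOLS_"  # assumed prefix, mirroring the test envvar names
MAX_CAPTURE_SPEED_KMH = float(os.getenv(_ENV_PREFIX + "MAX_CAPTURE_SPEED_KMH", 400))
print(MAX_CAPTURE_SPEED_KMH)  # 120.0; falls back to 400.0 when the envvar is unset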
diff --git a/mapillary_tools/geotag/video_extractors/native.py b/mapillary_tools/geotag/video_extractors/native.py
index ca434ced..f2d76b91 100644
--- a/mapillary_tools/geotag/video_extractors/native.py
+++ b/mapillary_tools/geotag/video_extractors/native.py
@@ -12,6 +12,7 @@
 from ... import blackvue_parser, exceptions, geo, telemetry, types, utils
 from ...camm import camm_parser
 from ...gpmf import gpmf_gps_filter, gpmf_parser
+from ...mp4 import construct_mp4_parser, simple_mp4_parser
 
 from .base import BaseVideoExtractor
@@ -113,6 +114,14 @@ def extract(self) -> types.VideoMetadata:
         extractor = GoProVideoExtractor(self.video_path)
         try:
             return extractor.extract()
+        except simple_mp4_parser.BoxNotFoundError as ex:
+            raise exceptions.MapillaryInvalidVideoError(
+                f"Invalid video: {ex}"
+            ) from ex
+        except construct_mp4_parser.BoxNotFoundError as ex:
+            raise exceptions.MapillaryInvalidVideoError(
+                f"Invalid video: {ex}"
+            ) from ex
         except exceptions.MapillaryVideoGPSNotFoundError:
             pass
 
@@ -120,6 +129,14 @@ def extract(self) -> types.VideoMetadata:
         extractor = CAMMVideoExtractor(self.video_path)
         try:
             return extractor.extract()
+        except simple_mp4_parser.BoxNotFoundError as ex:
+            raise exceptions.MapillaryInvalidVideoError(
+                f"Invalid video: {ex}"
+            ) from ex
+        except construct_mp4_parser.BoxNotFoundError as ex:
+            raise exceptions.MapillaryInvalidVideoError(
+                f"Invalid video: {ex}"
+            ) from ex
         except exceptions.MapillaryVideoGPSNotFoundError:
             pass
 
@@ -127,6 +144,14 @@ def extract(self) -> types.VideoMetadata:
         extractor = BlackVueVideoExtractor(self.video_path)
         try:
             return extractor.extract()
+        except simple_mp4_parser.BoxNotFoundError as ex:
+            raise exceptions.MapillaryInvalidVideoError(
+                f"Invalid video: {ex}"
+            ) from ex
+        except construct_mp4_parser.BoxNotFoundError as ex:
+            raise exceptions.MapillaryInvalidVideoError(
+                f"Invalid video: {ex}"
+            ) from ex
         except exceptions.MapillaryVideoGPSNotFoundError:
             pass
diff --git a/mapillary_tools/mp4/construct_mp4_parser.py b/mapillary_tools/mp4/construct_mp4_parser.py
index 0c9f5b33..b8778aca 100644
--- a/mapillary_tools/mp4/construct_mp4_parser.py
+++ b/mapillary_tools/mp4/construct_mp4_parser.py
@@ -370,6 +370,10 @@ class BoxDict(T.TypedDict, total=True):
 SwitchMapType = T.Dict[BoxType, T.Union[C.Construct, "SwitchMapType"]]
 
 
+class BoxNotFoundError(Exception):
+    pass
+
+
 class Box64ConstructBuilder:
     """
     Build a box struct that **parses** MP4 boxes with both 32-bit and 64-bit sizes.
@@ -591,7 +595,7 @@ def find_box_at_pathx(
 ) -> BoxDict:
     found = find_box_at_path(box, path)
     if found is None:
-        raise ValueError(f"box at path {path} not found")
+        raise BoxNotFoundError(f"box at path {path} not found")
     return found
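Note: a self-contained sketch (not part of the patch) of the exception-translation pattern the extractors now apply. The stub classes and `parse_box()` are hypothetical stand-ins for the real parser symbols:

class BoxNotFoundError(Exception):  # stand-in for the parser exceptions above
    pass

class MapillaryInvalidVideoError(Exception):  # stand-in for the new exception
    pass

def parse_box():  # hypothetical stand-in for the MP4 parsing call
    raise BoxNotFoundError("box at path [b'moov'] not found")

try:
    try:
        parse_box()
    except BoxNotFoundError as ex:
        raise MapillaryInvalidVideoError(f"Invalid video: {ex}") from ex
except MapillaryInvalidVideoError as ex:
    print(ex)                  # Invalid video: box at path [b'moov'] not found
    print(repr(ex.__cause__))  # original BoxNotFoundError preserved via "from ex"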
diff --git a/mapillary_tools/process_geotag_properties.py b/mapillary_tools/process_geotag_properties.py
index 3182a11f..0c123619 100644
--- a/mapillary_tools/process_geotag_properties.py
+++ b/mapillary_tools/process_geotag_properties.py
@@ -1,11 +1,11 @@
 from __future__ import annotations
 
-import collections
 import datetime
 import logging
 import typing as T
 from pathlib import Path
 
+import humanize
 from tqdm import tqdm
 
 from . import constants, exceptions, exif_write, types, utils
@@ -217,17 +217,19 @@ def _write_metadatas(
     LOG.info("Check the description file for details: %s", desc_path)
 
 
-def _is_error_skipped(error_type: str, skipped_process_errors: set[T.Type[Exception]]):
-    skipped_process_error_names = set(err.__name__ for err in skipped_process_errors)
-    skip_all = Exception in skipped_process_errors
-    return skip_all or error_type in skipped_process_error_names
+def _is_error_skipped(
+    error_type: type[Exception], skipped_process_errors: set[type[Exception]]
+):
+    return (Exception in skipped_process_errors) or (
+        error_type in skipped_process_errors
+    )
 
 
 def _show_stats(
     metadatas: T.Sequence[types.MetadataOrError],
     skipped_process_errors: set[T.Type[Exception]],
 ) -> None:
-    LOG.info("========== Process summary ==========")
+    LOG.info("==> Process summary")
 
     metadatas_by_filetype: dict[types.FileType, list[types.MetadataOrError]] = {}
 
     for metadata in metadatas:
@@ -244,9 +246,7 @@
         metadata
         for metadata in metadatas
         if isinstance(metadata, types.ErrorMetadata)
-        and not _is_error_skipped(
-            metadata.error.__class__.__name__, skipped_process_errors
-        )
+        and not _is_error_skipped(type(metadata.error), skipped_process_errors)
     ]
     if critical_error_metadatas:
         raise exceptions.MapillaryProcessError(
@@ -262,38 +262,35 @@ def _show_stats_per_filetype(
     good_metadatas: list[types.Metadata]
     good_metadatas, error_metadatas = types.separate_errors(metadatas)
 
-    filesize_to_upload = sum(
-        [0 if m.filesize is None else m.filesize for m in good_metadatas]
-    )
-
-    LOG.info("%8d %s(s) read in total", len(metadatas), filetype.value)
+    LOG.info(f"{len(metadatas)} {filetype.value} read in total")
 
     if good_metadatas:
+        total_filesize = sum(
+            [0 if m.filesize is None else m.filesize for m in good_metadatas]
+        )
         LOG.info(
-            "\t %8d %s(s) (%s MB) are ready to be uploaded",
-            len(good_metadatas),
-            filetype.value,
-            round(filesize_to_upload / 1024 / 1024, 1),
+            f"\t{len(good_metadatas)} ({humanize.naturalsize(total_filesize)}) ready"
         )
 
-    error_counter = collections.Counter(
-        metadata.error.__class__.__name__ for metadata in error_metadatas
-    )
+    errors_by_type: dict[type[Exception], list[types.ErrorMetadata]] = {}
+    for metadata in error_metadatas:
+        errors_by_type.setdefault(type(metadata.error), []).append(metadata)
 
-    for error_type, count in error_counter.items():
+    for error_type, errors in errors_by_type.items():
+        total_filesize = sum([utils.get_file_size_quietly(m.filename) for m in errors])
         if _is_error_skipped(error_type, skipped_process_errors):
             LOG.warning(
-                "\t %8d %s(s) skipped due to %s", count, filetype.value, error_type
+                f"\t{len(errors)} ({humanize.naturalsize(total_filesize)}) {error_type.__name__}"
            )
         else:
             LOG.error(
-                "\t %8d %s(s) failed due to %s", count, filetype.value, error_type
+                f"\t{len(errors)} ({humanize.naturalsize(total_filesize)}) {error_type.__name__}"
             )
 
 
 def _validate_metadatas(
     metadatas: T.Collection[types.MetadataOrError], num_processes: int | None
 ) -> list[types.MetadataOrError]:
-    LOG.debug("Validating %d metadatas", len(metadatas))
+    LOG.info(f"==> Validating {len(metadatas)} metadatas...")
 
     # validating metadatas is slow, hence multiprocessing
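Note: one behavioral detail of the type-based `_is_error_skipped` above is worth spelling out: membership in a set of classes is an exact-type match, not a subclass check; only the explicit `Exception` entry acts as a wildcard. A small self-contained sketch (not part of the patch):

def _is_error_skipped(error_type, skipped_process_errors):
    return (Exception in skipped_process_errors) or (
        error_type in skipped_process_errors
    )

class ParentError(Exception): ...
class ChildError(ParentError): ...

print(_is_error_skipped(ParentError, {ParentError}))  # True: exact type match
print(_is_error_skipped(ChildError, {ParentError}))   # False: subclasses do not match
print(_is_error_skipped(ChildError, {Exception}))     # True: Exception skips everything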
diff --git a/mapillary_tools/process_sequence_properties.py b/mapillary_tools/process_sequence_properties.py
index c3d7b1fe..3d5ff368 100644
--- a/mapillary_tools/process_sequence_properties.py
+++ b/mapillary_tools/process_sequence_properties.py
@@ -7,6 +7,8 @@
 import os
 import typing as T
 
+import humanize
+
 from . import constants, exceptions, geo, types, utils
 from .serializer.description import DescriptionJSONSerializer
@@ -215,7 +217,7 @@ def _is_video_stationary(
 def _check_video_limits(
     video_metadatas: T.Iterable[types.VideoMetadata],
     max_sequence_filesize_in_bytes: int | None,
-    max_avg_speed: float,
+    max_capture_speed_kmh: float,
     max_radius_for_stationary_check: float,
 ) -> tuple[list[types.VideoMetadata], list[types.ErrorMetadata]]:
     output_video_metadatas: list[types.VideoMetadata] = []
@@ -238,7 +240,7 @@
             )
             if video_filesize > max_sequence_filesize_in_bytes:
                 raise exceptions.MapillaryFileTooLargeError(
-                    f"Video file size exceeds the maximum allowed file size ({max_sequence_filesize_in_bytes} bytes)",
+                    f"Video file size {humanize.naturalsize(video_filesize)} exceeds max allowed {humanize.naturalsize(max_sequence_filesize_in_bytes)}",
                 )
 
             contains_null_island = any(
@@ -249,15 +251,19 @@
                 "GPS coordinates in Null Island (0, 0)"
             )
 
+            avg_speed_kmh = (
+                geo.avg_speed(video_metadata.points) * 3.6
+            )  # Convert m/s to km/h
             too_fast = (
                 len(video_metadata.points) >= 2
-                and geo.avg_speed(video_metadata.points) > max_avg_speed
+                and avg_speed_kmh > max_capture_speed_kmh
             )
             if too_fast:
                 raise exceptions.MapillaryCaptureSpeedTooFastError(
-                    f"Capture speed too fast (exceeds {round(max_avg_speed, 3)} m/s)",
+                    f"Capture speed {avg_speed_kmh:.3f} km/h exceeds max allowed {max_capture_speed_kmh:.3f} km/h",
                 )
         except exceptions.MapillaryDescriptionError as ex:
+            LOG.error(f"{_video_name(video_metadata)}: {ex}")
             error_metadatas.append(
                 types.describe_error_metadata(
                     exc=ex,
@@ -268,18 +274,17 @@
         else:
             output_video_metadatas.append(video_metadata)
 
-    if error_metadatas:
-        LOG.info(
-            f"Video validation: {len(output_video_metadatas)} valid, {len(error_metadatas)} errors"
-        )
-
     return output_video_metadatas, error_metadatas
 
 
+def _video_name(video_metadata: types.VideoMetadata) -> str:
+    return video_metadata.filename.name
+
+
 def _check_sequences_by_limits(
     input_sequences: T.Sequence[PointSequence],
     max_sequence_filesize_in_bytes: int | None,
-    max_avg_speed: float,
+    max_capture_speed_kmh: float,
 ) -> tuple[list[PointSequence], list[types.ErrorMetadata]]:
     output_sequences: list[PointSequence] = []
     output_errors: list[types.ErrorMetadata] = []
@@ -295,7 +300,7 @@
             )
             if sequence_filesize > max_sequence_filesize_in_bytes:
                 raise exceptions.MapillaryFileTooLargeError(
-                    f"Sequence file size exceeds the maximum allowed file size ({max_sequence_filesize_in_bytes} bytes)",
+                    f"Sequence file size {humanize.naturalsize(sequence_filesize)} exceeds max allowed {humanize.naturalsize(max_sequence_filesize_in_bytes)}",
                 )
 
             contains_null_island = any(
@@ -306,12 +311,14 @@
                 "GPS coordinates in Null Island (0, 0)"
             )
 
-            too_fast = len(sequence) >= 2 and geo.avg_speed(sequence) > max_avg_speed
+            avg_speed_kmh = geo.avg_speed(sequence) * 3.6  # Convert m/s to km/h
+            too_fast = len(sequence) >= 2 and avg_speed_kmh > max_capture_speed_kmh
             if too_fast:
                 raise exceptions.MapillaryCaptureSpeedTooFastError(
-                    f"Capture speed too fast (exceeds {round(max_avg_speed, 3)} m/s)",
+                    f"Capture speed {avg_speed_kmh:.3f} km/h exceeds max allowed {max_capture_speed_kmh:.3f} km/h",
                 )
         except exceptions.MapillaryDescriptionError as ex:
+            LOG.error(f"{_sequence_name(sequence)}: {ex}")
             for image in sequence:
                 output_errors.append(
                     types.describe_error_metadata(
@@ -326,14 +333,16 @@
         len(s) for s in input_sequences
     )
 
-    if output_errors:
-        LOG.info(
-            f"Sequence validation: {len(output_sequences)} valid, {len(output_errors)} errors"
-        )
-
     return output_sequences, output_errors
 
 
+def _sequence_name(sequence: T.Sequence[types.ImageMetadata]) -> str:
+    if not sequence:
+        return "N/A"
+    image = sequence[0]
+    return f"{image.filename.parent.name}/{image.filename.name}"
+
+
 def _group_by_folder_and_camera(
     image_metadatas: list[types.ImageMetadata],
 ) -> list[list[types.ImageMetadata]]:
@@ -594,8 +603,10 @@ def process_sequence_properties(
     interpolate_directions: bool = False,
     duplicate_distance: float = constants.DUPLICATE_DISTANCE,
     duplicate_angle: float = constants.DUPLICATE_ANGLE,
-    max_avg_speed: float = constants.MAX_AVG_SPEED,
+    max_capture_speed_kmh: float = constants.MAX_CAPTURE_SPEED_KMH,
 ) -> list[types.MetadataOrError]:
+    LOG.info("==> Processing sequences...")
+
     max_sequence_filesize_in_bytes = constants.MAX_SEQUENCE_FILESIZE
     max_sequence_pixels = constants.MAX_SEQUENCE_PIXELS
@@ -611,14 +622,14 @@
         elif isinstance(metadata, types.VideoMetadata):
             video_metadatas.append(metadata)
         else:
-            raise RuntimeError(f"invalid metadata type: {metadata}")
+            raise ValueError(f"invalid metadata type: {metadata}")
 
     if video_metadatas:
         # Check limits for videos
         video_metadatas, video_error_metadatas = _check_video_limits(
             video_metadatas,
             max_sequence_filesize_in_bytes=max_sequence_filesize_in_bytes,
-            max_avg_speed=max_avg_speed,
+            max_capture_speed_kmh=max_capture_speed_kmh,
             max_radius_for_stationary_check=10.0,
         )
         error_metadatas.extend(video_error_metadatas)
@@ -668,7 +679,7 @@
         sequences, errors = _check_sequences_by_limits(
             sequences,
             max_sequence_filesize_in_bytes=max_sequence_filesize_in_bytes,
-            max_avg_speed=max_avg_speed,
+            max_capture_speed_kmh=max_capture_speed_kmh,
         )
         error_metadatas.extend(errors)
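Note: the unit change above hinges on one conversion: `geo.avg_speed()` reports meters per second, and 1 m/s = 3.6 km/h. A worked check (not part of the patch) against the new 400 km/h default:

avg_speed_ms = 27.8                 # hypothetical capture speed in m/s
avg_speed_kmh = avg_speed_ms * 3.6  # same conversion as in the patch
print(f"{avg_speed_kmh:.3f} km/h")  # 100.080 km/h
print(avg_speed_kmh > 400)          # False: under the default MAX_CAPTURE_SPEED_KMH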
diff --git a/mapillary_tools/upload.py b/mapillary_tools/upload.py
index 5636dec0..14aeb9ce 100644
--- a/mapillary_tools/upload.py
+++ b/mapillary_tools/upload.py
@@ -9,6 +9,7 @@
 import uuid
 from pathlib import Path
 
+import humanize
 import jsonschema
 import requests
 from tqdm import tqdm
@@ -48,6 +49,8 @@ def upload(
     noresume: bool = False,
     skip_subfolders: bool = False,
 ) -> None:
+    LOG.info("==> Uploading...")
+
     import_paths = _normalize_import_paths(import_path)
 
     metadatas = _load_descs(_metadatas_from_process, import_paths, desc_path)
@@ -213,17 +216,17 @@ def check_duplication(payload: uploader.Progress):
         if reupload:
             if uploaded_at is not None:
                 LOG.info(
-                    f"Reuploading {name} (previously uploaded at {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(uploaded_at))})"
+                    f"Reuploading {name}: previously uploaded {humanize.naturaldelta(time.time() - uploaded_at)} ago ({time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(uploaded_at))})"
                 )
             else:
                 LOG.info(
-                    f"Reuploading {name} (already uploaded, see {history_desc_path})"
+                    f"Reuploading {name}: already uploaded, see {history_desc_path}"
                 )
         else:
             if uploaded_at is not None:
-                msg = f"Skipping {name} (previously uploaded at {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(uploaded_at))})"
+                msg = f"Skipping {name}: previously uploaded {humanize.naturaldelta(time.time() - uploaded_at)} ago ({time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(uploaded_at))})"
             else:
-                msg = f"Skipping {name} (already uploaded, see {history_desc_path})"
+                msg = f"Skipping {name}: already uploaded, see {history_desc_path}"
             raise UploadedAlready(msg)
 
     @emitter.on("upload_finished")
@@ -461,20 +464,19 @@ def _summarize(stats: T.Sequence[_APIStats]) -> dict:
 
 
 def _show_upload_summary(stats: T.Sequence[_APIStats], errors: T.Sequence[Exception]):
-    LOG.info("========== Upload summary ==========")
+    LOG.info("==> Upload summary")
 
-    errors_by_type: dict[str, list[Exception]] = {}
+    errors_by_type: dict[type[Exception], list[Exception]] = {}
     for error in errors:
-        errors_by_type.setdefault(error.__class__.__name__, []).append(error)
+        errors_by_type.setdefault(type(error), []).append(error)
 
     for error_type, error_list in errors_by_type.items():
-        if error_type == UploadedAlready.__name__:
+        if error_type is UploadedAlready:
             LOG.info(
-                "Skipped %d already uploaded sequences (use --reupload to force re-upload)",
-                len(error_list),
+                f"Skipped {len(error_list)} already uploaded sequences (use --reupload to force re-upload)",
             )
         else:
-            LOG.info(f"{len(error_list)} uploads failed due to {error_type}")
+            LOG.info(f"{len(error_list)} uploads failed due to {error_type.__name__}")
 
     if stats:
         grouped: dict[str, list[_APIStats]] = {}
@@ -483,14 +485,16 @@
 
         for file_type, typed_stats in grouped.items():
             if file_type == FileType.IMAGE.value:
-                LOG.info("%8d image sequences uploaded", len(typed_stats))
+                LOG.info(f"{len(typed_stats)} sequences uploaded")
             else:
-                LOG.info("%8d %s videos uploaded", len(typed_stats), file_type.upper())
+                LOG.info(f"{len(typed_stats)} {file_type} uploaded")
 
         summary = _summarize(stats)
-        LOG.info("%8.1fM data in total", summary["size"])
-        LOG.info("%8.1fM data uploaded", summary["uploaded_size"])
-        LOG.info("%8.1fs upload time", summary["time"])
+        LOG.info(f"{humanize.naturalsize(summary['size'] * 1024 * 1024)} read in total")
+        LOG.info(
+            f"{humanize.naturalsize(summary['uploaded_size'] * 1024 * 1024)} uploaded"
+        )
+        LOG.info(f"{summary['time']:.1f}s upload time")
     else:
         LOG.info("Nothing uploaded. Bye.")
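Note: for reference, a sketch (not part of the patch) of what the humanize calls above render; `naturalsize()` defaults to decimal (SI) units, and `naturaldelta()` accepts a number of seconds:

import humanize

print(humanize.naturalsize(129.4 * 1024 * 1024))  # "135.7 MB" (SI units by default)
print(humanize.naturaldelta(3700))                # "an hour"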
diff --git a/mapillary_tools/uploader.py b/mapillary_tools/uploader.py
index 405a7eae..953ad457 100644
--- a/mapillary_tools/uploader.py
+++ b/mapillary_tools/uploader.py
@@ -188,6 +188,7 @@ def upload_videos(
     sorted_video_metadatas = sorted(video_metadatas, key=lambda m: m.filename)
 
     for idx, video_metadata in enumerate(sorted_video_metadatas):
+        LOG.debug(f"Checksum for video {video_metadata.filename}...")
         try:
             video_metadata.update_md5sum()
         except Exception as ex:
@@ -421,6 +422,8 @@ def _zip_sequence_fp(
             f"Only one sequence is allowed but got {len(sequence_groups)}: {list(sequence_groups.keys())}"
         )
 
+    if sequence:
+        LOG.debug(f"Checksum for sequence {sequence[0].MAPSequenceUUID}...")
     sequence_md5sum = types.update_sequence_md5sum(sequence)
 
     with zipfile.ZipFile(zip_fp, "w", zipfile.ZIP_DEFLATED) as zipf:
@@ -498,6 +501,7 @@ def upload_images(
     sequences = types.group_and_sort_images(image_metadatas)
 
     for sequence_idx, (sequence_uuid, sequence) in enumerate(sequences.items()):
+        LOG.debug(f"Checksum for image sequence {sequence_uuid}...")
         sequence_md5sum = types.update_sequence_md5sum(sequence)
 
         sequence_progress: SequenceProgress = {
diff --git a/mapillary_tools/utils.py b/mapillary_tools/utils.py
index 662e9ff1..6d5f82b3 100644
--- a/mapillary_tools/utils.py
+++ b/mapillary_tools/utils.py
@@ -197,6 +197,13 @@ def get_file_size(path: Path) -> int:
     return os.path.getsize(path)
 
 
+def get_file_size_quietly(path: Path) -> int:
+    try:
+        return get_file_size(path)
+    except Exception:
+        return 0
+
+
 TMapIn = T.TypeVar("TMapIn")
 TMapOut = T.TypeVar("TMapOut")
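Note: the error summary in process_geotag_properties.py relies on `get_file_size_quietly` never raising. A sketch (not part of the patch) of that contract:

import os
from pathlib import Path

def get_file_size_quietly(path: Path) -> int:
    try:
        return os.path.getsize(path)  # get_file_size() delegates to this
    except Exception:
        return 0  # deleted/unreadable files count as 0 bytes in summaries

print(get_file_size_quietly(Path("definitely-missing.mp4")))  # 0, no exception raised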
Bye.") diff --git a/mapillary_tools/uploader.py b/mapillary_tools/uploader.py index 405a7eae..953ad457 100644 --- a/mapillary_tools/uploader.py +++ b/mapillary_tools/uploader.py @@ -188,6 +188,7 @@ def upload_videos( sorted_video_metadatas = sorted(video_metadatas, key=lambda m: m.filename) for idx, video_metadata in enumerate(sorted_video_metadatas): + LOG.debug(f"Checksum for video {video_metadata.filename}...") try: video_metadata.update_md5sum() except Exception as ex: @@ -421,6 +422,8 @@ def _zip_sequence_fp( f"Only one sequence is allowed but got {len(sequence_groups)}: {list(sequence_groups.keys())}" ) + if sequence: + LOG.debug(f"Checksum for sequence {sequence[0].MAPSequenceUUID}...") sequence_md5sum = types.update_sequence_md5sum(sequence) with zipfile.ZipFile(zip_fp, "w", zipfile.ZIP_DEFLATED) as zipf: @@ -498,6 +501,7 @@ def upload_images( sequences = types.group_and_sort_images(image_metadatas) for sequence_idx, (sequence_uuid, sequence) in enumerate(sequences.items()): + LOG.debug(f"Checksum for image sequence {sequence_uuid}...") sequence_md5sum = types.update_sequence_md5sum(sequence) sequence_progress: SequenceProgress = { diff --git a/mapillary_tools/utils.py b/mapillary_tools/utils.py index 662e9ff1..6d5f82b3 100644 --- a/mapillary_tools/utils.py +++ b/mapillary_tools/utils.py @@ -197,6 +197,13 @@ def get_file_size(path: Path) -> int: return os.path.getsize(path) +def get_file_size_quietly(path: Path) -> int: + try: + return get_file_size(path) + except Exception: + return 0 + + TMapIn = T.TypeVar("TMapIn") TMapOut = T.TypeVar("TMapOut") diff --git a/pyproject.toml b/pyproject.toml index 8c5939ce..5eb72834 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,6 +32,7 @@ dependencies = [ "construct~=2.10.0", "exifread~=3.0", "gpxpy~=1.6.0", + "humanize>=4.12.3", "jsonschema~=4.17.0", "piexif~=1.1", "pynmea2>=1.12.0,<2.0.0", diff --git a/tests/integration/test_gopro.py b/tests/integration/test_gopro.py index 9ea014a4..51a78134 100644 --- a/tests/integration/test_gopro.py +++ b/tests/integration/test_gopro.py @@ -24,7 +24,7 @@ "MAPILLARY_TOOLS_GOPRO_GPS_FIXES": "0,2,3", "MAPILLARY_TOOLS_GOPRO_MAX_DOP100": "100000", "MAPILLARY_TOOLS_GOPRO_GPS_PRECISION": "10000000", - "MAPILLARY_TOOLS_MAX_AVG_SPEED": "200000", # km/h + "MAPILLARY_TOOLS_MAX_CAPTURE_SPEED_KMH": "2000000", # km/h } EXPECTED_DESCS: T.List[T.Any] = [ {