20 changes: 20 additions & 0 deletions README.md
@@ -89,6 +89,7 @@ If you provide a path to a `.txt` file instead of a video file to the `input` argument
The `.txt` file should contain one video path per line.
In batch mode, you must specify an output file using `--output`, which will be populated with `video_basename,score` for each video.
The `--output_all_stats` flag is ignored in batch mode.
If you need all statistics in batch mode, use `--batch_json_output` to write the results as a JSON array containing the complete statistics plus a `video_name` key identifying the source video.

For example, if `video_list.txt` contains:
```
@@ -105,12 +106,31 @@ This will create `batch_results.txt` with content like:
Gaming_1080P-0ce6_orig.mp4,3.880362033843994
```

To obtain all statistics in JSON format, use the `--batch_json_output` flag:
```bash
python uvq_inference.py video_list.txt --model_version 1.5 --batch_json_output --output batch_results.txt
```

This will create `batch_results.txt` with content like:
```json
[
{
"uvq1p5_score": 3.880362033843994,
"per_frame_scores": [4.021927833557129, 4.013788223266602, 4.110747814178467, 4.142043113708496, 4.1536993980407715, 4.147506237030029, 4.149798393249512, 4.149064064025879, 4.149083137512207, 4.133814811706543, 3.5636682510375977, 3.8045108318328857, 3.630220413208008, 3.6495614051818848, 3.6260201930999756, 3.6136975288391113, 3.5050578117370605, 3.7031033039093018, 3.676196575164795, 3.663726806640625],
"frame_indices": [0, 30, 60, 90, 120, 150, 180, 210, 240, 270, 300, 330, 360, 390, 420, 450, 480, 510, 540, 570],
"video_name": "Gaming_1080P-0ce6_orig.mp4"
}
]
```
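
The JSON array can be consumed directly with Python's standard library. A minimal sketch, assuming the file was written with `--batch_json_output` as above (field names as in the example):

```python
import json

# Load the batch results written by --batch_json_output.
with open("batch_results.txt") as f:
    results = json.load(f)

# One line per video: name, overall score, and number of sampled frames.
for entry in results:
    print(
        f"{entry['video_name']}: score={entry['uvq1p5_score']:.3f}, "
        f"frames={len(entry['per_frame_scores'])}"
    )
```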

#### Optional Arguments

* `--transpose`: Transpose the video before processing (e.g., for portrait videos).
* `--output OUTPUT`: Path to a file to save the output scores. Outside batch mode, scores are saved in JSON format.
* `--device DEVICE`: Device to run inference on (e.g., `cpu` or `cuda`).
* `--fps FPS`: (UVQ 1.5 only) Frames per second to sample. Default is 1. Use -1 to sample all frames.
* `--chunk_size_frames FRAMES`: (UVQ 1.5 only) Number of frames to process at once during inference. If you run out of memory, reduce this number. Default is 16 (see the example command after this list).
* `--batch_json_output`: If specified, outputs batch results in JSON format, including per-frame scores, instead of just the overall mean score.
* `--output_all_stats`: If specified, print all stats in JSON format to stdout.
* `--ffmpeg_path`: Path to FFmpeg executable (default: `ffmpeg`).
* `--ffprobe_path`: Path to FFprobe executable (default: `ffprobe`).
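
As an illustration, the flags combine as follows (values are examples, not recommendations):

```bash
python uvq_inference.py video_list.txt --model_version 1.5 \
    --batch_json_output --output batch_results.txt \
    --device cpu --fps 1 --chunk_size_frames 8
```

Note on `--chunk_size_frames`: a 1080p RGB frame occupies roughly 1920 × 1080 × 3 × 4 bytes ≈ 25 MB once converted to float32, so the default chunk of 16 frames is on the order of 400 MB before model activations; halving the chunk size roughly halves that peak.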
8 changes: 4 additions & 4 deletions utils/probe.py
@@ -39,7 +39,7 @@ def get_dimensions(
result = subprocess.run(
cmd,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
stderr=subprocess.PIPE,
check=True,
text=True,
)
@@ -72,7 +72,7 @@ def get_nb_frames(video_path, ffprobe_path="ffprobe") -> int | None:
result = subprocess.run(
cmd,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
stderr=subprocess.PIPE,
check=True,
text=True,
)
@@ -103,7 +103,7 @@ def get_r_frame_rate(video_path, ffprobe_path="ffprobe") -> int | None:
result = subprocess.run(
cmd,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
stderr=subprocess.PIPE,
check=True,
text=True,
)
@@ -137,7 +137,7 @@ def get_video_duration(video_path, ffprobe_path="ffprobe") -> float | None:
]
try:
result = subprocess.run(
cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, check=True
cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True
)
duration = float(result.stdout)
return duration
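For context on the `probe.py` change: each helper parses `result.stdout` as a number, so merging ffprobe's diagnostics into stdout via `stderr=subprocess.STDOUT` can corrupt that parse; keeping the streams separate with `stderr=subprocess.PIPE` avoids it. A minimal sketch of the failure mode (the ffprobe invocation here is illustrative, not the repository's exact command):

```python
import subprocess

cmd = [
    "ffprobe", "-v", "error",
    "-show_entries", "format=duration",
    "-of", "default=noprint_wrappers=1:nokey=1",
    "input.mp4",
]
result = subprocess.run(
    cmd,
    stdout=subprocess.PIPE,
    stderr=subprocess.PIPE,  # warnings stay out of stdout
    check=True,
    text=True,
)
# With stderr=subprocess.STDOUT, a warning line such as
# "Warning: using AVStream.codec ..." would precede the number
# and float() would raise ValueError.
duration = float(result.stdout)
```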
156 changes: 116 additions & 40 deletions utils/video_reader.py
@@ -157,16 +157,17 @@ def load_video_1p0(
return video, video_resized


def load_video_1p5(
def yield_video_1p5_chunks(
filepath: str,
video_length: int,
transpose: bool = False,
video_fps: int = 1,
video_height: int = 1080,
video_width: int = 1920,
ffmpeg_path: str = "ffmpeg",
) -> tuple[np.ndarray, int]:
"""Load input video for UVQ 1.5.
chunk_size_frames: int = 16,
):
"""Yields chunks of the video as numpy arrays.

Args:
filepath: Path to the video file.
@@ -175,10 +176,12 @@ def load_video_1p5(
video_fps: Frames per second to sample for inference.
video_height: Height of the video to resize to.
video_width: Width of the video to resize to.

Returns:
A tuple containing the loaded video as a numpy array and the number of
real frames.
chunk_size_frames: Number of frames to yield per chunk.

Yields:
A tuple containing:
- A chunk of the loaded video as a numpy array (batch, 1, h, w, c).
- The number of real frames in the entire video (the same value in every yield).
"""
video_channel = 3
# Rotate video if requested
@@ -208,41 +211,114 @@
raise error

# For video, the entire video is divided into 1s chunks in 5 fps
with open(temp_filename, "rb") as rgb_file:
single_frame_size = video_width * video_height * video_channel
full_decode_size = video_length * video_fps * single_frame_size
rgb_file.seek(0, 2)
rgb_file_size = rgb_file.tell()
rgb_file.seek(0)
num_real_frames = rgb_file_size // single_frame_size
assert rgb_file_size >= single_frame_size, (
f"Decoding failed to output a single frame: {rgb_file_size} <"
f" {single_frame_size}"
)
if rgb_file_size < full_decode_size:
logging.warning(
"Decoding may be truncated: %d bytes (%d frames) < %d bytes (%d"
" frames), or video length (%ds) may be too incorrect",
rgb_file_size,
rgb_file_size / single_frame_size,
full_decode_size,
full_decode_size / single_frame_size,
video_length,
try:
with open(temp_filename, "rb") as rgb_file:
single_frame_size = video_width * video_height * video_channel
full_decode_size = video_length * video_fps * single_frame_size
rgb_file.seek(0, 2)
rgb_file_size = rgb_file.tell()
rgb_file.seek(0)
num_real_frames = rgb_file_size // single_frame_size
assert rgb_file_size >= single_frame_size, (
f"Decoding failed to output a single frame: {rgb_file_size} <"
f" {single_frame_size}"
)

rgb = _extend_array(bytearray(rgb_file.read()), full_decode_size)
video = (
np.reshape(
np.frombuffer(rgb, "uint8"),
(video_length, int(video_fps), video_height, video_width, 3),

if rgb_file_size < full_decode_size:
logging.warning(
"Decoding may be truncated: %d bytes (%d frames) < %d bytes (%d"
" frames), or the video length (%ds) may be incorrect",
rgb_file_size,
rgb_file_size / single_frame_size,
full_decode_size,
full_decode_size / single_frame_size,
video_length,
)
/ 255.0
- 0.5
) * 2

# Delete temp files
os.close(fd)
os.remove(temp_filename)
logging.info("Load %s done successfully.", filepath)
chunk_size_bytes = chunk_size_frames * single_frame_size

# Read and yield chunks
read_frames = 0
while read_frames < num_real_frames:
chunk_bytes = rgb_file.read(chunk_size_bytes)
if not chunk_bytes:
break

# Handle partial chunks (e.g. at the end of the file): count the whole
# frames in the bytes read; any trailing partial frame is dropped below.
current_chunk_frames = len(chunk_bytes) // single_frame_size

if current_chunk_frames == 0:
break

# Truncate to whole-frame bytes only.
valid_bytes = current_chunk_frames * single_frame_size
if len(chunk_bytes) > valid_bytes:
logging.warning("Read partial frame at end of file, truncating.")
chunk_bytes = chunk_bytes[:valid_bytes]

video_chunk = (
np.reshape(
np.frombuffer(chunk_bytes, "uint8"),
(current_chunk_frames, 1, video_height, video_width, 3),
).astype(np.float32)
/ 255.0
- 0.5
) * 2

yield video_chunk, num_real_frames
read_frames += current_chunk_frames

finally:
# Delete temp files
os.close(fd)
if os.path.exists(temp_filename):
os.remove(temp_filename)
logging.info("Load %s done successfully.", filepath)


def load_video_1p5(
filepath: str,
video_length: int,
transpose: bool = False,
video_fps: int = 1,
video_height: int = 1080,
video_width: int = 1920,
ffmpeg_path: str = "ffmpeg",
) -> tuple[np.ndarray, int]:
"""Load input video for UVQ 1.5.

Note: This loads the entire video into memory. Use yield_video_1p5_chunks for large videos.

Args:
filepath: Path to the video file.
video_length: Length of the video in seconds.
transpose: Whether to transpose the video.
video_fps: Frames per second to sample for inference.
video_height: Height of the video to resize to.
video_width: Width of the video to resize to.

Returns:
A tuple containing the loaded video as a numpy array and the number of
real frames.
"""
chunks = []
num_real_frames = 0
for chunk, n_frames in yield_video_1p5_chunks(
filepath, video_length, transpose, video_fps, video_height, video_width, ffmpeg_path
):
chunks.append(chunk)
num_real_frames = n_frames

if not chunks:
return np.array([]), 0

# Reconstruct the full video array.
# Shape will be (TotalFrames, 1, H, W, 3).
video = np.concatenate(chunks, axis=0)

return video, num_real_frames
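
A usage sketch for the new generator (path, duration, and chunk size are illustrative; the import assumes the repository root is on `sys.path`):

```python
from utils import video_reader

num_real_frames = 0
num_chunks = 0
for chunk, n_frames in video_reader.yield_video_1p5_chunks(
    "Gaming_1080P-0ce6_orig.mp4",  # hypothetical input
    video_length=20,  # seconds, e.g. from probe.get_video_duration()
    video_fps=1,
    chunk_size_frames=8,
):
    # chunk: float32 array of shape (frames, 1, 1080, 1920, 3) in [-1, 1].
    num_real_frames = n_frames  # same total on every yield
    num_chunks += 1

print(f"read {num_real_frames} frames in {num_chunks} chunks")
```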
45 changes: 24 additions & 21 deletions uvq1p5_pytorch/utils/uvq1p5.py
@@ -109,6 +109,7 @@ def infer(
fps: int = 1,
orig_fps: float | None = None,
ffmpeg_path: str = "ffmpeg",
chunk_size_frames: int = 16,
) -> dict[str, Any]:
"""Runs UVQ 1.5 inference on a video file.

@@ -119,38 +120,40 @@
fps: Frames per second to sample for inference.
orig_fps: Original frames per second of the video, used for frame index
calculation.
chunk_size_frames: Number of frames to process in each chunk during inference.

Returns:
A dictionary containing the overall UVQ 1.5 score, per-frame scores,
and frame indices.
"""
video_1080p, _ = self.load_video(

predictions = []

# Use generator to process video in chunks
for video_chunk, _ in video_reader.yield_video_1p5_chunks(
video_filename,
video_length,
transpose,
fps=fps,
video_fps=fps,
ffmpeg_path=ffmpeg_path,
)
num_seconds, read_fps, c, h, w = video_1080p.shape
# reshape to (num_seconds * fps, 1, 3, h, w) to process all frames
num_frames = num_seconds * read_fps
video_1080p = video_1080p.reshape(num_frames, 1, c, h, w)

batch_size = 24
if num_frames > batch_size: # if video is longer than batch size, run inference in batches to avoid OOM
predictions = []
with torch.inference_mode():
for i in range(0, num_frames, batch_size):
batch = video_1080p[i : i + batch_size]
prediction_batch = self.uvq1p5_core(batch)
predictions.append(prediction_batch)
prediction = torch.cat(predictions, dim=0)
else:
with torch.inference_mode():
prediction = self.uvq1p5_core(video_1080p)
chunk_size_frames=chunk_size_frames,
):

video_chunk_torch = torch.from_numpy(video_chunk).float()
video_chunk_torch = video_chunk_torch.permute(0, 1, 4, 2, 3)

with torch.inference_mode():
batch = video_chunk_torch.to(next(self.parameters()).device)
prediction_batch = self.uvq1p5_core(batch)
predictions.append(prediction_batch)

if not predictions:
raise ValueError(f"No frames were read from {video_filename}")

prediction = torch.cat(predictions, dim=0)

video_score = torch.mean(prediction).item()
frame_scores = prediction.numpy().flatten().tolist()
frame_scores = prediction.cpu().numpy().flatten().tolist()

if orig_fps:
frame_indices = [
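The chunked `infer` loop above also tightens device placement: `next(self.parameters()).device` follows the module wherever it lives, and `prediction.cpu()` brings results back before NumPy conversion. The pattern in isolation (the model and data here are stand-ins, not UVQ):

```python
import torch
import torch.nn as nn

model = nn.Linear(8, 1).to("cuda" if torch.cuda.is_available() else "cpu")

predictions = []
with torch.inference_mode():
    for _ in range(3):
        chunk = torch.randn(4, 8)  # stand-in for a decoded video chunk
        batch = chunk.to(next(model.parameters()).device)  # match model device
        predictions.append(model(batch))

scores = torch.cat(predictions, dim=0).cpu().numpy().flatten()
```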
31 changes: 27 additions & 4 deletions uvq_inference.py
@@ -96,6 +96,7 @@ def run_batch_inference(args):
fps=fps_to_use,
orig_fps=orig_fps,
ffmpeg_path=args.ffmpeg_path,
chunk_size_frames=args.chunk_size_frames,
)
score = results["uvq1p5_score"]
elif args.model_version == "1.0":
@@ -105,7 +106,12 @@
transpose_flag,
)
score = float(results["compression_content_distortion"])
results_to_write.append(f"{os.path.basename(video_path)},{score}")

if args.batch_json_output:
results["video_name"] = os.path.basename(video_path)
results_to_write.append(results)
else:
results_to_write.append(f"{os.path.basename(video_path)},{score}")
except Exception as e:
print(f"Error processing {video_path}: {e}")

@@ -114,9 +120,13 @@
output_dir = os.path.dirname(args.output)
if output_dir and not os.path.exists(output_dir):
os.makedirs(output_dir)
with open(args.output, "w") as f_out:
for line in results_to_write:
f_out.write(line + "\n")

if args.batch_json_output:
write_dict_to_file(results_to_write, args.output)
else:
with open(args.output, "w") as f_out:
for line in results_to_write:
f_out.write(line + "\n")
print(f"Batch inference complete. Results saved to {args.output}")
except IOError as e:
print(f"Error writing to output file {args.output}: {e}")
@@ -167,6 +177,7 @@ def run_single_inference(args):
fps=fps,
orig_fps=orig_fps,
ffmpeg_path=args.ffmpeg_path,
chunk_size_frames=args.chunk_size_frames,
)
elif args.model_version == "1.0":
uvq_inference = uvq1p0.UVQ1p0()
@@ -266,6 +277,18 @@ def setup_parser():
help="Frames per second to sample for UVQ1.5. -1 to sample all frames."
" Ignored for UVQ1.0.",
)
parser.add_argument(
"--chunk_size_frames",
type=int,
default=16,
help="Number of frames to process in each chunk during inference.",
)
parser.add_argument(
"--batch_json_output",
action="store_true",
help="If specified, outputs batch results in JSON format including per " \
"frame scores instead of just overall mean score.",
)
parser.add_argument(
"--output_all_stats",
action="store_true",
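
`write_dict_to_file` is referenced in `run_batch_inference` but not shown in this diff; presumably it serializes the accumulated results as JSON. A sketch of an equivalent helper (not the repository's actual implementation):

```python
import json


def write_dict_to_file(data, path):
    """Write results (a dict or a list of dicts) to `path` as JSON."""
    with open(path, "w") as f:
        json.dump(data, f, indent=2)
```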