python-utils/ffmpeg.py at master · sendaljpt/python-utils · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
import numpy as np
import subprocess as sp
import os
import time
# Write handle on the OS null device, opened once at import time. auread()
# passes it as ffmpeg's stderr so the decoder's log output is discarded.
# NOTE(review): nothing in the code shown here closes this handle; on
# Python 3.3+ subprocess.DEVNULL would avoid holding a file open.
DEVNULL = open(os.devnull, 'w')

def _cast_to_integer(values, out_type, rounding=False):
    """Clip floating-point *values* to the range of *out_type* and cast to it.

    Clipping saturates out-of-range samples instead of letting the cast wrap
    around. With ``rounding=True`` the values are rounded to the nearest
    integer first; otherwise the cast truncates toward zero.
    """
    limits = np.iinfo(out_type)
    if rounding:
        values = np.rint(values)
    return np.clip(values, limits.min, limits.max).astype(out_type)


# attempts to handle all float/integer conversions with and without normalizing
def convert_bit_depth(y, in_type, out_type, normalize=False):
    """Convert samples from ``in_type`` to ``out_type``, optionally normalizing.

    The input is never modified; a new array is returned.

    Conversion rules:
      * float -> int: samples are multiplied by the maximum of ``out_type``
        (i.e. the input is assumed to span [-1, 1]), saturated to the
        integer range and truncated toward zero.
      * float -> float: plain dtype cast.
      * int -> float: plain dtype cast. Without ``normalize`` the raw integer
        magnitudes are kept (they are NOT rescaled to [-1, 1]).
      * int -> int: samples are rescaled by ``out_max / in_max``, rounded to
        the nearest integer and saturated to the range of ``out_type``.

    Args:
        y: array-like of samples.
        in_type: dtype (or anything ``np.dtype`` accepts) describing ``y``.
        out_type: dtype of the returned array.
        normalize: when true, scale so that the largest absolute sample maps
            to full scale (1.0 for float output, the integer maximum for
            integer output). Ignored when the input is empty or all zeros.

    Returns:
        A numpy array of dtype ``out_type``. If ``in_type`` is neither a
        floating nor an integer type (or the input is integer and
        ``out_type`` is neither), the input is returned without conversion.
    """
    in_type = np.dtype(in_type).type
    out_type = np.dtype(out_type).type
    y = np.asarray(y)

    # peak stays 0 (meaning "do not normalize") for empty or silent input.
    peak = 0
    if normalize and y.size:
        if np.issubdtype(y.dtype, np.integer):
            # Use Python ints: np.abs() overflows on the most negative value
            # of a signed type (e.g. int16 -32768) and yields a wrong peak.
            peak = max(int(y.max()), -int(y.min()))
        else:
            peak = np.abs(y).max()
    should_scale = peak != 0

    if issubclass(in_type, np.floating):
        # Out-of-place division: the input may be read-only (np.frombuffer)
        # and must not be mutated behind the caller's back.
        scaled = y / peak if should_scale else y
        if issubclass(out_type, np.integer):
            full_scale = np.iinfo(out_type).max
            # float64 keeps 32-bit full-scale values exact; float32 would
            # round int32's maximum up to 2**31 and overflow on the cast.
            return _cast_to_integer(scaled.astype(np.float64) * full_scale, out_type)
        return scaled.astype(out_type)

    if issubclass(in_type, np.integer):
        if issubclass(out_type, np.floating):
            converted = y.astype(out_type)  # astype copies, so /= is safe
            if should_scale:
                converted /= peak
            return converted
        if issubclass(out_type, np.integer):
            in_max = peak if should_scale else np.iinfo(in_type).max
            out_max = np.iinfo(out_type).max
            if in_max == out_max:
                # No rescaling needed; avoid a lossy trip through float64.
                return y.astype(out_type)
            ratio = out_max / in_max
            return _cast_to_integer(y.astype(np.float64) * ratio, out_type, rounding=True)

    return y

# auread can not detect the input type: ffmpeg is asked to convert whatever
# the file contains to the requested in_type, sr and channel count.
# could use a command like this with sr=None or detect=True:
# ffprobe -hide_banner \
#     -loglevel fatal \
#     -show_error \
#     -show_format \
#     -show_streams \
#     -print_format json \
#     -i fn
def auread(filename, sr=44100, mono=False, normalize=True, in_type=np.int16, out_type=np.float32):
    """Decode an audio file through ffmpeg and return ``(audio, sr)``.

    ffmpeg is run as a subprocess that writes raw little-endian PCM in
    ``in_type`` format to stdout; its stderr is discarded. The samples are
    then converted to ``out_type`` with ``convert_bit_depth``.

    Args:
        filename: path handed to ffmpeg's ``-i``.
        sr: sample rate ffmpeg resamples to; returned unchanged.
        mono: decode to 1 channel when true, otherwise 2 channels.
        normalize: forwarded to ``convert_bit_depth``.
        in_type: sample type requested from ffmpeg; one of float64, float32,
            int16, int32, uint32.
        out_type: dtype of the returned array.

    Returns:
        ``(audio, sr)``. ``audio`` is 1-D for mono and shaped
        ``(channels, samples)`` otherwise. If ffmpeg produced no output (for
        example because it failed), an empty array is returned.

    Raises:
        KeyError: if ``in_type`` has no matching ffmpeg sample format.
    """
    in_type = np.dtype(in_type).type
    out_type = np.dtype(out_type).type
    channels = 1 if mono else 2
    format_strings = {
        np.float64: 'f64le',
        np.float32: 'f32le',
        np.int16: 's16le',
        np.int32: 's32le',
        np.uint32: 'u32le'
    }
    format_string = format_strings[in_type]
    command = [
        'ffmpeg',
        # ffmpeg otherwise reads interactive commands from the inherited
        # stdin and can swallow input meant for the calling program.
        '-nostdin',
        '-i', filename,
        '-f', format_string,
        '-acodec', 'pcm_' + format_string,
        '-ar', str(sr),
        '-ac', str(channels),
        '-']
    p = sp.Popen(command, stdout=sp.PIPE, stderr=DEVNULL)
    raw, _ = p.communicate()

    # Keep whole frames only, so a truncated stream cannot make frombuffer
    # or the reshape below fail on a trailing partial frame.
    frame_bytes = np.dtype(in_type).itemsize * channels
    n_values = (len(raw) // frame_bytes) * channels
    # np.frombuffer over bytes yields a read-only array; take a writable copy
    # so that in-place arithmetic during conversion cannot fail.
    audio = np.frombuffer(raw, dtype=in_type, count=n_values).copy()

    if channels > 1:
        # interleaved (samples, channels) -> planar (channels, samples)
        audio = audio.reshape((-1, channels)).transpose()

    if audio.size == 0:
        return audio.astype(out_type), sr

    audio = convert_bit_depth(audio, in_type, out_type, normalize)

    return audio, sr

def auwrite(fn, audio, sr, channels=1):
    """Encode raw PCM samples to the file ``fn`` by piping them through ffmpeg.

    The output container/codec is whatever ffmpeg infers from ``fn``; an
    existing file is overwritten (``-y``). ffmpeg's stdout and stderr are
    inherited from the calling process.

    Args:
        fn: output path handed to ffmpeg.
        audio: numpy array of samples; its dtype must be float64, float32,
            int16, int32 or uint32 and selects the raw input format. A 2-D
            array shaped ``(channels, samples)`` (the layout ``auread``
            returns) is interleaved before writing; any other shape is
            written in memory order and must already be interleaved.
        sr: sample rate of ``audio``.
        channels: number of channels in ``audio``.

    Raises:
        KeyError: if ``audio.dtype`` has no matching ffmpeg sample format.
    """
    format_strings = {
        np.dtype('float64'): 'f64le',
        np.dtype('float32'): 'f32le',
        np.dtype('int16'): 's16le',
        np.dtype('int32'): 's32le',
        np.dtype('uint32'): 'u32le'
    }
    format_string = format_strings[audio.dtype]

    # Raw PCM is interleaved sample-by-sample; tobytes() on the transposed
    # view emits the channels in that order.
    if audio.ndim == 2 and audio.shape[0] == channels:
        audio = audio.T

    command = [
        'ffmpeg',
        '-y',
        '-ar', str(sr),
        # Tell the raw demuxer how many channels the stream carries; without
        # it the channels argument had no effect on the output.
        '-ac', str(channels),
        '-f', format_string,
        '-i', 'pipe:',
        fn]
    p = sp.Popen(command, stdin=sp.PIPE, stdout=None, stderr=None)
    p.communicate(audio.tobytes())

import ffmpeg

def vidwrite(fn, images, fps=60):
    """Encode a stack of frames to an H.264 video file via ffmpeg-python.

    Frames are streamed one at a time to ffmpeg's stdin as raw video and
    encoded with libx264 in yuv420p; an existing ``fn`` is overwritten.

    Args:
        fn: output path.
        images: array-like shaped ``(n, height, width, channels)`` with 1
            (gray), 3 (RGB) or 4 (RGBA) channels. Each frame is cast to
            uint8 with ``astype`` (values are not rescaled).
        fps: frame rate passed as the output ``r`` option.

    Raises:
        ValueError: if ``images`` is not 4-D or has an unsupported number of
            channels.
    """
    # NOTE(review): no input frame rate is set, so ffmpeg applies its own
    # raw-video default and converts to `fps` on output - confirm intended.
    images = np.asarray(images)
    n, height, width, channels = images.shape

    pixel_formats = {1: 'gray', 3: 'rgb24', 4: 'rgba'}
    if channels not in pixel_formats:
        raise ValueError(
            'unsupported number of channels: {}'.format(channels))

    process = (
        ffmpeg
            .input('pipe:', format='rawvideo', pix_fmt=pixel_formats[channels],
                   s='{}x{}'.format(width, height))
            .output(fn, pix_fmt='yuv420p', vcodec='libx264', r=fps)
            .overwrite_output()
            .run_async(pipe_stdin=True)
    )
    try:
        for frame in images:
            process.stdin.write(frame.astype(np.uint8).tobytes())
    finally:
        # Always close the pipe and reap ffmpeg, even when a write fails
        # (e.g. ffmpeg exited early), so no process or pipe is leaked.
        try:
            process.stdin.close()
        finally:
            process.wait()