forked from kylemcdonald/python-utils
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathffmpeg.py
More file actions
122 lines (111 loc) · 3.59 KB
/
ffmpeg.py
File metadata and controls
122 lines (111 loc) · 3.59 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
import numpy as np
import subprocess as sp
import os
import time
# Write-only handle on the OS null device, opened once at import time and kept
# open for the life of the module; suitable as a subprocess stdout/stderr
# target when a child process's console output should be discarded.
DEVNULL = open(os.devnull, 'w')
# attempts to handle all float/integer conversions with and without normalizing
def convert_bit_depth(y, in_type, out_type, normalize=False):
    """Convert the sample array ``y`` from ``in_type`` to ``out_type``.

    The input array is never modified; a new array is returned whenever any
    scaling or dtype change takes place.

    Conversions:
      * float -> float: dtype cast (after optional peak normalization).
      * float -> int:   samples are multiplied by the maximum of ``out_type``,
                        clipped to its range and truncated toward zero.
      * int -> float:   dtype cast; values are only rescaled when
                        ``normalize`` is true (divided by the peak).
      * int -> int:     samples are rescaled by ``out_max / in_max`` (where
                        ``in_max`` is the peak when normalizing), rounded to
                        nearest and clipped to the range of ``out_type``.
      * any other ``in_type``/``out_type`` kind: ``y`` is returned unchanged.

    Args:
        y: array-like of samples.
        in_type: dtype (or anything ``np.dtype`` accepts) describing ``y``.
        out_type: dtype to convert to.
        normalize: when true, scale so the largest absolute sample maps to
            full scale. Ignored for empty or all-zero input.

    Returns:
        The converted ``numpy.ndarray``.
    """
    in_type = np.dtype(in_type).type
    out_type = np.dtype(out_type).type
    y = np.asarray(y)

    peak = None
    if normalize and y.size > 0:
        # Use min/max as Python floats instead of np.abs(y).max(): np.abs
        # wraps around on the most negative integer and would yield a
        # negative "peak".
        peak = max(abs(float(y.min())), abs(float(y.max())))
        if peak == 0:
            peak = None  # silent signal: nothing to normalize
    normalize = peak is not None

    if issubclass(in_type, np.floating):
        if normalize:
            # Out-of-place so the caller's (possibly read-only) array is
            # left untouched.
            y = y / peak
        if issubclass(out_type, np.integer):
            info = np.iinfo(out_type)
            # Scale in float64 and clip so full-scale samples cannot wrap
            # around when cast to the integer type.
            scaled = y.astype(np.float64) * info.max
            return np.clip(scaled, float(info.min), float(info.max)).astype(out_type)
        return y.astype(out_type)

    if issubclass(in_type, np.integer):
        if issubclass(out_type, np.floating):
            y = y.astype(out_type)  # fresh copy, safe to scale in place
            if normalize:
                y /= peak
        elif issubclass(out_type, np.integer):
            in_max = peak if normalize else np.iinfo(in_type).max
            info = np.iinfo(out_type)
            if info.max != in_max:
                # In-place float arithmetic on an integer array is rejected
                # by NumPy, so rescale through float64 and cast afterwards.
                scaled = np.rint(y.astype(np.float64) * (info.max / in_max))
                y = np.clip(scaled, float(info.min), float(info.max)).astype(out_type)
    return y
# load_audio can not detect the input type
# could use a command like this with sr=None or detect=True:
# ffprobe -hide_banner \
#     -loglevel fatal \
#     -show_error \
#     -show_format \
#     -show_streams \
#     -print_format json \
#     -i fn
def auread(filename, sr=44100, mono=False, normalize=True, in_type=np.int16, out_type=np.float32):
    """Decode an audio file to a numpy array by piping it through ffmpeg.

    ffmpeg is asked to resample to ``sr`` Hz, mix to one (``mono=True``) or
    two channels, and emit raw little-endian PCM in ``in_type`` on stdout.
    The samples are then converted to ``out_type`` with ``convert_bit_depth``.

    Args:
        filename: path handed to ffmpeg's ``-i`` option.
        sr: target sample rate in Hz.
        mono: decode to one channel when true, otherwise two.
        normalize: forwarded to ``convert_bit_depth``.
        in_type: sample type ffmpeg should emit; one of float64, float32,
            int16, int32 or uint32.
        out_type: dtype of the returned samples.

    Returns:
        ``(audio, sr)``. ``audio`` is 1-D for mono and has shape
        ``(channels, samples)`` otherwise. If ffmpeg produced no samples an
        empty array of ``out_type`` is returned.

    Raises:
        KeyError: if ``in_type`` is not one of the supported sample types.
    """
    in_type = np.dtype(in_type).type
    out_type = np.dtype(out_type).type
    channels = 1 if mono else 2
    format_strings = {
        np.float64: 'f64le',
        np.float32: 'f32le',
        np.int16: 's16le',
        np.int32: 's32le',
        np.uint32: 'u32le'
    }
    format_string = format_strings[in_type]
    command = [
        'ffmpeg',
        '-i', filename,
        '-f', format_string,
        '-acodec', 'pcm_' + format_string,
        '-ar', str(sr),
        '-ac', str(channels),
        '-']
    p = sp.Popen(command, stdout=sp.PIPE, stderr=DEVNULL)
    raw, _ = p.communicate()

    # Drop a truncated trailing frame (e.g. ffmpeg was interrupted) so that
    # frombuffer/reshape below cannot fail on a partial sample.
    frame_bytes = np.dtype(in_type).itemsize * channels
    usable = len(raw) - len(raw) % frame_bytes
    # frombuffer yields a read-only view of the bytes object; copy so the
    # samples can be scaled by the bit-depth conversion.
    audio = np.frombuffer(memoryview(raw)[:usable], dtype=in_type).copy()
    if channels > 1:
        # De-interleave: (samples, channels) -> (channels, samples).
        audio = audio.reshape((-1, channels)).transpose()
    if audio.size == 0:
        return audio.astype(out_type), sr
    audio = convert_bit_depth(audio, in_type, out_type, normalize)
    return audio, sr
def auwrite(fn, audio, sr, channels=1):
    """Encode a numpy sample array to an audio file by piping it to ffmpeg.

    The raw bytes of ``audio`` are fed to ffmpeg's stdin as little-endian PCM
    matching ``audio.dtype``; ffmpeg chooses the output codec/container from
    ``fn``. An existing file at ``fn`` is overwritten (``-y``).

    Args:
        fn: output path handed to ffmpeg.
        audio: ``numpy.ndarray`` of dtype float64, float32, int16, int32 or
            uint32. A 2-D array shaped ``(channels, samples)`` -- the layout
            ``auread`` returns -- is interleaved before writing; any other
            layout is written in its existing memory order.
        sr: sample rate in Hz of ``audio``.
        channels: number of channels contained in ``audio``.

    Raises:
        KeyError: if ``audio.dtype`` is not one of the supported types.
    """
    format_strings = {
        'float64': 'f64le',
        'float32': 'f32le',
        'int16': 's16le',
        'int32': 's32le',
        'uint32': 'u32le'
    }
    format_strings = {np.dtype(key): value for key, value in format_strings.items()}
    format_string = format_strings[audio.dtype]

    # Raw PCM is interleaved sample-by-sample; convert the planar
    # (channels, samples) layout. A square array is ambiguous and left as-is.
    if audio.ndim == 2 and audio.shape[0] == channels and audio.shape[1] != channels:
        audio = audio.T

    command = [
        'ffmpeg',
        '-y',
        '-ar', str(sr),
        # Tell ffmpeg how many channels the raw stream carries; without this
        # the `channels` argument had no effect.
        '-ac', str(channels),
        '-f', format_string,
        '-i', 'pipe:',
        fn]
    p = sp.Popen(command, stdin=sp.PIPE, stdout=None, stderr=None)
    # tobytes() serializes in C order, i.e. interleaved after the transpose.
    p.communicate(audio.tobytes())
import ffmpeg
def vidwrite(fn, images, fps=60):
    """Encode a stack of frames to an H.264 video via ffmpeg-python.

    Frames are cast to uint8 one at a time and streamed as raw video to an
    ffmpeg process, which encodes them with libx264 in yuv420p and overwrites
    any existing file at ``fn``.

    Args:
        fn: output path.
        images: array-like of shape ``(n, height, width, channels)`` with 1
            (gray), 3 (RGB) or 4 (RGBA) channels.
        fps: value passed as ffmpeg's output ``r`` option (default 60, the
            previously hard-coded value). No rate is set on the raw input.
            NOTE(review): confirm whether the input rate should be set too.

    Raises:
        ValueError: if ``images`` is not 4-D or has an unsupported number of
            channels.
    """
    if not isinstance(images, np.ndarray):
        images = np.asarray(images)
    _, height, width, channels = images.shape

    pix_fmts = {1: 'gray', 3: 'rgb24', 4: 'rgba'}
    if channels not in pix_fmts:
        raise ValueError(
            'expected 1, 3 or 4 channels per pixel, got {}'.format(channels))

    process = (
        ffmpeg
        .input('pipe:', format='rawvideo', pix_fmt=pix_fmts[channels],
               s='{}x{}'.format(width, height))
        .output(fn, pix_fmt='yuv420p', vcodec='libx264', r=fps)
        .overwrite_output()
        .run_async(pipe_stdin=True)
    )
    try:
        for frame in images:
            # copy=False avoids a redundant copy when frames are already uint8.
            process.stdin.write(frame.astype(np.uint8, copy=False).tobytes())
    finally:
        # Always release the pipe and reap the child, even if a write failed.
        try:
            process.stdin.close()
        finally:
            process.wait()