MediaParser.py
"""
The Parser has the kind-of-difficult job of figuring out what a given file contains. It runs
ffprobe on the file and builds a set of dictionaries and lists describing its video, audio and
subtitle streams. Transformers take objects of this type.
"""
import subprocess
import json

from util import Log
TAG = "parser"


class Parser(object):
    ffprobe_dict = False
    video_stream = {}
    audio_streams = []
    sub_streams = []
    is_interlaced = False
    file_format = ""
    duration = 0

    def __init__(self, filename):
        if not filename.startswith('/'):
            Log.w(TAG, "Filename is not absolute, this may cause issues dispatching jobs.")
        # Ask ffprobe for the container format and stream descriptions as JSON.
        ffprobe = subprocess.Popen(["ffprobe", "-v", "quiet", "-print_format", "json",
                                    "-show_format", "-show_streams", filename],
                                   stdout=subprocess.PIPE, universal_newlines=True)
        # Get everything from stdout once ffprobe exits, and parse it as JSON.
        try:
            ffprobe_string = ffprobe.communicate()[0]
            self.ffprobe_dict = json.loads(ffprobe_string)
        except ValueError:
            Log.e(TAG, "File could not be read, are you sure it exists?")
        # Run ffmpeg's idet filter over the first 400 frames to detect interlacing;
        # idet reports its statistics on stderr.
        ffmpeg_interlace = subprocess.Popen(["ffmpeg", "-i", filename, "-filter:v", "idet",
                                             "-frames:v", "400", "-an", "-f", "null", "-"],
                                            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                                            universal_newlines=True)
        interlaced_details = ffmpeg_interlace.communicate()[1]
        interlaced_lines = interlaced_details.split("\n")
        num_progressive = 0
        for line in interlaced_lines:
            if line.find("idet") != -1 and line.find("Progressive") != -1:
                # Find the number of progressive frames reported on this line.
                nframes = line.split("Progressive:")[1].split("Undetermined")[0]
                num_progressive += int(nframes)
        # If fewer than 20 of the sampled frames were detected as progressive,
        # treat the file as interlaced.
        if num_progressive < 20:
            self.is_interlaced = True
        self.video_stream = self.parse_video(self.ffprobe_dict)
        self.audio_streams = self.parse_audio(self.ffprobe_dict)
        self.sub_streams = self.parse_subs(self.ffprobe_dict)
        if self.ffprobe_dict:
            self.file_format = self.ffprobe_dict["format"]["format_name"]
            self.duration = float(self.ffprobe_dict["format"]["duration"])

    # Parses ffprobe_dict to create the video_stream dict. Only the constructor should call this!
    def parse_video(self, ffprobe_dict):
        if not ffprobe_dict:
            return None
        foundVideo = False
        video_stream = {}
        for stream in ffprobe_dict["streams"]:
            if stream["codec_type"] == "video":
                if foundVideo:
                    Log.w(TAG, "File has more than one video stream. Using the first one. This is unsupported!")
                    continue
                foundVideo = True
                video_stream = {"index": stream["index"], "width": stream["width"],
                                "height": stream["height"], "codec": stream["codec_name"]}
        return video_stream

    # Parses ffprobe_dict to create the audio_streams list. Don't call manually!
    def parse_audio(self, ffprobe_dict):
        if not ffprobe_dict:
            return None
        audio_streams = []
        for stream in ffprobe_dict["streams"]:
            if stream["codec_type"] == "audio":
                language_found = None
                try:
                    language_found = stream["tags"]["language"]
                except KeyError:
                    Log.v(TAG, "Could not find a language for stream " + str(stream["index"]))
                audio_streams.append({"index": stream["index"], "codec": stream["codec_name"],
                                      "channels": stream["channels"], "language": language_found})
        return audio_streams

    # Parses ffprobe_dict to create the sub_streams list. Don't call manually!
    def parse_subs(self, ffprobe_dict):
        if not ffprobe_dict:
            return None
        sub_streams = []
        for stream in ffprobe_dict["streams"]:
            if stream["codec_type"] == "subtitle":
                language_found = None
                try:
                    language_found = stream["tags"]["language"]
                except KeyError:
                    Log.v(TAG, "Could not find a language for stream " + str(stream["index"]))
                # The number of frames (or the bitrate) is the best way we have to tell secondary
                # (i.e. foreign-language / forced) captions apart from primary (full-dialogue) captions.
                # There is likely a better way... still need to find it.
                num_frames = None
                try:
                    num_frames = stream["nb_frames"]
                except KeyError:
                    try:
                        num_frames = stream["tags"]["NUMBER_OF_FRAMES"]
                    except KeyError:
                        pass
                sub_streams.append({"index": stream["index"], "codec": stream["codec_name"],
                                    "language": language_found, "numframes": num_frames})
        return sub_streams
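

if __name__ == "__main__":
    # Minimal usage sketch (not part of the original module): probe a file passed on the
    # command line and report what the Parser found. Assumes ffprobe/ffmpeg are on the PATH
    # and that the path given is absolute, e.g.  python MediaParser.py /media/example.mkv
    import sys

    media = Parser(sys.argv[1])
    print("format: " + media.file_format + ", duration: " + str(media.duration) + "s")
    print("interlaced: " + str(media.is_interlaced))
    print("video: " + str(media.video_stream))
    for audio in media.audio_streams:
        print("audio: " + str(audio))
    for sub in media.sub_streams:
        print("subtitle: " + str(sub))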