Skip to content

Commit

Permalink
Add spatial audio support for more sound sample descriptions
Browse files Browse the repository at this point in the history
  • Loading branch information
Dillon Cower committed Feb 8, 2016
1 parent fdb852a commit d328cfa
Show file tree
Hide file tree
Showing 4 changed files with 122 additions and 39 deletions.
38 changes: 21 additions & 17 deletions spatialmedia/gui.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,7 @@ def action_open(self):

infile = os.path.abspath(self.in_file)
file_extension = os.path.splitext(infile)[1].lower()
self.enable_spatial_audio =\
True if (file_extension == ".mp4") else False
self.enable_spatial_audio = parsed_metadata.num_audio_channels == 4

if not metadata:
self.var_spherical.set(0)
Expand Down Expand Up @@ -226,21 +225,6 @@ def create_widgets(self):
self.checkbox_spherical["command"] = self.action_set_spherical
self.checkbox_spherical.grid(row=row, column=column, padx=14, pady=2)

# Spatial Audio Checkbox
row += 1
column = 0
self.label_spatial_audio = Label(self)
self.label_spatial_audio["text"] = "Spatial Audio"
self.label_spatial_audio.grid(row=row, column=column)

column += 1
self.var_spatial_audio = IntVar()
self.checkbox_spatial_audio = \
Checkbutton(self, variable=self.var_spatial_audio)
self.checkbox_spatial_audio["command"] = self.action_set_spatial_audio
self.checkbox_spatial_audio.grid(
row=row, column=column, padx=0, pady=0)

# 3D
column = 0
row = row + 1
Expand All @@ -266,6 +250,26 @@ def create_widgets(self):
self.options_projection["text"] = "Equirectangular"
self.options_projection.grid(row=row, column=column, padx=14, pady=2)

# Spherical / Spatial Audio Separator
row += 1
separator = Frame(self, relief=GROOVE, bd=1, height=2, bg="white")
separator.grid(columnspan=row, padx=14, pady=4, sticky=N+E+S+W)

# Spatial Audio Checkbox
row += 1
column = 0
self.label_spatial_audio = Label(self)
self.label_spatial_audio["text"] = "Spatial Audio"
self.label_spatial_audio.grid(row=row, column=column)

column += 1
self.var_spatial_audio = IntVar()
self.checkbox_spatial_audio = \
Checkbutton(self, variable=self.var_spatial_audio)
self.checkbox_spatial_audio["command"] = self.action_set_spatial_audio
self.checkbox_spatial_audio.grid(
row=row, column=column, padx=0, pady=0)

# Ambisonics Type
column = 0
row = row + 1
Expand Down
69 changes: 58 additions & 11 deletions spatialmedia/metadata_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ class ParsedMetadata(object):
def __init__(self):
    """Creates a metadata record with no video or audio information yet."""
    # Channel count starts at zero and no spatial audio box is attached.
    self.num_audio_channels = 0
    self.audio = None
    # Video metadata entries are collected in a plain dictionary.
    self.video = {}

SPHERICAL_PREFIX = "{http://ns.google.com/videos/1.0/spherical/}"
SPHERICAL_TAGS = dict()
Expand Down Expand Up @@ -211,19 +212,20 @@ def inject_spatial_audio_atom(
if sub_element.name != mpeg.constants.TAG_STSD:
continue
for sample_description in sub_element.contents:
if sample_description.name == mpeg.constants.TAG_MP4A:
if sample_description.name in\
mpeg.constants.SOUND_SAMPLE_DESCRIPTIONS:
in_fh.seek(sample_description.position +
sample_description.header_size + 16)
num_channels = get_num_audio_channels(
sample_description, in_fh)
sub_element, in_fh)
num_ambisonic_components = \
get_expected_num_audio_components(
audio_metadata["ambisonic_type"],
audio_metadata["ambisonic_order"])
if num_channels != num_ambisonic_components:
err_msg = "Error: Found %d audio channel(s). "\
err_msg = "Error: Found %d audio channel(s). "\
"Expected %d channel(s) for %s ambisonics "\
"of orded %d."\
"of order %d."\
% (num_channels,
num_ambisonic_components,
audio_metadata["ambisonic_type"],
Expand Down Expand Up @@ -318,10 +320,13 @@ def parse_spherical_mpeg4(mpeg4_file, fh, console):
for stsd_elem in stbl_elem.contents:
if stsd_elem.name != mpeg.constants.TAG_STSD:
continue
for mp4a_elem in stsd_elem.contents:
if mp4a_elem.name != mpeg.constants.TAG_MP4A:
metadata.num_audio_channels = get_num_audio_channels(
stsd_elem, fh)
for sa3d_container_elem in stsd_elem.contents:
if sa3d_container_elem.name not in\
mpeg.constants.SOUND_SAMPLE_DESCRIPTIONS:
continue
for sa3d_elem in mp4a_elem.contents:
for sa3d_elem in sa3d_container_elem.contents:
if sa3d_elem.name == mpeg.constants.TAG_SA3D:
sa3d_elem.print_box(console)
metadata.audio = sa3d_elem
Expand Down Expand Up @@ -406,9 +411,6 @@ def inject_metadata(src, dest, metadata, console):
extension = os.path.splitext(infile)[1].lower()

if (extension in MPEG_FILE_EXTENSIONS):
if (metadata.audio and extension != ".mp4"):
error("Error: Spatial audio current not supported for %s ." %
extension)
inject_mpeg4(infile, outfile, metadata, console)
return

Expand Down Expand Up @@ -510,8 +512,53 @@ def get_expected_num_audio_components(ambisonics_type, ambisonics_order):
else:
return -1

def get_num_audio_channels(stsd, in_fh):
    """Returns the channel count of the first sound sample description in stsd.

    An mp4a entry is handed to get_aac_num_channels; any other entry listed in
    mpeg.constants.SOUND_SAMPLE_DESCRIPTIONS is handed to
    get_sample_description_num_channels. Only the first matching entry is
    consulted.

    Args:
      stsd: box whose name must be mpeg.constants.TAG_STSD.
      in_fh: file handle passed through to the channel readers.

    Returns:
      The value returned by the selected reader, or -1 when stsd is not a STSD
      box or contains no recognised sound sample description.
    """
    constants = mpeg.constants
    if stsd.name != constants.TAG_STSD:
        print("get_num_audio_channels should be given a STSD box")
        return -1

    for entry in stsd.contents:
        entry_name = entry.name
        # mp4a is checked first because it is also a member of
        # SOUND_SAMPLE_DESCRIPTIONS but needs the AAC-specific reader.
        if entry_name == constants.TAG_MP4A:
            return get_aac_num_channels(entry, in_fh)
        if entry_name in constants.SOUND_SAMPLE_DESCRIPTIONS:
            return get_sample_description_num_channels(entry, in_fh)

    return -1

def get_sample_description_num_channels(sample_description, in_fh):
    """Reads the number of audio channels from a sound sample description.

    The file position of in_fh is always restored before returning, including
    when the version is unsupported or a read fails part-way through.

    Args:
      sample_description: box exposing content_start() and name.
      in_fh: seekable binary file handle that contains the box.

    Returns:
      The channel count stored in the box, or -1 (after printing a message)
      when the sample description version is not 0, 1 or 2.

    Raises:
      struct.error: if the data ends before the channel count field.
    """
    saved_position = in_fh.tell()
    try:
        # The version field sits 8 bytes into the box content (presumably
        # after the reserved bytes and data reference index -- confirm
        # against the QuickTime sound sample description layout).
        in_fh.seek(sample_description.content_start() + 8)
        version = struct.unpack(">h", in_fh.read(2))[0]

        # Skip the revision level (2 bytes) and vendor (4 bytes).
        in_fh.seek(6, 1)

        if version in (0, 1):
            # Versions 0 and 1 both store the channel count as the next
            # 16-bit field; the fields after it are not needed here.
            return struct.unpack(">h", in_fh.read(2))[0]

        if version == 2:
            # Skip the four 16-bit and two 32-bit fixed fields plus the
            # 64-bit sample rate (24 bytes) that precede the 32-bit count.
            in_fh.seek(24, 1)
            return struct.unpack(">i", in_fh.read(4))[0]

        print("Unsupported version for " + sample_description.name + " box")
        return -1
    finally:
        # Leave the handle where the caller had it on every exit path.
        in_fh.seek(saved_position)

def get_num_audio_channels(mp4a_atom, in_fh):
def get_aac_num_channels(mp4a_atom, in_fh):
"""Reads the number of audio channels from AAC's AudioSpecificConfig
descriptor within the esds child atom of the input mp4a atom.
"""
Expand Down
38 changes: 33 additions & 5 deletions spatialmedia/mpeg/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,27 +29,55 @@
TAG_FTYP = "ftyp"
TAG_ESDS = "esds"
TAG_SOUN = "soun"
TAG_SA3D = "SA3D"

# Container types.
TAG_MOOV = "moov"
TAG_UDTA = "udta"
TAG_META = "meta"
TAG_TRAK = "trak"
TAG_MDIA = "mdia"
TAG_MP4A = "mp4a"
TAG_MINF = "minf"
TAG_STBL = "stbl"
TAG_STSD = "stsd"
TAG_UUID = "uuid"
TAG_SA3D = "SA3D"

CONTAINERS_LIST = [
# Sound sample descriptions.
TAG_NONE = "NONE"
TAG_RAW_ = "raw "
TAG_TWOS = "twos"
TAG_SOWT = "sowt"
TAG_FL32 = "fl32"
TAG_FL64 = "fl64"
TAG_IN24 = "in24"
TAG_IN32 = "in32"
TAG_ULAW = "ulaw"
TAG_ALAW = "alaw"
TAG_LPCM = "lpcm"
TAG_MP4A = "mp4a"

SOUND_SAMPLE_DESCRIPTIONS = frozenset([
TAG_NONE,
TAG_RAW_,
TAG_TWOS,
TAG_SOWT,
TAG_FL32,
TAG_FL64,
TAG_IN24,
TAG_IN32,
TAG_ULAW,
TAG_ALAW,
TAG_LPCM,
TAG_MP4A,
])

CONTAINERS_LIST = frozenset([
TAG_MDIA,
TAG_MINF,
TAG_MP4A,
TAG_MOOV,
TAG_STBL,
TAG_STSD,
TAG_TRAK,
TAG_UDTA,
]
]).union(SOUND_SAMPLE_DESCRIPTIONS)

16 changes: 10 additions & 6 deletions spatialmedia/mpeg/container.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,20 +55,24 @@ def load(fh, position, end):
return None

padding = 0
stsd_version = 0
if (name == constants.TAG_STSD):
if name == constants.TAG_STSD:
padding = 8

if (name == constants.TAG_MP4A):
if name in constants.SOUND_SAMPLE_DESCRIPTIONS:
current_pos = fh.tell()
fh.seek(current_pos + 8)
sample_description_version = struct.unpack(">h", fh.read(2))[0]
fh.seek(current_pos)

if sample_description_version == 1:
padding = 28+16 # Mov
if sample_description_version == 0:
padding = 28
elif sample_description_version == 1:
padding = 28 + 16
elif sample_description_version == 2:
padding = 56
else:
padding = 28 # Mp4
print("Unsupported sample description version:",
sample_description_version)

new_box = Container()
new_box.name = name
Expand Down

0 comments on commit d328cfa

Please sign in to comment.