Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/ffmpeg #362

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 67 additions & 0 deletions build.gradle
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import org.gradle.nativeplatform.platform.internal.DefaultNativePlatform

plugins {
id 'com.github.johnrengelman.shadow' version '7.1.2'
id 'de.undercouch.download' version "5.0.5"
Expand Down Expand Up @@ -128,3 +130,68 @@ task generateOAS(type: Download) {
src 'http://localhost:4567/openapi-specs'
dest "${project.projectDir}/docs/openapi.json"
}

// Downloads the prebuilt FFmpeg archive (ffbinaries v4.2.1, 64 bit) for the host operating
// system to build/cache/ffmpeg.zip.
task downloadFFmpeg(type: Download) {
    def f = new File("$buildDir/cache/ffmpeg.zip")

    // An archive that is already present is not considered out of date.
    outputs.upToDateWhen {
        return f.exists()
    }

    // Platform token used in the ffbinaries asset name; stays empty on unsupported platforms.
    def os = ""
    if (DefaultNativePlatform.currentOperatingSystem.isWindows()) {
        os = "win"
    } else if (DefaultNativePlatform.currentOperatingSystem.isMacOsX()) {
        os = "osx"
    } else if (DefaultNativePlatform.currentOperatingSystem.isLinux()) {
        os = "linux"
    }

    // Fail with a clear message at execution time instead of requesting a malformed URL.
    // The check lives in doFirst so unrelated tasks still work on unsupported platforms.
    doFirst {
        if (os.isEmpty()) {
            throw new GradleException("No prebuilt FFmpeg binary available for operating system '${System.getProperty('os.name')}'.")
        }
    }

    src "https://github.com/vot/ffbinaries-prebuilt/releases/download/v4.2.1/ffmpeg-4.2.1-$os-64.zip"
    dest f

    // Tell the download plugin itself to keep an existing archive, in line with the check above.
    overwrite false
    // Download to a temporary file first so an interrupted transfer does not leave a truncated
    // archive behind that the exists() check would accept.
    tempAndMove true
}

// Downloads the prebuilt FFprobe archive (ffbinaries v4.2.1, 64 bit) for the host operating
// system to build/cache/ffprobe.zip.
task downloadFFprobe(type: Download) {
    def f = new File("$buildDir/cache/ffprobe.zip")

    // An archive that is already present is not considered out of date.
    outputs.upToDateWhen {
        return f.exists()
    }

    // Platform token used in the ffbinaries asset name; stays empty on unsupported platforms.
    def os = ""
    if (DefaultNativePlatform.currentOperatingSystem.isWindows()) {
        os = "win"
    } else if (DefaultNativePlatform.currentOperatingSystem.isMacOsX()) {
        os = "osx"
    } else if (DefaultNativePlatform.currentOperatingSystem.isLinux()) {
        os = "linux"
    }

    // Fail with a clear message at execution time instead of requesting a malformed URL.
    // The check lives in doFirst so unrelated tasks still work on unsupported platforms.
    doFirst {
        if (os.isEmpty()) {
            throw new GradleException("No prebuilt FFprobe binary available for operating system '${System.getProperty('os.name')}'.")
        }
    }

    src "https://github.com/vot/ffbinaries-prebuilt/releases/download/v4.2.1/ffprobe-4.2.1-$os-64.zip"
    dest f

    // Tell the download plugin itself to keep an existing archive, in line with the check above.
    overwrite false
    // Download to a temporary file first so an interrupted transfer does not leave a truncated
    // archive behind that the exists() check would accept.
    tempAndMove true
}

// Unpacks the entries matching '*ffmpeg*' from the downloaded FFmpeg archive into build/ext/ffmpeg.
task copyFFmpeg(type: Copy) {
    dependsOn downloadFFmpeg

    def targetDir = "$buildDir/ext/ffmpeg"

    // Out of date as long as the target directory holds no file whose name starts with 'ffmpeg'.
    outputs.upToDateWhen {
        def extracted = fileTree(targetDir).filter { candidate ->
            candidate.isFile() && candidate.name.startsWith('ffmpeg')
        }
        return !extracted.isEmpty()
    }

    from zipTree(downloadFFmpeg.dest)
    include '*ffmpeg*'
    into targetDir
}

// Unpacks the entries matching '*ffprobe*' from the downloaded FFprobe archive into build/ext/ffmpeg.
task copyFFprobe(type: Copy) {
    dependsOn downloadFFprobe

    def targetDir = "$buildDir/ext/ffmpeg"

    // Out of date as long as the target directory holds no file whose name starts with 'ffprobe'.
    outputs.upToDateWhen {
        def extracted = fileTree(targetDir).filter { candidate ->
            candidate.isFile() && candidate.name.startsWith('ffprobe')
        }
        return !extracted.isEmpty()
    }

    from zipTree(downloadFFprobe.dest)
    include '*ffprobe*'
    into targetDir
}

// Aggregate task: running it downloads and unpacks both FFmpeg and FFprobe.
// It is a plain task rather than a Copy task because it declares no sources or destination of
// its own; all work is done by the tasks it depends on.
task setupFFMpeg {
    description = 'Downloads and unpacks the FFmpeg and FFprobe binaries for the host platform.'
    dependsOn downloadFFmpeg, downloadFFprobe, copyFFmpeg, copyFFprobe
}
2 changes: 2 additions & 0 deletions cineast-core/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,8 @@ dependencies {
api group: "org.bytedeco", name: "javacpp", version: version_javacpp
api group: "org.bytedeco", name: "ffmpeg-platform", version: version_ffmpeg

api group: 'com.github.kokorin.jaffree', name: 'jaffree', version: '2022.06.03'

/** OpenCV. */
api group: 'org.openpnp', name: 'opencv', version: version_opencv

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,59 +2,12 @@

/**
* The class encapsulates descriptive information concerning an audio-stream that does not change between frames. The intention behind this class is that {@link AudioFrame}s that belong together share the same instance of the AudioDescriptor.
*
* @param samplingrate Samplingrate of the audio associated with this descriptor.
* @param channels Number of channels in the audio associated with this descriptor.
* @param duration Duration of the audio associated with this descriptor in milliseconds.
*/
public class AudioDescriptor {

/**
* Samplingrate of the audio associated with this descriptor.
*/
private final float samplingrate;

/**
* Number of channels in the audio associated with this descriptor.
*/
private final int channels;

/**
* Duration of the audio associated with this descriptor in milliseconds.
*/
private final long duration;

/**
* Constructor for an AudioDescriptor.
*/
public AudioDescriptor(float samplingrate, int channels, long duration) {
this.samplingrate = samplingrate;
this.channels = channels;
this.duration = duration;
}

/**
* Getter for the samplingrate.
*
* @return Samplingrate of the source stream.
*/
public final float getSamplingrate() {
return this.samplingrate;
}

/**
* Getter for channels.
*
* @return Number of channels in the source stream
*/
public final int getChannels() {
return this.channels;
}

/**
* Getter for duration.
*
* @return Duration of the total source stream
*/
public final long getDuration() {
return this.duration;
}
public record AudioDescriptor(float samplingrate, int channels, long duration) {

@Override
public boolean equals(Object o) {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
package org.vitrivr.cineast.core.data.frames;


import javax.sound.sampled.AudioFormat;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.util.Objects;
import javax.sound.sampled.AudioFormat;

/**
* Represents a single audio-frame containing a specific number of samples (the number depends on the decoder that created the AudioFrame). Sample data is stored in a byte array and internally represented as 16bit int PCM i.e. each sample is represented by a signed 16bit short between -32767 and 32767.
Expand Down Expand Up @@ -64,7 +64,7 @@ public AudioFrame(long idx, long timestamp, byte[] data, AudioDescriptor descrip
}

public AudioFrame(AudioFrame other) {
this(other.idx, other.timestamp, other.data.array(), new AudioDescriptor(other.descriptor.getSamplingrate(), other.descriptor.getChannels(), other.descriptor.getDuration()));
this(other.idx, other.timestamp, other.data.array(), new AudioDescriptor(other.descriptor.samplingrate(), other.descriptor.channels(), other.descriptor.duration()));
}

/**
Expand All @@ -73,7 +73,7 @@ public AudioFrame(AudioFrame other) {
* @return AudioFormat
*/
public final AudioFormat getFormat() {
return new AudioFormat(this.descriptor.getSamplingrate(), BITS_PER_SAMPLE, this.descriptor.getChannels(), true, false);
return new AudioFormat(this.descriptor.samplingrate(), BITS_PER_SAMPLE, this.descriptor.channels(), true, false);
}

/**
Expand Down Expand Up @@ -137,7 +137,7 @@ public final byte[] getData() {
*/
private void setData(byte[] data) {
this.data = ByteBuffer.wrap(data).order(ByteOrder.LITTLE_ENDIAN);
this.numberOfSamples = data.length / (2 * this.descriptor.getChannels());
this.numberOfSamples = data.length / (2 * this.descriptor.channels());
}

/**
Expand All @@ -146,7 +146,7 @@ private void setData(byte[] data) {
* @return Sample rate of this AudioFrame.
*/
public final float getSamplingrate() {
return this.descriptor.getSamplingrate();
return this.descriptor.samplingrate();
}

/**
Expand All @@ -155,7 +155,7 @@ public final float getSamplingrate() {
* @return Duration of the {@link AudioFrame}
*/
public final float getDuration() {
return this.numberOfSamples / this.descriptor.getSamplingrate();
return this.numberOfSamples / this.descriptor.samplingrate();
}

/**
Expand All @@ -173,7 +173,7 @@ public final float getStart() {
* @return Relative end of the {@link AudioFrame}.
*/
public final float getEnd() {
return this.getStart() + this.numberOfSamples / this.descriptor.getSamplingrate();
return this.getStart() + this.numberOfSamples / this.descriptor.samplingrate();
}

/**
Expand All @@ -182,7 +182,7 @@ public final float getEnd() {
* @return Number of channels in this AudioFrame.
*/
public final int getChannels() {
return this.descriptor.getChannels();
return this.descriptor.channels();
}

/**
Expand All @@ -193,8 +193,8 @@ public final int getChannels() {
* @return Sample value for the specified channel at the specified index.
*/
public final short getSampleAsShort(int idx, int channel) {
if (channel < this.descriptor.getChannels()) {
return this.data.getShort(2 * idx * this.descriptor.getChannels() + 2 * channel);
if (channel < this.descriptor.channels()) {
return this.data.getShort(2 * idx * this.descriptor.channels() + 2 * channel);
} else {
throw new IllegalArgumentException("The channel indexed must not exceed the number of channels!");
}
Expand All @@ -219,10 +219,10 @@ public final double getSampleAsDouble(int idx, int channel) {
*/
public final short getMeanSampleAsShort(int idx) {
int meanSample = 0;
for (int i = 0; i < this.descriptor.getChannels(); i++) {
for (int i = 0; i < this.descriptor.channels(); i++) {
meanSample += this.getSampleAsShort(idx, i);
}
return (short) (meanSample / this.descriptor.getChannels());
return (short) (meanSample / this.descriptor.channels());
}

/**
Expand All @@ -233,10 +233,10 @@ public final short getMeanSampleAsShort(int idx) {
*/
public final double getMeanSampleAsDouble(int idx) {
float meanSample = 0;
for (int i = 0; i < this.descriptor.getChannels(); i++) {
for (int i = 0; i < this.descriptor.channels(); i++) {
meanSample += this.getSampleAsShort(idx, i);
}
return (meanSample / (this.descriptor.getChannels() * Short.MAX_VALUE));
return (meanSample / (this.descriptor.channels() * Short.MAX_VALUE));
}

/**
Expand All @@ -250,7 +250,7 @@ public boolean append(AudioFrame that, int numberOfSamples) {
if (!this.descriptor.equals(that.descriptor)) {
return false;
}
int bytes = that.descriptor.getChannels() * numberOfSamples * (BITS_PER_SAMPLE / 8);
int bytes = that.descriptor.channels() * numberOfSamples * (BITS_PER_SAMPLE / 8);
if (bytes > that.data.capacity()) {
return false;
}
Expand Down Expand Up @@ -281,7 +281,7 @@ public AudioFrame split(int numberOfSamples) {
return this;
}

int bytesToCut = this.descriptor.getChannels() * numberOfSamples * (BITS_PER_SAMPLE / 8);
int bytesToCut = this.descriptor.channels() * numberOfSamples * (BITS_PER_SAMPLE / 8);
byte[] cutBytes = new byte[bytesToCut];
byte[] remaining = new byte[this.data.capacity() - bytesToCut];

Expand All @@ -290,7 +290,7 @@ public AudioFrame split(int numberOfSamples) {

setData(remaining);

return new AudioFrame(idx, timestamp, cutBytes, new AudioDescriptor(descriptor.getSamplingrate(), descriptor.getChannels(), (long) (numberOfSamples / descriptor.getSamplingrate())));
return new AudioFrame(idx, timestamp, cutBytes, new AudioDescriptor(descriptor.samplingrate(), descriptor.channels(), (long) (numberOfSamples / descriptor.samplingrate())));
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,54 +2,13 @@

/**
* The class encapsulates descriptive information concerning a video-stream (visual only) that does not change between frames. The intention behind this class is that {@link VideoFrame}s that belong together share the same instance of the VideoDescriptor.
*
* @param fps Frame rate of the video associated with this descriptor.
* @param duration Duration of the video associated with this descriptor in milliseconds.
* @param width Width of the video associated with this descriptor.
* @param height Height of the video associated with this descriptor.
*/
public class VideoDescriptor {

/**
* Frame rate of the video associated with this descriptor.
*/
private final float fps;

/**
* Duration of the video associated with this descriptor in milliseconds.
*/
private final long duration;

/**
* Width of the video associated with this descriptor.
*/
private final int width;

/**
* Height of the video associated with this descriptor.
*/
private final int height;

/**
* Constructor for VideoDescriptor
*/
public VideoDescriptor(float fps, long duration, int width, int height) {
this.fps = fps;
this.duration = duration;
this.width = width;
this.height = height;
}

public float getFps() {
return fps;
}

public long getDuration() {
return duration;
}

public int getWidth() {
return width;
}

public int getHeight() {
return height;
}
public record VideoDescriptor(float fps, long duration, int width, int height) {

@Override
public boolean equals(Object o) {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
package org.vitrivr.cineast.core.data.query.containers;

import java.util.List;
import java.util.Objects;
import org.vitrivr.cineast.core.data.frames.AudioDescriptor;
import org.vitrivr.cineast.core.data.frames.AudioFrame;
import org.vitrivr.cineast.core.util.dsp.fft.STFT;
import org.vitrivr.cineast.core.util.dsp.fft.windows.WindowFunction;
import org.vitrivr.cineast.core.util.web.AudioParser;

import java.util.List;
import java.util.Objects;


public class AudioQueryTermContainer extends AbstractQueryTermContainer {

Expand Down Expand Up @@ -94,7 +95,7 @@ public STFT getSTFT(int windowsize, int overlap, int padding, WindowFunction fun
if (2 * padding >= windowsize) {
throw new IllegalArgumentException("The combined padding must be smaller than the sample window.");
}
STFT stft = new STFT(windowsize, overlap, padding, function, this.descriptor.getSamplingrate());
STFT stft = new STFT(windowsize, overlap, padding, function, this.descriptor.samplingrate());
stft.forward(this.getMeanSamplesAsDouble());
return stft;
}
Expand Down
Loading