From 642e04f35933dd57e60b346c44e1cac08ed37b7a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20=C5=9Aled=C5=BA?= Date: Mon, 12 Aug 2024 16:10:33 +0200 Subject: [PATCH] Add Xav.Reader.stream!/2 --- README.md | 13 +++++-------- lib/reader.ex | 25 +++++++++++++++++++++++++ test/reader_test.exs | 30 +++++++++++------------------- 3 files changed, 41 insertions(+), 27 deletions(-) diff --git a/README.md b/README.md index c1d24be..56853e2 100644 --- a/README.md +++ b/README.md @@ -59,9 +59,6 @@ Kino.Image.new(tensor) Speech to text: ```elixir -# See https://hexdocs.pm/bumblebee/Bumblebee.Audio.WhisperFeaturizer.html for default sampling rate -r = Xav.Reader.new!("sample.mp3", read: :audio, out_format: :f32, out_channels: 1, out_sample_rate: 16_000) - {:ok, whisper} = Bumblebee.load_model({:hf, "openai/whisper-tiny"}) {:ok, featurizer} = Bumblebee.load_featurizer({:hf, "openai/whisper-tiny"}) {:ok, tokenizer} = Bumblebee.load_tokenizer({:hf, "openai/whisper-tiny"}) @@ -72,12 +69,12 @@ serving = defn_options: [compiler: EXLA] ) -# read a couple of frames +# Read a couple of frames. +# See https://hexdocs.pm/bumblebee/Bumblebee.Audio.WhisperFeaturizer.html for default sampling rate. frames = - for _i <- 0..200 do - {:ok, frame} = Xav.Reader.next_frame(r) - Xav.Frame.to_nx(frame) - end + Xav.Reader.stream!("sample.mp3", read: :audio, out_format: :f32, out_channels: 1, out_sample_rate: 16_000) + |> Stream.take(200) + |> Enum.map(fn frame -> Xav.Frame.to_nx(frame) end) batch = Nx.Batch.concatenate(frames) batch = Nx.Defn.jit_apply(&Function.identity/1, [batch]) diff --git a/lib/reader.ex b/lib/reader.ex index e176540..6c61c3f 100644 --- a/lib/reader.ex +++ b/lib/reader.ex @@ -129,6 +129,31 @@ defmodule Xav.Reader do end end + @doc """ + Creates a lazy stream of frames read from `path`; raises if the reader cannot be created.
+ """ + @spec stream!(String.t(), opts()) :: Enumerable.t() + def stream!(path, opts \\ []) do + Stream.resource( + fn -> + case new(path, opts) do + {:ok, reader} -> + reader + + {:error, reason} -> + raise "Couldn't create a new Xav.Reader stream. Reason: #{inspect(reason)}" + end + end, + fn reader -> + case next_frame(reader) do + {:ok, frame} -> {[frame], reader} + {:error, :eof} -> {:halt, reader} + end + end, + fn _reader -> :ok end + ) + end + defp to_human_readable(:libdav1d), do: :av1 defp to_human_readable(:mp3float), do: :mp3 defp to_human_readable(other), do: other diff --git a/test/reader_test.exs b/test/reader_test.exs index c4b53e5..950e892 100644 --- a/test/reader_test.exs +++ b/test/reader_test.exs @@ -17,6 +17,11 @@ defmodule Xav.ReaderTest do for _i <- 0..(30 * 5), do: assert({:ok, %Xav.Frame{}} = Xav.Reader.next_frame(r)) end + test "stream!" do + assert Xav.Reader.stream!("./test/fixtures/sample_h264.mp4") + |> Enum.all?(fn frame -> is_struct(frame, Xav.Frame) end) + end + test "to_nx/1" do {:ok, r} = Xav.Reader.new("./test/fixtures/sample_h264.mp4") {:ok, frame} = Xav.Reader.next_frame(r) @@ -70,14 +75,6 @@ defmodule Xav.ReaderTest do end defp test_speech_to_text(path, expected_output) do - reader = - Xav.Reader.new!(path, - read: :audio, - out_channels: 1, - out_format: :f32, - out_sample_rate: 16_000 - ) - {:ok, whisper} = Bumblebee.load_model({:hf, "openai/whisper-tiny"}) {:ok, featurizer} = Bumblebee.load_featurizer({:hf, "openai/whisper-tiny"}) {:ok, tokenizer} = Bumblebee.load_tokenizer({:hf, "openai/whisper-tiny"}) @@ -89,7 +86,12 @@ defmodule Xav.ReaderTest do ) batch = - read_frames(reader) + Xav.Reader.stream!(path, + read: :audio, + out_channels: 1, + out_format: :f32, + out_sample_rate: 16_000 + ) |> Enum.map(&Xav.Frame.to_nx(&1)) |> Nx.Batch.concatenate() @@ -98,14 +100,4 @@ defmodule Xav.ReaderTest do assert [%{text: ^expected_output}] = chunks end - - defp read_frames(reader, acc \\ []) do - case Xav.Reader.next_frame(reader) do - 
{:ok, frame} -> - read_frames(reader, [frame | acc]) - - {:error, :eof} -> - Enum.reverse(acc) - end - end end