Skip to content

Commit

Permalink
Add Xav.Reader.stream!/2
Browse files Browse the repository at this point in the history
  • Loading branch information
mickel8 committed Aug 12, 2024
1 parent 710e7fb commit 642e04f
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 27 deletions.
13 changes: 5 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,9 +59,6 @@ Kino.Image.new(tensor)
Speech to text:

```elixir
# See https://hexdocs.pm/bumblebee/Bumblebee.Audio.WhisperFeaturizer.html for default sampling rate
r = Xav.Reader.new!("sample.mp3", read: :audio, out_format: :f32, out_channels: 1, out_sample_rate: 16_000)

{:ok, whisper} = Bumblebee.load_model({:hf, "openai/whisper-tiny"})
{:ok, featurizer} = Bumblebee.load_featurizer({:hf, "openai/whisper-tiny"})
{:ok, tokenizer} = Bumblebee.load_tokenizer({:hf, "openai/whisper-tiny"})
Expand All @@ -72,12 +69,12 @@ serving =
defn_options: [compiler: EXLA]
)

# read a couple of frames
# Read a couple of frames.
# See https://hexdocs.pm/bumblebee/Bumblebee.Audio.WhisperFeaturizer.html for default sampling rate.
frames =
for _i <- 0..200 do
{:ok, frame} = Xav.Reader.next_frame(r)
Xav.Frame.to_nx(frame)
end
Xav.Reader.stream!("sample.mp3", read: :audio, out_format: :f32, out_channels: 1, out_sample_rate: 16_000)
|> Stream.take(200)
|> Enum.map(fn frame -> Xav.Frame.to_nx(frame) end)

batch = Nx.Batch.concatenate(frames)
batch = Nx.Defn.jit_apply(&Function.identity/1, [batch])
Expand Down
25 changes: 25 additions & 0 deletions lib/reader.ex
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,31 @@ defmodule Xav.Reader do
end
end

@doc """
Returns a lazy stream of frames read from the file at `path`.

`opts` are forwarded unchanged to `new/2`. The reader is not opened when
this function is called; it is created when the stream is first enumerated.

Each element of the stream is a frame returned by `next_frame/1`. The stream
ends once `next_frame/1` returns `{:error, :eof}`.

Raises a `RuntimeError` during enumeration if `new/2` returns
`{:error, reason}`.
"""
@spec stream!(String.t(), opts()) :: Enumerable.t()
def stream!(path, opts \\ []) do
  # Invoked by Stream.resource/3 at the start of enumeration to open the reader.
  open_reader = fn ->
    case new(path, opts) do
      {:ok, reader} ->
        reader

      {:error, reason} ->
        raise "Couldn't create a new Xav.Reader stream. Reason: #{inspect(reason)}"
    end
  end

  # Emits one frame per step and halts the stream at end of file.
  read_frame = fn reader ->
    case next_frame(reader) do
      {:ok, frame} -> {[frame], reader}
      {:error, :eof} -> {:halt, reader}
    end
  end

  # No explicit cleanup is performed when the stream finishes or is halted.
  release_reader = fn _reader -> :ok end

  Stream.resource(open_reader, read_frame, release_reader)
end

defp to_human_readable(:libdav1d), do: :av1
defp to_human_readable(:mp3float), do: :mp3
defp to_human_readable(other), do: other
Expand Down
30 changes: 11 additions & 19 deletions test/reader_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,11 @@ defmodule Xav.ReaderTest do
for _i <- 0..(30 * 5), do: assert({:ok, %Xav.Frame{}} = Xav.Reader.next_frame(r))
end

test "stream!" do
  # The boolean returned by Enum.all?/2 must be asserted explicitly;
  # otherwise it is discarded and the test passes whatever the stream yields.
  assert "./test/fixtures/sample_h264.mp4"
         |> Xav.Reader.stream!()
         |> Enum.all?(fn frame -> is_struct(frame, Xav.Frame) end)
end

test "to_nx/1" do
{:ok, r} = Xav.Reader.new("./test/fixtures/sample_h264.mp4")
{:ok, frame} = Xav.Reader.next_frame(r)
Expand Down Expand Up @@ -70,14 +75,6 @@ defmodule Xav.ReaderTest do
end

defp test_speech_to_text(path, expected_output) do
reader =
Xav.Reader.new!(path,
read: :audio,
out_channels: 1,
out_format: :f32,
out_sample_rate: 16_000
)

{:ok, whisper} = Bumblebee.load_model({:hf, "openai/whisper-tiny"})
{:ok, featurizer} = Bumblebee.load_featurizer({:hf, "openai/whisper-tiny"})
{:ok, tokenizer} = Bumblebee.load_tokenizer({:hf, "openai/whisper-tiny"})
Expand All @@ -89,7 +86,12 @@ defmodule Xav.ReaderTest do
)

batch =
read_frames(reader)
Xav.Reader.stream!(path,
read: :audio,
out_channels: 1,
out_format: :f32,
out_sample_rate: 16_000
)
|> Enum.map(&Xav.Frame.to_nx(&1))
|> Nx.Batch.concatenate()

Expand All @@ -98,14 +100,4 @@ defmodule Xav.ReaderTest do

assert [%{text: ^expected_output}] = chunks
end

# Reads frames from `reader` until `Xav.Reader.next_frame/1` returns
# `{:error, :eof}`, then returns them in the order they were read.
# `acc` holds already-collected frames, most recent first.
defp read_frames(reader, acc \\ []) do
  next = fn -> Xav.Reader.next_frame(reader) end

  # Stream.repeatedly/1 is lazy, so next_frame/1 is called exactly once per
  # reduction step and never again after the :halt.
  Enum.reduce_while(Stream.repeatedly(next), acc, fn result, collected ->
    case result do
      {:ok, frame} -> {:cont, [frame | collected]}
      {:error, :eof} -> {:halt, Enum.reverse(collected)}
    end
  end)
end
end

0 comments on commit 642e04f

Please sign in to comment.