Bump version to 1.1.0 and update benchmarks #1161

Merged · 5 commits · Nov 21, 2024

Conversation

MahmoudAshraf97
Collaborator

No description provided.

@MahmoudAshraf97
Collaborator Author

OpenAI Whisper Inference

import torch

# torch.set_num_threads(8)
from whisper import load_model, transcribe, load_audio

audio = load_audio("benchmark/benchmark.m4a")
model = load_model("large-v2", device="cpu")


use_cuda = True

if use_cuda:
    # Cast every parameter except the LayerNorm weights to FP16 to ensure FP16 inference
    state_dict = model.state_dict()
    for parameter_name, _ in model.named_parameters():
        if "ln" not in parameter_name:
            state_dict[parameter_name] = state_dict[parameter_name].half()
    model.load_state_dict(state_dict, assign=True)
    model = model.cuda()

result = transcribe(model, audio, beam_size=5, best_of=5, verbose=False)
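
For comparison, a minimal sketch of the corresponding faster-whisper run on GPU (the model name, audio path, and FP16 compute type are assumptions chosen to mirror the OpenAI Whisper settings above, not the exact benchmark script):

from faster_whisper import WhisperModel

# Assumed settings mirroring the OpenAI Whisper call above; use device="cpu"
# and compute_type="float32" or "int8" for the CPU benchmarks
model = WhisperModel("large-v2", device="cuda", compute_type="float16")

segments, info = model.transcribe("benchmark/benchmark.m4a", beam_size=5, best_of=5)

# transcribe() returns a lazy generator, so the segments must be consumed
# for the transcription to actually run
for segment in segments:
    pass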

@MahmoudAshraf97
Collaborator Author

MahmoudAshraf97 commented Nov 21, 2024

Measure CPU memory for whisper.cpp

#!/bin/bash

# Command to run
COMMAND="./main -m models/ggml-large-v2.bin -l auto -fa ../faster-whisper/benchmark/output.wav"
# Run the command and measure memory consumption
OUTPUT=$(/usr/bin/time -v $COMMAND 2>&1)

# Extract the peak memory usage from the output
PEAK_MEMORY=$(echo "$OUTPUT" | grep "Maximum resident set size" | awk '{print $6}')

# Convert to MB for readability
PEAK_MEMORY_MB=$(bc <<< "scale=2; $PEAK_MEMORY / 1024")

# Print the result
echo "Peak memory consumption: $PEAK_MEMORY_MB MB"

GPU memory

import subprocess
import time

import pynvml

def measure_gpu_memory(command):
    # Initialize NVML
    pynvml.nvmlInit()
    peak_memory = 0

    # Record the initial memory usage on GPU 0 so the baseline can be subtracted later
    handle = pynvml.nvmlDeviceGetHandleByIndex(0)
    mem_info = pynvml.nvmlDeviceGetMemoryInfo(handle)
    initial_memory = mem_info.used

    # Start the benchmarked process
    process = subprocess.Popen(command, shell=True)

    try:
        while process.poll() is None:  # While the process is running
            mem_info = pynvml.nvmlDeviceGetMemoryInfo(handle)
            peak_memory = max(peak_memory, mem_info.used)
            time.sleep(0.5)
    finally:
        pynvml.nvmlShutdown()

    # Calculate memory usage difference (peak - initial)
    memory_difference = (peak_memory - initial_memory) / 1024 / 1024  # Convert to MB
    return memory_difference

if __name__ == "__main__":
    command = "./main -m models/ggml-large-v2.bin -l auto -fa ../faster-whisper/benchmark/output.wav"
    additional_memory = measure_gpu_memory(command)
    print(f"Additional GPU memory used: {additional_memory:.2f} MB")

MahmoudAshraf97 merged commit 97a4785 into SYSTRAN:master on Nov 21, 2024
3 checks passed
Equipo45 pushed a commit to Equipo45/faster-whisper that referenced this pull request Dec 4, 2024
* update version

* Update CPU benchmarks

* Updated GPU benchmarks

* ..

* more gpu benchmarks