// Provenance (from the patch header this snippet was extracted from):
//   commit  8c72785dd8e9e6f803bfcf9b86c6fa895616d034
//   author  mashizora <30516315+mashizora@users.noreply.github.com>
//   date    Mon, 13 May 2024 16:55:32 +0800
//   subject [PATCH] main : fix double quote escaping in csv output (#2090)
//   file    examples/main/main.cpp (1 file changed, 33 insertions(+), 1 deletion(-))
//
// The function below is inserted right after escape_double_quotes_and_backslashes().
// In the same patch, output_csv() switches its per-segment escaping from
//   char * text_escaped = escape_double_quotes_and_backslashes(text);
// to
//   char * text_escaped = escape_double_quotes_in_csv(text);

// Escapes a string for embedding in a double-quoted CSV field: each double
// quote is escaped by another double quote (rfc4180); every other byte,
// including backslashes, is copied through unchanged.
//
// str     NUL-terminated input string; may be null.
// returns a newly allocated NUL-terminated copy that the caller must release
//         with free(), or null when str is null or the allocation fails.
char *escape_double_quotes_in_csv(const char *str) {
    if (str == nullptr) {
        return nullptr;
    }

    // Size the output in a single pass: one byte per input character, one
    // extra byte per double quote, plus one for the terminator.
    size_t escaped_length = 1;
    for (const char *p = str; *p != '\0'; ++p) {
        escaped_length += (*p == '"') ? 2 : 1;
    }

    char *escaped = static_cast<char *>(malloc(escaped_length));
    if (escaped == nullptr) {
        return nullptr;
    }

    size_t pos = 0;
    for (const char *p = str; *p != '\0'; ++p) {
        if (*p == '"') {
            escaped[pos++] = '"'; // emit the escaping quote first
        }
        escaped[pos++] = *p;
    }

    // Terminate explicitly instead of relying on a zero-filled allocation.
    escaped[pos] = '\0';

    return escaped;
}