-
Notifications
You must be signed in to change notification settings - Fork 27
/
Copy pathutf8.c
109 lines (97 loc) · 2.58 KB
/
utf8.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
/*
* Copyright (c) 2015 Ingo Schwarze <schwarze@openbsd.org>
* Copyright (c) 2018, 2023 Stefan Sperling <stsp@openbsd.org>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#include <errno.h>
#include <langinfo.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <wchar.h>

#include "externs.h"
/*
 * Create newly allocated wide-character string equivalent to a byte string.
 *
 * If s cannot be converted in the current locale (mbstowcs() fails with
 * EILSEQ), the conversion is retried on a sanitized copy of s obtained
 * from mbsavis().
 *
 * On success, return 0, store the NUL-terminated wide string in *ws (the
 * caller must free it) and its length in wide characters, not counting the
 * terminator, in *wlen.  On failure, return non-zero, set *ws to NULL and
 * *wlen to 0.
 */
int
mbs2ws(wchar_t **ws, size_t *wlen, const char *s)
{
	const char *src = s;	/* string actually converted: s or vis */
	char *vis = NULL;	/* sanitized copy of s, if one was needed */
	int err = 0;

	*ws = NULL;
	*wlen = mbstowcs(NULL, s, 0);
	if (*wlen == (size_t)-1) {
		int vislen;

		if (errno != EILSEQ) {
			err = -1;
			goto done;
		}

		/* byte string invalid in current encoding; try to "fix" it */
		err = mbsavis(&vis, &vislen, s);
		if (err) {
			/* nothing usable was handed back; do not free it */
			vis = NULL;
			goto done;
		}

		src = vis;
		*wlen = mbstowcs(NULL, vis, 0);
		if (*wlen == (size_t)-1) {
			err = -1; /* give up */
			goto done;
		}
	}

	/* calloc() zero-fills, so the extra element is the terminator */
	*ws = calloc(*wlen + 1, sizeof(**ws));
	if (*ws == NULL) {
		err = -1;
		goto done;
	}

	if (mbstowcs(*ws, src, *wlen) != *wlen)
		err = -1;
done:
	/* single exit: the sanitized copy is released on every path */
	free(vis);
	if (err) {
		free(*ws);
		*ws = NULL;
		*wlen = 0;
	}
	return err;
}
/*
 * Create a newly allocated copy of the multibyte string mbs in which every
 * byte that cannot be decoded in the current locale, and every character
 * for which wcwidth() returns -1, is replaced by a single '?'.
 *
 * On success, return 0, store the copy in *outp (the caller must free it)
 * and its display width in *widthp.  On failure, return -1 and set *outp
 * to NULL; *widthp is left untouched.  Input longer than INT_MAX bytes is
 * rejected with errno set to ERANGE because its width could not be
 * reported in an int.
 */
int
mbsavis(char** outp, int *widthp, const char *mbs)
{
	const char *src;	/* Iterate mbs. */
	char *dst;		/* Iterate *outp. */
	wchar_t wc;
	size_t nbytes;		/* Length in bytes of the whole input. */
	int total_width;	/* Display width of the whole string. */
	int width;		/* Display width of a single character. */
	int len;		/* Length in bytes of a single character. */

	*outp = NULL;

	nbytes = strlen(mbs);
	if (nbytes > (size_t)INT_MAX) {
		errno = ERANGE;
		return -1;
	}

	/* The output never grows: each '?' replaces at least one byte. */
	if ((*outp = malloc(nbytes + 1)) == NULL)
		return -1;

	if (MB_CUR_MAX == 1) {
		/* Single-byte locale: copy verbatim, count one column per byte. */
		memcpy(*outp, mbs, nbytes + 1);
		*widthp = (int)nbytes;
		return 0;
	}

	src = mbs;
	dst = *outp;
	total_width = 0;
	while (*src != '\0') {
		if ((len = mbtowc(&wc, src, MB_CUR_MAX)) == -1) {
			/*
			 * Undecodable byte: return mbtowc() to its initial
			 * conversion state, then skip exactly one byte.
			 */
			(void)mbtowc(NULL, NULL, 0);
			total_width++;
			*dst++ = '?';
			src++;
		} else if ((width = wcwidth(wc)) == -1) {
			/* Decodable, but wcwidth() reports no width for it. */
			total_width++;
			*dst++ = '?';
			src += len;
		} else {
			total_width += width;
			memcpy(dst, src, (size_t)len);
			dst += len;
			src += len;
		}
	}
	*dst = '\0';
	*widthp = total_width;
	return 0;
}