unplugged-vendor/external/toybox/toys/pending/unicode.c

66 lines
1.6 KiB
C
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/* unicode.c - convert between Unicode and UTF-8
*
* Copyright 2020 The Android Open Source Project.
*
* Loosely based on the Plan9/Inferno unicode(1).
USE_UNICODE(NEWTOY(unicode, "<1", TOYFLAG_USR|TOYFLAG_BIN))
config UNICODE
bool "unicode"
default n
help
usage: unicode [[min]-max]
Convert between Unicode code points and UTF-8, in both directions.
*/
#define FOR_unicode
#include "toys.h"
// Print one line describing code point wc: "U+XXXX : " followed by either a
// control-character abbreviation (for values below space, or DEL) or the
// character's UTF-8 encoding as text plus, for multibyte sequences, each
// encoded byte in hex.
static void codepoint(unsigned wc) {
  // Abbreviations for U+0000..U+001F, exactly three characters per entry
  // (space padded) so entry wc starts at offset wc*3.
  char *low="NULSOHSTXETXEOTENQACKBELBS HT LF VT FF CR SO SI DLEDC1DC2DC3DC4"
            "NAKSYNETBCANEM SUBESCFS GS RS US ";
  unsigned n, i;

  printf("U+%04X : ", wc);
  if (wc < ' ') printf("%.3s", low+(wc*3));
  else if (wc == 0x7f) printf("DEL");
  else {
    // n is wctoutf8()'s return value, used here as the encoded length so the
    // buffer can be NUL terminated and printed as a string.
    toybuf[n = wctoutf8(toybuf, wc)] = 0;
    printf("%s%s", toybuf, n>1 ? " :":"");
    // Cast each byte to unsigned char before it is promoted for %x: if plain
    // char is signed, bytes >= 0x80 would otherwise be sign-extended and
    // print as 0xffffffd9 rather than 0xd9.
    if (n>1)
      for (i = 0; i < n; i++) printf(" %#02x", (unsigned char)toybuf[i]);
  }
  xputc('\n');
}
// Command entry point. Each argument is tried, in order, as a hex range
// "min-max", as a single hex code point, and otherwise as a UTF-8 string
// whose characters are listed one per line via codepoint().
void unicode_main(void)
{
  unsigned from, to;
  char next, **args;

  for (args = toys.optargs; *args; args++) {
    // unicode 660-666 => table of `U+0660 : ٠ : 0xd9 0xa0` etc.
    // The trailing %c only converts when extra characters follow the range,
    // so a result of exactly 2 means the whole argument was "hex-hex".
    if (sscanf(*args, "%x-%x%c", &from, &to, &next) == 2) {
      // Test for the end of the range before incrementing: with
      // "while (from <= to) from++" a range ending at UINT_MAX would wrap
      // from back to 0 and never terminate.
      if (from <= to) for (;;) {
        codepoint(from);
        if (from == to) break;
        from++;
      }

    // unicode 666 => just `U+0666 : ٦ : 0xd9 0xa6`.
    // Same trick: exactly 1 conversion means nothing followed the hex digits.
    } else if (sscanf(*args, "%x%c", &from, &next) == 1) {
      codepoint(from);

    // unicode hello => table showing every character in the string.
    } else {
      char *s = *args;
      size_t l = strlen(s);
      wchar_t wc;
      int n;

      // Decode one character at a time, advancing by the number of bytes
      // consumed; the loop ends when utf8towc() returns a value <= 0.
      while ((n = utf8towc(&wc, s, l)) > 0) {
        codepoint(wc);
        s += n;
        l -= n;
      }
    }
  }
}