Improve some unicode functions

This commit is contained in:
Justine Tunney
2021-05-05 07:25:39 -07:00
parent b9187061a7
commit 1b5a5719c3
33 changed files with 8366 additions and 197 deletions

191
tool/decode/scrubdox.c Normal file
View File

@@ -0,0 +1,191 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
╞══════════════════════════════════════════════════════════════════════════════╡
│ Copyright 2021 Justine Alexandra Roberts Tunney │
│ │
│ Permission to use, copy, modify, and/or distribute this software for │
│ any purpose with or without fee is hereby granted, provided that the │
│ above copyright notice and this permission notice appear in all copies. │
│ │
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
│ PERFORMANCE OF THIS SOFTWARE. │
╚──────────────────────────────────────────────────────────────────────────────┘
THIS PROGRAM TURNS TEXT LIKE THIS
+------------------------------------------------------------------------+
| Button | Name | Go to | From 1.2.3 |
| | | | go to |
|------------+-------------+--------------------------------+------------|
| [ < ] | Back | previous section in reading | 1.2.2 |
| | | order | |
|------------+-------------+--------------------------------+------------|
| [ > ] | Forward | next section in reading order | 1.2.4 |
|------------+-------------+--------------------------------+------------|
| [ << ] | FastBack | previous or up-and-previous | 1.1 |
| | | section | |
|------------+-------------+--------------------------------+------------|
| [ Up ] | Up | up section | 1.2 |
|------------+-------------+--------------------------------+------------|
| [ >> ] | FastForward | next or up-and-next section | 1.3 |
|------------+-------------+--------------------------------+------------|
| [Top] | Top | cover (top) of document | |
|------------+-------------+--------------------------------+------------|
| [Contents] | Contents | table of contents | |
|------------+-------------+--------------------------------+------------|
| [Index] | Index | concept index | |
|------------+-------------+--------------------------------+------------|
| [ ? ] | About | this page | |
+------------------------------------------------------------------------+
INTO THIS
┌────────────┬─────────────┬────────────────────────────────┬────────────┐
│ Button │ Name │ Go to │ From 1.2.3 │
│ │ │ │ go to │
├────────────┼─────────────┼────────────────────────────────┼────────────┤
│ [ < ] │ Back │ previous section in reading │ 1.2.2 │
│ │ │ order │ │
├────────────┼─────────────┼────────────────────────────────┼────────────┤
│ [ > ] │ Forward │ next section in reading order │ 1.2.4 │
├────────────┼─────────────┼────────────────────────────────┼────────────┤
│ [ << ] │ FastBack │ previous or up─and─previous │ 1.1 │
│ │ │ section │ │
├────────────┼─────────────┼────────────────────────────────┼────────────┤
│ [ Up ] │ Up │ up section │ 1.2 │
├────────────┼─────────────┼────────────────────────────────┼────────────┤
│ [ >> ] │ FastForward │ next or up─and─next section │ 1.3 │
├────────────┼─────────────┼────────────────────────────────┼────────────┤
│ [Top] │ Top │ cover (top) of document │ │
├────────────┼─────────────┼────────────────────────────────┼────────────┤
│ [Contents] │ Contents │ table of contents │ │
├────────────┼─────────────┼────────────────────────────────┼────────────┤
│ [Index] │ Index │ concept index │ │
├────────────┼─────────────┼────────────────────────────────┼────────────┤
│ [ ? ] │ About │ this page │ │
└────────────┴─────────────┴────────────────────────────────┴────────────┘ */
#include "libc/log/log.h"
#include "libc/macros.internal.h"
#include "libc/runtime/gc.internal.h"
#include "libc/stdio/stdio.h"
#include "libc/str/str.h"
#include "libc/x/x.h"
// Cell classifiers. Each accepts the ASCII drawing character and, where
// one is listed, the box-drawing glyph as well, so a cell that has
// already been rewritten still matches.
#define IsSpace(C) ((C) == ' ')
#define IsPipe(C) ((C) == '|' || (C) == u'│')
#define IsPlus(C) ((C) == '+' || (C) == u'┼')
#define IsHyphen(C) ((C) == '-' || (C) == u'─')
#define IsTick(C) ((C) == '`' || (C) == u'└')
// Scratch integer: strwidth() of the line most recently read by main(),
// later reused there as a strlen() result while printing.
int n;
// Number of input lines read.
int yn;
// Largest strwidth() seen across input lines; main() adds 1000 to it
// once reading is finished.
int xn;
// Input stream; main() sets it to stdin.
FILE *f;
// Visited flags for DoIt(), indexed as V[y * (xn + 1) + x].
bool *V;
// Input lines as returned by chomp(xgetline(f)); main() frees them once
// they have been converted into L.
char **T;
// UTF-16 rows of the grid. L[0] and L[yn + 1] are blank rows and every
// row begins with one extra space, so input line i lives in L[i + 1]
// starting at column 1.
char16_t **L;
static void DoIt(int y, int x) {
if (V[y * (xn + 1) + x]) return;
V[y * (xn + 1) + x] = 1;
if (IsPipe(L[y - 1][x]) && IsHyphen(L[y][x - 1]) && IsPlus(L[y][x]) &&
IsHyphen(L[y][x + 1]) && IsPipe(L[y + 1][x])) {
L[y][x] = u'';
} else if (IsSpace(L[y - 1][x]) && IsHyphen(L[y][x - 1]) &&
IsHyphen(L[y][x]) && IsHyphen(L[y][x + 1]) &&
IsPipe(L[y + 1][x])) {
L[y][x] = u'';
} else if (IsPipe(L[y - 1][x]) && IsHyphen(L[y][x - 1]) &&
IsHyphen(L[y][x]) && IsHyphen(L[y][x + 1]) &&
IsSpace(L[y + 1][x])) {
L[y][x] = u'';
} else if (IsPipe(L[y - 1][x]) && IsSpace(L[y][x - 1]) && IsPipe(L[y][x]) &&
IsHyphen(L[y][x + 1]) && IsPipe(L[y + 1][x])) {
L[y][x] = u'';
} else if (IsPipe(L[y - 1][x]) && IsHyphen(L[y][x - 1]) && IsPipe(L[y][x]) &&
IsSpace(L[y][x + 1]) && IsPipe(L[y + 1][x])) {
L[y][x] = u'';
} else if (IsSpace(L[y - 1][x]) && IsSpace(L[y][x - 1]) && IsPlus(L[y][x]) &&
IsHyphen(L[y][x + 1]) && IsPipe(L[y + 1][x])) {
L[y][x] = u'';
} else if (IsPipe(L[y - 1][x]) && IsHyphen(L[y][x - 1]) && IsPlus(L[y][x]) &&
IsSpace(L[y][x + 1]) && IsSpace(L[y + 1][x])) {
L[y][x] = u'';
} else if (IsSpace(L[y - 1][x]) && IsHyphen(L[y][x - 1]) && IsPlus(L[y][x]) &&
IsSpace(L[y][x + 1]) && IsPipe(L[y + 1][x])) {
L[y][x] = u'';
} else if (IsPipe(L[y - 1][x]) && IsSpace(L[y][x - 1]) && IsPlus(L[y][x]) &&
IsHyphen(L[y][x + 1]) && IsSpace(L[y + 1][x])) {
L[y][x] = u'';
} else if (IsTick(L[y][x]) && IsPipe(L[y - 1][x]) && IsHyphen(L[y][x + 1]) &&
IsSpace(L[y + 1][x]) && IsSpace(L[y][x - 1])) {
L[y][x] = u'';
} else if (L[y][x] == '-') {
L[y][x] = u'';
} else if (L[y][x] == '|') {
L[y][x] = u'';
} else {
return;
}
DoIt(y - 1, x + 0);
DoIt(y + 1, x + 0);
DoIt(y + 0, x - 1);
DoIt(y + 0, x + 1);
}
/**
 * Reads text from stdin, rewrites ASCII-art tables using box-drawing
 * glyphs, and prints the result to stdout.
 *
 * The input is loaded into a grid of UTF-16 rows surrounded by blank
 * padding, so that DoIt() and the scan below can inspect the four
 * neighbours of any input cell. Each cell that looks like a full
 * crossing, or like a backtick-drawn bottom-left corner, seeds a flood
 * fill via DoIt().
 *
 * @return 0 always
 */
int main(int argc, char *argv[]) {
  char *s;
  int y, x;
  showcrashreports();
  f = stdin;
  /* Slurp every line, tracking the widest one. */
  while ((s = chomp(xgetline(f)))) {
    n = strwidth(s, 0);
    xn = MAX(xn, n);
    T = xrealloc(T, ++yn * sizeof(*T));
    T[yn - 1] = s;
  }
  xn += 1000;
  L = xmalloc((yn + 2) * sizeof(*L));
  L[0] = utf8toutf16(gc(xasprintf(" %*s ", xn, " ")), -1, 0);
  for (y = 0; y < yn; ++y) {
    /* Pad every row by the full xn columns. The previous code padded by
       xn - n, where n was the width of the LAST line read rather than of
       T[y], so a row could end up shorter than the xn + 1 columns that
       the scan loop reads. Trailing blanks are stripped before printing,
       so the extra padding never reaches the output. */
    s = xasprintf(" %s%*s ", T[y], xn, " ");
    L[y + 1] = utf8toutf16(s, -1, 0);
    free(T[y]);
    free(s);
  }
  L[yn + 1] = utf8toutf16(gc(xasprintf(" %*s ", xn, " ")), -1, 0);
  free(T);
  /* DoIt() indexes V as y * (xn + 1) + x and may be handed y in
     [0, yn + 1] and x in [0, xn + 1]; size the table to cover all of
     those indices. */
  V = xcalloc(yn + 2, xn + 2);
  for (y = 1; y <= yn; ++y) {
    for (x = 1; x <= xn; ++x) {
      /* seed on a full crossing */
      if (IsPipe(L[y - 1][x]) && IsHyphen(L[y][x - 1]) && IsPlus(L[y][x]) &&
          IsHyphen(L[y][x + 1]) && IsPipe(L[y + 1][x])) {
        DoIt(y, x);
      }
      /* seed on a backtick-drawn bottom-left corner */
      if (IsTick(L[y][x]) && IsPipe(L[y - 1][x]) && IsHyphen(L[y][x + 1]) &&
          IsSpace(L[y + 1][x]) && IsSpace(L[y][x - 1])) {
        DoIt(y, x);
      }
    }
  }
  /* Input line i lives in L[i + 1], so the rows to print are 1..yn.
     The previous bound (y + 1 < yn) silently dropped the last two
     input lines. */
  for (y = 1; y <= yn; ++y) {
    s = utf16toutf8(L[y], -1, 0);
    n = strlen(s);
    while (n && s[n - 1] == ' ') s[n - 1] = 0, --n;
    puts(s + 1); /* skip the leading padding column */
    free(s);
  }
  /* Release all yn + 2 rows, including both padding rows. */
  for (y = 0; y < yn + 2; ++y) {
    free(L[y]);
  }
  free(L);
  free(V);
  return 0;
}

View File

@@ -1,22 +1,30 @@
;; Constant names grouped under "ansi". This list is appended, together
;; with the c11, limits and math lists, into the alternation that
;; `cosmo-c-constants-regex' is built from.
(defconst cosmo-c-constants-ansi
'("EOF"
"WEOF"
"NDEBUG"
"HUGE_VAL"
"CLK_TCK"))
(defconst cosmo-c-constants-c11
'("__func__"
"__VA_ARGS__"
"__STDC__"
"__STDC_HOSTED__"
"__STDC_VERSION__"
"__TIME__"
"__STDC_ISO_10646__"
"__STDC_MB_MIGHT_NEQ_WC__"
"__STDC_UTF_16__"
"__STDC_UTF_32__"
"__STDC_ANALYZABLE__"
"__STDC_IEC_559_COMPLEX__"
"__STDC_LIB_EXT1__"
"__STDC_NO_ATOMICS__"
"__STDC_NO_COMPLEX__"
"__STDC_NO_THREADS__"
"__STDC_NO_VLA__"
"__STDC_WANT_LIB_EXT1__"))
'("__func__"
"__VA_ARGS__"
"__STDC__"
"__STDC_HOSTED__"
"__STDC_VERSION__"
"__TIME__"
"__STDC_ISO_10646__"
"__STDC_MB_MIGHT_NEQ_WC__"
"__STDC_UTF_16__"
"__STDC_UTF_32__"
"__STDC_ANALYZABLE__"
"__STDC_IEC_559_COMPLEX__"
"__STDC_LIB_EXT1__"
"__STDC_NO_ATOMICS__"
"__STDC_NO_COMPLEX__"
"__STDC_NO_THREADS__"
"__STDC_NO_VLA__"
"__STDC_WANT_LIB_EXT1__"))
(defconst cosmo-c-constants-limits
'("IMAGE_BASE_VIRTUAL"
@@ -24,6 +32,7 @@
"IMAGE_BASE_PHYSICAL"
"CHAR_MAX"
"SCHAR_MAX"
"UCHAR_MAX"
"SHRT_MAX"
"INT_MAX"
"LONG_MAX"
@@ -40,6 +49,7 @@
"INTPTR_MAX"
"PTRDIFF_MAX"
"SCHAR_MIN"
"UCHAR_MIN"
"SHRT_MIN"
"UINT_MIN"
"INT_MIN"
@@ -152,7 +162,8 @@
(defconst cosmo-c-constants-regex
(concat "\\_<"
(regexp-opt (append cosmo-c-constants-c11
(regexp-opt (append cosmo-c-constants-ansi
cosmo-c-constants-c11
cosmo-c-constants-limits
cosmo-c-constants-math))
"\\_>"))

View File

@@ -26,6 +26,7 @@
#include "libc/calls/struct/rusage.h"
#include "libc/calls/struct/sigaction.h"
#include "libc/calls/struct/stat.h"
#include "libc/dos.h"
#include "libc/errno.h"
#include "libc/fmt/conv.h"
#include "libc/fmt/itoa.h"
@@ -83,11 +84,6 @@
#define HASH_LOAD_FACTOR /* 1. / */ 4
#define DEFAULT_PORT 8080
#define DOS_DATE(YEAR, MONTH_IDX1, DAY_IDX1) \
(((YEAR)-1980) << 9 | (MONTH_IDX1) << 5 | (DAY_IDX1))
#define DOS_TIME(HOUR, MINUTE, SECOND) \
((HOUR) << 11 | (MINUTE) << 5 | (SECOND) >> 1)
#define read(F, P, N) readv(F, &(struct iovec){P, N}, 1)
#define LockInc(P) asm volatile("lock incq\t%0" : "=m"(*(P)))
#define AppendCrlf(P) mempcpy(P, "\r\n", 2)
@@ -132,11 +128,15 @@ static const struct ContentTypeExtension {
{"atom", "application/atom+xml"}, //
{"avi", "video/x-msvideo"}, //
{"avif", "image/avif"}, //
{"azw", "application/vnd.amazon.ebook"}, //
{"bmp", "image/bmp"}, //
{"bz2", "application/x-bzip2"}, //
{"c", "text/plain"}, //
{"cc", "text/plain"}, //
{"css", "text/css"}, //
{"csv", "text/csv"}, //
{"doc", "application/msword"}, //
{"epub", "application/epub+zip"}, //
{"gif", "image/gif"}, //
{"gz", "application/gzip"}, //
{"h", "text/plain"}, //
@@ -147,11 +147,13 @@ static const struct ContentTypeExtension {
{"jar", "application/java-archive"}, //
{"jpeg", "image/jpeg"}, //
{"jpg", "image/jpeg"}, //
{"js", "application/javascript"}, //
{"js", "text/javascript"}, //
{"json", "application/json"}, //
{"m4a", "audio/mpeg"}, //
{"markdown", "text/plain"}, //
{"md", "text/plain"}, //
{"mid", "audio/midi"}, //
{"midi", "audio/midi"}, //
{"mp2", "audio/mpeg"}, //
{"mp3", "audio/mpeg"}, //
{"mp4", "video/mp4"}, //
@@ -192,9 +194,11 @@ static const struct ContentTypeExtension {
{"xml", "application/xml"}, //
{"xsl", "application/xslt+xml"}, //
{"xslt", "application/xslt+xml"}, //
{"xz", "application/x-xz"}, //
{"z", "application/zlib"}, //
{"zip", "application/zip"}, //
{"zst", "application/zstd"}, //
{"zst", "application/zstd"}, //
};
static const char kRegCode[][8] = {
@@ -638,7 +642,7 @@ static void UseOutput(void) {
}
static void DropOutput(void) {
free(outbuf.p);
FreeLater(outbuf.p);
outbuf.p = 0;
outbuf.n = 0;
outbuf.c = 0;
@@ -806,13 +810,16 @@ static char *DescribeServer(void) {
}
static void ProgramBrand(const char *s) {
char *p;
free(brand);
free(serverheader);
brand = strdup(s);
if (!(serverheader = EncodeHttpHeaderValue(brand, -1, 0))) {
fprintf(stderr, "error: brand isn't latin1 encodable: %`'s", brand);
if (!(p = EncodeHttpHeaderValue(s, -1, 0))) {
fprintf(stderr, "error: brand isn't latin1 encodable: %`'s", s);
exit(1);
}
brand = strdup(s);
serverheader = xasprintf("Server: %s\r\n", p);
free(p);
}
static void ProgramLinger(long sec) {
@@ -1194,7 +1201,7 @@ static void ReapZombies(void) {
} while (!terminated);
}
static inline ssize_t WritevAll(int fd, struct iovec *iov, int iovlen) {
static ssize_t WritevAll(int fd, struct iovec *iov, int iovlen) {
ssize_t rc;
size_t wrote;
do {
@@ -1286,14 +1293,6 @@ forceinline int GetMode(struct Asset *a) {
return a->file ? a->file->st.st_mode : GetZipCfileMode(zmap + a->cf);
}
forceinline bool IsNotModified(struct Asset *a) {
if (msg.version < 10) return false;
if (!HasHeader(kHttpIfModifiedSince)) return false;
return a->lastmodified >=
ParseHttpDateTime(HeaderData(kHttpIfModifiedSince),
HeaderLength(kHttpIfModifiedSince));
}
static char *FormatUnixHttpDateTime(char *s, int64_t t) {
struct tm tm;
gmtime_r(&t, &tm);
@@ -1305,7 +1304,7 @@ forceinline bool IsCompressionMethodSupported(int method) {
return method == kZipCompressionNone || method == kZipCompressionDeflate;
}
static unsigned Hash(const void *p, unsigned long n) {
static inline unsigned Hash(const void *p, unsigned long n) {
unsigned h, i;
for (h = i = 0; i < n; i++) {
h += ((unsigned char *)p)[i];
@@ -1468,12 +1467,6 @@ static char *AppendCache(char *p, int64_t seconds) {
return AppendExpires(p, (int64_t)shared->nowish + seconds);
}
static inline char *AppendServer(char *p, const char *s) {
p = stpcpy(p, "Server: ");
p = stpcpy(p, s);
return AppendCrlf(p);
}
static inline char *AppendContentLength(char *p, size_t n) {
p = stpcpy(p, "Content-Length: ");
p += uint64toarray_radix10(n, p);
@@ -3059,8 +3052,8 @@ static int LuaIsAcceptablePort(lua_State *L) {
return LuaIsValid(L, IsAcceptablePort);
}
static int LuaCoderImpl(lua_State *L,
char *Coder(const char *, size_t, size_t *)) {
static noinline int LuaCoderImpl(lua_State *L,
char *Coder(const char *, size_t, size_t *)) {
void *p;
size_t n;
p = luaL_checklstring(L, 1, &n);
@@ -3070,7 +3063,8 @@ static int LuaCoderImpl(lua_State *L,
return 1;
}
static int LuaCoder(lua_State *L, char *Coder(const char *, size_t, size_t *)) {
static noinline int LuaCoder(lua_State *L,
char *Coder(const char *, size_t, size_t *)) {
return LuaCoderImpl(L, Coder);
}
@@ -3220,7 +3214,7 @@ static int LuaCrc32c(lua_State *L) {
return LuaHash(L, crc32c);
}
static int LuaProgramInt(lua_State *L, void Program(long)) {
static noinline int LuaProgramInt(lua_State *L, void Program(long)) {
Program(luaL_checkinteger(L, 1));
return 0;
}
@@ -4208,7 +4202,7 @@ static inline int CompareInts(const uint64_t x, uint64_t y) {
return x > y ? 1 : x < y ? -1 : 0;
}
static inline const char *BisectContentType(uint64_t ext) {
static const char *BisectContentType(uint64_t ext) {
int c, m, l, r;
l = 0;
r = ARRAYLEN(kContentTypeExtension) - 1;
@@ -4251,6 +4245,14 @@ static const char *GetContentType(struct Asset *a, const char *path, size_t n) {
a->istext ? "text/plain" : "application/octet-stream"));
}
/**
 * Tells whether the request's If-Modified-Since header lets the asset
 * be treated as not modified.
 *
 * Yields false when msg.version is below 10 or when the request has no
 * If-Modified-Since header. Otherwise yields whether a->lastmodified is
 * greater than or equal to the parsed header timestamp.
 *
 * NOTE(review): as written this is true when the asset is at least as
 * new as the header date; confirm that direction is intended.
 */
static bool IsNotModified(struct Asset *a) {
  if (msg.version >= 10 && HasHeader(kHttpIfModifiedSince)) {
    return a->lastmodified >=
           ParseHttpDateTime(HeaderData(kHttpIfModifiedSince),
                             HeaderLength(kHttpIfModifiedSince));
  }
  return false;
}
static char *ServeAsset(struct Asset *a, const char *path, size_t pathlen) {
char *p;
uint32_t crc;
@@ -4392,7 +4394,7 @@ static bool HandleMessage(void) {
}
if (msg.version >= 10) {
p = AppendCrlf(stpcpy(stpcpy(p, "Date: "), shared->currentdate));
if (!branded) p = AppendServer(p, serverheader);
if (!branded) p = stpcpy(p, serverheader);
if (extrahdrs) p = stpcpy(p, extrahdrs);
if (connectionclose) {
p = stpcpy(p, "Connection: close\r\n");

View File

@@ -835,6 +835,26 @@ static void OnMouse(char *p) {
}
}
/**
 * Overwrites the first (byn * bxn) >> 6 entries of board with values
 * from rand64(), in ascending index order.
 *
 * Presumably the board packs 64 cells per entry, hence the shift by
 * six; confirm against the board's declaration.
 */
static void Rando1(void) {
  long words = (byn * bxn) >> 6;
  long k = 0;
  while (k < words) {
    board[k] = rand64();
    ++k;
  }
}
/**
 * Overwrites the first (byn * bxn) >> 6 entries of board with values
 * assembled from three successive rand() calls.
 *
 * Each entry is built in place: the first rand() result is shifted left
 * by 31, the second is OR'd in, the sum is shifted left by 2, and the
 * low two bits of the third result are OR'd in.
 */
static void Rando2(void) {
  long words = (byn * bxn) >> 6;
  long k;
  for (k = 0; k < words; ++k) {
    board[k] = rand();  /* first draw */
    board[k] <<= 31;
    board[k] |= rand(); /* second draw */
    board[k] <<= 2;
    board[k] |= rand() & 3; /* only two bits of the third draw */
  }
}
static void ReadKeyboard(void) {
char buf[32], *p = buf;
memset(buf, 0, sizeof(buf));
@@ -861,6 +881,12 @@ static void ReadKeyboard(void) {
case CTRL('V'):
OnPageDown();
break;
case CTRL('R'):
Rando1();
break;
case CTRL('G'):
Rando2();
break;
case 'M':
if (mousemode) {
DisableMouse();