diff --git a/build/bootstrap/net/http/uricspn.c.gz b/build/bootstrap/net/http/uricspn.c.gz deleted file mode 100644 index 75e0fb90..00000000 Binary files a/build/bootstrap/net/http/uricspn.c.gz and /dev/null differ diff --git a/build/bootstrap/net/http/uriparse.c.gz b/build/bootstrap/net/http/uriparse.c.gz deleted file mode 100644 index 9234d6b6..00000000 Binary files a/build/bootstrap/net/http/uriparse.c.gz and /dev/null differ diff --git a/build/definitions.mk b/build/definitions.mk index b8e87b44..5430b644 100644 --- a/build/definitions.mk +++ b/build/definitions.mk @@ -53,7 +53,6 @@ SILENT ?= 1 ZFLAGS ?= XARGS ?= xargs -P4 -rs8000 NICE ?= build/actuallynice -RAGEL ?= ragel DOT ?= dot GZ ?= gzip CLANG = clang-10 diff --git a/build/rules.mk b/build/rules.mk index bbaa6eb8..9e91caa6 100644 --- a/build/rules.mk +++ b/build/rules.mk @@ -86,16 +86,3 @@ o/$(MODE)/%-clang.asm: %.f; @ACTION=OBJECTIFY.f build/compile $(OBJECTIFY.f) -S o/$(MODE)/%-gcc.asm: %.F; @ACTION=OBJECTIFY.F build/compile $(OBJECTIFY.F) -S -g0 $(OUTPUT_OPTION) $< o/$(MODE)/%-clang.asm: CC = $(CLANG) o/$(MODE)/%-clang.asm: %.F; @ACTION=OBJECTIFY.F build/compile $(OBJECTIFY.F) -S -g0 $(OUTPUT_OPTION) $< || echo / need $(CLANG) >$@ - -# ragel state machine compiler -.PRECIOUS: build/bootstrap/%.c.gz -o/$(MODE)/%.c: %.rl build/bootstrap/%.c.gz - @mkdir -p $(dir $@) - @$(GZ) $(ZFLAGS) -dc $(<:%.rl=build/bootstrap/%.c.gz) >$@ - -@ACTION=RAGEL build/do $(RAGEL) $(RAGELFLAGS) $(OUTPUT_OPTION) $< -build/bootstrap/%.c.gz: %.rl - @mkdir -p $(dir $@) - @$(RAGEL) -o $(@:%.gz=%) $< - @$(GZ) $(ZFLAGS) -f $(@:%.gz=%) -%.svgz: %.rl - @$(RAGEL) -V -p $< | $(DOT) -Tsvg | $(GZ) $(ZFLAGS) >$@ diff --git a/net/http/http.mk b/net/http/http.mk index 570b466a..786faca4 100644 --- a/net/http/http.mk +++ b/net/http/http.mk @@ -58,24 +58,12 @@ o/$(MODE)/net/http/formathttpdatetime.o: \ OVERRIDE_CFLAGS += \ -O3 -# ifeq (,$(MODE)) -# $(NET_HTTP_A_OBJS): \ -# OVERRIDE_CFLAGS += \ -# -fsanitize=address -# endif - NET_HTTP_LIBS = 
$(foreach x,$(NET_HTTP_ARTIFACTS),$($(x))) NET_HTTP_SRCS = $(foreach x,$(NET_HTTP_ARTIFACTS),$($(x)_SRCS)) NET_HTTP_HDRS = $(foreach x,$(NET_HTTP_ARTIFACTS),$($(x)_HDRS)) NET_HTTP_CHECKS = $(foreach x,$(NET_HTTP_ARTIFACTS),$($(x)_CHECKS)) NET_HTTP_OBJS = $(foreach x,$(NET_HTTP_ARTIFACTS),$($(x)_OBJS)) -.PRECIOUS: \ - $(NET_HTTP_A_SRCS_R:%.rl=o/$(MODE)/%.c) \ - o/$(MODE)/net/http/uricspn.s \ - o/$(MODE)/net/http/uricspn.i \ - o/$(MODE)/net/http/uricspn.c - .PHONY: o/$(MODE)/net/http o/$(MODE)/net/http: \ $(NET_HTTP_CHECKS) \ diff --git a/net/http/todo.S b/net/http/todo.S deleted file mode 100644 index 7c722e77..00000000 --- a/net/http/todo.S +++ /dev/null @@ -1,50 +0,0 @@ -/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│ -│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2020 Justine Alexandra Roberts Tunney │ -│ │ -│ Permission to use, copy, modify, and/or distribute this software for │ -│ any purpose with or without fee is hereby granted, provided that the │ -│ above copyright notice and this permission notice appear in all copies. │ -│ │ -│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ -│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ -│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ -│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ -│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ -│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ -│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ -│ PERFORMANCE OF THIS SOFTWARE. │ -╚─────────────────────────────────────────────────────────────────────────────*/ - -/ TODO(jart): FIX ME - - .globl "o//net/http/uricspn.c" - .equ "o//net/http/uricspn.c",. - .globl "o//net/http/uriparse.c" - .equ "o//net/http/uriparse.c",. 
- - .globl "o/rel/net/http/uricspn.c" - .equ "o/rel/net/http/uricspn.c",. - .globl "o/rel/net/http/uriparse.c" - .equ "o/rel/net/http/uriparse.c",. - - .globl "o/tiny/net/http/uricspn.c" - .equ "o/tiny/net/http/uricspn.c",. - .globl "o/tiny/net/http/uriparse.c" - .equ "o/tiny/net/http/uriparse.c",. - - .globl "o/dbg/net/http/uricspn.c" - .equ "o/dbg/net/http/uricspn.c",. - .globl "o/dbg/net/http/uriparse.c" - .equ "o/dbg/net/http/uriparse.c",. - - .globl "o/opt/net/http/uricspn.c" - .equ "o/opt/net/http/uricspn.c",. - .globl "o/opt/net/http/uriparse.c" - .equ "o/opt/net/http/uriparse.c",. - - .globl "o/ansi/net/http/uricspn.c" - .equ "o/ansi/net/http/uricspn.c",. - .globl "o/ansi/net/http/uriparse.c" - .equ "o/ansi/net/http/uriparse.c",. diff --git a/net/http/uricspn.c b/net/http/uricspn.c new file mode 100644 index 00000000..502b5e89 --- /dev/null +++ b/net/http/uricspn.c @@ -0,0 +1,185 @@ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│ +╞══════════════════════════════════════════════════════════════════════════════╡ +│ Copyright 2021 Justine Alexandra Roberts Tunney │ +│ │ +│ Permission to use, copy, modify, and/or distribute this software for │ +│ any purpose with or without fee is hereby granted, provided that the │ +│ above copyright notice and this permission notice appear in all copies. │ +│ │ +│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ +│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ +│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ +│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ +│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ +│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ +│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ +│ PERFORMANCE OF THIS SOFTWARE. 
│ +╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/assert.h" +#include "libc/nexgen32e/x86feature.h" +#include "libc/sysv/errfuns.h" +#include "net/http/uri.h" + +/* + * GENERATED BY + * + * ragel -o net/http/uricspn.c net/http/uricspn.rl + * + * TODO(jart): Rewrite in normal C. + */ + +#define static + +/* clang-format off */ + +#line 29 "net/http/uricspn.rl" + +#line 34 "build/bootstrap/net/http/uricspn.c" +static const char _uricspn_key_offsets[] = { + 0, 0 +}; + +static const char _uricspn_trans_keys[] = { + 33, 61, 95, 126, 36, 59, 63, 90, + 97, 122, 0 +}; + +static const char _uricspn_single_lengths[] = { + 0, 4 +}; + +static const char _uricspn_range_lengths[] = { + 0, 3 +}; + +static const char _uricspn_index_offsets[] = { + 0, 0 +}; + +static const char _uricspn_trans_targs[] = { + 1, 1, 1, 1, 1, 1, 1, 0, + 0 +}; + +static const int uricspn_start = 1; +static const int uricspn_first_final = 1; +static const int uricspn_error = 0; + +static const int uricspn_en_machina = 1; + + +#line 30 "net/http/uricspn.rl" +/* clang-format on */ + +int uricspn(const char *data, size_t size) { + int uricspn$avx(const char *, size_t) hidden; + const char *p, *pe; + int cs; + + assert(data || !size); + assert(size <= 0x7ffff000); + assert(size <= 0x7ffff000); + + if (X86_HAVE(AVX)) { + return uricspn$avx(data, size); + } + + p = data; + pe = data + size; + + /* clang-format off */ + + +#line 56 "net/http/uricspn.rl" + + + +#line 94 "build/bootstrap/net/http/uricspn.c" + { + cs = uricspn_start; + } + +#line 59 "net/http/uricspn.rl" + cs = uricspn_en_machina; + +#line 102 "build/bootstrap/net/http/uricspn.c" + { + int _klen; + unsigned int _trans; + const char *_keys; + + if ( p == pe ) + goto _test_eof; + if ( cs == 0 ) + goto _out; +_resume: + _keys = _uricspn_trans_keys + _uricspn_key_offsets[cs]; + _trans = _uricspn_index_offsets[cs]; + + _klen = _uricspn_single_lengths[cs]; + if ( _klen > 0 ) { + const char *_lower = 
_keys; + const char *_mid; + const char *_upper = _keys + _klen - 1; + while (1) { + if ( _upper < _lower ) + break; + + _mid = _lower + ((_upper-_lower) >> 1); + if ( (*p) < *_mid ) + _upper = _mid - 1; + else if ( (*p) > *_mid ) + _lower = _mid + 1; + else { + _trans += (unsigned int)(_mid - _keys); + goto _match; + } + } + _keys += _klen; + _trans += _klen; + } + + _klen = _uricspn_range_lengths[cs]; + if ( _klen > 0 ) { + const char *_lower = _keys; + const char *_mid; + const char *_upper = _keys + (_klen<<1) - 2; + while (1) { + if ( _upper < _lower ) + break; + + _mid = _lower + (((_upper-_lower) >> 1) & ~1); + if ( (*p) < _mid[0] ) + _upper = _mid - 2; + else if ( (*p) > _mid[1] ) + _lower = _mid + 2; + else { + _trans += (unsigned int)((_mid - _keys)>>1); + goto _match; + } + } + _trans += _klen; + } + +_match: + cs = _uricspn_trans_targs[_trans]; + + if ( cs == 0 ) + goto _out; + if ( ++p != pe ) + goto _resume; + _test_eof: {} + _out: {} + } + +#line 61 "net/http/uricspn.rl" + + /* clang-format on */ + + if (cs >= uricspn_first_final) { + return p - data; + } else { + return einval(); + } +} diff --git a/net/http/uricspn.rl b/net/http/uricspn.rl index c61e462a..b33ec63d 100644 --- a/net/http/uricspn.rl +++ b/net/http/uricspn.rl @@ -21,6 +21,8 @@ #include "libc/sysv/errfuns.h" #include "net/http/uri.h" +/* TODO(jart): Rewrite in C */ + #define static /* clang-format off */ diff --git a/net/http/uriparse.c b/net/http/uriparse.c new file mode 100644 index 00000000..f3ea440a --- /dev/null +++ b/net/http/uriparse.c @@ -0,0 +1,724 @@ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│ +╞══════════════════════════════════════════════════════════════════════════════╡ +│ Copyright 2021 Justine Alexandra Roberts Tunney │ +│ │ +│ Permission to use, copy, modify, and/or distribute this software for │ +│ any purpose with or without fee is hereby granted, provided that the │ +│ above 
copyright notice and this permission notice appear in all copies. │ +│ │ +│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ +│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ +│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ +│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ +│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ +│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ +│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ +│ PERFORMANCE OF THIS SOFTWARE. │ +╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/assert.h" +#include "libc/dce.h" +#include "libc/dns/dns.h" +#include "libc/log/log.h" +#include "libc/str/str.h" +#include "libc/sysv/errfuns.h" +#include "net/http/uri.h" + +/* + * GENERATED BY + * + * ragel -o net/http/uriparse.c net/http/uriparse.rl + * + * TODO(jart): Rewrite in normal C. + */ + +#define static + +/* clang-format off */ + +#line 32 "net/http/uriparse.rl" + +#line 37 "build/bootstrap/net/http/uriparse.c" +static const char _uriparse_actions[] = { + 0, 1, 0, 1, 1, 1, 2, 1, + 3, 1, 4, 1, 5, 1, 6, 1, + 8, 1, 11, 1, 12, 2, 0, 2, + 2, 4, 8, 2, 5, 8, 2, 6, + 9, 2, 6, 10, 2, 7, 9, 2, + 7, 10, 2, 8, 0, 2, 11, 0, + 3, 4, 8, 0, 3, 5, 8, 0, + 3, 6, 9, 0, 3, 7, 9, 0 + +}; + +static const short _uriparse_key_offsets[] = { + 0, 0, 6, 12, 18, 24, 37, 43, + 49, 64, 70, 76, 91, 97, 103, 118, + 124, 130, 145, 151, 157, 169, 188, 202, + 208, 214, 224, 226, 233, 241, 256, 273, + 279, 285, 302, 308, 314, 326, 332, 338, + 357, 371, 377, 383, 393, 395, 410, 416, + 422, 437, 443, 449, 456, 464, 479, 494, + 509, 520, 531, 546, 564, 581, 598, 614, + 625, 630, 634, 653, 671, 689, 707, 727, + 728, 739, 742, 759, 775, 777, 797 +}; + +static const char _uriparse_trans_keys[] = { + 48, 57, 65, 70, 97, 102, 48, 57, + 65, 70, 97, 102, 48, 57, 65, 70, + 97, 102, 48, 57, 65, 70, 97, 102, + 33, 
37, 61, 95, 126, 36, 46, 48, + 58, 64, 90, 97, 122, 48, 57, 65, + 70, 97, 102, 48, 57, 65, 70, 97, + 102, 33, 37, 93, 95, 126, 36, 43, + 45, 46, 48, 58, 65, 91, 97, 122, + 48, 57, 65, 70, 97, 102, 48, 57, + 65, 70, 97, 102, 33, 37, 93, 95, + 126, 36, 43, 45, 46, 48, 58, 65, + 91, 97, 122, 48, 57, 65, 70, 97, + 102, 48, 57, 65, 70, 97, 102, 33, + 36, 37, 63, 93, 95, 126, 39, 43, + 45, 58, 65, 91, 97, 122, 48, 57, + 65, 70, 97, 102, 48, 57, 65, 70, + 97, 102, 33, 36, 37, 63, 93, 95, + 126, 39, 43, 45, 58, 65, 91, 97, + 122, 48, 57, 65, 70, 97, 102, 48, + 57, 65, 70, 97, 102, 33, 37, 47, + 61, 95, 126, 36, 58, 64, 90, 97, + 122, 33, 37, 43, 58, 61, 63, 91, + 95, 126, 36, 44, 45, 46, 48, 57, + 65, 90, 97, 122, 33, 37, 61, 64, + 95, 126, 36, 46, 48, 58, 63, 90, + 97, 122, 48, 57, 65, 70, 97, 102, + 48, 57, 65, 70, 97, 102, 43, 91, + 45, 46, 48, 57, 65, 90, 97, 122, + 48, 57, 46, 48, 58, 65, 70, 97, + 102, 46, 93, 48, 58, 65, 70, 97, + 102, 33, 37, 58, 61, 64, 95, 126, + 36, 46, 48, 57, 63, 90, 97, 122, + 33, 37, 38, 44, 47, 61, 64, 91, + 93, 95, 126, 36, 58, 63, 90, 97, + 122, 48, 57, 65, 70, 97, 102, 48, + 57, 65, 70, 97, 102, 33, 37, 38, + 44, 47, 61, 64, 91, 93, 95, 126, + 36, 58, 63, 90, 97, 122, 48, 57, + 65, 70, 97, 102, 48, 57, 65, 70, + 97, 102, 33, 37, 47, 61, 95, 126, + 36, 59, 63, 90, 97, 122, 48, 57, + 65, 70, 97, 102, 48, 57, 65, 70, + 97, 102, 33, 37, 43, 58, 61, 63, + 91, 95, 126, 36, 44, 45, 46, 48, + 57, 65, 90, 97, 122, 33, 37, 61, + 64, 95, 126, 36, 46, 48, 58, 63, + 90, 97, 122, 48, 57, 65, 70, 97, + 102, 48, 57, 65, 70, 97, 102, 43, + 91, 45, 46, 48, 57, 65, 90, 97, + 122, 48, 57, 33, 37, 93, 95, 126, + 36, 43, 45, 46, 48, 58, 65, 91, + 97, 122, 48, 57, 65, 70, 97, 102, + 48, 57, 65, 70, 97, 102, 33, 37, + 93, 95, 126, 36, 43, 45, 46, 48, + 58, 65, 91, 97, 122, 48, 57, 65, + 70, 97, 102, 48, 57, 65, 70, 97, + 102, 46, 48, 58, 65, 70, 97, 102, + 46, 93, 48, 58, 65, 70, 97, 102, + 33, 37, 58, 61, 64, 95, 126, 36, + 46, 48, 57, 63, 90, 97, 122, 33, + 
35, 37, 47, 59, 61, 64, 95, 126, + 36, 57, 65, 90, 97, 122, 33, 35, + 37, 47, 59, 61, 63, 95, 126, 36, + 57, 64, 90, 97, 122, 33, 37, 61, + 95, 126, 36, 59, 63, 90, 97, 122, + 33, 37, 61, 95, 126, 36, 59, 63, + 90, 97, 122, 33, 35, 37, 47, 59, + 61, 63, 95, 126, 36, 58, 64, 90, + 97, 122, 33, 35, 37, 47, 59, 61, + 63, 93, 95, 126, 36, 43, 45, 58, + 65, 91, 97, 122, 33, 35, 37, 47, + 59, 63, 93, 95, 126, 36, 43, 45, + 58, 65, 91, 97, 122, 33, 35, 37, + 38, 61, 63, 93, 95, 126, 36, 43, + 45, 58, 65, 91, 97, 122, 33, 35, + 37, 38, 63, 93, 95, 126, 36, 43, + 45, 58, 65, 91, 97, 122, 35, 43, + 47, 58, 63, 45, 57, 65, 90, 97, + 122, 35, 47, 63, 48, 57, 35, 47, + 58, 63, 33, 35, 37, 43, 47, 58, + 61, 63, 64, 95, 126, 36, 44, 45, + 57, 65, 90, 97, 122, 33, 35, 37, + 47, 58, 61, 63, 64, 95, 126, 36, + 46, 48, 57, 65, 90, 97, 122, 33, + 35, 37, 38, 44, 47, 61, 64, 91, + 93, 95, 126, 36, 58, 63, 90, 97, + 122, 33, 35, 37, 38, 44, 47, 61, + 64, 91, 93, 95, 126, 36, 58, 63, + 90, 97, 122, 33, 35, 37, 43, 47, + 58, 59, 61, 63, 64, 95, 126, 36, + 44, 45, 57, 65, 90, 97, 122, 35, + 43, 58, 59, 45, 46, 48, 57, 65, + 90, 97, 122, 59, 48, 57, 33, 37, + 59, 61, 93, 95, 126, 36, 43, 45, + 46, 48, 58, 65, 91, 97, 122, 33, + 37, 59, 93, 95, 126, 36, 43, 45, + 46, 48, 58, 65, 91, 97, 122, 58, + 59, 33, 37, 43, 58, 59, 61, 63, + 64, 95, 126, 36, 44, 45, 46, 48, + 57, 65, 90, 97, 122, 33, 37, 58, + 59, 61, 64, 95, 126, 36, 46, 48, + 57, 63, 90, 97, 122, 0 +}; + +static const char _uriparse_single_lengths[] = { + 0, 0, 0, 0, 0, 5, 0, 0, + 5, 0, 0, 5, 0, 0, 7, 0, + 0, 7, 0, 0, 6, 9, 6, 0, + 0, 2, 0, 1, 2, 7, 11, 0, + 0, 11, 0, 0, 6, 0, 0, 9, + 6, 0, 0, 2, 0, 5, 0, 0, + 5, 0, 0, 1, 2, 7, 9, 9, + 5, 5, 9, 10, 9, 9, 8, 5, + 3, 4, 11, 10, 12, 12, 12, 1, + 3, 1, 7, 6, 2, 10, 8 +}; + +static const char _uriparse_range_lengths[] = { + 0, 3, 3, 3, 3, 4, 3, 3, + 5, 3, 3, 5, 3, 3, 4, 3, + 3, 4, 3, 3, 3, 5, 4, 3, + 3, 4, 1, 3, 3, 4, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 5, + 4, 3, 3, 4, 1, 5, 3, 3, + 5, 3, 3, 3, 
3, 4, 3, 3, + 3, 3, 3, 4, 4, 4, 4, 3, + 1, 0, 4, 4, 3, 3, 4, 0, + 4, 1, 5, 5, 0, 5, 4 +}; + +static const short _uriparse_index_offsets[] = { + 0, 0, 4, 8, 12, 16, 26, 30, + 34, 45, 49, 53, 64, 68, 72, 84, + 88, 92, 104, 108, 112, 122, 137, 148, + 152, 156, 163, 165, 170, 176, 188, 203, + 207, 211, 226, 230, 234, 244, 248, 252, + 267, 278, 282, 286, 293, 295, 306, 310, + 314, 325, 329, 333, 338, 344, 356, 369, + 382, 391, 400, 413, 428, 442, 456, 469, + 478, 483, 488, 504, 519, 535, 551, 568, + 570, 578, 581, 594, 606, 609, 625 +}; + +static const unsigned char _uriparse_indicies[] = { + 0, 0, 0, 1, 2, 2, 2, 1, + 3, 3, 3, 1, 4, 4, 4, 1, + 5, 6, 5, 5, 5, 5, 5, 5, + 5, 1, 7, 7, 7, 1, 5, 5, + 5, 1, 8, 9, 8, 8, 8, 8, + 8, 8, 8, 8, 1, 10, 10, 10, + 1, 11, 11, 11, 1, 12, 13, 12, + 12, 12, 12, 12, 12, 12, 12, 1, + 14, 14, 14, 1, 15, 15, 15, 1, + 16, 16, 17, 16, 16, 16, 16, 16, + 16, 16, 16, 1, 18, 18, 18, 1, + 19, 19, 19, 1, 20, 20, 21, 20, + 20, 20, 20, 20, 20, 20, 20, 1, + 22, 22, 22, 1, 23, 23, 23, 1, + 5, 6, 24, 5, 5, 5, 5, 5, + 5, 1, 25, 26, 27, 25, 25, 25, + 28, 25, 25, 25, 27, 27, 27, 27, + 1, 29, 30, 29, 31, 29, 29, 29, + 29, 29, 29, 1, 32, 32, 32, 1, + 29, 29, 29, 1, 33, 28, 33, 33, + 33, 33, 1, 34, 1, 35, 35, 35, + 35, 1, 36, 37, 36, 36, 36, 1, + 29, 30, 29, 29, 31, 29, 29, 29, + 38, 29, 29, 1, 39, 40, 29, 29, + 16, 29, 31, 16, 16, 39, 39, 39, + 39, 39, 1, 41, 41, 41, 1, 42, + 42, 42, 1, 43, 44, 29, 29, 20, + 29, 31, 20, 20, 43, 43, 43, 43, + 43, 1, 45, 45, 45, 1, 46, 46, + 46, 1, 47, 48, 49, 47, 47, 47, + 47, 47, 47, 1, 50, 50, 50, 1, + 47, 47, 47, 1, 51, 52, 53, 51, + 51, 51, 54, 51, 51, 51, 53, 53, + 53, 53, 1, 55, 56, 55, 57, 55, + 55, 55, 55, 55, 55, 1, 58, 58, + 58, 1, 55, 55, 55, 1, 59, 60, + 59, 59, 59, 59, 1, 61, 1, 62, + 63, 62, 62, 62, 62, 62, 62, 62, + 62, 1, 64, 64, 64, 1, 65, 65, + 65, 1, 66, 67, 66, 66, 66, 66, + 66, 66, 66, 66, 1, 68, 68, 68, + 1, 69, 69, 69, 1, 70, 70, 70, + 70, 1, 71, 72, 71, 71, 71, 1, + 55, 56, 55, 55, 57, 55, 55, 55, + 73, 
55, 55, 1, 74, 75, 76, 49, + 74, 74, 74, 74, 74, 74, 77, 77, + 1, 4, 78, 79, 80, 4, 4, 81, + 4, 4, 4, 4, 4, 1, 82, 83, + 82, 82, 82, 82, 82, 82, 1, 2, + 84, 2, 2, 2, 2, 2, 2, 1, + 5, 78, 6, 80, 85, 5, 81, 5, + 5, 5, 5, 5, 1, 11, 86, 87, + 88, 89, 90, 91, 11, 11, 11, 11, + 11, 11, 11, 1, 15, 92, 93, 94, + 95, 96, 15, 15, 15, 15, 15, 15, + 15, 1, 19, 97, 98, 99, 100, 19, + 19, 19, 19, 19, 19, 19, 19, 1, + 23, 101, 102, 103, 23, 23, 23, 23, + 23, 23, 23, 23, 1, 104, 105, 106, + 107, 108, 105, 105, 105, 1, 109, 110, + 112, 111, 1, 113, 114, 115, 116, 1, + 29, 104, 30, 117, 106, 118, 29, 119, + 31, 29, 29, 29, 117, 117, 117, 1, + 29, 109, 30, 110, 29, 29, 121, 31, + 29, 29, 29, 120, 29, 29, 1, 42, + 97, 122, 123, 29, 19, 124, 31, 19, + 19, 42, 42, 42, 42, 42, 1, 46, + 101, 125, 126, 29, 23, 29, 31, 23, + 23, 46, 46, 46, 46, 46, 1, 4, + 78, 79, 127, 80, 128, 4, 4, 81, + 4, 4, 4, 4, 127, 127, 127, 1, + 75, 1, 129, 130, 131, 129, 129, 129, + 129, 1, 133, 132, 1, 65, 134, 135, + 136, 65, 65, 65, 65, 65, 65, 65, + 65, 1, 69, 137, 138, 69, 69, 69, + 69, 69, 69, 69, 69, 1, 139, 140, + 1, 55, 56, 141, 142, 131, 55, 55, + 57, 55, 55, 55, 141, 141, 141, 141, + 1, 55, 56, 55, 133, 55, 57, 55, + 55, 55, 143, 55, 55, 1, 0 +}; + +static const char _uriparse_trans_targs[] = { + 2, 0, 57, 4, 55, 58, 6, 7, + 59, 9, 10, 59, 60, 12, 13, 60, + 61, 15, 16, 61, 62, 18, 19, 62, + 21, 22, 23, 66, 27, 22, 23, 25, + 24, 63, 64, 28, 28, 65, 67, 68, + 31, 32, 68, 69, 34, 35, 69, 71, + 37, 20, 38, 40, 41, 77, 51, 40, + 41, 43, 42, 72, 51, 73, 74, 46, + 47, 74, 75, 49, 50, 75, 52, 52, + 76, 78, 55, 56, 3, 70, 56, 3, + 5, 14, 57, 1, 1, 8, 56, 9, + 5, 8, 11, 14, 56, 12, 5, 8, + 14, 56, 15, 14, 17, 56, 18, 14, + 56, 63, 5, 26, 14, 56, 5, 64, + 14, 56, 5, 26, 14, 66, 29, 30, + 67, 30, 31, 30, 33, 34, 30, 70, + 36, 72, 44, 45, 73, 45, 46, 45, + 48, 49, 45, 44, 45, 77, 53, 78 +}; + +static const char _uriparse_trans_actions[] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 0, 0, 1, 1, 0, 0, + 1, 1, 0, 0, 1, 1, 0, 
0, + 0, 1, 1, 1, 0, 0, 0, 7, + 0, 1, 1, 1, 0, 9, 1, 1, + 1, 0, 0, 1, 1, 0, 0, 19, + 0, 1, 0, 1, 1, 1, 1, 0, + 0, 7, 0, 1, 0, 1, 1, 1, + 0, 0, 1, 1, 0, 0, 1, 0, + 9, 1, 1, 0, 1, 1, 17, 0, + 45, 17, 1, 1, 0, 17, 30, 0, + 56, 30, 13, 30, 36, 0, 60, 36, + 36, 33, 0, 33, 13, 39, 0, 39, + 24, 0, 48, 9, 24, 27, 52, 0, + 27, 15, 42, 0, 15, 0, 9, 24, + 0, 27, 0, 33, 13, 0, 39, 0, + 3, 0, 9, 9, 0, 11, 0, 30, + 13, 0, 36, 0, 0, 0, 9, 0 +}; + +static const char _uriparse_eof_actions[] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 17, + 21, 5, 17, 30, 36, 33, 39, 24, + 27, 15, 24, 27, 33, 39, 17, 0, + 9, 11, 30, 36, 0, 9, 11 +}; + +static const int uriparse_start = 54; +static const int uriparse_first_final = 54; +static const int uriparse_error = 0; + +static const int uriparse_en_sip = 39; +static const int uriparse_en_uri = 54; + + +#line 33 "net/http/uriparse.rl" +/* clang-format on */ + +/** + * Parses URI. + * + * This is a general URL parser. It's typically used for HTTP. Support + * for the bonus syntax needed by SIP is provided. The whirlwhind tour + * of the URI rabbit hole is as follows: + * + * /foo.html + * //justine.local/foo.html + * http://justine.local/foo.html + * http://bettersearchengine.local/search.cgi?q=my%20query + * file:///etc/passwd + * gs://bucket/object.txt + * zip:///usr/share/zoneinfo/GMT + * sip:127.0.0.1:5060;lr + * sip:+12125650666@gateway.example + * sip:bob%20barker:priceisright@[dead:beef::666]:5060;isup-oli=00 + * data:video/mpeg;base64,gigabytesofhex + * + * This parser operates on slices rather than C strings. It performs + * slicing and validation only. Operations like turning "%20"→" " or + * "80"→80 and perfect hashing can be done later, if needed. 
/**
 * Parses URI.
 *
 * This is a general URL parser. It's typically used for HTTP. Support
 * for the bonus syntax needed by SIP is provided. The whirlwind tour
 * of the URI rabbit hole is as follows:
 *
 *     /foo.html
 *     //justine.local/foo.html
 *     http://justine.local/foo.html
 *     http://bettersearchengine.local/search.cgi?q=my%20query
 *     file:///etc/passwd
 *     gs://bucket/object.txt
 *     zip:///usr/share/zoneinfo/GMT
 *     sip:127.0.0.1:5060;lr
 *     sip:+12125650666@gateway.example
 *     sip:bob%20barker:priceisright@[dead:beef::666]:5060;isup-oli=00
 *     data:video/mpeg;base64,gigabytesofhex
 *
 * This parser operates on slices rather than C strings. It performs
 * slicing and validation only. Operations like turning "%20"→" " or
 * "80"→80 and perfect hashing can be done later, if needed.
 *
 * The Uri object is owned by the caller; it has a lifecycle like the
 * following:
 *
 *     struct Uri uri;
 *     memset(&uri, 0, sizeof(uri));
 *
 *     uriparse(&uri, s1, strlen(s1));
 *     CHECK_EQ(kUriSchemeHttp, urischeme(uri.scheme, s1));
 *
 *     uriparse(&uri, s2, strlen(s2));
 *     printf("host = %`.*s\n", uri.host.n, s2 + uri.host.i);
 *
 * Inner arrays may be granted memory by the caller. The uri->𝐴.i field
 * is reset to zero on entry to this function. No more than uri->𝐴.n
 * items can be inserted. If we need more than that, then ENOMEM is
 * returned rather than dynamically extending uri->𝐴.p. However, if
 * uri->𝐴.n==0, we assume caller doesn't care about uri->𝐴 and its data
 * is discarded. Empty path segments are never stored in uri->segs.
 *
 * The scalar slices (scheme, opaque, userinfo, host, port, fragment)
 * are likewise reset to {0,0} on entry, so a slice with n==0 after a
 * successful parse means the component was absent or empty.
 *
 * NOTE(review): uri->paramsegs.i is reset here but nothing in this
 * function appends to uri->paramsegs.p; confirm whether that is
 * intentional.
 *
 * @param uri is owned by caller
 * @param p is caller-owned uri string; won't copy/alias/mutate
 * @param size is byte length of p; must not exceed 0x7ffff000
 * @return 0 on success, or -1 w/ errno; concretely the result of
 *     einval() if the input isn't accepted by the state machine (or an
 *     opaque part fails uricspn()), enomem() if a caller-sized array
 *     is full, or eoverflow() if the host slice is longer than
 *     DNS_NAME_MAX or the port slice is longer than 6 bytes
 * @see RFC2396: Uniform Resource Identifiers (URI): Generic Syntax
 * @see RFC3261: SIP: Session Initiation Protocol
 */
int uriparse(struct Uri *uri, const char *p, size_t size) {
  unsigned zero, cs;
  struct UriKeyval kv;
  const char *pe, *eof, *buf, *mark;

  assert(p || !size);
  assert(size <= 0x7ffff000);

/* ABSENT is the empty slice; SLICE spans from the last mark up to (but
   not including) the current byte, expressed as {offset, length}
   relative to the start of the input. Neither macro is #undef'd. */
#define ABSENT ((struct UriSlice){zero, zero})
#define SLICE  ((struct UriSlice){mark - buf, p - mark})

  /* VEIL is defined elsewhere; presumably it hides the constant zero
     from the optimizer -- TODO confirm. */
  cs = zero = VEIL("r", 0u);
  /* The whole input is available up front, so eof coincides with pe. */
  eof = pe = (mark = buf = p) + size;

  uri->scheme = ABSENT;
  uri->opaque = ABSENT;
  uri->userinfo = ABSENT;
  uri->host = ABSENT;
  uri->port = ABSENT;
  uri->fragment = ABSENT;
  uri->segs.i = zero;
  uri->paramsegs.i = zero;
  uri->params.i = zero;
  uri->queries.i = zero;

  /* clang-format off */

  
#line 229 "net/http/uriparse.rl"



#line 435 "build/bootstrap/net/http/uriparse.c"
	{
	cs = uriparse_start;
	}

#line 232 "net/http/uriparse.rl"
  cs = uriparse_en_uri;
  
#line 443 "build/bootstrap/net/http/uriparse.c"
	{
	int _klen;
	unsigned int _trans;
	const char *_acts;
	unsigned int _nacts;
	const char *_keys;

	/* Empty input goes straight to the end-of-input actions. */
	if ( p == pe )
		goto _test_eof;
	if ( cs == 0 )
		goto _out;
_resume:
	_keys = _uriparse_trans_keys + _uriparse_key_offsets[cs];
	_trans = _uriparse_index_offsets[cs];

	/* Binary search of the current byte among this state's single keys. */
	_klen = _uriparse_single_lengths[cs];
	if ( _klen > 0 ) {
		const char *_lower = _keys;
		const char *_mid;
		const char *_upper = _keys + _klen - 1;
		while (1) {
			if ( _upper < _lower )
				break;

			_mid = _lower + ((_upper-_lower) >> 1);
			if ( (*p) < *_mid )
				_upper = _mid - 1;
			else if ( (*p) > *_mid )
				_lower = _mid + 1;
			else {
				_trans += (unsigned int)(_mid - _keys);
				goto _match;
			}
		}
		_keys += _klen;
		_trans += _klen;
	}

	/* Binary search among this state's (low, high) range pairs. */
	_klen = _uriparse_range_lengths[cs];
	if ( _klen > 0 ) {
		const char *_lower = _keys;
		const char *_mid;
		const char *_upper = _keys + (_klen<<1) - 2;
		while (1) {
			if ( _upper < _lower )
				break;

			_mid = _lower + (((_upper-_lower) >> 1) & ~1);
			if ( (*p) < _mid[0] )
				_upper = _mid - 2;
			else if ( (*p) > _mid[1] )
				_lower = _mid + 2;
			else {
				_trans += (unsigned int)((_mid - _keys)>>1);
				goto _match;
			}
		}
		/* Nothing matched: _trans now selects the default transition. */
		_trans += _klen;
	}

_match:
	/* Map the per-state slot to a shared transition id, then move. */
	_trans = _uriparse_indicies[_trans];
	cs = _uriparse_trans_targs[_trans];

	if ( _uriparse_trans_actions[_trans] == 0 )
		goto _again;

	/* Action lists are length-prefixed runs inside _uriparse_actions. */
	_acts = _uriparse_actions + _uriparse_trans_actions[_trans];
	_nacts = (unsigned int) *_acts++;
	while ( _nacts-- > 0 )
	{
		switch ( *_acts++ )
		{
	case 0:
#line 110 "net/http/uriparse.rl"
	{ mark = p; }
	break;
	case 1:
#line 111 "net/http/uriparse.rl"
	{ uri->scheme = SLICE; }
	break;
	/* Action 2 (fragment) only appears in the end-of-input switch. */
	case 3:
#line 113 "net/http/uriparse.rl"
	{ uri->userinfo = SLICE; }
	break;
	case 4:
#line 114 "net/http/uriparse.rl"
	{ uri->host = SLICE; }
	break;
	case 5:
#line 115 "net/http/uriparse.rl"
	{ uri->port = SLICE; }
	break;
	case 6:
#line 117 "net/http/uriparse.rl"
	{
    kv.k = SLICE;
    kv.v = (struct UriSlice){zero, zero};
  }
	break;
	case 7:
#line 122 "net/http/uriparse.rl"
	{
    kv.v = SLICE;
  }
	break;
	case 8:
#line 126 "net/http/uriparse.rl"
	{
    uri->segs.i = zero;
    uri->paramsegs.i = zero;
  }
	break;
	case 9:
#line 131 "net/http/uriparse.rl"
	{
    if (uri->params.n) {
      if (uri->params.i < uri->params.n) {
        uri->params.p[uri->params.i++] = kv;
      } else {
        return enomem();
      }
    }
  }
	break;
	case 10:
#line 141 "net/http/uriparse.rl"
	{
    if (uri->queries.n) {
      if (uri->queries.i < uri->queries.n) {
        uri->queries.p[uri->queries.i++] = kv;
      } else {
        return enomem();
      }
    }
  }
	break;
	case 11:
#line 151 "net/http/uriparse.rl"
	{
    if (p > mark && uri->segs.n) {
      if (uri->segs.i < uri->segs.n) {
        uri->segs.p[uri->segs.i++] = SLICE;
      } else {
        return enomem();
      }
    }
  }
	break;
	case 12:
#line 161 "net/http/uriparse.rl"
	/* Scheme-dependent dispatch. For sip/sips, back up one byte and
	   re-enter the machine at state 39 (uriparse_en_sip); _again then
	   re-advances p onto the same byte. For any other scheme here, the
	   remainder of the input must pass uricspn() in full and is stored
	   as the opaque part, ending the parse immediately. */
	{
    switch (urischeme(uri->scheme, buf)) {
      case kUriSchemeSip:
      case kUriSchemeSips:
        --p;
        {cs = 39;goto _again;}
      default:
        if (uricspn(p, pe - p) == pe - p) {
          uri->opaque = (struct UriSlice){p - buf, pe - p};
          return zero;
        } else {
          return einval();
        }
    }
  }
	break;
#line 611 "build/bootstrap/net/http/uriparse.c"
		}
	}

_again:
	if ( cs == 0 )
		goto _out;
	if ( ++p != pe )
		goto _resume;
	_test_eof: {}
	/* Input exhausted: run the actions pending for the final state so
	   the last open component (host, port, fragment, segment, ...) is
	   recorded. */
	if ( p == eof )
	{
	const char *__acts = _uriparse_actions + _uriparse_eof_actions[cs];
	unsigned int __nacts = (unsigned int) *__acts++;
	while ( __nacts-- > 0 ) {
		switch ( *__acts++ ) {
	case 0:
#line 110 "net/http/uriparse.rl"
	{ mark = p; }
	break;
	case 2:
#line 112 "net/http/uriparse.rl"
	{ uri->fragment = SLICE; }
	break;
	case 4:
#line 114 "net/http/uriparse.rl"
	{ uri->host = SLICE; }
	break;
	case 5:
#line 115 "net/http/uriparse.rl"
	{ uri->port = SLICE; }
	break;
	case 6:
#line 117 "net/http/uriparse.rl"
	{
    kv.k = SLICE;
    kv.v = (struct UriSlice){zero, zero};
  }
	break;
	case 7:
#line 122 "net/http/uriparse.rl"
	{
    kv.v = SLICE;
  }
	break;
	case 8:
#line 126 "net/http/uriparse.rl"
	{
    uri->segs.i = zero;
    uri->paramsegs.i = zero;
  }
	break;
	case 9:
#line 131 "net/http/uriparse.rl"
	{
    if (uri->params.n) {
      if (uri->params.i < uri->params.n) {
        uri->params.p[uri->params.i++] = kv;
      } else {
        return enomem();
      }
    }
  }
	break;
	case 10:
#line 141 "net/http/uriparse.rl"
	{
    if (uri->queries.n) {
      if (uri->queries.i < uri->queries.n) {
        uri->queries.p[uri->queries.i++] = kv;
      } else {
        return enomem();
      }
    }
  }
	break;
	case 11:
#line 151 "net/http/uriparse.rl"
	{
    if (p > mark && uri->segs.n) {
      if (uri->segs.i < uri->segs.n) {
        uri->segs.p[uri->segs.i++] = SLICE;
      } else {
        return enomem();
      }
    }
  }
	break;
#line 699 "build/bootstrap/net/http/uriparse.c"
		}
	}
	}

	_out: {}
	}

#line 234 "net/http/uriparse.rl"

  /* clang-format on */

  /* cs is 0 (error state) after a rejected byte; accepting states are
     numbered uriparse_first_final and up. */
  if (cs >= uriparse_first_final) {
    if (uri->host.n <= DNS_NAME_MAX && uri->port.n <= 6) {
      return zero;
    } else {
      return eoverflow();
    }
  } else {
    return einval();
  }
}