diff --git a/.gitignore b/.gitignore index 2d67173eb..b8f38989b 100644 --- a/.gitignore +++ b/.gitignore @@ -36,6 +36,7 @@ /asynclook /delayer /dohclient +/doqclient /lock-verify /memstats /perf diff --git a/Makefile.in b/Makefile.in index 672435e01..c262250ca 100644 --- a/Makefile.in +++ b/Makefile.in @@ -179,11 +179,11 @@ testcode/unitlruhash.c testcode/unitmain.c testcode/unitmsgparse.c \ testcode/unitneg.c testcode/unitregional.c testcode/unitslabhash.c \ testcode/unitverify.c testcode/readhex.c testcode/testpkts.c testcode/unitldns.c \ testcode/unitecs.c testcode/unitauth.c testcode/unitzonemd.c \ -testcode/unittcpreuse.c +testcode/unittcpreuse.c testcode/unitdoq.c UNITTEST_OBJ=unitanchor.lo unitdname.lo unitlruhash.lo unitmain.lo \ unitmsgparse.lo unitneg.lo unitregional.lo unitslabhash.lo unitverify.lo \ readhex.lo testpkts.lo unitldns.lo unitecs.lo unitauth.lo unitzonemd.lo \ -unittcpreuse.lo +unittcpreuse.lo unitdoq.lo UNITTEST_OBJ_LINK=$(UNITTEST_OBJ) worker_cb.lo $(COMMON_OBJ) $(SLDNS_OBJ) \ $(COMPAT_OBJ) DAEMON_SRC=daemon/acl_list.c daemon/cachedump.c daemon/daemon.c \ @@ -242,6 +242,10 @@ DOHCLIENT_SRC=testcode/dohclient.c DOHCLIENT_OBJ=dohclient.lo DOHCLIENT_OBJ_LINK=$(DOHCLIENT_OBJ) worker_cb.lo $(COMMON_OBJ) $(COMPAT_OBJ) \ $(SLDNS_OBJ) +DOQCLIENT_SRC=testcode/doqclient.c +DOQCLIENT_OBJ=doqclient.lo +DOQCLIENT_OBJ_LINK=$(DOQCLIENT_OBJ) $(COMMON_OBJ) $(COMPAT_OBJ) \ +$(SLDNS_OBJ) PERF_SRC=testcode/perf.c PERF_OBJ=perf.lo PERF_OBJ_LINK=$(PERF_OBJ) worker_cb.lo $(COMMON_OBJ) $(COMPAT_OBJ) $(SLDNS_OBJ) @@ -288,7 +292,7 @@ ALL_SRC=$(COMMON_SRC) $(UNITTEST_SRC) $(DAEMON_SRC) \ $(CONTROL_SRC) $(UBANCHOR_SRC) $(PETAL_SRC) $(DNSTAP_SOCKET_SRC)\ $(PYTHONMOD_SRC) $(PYUNBOUND_SRC) $(WIN_DAEMON_THE_SRC) \ $(SVCINST_SRC) $(SVCUNINST_SRC) $(ANCHORUPD_SRC) $(SLDNS_SRC) \ - $(DOHCLIENT_SRC) $(READZONE_SRC) + $(DOHCLIENT_SRC) $(DOQCLIENT_SRC) $(READZONE_SRC) ALL_OBJ=$(COMMON_OBJ) $(UNITTEST_OBJ) $(DAEMON_OBJ) \ $(TESTBOUND_OBJ) $(LOCKVERIFY_OBJ) $(PKTVIEW_OBJ) \ @@ 
-297,7 +301,7 @@ ALL_OBJ=$(COMMON_OBJ) $(UNITTEST_OBJ) $(DAEMON_OBJ) \ $(CONTROL_OBJ) $(UBANCHOR_OBJ) $(PETAL_OBJ) $(DNSTAP_SOCKET_OBJ)\ $(COMPAT_OBJ) $(PYUNBOUND_OBJ) \ $(SVCINST_OBJ) $(SVCUNINST_OBJ) $(ANCHORUPD_OBJ) $(SLDNS_OBJ) \ - $(DOHCLIENT_OBJ) $(READZONE_OBJ) + $(DOHCLIENT_OBJ) $(DOQCLIENT_OBJ) $(READZONE_OBJ) COMPILE=$(LIBTOOL) --tag=CC --mode=compile $(CC) $(CPPFLAGS) $(CFLAGS) @PTHREAD_CFLAGS_ONLY@ LINK=$(LIBTOOL) --tag=CC --mode=link $(CC) $(staticexe) $(RUNTIME_PATH) $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) @@ -334,7 +338,7 @@ rsrc_unbound_checkconf.o: $(srcdir)/winrc/rsrc_unbound_checkconf.rc config.h TEST_BIN=asynclook$(EXEEXT) delayer$(EXEEXT) \ lock-verify$(EXEEXT) memstats$(EXEEXT) perf$(EXEEXT) \ petal$(EXEEXT) pktview$(EXEEXT) streamtcp$(EXEEXT) \ - $(DNSTAP_SOCKET_TESTBIN) dohclient$(EXEEXT) \ + $(DNSTAP_SOCKET_TESTBIN) dohclient$(EXEEXT) doqclient$(EXEEXT) \ testbound$(EXEEXT) unittest$(EXEEXT) readzone$(EXEEXT) tests: all $(TEST_BIN) @@ -416,6 +420,9 @@ streamtcp$(EXEEXT): $(STREAMTCP_OBJ_LINK) dohclient$(EXEEXT): $(DOHCLIENT_OBJ_LINK) $(LINK) -o $@ $(DOHCLIENT_OBJ_LINK) $(SSLLIB) $(LIBS) +doqclient$(EXEEXT): $(DOQCLIENT_OBJ_LINK) + $(LINK) -o $@ $(DOQCLIENT_OBJ_LINK) $(SSLLIB) $(LIBS) + perf$(EXEEXT): $(PERF_OBJ_LINK) $(LINK) -o $@ $(PERF_OBJ_LINK) $(SSLLIB) $(LIBS) @@ -703,6 +710,8 @@ depend: # build rules ipset.lo ipset.o: $(srcdir)/ipset/ipset.c +doqclient.lo doqclient.o: $(srcdir)/testcode/doqclient.c +unitdoq.lo unitdoq.o: $(srcdir)/testcode/unitdoq.c # Dependencies dns.lo dns.o: $(srcdir)/services/cache/dns.c config.h $(srcdir)/iterator/iter_delegpt.h $(srcdir)/util/log.h \ diff --git a/cachedb/cachedb.c b/cachedb/cachedb.c index cf5143ee3..0329f8458 100644 --- a/cachedb/cachedb.c +++ b/cachedb/cachedb.c @@ -621,6 +621,9 @@ parse_data(struct module_qstate* qstate, struct sldns_buffer* buf, } verbose(VERB_ALGO, "cachedb msg adjusted down by %d", (int)adjust); adjust_msg_ttl(qstate->return_msg, adjust); + if(qstate->env->cfg->aggressive_nsec) { 
+ limit_nsec_ttl(qstate->return_msg); + } /* Similar to the unbound worker, if serve-expired is enabled and * the msg would be considered to be expired, mark the state so a diff --git a/config.h.in b/config.h.in index 099206025..dc03e82dd 100644 --- a/config.h.in +++ b/config.h.in @@ -129,6 +129,14 @@ and to 0 if you don't. */ #undef HAVE_DECL_NGHTTP2_SESSION_SERVER_NEW +/* Define to 1 if you have the declaration of `ngtcp2_conn_server_new', and to + 0 if you don't. */ +#undef HAVE_DECL_NGTCP2_CONN_SERVER_NEW + +/* Define to 1 if you have the declaration of `ngtcp2_crypto_encrypt_cb', and + to 0 if you don't. */ +#undef HAVE_DECL_NGTCP2_CRYPTO_ENCRYPT_CB + /* Define to 1 if you have the declaration of `NID_ED25519', and to 0 if you don't. */ #undef HAVE_DECL_NID_ED25519 @@ -421,6 +429,65 @@ /* Define to 1 if you have the <nghttp2/nghttp2.h> header file. */ #undef HAVE_NGHTTP2_NGHTTP2_H +/* Define this to use ngtcp2. */ +#undef HAVE_NGTCP2 + +/* Define to 1 if you have the `ngtcp2_ccerr_default' function. */ +#undef HAVE_NGTCP2_CCERR_DEFAULT + +/* Define to 1 if you have the `ngtcp2_conn_encode_0rtt_transport_params' + function. */ +#undef HAVE_NGTCP2_CONN_ENCODE_0RTT_TRANSPORT_PARAMS + +/* Define to 1 if you have the `ngtcp2_conn_get_max_local_streams_uni' + function. */ +#undef HAVE_NGTCP2_CONN_GET_MAX_LOCAL_STREAMS_UNI + +/* Define to 1 if you have the `ngtcp2_conn_get_num_scid' function. */ +#undef HAVE_NGTCP2_CONN_GET_NUM_SCID + +/* Define to 1 if you have the `ngtcp2_conn_in_closing_period' function. */ +#undef HAVE_NGTCP2_CONN_IN_CLOSING_PERIOD + +/* Define to 1 if you have the `ngtcp2_conn_in_draining_period' function. */ +#undef HAVE_NGTCP2_CONN_IN_DRAINING_PERIOD + +/* Define if ngtcp2_conn_shutdown_stream has 4 arguments. */ +#undef HAVE_NGTCP2_CONN_SHUTDOWN_STREAM4 + +/* Define to 1 if you have the `ngtcp2_conn_tls_early_data_rejected' function. + */ +#undef HAVE_NGTCP2_CONN_TLS_EARLY_DATA_REJECTED + +/* Define to 1 if you have the `ngtcp2_crypto_encrypt_cb' function.
*/ +#undef HAVE_NGTCP2_CRYPTO_ENCRYPT_CB + +/* Define to 1 if you have the + `ngtcp2_crypto_quictls_configure_client_context' function. */ +#undef HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_CLIENT_CONTEXT + +/* Define to 1 if you have the + `ngtcp2_crypto_quictls_configure_server_context' function. */ +#undef HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_SERVER_CONTEXT + +/* Define to 1 if you have the + `ngtcp2_crypto_quictls_from_ossl_encryption_level' function. */ +#undef HAVE_NGTCP2_CRYPTO_QUICTLS_FROM_OSSL_ENCRYPTION_LEVEL + +/* Define to 1 if the system has the type `ngtcp2_encryption_level'. */ +#undef HAVE_NGTCP2_ENCRYPTION_LEVEL + +/* Define to 1 if you have the <ngtcp2/ngtcp2_crypto_openssl.h> header file. + */ +#undef HAVE_NGTCP2_NGTCP2_CRYPTO_OPENSSL_H + +/* Define to 1 if you have the <ngtcp2/ngtcp2_crypto_quictls.h> header file. + */ +#undef HAVE_NGTCP2_NGTCP2_CRYPTO_QUICTLS_H + +/* Define to 1 if you have the <ngtcp2/ngtcp2.h> header file. */ +#undef HAVE_NGTCP2_NGTCP2_H + /* Use libnss for crypto */ #undef HAVE_NSS @@ -587,6 +654,9 @@ /* Define to 1 if you have the `SSL_get1_peer_certificate' function. */ #undef HAVE_SSL_GET1_PEER_CERTIFICATE +/* Define to 1 if you have the `SSL_is_quic' function. */ +#undef HAVE_SSL_IS_QUIC + /* Define to 1 if you have the `SSL_set1_host' function. */ #undef HAVE_SSL_SET1_HOST @@ -629,6 +699,23 @@ /* Define to 1 if `ipi_spec_dst' is a member of `struct in_pktinfo'. */ #undef HAVE_STRUCT_IN_PKTINFO_IPI_SPEC_DST +/* Define to 1 if `tokenlen' is a member of `struct ngtcp2_pkt_hd'. */ +#undef HAVE_STRUCT_NGTCP2_PKT_HD_TOKENLEN + +/* Define to 1 if `max_tx_udp_payload_size' is a member of `struct + ngtcp2_settings'. */ +#undef HAVE_STRUCT_NGTCP2_SETTINGS_MAX_TX_UDP_PAYLOAD_SIZE + +/* Define to 1 if `tokenlen' is a member of `struct ngtcp2_settings'. */ +#undef HAVE_STRUCT_NGTCP2_SETTINGS_TOKENLEN + +/* Define to 1 if `original_dcid_present' is a member of `struct + ngtcp2_transport_params'.
*/ +#undef HAVE_STRUCT_NGTCP2_TRANSPORT_PARAMS_ORIGINAL_DCID_PRESENT + +/* Define to 1 if the system has the type `struct ngtcp2_version_cid'. */ +#undef HAVE_STRUCT_NGTCP2_VERSION_CID + /* Define to 1 if `sun_len' is a member of `struct sockaddr_un'. */ #undef HAVE_STRUCT_SOCKADDR_UN_SUN_LEN @@ -1497,6 +1584,9 @@ struct sockaddr_storage; # define free(p) unbound_stat_free_log(p, __FILE__, __LINE__, __func__) # define realloc(p,s) unbound_stat_realloc_log(p, s, __FILE__, __LINE__, __func__) # define strdup(s) unbound_stat_strdup_log(s, __FILE__, __LINE__, __func__) +#ifdef HAVE_REALLOCARRAY +# define reallocarray(p,n,s) unbound_stat_reallocarray_log(p, n, s, __FILE__, __LINE__, __func__) +#endif void *unbound_stat_malloc(size_t size); void *unbound_stat_calloc(size_t nmemb, size_t size); void unbound_stat_free(void *ptr); @@ -1509,6 +1599,8 @@ void unbound_stat_free_log(void *ptr, const char* file, int line, const char* func); void *unbound_stat_realloc_log(void *ptr, size_t size, const char* file, int line, const char* func); +void *unbound_stat_reallocarray_log(void *ptr, size_t nmemb, size_t size, + const char* file, int line, const char* func); char *unbound_stat_strdup_log(const char *s, const char* file, int line, const char* func); #elif defined(UNBOUND_ALLOC_LITE) @@ -1521,6 +1613,8 @@ char *unbound_stat_strdup_log(const char *s, const char* file, int line, #define UNBOUND_DNS_OVER_TLS_PORT 853 /** default port for DNS over HTTPS traffic. */ #define UNBOUND_DNS_OVER_HTTPS_PORT 443 +/** default port for DNS over QUIC traffic. */ +#define UNBOUND_DNS_OVER_QUIC_PORT 853 /** default port for unbound control traffic, registered port with IANA, ub-dns-control 8953/tcp unbound dns nameserver control */ #define UNBOUND_CONTROL_PORT 8953 diff --git a/configure b/configure index ef250d6c6..a41e3e1eb 100755 --- a/configure +++ b/configure @@ -1,6 +1,6 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. 
-# Generated by GNU Autoconf 2.71 for unbound 1.21.1. +# Generated by GNU Autoconf 2.71 for unbound 1.22.1. # # Report bugs to . # @@ -622,8 +622,8 @@ MAKEFLAGS= # Identity of this package. PACKAGE_NAME='unbound' PACKAGE_TARNAME='unbound' -PACKAGE_VERSION='1.21.1' -PACKAGE_STRING='unbound 1.21.1' +PACKAGE_VERSION='1.22.1' +PACKAGE_STRING='unbound 1.22.1' PACKAGE_BUGREPORT='unbound-bugs@nlnetlabs.nl or https://github.com/NLnetLabs/unbound/issues' PACKAGE_URL='' @@ -921,6 +921,7 @@ with_libevent with_libexpat with_libhiredis with_libnghttp2 +with_libngtcp2 enable_static_exe enable_fully_static enable_lock_checks @@ -1508,7 +1509,7 @@ if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF -\`configure' configures unbound 1.21.1 to adapt to many kinds of systems. +\`configure' configures unbound 1.22.1 to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... @@ -1574,7 +1575,7 @@ fi if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of unbound 1.21.1:";; + short | recursive ) echo "Configuration of unbound 1.22.1:";; esac cat <<\_ACEOF @@ -1709,6 +1710,7 @@ Optional Packages: --with-libexpat=path specify explicit path for libexpat. --with-libhiredis=path specify explicit path for libhiredis. --with-libnghttp2=path specify explicit path for libnghttp2. + --with-libngtcp2=path specify explicit path for libngtcp2, for QUIC. --with-dnstap-socket-path=pathname set default dnstap socket path --with-protobuf-c=path Path where protobuf-c is installed, for dnstap @@ -1822,7 +1824,7 @@ fi test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -unbound configure 1.21.1 +unbound configure 1.22.1 generated by GNU Autoconf 2.71 Copyright (C) 2021 Free Software Foundation, Inc. 
@@ -2479,7 +2481,7 @@ cat >config.log <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. -It was created by unbound $as_me 1.21.1, which was +It was created by unbound $as_me 1.22.1, which was generated by GNU Autoconf 2.71. Invocation command line was $ $0$ac_configure_args_raw @@ -3241,13 +3243,13 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu UNBOUND_VERSION_MAJOR=1 -UNBOUND_VERSION_MINOR=21 +UNBOUND_VERSION_MINOR=22 UNBOUND_VERSION_MICRO=1 LIBUNBOUND_CURRENT=9 -LIBUNBOUND_REVISION=29 +LIBUNBOUND_REVISION=31 LIBUNBOUND_AGE=1 # 1.0.0 had 0:12:0 # 1.0.1 had 0:13:0 @@ -3344,6 +3346,8 @@ LIBUNBOUND_AGE=1 # 1.20.0 had 9:27:1 # 1.21.0 had 9:28:1 # 1.21.1 had 9:29:1 +# 1.22.0 had 9:30:1 +# 1.22.1 had 9:31:1 # Current -- the number of the binary API that we're implementing # Revision -- which iteration of the implementation of the binary @@ -22204,6 +22208,353 @@ printf "%s\n" "#define HAVE_DECL_NGHTTP2_SESSION_SERVER_NEW $ac_have_decl" >>con fi +# ngtcp2 + +# Check whether --with-libngtcp2 was given. +if test ${with_libngtcp2+y} +then : + withval=$with_libngtcp2; +else $as_nop + withval="no" +fi + +found_libngtcp2="no" +if test x_$withval = x_yes -o x_$withval != x_no; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for libngtcp2" >&5 +printf %s "checking for libngtcp2... " >&6; } + if test x_$withval = x_ -o x_$withval = x_yes; then + withval="/usr/local /opt/local /usr/lib /usr/pkg /usr/sfw /usr" + fi + for dir in $withval ; do + if test -f "$dir/include/ngtcp2/ngtcp2.h"; then + found_libngtcp2="yes" + if test "$dir" != "/usr"; then + CPPFLAGS="$CPPFLAGS -I$dir/include" + LDFLAGS="$LDFLAGS -L$dir/lib" + fi + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: found in $dir" >&5 +printf "%s\n" "found in $dir" >&6; } + +printf "%s\n" "#define HAVE_NGTCP2 1" >>confdefs.h + + LIBS="$LIBS -lngtcp2" + break; + fi + done + if test x_$found_libngtcp2 != x_yes; then + as_fn_error $? 
"Could not find libngtcp2, ngtcp2.h" "$LINENO" 5 + fi + ac_fn_c_check_header_compile "$LINENO" "ngtcp2/ngtcp2.h" "ac_cv_header_ngtcp2_ngtcp2_h" "$ac_includes_default +" +if test "x$ac_cv_header_ngtcp2_ngtcp2_h" = xyes +then : + printf "%s\n" "#define HAVE_NGTCP2_NGTCP2_H 1" >>confdefs.h + +fi +ac_fn_c_check_header_compile "$LINENO" "ngtcp2/ngtcp2_crypto_openssl.h" "ac_cv_header_ngtcp2_ngtcp2_crypto_openssl_h" "$ac_includes_default +" +if test "x$ac_cv_header_ngtcp2_ngtcp2_crypto_openssl_h" = xyes +then : + printf "%s\n" "#define HAVE_NGTCP2_NGTCP2_CRYPTO_OPENSSL_H 1" >>confdefs.h + +fi +ac_fn_c_check_header_compile "$LINENO" "ngtcp2/ngtcp2_crypto_quictls.h" "ac_cv_header_ngtcp2_ngtcp2_crypto_quictls_h" "$ac_includes_default +" +if test "x$ac_cv_header_ngtcp2_ngtcp2_crypto_quictls_h" = xyes +then : + printf "%s\n" "#define HAVE_NGTCP2_NGTCP2_CRYPTO_QUICTLS_H 1" >>confdefs.h + +fi + + ac_fn_check_decl "$LINENO" "ngtcp2_conn_server_new" "ac_cv_have_decl_ngtcp2_conn_server_new" "$ac_includes_default + #include + +" "$ac_c_undeclared_builtin_options" "CFLAGS" +if test "x$ac_cv_have_decl_ngtcp2_conn_server_new" = xyes +then : + ac_have_decl=1 +else $as_nop + ac_have_decl=0 +fi +printf "%s\n" "#define HAVE_DECL_NGTCP2_CONN_SERVER_NEW $ac_have_decl" >>confdefs.h + + ac_fn_check_decl "$LINENO" "ngtcp2_crypto_encrypt_cb" "ac_cv_have_decl_ngtcp2_crypto_encrypt_cb" "$ac_includes_default + #include + +" "$ac_c_undeclared_builtin_options" "CFLAGS" +if test "x$ac_cv_have_decl_ngtcp2_crypto_encrypt_cb" = xyes +then : + ac_have_decl=1 +else $as_nop + ac_have_decl=0 +fi +printf "%s\n" "#define HAVE_DECL_NGTCP2_CRYPTO_ENCRYPT_CB $ac_have_decl" >>confdefs.h + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for ngtcp2_crypto_encrypt_cb in -lngtcp2_crypto_openssl" >&5 +printf %s "checking for ngtcp2_crypto_encrypt_cb in -lngtcp2_crypto_openssl... 
" >&6; } +if test ${ac_cv_lib_ngtcp2_crypto_openssl_ngtcp2_crypto_encrypt_cb+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_check_lib_save_LIBS=$LIBS +LIBS="-lngtcp2_crypto_openssl $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +char ngtcp2_crypto_encrypt_cb (); +int +main (void) +{ +return ngtcp2_crypto_encrypt_cb (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_ngtcp2_crypto_openssl_ngtcp2_crypto_encrypt_cb=yes +else $as_nop + ac_cv_lib_ngtcp2_crypto_openssl_ngtcp2_crypto_encrypt_cb=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_ngtcp2_crypto_openssl_ngtcp2_crypto_encrypt_cb" >&5 +printf "%s\n" "$ac_cv_lib_ngtcp2_crypto_openssl_ngtcp2_crypto_encrypt_cb" >&6; } +if test "x$ac_cv_lib_ngtcp2_crypto_openssl_ngtcp2_crypto_encrypt_cb" = xyes +then : + LIBS="$LIBS -lngtcp2_crypto_openssl" +fi + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for ngtcp2_crypto_encrypt_cb in -lngtcp2_crypto_quictls" >&5 +printf %s "checking for ngtcp2_crypto_encrypt_cb in -lngtcp2_crypto_quictls... " >&6; } +if test ${ac_cv_lib_ngtcp2_crypto_quictls_ngtcp2_crypto_encrypt_cb+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_check_lib_save_LIBS=$LIBS +LIBS="-lngtcp2_crypto_quictls $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +char ngtcp2_crypto_encrypt_cb (); +int +main (void) +{ +return ngtcp2_crypto_encrypt_cb (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_ngtcp2_crypto_quictls_ngtcp2_crypto_encrypt_cb=yes +else $as_nop + ac_cv_lib_ngtcp2_crypto_quictls_ngtcp2_crypto_encrypt_cb=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_ngtcp2_crypto_quictls_ngtcp2_crypto_encrypt_cb" >&5 +printf "%s\n" "$ac_cv_lib_ngtcp2_crypto_quictls_ngtcp2_crypto_encrypt_cb" >&6; } +if test "x$ac_cv_lib_ngtcp2_crypto_quictls_ngtcp2_crypto_encrypt_cb" = xyes +then : + LIBS="$LIBS -lngtcp2_crypto_quictls" +fi + + ac_fn_c_check_func "$LINENO" "ngtcp2_crypto_encrypt_cb" "ac_cv_func_ngtcp2_crypto_encrypt_cb" +if test "x$ac_cv_func_ngtcp2_crypto_encrypt_cb" = xyes +then : + printf "%s\n" "#define HAVE_NGTCP2_CRYPTO_ENCRYPT_CB 1" >>confdefs.h + +fi +ac_fn_c_check_func "$LINENO" "ngtcp2_ccerr_default" "ac_cv_func_ngtcp2_ccerr_default" +if test "x$ac_cv_func_ngtcp2_ccerr_default" = xyes +then : + printf "%s\n" "#define HAVE_NGTCP2_CCERR_DEFAULT 1" >>confdefs.h + +fi +ac_fn_c_check_func "$LINENO" "ngtcp2_conn_in_closing_period" "ac_cv_func_ngtcp2_conn_in_closing_period" +if test "x$ac_cv_func_ngtcp2_conn_in_closing_period" = xyes +then : + printf "%s\n" "#define HAVE_NGTCP2_CONN_IN_CLOSING_PERIOD 1" >>confdefs.h + +fi +ac_fn_c_check_func "$LINENO" "ngtcp2_conn_in_draining_period" "ac_cv_func_ngtcp2_conn_in_draining_period" +if test "x$ac_cv_func_ngtcp2_conn_in_draining_period" = xyes +then : + printf "%s\n" "#define HAVE_NGTCP2_CONN_IN_DRAINING_PERIOD 1" >>confdefs.h + +fi +ac_fn_c_check_func "$LINENO" "ngtcp2_conn_get_max_local_streams_uni" "ac_cv_func_ngtcp2_conn_get_max_local_streams_uni" +if test "x$ac_cv_func_ngtcp2_conn_get_max_local_streams_uni" = xyes +then : + printf "%s\n" "#define 
HAVE_NGTCP2_CONN_GET_MAX_LOCAL_STREAMS_UNI 1" >>confdefs.h + +fi +ac_fn_c_check_func "$LINENO" "ngtcp2_crypto_quictls_from_ossl_encryption_level" "ac_cv_func_ngtcp2_crypto_quictls_from_ossl_encryption_level" +if test "x$ac_cv_func_ngtcp2_crypto_quictls_from_ossl_encryption_level" = xyes +then : + printf "%s\n" "#define HAVE_NGTCP2_CRYPTO_QUICTLS_FROM_OSSL_ENCRYPTION_LEVEL 1" >>confdefs.h + +fi +ac_fn_c_check_func "$LINENO" "ngtcp2_crypto_quictls_configure_server_context" "ac_cv_func_ngtcp2_crypto_quictls_configure_server_context" +if test "x$ac_cv_func_ngtcp2_crypto_quictls_configure_server_context" = xyes +then : + printf "%s\n" "#define HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_SERVER_CONTEXT 1" >>confdefs.h + +fi +ac_fn_c_check_func "$LINENO" "ngtcp2_crypto_quictls_configure_client_context" "ac_cv_func_ngtcp2_crypto_quictls_configure_client_context" +if test "x$ac_cv_func_ngtcp2_crypto_quictls_configure_client_context" = xyes +then : + printf "%s\n" "#define HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_CLIENT_CONTEXT 1" >>confdefs.h + +fi +ac_fn_c_check_func "$LINENO" "ngtcp2_conn_get_num_scid" "ac_cv_func_ngtcp2_conn_get_num_scid" +if test "x$ac_cv_func_ngtcp2_conn_get_num_scid" = xyes +then : + printf "%s\n" "#define HAVE_NGTCP2_CONN_GET_NUM_SCID 1" >>confdefs.h + +fi +ac_fn_c_check_func "$LINENO" "ngtcp2_conn_tls_early_data_rejected" "ac_cv_func_ngtcp2_conn_tls_early_data_rejected" +if test "x$ac_cv_func_ngtcp2_conn_tls_early_data_rejected" = xyes +then : + printf "%s\n" "#define HAVE_NGTCP2_CONN_TLS_EARLY_DATA_REJECTED 1" >>confdefs.h + +fi +ac_fn_c_check_func "$LINENO" "ngtcp2_conn_encode_0rtt_transport_params" "ac_cv_func_ngtcp2_conn_encode_0rtt_transport_params" +if test "x$ac_cv_func_ngtcp2_conn_encode_0rtt_transport_params" = xyes +then : + printf "%s\n" "#define HAVE_NGTCP2_CONN_ENCODE_0RTT_TRANSPORT_PARAMS 1" >>confdefs.h + +fi + + + for ac_func in SSL_is_quic +do : + ac_fn_c_check_func "$LINENO" "SSL_is_quic" "ac_cv_func_SSL_is_quic" +if test 
"x$ac_cv_func_SSL_is_quic" = xyes +then : + printf "%s\n" "#define HAVE_SSL_IS_QUIC 1" >>confdefs.h + +else $as_nop + as_fn_error $? "No QUIC support detected in OpenSSL. Need OpenSSL version with QUIC support to enable DNS over QUIC with libngtcp2." "$LINENO" 5 +fi + +done + ac_fn_c_check_type "$LINENO" "struct ngtcp2_version_cid" "ac_cv_type_struct_ngtcp2_version_cid" "$ac_includes_default + #include + +" +if test "x$ac_cv_type_struct_ngtcp2_version_cid" = xyes +then : + +printf "%s\n" "#define HAVE_STRUCT_NGTCP2_VERSION_CID 1" >>confdefs.h + + +fi +ac_fn_c_check_type "$LINENO" "ngtcp2_encryption_level" "ac_cv_type_ngtcp2_encryption_level" "$ac_includes_default + #include + +" +if test "x$ac_cv_type_ngtcp2_encryption_level" = xyes +then : + +printf "%s\n" "#define HAVE_NGTCP2_ENCRYPTION_LEVEL 1" >>confdefs.h + + +fi + + ac_fn_c_check_member "$LINENO" "struct ngtcp2_pkt_hd" "tokenlen" "ac_cv_member_struct_ngtcp2_pkt_hd_tokenlen" "$ac_includes_default + #include + +" +if test "x$ac_cv_member_struct_ngtcp2_pkt_hd_tokenlen" = xyes +then : + +printf "%s\n" "#define HAVE_STRUCT_NGTCP2_PKT_HD_TOKENLEN 1" >>confdefs.h + + +fi +ac_fn_c_check_member "$LINENO" "struct ngtcp2_settings" "tokenlen" "ac_cv_member_struct_ngtcp2_settings_tokenlen" "$ac_includes_default + #include + +" +if test "x$ac_cv_member_struct_ngtcp2_settings_tokenlen" = xyes +then : + +printf "%s\n" "#define HAVE_STRUCT_NGTCP2_SETTINGS_TOKENLEN 1" >>confdefs.h + + +fi +ac_fn_c_check_member "$LINENO" "struct ngtcp2_settings" "max_tx_udp_payload_size" "ac_cv_member_struct_ngtcp2_settings_max_tx_udp_payload_size" "$ac_includes_default + #include + +" +if test "x$ac_cv_member_struct_ngtcp2_settings_max_tx_udp_payload_size" = xyes +then : + +printf "%s\n" "#define HAVE_STRUCT_NGTCP2_SETTINGS_MAX_TX_UDP_PAYLOAD_SIZE 1" >>confdefs.h + + +fi +ac_fn_c_check_member "$LINENO" "struct ngtcp2_transport_params" "original_dcid_present" "ac_cv_member_struct_ngtcp2_transport_params_original_dcid_present" 
"$ac_includes_default + #include + +" +if test "x$ac_cv_member_struct_ngtcp2_transport_params_original_dcid_present" = xyes +then : + +printf "%s\n" "#define HAVE_STRUCT_NGTCP2_TRANSPORT_PARAMS_ORIGINAL_DCID_PRESENT 1" >>confdefs.h + + +fi + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether ngtcp2_conn_shutdown_stream has 4 arguments" >&5 +printf %s "checking whether ngtcp2_conn_shutdown_stream has 4 arguments... " >&6; } + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$ac_includes_default +#include + +int +main (void) +{ + + (void)ngtcp2_conn_shutdown_stream(NULL, 0, 0, 0); + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +printf "%s\n" "#define HAVE_NGTCP2_CONN_SHUTDOWN_STREAM4 1" >>confdefs.h + + +else $as_nop + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + +fi + # set static linking for uninstalled libraries if requested staticexe="" @@ -23787,10 +24138,12 @@ if test x_$enable_lock_checks = x_yes; then UBSYMS="-export-symbols clubsyms.def" cp ${srcdir}/libunbound/ubsyms.def clubsyms.def echo lock_protect >> clubsyms.def + echo lock_protect_place >> clubsyms.def echo lock_unprotect >> clubsyms.def echo lock_get_mem >> clubsyms.def echo checklock_start >> clubsyms.def echo checklock_stop >> clubsyms.def + echo checklock_set_output_name >> clubsyms.def echo checklock_lock >> clubsyms.def echo checklock_unlock >> clubsyms.def echo checklock_init >> clubsyms.def @@ -24645,7 +24998,7 @@ printf "%s\n" "#define MAXSYSLOGMSGLEN 10240" >>confdefs.h -version=1.21.1 +version=1.22.1 date=`date +'%b %e, %Y'` @@ -25157,7 +25510,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. 
ac_log=" -This file was extended by unbound $as_me 1.21.1, which was +This file was extended by unbound $as_me 1.22.1, which was generated by GNU Autoconf 2.71. Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -25225,7 +25578,7 @@ ac_cs_config_escaped=`printf "%s\n" "$ac_cs_config" | sed "s/^ //; s/'/'\\\\\\\\ cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_cs_config='$ac_cs_config_escaped' ac_cs_version="\\ -unbound config.status 1.21.1 +unbound config.status 1.22.1 configured by $0, generated by GNU Autoconf 2.71, with options \\"\$ac_cs_config\\" diff --git a/configure.ac b/configure.ac index fdded4f50..a5a856fc1 100644 --- a/configure.ac +++ b/configure.ac @@ -10,7 +10,7 @@ sinclude(dnscrypt/dnscrypt.m4) # must be numbers. ac_defun because of later processing m4_define([VERSION_MAJOR],[1]) -m4_define([VERSION_MINOR],[21]) +m4_define([VERSION_MINOR],[22]) m4_define([VERSION_MICRO],[1]) AC_INIT([unbound],m4_defn([VERSION_MAJOR]).m4_defn([VERSION_MINOR]).m4_defn([VERSION_MICRO]),[unbound-bugs@nlnetlabs.nl or https://github.com/NLnetLabs/unbound/issues],[unbound]) AC_SUBST(UNBOUND_VERSION_MAJOR, [VERSION_MAJOR]) @@ -18,7 +18,7 @@ AC_SUBST(UNBOUND_VERSION_MINOR, [VERSION_MINOR]) AC_SUBST(UNBOUND_VERSION_MICRO, [VERSION_MICRO]) LIBUNBOUND_CURRENT=9 -LIBUNBOUND_REVISION=29 +LIBUNBOUND_REVISION=31 LIBUNBOUND_AGE=1 # 1.0.0 had 0:12:0 # 1.0.1 had 0:13:0 @@ -115,6 +115,8 @@ LIBUNBOUND_AGE=1 # 1.20.0 had 9:27:1 # 1.21.0 had 9:28:1 # 1.21.1 had 9:29:1 +# 1.22.0 had 9:30:1 +# 1.22.1 had 9:31:1 # Current -- the number of the binary API that we're implementing # Revision -- which iteration of the implementation of the binary @@ -1578,6 +1580,64 @@ if test x_$withval = x_yes -o x_$withval != x_no; then ]) fi +# ngtcp2 +AC_ARG_WITH(libngtcp2, AS_HELP_STRING([--with-libngtcp2=path],[specify explicit path for libngtcp2, for QUIC.]), + [ ],[ withval="no" ]) +found_libngtcp2="no" +if test x_$withval = x_yes -o x_$withval != x_no; then + AC_MSG_CHECKING(for libngtcp2) 
+ if test x_$withval = x_ -o x_$withval = x_yes; then + withval="/usr/local /opt/local /usr/lib /usr/pkg /usr/sfw /usr" + fi + for dir in $withval ; do + if test -f "$dir/include/ngtcp2/ngtcp2.h"; then + found_libngtcp2="yes" + dnl assume /usr is in default path. + if test "$dir" != "/usr"; then + CPPFLAGS="$CPPFLAGS -I$dir/include" + LDFLAGS="$LDFLAGS -L$dir/lib" + fi + AC_MSG_RESULT(found in $dir) + AC_DEFINE([HAVE_NGTCP2], [1], [Define this to use ngtcp2.]) + LIBS="$LIBS -lngtcp2" + break; + fi + done + if test x_$found_libngtcp2 != x_yes; then + AC_MSG_ERROR([Could not find libngtcp2, ngtcp2.h]) + fi + AC_CHECK_HEADERS([ngtcp2/ngtcp2.h ngtcp2/ngtcp2_crypto_openssl.h ngtcp2/ngtcp2_crypto_quictls.h],,, [AC_INCLUDES_DEFAULT]) + AC_CHECK_DECLS([ngtcp2_conn_server_new], [], [], [AC_INCLUDES_DEFAULT + #include + ]) + AC_CHECK_DECLS([ngtcp2_crypto_encrypt_cb], [], [], [AC_INCLUDES_DEFAULT + #include + ]) + AC_CHECK_LIB([ngtcp2_crypto_openssl], [ngtcp2_crypto_encrypt_cb], [ LIBS="$LIBS -lngtcp2_crypto_openssl" ]) + AC_CHECK_LIB([ngtcp2_crypto_quictls], [ngtcp2_crypto_encrypt_cb], [ LIBS="$LIBS -lngtcp2_crypto_quictls" ]) + AC_CHECK_FUNCS([ngtcp2_crypto_encrypt_cb ngtcp2_ccerr_default ngtcp2_conn_in_closing_period ngtcp2_conn_in_draining_period ngtcp2_conn_get_max_local_streams_uni ngtcp2_crypto_quictls_from_ossl_encryption_level ngtcp2_crypto_quictls_configure_server_context ngtcp2_crypto_quictls_configure_client_context ngtcp2_conn_get_num_scid ngtcp2_conn_tls_early_data_rejected ngtcp2_conn_encode_0rtt_transport_params]) + AC_CHECK_FUNCS([SSL_is_quic], [], [AC_MSG_ERROR([No QUIC support detected in OpenSSL. 
Need OpenSSL version with QUIC support to enable DNS over QUIC with libngtcp2.])]) + AC_CHECK_TYPES([struct ngtcp2_version_cid, ngtcp2_encryption_level],,,[AC_INCLUDES_DEFAULT + #include + ]) + AC_CHECK_MEMBERS([struct ngtcp2_pkt_hd.tokenlen, struct ngtcp2_settings.tokenlen, struct ngtcp2_settings.max_tx_udp_payload_size, struct ngtcp2_transport_params.original_dcid_present],,,[AC_INCLUDES_DEFAULT + #include + ]) + + AC_MSG_CHECKING([whether ngtcp2_conn_shutdown_stream has 4 arguments]) + AC_COMPILE_IFELSE([AC_LANG_PROGRAM([AC_INCLUDES_DEFAULT +#include + ],[ + (void)ngtcp2_conn_shutdown_stream(NULL, 0, 0, 0); + ])],[ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_NGTCP2_CONN_SHUTDOWN_STREAM4, 1, [Define if ngtcp2_conn_shutdown_stream has 4 arguments.]) + ],[ + AC_MSG_RESULT(no) + ]) + +fi + # set static linking for uninstalled libraries if requested AC_SUBST(staticexe) staticexe="" @@ -1893,10 +1953,12 @@ if test x_$enable_lock_checks = x_yes; then UBSYMS="-export-symbols clubsyms.def" cp ${srcdir}/libunbound/ubsyms.def clubsyms.def echo lock_protect >> clubsyms.def + echo lock_protect_place >> clubsyms.def echo lock_unprotect >> clubsyms.def echo lock_get_mem >> clubsyms.def echo checklock_start >> clubsyms.def echo checklock_stop >> clubsyms.def + echo checklock_set_output_name >> clubsyms.def echo checklock_lock >> clubsyms.def echo checklock_unlock >> clubsyms.def echo checklock_init >> clubsyms.def @@ -2331,6 +2393,9 @@ struct sockaddr_storage; # define free(p) unbound_stat_free_log(p, __FILE__, __LINE__, __func__) # define realloc(p,s) unbound_stat_realloc_log(p, s, __FILE__, __LINE__, __func__) # define strdup(s) unbound_stat_strdup_log(s, __FILE__, __LINE__, __func__) +#ifdef HAVE_REALLOCARRAY +# define reallocarray(p,n,s) unbound_stat_reallocarray_log(p, n, s, __FILE__, __LINE__, __func__) +#endif void *unbound_stat_malloc(size_t size); void *unbound_stat_calloc(size_t nmemb, size_t size); void unbound_stat_free(void *ptr); @@ -2343,6 +2408,8 @@ void 
unbound_stat_free_log(void *ptr, const char* file, int line, const char* func); void *unbound_stat_realloc_log(void *ptr, size_t size, const char* file, int line, const char* func); +void *unbound_stat_reallocarray_log(void *ptr, size_t nmemb, size_t size, + const char* file, int line, const char* func); char *unbound_stat_strdup_log(const char *s, const char* file, int line, const char* func); #elif defined(UNBOUND_ALLOC_LITE) @@ -2355,6 +2422,8 @@ char *unbound_stat_strdup_log(const char *s, const char* file, int line, #define UNBOUND_DNS_OVER_TLS_PORT 853 /** default port for DNS over HTTPS traffic. */ #define UNBOUND_DNS_OVER_HTTPS_PORT 443 +/** default port for DNS over QUIC traffic. */ +#define UNBOUND_DNS_OVER_QUIC_PORT 853 /** default port for unbound control traffic, registered port with IANA, ub-dns-control 8953/tcp unbound dns nameserver control */ #define UNBOUND_CONTROL_PORT 8953 diff --git a/contrib/aaaa-filter-iterator.patch b/contrib/aaaa-filter-iterator.patch index cb6dabc44..b0c2b2198 100644 --- a/contrib/aaaa-filter-iterator.patch +++ b/contrib/aaaa-filter-iterator.patch @@ -236,7 +236,7 @@ index 54006940..768fe202 100644 + /* see if the failure did get (parent-lame) info */ + if(!cache_fill_missing(super->env, + super_iq->qchase.qclass, super->region, -+ super_iq->dp)) ++ super_iq->dp, 0)) + log_err("ASN-AAAA-filter: out of memory adding missing"); + dpns->resolved = 1; /* mark as failed */ + } diff --git a/daemon/acl_list.c b/daemon/acl_list.c index 83cfd7ddf..605851e4f 100644 --- a/daemon/acl_list.c +++ b/daemon/acl_list.c @@ -551,17 +551,6 @@ acl_list_apply_cfg(struct acl_list* acl, struct config_file* cfg, return 1; } -int -acl_interface_compare(const void* k1, const void* k2) -{ - struct addr_tree_node* n1 = (struct addr_tree_node*)k1; - struct addr_tree_node* n2 = (struct addr_tree_node*)k2; - return sockaddr_cmp(&n1->addr, n1->addrlen, &n2->addr, - n2->addrlen); - /* We don't care about comparing node->net. 
All addresses in the - * acl_interface tree have either 32 (ipv4) or 128 (ipv6). */ -} - void acl_interface_init(struct acl_list* acl_interface) { diff --git a/daemon/acl_list.h b/daemon/acl_list.h index 9da43bef3..ee679eb6e 100644 --- a/daemon/acl_list.h +++ b/daemon/acl_list.h @@ -147,9 +147,6 @@ acl_interface_insert(struct acl_list* acl_interface, int acl_list_apply_cfg(struct acl_list* acl, struct config_file* cfg, struct views* v); -/** compare ACL interface "addr_tree" nodes (+port) */ -int acl_interface_compare(const void* k1, const void* k2); - /** * Initialise (also clean) the acl_interface struct. * @param acl_interface: where to store. diff --git a/daemon/daemon.c b/daemon/daemon.c index 72b4a43be..1c8272b14 100644 --- a/daemon/daemon.c +++ b/daemon/daemon.c @@ -557,6 +557,12 @@ daemon_create_workers(struct daemon* daemon) fatal_exit("out of memory during daemon init"); numport = daemon_get_shufport(daemon, shufport); verbose(VERB_ALGO, "total of %d outgoing ports available", numport); + +#ifdef HAVE_NGTCP2 + daemon->doq_table = doq_table_create(daemon->cfg, daemon->rand); + if(!daemon->doq_table) + fatal_exit("could not create doq_table: out of memory"); +#endif daemon->num = (daemon->cfg->num_threads?daemon->cfg->num_threads:1); if(daemon->reuseport && (int)daemon->num < (int)daemon->num_ports) { @@ -906,6 +912,10 @@ daemon_cleanup(struct daemon* daemon) #ifdef USE_DNSCRYPT dnsc_delete(daemon->dnscenv); daemon->dnscenv = NULL; +#endif +#ifdef HAVE_NGTCP2 + doq_table_delete(daemon->doq_table); + daemon->doq_table = NULL; #endif daemon->cfg = NULL; } diff --git a/daemon/daemon.h b/daemon/daemon.h index 5c3a114cc..fc1bde713 100644 --- a/daemon/daemon.h +++ b/daemon/daemon.h @@ -58,6 +58,7 @@ struct ub_randstate; struct daemon_remote; struct respip_set; struct shm_main_info; +struct doq_table; struct cookie_secrets; #include "dnstap/dnstap_config.h" @@ -147,6 +148,8 @@ struct daemon { /** the dnscrypt environment */ struct dnsc_env* dnscenv; #endif + /** 
the doq connection table */ + struct doq_table* doq_table; /** reuse existing cache on reload if other conditions allow it. */ int reuse_cache; /** the EDNS cookie secrets from the cookie-secret-file */ diff --git a/daemon/remote.c b/daemon/remote.c index 5af03328e..8877cd194 100644 --- a/daemon/remote.c +++ b/daemon/remote.c @@ -302,7 +302,7 @@ add_open(const char* ip, int nr, struct listen_port** list, int noproto_is_err, /* open fd */ fd = create_tcp_accept_sock(res, 1, &noproto, 0, cfg->ip_transparent, 0, 0, cfg->ip_freebind, - cfg->use_systemd, cfg->ip_dscp); + cfg->use_systemd, cfg->ip_dscp, "unbound-control"); freeaddrinfo(res); } @@ -866,6 +866,10 @@ print_mem(RES* ssl, struct worker* worker, struct daemon* daemon, if(!print_longnum(ssl, "mem.http.response_buffer"SQ, (size_t)s->svr.mem_http2_response_buffer)) return 0; +#ifdef HAVE_NGTCP2 + if(!print_longnum(ssl, "mem.quic"SQ, (size_t)s->svr.mem_quic)) + return 0; +#endif /* HAVE_NGTCP2 */ return 1; } @@ -996,6 +1000,10 @@ print_ext(RES* ssl, struct ub_stats_info* s, int inhibit_zero) (unsigned long)s->svr.qipv6)) return 0; if(!ssl_printf(ssl, "num.query.https"SQ"%lu\n", (unsigned long)s->svr.qhttps)) return 0; +#ifdef HAVE_NGTCP2 + if(!ssl_printf(ssl, "num.query.quic"SQ"%lu\n", + (unsigned long)s->svr.qquic)) return 0; +#endif /* HAVE_NGTCP2 */ /* flags */ if(!ssl_printf(ssl, "num.query.flags.QR"SQ"%lu\n", (unsigned long)s->svr.qbit_QR)) return 0; diff --git a/daemon/stats.c b/daemon/stats.c index 827110698..0e17300a1 100644 --- a/daemon/stats.c +++ b/daemon/stats.c @@ -346,6 +346,12 @@ server_stats_compile(struct worker* worker, struct ub_stats_info* s, int reset) (long long)http2_get_query_buffer_size(); s->svr.mem_http2_response_buffer = (long long)http2_get_response_buffer_size(); +#ifdef HAVE_NGTCP2 + s->svr.mem_quic = (long long)doq_table_quic_size_get( + worker->daemon->doq_table); +#else + s->svr.mem_quic = 0; +#endif /* HAVE_NGTCP2 */ /* Set neg cache usage numbers */ set_neg_cache_stats(worker, 
&s->svr, reset); @@ -474,6 +480,7 @@ void server_stats_add(struct ub_stats_info* total, struct ub_stats_info* a) total->svr.qtls += a->svr.qtls; total->svr.qtls_resume += a->svr.qtls_resume; total->svr.qhttps += a->svr.qhttps; + total->svr.qquic += a->svr.qquic; total->svr.qipv6 += a->svr.qipv6; total->svr.qbit_QR += a->svr.qbit_QR; total->svr.qbit_AA += a->svr.qbit_AA; @@ -533,7 +540,8 @@ void server_stats_insquery(struct ub_server_stats* stats, struct comm_point* c, else stats->qclass_big++; stats->qopcode[ LDNS_OPCODE_WIRE(sldns_buffer_begin(c->buffer)) ]++; if(c->type != comm_udp) { - stats->qtcp++; + if(c->type != comm_doq) + stats->qtcp++; if(c->ssl != NULL) { stats->qtls++; #ifdef HAVE_SSL @@ -542,6 +550,10 @@ void server_stats_insquery(struct ub_server_stats* stats, struct comm_point* c, #endif if(c->type == comm_http) stats->qhttps++; +#ifdef HAVE_NGTCP2 + else if(c->type == comm_doq) + stats->qquic++; +#endif } } if(repinfo && addr_is_ip6(&repinfo->remote_addr, repinfo->remote_addrlen)) diff --git a/daemon/worker.c b/daemon/worker.c index fe105eb7b..713de3163 100644 --- a/daemon/worker.c +++ b/daemon/worker.c @@ -2174,7 +2174,9 @@ worker_init(struct worker* worker, struct config_file *cfg, cfg->harden_large_queries, cfg->http_max_streams, cfg->http_endpoint, cfg->http_notls_downstream, worker->daemon->tcl, worker->daemon->listen_sslctx, - dtenv, worker_handle_request, worker); + dtenv, worker->daemon->doq_table, worker->env.rnd, + cfg->ssl_service_key, cfg->ssl_service_pem, cfg, + worker_handle_request, worker); if(!worker->front) { log_err("could not create listening sockets"); worker_delete(worker); @@ -2508,3 +2510,19 @@ void dtio_mainfdcallback(int ATTR_UNUSED(fd), short ATTR_UNUSED(ev), log_assert(0); } #endif + +#ifdef HAVE_NGTCP2 +void doq_client_event_cb(int ATTR_UNUSED(fd), short ATTR_UNUSED(ev), + void* ATTR_UNUSED(arg)) +{ + log_assert(0); +} +#endif + +#ifdef HAVE_NGTCP2 +void doq_client_timer_cb(int ATTR_UNUSED(fd), short ATTR_UNUSED(ev), + 
void* ATTR_UNUSED(arg)) +{ + log_assert(0); +} +#endif diff --git a/dnstap/unbound-dnstap-socket.c b/dnstap/unbound-dnstap-socket.c index f203aa7d7..7f8be4965 100644 --- a/dnstap/unbound-dnstap-socket.c +++ b/dnstap/unbound-dnstap-socket.c @@ -1151,7 +1151,9 @@ void dtio_mainfdcallback(int fd, short ATTR_UNUSED(bits), void* arg) char* id = NULL; struct sockaddr_storage addr; socklen_t addrlen = (socklen_t)sizeof(addr); - int s = accept(fd, (struct sockaddr*)&addr, &addrlen); + int s; + memset(&addr, 0, sizeof(addr)); + s = accept(fd, (struct sockaddr*)&addr, &addrlen); if(s == -1) { #ifndef USE_WINSOCK /* EINTR is signal interrupt. others are closed connection. */ @@ -1543,8 +1545,8 @@ int main(int argc, char** argv) usage(argv); } } - argc -= optind; - argv += optind; + /* argc -= optind; not using further arguments */ + /* argv += optind; not using further arguments */ if(usessl) { #ifdef HAVE_SSL @@ -1783,3 +1785,19 @@ void remote_get_opt_ssl(char* ATTR_UNUSED(str), void* ATTR_UNUSED(arg)) { log_assert(0); } + +#ifdef HAVE_NGTCP2 +void doq_client_event_cb(int ATTR_UNUSED(fd), short ATTR_UNUSED(ev), + void* ATTR_UNUSED(arg)) +{ + log_assert(0); +} +#endif + +#ifdef HAVE_NGTCP2 +void doq_client_timer_cb(int ATTR_UNUSED(fd), short ATTR_UNUSED(ev), + void* ATTR_UNUSED(arg)) +{ + log_assert(0); +} +#endif diff --git a/doc/Changelog b/doc/Changelog index d47b98520..adc76c08d 100644 --- a/doc/Changelog +++ b/doc/Changelog @@ -1,3 +1,67 @@ +17 October 2024: Wouter + - Tag for 1.22.0 release. This did not contain the 1154 fix + from 16 oct. The code repository continues with + version 1.22.1 in development. + +16 October 2024: Yorgos + - Fix for dnsoverquic and dnstap to use the correct dnstap + environment. + +16 October 2024: Wouter + - Fix for dnstap with dnscrypt and dnstap without dnsoverquic. + - Fix #1154: Tag Incorrectly Applying for Other Interfaces + Using the Same IP. This fix is not for 1.22.0. 
+ +14 October 2024: Wouter + - Fix to display warning if quic-port is set but dnsoverquic is not + enabled when compiled. + - Fix dnsoverquic to extend the number of streams when one is closed. + +11 October 2024: Wouter + - Fix to disable detection of quic configured ports when quic is + not compiled in. + - Fix harden-unverified-glue for AAAA cache_fill_missing lookups. + - Fix contrib/aaaa-filter-iterator.patch for change in call + signature for cache_fill_missing. + +10 October 2024: Wouter + - Fix cookie_file test sporadic fails for time change during + the test. + - Fix add reallocarray to alloc stats unit test, and disable + override of strdup in unbound-host, and the result of config + get option is freed properly. + - Tag for 1.22.0rc1. + +9 October 2024: Wouter + - Merge #871: DNS over QUIC. This adds `quic-port: 853` and + `quic-size: 8m` that enable dnsoverquic, and the counters + `num.query.quic` and `mem.quic` in the statistics output. + The feature needs to be enabled by compiling with libngtcp2, + with `--with-libngtcp2=path` and libngtcp2 needs openssl+quic, + pass that with `--with-ssl=path` to compile unbound as well. + - Fix to limit NSEC TTL for messages from cachedb. Fix to limit the + prefetch ttl for messages after a CNAME with short TTL. + - Fix for dnstap compile of doqclient with doq disabled. + +8 October 2024: Wouter + - Fix #1149: unbound-control-setup hangs sometimes depending on + the openssl version. + - Fix #1128: Cannot override tcp-upstream and tls-upstream with + forward-tcp-upstream and forward-tls-upstream. + +3 October 2024: Yorgos + - Fix CVE-2024-8508, unbounded name compression could lead to denial + of service. + - This fix was part of 1.21.1, a security point release on 1.21.0. + The code repository continues with this fix and the version number + 1.22.0. + +30 September 2024: Wouter + - Fix negative cache NSEC3 parameter compares for zero length NSEC3 + salt. 
+ - Fix unbound dnstap socket test program analyzer warnings about + unused variable assignments and variable initialization. + 25 September 2024: Wouter - Fix #1144: [FR] log timestamps in ISO8601 format with timezone. This adds the option `log-time-iso: yes` that logs in ISO8601 diff --git a/doc/example.conf.in b/doc/example.conf.in index 2d16ee75f..06e2b4ba8 100644 --- a/doc/example.conf.in +++ b/doc/example.conf.in @@ -920,6 +920,7 @@ server: # tls-service-pem: "path/to/publiccertfile.pem" # tls-port: 853 # https-port: 443 + # quic-port: 853 # cipher setting for TLSv1.2 # tls-ciphers: "DHE-RSA-AES256-GCM-SHA384:DHE-RSA-AES128-GCM-SHA256:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-RSA-AES128-GCM-SHA256:DHE-RSA-AES256-SHA256:DHE-RSA-AES128-SHA256:ECDHE-RSA-AES256-SHA384:ECDHE-RSA-AES128-SHA256" @@ -984,6 +985,9 @@ server: # Disable TLS for DNS-over-HTTP downstream service. # http-notls-downstream: no + # Maximum number of bytes used for QUIC buffers. + # quic-size: 8m + # The interfaces that use these listed port numbers will support and # expect PROXYv2. For UDP and TCP/TLS interfaces. # proxy-protocol-port: portno for each of the port numbers. diff --git a/doc/unbound-control.8.in b/doc/unbound-control.8.in index 17073f938..6470d544c 100644 --- a/doc/unbound-control.8.in +++ b/doc/unbound-control.8.in @@ -606,6 +606,10 @@ queries waiting for request stream completion. Memory in bytes used by the HTTP/2 response buffers. Containing DNS responses waiting to be written back to the clients. .TP +.I mem.quic +Memory in bytes used by QUIC. Containing connection information, stream +information, queries read and responses written back to the clients. +.TP .I histogram...to.. Shows a histogram, summed over all threads. Every element counts the recursive queries whose reply time fit between the lower and upper bound. @@ -654,6 +658,10 @@ Number of queries that were made using HTTPS towards the Unbound server. 
These are also counted in num.query.tcp and num.query.tls, because HTTPS uses TLS and TCP. .TP +.I num.query.quic +Number of queries that were made using QUIC towards the Unbound server. +These are also counted in num.query.tls, because TLS is used for these queries. +.TP .I num.query.ipv6 Number of queries that were made using IPv6 towards the Unbound server. .TP diff --git a/doc/unbound.conf.5.in b/doc/unbound.conf.5.in index bc48db478..da494087c 100644 --- a/doc/unbound.conf.5.in +++ b/doc/unbound.conf.5.in @@ -566,6 +566,9 @@ tls\-system\-cert to load CA certs, otherwise the connections cannot be authenticated. This option enables TLS for all of them, but if you do not set this you can configure TLS specifically for some forward zones with forward\-tls\-upstream. And also with stub\-tls\-upstream. +If the tls\-upstream option is enabled, it is for all the forwards and stubs, +where the forward\-tls\-upstream and stub\-tls\-upstream options are ignored, +as if they had been set to yes. .TP .B ssl\-upstream: \fI<yes or no> Alternate syntax for \fBtls\-upstream\fR. If both are present in the config @@ -716,6 +719,18 @@ PROXYv2 is supported for UDP and TCP/TLS listening interfaces. There is no support for PROXYv2 on a DoH or DNSCrypt listening interface. Can list multiple, each on a new statement. .TP +.B quic\-port: \fI<number> +The port number on which to provide DNS-over-QUIC service, default 853, only +interfaces configured with that port number as @number get the QUIC service. +The interface uses QUIC for the UDP traffic on that port number. +.TP +.B quic\-size: \fI<size in bytes> +Maximum number of bytes for all QUIC buffers and data combined. Default is 8 +megabytes. A plain number is in bytes, append 'k', 'm' or 'g' for kilobytes, +megabytes or gigabytes (1024*1024 bytes in a megabyte). New connections receive +connection refused when the limit is exceeded. New streams are reset when the +limit is exceeded. +.TP .B use\-systemd: \fI<yes or no> Enable or disable systemd socket activation.
Default is no. diff --git a/iterator/iter_utils.c b/iterator/iter_utils.c index 1b4f5f6eb..cacba420e 100644 --- a/iterator/iter_utils.c +++ b/iterator/iter_utils.c @@ -1564,3 +1564,45 @@ void iterator_set_ip46_support(struct module_stack* mods, if(outnet->num_ip6 == 0) ie->supports_ipv6 = 0; } + +void +limit_nsec_ttl(struct dns_msg* msg) +{ + /* Limit NSEC and NSEC3 TTL in response, RFC9077 */ + size_t i; + int found = 0; + time_t soa_ttl = 0; + /* Limit the NSEC and NSEC3 TTL values to the SOA TTL and SOA minimum + * TTL. That has already been applied to the SOA record ttl. */ + for(i=0; i<msg->rep->rrset_count; i++) { + struct ub_packed_rrset_key* s = msg->rep->rrsets[i]; + if(ntohs(s->rk.type) == LDNS_RR_TYPE_SOA) { + struct packed_rrset_data* soadata = (struct packed_rrset_data*)s->entry.data; + found = 1; + soa_ttl = soadata->ttl; + break; + } + } + if(!found) + return; + for(i=0; i<msg->rep->rrset_count; i++) { + struct ub_packed_rrset_key* s = msg->rep->rrsets[i]; + if(ntohs(s->rk.type) == LDNS_RR_TYPE_NSEC || + ntohs(s->rk.type) == LDNS_RR_TYPE_NSEC3) { + struct packed_rrset_data* data = (struct packed_rrset_data*)s->entry.data; + /* Limit the negative TTL.
*/ + if(data->ttl > soa_ttl) { + if(verbosity >= VERB_ALGO) { + char buf[256]; + snprintf(buf, sizeof(buf), + "limiting TTL %d of %s record to the SOA TTL of %d for", + (int)data->ttl, ((ntohs(s->rk.type) == LDNS_RR_TYPE_NSEC)?"NSEC":"NSEC3"), (int)soa_ttl); + log_nametypeclass(VERB_ALGO, buf, + s->rk.dname, ntohs(s->rk.type), + ntohs(s->rk.rrset_class)); + } + data->ttl = soa_ttl; + } + } + } +} diff --git a/iterator/iter_utils.h b/iterator/iter_utils.h index 4024629e6..0361e4377 100644 --- a/iterator/iter_utils.h +++ b/iterator/iter_utils.h @@ -428,4 +428,11 @@ int iter_stub_fwd_no_cache(struct module_qstate *qstate, void iterator_set_ip46_support(struct module_stack* mods, struct module_env* env, struct outside_network* outnet); +/** + * Limit NSEC and NSEC3 TTL in response, RFC9077 + * @param msg: dns message, the SOA record ttl is used to restrict ttls + * of NSEC and NSEC3 RRsets. If no SOA record, nothing happens. + */ +void limit_nsec_ttl(struct dns_msg* msg); + #endif /* ITERATOR_ITER_UTILS_H */ diff --git a/iterator/iterator.c b/iterator/iterator.c index 0b66db8a6..59e4b36ce 100644 --- a/iterator/iterator.c +++ b/iterator/iterator.c @@ -372,48 +372,6 @@ error_response_cache(struct module_qstate* qstate, int id, int rcode) return error_response(qstate, id, rcode); } -/** limit NSEC and NSEC3 TTL in response, RFC9077 */ -static void -limit_nsec_ttl(struct dns_msg* msg) -{ - size_t i; - int found = 0; - time_t soa_ttl = 0; - /* Limit the NSEC and NSEC3 TTL values to the SOA TTL and SOA minimum - * TTL. That has already been applied to the SOA record ttl. 
*/ - for(i=0; i<msg->rep->rrset_count; i++) { - struct ub_packed_rrset_key* s = msg->rep->rrsets[i]; - if(ntohs(s->rk.type) == LDNS_RR_TYPE_SOA) { - struct packed_rrset_data* soadata = (struct packed_rrset_data*)s->entry.data; - found = 1; - soa_ttl = soadata->ttl; - break; - } - } - if(!found) - return; - for(i=0; i<msg->rep->rrset_count; i++) { - struct ub_packed_rrset_key* s = msg->rep->rrsets[i]; - if(ntohs(s->rk.type) == LDNS_RR_TYPE_NSEC || - ntohs(s->rk.type) == LDNS_RR_TYPE_NSEC3) { - struct packed_rrset_data* data = (struct packed_rrset_data*)s->entry.data; - /* Limit the negative TTL. */ - if(data->ttl > soa_ttl) { - if(verbosity >= VERB_ALGO) { - char buf[256]; - snprintf(buf, sizeof(buf), - "limiting TTL %d of %s record to the SOA TTL of %d for", - (int)data->ttl, ((ntohs(s->rk.type) == LDNS_RR_TYPE_NSEC)?"NSEC":"NSEC3"), (int)soa_ttl); - log_nametypeclass(VERB_ALGO, buf, - s->rk.dname, ntohs(s->rk.type), - ntohs(s->rk.rrset_class)); - } - data->ttl = soa_ttl; - } - } - } -} - /** check if prepend item is duplicate item */ static int prepend_is_duplicate(struct ub_packed_rrset_key** sets, size_t to, @@ -456,8 +414,11 @@ iter_prepend(struct iter_qstate* iq, struct dns_msg* msg, num_an = 0; for(p = iq->an_prepend_list; p; p = p->next) { sets[num_an++] = p->rrset; - if(ub_packed_rrset_ttl(p->rrset) < msg->rep->ttl) + if(ub_packed_rrset_ttl(p->rrset) < msg->rep->ttl) { msg->rep->ttl = ub_packed_rrset_ttl(p->rrset); + msg->rep->prefetch_ttl = PREFETCH_TTL_CALC(msg->rep->ttl); + msg->rep->serve_expired_ttl = msg->rep->ttl + SERVE_EXPIRED_TTL; + } } memcpy(sets+num_an, msg->rep->rrsets, msg->rep->an_numrrsets * sizeof(struct ub_packed_rrset_key*)); @@ -470,8 +431,11 @@ iter_prepend(struct iter_qstate* iq, struct dns_msg* msg, msg->rep->ns_numrrsets, p->rrset)) continue; sets[msg->rep->an_numrrsets + num_an + num_ns++] = p->rrset; - if(ub_packed_rrset_ttl(p->rrset) < msg->rep->ttl) + if(ub_packed_rrset_ttl(p->rrset) < msg->rep->ttl) { msg->rep->ttl =
ub_packed_rrset_ttl(p->rrset); + msg->rep->prefetch_ttl = PREFETCH_TTL_CALC(msg->rep->ttl); + msg->rep->serve_expired_ttl = msg->rep->ttl + SERVE_EXPIRED_TTL; + } } memcpy(sets + num_an + msg->rep->an_numrrsets + num_ns, msg->rep->rrsets + msg->rep->an_numrrsets, diff --git a/libunbound/libworker.c b/libunbound/libworker.c index 94b644a49..da7d4c224 100644 --- a/libunbound/libworker.c +++ b/libunbound/libworker.c @@ -1058,3 +1058,19 @@ void dtio_mainfdcallback(int ATTR_UNUSED(fd), short ATTR_UNUSED(ev), log_assert(0); } #endif + +#ifdef HAVE_NGTCP2 +void doq_client_event_cb(int ATTR_UNUSED(fd), short ATTR_UNUSED(ev), + void* ATTR_UNUSED(arg)) +{ + log_assert(0); +} +#endif + +#ifdef HAVE_NGTCP2 +void doq_client_timer_cb(int ATTR_UNUSED(fd), short ATTR_UNUSED(ev), + void* ATTR_UNUSED(arg)) +{ + log_assert(0); +} +#endif diff --git a/libunbound/unbound.h b/libunbound/unbound.h index bb8e8acf0..ef2c5c067 100644 --- a/libunbound/unbound.h +++ b/libunbound/unbound.h @@ -845,6 +845,10 @@ struct ub_server_stats { long long qtls_resume; /** RPZ action stats */ long long rpz_action[UB_STATS_RPZ_ACTION_NUM]; + /** number of bytes in QUIC buffers */ + long long mem_quic; + /** number of queries over (DNS over) QUIC */ + long long qquic; }; /** diff --git a/services/cache/dns.c b/services/cache/dns.c index 8a08d3a30..7ab63bacf 100644 --- a/services/cache/dns.c +++ b/services/cache/dns.c @@ -398,7 +398,7 @@ cache_fill_missing(struct module_env* env, uint16_t qclass, } } akey = rrset_cache_lookup(env->rrset_cache, ns->name, - ns->namelen, LDNS_RR_TYPE_AAAA, qclass, 0, now, 0); + ns->namelen, LDNS_RR_TYPE_AAAA, qclass, flags, now, 0); if(akey) { if(!delegpt_add_rrset_AAAA(dp, region, akey, ns->lame, NULL)) { diff --git a/services/listen_dnsport.c b/services/listen_dnsport.c index 6c0691f2a..5dbac3650 100644 --- a/services/listen_dnsport.c +++ b/services/listen_dnsport.c @@ -56,9 +56,11 @@ #include "util/net_help.h" #include "sldns/sbuffer.h" #include "sldns/parseutil.h" +#include 
"sldns/wire2str.h" #include "services/mesh.h" #include "util/fptr_wlist.h" #include "util/locks.h" +#include "util/timeval_func.h" #ifdef HAVE_NETDB_H #include @@ -79,9 +81,30 @@ #ifdef HAVE_NET_IF_H #include #endif + +#ifdef HAVE_TIME_H +#include +#endif +#include + +#ifdef HAVE_NGTCP2 +#include +#include +#ifdef HAVE_NGTCP2_NGTCP2_CRYPTO_QUICTLS_H +#include +#else +#include +#endif +#endif + +#ifdef HAVE_OPENSSL_SSL_H +#include +#endif + #ifdef HAVE_LINUX_NET_TSTAMP_H #include #endif + /** number of queued TCP connections for listen() */ #define TCP_BACKLOG 256 @@ -109,9 +132,11 @@ static int http2_response_buffer_lock_inited = 0; /** * Debug print of the getaddrinfo returned address. * @param addr: the address returned. + * @param additional: additional text that describes the type of socket, + * or NULL for no text. */ static void -verbose_print_addr(struct addrinfo *addr) +verbose_print_addr(struct addrinfo *addr, const char* additional) { if(verbosity >= VERB_ALGO) { char buf[100]; @@ -126,13 +151,14 @@ verbose_print_addr(struct addrinfo *addr) (void)strlcpy(buf, "(null)", sizeof(buf)); } buf[sizeof(buf)-1] = 0; - verbose(VERB_ALGO, "creating %s%s socket %s %d", + verbose(VERB_ALGO, "creating %s%s socket %s %d%s%s", addr->ai_socktype==SOCK_DGRAM?"udp": addr->ai_socktype==SOCK_STREAM?"tcp":"otherproto", addr->ai_family==AF_INET?"4": addr->ai_family==AF_INET6?"6": "_otherfam", buf, - ntohs(((struct sockaddr_in*)addr->ai_addr)->sin_port)); + ntohs(((struct sockaddr_in*)addr->ai_addr)->sin_port), + (additional?" 
":""), (additional?additional:"")); } } @@ -673,7 +699,7 @@ create_udp_sock(int family, int socktype, struct sockaddr* addr, int create_tcp_accept_sock(struct addrinfo *addr, int v6only, int* noproto, int* reuseport, int transparent, int mss, int nodelay, int freebind, - int use_systemd, int dscp) + int use_systemd, int dscp, const char* additional) { int s = -1; char* err; @@ -692,7 +718,7 @@ create_tcp_accept_sock(struct addrinfo *addr, int v6only, int* noproto, #if !defined(IP_FREEBIND) (void)freebind; #endif - verbose_print_addr(addr); + verbose_print_addr(addr, additional); *noproto = 0; #ifdef HAVE_SYSTEMD if (!use_systemd || @@ -1008,7 +1034,8 @@ static int make_sock(int stype, const char* ifname, const char* port, struct addrinfo *hints, int v6only, int* noip6, size_t rcv, size_t snd, int* reuseport, int transparent, int tcp_mss, int nodelay, int freebind, - int use_systemd, int dscp, struct unbound_socket* ub_sock) + int use_systemd, int dscp, struct unbound_socket* ub_sock, + const char* additional) { struct addrinfo *res = NULL; int r, s, inuse, noproto; @@ -1032,7 +1059,7 @@ make_sock(int stype, const char* ifname, const char* port, return -1; } if(stype == SOCK_DGRAM) { - verbose_print_addr(res); + verbose_print_addr(res, additional); s = create_udp_sock(res->ai_family, res->ai_socktype, (struct sockaddr*)res->ai_addr, res->ai_addrlen, v6only, &inuse, &noproto, (int)rcv, (int)snd, 1, @@ -1045,7 +1072,7 @@ make_sock(int stype, const char* ifname, const char* port, } else { s = create_tcp_accept_sock(res, v6only, &noproto, reuseport, transparent, tcp_mss, nodelay, freebind, use_systemd, - dscp); + dscp, additional); if(s == -1 && noproto && hints->ai_family == AF_INET6){ *noip6 = 1; } @@ -1079,7 +1106,8 @@ static int make_sock_port(int stype, const char* ifname, const char* port, struct addrinfo *hints, int v6only, int* noip6, size_t rcv, size_t snd, int* reuseport, int transparent, int tcp_mss, int nodelay, int freebind, - int use_systemd, int dscp, 
struct unbound_socket* ub_sock) + int use_systemd, int dscp, struct unbound_socket* ub_sock, + const char* additional) { char* s = strchr(ifname, '@'); if(s) { @@ -1102,11 +1130,11 @@ make_sock_port(int stype, const char* ifname, const char* port, p[strlen(s+1)]=0; return make_sock(stype, newif, p, hints, v6only, noip6, rcv, snd, reuseport, transparent, tcp_mss, nodelay, freebind, - use_systemd, dscp, ub_sock); + use_systemd, dscp, ub_sock, additional); } return make_sock(stype, ifname, port, hints, v6only, noip6, rcv, snd, reuseport, transparent, tcp_mss, nodelay, freebind, use_systemd, - dscp, ub_sock); + dscp, ub_sock, additional); } /** @@ -1254,6 +1282,8 @@ if_is_ssl(const char* ifname, const char* port, int ssl_port, * @param use_systemd: if true, fetch sockets from systemd. * @param dnscrypt_port: dnscrypt service port number * @param dscp: DSCP to use. + * @param quic_port: dns over quic port number. + * @param http_notls_downstream: if no tls is used for https downstream. * @param sock_queue_timeout: the sock_queue_timeout from config. Seconds to * wait to discard if UDP packets have waited for long in the socket * buffer. 
@@ -1267,7 +1297,7 @@ ports_create_if(const char* ifname, int do_auto, int do_udp, int do_tcp, struct config_strlist* proxy_protocol_port, int* reuseport, int transparent, int tcp_mss, int freebind, int http2_nodelay, int use_systemd, int dnscrypt_port, int dscp, - int sock_queue_timeout) + int quic_port, int http_notls_downstream, int sock_queue_timeout) { int s, noip6=0; int is_https = if_is_https(ifname, port, https_port); @@ -1275,6 +1305,8 @@ ports_create_if(const char* ifname, int do_auto, int do_udp, int do_tcp, int is_pp2 = if_is_pp2(ifname, port, proxy_protocol_port); int nodelay = is_https && http2_nodelay; struct unbound_socket* ub_sock; + int is_doq = if_is_quic(ifname, port, quic_port); + const char* add = NULL; if(!do_udp && !do_tcp) return 0; @@ -1286,6 +1318,9 @@ ports_create_if(const char* ifname, int do_auto, int do_udp, int do_tcp, } else if(is_https) { fatal_exit("PROXYv2 and DoH combination not " "supported!"); + } else if(is_doq) { + fatal_exit("PROXYv2 and DoQ combination not " + "supported!"); } } @@ -1295,7 +1330,8 @@ ports_create_if(const char* ifname, int do_auto, int do_udp, int do_tcp, return 0; if((s = make_sock_port(SOCK_DGRAM, ifname, port, hints, 1, &noip6, rcv, snd, reuseport, transparent, - tcp_mss, nodelay, freebind, use_systemd, dscp, ub_sock)) == -1) { + tcp_mss, nodelay, freebind, use_systemd, dscp, ub_sock, + (is_dnscrypt?"udpancil_dnscrypt":"udpancil"))) == -1) { free(ub_sock->addr); free(ub_sock); if(noip6) { @@ -1323,13 +1359,36 @@ ports_create_if(const char* ifname, int do_auto, int do_udp, int do_tcp, return 0; } } else if(do_udp) { + enum listen_type udp_port_type; ub_sock = calloc(1, sizeof(struct unbound_socket)); if(!ub_sock) return 0; + if(is_dnscrypt) { + udp_port_type = listen_type_udp_dnscrypt; + add = "dnscrypt"; + } else if(is_doq) { + udp_port_type = listen_type_doq; + add = "doq"; + if(((strchr(ifname, '@') && + atoi(strchr(ifname, '@')+1) == 53) || + (!strchr(ifname, '@') && atoi(port) == 53))) { + 
log_err("DNS over QUIC is not allowed on " + "port 53. Port 53 is for DNS " + "datagrams. Error for " + "interface '%s'.", ifname); + free(ub_sock->addr); + free(ub_sock); + return 0; + } + } else { + udp_port_type = listen_type_udp; + add = NULL; + } /* regular udp socket */ if((s = make_sock_port(SOCK_DGRAM, ifname, port, hints, 1, &noip6, rcv, snd, reuseport, transparent, - tcp_mss, nodelay, freebind, use_systemd, dscp, ub_sock)) == -1) { + tcp_mss, nodelay, freebind, use_systemd, dscp, ub_sock, + add)) == -1) { free(ub_sock->addr); free(ub_sock); if(noip6) { @@ -1338,14 +1397,25 @@ ports_create_if(const char* ifname, int do_auto, int do_udp, int do_tcp, } return 0; } - if (sock_queue_timeout && !set_recvtimestamp(s)) { - log_warn("socket timestamping is not available"); + if(udp_port_type == listen_type_doq) { + if(!set_recvpktinfo(s, hints->ai_family)) { + sock_close(s); + free(ub_sock->addr); + free(ub_sock); + return 0; + } } - if(!port_insert(list, s, is_dnscrypt - ?listen_type_udp_dnscrypt : - (sock_queue_timeout ? 
- listen_type_udpancil:listen_type_udp), - is_pp2, ub_sock)) { + if(udp_port_type == listen_type_udp && sock_queue_timeout) + udp_port_type = listen_type_udpancil; + if (sock_queue_timeout) { + if(!set_recvtimestamp(s)) { + log_warn("socket timestamping is not available"); + } else { + if(udp_port_type == listen_type_udp) + udp_port_type = listen_type_udpancil; + } + } + if(!port_insert(list, s, udp_port_type, is_pp2, ub_sock)) { sock_close(s); free(ub_sock->addr); free(ub_sock); @@ -1359,17 +1429,24 @@ ports_create_if(const char* ifname, int do_auto, int do_udp, int do_tcp, ub_sock = calloc(1, sizeof(struct unbound_socket)); if(!ub_sock) return 0; - if(is_ssl) + if(is_ssl) { port_type = listen_type_ssl; - else if(is_https) + add = "tls"; + } else if(is_https) { port_type = listen_type_http; - else if(is_dnscrypt) + add = "https"; + if(http_notls_downstream) + add = "http"; + } else if(is_dnscrypt) { port_type = listen_type_tcp_dnscrypt; - else + add = "dnscrypt"; + } else { port_type = listen_type_tcp; + add = NULL; + } if((s = make_sock_port(SOCK_STREAM, ifname, port, hints, 1, &noip6, 0, 0, reuseport, transparent, tcp_mss, nodelay, - freebind, use_systemd, dscp, ub_sock)) == -1) { + freebind, use_systemd, dscp, ub_sock, add)) == -1) { free(ub_sock->addr); free(ub_sock); if(noip6) { @@ -1446,8 +1523,10 @@ listen_create(struct comm_base* base, struct listen_port* ports, size_t bufsize, int tcp_accept_count, int tcp_idle_timeout, int harden_large_queries, uint32_t http_max_streams, char* http_endpoint, int http_notls, struct tcl_list* tcp_conn_limit, - void* sslctx, struct dt_env* dtenv, comm_point_callback_type* cb, - void *cb_arg) + void* sslctx, struct dt_env* dtenv, struct doq_table* doq_table, + struct ub_randstate* rnd, const char* ssl_service_key, + const char* ssl_service_pem, struct config_file* cfg, + comm_point_callback_type* cb, void *cb_arg) { struct listen_dnsport* front = (struct listen_dnsport*) malloc(sizeof(struct listen_dnsport)); @@ -1471,6 
+1550,16 @@ listen_create(struct comm_base* base, struct listen_port* ports, cp = comm_point_create_udp(base, ports->fd, front->udp_buff, ports->pp2_enabled, cb, cb_arg, ports->socket); + } else if(ports->ftype == listen_type_doq) { +#ifndef HAVE_NGTCP2 + log_warn("Unbound is not compiled with " + "ngtcp2. This is required to use DNS " + "over QUIC."); +#endif + cp = comm_point_create_doq(base, ports->fd, + front->udp_buff, cb, cb_arg, ports->socket, + doq_table, rnd, ssl_service_key, + ssl_service_pem, cfg); } else if(ports->ftype == listen_type_tcp || ports->ftype == listen_type_tcp_dnscrypt) { cp = comm_point_create_tcp(base, ports->fd, @@ -1858,7 +1947,9 @@ listening_ports_open(struct config_file* cfg, char** ifs, int num_ifs, reuseport, cfg->ip_transparent, cfg->tcp_mss, cfg->ip_freebind, cfg->http_nodelay, cfg->use_systemd, - cfg->dnscrypt_port, cfg->ip_dscp, cfg->sock_queue_timeout)) { + cfg->dnscrypt_port, cfg->ip_dscp, + cfg->quic_port, cfg->http_notls_downstream, + cfg->sock_queue_timeout)) { listening_ports_free(list); return NULL; } @@ -1875,7 +1966,9 @@ listening_ports_open(struct config_file* cfg, char** ifs, int num_ifs, reuseport, cfg->ip_transparent, cfg->tcp_mss, cfg->ip_freebind, cfg->http_nodelay, cfg->use_systemd, - cfg->dnscrypt_port, cfg->ip_dscp, cfg->sock_queue_timeout)) { + cfg->dnscrypt_port, cfg->ip_dscp, + cfg->quic_port, cfg->http_notls_downstream, + cfg->sock_queue_timeout)) { listening_ports_free(list); return NULL; } @@ -1894,7 +1987,9 @@ listening_ports_open(struct config_file* cfg, char** ifs, int num_ifs, reuseport, cfg->ip_transparent, cfg->tcp_mss, cfg->ip_freebind, cfg->http_nodelay, cfg->use_systemd, - cfg->dnscrypt_port, cfg->ip_dscp, cfg->sock_queue_timeout)) { + cfg->dnscrypt_port, cfg->ip_dscp, + cfg->quic_port, cfg->http_notls_downstream, + cfg->sock_queue_timeout)) { listening_ports_free(list); return NULL; } @@ -1910,7 +2005,9 @@ listening_ports_open(struct config_file* cfg, char** ifs, int num_ifs, reuseport, 
cfg->ip_transparent, cfg->tcp_mss, cfg->ip_freebind, cfg->http_nodelay, cfg->use_systemd, - cfg->dnscrypt_port, cfg->ip_dscp, cfg->sock_queue_timeout)) { + cfg->dnscrypt_port, cfg->ip_dscp, + cfg->quic_port, cfg->http_notls_downstream, + cfg->sock_queue_timeout)) { listening_ports_free(list); return NULL; } @@ -1928,7 +2025,9 @@ listening_ports_open(struct config_file* cfg, char** ifs, int num_ifs, reuseport, cfg->ip_transparent, cfg->tcp_mss, cfg->ip_freebind, cfg->http_nodelay, cfg->use_systemd, - cfg->dnscrypt_port, cfg->ip_dscp, cfg->sock_queue_timeout)) { + cfg->dnscrypt_port, cfg->ip_dscp, + cfg->quic_port, cfg->http_notls_downstream, + cfg->sock_queue_timeout)) { listening_ports_free(list); return NULL; } @@ -1944,7 +2043,9 @@ listening_ports_open(struct config_file* cfg, char** ifs, int num_ifs, reuseport, cfg->ip_transparent, cfg->tcp_mss, cfg->ip_freebind, cfg->http_nodelay, cfg->use_systemd, - cfg->dnscrypt_port, cfg->ip_dscp, cfg->sock_queue_timeout)) { + cfg->dnscrypt_port, cfg->ip_dscp, + cfg->quic_port, cfg->http_notls_downstream, + cfg->sock_queue_timeout)) { listening_ports_free(list); return NULL; } @@ -3154,3 +3255,2368 @@ nghttp2_session_callbacks* http2_req_callbacks_create(void) return callbacks; } #endif /* HAVE_NGHTTP2 */ + +#ifdef HAVE_NGTCP2 +struct doq_table* +doq_table_create(struct config_file* cfg, struct ub_randstate* rnd) +{ + struct doq_table* table = calloc(1, sizeof(*table)); + if(!table) + return NULL; + table->idle_timeout = ((uint64_t)cfg->tcp_idle_timeout)* + NGTCP2_MILLISECONDS; + table->sv_scidlen = 16; + table->static_secret_len = 16; + table->static_secret = malloc(table->static_secret_len); + if(!table->static_secret) { + free(table); + return NULL; + } + doq_fill_rand(rnd, table->static_secret, table->static_secret_len); + table->conn_tree = rbtree_create(doq_conn_cmp); + if(!table->conn_tree) { + free(table->static_secret); + free(table); + return NULL; + } + table->conid_tree = rbtree_create(doq_conid_cmp); + 
if(!table->conid_tree) { + free(table->static_secret); + free(table->conn_tree); + free(table); + return NULL; + } + table->timer_tree = rbtree_create(doq_timer_cmp); + if(!table->timer_tree) { + free(table->static_secret); + free(table->conn_tree); + free(table->conid_tree); + free(table); + return NULL; + } + lock_rw_init(&table->lock); + lock_rw_init(&table->conid_lock); + lock_basic_init(&table->size_lock); + lock_protect(&table->lock, &table->static_secret, + sizeof(table->static_secret)); + lock_protect(&table->lock, &table->static_secret_len, + sizeof(table->static_secret_len)); + lock_protect(&table->lock, table->static_secret, + table->static_secret_len); + lock_protect(&table->lock, &table->sv_scidlen, + sizeof(table->sv_scidlen)); + lock_protect(&table->lock, &table->idle_timeout, + sizeof(table->idle_timeout)); + lock_protect(&table->lock, &table->conn_tree, sizeof(table->conn_tree)); + lock_protect(&table->lock, table->conn_tree, sizeof(*table->conn_tree)); + lock_protect(&table->conid_lock, table->conid_tree, + sizeof(*table->conid_tree)); + lock_protect(&table->lock, table->timer_tree, + sizeof(*table->timer_tree)); + lock_protect(&table->size_lock, &table->current_size, + sizeof(table->current_size)); + return table; +} + +/** delete elements from the connection tree */ +static void +conn_tree_del(rbnode_type* node, void* arg) +{ + struct doq_table* table = (struct doq_table*)arg; + struct doq_conn* conn; + if(!node) + return; + conn = (struct doq_conn*)node->key; + if(conn->timer.timer_in_list) { + /* Remove timer from list first, because finding the rbnode + * element of the setlist of same timeouts needs tree lookup. + * Edit the tree structure after that lookup. 
*/ + doq_timer_list_remove(conn->table, &conn->timer); + } + if(conn->timer.timer_in_tree) + doq_timer_tree_remove(conn->table, &conn->timer); + doq_table_quic_size_subtract(table, sizeof(*conn)+conn->key.dcidlen); + doq_conn_delete(conn, table); +} + +/** delete elements from the connection id tree */ +static void +conid_tree_del(rbnode_type* node, void* ATTR_UNUSED(arg)) +{ + if(!node) + return; + doq_conid_delete((struct doq_conid*)node->key); +} + +void +doq_table_delete(struct doq_table* table) +{ + if(!table) + return; + lock_rw_destroy(&table->lock); + free(table->static_secret); + if(table->conn_tree) { + traverse_postorder(table->conn_tree, conn_tree_del, table); + free(table->conn_tree); + } + lock_rw_destroy(&table->conid_lock); + if(table->conid_tree) { + /* The tree should be empty, because the doq_conn_delete calls + * above should have also removed their conid elements. */ + traverse_postorder(table->conid_tree, conid_tree_del, NULL); + free(table->conid_tree); + } + lock_basic_destroy(&table->size_lock); + if(table->timer_tree) { + /* The tree should be empty, because the conn_tree_del calls + * above should also have removed them. Also the doq_timer + * is part of the doq_conn struct, so is already freed. */ + free(table->timer_tree); + } + table->write_list_first = NULL; + table->write_list_last = NULL; + free(table); +} + +struct doq_timer* +doq_timer_find_time(struct doq_table* table, struct timeval* tv) +{ + struct doq_timer key; + struct rbnode_type* node; + memset(&key, 0, sizeof(key)); + key.time.tv_sec = tv->tv_sec; + key.time.tv_usec = tv->tv_usec; + node = rbtree_search(table->timer_tree, &key); + if(node) + return (struct doq_timer*)node->key; + return NULL; +} + +void +doq_timer_tree_remove(struct doq_table* table, struct doq_timer* timer) +{ + if(!timer->timer_in_tree) + return; + rbtree_delete(table->timer_tree, timer); + timer->timer_in_tree = 0; + /* This item could have more timers in the same set. 
*/ + if(timer->setlist_first) { + struct doq_timer* rb_timer = timer->setlist_first; + /* del first element from setlist */ + if(rb_timer->setlist_next) + rb_timer->setlist_next->setlist_prev = NULL; + else + timer->setlist_last = NULL; + timer->setlist_first = rb_timer->setlist_next; + rb_timer->setlist_prev = NULL; + rb_timer->setlist_next = NULL; + rb_timer->timer_in_list = 0; + /* insert it into the tree as new rb element */ + memset(&rb_timer->node, 0, sizeof(rb_timer->node)); + rb_timer->node.key = rb_timer; + rbtree_insert(table->timer_tree, &rb_timer->node); + rb_timer->timer_in_tree = 1; + /* the setlist, if any remainder, moves to the rb element */ + rb_timer->setlist_first = timer->setlist_first; + rb_timer->setlist_last = timer->setlist_last; + timer->setlist_first = NULL; + timer->setlist_last = NULL; + rb_timer->worker_doq_socket = timer->worker_doq_socket; + } + timer->worker_doq_socket = NULL; +} + +void +doq_timer_list_remove(struct doq_table* table, struct doq_timer* timer) +{ + struct doq_timer* rb_timer; + if(!timer->timer_in_list) + return; + /* The item in the rbtree has the list start and end. 
*/ + rb_timer = doq_timer_find_time(table, &timer->time); + if(rb_timer) { + if(timer->setlist_prev) + timer->setlist_prev->setlist_next = timer->setlist_next; + else + rb_timer->setlist_first = timer->setlist_next; + if(timer->setlist_next) + timer->setlist_next->setlist_prev = timer->setlist_prev; + else + rb_timer->setlist_last = timer->setlist_prev; + timer->setlist_prev = NULL; + timer->setlist_next = NULL; + } + timer->timer_in_list = 0; +} + +/** doq append timer to setlist */ +static void +doq_timer_list_append(struct doq_timer* rb_timer, struct doq_timer* timer) +{ + log_assert(timer->timer_in_list == 0); + timer->timer_in_list = 1; + timer->setlist_next = NULL; + timer->setlist_prev = rb_timer->setlist_last; + if(rb_timer->setlist_last) + rb_timer->setlist_last->setlist_next = timer; + else + rb_timer->setlist_first = timer; + rb_timer->setlist_last = timer; +} + +void +doq_timer_unset(struct doq_table* table, struct doq_timer* timer) +{ + if(timer->timer_in_list) { + /* Remove timer from list first, because finding the rbnode + * element of the setlist of same timeouts needs tree lookup. + * Edit the tree structure after that lookup. 
*/ + doq_timer_list_remove(table, timer); + } + if(timer->timer_in_tree) + doq_timer_tree_remove(table, timer); + timer->worker_doq_socket = NULL; +} + +void doq_timer_set(struct doq_table* table, struct doq_timer* timer, + struct doq_server_socket* worker_doq_socket, struct timeval* tv) +{ + struct doq_timer* rb_timer; + if(verbosity >= VERB_ALGO && timer->conn) { + char a[256]; + struct timeval rel; + addr_to_str((void*)&timer->conn->key.paddr.addr, + timer->conn->key.paddr.addrlen, a, sizeof(a)); + timeval_subtract(&rel, tv, worker_doq_socket->now_tv); + verbose(VERB_ALGO, "doq %s timer set %d.%6.6d in %d.%6.6d", + a, (int)tv->tv_sec, (int)tv->tv_usec, + (int)rel.tv_sec, (int)rel.tv_usec); + } + if(timer->timer_in_tree || timer->timer_in_list) { + if(timer->time.tv_sec == tv->tv_sec && + timer->time.tv_usec == tv->tv_usec) + return; /* already set on that time */ + doq_timer_unset(table, timer); + } + timer->time.tv_sec = tv->tv_sec; + timer->time.tv_usec = tv->tv_usec; + rb_timer = doq_timer_find_time(table, tv); + if(rb_timer) { + /* There is a timeout already with this value. Timer is + * added to the setlist. */ + doq_timer_list_append(rb_timer, timer); + } else { + /* There is no timeout with this value. Make timer a new + * tree element. 
*/ + memset(&timer->node, 0, sizeof(timer->node)); + timer->node.key = timer; + rbtree_insert(table->timer_tree, &timer->node); + timer->timer_in_tree = 1; + timer->setlist_first = NULL; + timer->setlist_last = NULL; + timer->worker_doq_socket = worker_doq_socket; + } +} + +struct doq_conn* +doq_conn_create(struct comm_point* c, struct doq_pkt_addr* paddr, + const uint8_t* dcid, size_t dcidlen, uint32_t version) +{ + struct doq_conn* conn = calloc(1, sizeof(*conn)); + if(!conn) + return NULL; + conn->node.key = conn; + conn->doq_socket = c->doq_socket; + conn->table = c->doq_socket->table; + memmove(&conn->key.paddr.addr, &paddr->addr, paddr->addrlen); + conn->key.paddr.addrlen = paddr->addrlen; + memmove(&conn->key.paddr.localaddr, &paddr->localaddr, + paddr->localaddrlen); + conn->key.paddr.localaddrlen = paddr->localaddrlen; + conn->key.paddr.ifindex = paddr->ifindex; + conn->key.dcid = memdup((void*)dcid, dcidlen); + if(!conn->key.dcid) { + free(conn); + return NULL; + } + conn->key.dcidlen = dcidlen; + conn->version = version; +#ifdef HAVE_NGTCP2_CCERR_DEFAULT + ngtcp2_ccerr_default(&conn->ccerr); +#else + ngtcp2_connection_close_error_default(&conn->last_error); +#endif + rbtree_init(&conn->stream_tree, &doq_stream_cmp); + conn->timer.conn = conn; + lock_basic_init(&conn->lock); + lock_protect(&conn->lock, &conn->key, sizeof(conn->key)); + lock_protect(&conn->lock, &conn->doq_socket, sizeof(conn->doq_socket)); + lock_protect(&conn->lock, &conn->table, sizeof(conn->table)); + lock_protect(&conn->lock, &conn->is_deleted, sizeof(conn->is_deleted)); + lock_protect(&conn->lock, &conn->version, sizeof(conn->version)); + lock_protect(&conn->lock, &conn->conn, sizeof(conn->conn)); + lock_protect(&conn->lock, &conn->conid_list, sizeof(conn->conid_list)); +#ifdef HAVE_NGTCP2_CCERR_DEFAULT + lock_protect(&conn->lock, &conn->ccerr, sizeof(conn->ccerr)); +#else + lock_protect(&conn->lock, &conn->last_error, sizeof(conn->last_error)); +#endif + lock_protect(&conn->lock, 
&conn->tls_alert, sizeof(conn->tls_alert));
+	lock_protect(&conn->lock, &conn->ssl, sizeof(conn->ssl));
+	lock_protect(&conn->lock, &conn->close_pkt, sizeof(conn->close_pkt));
+	lock_protect(&conn->lock, &conn->close_pkt_len,
+		sizeof(conn->close_pkt_len));
+	lock_protect(&conn->lock, &conn->close_ecn, sizeof(conn->close_ecn));
+	lock_protect(&conn->lock, &conn->stream_tree,
+		sizeof(conn->stream_tree));
+	lock_protect(&conn->lock, &conn->stream_write_first,
+		sizeof(conn->stream_write_first));
+	lock_protect(&conn->lock, &conn->stream_write_last,
+		sizeof(conn->stream_write_last));
+	lock_protect(&conn->lock, &conn->write_interest,
+		sizeof(conn->write_interest));
+	lock_protect(&conn->lock, &conn->on_write_list,
+		sizeof(conn->on_write_list));
+	lock_protect(&conn->lock, &conn->write_prev, sizeof(conn->write_prev));
+	lock_protect(&conn->lock, &conn->write_next, sizeof(conn->write_next));
+	return conn;
+}
+
+/**
+ * delete stream tree node, used as the traverse_postorder callback by
+ * doq_conn_delete.
+ * @param node: rbtree node from the connection stream tree.
+ * @param arg: the doq_table, its quic size is reduced by the buffers and
+ *	the stream structure that are freed.
+ */
+static void
+stream_tree_del(rbnode_type* node, void* arg)
+{
+	struct doq_table* table = (struct doq_table*)arg;
+	struct doq_stream* stream;
+	if(!node)
+		return;
+	/* Go through the node key, like the other tree delete callbacks,
+	 * so this does not depend on the node being the first member of
+	 * the stream structure. */
+	stream = (struct doq_stream*)node->key;
+	if(stream->in)
+		doq_table_quic_size_subtract(table, stream->inlen);
+	if(stream->out)
+		doq_table_quic_size_subtract(table, stream->outlen);
+	doq_table_quic_size_subtract(table, sizeof(*stream));
+	doq_stream_delete(stream);
+}
+
+void
+doq_conn_delete(struct doq_conn* conn, struct doq_table* table)
+{
+	if(!conn)
+		return;
+	lock_basic_destroy(&conn->lock);
+	lock_rw_wrlock(&conn->table->conid_lock);
+	doq_conn_clear_conids(conn);
+	lock_rw_unlock(&conn->table->conid_lock);
+	ngtcp2_conn_del(conn->conn);
+	if(conn->stream_tree.count != 0) {
+		traverse_postorder(&conn->stream_tree, stream_tree_del, table);
+	}
+	free(conn->key.dcid);
+	SSL_free(conn->ssl);
+	free(conn->close_pkt);
+	free(conn);
+}
+
+int
+doq_conn_cmp(const void* key1, const void* key2)
+{
+	struct doq_conn* c = (struct doq_conn*)key1;
+	struct doq_conn* d = (struct
doq_conn*)key2; + int r; + /* Compared in the order destination address, then + * local address, ifindex and then dcid. + * So that for a search for findlessorequal for the destination + * address will find connections to that address, with different + * dcids. + * Also a printout in sorted order prints the connections by IP + * address of destination, and then a number of them depending on the + * dcids. */ + if(c->key.paddr.addrlen != d->key.paddr.addrlen) { + if(c->key.paddr.addrlen < d->key.paddr.addrlen) + return -1; + return 1; + } + if((r=memcmp(&c->key.paddr.addr, &d->key.paddr.addr, + c->key.paddr.addrlen))!=0) + return r; + if(c->key.paddr.localaddrlen != d->key.paddr.localaddrlen) { + if(c->key.paddr.localaddrlen < d->key.paddr.localaddrlen) + return -1; + return 1; + } + if((r=memcmp(&c->key.paddr.localaddr, &d->key.paddr.localaddr, + c->key.paddr.localaddrlen))!=0) + return r; + if(c->key.paddr.ifindex != d->key.paddr.ifindex) { + if(c->key.paddr.ifindex < d->key.paddr.ifindex) + return -1; + return 1; + } + if(c->key.dcidlen != d->key.dcidlen) { + if(c->key.dcidlen < d->key.dcidlen) + return -1; + return 1; + } + if((r=memcmp(c->key.dcid, d->key.dcid, c->key.dcidlen))!=0) + return r; + return 0; +} + +int doq_conid_cmp(const void* key1, const void* key2) +{ + struct doq_conid* c = (struct doq_conid*)key1; + struct doq_conid* d = (struct doq_conid*)key2; + if(c->cidlen != d->cidlen) { + if(c->cidlen < d->cidlen) + return -1; + return 1; + } + return memcmp(c->cid, d->cid, c->cidlen); +} + +int doq_timer_cmp(const void* key1, const void* key2) +{ + struct doq_timer* e = (struct doq_timer*)key1; + struct doq_timer* f = (struct doq_timer*)key2; + if(e->time.tv_sec < f->time.tv_sec) + return -1; + if(e->time.tv_sec > f->time.tv_sec) + return 1; + if(e->time.tv_usec < f->time.tv_usec) + return -1; + if(e->time.tv_usec > f->time.tv_usec) + return 1; + return 0; +} + +int doq_stream_cmp(const void* key1, const void* key2) +{ + struct doq_stream* c = (struct 
doq_stream*)key1; + struct doq_stream* d = (struct doq_stream*)key2; + if(c->stream_id != d->stream_id) { + if(c->stream_id < d->stream_id) + return -1; + return 1; + } + return 0; +} + +/** doq store a local address in repinfo */ +static void +doq_repinfo_store_localaddr(struct comm_reply* repinfo, + struct doq_addr_storage* localaddr, socklen_t localaddrlen) +{ + /* use the pktinfo that we have for ancillary udp data otherwise, + * this saves space for a sockaddr */ + memset(&repinfo->pktinfo, 0, sizeof(repinfo->pktinfo)); + if(addr_is_ip6((void*)localaddr, localaddrlen)) { +#ifdef IPV6_PKTINFO + struct sockaddr_in6* sa6 = (struct sockaddr_in6*)localaddr; + memmove(&repinfo->pktinfo.v6info.ipi6_addr, + &sa6->sin6_addr, sizeof(struct in6_addr)); + repinfo->doq_srcport = sa6->sin6_port; +#endif + repinfo->srctype = 6; + } else { +#ifdef IP_PKTINFO + struct sockaddr_in* sa = (struct sockaddr_in*)localaddr; + memmove(&repinfo->pktinfo.v4info.ipi_addr, + &sa->sin_addr, sizeof(struct in_addr)); + repinfo->doq_srcport = sa->sin_port; +#elif defined(IP_RECVDSTADDR) + struct sockaddr_in* sa = (struct sockaddr_in*)localaddr; + memmove(&repinfo->pktinfo.v4addr, &sa->sin_addr, + sizeof(struct in_addr)); + repinfo->doq_srcport = sa->sin_port; +#endif + repinfo->srctype = 4; + } +} + +/** doq retrieve localaddr from repinfo */ +static void +doq_repinfo_retrieve_localaddr(struct comm_reply* repinfo, + struct doq_addr_storage* localaddr, socklen_t* localaddrlen) +{ + if(repinfo->srctype == 6) { +#ifdef IPV6_PKTINFO + struct sockaddr_in6* sa6 = (struct sockaddr_in6*)localaddr; + *localaddrlen = (socklen_t)sizeof(struct sockaddr_in6); + memset(sa6, 0, *localaddrlen); + sa6->sin6_family = AF_INET6; + memmove(&sa6->sin6_addr, &repinfo->pktinfo.v6info.ipi6_addr, + *localaddrlen); + sa6->sin6_port = repinfo->doq_srcport; +#endif + } else { +#ifdef IP_PKTINFO + struct sockaddr_in* sa = (struct sockaddr_in*)localaddr; + *localaddrlen = (socklen_t)sizeof(struct sockaddr_in); + memset(sa, 
0, *localaddrlen); + sa->sin_family = AF_INET; + memmove(&sa->sin_addr, &repinfo->pktinfo.v4info.ipi_addr, + *localaddrlen); + sa->sin_port = repinfo->doq_srcport; +#elif defined(IP_RECVDSTADDR) + struct sockaddr_in* sa = (struct sockaddr_in*)localaddr; + *localaddrlen = (socklen_t)sizeof(struct sockaddr_in); + memset(sa, 0, *localaddrlen); + sa->sin_family = AF_INET; + memmove(&sa->sin_addr, &repinfo->pktinfo.v4addr, + sizeof(struct in_addr)); + sa->sin_port = repinfo->doq_srcport; +#endif + } +} + +/** doq write a connection key into repinfo, false if it does not fit */ +static int +doq_conn_key_store_repinfo(struct doq_conn_key* key, + struct comm_reply* repinfo) +{ + repinfo->is_proxied = 0; + repinfo->doq_ifindex = key->paddr.ifindex; + repinfo->remote_addrlen = key->paddr.addrlen; + memmove(&repinfo->remote_addr, &key->paddr.addr, + repinfo->remote_addrlen); + repinfo->client_addrlen = key->paddr.addrlen; + memmove(&repinfo->client_addr, &key->paddr.addr, + repinfo->client_addrlen); + doq_repinfo_store_localaddr(repinfo, &key->paddr.localaddr, + key->paddr.localaddrlen); + if(key->dcidlen > sizeof(repinfo->doq_dcid)) + return 0; + repinfo->doq_dcidlen = key->dcidlen; + memmove(repinfo->doq_dcid, key->dcid, key->dcidlen); + return 1; +} + +void +doq_conn_key_from_repinfo(struct doq_conn_key* key, struct comm_reply* repinfo) +{ + key->paddr.ifindex = repinfo->doq_ifindex; + key->paddr.addrlen = repinfo->remote_addrlen; + memmove(&key->paddr.addr, &repinfo->remote_addr, + repinfo->remote_addrlen); + doq_repinfo_retrieve_localaddr(repinfo, &key->paddr.localaddr, + &key->paddr.localaddrlen); + key->dcidlen = repinfo->doq_dcidlen; + key->dcid = repinfo->doq_dcid; +} + +/** doq add a stream to the connection */ +static void +doq_conn_add_stream(struct doq_conn* conn, struct doq_stream* stream) +{ + (void)rbtree_insert(&conn->stream_tree, &stream->node); +} + +/** doq delete a stream from the connection */ +static void +doq_conn_del_stream(struct doq_conn* conn, 
struct doq_stream* stream) +{ + (void)rbtree_delete(&conn->stream_tree, &stream->node); +} + +/** doq create new stream */ +static struct doq_stream* +doq_stream_create(int64_t stream_id) +{ + struct doq_stream* stream = calloc(1, sizeof(*stream)); + if(!stream) + return NULL; + stream->node.key = stream; + stream->stream_id = stream_id; + return stream; +} + +void doq_stream_delete(struct doq_stream* stream) +{ + if(!stream) + return; + free(stream->in); + free(stream->out); + free(stream); +} + +struct doq_stream* +doq_stream_find(struct doq_conn* conn, int64_t stream_id) +{ + rbnode_type* node; + struct doq_stream key; + key.node.key = &key; + key.stream_id = stream_id; + node = rbtree_search(&conn->stream_tree, &key); + if(node) + return (struct doq_stream*)node->key; + return NULL; +} + +/** doq put stream on the conn write list */ +static void +doq_stream_on_write_list(struct doq_conn* conn, struct doq_stream* stream) +{ + if(stream->on_write_list) + return; + stream->write_prev = conn->stream_write_last; + if(conn->stream_write_last) + conn->stream_write_last->write_next = stream; + else + conn->stream_write_first = stream; + conn->stream_write_last = stream; + stream->write_next = NULL; + stream->on_write_list = 1; +} + +/** doq remove stream from the conn write list */ +static void +doq_stream_off_write_list(struct doq_conn* conn, struct doq_stream* stream) +{ + if(!stream->on_write_list) + return; + if(stream->write_next) + stream->write_next->write_prev = stream->write_prev; + else conn->stream_write_last = stream->write_prev; + if(stream->write_prev) + stream->write_prev->write_next = stream->write_next; + else conn->stream_write_first = stream->write_next; + stream->write_prev = NULL; + stream->write_next = NULL; + stream->on_write_list = 0; +} + +/** doq stream remove in buffer */ +static void +doq_stream_remove_in_buffer(struct doq_stream* stream, struct doq_table* table) +{ + if(stream->in) { + doq_table_quic_size_subtract(table, stream->inlen); + 
free(stream->in); + stream->in = NULL; + stream->inlen = 0; + } +} + +/** doq stream remove out buffer */ +static void +doq_stream_remove_out_buffer(struct doq_stream* stream, + struct doq_table* table) +{ + if(stream->out) { + doq_table_quic_size_subtract(table, stream->outlen); + free(stream->out); + stream->out = NULL; + stream->outlen = 0; + } +} + +int +doq_stream_close(struct doq_conn* conn, struct doq_stream* stream, + int send_shutdown) +{ + int ret; + if(stream->is_closed) + return 1; + stream->is_closed = 1; + doq_stream_off_write_list(conn, stream); + if(send_shutdown) { + verbose(VERB_ALGO, "doq: shutdown stream_id %d with app_error_code %d", + (int)stream->stream_id, (int)DOQ_APP_ERROR_CODE); + ret = ngtcp2_conn_shutdown_stream(conn->conn, +#ifdef HAVE_NGTCP2_CONN_SHUTDOWN_STREAM4 + 0, +#endif + stream->stream_id, DOQ_APP_ERROR_CODE); + if(ret != 0) { + log_err("doq ngtcp2_conn_shutdown_stream %d failed: %s", + (int)stream->stream_id, ngtcp2_strerror(ret)); + return 0; + } + doq_conn_write_enable(conn); + } + verbose(VERB_ALGO, "doq: conn extend max streams bidi by 1"); + ngtcp2_conn_extend_max_streams_bidi(conn->conn, 1); + doq_conn_write_enable(conn); + doq_stream_remove_in_buffer(stream, conn->doq_socket->table); + doq_stream_remove_out_buffer(stream, conn->doq_socket->table); + doq_table_quic_size_subtract(conn->doq_socket->table, sizeof(*stream)); + doq_conn_del_stream(conn, stream); + doq_stream_delete(stream); + return 1; +} + +/** doq stream pick up answer data from buffer */ +static int +doq_stream_pickup_answer(struct doq_stream* stream, struct sldns_buffer* buf) +{ + stream->is_answer_available = 1; + if(stream->out) { + free(stream->out); + stream->out = NULL; + stream->outlen = 0; + } + stream->nwrite = 0; + stream->outlen = sldns_buffer_limit(buf); + /* For quic the output bytes have to stay allocated and available, + * for potential resends, until the remote end has acknowledged them. 
+ * This includes the tcplen start uint16_t, in outlen_wire. */ + stream->outlen_wire = htons(stream->outlen); + stream->out = memdup(sldns_buffer_begin(buf), sldns_buffer_limit(buf)); + if(!stream->out) { + log_err("doq could not send answer: out of memory"); + return 0; + } + return 1; +} + +int +doq_stream_send_reply(struct doq_conn* conn, struct doq_stream* stream, + struct sldns_buffer* buf) +{ + if(verbosity >= VERB_ALGO) { + char* s = sldns_wire2str_pkt(sldns_buffer_begin(buf), + sldns_buffer_limit(buf)); + verbose(VERB_ALGO, "doq stream %d response\n%s", + (int)stream->stream_id, (s?s:"null")); + free(s); + } + if(stream->out) + doq_table_quic_size_subtract(conn->doq_socket->table, + stream->outlen); + if(!doq_stream_pickup_answer(stream, buf)) + return 0; + doq_table_quic_size_add(conn->doq_socket->table, stream->outlen); + doq_stream_on_write_list(conn, stream); + doq_conn_write_enable(conn); + return 1; +} + +/** doq stream data length has completed, allocations can be done. False on + * allocation failure. */ +static int +doq_stream_datalen_complete(struct doq_stream* stream, struct doq_table* table) +{ + if(stream->inlen > 1024*1024) { + log_err("doq stream in length too large %d", + (int)stream->inlen); + return 0; + } + stream->in = calloc(1, stream->inlen); + if(!stream->in) { + log_err("doq could not read stream, calloc failed: " + "out of memory"); + return 0; + } + doq_table_quic_size_add(table, stream->inlen); + return 1; +} + +/** doq stream data is complete, the input data has been received. 
*/ +static int +doq_stream_data_complete(struct doq_conn* conn, struct doq_stream* stream) +{ + struct comm_point* c; + if(verbosity >= VERB_ALGO) { + char* s = sldns_wire2str_pkt(stream->in, stream->inlen); + char a[128]; + addr_to_str((void*)&conn->key.paddr.addr, + conn->key.paddr.addrlen, a, sizeof(a)); + verbose(VERB_ALGO, "doq %s stream %d incoming query\n%s", + a, (int)stream->stream_id, (s?s:"null")); + free(s); + } + stream->is_query_complete = 1; + c = conn->doq_socket->cp; + if(!stream->in) { + verbose(VERB_ALGO, "doq_stream_data_complete: no in buffer"); + return 0; + } + if(stream->inlen > sldns_buffer_capacity(c->buffer)) { + verbose(VERB_ALGO, "doq_stream_data_complete: query too long"); + return 0; + } + sldns_buffer_clear(c->buffer); + sldns_buffer_write(c->buffer, stream->in, stream->inlen); + sldns_buffer_flip(c->buffer); + c->repinfo.c = c; + if(!doq_conn_key_store_repinfo(&conn->key, &c->repinfo)) { + verbose(VERB_ALGO, "doq_stream_data_complete: connection " + "DCID too long"); + return 0; + } + c->repinfo.doq_streamid = stream->stream_id; + conn->doq_socket->current_conn = conn; + fptr_ok(fptr_whitelist_comm_point(c->callback)); + if( (*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, &c->repinfo)) { + conn->doq_socket->current_conn = NULL; + if(!doq_stream_send_reply(conn, stream, c->buffer)) { + verbose(VERB_ALGO, "doq: failed to send_reply"); + return 0; + } + return 1; + } + conn->doq_socket->current_conn = NULL; + return 1; +} + +/** doq receive data for a stream, more bytes of the incoming data */ +static int +doq_stream_recv_data(struct doq_stream* stream, const uint8_t* data, + size_t datalen, int* recv_done, struct doq_table* table) +{ + int got_data = 0; + /* read the tcplength uint16_t at the start */ + if(stream->nread < 2) { + uint16_t tcplen = 0; + size_t todolen = 2 - stream->nread; + + if(stream->nread > 0) { + /* put in the already read byte if there is one */ + tcplen = stream->inlen; + } + if(datalen < todolen) + todolen = 
datalen; + memmove(((uint8_t*)&tcplen)+stream->nread, data, todolen); + stream->nread += todolen; + data += todolen; + datalen -= todolen; + if(stream->nread == 2) { + /* the initial length value is completed */ + stream->inlen = ntohs(tcplen); + if(!doq_stream_datalen_complete(stream, table)) + return 0; + } else { + /* store for later */ + stream->inlen = tcplen; + return 1; + } + } + /* if there are more data bytes */ + if(datalen > 0) { + size_t to_write = datalen; + if(stream->nread-2 > stream->inlen) { + verbose(VERB_ALGO, "doq stream buffer too small"); + return 0; + } + if(datalen > stream->inlen - (stream->nread-2)) + to_write = stream->inlen - (stream->nread-2); + if(to_write > 0) { + if(!stream->in) { + verbose(VERB_ALGO, "doq: stream has " + "no buffer"); + return 0; + } + memmove(stream->in+(stream->nread-2), data, to_write); + stream->nread += to_write; + data += to_write; + datalen -= to_write; + got_data = 1; + } + } + /* Are there extra bytes received after the end? If so, log them. */ + if(datalen > 0) { + if(verbosity >= VERB_ALGO) + log_hex("doq stream has extra bytes received after end", + (void*)data, datalen); + } + /* Is the input data complete? */ + if(got_data && stream->nread >= stream->inlen+2) { + if(!stream->in) { + verbose(VERB_ALGO, "doq: completed stream has " + "no buffer"); + return 0; + } + *recv_done = 1; + } + return 1; +} + +/** doq receive FIN for a stream. No more bytes are going to arrive. 
*/ +static int +doq_stream_recv_fin(struct doq_conn* conn, struct doq_stream* stream, int + recv_done) +{ + if(!stream->is_query_complete && !recv_done) { + verbose(VERB_ALGO, "doq: stream recv FIN, but is " + "not complete, have %d of %d bytes", + ((int)stream->nread)-2, (int)stream->inlen); + if(!doq_stream_close(conn, stream, 1)) + return 0; + } + return 1; +} + +void doq_fill_rand(struct ub_randstate* rnd, uint8_t* buf, size_t len) +{ + size_t i; + for(i=0; idoq_socket->rnd, data, datalen); + if(!doq_conid_find(conn->table, data, datalen)) { + /* Found an unused connection id. */ + return 1; + } + } + verbose(VERB_ALGO, "doq_conn_generate_new_conid failed: could not " + "generate random unused connection id value in %d attempts.", + max_try); + return 0; +} + +/** ngtcp2 rand callback function */ +static void +doq_rand_cb(uint8_t* dest, size_t destlen, const ngtcp2_rand_ctx* rand_ctx) +{ + struct ub_randstate* rnd = (struct ub_randstate*) + rand_ctx->native_handle; + doq_fill_rand(rnd, dest, destlen); +} + +/** ngtcp2 get_new_connection_id callback function */ +static int +doq_get_new_connection_id_cb(ngtcp2_conn* ATTR_UNUSED(conn), ngtcp2_cid* cid, + uint8_t* token, size_t cidlen, void* user_data) +{ + struct doq_conn* doq_conn = (struct doq_conn*)user_data; + /* Lock the conid tree, so we can check for duplicates while + * generating the id, and then insert it, whilst keeping the tree + * locked against other modifications, guaranteeing uniqueness. 
*/ + lock_rw_wrlock(&doq_conn->table->conid_lock); + if(!doq_conn_generate_new_conid(doq_conn, cid->data, cidlen)) { + lock_rw_unlock(&doq_conn->table->conid_lock); + return NGTCP2_ERR_CALLBACK_FAILURE; + } + cid->datalen = cidlen; + if(ngtcp2_crypto_generate_stateless_reset_token(token, + doq_conn->doq_socket->static_secret, + doq_conn->doq_socket->static_secret_len, cid) != 0) { + lock_rw_unlock(&doq_conn->table->conid_lock); + return NGTCP2_ERR_CALLBACK_FAILURE; + } + if(!doq_conn_associate_conid(doq_conn, cid->data, cid->datalen)) { + lock_rw_unlock(&doq_conn->table->conid_lock); + return NGTCP2_ERR_CALLBACK_FAILURE; + } + lock_rw_unlock(&doq_conn->table->conid_lock); + return 0; +} + +/** ngtcp2 remove_connection_id callback function */ +static int +doq_remove_connection_id_cb(ngtcp2_conn* ATTR_UNUSED(conn), + const ngtcp2_cid* cid, void* user_data) +{ + struct doq_conn* doq_conn = (struct doq_conn*)user_data; + lock_rw_wrlock(&doq_conn->table->conid_lock); + doq_conn_dissociate_conid(doq_conn, cid->data, cid->datalen); + lock_rw_unlock(&doq_conn->table->conid_lock); + return 0; +} + +/** doq submit a new token */ +static int +doq_submit_new_token(struct doq_conn* conn) +{ + uint8_t token[NGTCP2_CRYPTO_MAX_REGULAR_TOKENLEN]; + ngtcp2_ssize tokenlen; + int ret; + const ngtcp2_path* path = ngtcp2_conn_get_path(conn->conn); + ngtcp2_tstamp ts = doq_get_timestamp_nanosec(); + + tokenlen = ngtcp2_crypto_generate_regular_token(token, + conn->doq_socket->static_secret, + conn->doq_socket->static_secret_len, path->remote.addr, + path->remote.addrlen, ts); + if(tokenlen < 0) { + log_err("doq ngtcp2_crypto_generate_regular_token failed"); + return 1; + } + + verbose(VERB_ALGO, "doq submit new token"); + ret = ngtcp2_conn_submit_new_token(conn->conn, token, tokenlen); + if(ret != 0) { + log_err("doq ngtcp2_conn_submit_new_token failed: %s", + ngtcp2_strerror(ret)); + return 0; + } + return 1; +} + +/** ngtcp2 handshake_completed callback function */ +static int 
+doq_handshake_completed_cb(ngtcp2_conn* ATTR_UNUSED(conn), void* user_data) +{ + struct doq_conn* doq_conn = (struct doq_conn*)user_data; + verbose(VERB_ALGO, "doq handshake_completed callback"); + verbose(VERB_ALGO, "ngtcp2_conn_get_max_data_left is %d", + (int)ngtcp2_conn_get_max_data_left(doq_conn->conn)); +#ifdef HAVE_NGTCP2_CONN_GET_MAX_LOCAL_STREAMS_UNI + verbose(VERB_ALGO, "ngtcp2_conn_get_max_local_streams_uni is %d", + (int)ngtcp2_conn_get_max_local_streams_uni(doq_conn->conn)); +#endif + verbose(VERB_ALGO, "ngtcp2_conn_get_streams_uni_left is %d", + (int)ngtcp2_conn_get_streams_uni_left(doq_conn->conn)); + verbose(VERB_ALGO, "ngtcp2_conn_get_streams_bidi_left is %d", + (int)ngtcp2_conn_get_streams_bidi_left(doq_conn->conn)); + verbose(VERB_ALGO, "negotiated cipher name is %s", + SSL_get_cipher_name(doq_conn->ssl)); + if(verbosity > VERB_ALGO) { + const unsigned char* alpn = NULL; + unsigned int alpnlen = 0; + char alpnstr[128]; + SSL_get0_alpn_selected(doq_conn->ssl, &alpn, &alpnlen); + if(alpnlen > sizeof(alpnstr)-1) + alpnlen = sizeof(alpnstr)-1; + memmove(alpnstr, alpn, alpnlen); + alpnstr[alpnlen]=0; + verbose(VERB_ALGO, "negotiated ALPN is '%s'", alpnstr); + } + + if(!doq_submit_new_token(doq_conn)) + return -1; + return 0; +} + +/** ngtcp2 stream_open callback function */ +static int +doq_stream_open_cb(ngtcp2_conn* ATTR_UNUSED(conn), int64_t stream_id, + void* user_data) +{ + struct doq_conn* doq_conn = (struct doq_conn*)user_data; + struct doq_stream* stream; + verbose(VERB_ALGO, "doq new stream %x", (int)stream_id); + if(doq_stream_find(doq_conn, stream_id)) { + verbose(VERB_ALGO, "doq: stream with this id already exists"); + return 0; + } + if(stream_id != 0 && stream_id != 4 && /* allow one stream on a new connection */ + !doq_table_quic_size_available(doq_conn->doq_socket->table, + doq_conn->doq_socket->cfg, sizeof(*stream) + + 100 /* estimated query in */ + + 512 /* estimated response out */ + )) { + int rv; + verbose(VERB_ALGO, "doq: no mem 
for new stream"); + rv = ngtcp2_conn_shutdown_stream(doq_conn->conn, +#ifdef HAVE_NGTCP2_CONN_SHUTDOWN_STREAM4 + 0, +#endif + stream_id, NGTCP2_CONNECTION_REFUSED); + if(rv != 0) { + log_err("ngtcp2_conn_shutdown_stream failed: %s", + ngtcp2_strerror(rv)); + return NGTCP2_ERR_CALLBACK_FAILURE; + } + return 0; + } + stream = doq_stream_create(stream_id); + if(!stream) { + log_err("doq: could not doq_stream_create: out of memory"); + return NGTCP2_ERR_CALLBACK_FAILURE; + } + doq_table_quic_size_add(doq_conn->doq_socket->table, sizeof(*stream)); + doq_conn_add_stream(doq_conn, stream); + return 0; +} + +/** ngtcp2 recv_stream_data callback function */ +static int +doq_recv_stream_data_cb(ngtcp2_conn* ATTR_UNUSED(conn), uint32_t flags, + int64_t stream_id, uint64_t offset, const uint8_t* data, + size_t datalen, void* user_data, void* ATTR_UNUSED(stream_user_data)) +{ + int recv_done = 0; + struct doq_conn* doq_conn = (struct doq_conn*)user_data; + struct doq_stream* stream; + verbose(VERB_ALGO, "doq recv stream data stream id %d offset %d " + "datalen %d%s%s", (int)stream_id, (int)offset, (int)datalen, + ((flags&NGTCP2_STREAM_DATA_FLAG_FIN)!=0?" FIN":""), +#ifdef NGTCP2_STREAM_DATA_FLAG_0RTT + ((flags&NGTCP2_STREAM_DATA_FLAG_0RTT)!=0?" 0RTT":"") +#else + ((flags&NGTCP2_STREAM_DATA_FLAG_EARLY)!=0?" 
EARLY":"") +#endif + ); + stream = doq_stream_find(doq_conn, stream_id); + if(!stream) { + verbose(VERB_ALGO, "doq: received stream data for " + "unknown stream %d", (int)stream_id); + return 0; + } + if(stream->is_closed) { + verbose(VERB_ALGO, "doq: stream is closed, ignore recv data"); + return 0; + } + if(datalen != 0) { + if(!doq_stream_recv_data(stream, data, datalen, &recv_done, + doq_conn->doq_socket->table)) + return NGTCP2_ERR_CALLBACK_FAILURE; + } + if((flags&NGTCP2_STREAM_DATA_FLAG_FIN)!=0) { + if(!doq_stream_recv_fin(doq_conn, stream, recv_done)) + return NGTCP2_ERR_CALLBACK_FAILURE; + } + ngtcp2_conn_extend_max_stream_offset(doq_conn->conn, stream_id, + datalen); + ngtcp2_conn_extend_max_offset(doq_conn->conn, datalen); + if(recv_done) { + if(!doq_stream_data_complete(doq_conn, stream)) + return NGTCP2_ERR_CALLBACK_FAILURE; + } + return 0; +} + +/** ngtcp2 stream_close callback function */ +static int +doq_stream_close_cb(ngtcp2_conn* ATTR_UNUSED(conn), uint32_t flags, + int64_t stream_id, uint64_t app_error_code, void* user_data, + void* ATTR_UNUSED(stream_user_data)) +{ + struct doq_conn* doq_conn = (struct doq_conn*)user_data; + struct doq_stream* stream; + if((flags&NGTCP2_STREAM_CLOSE_FLAG_APP_ERROR_CODE_SET)!=0) + verbose(VERB_ALGO, "doq stream close for stream id %d %sapp_error_code %d", + (int)stream_id, + (((flags&NGTCP2_STREAM_CLOSE_FLAG_APP_ERROR_CODE_SET)!=0)? 
+ "APP_ERROR_CODE_SET ":""), + (int)app_error_code); + else + verbose(VERB_ALGO, "doq stream close for stream id %d", + (int)stream_id); + + stream = doq_stream_find(doq_conn, stream_id); + if(!stream) { + verbose(VERB_ALGO, "doq: stream close for " + "unknown stream %d", (int)stream_id); + return 0; + } + if(!doq_stream_close(doq_conn, stream, 0)) + return NGTCP2_ERR_CALLBACK_FAILURE; + return 0; +} + +/** ngtcp2 stream_reset callback function */ +static int +doq_stream_reset_cb(ngtcp2_conn* ATTR_UNUSED(conn), int64_t stream_id, + uint64_t final_size, uint64_t app_error_code, void* user_data, + void* ATTR_UNUSED(stream_user_data)) +{ + struct doq_conn* doq_conn = (struct doq_conn*)user_data; + struct doq_stream* stream; + verbose(VERB_ALGO, "doq stream reset for stream id %d final_size %d " + "app_error_code %d", (int)stream_id, (int)final_size, + (int)app_error_code); + + stream = doq_stream_find(doq_conn, stream_id); + if(!stream) { + verbose(VERB_ALGO, "doq: stream reset for " + "unknown stream %d", (int)stream_id); + return 0; + } + if(!doq_stream_close(doq_conn, stream, 0)) + return NGTCP2_ERR_CALLBACK_FAILURE; + return 0; +} + +/** ngtcp2 acked_stream_data_offset callback function */ +static int +doq_acked_stream_data_offset_cb(ngtcp2_conn* ATTR_UNUSED(conn), + int64_t stream_id, uint64_t offset, uint64_t datalen, void* user_data, + void* ATTR_UNUSED(stream_user_data)) +{ + struct doq_conn* doq_conn = (struct doq_conn*)user_data; + struct doq_stream* stream; + verbose(VERB_ALGO, "doq stream acked data for stream id %d offset %d " + "datalen %d", (int)stream_id, (int)offset, (int)datalen); + + stream = doq_stream_find(doq_conn, stream_id); + if(!stream) { + verbose(VERB_ALGO, "doq: stream acked data for " + "unknown stream %d", (int)stream_id); + return 0; + } + /* Acked the data from [offset .. offset+datalen). 
*/ + if(stream->is_closed) + return 0; + if(offset+datalen >= stream->outlen) { + doq_stream_remove_in_buffer(stream, + doq_conn->doq_socket->table); + doq_stream_remove_out_buffer(stream, + doq_conn->doq_socket->table); + } + return 0; +} + +/** ngtcp2 log_printf callback function */ +static void +doq_log_printf_cb(void* ATTR_UNUSED(user_data), const char* fmt, ...) +{ + char buf[1024]; + va_list ap; + va_start(ap, fmt); + vsnprintf(buf, sizeof(buf), fmt, ap); + verbose(VERB_ALGO, "libngtcp2: %s", buf); + va_end(ap); +} + +#ifndef HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_SERVER_CONTEXT +/** the doq application tx key callback, false on failure */ +static int +doq_application_tx_key_cb(struct doq_conn* conn) +{ + verbose(VERB_ALGO, "doq application tx key cb"); + /* The server does not want to open streams to the client, + * the client instead initiates by opening bidi streams. */ + verbose(VERB_ALGO, "doq ngtcp2_conn_get_max_data_left is %d", + (int)ngtcp2_conn_get_max_data_left(conn->conn)); +#ifdef HAVE_NGTCP2_CONN_GET_MAX_LOCAL_STREAMS_UNI + verbose(VERB_ALGO, "doq ngtcp2_conn_get_max_local_streams_uni is %d", + (int)ngtcp2_conn_get_max_local_streams_uni(conn->conn)); +#endif + verbose(VERB_ALGO, "doq ngtcp2_conn_get_streams_uni_left is %d", + (int)ngtcp2_conn_get_streams_uni_left(conn->conn)); + verbose(VERB_ALGO, "doq ngtcp2_conn_get_streams_bidi_left is %d", + (int)ngtcp2_conn_get_streams_bidi_left(conn->conn)); + return 1; +} + +/** quic_method set_encryption_secrets function */ +static int +doq_set_encryption_secrets(SSL *ssl, OSSL_ENCRYPTION_LEVEL ossl_level, + const uint8_t *read_secret, const uint8_t *write_secret, + size_t secret_len) +{ + struct doq_conn* doq_conn = (struct doq_conn*)SSL_get_app_data(ssl); +#ifdef HAVE_NGTCP2_ENCRYPTION_LEVEL + ngtcp2_encryption_level +#else + ngtcp2_crypto_level +#endif + level = +#ifdef HAVE_NGTCP2_CRYPTO_QUICTLS_FROM_OSSL_ENCRYPTION_LEVEL + ngtcp2_crypto_quictls_from_ossl_encryption_level(ossl_level); +#else + 
ngtcp2_crypto_openssl_from_ossl_encryption_level(ossl_level); +#endif + + if(read_secret) { + verbose(VERB_ALGO, "doq: ngtcp2_crypto_derive_and_install_rx_key for level %d ossl %d", (int)level, (int)ossl_level); + if(ngtcp2_crypto_derive_and_install_rx_key(doq_conn->conn, + NULL, NULL, NULL, level, read_secret, secret_len) + != 0) { + log_err("ngtcp2_crypto_derive_and_install_rx_key " + "failed"); + return 0; + } + } + + if(write_secret) { + verbose(VERB_ALGO, "doq: ngtcp2_crypto_derive_and_install_tx_key for level %d ossl %d", (int)level, (int)ossl_level); + if(ngtcp2_crypto_derive_and_install_tx_key(doq_conn->conn, + NULL, NULL, NULL, level, write_secret, secret_len) + != 0) { + log_err("ngtcp2_crypto_derive_and_install_tx_key " + "failed"); + return 0; + } + if(level == NGTCP2_CRYPTO_LEVEL_APPLICATION) { + if(!doq_application_tx_key_cb(doq_conn)) + return 0; + } + } + return 1; +} + +/** quic_method add_handshake_data function */ +static int +doq_add_handshake_data(SSL *ssl, OSSL_ENCRYPTION_LEVEL ossl_level, + const uint8_t *data, size_t len) +{ + struct doq_conn* doq_conn = (struct doq_conn*)SSL_get_app_data(ssl); +#ifdef HAVE_NGTCP2_ENCRYPTION_LEVEL + ngtcp2_encryption_level +#else + ngtcp2_crypto_level +#endif + level = +#ifdef HAVE_NGTCP2_CRYPTO_QUICTLS_FROM_OSSL_ENCRYPTION_LEVEL + ngtcp2_crypto_quictls_from_ossl_encryption_level(ossl_level); +#else + ngtcp2_crypto_openssl_from_ossl_encryption_level(ossl_level); +#endif + int rv; + + verbose(VERB_ALGO, "doq_add_handshake_data: " + "ngtcp2_con_submit_crypto_data level %d", (int)level); + rv = ngtcp2_conn_submit_crypto_data(doq_conn->conn, level, data, len); + if(rv != 0) { + log_err("ngtcp2_conn_submit_crypto_data failed: %s", + ngtcp2_strerror(rv)); + ngtcp2_conn_set_tls_error(doq_conn->conn, rv); + return 0; + } + return 1; +} + +/** quic_method flush_flight function */ +static int +doq_flush_flight(SSL* ATTR_UNUSED(ssl)) +{ + return 1; +} + +/** quic_method send_alert function */ +static int 
+doq_send_alert(SSL *ssl, enum ssl_encryption_level_t ATTR_UNUSED(level), + uint8_t alert) +{ + struct doq_conn* doq_conn = (struct doq_conn*)SSL_get_app_data(ssl); + doq_conn->tls_alert = alert; + return 1; +} +#endif /* HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_SERVER_CONTEXT */ + +/** ALPN select callback for the doq SSL context */ +static int +doq_alpn_select_cb(SSL* ATTR_UNUSED(ssl), const unsigned char** out, + unsigned char* outlen, const unsigned char* in, unsigned int inlen, + void* ATTR_UNUSED(arg)) +{ + /* select "doq" */ + int ret = SSL_select_next_proto((void*)out, outlen, + (const unsigned char*)"\x03""doq", 4, in, inlen); + if(ret == OPENSSL_NPN_NEGOTIATED) + return SSL_TLSEXT_ERR_OK; + verbose(VERB_ALGO, "doq alpn_select_cb: ALPN from client does " + "not have 'doq'"); + return SSL_TLSEXT_ERR_ALERT_FATAL; +} + +/** create new tls session for server doq connection */ +static SSL_CTX* +doq_ctx_server_setup(struct doq_server_socket* doq_socket) +{ + char* sid_ctx = "unbound server"; +#ifndef HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_SERVER_CONTEXT + SSL_QUIC_METHOD* quic_method; +#endif + SSL_CTX* ctx = SSL_CTX_new(TLS_server_method()); + if(!ctx) { + log_crypto_err("Could not SSL_CTX_new"); + return NULL; + } + SSL_CTX_set_options(ctx, + (SSL_OP_ALL & ~SSL_OP_DONT_INSERT_EMPTY_FRAGMENTS) | + SSL_OP_SINGLE_ECDH_USE | + SSL_OP_CIPHER_SERVER_PREFERENCE | + SSL_OP_NO_ANTI_REPLAY); + SSL_CTX_set_mode(ctx, SSL_MODE_RELEASE_BUFFERS); + SSL_CTX_set_min_proto_version(ctx, TLS1_3_VERSION); + SSL_CTX_set_max_proto_version(ctx, TLS1_3_VERSION); +#ifdef HAVE_SSL_CTX_SET_ALPN_SELECT_CB + SSL_CTX_set_alpn_select_cb(ctx, doq_alpn_select_cb, NULL); +#endif + SSL_CTX_set_default_verify_paths(ctx); + if(!SSL_CTX_use_certificate_chain_file(ctx, + doq_socket->ssl_service_pem)) { + log_err("doq: error for cert file: %s", + doq_socket->ssl_service_pem); + log_crypto_err("doq: error in " + "SSL_CTX_use_certificate_chain_file"); + SSL_CTX_free(ctx); + return NULL; + } + 
if(!SSL_CTX_use_PrivateKey_file(ctx, doq_socket->ssl_service_key, + SSL_FILETYPE_PEM)) { + log_err("doq: error for private key file: %s", + doq_socket->ssl_service_key); + log_crypto_err("doq: error in SSL_CTX_use_PrivateKey_file"); + SSL_CTX_free(ctx); + return NULL; + } + if(!SSL_CTX_check_private_key(ctx)) { + log_err("doq: error for key file: %s", + doq_socket->ssl_service_key); + log_crypto_err("doq: error in SSL_CTX_check_private_key"); + SSL_CTX_free(ctx); + return NULL; + } + SSL_CTX_set_session_id_context(ctx, (void*)sid_ctx, strlen(sid_ctx)); + if(doq_socket->ssl_verify_pem && doq_socket->ssl_verify_pem[0]) { + if(!SSL_CTX_load_verify_locations(ctx, + doq_socket->ssl_verify_pem, NULL)) { + log_err("doq: error for verify pem file: %s", + doq_socket->ssl_verify_pem); + log_crypto_err("doq: error in " + "SSL_CTX_load_verify_locations"); + SSL_CTX_free(ctx); + return NULL; + } + SSL_CTX_set_client_CA_list(ctx, SSL_load_client_CA_file( + doq_socket->ssl_verify_pem)); + SSL_CTX_set_verify(ctx, SSL_VERIFY_PEER| + SSL_VERIFY_CLIENT_ONCE| + SSL_VERIFY_FAIL_IF_NO_PEER_CERT, NULL); + } + + SSL_CTX_set_max_early_data(ctx, 0xffffffff); +#ifdef HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_SERVER_CONTEXT + if(ngtcp2_crypto_quictls_configure_server_context(ctx) != 0) { + log_err("ngtcp2_crypto_quictls_configure_server_context failed"); + SSL_CTX_free(ctx); + return NULL; + } +#else + /* The quic_method needs to remain valid during the SSL_CTX + * lifetime, so we allocate it. It is freed with the + * doq_server_socket. 
*/ + quic_method = calloc(1, sizeof(SSL_QUIC_METHOD)); + if(!quic_method) { + log_err("calloc failed: out of memory"); + SSL_CTX_free(ctx); + return NULL; + } + doq_socket->quic_method = quic_method; + quic_method->set_encryption_secrets = doq_set_encryption_secrets; + quic_method->add_handshake_data = doq_add_handshake_data; + quic_method->flush_flight = doq_flush_flight; + quic_method->send_alert = doq_send_alert; + SSL_CTX_set_quic_method(ctx, doq_socket->quic_method); +#endif + return ctx; +} + +/** Get the ngtcp2_conn from ssl userdata of type ngtcp2_conn_ref */ +static ngtcp2_conn* doq_conn_ref_get_conn(ngtcp2_crypto_conn_ref* conn_ref) +{ + struct doq_conn* conn = (struct doq_conn*)conn_ref->user_data; + return conn->conn; +} + +/** create new SSL session for server connection */ +static SSL* +doq_ssl_server_setup(SSL_CTX* ctx, struct doq_conn* conn) +{ + SSL* ssl = SSL_new(ctx); + if(!ssl) { + log_crypto_err("doq: SSL_new failed"); + return NULL; + } +#ifdef HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_SERVER_CONTEXT + conn->conn_ref.get_conn = &doq_conn_ref_get_conn; + conn->conn_ref.user_data = conn; + SSL_set_app_data(ssl, &conn->conn_ref); +#else + SSL_set_app_data(ssl, conn); +#endif + SSL_set_accept_state(ssl); + SSL_set_quic_early_data_enabled(ssl, 1); + return ssl; +} + +/** setup the doq_socket server tls context */ +int +doq_socket_setup_ctx(struct doq_server_socket* doq_socket) +{ + doq_socket->ctx = doq_ctx_server_setup(doq_socket); + if(!doq_socket->ctx) + return 0; + return 1; +} + +int +doq_conn_setup(struct doq_conn* conn, uint8_t* scid, size_t scidlen, + uint8_t* ocid, size_t ocidlen, const uint8_t* token, size_t tokenlen) +{ + int rv; + struct ngtcp2_cid dcid, sv_scid, scid_cid; + struct ngtcp2_path path; + struct ngtcp2_callbacks callbacks; + struct ngtcp2_settings settings; + struct ngtcp2_transport_params params; + memset(&dcid, 0, sizeof(dcid)); + memset(&sv_scid, 0, sizeof(sv_scid)); + memset(&scid_cid, 0, sizeof(scid_cid)); + memset(&path, 
0, sizeof(path)); + memset(&callbacks, 0, sizeof(callbacks)); + memset(&settings, 0, sizeof(settings)); + memset(&params, 0, sizeof(params)); + + ngtcp2_cid_init(&scid_cid, scid, scidlen); + ngtcp2_cid_init(&dcid, conn->key.dcid, conn->key.dcidlen); + + path.remote.addr = (struct sockaddr*)&conn->key.paddr.addr; + path.remote.addrlen = conn->key.paddr.addrlen; + path.local.addr = (struct sockaddr*)&conn->key.paddr.localaddr; + path.local.addrlen = conn->key.paddr.localaddrlen; + + callbacks.recv_client_initial = ngtcp2_crypto_recv_client_initial_cb; + callbacks.recv_crypto_data = ngtcp2_crypto_recv_crypto_data_cb; + callbacks.encrypt = ngtcp2_crypto_encrypt_cb; + callbacks.decrypt = ngtcp2_crypto_decrypt_cb; + callbacks.hp_mask = ngtcp2_crypto_hp_mask; + callbacks.update_key = ngtcp2_crypto_update_key_cb; + callbacks.delete_crypto_aead_ctx = + ngtcp2_crypto_delete_crypto_aead_ctx_cb; + callbacks.delete_crypto_cipher_ctx = + ngtcp2_crypto_delete_crypto_cipher_ctx_cb; + callbacks.get_path_challenge_data = + ngtcp2_crypto_get_path_challenge_data_cb; + callbacks.version_negotiation = ngtcp2_crypto_version_negotiation_cb; + callbacks.rand = doq_rand_cb; + callbacks.get_new_connection_id = doq_get_new_connection_id_cb; + callbacks.remove_connection_id = doq_remove_connection_id_cb; + callbacks.handshake_completed = doq_handshake_completed_cb; + callbacks.stream_open = doq_stream_open_cb; + callbacks.stream_close = doq_stream_close_cb; + callbacks.stream_reset = doq_stream_reset_cb; + callbacks.acked_stream_data_offset = doq_acked_stream_data_offset_cb; + callbacks.recv_stream_data = doq_recv_stream_data_cb; + + ngtcp2_settings_default(&settings); + if(verbosity >= VERB_ALGO) { + settings.log_printf = doq_log_printf_cb; + } + settings.rand_ctx.native_handle = conn->doq_socket->rnd; + settings.initial_ts = doq_get_timestamp_nanosec(); + settings.max_stream_window = 6*1024*1024; + settings.max_window = 6*1024*1024; +#ifdef HAVE_STRUCT_NGTCP2_SETTINGS_TOKENLEN + settings.token = 
(void*)token; + settings.tokenlen = tokenlen; +#else + settings.token.base = (void*)token; + settings.token.len = tokenlen; +#endif + + ngtcp2_transport_params_default(&params); + params.max_idle_timeout = conn->doq_socket->idle_timeout; + params.active_connection_id_limit = 7; + params.initial_max_stream_data_bidi_local = 256*1024; + params.initial_max_stream_data_bidi_remote = 256*1024; + params.initial_max_data = 1024*1024; + /* DoQ uses bidi streams, so we allow 0 uni streams. */ + params.initial_max_streams_uni = 0; + /* Initial max on number of bidi streams the remote end can open. + * That is the number of queries it can make, at first. */ + params.initial_max_streams_bidi = 10; + if(ocid) { + ngtcp2_cid_init(&params.original_dcid, ocid, ocidlen); + ngtcp2_cid_init(&params.retry_scid, conn->key.dcid, + conn->key.dcidlen); + params.retry_scid_present = 1; + } else { + ngtcp2_cid_init(&params.original_dcid, conn->key.dcid, + conn->key.dcidlen); + } +#ifdef HAVE_STRUCT_NGTCP2_TRANSPORT_PARAMS_ORIGINAL_DCID_PRESENT + params.original_dcid_present = 1; +#endif + doq_fill_rand(conn->doq_socket->rnd, params.stateless_reset_token, + sizeof(params.stateless_reset_token)); + sv_scid.datalen = conn->doq_socket->sv_scidlen; + lock_rw_wrlock(&conn->table->conid_lock); + if(!doq_conn_generate_new_conid(conn, sv_scid.data, sv_scid.datalen)) { + lock_rw_unlock(&conn->table->conid_lock); + return 0; + } + + rv = ngtcp2_conn_server_new(&conn->conn, &scid_cid, &sv_scid, &path, + conn->version, &callbacks, &settings, &params, NULL, conn); + if(rv != 0) { + lock_rw_unlock(&conn->table->conid_lock); + log_err("ngtcp2_conn_server_new failed: %s", + ngtcp2_strerror(rv)); + return 0; + } + if(!doq_conn_setup_conids(conn)) { + lock_rw_unlock(&conn->table->conid_lock); + log_err("doq_conn_setup_conids failed: out of memory"); + return 0; + } + lock_rw_unlock(&conn->table->conid_lock); + conn->ssl = doq_ssl_server_setup((SSL_CTX*)conn->doq_socket->ctx, + conn); + if(!conn->ssl) { + 
log_err("doq_ssl_server_setup failed"); + return 0; + } + ngtcp2_conn_set_tls_native_handle(conn->conn, conn->ssl); + doq_conn_write_enable(conn); + return 1; +} + +struct doq_conid* +doq_conid_find(struct doq_table* table, const uint8_t* data, size_t datalen) +{ + struct rbnode_type* node; + struct doq_conid key; + key.node.key = &key; + key.cid = (void*)data; + key.cidlen = datalen; + node = rbtree_search(table->conid_tree, &key); + if(node) + return (struct doq_conid*)node->key; + return NULL; +} + +/** insert conid in the conid list */ +static void +doq_conid_list_insert(struct doq_conn* conn, struct doq_conid* conid) +{ + conid->prev = NULL; + conid->next = conn->conid_list; + if(conn->conid_list) + conn->conid_list->prev = conid; + conn->conid_list = conid; +} + +/** remove conid from the conid list */ +static void +doq_conid_list_remove(struct doq_conn* conn, struct doq_conid* conid) +{ + if(conid->prev) + conid->prev->next = conid->next; + else conn->conid_list = conid->next; + if(conid->next) + conid->next->prev = conid->prev; +} + +/** create a doq_conid */ +static struct doq_conid* +doq_conid_create(uint8_t* data, size_t datalen, struct doq_conn_key* key) +{ + struct doq_conid* conid; + conid = calloc(1, sizeof(*conid)); + if(!conid) + return NULL; + conid->cid = memdup(data, datalen); + if(!conid->cid) { + free(conid); + return NULL; + } + conid->cidlen = datalen; + conid->node.key = conid; + conid->key = *key; + conid->key.dcid = memdup(key->dcid, key->dcidlen); + if(!conid->key.dcid) { + free(conid->cid); + free(conid); + return NULL; + } + return conid; +} + +void +doq_conid_delete(struct doq_conid* conid) +{ + if(!conid) + return; + free(conid->key.dcid); + free(conid->cid); + free(conid); +} + +/** return true if the conid is for the conn. 
*/ +static int +conid_is_for_conn(struct doq_conn* conn, struct doq_conid* conid) +{ + if(conid->key.dcidlen == conn->key.dcidlen && + memcmp(conid->key.dcid, conn->key.dcid, conid->key.dcidlen)==0 + && conid->key.paddr.addrlen == conn->key.paddr.addrlen && + memcmp(&conid->key.paddr.addr, &conn->key.paddr.addr, + conid->key.paddr.addrlen) == 0 && + conid->key.paddr.localaddrlen == conn->key.paddr.localaddrlen && + memcmp(&conid->key.paddr.localaddr, &conn->key.paddr.localaddr, + conid->key.paddr.localaddrlen) == 0 && + conid->key.paddr.ifindex == conn->key.paddr.ifindex) + return 1; + return 0; +} + +int +doq_conn_associate_conid(struct doq_conn* conn, uint8_t* data, size_t datalen) +{ + struct doq_conid* conid; + conid = doq_conid_find(conn->table, data, datalen); + if(conid && !conid_is_for_conn(conn, conid)) { + verbose(VERB_ALGO, "doq connection id already exists for " + "another doq_conn. Ignoring second connection id."); + /* Already exists to another conn, ignore it. + * This works, in that the conid is listed in the doq_conn + * conid_list element, and removed from there. So our conid + * tree and list are fine, when created and removed. + * The tree now does not have the lookup element pointing + * to this connection. */ + return 1; + } + if(conid) + return 1; /* already inserted */ + conid = doq_conid_create(data, datalen, &conn->key); + if(!conid) + return 0; + doq_conid_list_insert(conn, conid); + (void)rbtree_insert(conn->table->conid_tree, &conid->node); + return 1; +} + +void +doq_conn_dissociate_conid(struct doq_conn* conn, const uint8_t* data, + size_t datalen) +{ + struct doq_conid* conid; + conid = doq_conid_find(conn->table, data, datalen); + if(conid && !conid_is_for_conn(conn, conid)) + return; + if(conid) { + (void)rbtree_delete(conn->table->conid_tree, + conid->node.key); + doq_conid_list_remove(conn, conid); + doq_conid_delete(conid); + } +} + +/** associate the scid array and also the dcid. 
+ * caller must hold the locks on conn and doq_table.conid_lock. */ +static int +doq_conn_setup_id_array_and_dcid(struct doq_conn* conn, + struct ngtcp2_cid* scids, size_t num_scid) +{ + size_t i; + for(i=0; i<num_scid; i++) { + if(!doq_conn_associate_conid(conn, scids[i].data, + scids[i].datalen)) + return 0; + } + if(!doq_conn_associate_conid(conn, conn->key.dcid, conn->key.dcidlen)) + return 0; + return 1; +} + +int +doq_conn_setup_conids(struct doq_conn* conn) +{ + size_t num_scid = +#ifndef HAVE_NGTCP2_CONN_GET_NUM_SCID + ngtcp2_conn_get_scid(conn->conn, NULL); +#else + ngtcp2_conn_get_num_scid(conn->conn); +#endif + if(num_scid <= 4) { + struct ngtcp2_cid ids[4]; + /* Usually there are not that many scids when just accepted, + * like only 2. */ + ngtcp2_conn_get_scid(conn->conn, ids); + return doq_conn_setup_id_array_and_dcid(conn, ids, num_scid); + } else { + struct ngtcp2_cid *scids = calloc(num_scid, + sizeof(struct ngtcp2_cid)); + if(!scids) + return 0; + ngtcp2_conn_get_scid(conn->conn, scids); + if(!doq_conn_setup_id_array_and_dcid(conn, scids, num_scid)) { + free(scids); + return 0; + } + free(scids); + } + return 1; +} + +void +doq_conn_clear_conids(struct doq_conn* conn) +{ + struct doq_conid* p, *next; + if(!conn) + return; + p = conn->conid_list; + while(p) { + next = p->next; + (void)rbtree_delete(conn->table->conid_tree, p->node.key); + doq_conid_delete(p); + p = next; + } + conn->conid_list = NULL; +} + +ngtcp2_tstamp doq_get_timestamp_nanosec(void) +{ +#ifdef CLOCK_REALTIME + struct timespec tp; + memset(&tp, 0, sizeof(tp)); + /* Get a nanosecond time, that can be compared with the event base. */ + if(clock_gettime(CLOCK_REALTIME, &tp) == -1) { + log_err("clock_gettime failed: %s", strerror(errno)); + } + return ((uint64_t)tp.tv_sec)*((uint64_t)1000000000) + + ((uint64_t)tp.tv_nsec); +#else + struct timeval tv; + if(gettimeofday(&tv, NULL) < 0) { + log_err("gettimeofday failed: %s", strerror(errno)); + } + return ((uint64_t)tv.tv_sec)*((uint64_t)1000000000) + + ((uint64_t)tv.tv_usec)*((uint64_t)1000); +#endif /* CLOCK_REALTIME */ +} + +/** doq start the closing period for the connection. 
*/ +static int +doq_conn_start_closing_period(struct comm_point* c, struct doq_conn* conn) +{ + struct ngtcp2_path_storage ps; + struct ngtcp2_pkt_info pi; + ngtcp2_ssize ret; + if(!conn) + return 1; + if( +#ifdef HAVE_NGTCP2_CONN_IN_CLOSING_PERIOD + ngtcp2_conn_in_closing_period(conn->conn) +#else + ngtcp2_conn_is_in_closing_period(conn->conn) +#endif + ) + return 1; + if( +#ifdef HAVE_NGTCP2_CONN_IN_DRAINING_PERIOD + ngtcp2_conn_in_draining_period(conn->conn) +#else + ngtcp2_conn_is_in_draining_period(conn->conn) +#endif + ) { + doq_conn_write_disable(conn); + return 1; + } + ngtcp2_path_storage_zero(&ps); + sldns_buffer_clear(c->doq_socket->pkt_buf); + /* the call to ngtcp2_conn_write_connection_close causes the + * conn to be closed. It is now in the closing period. */ + ret = ngtcp2_conn_write_connection_close(conn->conn, &ps.path, + &pi, sldns_buffer_begin(c->doq_socket->pkt_buf), + sldns_buffer_remaining(c->doq_socket->pkt_buf), +#ifdef HAVE_NGTCP2_CCERR_DEFAULT + &conn->ccerr +#else + &conn->last_error +#endif + , doq_get_timestamp_nanosec()); + if(ret < 0) { + log_err("doq ngtcp2_conn_write_connection_close failed: %s", + ngtcp2_strerror(ret)); + return 0; + } + if(ret == 0) { + return 0; + } + sldns_buffer_set_position(c->doq_socket->pkt_buf, ret); + sldns_buffer_flip(c->doq_socket->pkt_buf); + + /* The close packet is allocated, because it may have to be repeated. + * When incoming packets have this connection dcid. */ + conn->close_pkt = memdup(sldns_buffer_begin(c->doq_socket->pkt_buf), + sldns_buffer_limit(c->doq_socket->pkt_buf)); + if(!conn->close_pkt) { + log_err("doq: could not allocate close packet: out of memory"); + return 0; + } + conn->close_pkt_len = sldns_buffer_limit(c->doq_socket->pkt_buf); + conn->close_ecn = pi.ecn; + return 1; +} + +/** doq send the close packet for the connection, perhaps again. 
*/ +int +doq_conn_send_close(struct comm_point* c, struct doq_conn* conn) +{ + if(!conn) + return 0; + if(!conn->close_pkt) + return 0; + if(conn->close_pkt_len > sldns_buffer_capacity(c->doq_socket->pkt_buf)) + return 0; + sldns_buffer_clear(c->doq_socket->pkt_buf); + sldns_buffer_write(c->doq_socket->pkt_buf, conn->close_pkt, conn->close_pkt_len); + sldns_buffer_flip(c->doq_socket->pkt_buf); + verbose(VERB_ALGO, "doq send connection close"); + doq_send_pkt(c, &conn->key.paddr, conn->close_ecn); + doq_conn_write_disable(conn); + return 1; +} + +/** doq close the connection on error. If it returns a failure, it + * does not wait to send a close, and the connection can be dropped. */ +static int +doq_conn_close_error(struct comm_point* c, struct doq_conn* conn) +{ +#ifdef HAVE_NGTCP2_CCERR_DEFAULT + if(conn->ccerr.type == NGTCP2_CCERR_TYPE_IDLE_CLOSE) + return 0; +#else + if(conn->last_error.type == + NGTCP2_CONNECTION_CLOSE_ERROR_CODE_TYPE_TRANSPORT_IDLE_CLOSE) + return 0; +#endif + if(!doq_conn_start_closing_period(c, conn)) + return 0; + if( +#ifdef HAVE_NGTCP2_CONN_IN_DRAINING_PERIOD + ngtcp2_conn_in_draining_period(conn->conn) +#else + ngtcp2_conn_is_in_draining_period(conn->conn) +#endif + ) { + doq_conn_write_disable(conn); + return 1; + } + doq_conn_write_enable(conn); + if(!doq_conn_send_close(c, conn)) + return 0; + return 1; +} + +int +doq_conn_recv(struct comm_point* c, struct doq_pkt_addr* paddr, + struct doq_conn* conn, struct ngtcp2_pkt_info* pi, int* err_retry, + int* err_drop) +{ + int ret; + ngtcp2_tstamp ts; + struct ngtcp2_path path; + memset(&path, 0, sizeof(path)); + path.remote.addr = (struct sockaddr*)&paddr->addr; + path.remote.addrlen = paddr->addrlen; + path.local.addr = (struct sockaddr*)&paddr->localaddr; + path.local.addrlen = paddr->localaddrlen; + ts = doq_get_timestamp_nanosec(); + + ret = ngtcp2_conn_read_pkt(conn->conn, &path, pi, + sldns_buffer_begin(c->doq_socket->pkt_buf), + sldns_buffer_limit(c->doq_socket->pkt_buf), ts); + 
if(ret != 0) { + if(err_retry) + *err_retry = 0; + if(err_drop) + *err_drop = 0; + if(ret == NGTCP2_ERR_DRAINING) { + verbose(VERB_ALGO, "ngtcp2_conn_read_pkt returned %s", + ngtcp2_strerror(ret)); + doq_conn_write_disable(conn); + return 0; + } else if(ret == NGTCP2_ERR_DROP_CONN) { + verbose(VERB_ALGO, "ngtcp2_conn_read_pkt returned %s", + ngtcp2_strerror(ret)); + if(err_drop) + *err_drop = 1; + return 0; + } else if(ret == NGTCP2_ERR_RETRY) { + verbose(VERB_ALGO, "ngtcp2_conn_read_pkt returned %s", + ngtcp2_strerror(ret)); + if(err_retry) + *err_retry = 1; + if(err_drop) + *err_drop = 1; + return 0; + } else if(ret == NGTCP2_ERR_CRYPTO) { + if( +#ifdef HAVE_NGTCP2_CCERR_DEFAULT + !conn->ccerr.error_code +#else + !conn->last_error.error_code +#endif + ) { + /* in picotls the tls alert may need to be + * copied, but this is with openssl. And there + * is conn->tls_alert. */ +#ifdef HAVE_NGTCP2_CCERR_DEFAULT + ngtcp2_ccerr_set_tls_alert(&conn->ccerr, + conn->tls_alert, NULL, 0); +#else + ngtcp2_connection_close_error_set_transport_error_tls_alert( + &conn->last_error, conn->tls_alert, + NULL, 0); +#endif + } + } else { + if( +#ifdef HAVE_NGTCP2_CCERR_DEFAULT + !conn->ccerr.error_code +#else + !conn->last_error.error_code +#endif + ) { +#ifdef HAVE_NGTCP2_CCERR_DEFAULT + ngtcp2_ccerr_set_liberr(&conn->ccerr, ret, + NULL, 0); +#else + ngtcp2_connection_close_error_set_transport_error_liberr( + &conn->last_error, ret, NULL, 0); +#endif + } + } + log_err("ngtcp2_conn_read_pkt failed: %s", + ngtcp2_strerror(ret)); + if(!doq_conn_close_error(c, conn)) { + if(err_drop) + *err_drop = 1; + } + return 0; + } + doq_conn_write_enable(conn); + return 1; +} + +/** doq stream write is done */ +static void +doq_stream_write_is_done(struct doq_conn* conn, struct doq_stream* stream) +{ + /* Cannot deallocate, the buffer may be needed for resends. 
*/ + doq_stream_off_write_list(conn, stream); +} + +int +doq_conn_write_streams(struct comm_point* c, struct doq_conn* conn, + int* err_drop) +{ + struct doq_stream* stream = conn->stream_write_first; + ngtcp2_path_storage ps; + ngtcp2_tstamp ts = doq_get_timestamp_nanosec(); + size_t num_packets = 0, max_packets = 65535; + ngtcp2_path_storage_zero(&ps); + + for(;;) { + int64_t stream_id; + uint32_t flags = 0; + ngtcp2_pkt_info pi; + ngtcp2_vec datav[2]; + size_t datav_count = 0; + ngtcp2_ssize ret, ndatalen = 0; + int fin; + + if(stream) { + /* data to send */ + verbose(VERB_ALGO, "doq: doq_conn write stream %d", + (int)stream->stream_id); + stream_id = stream->stream_id; + fin = 1; + if(stream->nwrite < 2) { + datav[0].base = ((uint8_t*)&stream-> + outlen_wire) + stream->nwrite; + datav[0].len = 2 - stream->nwrite; + datav[1].base = stream->out; + datav[1].len = stream->outlen; + datav_count = 2; + } else { + datav[0].base = stream->out + + (stream->nwrite-2); + datav[0].len = stream->outlen - + (stream->nwrite-2); + datav_count = 1; + } + } else { + /* no data to send */ + verbose(VERB_ALGO, "doq: doq_conn write stream -1"); + stream_id = -1; + fin = 0; + datav[0].base = NULL; + datav[0].len = 0; + datav_count = 1; + } + + /* if more streams, set it to write more */ + if(stream && stream->write_next) + flags |= NGTCP2_WRITE_STREAM_FLAG_MORE; + if(fin) + flags |= NGTCP2_WRITE_STREAM_FLAG_FIN; + + sldns_buffer_clear(c->doq_socket->pkt_buf); + ret = ngtcp2_conn_writev_stream(conn->conn, &ps.path, &pi, + sldns_buffer_begin(c->doq_socket->pkt_buf), + sldns_buffer_remaining(c->doq_socket->pkt_buf), + &ndatalen, flags, stream_id, datav, datav_count, ts); + if(ret < 0) { + if(ret == NGTCP2_ERR_WRITE_MORE) { + verbose(VERB_ALGO, "doq: write more, ndatalen %d", (int)ndatalen); + if(stream) { + if(ndatalen >= 0) + stream->nwrite += ndatalen; + if(stream->nwrite >= stream->outlen+2) + doq_stream_write_is_done( + conn, stream); + stream = stream->write_next; + } + continue; 
+ } else if(ret == NGTCP2_ERR_STREAM_DATA_BLOCKED) { + verbose(VERB_ALGO, "doq: ngtcp2_conn_writev_stream returned NGTCP2_ERR_STREAM_DATA_BLOCKED"); +#ifdef HAVE_NGTCP2_CCERR_DEFAULT + ngtcp2_ccerr_set_application_error( + &conn->ccerr, -1, NULL, 0); +#else + ngtcp2_connection_close_error_set_application_error(&conn->last_error, -1, NULL, 0); +#endif + if(err_drop) + *err_drop = 0; + if(!doq_conn_close_error(c, conn)) { + if(err_drop) + *err_drop = 1; + } + return 0; + } else if(ret == NGTCP2_ERR_STREAM_SHUT_WR) { + verbose(VERB_ALGO, "doq: ngtcp2_conn_writev_stream returned NGTCP2_ERR_STREAM_SHUT_WR"); +#ifdef HAVE_NGTCP2_CCERR_DEFAULT + ngtcp2_ccerr_set_application_error( + &conn->ccerr, -1, NULL, 0); +#else + ngtcp2_connection_close_error_set_application_error(&conn->last_error, -1, NULL, 0); +#endif + if(err_drop) + *err_drop = 0; + if(!doq_conn_close_error(c, conn)) { + if(err_drop) + *err_drop = 1; + } + return 0; + } + + log_err("doq: ngtcp2_conn_writev_stream failed: %s", + ngtcp2_strerror(ret)); +#ifdef HAVE_NGTCP2_CCERR_DEFAULT + ngtcp2_ccerr_set_liberr(&conn->ccerr, ret, NULL, 0); +#else + ngtcp2_connection_close_error_set_transport_error_liberr( + &conn->last_error, ret, NULL, 0); +#endif + if(err_drop) + *err_drop = 0; + if(!doq_conn_close_error(c, conn)) { + if(err_drop) + *err_drop = 1; + } + return 0; + } + verbose(VERB_ALGO, "doq: writev_stream pkt size %d ndatawritten %d", + (int)ret, (int)ndatalen); + + if(ndatalen >= 0 && stream) { + stream->nwrite += ndatalen; + if(stream->nwrite >= stream->outlen+2) + doq_stream_write_is_done(conn, stream); + } + if(ret == 0) { + /* congestion limited */ + doq_conn_write_disable(conn); + ngtcp2_conn_update_pkt_tx_time(conn->conn, ts); + return 1; + } + sldns_buffer_set_position(c->doq_socket->pkt_buf, ret); + sldns_buffer_flip(c->doq_socket->pkt_buf); + doq_send_pkt(c, &conn->key.paddr, pi.ecn); + + if(c->doq_socket->have_blocked_pkt) + break; + if(++num_packets == max_packets) + break; + if(stream) + stream = 
stream->write_next; + } + ngtcp2_conn_update_pkt_tx_time(conn->conn, ts); + return 1; +} + +void +doq_conn_write_enable(struct doq_conn* conn) +{ + conn->write_interest = 1; +} + +void +doq_conn_write_disable(struct doq_conn* conn) +{ + conn->write_interest = 0; +} + +/** doq append the connection to the write list */ +static void +doq_conn_write_list_append(struct doq_table* table, struct doq_conn* conn) +{ + if(conn->on_write_list) + return; + conn->write_prev = table->write_list_last; + if(table->write_list_last) + table->write_list_last->write_next = conn; + else table->write_list_first = conn; + conn->write_next = NULL; + table->write_list_last = conn; + conn->on_write_list = 1; +} + +void +doq_conn_write_list_remove(struct doq_table* table, struct doq_conn* conn) +{ + if(!conn->on_write_list) + return; + if(conn->write_next) + conn->write_next->write_prev = conn->write_prev; + else table->write_list_last = conn->write_prev; + if(conn->write_prev) + conn->write_prev->write_next = conn->write_next; + else table->write_list_first = conn->write_next; + conn->write_prev = NULL; + conn->write_next = NULL; + conn->on_write_list = 0; +} + +void +doq_conn_set_write_list(struct doq_table* table, struct doq_conn* conn) +{ + if(conn->write_interest && conn->on_write_list) + return; + if(!conn->write_interest && !conn->on_write_list) + return; + if(conn->write_interest) + doq_conn_write_list_append(table, conn); + else doq_conn_write_list_remove(table, conn); +} + +struct doq_conn* +doq_table_pop_first(struct doq_table* table) +{ + struct doq_conn* conn = table->write_list_first; + if(!conn) + return NULL; + lock_basic_lock(&conn->lock); + table->write_list_first = conn->write_next; + if(conn->write_next) + conn->write_next->write_prev = NULL; + else table->write_list_last = NULL; + conn->write_next = NULL; + conn->write_prev = NULL; + conn->on_write_list = 0; + return conn; +} + +int +doq_conn_check_timer(struct doq_conn* conn, struct timeval* tv) +{ + ngtcp2_tstamp 
expiry = ngtcp2_conn_get_expiry(conn->conn); + ngtcp2_tstamp now = doq_get_timestamp_nanosec(); + ngtcp2_tstamp t; + + if(expiry <= now) { + /* The timer has already expired, add with zero timeout. + * This should call the callback straight away. Calling it + * from the event callbacks is cleaner than calling it here, + * because then it is always called with the same locks and + * so on. This routine only has the conn.lock. */ + t = now; + } else { + t = expiry; + } + + /* convert to timeval */ + memset(tv, 0, sizeof(*tv)); + tv->tv_sec = t / NGTCP2_SECONDS; + tv->tv_usec = (t / NGTCP2_MICROSECONDS)%1000000; + + /* If we already have a timer, is it the right value? */ + if(conn->timer.timer_in_tree || conn->timer.timer_in_list) { + if(conn->timer.time.tv_sec == tv->tv_sec && + conn->timer.time.tv_usec == tv->tv_usec) + return 0; + } + return 1; +} + +/* doq print connection log */ +static void +doq_conn_log_line(struct doq_conn* conn, char* s) +{ + char remotestr[256], localstr[256]; + addr_to_str((void*)&conn->key.paddr.addr, conn->key.paddr.addrlen, + remotestr, sizeof(remotestr)); + addr_to_str((void*)&conn->key.paddr.localaddr, + conn->key.paddr.localaddrlen, localstr, sizeof(localstr)); + log_info("doq conn %s %s %s", remotestr, localstr, s); +} + +int +doq_conn_handle_timeout(struct doq_conn* conn) +{ + ngtcp2_tstamp now = doq_get_timestamp_nanosec(); + int rv; + + if(verbosity >= VERB_ALGO) + doq_conn_log_line(conn, "timeout"); + + rv = ngtcp2_conn_handle_expiry(conn->conn, now); + if(rv != 0) { + verbose(VERB_ALGO, "ngtcp2_conn_handle_expiry failed: %s", + ngtcp2_strerror(rv)); +#ifdef HAVE_NGTCP2_CCERR_DEFAULT + ngtcp2_ccerr_set_liberr(&conn->ccerr, rv, NULL, 0); +#else + ngtcp2_connection_close_error_set_transport_error_liberr( + &conn->last_error, rv, NULL, 0); +#endif + if(!doq_conn_close_error(conn->doq_socket->cp, conn)) { + /* failed, return for deletion */ + return 0; + } + return 1; + } + doq_conn_write_enable(conn); + 
if(!doq_conn_write_streams(conn->doq_socket->cp, conn, NULL)) { + /* failed, return for deletion. */ + return 0; + } + return 1; +} + +void +doq_table_quic_size_add(struct doq_table* table, size_t add) +{ + lock_basic_lock(&table->size_lock); + table->current_size += add; + lock_basic_unlock(&table->size_lock); +} + +void +doq_table_quic_size_subtract(struct doq_table* table, size_t subtract) +{ + lock_basic_lock(&table->size_lock); + if(table->current_size < subtract) + table->current_size = 0; + else table->current_size -= subtract; + lock_basic_unlock(&table->size_lock); +} + +int +doq_table_quic_size_available(struct doq_table* table, + struct config_file* cfg, size_t mem) +{ + size_t cur; + lock_basic_lock(&table->size_lock); + cur = table->current_size; + lock_basic_unlock(&table->size_lock); + + if(cur + mem > cfg->quic_size) + return 0; + return 1; +} + +size_t doq_table_quic_size_get(struct doq_table* table) +{ + size_t sz; + if(!table) + return 0; + lock_basic_lock(&table->size_lock); + sz = table->current_size; + lock_basic_unlock(&table->size_lock); + return sz; +} +#endif /* HAVE_NGTCP2 */ diff --git a/services/listen_dnsport.h b/services/listen_dnsport.h index 84ac4b068..c29f4d72b 100644 --- a/services/listen_dnsport.h +++ b/services/listen_dnsport.h @@ -43,10 +43,16 @@ #define LISTEN_DNSPORT_H #include "util/netevent.h" +#include "util/rbtree.h" +#include "util/locks.h" #include "daemon/acl_list.h" #ifdef HAVE_NGHTTP2_NGHTTP2_H #include <nghttp2/nghttp2.h> #endif +#ifdef HAVE_NGTCP2 +#include <ngtcp2/ngtcp2.h> +#include <ngtcp2/ngtcp2_crypto.h> +#endif struct listen_list; struct config_file; struct addrinfo; @@ -100,7 +106,9 @@ enum listen_type { /** udp ipv6 (v4mapped) for use with ancillary data + dnscrypt*/ listen_type_udpancil_dnscrypt, /** HTTP(2) over TLS over TCP */ - listen_type_http + listen_type_http, + /** DNS over QUIC */ + listen_type_doq }; /* @@ -188,6 +196,11 @@ int resolve_interface_names(char** ifs, int num_ifs, * @param tcp_conn_limit: TCP connection limit info. 
* @param sslctx: nonNULL if ssl context. * @param dtenv: nonNULL if dnstap enabled. + * @param doq_table: the doq connection table, with shared information. + * @param rnd: random state. + * @param ssl_service_key: the SSL service key file. + * @param ssl_service_pem: the SSL service pem file. + * @param cfg: config file struct. * @param cb: callback function when a request arrives. It is passed * the packet and user argument. Return true to send a reply. * @param cb_arg: user data argument for callback function. @@ -198,8 +211,10 @@ listen_create(struct comm_base* base, struct listen_port* ports, size_t bufsize, int tcp_accept_count, int tcp_idle_timeout, int harden_large_queries, uint32_t http_max_streams, char* http_endpoint, int http_notls, struct tcl_list* tcp_conn_limit, - void* sslctx, struct dt_env* dtenv, comm_point_callback_type* cb, - void *cb_arg); + void* sslctx, struct dt_env* dtenv, struct doq_table* doq_table, + struct ub_randstate* rnd, const char* ssl_service_key, + const char* ssl_service_pem, struct config_file* cfg, + comm_point_callback_type* cb, void *cb_arg); /** * delete the listening structure @@ -278,11 +293,12 @@ int create_udp_sock(int family, int socktype, struct sockaddr* addr, * @param freebind: set IP_FREEBIND socket option. * @param use_systemd: if true, fetch sockets from systemd. * @param dscp: DSCP to use. + * @param additional: additional log information for the socket type. * @return: the socket. -1 on error. 
*/ int create_tcp_accept_sock(struct addrinfo *addr, int v6only, int* noproto, int* reuseport, int transparent, int mss, int nodelay, int freebind, - int use_systemd, int dscp); + int use_systemd, int dscp, const char* additional); /** * Create and bind local listening socket @@ -452,6 +468,377 @@ int http2_submit_dns_response(struct http2_session* h2_session); int http2_submit_dns_response(void* v); #endif /* HAVE_NGHTTP2 */ +#ifdef HAVE_NGTCP2 +struct doq_conid; +struct doq_server_socket; + +/** + * DoQ shared connection table. This is the connections for the host. + * And some config parameter values for connections. The host has to + * respond on that ip,port for those connections, so they are shared + * between threads. + */ +struct doq_table { + /** the lock on the tree and config elements. insert and deletion, + * also lookup in the tree needs to hold the lock. */ + lock_rw_type lock; + /** rbtree of doq_conn, the connections to different destination + * addresses, and can be found by dcid. */ + struct rbtree_type* conn_tree; + /** lock for the conid tree, needed for the conid tree and also + * the conid elements */ + lock_rw_type conid_lock; + /** rbtree of doq_conid, connections can be found by their + * connection ids. Lookup by connection id, finds doq_conn. */ + struct rbtree_type* conid_tree; + /** the server scid length */ + int sv_scidlen; + /** the static secret for the server */ + uint8_t* static_secret; + /** length of the static secret */ + size_t static_secret_len; + /** the idle timeout in nanoseconds */ + uint64_t idle_timeout; + /** the list of write interested connections, hold the doq_table.lock + * to change them */ + struct doq_conn* write_list_first, *write_list_last; + /** rbtree of doq_timer. */ + struct rbtree_type* timer_tree; + /** lock on the current_size counter. */ + lock_basic_type size_lock; + /** current use, in bytes, of QUIC buffers. + * The doq_conn ngtcp2_conn structure, SSL structure and conid structs + * are not counted. 
*/ + size_t current_size; +}; + +/** create doq table */ +struct doq_table* doq_table_create(struct config_file* cfg, + struct ub_randstate* rnd); + +/** delete doq table */ +void doq_table_delete(struct doq_table* table); + +/** + * Timer information for doq timer. + */ +struct doq_timer { + /** The rbnode in the tree sorted by timeout value. Key this struct. */ + struct rbnode_type node; + /** The timeout value. Absolute time value. */ + struct timeval time; + /** If the timer is in the time tree, with the node. */ + int timer_in_tree; + /** If there are more timers with the exact same timeout value, + * they form a set of timers. The rbnode timer has a link to the list + * with the other timers in the set. The rbnode timer is not a + * member of the list with the other timers. The other timers are not + * linked into the tree. */ + struct doq_timer* setlist_first, *setlist_last; + /** If the timer is on the setlist. */ + int timer_in_list; + /** If in the setlist, the next and prev element. */ + struct doq_timer* setlist_next, *setlist_prev; + /** The connection that is timeouted. */ + struct doq_conn* conn; + /** The worker that is waiting for the timeout event. + * Set for the rbnode tree linked element. If a worker is waiting + * for the event. If NULL, no worker is waiting for this timeout. */ + struct doq_server_socket* worker_doq_socket; +}; + +/** + * Key information that makes a doq_conn node in the tree lookup. + */ +struct doq_conn_key { + /** the remote endpoint and local endpoint and ifindex */ + struct doq_pkt_addr paddr; + /** the doq connection dcid */ + uint8_t* dcid; + /** length of dcid */ + size_t dcidlen; +}; + +/** + * DoQ connection, for DNS over QUIC. One connection to a remote endpoint + * with a number of streams in it. Every stream is like a tcp stream with + * a uint16_t length, query read, and a uint16_t length and answer written. 
+ */ +struct doq_conn { + /** rbtree node, key is addresses and dcid */ + struct rbnode_type node; + /** lock on the connection */ + lock_basic_type lock; + /** the key information, with dcid and address endpoint */ + struct doq_conn_key key; + /** the doq server socket for inside callbacks */ + struct doq_server_socket* doq_socket; + /** the doq table this connection is part of */ + struct doq_table* table; + /** if the connection is about to be deleted. */ + uint8_t is_deleted; + /** the version, the client chosen version of QUIC */ + uint32_t version; + /** the ngtcp2 connection, a server connection */ + struct ngtcp2_conn* conn; + /** the connection ids that are associated with this doq_conn. + * There can be a number, that can change. They are linked here, + * so that upon removal, the list of actually associated conid + * elements can be removed as well. */ + struct doq_conid* conid_list; + /** the ngtcp2 last error for the connection */ +#ifdef HAVE_NGTCP2_CCERR_DEFAULT + struct ngtcp2_ccerr ccerr; +#else + struct ngtcp2_connection_close_error last_error; +#endif + /** the recent tls alert error code */ + uint8_t tls_alert; + /** the ssl context, SSL* */ + void* ssl; +#ifdef HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_SERVER_CONTEXT + /** the connection reference for ngtcp2_conn and userdata in ssl */ + struct ngtcp2_crypto_conn_ref conn_ref; +#endif + /** closure packet, if any */ + uint8_t* close_pkt; + /** length of closure packet. */ + size_t close_pkt_len; + /** closure ecn */ + uint32_t close_ecn; + /** the streams for this connection, of type doq_stream */ + struct rbtree_type stream_tree; + /** the streams that want write, they have something to write. + * The list is ordered, the last have to wait for the first to + * get their data written. */ + struct doq_stream* stream_write_first, *stream_write_last; + /** the conn has write interest if true, no write interest if false. 
*/ + uint8_t write_interest; + /** if the conn is on the connection write list */ + uint8_t on_write_list; + /** the connection write list prev and next, if on the write list */ + struct doq_conn* write_prev, *write_next; + /** The timer for the connection. If unused, it is not in the tree + * and not in the list. It is alloced here, so that it is prealloced. + * It has to be set after every read and write on the connection, so + * this improves performance, but also the allocation does not fail. */ + struct doq_timer timer; +}; + +/** + * Connection ID and the doq_conn that is that connection. A connection + * has an original dcid, and then more connection ids associated. + */ +struct doq_conid { + /** rbtree node, key is the connection id. */ + struct rbnode_type node; + /** the next and prev in the list of conids for the doq_conn */ + struct doq_conid* next, *prev; + /** key to the doq_conn that is the connection */ + struct doq_conn_key key; + /** the connection id, byte string */ + uint8_t* cid; + /** the length of cid */ + size_t cidlen; +}; + +/** + * DoQ stream, for DNS over QUIC. + */ +struct doq_stream { + /** the rbtree node for the stream, key is the stream_id */ + rbnode_type node; + /** the stream id */ + int64_t stream_id; + /** if the stream is closed */ + uint8_t is_closed; + /** if the query is complete */ + uint8_t is_query_complete; + /** the number of bytes read on the stream, up to querylen+2. */ + size_t nread; + /** the length of the input query bytes */ + size_t inlen; + /** the input bytes */ + uint8_t* in; + /** does the stream have an answer to send */ + uint8_t is_answer_available; + /** the answer bytes sent, up to outlen+2. 
*/ + size_t nwrite; + /** the length of the output answer bytes */ + size_t outlen; + /** the output length in network wireformat */ + uint16_t outlen_wire; + /** the output packet bytes */ + uint8_t* out; + /** if the stream is on the write list */ + uint8_t on_write_list; + /** the prev and next on the write list, if on the list */ + struct doq_stream* write_prev, *write_next; +}; + +/** doq application error code that is sent when a stream is closed */ +#define DOQ_APP_ERROR_CODE 1 + +/** + * Create the doq connection. + * @param c: the comm point for the listening doq socket. + * @param paddr: with remote and local address and ifindex for the + * connection destination. This is where packets are sent. + * @param dcid: the dcid, Destination Connection ID. + * @param dcidlen: length of dcid. + * @param version: client chosen version. + * @return new doq connection or NULL on allocation failure. + */ +struct doq_conn* doq_conn_create(struct comm_point* c, + struct doq_pkt_addr* paddr, const uint8_t* dcid, size_t dcidlen, + uint32_t version); + +/** + * Delete the doq connection structure. + * @param conn: to delete. + * @param table: with memory size. + */ +void doq_conn_delete(struct doq_conn* conn, struct doq_table* table); + +/** compare function of doq_conn */ +int doq_conn_cmp(const void* key1, const void* key2); + +/** compare function of doq_conid */ +int doq_conid_cmp(const void* key1, const void* key2); + +/** compare function of doq_timer */ +int doq_timer_cmp(const void* key1, const void* key2); + +/** compare function of doq_stream */ +int doq_stream_cmp(const void* key1, const void* key2); + +/** setup the doq_socket server tls context */ +int doq_socket_setup_ctx(struct doq_server_socket* doq_socket); + +/** setup the doq connection callbacks, and settings. 
*/ +int doq_conn_setup(struct doq_conn* conn, uint8_t* scid, size_t scidlen, + uint8_t* ocid, size_t ocidlen, const uint8_t* token, size_t tokenlen); + +/** fill a buffer with random data */ +void doq_fill_rand(struct ub_randstate* rnd, uint8_t* buf, size_t len); + +/** delete a doq_conid */ +void doq_conid_delete(struct doq_conid* conid); + +/** add a connection id to the doq_conn. + * caller must hold doq_table.conid_lock. */ +int doq_conn_associate_conid(struct doq_conn* conn, uint8_t* data, + size_t datalen); + +/** remove a connection id from the doq_conn. + * caller must hold doq_table.conid_lock. */ +void doq_conn_dissociate_conid(struct doq_conn* conn, const uint8_t* data, + size_t datalen); + +/** initial setup to link current connection ids to the doq_conn */ +int doq_conn_setup_conids(struct doq_conn* conn); + +/** remove the connection ids from the doq_conn. + * caller must hold doq_table.conid_lock. */ +void doq_conn_clear_conids(struct doq_conn* conn); + +/** find a conid in the doq_conn connection. + * caller must hold table.conid_lock. */ +struct doq_conid* doq_conid_find(struct doq_table* doq_table, + const uint8_t* data, size_t datalen); + +/** receive a packet for a connection */ +int doq_conn_recv(struct comm_point* c, struct doq_pkt_addr* paddr, + struct doq_conn* conn, struct ngtcp2_pkt_info* pi, int* err_retry, + int* err_drop); + +/** send packets for a connection */ +int doq_conn_write_streams(struct comm_point* c, struct doq_conn* conn, + int* err_drop); + +/** send the close packet for the connection, perhaps again. */ +int doq_conn_send_close(struct comm_point* c, struct doq_conn* conn); + +/** delete doq stream */ +void doq_stream_delete(struct doq_stream* stream); + +/** doq read a connection key from repinfo. It is not malloced, but points + * into the repinfo for the dcid. 
*/ +void doq_conn_key_from_repinfo(struct doq_conn_key* key, + struct comm_reply* repinfo); + +/** doq find a stream in the connection */ +struct doq_stream* doq_stream_find(struct doq_conn* conn, int64_t stream_id); + +/** doq shutdown the stream. */ +int doq_stream_close(struct doq_conn* conn, struct doq_stream* stream, + int send_shutdown); + +/** send reply for a connection */ +int doq_stream_send_reply(struct doq_conn* conn, struct doq_stream* stream, + struct sldns_buffer* buf); + +/** the connection has write interest, wants to write packets */ +void doq_conn_write_enable(struct doq_conn* conn); + +/** the connection has no write interest, does not want to write packets */ +void doq_conn_write_disable(struct doq_conn* conn); + +/** set the connection on or off the write list, depending on write interest */ +void doq_conn_set_write_list(struct doq_table* table, struct doq_conn* conn); + +/** doq remove the connection from the write list */ +void doq_conn_write_list_remove(struct doq_table* table, + struct doq_conn* conn); + +/** doq get the first conn from the write list, if any, popped from list. + * Locks the conn that is returned. */ +struct doq_conn* doq_table_pop_first(struct doq_table* table); + +/** + * doq check if the timer for the conn needs to be changed. + * @param conn: connection, caller must hold lock on it. + * @param tv: time value, absolute time, returned. + * @return true if timer needs to be set to tv, false if no change is needed + * to the timer. The timer is already set to the right time in that case. + */ +int doq_conn_check_timer(struct doq_conn* conn, struct timeval* tv); + +/** doq remove timer from tree */ +void doq_timer_tree_remove(struct doq_table* table, struct doq_timer* timer); + +/** doq remove timer from list */ +void doq_timer_list_remove(struct doq_table* table, struct doq_timer* timer); + +/** doq unset the timer if it was set. 
*/ +void doq_timer_unset(struct doq_table* table, struct doq_timer* timer); + +/** doq set the timer and add it. */ +void doq_timer_set(struct doq_table* table, struct doq_timer* timer, + struct doq_server_socket* worker_doq_socket, struct timeval* tv); + +/** doq find a timeout in the timer tree */ +struct doq_timer* doq_timer_find_time(struct doq_table* table, + struct timeval* tv); + +/** doq handle timeout for a connection. Pass conn locked. Returns false for + * deletion. */ +int doq_conn_handle_timeout(struct doq_conn* conn); + +/** doq add size to the current quic buffer counter */ +void doq_table_quic_size_add(struct doq_table* table, size_t add); + +/** doq subtract size from the current quic buffer counter */ +void doq_table_quic_size_subtract(struct doq_table* table, size_t subtract); + +/** doq check if mem is available for quic. */ +int doq_table_quic_size_available(struct doq_table* table, + struct config_file* cfg, size_t mem); + +/** doq get the quic size value */ +size_t doq_table_quic_size_get(struct doq_table* table); +#endif /* HAVE_NGTCP2 */ + char* set_ip_dscp(int socket, int addrfamily, int ds); /** for debug and profiling purposes only @@ -459,4 +846,14 @@ char* set_ip_dscp(int socket, int addrfamily, int ds); */ void verbose_print_unbound_socket(struct unbound_socket* ub_sock); +/** event callback for testcode/doqclient */ +void doq_client_event_cb(int fd, short event, void* arg); + +/** timer event callback for testcode/doqclient */ +void doq_client_timer_cb(int fd, short event, void* arg); + +#ifdef HAVE_NGTCP2 +/** get a timestamp in nanoseconds */ +ngtcp2_tstamp doq_get_timestamp_nanosec(void); +#endif #endif /* LISTEN_DNSPORT_H */ diff --git a/smallapp/unbound-control-setup.sh.in b/smallapp/unbound-control-setup.sh.in index 91458af36..4a358f6bd 100644 --- a/smallapp/unbound-control-setup.sh.in +++ b/smallapp/unbound-control-setup.sh.in @@ -104,7 +104,7 @@ while getopts 'd:hr' arg; do done shift $((OPTIND - 1)) -if ! 
openssl </dev/null >/dev/null 2>&1; then +if ! openssl version </dev/null >/dev/null 2>&1; then echo "$0 requires openssl to be installed for keys/certificates generation." >&2 exit 1 fi diff --git a/smallapp/unbound-control.c b/smallapp/unbound-control.c index 21e7eb82d..b8479e9ab 100644 --- a/smallapp/unbound-control.c +++ b/smallapp/unbound-control.c @@ -293,6 +293,9 @@ static void print_mem(struct ub_shm_stat_info* shm_stat, PR_LL("mem.streamwait", s->svr.mem_stream_wait); PR_LL("mem.http.query_buffer", s->svr.mem_http2_query_buffer); PR_LL("mem.http.response_buffer", s->svr.mem_http2_response_buffer); +#ifdef HAVE_NGTCP2 + PR_LL("mem.quic", s->svr.mem_quic); +#endif } /** print histogram */ @@ -359,6 +362,9 @@ static void print_extended(struct ub_stats_info* s, int inhibit_zero) PR_UL("num.query.tls_resume", s->svr.qtls_resume); PR_UL("num.query.ipv6", s->svr.qipv6); PR_UL("num.query.https", s->svr.qhttps); +#ifdef HAVE_NGTCP2 + PR_UL("num.query.quic", s->svr.qquic); +#endif /* flags */ PR_UL("num.query.flags.QR", s->svr.qbit_QR); diff --git a/smallapp/unbound-host.c b/smallapp/unbound-host.c index 8bffe46ce..edca6b9a3 100644 --- a/smallapp/unbound-host.c +++ b/smallapp/unbound-host.c @@ -50,6 +50,8 @@ #undef calloc #undef free #undef realloc +#undef reallocarray +#undef strdup #endif #ifdef UNBOUND_ALLOC_LITE #undef malloc @@ -492,7 +494,11 @@ int main(int argc, char* argv[]) if(strcmp(use_syslog, "yes") == 0) /* disable use-syslog */ check_ub_res(ub_ctx_set_option(ctx, "use-syslog:", "no")); +#ifdef UNBOUND_ALLOC_STATS + unbound_stat_free_log(use_syslog, __FILE__, __LINE__, __func__); +#else free(use_syslog); +#endif } argc -= optind; argv += optind; diff --git a/smallapp/worker_cb.c b/smallapp/worker_cb.c index c68981735..1d71a0945 100644 --- a/smallapp/worker_cb.c +++ b/smallapp/worker_cb.c @@ -255,3 +255,19 @@ void dtio_mainfdcallback(int ATTR_UNUSED(fd), short ATTR_UNUSED(ev), log_assert(0); } #endif + +#ifdef HAVE_NGTCP2 +void doq_client_event_cb(int ATTR_UNUSED(fd), short 
ATTR_UNUSED(ev), + void* ATTR_UNUSED(arg)) +{ + log_assert(0); +} +#endif + +#ifdef HAVE_NGTCP2 +void doq_client_timer_cb(int ATTR_UNUSED(fd), short ATTR_UNUSED(ev), + void* ATTR_UNUSED(arg)) +{ + log_assert(0); +} +#endif diff --git a/testcode/checklocks.c b/testcode/checklocks.c index d1c877467..fdc1b8af1 100644 --- a/testcode/checklocks.c +++ b/testcode/checklocks.c @@ -68,6 +68,8 @@ static struct thr_check* thread_infos[THRDEBUG_MAX_THREADS]; int check_locking_order = 1; /** the pid of this runset, reasonably unique. */ static pid_t check_lock_pid; +/** the name of the output file */ +static const char* output_name = "ublocktrace"; /** * Should checklocks print a trace of the lock and unlock calls. * It uses fprintf for that because the log function uses a lock and that @@ -142,7 +144,8 @@ acquire_locklock(struct checked_lock* lock, /** add protected region */ void -lock_protect(void *p, void* area, size_t size) +lock_protect_place(void* p, void* area, size_t size, const char* def_func, + const char* def_file, int def_line, const char* def_area) { struct checked_lock* lock = *(struct checked_lock**)p; struct protected_area* e = (struct protected_area*)malloc( @@ -151,6 +154,10 @@ lock_protect(void *p, void* area, size_t size) fatal_exit("lock_protect: out of memory"); e->region = area; e->size = size; + e->def_func = def_func; + e->def_file = def_file; + e->def_line = def_line; + e->def_area = def_area; e->hold = malloc(size); if(!e->hold) fatal_exit("lock_protect: out of memory"); @@ -203,6 +210,9 @@ prot_check(struct checked_lock* lock, if(memcmp(p->hold, p->region, p->size) != 0) { log_hex("memory prev", p->hold, p->size); log_hex("memory here", p->region, p->size); + log_err("lock_protect on %s %s:%d %s failed", + p->def_func, p->def_file, p->def_line, + p->def_area); lock_error(lock, func, file, line, "protected area modified"); } @@ -675,13 +685,19 @@ checklock_unlock(enum check_lock_type type, struct checked_lock* lock, } } +void 
+checklock_set_output_name(const char* name) +{ + output_name = name; +} + /** open order info debug file, thr->num must be valid */ static void open_lockorder(struct thr_check* thr) { char buf[24]; time_t t; - snprintf(buf, sizeof(buf), "ublocktrace.%d", thr->num); + snprintf(buf, sizeof(buf), "%s.%d", output_name, thr->num); thr->order_info = fopen(buf, "w"); if(!thr->order_info) fatal_exit("could not open %s: %s", buf, strerror(errno)); diff --git a/testcode/checklocks.h b/testcode/checklocks.h index 61cc6fb0c..7ebc2f984 100644 --- a/testcode/checklocks.h +++ b/testcode/checklocks.h @@ -90,6 +90,14 @@ struct protected_area { void* hold; /** next protected area in list */ struct protected_area* next; + /** the place where the lock_protect is made, at init. */ + const char* def_func; + /** the file where the lock_protect is made */ + const char* def_file; + /** the line number where the lock_protect is made */ + int def_line; + /** the text string for the area that is protected, at init call. */ + const char* def_area; }; /** @@ -181,12 +189,19 @@ struct checked_lock { * It demangles the lock itself (struct checked_lock**). * @param area: ptr to mem. * @param size: length of area. + * @param def_func: function where the lock_protect() line is. + * @param def_file: file where the lock_protect() line is. + * @param def_line: line where the lock_protect() line is. + * @param def_area: area string * You can call it multiple times with the same lock to give several areas. * Call it when you are done initializing the area, since it will be copied * at this time and protected right away against unauthorised changes until * the next lock() call is done. 
*/ -void lock_protect(void* lock, void* area, size_t size); +void lock_protect_place(void* lock, void* area, size_t size, + const char* def_func, const char* def_file, int def_line, + const char* def_area); +#define lock_protect(lock, area, size) lock_protect_place(lock, area, size, __func__, __FILE__, __LINE__, #area) /** * Remove protected area from lock. @@ -203,6 +218,13 @@ void lock_unprotect(void* lock, void* area); */ size_t lock_get_mem(void* lock); +/** + * Set the output name, prefix, of the lock check output file(s). + * Call it before the checklock_start or thread creation. Pass a fixed string. + * @param name: string to use for output data file names. + */ +void checklock_set_output_name(const char* name); + /** * Initialise checklock. Sets up internal debug structures. */ diff --git a/testcode/doqclient.c b/testcode/doqclient.c new file mode 100644 index 000000000..1a2fd4183 --- /dev/null +++ b/testcode/doqclient.c @@ -0,0 +1,2701 @@ +/* + * testcode/doqclient.c - debug program. Perform multiple DNS queries using DoQ. + * + * Copyright (c) 2022, NLnet Labs. All rights reserved. + * + * This software is open source. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * Neither the name of the NLNET LABS nor the names of its contributors may + * be used to endorse or promote products derived from this software without + * specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * \file + * + * Simple DNS-over-QUIC client. For testing and debugging purposes. + * No authentication of TLS cert. + */ + +#include "config.h" +#ifdef HAVE_GETOPT_H +#include <getopt.h> +#endif + +#ifdef HAVE_NGTCP2 +#include <ngtcp2/ngtcp2.h> +#include <ngtcp2/ngtcp2_crypto.h> +#ifdef HAVE_NGTCP2_NGTCP2_CRYPTO_QUICTLS_H +#include <ngtcp2/ngtcp2_crypto_quictls.h> +#else +#include <ngtcp2/ngtcp2_crypto_openssl.h> +#endif +#include <openssl/ssl.h> +#include <openssl/rand.h> +#ifdef HAVE_TIME_H +#include <time.h> +#endif +#include <sys/time.h> +#include "util/locks.h" +#include "util/net_help.h" +#include "sldns/sbuffer.h" +#include "sldns/str2wire.h" +#include "sldns/wire2str.h" +#include "util/data/msgreply.h" +#include "util/data/msgencode.h" +#include "util/data/msgparse.h" +#include "util/data/dname.h" +#include "util/random.h" +#include "util/ub_event.h" +struct doq_client_stream_list; +struct doq_client_stream; + +/** the local client data for the DoQ connection */ +struct doq_client_data { + /** file descriptor */ + int fd; + /** the event base for the events */ + struct ub_event_base* base; + /** the ub event */ + struct ub_event* ev; + /** the expiry timer */ + struct ub_event* expire_timer; + /** is the expire_timer added */ + int expire_timer_added; + /** the ngtcp2 connection information */ + struct ngtcp2_conn* conn; + /** 
random state */ + struct ub_randstate* rnd; + /** server connected to as a string */ + const char* svr; + /** the static secret */ + uint8_t* static_secret_data; + /** the static secret size */ + size_t static_secret_size; + /** destination address sockaddr */ + struct sockaddr_storage dest_addr; + /** length of dest addr */ + socklen_t dest_addr_len; + /** local address sockaddr */ + struct sockaddr_storage local_addr; + /** length of local addr */ + socklen_t local_addr_len; + /** SSL context */ + SSL_CTX* ctx; + /** SSL object */ + SSL* ssl; +#ifdef HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_CLIENT_CONTEXT + /** the connection reference for ngtcp2_conn and userdata in ssl */ + struct ngtcp2_crypto_conn_ref conn_ref; +#endif + /** the quic version to use */ + uint32_t quic_version; + /** the last error */ +#ifdef HAVE_NGTCP2_CCERR_DEFAULT + struct ngtcp2_ccerr ccerr; +#else + struct ngtcp2_connection_close_error last_error; +#endif + /** the recent tls alert error code */ + uint8_t tls_alert; + /** the buffer for packet operations */ + struct sldns_buffer* pkt_buf; + /** The list of queries to start. They have no stream associated. + * Once they do, they move to the send list. */ + struct doq_client_stream_list* query_list_start; + /** The list of queries to send. They have a stream, and they are + * sending data. Data could also be received, like errors. */ + struct doq_client_stream_list* query_list_send; + /** The list of queries to receive. They have a stream, and the + * send is done, it is possible to read data. */ + struct doq_client_stream_list* query_list_receive; + /** The list of queries that are stopped. They have no stream + * active any more. Write and read are done. The query is done, + * and it may be in error and then have no answer or partial answer. 
*/ + struct doq_client_stream_list* query_list_stop; + /** is there a blocked packet in the blocked_pkt buffer */ + int have_blocked_pkt; + /** store blocked packet, a packet that could not be sent on the + * nonblocking socket. */ + struct sldns_buffer* blocked_pkt; + /** ecn info for the blocked packet */ + struct ngtcp2_pkt_info blocked_pkt_pi; + /** the congestion control algorithm */ + ngtcp2_cc_algo cc_algo; + /** the transport parameters file, for early data transmission */ + const char* transport_file; + /** the tls session file, for session resumption */ + const char* session_file; + /** if early data is enabled for the connection */ + int early_data_enabled; + /** how quiet is the output */ + int quiet; + /** the configured port for the destination */ + int port; +}; + +/** the local client stream list, for appending streams to */ +struct doq_client_stream_list { + /** first and last members of the list */ + struct doq_client_stream* first, *last; +}; + +/** the local client data for a DoQ stream */ +struct doq_client_stream { + /** next stream in list, and prev in list */ + struct doq_client_stream* next, *prev; + /** the data buffer */ + uint8_t* data; + /** length of the data buffer */ + size_t data_len; + /** if the client query has a stream, that is active, associated with + * it. The stream_id is in stream_id. */ + int has_stream; + /** the stream id */ + int64_t stream_id; + /** data written position */ + size_t nwrite; + /** the data length for write, in network format */ + uint16_t data_tcplen; + /** if the write of the query data is done. That means the + * write channel has FIN, is closed for writing. 
*/ + int write_is_done; + /** data read position */ + size_t nread; + /** the answer length, in network byte order */ + uint16_t answer_len; + /** the answer buffer */ + struct sldns_buffer* answer; + /** the answer is complete */ + int answer_is_complete; + /** the query has an error, it has no answer, or no complete answer */ + int query_has_error; + /** if the query is done */ + int query_is_done; +}; + +#ifndef HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_CLIENT_CONTEXT +/** the quic method struct, must remain valid during the QUIC connection. */ +static SSL_QUIC_METHOD quic_method; +#endif + +/** Get the connection ngtcp2_conn from the ssl app data + * ngtcp2_crypto_conn_ref */ +static ngtcp2_conn* conn_ref_get_conn(ngtcp2_crypto_conn_ref* conn_ref) +{ + struct doq_client_data* data = (struct doq_client_data*) + conn_ref->user_data; + return data->conn; +} + +static void +set_app_data(SSL* ssl, struct doq_client_data* data) +{ +#ifdef HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_CLIENT_CONTEXT + data->conn_ref.get_conn = &conn_ref_get_conn; + data->conn_ref.user_data = data; + SSL_set_app_data(ssl, &data->conn_ref); +#else + SSL_set_app_data(ssl, data); +#endif +} + +static struct doq_client_data* +get_app_data(SSL* ssl) +{ + struct doq_client_data* data; +#ifdef HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_CLIENT_CONTEXT + data = (struct doq_client_data*)((struct ngtcp2_crypto_conn_ref*) + SSL_get_app_data(ssl))->user_data; +#else + data = (struct doq_client_data*) SSL_get_app_data(ssl); +#endif + return data; +} + + + +/** write handle routine */ +static void on_write(struct doq_client_data* data); +/** update the timer */ +static void update_timer(struct doq_client_data* data); +/** disconnect we are done */ +static void disconnect(struct doq_client_data* data); +/** fetch and write the transport file */ +static void early_data_write_transport(struct doq_client_data* data); + +/** usage of doqclient */ +static void usage(char* argv[]) +{ + printf("usage: %s [options] name type 
class ...\n", argv[0]);
+	printf(" sends the name-type-class queries over "
+		"DNS-over-QUIC.\n");
+	printf("-s server IP address to send the queries to, "
+		"default: 127.0.0.1\n");
+	printf("-p Port to connect to, default: %d\n",
+		UNBOUND_DNS_OVER_QUIC_PORT);
+	printf("-v verbose output\n");
+	printf("-q quiet, short output of answer\n");
+	printf("-x file transport file, for read/write of transport parameters.\n\t\tIf it exists, it is used to send early data. It is then\n\t\twritten to contain the last used transport parameters.\n\t\tAlso -y must be enabled for early data to succeed.\n");
+	printf("-y file session file, for read/write of TLS session. If it exists,\n\t\tit is used for TLS session resumption. It is then written\n\t\tto contain the last session used.\n\t\tOn its own, without also -x, resumes TLS session.\n");
+	printf("-h This help text\n");
+	exit(1);
+}
+
+/** Parse the server string and port into data->dest_addr.
+ * Prints a message and exits the program if the string is not an
+ * IP address that ipstrtoaddr accepts. */
+static void
+get_dest_addr(struct doq_client_data* data, const char* svr, int port)
+{
+	if(!ipstrtoaddr(svr, port, &data->dest_addr, &data->dest_addr_len)) {
+		printf("fatal: bad server specs '%s'\n", svr);
+		exit(1);
+	}
+}
+
+/** Open a UDP socket and connect it to data->dest_addr.
+ * The address family follows the destination address. The socket is
+ * switched to nonblocking after the connect call.
+ * @param data: client data, dest_addr and dest_addr_len must be set.
+ * @return the file descriptor. Exits the program on failure. */
+static int
+open_svr_udp(struct doq_client_data* data)
+{
+	int fd = -1;
+	int r;
+	fd = socket(addr_is_ip6(&data->dest_addr, data->dest_addr_len)?
+		PF_INET6:PF_INET, SOCK_DGRAM, 0);
+	if(fd == -1) {
+		perror("socket() error");
+		exit(1);
+	}
+	r = connect(fd, (struct sockaddr*)&data->dest_addr,
+		data->dest_addr_len);
+	/* connect() reports failure as -1 with the reason in errno, the
+	 * return value itself is never EINPROGRESS. */
+	if(r < 0 && errno != EINPROGRESS) {
+		perror("connect() error");
+		exit(1);
+	}
+	fd_set_nonblock(fd);
+	return fd;
+}
+
+/** Fetch the local address of the connected socket data->fd into
+ * data->local_addr, and log both the local and destination address.
+ * Exits the program if getsockname fails. */
+static void
+get_local_addr(struct doq_client_data* data)
+{
+	memset(&data->local_addr, 0, sizeof(data->local_addr));
+	data->local_addr_len = (socklen_t)sizeof(data->local_addr);
+	if(getsockname(data->fd, (struct sockaddr*)&data->local_addr,
+		&data->local_addr_len) == -1) {
+		perror("getsockname() error");
+		exit(1);
+	}
+	log_addr(1, "local_addr", &data->local_addr, data->local_addr_len);
+	log_addr(1, "dest_addr", &data->dest_addr, data->dest_addr_len);
+}
+
+/** Create the wireformat query for a name, type and class.
+ * The query has ID 0, the RD flag set, and an EDNS record with the DO
+ * bit and udp size 4096 when the buffer has room for it.
+ * @param qname: query name string, the program exits if it fails to parse.
+ * @param qtype: type name string.
+ * @param qclass: class name string.
+ * @return newly allocated buffer with the query. */
+static sldns_buffer*
+make_query(char* qname, char* qtype, char* qclass)
+{
+	struct query_info qinfo;
+	struct edns_data edns;
+	sldns_buffer* buf = sldns_buffer_new(65553);
+	if(!buf) fatal_exit("out of memory");
+	qinfo.qname = sldns_str2wire_dname(qname, &qinfo.qname_len);
+	if(!qinfo.qname) {
+		printf("cannot parse query name: '%s'\n", qname);
+		exit(1);
+	}
+
+	qinfo.qtype = sldns_get_rr_type_by_name(qtype);
+	qinfo.qclass = sldns_get_rr_class_by_name(qclass);
+	qinfo.local_alias = NULL;
+
+	qinfo_query_encode(buf, &qinfo); /* flips buffer */
+	free(qinfo.qname);
+	sldns_buffer_write_u16_at(buf, 0, 0x0000);
+	sldns_buffer_write_u16_at(buf, 2, BIT_RD);
+	memset(&edns, 0, sizeof(edns));
+	edns.edns_present = 1;
+	edns.bits = EDNS_DO;
+	edns.udp_size = 4096;
+	if(sldns_buffer_capacity(buf) >=
+		sldns_buffer_limit(buf)+calc_edns_field_size(&edns))
+		attach_edns_record(buf, &edns);
+	return buf;
+}
+
+/** create client stream structure */
+static struct doq_client_stream*
+client_stream_create(struct sldns_buffer* query_data)
+{
+	struct doq_client_stream* str = calloc(1, sizeof(*str));
+	if(!str)
+		fatal_exit("calloc failed: out of memory");
+	str->data =
memdup(sldns_buffer_begin(query_data), + sldns_buffer_limit(query_data)); + if(!str->data) + fatal_exit("alloc data failed: out of memory"); + str->data_len = sldns_buffer_limit(query_data); + str->stream_id = -1; + return str; +} + +/** free client stream structure */ +static void +client_stream_free(struct doq_client_stream* str) +{ + if(!str) + return; + free(str->data); + sldns_buffer_free(str->answer); + free(str); +} + +/** setup the stream to start the write process */ +static void +client_stream_start_setup(struct doq_client_stream* str, int64_t stream_id) +{ + str->has_stream = 1; + str->stream_id = stream_id; + str->nwrite = 0; + str->nread = 0; + str->answer_len = 0; + str->query_is_done = 0; + str->answer_is_complete = 0; + str->query_has_error = 0; + if(str->answer) { + sldns_buffer_free(str->answer); + str->answer = NULL; + } +} + +/** Return string for log purposes with query name. */ +static char* +client_stream_string(struct doq_client_stream* str) +{ + char* s; + size_t dname_len; + char dname[256], tpstr[32], result[256+32+16]; + uint16_t tp; + if(str->data_len <= LDNS_HEADER_SIZE) { + s = strdup("query_with_no_question"); + if(!s) + fatal_exit("strdup failed: out of memory"); + return s; + } + dname_len = dname_valid(str->data+LDNS_HEADER_SIZE, + str->data_len-LDNS_HEADER_SIZE); + if(!dname_len) { + s = strdup("query_dname_not_valid"); + if(!s) + fatal_exit("strdup failed: out of memory"); + return s; + } + (void)sldns_wire2str_dname_buf(str->data+LDNS_HEADER_SIZE, dname_len, + dname, sizeof(dname)); + tp = sldns_wirerr_get_type(str->data+LDNS_HEADER_SIZE, + str->data_len-LDNS_HEADER_SIZE, dname_len); + (void)sldns_wire2str_type_buf(tp, tpstr, sizeof(tpstr)); + snprintf(result, sizeof(result), "%s %s", dname, tpstr); + s = strdup(result); + if(!s) + fatal_exit("strdup failed: out of memory"); + return s; +} + +/** create query stream list */ +static struct doq_client_stream_list* +stream_list_create(void) +{ + struct doq_client_stream_list* list 
= calloc(1, sizeof(*list)); + if(!list) + fatal_exit("calloc failed: out of memory"); + return list; +} + +/** free the query stream list */ +static void +stream_list_free(struct doq_client_stream_list* list) +{ + struct doq_client_stream* str; + if(!list) + return; + str = list->first; + while(str) { + struct doq_client_stream* next = str->next; + client_stream_free(str); + str = next; + } + free(list); +} + +/** append item to list */ +static void +stream_list_append(struct doq_client_stream_list* list, + struct doq_client_stream* str) +{ + if(list->last) { + str->prev = list->last; + list->last->next = str; + } else { + str->prev = NULL; + list->first = str; + } + str->next = NULL; + list->last = str; +} + +/** delete the item from the list */ +static void +stream_list_delete(struct doq_client_stream_list* list, + struct doq_client_stream* str) +{ + if(str->next) { + str->next->prev = str->prev; + } else { + list->last = str->prev; + } + if(str->prev) { + str->prev->next = str->next; + } else { + list->first = str->next; + } + str->prev = NULL; + str->next = NULL; +} + +/** move the item from list1 to list2 */ +static void +stream_list_move(struct doq_client_stream* str, + struct doq_client_stream_list* list1, + struct doq_client_stream_list* list2) +{ + stream_list_delete(list1, str); + stream_list_append(list2, str); +} + +/** allocate stream data buffer, then answer length is complete */ +static void +client_stream_datalen_complete(struct doq_client_stream* str) +{ + verbose(1, "answer length %d", (int)ntohs(str->answer_len)); + str->answer = sldns_buffer_new(ntohs(str->answer_len)); + if(!str->answer) + fatal_exit("sldns_buffer_new failed: out of memory"); + sldns_buffer_set_limit(str->answer, ntohs(str->answer_len)); +} + +/** print the answer rrs */ +static void +print_answer_rrs(uint8_t* pkt, size_t pktlen) +{ + char buf[65535]; + char* str; + size_t str_len; + int i, qdcount, ancount; + uint8_t* data = pkt; + size_t data_len = pktlen; + int comprloop = 
0; + if(data_len < LDNS_HEADER_SIZE) + return; + qdcount = LDNS_QDCOUNT(data); + ancount = LDNS_ANCOUNT(data); + data += LDNS_HEADER_SIZE; + data_len -= LDNS_HEADER_SIZE; + + for(i=0; iquery_has_error) { + char* logs = client_stream_string(str); + printf("%s has error, there is no answer\n", logs); + free(logs); + return; + } + if(sldns_buffer_limit(str->answer) < LDNS_HEADER_SIZE) { + char* logs = client_stream_string(str); + printf("%s received short packet, smaller than header\n", + logs); + free(logs); + return; + } + rcode = LDNS_RCODE_WIRE(sldns_buffer_begin(str->answer)); + if(rcode != 0) { + char* logs = client_stream_string(str); + char rc[16]; + (void)sldns_wire2str_rcode_buf(rcode, rc, sizeof(rc)); + printf("%s rcode %s\n", logs, rc); + free(logs); + return; + } + ancount = LDNS_ANCOUNT(sldns_buffer_begin(str->answer)); + if(ancount == 0) { + char* logs = client_stream_string(str); + printf("%s nodata answer\n", logs); + free(logs); + return; + } + print_answer_rrs(sldns_buffer_begin(str->answer), + sldns_buffer_limit(str->answer)); +} + +/** print the stream output answer */ +static void +client_stream_print_long(struct doq_client_data* data, + struct doq_client_stream* str) +{ + char* s; + if(str->query_has_error) { + char* logs = client_stream_string(str); + printf("%s has error, there is no answer\n", logs); + free(logs); + return; + } + s = sldns_wire2str_pkt(sldns_buffer_begin(str->answer), + sldns_buffer_limit(str->answer)); + printf("%s", (s?s:";sldns_wire2str_pkt failed\n")); + printf(";; SERVER: %s %d\n", data->svr, data->port); + free(s); +} + +/** the stream has completed the data */ +static void +client_stream_data_complete(struct doq_client_stream* str) +{ + verbose(1, "received all answer content"); + if(verbosity > 0) { + char* logs = client_stream_string(str); + char* s; + log_buf(1, "received answer", str->answer); + s = sldns_wire2str_pkt(sldns_buffer_begin(str->answer), + sldns_buffer_limit(str->answer)); + if(!s) verbose(1, "could 
not sldns_wire2str_pkt");
+		else verbose(1, "query %s received:\n%s", logs, s);
+		free(s);
+		free(logs);
+	}
+	str->answer_is_complete = 1;
+}
+
+/** Mark the query as failed. With verbosity on, log how much of the
+ * answer had been received when the stream ended. */
+static void
+client_stream_answer_error(struct doq_client_stream* str)
+{
+	if(verbosity > 0) {
+		char* qstr = client_stream_string(str);
+		if(!str->answer) {
+			verbose(1, "query %s has an error. received no data.",
+				qstr);
+		} else {
+			verbose(1, "query %s has an error. received %d/%d bytes.",
+				qstr, (int)sldns_buffer_position(str->answer),
+				(int)sldns_buffer_limit(str->answer));
+		}
+		free(qstr);
+	}
+	str->query_has_error = 1;
+}
+
+/** Take in stream bytes for a query. The first two bytes on the stream
+ * are the length of the DNS message, they are collected in answer_len.
+ * The bytes after that go into the answer buffer, until it is full.
+ * @param str: the client stream.
+ * @param data: received bytes.
+ * @param datalen: number of received bytes. */
+static void
+client_stream_recv_data(struct doq_client_stream* str, const uint8_t* data,
+	size_t datalen)
+{
+	int stored = 0;
+
+	/* collect the two octet length prefix, it can arrive in pieces */
+	if(str->nread < 2) {
+		size_t need = 2-str->nread;
+		size_t take = (datalen > need) ? need : datalen;
+		memmove(((uint8_t*)&str->answer_len)+str->nread, data, take);
+		str->nread += take;
+		data += take;
+		datalen -= take;
+		/* with the length known the answer buffer is allocated */
+		if(str->nread == 2)
+			client_stream_datalen_complete(str);
+	}
+
+	/* copy message content, at most what fits in the answer buffer */
+	if(datalen > 0) {
+		size_t room = sldns_buffer_remaining(str->answer);
+		size_t take = (datalen > room) ? room : datalen;
+		if(take > 0) {
+			sldns_buffer_write(str->answer, data, take);
+			str->nread += take;
+			data += take;
+			datalen -= take;
+			stored = 1;
+		}
+	}
+
+	/* what is left over does not belong to the DNS message */
+	if(datalen > 0) {
+		verbose(1, "extra bytes after end of DNS length");
+		if(verbosity > 0)
+			log_hex("extradata", (void*)data, datalen);
+	}
+
+	/* the answer is complete once prefix and content are all read */
+	if(stored && str->nread >= (size_t)(ntohs(str->answer_len))+2)
+		client_stream_data_complete(str);
+}
+
+/** receive FIN from remote end on client stream, no more data to be
+ * received on the stream.
*/ +static void +client_stream_recv_fin(struct doq_client_data* data, + struct doq_client_stream* str, int is_fin) +{ + if(verbosity > 0) { + char* logs = client_stream_string(str); + if(is_fin) + verbose(1, "query %s: received FIN from remote", logs); + else + verbose(1, "query %s: stream reset from remote", logs); + free(logs); + } + if(str->write_is_done) + stream_list_move(str, data->query_list_receive, + data->query_list_stop); + else + stream_list_move(str, data->query_list_send, + data->query_list_stop); + if(!str->answer_is_complete) { + client_stream_answer_error(str); + } + str->query_is_done = 1; + if(data->quiet) + client_stream_print_short(str); + else client_stream_print_long(data, str); + if(data->query_list_send->first==NULL && + data->query_list_receive->first==NULL) + disconnect(data); +} + +/** fill a buffer with random data */ +static void fill_rand(struct ub_randstate* rnd, uint8_t* buf, size_t len) +{ + if(RAND_bytes(buf, len) != 1) { + size_t i; + for(i=0; istatic_secret_data = malloc(len); + if(!data->static_secret_data) + fatal_exit("malloc failed: out of memory"); + data->static_secret_size = len; + fill_rand(data->rnd, data->static_secret_data, len); +} + +/** fill cid structure with random data */ +static void cid_randfill(struct ngtcp2_cid* cid, size_t datalen, + struct ub_randstate* rnd) +{ + uint8_t buf[32]; + if(datalen > sizeof(buf)) + datalen = sizeof(buf); + fill_rand(rnd, buf, datalen); + ngtcp2_cid_init(cid, buf, datalen); +} + +/** send buf on the client stream */ +static int +client_bidi_stream(struct doq_client_data* data, int64_t* ret_stream_id, + void* stream_user_data) +{ + int64_t stream_id; + int rv; + + /* open new bidirectional stream */ + rv = ngtcp2_conn_open_bidi_stream(data->conn, &stream_id, + stream_user_data); + if(rv != 0) { + if(rv == NGTCP2_ERR_STREAM_ID_BLOCKED) { + /* no bidi stream count for this new stream */ + return 0; + } + fatal_exit("could not ngtcp2_conn_open_bidi_stream: %s", + 
ngtcp2_strerror(rv));
+	}
+	*ret_stream_id = stream_id;
+	return 1;
+}
+
+/** See if we can start query streams, by creating bidirectional streams
+ * on the QUIC transport for them. Queries are taken from the start list
+ * in order, and moved to the send list once they have a stream. The
+ * loop stops when no more streams can be opened. */
+static void
+query_streams_start(struct doq_client_data* data)
+{
+	while(data->query_list_start->first) {
+		struct doq_client_stream* str = data->query_list_start->first;
+		int64_t stream_id = 0;
+		if(!client_bidi_stream(data, &stream_id, str)) {
+			/* no more bidi streams allowed */
+			break;
+		}
+		if(verbosity > 0) {
+			char* logs = client_stream_string(str);
+			verbose(1, "query %s start on bidi stream id %lld",
+				logs, (long long int)stream_id);
+			free(logs);
+		}
+		/* setup the stream to start */
+		client_stream_start_setup(str, stream_id);
+		/* move the query entry to the send list to write it */
+		stream_list_move(str, data->query_list_start,
+			data->query_list_send);
+	}
+}
+
+/** the rand callback routine from ngtcp2, fills dest with random bytes
+ * using the random state stored in the rand_ctx native_handle. */
+static void rand_cb(uint8_t* dest, size_t destlen,
+	const ngtcp2_rand_ctx* rand_ctx)
+{
+	struct ub_randstate* rnd = (struct ub_randstate*)
+		rand_ctx->native_handle;
+	fill_rand(rnd, dest, destlen);
+}
+
+/** the get_new_connection_id callback routine from ngtcp2.
+ * Creates a random connection id of cidlen bytes and derives the
+ * stateless reset token for it from the static secret.
+ * @return 0 on success, NGTCP2_ERR_CALLBACK_FAILURE on failure. */
+static int get_new_connection_id_cb(struct ngtcp2_conn* ATTR_UNUSED(conn),
+	struct ngtcp2_cid* cid, uint8_t* token, size_t cidlen, void* user_data)
+{
+	struct doq_client_data* data = (struct doq_client_data*)user_data;
+	cid_randfill(cid, cidlen, data->rnd);
+	if(ngtcp2_crypto_generate_stateless_reset_token(token,
+		data->static_secret_data, data->static_secret_size, cid) != 0)
+		return NGTCP2_ERR_CALLBACK_FAILURE;
+	return 0;
+}
+
+/** Move all the queries on the list back to the start list, and clear
+ * the stream state of every one of them, so that the query is sent
+ * again, in full, on a stream that is opened later. */
+static void
+early_data_requeue(struct doq_client_data* data,
+	struct doq_client_stream_list* list)
+{
+	while(list->first) {
+		struct doq_client_stream* str = list->first;
+		/* move it back to the start list */
+		stream_list_move(str, list, data->query_list_start);
+		str->has_stream = 0;
+		/* remove stream id, -1 like for a newly created query,
+		 * because 0 is a stream id that can be in use. */
+		str->stream_id = -1;
+		/* the query has to be written again from the start */
+		str->nwrite = 0;
+		str->write_is_done = 0;
+		/* initialise other members, in case they are altered,
+		 * but unlikely, because early streams are rejected. */
+		str->nread = 0;
+		str->answer_len = 0;
+		str->query_is_done = 0;
+		str->answer_is_complete = 0;
+		str->query_has_error = 0;
+		if(str->answer) {
+			sldns_buffer_free(str->answer);
+			str->answer = NULL;
+		}
+	}
+}
+
+/** handle that early data is rejected.
+ * The ngtcp2 connection is told about it, that discards the streams
+ * that were opened for early data. All queries that had such a stream
+ * are put back on the start list. */
+static void
+early_data_is_rejected(struct doq_client_data* data)
+{
+	int rv;
+	verbose(1, "early data was rejected by the server");
+#ifdef HAVE_NGTCP2_CONN_TLS_EARLY_DATA_REJECTED
+	rv = ngtcp2_conn_tls_early_data_rejected(data->conn);
+#else
+	rv = ngtcp2_conn_early_data_rejected(data->conn);
+#endif
+	if(rv != 0) {
+		log_err("ngtcp2_conn_early_data_rejected failed: %s",
+			ngtcp2_strerror(rv));
+		return;
+	}
+	/* move the streams back to the start state. The queries that
+	 * were already written completely are on the receive list, they
+	 * are lost with the early data as well. Those go first, since
+	 * they finished writing before the ones still on the send list. */
+	early_data_requeue(data, data->query_list_receive);
+	early_data_requeue(data, data->query_list_send);
+}
+
+/** the handshake completed callback from ngtcp2 */
+static int
+handshake_completed(ngtcp2_conn* ATTR_UNUSED(conn), void* user_data)
+{
+	struct doq_client_data* data = (struct doq_client_data*)user_data;
+	verbose(1, "handshake_completed callback");
+	verbose(1, "ngtcp2_conn_get_max_data_left is %d",
+		(int)ngtcp2_conn_get_max_data_left(data->conn));
+#ifdef HAVE_NGTCP2_CONN_GET_MAX_LOCAL_STREAMS_UNI
+	verbose(1, "ngtcp2_conn_get_max_local_streams_uni is %d",
+		(int)ngtcp2_conn_get_max_local_streams_uni(data->conn));
+#endif
+	verbose(1, "ngtcp2_conn_get_streams_uni_left is %d",
+		(int)ngtcp2_conn_get_streams_uni_left(data->conn));
+	verbose(1, "ngtcp2_conn_get_streams_bidi_left is %d",
+		(int)ngtcp2_conn_get_streams_bidi_left(data->conn));
+	verbose(1, "negotiated cipher name is %s",
+		SSL_get_cipher_name(data->ssl));
+	if(verbosity > 0) {
+		const unsigned char* alpn = NULL;
+		unsigned int alpnlen = 0;
+		char alpnstr[128];
+		SSL_get0_alpn_selected(data->ssl, &alpn, &alpnlen);
+		if(alpnlen > sizeof(alpnstr)-1)
+			alpnlen = sizeof(alpnstr)-1;
+		memmove(alpnstr, alpn, alpnlen);
+		alpnstr[alpnlen]=0;
+		verbose(1, "negotiated ALPN is '%s'", alpnstr);
+	}
+	/* The SSL_get_early_data_status call works after
the handshake + * completes. */ + if(data->early_data_enabled) { + if(SSL_get_early_data_status(data->ssl) != + SSL_EARLY_DATA_ACCEPTED) { + early_data_is_rejected(data); + } else { + verbose(1, "early data was accepted by the server"); + } + } +#ifdef HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_CLIENT_CONTEXT + if(data->transport_file) { + early_data_write_transport(data); + } +#endif + return 0; +} + +/** the extend_max_local_streams_bidi callback from ngtcp2 */ +static int +extend_max_local_streams_bidi(ngtcp2_conn* ATTR_UNUSED(conn), + uint64_t max_streams, void* user_data) +{ + struct doq_client_data* data = (struct doq_client_data*)user_data; + verbose(1, "extend_max_local_streams_bidi callback, %d max_streams", + (int)max_streams); + verbose(1, "ngtcp2_conn_get_max_data_left is %d", + (int)ngtcp2_conn_get_max_data_left(data->conn)); +#ifdef HAVE_NGTCP2_CONN_GET_MAX_LOCAL_STREAMS_UNI + verbose(1, "ngtcp2_conn_get_max_local_streams_uni is %d", + (int)ngtcp2_conn_get_max_local_streams_uni(data->conn)); +#endif + verbose(1, "ngtcp2_conn_get_streams_uni_left is %d", + (int)ngtcp2_conn_get_streams_uni_left(data->conn)); + verbose(1, "ngtcp2_conn_get_streams_bidi_left is %d", + (int)ngtcp2_conn_get_streams_bidi_left(data->conn)); + query_streams_start(data); + return 0; +} + +/** the recv_stream_data callback from ngtcp2 */ +static int +recv_stream_data(ngtcp2_conn* ATTR_UNUSED(conn), uint32_t flags, + int64_t stream_id, uint64_t offset, const uint8_t* data, + size_t datalen, void* user_data, void* stream_user_data) +{ + struct doq_client_data* doqdata = (struct doq_client_data*)user_data; + struct doq_client_stream* str = (struct doq_client_stream*) + stream_user_data; + verbose(1, "recv_stream_data stream %d offset %d datalen %d%s%s", + (int)stream_id, (int)offset, (int)datalen, + ((flags&NGTCP2_STREAM_DATA_FLAG_FIN)!=0?" FIN":""), +#ifdef NGTCP2_STREAM_DATA_FLAG_0RTT + ((flags&NGTCP2_STREAM_DATA_FLAG_0RTT)!=0?" 
0RTT":"")
+#else
+		((flags&NGTCP2_STREAM_DATA_FLAG_EARLY)!=0?" EARLY":"")
+#endif
+		);
+	if(verbosity > 0)
+		log_hex("data", (void*)data, datalen);
+	if(verbosity > 0) {
+		char* logs = client_stream_string(str);
+		verbose(1, "the stream_user_data is %s stream id %d, nread %d",
+			logs, (int)str->stream_id, (int)str->nread);
+		free(logs);
+	}
+
+	/* append the data, if there is data */
+	if(datalen > 0) {
+		client_stream_recv_data(str, data, datalen);
+	}
+	if((flags&NGTCP2_STREAM_DATA_FLAG_FIN)!=0) {
+		client_stream_recv_fin(doqdata, str, 1);
+	}
+	ngtcp2_conn_extend_max_stream_offset(doqdata->conn, stream_id, datalen);
+	ngtcp2_conn_extend_max_offset(doqdata->conn, datalen);
+	return 0;
+}
+
+/** the stream reset callback from ngtcp2. The reset is logged and the
+ * query is finished in the same way as for a FIN, but with is_fin 0.
+ * @return 0 always. */
+static int
+stream_reset(ngtcp2_conn* ATTR_UNUSED(conn), int64_t stream_id,
+	uint64_t final_size, uint64_t app_error_code, void* user_data,
+	void* stream_user_data)
+{
+	struct doq_client_data* doqdata = (struct doq_client_data*)user_data;
+	struct doq_client_stream* str = (struct doq_client_stream*)
+		stream_user_data;
+	verbose(1, "stream reset for stream %d final size %d app error code %d",
+		(int)stream_id, (int)final_size, (int)app_error_code);
+	client_stream_recv_fin(doqdata, str, 0);
+	return 0;
+}
+
+/** copy sockaddr into ngtcp2 addr.
+ * If ngtcp2 is built with its own generic sockaddr types, the address
+ * is converted field by field into a copy that is allocated with calloc,
+ * this routine does not free that copy. Otherwise the ngtcp2 addr points
+ * at the passed sockaddr storage, that then has to stay valid.
+ * @param ngaddr: the ngtcp2 address that is filled in.
+ * @param addr: the socket address.
+ * @param addrlen: length of addr. */
+static void
+copy_ngaddr(struct ngtcp2_addr* ngaddr, struct sockaddr_storage* addr,
+	socklen_t addrlen)
+{
+	if(addr_is_ip6(addr, addrlen)) {
+#if defined(NGTCP2_USE_GENERIC_SOCKADDR) || defined(NGTCP2_USE_GENERIC_IPV6_SOCKADDR)
+		struct sockaddr_in6* i6 = (struct sockaddr_in6*)addr;
+		struct ngtcp2_sockaddr_in6 a6;
+		ngaddr->addr = calloc(1, sizeof(a6));
+		if(!ngaddr->addr) fatal_exit("calloc failed: out of memory");
+		ngaddr->addrlen = sizeof(a6);
+		memset(&a6, 0, sizeof(a6));
+		a6.sin6_family = i6->sin6_family;
+		a6.sin6_port = i6->sin6_port;
+		a6.sin6_flowinfo = i6->sin6_flowinfo;
+		/* copy the address bytes, from the address of the member */
+		memmove(&a6.sin6_addr, &i6->sin6_addr, sizeof(a6.sin6_addr));
+		a6.sin6_scope_id = i6->sin6_scope_id;
+		memmove(ngaddr->addr, &a6, sizeof(a6));
+#else
+		ngaddr->addr = (ngtcp2_sockaddr*)addr;
+		ngaddr->addrlen = addrlen;
+#endif
+	} else {
+#ifdef NGTCP2_USE_GENERIC_SOCKADDR
+		struct sockaddr_in* i4 = (struct sockaddr_in*)addr;
+		struct ngtcp2_sockaddr_in a4;
+		ngaddr->addr = calloc(1, sizeof(a4));
+		if(!ngaddr->addr) fatal_exit("calloc failed: out of memory");
+		ngaddr->addrlen = sizeof(a4);
+		memset(&a4, 0, sizeof(a4));
+		a4.sin_family = i4->sin_family;
+		a4.sin_port = i4->sin_port;
+		/* copy the address bytes, from the address of the member */
+		memmove(&a4.sin_addr, &i4->sin_addr, sizeof(a4.sin_addr));
+		memmove(ngaddr->addr, &a4, sizeof(a4));
+#else
+		ngaddr->addr = (ngtcp2_sockaddr*)addr;
+		ngaddr->addrlen = addrlen;
+#endif
+	}
+}
+
+/** debug log printf for ngtcp2 connections, prints the message on
+ * stderr with a libngtcp2 prefix and a newline after it. */
+static void log_printf_for_doq(void* ATTR_UNUSED(user_data),
+	const char* fmt, ...)
+{
+	va_list ap;
+	va_start(ap, fmt);
+	fprintf(stderr, "libngtcp2: ");
+	vfprintf(stderr, fmt, ap);
+	va_end(ap);
+	fprintf(stderr, "\n");
+}
+
+/** get a timestamp in nanoseconds.
+ * Uses CLOCK_MONOTONIC if available, and CLOCK_REALTIME if that fails,
+ * or gettimeofday if there is no clock_gettime clock defined. If the
+ * time cannot be fetched the error is logged and the zeroed time value
+ * is used. */
+static ngtcp2_tstamp get_timestamp_nanosec(void)
+{
+#ifdef CLOCK_REALTIME
+	struct timespec tp;
+	memset(&tp, 0, sizeof(tp));
+#ifdef CLOCK_MONOTONIC
+	if(clock_gettime(CLOCK_MONOTONIC, &tp) == -1) {
+#endif
+		if(clock_gettime(CLOCK_REALTIME, &tp) == -1) {
+			log_err("clock_gettime failed: %s", strerror(errno));
+		}
+#ifdef CLOCK_MONOTONIC
+	}
+#endif
+	return ((uint64_t)tp.tv_sec)*((uint64_t)1000000000) +
+		((uint64_t)tp.tv_nsec);
+#else
+	struct timeval tv;
+	/* zero it, so that a failure does not return stack contents */
+	memset(&tv, 0, sizeof(tv));
+	if(gettimeofday(&tv, NULL) < 0) {
+		log_err("gettimeofday failed: %s", strerror(errno));
+	}
+	return ((uint64_t)tv.tv_sec)*((uint64_t)1000000000) +
+		((uint64_t)tv.tv_usec)*((uint64_t)1000);
+#endif /* CLOCK_REALTIME */
+}
+
+/** create ngtcp2 client connection and set up.
*/ +static struct ngtcp2_conn* conn_client_setup(struct doq_client_data* data) +{ + struct ngtcp2_conn* conn = NULL; + int rv; + struct ngtcp2_cid dcid, scid; + struct ngtcp2_path path; + uint32_t client_chosen_version = NGTCP2_PROTO_VER_V1; + struct ngtcp2_callbacks cbs; + struct ngtcp2_settings settings; + struct ngtcp2_transport_params params; + + memset(&cbs, 0, sizeof(cbs)); + memset(&settings, 0, sizeof(settings)); + memset(¶ms, 0, sizeof(params)); + memset(&dcid, 0, sizeof(dcid)); + memset(&scid, 0, sizeof(scid)); + memset(&path, 0, sizeof(path)); + + data->quic_version = client_chosen_version; + ngtcp2_settings_default(&settings); + if(str_is_ip6(data->svr)) { +#ifdef HAVE_STRUCT_NGTCP2_SETTINGS_MAX_TX_UDP_PAYLOAD_SIZE + settings.max_tx_udp_payload_size = 1232; +#else + settings.max_udp_payload_size = 1232; +#endif + } + settings.rand_ctx.native_handle = data->rnd; + if(verbosity > 0) { + /* make debug logs */ + settings.log_printf = log_printf_for_doq; + } + settings.initial_ts = get_timestamp_nanosec(); + ngtcp2_transport_params_default(¶ms); + params.initial_max_stream_data_bidi_local = 256*1024; + params.initial_max_stream_data_bidi_remote = 256*1024; + params.initial_max_stream_data_uni = 256*1024; + params.initial_max_data = 1024*1024; + params.initial_max_streams_bidi = 0; + params.initial_max_streams_uni = 100; + params.max_idle_timeout = 30*NGTCP2_SECONDS; + params.active_connection_id_limit = 7; + cid_randfill(&dcid, 16, data->rnd); + cid_randfill(&scid, 16, data->rnd); + cbs.client_initial = ngtcp2_crypto_client_initial_cb; + cbs.recv_crypto_data = ngtcp2_crypto_recv_crypto_data_cb; + cbs.encrypt = ngtcp2_crypto_encrypt_cb; + cbs.decrypt = ngtcp2_crypto_decrypt_cb; + cbs.hp_mask = ngtcp2_crypto_hp_mask_cb; + cbs.recv_retry = ngtcp2_crypto_recv_retry_cb; + cbs.update_key = ngtcp2_crypto_update_key_cb; + cbs.delete_crypto_aead_ctx = ngtcp2_crypto_delete_crypto_aead_ctx_cb; + cbs.delete_crypto_cipher_ctx = + 
ngtcp2_crypto_delete_crypto_cipher_ctx_cb; + cbs.get_path_challenge_data = ngtcp2_crypto_get_path_challenge_data_cb; + cbs.version_negotiation = ngtcp2_crypto_version_negotiation_cb; + cbs.get_new_connection_id = get_new_connection_id_cb; + cbs.handshake_completed = handshake_completed; + cbs.extend_max_local_streams_bidi = extend_max_local_streams_bidi; + cbs.rand = rand_cb; + cbs.recv_stream_data = recv_stream_data; + cbs.stream_reset = stream_reset; + copy_ngaddr(&path.local, &data->local_addr, data->local_addr_len); + copy_ngaddr(&path.remote, &data->dest_addr, data->dest_addr_len); + + rv = ngtcp2_conn_client_new(&conn, &dcid, &scid, &path, + client_chosen_version, &cbs, &settings, ¶ms, + NULL, /* ngtcp2_mem allocator, use default */ + data /* callback argument */); + if(!conn) fatal_exit("could not ngtcp2_conn_client_new: %s", + ngtcp2_strerror(rv)); + data->cc_algo = settings.cc_algo; + return conn; +} + +#ifndef HAVE_NGTCP2_CONN_ENCODE_0RTT_TRANSPORT_PARAMS +/** write the transport file */ +static void +transport_file_write(const char* file, struct ngtcp2_transport_params* params) +{ + FILE* out; + out = fopen(file, "w"); + if(!out) { + perror(file); + return; + } + fprintf(out, "initial_max_streams_bidi=%u\n", + (unsigned)params->initial_max_streams_bidi); + fprintf(out, "initial_max_streams_uni=%u\n", + (unsigned)params->initial_max_streams_uni); + fprintf(out, "initial_max_stream_data_bidi_local=%u\n", + (unsigned)params->initial_max_stream_data_bidi_local); + fprintf(out, "initial_max_stream_data_bidi_remote=%u\n", + (unsigned)params->initial_max_stream_data_bidi_remote); + fprintf(out, "initial_max_stream_data_uni=%u\n", + (unsigned)params->initial_max_stream_data_uni); + fprintf(out, "initial_max_data=%u\n", + (unsigned)params->initial_max_data); + fprintf(out, "active_connection_id_limit=%u\n", + (unsigned)params->active_connection_id_limit); + fprintf(out, "max_datagram_frame_size=%u\n", + (unsigned)params->max_datagram_frame_size); + 
if(ferror(out)) { + verbose(1, "There was an error writing %s: %s", + file, strerror(errno)); + fclose(out); + return; + } + fclose(out); +} +#endif /* HAVE_NGTCP2_CONN_ENCODE_0RTT_TRANSPORT_PARAMS */ + +/** fetch and write the transport file */ +static void +early_data_write_transport(struct doq_client_data* data) +{ +#ifdef HAVE_NGTCP2_CONN_ENCODE_0RTT_TRANSPORT_PARAMS + FILE* out; + uint8_t buf[1024]; + ngtcp2_ssize len = ngtcp2_conn_encode_0rtt_transport_params(data->conn, + buf, sizeof(buf)); + if(len < 0) { + log_err("ngtcp2_conn_encode_0rtt_transport_params failed: %s", + ngtcp2_strerror(len)); + return; + } + out = fopen(data->transport_file, "w"); + if(!out) { + perror(data->transport_file); + return; + } + if(fwrite(buf, 1, len, out) != (size_t)len) { + log_err("fwrite %s failed: %s", data->transport_file, + strerror(errno)); + } + if(ferror(out)) { + verbose(1, "There was an error writing %s: %s", + data->transport_file, strerror(errno)); + } + fclose(out); +#else + struct ngtcp2_transport_params params; + memset(¶ms, 0, sizeof(params)); + ngtcp2_conn_get_remote_transport_params(data->conn, ¶ms); + transport_file_write(data->transport_file, ¶ms); +#endif +} + +#ifndef HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_CLIENT_CONTEXT +/** applicatation rx key callback, this is where the rx key is set, + * and streams can be opened, like http3 unidirectional streams, like + * the http3 control and http3 qpack encode and decoder streams. 
*/ +static int +application_rx_key_cb(struct doq_client_data* data) +{ + verbose(1, "application_rx_key_cb callback"); + verbose(1, "ngtcp2_conn_get_max_data_left is %d", + (int)ngtcp2_conn_get_max_data_left(data->conn)); +#ifdef HAVE_NGTCP2_CONN_GET_MAX_LOCAL_STREAMS_UNI + verbose(1, "ngtcp2_conn_get_max_local_streams_uni is %d", + (int)ngtcp2_conn_get_max_local_streams_uni(data->conn)); +#endif + verbose(1, "ngtcp2_conn_get_streams_uni_left is %d", + (int)ngtcp2_conn_get_streams_uni_left(data->conn)); + verbose(1, "ngtcp2_conn_get_streams_bidi_left is %d", + (int)ngtcp2_conn_get_streams_bidi_left(data->conn)); + if(data->transport_file) { + early_data_write_transport(data); + } + return 1; +} + +/** quic_method set_encryption_secrets function */ +static int +set_encryption_secrets(SSL *ssl, OSSL_ENCRYPTION_LEVEL ossl_level, + const uint8_t *read_secret, const uint8_t *write_secret, + size_t secret_len) +{ + struct doq_client_data* data = get_app_data(ssl); +#ifdef HAVE_NGTCP2_ENCRYPTION_LEVEL + ngtcp2_encryption_level +#else + ngtcp2_crypto_level +#endif + level = +#ifdef HAVE_NGTCP2_CRYPTO_QUICTLS_FROM_OSSL_ENCRYPTION_LEVEL + ngtcp2_crypto_quictls_from_ossl_encryption_level(ossl_level); +#else + ngtcp2_crypto_openssl_from_ossl_encryption_level(ossl_level); +#endif + + if(read_secret) { + if(ngtcp2_crypto_derive_and_install_rx_key(data->conn, NULL, + NULL, NULL, level, read_secret, secret_len) != 0) { + log_err("ngtcp2_crypto_derive_and_install_rx_key failed"); + return 0; + } + if(level == NGTCP2_CRYPTO_LEVEL_APPLICATION) { + if(!application_rx_key_cb(data)) + return 0; + } + } + + if(write_secret) { + if(ngtcp2_crypto_derive_and_install_tx_key(data->conn, NULL, + NULL, NULL, level, write_secret, secret_len) != 0) { + log_err("ngtcp2_crypto_derive_and_install_tx_key failed"); + return 0; + } + } + return 1; +} + +/** quic_method add_handshake_data function */ +static int +add_handshake_data(SSL *ssl, OSSL_ENCRYPTION_LEVEL ossl_level, + const uint8_t *data, 
size_t len) +{ + struct doq_client_data* doqdata = get_app_data(ssl); +#ifdef HAVE_NGTCP2_ENCRYPTION_LEVEL + ngtcp2_encryption_level +#else + ngtcp2_crypto_level +#endif + level = +#ifdef HAVE_NGTCP2_CRYPTO_QUICTLS_FROM_OSSL_ENCRYPTION_LEVEL + ngtcp2_crypto_quictls_from_ossl_encryption_level(ossl_level); +#else + ngtcp2_crypto_openssl_from_ossl_encryption_level(ossl_level); +#endif + int rv; + + rv = ngtcp2_conn_submit_crypto_data(doqdata->conn, level, data, len); + if(rv != 0) { + log_err("ngtcp2_conn_submit_crypto_data failed: %s", + ngtcp2_strerror(rv)); + ngtcp2_conn_set_tls_error(doqdata->conn, rv); + return 0; + } + return 1; +} + +/** quic_method flush_flight function */ +static int +flush_flight(SSL* ATTR_UNUSED(ssl)) +{ + return 1; +} + +/** quic_method send_alert function */ +static int +send_alert(SSL *ssl, enum ssl_encryption_level_t ATTR_UNUSED(level), + uint8_t alert) +{ + struct doq_client_data* data = get_app_data(ssl); + data->tls_alert = alert; + return 1; +} +#endif /* HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_CLIENT_CONTEXT */ + +/** new session callback. We can write it to file for resumption later. 
*/
+static int
+new_session_cb(SSL* ssl, SSL_SESSION* session)
+{
+	struct doq_client_data* data = get_app_data(ssl);
+	BIO *f;
+	log_assert(data->session_file);
+	verbose(1, "new session cb: the ssl session max_early_data_size is %u",
+		(unsigned)SSL_SESSION_get_max_early_data(session));
+	f = BIO_new_file(data->session_file, "w");
+	if(!f) {
+		log_err("Could not open %s: %s", data->session_file,
+			strerror(errno));
+		return 0;
+	}
+	/* Check the write result, so that success is not logged when
+	 * the session was not written to the file. */
+	if(!PEM_write_bio_SSL_SESSION(f, session)) {
+		log_crypto_err("Could not PEM_write_bio_SSL_SESSION");
+		BIO_free(f);
+		return 0;
+	}
+	BIO_free(f);
+	verbose(1, "written tls session to %s", data->session_file);
+	/* This callback does not keep a reference to the session, so 0 is
+	 * returned on every path. */
+	return 0;
+}
+
+/** setup the TLS context */
+static SSL_CTX*
+ctx_client_setup(void)
+{
+	SSL_CTX* ctx = SSL_CTX_new(TLS_client_method());
+	if(!ctx) {
+		log_crypto_err("Could not SSL_CTX_new");
+		exit(1);
+	}
+	SSL_CTX_set_min_proto_version(ctx, TLS1_3_VERSION);
+	SSL_CTX_set_max_proto_version(ctx, TLS1_3_VERSION);
+	SSL_CTX_set_default_verify_paths(ctx);
+#ifdef HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_CLIENT_CONTEXT
+	if(ngtcp2_crypto_quictls_configure_client_context(ctx) != 0) {
+		log_err("ngtcp2_crypto_quictls_configure_client_context failed");
+		exit(1);
+	}
+#else
+	memset(&quic_method, 0, sizeof(quic_method));
+	quic_method.set_encryption_secrets = &set_encryption_secrets;
+	quic_method.add_handshake_data = &add_handshake_data;
+	quic_method.flush_flight = &flush_flight;
+	quic_method.send_alert = &send_alert;
+	SSL_CTX_set_quic_method(ctx, &quic_method);
+#endif
+	return ctx;
+}
+
+
+/* setup the TLS object */
+static SSL*
+ssl_client_setup(struct doq_client_data* data)
+{
+	SSL* ssl = SSL_new(data->ctx);
+	if(!ssl) {
+		log_crypto_err("Could not SSL_new");
+		exit(1);
+	}
+	set_app_data(ssl, data);
+	SSL_set_connect_state(ssl);
+	if(!SSL_set_fd(ssl, data->fd)) {
+		log_crypto_err("Could not SSL_set_fd");
+		exit(1);
+	}
+	if((data->quic_version & 0xff000000) == 0xff000000) {
+		SSL_set_quic_use_legacy_codepoint(ssl, 1);
+	} else {
+		SSL_set_quic_use_legacy_codepoint(ssl, 0);
+	}
+	SSL_set_alpn_protos(ssl, (const
unsigned char *)"\x03""doq", 4); + /* send the SNI host name */ + SSL_set_tlsext_host_name(ssl, "localhost"); + return ssl; +} + +/** get packet ecn information */ +static uint32_t +msghdr_get_ecn(struct msghdr* msg, int family) +{ +#ifndef S_SPLINT_S + struct cmsghdr* cmsg; + if(family == AF_INET6) { + for(cmsg = CMSG_FIRSTHDR(msg); cmsg != NULL; + cmsg = CMSG_NXTHDR(msg, cmsg)) { + if(cmsg->cmsg_level == IPPROTO_IPV6 && + cmsg->cmsg_type == IPV6_TCLASS && + cmsg->cmsg_len != 0) { + uint8_t* ecn = (uint8_t*)CMSG_DATA(cmsg); + return *ecn; + } + } + return 0; + } + for(cmsg = CMSG_FIRSTHDR(msg); cmsg != NULL; + cmsg = CMSG_NXTHDR(msg, cmsg)) { + if(cmsg->cmsg_level == IPPROTO_IP && + cmsg->cmsg_type == IP_TOS && + cmsg->cmsg_len != 0) { + uint8_t* ecn = (uint8_t*)CMSG_DATA(cmsg); + return *ecn; + } + } + return 0; +#endif /* S_SPLINT_S */ +} + +/** set the ecn on the transmission */ +static void +set_ecn(int fd, int family, uint32_t ecn) +{ + unsigned int val = ecn; + if(family == AF_INET6) { + if(setsockopt(fd, IPPROTO_IPV6, IPV6_TCLASS, &val, + (socklen_t)sizeof(val)) == -1) { + log_err("setsockopt(.. IPV6_TCLASS ..): %s", + strerror(errno)); + } + return; + } + if(setsockopt(fd, IPPROTO_IP, IP_TOS, &val, + (socklen_t)sizeof(val)) == -1) { + log_err("setsockopt(.. 
IP_TOS ..): %s", + strerror(errno)); + } +} + +/** send a packet */ +static int +doq_client_send_pkt(struct doq_client_data* data, uint32_t ecn, uint8_t* buf, + size_t buf_len, int is_blocked_pkt, int* send_is_blocked) +{ + struct msghdr msg; + struct iovec iov[1]; + ssize_t ret; + iov[0].iov_base = buf; + iov[0].iov_len = buf_len; + memset(&msg, 0, sizeof(msg)); + msg.msg_name = (void*)&data->dest_addr; + msg.msg_namelen = data->dest_addr_len; + msg.msg_iov = iov; + msg.msg_iovlen = 1; + set_ecn(data->fd, data->dest_addr.ss_family, ecn); + + for(;;) { + ret = sendmsg(data->fd, &msg, MSG_DONTWAIT); + if(ret == -1 && errno == EINTR) + continue; + break; + } + if(ret == -1) { + if(errno == EAGAIN) { + if(buf_len > + sldns_buffer_capacity(data->blocked_pkt)) + return 0; /* Cannot store it, but the buffers + are equal length and large enough, so this + should not happen. */ + data->have_blocked_pkt = 1; + if(send_is_blocked) + *send_is_blocked = 1; + /* If we already send the previously blocked packet, + * no need to copy it, otherwise store the packet for + * later. 
*/ + if(!is_blocked_pkt) { + data->blocked_pkt_pi.ecn = ecn; + sldns_buffer_clear(data->blocked_pkt); + sldns_buffer_write(data->blocked_pkt, buf, + buf_len); + sldns_buffer_flip(data->blocked_pkt); + } + return 0; + } + log_err("doq sendmsg: %s", strerror(errno)); +#ifdef HAVE_NGTCP2_CCERR_DEFAULT + ngtcp2_ccerr_set_application_error(&data->ccerr, -1, NULL, 0); +#else + ngtcp2_connection_close_error_set_application_error(&data->last_error, -1, NULL, 0); +#endif + return 0; + } + return 1; +} + +/** change event write on fd to when we have data or when congested */ +static void +event_change_write(struct doq_client_data* data, int do_write) +{ + ub_event_del(data->ev); + if(do_write) { + ub_event_add_bits(data->ev, UB_EV_WRITE); + } else { + ub_event_del_bits(data->ev, UB_EV_WRITE); + } + if(ub_event_add(data->ev, NULL) != 0) { + fatal_exit("could not ub_event_add"); + } +} + +/** write the connection close, with possible error */ +static void +write_conn_close(struct doq_client_data* data) +{ + struct ngtcp2_path_storage ps; + struct ngtcp2_pkt_info pi; + ngtcp2_ssize ret; + if(!data->conn || +#ifdef HAVE_NGTCP2_CONN_IN_CLOSING_PERIOD + ngtcp2_conn_in_closing_period(data->conn) || +#else + ngtcp2_conn_is_in_closing_period(data->conn) || +#endif +#ifdef HAVE_NGTCP2_CONN_IN_DRAINING_PERIOD + ngtcp2_conn_in_draining_period(data->conn) +#else + ngtcp2_conn_is_in_draining_period(data->conn) +#endif + ) + return; + /* Drop blocked packet if there is one, the connection is being + * closed. And thus no further data traffic. */ + data->have_blocked_pkt = 0; + if( +#ifdef HAVE_NGTCP2_CCERR_DEFAULT + data->ccerr.type == NGTCP2_CCERR_TYPE_IDLE_CLOSE +#else + data->last_error.type == + NGTCP2_CONNECTION_CLOSE_ERROR_CODE_TYPE_TRANSPORT_IDLE_CLOSE +#endif + ) { + /* do not call ngtcp2_conn_write_connection_close on the + * connection because the ngtcp2_conn_handle_expiry call + * has returned NGTCP2_ERR_IDLE_CLOSE. But continue to close + * the connection. 
*/ + return; + } + verbose(1, "write connection close"); + ngtcp2_path_storage_zero(&ps); + sldns_buffer_clear(data->pkt_buf); + ret = ngtcp2_conn_write_connection_close( + data->conn, &ps.path, &pi, sldns_buffer_begin(data->pkt_buf), + sldns_buffer_remaining(data->pkt_buf), +#ifdef HAVE_NGTCP2_CCERR_DEFAULT + &data->ccerr +#else + &data->last_error +#endif + , get_timestamp_nanosec()); + if(ret < 0) { + log_err("ngtcp2_conn_write_connection_close failed: %s", + ngtcp2_strerror(ret)); + return; + } + verbose(1, "write connection close packet length %d", (int)ret); + if(ret == 0) + return; + doq_client_send_pkt(data, pi.ecn, sldns_buffer_begin(data->pkt_buf), + ret, 0, NULL); +} + +/** disconnect we are done */ +static void +disconnect(struct doq_client_data* data) +{ + verbose(1, "disconnect"); + write_conn_close(data); + ub_event_base_loopexit(data->base); +} + +/** the expire timer callback */ +void doq_client_timer_cb(int ATTR_UNUSED(fd), + short ATTR_UNUSED(bits), void* arg) +{ + struct doq_client_data* data = (struct doq_client_data*)arg; + ngtcp2_tstamp now = get_timestamp_nanosec(); + int rv; + + verbose(1, "doq expire_timer"); + data->expire_timer_added = 0; + rv = ngtcp2_conn_handle_expiry(data->conn, now); + if(rv != 0) { + log_err("ngtcp2_conn_handle_expiry failed: %s", + ngtcp2_strerror(rv)); +#ifdef HAVE_NGTCP2_CCERR_DEFAULT + ngtcp2_ccerr_set_liberr(&data->ccerr, rv, NULL, 0); +#else + ngtcp2_connection_close_error_set_transport_error_liberr( + &data->last_error, rv, NULL, 0); +#endif + disconnect(data); + return; + } + update_timer(data); + on_write(data); +} + +/** update the timers */ +static void +update_timer(struct doq_client_data* data) +{ + ngtcp2_tstamp expiry = ngtcp2_conn_get_expiry(data->conn); + ngtcp2_tstamp now = get_timestamp_nanosec(); + ngtcp2_tstamp t; + struct timeval tv; + + if(expiry <= now) { + /* the timer has already expired, add with zero timeout */ + t = 0; + } else { + t = expiry - now; + } + + /* set the timer */ + 
if(data->expire_timer_added) {
+		ub_timer_del(data->expire_timer);
+		data->expire_timer_added = 0;
+	}
+	memset(&tv, 0, sizeof(tv));
+	tv.tv_sec = t / NGTCP2_SECONDS;
+	tv.tv_usec = (t / NGTCP2_MICROSECONDS)%1000000;
+	verbose(1, "update_timer in %d.%6.6d secs", (int)tv.tv_sec,
+		(int)tv.tv_usec);
+	if(ub_timer_add(data->expire_timer, data->base,
+		&doq_client_timer_cb, data, &tv) != 0) {
+		log_err("timer_add failed: could not add expire timer");
+		return;
+	}
+	data->expire_timer_added = 1;
+}
+
+/** perform read operations on fd.
+ * Reads up to 10 datagrams per call with a nonblocking recvmsg into
+ * data->pkt_buf and passes each one to ngtcp2_conn_read_pkt, together
+ * with the ECN value from the ancillary data. Stops early when recvmsg
+ * fails or when the packet cannot be processed; in the latter case the
+ * close error is recorded and the connection is disconnected. */
+static void
+on_read(struct doq_client_data* data)
+{
+	struct sockaddr_storage addr;
+	struct iovec iov[1];
+	struct msghdr msg;
+	union {
+		struct cmsghdr hdr;
+		char buf[256];
+	} ancil;
+	int i;
+	ssize_t rcv;
+	ngtcp2_pkt_info pi;
+	int rv;
+	struct ngtcp2_path path;
+
+	for(i=0; i<10; i++) {
+		msg.msg_name = &addr;
+		msg.msg_namelen = (socklen_t)sizeof(addr);
+		iov[0].iov_base = sldns_buffer_begin(data->pkt_buf);
+		iov[0].iov_len = sldns_buffer_remaining(data->pkt_buf);
+		msg.msg_iov = iov;
+		msg.msg_iovlen = 1;
+		msg.msg_control = ancil.buf;
+#ifndef S_SPLINT_S
+		msg.msg_controllen = sizeof(ancil.buf);
+#endif /* S_SPLINT_S */
+		msg.msg_flags = 0;
+
+		rcv = recvmsg(data->fd, &msg, MSG_DONTWAIT);
+		if(rcv == -1) {
+			if(errno == EINTR || errno == EAGAIN)
+				break;
+			/* pass the length of the address, not the size
+			 * of the variable that holds that length */
+			log_err_addr("doq recvmsg", strerror(errno),
+				&data->dest_addr, data->dest_addr_len);
+			break;
+		}
+
+		pi.ecn = msghdr_get_ecn(&msg, addr.ss_family);
+		verbose(1, "recvmsg %d ecn=0x%x", (int)rcv, (int)pi.ecn);
+
+		memset(&path, 0, sizeof(path));
+		path.local.addr = (void*)&data->local_addr;
+		path.local.addrlen = data->local_addr_len;
+		path.remote.addr = (void*)msg.msg_name;
+		path.remote.addrlen = msg.msg_namelen;
+		rv = ngtcp2_conn_read_pkt(data->conn, &path, &pi,
+			iov[0].iov_base, rcv, get_timestamp_nanosec());
+		if(rv != 0) {
+			log_err("ngtcp2_conn_read_pkt failed: %s",
+				ngtcp2_strerror(rv));
+			if(
+#ifdef HAVE_NGTCP2_CCERR_DEFAULT
+
data->ccerr.error_code == 0 +#else + data->last_error.error_code == 0 +#endif + ) { + if(rv == NGTCP2_ERR_CRYPTO) { + /* in picotls the tls alert may need + * to be copied, but this is with + * openssl. And we have the value + * data.tls_alert. */ +#ifdef HAVE_NGTCP2_CCERR_DEFAULT + ngtcp2_ccerr_set_tls_alert( + &data->ccerr, data->tls_alert, + NULL, 0); +#else + ngtcp2_connection_close_error_set_transport_error_tls_alert( + &data->last_error, + data->tls_alert, NULL, 0); +#endif + } else { +#ifdef HAVE_NGTCP2_CCERR_DEFAULT + ngtcp2_ccerr_set_liberr(&data->ccerr, + rv, NULL, 0); +#else + ngtcp2_connection_close_error_set_transport_error_liberr( + &data->last_error, rv, NULL, + 0); +#endif + } + } + disconnect(data); + return; + } + } + + update_timer(data); +} + +/** the write of this query has completed, it has spooled to packets, + * set it to have the write done and move it to the list of receive streams. */ +static void +query_write_is_done(struct doq_client_data* data, + struct doq_client_stream* str) +{ + if(verbosity > 0) { + char* logs = client_stream_string(str); + verbose(1, "query %s write is done", logs); + free(logs); + } + str->write_is_done = 1; + stream_list_move(str, data->query_list_send, data->query_list_receive); +} + +/** write the data streams, if possible */ +static int +write_streams(struct doq_client_data* data) +{ + ngtcp2_path_storage ps; + ngtcp2_tstamp ts = get_timestamp_nanosec(); + struct doq_client_stream* str, *next; + uint32_t flags; + /* number of bytes that can be sent without packet pacing */ + size_t send_quantum = ngtcp2_conn_get_send_quantum(data->conn); + /* Overhead is the stream overhead of adding a header onto the data, + * this make sure the number of bytes to send in data bytes plus + * the overhead overshoots the target quantum by a smaller margin, + * and then it stops sending more bytes. With zero it would overshoot + * more, an accurate number would not overshoot. It is based on the + * stream frame header size. 
*/ + size_t accumulated_send = 0, overhead_stream = 24, overhead_pkt = 60, + max_packet_size = 1200; + size_t num_packets = 0, max_packets = 65535; + ngtcp2_path_storage_zero(&ps); + str = data->query_list_send->first; + + if(data->cc_algo != NGTCP2_CC_ALGO_BBR +#ifdef NGTCP2_CC_ALGO_BBR_V2 + && data->cc_algo != NGTCP2_CC_ALGO_BBR_V2 +#endif +#ifdef NGTCP2_CC_ALGO_BBR2 + && data->cc_algo != NGTCP2_CC_ALGO_BBR2 +#endif + ) { + /* If we do not have a packet pacing congestion control + * algorithm, limit the number of packets. */ + max_packets = 10; + } + + /* loop like this, because at the start, the send list is empty, + * and we want to send handshake packets. But when there is a + * send_list, loop through that. */ + for(;;) { + int64_t stream_id; + ngtcp2_pkt_info pi; + ngtcp2_vec datav[2]; + size_t datav_count = 0; + int fin; + ngtcp2_ssize ret; + ngtcp2_ssize ndatalen = 0; + int send_is_blocked = 0; + + if(str) { + /* pick up next in case this one is deleted */ + next = str->next; + if(verbosity > 0) { + char* logs = client_stream_string(str); + verbose(1, "query %s write stream", logs); + free(logs); + } + stream_id = str->stream_id; + fin = 1; + if(str->nwrite < 2) { + str->data_tcplen = htons(str->data_len); + datav[0].base = ((uint8_t*)&str->data_tcplen)+str->nwrite; + datav[0].len = 2-str->nwrite; + datav[1].base = str->data; + datav[1].len = str->data_len; + datav_count = 2; + } else { + datav[0].base = str->data + (str->nwrite-2); + datav[0].len = str->data_len - (str->nwrite-2); + datav_count = 1; + } + } else { + next = NULL; + verbose(1, "write stream -1."); + stream_id = -1; + fin = 0; + datav[0].base = NULL; + datav[0].len = 0; + datav_count = 1; + } + + /* Does the first data entry fit into the send quantum? */ + /* Check if the data size sent, with a max of one full packet, + * with added stream header and packet header is allowed + * within the send quantum number of bytes. If not, it does + * not fit, and wait. 
*/ + if(accumulated_send == 0 && ((datav_count == 1 && + (datav[0].len>max_packet_size?max_packet_size: + datav[0].len)+overhead_stream+overhead_pkt > + send_quantum) || + (datav_count == 2 && + (datav[0].len+datav[1].len>max_packet_size? + max_packet_size:datav[0].len+datav[1].len) + +overhead_stream+overhead_pkt > send_quantum))) { + /* congestion limited */ + ngtcp2_conn_update_pkt_tx_time(data->conn, ts); + event_change_write(data, 0); + /* update the timer to wait until it is possible to + * write again */ + update_timer(data); + return 0; + } + flags = 0; + if(str && str->next != NULL) { + /* Coalesce more data from more streams into this + * packet, if possible */ + /* There is more than one data entry in this send + * quantum, does the next one fit in the quantum? */ + size_t this_send, possible_next_send; + if(datav_count == 1) + this_send = datav[0].len; + else this_send = datav[0].len + datav[1].len; + if(this_send > max_packet_size) + this_send = max_packet_size; + if(str->next->nwrite < 2) + possible_next_send = (2-str->next->nwrite) + + str->next->data_len; + else possible_next_send = str->next->data_len - + (str->next->nwrite - 2); + if(possible_next_send > max_packet_size) + possible_next_send = max_packet_size; + /* Check if the data lengths that writev returned + * with stream headers added up so far, in + * accumulated_send, with added the data length + * of this send, with a max of one full packet, and + * the data length of the next possible send, with + * a max of one full packet, with a stream header for + * this_send and a stream header for the next possible + * send and a packet header, fit in the send quantum + * number of bytes. If so, ask to add more content + * to the packet with the more flag. 
*/ + if(accumulated_send + this_send + possible_next_send + +2*overhead_stream+ overhead_pkt < send_quantum) + flags |= NGTCP2_WRITE_STREAM_FLAG_MORE; + } + if(fin) { + /* This is the final part of data for this stream */ + flags |= NGTCP2_WRITE_STREAM_FLAG_FIN; + } + sldns_buffer_clear(data->pkt_buf); + ret = ngtcp2_conn_writev_stream(data->conn, &ps.path, &pi, + sldns_buffer_begin(data->pkt_buf), + sldns_buffer_remaining(data->pkt_buf), &ndatalen, + flags, stream_id, datav, datav_count, ts); + if(ret < 0) { + if(ret == NGTCP2_ERR_WRITE_MORE) { + if(str) { + str->nwrite += ndatalen; + if(str->nwrite >= str->data_len+2) + query_write_is_done(data, str); + str = next; + accumulated_send += ndatalen + overhead_stream; + continue; + } + } + log_err("ngtcp2_conn_writev_stream failed: %s", + ngtcp2_strerror(ret)); +#ifdef HAVE_NGTCP2_CCERR_DEFAULT + ngtcp2_ccerr_set_liberr(&data->ccerr, ret, NULL, 0); +#else + ngtcp2_connection_close_error_set_transport_error_liberr( + &data->last_error, ret, NULL, 0); +#endif + disconnect(data); + return 0; + } + verbose(1, "writev_stream pkt size %d ndatawritten %d", + (int)ret, (int)ndatalen); + if(ndatalen >= 0 && str) { + /* add the new write offset */ + str->nwrite += ndatalen; + if(str->nwrite >= str->data_len+2) + query_write_is_done(data, str); + } + if(ret == 0) { + /* congestion limited */ + ngtcp2_conn_update_pkt_tx_time(data->conn, ts); + event_change_write(data, 0); + /* update the timer to wait until it is possible to + * write again */ + update_timer(data); + return 0; + } + if(!doq_client_send_pkt(data, pi.ecn, + sldns_buffer_begin(data->pkt_buf), ret, 0, + &send_is_blocked)) { + if(send_is_blocked) { + /* Blocked packet, wait until it is possible + * to write again and also set a timer. */ + event_change_write(data, 1); + update_timer(data); + return 0; + } + /* Packet could not be sent. Like lost and timeout. 
*/
+			ngtcp2_conn_update_pkt_tx_time(data->conn, ts);
+			event_change_write(data, 0);
+			update_timer(data);
+			return 0;
+		}
+		/* continue */
+		if((size_t)ret >= send_quantum)
+			break;
+		send_quantum -= ret;
+		accumulated_send = 0;
+		str = next;
+		if(str == NULL)
+			break;
+		if(++num_packets == max_packets)
+			break;
+	}
+	ngtcp2_conn_update_pkt_tx_time(data->conn, ts);
+	event_change_write(data, 1);
+	return 1;
+}
+
+/** send the blocked packet now that the stream is writable again.
+ * The packet that could not be sent earlier is kept in data->blocked_pkt,
+ * with its ecn value in data->blocked_pkt_pi. It is sent from that buffer,
+ * because data->pkt_buf is reused for other reads and writes.
+ * @return 1 if the blocked packet was sent, 0 if it is still blocked or
+ *	if it was dropped because of a send failure. */
+static int
+send_blocked_pkt(struct doq_client_data* data)
+{
+	ngtcp2_tstamp ts = get_timestamp_nanosec();
+	int send_is_blocked = 0;
+	if(!doq_client_send_pkt(data, data->blocked_pkt_pi.ecn,
+		sldns_buffer_begin(data->blocked_pkt),
+		sldns_buffer_limit(data->blocked_pkt), 1, &send_is_blocked)) {
+		if(send_is_blocked) {
+			/* Send was blocked, again. Wait, again to retry. */
+			event_change_write(data, 1);
+			/* make sure the timer is set while waiting */
+			update_timer(data);
+			return 0;
+		}
+		/* The packet could not be sent. Like it was lost, timeout. */
+		data->have_blocked_pkt = 0;
+		ngtcp2_conn_update_pkt_tx_time(data->conn, ts);
+		event_change_write(data, 0);
+		update_timer(data);
+		return 0;
+	}
+	/* The blocked packet has been sent, the holding buffer can be
+	 * cleared.
*/ + data->have_blocked_pkt = 0; + ngtcp2_conn_update_pkt_tx_time(data->conn, ts); + return 1; +} + +/** perform write operations, if any, on fd */ +static void +on_write(struct doq_client_data* data) +{ + if(data->have_blocked_pkt) { + if(!send_blocked_pkt(data)) + return; + } + if( +#ifdef HAVE_NGTCP2_CONN_IN_CLOSING_PERIOD + ngtcp2_conn_in_closing_period(data->conn) +#else + ngtcp2_conn_is_in_closing_period(data->conn) +#endif + ) + return; + if(!write_streams(data)) + return; + update_timer(data); +} + +/** callback for main listening file descriptor */ +void +doq_client_event_cb(int ATTR_UNUSED(fd), short bits, void* arg) +{ + struct doq_client_data* data = (struct doq_client_data*)arg; + verbose(1, "doq_client_event_cb %s%s%s", + ((bits&UB_EV_READ)!=0?"EV_READ":""), + ((bits&(UB_EV_READ|UB_EV_WRITE))==(UB_EV_READ|UB_EV_WRITE)? + " ":""), + ((bits&UB_EV_WRITE)!=0?"EV_WRITE":"")); + if((bits&UB_EV_READ)) { + on_read(data); + } + /* Perform the write operation anyway. The read operation may + * have produced data, or there is content waiting and it is possible + * to write that. 
*/ + on_write(data); +} + +/** read the TLS session from file */ +static int +early_data_setup_session(struct doq_client_data* data) +{ + SSL_SESSION* session; + BIO* f = BIO_new_file(data->session_file, "r"); + if(f == NULL) { + if(errno == ENOENT) { + verbose(1, "session file %s does not exist", + data->session_file); + return 0; + } + log_err("Could not read %s: %s", data->session_file, + strerror(errno)); + return 0; + } + session = PEM_read_bio_SSL_SESSION(f, NULL, 0, NULL); + if(session == NULL) { + log_crypto_err("Could not read session file with PEM_read_bio_SSL_SESSION"); + BIO_free(f); + return 0; + } + BIO_free(f); + if(!SSL_set_session(data->ssl, session)) { + log_crypto_err("Could not SSL_set_session"); + SSL_SESSION_free(session); + return 0; + } + if(SSL_SESSION_get_max_early_data(session) == 0) { + log_err("TLS session early data is 0"); + SSL_SESSION_free(session); + return 0; + } + SSL_set_quic_early_data_enabled(data->ssl, 1); + SSL_SESSION_free(session); + return 1; +} + +#ifndef HAVE_NGTCP2_CONN_ENCODE_0RTT_TRANSPORT_PARAMS +/** parse one line from the transport file */ +static int +transport_parse_line(struct ngtcp2_transport_params* params, char* line) +{ + if(strncmp(line, "initial_max_streams_bidi=", 25) == 0) { + params->initial_max_streams_bidi = atoi(line+25); + return 1; + } + if(strncmp(line, "initial_max_streams_uni=", 24) == 0) { + params->initial_max_streams_uni = atoi(line+24); + return 1; + } + if(strncmp(line, "initial_max_stream_data_bidi_local=", 35) == 0) { + params->initial_max_stream_data_bidi_local = atoi(line+35); + return 1; + } + if(strncmp(line, "initial_max_stream_data_bidi_remote=", 36) == 0) { + params->initial_max_stream_data_bidi_remote = atoi(line+36); + return 1; + } + if(strncmp(line, "initial_max_stream_data_uni=", 28) == 0) { + params->initial_max_stream_data_uni = atoi(line+28); + return 1; + } + if(strncmp(line, "initial_max_data=", 17) == 0) { + params->initial_max_data = atoi(line+17); + return 1; + } + 
if(strncmp(line, "active_connection_id_limit=", 27) == 0) {
+		params->active_connection_id_limit = atoi(line+27);
+		return 1;
+	}
+	if(strncmp(line, "max_datagram_frame_size=", 24) == 0) {
+		params->max_datagram_frame_size = atoi(line+24);
+		return 1;
+	}
+	return 0;
+}
+#endif /* HAVE_NGTCP2_CONN_ENCODE_0RTT_TRANSPORT_PARAMS */
+
+/** setup the early data transport file and read it.
+ * When ngtcp2_conn_encode_0rtt_transport_params is available, the file
+ * content (at most 1024 bytes) is passed to
+ * ngtcp2_conn_decode_and_set_0rtt_transport_params. Otherwise the file is
+ * parsed line by line with transport_parse_line and the result is set with
+ * ngtcp2_conn_set_early_remote_transport_params.
+ * @return 1 on success, 0 if the file is missing, cannot be read or
+ *	cannot be parsed. */
+static int
+early_data_setup_transport(struct doq_client_data* data)
+{
+#ifdef HAVE_NGTCP2_CONN_ENCODE_0RTT_TRANSPORT_PARAMS
+	FILE* in;
+	uint8_t buf[1024];
+	size_t len;
+	int rv;
+	in = fopen(data->transport_file, "r");
+	if(!in) {
+		if(errno == ENOENT) {
+			verbose(1, "transport file %s does not exist",
+				data->transport_file);
+			return 0;
+		}
+		perror(data->transport_file);
+		return 0;
+	}
+	len = fread(buf, 1, sizeof(buf), in);
+	if(ferror(in)) {
+		log_err("%s: read failed: %s", data->transport_file,
+			strerror(errno));
+		fclose(in);
+		return 0;
+	}
+	fclose(in);
+	rv = ngtcp2_conn_decode_and_set_0rtt_transport_params(data->conn,
+		buf, len);
+	if(rv != 0) {
+		log_err("ngtcp2_conn_decode_and_set_0rtt_transport_params failed: %s",
+			ngtcp2_strerror(rv));
+		return 0;
+	}
+	return 1;
+#else
+	FILE* in;
+	char buf[1024];
+	struct ngtcp2_transport_params params;
+	memset(&params, 0, sizeof(params));
+	in = fopen(data->transport_file, "r");
+	if(!in) {
+		if(errno == ENOENT) {
+			verbose(1, "transport file %s does not exist",
+				data->transport_file);
+			return 0;
+		}
+		perror(data->transport_file);
+		return 0;
+	}
+	/* Loop on the fgets result. The end-of-file indicator is only set
+	 * after a read hits the end of the file, so testing feof before
+	 * fgets reports the normal end of the file as a read failure. */
+	while(fgets(buf, sizeof(buf), in)) {
+		if(!transport_parse_line(&params, buf)) {
+			log_err("%s: could not parse line '%s'",
+				data->transport_file, buf);
+			fclose(in);
+			return 0;
+		}
+	}
+	if(ferror(in)) {
+		log_err("%s: read failed: %s", data->transport_file,
+			strerror(errno));
+		fclose(in);
+		return 0;
+	}
+	fclose(in);
+	ngtcp2_conn_set_early_remote_transport_params(data->conn, &params);
+#endif
+	return 1;
+}
+
+/** setup for early data, read the transport file and session file
*/ +static void +early_data_setup(struct doq_client_data* data) +{ + if(!early_data_setup_session(data)) { + verbose(1, "TLS session resumption failed, early data is disabled"); + data->early_data_enabled = 0; + return; + } + if(!early_data_setup_transport(data)) { + verbose(1, "Transport parameters set failed, early data is disabled"); + data->early_data_enabled = 0; + return; + } +} + +/** start the early data transmission */ +static void +early_data_start(struct doq_client_data* data) +{ + query_streams_start(data); + on_write(data); +} + +/** create doq_client_data */ +static struct doq_client_data* +create_doq_client_data(const char* svr, int port, struct ub_event_base* base, + const char* transport_file, const char* session_file, int quiet) +{ + struct doq_client_data* data; + data = calloc(1, sizeof(*data)); + if(!data) fatal_exit("calloc failed: out of memory"); + data->base = base; + data->rnd = ub_initstate(NULL); + if(!data->rnd) fatal_exit("ub_initstate failed: out of memory"); + data->svr = svr; + get_dest_addr(data, svr, port); + data->port = port; + data->quiet = quiet; + data->pkt_buf = sldns_buffer_new(65552); + if(!data->pkt_buf) + fatal_exit("sldns_buffer_new failed: out of memory"); + data->blocked_pkt = sldns_buffer_new(65552); + if(!data->blocked_pkt) + fatal_exit("sldns_buffer_new failed: out of memory"); + data->fd = open_svr_udp(data); + get_local_addr(data); + data->conn = conn_client_setup(data); +#ifdef HAVE_NGTCP2_CCERR_DEFAULT + ngtcp2_ccerr_default(&data->ccerr); +#else + ngtcp2_connection_close_error_default(&data->last_error); +#endif + data->transport_file = transport_file; + data->session_file = session_file; + if(data->transport_file && data->session_file) + data->early_data_enabled = 1; + + generate_static_secret(data, 32); + data->ctx = ctx_client_setup(); + if(data->session_file) { + SSL_CTX_set_session_cache_mode(data->ctx, + SSL_SESS_CACHE_CLIENT | + SSL_SESS_CACHE_NO_INTERNAL_STORE); + SSL_CTX_sess_set_new_cb(data->ctx, 
new_session_cb); + } + data->ssl = ssl_client_setup(data); + ngtcp2_conn_set_tls_native_handle(data->conn, data->ssl); + if(data->early_data_enabled) + early_data_setup(data); + + data->ev = ub_event_new(base, data->fd, UB_EV_READ | UB_EV_WRITE | + UB_EV_PERSIST, doq_client_event_cb, data); + if(!data->ev) { + fatal_exit("could not ub_event_new"); + } + if(ub_event_add(data->ev, NULL) != 0) { + fatal_exit("could not ub_event_add"); + } + data->expire_timer = ub_event_new(data->base, -1, + UB_EV_TIMEOUT, &doq_client_timer_cb, data); + if(!data->expire_timer) + fatal_exit("could not ub_event_new"); + data->query_list_start = stream_list_create(); + data->query_list_send = stream_list_create(); + data->query_list_receive = stream_list_create(); + data->query_list_stop = stream_list_create(); + return data; +} + +/** delete doq_client_data */ +static void +delete_doq_client_data(struct doq_client_data* data) +{ + if(!data) + return; +#if defined(NGTCP2_USE_GENERIC_SOCKADDR) || defined(NGTCP2_USE_GENERIC_IPV6_SOCKADDR) + if(data->conn && data->dest_addr_len != 0) { + if(addr_is_ip6(&data->dest_addr, data->dest_addr_len)) { +# if defined(NGTCP2_USE_GENERIC_SOCKADDR) || defined(NGTCP2_USE_GENERIC_IPV6_SOCKADDR) + const struct ngtcp2_path* path6 = ngtcp2_conn_get_path(data->conn); + free(path6->local.addr); + free(path6->remote.addr); +# endif + } else { +# if defined(NGTCP2_USE_GENERIC_SOCKADDR) + const struct ngtcp2_path* path = ngtcp2_conn_get_path(data->conn); + free(path->local.addr); + free(path->remote.addr); +# endif + } + } +#endif + ngtcp2_conn_del(data->conn); + SSL_free(data->ssl); + sldns_buffer_free(data->pkt_buf); + sldns_buffer_free(data->blocked_pkt); + if(data->fd != -1) + sock_close(data->fd); + SSL_CTX_free(data->ctx); + stream_list_free(data->query_list_start); + stream_list_free(data->query_list_send); + stream_list_free(data->query_list_receive); + stream_list_free(data->query_list_stop); + ub_randfree(data->rnd); + if(data->ev) { + 
ub_event_del(data->ev);
+		ub_event_free(data->ev);
+	}
+	if(data->expire_timer_added)
+		ub_timer_del(data->expire_timer);
+	ub_event_free(data->expire_timer);
+	free(data->static_secret_data);
+	free(data);
+}
+
+/** create the event base that registers events and timers */
+static struct ub_event_base*
+create_event_base(time_t* secs, struct timeval* now)
+{
+	struct ub_event_base* base;
+	const char *evnm="event", *evsys="", *evmethod="";
+
+	memset(now, 0, sizeof(*now));
+	base = ub_default_event_base(1, secs, now);
+	if(!base) fatal_exit("could not create ub_event base");
+
+	ub_get_event_sys(base, &evnm, &evsys, &evmethod);
+	if(verbosity) log_info("%s %s uses %s method", evnm, evsys, evmethod);
+
+	return base;
+}
+
+/** enter a query into the query list */
+static void
+client_enter_query_buf(struct doq_client_data* data, struct sldns_buffer* buf)
+{
+	struct doq_client_stream* str;
+	str = client_stream_create(buf);
+	if(!str)
+		fatal_exit("client_stream_create failed: out of memory");
+	stream_list_append(data->query_list_start, str);
+}
+
+/** enter the queries into the query list.
+ * The qs array holds count strings, that are used three at a time. */
+static void
+client_enter_queries(struct doq_client_data* data, char** qs, int count)
+{
+	int i;
+	/* NOTE(review): the loop header and the line that creates buf were
+	 * lost from this text (everything between '<' and '>' was stripped).
+	 * They are reconstructed here; make_query is presumed to be the
+	 * helper that builds the query packet from qname, qtype and qclass.
+	 * Confirm against the original file. */
+	for(i=0; i<count; i+=3) {
+		struct sldns_buffer* buf = make_query(qs[i], qs[i+1], qs[i+2]);
+		if(verbosity > 0) {
+			char* str;
+			log_buf(1, "send query", buf);
+			str = sldns_wire2str_pkt(sldns_buffer_begin(buf),
+				sldns_buffer_limit(buf));
+			if(!str) verbose(1, "could not sldns_wire2str_pkt");
+			else verbose(1, "send query:\n%s", str);
+			free(str);
+		}
+		client_enter_query_buf(data, buf);
+		sldns_buffer_free(buf);
+	}
+}
+
+/** run the doqclient queries */
+static void run(const char* svr, int port, char** qs, int count,
+	const char* transport_file, const char* session_file, int quiet)
+{
+	time_t secs = 0;
+	struct timeval now;
+	struct ub_event_base* base;
+	struct doq_client_data* data;
+
+	/* setup */
+	base = create_event_base(&secs, &now);
+	data = create_doq_client_data(svr, port, base, transport_file,
+		session_file, quiet);
+	client_enter_queries(data, qs,
count); + if(data->early_data_enabled) + early_data_start(data); + + /* run the queries */ + ub_event_base_dispatch(base); + + /* cleanup */ + delete_doq_client_data(data); + ub_event_base_free(base); +} +#endif /* HAVE_NGTCP2 */ + +#ifdef HAVE_NGTCP2 +/** getopt global, in case header files fail to declare it. */ +extern int optind; +/** getopt global, in case header files fail to declare it. */ +extern char* optarg; +int main(int ATTR_UNUSED(argc), char** ATTR_UNUSED(argv)) +{ + int c; + int port = UNBOUND_DNS_OVER_QUIC_PORT, quiet = 0; + const char* svr = "127.0.0.1", *transport_file = NULL, + *session_file = NULL; +#ifdef USE_WINSOCK + WSADATA wsa_data; + if(WSAStartup(MAKEWORD(2,2), &wsa_data) != 0) { + printf("WSAStartup failed\n"); + return 1; + } +#endif + checklock_set_output_name("ublocktrace-doqclient"); + checklock_start(); + log_init(0, 0, 0); + log_ident_set("doqclient"); + + while((c=getopt(argc, argv, "hp:qs:vx:y:")) != -1) { + switch(c) { + case 'p': + if(atoi(optarg)==0 && strcmp(optarg,"0")!=0) { + printf("error parsing port, " + "number expected: %s\n", optarg); + return 1; + } + port = atoi(optarg); + break; + case 'q': + quiet++; + break; + case 's': + svr = optarg; + break; + case 'v': + verbosity++; + break; + case 'x': + transport_file = optarg; + break; + case 'y': + session_file = optarg; + break; + case 'h': + case '?': + default: + usage(argv); + } + } + + argc -= optind; + argv += optind; + + if(argc%3!=0) { + printf("Invalid input. Specify qname, qtype, and qclass.\n"); + return 1; + } + if(port == 53) { + printf("Error: port number 53 not for DNS over QUIC. Port number 53 is not allowed to be used with DNS over QUIC. 
It is used for DNS datagrams.\n"); + return 1; + } + + run(svr, port, argv, argc, transport_file, session_file, quiet); + + checklock_stop(); +#ifdef USE_WINSOCK + WSACleanup(); +#endif + return 0; +} +#else /* HAVE_NGTCP2 */ +int main(int ATTR_UNUSED(argc), char** ATTR_UNUSED(argv)) +{ + printf("Compiled without ngtcp2 for QUIC, cannot run doqclient.\n"); + return 1; +} +#endif /* HAVE_NGTCP2 */ + +/***--- definitions to make fptr_wlist work. ---***/ +/* These are callbacks, similar to smallapp callbacks, except the debug + * tool callbacks are not in it */ +struct tube; +struct query_info; +#include "util/data/packed_rrset.h" +#include "daemon/worker.h" +#include "daemon/remote.h" +#include "util/fptr_wlist.h" +#include "libunbound/context.h" + +void worker_handle_control_cmd(struct tube* ATTR_UNUSED(tube), + uint8_t* ATTR_UNUSED(buffer), size_t ATTR_UNUSED(len), + int ATTR_UNUSED(error), void* ATTR_UNUSED(arg)) +{ + log_assert(0); +} + +int worker_handle_request(struct comm_point* ATTR_UNUSED(c), + void* ATTR_UNUSED(arg), int ATTR_UNUSED(error), + struct comm_reply* ATTR_UNUSED(repinfo)) +{ + log_assert(0); + return 0; +} + +int worker_handle_service_reply(struct comm_point* ATTR_UNUSED(c), + void* ATTR_UNUSED(arg), int ATTR_UNUSED(error), + struct comm_reply* ATTR_UNUSED(reply_info)) +{ + log_assert(0); + return 0; +} + +int remote_accept_callback(struct comm_point* ATTR_UNUSED(c), + void* ATTR_UNUSED(arg), int ATTR_UNUSED(error), + struct comm_reply* ATTR_UNUSED(repinfo)) +{ + log_assert(0); + return 0; +} + +int remote_control_callback(struct comm_point* ATTR_UNUSED(c), + void* ATTR_UNUSED(arg), int ATTR_UNUSED(error), + struct comm_reply* ATTR_UNUSED(repinfo)) +{ + log_assert(0); + return 0; +} + +void worker_sighandler(int ATTR_UNUSED(sig), void* ATTR_UNUSED(arg)) +{ + log_assert(0); +} + +struct outbound_entry* worker_send_query( + struct query_info* ATTR_UNUSED(qinfo), uint16_t ATTR_UNUSED(flags), + int ATTR_UNUSED(dnssec), int ATTR_UNUSED(want_dnssec), + 
int ATTR_UNUSED(nocaps), int ATTR_UNUSED(check_ratelimit), + struct sockaddr_storage* ATTR_UNUSED(addr), + socklen_t ATTR_UNUSED(addrlen), uint8_t* ATTR_UNUSED(zone), + size_t ATTR_UNUSED(zonelen), int ATTR_UNUSED(tcp_upstream), + int ATTR_UNUSED(ssl_upstream), char* ATTR_UNUSED(tls_auth_name), + struct module_qstate* ATTR_UNUSED(q), int* ATTR_UNUSED(was_ratelimited)) +{ + log_assert(0); + return 0; +} + +#ifdef UB_ON_WINDOWS +void +worker_win_stop_cb(int ATTR_UNUSED(fd), short ATTR_UNUSED(ev), void* + ATTR_UNUSED(arg)) { + log_assert(0); +} + +void +wsvc_cron_cb(void* ATTR_UNUSED(arg)) +{ + log_assert(0); +} +#endif /* UB_ON_WINDOWS */ + +void +worker_alloc_cleanup(void* ATTR_UNUSED(arg)) +{ + log_assert(0); +} + +struct outbound_entry* libworker_send_query( + struct query_info* ATTR_UNUSED(qinfo), uint16_t ATTR_UNUSED(flags), + int ATTR_UNUSED(dnssec), int ATTR_UNUSED(want_dnssec), + int ATTR_UNUSED(nocaps), int ATTR_UNUSED(check_ratelimit), + struct sockaddr_storage* ATTR_UNUSED(addr), + socklen_t ATTR_UNUSED(addrlen), uint8_t* ATTR_UNUSED(zone), + size_t ATTR_UNUSED(zonelen), int ATTR_UNUSED(tcp_upstream), + int ATTR_UNUSED(ssl_upstream), char* ATTR_UNUSED(tls_auth_name), + struct module_qstate* ATTR_UNUSED(q), int* ATTR_UNUSED(was_ratelimited)) +{ + log_assert(0); + return 0; +} + +int libworker_handle_service_reply(struct comm_point* ATTR_UNUSED(c), + void* ATTR_UNUSED(arg), int ATTR_UNUSED(error), + struct comm_reply* ATTR_UNUSED(reply_info)) +{ + log_assert(0); + return 0; +} + +void libworker_handle_control_cmd(struct tube* ATTR_UNUSED(tube), + uint8_t* ATTR_UNUSED(buffer), size_t ATTR_UNUSED(len), + int ATTR_UNUSED(error), void* ATTR_UNUSED(arg)) +{ + log_assert(0); +} + +void libworker_fg_done_cb(void* ATTR_UNUSED(arg), int ATTR_UNUSED(rcode), + struct sldns_buffer* ATTR_UNUSED(buf), enum sec_status ATTR_UNUSED(s), + char* ATTR_UNUSED(why_bogus), int ATTR_UNUSED(was_ratelimited)) +{ + log_assert(0); +} + +void libworker_bg_done_cb(void* ATTR_UNUSED(arg), 
/** keep track of lock id in lock-verify application */
struct order_id {
	/** the thread id that created it */
	int thr;
	/** the instance number of creation */
	int instance;
};

/**
 * Compare two order_id keys, first on the thread id and then on the
 * instance number.
 * @param e1: pointer to the first struct order_id.
 * @param e2: pointer to the second struct order_id.
 * @return -1 if e1 sorts before e2, 1 if it sorts after, 0 if equal.
 */
int order_lock_cmp(const void* e1, const void* e2)
{
	const struct order_id* left = (const struct order_id*)e1;
	const struct order_id* right = (const struct order_id*)e2;

	if(left->thr != right->thr)
		return (left->thr < right->thr) ? -1 : 1;
	if(left->instance != right->instance)
		return (left->instance < right->instance) ? -1 : 1;
	return 0;
}
a517fa5f3..2f60b1381 100644 --- a/testcode/fake_event.c +++ b/testcode/fake_event.c @@ -939,6 +939,11 @@ listen_create(struct comm_base* base, struct listen_port* ATTR_UNUSED(ports), int ATTR_UNUSED(http_notls), struct tcl_list* ATTR_UNUSED(tcp_conn_limit), void* ATTR_UNUSED(sslctx), struct dt_env* ATTR_UNUSED(dtenv), + struct doq_table* ATTR_UNUSED(table), + struct ub_randstate* ATTR_UNUSED(rnd), + const char* ATTR_UNUSED(ssl_service_key), + const char* ATTR_UNUSED(ssl_service_pem), + struct config_file* ATTR_UNUSED(cfg), comm_point_callback_type* cb, void *cb_arg) { struct replay_runtime* runtime = (struct replay_runtime*)base; diff --git a/testcode/testbound.c b/testcode/testbound.c index 70feb7972..442e23434 100644 --- a/testcode/testbound.c +++ b/testcode/testbound.c @@ -600,3 +600,52 @@ void listen_desetup_locks(void) { /* nothing */ } + +#ifdef HAVE_NGTCP2 +void comm_point_doq_callback(int ATTR_UNUSED(fd), short ATTR_UNUSED(event), + void* ATTR_UNUSED(arg)) +{ + /* nothing */ +} + +int doq_conn_cmp(const void* ATTR_UNUSED(key1), const void* ATTR_UNUSED(key2)) +{ + return 0; +} + +int doq_conid_cmp(const void* ATTR_UNUSED(key1), const void* ATTR_UNUSED(key2)) +{ + return 0; +} + +int doq_timer_cmp(const void* ATTR_UNUSED(key1), const void* ATTR_UNUSED(key2)) +{ + return 0; +} + +int doq_stream_cmp(const void* ATTR_UNUSED(key1), const void* ATTR_UNUSED(key2)) +{ + return 0; +} + +struct doq_table* doq_table_create(struct config_file* ATTR_UNUSED(cfg), + struct ub_randstate* ATTR_UNUSED(rnd)) +{ + return calloc(1, sizeof(struct doq_table)); +} + +void doq_table_delete(struct doq_table* table) +{ + free(table); +} + +void doq_timer_cb(void* ATTR_UNUSED(arg)) +{ + /* nothing */ +} + +size_t doq_table_quic_size_get(struct doq_table* ATTR_UNUSED(table)) +{ + return 0; +} +#endif diff --git a/testcode/unitdoq.c b/testcode/unitdoq.c new file mode 100644 index 000000000..2b9160970 --- /dev/null +++ b/testcode/unitdoq.c @@ -0,0 +1,84 @@ +/* + * testcode/unitdoq.c - 
unit test for doq routines. + * + * Copyright (c) 2022, NLnet Labs. All rights reserved. + * + * This software is open source. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * Neither the name of the NLNET LABS nor the names of its contributors may + * be used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + +/** + * \file + * Calls doq related unit tests. Exits with code 1 on a failure. 
+ */ + +#include "config.h" + +#ifdef HAVE_NGTCP2 + +#include "util/netevent.h" +#include "services/listen_dnsport.h" +#include "testcode/unitmain.h" + +/** check the size of a connection for doq */ +static void +doq_size_conn_check() +{ + /* Printout the size of one doq connection, in memory usage. + * A connection with a couple cids, of type doq_conid, and + * it has one stream, and that has a query and an answer. */ + size_t answer_size = 233; /* size of www.nlnetlabs.nl minimal answer + with dnssec and one A record. The unsigned answer is 176 with + additional data, 61 bytes minimal response one A record. */ + size_t query_size = 45; /* size of query for www.nlnetlabs.nl, with + an EDNS record with DO flag. */ + size_t conn_size = sizeof(struct doq_conn); + size_t conid_size = sizeof(struct doq_conid); + size_t stream_size = sizeof(struct doq_stream); + + conn_size += 16; /* DCID len in the conn key */ + conn_size += 0; /* the size of the ngtcp2_conn */ + conn_size += 0; /* the size of the SSL record */ + conn_size += 0; /* size of the close pkt, + but we do not count it here. Only if the conn gets closed. 
*/ + conid_size += 16; /* the dcid of the conn key */ + conid_size += 16; /* the cid */ + stream_size += query_size; /* size of in buffer */ + stream_size += answer_size; /* size of out buffer */ + printf("doq connection size %u bytes\n", (unsigned)(conn_size + + conid_size*3 + stream_size)); +} + +void doq_test(void) +{ + unit_show_feature("doq"); + doq_size_conn_check(); +} +#endif /* HAVE_NGTCP2 */ diff --git a/testcode/unitmain.c b/testcode/unitmain.c index 9129d722b..653d3efbe 100644 --- a/testcode/unitmain.c +++ b/testcode/unitmain.c @@ -1432,6 +1432,9 @@ main(int argc, char* argv[]) #ifdef CLIENT_SUBNET ecs_test(); #endif /* CLIENT_SUBNET */ +#ifdef HAVE_NGTCP2 + doq_test(); +#endif /* HAVE_NGTCP2 */ if(log_get_lock()) { lock_basic_destroy((lock_basic_type*)log_get_lock()); } diff --git a/testcode/unitmain.h b/testcode/unitmain.h index adcd74f77..99d5240d2 100644 --- a/testcode/unitmain.h +++ b/testcode/unitmain.h @@ -84,5 +84,7 @@ void authzone_test(void); void zonemd_test(void); /** unit test for tcp_reuse functions */ void tcpreuse_test(void); +/** unit test for doq functions */ +void doq_test(void); #endif /* TESTCODE_UNITMAIN_H */ diff --git a/testdata/acl_interface.tdir/acl_interface.conf b/testdata/acl_interface.tdir/acl_interface.conf index 1d9f8c9aa..b1d948573 100644 --- a/testdata/acl_interface.tdir/acl_interface.conf +++ b/testdata/acl_interface.tdir/acl_interface.conf @@ -139,6 +139,20 @@ server: interface-view: @INTERFACE@@@PORT_VIEW_EXT@ "ext" interface-view: @INTERFACE@@@PORT_VIEW_INTEXT@ "intext" +# Interface with scope_id + interface: @INTERFACE@vlan50@@PORT_ALLOW@ + interface: @INTERFACE@vlan51@@PORT_ALLOW@ + interface-tag: @INTERFACE@vlan50@@PORT_ALLOW@ "one" + interface-tag: @INTERFACE@vlan51@@PORT_ALLOW@ "two" + interface-action: @INTERFACE@vlan50@@PORT_ALLOW@ allow + interface-action: @INTERFACE@vlan51@@PORT_ALLOW@ allow + local-zone: one.vtest. static + local-data: "one.vtest. A 1.1.1.1" + local-zone-tag: one.vtest. 
"one" + local-zone: two.vtest. static + local-data: "two.vtest. A 2.2.2.2" + local-zone-tag: two.vtest. "two" + # Local zones configuration local-zone: local. transparent local-data: "local. A 0.0.0.0" diff --git a/testdata/acl_interface.tdir/acl_interface.test.scenario b/testdata/acl_interface.tdir/acl_interface.test.scenario index 4ae0a42f0..6348d2ef4 100644 --- a/testdata/acl_interface.tdir/acl_interface.test.scenario +++ b/testdata/acl_interface.tdir/acl_interface.test.scenario @@ -17,6 +17,15 @@ ip addr add $INTERFACE_ADDR_3 dev $INTERFACE ip addr add $INTERFACE_ADDR_4 dev $INTERFACE ip link set $INTERFACE up +ip link add ${INTERFACE}vlan50 type dummy +ip addr add fe80::2/64 dev ${INTERFACE}vlan50 +ip link add ${INTERFACE}vlan51 type dummy +ip addr add fe80::2/64 dev ${INTERFACE}vlan51 +ip link set ${INTERFACE}vlan50 up +ip link set ${INTERFACE}vlan51 up + +ip addr show + # start the forwarder in the background get_ldns_testns $LDNS_TESTNS -p $FORWARD_PORT acl_interface.testns >fwd.log 2>&1 & @@ -250,4 +259,10 @@ for addr in $INTERFACE_ADDR_1 $INTERFACE_ADDR_2 $INTERFACE_ADDR_3 $INTERFACE_ADD expect_external_answer done +query_addr fe80::2%${INTERFACE}vlan50 $PORT_ALLOW "one.vtest." +expect_tag_one_answer + +query_addr fe80::2%${INTERFACE}vlan51 $PORT_ALLOW "two.vtest." +expect_tag_two_answer + end 0 diff --git a/testdata/cookie_file.tdir/cookie_file.test b/testdata/cookie_file.tdir/cookie_file.test index 7da4fa657..d5d2c2d1f 100644 --- a/testdata/cookie_file.tdir/cookie_file.test +++ b/testdata/cookie_file.tdir/cookie_file.test @@ -238,10 +238,12 @@ then echo "Got the same first cookie in the response while the second secret is active" exit 1 fi -if ! grep -q "COOKIE: $second_cookie" $outfile +if ! grep -q "COOKIE: .* (good)$" $outfile then + # dig can generate a different cookie value here than previous cookies. 
+ # but make sure the output contains a valid cookie cat $outfile - echo "Did not get the same second cookie in the response" + echo "Did not get a valid cookie in the response" exit 1 fi diff --git a/testdata/doq_downstream.tdir/doq_downstream.conf b/testdata/doq_downstream.tdir/doq_downstream.conf new file mode 100644 index 000000000..babd35041 --- /dev/null +++ b/testdata/doq_downstream.tdir/doq_downstream.conf @@ -0,0 +1,21 @@ +server: + verbosity: 2 + # num-threads: 1 + interface: 127.0.0.1@@PORT@ + quic-port: @PORT@ + tls-service-key: "unbound_server.key" + tls-service-pem: "unbound_server.pem" + use-syslog: no + directory: . + pidfile: "unbound.pid" + chroot: "" + username: "" + do-not-query-localhost: no + + local-zone: "example.net" static + local-data: "www.example.net. IN A 1.2.3.4" + local-zone: "drop.net" deny + +forward-zone: + name: "." + forward-addr: "127.0.0.1@@TOPORT@" diff --git a/testdata/doq_downstream.tdir/doq_downstream.dsc b/testdata/doq_downstream.tdir/doq_downstream.dsc new file mode 100644 index 000000000..1e0b19d50 --- /dev/null +++ b/testdata/doq_downstream.tdir/doq_downstream.dsc @@ -0,0 +1,16 @@ +BaseName: doq_downstream +Version: 1.0 +Description: Test DNS-over-QUIC query processing +CreationDate: Mon Aug 01 16:00:00 CEST 2022 +Maintainer: +Category: +Component: +CmdDepends: +Depends: +Help: +Pre: doq_downstream.pre +Post: doq_downstream.post +Test: doq_downstream.test +AuxFiles: +Passed: +Failure: diff --git a/testdata/doq_downstream.tdir/doq_downstream.post b/testdata/doq_downstream.tdir/doq_downstream.post new file mode 100644 index 000000000..f1a31be3c --- /dev/null +++ b/testdata/doq_downstream.tdir/doq_downstream.post @@ -0,0 +1,13 @@ +# #-- doq_downstream.post --# +# source the master var file when it's there +[ -f ../.tpkg.var.master ] && source ../.tpkg.var.master +# source the test var file when it's there +[ -f .tpkg.var.test ] && source .tpkg.var.test +# +# do your teardown here +PRE="../.." +. 
../common.sh +kill_pid $FWD_PID +if test -f unbound.pid; then + kill_pid $UNBOUND_PID +fi diff --git a/testdata/doq_downstream.tdir/doq_downstream.pre b/testdata/doq_downstream.tdir/doq_downstream.pre new file mode 100644 index 000000000..f748cc1f5 --- /dev/null +++ b/testdata/doq_downstream.tdir/doq_downstream.pre @@ -0,0 +1,44 @@ +# #-- doq_downstream.pre--# +# source the master var file when it's there +[ -f ../.tpkg.var.master ] && source ../.tpkg.var.master +# use .tpkg.var.test for in test variable passing +[ -f .tpkg.var.test ] && source .tpkg.var.test + +PRE="../.." +. ../common.sh +if grep "define HAVE_NGTCP2 1" $PRE/config.h; then echo test enabled; else skip_test "test skipped"; fi + +if test -f $PRE/unbound_do_valgrind_in_test; then + do_valgrind=yes +else + do_valgrind=no +fi +VALGRIND_FLAGS="--leak-check=full --show-leak-kinds=all" + +get_random_port 2 +UNBOUND_PORT=$RND_PORT +FWD_PORT=$(($RND_PORT + 1)) +echo "UNBOUND_PORT=$UNBOUND_PORT" >> .tpkg.var.test +echo "FWD_PORT=$FWD_PORT" >> .tpkg.var.test + +# start forwarder +get_ldns_testns +$LDNS_TESTNS -p $FWD_PORT doq_downstream.testns >fwd.log 2>&1 & +FWD_PID=$! +echo "FWD_PID=$FWD_PID" >> .tpkg.var.test + +# make config file +sed -e 's/@PORT\@/'$UNBOUND_PORT'/' -e 's/@TOPORT\@/'$FWD_PORT'/' < doq_downstream.conf > ub.conf +# start unbound in the background +if test $do_valgrind = "yes"; then +valgrind $VALGRIND_FLAGS $PRE/unbound -vvvv -d -c ub.conf >unbound.log 2>&1 & +else +$PRE/unbound -vvvv -d -c ub.conf >unbound.log 2>&1 & +fi +UNBOUND_PID=$! 
# #-- doq_downstream.test --#
# source the master var file when it's there
[ -f ../.tpkg.var.master ] && source ../.tpkg.var.master
# use .tpkg.var.test for in test variable passing
[ -f .tpkg.var.test ] && source .tpkg.var.test

PRE="../.."
. ../common.sh
get_make
(cd $PRE; $MAKE doqclient)

# test query from local-data, immediate like from cache
echo "> query www.example.net."
$PRE/doqclient -s 127.0.0.1 -p $UNBOUND_PORT www.example.net. A IN >outfile 2>&1
# Save the exit status of doqclient right away, the cat below overwrites $?.
doq_status=$?
cat outfile
if test "$doq_status" -ne 0; then
	echo "exit status not OK"
	echo "> cat logfiles"
	cat outfile
	cat fwd.log
	cat unbound.log
	echo "Not OK"
	exit 1
fi
if grep "www.example.net" outfile | grep "1.2.3.4"; then
	echo "content OK"
else
	echo "result contents not OK"
	echo "> cat logfiles"
	cat outfile
	cat fwd.log
	cat unbound.log
	echo "result contents not OK"
	exit 1
fi
echo "OK"

# test query that is resolved
echo "> query www.example.com."
$PRE/doqclient -s 127.0.0.1 -p $UNBOUND_PORT www.example.com. A IN >outfile 2>&1
# Save the exit status of doqclient right away, the cat below overwrites $?.
doq_status=$?
cat outfile
if test "$doq_status" -ne 0; then
	echo "exit status not OK"
	echo "> cat logfiles"
	cat outfile
	cat fwd.log
	cat unbound.log
	echo "Not OK"
	exit 1
fi
if grep "www.example.com" outfile | grep "10.20.30.40"; then
	echo "content OK"
else
	echo "result contents not OK"
	echo "> cat logfiles"
	cat outfile
	cat fwd.log
	cat unbound.log
	echo "result contents not OK"
	exit 1
fi
echo "OK"

# Perform the lock verify tests, stop the server first.
kill_pid $UNBOUND_PID
cat unbound.log
# Remove pidfile so that the post script does not try to stop the server,
# it is already stopped.
rm -f unbound.pid
if test -f ublocktrace-doqclient.0; then
	if $PRE/lock-verify ublocktrace-doqclient.* 2>&1; then
		echo "lock-verify test ublocktrace-doqclient worked."
	else
		echo "lock-verify test ublocktrace-doqclient failed."
		exit 1
	fi
fi
if test -f ublocktrace.0; then
	if $PRE/lock-verify ublocktrace.* 2>&1; then
		echo "lock-verify test ublocktrace worked."
	else
		echo "lock-verify test ublocktrace failed."
		exit 1
	fi
	if grep "lock error" unbound.log >/dev/null; then
		echo "lock error"
		exit 1
	fi
fi
# check valgrind output
if test -f $PRE/unbound_do_valgrind_in_test; then
	if grep "All heap blocks were freed -- no leaks are possible" unbound.log; then
		: # clean
	else
		grep "^==" unbound.log
		echo "Memory leaked"
		grep "in use at exit" unbound.log
		exit 1
	fi
	if grep "ERROR SUMMARY: 0 errors from 0 contexts" unbound.log; then
		: # clean
	else
		grep "^==" unbound.log
		echo "Errors"
		grep "ERROR SUMMARY" unbound.log
		exit 1
	fi
fi
exit 0
+$TTL 3600 + +ENTRY_BEGIN +MATCH opcode qtype qname +REPLY QR AA NOERROR +ADJUST copy_id +SECTION QUESTION +www IN A +SECTION ANSWER +www IN A 10.20.30.40 +ENTRY_END diff --git a/testdata/doq_downstream.tdir/unbound_server.key b/testdata/doq_downstream.tdir/unbound_server.key new file mode 100644 index 000000000..4256c421d --- /dev/null +++ b/testdata/doq_downstream.tdir/unbound_server.key @@ -0,0 +1,15 @@ +-----BEGIN RSA PRIVATE KEY----- +MIICWwIBAAKBgQC3F7Jsv2u01pLL9rFnjsMU/IaCFUIz/624DcaE84Z4gjMl5kWA +3axQcqul1wlwSrbKwrony+d9hH/+MX0tZwvl8w3OmhmOAiaQ+SHCsIuOjVwQjX0s +RLB61Pz5+PAiVvnPa9JIYB5QrK6DVEsxIHj8MOc5JKORrnESsFDh6yeMeQIDAQAB +AoGAAuWoGBprTOA8UGfl5LqYkaNxSWumsYXxLMFjC8WCsjN1NbtQDDr1uAwodSZS +6ujzvX+ZTHnofs7y64XC8k34HTOCD2zlW7kijWbT8YjRYFU6o9F5zUGD9RCan0ds +sVscT2psLSzfdsmFAcbmnGdxYkXk2PC1FHtaqExxehralGUCQQDcqrg9uQKXlhQi +XAaPr8SiWvtRm2a9IMMZkRfUWZclPHq6fCWNuUaCD+cTat4wAuqeknAz33VEosw3 +fXGsok//AkEA1GjIHXrOcSlpfVJb6NeOBugjRtZ7ZDT5gbtnMS9ob0qntKV6saaL +CNmJwuD9Q3XkU5j1+uHvYGP2NzcJd2CjhwJACV0hNlVMe9w9fHvFN4Gw6WbM9ViP +0oS6YrJafYNTu5vGZXVxLoNnL4u3NYa6aPUmuZXjNwBLfJ8f5VboZPf6RwJAINd2 +oYA8bSi/A755MX4qmozH74r4Fx1Nuq5UHTm8RwDe/0Javx8F/j9MWpJY9lZDEF3l +In5OebPa/NyInSmW/wJAZuP9aRn0nDBkHYri++1A7NykMiJ/nH0mDECbnk+wxx0S +LwqIetBhxb8eQwMg45+iAH7CHAMQ8BQuF/nFE6eotg== +-----END RSA PRIVATE KEY----- diff --git a/testdata/doq_downstream.tdir/unbound_server.pem b/testdata/doq_downstream.tdir/unbound_server.pem new file mode 100644 index 000000000..aeda3ff11 --- /dev/null +++ b/testdata/doq_downstream.tdir/unbound_server.pem @@ -0,0 +1,11 @@ +-----BEGIN CERTIFICATE----- +MIIBmzCCAQQCCQDsNJ1UmphEFzANBgkqhkiG9w0BAQUFADASMRAwDgYDVQQDEwd1 +bmJvdW5kMB4XDTA4MDkxMTA5MDk0MFoXDTI4MDUyOTA5MDk0MFowEjEQMA4GA1UE +AxMHdW5ib3VuZDCBnzANBgkqhkiG9w0BAQEFAAOBjQAwgYkCgYEAtxeybL9rtNaS +y/axZ47DFPyGghVCM/+tuA3GhPOGeIIzJeZFgN2sUHKrpdcJcEq2ysK6J8vnfYR/ +/jF9LWcL5fMNzpoZjgImkPkhwrCLjo1cEI19LESwetT8+fjwIlb5z2vSSGAeUKyu +g1RLMSB4/DDnOSSjka5xErBQ4esnjHkCAwEAATANBgkqhkiG9w0BAQUFAAOBgQAZ 
+9N0lnLENs4JMvPS+mn8C5m9bkkFITd32IiLjf0zgYpIUbFXH6XaEr9GNZBUG8feG +l/6WRXnbnVSblI5odQ4XxGZ9inYY6qtW30uv76HvoKp+QZ1c3460ddR8NauhcCHH +Z7S+QbLXi+r2JAhpPozZCjBHlRD0ixzA1mKQTJhJZg== +-----END CERTIFICATE----- diff --git a/testdata/val_negcache_ttl_prefetch.rpl b/testdata/val_negcache_ttl_prefetch.rpl new file mode 100644 index 000000000..103c81fba --- /dev/null +++ b/testdata/val_negcache_ttl_prefetch.rpl @@ -0,0 +1,316 @@ +; config options +; The island of trust is at testzone.nlnetlabs.nl +server: + trust-anchor: "testzone.nlnetlabs.nl. 3600 IN DS 1444 8 2 07633464c1c7b93abd6fc24c73f904a40f0f304b279a80667d7e33908eed43be" + val-override-date: "20180213111425" + target-fetch-policy: "0 0 0 0 0" + qname-minimisation: "no" + trust-anchor-signaling: no + aggressive-nsec: yes + prefetch: yes + +stub-zone: + name: "testzone.nlnetlabs.nl" + stub-addr: 185.49.140.60 +stub-zone: + name: "zone2.nlnetlabs.nl" + stub-addr: 185.49.140.61 +CONFIG_END + +SCENARIO_BEGIN Test validator with negative cache TTL (aggressive NSEC) and some prefetch. +; The NSEC records are restricted by the TTL of 900 of the SOA record. +; There are prefetch actions, both with and without a CNAME preceding. + +; testzone.nlnetlabs.nl nameserver +RANGE_BEGIN 0 100 + ADDRESS 185.49.140.60 + +; response to DNSKEY priming query +ENTRY_BEGIN +MATCH opcode qtype qname +ADJUST copy_id +REPLY QR AA NOERROR +SECTION QUESTION +testzone.nlnetlabs.nl. IN DNSKEY +SECTION ANSWER +testzone.nlnetlabs.nl. 3600 IN DNSKEY 257 3 8 AwEAAbd9WqjzE2Pynz21OG5doSf9hFzMr5dhzz2waZ3vTa+0o5r7AjTAqmA1yH/B3+aAMihUm5ucZSfVqo7+kOaRE8yFj9aivOmA1n1+JLevJq/oyvQyjxQN2Qb89LyaNUT5oKZIiL+uyyhNW3KDR3SSbQ/GBwQNDHVcZi+JDR3RC0r7 ;{id = 1444 (ksk), size = 1024b} +testzone.nlnetlabs.nl. 3600 IN RRSIG DNSKEY 8 3 3600 20180313101254 20180213101254 1444 testzone.nlnetlabs.nl. 
kQ2sc41aQeMxQ7KInz2HrHi4nQcUGdv1olro0GmVYgPvIJh7SqBKW3yZWYeQrbWWwdc3klBERBbBI8gnkNYbl5kX3BBa5su8w71mpTQPRGtMxDTB17daxc0SxpPUxM35CpWU9QlBuDXcu+VNyVUuLvZGGLznlqr6ku888U2Rz+c= +ENTRY_END + +; response for antelope.testzone.nlnetlabs.nl. +; NSECs cover ant.testzone.nlnetlabs.nl as non-existent. +ENTRY_BEGIN +MATCH opcode qtype qname +ADJUST copy_id +REPLY QR AA NXDOMAIN +SECTION QUESTION +antelope.testzone.nlnetlabs.nl. IN TXT +SECTION ANSWER +SECTION AUTHORITY +testzone.nlnetlabs.nl. 3600 IN NSEC alligator.testzone.nlnetlabs.nl. NS SOA RRSIG NSEC DNSKEY +testzone.nlnetlabs.nl. 3600 IN RRSIG NSEC 8 3 3600 20180313101254 20180213101254 1444 testzone.nlnetlabs.nl. tcW20hZu5Ao+ikM+qjqAlRt3ujNxTKi6kZF3waWJGY7Ldyp9XyWzB1DeoQzaNJ6zflPYFO32RUhj7jWhEIUphG4+lEvm7VGJAdSteUZ2yOppN6eZvOk0Nc0nAGPFGBjLO6ul1Wh1X+jL61q7mWt3nY+IFBZHWmhsi2Qi7vM/W4E= +alligator.testzone.nlnetlabs.nl. 3600 IN NSEC cheetah.testzone.nlnetlabs.nl. TXT RRSIG NSEC +alligator.testzone.nlnetlabs.nl. 3600 IN RRSIG NSEC 8 4 3600 20180313101254 20180213101254 1444 testzone.nlnetlabs.nl. Zfkp3kmN8heAuIF/apf6RHhZAoGyXnvZLALRYTKIH7E9XC2wtvG9dZla4WLSr3ndA4d0CFgnKOt8mSVSLyNn232D0ahx4DFAnOJitnt9odT2+2sYhJbwCx38tPKhAUWmIn2jGZGMVjbVbEVi7WyQBrJYQqyhE/lADEDSdQZBNyA= +testzone.nlnetlabs.nl. 900 IN SOA ns.nlnetlabs.nl. ralph.nlnetlabs.nl. 1 14400 3600 604800 3600 +testzone.nlnetlabs.nl. 900 IN RRSIG SOA 8 3 900 20180313101254 20180213101254 1444 testzone.nlnetlabs.nl. abG0cByo/q5NaDNMz6FPvNvehHqUDhQRwLdvG72315hMGzCavLRWuAB5gieibMCrICH2WVHVj7fisjSuY0iPwf9xZlCGts3Z+xD9D72VRiTz7QXF+JjRWKl+3Uk6c29+pvIRKXC1Ht0r9uBXGmDTaHdV7cZCveoDwIVSngY+mQ0= +SECTION ADDITIONAL +ENTRY_END + +; No answer for ant.testzone.nlnetlabs.nl in this range + +; response for peanut.testzone.nlnetlabs.nl. AAAA +ENTRY_BEGIN +MATCH opcode qtype qname +ADJUST copy_id +REPLY QR AA NOERROR +SECTION QUESTION +peanut.testzone.nlnetlabs.nl. IN AAAA +SECTION AUTHORITY +peanut.testzone.nlnetlabs.nl. IN NSEC rust.testzone.nlnetlabs.nl. 
A RRSIG NSEC +peanut.testzone.nlnetlabs.nl. 3600 IN RRSIG NSEC 8 4 3600 20180313101254 20180213101254 1444 testzone.nlnetlabs.nl. GhUUt3n1oVZCbU5l7XhbtE1kAhFXBRvQRvp/s3INitoHm1D54VERXWR33g+aQMcLAyCOe2TmpJMH1zDSbccf0zabvwEzqDzPmgcPt0KjXUdrN84/2XN+C4U84golbUui61lhhU+6bL8rylPuv3XtqQ4ppXy8sSe+gfsskauhMpg= +testzone.nlnetlabs.nl. 900 IN SOA ns.nlnetlabs.nl. ralph.nlnetlabs.nl. 1 14400 3600 604800 3600 +testzone.nlnetlabs.nl. 900 IN RRSIG SOA 8 3 900 20180313101254 20180213101254 1444 testzone.nlnetlabs.nl. abG0cByo/q5NaDNMz6FPvNvehHqUDhQRwLdvG72315hMGzCavLRWuAB5gieibMCrICH2WVHVj7fisjSuY0iPwf9xZlCGts3Z+xD9D72VRiTz7QXF+JjRWKl+3Uk6c29+pvIRKXC1Ht0r9uBXGmDTaHdV7cZCveoDwIVSngY+mQ0= +SECTION ADDITIONAL +ENTRY_END +RANGE_END + +; testzone.nlnetlabs.nl nameserver +RANGE_BEGIN 100 200 + ADDRESS 185.49.140.60 +; response for ant.testzone.nlnetlabs.nl +ENTRY_BEGIN +MATCH opcode qtype qname +REPLY QR AA NOERROR +SECTION QUESTION +ant.testzone.nlnetlabs.nl. IN TXT +SECTION ANSWER +ant.testzone.nlnetlabs.nl. TXT "heap" +ant.testzone.nlnetlabs.nl. 3600 IN RRSIG TXT 8 4 3600 20180313101254 20180213101254 1444 testzone.nlnetlabs.nl. Sn8dBGMSYGGKs7yGWO0CShxbm3ba5Y6ysHyE/HJyFnS8NmsKIx/KVdFPRQx/Jm7a3hektRXrjxetfhfJm0SzJ2UFeKlkE+VJ/Lj2oAETqN1oqqkNr+RDdbKLMzLApMRgrhStSAO1Yb8/8oUIflyrjNbuDbAHSMbkOE+Z49LIais= +ENTRY_END +RANGE_END + +; zone2.nlnetlabs.nl nameserver +RANGE_BEGIN 0 100 + ADDRESS 185.49.140.61 +ENTRY_BEGIN +MATCH opcode qtype qname +REPLY QR AA NOERROR +SECTION QUESTION +redir.zone2.nlnetlabs.nl. IN TXT +SECTION ANSWER +redir.zone2.nlnetlabs.nl. 15 IN CNAME antelope.testzone.nlnetlabs.nl. +ENTRY_END +RANGE_END + +STEP 1 QUERY +ENTRY_BEGIN +REPLY RD DO +SECTION QUESTION +antelope.testzone.nlnetlabs.nl. IN TXT +ENTRY_END + +; recursion happens here. Expect NXDOMAIN. +STEP 2 CHECK_ANSWER +ENTRY_BEGIN +MATCH all ttl +REPLY QR RD RA DO AD NXDOMAIN +SECTION QUESTION +antelope.testzone.nlnetlabs.nl. IN TXT +SECTION ANSWER +SECTION AUTHORITY +testzone.nlnetlabs.nl. 
3600 IN NSEC alligator.testzone.nlnetlabs.nl. NS SOA RRSIG NSEC DNSKEY +testzone.nlnetlabs.nl. 3600 IN RRSIG NSEC 8 3 3600 20180313101254 20180213101254 1444 testzone.nlnetlabs.nl. tcW20hZu5Ao+ikM+qjqAlRt3ujNxTKi6kZF3waWJGY7Ldyp9XyWzB1DeoQzaNJ6zflPYFO32RUhj7jWhEIUphG4+lEvm7VGJAdSteUZ2yOppN6eZvOk0Nc0nAGPFGBjLO6ul1Wh1X+jL61q7mWt3nY+IFBZHWmhsi2Qi7vM/W4E= +alligator.testzone.nlnetlabs.nl. 3600 IN NSEC cheetah.testzone.nlnetlabs.nl. TXT RRSIG NSEC +alligator.testzone.nlnetlabs.nl. 3600 IN RRSIG NSEC 8 4 3600 20180313101254 20180213101254 1444 testzone.nlnetlabs.nl. Zfkp3kmN8heAuIF/apf6RHhZAoGyXnvZLALRYTKIH7E9XC2wtvG9dZla4WLSr3ndA4d0CFgnKOt8mSVSLyNn232D0ahx4DFAnOJitnt9odT2+2sYhJbwCx38tPKhAUWmIn2jGZGMVjbVbEVi7WyQBrJYQqyhE/lADEDSdQZBNyA= +testzone.nlnetlabs.nl. 900 IN SOA ns.nlnetlabs.nl. ralph.nlnetlabs.nl. 1 14400 3600 604800 3600 +testzone.nlnetlabs.nl. 900 IN RRSIG SOA 8 3 900 20180313101254 20180213101254 1444 testzone.nlnetlabs.nl. abG0cByo/q5NaDNMz6FPvNvehHqUDhQRwLdvG72315hMGzCavLRWuAB5gieibMCrICH2WVHVj7fisjSuY0iPwf9xZlCGts3Z+xD9D72VRiTz7QXF+JjRWKl+3Uk6c29+pvIRKXC1Ht0r9uBXGmDTaHdV7cZCveoDwIVSngY+mQ0= +SECTION ADDITIONAL +ENTRY_END + +STEP 4 QUERY +ENTRY_BEGIN +REPLY RD DO +SECTION QUESTION +peanut.testzone.nlnetlabs.nl. IN AAAA +ENTRY_END + +STEP 5 CHECK_ANSWER +ENTRY_BEGIN +MATCH all ttl +REPLY QR RD RA AD DO NOERROR +SECTION QUESTION +peanut.testzone.nlnetlabs.nl. IN AAAA +SECTION AUTHORITY +peanut.testzone.nlnetlabs.nl. IN NSEC rust.testzone.nlnetlabs.nl. A RRSIG NSEC +peanut.testzone.nlnetlabs.nl. 3600 IN RRSIG NSEC 8 4 3600 20180313101254 20180213101254 1444 testzone.nlnetlabs.nl. GhUUt3n1oVZCbU5l7XhbtE1kAhFXBRvQRvp/s3INitoHm1D54VERXWR33g+aQMcLAyCOe2TmpJMH1zDSbccf0zabvwEzqDzPmgcPt0KjXUdrN84/2XN+C4U84golbUui61lhhU+6bL8rylPuv3XtqQ4ppXy8sSe+gfsskauhMpg= +testzone.nlnetlabs.nl. 900 IN SOA ns.nlnetlabs.nl. ralph.nlnetlabs.nl. 1 14400 3600 604800 3600 +testzone.nlnetlabs.nl. 900 IN RRSIG SOA 8 3 900 20180313101254 20180213101254 1444 testzone.nlnetlabs.nl. 
abG0cByo/q5NaDNMz6FPvNvehHqUDhQRwLdvG72315hMGzCavLRWuAB5gieibMCrICH2WVHVj7fisjSuY0iPwf9xZlCGts3Z+xD9D72VRiTz7QXF+JjRWKl+3Uk6c29+pvIRKXC1Ht0r9uBXGmDTaHdV7cZCveoDwIVSngY+mQ0= +ENTRY_END + +; query for ant.testzone.nlnetlabs.nl (non-existent) +STEP 11 QUERY +ENTRY_BEGIN +REPLY RD DO +SECTION QUESTION +ant.testzone.nlnetlabs.nl. IN TXT +ENTRY_END + +; this is the synthesized NXDOMAIN from aggressive-nsec +STEP 12 CHECK_ANSWER +ENTRY_BEGIN +MATCH all ttl +REPLY QR RD RA AD DO NXDOMAIN +SECTION QUESTION +ant.testzone.nlnetlabs.nl. IN TXT +SECTION ANSWER +SECTION AUTHORITY +testzone.nlnetlabs.nl. 3600 IN NSEC alligator.testzone.nlnetlabs.nl. NS SOA RRSIG NSEC DNSKEY +testzone.nlnetlabs.nl. 3600 IN RRSIG NSEC 8 3 3600 20180313101254 20180213101254 1444 testzone.nlnetlabs.nl. tcW20hZu5Ao+ikM+qjqAlRt3ujNxTKi6kZF3waWJGY7Ldyp9XyWzB1DeoQzaNJ6zflPYFO32RUhj7jWhEIUphG4+lEvm7VGJAdSteUZ2yOppN6eZvOk0Nc0nAGPFGBjLO6ul1Wh1X+jL61q7mWt3nY+IFBZHWmhsi2Qi7vM/W4E= +alligator.testzone.nlnetlabs.nl. 3600 IN NSEC cheetah.testzone.nlnetlabs.nl. TXT RRSIG NSEC +alligator.testzone.nlnetlabs.nl. 3600 IN RRSIG NSEC 8 4 3600 20180313101254 20180213101254 1444 testzone.nlnetlabs.nl. Zfkp3kmN8heAuIF/apf6RHhZAoGyXnvZLALRYTKIH7E9XC2wtvG9dZla4WLSr3ndA4d0CFgnKOt8mSVSLyNn232D0ahx4DFAnOJitnt9odT2+2sYhJbwCx38tPKhAUWmIn2jGZGMVjbVbEVi7WyQBrJYQqyhE/lADEDSdQZBNyA= +testzone.nlnetlabs.nl. 900 IN SOA ns.nlnetlabs.nl. ralph.nlnetlabs.nl. 1 14400 3600 604800 3600 +testzone.nlnetlabs.nl. 900 IN RRSIG SOA 8 3 900 20180313101254 20180213101254 1444 testzone.nlnetlabs.nl. abG0cByo/q5NaDNMz6FPvNvehHqUDhQRwLdvG72315hMGzCavLRWuAB5gieibMCrICH2WVHVj7fisjSuY0iPwf9xZlCGts3Z+xD9D72VRiTz7QXF+JjRWKl+3Uk6c29+pvIRKXC1Ht0r9uBXGmDTaHdV7cZCveoDwIVSngY+mQ0= +ENTRY_END + +STEP 13 TIME_PASSES ELAPSE 860 +STEP 14 QUERY +ENTRY_BEGIN +REPLY RD DO +SECTION QUESTION +redir.zone2.nlnetlabs.nl. IN TXT +ENTRY_END + +STEP 15 CHECK_ANSWER +ENTRY_BEGIN +MATCH all ttl +REPLY QR RD RA DO NXDOMAIN +SECTION QUESTION +redir.zone2.nlnetlabs.nl. 
IN TXT +SECTION ANSWER +redir.zone2.nlnetlabs.nl. 15 IN CNAME antelope.testzone.nlnetlabs.nl. +SECTION AUTHORITY +testzone.nlnetlabs.nl. 2740 IN NSEC alligator.testzone.nlnetlabs.nl. NS SOA RRSIG NSEC DNSKEY +testzone.nlnetlabs.nl. 2740 IN RRSIG NSEC 8 3 3600 20180313101254 20180213101254 1444 testzone.nlnetlabs.nl. tcW20hZu5Ao+ikM+qjqAlRt3ujNxTKi6kZF3waWJGY7Ldyp9XyWzB1DeoQzaNJ6zflPYFO32RUhj7jWhEIUphG4+lEvm7VGJAdSteUZ2yOppN6eZvOk0Nc0nAGPFGBjLO6ul1Wh1X+jL61q7mWt3nY+IFBZHWmhsi2Qi7vM/W4E= +alligator.testzone.nlnetlabs.nl. 2740 IN NSEC cheetah.testzone.nlnetlabs.nl. TXT RRSIG NSEC +alligator.testzone.nlnetlabs.nl. 2740 IN RRSIG NSEC 8 4 3600 20180313101254 20180213101254 1444 testzone.nlnetlabs.nl. Zfkp3kmN8heAuIF/apf6RHhZAoGyXnvZLALRYTKIH7E9XC2wtvG9dZla4WLSr3ndA4d0CFgnKOt8mSVSLyNn232D0ahx4DFAnOJitnt9odT2+2sYhJbwCx38tPKhAUWmIn2jGZGMVjbVbEVi7WyQBrJYQqyhE/lADEDSdQZBNyA= +testzone.nlnetlabs.nl. 40 IN SOA ns.nlnetlabs.nl. ralph.nlnetlabs.nl. 1 14400 3600 604800 3600 +testzone.nlnetlabs.nl. 40 IN RRSIG SOA 8 3 900 20180313101254 20180213101254 1444 testzone.nlnetlabs.nl. abG0cByo/q5NaDNMz6FPvNvehHqUDhQRwLdvG72315hMGzCavLRWuAB5gieibMCrICH2WVHVj7fisjSuY0iPwf9xZlCGts3Z+xD9D72VRiTz7QXF+JjRWKl+3Uk6c29+pvIRKXC1Ht0r9uBXGmDTaHdV7cZCveoDwIVSngY+mQ0= +ENTRY_END + +STEP 16 TIME_PASSES ELAPSE 14 +STEP 17 QUERY +ENTRY_BEGIN +REPLY RD DO +SECTION QUESTION +redir.zone2.nlnetlabs.nl. IN TXT +ENTRY_END + +STEP 18 CHECK_ANSWER +ENTRY_BEGIN +MATCH all ttl +REPLY QR RD RA DO NXDOMAIN +SECTION QUESTION +redir.zone2.nlnetlabs.nl. IN TXT +SECTION ANSWER +redir.zone2.nlnetlabs.nl. 1 IN CNAME antelope.testzone.nlnetlabs.nl. +SECTION AUTHORITY +testzone.nlnetlabs.nl. 2726 IN NSEC alligator.testzone.nlnetlabs.nl. NS SOA RRSIG NSEC DNSKEY +testzone.nlnetlabs.nl. 2726 IN RRSIG NSEC 8 3 3600 20180313101254 20180213101254 1444 testzone.nlnetlabs.nl. 
tcW20hZu5Ao+ikM+qjqAlRt3ujNxTKi6kZF3waWJGY7Ldyp9XyWzB1DeoQzaNJ6zflPYFO32RUhj7jWhEIUphG4+lEvm7VGJAdSteUZ2yOppN6eZvOk0Nc0nAGPFGBjLO6ul1Wh1X+jL61q7mWt3nY+IFBZHWmhsi2Qi7vM/W4E= +alligator.testzone.nlnetlabs.nl. 2726 IN NSEC cheetah.testzone.nlnetlabs.nl. TXT RRSIG NSEC +alligator.testzone.nlnetlabs.nl. 2726 IN RRSIG NSEC 8 4 3600 20180313101254 20180213101254 1444 testzone.nlnetlabs.nl. Zfkp3kmN8heAuIF/apf6RHhZAoGyXnvZLALRYTKIH7E9XC2wtvG9dZla4WLSr3ndA4d0CFgnKOt8mSVSLyNn232D0ahx4DFAnOJitnt9odT2+2sYhJbwCx38tPKhAUWmIn2jGZGMVjbVbEVi7WyQBrJYQqyhE/lADEDSdQZBNyA= +testzone.nlnetlabs.nl. 26 IN SOA ns.nlnetlabs.nl. ralph.nlnetlabs.nl. 1 14400 3600 604800 3600 +testzone.nlnetlabs.nl. 26 IN RRSIG SOA 8 3 900 20180313101254 20180213101254 1444 testzone.nlnetlabs.nl. abG0cByo/q5NaDNMz6FPvNvehHqUDhQRwLdvG72315hMGzCavLRWuAB5gieibMCrICH2WVHVj7fisjSuY0iPwf9xZlCGts3Z+xD9D72VRiTz7QXF+JjRWKl+3Uk6c29+pvIRKXC1Ht0r9uBXGmDTaHdV7cZCveoDwIVSngY+mQ0= +ENTRY_END + +STEP 19 TRAFFIC + +STEP 20 QUERY +ENTRY_BEGIN +REPLY RD DO +SECTION QUESTION +peanut.testzone.nlnetlabs.nl. IN AAAA +ENTRY_END + +STEP 21 CHECK_ANSWER +ENTRY_BEGIN +MATCH all ttl +REPLY QR RD RA AD DO NOERROR +SECTION QUESTION +peanut.testzone.nlnetlabs.nl. IN AAAA +SECTION AUTHORITY +peanut.testzone.nlnetlabs.nl. 2726 IN NSEC rust.testzone.nlnetlabs.nl. A RRSIG NSEC +peanut.testzone.nlnetlabs.nl. 2726 IN RRSIG NSEC 8 4 3600 20180313101254 20180213101254 1444 testzone.nlnetlabs.nl. GhUUt3n1oVZCbU5l7XhbtE1kAhFXBRvQRvp/s3INitoHm1D54VERXWR33g+aQMcLAyCOe2TmpJMH1zDSbccf0zabvwEzqDzPmgcPt0KjXUdrN84/2XN+C4U84golbUui61lhhU+6bL8rylPuv3XtqQ4ppXy8sSe+gfsskauhMpg= +testzone.nlnetlabs.nl. 900 IN SOA ns.nlnetlabs.nl. ralph.nlnetlabs.nl. 1 14400 3600 604800 3600 +testzone.nlnetlabs.nl. 900 IN RRSIG SOA 8 3 900 20180313101254 20180213101254 1444 testzone.nlnetlabs.nl. 
abG0cByo/q5NaDNMz6FPvNvehHqUDhQRwLdvG72315hMGzCavLRWuAB5gieibMCrICH2WVHVj7fisjSuY0iPwf9xZlCGts3Z+xD9D72VRiTz7QXF+JjRWKl+3Uk6c29+pvIRKXC1Ht0r9uBXGmDTaHdV7cZCveoDwIVSngY+mQ0= +ENTRY_END + +STEP 22 TRAFFIC +STEP 23 TIME_PASSES ELAPSE 901 + +STEP 24 QUERY +ENTRY_BEGIN +REPLY RD DO +SECTION QUESTION +peanut.testzone.nlnetlabs.nl. IN AAAA +ENTRY_END + +STEP 25 CHECK_ANSWER +ENTRY_BEGIN +MATCH all ttl +REPLY QR RD RA AD DO NOERROR +SECTION QUESTION +peanut.testzone.nlnetlabs.nl. IN AAAA +SECTION AUTHORITY +peanut.testzone.nlnetlabs.nl. IN NSEC rust.testzone.nlnetlabs.nl. A RRSIG NSEC +peanut.testzone.nlnetlabs.nl. 3600 IN RRSIG NSEC 8 4 3600 20180313101254 20180213101254 1444 testzone.nlnetlabs.nl. GhUUt3n1oVZCbU5l7XhbtE1kAhFXBRvQRvp/s3INitoHm1D54VERXWR33g+aQMcLAyCOe2TmpJMH1zDSbccf0zabvwEzqDzPmgcPt0KjXUdrN84/2XN+C4U84golbUui61lhhU+6bL8rylPuv3XtqQ4ppXy8sSe+gfsskauhMpg= +testzone.nlnetlabs.nl. 900 IN SOA ns.nlnetlabs.nl. ralph.nlnetlabs.nl. 1 14400 3600 604800 3600 +testzone.nlnetlabs.nl. 900 IN RRSIG SOA 8 3 900 20180313101254 20180213101254 1444 testzone.nlnetlabs.nl. abG0cByo/q5NaDNMz6FPvNvehHqUDhQRwLdvG72315hMGzCavLRWuAB5gieibMCrICH2WVHVj7fisjSuY0iPwf9xZlCGts3Z+xD9D72VRiTz7QXF+JjRWKl+3Uk6c29+pvIRKXC1Ht0r9uBXGmDTaHdV7cZCveoDwIVSngY+mQ0= +ENTRY_END +STEP 26 TRAFFIC + +; Time passes and NSECs should be expired. +STEP 60 TIME_PASSES ELAPSE 60 + +; query something that gets the SOA record for the testzone in cache. +STEP 70 QUERY +ENTRY_BEGIN +REPLY RD DO +SECTION QUESTION +peanut.testzone.nlnetlabs.nl. IN AAAA +ENTRY_END + +STEP 80 CHECK_ANSWER +ENTRY_BEGIN +MATCH all ttl +REPLY QR RD RA AD DO NOERROR +SECTION QUESTION +peanut.testzone.nlnetlabs.nl. IN AAAA +SECTION AUTHORITY +peanut.testzone.nlnetlabs.nl. 3540 IN NSEC rust.testzone.nlnetlabs.nl. A RRSIG NSEC +peanut.testzone.nlnetlabs.nl. 3540 IN RRSIG NSEC 8 4 3600 20180313101254 20180213101254 1444 testzone.nlnetlabs.nl. 
GhUUt3n1oVZCbU5l7XhbtE1kAhFXBRvQRvp/s3INitoHm1D54VERXWR33g+aQMcLAyCOe2TmpJMH1zDSbccf0zabvwEzqDzPmgcPt0KjXUdrN84/2XN+C4U84golbUui61lhhU+6bL8rylPuv3XtqQ4ppXy8sSe+gfsskauhMpg= +testzone.nlnetlabs.nl. 840 IN SOA ns.nlnetlabs.nl. ralph.nlnetlabs.nl. 1 14400 3600 604800 3600 +testzone.nlnetlabs.nl. 840 IN RRSIG SOA 8 3 900 20180313101254 20180213101254 1444 testzone.nlnetlabs.nl. abG0cByo/q5NaDNMz6FPvNvehHqUDhQRwLdvG72315hMGzCavLRWuAB5gieibMCrICH2WVHVj7fisjSuY0iPwf9xZlCGts3Z+xD9D72VRiTz7QXF+JjRWKl+3Uk6c29+pvIRKXC1Ht0r9uBXGmDTaHdV7cZCveoDwIVSngY+mQ0= +ENTRY_END + +; query for ant.testzone.nlnetlabs.nl. In this range it is on the nameserver. +STEP 110 QUERY +ENTRY_BEGIN +REPLY RD DO +SECTION QUESTION +ant.testzone.nlnetlabs.nl. IN TXT +ENTRY_END + +; Expect an answer since the 3600 TTL NSECs from STEP 10 should have been +; limited to 900 and be expired by now. +STEP 120 CHECK_ANSWER +ENTRY_BEGIN +MATCH all ttl +REPLY QR RD RA AD DO NOERROR +SECTION QUESTION +ant.testzone.nlnetlabs.nl. IN TXT +SECTION ANSWER +ant.testzone.nlnetlabs.nl. TXT "heap" +ant.testzone.nlnetlabs.nl. 3600 IN RRSIG TXT 8 4 3600 20180313101254 20180213101254 1444 testzone.nlnetlabs.nl. 
Sn8dBGMSYGGKs7yGWO0CShxbm3ba5Y6ysHyE/HJyFnS8NmsKIx/KVdFPRQx/Jm7a3hektRXrjxetfhfJm0SzJ2UFeKlkE+VJ/Lj2oAETqN1oqqkNr+RDdbKLMzLApMRgrhStSAO1Yb8/8oUIflyrjNbuDbAHSMbkOE+Z49LIais= +ENTRY_END + +SCENARIO_END diff --git a/util/alloc.c b/util/alloc.c index a6c911803..9a99246f5 100644 --- a/util/alloc.c +++ b/util/alloc.c @@ -519,6 +519,15 @@ void *unbound_stat_realloc_log(void *ptr, size_t size, const char* file, return unbound_stat_realloc(ptr, size); } +/** log to file where alloc was done */ +void *unbound_stat_reallocarray_log(void *ptr, size_t nmemb, size_t size, + const char* file, int line, const char* func) +{ + log_info("%s:%d %s reallocarray(%p, %u, %u)", file, line, func, + ptr, (unsigned)nmemb, (unsigned)size); + return unbound_stat_realloc(ptr, nmemb*size); +} + /** log to file where strdup was done */ char *unbound_stat_strdup_log(const char *s, const char* file, int line, const char* func) diff --git a/util/config_file.c b/util/config_file.c index 2eb81fcee..aca0039d4 100644 --- a/util/config_file.c +++ b/util/config_file.c @@ -135,6 +135,8 @@ config_create(void) cfg->http_query_buffer_size = 4*1024*1024; cfg->http_response_buffer_size = 4*1024*1024; cfg->http_nodelay = 1; + cfg->quic_port = UNBOUND_DNS_OVER_QUIC_PORT; + cfg->quic_size = 8*1024*1024; cfg->use_syslog = 1; cfg->log_identity = NULL; /* changed later with argv[0] */ cfg->log_time_ascii = 0; @@ -604,6 +606,8 @@ int config_set_option(struct config_file* cfg, const char* opt, else S_MEMSIZE("http-response-buffer-size:", http_response_buffer_size) else S_YNO("http-nodelay:", http_nodelay) else S_YNO("http-notls-downstream:", http_notls_downstream) + else S_NUMBER_NONZERO("quic-port:", quic_port) + else S_MEMSIZE("quic-size:", quic_size) else S_YNO("interface-automatic:", if_automatic) else S_STR("interface-automatic-ports:", if_automatic_ports) else S_YNO("use-systemd:", use_systemd) @@ -1154,6 +1158,8 @@ config_get_option(struct config_file* cfg, const char* opt, else O_MEM(opt, 
"http-response-buffer-size", http_response_buffer_size) else O_YNO(opt, "http-nodelay", http_nodelay) else O_YNO(opt, "http-notls-downstream", http_notls_downstream) + else O_DEC(opt, "quic-port", quic_port) + else O_MEM(opt, "quic-size", quic_size) else O_YNO(opt, "use-systemd", use_systemd) else O_YNO(opt, "do-daemonize", do_daemonize) else O_STR(opt, "chroot", chrootdir) @@ -2821,3 +2827,22 @@ if_is_dnscrypt(const char* ifname, const char* port, int dnscrypt_port) return 0; #endif } + +/** see if interface is quic, its port number == the quic port number */ +int +if_is_quic(const char* ifname, const char* port, int quic_port) +{ +#ifndef HAVE_NGTCP2 + (void)ifname; + (void)port; + (void)quic_port; + return 0; +#else + char* p = strchr(ifname, '@'); + if(!p && atoi(port) == quic_port) + return 1; + if(p && atoi(p+1) == quic_port) + return 1; + return 0; +#endif +} diff --git a/util/config_file.h b/util/config_file.h index fbb09aa62..2969f8433 100644 --- a/util/config_file.h +++ b/util/config_file.h @@ -161,6 +161,11 @@ struct config_file { /** Disable TLS for http sockets downstream */ int http_notls_downstream; + /** port on which to provide DNS over QUIC service */ + int quic_port; + /** size of the quic data, max bytes */ + size_t quic_size; + /** outgoing port range number of ports (per thread) */ int outgoing_num_ports; /** number of outgoing tcp buffers per (per thread) */ @@ -1406,6 +1411,10 @@ int if_is_pp2(const char* ifname, const char* port, /** see if interface is DNSCRYPT, its port number == the dnscrypt port number */ int if_is_dnscrypt(const char* ifname, const char* port, int dnscrypt_port); + +/** see if interface is quic, its port number == the quic port number */ +int if_is_quic(const char* ifname, const char* port, int quic_port); + #ifdef USE_LINUX_IP_LOCAL_PORT_RANGE #define LINUX_IP_LOCAL_PORT_RANGE_PATH "/proc/sys/net/ipv4/ip_local_port_range" #endif diff --git a/util/configlexer.lex b/util/configlexer.lex index dbf2910de..4c0416f73 100644 
--- a/util/configlexer.lex +++ b/util/configlexer.lex @@ -269,6 +269,8 @@ http-query-buffer-size{COLON} { YDVAR(1, VAR_HTTP_QUERY_BUFFER_SIZE) } http-response-buffer-size{COLON} { YDVAR(1, VAR_HTTP_RESPONSE_BUFFER_SIZE) } http-nodelay{COLON} { YDVAR(1, VAR_HTTP_NODELAY) } http-notls-downstream{COLON} { YDVAR(1, VAR_HTTP_NOTLS_DOWNSTREAM) } +quic-port{COLON} { YDVAR(1, VAR_QUIC_PORT) } +quic-size{COLON} { YDVAR(1, VAR_QUIC_SIZE) } use-systemd{COLON} { YDVAR(1, VAR_USE_SYSTEMD) } do-daemonize{COLON} { YDVAR(1, VAR_DO_DAEMONIZE) } interface{COLON} { YDVAR(1, VAR_INTERFACE) } diff --git a/util/configparser.y b/util/configparser.y index 2ca16f81c..c10a5f475 100644 --- a/util/configparser.y +++ b/util/configparser.y @@ -203,6 +203,7 @@ extern struct config_parser_state* cfg_parser; %token VAR_RPZ_SIGNAL_NXDOMAIN_RA VAR_INTERFACE_AUTOMATIC_PORTS VAR_EDE %token VAR_INTERFACE_ACTION VAR_INTERFACE_VIEW VAR_INTERFACE_TAG %token VAR_INTERFACE_TAG_ACTION VAR_INTERFACE_TAG_DATA +%token VAR_QUIC_PORT VAR_QUIC_SIZE %token VAR_PROXY_PROTOCOL_PORT VAR_STATISTICS_INHIBIT_ZERO %token VAR_HARDEN_UNKNOWN_ADDITIONAL VAR_DISABLE_EDNS_DO VAR_CACHEDB_NO_STORE %token VAR_LOG_DESTADDR VAR_CACHEDB_CHECK_WHEN_SERVE_EXPIRED @@ -342,6 +343,7 @@ content_server: server_num_threads | server_verbosity | server_port | server_edns_client_string_opcode | server_nsid | server_zonemd_permissive_mode | server_max_reuse_tcp_queries | server_tcp_reuse_timeout | server_tcp_auth_query_timeout | + server_quic_port | server_quic_size | server_interface_automatic_ports | server_ede | server_proxy_protocol_port | server_statistics_inhibit_zero | server_harden_unknown_additional | server_disable_edns_do | @@ -1209,6 +1211,26 @@ server_http_notls_downstream: VAR_HTTP_NOTLS_DOWNSTREAM STRING_ARG else cfg_parser->cfg->http_notls_downstream = (strcmp($2, "yes")==0); free($2); }; +server_quic_port: VAR_QUIC_PORT STRING_ARG + { + OUTYY(("P(server_quic_port:%s)\n", $2)); +#ifndef HAVE_NGTCP2 + log_warn("%s:%d: Unbound is 
not compiled with " + "ngtcp2. This is required to use DNS " + "over QUIC.", cfg_parser->filename, cfg_parser->line); +#endif + if(atoi($2) == 0) + yyerror("port number expected"); + else cfg_parser->cfg->quic_port = atoi($2); + free($2); + }; +server_quic_size: VAR_QUIC_SIZE STRING_ARG + { + OUTYY(("P(server_quic_size:%s)\n", $2)); + if(!cfg_parse_memsize($2, &cfg_parser->cfg->quic_size)) + yyerror("memory size expected"); + free($2); + }; server_use_systemd: VAR_USE_SYSTEMD STRING_ARG { OUTYY(("P(server_use_systemd:%s)\n", $2)); diff --git a/util/data/msgencode.c b/util/data/msgencode.c index 898ff8412..6d116fb52 100644 --- a/util/data/msgencode.c +++ b/util/data/msgencode.c @@ -62,6 +62,10 @@ #define RETVAL_TRUNC -4 /** return code that means all is peachy keen. Equal to DNS rcode NOERROR */ #define RETVAL_OK 0 +/** Max compressions we are willing to perform; more than that will result + * in semi-compressed messages, or truncated even on TCP for huge messages, to + * avoid locking the CPU for long */ +#define MAX_COMPRESSION_PER_MESSAGE 120 /** * Data structure to help domain name compression in outgoing messages. 
@@ -284,15 +288,17 @@ write_compressed_dname(sldns_buffer* pkt, uint8_t* dname, int labs, /** compress owner name of RR, return RETVAL_OUTMEM RETVAL_TRUNC */ static int -compress_owner(struct ub_packed_rrset_key* key, sldns_buffer* pkt, - struct regional* region, struct compress_tree_node** tree, - size_t owner_pos, uint16_t* owner_ptr, int owner_labs) +compress_owner(struct ub_packed_rrset_key* key, sldns_buffer* pkt, + struct regional* region, struct compress_tree_node** tree, + size_t owner_pos, uint16_t* owner_ptr, int owner_labs, + size_t* compress_count) { struct compress_tree_node* p; struct compress_tree_node** insertpt = NULL; if(!*owner_ptr) { /* compress first time dname */ - if((p = compress_tree_lookup(tree, key->rk.dname, + if(*compress_count < MAX_COMPRESSION_PER_MESSAGE && + (p = compress_tree_lookup(tree, key->rk.dname, owner_labs, &insertpt))) { if(p->labs == owner_labs) /* avoid ptr chains, since some software is @@ -301,6 +307,7 @@ compress_owner(struct ub_packed_rrset_key* key, sldns_buffer* pkt, if(!write_compressed_dname(pkt, key->rk.dname, owner_labs, p)) return RETVAL_TRUNC; + (*compress_count)++; /* check if typeclass+4 ttl + rdatalen is available */ if(sldns_buffer_remaining(pkt) < 4+4+2) return RETVAL_TRUNC; @@ -313,7 +320,8 @@ compress_owner(struct ub_packed_rrset_key* key, sldns_buffer* pkt, if(owner_pos <= PTR_MAX_OFFSET) *owner_ptr = htons(PTR_CREATE(owner_pos)); } - if(!compress_tree_store(key->rk.dname, owner_labs, + if(*compress_count < MAX_COMPRESSION_PER_MESSAGE && + !compress_tree_store(key->rk.dname, owner_labs, owner_pos, region, p, insertpt)) return RETVAL_OUTMEM; } else { @@ -333,20 +341,24 @@ compress_owner(struct ub_packed_rrset_key* key, sldns_buffer* pkt, /** compress any domain name to the packet, return RETVAL_* */ static int -compress_any_dname(uint8_t* dname, sldns_buffer* pkt, int labs, - struct regional* region, struct compress_tree_node** tree) +compress_any_dname(uint8_t* dname, sldns_buffer* pkt, int labs, + 
struct regional* region, struct compress_tree_node** tree, + size_t* compress_count) { struct compress_tree_node* p; struct compress_tree_node** insertpt = NULL; size_t pos = sldns_buffer_position(pkt); - if((p = compress_tree_lookup(tree, dname, labs, &insertpt))) { + if(*compress_count < MAX_COMPRESSION_PER_MESSAGE && + (p = compress_tree_lookup(tree, dname, labs, &insertpt))) { if(!write_compressed_dname(pkt, dname, labs, p)) return RETVAL_TRUNC; + (*compress_count)++; } else { if(!dname_buffer_write(pkt, dname)) return RETVAL_TRUNC; } - if(!compress_tree_store(dname, labs, pos, region, p, insertpt)) + if(*compress_count < MAX_COMPRESSION_PER_MESSAGE && + !compress_tree_store(dname, labs, pos, region, p, insertpt)) return RETVAL_OUTMEM; return RETVAL_OK; } @@ -364,9 +376,9 @@ type_rdata_compressable(struct ub_packed_rrset_key* key) /** compress domain names in rdata, return RETVAL_* */ static int -compress_rdata(sldns_buffer* pkt, uint8_t* rdata, size_t todolen, - struct regional* region, struct compress_tree_node** tree, - const sldns_rr_descriptor* desc) +compress_rdata(sldns_buffer* pkt, uint8_t* rdata, size_t todolen, + struct regional* region, struct compress_tree_node** tree, + const sldns_rr_descriptor* desc, size_t* compress_count) { int labs, r, rdf = 0; size_t dname_len, len, pos = sldns_buffer_position(pkt); @@ -380,8 +392,8 @@ compress_rdata(sldns_buffer* pkt, uint8_t* rdata, size_t todolen, switch(desc->_wireformat[rdf]) { case LDNS_RDF_TYPE_DNAME: labs = dname_count_size_labels(rdata, &dname_len); - if((r=compress_any_dname(rdata, pkt, labs, region, - tree)) != RETVAL_OK) + if((r=compress_any_dname(rdata, pkt, labs, region, + tree, compress_count)) != RETVAL_OK) return r; rdata += dname_len; todolen -= dname_len; @@ -449,7 +461,8 @@ static int packed_rrset_encode(struct ub_packed_rrset_key* key, sldns_buffer* pkt, uint16_t* num_rrs, time_t timenow, struct regional* region, int do_data, int do_sig, struct compress_tree_node** tree, - 
sldns_pkt_section s, uint16_t qtype, int dnssec, size_t rr_offset) + sldns_pkt_section s, uint16_t qtype, int dnssec, size_t rr_offset, + size_t* compress_count) { size_t i, j, owner_pos; int r, owner_labs; @@ -477,9 +490,9 @@ packed_rrset_encode(struct ub_packed_rrset_key* key, sldns_buffer* pkt, for(i=0; icount; i++) { /* rrset roundrobin */ j = (i + rr_offset) % data->count; - if((r=compress_owner(key, pkt, region, tree, - owner_pos, &owner_ptr, owner_labs)) - != RETVAL_OK) + if((r=compress_owner(key, pkt, region, tree, + owner_pos, &owner_ptr, owner_labs, + compress_count)) != RETVAL_OK) return r; sldns_buffer_write(pkt, &key->rk.type, 2); sldns_buffer_write(pkt, &key->rk.rrset_class, 2); @@ -489,8 +502,8 @@ packed_rrset_encode(struct ub_packed_rrset_key* key, sldns_buffer* pkt, else sldns_buffer_write_u32(pkt, data->rr_ttl[j]-adjust); if(c) { if((r=compress_rdata(pkt, data->rr_data[j], - data->rr_len[j], region, tree, c)) - != RETVAL_OK) + data->rr_len[j], region, tree, c, + compress_count)) != RETVAL_OK) return r; } else { if(sldns_buffer_remaining(pkt) < data->rr_len[j]) @@ -510,9 +523,9 @@ packed_rrset_encode(struct ub_packed_rrset_key* key, sldns_buffer* pkt, return RETVAL_TRUNC; sldns_buffer_write(pkt, &owner_ptr, 2); } else { - if((r=compress_any_dname(key->rk.dname, - pkt, owner_labs, region, tree)) - != RETVAL_OK) + if((r=compress_any_dname(key->rk.dname, + pkt, owner_labs, region, tree, + compress_count)) != RETVAL_OK) return r; if(sldns_buffer_remaining(pkt) < 4+4+data->rr_len[i]) @@ -544,7 +557,8 @@ static int insert_section(struct reply_info* rep, size_t num_rrsets, uint16_t* num_rrs, sldns_buffer* pkt, size_t rrsets_before, time_t timenow, struct regional* region, struct compress_tree_node** tree, - sldns_pkt_section s, uint16_t qtype, int dnssec, size_t rr_offset) + sldns_pkt_section s, uint16_t qtype, int dnssec, size_t rr_offset, + size_t* compress_count) { int r; size_t i, setstart; @@ -560,7 +574,7 @@ insert_section(struct reply_info* rep, 
size_t num_rrsets, uint16_t* num_rrs, setstart = sldns_buffer_position(pkt); if((r=packed_rrset_encode(rep->rrsets[rrsets_before+i], pkt, num_rrs, timenow, region, 1, 1, tree, - s, qtype, dnssec, rr_offset)) + s, qtype, dnssec, rr_offset, compress_count)) != RETVAL_OK) { /* Bad, but if due to size must set TC bit */ /* trim off the rrset neatly. */ @@ -573,7 +587,7 @@ insert_section(struct reply_info* rep, size_t num_rrsets, uint16_t* num_rrs, setstart = sldns_buffer_position(pkt); if((r=packed_rrset_encode(rep->rrsets[rrsets_before+i], pkt, num_rrs, timenow, region, 1, 0, tree, - s, qtype, dnssec, rr_offset)) + s, qtype, dnssec, rr_offset, compress_count)) != RETVAL_OK) { sldns_buffer_set_position(pkt, setstart); return r; @@ -584,7 +598,7 @@ insert_section(struct reply_info* rep, size_t num_rrsets, uint16_t* num_rrs, setstart = sldns_buffer_position(pkt); if((r=packed_rrset_encode(rep->rrsets[rrsets_before+i], pkt, num_rrs, timenow, region, 0, 1, tree, - s, qtype, dnssec, rr_offset)) + s, qtype, dnssec, rr_offset, compress_count)) != RETVAL_OK) { sldns_buffer_set_position(pkt, setstart); return r; @@ -677,6 +691,7 @@ reply_info_encode(struct query_info* qinfo, struct reply_info* rep, struct compress_tree_node* tree = 0; int r; size_t rr_offset; + size_t compress_count=0; sldns_buffer_clear(buffer); if(udpsize < sldns_buffer_limit(buffer)) @@ -723,7 +738,7 @@ reply_info_encode(struct query_info* qinfo, struct reply_info* rep, arep.rrsets = &qinfo->local_alias->rrset; if((r=insert_section(&arep, 1, &ancount, buffer, 0, timezero, region, &tree, LDNS_SECTION_ANSWER, - qinfo->qtype, dnssec, rr_offset)) != RETVAL_OK) { + qinfo->qtype, dnssec, rr_offset, &compress_count)) != RETVAL_OK) { if(r == RETVAL_TRUNC) { /* create truncated message */ sldns_buffer_write_u16_at(buffer, 6, ancount); @@ -738,7 +753,7 @@ reply_info_encode(struct query_info* qinfo, struct reply_info* rep, /* insert answer section */ if((r=insert_section(rep, rep->an_numrrsets, &ancount, buffer, 0, 
timenow, region, &tree, LDNS_SECTION_ANSWER, qinfo->qtype, - dnssec, rr_offset)) != RETVAL_OK) { + dnssec, rr_offset, &compress_count)) != RETVAL_OK) { if(r == RETVAL_TRUNC) { /* create truncated message */ sldns_buffer_write_u16_at(buffer, 6, ancount); @@ -756,7 +771,7 @@ reply_info_encode(struct query_info* qinfo, struct reply_info* rep, if((r=insert_section(rep, rep->ns_numrrsets, &nscount, buffer, rep->an_numrrsets, timenow, region, &tree, LDNS_SECTION_AUTHORITY, qinfo->qtype, - dnssec, rr_offset)) != RETVAL_OK) { + dnssec, rr_offset, &compress_count)) != RETVAL_OK) { if(r == RETVAL_TRUNC) { /* create truncated message */ sldns_buffer_write_u16_at(buffer, 8, nscount); @@ -773,7 +788,7 @@ reply_info_encode(struct query_info* qinfo, struct reply_info* rep, if((r=insert_section(rep, rep->ar_numrrsets, &arcount, buffer, rep->an_numrrsets + rep->ns_numrrsets, timenow, region, &tree, LDNS_SECTION_ADDITIONAL, qinfo->qtype, - dnssec, rr_offset)) != RETVAL_OK) { + dnssec, rr_offset, &compress_count)) != RETVAL_OK) { if(r == RETVAL_TRUNC) { /* no need to set TC bit, this is the additional */ sldns_buffer_write_u16_at(buffer, 10, arcount); diff --git a/util/fptr_wlist.c b/util/fptr_wlist.c index 705dc1bbe..e94ec5bbc 100644 --- a/util/fptr_wlist.c +++ b/util/fptr_wlist.c @@ -47,6 +47,7 @@ #include "util/fptr_wlist.h" #include "util/mini_event.h" #include "services/outside_network.h" +#include "services/listen_dnsport.h" #include "services/mesh.h" #include "services/localzone.h" #include "services/authzone.h" @@ -132,6 +133,9 @@ fptr_whitelist_comm_timer(void (*fptr)(void*)) else if(fptr == &worker_stat_timer_cb) return 1; else if(fptr == &worker_probe_timer_cb) return 1; else if(fptr == &validate_suspend_timer_cb) return 1; +#ifdef HAVE_NGTCP2 + else if(fptr == &doq_timer_cb) return 1; +#endif #ifdef UB_ON_WINDOWS else if(fptr == &wsvc_cron_cb) return 1; #endif @@ -181,6 +185,9 @@ fptr_whitelist_event(void (*fptr)(int, short, void *)) else if(fptr == &tube_handle_signal) 
return 1; else if(fptr == &comm_base_handle_slow_accept) return 1; else if(fptr == &comm_point_http_handle_callback) return 1; +#ifdef HAVE_NGTCP2 + else if(fptr == &comm_point_doq_callback) return 1; +#endif #ifdef USE_DNSTAP else if(fptr == &dtio_output_cb) return 1; else if(fptr == &dtio_cmd_cb) return 1; @@ -190,6 +197,10 @@ fptr_whitelist_event(void (*fptr)(int, short, void *)) else if(fptr == &dtio_tap_callback) return 1; else if(fptr == &dtio_mainfdcallback) return 1; #endif +#ifdef HAVE_NGTCP2 + else if(fptr == &doq_client_event_cb) return 1; + else if(fptr == &doq_client_timer_cb) return 1; +#endif #ifdef UB_ON_WINDOWS else if(fptr == &worker_win_stop_cb) return 1; #endif @@ -248,6 +259,12 @@ fptr_whitelist_rbtree_cmp(int (*fptr) (const void *, const void *)) else if(fptr == &auth_zone_cmp) return 1; else if(fptr == &auth_data_cmp) return 1; else if(fptr == &auth_xfer_cmp) return 1; +#ifdef HAVE_NGTCP2 + else if(fptr == &doq_conn_cmp) return 1; + else if(fptr == &doq_conid_cmp) return 1; + else if(fptr == &doq_timer_cmp) return 1; + else if(fptr == &doq_stream_cmp) return 1; +#endif return 0; } diff --git a/util/locks.h b/util/locks.h index d86ee4923..eb698cb75 100644 --- a/util/locks.h +++ b/util/locks.h @@ -88,6 +88,7 @@ #define lock_get_mem(lock) (0) /* nothing */ #define checklock_start() /* nop */ #define checklock_stop() /* nop */ +#define checklock_set_output_name(name) /* nop */ #ifdef HAVE_PTHREAD #include diff --git a/util/net_help.c b/util/net_help.c index 5cf702ef9..96b2b19a0 100644 --- a/util/net_help.c +++ b/util/net_help.c @@ -727,6 +727,52 @@ sockaddr_cmp_addr(struct sockaddr_storage* addr1, socklen_t len1, } } +int +sockaddr_cmp_scopeid(struct sockaddr_storage* addr1, socklen_t len1, + struct sockaddr_storage* addr2, socklen_t len2) +{ + struct sockaddr_in* p1_in = (struct sockaddr_in*)addr1; + struct sockaddr_in* p2_in = (struct sockaddr_in*)addr2; + struct sockaddr_in6* p1_in6 = (struct sockaddr_in6*)addr1; + struct sockaddr_in6* p2_in6 
= (struct sockaddr_in6*)addr2; + if(len1 < len2) + return -1; + if(len1 > len2) + return 1; + log_assert(len1 == len2); + if( p1_in->sin_family < p2_in->sin_family) + return -1; + if( p1_in->sin_family > p2_in->sin_family) + return 1; + log_assert( p1_in->sin_family == p2_in->sin_family ); + /* compare ip4 */ + if( p1_in->sin_family == AF_INET ) { + /* just order it, ntohs not required */ + if(p1_in->sin_port < p2_in->sin_port) + return -1; + if(p1_in->sin_port > p2_in->sin_port) + return 1; + log_assert(p1_in->sin_port == p2_in->sin_port); + return memcmp(&p1_in->sin_addr, &p2_in->sin_addr, INET_SIZE); + } else if (p1_in6->sin6_family == AF_INET6) { + /* just order it, ntohs not required */ + if(p1_in6->sin6_port < p2_in6->sin6_port) + return -1; + if(p1_in6->sin6_port > p2_in6->sin6_port) + return 1; + if(p1_in6->sin6_scope_id < p2_in6->sin6_scope_id) + return -1; + if(p1_in6->sin6_scope_id > p2_in6->sin6_scope_id) + return 1; + log_assert(p1_in6->sin6_port == p2_in6->sin6_port); + return memcmp(&p1_in6->sin6_addr, &p2_in6->sin6_addr, + INET6_SIZE); + } else { + /* eek unknown type, perform this comparison for sanity. */ + return memcmp(addr1, addr2, len1); + } +} + int addr_is_ip6(struct sockaddr_storage* addr, socklen_t len) { diff --git a/util/net_help.h b/util/net_help.h index 28245ea0c..eba38cee0 100644 --- a/util/net_help.h +++ b/util/net_help.h @@ -289,6 +289,18 @@ int sockaddr_cmp(struct sockaddr_storage* addr1, socklen_t len1, int sockaddr_cmp_addr(struct sockaddr_storage* addr1, socklen_t len1, struct sockaddr_storage* addr2, socklen_t len2); +/** + * Compare two sockaddrs. Imposes an ordering on the addresses. + * Compares address and port. It also compares scope_id for ip6. + * @param addr1: address 1. + * @param len1: lengths of addr1. + * @param addr2: address 2. + * @param len2: lengths of addr2. + * @return: 0 if addr1 == addr2. -1 if addr1 is smaller, +1 if larger. 
+ */ +int sockaddr_cmp_scopeid(struct sockaddr_storage* addr1, socklen_t len1, + struct sockaddr_storage* addr2, socklen_t len2); + /** * Checkout address family. * @param addr: the sockaddr to examine. diff --git a/util/netevent.c b/util/netevent.c index 9d5131da9..b36f00f1a 100644 --- a/util/netevent.c +++ b/util/netevent.c @@ -53,6 +53,7 @@ #include "dnstap/dnstap.h" #include "dnscrypt/dnscrypt.h" #include "services/listen_dnsport.h" +#include "util/random.h" #ifdef HAVE_SYS_TYPES_H #include #endif @@ -72,9 +73,16 @@ #ifdef HAVE_OPENSSL_ERR_H #include #endif + +#ifdef HAVE_NGTCP2 +#include +#include +#endif + #ifdef HAVE_LINUX_NET_TSTAMP_H #include #endif + /* -------- Start of local definitions -------- */ /** if CMSG_ALIGN is not defined on this platform, a workaround */ #ifndef CMSG_ALIGN @@ -1068,97 +1076,1865 @@ comm_point_udp_ancil_callback(int fd, short event, void* arg) rep.remote_addrlen); } - fptr_ok(fptr_whitelist_comm_point(rep.c->callback)); - if((*rep.c->callback)(rep.c, rep.c->cb_arg, NETEVENT_NOERROR, &rep)) { - /* send back immediate reply */ - struct sldns_buffer *buffer; -#ifdef USE_DNSCRYPT - buffer = rep.c->dnscrypt_buffer; + fptr_ok(fptr_whitelist_comm_point(rep.c->callback)); + if((*rep.c->callback)(rep.c, rep.c->cb_arg, NETEVENT_NOERROR, &rep)) { + /* send back immediate reply */ + struct sldns_buffer *buffer; +#ifdef USE_DNSCRYPT + buffer = rep.c->dnscrypt_buffer; +#else + buffer = rep.c->buffer; +#endif + (void)comm_point_send_udp_msg_if(rep.c, buffer, + (struct sockaddr*)&rep.remote_addr, + rep.remote_addrlen, &rep); + } + if(!rep.c || rep.c->fd == -1) /* commpoint closed */ + break; + } +} +#endif /* AF_INET6 && IPV6_PKTINFO && HAVE_RECVMSG */ + +void +comm_point_udp_callback(int fd, short event, void* arg) +{ + struct comm_reply rep; + ssize_t rcv; + int i; + struct sldns_buffer *buffer; + + rep.c = (struct comm_point*)arg; + log_assert(rep.c->type == comm_udp); + + if(!(event&UB_EV_READ)) + return; + log_assert(rep.c && 
rep.c->buffer && rep.c->fd == fd); + ub_comm_base_now(rep.c->ev->base); + for(i=0; ibuffer); + rep.remote_addrlen = (socklen_t)sizeof(rep.remote_addr); + log_assert(fd != -1); + log_assert(sldns_buffer_remaining(rep.c->buffer) > 0); + rcv = recvfrom(fd, (void*)sldns_buffer_begin(rep.c->buffer), + sldns_buffer_remaining(rep.c->buffer), MSG_DONTWAIT, + (struct sockaddr*)&rep.remote_addr, &rep.remote_addrlen); + if(rcv == -1) { +#ifndef USE_WINSOCK + if(errno != EAGAIN && errno != EINTR + && udp_recv_needs_log(errno)) + log_err("recvfrom %d failed: %s", + fd, strerror(errno)); +#else + if(WSAGetLastError() != WSAEINPROGRESS && + WSAGetLastError() != WSAECONNRESET && + WSAGetLastError()!= WSAEWOULDBLOCK && + udp_recv_needs_log(WSAGetLastError())) + log_err("recvfrom failed: %s", + wsa_strerror(WSAGetLastError())); +#endif + return; + } + sldns_buffer_skip(rep.c->buffer, rcv); + sldns_buffer_flip(rep.c->buffer); + rep.srctype = 0; + rep.is_proxied = 0; + + if(rep.c->pp2_enabled && !consume_pp2_header(rep.c->buffer, + &rep, 0)) { + log_err("proxy_protocol: could not consume PROXYv2 header"); + return; + } + if(!rep.is_proxied) { + rep.client_addrlen = rep.remote_addrlen; + memmove(&rep.client_addr, &rep.remote_addr, + rep.remote_addrlen); + } + + fptr_ok(fptr_whitelist_comm_point(rep.c->callback)); + if((*rep.c->callback)(rep.c, rep.c->cb_arg, NETEVENT_NOERROR, &rep)) { + /* send back immediate reply */ +#ifdef USE_DNSCRYPT + buffer = rep.c->dnscrypt_buffer; +#else + buffer = rep.c->buffer; +#endif + (void)comm_point_send_udp_msg(rep.c, buffer, + (struct sockaddr*)&rep.remote_addr, + rep.remote_addrlen, 0); + } + if(!rep.c || rep.c->fd != fd) /* commpoint closed to -1 or reused for + another UDP port. Note rep.c cannot be reused with TCP fd. 
*/ + break; + } +} + +#ifdef HAVE_NGTCP2 +void +doq_pkt_addr_init(struct doq_pkt_addr* paddr) +{ + paddr->addrlen = (socklen_t)sizeof(paddr->addr); + paddr->localaddrlen = (socklen_t)sizeof(paddr->localaddr); + paddr->ifindex = 0; +} + +/** set the ecn on the transmission */ +static void +doq_set_ecn(int fd, int family, uint32_t ecn) +{ + unsigned int val = ecn; + if(family == AF_INET6) { + if(setsockopt(fd, IPPROTO_IPV6, IPV6_TCLASS, &val, + (socklen_t)sizeof(val)) == -1) { + log_err("setsockopt(.. IPV6_TCLASS ..): %s", + strerror(errno)); + } + return; + } + if(setsockopt(fd, IPPROTO_IP, IP_TOS, &val, + (socklen_t)sizeof(val)) == -1) { + log_err("setsockopt(.. IP_TOS ..): %s", + strerror(errno)); + } +} + +/** set the local address in the control ancillary data */ +static void +doq_set_localaddr_cmsg(struct msghdr* msg, size_t control_size, + struct doq_addr_storage* localaddr, socklen_t localaddrlen, + int ifindex) +{ +#ifndef S_SPLINT_S + struct cmsghdr* cmsg; +#endif /* S_SPLINT_S */ +#ifndef S_SPLINT_S + cmsg = CMSG_FIRSTHDR(msg); + if(localaddr->sockaddr.in.sin_family == AF_INET) { +#ifdef IP_PKTINFO + struct sockaddr_in* sa = (struct sockaddr_in*)localaddr; + struct in_pktinfo v4info; + log_assert(localaddrlen >= sizeof(struct sockaddr_in)); + msg->msg_controllen = CMSG_SPACE(sizeof(struct in_pktinfo)); + memset(msg->msg_control, 0, msg->msg_controllen); + log_assert(msg->msg_controllen <= control_size); + cmsg->cmsg_level = IPPROTO_IP; + cmsg->cmsg_type = IP_PKTINFO; + memset(&v4info, 0, sizeof(v4info)); +# ifdef HAVE_STRUCT_IN_PKTINFO_IPI_SPEC_DST + memmove(&v4info.ipi_spec_dst, &sa->sin_addr, + sizeof(struct in_addr)); +# else + memmove(&v4info.ipi_addr, &sa->sin_addr, + sizeof(struct in_addr)); +# endif + v4info.ipi_ifindex = ifindex; + memmove(CMSG_DATA(cmsg), &v4info, sizeof(struct in_pktinfo)); + cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_pktinfo)); +#elif defined(IP_SENDSRCADDR) + struct sockaddr_in* sa= (struct sockaddr_in*)localaddr; + 
log_assert(localaddrlen >= sizeof(struct sockaddr_in)); + msg->msg_controllen = CMSG_SPACE(sizeof(struct in_addr)); + memset(msg->msg_control, 0, msg->msg_controllen); + log_assert(msg->msg_controllen <= control_size); + cmsg->cmsg_level = IPPROTO_IP; + cmsg->cmsg_type = IP_SENDSRCADDR; + memmove(CMSG_DATA(cmsg), &sa->sin_addr, + sizeof(struct in_addr)); + cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_addr)); +#endif + } else { + struct sockaddr_in6* sa6 = (struct sockaddr_in6*)localaddr; + struct in6_pktinfo v6info; + log_assert(localaddrlen >= sizeof(struct sockaddr_in6)); + msg->msg_controllen = CMSG_SPACE(sizeof(struct in6_pktinfo)); + memset(msg->msg_control, 0, msg->msg_controllen); + log_assert(msg->msg_controllen <= control_size); + cmsg->cmsg_level = IPPROTO_IPV6; + cmsg->cmsg_type = IPV6_PKTINFO; + memset(&v6info, 0, sizeof(v6info)); + memmove(&v6info.ipi6_addr, &sa6->sin6_addr, + sizeof(struct in6_addr)); + v6info.ipi6_ifindex = ifindex; + memmove(CMSG_DATA(cmsg), &v6info, sizeof(struct in6_pktinfo)); + cmsg->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo)); + } +#endif /* S_SPLINT_S */ + /* Ignore unused variables, if no assertions are compiled. 
*/ + (void)localaddrlen; + (void)control_size; +} + +/** write address and port into strings */ +static int +doq_print_addr_port(struct doq_addr_storage* addr, socklen_t addrlen, + char* host, size_t hostlen, char* port, size_t portlen) +{ + if(addr->sockaddr.in.sin_family == AF_INET) { + struct sockaddr_in* sa = (struct sockaddr_in*)addr; + log_assert(addrlen >= sizeof(*sa)); + if(inet_ntop(sa->sin_family, &sa->sin_addr, host, + (socklen_t)hostlen) == 0) { + log_hex("inet_ntop error: address", &sa->sin_addr, + sizeof(sa->sin_addr)); + return 0; + } + snprintf(port, portlen, "%u", (unsigned)ntohs(sa->sin_port)); + } else if(addr->sockaddr.in.sin_family == AF_INET6) { + struct sockaddr_in6* sa6 = (struct sockaddr_in6*)addr; + log_assert(addrlen >= sizeof(*sa6)); + if(inet_ntop(sa6->sin6_family, &sa6->sin6_addr, host, + (socklen_t)hostlen) == 0) { + log_hex("inet_ntop error: address", &sa6->sin6_addr, + sizeof(sa6->sin6_addr)); + return 0; + } + snprintf(port, portlen, "%u", (unsigned)ntohs(sa6->sin6_port)); + } + return 1; +} + +/** doq store the blocked packet when write has blocked */ +static void +doq_store_blocked_pkt(struct comm_point* c, struct doq_pkt_addr* paddr, + uint32_t ecn) +{ + if(c->doq_socket->have_blocked_pkt) + return; /* should not happen that we write when there is + already a blocked write, but if so, drop it. */ + if(sldns_buffer_limit(c->doq_socket->pkt_buf) > + sldns_buffer_capacity(c->doq_socket->blocked_pkt)) + return; /* impossibly large, drop packet. impossible because + pkt_buf and blocked_pkt are the same size. 
*/ + c->doq_socket->have_blocked_pkt = 1; + c->doq_socket->blocked_pkt_pi.ecn = ecn; + memcpy(c->doq_socket->blocked_paddr, paddr, + sizeof(*c->doq_socket->blocked_paddr)); + sldns_buffer_clear(c->doq_socket->blocked_pkt); + sldns_buffer_write(c->doq_socket->blocked_pkt, + sldns_buffer_begin(c->doq_socket->pkt_buf), + sldns_buffer_limit(c->doq_socket->pkt_buf)); + sldns_buffer_flip(c->doq_socket->blocked_pkt); +} + +void +doq_send_pkt(struct comm_point* c, struct doq_pkt_addr* paddr, uint32_t ecn) +{ + struct msghdr msg; + struct iovec iov[1]; + union { + struct cmsghdr hdr; + char buf[256]; + } control; + ssize_t ret; + iov[0].iov_base = sldns_buffer_begin(c->doq_socket->pkt_buf); + iov[0].iov_len = sldns_buffer_limit(c->doq_socket->pkt_buf); + memset(&msg, 0, sizeof(msg)); + msg.msg_name = (void*)&paddr->addr; + msg.msg_namelen = paddr->addrlen; + msg.msg_iov = iov; + msg.msg_iovlen = 1; + msg.msg_control = control.buf; +#ifndef S_SPLINT_S + msg.msg_controllen = sizeof(control.buf); +#endif /* S_SPLINT_S */ + msg.msg_flags = 0; + + doq_set_localaddr_cmsg(&msg, sizeof(control.buf), &paddr->localaddr, + paddr->localaddrlen, paddr->ifindex); + doq_set_ecn(c->fd, paddr->addr.sockaddr.in.sin_family, ecn); + + for(;;) { + ret = sendmsg(c->fd, &msg, MSG_DONTWAIT); + if(ret == -1 && errno == EINTR) + continue; + break; + } + if(ret == -1) { +#ifndef USE_WINSOCK + if(errno == EAGAIN || +# ifdef EWOULDBLOCK + errno == EWOULDBLOCK || +# endif + errno == ENOBUFS) +#else + if(WSAGetLastError() == WSAEINPROGRESS || + WSAGetLastError() == WSAENOBUFS || + WSAGetLastError() == WSAEWOULDBLOCK) +#endif + { + /* udp send has blocked */ + doq_store_blocked_pkt(c, paddr, ecn); + return; + } + if(!udp_send_errno_needs_log((void*)&paddr->addr, + paddr->addrlen)) + return; + if(verbosity >= VERB_OPS) { + char host[256], port[32]; + if(doq_print_addr_port(&paddr->addr, paddr->addrlen, + host, sizeof(host), port, sizeof(port))) { + verbose(VERB_OPS, "doq sendmsg to %s %s " + "failed: %s", 
host, port, + strerror(errno)); + } else { + verbose(VERB_OPS, "doq sendmsg failed: %s", + strerror(errno)); + } + } + return; + } else if(ret != (ssize_t)sldns_buffer_limit(c->doq_socket->pkt_buf)) { + char host[256], port[32]; + if(doq_print_addr_port(&paddr->addr, paddr->addrlen, host, + sizeof(host), port, sizeof(port))) { + log_err("doq sendmsg to %s %s failed: " + "sent %d in place of %d bytes", + host, port, (int)ret, + (int)sldns_buffer_limit(c->doq_socket->pkt_buf)); + } else { + log_err("doq sendmsg failed: " + "sent %d in place of %d bytes", + (int)ret, (int)sldns_buffer_limit(c->doq_socket->pkt_buf)); + } + return; + } +} + +/** fetch port number */ +static int +doq_sockaddr_get_port(struct doq_addr_storage* addr) +{ + if(addr->sockaddr.in.sin_family == AF_INET) { + struct sockaddr_in* sa = (struct sockaddr_in*)addr; + return ntohs(sa->sin_port); + } else if(addr->sockaddr.in.sin_family == AF_INET6) { + struct sockaddr_in6* sa6 = (struct sockaddr_in6*)addr; + return ntohs(sa6->sin6_port); + } + return 0; +} + +/** get local address from ancillary data headers */ +static int +doq_get_localaddr_cmsg(struct comm_point* c, struct doq_pkt_addr* paddr, + int* pkt_continue, struct msghdr* msg) +{ +#ifndef S_SPLINT_S + struct cmsghdr* cmsg; +#endif /* S_SPLINT_S */ + + memset(&paddr->localaddr, 0, sizeof(paddr->localaddr)); +#ifndef S_SPLINT_S + for(cmsg = CMSG_FIRSTHDR(msg); cmsg != NULL; + cmsg = CMSG_NXTHDR(msg, cmsg)) { + if( cmsg->cmsg_level == IPPROTO_IPV6 && + cmsg->cmsg_type == IPV6_PKTINFO) { + struct in6_pktinfo* v6info = + (struct in6_pktinfo*)CMSG_DATA(cmsg); + struct sockaddr_in6* sa= (struct sockaddr_in6*) + &paddr->localaddr; + struct sockaddr_in6* rema = (struct sockaddr_in6*) + &paddr->addr; + if(rema->sin6_family != AF_INET6) { + log_err("doq cmsg family mismatch cmsg is ip6"); + *pkt_continue = 1; + return 0; + } + sa->sin6_family = AF_INET6; + sa->sin6_port = htons(doq_sockaddr_get_port( + (void*)c->socket->addr)); + paddr->ifindex = 
v6info->ipi6_ifindex; + memmove(&sa->sin6_addr, &v6info->ipi6_addr, + sizeof(struct in6_addr)); + paddr->localaddrlen = sizeof(struct sockaddr_in6); + break; +#ifdef IP_PKTINFO + } else if( cmsg->cmsg_level == IPPROTO_IP && + cmsg->cmsg_type == IP_PKTINFO) { + struct in_pktinfo* v4info = + (struct in_pktinfo*)CMSG_DATA(cmsg); + struct sockaddr_in* sa= (struct sockaddr_in*) + &paddr->localaddr; + struct sockaddr_in* rema = (struct sockaddr_in*) + &paddr->addr; + if(rema->sin_family != AF_INET) { + log_err("doq cmsg family mismatch cmsg is ip4"); + *pkt_continue = 1; + return 0; + } + sa->sin_family = AF_INET; + sa->sin_port = htons(doq_sockaddr_get_port( + (void*)c->socket->addr)); + paddr->ifindex = v4info->ipi_ifindex; + memmove(&sa->sin_addr, &v4info->ipi_addr, + sizeof(struct in_addr)); + paddr->localaddrlen = sizeof(struct sockaddr_in); + break; +#elif defined(IP_RECVDSTADDR) + } else if( cmsg->cmsg_level == IPPROTO_IP && + cmsg->cmsg_type == IP_RECVDSTADDR) { + struct sockaddr_in* sa= (struct sockaddr_in*) + &paddr->localaddr; + struct sockaddr_in* rema = (struct sockaddr_in*) + &paddr->addr; + if(rema->sin_family != AF_INET) { + log_err("doq cmsg family mismatch cmsg is ip4"); + *pkt_continue = 1; + return 0; + } + sa->sin_family = AF_INET; + sa->sin_port = htons(doq_sockaddr_get_port( + (void*)c->socket->addr)); + paddr->ifindex = 0; + memmove(&sa.sin_addr, CMSG_DATA(cmsg), + sizeof(struct in_addr)); + paddr->localaddrlen = sizeof(struct sockaddr_in); + break; +#endif /* IP_PKTINFO or IP_RECVDSTADDR */ + } + } +#endif /* S_SPLINT_S */ + +return 1; +} + +/** get packet ecn information */ +static uint32_t +msghdr_get_ecn(struct msghdr* msg, int family) +{ +#ifndef S_SPLINT_S + struct cmsghdr* cmsg; + if(family == AF_INET6) { + for(cmsg = CMSG_FIRSTHDR(msg); cmsg != NULL; + cmsg = CMSG_NXTHDR(msg, cmsg)) { + if(cmsg->cmsg_level == IPPROTO_IPV6 && + cmsg->cmsg_type == IPV6_TCLASS && + cmsg->cmsg_len != 0) { + uint8_t* ecn = (uint8_t*)CMSG_DATA(cmsg); + return 
*ecn; + } + } + return 0; + } + for(cmsg = CMSG_FIRSTHDR(msg); cmsg != NULL; + cmsg = CMSG_NXTHDR(msg, cmsg)) { + if(cmsg->cmsg_level == IPPROTO_IP && + cmsg->cmsg_type == IP_TOS && + cmsg->cmsg_len != 0) { + uint8_t* ecn = (uint8_t*)CMSG_DATA(cmsg); + return *ecn; + } + } +#endif /* S_SPLINT_S */ + return 0; +} + +/** receive packet for DoQ on UDP. get ancillary data for addresses, + * return false if failed and the callback can stop receiving UDP packets + * if pkt_continue is false. */ +static int +doq_recv(struct comm_point* c, struct doq_pkt_addr* paddr, int* pkt_continue, + struct ngtcp2_pkt_info* pi) +{ + struct msghdr msg; + struct iovec iov[1]; + ssize_t rcv; + union { + struct cmsghdr hdr; + char buf[256]; + } ancil; + + msg.msg_name = &paddr->addr; + msg.msg_namelen = (socklen_t)sizeof(paddr->addr); + iov[0].iov_base = sldns_buffer_begin(c->doq_socket->pkt_buf); + iov[0].iov_len = sldns_buffer_remaining(c->doq_socket->pkt_buf); + msg.msg_iov = iov; + msg.msg_iovlen = 1; + msg.msg_control = ancil.buf; +#ifndef S_SPLINT_S + msg.msg_controllen = sizeof(ancil.buf); +#endif /* S_SPLINT_S */ + msg.msg_flags = 0; + + rcv = recvmsg(c->fd, &msg, MSG_DONTWAIT); + if(rcv == -1) { + if(errno != EAGAIN && errno != EINTR + && udp_recv_needs_log(errno)) { + log_err("recvmsg failed for doq: %s", strerror(errno)); + } + *pkt_continue = 0; + return 0; + } + + paddr->addrlen = msg.msg_namelen; + sldns_buffer_skip(c->doq_socket->pkt_buf, rcv); + sldns_buffer_flip(c->doq_socket->pkt_buf); + if(!doq_get_localaddr_cmsg(c, paddr, pkt_continue, &msg)) + return 0; + pi->ecn = msghdr_get_ecn(&msg, paddr->addr.sockaddr.in.sin_family); + return 1; +} + +/** send the version negotiation for doq. scid and dcid are flipped around + * to send back to the client. 
*/ +static void +doq_send_version_negotiation(struct comm_point* c, struct doq_pkt_addr* paddr, + const uint8_t* dcid, size_t dcidlen, const uint8_t* scid, + size_t scidlen) +{ + uint32_t versions[2]; + size_t versions_len = 0; + ngtcp2_ssize ret; + uint8_t unused_random; + + /* fill the array with supported versions */ + versions[0] = NGTCP2_PROTO_VER_V1; + versions_len = 1; + unused_random = ub_random_max(c->doq_socket->rnd, 256); + sldns_buffer_clear(c->doq_socket->pkt_buf); + ret = ngtcp2_pkt_write_version_negotiation( + sldns_buffer_begin(c->doq_socket->pkt_buf), + sldns_buffer_capacity(c->doq_socket->pkt_buf), unused_random, + dcid, dcidlen, scid, scidlen, versions, versions_len); + if(ret < 0) { + log_err("ngtcp2_pkt_write_version_negotiation failed: %s", + ngtcp2_strerror(ret)); + return; + } + sldns_buffer_set_position(c->doq_socket->pkt_buf, ret); + sldns_buffer_flip(c->doq_socket->pkt_buf); + doq_send_pkt(c, paddr, 0); +} + +/** Find the doq_conn object by remote address and dcid */ +static struct doq_conn* +doq_conn_find(struct doq_table* table, struct doq_addr_storage* addr, + socklen_t addrlen, struct doq_addr_storage* localaddr, + socklen_t localaddrlen, int ifindex, const uint8_t* dcid, + size_t dcidlen) +{ + struct rbnode_type* node; + struct doq_conn key; + memset(&key.node, 0, sizeof(key.node)); + key.node.key = &key; + memmove(&key.key.paddr.addr, addr, addrlen); + key.key.paddr.addrlen = addrlen; + memmove(&key.key.paddr.localaddr, localaddr, localaddrlen); + key.key.paddr.localaddrlen = localaddrlen; + key.key.paddr.ifindex = ifindex; + key.key.dcid = (void*)dcid; + key.key.dcidlen = dcidlen; + node = rbtree_search(table->conn_tree, &key); + if(node) + return (struct doq_conn*)node->key; + return NULL; +} + +/** find the doq_con by the connection id */ +static struct doq_conn* +doq_conn_find_by_id(struct doq_table* table, const uint8_t* dcid, + size_t dcidlen) +{ + struct doq_conid* conid; + lock_rw_rdlock(&table->conid_lock); + conid = 
doq_conid_find(table, dcid, dcidlen); + if(conid) { + /* make a copy of the key */ + struct doq_conn* conn; + struct doq_conn_key key = conid->key; + uint8_t cid[NGTCP2_MAX_CIDLEN]; + log_assert(conid->key.dcidlen <= NGTCP2_MAX_CIDLEN); + memcpy(cid, conid->key.dcid, conid->key.dcidlen); + key.dcid = cid; + lock_rw_unlock(&table->conid_lock); + + /* now that the conid lock is released, look up the conn */ + lock_rw_rdlock(&table->lock); + conn = doq_conn_find(table, &key.paddr.addr, + key.paddr.addrlen, &key.paddr.localaddr, + key.paddr.localaddrlen, key.paddr.ifindex, key.dcid, + key.dcidlen); + if(!conn) { + /* The connection got deleted between the conid lookup + * and the connection lock grab, it no longer exists, + * so return null. */ + lock_rw_unlock(&table->lock); + return NULL; + } + lock_basic_lock(&conn->lock); + if(conn->is_deleted) { + lock_rw_unlock(&table->lock); + lock_basic_unlock(&conn->lock); + return NULL; + } + lock_rw_unlock(&table->lock); + return conn; + } + lock_rw_unlock(&table->conid_lock); + return NULL; +} + +/** Find the doq_conn, by addr or by connection id */ +static struct doq_conn* +doq_conn_find_by_addr_or_cid(struct doq_table* table, + struct doq_pkt_addr* paddr, const uint8_t* dcid, size_t dcidlen) +{ + struct doq_conn* conn; + lock_rw_rdlock(&table->lock); + conn = doq_conn_find(table, &paddr->addr, paddr->addrlen, + &paddr->localaddr, paddr->localaddrlen, paddr->ifindex, + dcid, dcidlen); + if(conn && conn->is_deleted) { + conn = NULL; + } + if(conn) { + lock_basic_lock(&conn->lock); + lock_rw_unlock(&table->lock); + verbose(VERB_ALGO, "doq: found connection by address, dcid"); + } else { + lock_rw_unlock(&table->lock); + conn = doq_conn_find_by_id(table, dcid, dcidlen); + if(conn) { + verbose(VERB_ALGO, "doq: found connection by dcid"); + } + } + return conn; +} + +/** decode doq packet header, false on handled or failure, true to continue + * to process the packet */ +static int +doq_decode_pkt_header_negotiate(struct 
comm_point* c, + struct doq_pkt_addr* paddr, struct doq_conn** conn) +{ +#ifdef HAVE_STRUCT_NGTCP2_VERSION_CID + struct ngtcp2_version_cid vc; +#else + uint32_t version; + const uint8_t *dcid, *scid; + size_t dcidlen, scidlen; +#endif + int rv; + +#ifdef HAVE_STRUCT_NGTCP2_VERSION_CID + rv = ngtcp2_pkt_decode_version_cid(&vc, + sldns_buffer_begin(c->doq_socket->pkt_buf), + sldns_buffer_limit(c->doq_socket->pkt_buf), + c->doq_socket->sv_scidlen); +#else + rv = ngtcp2_pkt_decode_version_cid(&version, &dcid, &dcidlen, + &scid, &scidlen, sldns_buffer_begin(c->doq_socket->pkt_buf), + sldns_buffer_limit(c->doq_socket->pkt_buf), c->doq_socket->sv_scidlen); +#endif + if(rv != 0) { + if(rv == NGTCP2_ERR_VERSION_NEGOTIATION) { + /* send the version negotiation */ + doq_send_version_negotiation(c, paddr, +#ifdef HAVE_STRUCT_NGTCP2_VERSION_CID + vc.scid, vc.scidlen, vc.dcid, vc.dcidlen +#else + scid, scidlen, dcid, dcidlen +#endif + ); + return 0; + } + verbose(VERB_ALGO, "doq: could not decode version " + "and CID from QUIC packet header: %s", + ngtcp2_strerror(rv)); + return 0; + } + + if(verbosity >= VERB_ALGO) { + verbose(VERB_ALGO, "ngtcp2_pkt_decode_version_cid packet has " + "QUIC protocol version %u", (unsigned) +#ifdef HAVE_STRUCT_NGTCP2_VERSION_CID + vc. 
+#endif + version + ); + log_hex("dcid", +#ifdef HAVE_STRUCT_NGTCP2_VERSION_CID + (void*)vc.dcid, vc.dcidlen +#else + (void*)dcid, dcidlen +#endif + ); + log_hex("scid", +#ifdef HAVE_STRUCT_NGTCP2_VERSION_CID + (void*)vc.scid, vc.scidlen +#else + (void*)scid, scidlen +#endif + ); + } + *conn = doq_conn_find_by_addr_or_cid(c->doq_socket->table, paddr, +#ifdef HAVE_STRUCT_NGTCP2_VERSION_CID + vc.dcid, vc.dcidlen +#else + dcid, dcidlen +#endif + ); + if(*conn) + (*conn)->doq_socket = c->doq_socket; + return 1; +} + +/** fill cid structure with random data */ +static void doq_cid_randfill(struct ngtcp2_cid* cid, size_t datalen, + struct ub_randstate* rnd) +{ + uint8_t buf[32]; + if(datalen > sizeof(buf)) + datalen = sizeof(buf); + doq_fill_rand(rnd, buf, datalen); + ngtcp2_cid_init(cid, buf, datalen); +} + +/** send retry packet for doq connection. */ +static void +doq_send_retry(struct comm_point* c, struct doq_pkt_addr* paddr, + struct ngtcp2_pkt_hd* hd) +{ + char host[256], port[32]; + struct ngtcp2_cid scid; + uint8_t token[NGTCP2_CRYPTO_MAX_RETRY_TOKENLEN]; + ngtcp2_tstamp ts; + ngtcp2_ssize tokenlen, ret; + + if(!doq_print_addr_port(&paddr->addr, paddr->addrlen, host, + sizeof(host), port, sizeof(port))) { + log_err("doq_send_retry failed"); + return; + } + verbose(VERB_ALGO, "doq: sending retry packet to %s %s", host, port); + + /* the server chosen source connection ID */ + scid.datalen = c->doq_socket->sv_scidlen; + doq_cid_randfill(&scid, scid.datalen, c->doq_socket->rnd); + + ts = doq_get_timestamp_nanosec(); + + tokenlen = ngtcp2_crypto_generate_retry_token(token, + c->doq_socket->static_secret, c->doq_socket->static_secret_len, + hd->version, (void*)&paddr->addr, paddr->addrlen, &scid, + &hd->dcid, ts); + if(tokenlen < 0) { + log_err("ngtcp2_crypto_generate_retry_token failed: %s", + ngtcp2_strerror(tokenlen)); + return; + } + + sldns_buffer_clear(c->doq_socket->pkt_buf); + ret = ngtcp2_crypto_write_retry(sldns_buffer_begin(c->doq_socket->pkt_buf), + 
sldns_buffer_capacity(c->doq_socket->pkt_buf), hd->version, + &hd->scid, &scid, &hd->dcid, token, tokenlen); + if(ret < 0) { + log_err("ngtcp2_crypto_write_retry failed: %s", + ngtcp2_strerror(ret)); + return; + } + sldns_buffer_set_position(c->doq_socket->pkt_buf, ret); + sldns_buffer_flip(c->doq_socket->pkt_buf); + doq_send_pkt(c, paddr, 0); +} + +/** doq send stateless connection close */ +static void +doq_send_stateless_connection_close(struct comm_point* c, + struct doq_pkt_addr* paddr, struct ngtcp2_pkt_hd* hd, + uint64_t error_code) +{ + ngtcp2_ssize ret; + sldns_buffer_clear(c->doq_socket->pkt_buf); + ret = ngtcp2_crypto_write_connection_close( + sldns_buffer_begin(c->doq_socket->pkt_buf), + sldns_buffer_capacity(c->doq_socket->pkt_buf), hd->version, &hd->scid, + &hd->dcid, error_code, NULL, 0); + if(ret < 0) { + log_err("ngtcp2_crypto_write_connection_close failed: %s", + ngtcp2_strerror(ret)); + return; + } + sldns_buffer_set_position(c->doq_socket->pkt_buf, ret); + sldns_buffer_flip(c->doq_socket->pkt_buf); + doq_send_pkt(c, paddr, 0); +} + +/** doq verify retry token, false on failure */ +static int +doq_verify_retry_token(struct comm_point* c, struct doq_pkt_addr* paddr, + struct ngtcp2_cid* ocid, struct ngtcp2_pkt_hd* hd) +{ + char host[256], port[32]; + ngtcp2_tstamp ts; + if(!doq_print_addr_port(&paddr->addr, paddr->addrlen, host, + sizeof(host), port, sizeof(port))) { + log_err("doq_verify_retry_token failed"); + return 0; + } + ts = doq_get_timestamp_nanosec(); + verbose(VERB_ALGO, "doq: verifying retry token from %s %s", host, + port); + if(ngtcp2_crypto_verify_retry_token(ocid, +#ifdef HAVE_STRUCT_NGTCP2_PKT_HD_TOKENLEN + hd->token, hd->tokenlen, +#else + hd->token.base, hd->token.len, +#endif + c->doq_socket->static_secret, + c->doq_socket->static_secret_len, hd->version, + (void*)&paddr->addr, paddr->addrlen, &hd->dcid, + 10*NGTCP2_SECONDS, ts) != 0) { + verbose(VERB_ALGO, "doq: could not verify retry token " + "from %s %s", host, port); + 
return 0; + } + verbose(VERB_ALGO, "doq: verified retry token from %s %s", host, port); + return 1; +} + +/** doq verify token, false on failure */ +static int +doq_verify_token(struct comm_point* c, struct doq_pkt_addr* paddr, + struct ngtcp2_pkt_hd* hd) +{ + char host[256], port[32]; + ngtcp2_tstamp ts; + if(!doq_print_addr_port(&paddr->addr, paddr->addrlen, host, + sizeof(host), port, sizeof(port))) { + log_err("doq_verify_token failed"); + return 0; + } + ts = doq_get_timestamp_nanosec(); + verbose(VERB_ALGO, "doq: verifying token from %s %s", host, port); + if(ngtcp2_crypto_verify_regular_token( +#ifdef HAVE_STRUCT_NGTCP2_PKT_HD_TOKENLEN + hd->token, hd->tokenlen, +#else + hd->token.base, hd->token.len, +#endif + c->doq_socket->static_secret, c->doq_socket->static_secret_len, + (void*)&paddr->addr, paddr->addrlen, 3600*NGTCP2_SECONDS, + ts) != 0) { + verbose(VERB_ALGO, "doq: could not verify token from %s %s", + host, port); + return 0; + } + verbose(VERB_ALGO, "doq: verified token from %s %s", host, port); + return 1; +} + +/** delete and remove from the lookup tree the doq_conn connection */ +static void +doq_delete_connection(struct comm_point* c, struct doq_conn* conn) +{ + struct doq_conn copy; + uint8_t cid[NGTCP2_MAX_CIDLEN]; + rbnode_type* node; + if(!conn) + return; + /* Copy the key and set it deleted. 
*/ + conn->is_deleted = 1; + doq_conn_write_disable(conn); + copy.key = conn->key; + log_assert(conn->key.dcidlen <= NGTCP2_MAX_CIDLEN); + memcpy(cid, conn->key.dcid, conn->key.dcidlen); + copy.key.dcid = cid; + copy.node.key = &copy; + lock_basic_unlock(&conn->lock); + + /* Now get the table lock to delete it from the tree */ + lock_rw_wrlock(&c->doq_socket->table->lock); + node = rbtree_delete(c->doq_socket->table->conn_tree, copy.node.key); + if(node) { + conn = (struct doq_conn*)node->key; + lock_basic_lock(&conn->lock); + doq_conn_write_list_remove(c->doq_socket->table, conn); + if(conn->timer.timer_in_list) { + /* Remove timer from list first, because finding the + * rbnode element of the setlist of same timeouts + * needs tree lookup. Edit the tree structure after + * that lookup. */ + doq_timer_list_remove(c->doq_socket->table, + &conn->timer); + } + if(conn->timer.timer_in_tree) + doq_timer_tree_remove(c->doq_socket->table, + &conn->timer); + } + lock_rw_unlock(&c->doq_socket->table->lock); + if(node) { + lock_basic_unlock(&conn->lock); + doq_table_quic_size_subtract(c->doq_socket->table, + sizeof(*conn)+conn->key.dcidlen); + doq_conn_delete(conn, c->doq_socket->table); + } +} + +/** create and setup a new doq connection, to a new destination, or with + * a new dcid. It has a new set of streams. It is inserted in the lookup tree. + * Returns NULL on failure. 
*/ +static struct doq_conn* +doq_setup_new_conn(struct comm_point* c, struct doq_pkt_addr* paddr, + struct ngtcp2_pkt_hd* hd, struct ngtcp2_cid* ocid) +{ + struct doq_conn* conn; + if(!doq_table_quic_size_available(c->doq_socket->table, + c->doq_socket->cfg, sizeof(*conn)+hd->dcid.datalen + + sizeof(struct doq_stream) + + 100 /* estimated input query */ + + 1200 /* estimated output query */)) { + verbose(VERB_ALGO, "doq: no mem available for new connection"); + doq_send_stateless_connection_close(c, paddr, hd, + NGTCP2_CONNECTION_REFUSED); + return NULL; + } + conn = doq_conn_create(c, paddr, hd->dcid.data, hd->dcid.datalen, + hd->version); + if(!conn) { + log_err("doq: could not allocate doq_conn"); + return NULL; + } + lock_rw_wrlock(&c->doq_socket->table->lock); + lock_basic_lock(&conn->lock); + if(!rbtree_insert(c->doq_socket->table->conn_tree, &conn->node)) { + lock_rw_unlock(&c->doq_socket->table->lock); + log_err("doq: duplicate connection"); + /* conn has no entry in writelist, and no timer yet. */ + lock_basic_unlock(&conn->lock); + doq_conn_delete(conn, c->doq_socket->table); + return NULL; + } + lock_rw_unlock(&c->doq_socket->table->lock); + doq_table_quic_size_add(c->doq_socket->table, + sizeof(*conn)+conn->key.dcidlen); + verbose(VERB_ALGO, "doq: created new connection"); + + /* the scid and dcid switch meaning from the accepted client + * connection to the server connection. The 'source' and 'destination' + * meaning is reversed. 
*/ + if(!doq_conn_setup(conn, hd->scid.data, hd->scid.datalen, + (ocid?ocid->data:NULL), (ocid?ocid->datalen:0), +#ifdef HAVE_STRUCT_NGTCP2_PKT_HD_TOKENLEN + hd->token, hd->tokenlen +#else + hd->token.base, hd->token.len +#endif + )) { + log_err("doq: could not set up connection"); + doq_delete_connection(c, conn); + return NULL; + } + return conn; +} + +/** perform doq address validation */ +static int +doq_address_validation(struct comm_point* c, struct doq_pkt_addr* paddr, + struct ngtcp2_pkt_hd* hd, struct ngtcp2_cid* ocid, + struct ngtcp2_cid** pocid) +{ +#ifdef HAVE_STRUCT_NGTCP2_PKT_HD_TOKENLEN + const uint8_t* token = hd->token; + size_t tokenlen = hd->tokenlen; +#else + const uint8_t* token = hd->token.base; + size_t tokenlen = hd->token.len; +#endif + verbose(VERB_ALGO, "doq stateless address validation"); + + if(tokenlen == 0 || token == NULL) { + doq_send_retry(c, paddr, hd); + return 0; + } + if(token[0] != NGTCP2_CRYPTO_TOKEN_MAGIC_RETRY && + hd->dcid.datalen < NGTCP2_MIN_INITIAL_DCIDLEN) { + doq_send_stateless_connection_close(c, paddr, hd, + NGTCP2_INVALID_TOKEN); + return 0; + } + if(token[0] == NGTCP2_CRYPTO_TOKEN_MAGIC_RETRY) { + if(!doq_verify_retry_token(c, paddr, ocid, hd)) { + doq_send_stateless_connection_close(c, paddr, hd, + NGTCP2_INVALID_TOKEN); + return 0; + } + *pocid = ocid; + } else if(token[0] == NGTCP2_CRYPTO_TOKEN_MAGIC_REGULAR) { + if(!doq_verify_token(c, paddr, hd)) { + doq_send_retry(c, paddr, hd); + return 0; + } +#ifdef HAVE_STRUCT_NGTCP2_PKT_HD_TOKENLEN + hd->token = NULL; + hd->tokenlen = 0; +#else + hd->token.base = NULL; + hd->token.len = 0; +#endif + } else { + verbose(VERB_ALGO, "doq address validation: unrecognised " + "token in hd.token.base with magic byte 0x%2.2x", + (int)token[0]); + if(c->doq_socket->validate_addr) { + doq_send_retry(c, paddr, hd); + return 0; + } +#ifdef HAVE_STRUCT_NGTCP2_PKT_HD_TOKENLEN + hd->token = NULL; + hd->tokenlen = 0; +#else + hd->token.base = NULL; + hd->token.len = 0; +#endif + } + 
return 1; +} + +/** the doq accept, returns false if no further processing of content */ +static int +doq_accept(struct comm_point* c, struct doq_pkt_addr* paddr, + struct doq_conn** conn, struct ngtcp2_pkt_info* pi) +{ + int rv; + struct ngtcp2_pkt_hd hd; + struct ngtcp2_cid ocid, *pocid=NULL; + int err_retry; + memset(&hd, 0, sizeof(hd)); + rv = ngtcp2_accept(&hd, sldns_buffer_begin(c->doq_socket->pkt_buf), + sldns_buffer_limit(c->doq_socket->pkt_buf)); + if(rv != 0) { + if(rv == NGTCP2_ERR_RETRY) { + doq_send_retry(c, paddr, &hd); + return 0; + } + log_err("doq: initial packet failed, ngtcp2_accept failed: %s", + ngtcp2_strerror(rv)); + return 0; + } + if(c->doq_socket->validate_addr || +#ifdef HAVE_STRUCT_NGTCP2_PKT_HD_TOKENLEN + hd.tokenlen +#else + hd.token.len +#endif + ) { + if(!doq_address_validation(c, paddr, &hd, &ocid, &pocid)) + return 0; + } + *conn = doq_setup_new_conn(c, paddr, &hd, pocid); + if(!*conn) + return 0; + (*conn)->doq_socket = c->doq_socket; + if(!doq_conn_recv(c, paddr, *conn, pi, &err_retry, NULL)) { + if(err_retry) + doq_send_retry(c, paddr, &hd); + doq_delete_connection(c, *conn); + *conn = NULL; + return 0; + } + return 1; +} + +/** doq pickup a timer to wait for for the worker. If any timer exists. 
*/ +static void +doq_pickup_timer(struct comm_point* c) +{ + struct doq_timer* t; + struct timeval tv; + int have_time = 0; + memset(&tv, 0, sizeof(tv)); + + lock_rw_wrlock(&c->doq_socket->table->lock); + RBTREE_FOR(t, struct doq_timer*, c->doq_socket->table->timer_tree) { + if(t->worker_doq_socket == NULL || + t->worker_doq_socket == c->doq_socket) { + /* pick up this element */ + t->worker_doq_socket = c->doq_socket; + have_time = 1; + memcpy(&tv, &t->time, sizeof(tv)); + break; + } + } + lock_rw_unlock(&c->doq_socket->table->lock); + + if(have_time) { + struct timeval rel; + timeval_subtract(&rel, &tv, c->doq_socket->now_tv); + comm_timer_set(c->doq_socket->timer, &rel); + memcpy(&c->doq_socket->marked_time, &tv, + sizeof(c->doq_socket->marked_time)); + verbose(VERB_ALGO, "doq pickup timer at %d.%6.6d in %d.%6.6d", + (int)tv.tv_sec, (int)tv.tv_usec, (int)rel.tv_sec, + (int)rel.tv_usec); + } else { + if(comm_timer_is_set(c->doq_socket->timer)) + comm_timer_disable(c->doq_socket->timer); + memset(&c->doq_socket->marked_time, 0, + sizeof(c->doq_socket->marked_time)); + verbose(VERB_ALGO, "doq timer disabled"); + } +} + +/** doq done with connection, release locks and setup timer and write */ +static void +doq_done_setup_timer_and_write(struct comm_point* c, struct doq_conn* conn) +{ + struct doq_conn copy; + uint8_t cid[NGTCP2_MAX_CIDLEN]; + rbnode_type* node; + struct timeval new_tv; + int write_change = 0, timer_change = 0; + + /* No longer in callbacks, so the pointer to doq_socket is back + * to NULL. */ + conn->doq_socket = NULL; + + if(doq_conn_check_timer(conn, &new_tv)) + timer_change = 1; + if( (conn->write_interest && !conn->on_write_list) || + (!conn->write_interest && conn->on_write_list)) + write_change = 1; + + if(!timer_change && !write_change) { + /* Nothing to do. */ + lock_basic_unlock(&conn->lock); + return; + } + + /* The table lock is needed to change the write list and timer tree. 
+ * So the connection lock is released and then the connection is + * looked up again. */ + copy.key = conn->key; + log_assert(conn->key.dcidlen <= NGTCP2_MAX_CIDLEN); + memcpy(cid, conn->key.dcid, conn->key.dcidlen); + copy.key.dcid = cid; + copy.node.key = &copy; + lock_basic_unlock(&conn->lock); + + lock_rw_wrlock(&c->doq_socket->table->lock); + node = rbtree_search(c->doq_socket->table->conn_tree, copy.node.key); + if(!node) { + lock_rw_unlock(&c->doq_socket->table->lock); + /* Must have been deleted in the mean time. */ + return; + } + conn = (struct doq_conn*)node->key; + lock_basic_lock(&conn->lock); + if(conn->is_deleted) { + /* It is deleted now. */ + lock_rw_unlock(&c->doq_socket->table->lock); + lock_basic_unlock(&conn->lock); + return; + } + + if(write_change) { + /* Edit the write lists, we are holding the table.lock and can + * edit the list first,last and also prev,next and on_list + * elements in the doq_conn structures. */ + doq_conn_set_write_list(c->doq_socket->table, conn); + } + if(timer_change) { + doq_timer_set(c->doq_socket->table, &conn->timer, + c->doq_socket, &new_tv); + } + lock_rw_unlock(&c->doq_socket->table->lock); + lock_basic_unlock(&conn->lock); +} + +/** doq done with connection callbacks, release locks and setup write */ +static void +doq_done_with_conn_cb(struct comm_point* c, struct doq_conn* conn) +{ + struct doq_conn copy; + uint8_t cid[NGTCP2_MAX_CIDLEN]; + rbnode_type* node; + + /* no longer in callbacks, so the pointer to doq_socket is back + * to NULL. */ + conn->doq_socket = NULL; + + if( (conn->write_interest && conn->on_write_list) || + (!conn->write_interest && !conn->on_write_list)) { + /* The connection already has the required write list + * status. */ + lock_basic_unlock(&conn->lock); + return; + } + + /* To edit the write list of connections we have to hold the table + * lock, so we release the connection and then look it up again. 
*/ + copy.key = conn->key; + log_assert(conn->key.dcidlen <= NGTCP2_MAX_CIDLEN); + memcpy(cid, conn->key.dcid, conn->key.dcidlen); + copy.key.dcid = cid; + copy.node.key = &copy; + lock_basic_unlock(&conn->lock); + + lock_rw_wrlock(&c->doq_socket->table->lock); + node = rbtree_search(c->doq_socket->table->conn_tree, copy.node.key); + if(!node) { + lock_rw_unlock(&c->doq_socket->table->lock); + /* must have been deleted in the mean time */ + return; + } + conn = (struct doq_conn*)node->key; + lock_basic_lock(&conn->lock); + if(conn->is_deleted) { + /* it is deleted now. */ + lock_rw_unlock(&c->doq_socket->table->lock); + lock_basic_unlock(&conn->lock); + return; + } + + /* edit the write lists, we are holding the table.lock and can + * edit the list first,last and also prev,next and on_list elements + * in the doq_conn structures. */ + doq_conn_set_write_list(c->doq_socket->table, conn); + lock_rw_unlock(&c->doq_socket->table->lock); + lock_basic_unlock(&conn->lock); +} + +/** doq count the length of the write list, holds the table read lock while counting */ +static size_t +doq_write_list_length(struct comm_point* c) +{ + size_t count = 0; + struct doq_conn* conn; + lock_rw_rdlock(&c->doq_socket->table->lock); + conn = c->doq_socket->table->write_list_first; + while(conn) { + count++; + conn = conn->write_next; + } + lock_rw_unlock(&c->doq_socket->table->lock); + return count; +} + +/** doq pop the first element from the write list to have write events, deleted connections are skipped, NULL if none is left */ +static struct doq_conn* +doq_pop_write_conn(struct comm_point* c) +{ + struct doq_conn* conn; + lock_rw_wrlock(&c->doq_socket->table->lock); + conn = doq_table_pop_first(c->doq_socket->table); + while(conn && conn->is_deleted) { + lock_basic_unlock(&conn->lock); + conn = doq_table_pop_first(c->doq_socket->table); + } + lock_rw_unlock(&c->doq_socket->table->lock); + if(conn) + conn->doq_socket = c->doq_socket; + return conn; +} + +/** doq the connection is done with write callbacks, release it.
*/ +static void +doq_done_with_write_cb(struct comm_point* c, struct doq_conn* conn, + int delete_it) +{ + if(delete_it) { + doq_delete_connection(c, conn); + return; + } + doq_done_setup_timer_and_write(c, conn); +} + +/** see if the doq socket wants to write packets */ +static int +doq_socket_want_write(struct comm_point* c) +{ + int want_write = 0; + if(c->doq_socket->have_blocked_pkt) + return 1; + lock_rw_rdlock(&c->doq_socket->table->lock); + if(c->doq_socket->table->write_list_first) + want_write = 1; + lock_rw_unlock(&c->doq_socket->table->lock); + return want_write; +} + +/** enable write event for the doq server socket fd */ +static void +doq_socket_write_enable(struct comm_point* c) +{ + verbose(VERB_ALGO, "doq socket want write"); + if(c->doq_socket->event_has_write) + return; + comm_point_listen_for_rw(c, 1, 1); + c->doq_socket->event_has_write = 1; +} + +/** disable write event for the doq server socket fd */ +static void +doq_socket_write_disable(struct comm_point* c) +{ + verbose(VERB_ALGO, "doq socket want no write"); + if(!c->doq_socket->event_has_write) + return; + comm_point_listen_for_rw(c, 1, 0); + c->doq_socket->event_has_write = 0; +} + +/** write blocked packet, if possible. returns false if failed, again. */ +static int +doq_write_blocked_pkt(struct comm_point* c) +{ + struct doq_pkt_addr paddr; + if(!c->doq_socket->have_blocked_pkt) + return 1; + c->doq_socket->have_blocked_pkt = 0; + if(sldns_buffer_limit(c->doq_socket->blocked_pkt) > + sldns_buffer_remaining(c->doq_socket->pkt_buf)) + return 1; /* impossibly large, drop it. + impossible since pkt_buf is same size as blocked_pkt buf. 
*/ + sldns_buffer_clear(c->doq_socket->pkt_buf); + sldns_buffer_write(c->doq_socket->pkt_buf, + sldns_buffer_begin(c->doq_socket->blocked_pkt), + sldns_buffer_limit(c->doq_socket->blocked_pkt)); + sldns_buffer_flip(c->doq_socket->pkt_buf); + memcpy(&paddr, c->doq_socket->blocked_paddr, sizeof(paddr)); + doq_send_pkt(c, &paddr, c->doq_socket->blocked_pkt_pi.ecn); + if(c->doq_socket->have_blocked_pkt) + return 0; + return 1; +} + +/** doq find a timer that timeouted and return the conn, locked. */ +static struct doq_conn* +doq_timer_timeout_conn(struct doq_server_socket* doq_socket) +{ + struct doq_conn* conn = NULL; + struct rbnode_type* node; + lock_rw_wrlock(&doq_socket->table->lock); + node = rbtree_first(doq_socket->table->timer_tree); + if(node && node != RBTREE_NULL) { + struct doq_timer* t = (struct doq_timer*)node; + conn = t->conn; + + /* If now < timer then no further timeouts in tree. */ + if(timeval_smaller(doq_socket->now_tv, &t->time)) { + lock_rw_unlock(&doq_socket->table->lock); + return NULL; + } + + lock_basic_lock(&conn->lock); + conn->doq_socket = doq_socket; + + /* Now that the timer is fired, remove it. */ + doq_timer_unset(doq_socket->table, t); + lock_rw_unlock(&doq_socket->table->lock); + return conn; + } + lock_rw_unlock(&doq_socket->table->lock); + return NULL; +} + +/** doq timer erase the marker that said which timer the worker uses. 
*/ +static void +doq_timer_erase_marker(struct doq_server_socket* doq_socket) +{ + struct doq_timer* t; + lock_rw_wrlock(&doq_socket->table->lock); + t = doq_timer_find_time(doq_socket->table, &doq_socket->marked_time); + if(t && t->worker_doq_socket == doq_socket) + t->worker_doq_socket = NULL; + lock_rw_unlock(&doq_socket->table->lock); + memset(&doq_socket->marked_time, 0, sizeof(doq_socket->marked_time)); +} + +void +doq_timer_cb(void* arg) +{ + struct doq_server_socket* doq_socket = (struct doq_server_socket*)arg; + struct doq_conn* conn; + verbose(VERB_ALGO, "doq timer callback"); + + doq_timer_erase_marker(doq_socket); + + while((conn = doq_timer_timeout_conn(doq_socket)) != NULL) { + if(conn->is_deleted || +#ifdef HAVE_NGTCP2_CONN_IN_CLOSING_PERIOD + ngtcp2_conn_in_closing_period(conn->conn) || #else - buffer = rep.c->buffer; + ngtcp2_conn_is_in_closing_period(conn->conn) || #endif - (void)comm_point_send_udp_msg_if(rep.c, buffer, - (struct sockaddr*)&rep.remote_addr, - rep.remote_addrlen, &rep); +#ifdef HAVE_NGTCP2_CONN_IN_DRAINING_PERIOD + ngtcp2_conn_in_draining_period(conn->conn) +#else + ngtcp2_conn_is_in_draining_period(conn->conn) +#endif + ) { + if(verbosity >= VERB_ALGO) { + char remotestr[256]; + addr_to_str((void*)&conn->key.paddr.addr, + conn->key.paddr.addrlen, remotestr, + sizeof(remotestr)); + verbose(VERB_ALGO, "doq conn %s is deleted " + "after timeout", remotestr); + } + doq_delete_connection(doq_socket->cp, conn); + continue; } - if(!rep.c || rep.c->fd == -1) /* commpoint closed */ - break; + if(!doq_conn_handle_timeout(conn)) + doq_delete_connection(doq_socket->cp, conn); + else doq_done_setup_timer_and_write(doq_socket->cp, conn); } + + if(doq_socket_want_write(doq_socket->cp)) + doq_socket_write_enable(doq_socket->cp); + else doq_socket_write_disable(doq_socket->cp); + doq_pickup_timer(doq_socket->cp); } -#endif /* AF_INET6 && IPV6_PKTINFO && HAVE_RECVMSG */ void -comm_point_udp_callback(int fd, short event, void* arg) 
+comm_point_doq_callback(int fd, short event, void* arg) { - struct comm_reply rep; - ssize_t rcv; - int i; - struct sldns_buffer *buffer; + struct comm_point* c; + struct doq_pkt_addr paddr; + int i, pkt_continue, err_drop; + struct doq_conn* conn; + struct ngtcp2_pkt_info pi; + size_t count, num_len; - rep.c = (struct comm_point*)arg; - log_assert(rep.c->type == comm_udp); + c = (struct comm_point*)arg; + log_assert(c->type == comm_doq); - if(!(event&UB_EV_READ)) - return; - log_assert(rep.c && rep.c->buffer && rep.c->fd == fd); - ub_comm_base_now(rep.c->ev->base); - for(i=0; i<NUM_UDP_PER_SELECT; i++) { - sldns_buffer_clear(rep.c->buffer); - rep.remote_addrlen = (socklen_t)sizeof(rep.remote_addr); - log_assert(fd != -1); - log_assert(sldns_buffer_remaining(rep.c->buffer) > 0); - rcv = recvfrom(fd, (void*)sldns_buffer_begin(rep.c->buffer), - sldns_buffer_remaining(rep.c->buffer), MSG_DONTWAIT, - (struct sockaddr*)&rep.remote_addr, &rep.remote_addrlen); - if(rcv == -1) { -#ifndef USE_WINSOCK - if(errno != EAGAIN && errno != EINTR - && udp_recv_needs_log(errno)) - log_err("recvfrom %d failed: %s", - fd, strerror(errno)); + log_assert(c && c->doq_socket->pkt_buf && c->fd == fd); + ub_comm_base_now(c->ev->base); + + /* see if there is a blocked packet, and send that if possible. + * do not attempt to read yet, even if possible, that would just + * push more answers in reply to those read packets onto the list + * of written replies. First attempt to clear the write content out. + * That keeps the memory usage from bloating up. */ + if(c->doq_socket->have_blocked_pkt) { + if(!doq_write_blocked_pkt(c)) { + /* this write has also blocked, attempt to write + * later. Make sure the event listens to write + * events.
*/ + if(!c->doq_socket->event_has_write) + doq_socket_write_enable(c); + doq_pickup_timer(c); + return; + } + } + + /* see if there is write interest */ + count = 0; + num_len = doq_write_list_length(c); + while((conn = doq_pop_write_conn(c)) != NULL) { + if(conn->is_deleted || +#ifdef HAVE_NGTCP2_CONN_IN_CLOSING_PERIOD + ngtcp2_conn_in_closing_period(conn->conn) || #else - if(WSAGetLastError() != WSAEINPROGRESS && - WSAGetLastError() != WSAECONNRESET && - WSAGetLastError()!= WSAEWOULDBLOCK && - udp_recv_needs_log(WSAGetLastError())) - log_err("recvfrom failed: %s", - wsa_strerror(WSAGetLastError())); + ngtcp2_conn_is_in_closing_period(conn->conn) || #endif +#ifdef HAVE_NGTCP2_CONN_IN_DRAINING_PERIOD + ngtcp2_conn_in_draining_period(conn->conn) +#else + ngtcp2_conn_is_in_draining_period(conn->conn) +#endif + ) { + conn->doq_socket = NULL; + lock_basic_unlock(&conn->lock); + if(c->doq_socket->have_blocked_pkt) { + if(!c->doq_socket->event_has_write) + doq_socket_write_enable(c); + doq_pickup_timer(c); + return; + } + if(++count > num_len*2) + break; + continue; + } + if(verbosity >= VERB_ALGO) { + char remotestr[256]; + addr_to_str((void*)&conn->key.paddr.addr, + conn->key.paddr.addrlen, remotestr, + sizeof(remotestr)); + verbose(VERB_ALGO, "doq write connection %s %d", + remotestr, doq_sockaddr_get_port( + &conn->key.paddr.addr)); + } + if(doq_conn_write_streams(c, conn, &err_drop)) + err_drop = 0; + doq_done_with_write_cb(c, conn, err_drop); + if(c->doq_socket->have_blocked_pkt) { + if(!c->doq_socket->event_has_write) + doq_socket_write_enable(c); + doq_pickup_timer(c); return; } - sldns_buffer_skip(rep.c->buffer, rcv); - sldns_buffer_flip(rep.c->buffer); - rep.srctype = 0; - rep.is_proxied = 0; + /* Stop overly long write lists that are created + * while we are processing. Do those next time there + * is a write callback. Stops long loops, and keeps + * fair for other events. 
*/ + if(++count > num_len*2) + break; + } - if(rep.c->pp2_enabled && !consume_pp2_header(rep.c->buffer, - &rep, 0)) { - log_err("proxy_protocol: could not consume PROXYv2 header"); + /* check for data to read */ + if((event&UB_EV_READ)!=0) + for(i=0; i<NUM_UDP_PER_SELECT; i++) { + /* there may be a blocked write packet and if so, stop + * reading because the reply cannot get written. The + * blocked packet could be written during the conn_recv + * handling of replies, or for a connection close. */ + if(c->doq_socket->have_blocked_pkt) { + if(!c->doq_socket->event_has_write) + doq_socket_write_enable(c); + doq_pickup_timer(c); return; } - if(!rep.is_proxied) { - rep.client_addrlen = rep.remote_addrlen; - memmove(&rep.client_addr, &rep.remote_addr, - rep.remote_addrlen); + sldns_buffer_clear(c->doq_socket->pkt_buf); + doq_pkt_addr_init(&paddr); + log_assert(fd != -1); + log_assert(sldns_buffer_remaining(c->doq_socket->pkt_buf) > 0); + if(!doq_recv(c, &paddr, &pkt_continue, &pi)) { + if(pkt_continue) + continue; + break; } - fptr_ok(fptr_whitelist_comm_point(rep.c->callback)); - if((*rep.c->callback)(rep.c, rep.c->cb_arg, NETEVENT_NOERROR, &rep)) { - /* send back immediate reply */ -#ifdef USE_DNSCRYPT - buffer = rep.c->dnscrypt_buffer; + /* handle incoming packet from remote addr to localaddr */ + if(verbosity >= VERB_ALGO) { + char remotestr[256], localstr[256]; + addr_to_str((void*)&paddr.addr, paddr.addrlen, + remotestr, sizeof(remotestr)); + addr_to_str((void*)&paddr.localaddr, + paddr.localaddrlen, localstr, + sizeof(localstr)); + log_info("incoming doq packet from %s port %d on " + "%s port %d ifindex %d", + remotestr, doq_sockaddr_get_port(&paddr.addr), + localstr, + doq_sockaddr_get_port(&paddr.localaddr), + paddr.ifindex); + log_info("doq_recv length %d ecn 0x%x", + (int)sldns_buffer_limit(c->doq_socket->pkt_buf), + (int)pi.ecn); + } + + if(sldns_buffer_limit(c->doq_socket->pkt_buf) == 0) + continue; + + conn = NULL; + if(!doq_decode_pkt_header_negotiate(c, &paddr, &conn)) + continue; + if(!conn) { + if(!doq_accept(c, &paddr, &conn, &pi)) + continue; + if(!doq_conn_write_streams(c, conn, NULL)) { + doq_delete_connection(c, conn); + continue; + } + doq_done_setup_timer_and_write(c, conn); + continue; + } + if( 
+#ifdef HAVE_NGTCP2_CONN_IN_CLOSING_PERIOD + ngtcp2_conn_in_closing_period(conn->conn) #else - buffer = rep.c->buffer; + ngtcp2_conn_is_in_closing_period(conn->conn) #endif - (void)comm_point_send_udp_msg(rep.c, buffer, - (struct sockaddr*)&rep.remote_addr, - rep.remote_addrlen, 0); + ) { + if(!doq_conn_send_close(c, conn)) { + doq_delete_connection(c, conn); + } else { + doq_done_setup_timer_and_write(c, conn); + } + continue; } - if(!rep.c || rep.c->fd != fd) /* commpoint closed to -1 or reused for - another UDP port. Note rep.c cannot be reused with TCP fd. */ - break; + if( +#ifdef HAVE_NGTCP2_CONN_IN_DRAINING_PERIOD + ngtcp2_conn_in_draining_period(conn->conn) +#else + ngtcp2_conn_is_in_draining_period(conn->conn) +#endif + ) { + doq_done_setup_timer_and_write(c, conn); + continue; + } + if(!doq_conn_recv(c, &paddr, conn, &pi, NULL, &err_drop)) { + /* The receive failed, and if it also failed to send + * a close, drop the connection. That means it is not + * in the closing period. */ + if(err_drop) { + doq_delete_connection(c, conn); + } else { + doq_done_setup_timer_and_write(c, conn); + } + continue; + } + if(!doq_conn_write_streams(c, conn, &err_drop)) { + if(err_drop) { + doq_delete_connection(c, conn); + } else { + doq_done_setup_timer_and_write(c, conn); + } + continue; + } + doq_done_setup_timer_and_write(c, conn); + } + + /* see if we want to have more write events */ + verbose(VERB_ALGO, "doq check write enable"); + if(doq_socket_want_write(c)) + doq_socket_write_enable(c); + else doq_socket_write_disable(c); + doq_pickup_timer(c); +} + +/** create new doq server socket structure */ +static struct doq_server_socket* +doq_server_socket_create(struct doq_table* table, struct ub_randstate* rnd, + const char* ssl_service_key, const char* ssl_service_pem, + struct comm_point* c, struct comm_base* base, struct config_file* cfg) +{ + size_t doq_buffer_size = 4096; /* bytes buffer size, for one packet. 
*/ + struct doq_server_socket* doq_socket; + doq_socket = calloc(1, sizeof(*doq_socket)); + if(!doq_socket) { + return NULL; + } + doq_socket->table = table; + doq_socket->rnd = rnd; + doq_socket->validate_addr = 1; + if(ssl_service_key == NULL || ssl_service_key[0]==0) { + log_err("doq server socket create: no tls-service-key"); + free(doq_socket); + return NULL; + } + if(ssl_service_pem == NULL || ssl_service_pem[0]==0) { + log_err("doq server socket create: no tls-service-pem"); + free(doq_socket); + return NULL; + } + doq_socket->ssl_service_key = strdup(ssl_service_key); + if(!doq_socket->ssl_service_key) { + free(doq_socket); + return NULL; + } + doq_socket->ssl_service_pem = strdup(ssl_service_pem); + if(!doq_socket->ssl_service_pem) { + free(doq_socket->ssl_service_key); + free(doq_socket); + return NULL; + } + doq_socket->ssl_verify_pem = NULL; + /* the doq_socket has its own copy of the static secret, as + * well as other config values, so that they do not need table.lock */ + doq_socket->static_secret_len = table->static_secret_len; + doq_socket->static_secret = memdup(table->static_secret, + table->static_secret_len); + if(!doq_socket->static_secret) { + free(doq_socket->ssl_service_key); + free(doq_socket->ssl_service_pem); + free(doq_socket->ssl_verify_pem); + free(doq_socket); + return NULL; + } + if(!doq_socket_setup_ctx(doq_socket)) { + free(doq_socket->ssl_service_key); + free(doq_socket->ssl_service_pem); + free(doq_socket->ssl_verify_pem); + free(doq_socket->static_secret); + free(doq_socket); + return NULL; + } + doq_socket->idle_timeout = table->idle_timeout; + doq_socket->sv_scidlen = table->sv_scidlen; + doq_socket->cp = c; + doq_socket->pkt_buf = sldns_buffer_new(doq_buffer_size); + if(!doq_socket->pkt_buf) { + free(doq_socket->ssl_service_key); + free(doq_socket->ssl_service_pem); + free(doq_socket->ssl_verify_pem); + free(doq_socket->static_secret); + SSL_CTX_free(doq_socket->ctx); + free(doq_socket); + return NULL; + } + 
doq_socket->blocked_pkt = sldns_buffer_new( + sldns_buffer_capacity(doq_socket->pkt_buf)); + if(!doq_socket->blocked_pkt) { /* check the buffer that was just allocated */ + free(doq_socket->ssl_service_key); + free(doq_socket->ssl_service_pem); + free(doq_socket->ssl_verify_pem); + free(doq_socket->static_secret); + SSL_CTX_free(doq_socket->ctx); + sldns_buffer_free(doq_socket->pkt_buf); + free(doq_socket); + return NULL; + } + doq_socket->blocked_paddr = calloc(1, + sizeof(*doq_socket->blocked_paddr)); + if(!doq_socket->blocked_paddr) { + free(doq_socket->ssl_service_key); + free(doq_socket->ssl_service_pem); + free(doq_socket->ssl_verify_pem); + free(doq_socket->static_secret); + SSL_CTX_free(doq_socket->ctx); + sldns_buffer_free(doq_socket->pkt_buf); + sldns_buffer_free(doq_socket->blocked_pkt); + free(doq_socket); + return NULL; + } + doq_socket->timer = comm_timer_create(base, doq_timer_cb, doq_socket); + if(!doq_socket->timer) { + free(doq_socket->ssl_service_key); + free(doq_socket->ssl_service_pem); + free(doq_socket->ssl_verify_pem); + free(doq_socket->static_secret); + SSL_CTX_free(doq_socket->ctx); + sldns_buffer_free(doq_socket->pkt_buf); + sldns_buffer_free(doq_socket->blocked_pkt); + free(doq_socket->blocked_paddr); + free(doq_socket); + return NULL; + } + memset(&doq_socket->marked_time, 0, sizeof(doq_socket->marked_time)); + comm_base_timept(base, &doq_socket->now_tt, &doq_socket->now_tv); + doq_socket->cfg = cfg; + return doq_socket; +} + +/** delete doq server socket structure and the members it owns, NULL is allowed */ +static void +doq_server_socket_delete(struct doq_server_socket* doq_socket) +{ + if(!doq_socket) + return; + free(doq_socket->static_secret); + SSL_CTX_free(doq_socket->ctx); +#ifndef HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_SERVER_CONTEXT + free(doq_socket->quic_method); +#endif + free(doq_socket->ssl_service_key); + free(doq_socket->ssl_service_pem); + free(doq_socket->ssl_verify_pem); + sldns_buffer_free(doq_socket->pkt_buf); + sldns_buffer_free(doq_socket->blocked_pkt); + free(doq_socket->blocked_paddr); + 
comm_timer_delete(doq_socket->timer); + free(doq_socket); +} + +/** find repinfo in the doq table */ +static struct doq_conn* +doq_lookup_repinfo(struct doq_table* table, struct comm_reply* repinfo) +{ + struct doq_conn* conn; + struct doq_conn_key key; + doq_conn_key_from_repinfo(&key, repinfo); + lock_rw_rdlock(&table->lock); + conn = doq_conn_find(table, &key.paddr.addr, + key.paddr.addrlen, &key.paddr.localaddr, + key.paddr.localaddrlen, key.paddr.ifindex, key.dcid, + key.dcidlen); + if(conn) { + lock_basic_lock(&conn->lock); + lock_rw_unlock(&table->lock); + return conn; + } + lock_rw_unlock(&table->lock); + return NULL; +} + +/** doq find connection and stream. From inside callbacks from worker. */ +static int +doq_lookup_conn_stream(struct comm_reply* repinfo, struct comm_point* c, + struct doq_conn** conn, struct doq_stream** stream) +{ + if(c->doq_socket->current_conn) { + *conn = c->doq_socket->current_conn; + } else { + *conn = doq_lookup_repinfo(c->doq_socket->table, repinfo); + if((*conn) && (*conn)->is_deleted) { + lock_basic_unlock(&(*conn)->lock); + *conn = NULL; + } + if(*conn) { + (*conn)->doq_socket = c->doq_socket; + } + } + if(!*conn) { + *stream = NULL; + return 0; + } + *stream = doq_stream_find(*conn, repinfo->doq_streamid); + if(!*stream) { + if(!c->doq_socket->current_conn) { + /* Not inside callbacks, we have our own lock on conn. + * Release it. */ + lock_basic_unlock(&(*conn)->lock); + } + return 0; + } + if((*stream)->is_closed) { + /* stream is closed, ignore reply or drop */ + if(!c->doq_socket->current_conn) { + /* Not inside callbacks, we have our own lock on conn. + * Release it. 
*/ + lock_basic_unlock(&(*conn)->lock); + } + return 0; + } + return 1; +} + +/** doq send a reply from a comm reply */ +static void +doq_socket_send_reply(struct comm_reply* repinfo) +{ + struct doq_conn* conn; + struct doq_stream* stream; + log_assert(repinfo->c->type == comm_doq); + if(!doq_lookup_conn_stream(repinfo, repinfo->c, &conn, &stream)) { + verbose(VERB_ALGO, "doq: send_reply but %s is gone", + (conn?"stream":"connection")); + /* No stream, it may have been closed. */ + /* Drop the reply, it cannot be sent. */ + return; + } + if(!doq_stream_send_reply(conn, stream, repinfo->c->buffer)) + doq_stream_close(conn, stream, 1); + if(!repinfo->c->doq_socket->current_conn) { + /* Not inside callbacks, we have our own lock on conn. + * Release it. */ + doq_done_with_conn_cb(repinfo->c, conn); + /* since we sent a reply, or closed it, the assumption is + * that there is something to write, so enable write event. + * It waits until the write event happens to write the + * streams with answers, this allows some answers to be + * answered before the event loop reaches the doq fd, in + * repinfo->c->fd, and that collates answers. That would + * not happen if we write doq packets right now. */ + doq_socket_write_enable(repinfo->c); + } +} + +/** doq drop a reply from a comm reply */ +static void +doq_socket_drop_reply(struct comm_reply* repinfo) +{ + struct doq_conn* conn; + struct doq_stream* stream; + log_assert(repinfo->c->type == comm_doq); + if(!doq_lookup_conn_stream(repinfo, repinfo->c, &conn, &stream)) { + verbose(VERB_ALGO, "doq: drop_reply but %s is gone", + (conn?"stream":"connection")); + /* The connection or stream is already gone. */ + return; + } + doq_stream_close(conn, stream, 1); + if(!repinfo->c->doq_socket->current_conn) { + /* Not inside callbacks, we have our own lock on conn. + * Release it. 
*/ + doq_done_with_conn_cb(repinfo->c, conn); + doq_socket_write_enable(repinfo->c); } } +#endif /* HAVE_NGTCP2 */ int adjusted_tcp_timeout(struct comm_point* c) { @@ -4081,6 +5857,96 @@ comm_point_create_udp_ancil(struct comm_base *base, int fd, } #endif +struct comm_point* +comm_point_create_doq(struct comm_base *base, int fd, sldns_buffer* buffer, + comm_point_callback_type* callback, void* callback_arg, + struct unbound_socket* socket, struct doq_table* table, + struct ub_randstate* rnd, const char* ssl_service_key, + const char* ssl_service_pem, struct config_file* cfg) +{ +#ifdef HAVE_NGTCP2 + struct comm_point* c = (struct comm_point*)calloc(1, + sizeof(struct comm_point)); + short evbits; + if(!c) + return NULL; + c->ev = (struct internal_event*)calloc(1, + sizeof(struct internal_event)); + if(!c->ev) { + free(c); + return NULL; + } + c->ev->base = base; + c->fd = fd; + c->buffer = buffer; + c->timeout = NULL; + c->tcp_is_reading = 0; + c->tcp_byte_count = 0; + c->tcp_parent = NULL; + c->max_tcp_count = 0; + c->cur_tcp_count = 0; + c->tcp_handlers = NULL; + c->tcp_free = NULL; + c->type = comm_doq; + c->tcp_do_close = 0; + c->do_not_close = 0; + c->tcp_do_toggle_rw = 0; + c->tcp_check_nb_connect = 0; +#ifdef USE_MSG_FASTOPEN + c->tcp_do_fastopen = 0; +#endif +#ifdef USE_DNSCRYPT + c->dnscrypt = 0; + c->dnscrypt_buffer = NULL; +#endif +#ifdef HAVE_NGTCP2 + c->doq_socket = doq_server_socket_create(table, rnd, ssl_service_key, + ssl_service_pem, c, base, cfg); + if(!c->doq_socket) { + log_err("could not create doq comm_point"); + comm_point_delete(c); + return NULL; + } +#endif + c->inuse = 0; + c->callback = callback; + c->cb_arg = callback_arg; + c->socket = socket; + c->pp2_enabled = 0; + c->pp2_header_state = pp2_header_none; + evbits = UB_EV_READ | UB_EV_PERSIST; + /* ub_event stuff */ + c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits, + comm_point_doq_callback, c); + if(c->ev->ev == NULL) { + log_err("could not baseset udp event"); + 
comm_point_delete(c); + return NULL; + } + if(fd!=-1 && ub_event_add(c->ev->ev, c->timeout) != 0 ) { + log_err("could not add udp event"); + comm_point_delete(c); + return NULL; + } + c->event_added = 1; + return c; +#else + /* no libngtcp2, so no QUIC support */ + (void)base; + (void)buffer; + (void)callback; + (void)callback_arg; + (void)socket; + (void)rnd; + (void)table; + (void)ssl_service_key; + (void)ssl_service_pem; + (void)cfg; + sock_close(fd); + return NULL; +#endif /* HAVE_NGTCP2 */ +} + static struct comm_point* comm_point_create_tcp_handler(struct comm_base *base, struct comm_point* parent, size_t bufsize, @@ -4749,11 +6615,29 @@ comm_point_delete(struct comm_point* c) http2_session_delete(c->h2_session); } } +#ifdef HAVE_NGTCP2 + if(c->doq_socket) + doq_server_socket_delete(c->doq_socket); +#endif ub_event_free(c->ev->ev); free(c->ev); free(c); } +#ifdef USE_DNSTAP +static void +send_reply_dnstap(struct dt_env* dtenv, + struct sockaddr* addr, socklen_t addrlen, + struct sockaddr_storage* client_addr, socklen_t client_addrlen, + enum comm_point_type type, void* ssl, sldns_buffer* buffer) +{ + log_addr(VERB_ALGO, "from local addr", (void*)addr, addrlen); + log_addr(VERB_ALGO, "response to client", client_addr, client_addrlen); + dt_msg_send_client_response(dtenv, client_addr, + (struct sockaddr_storage*)addr, type, ssl, buffer); +} +#endif + void comm_point_send_reply(struct comm_reply *repinfo) { @@ -4778,24 +6662,44 @@ comm_point_send_reply(struct comm_reply *repinfo) repinfo->remote_addrlen, 0); #ifdef USE_DNSTAP /* - * sending src (client)/dst (local service) addresses over DNSTAP from udp callback + * sending src (client)/dst (local service) addresses over + * DNSTAP from udp callback */ if(repinfo->c->dtenv != NULL && repinfo->c->dtenv->log_client_response_messages) { - log_addr(VERB_ALGO, "from local addr", (void*)repinfo->c->socket->addr, repinfo->c->socket->addrlen); - log_addr(VERB_ALGO, "response to client", &repinfo->client_addr, 
repinfo->client_addrlen); - dt_msg_send_client_response(repinfo->c->dtenv, &repinfo->client_addr, (void*)repinfo->c->socket->addr, repinfo->c->type, repinfo->c->ssl, repinfo->c->buffer); + send_reply_dnstap(repinfo->c->dtenv, + repinfo->c->socket->addr, + repinfo->c->socket->addrlen, + &repinfo->client_addr, repinfo->client_addrlen, + repinfo->c->type, repinfo->c->ssl, + repinfo->c->buffer); } #endif } else { #ifdef USE_DNSTAP + struct dt_env* dtenv = +#ifdef HAVE_NGTCP2 + repinfo->c->doq_socket + ?repinfo->c->dtenv: +#endif + repinfo->c->tcp_parent->dtenv; + struct sldns_buffer* dtbuffer = repinfo->c->tcp_req_info + ?repinfo->c->tcp_req_info->spool_buffer + :repinfo->c->buffer; +#ifdef USE_DNSCRYPT + if(repinfo->c->dnscrypt && repinfo->is_dnscrypted) + dtbuffer = repinfo->c->buffer; +#endif /* - * sending src (client)/dst (local service) addresses over DNSTAP from TCP callback + * sending src (client)/dst (local service) addresses over + * DNSTAP from other callbacks */ - if(repinfo->c->tcp_parent->dtenv != NULL && repinfo->c->tcp_parent->dtenv->log_client_response_messages) { - log_addr(VERB_ALGO, "from local addr", (void*)repinfo->c->socket->addr, repinfo->c->socket->addrlen); - log_addr(VERB_ALGO, "response to client", &repinfo->client_addr, repinfo->client_addrlen); - dt_msg_send_client_response(repinfo->c->tcp_parent->dtenv, &repinfo->client_addr, (void*)repinfo->c->socket->addr, repinfo->c->type, repinfo->c->ssl, - ( repinfo->c->tcp_req_info? 
repinfo->c->tcp_req_info->spool_buffer: repinfo->c->buffer )); + if(dtenv != NULL && dtenv->log_client_response_messages) { + send_reply_dnstap(dtenv, + repinfo->c->socket->addr, + repinfo->c->socket->addrlen, + &repinfo->client_addr, repinfo->client_addrlen, + repinfo->c->type, repinfo->c->ssl, + dtbuffer); } #endif if(repinfo->c->tcp_req_info) { @@ -4811,6 +6715,10 @@ comm_point_send_reply(struct comm_reply *repinfo) comm_point_start_listening(repinfo->c, -1, adjusted_tcp_timeout(repinfo->c)); return; +#ifdef HAVE_NGTCP2 + } else if(repinfo->c->doq_socket) { + doq_socket_send_reply(repinfo); +#endif } else { comm_point_start_listening(repinfo->c, -1, adjusted_tcp_timeout(repinfo->c)); @@ -4838,6 +6746,11 @@ comm_point_drop_reply(struct comm_reply* repinfo) } reclaim_http_handler(repinfo->c); return; +#ifdef HAVE_NGTCP2 + } else if(repinfo->c->type == comm_doq) { + doq_socket_drop_reply(repinfo); + return; +#endif } reclaim_tcp_handler(repinfo->c); } diff --git a/util/netevent.h b/util/netevent.h index 6f43ce56c..acc4887b1 100644 --- a/util/netevent.h +++ b/util/netevent.h @@ -65,6 +65,9 @@ #ifdef HAVE_NGHTTP2_NGHTTP2_H #include <nghttp2/nghttp2.h> #endif +#ifdef HAVE_NGTCP2 +#include <ngtcp2/ngtcp2.h> +#endif struct sldns_buffer; struct comm_point; @@ -72,6 +75,11 @@ struct comm_reply; struct tcl_list; struct ub_event_base; struct unbound_socket; +struct doq_server_socket; +struct doq_table; +struct doq_conn; +struct config_file; +struct ub_randstate; struct mesh_state; struct mesh_area; @@ -105,6 +113,8 @@ typedef int comm_point_callback_type(struct comm_point*, void*, int, #define NETEVENT_SLOW_ACCEPT_TIME 2000 /** timeout to slow down log print, so it does not spam the logs, in sec */ #define SLOW_LOG_TIME 10 +/** for doq, the maximum dcid length, in ngtcp2 it is 20. */ +#define DOQ_MAX_CIDLEN 24 /** * A communication point dispatcher. Thread specific.
@@ -164,6 +174,19 @@ struct comm_reply { struct sockaddr_storage client_addr; /** the original address length */ socklen_t client_addrlen; +#ifdef HAVE_NGTCP2 + /** the doq ifindex, together with addr and localaddr in pktinfo, + * and dcid makes the doq_conn_key to find the connection */ + int doq_ifindex; + /** the doq dcid, the connection id used to find the connection */ + uint8_t doq_dcid[DOQ_MAX_CIDLEN]; + /** the length of the doq dcid */ + size_t doq_dcidlen; + /** the doq stream id where the query came in on */ + int64_t doq_streamid; + /** port number for doq */ + int doq_srcport; +#endif /* HAVE_NGTCP2 */ }; /** @@ -266,6 +289,11 @@ struct comm_point { /** maximum number of HTTP/2 streams per connection. Send in HTTP/2 * SETTINGS frame. */ uint32_t http2_max_streams; + /* -------- DoQ ------- */ +#ifdef HAVE_NGTCP2 + /** the doq server socket, with list of doq connections */ + struct doq_server_socket* doq_socket; +#endif /* -------- dnstap ------- */ /** the dnstap environment */ @@ -281,6 +309,8 @@ struct comm_point { comm_tcp, /** HTTP handler socket */ comm_http, + /** DOQ handler socket */ + comm_doq, /** AF_UNIX socket - for internal commands. */ comm_local, /** raw - not DNS format - for pipe readers and writers */ @@ -552,6 +582,30 @@ struct comm_point* comm_point_create_udp_ancil(struct comm_base* base, int fd, struct sldns_buffer* buffer, int pp2_enabled, comm_point_callback_type* callback, void* callback_arg, struct unbound_socket* socket); +/** + * Create a UDP comm point for DoQ. Calls malloc. + * Sets up the structure with the parameters you provide. + * @param base: in which base to alloc the commpoint. + * @param fd : file descriptor of open UDP socket. + * @param buffer: shared buffer by UDP sockets from this thread. + * @param callback: callback function pointer. + * @param callback_arg: will be passed to your callback function. + * @param socket: the opened socket properties; will be passed to your callback function.
+ * @param table: the doq connection table for the host. + * @param rnd: random generator to use. + * @param ssl_service_key: the ssl service key file. + * @param ssl_service_pem: the ssl service pem file. + * @param cfg: config file struct. + * @return: returns the allocated communication point. NULL on error. + * Sets timeout to NULL. Turns off TCP options. + */ +struct comm_point* comm_point_create_doq(struct comm_base* base, + int fd, struct sldns_buffer* buffer, + comm_point_callback_type* callback, void* callback_arg, + struct unbound_socket* socket, struct doq_table* table, + struct ub_randstate* rnd, const char* ssl_service_key, + const char* ssl_service_pem, struct config_file* cfg); + /** * Create a TCP listener comm point. Calls malloc. * Setups the structure with the parameters you provide. @@ -821,6 +875,16 @@ void comm_point_udp_callback(int fd, short event, void* arg); */ void comm_point_udp_ancil_callback(int fd, short event, void* arg); +/** + * This routine is published for checks and tests, and is only used internally. + * handle libevent callback for doq comm point. + * @param fd: file descriptor. + * @param event: event bits from libevent: + * EV_READ, EV_WRITE, EV_SIGNAL, EV_TIMEOUT. + * @param arg: the comm_point structure. + */ +void comm_point_doq_callback(int fd, short event, void* arg); + /** * This routine is published for checks and tests, and is only used internally. * handle libevent callback for tcp accept comm point @@ -958,6 +1022,106 @@ void http2_stream_add_meshstate(struct http2_stream* h2_stream, /** Remove mesh state from stream. When the mesh state has been removed. */ void http2_stream_remove_mesh_state(struct http2_stream* h2_stream); +/** + * DoQ socket address storage for IP4 or IP6 address. Smaller than + * the sockaddr_storage because not with af_unix pathnames. 
+ */ +struct doq_addr_storage { + union { + struct sockaddr_in in; +#ifdef AF_INET6 + struct sockaddr_in6 in6; +#endif + } sockaddr; +}; + +/** + * The DoQ server socket information, for DNS over QUIC. + */ +struct doq_server_socket { + /** the doq connection table */ + struct doq_table* table; + /** random generator */ + struct ub_randstate* rnd; + /** if address validation is enabled */ + uint8_t validate_addr; + /** the ssl service key file */ + char* ssl_service_key; + /** the ssl service pem file */ + char* ssl_service_pem; + /** the ssl verify pem file */ + char* ssl_verify_pem; + /** the server scid length */ + int sv_scidlen; + /** the idle timeout in nanoseconds */ + uint64_t idle_timeout; + /** the static secret for the server */ + uint8_t* static_secret; + /** length of the static secret */ + size_t static_secret_len; + /** ssl context, SSL_CTX* */ + void* ctx; +#ifndef HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_SERVER_CONTEXT + /** quic method functions, SSL_QUIC_METHOD* */ + void* quic_method; +#endif + /** the comm point for this doq server socket */ + struct comm_point* cp; + /** the buffer for packets, doq in and out */ + struct sldns_buffer* pkt_buf; + /** the current doq connection when we are in callbacks to worker, + * so that we have the already locked structure at our disposal. */ + struct doq_conn* current_conn; + /** if the callback event on the fd has write flags */ + uint8_t event_has_write; + /** if there is a blocked packet in the blocked_pkt buffer */ + int have_blocked_pkt; + /** store blocked packet, a packet that could not be sent on the + * nonblocking socket. It has to be sent later, when the write on + * the udp socket unblocks. */ + struct sldns_buffer* blocked_pkt; +#ifdef HAVE_NGTCP2 + /** the ecn info for the blocked packet, congestion information. */ + struct ngtcp2_pkt_info blocked_pkt_pi; +#endif + /** the packet destination for the blocked packet.
*/ + struct doq_pkt_addr* blocked_paddr; + /** timer for this worker on this comm_point to wait on. */ + struct comm_timer* timer; + /** the timer that is marked by the doq_socket as waited on. */ + struct timeval marked_time; + /** the current time for use by time functions, time_t. */ + time_t* now_tt; + /** the current time for use by time functions, timeval. */ + struct timeval* now_tv; + /** config file for the worker. */ + struct config_file* cfg; +}; + +/** + * DoQ packet address information. From pktinfo, stores local and remote + * address and ifindex, so the packet can be sent there. + */ +struct doq_pkt_addr { + /** the remote addr, and local addr */ + struct doq_addr_storage addr, localaddr; + /** length of addr and length of localaddr */ + socklen_t addrlen, localaddrlen; + /** interface index from pktinfo ancillary information */ + int ifindex; +}; + +/** Initialize the pkt addr with lengths set to sizeof. That is ready for + * a call to recv. */ +void doq_pkt_addr_init(struct doq_pkt_addr* paddr); + +/** send doq packet over UDP. */ +void doq_send_pkt(struct comm_point* c, struct doq_pkt_addr* paddr, + uint32_t ecn); + +/** doq timer callback function. */ +void doq_timer_cb(void* arg); + /** * This routine is published for checks and tests, and is only used internally. * handle libevent callback for timer comm. 
diff --git a/util/storage/dnstree.c b/util/storage/dnstree.c index eef393f91..93a0cc4c3 100644 --- a/util/storage/dnstree.c +++ b/util/storage/dnstree.c @@ -75,7 +75,7 @@ int addr_tree_addrport_compare(const void* k1, const void* k2) { struct addr_tree_node* n1 = (struct addr_tree_node*)k1; struct addr_tree_node* n2 = (struct addr_tree_node*)k2; - return sockaddr_cmp(&n1->addr, n1->addrlen, &n2->addr, + return sockaddr_cmp_scopeid(&n1->addr, n1->addrlen, &n2->addr, n2->addrlen); } diff --git a/validator/val_neg.c b/validator/val_neg.c index 52bc68387..b5b678fde 100644 --- a/validator/val_neg.c +++ b/validator/val_neg.c @@ -823,7 +823,8 @@ void neg_insert_data(struct val_neg_cache* neg, it <= neg->nsec3_max_iter && (h != zone->nsec3_hash || it != zone->nsec3_iter || slen != zone->nsec3_saltlen || - memcmp(zone->nsec3_salt, s, slen) != 0)) { + (slen != 0 && zone->nsec3_salt && s + && memcmp(zone->nsec3_salt, s, slen) != 0))) { if(slen > 0) { uint8_t* sa = memdup(s, slen); @@ -1206,7 +1207,8 @@ neg_params_ok(struct val_neg_zone* zone, struct ub_packed_rrset_key* rrset) return 0; return (h == zone->nsec3_hash && it == zone->nsec3_iter && slen == zone->nsec3_saltlen && - memcmp(zone->nsec3_salt, s, slen) == 0); + (slen == 0 /* empty salts are equal */ || (zone->nsec3_salt && s + && memcmp(zone->nsec3_salt, s, slen) == 0))); } /** get next closer for nsec3 proof */ diff --git a/validator/val_nsec3.c b/validator/val_nsec3.c index e790e9982..998fcc4e3 100644 --- a/validator/val_nsec3.c +++ b/validator/val_nsec3.c @@ -565,7 +565,8 @@ nsec3_get_hashed(sldns_buffer* buf, uint8_t* nm, size_t nmlen, int algo, sldns_buffer_clear(buf); sldns_buffer_write(buf, nm, nmlen); query_dname_tolower(sldns_buffer_begin(buf)); - sldns_buffer_write(buf, salt, saltlen); + if(saltlen != 0) + sldns_buffer_write(buf, salt, saltlen); sldns_buffer_flip(buf); hash_len = nsec3_hash_algo_size_supported(algo); if(hash_len == 0) { @@ -580,7 +581,8 @@ nsec3_get_hashed(sldns_buffer* buf, uint8_t* nm, size_t nmlen, int algo, for(i=0; i