From b9658b5f4441a2de523a3b9e74dd53b93d47c74e Mon Sep 17 00:00:00 2001 From: Jason Rhinelander Date: Tue, 20 Jan 2026 15:37:52 -0400 Subject: [PATCH 1/2] Fix PTR response encoding --- src/dns/rr.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/dns/rr.hpp b/src/dns/rr.hpp index eea0aba45..787a80dbe 100644 --- a/src/dns/rr.hpp +++ b/src/dns/rr.hpp @@ -135,7 +135,7 @@ namespace srouter::dns struct RR_PTR : RR_target { using RR_target::RR_target; - RRType rr_type() const override { return RRType::A; } + RRType rr_type() const override { return RRType::PTR; } }; struct RR_CNAME : RR_target { From 7670eb6de65a62c05132c947a1a52cca6f8e6d81 Mon Sep 17 00:00:00 2001 From: Jason Rhinelander Date: Tue, 20 Jan 2026 20:57:28 -0400 Subject: [PATCH 2/2] DNS: Add SOA records to negative responses Without SOA records, caching DNS servers in front of Session Router (such as you might use on a local network for network-side Session Router connectivity) cannot cache the result, but most of the NXDOMAIN and NODATAs we produce are and should be cachable. Unlike positive records, which have their own TTL, negative results requiring hacking the nack TTL through an SOA record in the authority section from which the MINIMUM value. This commit adds it so that we are doing the right thing. This should also prevent PTR records from falling back to upstream DNS (where the entire private range gets NACKed) by being properly authoritative for the local IPs we serve even when those IPs aren't currently mapped. --- src/dns/handler.cpp | 112 ++++++++++++++++++++++++++++++++++++++++++-- src/dns/handler.hpp | 11 +++++ src/dns/message.cpp | 5 +- src/dns/message.hpp | 2 +- src/dns/rr.cpp | 35 ++++++++++++++ src/dns/rr.hpp | 31 ++++++++++++ 6 files changed, 189 insertions(+), 7 deletions(-) diff --git a/src/dns/handler.cpp b/src/dns/handler.cpp index 19f36627a..5bd279882 100644 --- a/src/dns/handler.cpp +++ b/src/dns/handler.cpp @@ -134,13 +134,27 @@ namespace srouter::dns // is this firefox looking for their backdoor record? if (q.name() == "use-application-dns.net") + { // yea it is, let's turn off DoH because god is dead. + add_nx_soa(msg, "use-application-dns.net", 30s); return reply(msg.nxdomain().encode(tcp)); // press F to pay respects and send it back where it came from + } // Not for us, so forward to upstream handler forward(std::move(msg), std::move(reply), tcp); } + void RequestHandler::add_nx_soa(Message& m, std::string domain, std::chrono::seconds ttl) + { + m.authorities.push_back(std::make_unique( + domain, + 10min /* ttl of the SOA record itself */, + "localhost.sesh", + "sr.localhost.sesh", + _soa_serial++, + ttl)); + } + bool RequestHandler::handle_local(ReplyCallback& reply, Message& msg, std::string qname, bool tcp) { // hook any PTR (reverse DNS) lookups for our local ranges @@ -255,7 +269,7 @@ namespace srouter::dns cname_only = q.qtype == dns::RRType::CNAME, tcp]( std::optional maybe_netaddr, - bool /*assertive*/, + bool assertive, std::chrono::milliseconds ttl) mutable { auto& msg = *msg_ptr; msg.set_rr_name(lookup); @@ -275,10 +289,10 @@ namespace srouter::dns } return; } - // TODO FIXME: if `assertive` is true then we can provide a TTL for this failure - // (via an SOA authority record). (When not assertive we shouldn't do so, - // because not having an SOA TTL means a downstream recursive resolver shouldn't - // cache the negative response). + + if (assertive) + add_nx_soa(msg, "loki", std::chrono::floor(ttl)); + reply(msg.nxdomain().encode(tcp)); }); return true; @@ -302,7 +316,10 @@ namespace srouter::dns fmt::join(rc->version(), "."), rc->addr(), rc->timestamp().time_since_epoch().count())); } else + { + add_nx_soa(msg, std::string{RELAY_TLD}, 5s); msg.nxdomain(); + } } // TXT on path.PUBKEY.{sesh,snode} returns the current path info to that node, if a @@ -337,11 +354,16 @@ namespace srouter::dns else { log::warning(logcat, "Failed to parse network address {}.{} for path query", hostname, tld); + // If this name was invalid, allow the NXDOMAIN to be cached: + add_nx_soa(msg, tld, 30s); msg.nxdomain(); } } else + { + add_nx_soa(msg, tld, 30s); msg.nxdomain(); + } reply(msg.encode(tcp)); return true; } @@ -384,9 +406,20 @@ namespace srouter::dns // the proper DNS way to say "something exists at this address, but not with the // type you requested requested", as opposed to this nx_reply below, which means // "this record does not exist"). + // + // In order for this NODATA result to be properly cacheable, we need an SOA + // record included. It'll also never work in the future, so we can use a + // relatively longer negative TTL via the SOA. + add_nx_soa(msg, tld, 5min); } else + { + // We failed to initiate a sesssion for some reason, which likely means there's + // something invalid in what you requested, so make this response authoritative + // and cacheable. + add_nx_soa(msg, tld, 15s); msg.nxdomain(); + } reply(msg.encode(tcp)); return true; @@ -413,6 +446,8 @@ namespace srouter::dns msg->add_reply(srv); } else + // Re-trying the request could initiate a new lookup, so *don't* put an + // SOA on this so that the NACK isn't cached. msg->nxdomain(); reply(msg->encode(tcp)); @@ -423,6 +458,7 @@ namespace srouter::dns // If we got through everything above without answering then they requested something weird // (unhandled RR type, perhaps) and so let's just give an NXDOMAIN back: + add_nx_soa(msg, tld, 30s); reply(msg.nxdomain().encode(tcp)); return true; } @@ -444,7 +480,73 @@ namespace srouter::dns if (mapped) msg.add_ptr_reply(mapped->to_string()); else + { + // DNS NXDOMAIN records aren't cacheable unless they also have a pseudo-TTL, but there + // is no direct TTL for does-not-exist: instead it is carried in an SOA record, so make + // one of those here with a few seconds TTL so that it is (briefly) cacheable, and so + // that it not currently existing is treated as an authoritative response. + + std::string auth_name; + if (auto* addr4 = std::get_if(&*ip)) + { + auto net = _router.tun_endpoint()->get_ipv4_network().to_range(); + + // We were asked for A.B.C.D, and so in the SOA we need to indicate what we are + // authoritative over. For a /8, /16, or /24 this is easy, just A.in-addr.arpa or + // B.A.in-addr.arpa or C.B.A.in-addr.arpa but for, say, a /18 this is more + // complicated: we need to round up the netmask to the next multiple of 8 + // (effectively shrinking the network size) and then return SOA for that. + // + // For example, if we are responsible for 10.1.0.0/18 that means 10.1.0.* through + // 10.1.63.* but not (e.g.) 10.1.65.*, and so we need to "round up" the netmask to + // /24 and then assert authority over the /24 that included the asked for record. + // (And so technically we could have 64 different SOA records, but that's okay + // because we produce them on demand). + + uint8_t mask_up = (std::clamp(net.mask, 1, 32) + 7) / 8 * 8; + assert(mask_up % 8 == 0); + + uint32_t soa_addr = (*addr4 / mask_up).ip.addr >> (32 - mask_up); + for (uint8_t m = mask_up; m > 0; m -= 8) + { + fmt::format_to(std::back_inserter(auth_name), "{}.", soa_addr % 256); + soa_addr >>= 8; + } + auth_name += ".in-addr.arpa"; + } + else + { + auto& addr6 = std::get(*ip); + auto net = _router.tun_endpoint()->get_ipv6_network().to_range(); + + // Similar to the above, but everything operators on 4-bit hex nybbles rather than 8-bit + // integers. e.g. + // abcd:234::7 is 7.0.0.0........0.4.3.2.0.d.c.b.a + // and so we have to do the same SOA subdivision (basically our local range netmask + // might not be a multiple of 4). + uint8_t mask_up = (std::clamp(net.mask, 1, 128) + 3) / 4 * 4; + assert(mask_up % 4 == 0); + + // Rather than calculating this with lots of bit fiddling (complicated by the fact + // that we need to store the address in two uint64_ts), instead just cheat by using + // a string representation. Probably less efficient, but this is not a hot loop + // path. + auto soa_base = (addr6 / mask_up).ip; + + // Start with a full, 32-digit raw hex representation of the ipv6 addr (not the + // usual notation with :, just raw, full width hex digits): + auto full = fmt::format("{:016x}{:016x}", soa_base.hi, soa_base.lo); + + // chop the netmasked hex digits off the end: + full.resize(mask_up / 4); + + // now just reverse the remaining hex digits and join with .'s and the suffix: + auth_name = fmt::format("{}.ip6.arpa", fmt::join(full.rbegin(), full.rend(), ".")); + } + msg.nxdomain(); + add_nx_soa(msg, std::move(auth_name), 5s); + } reply(msg.encode(tcp)); diff --git a/src/dns/handler.hpp b/src/dns/handler.hpp index 84458a5ad..aa39513fd 100644 --- a/src/dns/handler.hpp +++ b/src/dns/handler.hpp @@ -40,6 +40,11 @@ namespace srouter::dns // something outside of Session Router domains. std::optional _unbound; + // Our serial for SOA records (typically only included for negative responses, i.e. + // something we are responsible for that doesn't exist). We increment this every time it is + // used to avoid caching issues. + uint32_t _soa_serial = 1; + // Called to check if the request is for a local name (i.e. .sesh, .snode, .loki, or a PTR // record for one of the addresses in our tun). If so, this handles the request and returns // true; otherwise returns false. @@ -51,6 +56,12 @@ namespace srouter::dns // Answers the question recursively via our configured upstream DNS servers (if any) void forward(Message&& m, ReplyCallback&& reply, bool tcp); + + // Adds an SOA authority record to the message; generally you should only do this for + // negative replies (i.e. record does not exist) when you want the lack of record to be + // cachable for up to `ttl`. Including it in other responses is pointless: positive results + // have their own TTL, and not including an SOA should prevent caching. + void add_nx_soa(Message& m, std::string domain, std::chrono::seconds ttl); }; } // namespace srouter::dns diff --git a/src/dns/message.cpp b/src/dns/message.cpp index 0edf61d18..2cfa82449 100644 --- a/src/dns/message.cpp +++ b/src/dns/message.cpp @@ -49,7 +49,7 @@ namespace srouter::dns hdr_fields, question ? uint16_t{1} : uint16_t{0}, static_cast(answers.size()), - static_cast(0 /*authorities.size()*/), + static_cast(authorities.size()), static_cast(additional_edns ? 1 : 0 /*additional.size()*/)); if (question) @@ -66,6 +66,9 @@ namespace srouter::dns for (auto& a : answers) a->encode(buf, prev_names, buf_offset); + for (auto& a : authorities) + a->encode(buf, prev_names, buf_offset); + if (additional_edns) additional_edns->encode(buf, prev_names, buf_offset); } diff --git a/src/dns/message.hpp b/src/dns/message.hpp index 0b0cd5471..f640ee36f 100644 --- a/src/dns/message.hpp +++ b/src/dns/message.hpp @@ -106,9 +106,9 @@ namespace srouter std::optional question; std::vector> answers; + std::vector> authorities; // Currently unused: - // std::vector authorities; // std::vector additional; // Currently the only additional record we do anything with is the OPT section for diff --git a/src/dns/rr.cpp b/src/dns/rr.cpp index f9d8e2a2c..9391cc76c 100644 --- a/src/dns/rr.cpp +++ b/src/dns/rr.cpp @@ -117,6 +117,41 @@ namespace srouter::dns } while (!value.empty()); } + RR_SOA::RR_SOA( + std::string rr_name, + std::chrono::seconds ttl, + std::string_view mname, + std::string_view rname, + uint32_t serial, + std::chrono::seconds minimum, + std::chrono::seconds refresh, + std::chrono::seconds retry, + std::chrono::seconds expire) + : RR_bytes{std::move(rr_name), ttl} + { + // for mname and rname we don't use name compression (it's allowed, but usually not done) + // and so each one of these, with standard encoding, goes from "abc.def.xyz." to + // '\x03abc\x03def\x03xyz\x00': that is, each dot-terminated segment gains a \x03, but we + // don't include the .'s, and then append a null, meaning the total size will be currsize + + // 1. If the string aren't .-terminated, however, we imply the . and thus each becomes + // currsize + 2. + rData.resize( + (mname.size() + !mname.ends_with('.') + 1) + (rname.size() + !rname.ends_with('.') + 1) + + 5 * sizeof(uint32_t)); + std::span buf{rData.data(), rData.size()}; + encode_name(buf, mname, nullptr, nullptr); + encode_name(buf, rname, nullptr, nullptr); + assert(buf.size() == 5 * sizeof(uint32_t)); + oxenc::write_host_as_big(serial, buf.data()); + buf = buf.subspan(sizeof(uint32_t)); + for (auto* i : {&refresh, &retry, &expire, &minimum}) + { + oxenc::write_host_as_big(static_cast(i->count()), buf.data()); + buf = buf.subspan(sizeof(uint32_t)); + } + assert(buf.empty()); + } + void RR_target::encode_data(std::span& buf, prev_names_t& prev_names, uint16_t& buf_offset) const { encode_name(buf, name, &prev_names, &buf_offset); diff --git a/src/dns/rr.hpp b/src/dns/rr.hpp index 787a80dbe..e8976dc2a 100644 --- a/src/dns/rr.hpp +++ b/src/dns/rr.hpp @@ -19,6 +19,7 @@ namespace srouter::dns { A = 1, CNAME = 5, + SOA = 6, PTR = 12, TXT = 16, AAAA = 28, @@ -119,6 +120,36 @@ namespace srouter::dns RR_TXT(std::string rr_name, std::chrono::seconds ttl, std::string_view value); RRType rr_type() const override { return RRType::TXT; } }; + struct RR_SOA : RR_bytes + { + // SOA records consist of: + // - MNAME -- "master name server", a DNS name (e.g. localhost.sesh). We don't require a + // trailing . + // - RNAME -- "responsible party", an e-mail address (but with @ replaced with a .) back in + // 1987's naïve version of the internet, and basically always ignored even back then. We + // don't require a trailing . + // - SERIAL -- should change whenever records change. In SR we just increment it on every + // request. + // - MINIMUM -- the TTL for NXDOMAIN and NODATA responses, i.e. the negative caching TTL for + // caching DNS servers that get this record. + // - REFRESH/RETRY/EXPIRY -- how often (in seconds) secondary DNS should wait to + // refresh/retry after error/expire data received from the primary. In SR context these + // values are fairly meaningless and are unlikely to be used by anything. + // + // (MINIMUM comes after REFRESH/RETRY/EXPIRY in the actual record, but we want to default + // them and so rearrange constructor arguments). + RR_SOA( + std::string rr_name, + std::chrono::seconds ttl, + std::string_view mname, + std::string_view rname, + uint32_t serial, + std::chrono::seconds minimum, + std::chrono::seconds refresh = 1h, + std::chrono::seconds retry = 15min, + std::chrono::seconds expire = 14 * 24h); + RRType rr_type() const override { return RRType::SOA; } + }; // Base class for RR types that have a single target name as the value, such as CNAME and PTR struct RR_target : ResourceRecord