diff --git a/scripts/sync_vendor.py b/scripts/sync_vendor.py index 2fb809a9f07..7d504ccc52b 100755 --- a/scripts/sync_vendor.py +++ b/scripts/sync_vendor.py @@ -5,7 +5,7 @@ import sys import subprocess -HTTPLIB_VERSION = "refs/tags/v0.34.0" +HTTPLIB_VERSION = "refs/tags/v0.35.0" vendor = { "https://github.com/nlohmann/json/releases/latest/download/json.hpp": "vendor/nlohmann/json.hpp", diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index d98a090c327..7e0b17a7c1f 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -257,6 +257,21 @@ set(LLAMA_TEST_NAME test-mtmd-c-api) llama_build_and_test(test-mtmd-c-api.c) target_link_libraries(${LLAMA_TEST_NAME} PRIVATE mtmd) +# GGUF model data fetcher library for tests that need real model metadata +# Only compile when cpp-httplib has SSL support (CPPHTTPLIB_OPENSSL_SUPPORT) +if (TARGET cpp-httplib) + get_target_property(_cpp_httplib_defs cpp-httplib INTERFACE_COMPILE_DEFINITIONS) + if (_cpp_httplib_defs MATCHES "CPPHTTPLIB_OPENSSL_SUPPORT") + add_library(gguf-model-data STATIC gguf-model-data.cpp) + target_link_libraries(gguf-model-data PRIVATE common cpp-httplib) + target_include_directories(gguf-model-data PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) + + add_executable(test-gguf-model-data test-gguf-model-data.cpp) + target_link_libraries(test-gguf-model-data PRIVATE gguf-model-data common) + llama_test(test-gguf-model-data LABEL "model") + endif() +endif() + # dummy executable - not installed get_filename_component(TEST_TARGET test-c.c NAME_WE) add_executable(${TEST_TARGET} test-c.c) diff --git a/tests/gguf-model-data.cpp b/tests/gguf-model-data.cpp new file mode 100644 index 00000000000..3bc82c88dac --- /dev/null +++ b/tests/gguf-model-data.cpp @@ -0,0 +1,613 @@ +// GGUF binary parser adapted from the huggingface/gguf package. +// Reference: https://github.com/huggingface/huggingface.js + +#include "gguf-model-data.h" + +#include "common.h" +#include "gguf.h" + +#include +#include +#include +#include +#include + +#include "http.h" +#define JSON_ASSERT GGML_ASSERT +#include + +// Equivalent of RangeView +struct gguf_buf_reader { + const char * data; + size_t size; + size_t pos; + + gguf_buf_reader(const std::vector & buf) : data(buf.data()), size(buf.size()), pos(0) {} + + bool has_n_bytes(size_t n) const { + return pos + n <= size; + } + + template + bool read_val(T & out) { + if (!has_n_bytes(sizeof(T))) { + return false; + } + memcpy(&out, data + pos, sizeof(T)); + pos += sizeof(T); + return true; + } + + bool read_str(std::string & out) { + uint64_t len; + if (!read_val(len)) { + return false; + } + if (!has_n_bytes((size_t)len)) { + return false; + } + out.assign(data + pos, (size_t)len); + pos += (size_t)len; + return true; + } + + bool skip(size_t n) { + if (!has_n_bytes(n)) { + return false; + } + pos += n; + return true; + } +}; + +static size_t gguf_val_type_size(int32_t vtype) { + switch (vtype) { + case GGUF_TYPE_UINT8: return 1; + case GGUF_TYPE_INT8: return 1; + case GGUF_TYPE_UINT16: return 2; + case GGUF_TYPE_INT16: return 2; + case GGUF_TYPE_UINT32: return 4; + case GGUF_TYPE_INT32: return 4; + case GGUF_TYPE_FLOAT32: return 4; + case GGUF_TYPE_BOOL: return 1; + case GGUF_TYPE_UINT64: return 8; + case GGUF_TYPE_INT64: return 8; + case GGUF_TYPE_FLOAT64: return 8; + default: return 0; // string/array handled separately + } +} + +// Equivalent of readMetadataValue(), skips unused values rather than storing +static bool gguf_skip_value(gguf_buf_reader & r, int32_t vtype) { + if (vtype == GGUF_TYPE_STRING) { + std::string tmp; + return r.read_str(tmp); + } + if (vtype == GGUF_TYPE_ARRAY) { + int32_t elem_type; + uint64_t count; + if (!r.read_val(elem_type)) { + return false; + } + if (!r.read_val(count)) { + return false; + } + if (elem_type == GGUF_TYPE_STRING) { + for (uint64_t i = 0; i < count; i++) { + std::string tmp; + if (!r.read_str(tmp)) { + return false; + } + } + return true; + } + if (elem_type == GGUF_TYPE_ARRAY) { + // nested arrays - recurse + for (uint64_t i = 0; i < count; i++) { + if (!gguf_skip_value(r, GGUF_TYPE_ARRAY)) { + return false; + } + } + return true; + } + size_t elem_sz = gguf_val_type_size(elem_type); + if (elem_sz == 0) { + return false; + } + return r.skip((size_t)count * elem_sz); + } + size_t sz = gguf_val_type_size(vtype); + if (sz == 0) { + return false; + } + return r.skip(sz); +} + +static bool gguf_read_uint32_val(gguf_buf_reader & r, int32_t vtype, uint32_t & out) { + if (vtype == GGUF_TYPE_UINT8) { + uint8_t v; + if (!r.read_val(v)) { + return false; + } + out = v; + return true; + } + if (vtype == GGUF_TYPE_INT8) { + int8_t v; + if (!r.read_val(v)) { + return false; + } + out = (uint32_t)v; + return true; + } + if (vtype == GGUF_TYPE_UINT16) { + uint16_t v; + if (!r.read_val(v)) { + return false; + } + out = v; + return true; + } + if (vtype == GGUF_TYPE_INT16) { + int16_t v; + if (!r.read_val(v)) { + return false; + } + out = (uint32_t)v; + return true; + } + if (vtype == GGUF_TYPE_UINT32) { + uint32_t v; + if (!r.read_val(v)) { + return false; + } + out = v; + return true; + } + if (vtype == GGUF_TYPE_INT32) { + int32_t v; + if (!r.read_val(v)) { + return false; + } + out = (uint32_t)v; + return true; + } + if (vtype == GGUF_TYPE_UINT64) { + uint64_t v; + if (!r.read_val(v)) { + return false; + } + out = (uint32_t)v; + return true; + } + if (vtype == GGUF_TYPE_INT64) { + int64_t v; + if (!r.read_val(v)) { + return false; + } + out = (uint32_t)v; + return true; + } + return false; +} + +// Follows the same header -> KV -> tensor parsing sequence as gguf() huggingface/gguf +static std::optional gguf_parse_meta(const std::vector & buf) { + gguf_buf_reader r(buf); + + // Header: magic(4) + version(4) + tensor_count(8) + kv_count(8) = 24 bytes minimum + uint32_t magic_raw; + if (!r.read_val(magic_raw)) { + return std::nullopt; + } + if (memcmp(&magic_raw, "GGUF", 4) != 0) { + fprintf(stderr, "gguf_parse_meta: invalid magic\n"); + return std::nullopt; + } + + uint32_t version; + if (!r.read_val(version)) { + return std::nullopt; + } + if (version < 2 || version > 3) { + fprintf(stderr, "gguf_parse_meta: unsupported version %u\n", version); + return std::nullopt; + } + + int64_t tensor_count_raw; + int64_t kv_count_raw; + if (!r.read_val(tensor_count_raw)) { + return std::nullopt; + } + if (!r.read_val(kv_count_raw)) { + return std::nullopt; + } + + uint64_t tensor_count = (uint64_t)tensor_count_raw; + uint64_t kv_count = (uint64_t)kv_count_raw; + + gguf_remote_model model; + + std::string arch_prefix; + + // Parse KV pairs + for (uint64_t i = 0; i < kv_count; i++) { + std::string key; + if (!r.read_str(key)) { + return std::nullopt; + } + + int32_t vtype; + if (!r.read_val(vtype)) { + return std::nullopt; + } + + if (key == "general.architecture" && vtype == GGUF_TYPE_STRING) { + if (!r.read_str(model.architecture)) { + return std::nullopt; + } + arch_prefix = model.architecture + "."; + continue; + } + + // Extract split.count for proper handling of split files + if (key == "split.count") { + uint32_t v; + if (!gguf_read_uint32_val(r, vtype, v)) { + return std::nullopt; + } + model.n_split = (uint16_t)v; + continue; + } + + // Extract split.tensors.count so we can verify we have all tensors + if (key == "split.tensors.count") { + uint32_t v; + if (!gguf_read_uint32_val(r, vtype, v)) { + return std::nullopt; + } + model.n_split_tensors = v; + continue; + } + + if (!arch_prefix.empty()) { + uint32_t * target = nullptr; + + if (key == arch_prefix + "embedding_length") { target = &model.n_embd; } + else if (key == arch_prefix + "feed_forward_length") { target = &model.n_ff; } + else if (key == arch_prefix + "block_count") { target = &model.n_layer; } + else if (key == arch_prefix + "attention.head_count") { target = &model.n_head; } + else if (key == arch_prefix + "attention.head_count_kv") { target = &model.n_head_kv; } + else if (key == arch_prefix + "expert_count") { target = &model.n_expert; } + else if (key == arch_prefix + "attention.key_length") { target = &model.n_embd_head_k; } + else if (key == arch_prefix + "attention.value_length") { target = &model.n_embd_head_v; } + + if (target) { + if (!gguf_read_uint32_val(r, vtype, *target)) { + return std::nullopt; + } + continue; + } + } + + if (!gguf_skip_value(r, vtype)) { + return std::nullopt; + } + } + + // Parse tensor info entries + model.tensors.reserve((size_t)tensor_count); + for (uint64_t i = 0; i < tensor_count; i++) { + gguf_remote_tensor t; + + if (!r.read_str(t.name)) { + return std::nullopt; + } + if (!r.read_val(t.n_dims)) { + return std::nullopt; + } + + if (t.n_dims > 4) { + fprintf(stderr, "gguf_parse_meta: tensor '%s' has %u dims (max 4)\n", t.name.c_str(), t.n_dims); + return std::nullopt; + } + + for (uint32_t d = 0; d < t.n_dims; d++) { + if (!r.read_val(t.ne[d])) { + return std::nullopt; + } + } + + int32_t type_raw; + if (!r.read_val(type_raw)) { + return std::nullopt; + } + t.type = (ggml_type)type_raw; + + uint64_t offset; + if (!r.read_val(offset)) { + return std::nullopt; + } + + // Infer n_vocab from token_embd.weight + if (t.name == "token_embd.weight") { + model.n_vocab = (uint32_t)t.ne[1]; + } + + model.tensors.push_back(std::move(t)); + } + + return model; +} + +// cache handling for local download +static std::string get_default_cache_dir() { + return fs_get_cache_directory() + "gguf-headers/"; +} + +static std::string sanitize_for_path(const std::string & s) { + std::string out = s; + for (char & c : out) { + if (c == '/' || c == '\\' || c == ':') { + c = '_'; + } + } + return out; +} + +static bool read_file(const std::string & path, std::vector & out) { + std::ifstream f(path, std::ios::binary | std::ios::ate); + if (!f.good()) { + return false; + } + auto sz = f.tellg(); + if (sz <= 0) { + return false; + } + out.resize((size_t)sz); + f.seekg(0); + f.read(out.data(), sz); + return f.good(); +} + +static bool write_file(const std::string & path, const std::vector & data) { + std::ofstream f(path, std::ios::binary | std::ios::trunc); + if (!f.good()) { + return false; + } + f.write(data.data(), (std::streamsize)data.size()); + return f.good(); +} + +// HuggingFace file auto-detection and HTTP download +static std::pair> gguf_http_get( + const std::string & url, + const httplib::Headers & headers = {}, + int timeout_sec = 60) { + try { + auto [cli, parts] = common_http_client(url); + + if (timeout_sec > 0) { + cli.set_read_timeout(timeout_sec, 0); + cli.set_write_timeout(timeout_sec, 0); + } + cli.set_connection_timeout(30, 0); + + std::vector body; + auto res = cli.Get(parts.path, headers, + [&](const char * data, size_t len) { + body.insert(body.end(), data, data + len); + return true; + }, nullptr); + + if (!res) { + fprintf(stderr, "gguf_fetch: HTTP request failed for %s (error %d)\n", + url.c_str(), (int)res.error()); + return {-1, {}}; + } + return {res->status, std::move(body)}; + } catch (const std::exception & e) { + fprintf(stderr, "gguf_fetch: HTTP error: %s\n", e.what()); + return {-1, {}}; + } +} + +// Find the filename for given repo/quant. +// For split models, returns the first shard (the one containing "00001-of-") +// split_prefix is set to the portion before "-00001-of-XXXXX.gguf" when a split file is found +static std::string detect_gguf_filename(const std::string & repo, const std::string & quant, + std::string & split_prefix) { + split_prefix.clear(); + std::string api_url = "https://huggingface.co/api/models/" + repo; + + auto [code, body] = gguf_http_get(api_url, {}, 30); + if (code != 200 || body.empty()) { + fprintf(stderr, "gguf_fetch: failed to query HF API for %s (HTTP %ld)\n", repo.c_str(), code); + return ""; + } + + nlohmann::json j; + try { + j = nlohmann::json::parse(body.begin(), body.end()); + } catch (...) { + fprintf(stderr, "gguf_fetch: failed to parse HF API response\n"); + return ""; + } + + if (!j.contains("siblings") || !j["siblings"].is_array()) { + fprintf(stderr, "gguf_fetch: unexpected HF API response format\n"); + return ""; + } + + std::vector matches; + std::string quant_upper = quant; + for (char & c : quant_upper) { c = (char)toupper(c); } + + for (const auto & sibling : j["siblings"]) { + if (!sibling.contains("rfilename")) { continue; } + std::string fname = sibling["rfilename"].get(); + if (fname.size() < 5 || fname.substr(fname.size() - 5) != ".gguf") { + continue; + } + + std::string fname_upper = fname; + for (char & c : fname_upper) { c = (char)toupper(c); } + if (fname_upper.find(quant_upper) != std::string::npos) { + matches.push_back(fname); + } + } + + if (matches.empty()) { + fprintf(stderr, "gguf_fetch: no .gguf files matching '%s' in %s\n", quant.c_str(), repo.c_str()); + return ""; + } + + std::sort(matches.begin(), matches.end()); + + // Prefer non-split, non-supplementary file + for (const auto & m : matches) { + if (m.find("-of-") == std::string::npos && m.find("mmproj") == std::string::npos) { + return m; + } + } + + // Return the first shard (00001-of-) and extract the prefix + for (const auto & m : matches) { + auto pos = m.find("-00001-of-"); + if (pos != std::string::npos) { + split_prefix = m.substr(0, pos); + return m; + } + } + + return matches[0]; +} + +static std::optional fetch_and_parse( + const std::string & repo, + const std::string & filename, + const std::string & cache_path) { + std::string url = "https://huggingface.co/" + repo + "/resolve/main/" + filename; + + // Progressive download inspired by RangeView.fetchChunk() + // Start at 2MB, double each time, cap at 64MB + size_t chunk_size = 2 * 1024 * 1024; + const size_t max_chunk = 64 * 1024 * 1024; + + while (chunk_size <= max_chunk) { + fprintf(stderr, "gguf_fetch: downloading %zu bytes from %s\n", chunk_size, filename.c_str()); + + char range_buf[64]; + snprintf(range_buf, sizeof(range_buf), "bytes=0-%zu", chunk_size - 1); + httplib::Headers headers = {{"Range", range_buf}}; + + auto [code, body] = gguf_http_get(url, headers, 120); + if (code != 200 && code != 206) { + fprintf(stderr, "gguf_fetch: HTTP %ld fetching %s\n", code, url.c_str()); + return std::nullopt; + } + + if (body.empty()) { + fprintf(stderr, "gguf_fetch: empty response\n"); + return std::nullopt; + } + + auto result = gguf_parse_meta(body); + if (result.has_value()) { + write_file(cache_path, body); + return result; + } + + if (code == 200) { + fprintf(stderr, "gguf_fetch: server returned full response but metadata parse failed\n"); + return std::nullopt; + } + + // Parse failed, try larger chunk + chunk_size *= 2; + } + + fprintf(stderr, "gguf_fetch: metadata exceeds 64MB, giving up\n"); + return std::nullopt; +} + +// Try cache first, then fetch and parse a single GGUF shard. +static std::optional fetch_or_cached( + const std::string & repo, + const std::string & filename, + const std::string & cdir, + const std::string & repo_part) { + std::string fname_part = sanitize_for_path(filename); + std::string cache_path = cdir + "/" + repo_part + "--" + fname_part + ".partial"; + + { + std::vector cached; + if (std::filesystem::exists(cache_path) && read_file(cache_path, cached)) { + auto result = gguf_parse_meta(cached); + if (result.has_value()) { + fprintf(stderr, "gguf_fetch: loaded from cache: %s\n", cache_path.c_str()); + return result; + } + } + } + + fs_create_directory_with_parents(cdir); + return fetch_and_parse(repo, filename, cache_path); +} + +std::optional gguf_fetch_model_meta( + const std::string & repo, + const std::string & quant, + const std::string & cache_dir) { + std::string cdir = cache_dir.empty() ? get_default_cache_dir() : cache_dir; + std::string repo_part = sanitize_for_path(repo); + + std::string split_prefix; + std::string filename = detect_gguf_filename(repo, quant, split_prefix); + if (filename.empty()) { + return std::nullopt; + } + + auto model_opt = fetch_or_cached(repo, filename, cdir, repo_part); + if (!model_opt.has_value()) { + fprintf(stderr, "gguf_fetch: failed to fetch %s\n", filename.c_str()); + return std::nullopt; + } + + auto & model = model_opt.value(); + + // If the model is split across multiple files we need to fetch the remaining shards metadata + if (model.n_split > 1) { + if (split_prefix.empty()) { + fprintf(stderr, "gguf_fetch: model reports %u splits but filename has no split pattern\n", model.n_split); + return std::nullopt; + } + + fprintf(stderr, "gguf_fetch: split model with %u shards, fetching remaining %u...\n", + model.n_split, model.n_split - 1); + + for (int i = 2; i <= model.n_split; i++) { + char num_buf[6], total_buf[6]; + snprintf(num_buf, sizeof(num_buf), "%05d", i); + snprintf(total_buf, sizeof(total_buf), "%05d", (int)model.n_split); + std::string shard_name = split_prefix + "-" + num_buf + "-of-" + total_buf + ".gguf"; + + auto shard = fetch_or_cached(repo, shard_name, cdir, repo_part); + if (!shard.has_value()) { + fprintf(stderr, "gguf_fetch: failed to fetch shard %d: %s\n", i, shard_name.c_str()); + return std::nullopt; + } + + model.tensors.insert(model.tensors.end(), + std::make_move_iterator(shard->tensors.begin()), + std::make_move_iterator(shard->tensors.end())); + } + + if (model.n_split_tensors > 0 && model.tensors.size() != model.n_split_tensors) { + fprintf(stderr, "gguf_fetch: WARNING: expected %u tensors from split.tensors.count, got %zu\n", + model.n_split_tensors, model.tensors.size()); + } + } + + return model_opt; +} diff --git a/tests/gguf-model-data.h b/tests/gguf-model-data.h new file mode 100644 index 00000000000..ed433791ad7 --- /dev/null +++ b/tests/gguf-model-data.h @@ -0,0 +1,42 @@ +#pragma once + +#include "ggml.h" + +#include +#include +#include +#include + +struct gguf_remote_tensor { + std::string name; + ggml_type type = GGML_TYPE_F32; + int64_t ne[4] = {1, 1, 1, 1}; // dimensions, unused dims = 1 + uint32_t n_dims = 0; +}; + +struct gguf_remote_model { + // Selected KV metadata + std::string architecture; // general.architecture + uint32_t n_embd = 0; // .embedding_length + uint32_t n_ff = 0; // .feed_forward_length + uint32_t n_vocab = 0; // inferred from token_embd.weight ne[1] + uint32_t n_layer = 0; // .block_count + uint32_t n_head = 0; // .attention.head_count + uint32_t n_head_kv = 0; // .attention.head_count_kv + uint32_t n_expert = 0; // .expert_count (0 if absent) + uint32_t n_embd_head_k = 0; // .attention.key_length + uint32_t n_embd_head_v = 0; // .attention.value_length + uint16_t n_split = 0; // split.count (0 = not split) + uint32_t n_split_tensors = 0; // split.tensors.count (0 if not split) + + std::vector tensors; +}; + +// Fetch model metadata from HuggingFace with local caching. +// repo: e.g., "ggml-org/Qwen3-32B-GGUF" +// quant: e.g., "Q8_0" -- auto-detects filename (including first shard of split models) +// Returns nullopt if download fails or network is unavailable. +std::optional gguf_fetch_model_meta( + const std::string & repo, + const std::string & quant = "Q8_0", + const std::string & cache_dir = ""); // empty = default diff --git a/tests/test-gguf-model-data.cpp b/tests/test-gguf-model-data.cpp new file mode 100644 index 00000000000..cc0174961d3 --- /dev/null +++ b/tests/test-gguf-model-data.cpp @@ -0,0 +1,121 @@ +#include "gguf-model-data.h" + +#include + +#define TEST_ASSERT(cond, msg) \ + do { \ + if (!(cond)) { \ + fprintf(stderr, "FAIL: %s (line %d): %s\n", #cond, __LINE__, msg); \ + return 1; \ + } \ + } while (0) + +int main() { + fprintf(stderr, "=== test-gguf-model-data ===\n"); + + // Fetch Qwen3-0.6B Q8_0 metadata + auto result = gguf_fetch_model_meta("ggml-org/Qwen3-0.6B-GGUF", "Q8_0"); + + if (!result.has_value()) { + fprintf(stderr, "SKIP: could not fetch model metadata (no network or HTTP disabled)\n"); + return 0; + } + + const auto & model = result.value(); + + fprintf(stderr, "Architecture: %s\n", model.architecture.c_str()); + fprintf(stderr, "n_embd: %u\n", model.n_embd); + fprintf(stderr, "n_ff: %u\n", model.n_ff); + fprintf(stderr, "n_vocab: %u\n", model.n_vocab); + fprintf(stderr, "n_layer: %u\n", model.n_layer); + fprintf(stderr, "n_head: %u\n", model.n_head); + fprintf(stderr, "n_head_kv: %u\n", model.n_head_kv); + fprintf(stderr, "n_expert: %u\n", model.n_expert); + fprintf(stderr, "n_embd_head_k: %u\n", model.n_embd_head_k); + fprintf(stderr, "n_embd_head_v: %u\n", model.n_embd_head_v); + fprintf(stderr, "tensors: %zu\n", model.tensors.size()); + + // Verify architecture + TEST_ASSERT(model.architecture == "qwen3", "expected architecture 'qwen3'"); + + // Verify key dimensions (Qwen3-0.6B) + TEST_ASSERT(model.n_layer == 28, "expected n_layer == 28"); + TEST_ASSERT(model.n_embd == 1024, "expected n_embd == 1024"); + TEST_ASSERT(model.n_head == 16, "expected n_head == 16"); + TEST_ASSERT(model.n_head_kv == 8, "expected n_head_kv == 8"); + TEST_ASSERT(model.n_expert == 0, "expected n_expert == 0 (not MoE)"); + TEST_ASSERT(model.n_vocab == 151936, "expected n_vocab == 151936"); + + // Verify tensor count + TEST_ASSERT(model.tensors.size() == 311, "expected tensor count == 311"); + + // Verify known tensor names exist + bool found_attn_q = false; + bool found_token_embd = false; + bool found_output_norm = false; + for (const auto & t : model.tensors) { + if (t.name == "blk.0.attn_q.weight") { + found_attn_q = true; + } + if (t.name == "token_embd.weight") { + found_token_embd = true; + } + if (t.name == "output_norm.weight") { + found_output_norm = true; + } + } + TEST_ASSERT(found_attn_q, "expected tensor 'blk.0.attn_q.weight'"); + TEST_ASSERT(found_token_embd, "expected tensor 'token_embd.weight'"); + TEST_ASSERT(found_output_norm, "expected tensor 'output_norm.weight'"); + + // Verify token_embd.weight shape + for (const auto & t : model.tensors) { + if (t.name == "token_embd.weight") { + TEST_ASSERT(t.ne[0] == 1024, "expected token_embd.weight ne[0] == 1024"); + TEST_ASSERT(t.n_dims == 2, "expected token_embd.weight to be 2D"); + break; + } + } + + // Test that second call uses cache (just call again, it should work) + auto result2 = gguf_fetch_model_meta("ggml-org/Qwen3-0.6B-GGUF", "Q8_0"); + TEST_ASSERT(result2.has_value(), "cached fetch should succeed"); + TEST_ASSERT(result2->tensors.size() == model.tensors.size(), "cached result should match"); + + // Test a split MoE model without specifying quant (should default to Q8_0) + auto result3 = gguf_fetch_model_meta("ggml-org/GLM-4.6V-GGUF"); + if (!result3.has_value()) { + fprintf(stderr, "SKIP: could not fetch GLM-4.6V metadata (no network?)\n"); + return 0; + } + const auto & model3 = result3.value(); + + fprintf(stderr, "Architecture: %s\n", model3.architecture.c_str()); + fprintf(stderr, "n_embd: %u\n", model3.n_embd); + fprintf(stderr, "n_ff: %u\n", model3.n_ff); + fprintf(stderr, "n_vocab: %u\n", model3.n_vocab); + fprintf(stderr, "n_layer: %u\n", model3.n_layer); + fprintf(stderr, "n_head: %u\n", model3.n_head); + fprintf(stderr, "n_head_kv: %u\n", model3.n_head_kv); + fprintf(stderr, "n_expert: %u\n", model3.n_expert); + fprintf(stderr, "n_embd_head_k: %u\n", model3.n_embd_head_k); + fprintf(stderr, "n_embd_head_v: %u\n", model3.n_embd_head_v); + fprintf(stderr, "tensors: %zu\n", model3.tensors.size()); + + // Verify architecture + TEST_ASSERT(model3.architecture == "glm4moe", "expected architecture 'glm4moe'"); + + // Verify key dimensions (GLM-4.6V) + TEST_ASSERT(model3.n_layer == 46, "expected n_layer == 46"); + TEST_ASSERT(model3.n_embd == 4096, "expected n_embd == 4096"); + TEST_ASSERT(model3.n_head == 96, "expected n_head == 96"); + TEST_ASSERT(model3.n_head_kv == 8, "expected n_head_kv == 8"); + TEST_ASSERT(model3.n_expert == 128, "expected n_expert == 128 (MoE)"); + TEST_ASSERT(model3.n_vocab == 151552, "expected n_vocab == 151552"); + + // Verify tensor count + TEST_ASSERT(model3.tensors.size() == 780, "expected tensor count == 780"); + + fprintf(stderr, "=== ALL TESTS PASSED ===\n"); + return 0; +} diff --git a/vendor/cpp-httplib/CMakeLists.txt b/vendor/cpp-httplib/CMakeLists.txt index f2d3f980050..4960f9c861e 100644 --- a/vendor/cpp-httplib/CMakeLists.txt +++ b/vendor/cpp-httplib/CMakeLists.txt @@ -171,7 +171,6 @@ endif() if (CPPHTTPLIB_OPENSSL_SUPPORT) target_compile_definitions(${TARGET} PUBLIC CPPHTTPLIB_OPENSSL_SUPPORT) # used in server.cpp if (APPLE AND CMAKE_SYSTEM_NAME STREQUAL "Darwin") - target_compile_definitions(${TARGET} PRIVATE CPPHTTPLIB_USE_CERTS_FROM_MACOSX_KEYCHAIN) find_library(CORE_FOUNDATION_FRAMEWORK CoreFoundation REQUIRED) find_library(SECURITY_FRAMEWORK Security REQUIRED) target_link_libraries(${TARGET} PUBLIC ${CORE_FOUNDATION_FRAMEWORK} ${SECURITY_FRAMEWORK}) diff --git a/vendor/cpp-httplib/httplib.cpp b/vendor/cpp-httplib/httplib.cpp index 15e118731f2..7f76978fd8d 100644 --- a/vendor/cpp-httplib/httplib.cpp +++ b/vendor/cpp-httplib/httplib.cpp @@ -2571,10 +2571,46 @@ find_content_type(const std::string &path, } } +std::string +extract_media_type(const std::string &content_type, + std::map *params = nullptr) { + // Extract type/subtype from Content-Type value (RFC 2045) + // e.g. "application/json; charset=utf-8" -> "application/json" + auto media_type = content_type; + auto semicolon_pos = media_type.find(';'); + if (semicolon_pos != std::string::npos) { + auto param_str = media_type.substr(semicolon_pos + 1); + media_type = media_type.substr(0, semicolon_pos); + + if (params) { + // Parse parameters: key=value pairs separated by ';' + split(param_str.data(), param_str.data() + param_str.size(), ';', + [&](const char *b, const char *e) { + std::string key; + std::string val; + split(b, e, '=', [&](const char *b2, const char *e2) { + if (key.empty()) { + key.assign(b2, e2); + } else { + val.assign(b2, e2); + } + }); + if (!key.empty()) { + params->emplace(trim_copy(key), trim_double_quotes_copy(val)); + } + }); + } + } + + // Trim whitespace from media type + return trim_copy(media_type); +} + bool can_compress_content_type(const std::string &content_type) { using udl::operator""_t; - auto tag = str2tag(content_type); + auto mime_type = extract_media_type(content_type); + auto tag = str2tag(mime_type); switch (tag) { case "image/svg+xml"_t: @@ -2586,7 +2622,7 @@ bool can_compress_content_type(const std::string &content_type) { case "text/event-stream"_t: return false; - default: return !content_type.rfind("text/", 0); + default: return !mime_type.rfind("text/", 0); } } @@ -3141,7 +3177,8 @@ bool is_chunked_transfer_encoding(const Headers &headers) { template bool prepare_content_receiver(T &x, int &status, ContentReceiverWithProgress receiver, - bool decompress, U callback) { + bool decompress, size_t payload_max_length, + bool &exceed_payload_max_length, U callback) { if (decompress) { std::string encoding = x.get_header_value("Content-Encoding"); std::unique_ptr decompressor; @@ -3157,12 +3194,22 @@ bool prepare_content_receiver(T &x, int &status, if (decompressor) { if (decompressor->is_valid()) { + size_t decompressed_size = 0; ContentReceiverWithProgress out = [&](const char *buf, size_t n, size_t off, size_t len) { - return decompressor->decompress(buf, n, - [&](const char *buf2, size_t n2) { - return receiver(buf2, n2, off, len); - }); + return decompressor->decompress( + buf, n, [&](const char *buf2, size_t n2) { + // Guard against zip-bomb: check + // decompressed size against limit. + if (payload_max_length > 0 && + (decompressed_size >= payload_max_length || + n2 > payload_max_length - decompressed_size)) { + exceed_payload_max_length = true; + return false; + } + decompressed_size += n2; + return receiver(buf2, n2, off, len); + }); }; return callback(std::move(out)); } else { @@ -3183,11 +3230,14 @@ template bool read_content(Stream &strm, T &x, size_t payload_max_length, int &status, DownloadProgress progress, ContentReceiverWithProgress receiver, bool decompress) { + bool exceed_payload_max_length = false; return prepare_content_receiver( - x, status, std::move(receiver), decompress, - [&](const ContentReceiverWithProgress &out) { + x, status, std::move(receiver), decompress, payload_max_length, + exceed_payload_max_length, [&](const ContentReceiverWithProgress &out) { auto ret = true; - auto exceed_payload_max_length = false; + // Note: exceed_payload_max_length may also be set by the decompressor + // wrapper in prepare_content_receiver when the decompressed payload + // size exceeds the limit. if (is_chunked_transfer_encoding(x.headers)) { auto result = read_content_chunked(strm, x, payload_max_length, out); @@ -3603,12 +3653,11 @@ std::string normalize_query_string(const std::string &query) { bool parse_multipart_boundary(const std::string &content_type, std::string &boundary) { - auto boundary_keyword = "boundary="; - auto pos = content_type.find(boundary_keyword); - if (pos == std::string::npos) { return false; } - auto end = content_type.find(';', pos); - auto beg = pos + strlen(boundary_keyword); - boundary = trim_double_quotes_copy(content_type.substr(beg, end - beg)); + std::map params; + extract_media_type(content_type, ¶ms); + auto it = params.find("boundary"); + if (it == params.end()) { return false; } + boundary = it->second; return !boundary.empty(); } @@ -3776,11 +3825,7 @@ bool parse_accept_header(const std::string &s, } // Remove additional parameters from media type - auto param_pos = accept_entry.media_type.find(';'); - if (param_pos != std::string::npos) { - accept_entry.media_type = - trim_copy(accept_entry.media_type.substr(0, param_pos)); - } + accept_entry.media_type = extract_media_type(accept_entry.media_type); // Basic validation of media type format if (accept_entry.media_type.empty()) { @@ -5610,7 +5655,7 @@ size_t Request::get_param_value_count(const std::string &key) const { bool Request::is_multipart_form_data() const { const auto &content_type = get_header_value("Content-Type"); - return !content_type.rfind("multipart/form-data", 0); + return detail::extract_media_type(content_type) == "multipart/form-data"; } // Multipart FormData implementation @@ -7092,7 +7137,8 @@ bool Server::read_content(Stream &strm, Request &req, Response &res) { return true; })) { const auto &content_type = req.get_header_value("Content-Type"); - if (!content_type.find("application/x-www-form-urlencoded")) { + if (detail::extract_media_type(content_type) == + "application/x-www-form-urlencoded") { if (req.body.size() > CPPHTTPLIB_FORM_URL_ENCODED_PAYLOAD_MAX_LENGTH) { res.status = StatusCode::PayloadTooLarge_413; // NOTE: should be 414? output_error_log(Error::ExceedMaxPayloadSize, &req); @@ -7479,45 +7525,63 @@ bool Server::routing(Request &req, Response &res, Stream &strm) { if (detail::expect_content(req)) { // Content reader handler { + // Track whether the ContentReader was aborted due to the decompressed + // payload exceeding `payload_max_length_`. + // The user handler runs after the lambda returns, so we must restore the + // 413 status if the handler overwrites it. + bool content_reader_payload_too_large = false; + ContentReader reader( [&](ContentReceiver receiver) { auto result = read_content_with_content_receiver( strm, req, res, std::move(receiver), nullptr, nullptr); - if (!result) { output_error_log(Error::Read, &req); } + if (!result) { + output_error_log(Error::Read, &req); + if (res.status == StatusCode::PayloadTooLarge_413) { + content_reader_payload_too_large = true; + } + } return result; }, [&](FormDataHeader header, ContentReceiver receiver) { auto result = read_content_with_content_receiver( strm, req, res, nullptr, std::move(header), std::move(receiver)); - if (!result) { output_error_log(Error::Read, &req); } + if (!result) { + output_error_log(Error::Read, &req); + if (res.status == StatusCode::PayloadTooLarge_413) { + content_reader_payload_too_large = true; + } + } return result; }); + bool dispatched = false; if (req.method == "POST") { - if (dispatch_request_for_content_reader( - req, res, std::move(reader), - post_handlers_for_content_reader_)) { - return true; - } + dispatched = dispatch_request_for_content_reader( + req, res, std::move(reader), post_handlers_for_content_reader_); } else if (req.method == "PUT") { - if (dispatch_request_for_content_reader( - req, res, std::move(reader), - put_handlers_for_content_reader_)) { - return true; - } + dispatched = dispatch_request_for_content_reader( + req, res, std::move(reader), put_handlers_for_content_reader_); } else if (req.method == "PATCH") { - if (dispatch_request_for_content_reader( - req, res, std::move(reader), - patch_handlers_for_content_reader_)) { - return true; - } + dispatched = dispatch_request_for_content_reader( + req, res, std::move(reader), patch_handlers_for_content_reader_); } else if (req.method == "DELETE") { - if (dispatch_request_for_content_reader( - req, res, std::move(reader), - delete_handlers_for_content_reader_)) { - return true; + dispatched = dispatch_request_for_content_reader( + req, res, std::move(reader), delete_handlers_for_content_reader_); + } + + if (dispatched) { + if (content_reader_payload_too_large) { + // Enforce the limit: override any status the handler may have set + // and return false so the error path sends a plain 413 response. + res.status = StatusCode::PayloadTooLarge_413; + res.body.clear(); + res.content_length_ = 0; + res.content_provider_ = nullptr; + return false; } + return true; } } @@ -7930,16 +7994,6 @@ Server::process_request(Stream &strm, const std::string &remote_addr, routed = true; } else { res.status = StatusCode::InternalServerError_500; - std::string val; - auto s = e.what(); - for (size_t i = 0; s[i]; i++) { - switch (s[i]) { - case '\r': val += "\\r"; break; - case '\n': val += "\\n"; break; - default: val += s[i]; break; - } - } - res.set_header("EXCEPTION_WHAT", val); } } catch (...) { if (exception_handler_) { @@ -7948,7 +8002,6 @@ Server::process_request(Stream &strm, const std::string &remote_addr, routed = true; } else { res.status = StatusCode::InternalServerError_500; - res.set_header("EXCEPTION_WHAT", "UNKNOWN"); } } #endif @@ -11629,8 +11682,7 @@ void SSLClient::set_session_verifier( session_verifier_ = std::move(verifier); } -#if defined(_WIN32) && \ - !defined(CPPHTTPLIB_DISABLE_WINDOWS_AUTOMATIC_ROOT_CERTIFICATES_UPDATE) +#ifdef CPPHTTPLIB_WINDOWS_AUTOMATIC_ROOT_CERTIFICATES_UPDATE void SSLClient::enable_windows_certificate_verification(bool enabled) { enable_windows_cert_verification_ = enabled; } @@ -11788,8 +11840,7 @@ bool SSLClient::initialize_ssl(Socket &socket, Error &error) { } } -#if defined(_WIN32) && \ - !defined(CPPHTTPLIB_DISABLE_WINDOWS_AUTOMATIC_ROOT_CERTIFICATES_UPDATE) +#ifdef CPPHTTPLIB_WINDOWS_AUTOMATIC_ROOT_CERTIFICATES_UPDATE // Additional Windows Schannel verification. // This provides real-time certificate validation with Windows Update // integration, working with both OpenSSL and MbedTLS backends. @@ -11835,8 +11886,7 @@ void Client::enable_server_hostname_verification(bool enabled) { cli_->enable_server_hostname_verification(enabled); } -#if defined(_WIN32) && \ - !defined(CPPHTTPLIB_DISABLE_WINDOWS_AUTOMATIC_ROOT_CERTIFICATES_UPDATE) +#ifdef CPPHTTPLIB_WINDOWS_AUTOMATIC_ROOT_CERTIFICATES_UPDATE void Client::enable_windows_certificate_verification(bool enabled) { if (is_ssl_) { static_cast(*cli_).enable_windows_certificate_verification( @@ -11959,7 +12009,7 @@ bool enumerate_windows_system_certs(Callback cb) { } #endif -#if defined(__APPLE__) && defined(CPPHTTPLIB_USE_CERTS_FROM_MACOSX_KEYCHAIN) +#ifdef CPPHTTPLIB_USE_CERTS_FROM_MACOSX_KEYCHAIN // Enumerate macOS Keychain certificates and call callback with DER data template bool enumerate_macos_keychain_certs(Callback cb) { diff --git a/vendor/cpp-httplib/httplib.h b/vendor/cpp-httplib/httplib.h index a39876891e6..aea6fd308bc 100644 --- a/vendor/cpp-httplib/httplib.h +++ b/vendor/cpp-httplib/httplib.h @@ -8,8 +8,8 @@ #ifndef CPPHTTPLIB_HTTPLIB_H #define CPPHTTPLIB_HTTPLIB_H -#define CPPHTTPLIB_VERSION "0.34.0" -#define CPPHTTPLIB_VERSION_NUM "0x002200" +#define CPPHTTPLIB_VERSION "0.35.0" +#define CPPHTTPLIB_VERSION_NUM "0x002300" /* * Platform compatibility check @@ -357,14 +357,32 @@ using socket_t = int; #include #endif +// On macOS with a TLS backend, enable Keychain root certificates by default +// unless the user explicitly opts out. +#if defined(__APPLE__) && \ + !defined(CPPHTTPLIB_DISABLE_MACOSX_AUTOMATIC_ROOT_CERTIFICATES) && \ + (defined(CPPHTTPLIB_OPENSSL_SUPPORT) || \ + defined(CPPHTTPLIB_MBEDTLS_SUPPORT) || \ + defined(CPPHTTPLIB_WOLFSSL_SUPPORT)) +#ifndef CPPHTTPLIB_USE_CERTS_FROM_MACOSX_KEYCHAIN +#define CPPHTTPLIB_USE_CERTS_FROM_MACOSX_KEYCHAIN +#endif +#endif + +// On Windows, enable Schannel certificate verification by default +// unless the user explicitly opts out. +#if defined(_WIN32) && \ + !defined(CPPHTTPLIB_DISABLE_WINDOWS_AUTOMATIC_ROOT_CERTIFICATES_UPDATE) +#define CPPHTTPLIB_WINDOWS_AUTOMATIC_ROOT_CERTIFICATES_UPDATE +#endif + #if defined(CPPHTTPLIB_USE_NON_BLOCKING_GETADDRINFO) || \ defined(CPPHTTPLIB_USE_CERTS_FROM_MACOSX_KEYCHAIN) #if TARGET_OS_MAC #include #include #endif -#endif // CPPHTTPLIB_USE_NON_BLOCKING_GETADDRINFO or - // CPPHTTPLIB_USE_CERTS_FROM_MACOSX_KEYCHAIN +#endif #ifdef CPPHTTPLIB_OPENSSL_SUPPORT #ifdef _WIN32 @@ -382,11 +400,11 @@ using socket_t = int; #endif #endif // _WIN32 -#if defined(CPPHTTPLIB_USE_CERTS_FROM_MACOSX_KEYCHAIN) +#ifdef CPPHTTPLIB_USE_CERTS_FROM_MACOSX_KEYCHAIN #if TARGET_OS_MAC #include #endif -#endif // CPPHTTPLIB_USE_NON_BLOCKING_GETADDRINFO +#endif #include #include @@ -430,11 +448,11 @@ using socket_t = int; #pragma comment(lib, "crypt32.lib") #endif #endif // _WIN32 -#if defined(CPPHTTPLIB_USE_CERTS_FROM_MACOSX_KEYCHAIN) +#ifdef CPPHTTPLIB_USE_CERTS_FROM_MACOSX_KEYCHAIN #if TARGET_OS_MAC #include #endif -#endif // CPPHTTPLIB_USE_CERTS_FROM_MACOSX_KEYCHAIN +#endif // Mbed TLS 3.x API compatibility #if MBEDTLS_VERSION_MAJOR >= 3 @@ -473,11 +491,11 @@ using socket_t = int; #pragma comment(lib, "crypt32.lib") #endif #endif // _WIN32 -#if defined(CPPHTTPLIB_USE_CERTS_FROM_MACOSX_KEYCHAIN) +#ifdef CPPHTTPLIB_USE_CERTS_FROM_MACOSX_KEYCHAIN #if TARGET_OS_MAC #include #endif -#endif // CPPHTTPLIB_USE_CERTS_FROM_MACOSX_KEYCHAIN +#endif #endif // CPPHTTPLIB_WOLFSSL_SUPPORT // Define CPPHTTPLIB_SSL_ENABLED if any SSL backend is available @@ -2557,8 +2575,7 @@ class Client { tls::ctx_t tls_context() const; -#if defined(_WIN32) && \ - !defined(CPPHTTPLIB_DISABLE_WINDOWS_AUTOMATIC_ROOT_CERTIFICATES_UPDATE) +#ifdef CPPHTTPLIB_WINDOWS_AUTOMATIC_ROOT_CERTIFICATES_UPDATE void enable_windows_certificate_verification(bool enabled); #endif @@ -2679,8 +2696,7 @@ class SSLClient final : public ClientImpl { tls::ctx_t tls_context() const { return ctx_; } -#if defined(_WIN32) && \ - !defined(CPPHTTPLIB_DISABLE_WINDOWS_AUTOMATIC_ROOT_CERTIFICATES_UPDATE) +#ifdef CPPHTTPLIB_WINDOWS_AUTOMATIC_ROOT_CERTIFICATES_UPDATE void enable_windows_certificate_verification(bool enabled); #endif @@ -2712,8 +2728,7 @@ class SSLClient final : public ClientImpl { std::function session_verifier_; -#if defined(_WIN32) && \ - !defined(CPPHTTPLIB_DISABLE_WINDOWS_AUTOMATIC_ROOT_CERTIFICATES_UPDATE) +#ifdef CPPHTTPLIB_WINDOWS_AUTOMATIC_ROOT_CERTIFICATES_UPDATE bool enable_windows_cert_verification_ = true; #endif