Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion include/llama.h
Original file line number Diff line number Diff line change
Expand Up @@ -1292,7 +1292,9 @@ extern "C" {
// available samplers:

LLAMA_API struct llama_sampler * llama_sampler_init_greedy(void);
LLAMA_API struct llama_sampler * llama_sampler_init_dist (uint32_t seed);

/// seed == LLAMA_DEFAULT_SEED to use a random seed.
LLAMA_API struct llama_sampler * llama_sampler_init_dist(uint32_t seed);

/// @details Top-K sampling described in academic paper "The Curious Case of Neural Text Degeneration" https://arxiv.org/abs/1904.09751
/// Setting k <= 0 makes this a noop
Expand Down
2 changes: 1 addition & 1 deletion src/llama-sampling.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2142,7 +2142,7 @@ struct llama_sampler_xtc {
const uint32_t seed;
uint32_t seed_cur;

std::mt19937 rng;
std::mt19937 rng;
};

static const char * llama_sampler_xtc_name(const struct llama_sampler * /*smpl*/) {
Expand Down
13 changes: 8 additions & 5 deletions tools/server/server-context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
#include "server-task.h"
#include "server-queue.h"

#include "arg.h"
#include "common.h"
#include "llama.h"
#include "log.h"
Expand All @@ -16,7 +15,6 @@
#include <cstddef>
#include <cinttypes>
#include <memory>
#include <unordered_set>
#include <filesystem>

// fix problem with std::min and std::max
Expand Down Expand Up @@ -2927,9 +2925,14 @@ std::unique_ptr<server_res_generator> server_routes::handle_completions_impl(
if (task.params.n_cmpl > 1) {
task.n_children = task.params.n_cmpl - 1;
for (size_t j = 0; j < task.n_children; j++) {
server_task child = task.create_child(
task.id,
rd.get_new_id());
server_task child = task.create_child(task.id, rd.get_new_id());

// use different sampling seed for each child
// note: https://github.com/ggml-org/llama.cpp/pull/18700#discussion_r2675115723
if (child.params.sampling.seed != LLAMA_DEFAULT_SEED) {
child.params.sampling.seed += j + 1;
}

tasks.push_back(std::move(child));
}
}
Expand Down
1 change: 0 additions & 1 deletion tools/server/tests/unit/test_chat_completion.py
Original file line number Diff line number Diff line change
Expand Up @@ -503,5 +503,4 @@ def test_chat_completions_multiple_choices():
assert len(res.body["choices"]) == 2
for choice in res.body["choices"]:
assert "assistant" == choice["message"]["role"]
assert match_regex("Suddenly", choice["message"]["content"])
assert choice["finish_reason"] == "length"
Loading