From 892540829a92ab932ed29e26ffbc58098fea92d4 Mon Sep 17 00:00:00 2001 From: bitflicker64 Date: Thu, 5 Mar 2026 13:42:24 +0530 Subject: [PATCH] fix(pd): add timeout and null-safety to getLeaderGrpcAddress() The bolt RPC call in getLeaderGrpcAddress() returns null in Docker bridge network mode, causing NPE when a follower PD node attempts to discover the leader's gRPC address. This breaks store registration and partition distribution when any node other than pd0 wins the raft leader election. Add a bounded timeout using the configured rpc-timeout, null-check the RPC response, and fall back to deriving the address from the raft endpoint IP when the RPC fails. Closes apache/hugegraph#2959 --- .../apache/hugegraph/pd/raft/RaftEngine.java | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/raft/RaftEngine.java b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/raft/RaftEngine.java index e70ac92340..494ca674d7 100644 --- a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/raft/RaftEngine.java +++ b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/raft/RaftEngine.java @@ -26,6 +26,8 @@ import java.util.concurrent.CompletableFuture; import java.util.concurrent.CountDownLatch; import java.util.concurrent.ExecutionException; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; import java.util.concurrent.atomic.AtomicReference; import java.util.stream.Collectors; @@ -239,8 +241,20 @@ public String getLeaderGrpcAddress() throws ExecutionException, InterruptedExcep waitingForLeader(10000); } - return raftRpcClient.getGrpcAddress(raftNode.getLeaderId().getEndpoint().toString()).get() - .getGrpcAddress(); + try { + RaftRpcProcessor.GetMemberResponse response = raftRpcClient + .getGrpcAddress(raftNode.getLeaderId().getEndpoint().toString()) + .get(config.getRpcTimeout(), TimeUnit.MILLISECONDS); + if (response != null && response.getGrpcAddress() != null) { + return response.getGrpcAddress(); + } + } catch (TimeoutException | ExecutionException e) { + log.warn("Failed to get leader gRPC address via RPC, falling back to endpoint derivation", e); + } + + // Fallback: derive from raft endpoint IP + local gRPC port (best effort) + String leaderIp = raftNode.getLeaderId().getEndpoint().getIp(); + return leaderIp + ":" + config.getGrpcPort(); } /**