目录
摘要:
组建三节点的raft集群, 及研究主从切换的过程
raft业务代码:
main.go:
package main
import (
"context"
"flag"
"fmt"
"log"
"net"
"os"
"path/filepath"
pb "github.com/Jille/raft-grpc-example/proto"
"github.com/Jille/raft-grpc-leader-rpc/leaderhealth"
transport "github.com/Jille/raft-grpc-transport"
"github.com/Jille/raftadmin"
"github.com/hashicorp/raft"
boltdb "github.com/hashicorp/raft-boltdb"
"google.golang.org/grpc"
"google.golang.org/grpc/reflection"
)
var (
myAddr = flag.String("address", "localhost:50051", "TCP host+port for this node")
raftId = flag.String("raft_id", "", "Node id used by Raft")
raftDir = flag.String("raft_data_dir", "data/", "Raft data dir")
raftBootstrap = flag.Bool("raft_bootstrap", false, "Whether to bootstrap the Raft cluster")
logFile = flag.String("log_file", "./log", "log file")
)
func main() {
flag.Parse()
{
f, err := os.OpenFile(*logFile, os.O_WRONLY|os.O_CREATE|os.O_SYNC|os.O_APPEND,0755)
if nil != err {
fmt.Println("ERROR: os.OpenFile fail, ", logFile)
return
}
os.Stdout = f
os.Stderr = f
fmt.Println("user log file = ", *logFile)
log.SetOutput(f)
}
if *raftId == "" {
log.Fatalf("flag --raft_id is required")
}
ctx := context.Background()
_, port, err := net.SplitHostPort(*myAddr)
if err != nil {
log.Fatalf("failed to parse local address (%q): %v", *myAddr, err)
}
sock, err := net.Listen("tcp", fmt.Sprintf(":%s", port))
if err != nil {
log.Fatalf("failed to listen: %v", err)
}
wt := &wordTracker{}
r, tm, err := NewRaft(ctx, *raftId, *myAddr, wt)
if err != nil {
log.Fatalf("failed to start raft: %v", err)
}
s := grpc.NewServer()
pb.RegisterExampleServer(s, &rpcInterface{
wordTracker: wt,
raft: r,
})
tm.Register(s)
leaderhealth.Setup(r, s, []string{"Example"})
raftadmin.Register(s, r)
reflection.Register(s)
if err := s.Serve(sock); err != nil {
log.Fatalf("failed to serve: %v", err)
}
}
func NewRaft(ctx context.Context, myID, myAddress string, fsm raft.FSM) (*raft.Raft, *transport.Manager, error) {
c := raft.DefaultConfig()
c.LocalID = raft.ServerID(myID)
baseDir := filepath.Join(*raftDir, myID)
ldb, err := boltdb.NewBoltStore(filepath.Join(baseDir, "logs.dat"))
if err != nil {
return nil, nil, fmt.Errorf(`boltdb.NewBoltStore(%q): %v`, filepath.Join(baseDir, "logs.dat"), err)
}
sdb, err := boltdb.NewBoltStore(filepath.Join(baseDir, "stable.dat"))
if err != nil {
return nil, nil, fmt.Errorf(`boltdb.NewBoltStore(%q): %v`, filepath.Join(baseDir, "stable.dat"), err)
}
fss, err := raft.NewFileSnapshotStore(baseDir, 3, os.Stderr)
if err != nil {
return nil, nil, fmt.Errorf(`raft.NewFileSnapshotStore(%q, ...): %v`, baseDir, err)
}
tm := transport.New(raft.ServerAddress(myAddress), []grpc.DialOption{grpc.WithInsecure()})
r, err := raft.NewRaft(c, fsm, ldb, sdb, fss, tm.Transport())
if err != nil {
return nil, nil, fmt.Errorf("raft.NewRaft: %v", err)
}
if *raftBootstrap {
cfg := raft.Configuration{
Servers: []raft.Server{
{
Suffrage: raft.Voter,
ID: raft.ServerID(myID),
Address: raft.ServerAddress(myAddress),
},
},
}
f := r.BootstrapCluster(cfg)
if err := f.Error(); err != nil {
return nil, nil, fmt.Errorf("raft.Raft.BootstrapCluster: %v", err)
}
}
return r, tm, nil
}
|
application.go:
package main
import (
"context"
"fmt"
"io"
"io/ioutil"
"strings"
"sync"
"time"
pb "github.com/Jille/raft-grpc-example/proto"
"github.com/Jille/raft-grpc-leader-rpc/rafterrors"
"github.com/hashicorp/raft"
)
// wordTracker keeps track of the three longest words it ever saw.
type wordTracker struct {
mtx sync.RWMutex
words [3]string
}
var _ raft.FSM = &wordTracker{}
// compareWords returns true if a is longer (lexicography breaking ties).
func compareWords(a, b string) bool {
if len(a) == len(b) {
return a < b
}
return len(a) > len(b)
}
func cloneWords(words [3]string) []string {
var ret [3]string
copy(ret[:], words[:])
return ret[:]
}
func (f *wordTracker) Apply(l *raft.Log) interface{} {
f.mtx.Lock()
defer f.mtx.Unlock()
w := string(l.Data)
for i := 0; i < len(f.words); i++ {
if compareWords(w, f.words[i]) {
copy(f.words[i+1:], f.words[i:])
f.words[i] = w
break
}
}
return nil
}
func (f *wordTracker) Snapshot() (raft.FSMSnapshot, error) {
// Make sure that any future calls to f.Apply() don't change the snapshot.
return &snapshot{cloneWords(f.words)}, nil
}
func (f *wordTracker) Restore(r io.ReadCloser) error {
b, err := ioutil.ReadAll(r)
if err != nil {
return err
}
words := strings.Split(string(b), "\n")
copy(f.words[:], words)
return nil
}
type snapshot struct {
words []string
}
func (s *snapshot) Persist(sink raft.SnapshotSink) error {
_, err := sink.Write([]byte(strings.Join(s.words, "\n")))
if err != nil {
sink.Cancel()
return fmt.Errorf("sink.Write(): %v", err)
}
return sink.Close()
}
func (s *snapshot) Release() {
}
type rpcInterface struct {
wordTracker *wordTracker
raft *raft.Raft
}
func (r rpcInterface) AddWord(ctx context.Context, req *pb.AddWordRequest) (*pb.AddWordResponse, error) {
f := r.raft.Apply([]byte(req.GetWord()), time.Second)
if err := f.Error(); err != nil {
return nil, rafterrors.MarkRetriable(err)
}
return &pb.AddWordResponse{
CommitIndex: f.Index(),
}, nil
}
func (r rpcInterface) GetWords(ctx context.Context, req *pb.GetWordsRequest) (*pb.GetWordsResponse, error) {
r.wordTracker.mtx.RLock()
defer r.wordTracker.mtx.RUnlock()
return &pb.GetWordsResponse{
BestWords: cloneWords(r.wordTracker.words),
ReadAtIndex: r.raft.AppliedIndex(),
}, nil
}
|
启动raft集群:
启动raft节点:
./raft-cli --raft_bootstrap --raft_id=nodeA --address=localhost:50051 --raft_data_dir /tmp/my-raft-cluster --log_file ./logs/node_a.log & ./raft-cli --raft_id=nodeB --address=localhost:50052 --raft_data_dir /tmp/my-raft-cluster --log_file ./logs/node_b.log & ./raft-cli --raft_id=nodeC --address=localhost:50053 --raft_data_dir /tmp/my-raft-cluster --log_file ./logs/node_c.log & |
组件raft三节点的集群:
./raftadmin localhost:50051 add_voter nodeB localhost:50052 0 ./raftadmin --leader multi:///localhost:50051,localhost:50052 add_voter nodeC localhost:50053 0 |
Wed Mar 2 11:41:11 CST 2022 ./raftadmin localhost:50051 add_voter nodeB localhost:50052 0 2022/03/02 11:41:12 Invoking AddVoter(id: "nodeB" address: "localhost:50052" ) 2022/03/02 11:41:12 Response: operation_token: "803448781d455d7b6e5f03bb9e87c589cafc28c9" 2022/03/02 11:41:12 Invoking Await(operation_token: "803448781d455d7b6e5f03bb9e87c589cafc28c9" ) 2022/03/02 11:41:12 Response: index: 3 Wed Mar 2 11:41:12 CST 2022 ./raftadmin --leader multi:///localhost:50051,localhost:50052 add_voter nodeC localhost:50053 0 2022/03/02 11:41:12 Invoking AddVoter(id: "nodeC" address: "localhost:50053" ) 2022/03/02 11:41:12 Response: operation_token: "9c04ea46a003eef734e00b3568768a3761242050" 2022/03/02 11:41:12 Invoking Await(operation_token: "9c04ea46a003eef734e00b3568768a3761242050" ) 2022/03/02 11:41:12 Response: index: 4 |
查看集群信息:
Wed Mar 2 11:41:36 CST 2022 ./raftadmin --leader multi:///localhost:50051,localhost:50052,localhost:50053 get_configuration
2022/03/02 11:41:37 Invoking GetConfiguration()
2022/03/02 11:41:37 Response: servers: {
id: "nodeA"
address: "localhost:50051"
}
servers: {
id: "nodeB"
address: "localhost:50052"
}
servers: {
id: "nodeC"
address: "localhost:50053"
}
Wed Mar 2 11:41:37 CST 2022 ./raftadmin --leader multi:///localhost:50051,localhost:50052,localhost:50053 leader
2022/03/02 11:41:37 Invoking Leader()
2022/03/02 11:41:37 Response: address: "localhost:50051"
|
查看主从切换过程:
杀掉当前leader nodeA, 查看选主过程及新主的日志
nodeB日志:
user log file = ./logs/node_b.log
2022-03-02T11:41:04.237+0800 [INFO] raft: initial configuration: index=0 servers=[]
2022-03-02T11:41:04.238+0800 [INFO] raft: entering follower state: follower="Node at localhost:50052 [Follower]" leader=
2022-03-02T11:41:05.735+0800 [WARN] raft: no known peers, aborting election
2022-03-02T11:41:12.012+0800 [WARN] raft: failed to get previous log: previous-index=3 last-index=0 error="log not found"
2022-03-02T11:41:23.593+0800 [WARN] raft: heartbeat timeout reached, starting election: last-leader=localhost:50051
2022-03-02T11:41:23.594+0800 [INFO] raft: entering candidate state: node="Node at localhost:50052 [Candidate]" term=3
2022-03-02T11:41:23.595+0800 [DEBUG] raft: votes: needed=2
2022-03-02T11:41:23.595+0800 [DEBUG] raft: vote granted: from=nodeB term=3 tally=1
2022-03-02T11:41:23.596+0800 [ERROR] raft: failed to make requestVote RPC: target="{Voter nodeA localhost:50051}" error="rpc error: code = Unavailable desc = connection error: desc = "transport: Error while dialing dial tcp [::1]:50051: connect: connection refused""
2022-03-02T11:41:24.242+0800 [INFO] raft: duplicate requestVote for same term: term=3
2022-03-02T11:41:25.040+0800 [WARN] raft: Election timeout reached, restarting election
2022-03-02T11:41:25.041+0800 [INFO] raft: entering candidate state: node="Node at localhost:50052 [Candidate]" term=4
2022-03-02T11:41:25.042+0800 [DEBUG] raft: votes: needed=2
2022-03-02T11:41:25.042+0800 [DEBUG] raft: vote granted: from=nodeB term=4 tally=1
2022-03-02T11:41:25.042+0800 [ERROR] raft: failed to make requestVote RPC: target="{Voter nodeA localhost:50051}" error="rpc error: code = Unavailable desc = connection error: desc = "transport: Error while dialing dial tcp [::1]:50051: connect: connection refused""
2022-03-02T11:41:25.043+0800 [DEBUG] raft: vote granted: from=nodeC term=4 tally=2
2022-03-02T11:41:25.044+0800 [INFO] raft: election won: tally=2
2022-03-02T11:41:25.044+0800 [INFO] raft: entering leader state: leader="Node at localhost:50052 [Leader]"
2022-03-02T11:41:25.044+0800 [INFO] raft: added peer, starting replication: peer=nodeA
2022-03-02T11:41:25.044+0800 [INFO] raft: added peer, starting replication: peer=nodeC
2022-03-02T11:41:25.045+0800 [ERROR] raft: failed to appendEntries to: peer="{Voter nodeA localhost:50051}" error="rpc error: code = Unavailable desc = connection error: desc = "transport: Error while dialing dial tcp [::1]:50051: connect: connection refused""
2022-03-02T11:41:25.045+0800 [INFO] raft: pipelining replication: peer="{Voter nodeC localhost:50053}"
2022-03-02T11:41:25.057+0800 [ERROR] raft: failed to appendEntries to: peer="{Voter nodeA localhost:50051}" error="rpc error: code = Unavailable desc = connection error: desc = "transport: Error while dialing dial tcp [::1]:50051: connect: connection refused""
2022-03-02T11:41:25.134+0800 [ERROR] raft: failed to appendEntries to: peer="{Voter nodeA localhost:50051}" error="rpc error: code = Unavailable desc = connection error: desc = "transport: Error while dialing dial tcp [::1]:50051: connect: connection refused""
2022-03-02T11:41:25.211+0800 [ERROR] raft: failed to appendEntries to: peer="{Voter nodeA localhost:50051}" error="rpc error: code = Unavailable desc = connection error: desc = "transport: Error while dialing dial tcp [::1]:50051: connect: connection refused""
2022-03-02T11:41:25.227+0800 [ERROR] raft: failed to heartbeat to: peer=localhost:50051 error="rpc error: code = Unavailable desc = connection error: desc = "transport: Error while dialing dial tcp [::1]:50051: connect: connection refused""
2022-03-02T11:41:25.317+0800 [ERROR] raft: failed to appendEntries to: peer="{Voter nodeA localhost:50051}" error="rpc error: code = Unavailable desc = connection error: desc = "transport: Error while dialing dial tcp [::1]:50051: connect: connection refused""
2022-03-02T11:41:25.382+0800 [ERROR] raft: failed to heartbeat to: peer=localhost:50051 error="rpc error: code = Unavailable desc = connection error: desc = "transport: Error while dialing dial tcp [::1]:50051: connect: connection refused""
2022-03-02T11:41:25.468+0800 [ERROR] raft: failed to appendEntries to: peer="{Voter nodeA localhost:50051}" error="rpc error: code = Unavailable desc = connection error: desc = "transport: Error while dialing dial tcp [::1]:50051: connect: connection refused""
2022-03-02T11:41:25.546+0800 [WARN] raft: failed to contact: server-id=nodeA time=501.466164ms
2022-03-02T11:41:25.549+0800 [ERROR] raft: failed to heartbeat to: peer=localhost:50051 error="rpc error: code = Unavailable desc = connection error: desc = "transport: Error while dialing dial tcp [::1]:50051: connect: connection refused""
2022-03-02T11:41:25.702+0800 [ERROR] raft: failed to appendEntries to: peer="{Voter nodeA localhost:50051}" error="rpc error: code = Unavailable desc = connection error: desc = "transport: Error while dialing dial tcp [::1]:50051: connect: connection refused""
2022-03-02T11:41:25.738+0800 [ERROR] raft: failed to heartbeat to: peer=localhost:50051 error="rpc error: code = Unavailable desc = connection error: desc = "transport: Error while dialing dial tcp [::1]:50051: connect: connection refused""
2022-03-02T11:41:25.957+0800 [ERROR] raft: failed to heartbeat to: peer=localhost:50051 error="rpc error: code = Unavailable desc = connection error: desc = "transport: Error while dialing dial tcp [::1]:50051: connect: connection refused""
2022-03-02T11:41:26.003+0800 [WARN] raft: failed to contact: server-id=nodeA time=958.706587ms
2022-03-02T11:41:26.113+0800 [ERROR] raft: failed to appendEntries to: peer="{Voter nodeA localhost:50051}" error="rpc error: code = Unavailable desc = connection error: desc = "transport: Error while dialing dial tcp [::1]:50051: connect: connection refused""
2022-03-02T11:41:26.166+0800 [ERROR] raft: failed to heartbeat to: peer=localhost:50051 error="rpc error: code = Unavailable desc = connection error: desc = "transport: Error while dialing dial tcp [::1]:50051: connect: connection refused""
2022-03-02T11:41:26.478+0800 [WARN] raft: failed to contact: server-id=nodeA time=1.433317227s
2022-03-02T11:41:26.489+0800 [ERROR] raft: failed to heartbeat to: peer=localhost:50051 error="rpc error: code = Unavailable desc = connection error: desc = "transport: Error while dialing dial tcp [::1]:50051: connect: connection refused""
2022-03-02T11:41:26.838+0800 [ERROR] raft: failed to appendEntries to: peer="{Voter nodeA localhost:50051}" error="rpc error: code = Unavailable desc = connection error: desc = "transport: Error while dialing dial tcp [::1]:50051: connect: connection refused""
2022-03-02T11:41:26.943+0800 [DEBUG] raft: failed to contact: server-id=nodeA time=1.898614989s
2022-03-02T11:41:26.977+0800 [ERROR] raft: failed to heartbeat to: peer=localhost:50051 error="rpc error: code = Unavailable desc = connection error: desc = "transport: Error while dialing dial tcp [::1]:50051: connect: connection refused""
2022-03-02T11:41:27.428+0800 [DEBUG] raft: failed to contact: server-id=nodeA time=2.383548498s
2022-03-02T11:41:27.751+0800 [ERROR] raft: failed to heartbeat to: peer=localhost:50051 error="rpc error: code = Unavailable desc = connection error: desc = "transport: Error while dialing dial tcp [::1]:50051: connect: connection refused""
2022-03-02T11:41:27.841+0800 [DEBUG] raft: failed to contact: server-id=nodeA time=2.79708776s
2022-03-02T11:41:28.207+0800 [ERROR] raft: failed to appendEntries to: peer="{Voter nodeA localhost:50051}" error="rpc error: code = Unavailable desc = connection error: desc = "transport: Error while dialing dial tcp [::1]:50051: connect: connection refused""
2022-03-02T11:41:28.310+0800 [DEBUG] raft: failed to contact: server-id=nodeA time=3.265780605s
2022-03-02T11:41:28.761+0800 [DEBUG] raft: failed to contact: server-id=nodeA time=3.717052776s
2022-03-02T11:41:29.210+0800 [ERROR] raft: failed to heartbeat to: peer=localhost:50051 error="rpc error: code = Unavailable desc = connection error: desc = "transport: Error while dialing dial tcp [::1]:50051: connect: connection refused""
2022-03-02T11:41:29.223+0800 [DEBUG] raft: failed to contact: server-id=nodeA time=4.178659765s
2022-03-02T11:41:29.707+0800 [DEBUG] raft: failed to contact: server-id=nodeA time=4.662458131s
2022-03-02T11:41:30.196+0800 [DEBUG] raft: failed to contact: server-id=nodeA time=5.152043059s
2022-03-02T11:41:30.670+0800 [DEBUG] raft: failed to contact: server-id=nodeA time=5.625986263s
2022-03-02T11:41:30.822+0800 [ERROR] raft: failed to appendEntries to: peer="{Voter nodeA localhost:50051}" error="rpc error: code = Unavailable desc = connection error: desc = "transport: Error while dialing dial tcp [::1]:50051: connect: connection refused""
2022-03-02T11:41:31.158+0800 [DEBUG] raft: failed to contact: server-id=nodeA time=6.113560034s
2022-03-02T11:41:31.639+0800 [DEBUG] raft: failed to contact: server-id=nodeA time=6.594738853s
2022-03-02T11:41:31.912+0800 [ERROR] raft: failed to heartbeat to: peer=localhost:50051 error="rpc error: code = Unavailable desc = connection error: desc = "transport: Error while dialing dial tcp [::1]:50051: connect: connection refused""
2022-03-02T11:41:32.095+0800 [DEBUG] raft: failed to contact: server-id=nodeA time=7.050420391s
2022-03-02T11:41:32.568+0800 [DEBUG] raft: failed to contact: server-id=nodeA time=7.52402893s
2022-03-02T11:41:33.047+0800 [DEBUG] raft: failed to contact: server-id=nodeA time=8.002913832s
2022-03-02T11:41:33.230+0800 [WARN] raft: rejecting vote request since we have a leader: from=localhost:50051 leader=localhost:50052
2022-03-02T11:41:33.520+0800 [DEBUG] raft: failed to contact: server-id=nodeA time=8.476139203s
2022-03-02T11:41:33.958+0800 [DEBUG] raft: failed to contact: server-id=nodeA time=8.913755378s
2022-03-02T11:41:34.422+0800 [DEBUG] raft: failed to contact: server-id=nodeA time=9.377328512s
2022-03-02T11:41:34.550+0800 [WARN] raft: rejecting vote request since we have a leader: from=localhost:50051 leader=localhost:50052
2022-03-02T11:41:34.897+0800 [DEBUG] raft: failed to contact: server-id=nodeA time=9.852982432s
2022-03-02T11:41:35.374+0800 [DEBUG] raft: failed to contact: server-id=nodeA time=10.329970354s
2022-03-02T11:41:35.873+0800 [DEBUG] raft: failed to contact: server-id=nodeA time=10.828381275s
2022-03-02T11:41:36.001+0800 [ERROR] raft: peer has newer term, stopping replication: peer="{Voter nodeA localhost:50051}"
2022-03-02T11:41:36.003+0800 [INFO] raft: entering follower state: follower="Node at localhost:50052 [Follower]" leader=
2022-03-02T11:41:36.003+0800 [INFO] raft: aborting pipeline replication: peer="{Voter nodeC localhost:50053}"
2022-03-02T11:41:36.281+0800 [DEBUG] raft: lost leadership because received a requestVote with a newer term
2022-03-02T11:41:36.282+0800 [WARN] raft: rejecting vote request since our last term is greater: candidate=localhost:50051 last-term=4 last-candidate-term=2
2022-03-02T11:41:37.363+0800 [DEBUG] raft: lost leadership because received a requestVote with a newer term
2022-03-02T11:41:37.364+0800 [WARN] raft: rejecting vote request since our last term is greater: candidate=localhost:50051 last-term=4 last-candidate-term=2
2022-03-02T11:41:37.580+0800 [WARN] raft: heartbeat timeout reached, starting election: last-leader=
2022-03-02T11:41:37.580+0800 [INFO] raft: entering candidate state: node="Node at localhost:50052 [Candidate]" term=8
2022-03-02T11:41:37.582+0800 [DEBUG] raft: votes: needed=2
2022-03-02T11:41:37.582+0800 [DEBUG] raft: vote granted: from=nodeB term=8 tally=1
2022-03-02T11:41:37.583+0800 [DEBUG] raft: vote granted: from=nodeA term=8 tally=2
2022-03-02T11:41:37.584+0800 [INFO] raft: election won: tally=2
2022-03-02T11:41:37.584+0800 [INFO] raft: entering leader state: leader="Node at localhost:50052 [Leader]"
2022-03-02T11:41:37.585+0800 [INFO] raft: added peer, starting replication: peer=nodeA
2022-03-02T11:41:37.586+0800 [INFO] raft: added peer, starting replication: peer=nodeC
2022-03-02T11:41:37.587+0800 [INFO] raft: pipelining replication: peer="{Voter nodeC localhost:50053}"
2022-03-02T11:41:37.588+0800 [WARN] raft: appendEntries rejected, sending older logs: peer="{Voter nodeA localhost:50051}" next=5
2022-03-02T11:41:37.591+0800 [INFO] raft: pipelining replication: peer="{Voter nodeA localhost:50051}"
|
nodeC日志:
user log file = ./logs/node_c.log
2022-03-02T11:41:04.234+0800 [INFO] raft: initial configuration: index=0 servers=[]
2022-03-02T11:41:04.235+0800 [INFO] raft: entering follower state: follower="Node at localhost:50053 [Follower]" leader=
2022-03-02T11:41:05.358+0800 [WARN] raft: no known peers, aborting election
2022-03-02T11:41:12.038+0800 [WARN] raft: failed to get previous log: previous-index=4 last-index=0 error="log not found"
2022-03-02T11:41:23.596+0800 [WARN] raft: rejecting vote request since we have a leader: from=localhost:50052 leader=localhost:50051
2022-03-02T11:41:24.236+0800 [WARN] raft: heartbeat timeout reached, starting election: last-leader=localhost:50051
2022-03-02T11:41:24.238+0800 [INFO] raft: entering candidate state: node="Node at localhost:50053 [Candidate]" term=3
2022-03-02T11:41:24.241+0800 [ERROR] raft: failed to make requestVote RPC: target="{Voter nodeA localhost:50051}" error="rpc error: code = Unavailable desc = connection error: desc = "transport: Error while dialing dial tcp [::1]:50051: connect: connection refused""
2022-03-02T11:41:24.242+0800 [DEBUG] raft: votes: needed=2
2022-03-02T11:41:24.244+0800 [DEBUG] raft: vote granted: from=nodeC term=3 tally=1
2022-03-02T11:41:25.042+0800 [DEBUG] raft: lost leadership because received a requestVote with a newer term
2022-03-02T11:41:25.043+0800 [INFO] raft: entering follower state: follower="Node at localhost:50053 [Follower]" leader=
2022-03-02T11:41:33.230+0800 [WARN] raft: rejecting vote request since we have a leader: from=localhost:50051 leader=localhost:50052
2022-03-02T11:41:34.550+0800 [WARN] raft: rejecting vote request since we have a leader: from=localhost:50051 leader=localhost:50052
2022-03-02T11:41:36.281+0800 [WARN] raft: rejecting vote request since we have a leader: from=localhost:50051 leader=localhost:50052
2022-03-02T11:41:37.363+0800 [WARN] raft: rejecting vote request since we have a leader: from=localhost:50051 leader=localhost:50052
2022-03-02T11:41:37.582+0800 [DEBUG] raft: lost leadership because received a requestVote with a newer term
|
重启nodeA, 查看nodeA日志
user log file = ./logs/node_a.log
2022-03-02T11:41:04.237+0800 [INFO] raft: initial configuration: index=0 servers=[]
2022-03-02T11:41:04.237+0800 [INFO] raft: entering follower state: follower="Node at localhost:50051 [Follower]" leader=
2022-03-02T11:41:05.725+0800 [WARN] raft: heartbeat timeout reached, starting election: last-leader=
2022-03-02T11:41:05.726+0800 [INFO] raft: entering candidate state: node="Node at localhost:50051 [Candidate]" term=2
2022-03-02T11:41:05.727+0800 [DEBUG] raft: votes: needed=1
2022-03-02T11:41:05.728+0800 [DEBUG] raft: vote granted: from=nodeA term=2 tally=1
2022-03-02T11:41:05.728+0800 [INFO] raft: election won: tally=1
2022-03-02T11:41:05.728+0800 [INFO] raft: entering leader state: leader="Node at localhost:50051 [Leader]"
2022-03-02T11:41:12.002+0800 [INFO] raft: updating configuration: command=AddVoter server-id=nodeB server-addr=localhost:50052 servers="[{Suffrage:Voter ID:nodeA Address:localhost:50051} {Suffrage:Voter ID:nodeB Address:localhost:50052}]"
2022-03-02T11:41:12.008+0800 [INFO] raft: added peer, starting replication: peer=nodeB
2022-03-02T11:41:12.013+0800 [WARN] raft: appendEntries rejected, sending older logs: peer="{Voter nodeB localhost:50052}" next=1
2022-03-02T11:41:12.021+0800 [INFO] raft: pipelining replication: peer="{Voter nodeB localhost:50052}"
2022-03-02T11:41:12.027+0800 [INFO] raft: updating configuration: command=AddVoter server-id=nodeC server-addr=localhost:50053 servers="[{Suffrage:Voter ID:nodeA Address:localhost:50051} {Suffrage:Voter ID:nodeB Address:localhost:50052} {Suffrage:Voter ID:nodeC Address:localhost:50053}]"
2022-03-02T11:41:12.033+0800 [INFO] raft: added peer, starting replication: peer=nodeC
2022-03-02T11:41:12.040+0800 [WARN] raft: appendEntries rejected, sending older logs: peer="{Voter nodeC localhost:50053}" next=1
2022-03-02T11:41:12.042+0800 [INFO] raft: pipelining replication: peer="{Voter nodeC localhost:50053}"
user log file = ./logs/node_a.log
2022-03-02T11:41:32.220+0800 [INFO] raft: initial configuration: index=4 servers="[{Suffrage:Voter ID:nodeA Address:localhost:50051} {Suffrage:Voter ID:nodeB Address:localhost:50052} {Suffrage:Voter ID:nodeC Address:localhost:50053}]"
2022-03-02T11:41:32.220+0800 [INFO] raft: entering follower state: follower="Node at localhost:50051 [Follower]" leader=
2022-03-02T11:41:33.226+0800 [WARN] raft: heartbeat timeout reached, starting election: last-leader=
2022-03-02T11:41:33.227+0800 [INFO] raft: entering candidate state: node="Node at localhost:50051 [Candidate]" term=3
2022-03-02T11:41:33.228+0800 [DEBUG] raft: votes: needed=2
2022-03-02T11:41:33.229+0800 [DEBUG] raft: vote granted: from=nodeA term=3 tally=1
2022-03-02T11:41:33.231+0800 [DEBUG] raft: newer term discovered, fallback to follower
2022-03-02T11:41:33.231+0800 [INFO] raft: entering follower state: follower="Node at localhost:50051 [Follower]" leader=
2022-03-02T11:41:34.546+0800 [WARN] raft: heartbeat timeout reached, starting election: last-leader=
2022-03-02T11:41:34.548+0800 [INFO] raft: entering candidate state: node="Node at localhost:50051 [Candidate]" term=5
2022-03-02T11:41:34.549+0800 [DEBUG] raft: votes: needed=2
2022-03-02T11:41:34.550+0800 [DEBUG] raft: vote granted: from=nodeA term=5 tally=1
2022-03-02T11:41:36.274+0800 [WARN] raft: Election timeout reached, restarting election
2022-03-02T11:41:36.276+0800 [INFO] raft: entering candidate state: node="Node at localhost:50051 [Candidate]" term=6
2022-03-02T11:41:36.281+0800 [DEBUG] raft: votes: needed=2
2022-03-02T11:41:36.282+0800 [DEBUG] raft: vote granted: from=nodeA term=6 tally=1
2022-03-02T11:41:37.360+0800 [WARN] raft: Election timeout reached, restarting election
2022-03-02T11:41:37.361+0800 [INFO] raft: entering candidate state: node="Node at localhost:50051 [Candidate]" term=7
2022-03-02T11:41:37.363+0800 [DEBUG] raft: votes: needed=2
2022-03-02T11:41:37.364+0800 [DEBUG] raft: vote granted: from=nodeA term=7 tally=1
2022-03-02T11:41:37.582+0800 [DEBUG] raft: lost leadership because received a requestVote with a newer term
2022-03-02T11:41:37.583+0800 [INFO] raft: entering follower state: follower="Node at localhost:50051 [Follower]" leader=
2022-03-02T11:41:37.587+0800 [WARN] raft: failed to get previous log: previous-index=5 last-index=4 error="log not found"
|
主从切换结论:
- leader挂掉后, 两个follow自发选举出新的leader
- 新leader重复尝试与挂掉的旧主建立连接做日志复制
- 旧主重新启动后, 成为follow










