目录
摘要:
组建三节点的raft集群, 及研究主从切换的过程
raft业务代码:
main.go:
package main import ( "context" "flag" "fmt" "log" "net" "os" "path/filepath" pb "github.com/Jille/raft-grpc-example/proto" "github.com/Jille/raft-grpc-leader-rpc/leaderhealth" transport "github.com/Jille/raft-grpc-transport" "github.com/Jille/raftadmin" "github.com/hashicorp/raft" boltdb "github.com/hashicorp/raft-boltdb" "google.golang.org/grpc" "google.golang.org/grpc/reflection" ) var ( myAddr = flag.String("address", "localhost:50051", "TCP host+port for this node") raftId = flag.String("raft_id", "", "Node id used by Raft") raftDir = flag.String("raft_data_dir", "data/", "Raft data dir") raftBootstrap = flag.Bool("raft_bootstrap", false, "Whether to bootstrap the Raft cluster") logFile = flag.String("log_file", "./log", "log file") ) func main() { flag.Parse() { f, err := os.OpenFile(*logFile, os.O_WRONLY|os.O_CREATE|os.O_SYNC|os.O_APPEND,0755) if nil != err { fmt.Println("ERROR: os.OpenFile fail, ", logFile) return } os.Stdout = f os.Stderr = f fmt.Println("user log file = ", *logFile) log.SetOutput(f) } if *raftId == "" { log.Fatalf("flag --raft_id is required") } ctx := context.Background() _, port, err := net.SplitHostPort(*myAddr) if err != nil { log.Fatalf("failed to parse local address (%q): %v", *myAddr, err) } sock, err := net.Listen("tcp", fmt.Sprintf(":%s", port)) if err != nil { log.Fatalf("failed to listen: %v", err) } wt := &wordTracker{} r, tm, err := NewRaft(ctx, *raftId, *myAddr, wt) if err != nil { log.Fatalf("failed to start raft: %v", err) } s := grpc.NewServer() pb.RegisterExampleServer(s, &rpcInterface{ wordTracker: wt, raft: r, }) tm.Register(s) leaderhealth.Setup(r, s, []string{"Example"}) raftadmin.Register(s, r) reflection.Register(s) if err := s.Serve(sock); err != nil { log.Fatalf("failed to serve: %v", err) } } func NewRaft(ctx context.Context, myID, myAddress string, fsm raft.FSM) (*raft.Raft, *transport.Manager, error) { c := raft.DefaultConfig() c.LocalID = raft.ServerID(myID) baseDir := filepath.Join(*raftDir, myID) ldb, err := boltdb.NewBoltStore(filepath.Join(baseDir, "logs.dat")) if err != nil { return nil, nil, fmt.Errorf(`boltdb.NewBoltStore(%q): %v`, filepath.Join(baseDir, "logs.dat"), err) } sdb, err := boltdb.NewBoltStore(filepath.Join(baseDir, "stable.dat")) if err != nil { return nil, nil, fmt.Errorf(`boltdb.NewBoltStore(%q): %v`, filepath.Join(baseDir, "stable.dat"), err) } fss, err := raft.NewFileSnapshotStore(baseDir, 3, os.Stderr) if err != nil { return nil, nil, fmt.Errorf(`raft.NewFileSnapshotStore(%q, ...): %v`, baseDir, err) } tm := transport.New(raft.ServerAddress(myAddress), []grpc.DialOption{grpc.WithInsecure()}) r, err := raft.NewRaft(c, fsm, ldb, sdb, fss, tm.Transport()) if err != nil { return nil, nil, fmt.Errorf("raft.NewRaft: %v", err) } if *raftBootstrap { cfg := raft.Configuration{ Servers: []raft.Server{ { Suffrage: raft.Voter, ID: raft.ServerID(myID), Address: raft.ServerAddress(myAddress), }, }, } f := r.BootstrapCluster(cfg) if err := f.Error(); err != nil { return nil, nil, fmt.Errorf("raft.Raft.BootstrapCluster: %v", err) } } return r, tm, nil } |
application.go:
package main import ( "context" "fmt" "io" "io/ioutil" "strings" "sync" "time" pb "github.com/Jille/raft-grpc-example/proto" "github.com/Jille/raft-grpc-leader-rpc/rafterrors" "github.com/hashicorp/raft" ) // wordTracker keeps track of the three longest words it ever saw. type wordTracker struct { mtx sync.RWMutex words [3]string } var _ raft.FSM = &wordTracker{} // compareWords returns true if a is longer (lexicography breaking ties). func compareWords(a, b string) bool { if len(a) == len(b) { return a < b } return len(a) > len(b) } func cloneWords(words [3]string) []string { var ret [3]string copy(ret[:], words[:]) return ret[:] } func (f *wordTracker) Apply(l *raft.Log) interface{} { f.mtx.Lock() defer f.mtx.Unlock() w := string(l.Data) for i := 0; i < len(f.words); i++ { if compareWords(w, f.words[i]) { copy(f.words[i+1:], f.words[i:]) f.words[i] = w break } } return nil } func (f *wordTracker) Snapshot() (raft.FSMSnapshot, error) { // Make sure that any future calls to f.Apply() don't change the snapshot. return &snapshot{cloneWords(f.words)}, nil } func (f *wordTracker) Restore(r io.ReadCloser) error { b, err := ioutil.ReadAll(r) if err != nil { return err } words := strings.Split(string(b), "\n") copy(f.words[:], words) return nil } type snapshot struct { words []string } func (s *snapshot) Persist(sink raft.SnapshotSink) error { _, err := sink.Write([]byte(strings.Join(s.words, "\n"))) if err != nil { sink.Cancel() return fmt.Errorf("sink.Write(): %v", err) } return sink.Close() } func (s *snapshot) Release() { } type rpcInterface struct { wordTracker *wordTracker raft *raft.Raft } func (r rpcInterface) AddWord(ctx context.Context, req *pb.AddWordRequest) (*pb.AddWordResponse, error) { f := r.raft.Apply([]byte(req.GetWord()), time.Second) if err := f.Error(); err != nil { return nil, rafterrors.MarkRetriable(err) } return &pb.AddWordResponse{ CommitIndex: f.Index(), }, nil } func (r rpcInterface) GetWords(ctx context.Context, req *pb.GetWordsRequest) (*pb.GetWordsResponse, error) { r.wordTracker.mtx.RLock() defer r.wordTracker.mtx.RUnlock() return &pb.GetWordsResponse{ BestWords: cloneWords(r.wordTracker.words), ReadAtIndex: r.raft.AppliedIndex(), }, nil } |
启动raft集群:
启动raft节点:
./raft-cli --raft_bootstrap --raft_id=nodeA --address=localhost:50051 --raft_data_dir /tmp/my-raft-cluster --log_file ./logs/node_a.log & ./raft-cli --raft_id=nodeB --address=localhost:50052 --raft_data_dir /tmp/my-raft-cluster --log_file ./logs/node_b.log & ./raft-cli --raft_id=nodeC --address=localhost:50053 --raft_data_dir /tmp/my-raft-cluster --log_file ./logs/node_c.log & |
组件raft三节点的集群:
./raftadmin localhost:50051 add_voter nodeB localhost:50052 0 ./raftadmin --leader multi:///localhost:50051,localhost:50052 add_voter nodeC localhost:50053 0 |
Wed Mar 2 11:41:11 CST 2022 ./raftadmin localhost:50051 add_voter nodeB localhost:50052 0 2022/03/02 11:41:12 Invoking AddVoter(id: "nodeB" address: "localhost:50052" ) 2022/03/02 11:41:12 Response: operation_token: "803448781d455d7b6e5f03bb9e87c589cafc28c9" 2022/03/02 11:41:12 Invoking Await(operation_token: "803448781d455d7b6e5f03bb9e87c589cafc28c9" ) 2022/03/02 11:41:12 Response: index: 3 Wed Mar 2 11:41:12 CST 2022 ./raftadmin --leader multi:///localhost:50051,localhost:50052 add_voter nodeC localhost:50053 0 2022/03/02 11:41:12 Invoking AddVoter(id: "nodeC" address: "localhost:50053" ) 2022/03/02 11:41:12 Response: operation_token: "9c04ea46a003eef734e00b3568768a3761242050" 2022/03/02 11:41:12 Invoking Await(operation_token: "9c04ea46a003eef734e00b3568768a3761242050" ) 2022/03/02 11:41:12 Response: index: 4 |
查看集群信息:
Wed Mar 2 11:41:36 CST 2022 ./raftadmin --leader multi:///localhost:50051,localhost:50052,localhost:50053 get_configuration 2022/03/02 11:41:37 Invoking GetConfiguration() 2022/03/02 11:41:37 Response: servers: { id: "nodeA" address: "localhost:50051" } servers: { id: "nodeB" address: "localhost:50052" } servers: { id: "nodeC" address: "localhost:50053" } Wed Mar 2 11:41:37 CST 2022 ./raftadmin --leader multi:///localhost:50051,localhost:50052,localhost:50053 leader 2022/03/02 11:41:37 Invoking Leader() 2022/03/02 11:41:37 Response: address: "localhost:50051" |
查看主从切换过程:
杀掉当前leader nodeA, 查看选主过程及新主的日志
nodeB日志:
user log file = ./logs/node_b.log 2022-03-02T11:41:04.237+0800 [INFO] raft: initial configuration: index=0 servers=[] 2022-03-02T11:41:04.238+0800 [INFO] raft: entering follower state: follower="Node at localhost:50052 [Follower]" leader= 2022-03-02T11:41:05.735+0800 [WARN] raft: no known peers, aborting election 2022-03-02T11:41:12.012+0800 [WARN] raft: failed to get previous log: previous-index=3 last-index=0 error="log not found" 2022-03-02T11:41:23.593+0800 [WARN] raft: heartbeat timeout reached, starting election: last-leader=localhost:50051 2022-03-02T11:41:23.594+0800 [INFO] raft: entering candidate state: node="Node at localhost:50052 [Candidate]" term=3 2022-03-02T11:41:23.595+0800 [DEBUG] raft: votes: needed=2 2022-03-02T11:41:23.595+0800 [DEBUG] raft: vote granted: from=nodeB term=3 tally=1 2022-03-02T11:41:23.596+0800 [ERROR] raft: failed to make requestVote RPC: target="{Voter nodeA localhost:50051}" error="rpc error: code = Unavailable desc = connection error: desc = "transport: Error while dialing dial tcp [::1]:50051: connect: connection refused"" 2022-03-02T11:41:24.242+0800 [INFO] raft: duplicate requestVote for same term: term=3 2022-03-02T11:41:25.040+0800 [WARN] raft: Election timeout reached, restarting election 2022-03-02T11:41:25.041+0800 [INFO] raft: entering candidate state: node="Node at localhost:50052 [Candidate]" term=4 2022-03-02T11:41:25.042+0800 [DEBUG] raft: votes: needed=2 2022-03-02T11:41:25.042+0800 [DEBUG] raft: vote granted: from=nodeB term=4 tally=1 2022-03-02T11:41:25.042+0800 [ERROR] raft: failed to make requestVote RPC: target="{Voter nodeA localhost:50051}" error="rpc error: code = Unavailable desc = connection error: desc = "transport: Error while dialing dial tcp [::1]:50051: connect: connection refused"" 2022-03-02T11:41:25.043+0800 [DEBUG] raft: vote granted: from=nodeC term=4 tally=2 2022-03-02T11:41:25.044+0800 [INFO] raft: election won: tally=2 2022-03-02T11:41:25.044+0800 [INFO] raft: entering leader state: leader="Node at localhost:50052 [Leader]" 2022-03-02T11:41:25.044+0800 [INFO] raft: added peer, starting replication: peer=nodeA 2022-03-02T11:41:25.044+0800 [INFO] raft: added peer, starting replication: peer=nodeC 2022-03-02T11:41:25.045+0800 [ERROR] raft: failed to appendEntries to: peer="{Voter nodeA localhost:50051}" error="rpc error: code = Unavailable desc = connection error: desc = "transport: Error while dialing dial tcp [::1]:50051: connect: connection refused"" 2022-03-02T11:41:25.045+0800 [INFO] raft: pipelining replication: peer="{Voter nodeC localhost:50053}" 2022-03-02T11:41:25.057+0800 [ERROR] raft: failed to appendEntries to: peer="{Voter nodeA localhost:50051}" error="rpc error: code = Unavailable desc = connection error: desc = "transport: Error while dialing dial tcp [::1]:50051: connect: connection refused"" 2022-03-02T11:41:25.134+0800 [ERROR] raft: failed to appendEntries to: peer="{Voter nodeA localhost:50051}" error="rpc error: code = Unavailable desc = connection error: desc = "transport: Error while dialing dial tcp [::1]:50051: connect: connection refused"" 2022-03-02T11:41:25.211+0800 [ERROR] raft: failed to appendEntries to: peer="{Voter nodeA localhost:50051}" error="rpc error: code = Unavailable desc = connection error: desc = "transport: Error while dialing dial tcp [::1]:50051: connect: connection refused"" 2022-03-02T11:41:25.227+0800 [ERROR] raft: failed to heartbeat to: peer=localhost:50051 error="rpc error: code = Unavailable desc = connection error: desc = "transport: Error while dialing dial tcp [::1]:50051: connect: connection refused"" 2022-03-02T11:41:25.317+0800 [ERROR] raft: failed to appendEntries to: peer="{Voter nodeA localhost:50051}" error="rpc error: code = Unavailable desc = connection error: desc = "transport: Error while dialing dial tcp [::1]:50051: connect: connection refused"" 2022-03-02T11:41:25.382+0800 [ERROR] raft: failed to heartbeat to: peer=localhost:50051 error="rpc error: code = Unavailable desc = connection error: desc = "transport: Error while dialing dial tcp [::1]:50051: connect: connection refused"" 2022-03-02T11:41:25.468+0800 [ERROR] raft: failed to appendEntries to: peer="{Voter nodeA localhost:50051}" error="rpc error: code = Unavailable desc = connection error: desc = "transport: Error while dialing dial tcp [::1]:50051: connect: connection refused"" 2022-03-02T11:41:25.546+0800 [WARN] raft: failed to contact: server-id=nodeA time=501.466164ms 2022-03-02T11:41:25.549+0800 [ERROR] raft: failed to heartbeat to: peer=localhost:50051 error="rpc error: code = Unavailable desc = connection error: desc = "transport: Error while dialing dial tcp [::1]:50051: connect: connection refused"" 2022-03-02T11:41:25.702+0800 [ERROR] raft: failed to appendEntries to: peer="{Voter nodeA localhost:50051}" error="rpc error: code = Unavailable desc = connection error: desc = "transport: Error while dialing dial tcp [::1]:50051: connect: connection refused"" 2022-03-02T11:41:25.738+0800 [ERROR] raft: failed to heartbeat to: peer=localhost:50051 error="rpc error: code = Unavailable desc = connection error: desc = "transport: Error while dialing dial tcp [::1]:50051: connect: connection refused"" 2022-03-02T11:41:25.957+0800 [ERROR] raft: failed to heartbeat to: peer=localhost:50051 error="rpc error: code = Unavailable desc = connection error: desc = "transport: Error while dialing dial tcp [::1]:50051: connect: connection refused"" 2022-03-02T11:41:26.003+0800 [WARN] raft: failed to contact: server-id=nodeA time=958.706587ms 2022-03-02T11:41:26.113+0800 [ERROR] raft: failed to appendEntries to: peer="{Voter nodeA localhost:50051}" error="rpc error: code = Unavailable desc = connection error: desc = "transport: Error while dialing dial tcp [::1]:50051: connect: connection refused"" 2022-03-02T11:41:26.166+0800 [ERROR] raft: failed to heartbeat to: peer=localhost:50051 error="rpc error: code = Unavailable desc = connection error: desc = "transport: Error while dialing dial tcp [::1]:50051: connect: connection refused"" 2022-03-02T11:41:26.478+0800 [WARN] raft: failed to contact: server-id=nodeA time=1.433317227s 2022-03-02T11:41:26.489+0800 [ERROR] raft: failed to heartbeat to: peer=localhost:50051 error="rpc error: code = Unavailable desc = connection error: desc = "transport: Error while dialing dial tcp [::1]:50051: connect: connection refused"" 2022-03-02T11:41:26.838+0800 [ERROR] raft: failed to appendEntries to: peer="{Voter nodeA localhost:50051}" error="rpc error: code = Unavailable desc = connection error: desc = "transport: Error while dialing dial tcp [::1]:50051: connect: connection refused"" 2022-03-02T11:41:26.943+0800 [DEBUG] raft: failed to contact: server-id=nodeA time=1.898614989s 2022-03-02T11:41:26.977+0800 [ERROR] raft: failed to heartbeat to: peer=localhost:50051 error="rpc error: code = Unavailable desc = connection error: desc = "transport: Error while dialing dial tcp [::1]:50051: connect: connection refused"" 2022-03-02T11:41:27.428+0800 [DEBUG] raft: failed to contact: server-id=nodeA time=2.383548498s 2022-03-02T11:41:27.751+0800 [ERROR] raft: failed to heartbeat to: peer=localhost:50051 error="rpc error: code = Unavailable desc = connection error: desc = "transport: Error while dialing dial tcp [::1]:50051: connect: connection refused"" 2022-03-02T11:41:27.841+0800 [DEBUG] raft: failed to contact: server-id=nodeA time=2.79708776s 2022-03-02T11:41:28.207+0800 [ERROR] raft: failed to appendEntries to: peer="{Voter nodeA localhost:50051}" error="rpc error: code = Unavailable desc = connection error: desc = "transport: Error while dialing dial tcp [::1]:50051: connect: connection refused"" 2022-03-02T11:41:28.310+0800 [DEBUG] raft: failed to contact: server-id=nodeA time=3.265780605s 2022-03-02T11:41:28.761+0800 [DEBUG] raft: failed to contact: server-id=nodeA time=3.717052776s 2022-03-02T11:41:29.210+0800 [ERROR] raft: failed to heartbeat to: peer=localhost:50051 error="rpc error: code = Unavailable desc = connection error: desc = "transport: Error while dialing dial tcp [::1]:50051: connect: connection refused"" 2022-03-02T11:41:29.223+0800 [DEBUG] raft: failed to contact: server-id=nodeA time=4.178659765s 2022-03-02T11:41:29.707+0800 [DEBUG] raft: failed to contact: server-id=nodeA time=4.662458131s 2022-03-02T11:41:30.196+0800 [DEBUG] raft: failed to contact: server-id=nodeA time=5.152043059s 2022-03-02T11:41:30.670+0800 [DEBUG] raft: failed to contact: server-id=nodeA time=5.625986263s 2022-03-02T11:41:30.822+0800 [ERROR] raft: failed to appendEntries to: peer="{Voter nodeA localhost:50051}" error="rpc error: code = Unavailable desc = connection error: desc = "transport: Error while dialing dial tcp [::1]:50051: connect: connection refused"" 2022-03-02T11:41:31.158+0800 [DEBUG] raft: failed to contact: server-id=nodeA time=6.113560034s 2022-03-02T11:41:31.639+0800 [DEBUG] raft: failed to contact: server-id=nodeA time=6.594738853s 2022-03-02T11:41:31.912+0800 [ERROR] raft: failed to heartbeat to: peer=localhost:50051 error="rpc error: code = Unavailable desc = connection error: desc = "transport: Error while dialing dial tcp [::1]:50051: connect: connection refused"" 2022-03-02T11:41:32.095+0800 [DEBUG] raft: failed to contact: server-id=nodeA time=7.050420391s 2022-03-02T11:41:32.568+0800 [DEBUG] raft: failed to contact: server-id=nodeA time=7.52402893s 2022-03-02T11:41:33.047+0800 [DEBUG] raft: failed to contact: server-id=nodeA time=8.002913832s 2022-03-02T11:41:33.230+0800 [WARN] raft: rejecting vote request since we have a leader: from=localhost:50051 leader=localhost:50052 2022-03-02T11:41:33.520+0800 [DEBUG] raft: failed to contact: server-id=nodeA time=8.476139203s 2022-03-02T11:41:33.958+0800 [DEBUG] raft: failed to contact: server-id=nodeA time=8.913755378s 2022-03-02T11:41:34.422+0800 [DEBUG] raft: failed to contact: server-id=nodeA time=9.377328512s 2022-03-02T11:41:34.550+0800 [WARN] raft: rejecting vote request since we have a leader: from=localhost:50051 leader=localhost:50052 2022-03-02T11:41:34.897+0800 [DEBUG] raft: failed to contact: server-id=nodeA time=9.852982432s 2022-03-02T11:41:35.374+0800 [DEBUG] raft: failed to contact: server-id=nodeA time=10.329970354s 2022-03-02T11:41:35.873+0800 [DEBUG] raft: failed to contact: server-id=nodeA time=10.828381275s 2022-03-02T11:41:36.001+0800 [ERROR] raft: peer has newer term, stopping replication: peer="{Voter nodeA localhost:50051}" 2022-03-02T11:41:36.003+0800 [INFO] raft: entering follower state: follower="Node at localhost:50052 [Follower]" leader= 2022-03-02T11:41:36.003+0800 [INFO] raft: aborting pipeline replication: peer="{Voter nodeC localhost:50053}" 2022-03-02T11:41:36.281+0800 [DEBUG] raft: lost leadership because received a requestVote with a newer term 2022-03-02T11:41:36.282+0800 [WARN] raft: rejecting vote request since our last term is greater: candidate=localhost:50051 last-term=4 last-candidate-term=2 2022-03-02T11:41:37.363+0800 [DEBUG] raft: lost leadership because received a requestVote with a newer term 2022-03-02T11:41:37.364+0800 [WARN] raft: rejecting vote request since our last term is greater: candidate=localhost:50051 last-term=4 last-candidate-term=2 2022-03-02T11:41:37.580+0800 [WARN] raft: heartbeat timeout reached, starting election: last-leader= 2022-03-02T11:41:37.580+0800 [INFO] raft: entering candidate state: node="Node at localhost:50052 [Candidate]" term=8 2022-03-02T11:41:37.582+0800 [DEBUG] raft: votes: needed=2 2022-03-02T11:41:37.582+0800 [DEBUG] raft: vote granted: from=nodeB term=8 tally=1 2022-03-02T11:41:37.583+0800 [DEBUG] raft: vote granted: from=nodeA term=8 tally=2 2022-03-02T11:41:37.584+0800 [INFO] raft: election won: tally=2 2022-03-02T11:41:37.584+0800 [INFO] raft: entering leader state: leader="Node at localhost:50052 [Leader]" 2022-03-02T11:41:37.585+0800 [INFO] raft: added peer, starting replication: peer=nodeA 2022-03-02T11:41:37.586+0800 [INFO] raft: added peer, starting replication: peer=nodeC 2022-03-02T11:41:37.587+0800 [INFO] raft: pipelining replication: peer="{Voter nodeC localhost:50053}" 2022-03-02T11:41:37.588+0800 [WARN] raft: appendEntries rejected, sending older logs: peer="{Voter nodeA localhost:50051}" next=5 2022-03-02T11:41:37.591+0800 [INFO] raft: pipelining replication: peer="{Voter nodeA localhost:50051}" |
nodeC日志:
user log file = ./logs/node_c.log 2022-03-02T11:41:04.234+0800 [INFO] raft: initial configuration: index=0 servers=[] 2022-03-02T11:41:04.235+0800 [INFO] raft: entering follower state: follower="Node at localhost:50053 [Follower]" leader= 2022-03-02T11:41:05.358+0800 [WARN] raft: no known peers, aborting election 2022-03-02T11:41:12.038+0800 [WARN] raft: failed to get previous log: previous-index=4 last-index=0 error="log not found" 2022-03-02T11:41:23.596+0800 [WARN] raft: rejecting vote request since we have a leader: from=localhost:50052 leader=localhost:50051 2022-03-02T11:41:24.236+0800 [WARN] raft: heartbeat timeout reached, starting election: last-leader=localhost:50051 2022-03-02T11:41:24.238+0800 [INFO] raft: entering candidate state: node="Node at localhost:50053 [Candidate]" term=3 2022-03-02T11:41:24.241+0800 [ERROR] raft: failed to make requestVote RPC: target="{Voter nodeA localhost:50051}" error="rpc error: code = Unavailable desc = connection error: desc = "transport: Error while dialing dial tcp [::1]:50051: connect: connection refused"" 2022-03-02T11:41:24.242+0800 [DEBUG] raft: votes: needed=2 2022-03-02T11:41:24.244+0800 [DEBUG] raft: vote granted: from=nodeC term=3 tally=1 2022-03-02T11:41:25.042+0800 [DEBUG] raft: lost leadership because received a requestVote with a newer term 2022-03-02T11:41:25.043+0800 [INFO] raft: entering follower state: follower="Node at localhost:50053 [Follower]" leader= 2022-03-02T11:41:33.230+0800 [WARN] raft: rejecting vote request since we have a leader: from=localhost:50051 leader=localhost:50052 2022-03-02T11:41:34.550+0800 [WARN] raft: rejecting vote request since we have a leader: from=localhost:50051 leader=localhost:50052 2022-03-02T11:41:36.281+0800 [WARN] raft: rejecting vote request since we have a leader: from=localhost:50051 leader=localhost:50052 2022-03-02T11:41:37.363+0800 [WARN] raft: rejecting vote request since we have a leader: from=localhost:50051 leader=localhost:50052 2022-03-02T11:41:37.582+0800 [DEBUG] raft: lost leadership because received a requestVote with a newer term |
重启nodeA, 查看nodeA日志
user log file = ./logs/node_a.log 2022-03-02T11:41:04.237+0800 [INFO] raft: initial configuration: index=0 servers=[] 2022-03-02T11:41:04.237+0800 [INFO] raft: entering follower state: follower="Node at localhost:50051 [Follower]" leader= 2022-03-02T11:41:05.725+0800 [WARN] raft: heartbeat timeout reached, starting election: last-leader= 2022-03-02T11:41:05.726+0800 [INFO] raft: entering candidate state: node="Node at localhost:50051 [Candidate]" term=2 2022-03-02T11:41:05.727+0800 [DEBUG] raft: votes: needed=1 2022-03-02T11:41:05.728+0800 [DEBUG] raft: vote granted: from=nodeA term=2 tally=1 2022-03-02T11:41:05.728+0800 [INFO] raft: election won: tally=1 2022-03-02T11:41:05.728+0800 [INFO] raft: entering leader state: leader="Node at localhost:50051 [Leader]" 2022-03-02T11:41:12.002+0800 [INFO] raft: updating configuration: command=AddVoter server-id=nodeB server-addr=localhost:50052 servers="[{Suffrage:Voter ID:nodeA Address:localhost:50051} {Suffrage:Voter ID:nodeB Address:localhost:50052}]" 2022-03-02T11:41:12.008+0800 [INFO] raft: added peer, starting replication: peer=nodeB 2022-03-02T11:41:12.013+0800 [WARN] raft: appendEntries rejected, sending older logs: peer="{Voter nodeB localhost:50052}" next=1 2022-03-02T11:41:12.021+0800 [INFO] raft: pipelining replication: peer="{Voter nodeB localhost:50052}" 2022-03-02T11:41:12.027+0800 [INFO] raft: updating configuration: command=AddVoter server-id=nodeC server-addr=localhost:50053 servers="[{Suffrage:Voter ID:nodeA Address:localhost:50051} {Suffrage:Voter ID:nodeB Address:localhost:50052} {Suffrage:Voter ID:nodeC Address:localhost:50053}]" 2022-03-02T11:41:12.033+0800 [INFO] raft: added peer, starting replication: peer=nodeC 2022-03-02T11:41:12.040+0800 [WARN] raft: appendEntries rejected, sending older logs: peer="{Voter nodeC localhost:50053}" next=1 2022-03-02T11:41:12.042+0800 [INFO] raft: pipelining replication: peer="{Voter nodeC localhost:50053}" user log file = ./logs/node_a.log 2022-03-02T11:41:32.220+0800 [INFO] raft: initial configuration: index=4 servers="[{Suffrage:Voter ID:nodeA Address:localhost:50051} {Suffrage:Voter ID:nodeB Address:localhost:50052} {Suffrage:Voter ID:nodeC Address:localhost:50053}]" 2022-03-02T11:41:32.220+0800 [INFO] raft: entering follower state: follower="Node at localhost:50051 [Follower]" leader= 2022-03-02T11:41:33.226+0800 [WARN] raft: heartbeat timeout reached, starting election: last-leader= 2022-03-02T11:41:33.227+0800 [INFO] raft: entering candidate state: node="Node at localhost:50051 [Candidate]" term=3 2022-03-02T11:41:33.228+0800 [DEBUG] raft: votes: needed=2 2022-03-02T11:41:33.229+0800 [DEBUG] raft: vote granted: from=nodeA term=3 tally=1 2022-03-02T11:41:33.231+0800 [DEBUG] raft: newer term discovered, fallback to follower 2022-03-02T11:41:33.231+0800 [INFO] raft: entering follower state: follower="Node at localhost:50051 [Follower]" leader= 2022-03-02T11:41:34.546+0800 [WARN] raft: heartbeat timeout reached, starting election: last-leader= 2022-03-02T11:41:34.548+0800 [INFO] raft: entering candidate state: node="Node at localhost:50051 [Candidate]" term=5 2022-03-02T11:41:34.549+0800 [DEBUG] raft: votes: needed=2 2022-03-02T11:41:34.550+0800 [DEBUG] raft: vote granted: from=nodeA term=5 tally=1 2022-03-02T11:41:36.274+0800 [WARN] raft: Election timeout reached, restarting election 2022-03-02T11:41:36.276+0800 [INFO] raft: entering candidate state: node="Node at localhost:50051 [Candidate]" term=6 2022-03-02T11:41:36.281+0800 [DEBUG] raft: votes: needed=2 2022-03-02T11:41:36.282+0800 [DEBUG] raft: vote granted: from=nodeA term=6 tally=1 2022-03-02T11:41:37.360+0800 [WARN] raft: Election timeout reached, restarting election 2022-03-02T11:41:37.361+0800 [INFO] raft: entering candidate state: node="Node at localhost:50051 [Candidate]" term=7 2022-03-02T11:41:37.363+0800 [DEBUG] raft: votes: needed=2 2022-03-02T11:41:37.364+0800 [DEBUG] raft: vote granted: from=nodeA term=7 tally=1 2022-03-02T11:41:37.582+0800 [DEBUG] raft: lost leadership because received a requestVote with a newer term 2022-03-02T11:41:37.583+0800 [INFO] raft: entering follower state: follower="Node at localhost:50051 [Follower]" leader= 2022-03-02T11:41:37.587+0800 [WARN] raft: failed to get previous log: previous-index=5 last-index=4 error="log not found" |
主从切换结论:
- leader挂掉后, 两个follow自发选举出新的leader
- 新leader重复尝试与挂掉的旧主建立连接做日志复制
- 旧主重新启动后, 成为follow