0
点赞
收藏
分享

微信扫一扫

2022-03-02 组建raft集群实战及主从切换调研

张宏涛心理 2022-03-11 阅读 35

目录

摘要:

raft业务代码:

main.go:

application.go:

启动raft集群:

启动raft节点:

组建raft三节点的集群:

查看集群信息:

查看主从切换过程:

nodeB日志:

nodeC日志:

重启nodeA, 查看nodeA日志

主从切换结论:


摘要:

组建三节点的raft集群, 及研究主从切换的过程

raft业务代码:

main.go:

package main

import (
	"context"
	"flag"
	"fmt"
	"log"
	"net"
	"os"
	"path/filepath"

	pb "github.com/Jille/raft-grpc-example/proto"
	"github.com/Jille/raft-grpc-leader-rpc/leaderhealth"
	transport "github.com/Jille/raft-grpc-transport"
	"github.com/Jille/raftadmin"
	"github.com/hashicorp/raft"
	boltdb "github.com/hashicorp/raft-boltdb"
	"google.golang.org/grpc"
	"google.golang.org/grpc/reflection"

)

// Command-line flags. Each variable is a pointer that holds the parsed value
// once flag.Parse has run.
var (
	// myAddr is the host:port this node is reachable at.
	myAddr = flag.String("address", "localhost:50051", "TCP host+port for this node")
	// raftId is the Raft server ID of this node; main refuses to start when it is empty.
	raftId = flag.String("raft_id", "", "Node id used by Raft")
	// raftDir is the parent directory for the per-node Raft data.
	raftDir = flag.String("raft_data_dir", "data/", "Raft data dir")
	// raftBootstrap makes this node bootstrap a new single-node cluster.
	raftBootstrap = flag.Bool("raft_bootstrap", false, "Whether to bootstrap the Raft cluster")
	// logFile is the file that receives all process output.
	logFile = flag.String("log_file", "./log", "log file")
)

func main() {

	flag.Parse()

	{
		f, err := os.OpenFile(*logFile, os.O_WRONLY|os.O_CREATE|os.O_SYNC|os.O_APPEND,0755)
		if nil != err {
			fmt.Println("ERROR: os.OpenFile fail, ", logFile)
			return
		}

		os.Stdout = f
		os.Stderr = f
		fmt.Println("user log file = ", *logFile)
		log.SetOutput(f)
	}

	if *raftId == "" {
		log.Fatalf("flag --raft_id is required")
	}

	ctx := context.Background()
	_, port, err := net.SplitHostPort(*myAddr)
	if err != nil {
		log.Fatalf("failed to parse local address (%q): %v", *myAddr, err)
	}
	sock, err := net.Listen("tcp", fmt.Sprintf(":%s", port))
	if err != nil {
		log.Fatalf("failed to listen: %v", err)
	}

	wt := &wordTracker{}

	r, tm, err := NewRaft(ctx, *raftId, *myAddr, wt)
	if err != nil {
		log.Fatalf("failed to start raft: %v", err)
	}
	s := grpc.NewServer()
	pb.RegisterExampleServer(s, &rpcInterface{
		wordTracker: wt,
		raft:        r,
	})
	tm.Register(s)
	leaderhealth.Setup(r, s, []string{"Example"})
	raftadmin.Register(s, r)
	reflection.Register(s)
	if err := s.Serve(sock); err != nil {
		log.Fatalf("failed to serve: %v", err)
	}
}

// NewRaft creates and starts the Raft node of this process.
//
// All persistent state lives in <raft_data_dir>/<myID>, which is created if
// it does not exist: logs.dat (log store), stable.dat (stable store) and the
// file snapshot store, which retains 3 snapshots. Peers are reached through
// the returned gRPC transport manager, which the caller must register on its
// gRPC server.
//
// When --raft_bootstrap is set the node bootstraps a cluster whose only
// voter is itself.
// NOTE(review): bootstrapping a node that already has state on disk is
// expected to fail, so the flag should presumably only be passed on the very
// first start — confirm against the hashicorp/raft documentation.
//
// On error the returned Raft and transport manager are nil, and the stores
// opened so far are closed again.
func NewRaft(ctx context.Context, myID, myAddress string, fsm raft.FSM) (*raft.Raft, *transport.Manager, error) {
	c := raft.DefaultConfig()
	c.LocalID = raft.ServerID(myID)

	// The Bolt stores below do not create missing parent directories, so a
	// fresh node would otherwise fail on its first start.
	baseDir := filepath.Join(*raftDir, myID)
	if err := os.MkdirAll(baseDir, 0700); err != nil {
		return nil, nil, fmt.Errorf("os.MkdirAll(%q): %w", baseDir, err)
	}

	logsPath := filepath.Join(baseDir, "logs.dat")
	ldb, err := boltdb.NewBoltStore(logsPath)
	if err != nil {
		return nil, nil, fmt.Errorf(`boltdb.NewBoltStore(%q): %w`, logsPath, err)
	}

	stablePath := filepath.Join(baseDir, "stable.dat")
	sdb, err := boltdb.NewBoltStore(stablePath)
	if err != nil {
		ldb.Close() // best effort; the open error is the one worth reporting
		return nil, nil, fmt.Errorf(`boltdb.NewBoltStore(%q): %w`, stablePath, err)
	}

	// closeStores releases both Bolt files on a failure path. Close errors
	// are ignored because the original failure is what gets reported.
	closeStores := func() {
		sdb.Close()
		ldb.Close()
	}

	fss, err := raft.NewFileSnapshotStore(baseDir, 3, os.Stderr)
	if err != nil {
		closeStores()
		return nil, nil, fmt.Errorf(`raft.NewFileSnapshotStore(%q, ...): %w`, baseDir, err)
	}

	tm := transport.New(raft.ServerAddress(myAddress), []grpc.DialOption{grpc.WithInsecure()})

	r, err := raft.NewRaft(c, fsm, ldb, sdb, fss, tm.Transport())
	if err != nil {
		closeStores()
		return nil, nil, fmt.Errorf("raft.NewRaft: %w", err)
	}

	if *raftBootstrap {
		cfg := raft.Configuration{
			Servers: []raft.Server{
				{
					Suffrage: raft.Voter,
					ID:       raft.ServerID(myID),
					Address:  raft.ServerAddress(myAddress),
				},
			},
		}
		if err := r.BootstrapCluster(cfg).Error(); err != nil {
			// The node is already running at this point: stop it before
			// closing the stores it is using.
			_ = r.Shutdown().Error()
			closeStores()
			return nil, nil, fmt.Errorf("raft.Raft.BootstrapCluster: %w", err)
		}
	}

	return r, tm, nil
}

application.go:

package main

import (
	"context"
	"fmt"
	"io"
	"io/ioutil"
	"strings"
	"sync"
	"time"

	pb "github.com/Jille/raft-grpc-example/proto"
	"github.com/Jille/raft-grpc-leader-rpc/rafterrors"
	"github.com/hashicorp/raft"
)

// wordTracker keeps track of the three longest words it ever saw.
// It is the state machine (raft.FSM) that committed log entries are applied to.
type wordTracker struct {
	// mtx guards words: Apply takes the write lock, GetWords the read lock.
	mtx sync.RWMutex
	// words holds the best words, best first as ordered by compareWords.
	// Unused slots are empty strings.
	words [3]string
}

// Compile-time check that *wordTracker implements raft.FSM.
var _ raft.FSM = &wordTracker{}

// compareWords reports whether a ranks ahead of b: the longer word wins, and
// words of equal length are ordered lexicographically (smaller first).
func compareWords(a, b string) bool {
	if la, lb := len(a), len(b); la != lb {
		return la > lb
	}
	return a < b
}

// cloneWords returns the three words as a freshly allocated slice, so the
// caller can keep or modify the result without touching the tracker's array.
func cloneWords(words [3]string) []string {
	out := make([]string, len(words))
	copy(out, words[:])
	return out
}

// Apply feeds one committed log entry into the tracker. The entry's data is
// taken as a word; if it ranks ahead of any stored word it is inserted at
// that position and the lower-ranked words move down one slot, the last one
// dropping off. The returned value is always nil.
func (f *wordTracker) Apply(l *raft.Log) interface{} {
	candidate := string(l.Data)

	f.mtx.Lock()
	defer f.mtx.Unlock()

	for pos := range f.words {
		if !compareWords(candidate, f.words[pos]) {
			continue
		}
		// Shift the tail right by one to make room, then insert.
		copy(f.words[pos+1:], f.words[pos:])
		f.words[pos] = candidate
		break
	}
	return nil
}

// Snapshot captures the current words for persisting. The snapshot owns a
// copy of the words, so later Apply calls cannot alter it.
func (f *wordTracker) Snapshot() (raft.FSMSnapshot, error) {
	frozen := cloneWords(f.words)
	return &snapshot{words: frozen}, nil
}

// Restore replaces the tracker's state with the contents of a snapshot.
//
// The snapshot is read in full and split on "\n", the format written by
// snapshot.Persist. At most three entries are used. Slots the snapshot does
// not cover are reset to the empty string, so no previous state survives a
// restore. A read error is returned unchanged and leaves the tracker
// untouched.
//
// Limitation: a word that itself contains "\n" is split into several entries.
func (f *wordTracker) Restore(r io.ReadCloser) error {
	b, err := ioutil.ReadAll(r)
	if err != nil {
		return err
	}

	// Build the new state off to the side so the swap below is a single
	// assignment and old words cannot leak into the restored state.
	var restored [3]string
	copy(restored[:], strings.Split(string(b), "\n"))

	// GetWords may be reading concurrently from a gRPC goroutine.
	f.mtx.Lock()
	defer f.mtx.Unlock()
	f.words = restored
	return nil
}

// snapshot is a point-in-time copy of the tracker's words, handed out by
// wordTracker.Snapshot and written to a sink by Persist.
type snapshot struct {
	// words is the copied word list, in the tracker's order.
	words []string
}

// Persist writes the words to sink as a single newline-separated payload.
// If the write fails the sink is cancelled and the wrapped write error is
// returned; otherwise the result of closing the sink is returned.
func (s *snapshot) Persist(sink raft.SnapshotSink) error {
	payload := []byte(strings.Join(s.words, "\n"))
	if _, err := sink.Write(payload); err != nil {
		sink.Cancel()
		return fmt.Errorf("sink.Write(): %v", err)
	}
	return sink.Close()
}

// Release is a no-op: the snapshot only holds its own copy of the words and
// has no other resources to free.
func (s *snapshot) Release() {
}

// rpcInterface implements the Example gRPC service on top of the local Raft
// node and its word tracker.
type rpcInterface struct {
	// wordTracker is the state machine that GetWords reads from.
	wordTracker *wordTracker
	// raft is the local Raft node that AddWord submits new words to.
	raft *raft.Raft
}

// AddWord submits the requested word to Raft with a one second timeout and
// waits for the result. On success the response carries the log index of the
// entry; on failure the error is marked as retriable for the client.
func (r rpcInterface) AddWord(ctx context.Context, req *pb.AddWordRequest) (*pb.AddWordResponse, error) {
	future := r.raft.Apply([]byte(req.GetWord()), time.Second)
	err := future.Error()
	if err != nil {
		return nil, rafterrors.MarkRetriable(err)
	}
	resp := &pb.AddWordResponse{CommitIndex: future.Index()}
	return resp, nil
}

// GetWords returns a copy of the words tracked by this node together with
// the node's applied index. The words are read from local state under the
// tracker's read lock.
func (r rpcInterface) GetWords(ctx context.Context, req *pb.GetWordsRequest) (*pb.GetWordsResponse, error) {
	tracker := r.wordTracker
	tracker.mtx.RLock()
	defer tracker.mtx.RUnlock()

	resp := &pb.GetWordsResponse{}
	resp.BestWords = cloneWords(tracker.words)
	resp.ReadAtIndex = r.raft.AppliedIndex()
	return resp, nil
}

启动raft集群:

启动raft节点:

./raft-cli --raft_bootstrap --raft_id=nodeA --address=localhost:50051 --raft_data_dir /tmp/my-raft-cluster --log_file ./logs/node_a.log &

./raft-cli --raft_id=nodeB --address=localhost:50052 --raft_data_dir /tmp/my-raft-cluster --log_file ./logs/node_b.log &

./raft-cli  --raft_id=nodeC --address=localhost:50053 --raft_data_dir /tmp/my-raft-cluster --log_file ./logs/node_c.log &

组建raft三节点的集群:

./raftadmin localhost:50051 add_voter nodeB localhost:50052 0

./raftadmin --leader multi:///localhost:50051,localhost:50052 add_voter nodeC localhost:50053 0

Wed Mar 2 11:41:11 CST 2022 ./raftadmin localhost:50051 add_voter nodeB localhost:50052 0
2022/03/02 11:41:12 Invoking AddVoter(id: "nodeB"
address: "localhost:50052"
)
2022/03/02 11:41:12 Response: operation_token: "803448781d455d7b6e5f03bb9e87c589cafc28c9"
2022/03/02 11:41:12 Invoking Await(operation_token: "803448781d455d7b6e5f03bb9e87c589cafc28c9"
)
2022/03/02 11:41:12 Response: index: 3
Wed Mar 2 11:41:12 CST 2022 ./raftadmin --leader multi:///localhost:50051,localhost:50052 add_voter nodeC localhost:50053 0
2022/03/02 11:41:12 Invoking AddVoter(id: "nodeC"
address: "localhost:50053"
)
2022/03/02 11:41:12 Response: operation_token: "9c04ea46a003eef734e00b3568768a3761242050"
2022/03/02 11:41:12 Invoking Await(operation_token: "9c04ea46a003eef734e00b3568768a3761242050"
)
2022/03/02 11:41:12 Response: index: 4

查看集群信息:

Wed Mar 2 11:41:36 CST 2022 ./raftadmin --leader multi:///localhost:50051,localhost:50052,localhost:50053 get_configuration
2022/03/02 11:41:37 Invoking GetConfiguration()
2022/03/02 11:41:37 Response: servers: {
  id: "nodeA"
  address: "localhost:50051"
}
servers: {
  id: "nodeB"
  address: "localhost:50052"
}
servers: {
  id: "nodeC"
  address: "localhost:50053"
}


Wed Mar 2 11:41:37 CST 2022 ./raftadmin --leader multi:///localhost:50051,localhost:50052,localhost:50053 leader
2022/03/02 11:41:37 Invoking Leader()
2022/03/02 11:41:37 Response: address: "localhost:50051"

查看主从切换过程:

杀掉当前leader nodeA,  查看选主过程及新主的日志

nodeB日志:

user log file =  ./logs/node_b.log
2022-03-02T11:41:04.237+0800 [INFO]  raft: initial configuration: index=0 servers=[]
2022-03-02T11:41:04.238+0800 [INFO]  raft: entering follower state: follower="Node at localhost:50052 [Follower]" leader=
2022-03-02T11:41:05.735+0800 [WARN]  raft: no known peers, aborting election
2022-03-02T11:41:12.012+0800 [WARN]  raft: failed to get previous log: previous-index=3 last-index=0 error="log not found"
2022-03-02T11:41:23.593+0800 [WARN]  raft: heartbeat timeout reached, starting election: last-leader=localhost:50051
2022-03-02T11:41:23.594+0800 [INFO]  raft: entering candidate state: node="Node at localhost:50052 [Candidate]" term=3
2022-03-02T11:41:23.595+0800 [DEBUG] raft: votes: needed=2
2022-03-02T11:41:23.595+0800 [DEBUG] raft: vote granted: from=nodeB term=3 tally=1
2022-03-02T11:41:23.596+0800 [ERROR] raft: failed to make requestVote RPC: target="{Voter nodeA localhost:50051}" error="rpc error: code = Unavailable desc = connection error: desc = "transport: Error while dialing dial tcp [::1]:50051: connect: connection refused""
2022-03-02T11:41:24.242+0800 [INFO]  raft: duplicate requestVote for same term: term=3
2022-03-02T11:41:25.040+0800 [WARN]  raft: Election timeout reached, restarting election
2022-03-02T11:41:25.041+0800 [INFO]  raft: entering candidate state: node="Node at localhost:50052 [Candidate]" term=4
2022-03-02T11:41:25.042+0800 [DEBUG] raft: votes: needed=2
2022-03-02T11:41:25.042+0800 [DEBUG] raft: vote granted: from=nodeB term=4 tally=1
2022-03-02T11:41:25.042+0800 [ERROR] raft: failed to make requestVote RPC: target="{Voter nodeA localhost:50051}" error="rpc error: code = Unavailable desc = connection error: desc = "transport: Error while dialing dial tcp [::1]:50051: connect: connection refused""
2022-03-02T11:41:25.043+0800 [DEBUG] raft: vote granted: from=nodeC term=4 tally=2
2022-03-02T11:41:25.044+0800 [INFO]  raft: election won: tally=2
2022-03-02T11:41:25.044+0800 [INFO]  raft: entering leader state: leader="Node at localhost:50052 [Leader]"
2022-03-02T11:41:25.044+0800 [INFO]  raft: added peer, starting replication: peer=nodeA
2022-03-02T11:41:25.044+0800 [INFO]  raft: added peer, starting replication: peer=nodeC
2022-03-02T11:41:25.045+0800 [ERROR] raft: failed to appendEntries to: peer="{Voter nodeA localhost:50051}" error="rpc error: code = Unavailable desc = connection error: desc = "transport: Error while dialing dial tcp [::1]:50051: connect: connection refused""
2022-03-02T11:41:25.045+0800 [INFO]  raft: pipelining replication: peer="{Voter nodeC localhost:50053}"
2022-03-02T11:41:25.057+0800 [ERROR] raft: failed to appendEntries to: peer="{Voter nodeA localhost:50051}" error="rpc error: code = Unavailable desc = connection error: desc = "transport: Error while dialing dial tcp [::1]:50051: connect: connection refused""
2022-03-02T11:41:25.134+0800 [ERROR] raft: failed to appendEntries to: peer="{Voter nodeA localhost:50051}" error="rpc error: code = Unavailable desc = connection error: desc = "transport: Error while dialing dial tcp [::1]:50051: connect: connection refused""
2022-03-02T11:41:25.211+0800 [ERROR] raft: failed to appendEntries to: peer="{Voter nodeA localhost:50051}" error="rpc error: code = Unavailable desc = connection error: desc = "transport: Error while dialing dial tcp [::1]:50051: connect: connection refused""
2022-03-02T11:41:25.227+0800 [ERROR] raft: failed to heartbeat to: peer=localhost:50051 error="rpc error: code = Unavailable desc = connection error: desc = "transport: Error while dialing dial tcp [::1]:50051: connect: connection refused""
2022-03-02T11:41:25.317+0800 [ERROR] raft: failed to appendEntries to: peer="{Voter nodeA localhost:50051}" error="rpc error: code = Unavailable desc = connection error: desc = "transport: Error while dialing dial tcp [::1]:50051: connect: connection refused""
2022-03-02T11:41:25.382+0800 [ERROR] raft: failed to heartbeat to: peer=localhost:50051 error="rpc error: code = Unavailable desc = connection error: desc = "transport: Error while dialing dial tcp [::1]:50051: connect: connection refused""
2022-03-02T11:41:25.468+0800 [ERROR] raft: failed to appendEntries to: peer="{Voter nodeA localhost:50051}" error="rpc error: code = Unavailable desc = connection error: desc = "transport: Error while dialing dial tcp [::1]:50051: connect: connection refused""
2022-03-02T11:41:25.546+0800 [WARN]  raft: failed to contact: server-id=nodeA time=501.466164ms
2022-03-02T11:41:25.549+0800 [ERROR] raft: failed to heartbeat to: peer=localhost:50051 error="rpc error: code = Unavailable desc = connection error: desc = "transport: Error while dialing dial tcp [::1]:50051: connect: connection refused""
2022-03-02T11:41:25.702+0800 [ERROR] raft: failed to appendEntries to: peer="{Voter nodeA localhost:50051}" error="rpc error: code = Unavailable desc = connection error: desc = "transport: Error while dialing dial tcp [::1]:50051: connect: connection refused""
2022-03-02T11:41:25.738+0800 [ERROR] raft: failed to heartbeat to: peer=localhost:50051 error="rpc error: code = Unavailable desc = connection error: desc = "transport: Error while dialing dial tcp [::1]:50051: connect: connection refused""
2022-03-02T11:41:25.957+0800 [ERROR] raft: failed to heartbeat to: peer=localhost:50051 error="rpc error: code = Unavailable desc = connection error: desc = "transport: Error while dialing dial tcp [::1]:50051: connect: connection refused""
2022-03-02T11:41:26.003+0800 [WARN]  raft: failed to contact: server-id=nodeA time=958.706587ms
2022-03-02T11:41:26.113+0800 [ERROR] raft: failed to appendEntries to: peer="{Voter nodeA localhost:50051}" error="rpc error: code = Unavailable desc = connection error: desc = "transport: Error while dialing dial tcp [::1]:50051: connect: connection refused""
2022-03-02T11:41:26.166+0800 [ERROR] raft: failed to heartbeat to: peer=localhost:50051 error="rpc error: code = Unavailable desc = connection error: desc = "transport: Error while dialing dial tcp [::1]:50051: connect: connection refused""
2022-03-02T11:41:26.478+0800 [WARN]  raft: failed to contact: server-id=nodeA time=1.433317227s
2022-03-02T11:41:26.489+0800 [ERROR] raft: failed to heartbeat to: peer=localhost:50051 error="rpc error: code = Unavailable desc = connection error: desc = "transport: Error while dialing dial tcp [::1]:50051: connect: connection refused""
2022-03-02T11:41:26.838+0800 [ERROR] raft: failed to appendEntries to: peer="{Voter nodeA localhost:50051}" error="rpc error: code = Unavailable desc = connection error: desc = "transport: Error while dialing dial tcp [::1]:50051: connect: connection refused""
2022-03-02T11:41:26.943+0800 [DEBUG] raft: failed to contact: server-id=nodeA time=1.898614989s
2022-03-02T11:41:26.977+0800 [ERROR] raft: failed to heartbeat to: peer=localhost:50051 error="rpc error: code = Unavailable desc = connection error: desc = "transport: Error while dialing dial tcp [::1]:50051: connect: connection refused""
2022-03-02T11:41:27.428+0800 [DEBUG] raft: failed to contact: server-id=nodeA time=2.383548498s
2022-03-02T11:41:27.751+0800 [ERROR] raft: failed to heartbeat to: peer=localhost:50051 error="rpc error: code = Unavailable desc = connection error: desc = "transport: Error while dialing dial tcp [::1]:50051: connect: connection refused""
2022-03-02T11:41:27.841+0800 [DEBUG] raft: failed to contact: server-id=nodeA time=2.79708776s
2022-03-02T11:41:28.207+0800 [ERROR] raft: failed to appendEntries to: peer="{Voter nodeA localhost:50051}" error="rpc error: code = Unavailable desc = connection error: desc = "transport: Error while dialing dial tcp [::1]:50051: connect: connection refused""
2022-03-02T11:41:28.310+0800 [DEBUG] raft: failed to contact: server-id=nodeA time=3.265780605s
2022-03-02T11:41:28.761+0800 [DEBUG] raft: failed to contact: server-id=nodeA time=3.717052776s
2022-03-02T11:41:29.210+0800 [ERROR] raft: failed to heartbeat to: peer=localhost:50051 error="rpc error: code = Unavailable desc = connection error: desc = "transport: Error while dialing dial tcp [::1]:50051: connect: connection refused""
2022-03-02T11:41:29.223+0800 [DEBUG] raft: failed to contact: server-id=nodeA time=4.178659765s
2022-03-02T11:41:29.707+0800 [DEBUG] raft: failed to contact: server-id=nodeA time=4.662458131s
2022-03-02T11:41:30.196+0800 [DEBUG] raft: failed to contact: server-id=nodeA time=5.152043059s
2022-03-02T11:41:30.670+0800 [DEBUG] raft: failed to contact: server-id=nodeA time=5.625986263s
2022-03-02T11:41:30.822+0800 [ERROR] raft: failed to appendEntries to: peer="{Voter nodeA localhost:50051}" error="rpc error: code = Unavailable desc = connection error: desc = "transport: Error while dialing dial tcp [::1]:50051: connect: connection refused""
2022-03-02T11:41:31.158+0800 [DEBUG] raft: failed to contact: server-id=nodeA time=6.113560034s
2022-03-02T11:41:31.639+0800 [DEBUG] raft: failed to contact: server-id=nodeA time=6.594738853s
2022-03-02T11:41:31.912+0800 [ERROR] raft: failed to heartbeat to: peer=localhost:50051 error="rpc error: code = Unavailable desc = connection error: desc = "transport: Error while dialing dial tcp [::1]:50051: connect: connection refused""
2022-03-02T11:41:32.095+0800 [DEBUG] raft: failed to contact: server-id=nodeA time=7.050420391s
2022-03-02T11:41:32.568+0800 [DEBUG] raft: failed to contact: server-id=nodeA time=7.52402893s
2022-03-02T11:41:33.047+0800 [DEBUG] raft: failed to contact: server-id=nodeA time=8.002913832s
2022-03-02T11:41:33.230+0800 [WARN]  raft: rejecting vote request since we have a leader: from=localhost:50051 leader=localhost:50052
2022-03-02T11:41:33.520+0800 [DEBUG] raft: failed to contact: server-id=nodeA time=8.476139203s
2022-03-02T11:41:33.958+0800 [DEBUG] raft: failed to contact: server-id=nodeA time=8.913755378s
2022-03-02T11:41:34.422+0800 [DEBUG] raft: failed to contact: server-id=nodeA time=9.377328512s
2022-03-02T11:41:34.550+0800 [WARN]  raft: rejecting vote request since we have a leader: from=localhost:50051 leader=localhost:50052
2022-03-02T11:41:34.897+0800 [DEBUG] raft: failed to contact: server-id=nodeA time=9.852982432s
2022-03-02T11:41:35.374+0800 [DEBUG] raft: failed to contact: server-id=nodeA time=10.329970354s
2022-03-02T11:41:35.873+0800 [DEBUG] raft: failed to contact: server-id=nodeA time=10.828381275s
2022-03-02T11:41:36.001+0800 [ERROR] raft: peer has newer term, stopping replication: peer="{Voter nodeA localhost:50051}"
2022-03-02T11:41:36.003+0800 [INFO]  raft: entering follower state: follower="Node at localhost:50052 [Follower]" leader=
2022-03-02T11:41:36.003+0800 [INFO]  raft: aborting pipeline replication: peer="{Voter nodeC localhost:50053}"
2022-03-02T11:41:36.281+0800 [DEBUG] raft: lost leadership because received a requestVote with a newer term
2022-03-02T11:41:36.282+0800 [WARN]  raft: rejecting vote request since our last term is greater: candidate=localhost:50051 last-term=4 last-candidate-term=2
2022-03-02T11:41:37.363+0800 [DEBUG] raft: lost leadership because received a requestVote with a newer term
2022-03-02T11:41:37.364+0800 [WARN]  raft: rejecting vote request since our last term is greater: candidate=localhost:50051 last-term=4 last-candidate-term=2
2022-03-02T11:41:37.580+0800 [WARN]  raft: heartbeat timeout reached, starting election: last-leader=
2022-03-02T11:41:37.580+0800 [INFO]  raft: entering candidate state: node="Node at localhost:50052 [Candidate]" term=8
2022-03-02T11:41:37.582+0800 [DEBUG] raft: votes: needed=2
2022-03-02T11:41:37.582+0800 [DEBUG] raft: vote granted: from=nodeB term=8 tally=1
2022-03-02T11:41:37.583+0800 [DEBUG] raft: vote granted: from=nodeA term=8 tally=2
2022-03-02T11:41:37.584+0800 [INFO]  raft: election won: tally=2
2022-03-02T11:41:37.584+0800 [INFO]  raft: entering leader state: leader="Node at localhost:50052 [Leader]"
2022-03-02T11:41:37.585+0800 [INFO]  raft: added peer, starting replication: peer=nodeA
2022-03-02T11:41:37.586+0800 [INFO]  raft: added peer, starting replication: peer=nodeC
2022-03-02T11:41:37.587+0800 [INFO]  raft: pipelining replication: peer="{Voter nodeC localhost:50053}"
2022-03-02T11:41:37.588+0800 [WARN]  raft: appendEntries rejected, sending older logs: peer="{Voter nodeA localhost:50051}" next=5
2022-03-02T11:41:37.591+0800 [INFO]  raft: pipelining replication: peer="{Voter nodeA localhost:50051}"

nodeC日志:

user log file =  ./logs/node_c.log
2022-03-02T11:41:04.234+0800 [INFO]  raft: initial configuration: index=0 servers=[]
2022-03-02T11:41:04.235+0800 [INFO]  raft: entering follower state: follower="Node at localhost:50053 [Follower]" leader=
2022-03-02T11:41:05.358+0800 [WARN]  raft: no known peers, aborting election
2022-03-02T11:41:12.038+0800 [WARN]  raft: failed to get previous log: previous-index=4 last-index=0 error="log not found"
2022-03-02T11:41:23.596+0800 [WARN]  raft: rejecting vote request since we have a leader: from=localhost:50052 leader=localhost:50051
2022-03-02T11:41:24.236+0800 [WARN]  raft: heartbeat timeout reached, starting election: last-leader=localhost:50051
2022-03-02T11:41:24.238+0800 [INFO]  raft: entering candidate state: node="Node at localhost:50053 [Candidate]" term=3
2022-03-02T11:41:24.241+0800 [ERROR] raft: failed to make requestVote RPC: target="{Voter nodeA localhost:50051}" error="rpc error: code = Unavailable desc = connection error: desc = "transport: Error while dialing dial tcp [::1]:50051: connect: connection refused""
2022-03-02T11:41:24.242+0800 [DEBUG] raft: votes: needed=2
2022-03-02T11:41:24.244+0800 [DEBUG] raft: vote granted: from=nodeC term=3 tally=1
2022-03-02T11:41:25.042+0800 [DEBUG] raft: lost leadership because received a requestVote with a newer term
2022-03-02T11:41:25.043+0800 [INFO]  raft: entering follower state: follower="Node at localhost:50053 [Follower]" leader=
2022-03-02T11:41:33.230+0800 [WARN]  raft: rejecting vote request since we have a leader: from=localhost:50051 leader=localhost:50052
2022-03-02T11:41:34.550+0800 [WARN]  raft: rejecting vote request since we have a leader: from=localhost:50051 leader=localhost:50052
2022-03-02T11:41:36.281+0800 [WARN]  raft: rejecting vote request since we have a leader: from=localhost:50051 leader=localhost:50052
2022-03-02T11:41:37.363+0800 [WARN]  raft: rejecting vote request since we have a leader: from=localhost:50051 leader=localhost:50052
2022-03-02T11:41:37.582+0800 [DEBUG] raft: lost leadership because received a requestVote with a newer term

重启nodeA, 查看nodeA日志

user log file =  ./logs/node_a.log
2022-03-02T11:41:04.237+0800 [INFO]  raft: initial configuration: index=0 servers=[]
2022-03-02T11:41:04.237+0800 [INFO]  raft: entering follower state: follower="Node at localhost:50051 [Follower]" leader=
2022-03-02T11:41:05.725+0800 [WARN]  raft: heartbeat timeout reached, starting election: last-leader=
2022-03-02T11:41:05.726+0800 [INFO]  raft: entering candidate state: node="Node at localhost:50051 [Candidate]" term=2
2022-03-02T11:41:05.727+0800 [DEBUG] raft: votes: needed=1
2022-03-02T11:41:05.728+0800 [DEBUG] raft: vote granted: from=nodeA term=2 tally=1
2022-03-02T11:41:05.728+0800 [INFO]  raft: election won: tally=1
2022-03-02T11:41:05.728+0800 [INFO]  raft: entering leader state: leader="Node at localhost:50051 [Leader]"
2022-03-02T11:41:12.002+0800 [INFO]  raft: updating configuration: command=AddVoter server-id=nodeB server-addr=localhost:50052 servers="[{Suffrage:Voter ID:nodeA Address:localhost:50051} {Suffrage:Voter ID:nodeB Address:localhost:50052}]"
2022-03-02T11:41:12.008+0800 [INFO]  raft: added peer, starting replication: peer=nodeB
2022-03-02T11:41:12.013+0800 [WARN]  raft: appendEntries rejected, sending older logs: peer="{Voter nodeB localhost:50052}" next=1
2022-03-02T11:41:12.021+0800 [INFO]  raft: pipelining replication: peer="{Voter nodeB localhost:50052}"
2022-03-02T11:41:12.027+0800 [INFO]  raft: updating configuration: command=AddVoter server-id=nodeC server-addr=localhost:50053 servers="[{Suffrage:Voter ID:nodeA Address:localhost:50051} {Suffrage:Voter ID:nodeB Address:localhost:50052} {Suffrage:Voter ID:nodeC Address:localhost:50053}]"
2022-03-02T11:41:12.033+0800 [INFO]  raft: added peer, starting replication: peer=nodeC
2022-03-02T11:41:12.040+0800 [WARN]  raft: appendEntries rejected, sending older logs: peer="{Voter nodeC localhost:50053}" next=1
2022-03-02T11:41:12.042+0800 [INFO]  raft: pipelining replication: peer="{Voter nodeC localhost:50053}"
user log file =  ./logs/node_a.log
2022-03-02T11:41:32.220+0800 [INFO]  raft: initial configuration: index=4 servers="[{Suffrage:Voter ID:nodeA Address:localhost:50051} {Suffrage:Voter ID:nodeB Address:localhost:50052} {Suffrage:Voter ID:nodeC Address:localhost:50053}]"
2022-03-02T11:41:32.220+0800 [INFO]  raft: entering follower state: follower="Node at localhost:50051 [Follower]" leader=
2022-03-02T11:41:33.226+0800 [WARN]  raft: heartbeat timeout reached, starting election: last-leader=
2022-03-02T11:41:33.227+0800 [INFO]  raft: entering candidate state: node="Node at localhost:50051 [Candidate]" term=3
2022-03-02T11:41:33.228+0800 [DEBUG] raft: votes: needed=2
2022-03-02T11:41:33.229+0800 [DEBUG] raft: vote granted: from=nodeA term=3 tally=1
2022-03-02T11:41:33.231+0800 [DEBUG] raft: newer term discovered, fallback to follower
2022-03-02T11:41:33.231+0800 [INFO]  raft: entering follower state: follower="Node at localhost:50051 [Follower]" leader=
2022-03-02T11:41:34.546+0800 [WARN]  raft: heartbeat timeout reached, starting election: last-leader=
2022-03-02T11:41:34.548+0800 [INFO]  raft: entering candidate state: node="Node at localhost:50051 [Candidate]" term=5
2022-03-02T11:41:34.549+0800 [DEBUG] raft: votes: needed=2
2022-03-02T11:41:34.550+0800 [DEBUG] raft: vote granted: from=nodeA term=5 tally=1
2022-03-02T11:41:36.274+0800 [WARN]  raft: Election timeout reached, restarting election
2022-03-02T11:41:36.276+0800 [INFO]  raft: entering candidate state: node="Node at localhost:50051 [Candidate]" term=6
2022-03-02T11:41:36.281+0800 [DEBUG] raft: votes: needed=2
2022-03-02T11:41:36.282+0800 [DEBUG] raft: vote granted: from=nodeA term=6 tally=1
2022-03-02T11:41:37.360+0800 [WARN]  raft: Election timeout reached, restarting election
2022-03-02T11:41:37.361+0800 [INFO]  raft: entering candidate state: node="Node at localhost:50051 [Candidate]" term=7
2022-03-02T11:41:37.363+0800 [DEBUG] raft: votes: needed=2
2022-03-02T11:41:37.364+0800 [DEBUG] raft: vote granted: from=nodeA term=7 tally=1
2022-03-02T11:41:37.582+0800 [DEBUG] raft: lost leadership because received a requestVote with a newer term
2022-03-02T11:41:37.583+0800 [INFO]  raft: entering follower state: follower="Node at localhost:50051 [Follower]" leader=
2022-03-02T11:41:37.587+0800 [WARN]  raft: failed to get previous log: previous-index=5 last-index=4 error="log not found"

主从切换结论:

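  1.  leader挂掉后, 两个follower自发选举出新的leader (本次为nodeB, term=4)
  2.  新leader重复尝试与挂掉的旧主建立连接做日志复制
  3.  旧主重新启动后, 先因收不到心跳而多次发起选举 (term被推高到7), 导致nodeB短暂退位; 随后nodeB在term=8重新当选leader, 旧主最终成为follower并从新主同步日志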

举报

相关推荐

0 条评论