From aac485d67b341948e1509584e2b73b566e8c7266 Mon Sep 17 00:00:00 2001 From: Brandon Philips Date: Tue, 6 Aug 2013 10:50:04 -0700 Subject: [PATCH] bump(github.com/coreos/go-raft): 75c9644bbc43e5ef7805480c13deacec02195706 --- .../github.com/coreos/go-raft/.gitignore | 24 + .../github.com/coreos/go-raft/.travis.yml | 8 + third_party/github.com/coreos/go-raft/LICENSE | 20 + .../github.com/coreos/go-raft/Makefile | 13 + .../github.com/coreos/go-raft/README.md | 68 + .../coreos/go-raft/append_entries_request.go | 98 ++ .../go-raft/append_entries_request_test.go | 40 + .../coreos/go-raft/append_entries_response.go | 70 + .../go-raft/append_entries_response_test.go | 34 + .../github.com/coreos/go-raft/command.go | 92 ++ .../github.com/coreos/go-raft/debug.go | 116 ++ .../coreos/go-raft/http_transporter.go | 195 +++ .../coreos/go-raft/http_transporter_test.go | 153 ++ .../github.com/coreos/go-raft/join_command.go | 28 + .../coreos/go-raft/leave_command.go | 27 + third_party/github.com/coreos/go-raft/log.go | 610 ++++++++ .../github.com/coreos/go-raft/log_entry.go | 99 ++ .../github.com/coreos/go-raft/log_test.go | 232 +++ .../github.com/coreos/go-raft/nop_command.go | 26 + third_party/github.com/coreos/go-raft/peer.go | 271 ++++ .../protobuf/append_entries_request.pb.go | 115 ++ .../protobuf/append_entries_request.proto | 18 + .../protobuf/append_entries_responses.pb.go | 57 + .../protobuf/append_entries_responses.proto | 8 + .../coreos/go-raft/protobuf/log_entry.pb.go | 57 + .../coreos/go-raft/protobuf/log_entry.proto | 8 + .../protobuf/request_vote_request.pb.go | 57 + .../protobuf/request_vote_request.proto | 8 + .../protobuf/request_vote_responses.pb.go | 41 + .../protobuf/request_vote_responses.proto | 6 + .../protobuf/snapshot_recovery_request.pb.go | 65 + .../protobuf/snapshot_recovery_request.proto | 9 + .../protobuf/snapshot_recovery_response.pb.go | 49 + .../protobuf/snapshot_recovery_response.proto | 7 + .../go-raft/protobuf/snapshot_request.pb.go | 49 + 
.../go-raft/protobuf/snapshot_request.proto | 7 + .../go-raft/protobuf/snapshot_response.pb.go | 33 + .../go-raft/protobuf/snapshot_response.proto | 5 + .../coreos/go-raft/request_vote_request.go | 68 + .../coreos/go-raft/request_vote_response.go | 61 + .../github.com/coreos/go-raft/server.go | 1260 +++++++++++++++++ .../github.com/coreos/go-raft/server_test.go | 504 +++++++ .../github.com/coreos/go-raft/snapshot.go | 65 + .../go-raft/snapshot_recovery_request.go | 77 + .../go-raft/snapshot_recovery_response.go | 69 + .../coreos/go-raft/snapshot_request.go | 70 + .../coreos/go-raft/snapshot_response.go | 61 + third_party/github.com/coreos/go-raft/sort.go | 23 + .../github.com/coreos/go-raft/statemachine.go | 14 + third_party/github.com/coreos/go-raft/test.go | 179 +++ third_party/github.com/coreos/go-raft/time.go | 17 + .../github.com/coreos/go-raft/timer.go | 170 +++ .../github.com/coreos/go-raft/timer_test.go | 86 ++ .../github.com/coreos/go-raft/transporter.go | 16 + .../github.com/coreos/go-raft/z_test.go | 13 + 55 files changed, 5576 insertions(+) create mode 100644 third_party/github.com/coreos/go-raft/.gitignore create mode 100644 third_party/github.com/coreos/go-raft/.travis.yml create mode 100644 third_party/github.com/coreos/go-raft/LICENSE create mode 100644 third_party/github.com/coreos/go-raft/Makefile create mode 100644 third_party/github.com/coreos/go-raft/README.md create mode 100644 third_party/github.com/coreos/go-raft/append_entries_request.go create mode 100644 third_party/github.com/coreos/go-raft/append_entries_request_test.go create mode 100644 third_party/github.com/coreos/go-raft/append_entries_response.go create mode 100644 third_party/github.com/coreos/go-raft/append_entries_response_test.go create mode 100644 third_party/github.com/coreos/go-raft/command.go create mode 100644 third_party/github.com/coreos/go-raft/debug.go create mode 100644 third_party/github.com/coreos/go-raft/http_transporter.go create mode 100644 
third_party/github.com/coreos/go-raft/http_transporter_test.go create mode 100644 third_party/github.com/coreos/go-raft/join_command.go create mode 100644 third_party/github.com/coreos/go-raft/leave_command.go create mode 100644 third_party/github.com/coreos/go-raft/log.go create mode 100644 third_party/github.com/coreos/go-raft/log_entry.go create mode 100644 third_party/github.com/coreos/go-raft/log_test.go create mode 100644 third_party/github.com/coreos/go-raft/nop_command.go create mode 100644 third_party/github.com/coreos/go-raft/peer.go create mode 100644 third_party/github.com/coreos/go-raft/protobuf/append_entries_request.pb.go create mode 100644 third_party/github.com/coreos/go-raft/protobuf/append_entries_request.proto create mode 100644 third_party/github.com/coreos/go-raft/protobuf/append_entries_responses.pb.go create mode 100644 third_party/github.com/coreos/go-raft/protobuf/append_entries_responses.proto create mode 100644 third_party/github.com/coreos/go-raft/protobuf/log_entry.pb.go create mode 100644 third_party/github.com/coreos/go-raft/protobuf/log_entry.proto create mode 100644 third_party/github.com/coreos/go-raft/protobuf/request_vote_request.pb.go create mode 100644 third_party/github.com/coreos/go-raft/protobuf/request_vote_request.proto create mode 100644 third_party/github.com/coreos/go-raft/protobuf/request_vote_responses.pb.go create mode 100644 third_party/github.com/coreos/go-raft/protobuf/request_vote_responses.proto create mode 100644 third_party/github.com/coreos/go-raft/protobuf/snapshot_recovery_request.pb.go create mode 100644 third_party/github.com/coreos/go-raft/protobuf/snapshot_recovery_request.proto create mode 100644 third_party/github.com/coreos/go-raft/protobuf/snapshot_recovery_response.pb.go create mode 100644 third_party/github.com/coreos/go-raft/protobuf/snapshot_recovery_response.proto create mode 100644 third_party/github.com/coreos/go-raft/protobuf/snapshot_request.pb.go create mode 100644 
third_party/github.com/coreos/go-raft/protobuf/snapshot_request.proto create mode 100644 third_party/github.com/coreos/go-raft/protobuf/snapshot_response.pb.go create mode 100644 third_party/github.com/coreos/go-raft/protobuf/snapshot_response.proto create mode 100644 third_party/github.com/coreos/go-raft/request_vote_request.go create mode 100644 third_party/github.com/coreos/go-raft/request_vote_response.go create mode 100644 third_party/github.com/coreos/go-raft/server.go create mode 100644 third_party/github.com/coreos/go-raft/server_test.go create mode 100644 third_party/github.com/coreos/go-raft/snapshot.go create mode 100644 third_party/github.com/coreos/go-raft/snapshot_recovery_request.go create mode 100644 third_party/github.com/coreos/go-raft/snapshot_recovery_response.go create mode 100644 third_party/github.com/coreos/go-raft/snapshot_request.go create mode 100644 third_party/github.com/coreos/go-raft/snapshot_response.go create mode 100644 third_party/github.com/coreos/go-raft/sort.go create mode 100644 third_party/github.com/coreos/go-raft/statemachine.go create mode 100644 third_party/github.com/coreos/go-raft/test.go create mode 100644 third_party/github.com/coreos/go-raft/time.go create mode 100644 third_party/github.com/coreos/go-raft/timer.go create mode 100644 third_party/github.com/coreos/go-raft/timer_test.go create mode 100644 third_party/github.com/coreos/go-raft/transporter.go create mode 100644 third_party/github.com/coreos/go-raft/z_test.go diff --git a/third_party/github.com/coreos/go-raft/.gitignore b/third_party/github.com/coreos/go-raft/.gitignore new file mode 100644 index 000000000..56a5e9893 --- /dev/null +++ b/third_party/github.com/coreos/go-raft/.gitignore @@ -0,0 +1,24 @@ +# Compiled Object files, Static and Dynamic libs (Shared Objects) +*.o +*.a +*.so + +# Folders +_obj +_test + +# Architecture specific extensions/prefixes +*.[568vq] +[568vq].out + +*.cgo1.go +*.cgo2.c +_cgo_defun.c +_cgo_gotypes.go +_cgo_export.* + 
+_testmain.go + +*.exe + +coverage.html diff --git a/third_party/github.com/coreos/go-raft/.travis.yml b/third_party/github.com/coreos/go-raft/.travis.yml new file mode 100644 index 000000000..5f70bdf4c --- /dev/null +++ b/third_party/github.com/coreos/go-raft/.travis.yml @@ -0,0 +1,8 @@ +language: go + +go: + - 1.1 + +install: + - make dependencies + diff --git a/third_party/github.com/coreos/go-raft/LICENSE b/third_party/github.com/coreos/go-raft/LICENSE new file mode 100644 index 000000000..ee7f22228 --- /dev/null +++ b/third_party/github.com/coreos/go-raft/LICENSE @@ -0,0 +1,20 @@ +Copyright 2013 go-raft contributors + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
diff --git a/third_party/github.com/coreos/go-raft/Makefile b/third_party/github.com/coreos/go-raft/Makefile new file mode 100644 index 000000000..afbbf63c7 --- /dev/null +++ b/third_party/github.com/coreos/go-raft/Makefile @@ -0,0 +1,13 @@ +all: test + +coverage: + gocov test github.com/benbjohnson/go-raft | gocov-html > coverage.html + open coverage.html + +dependencies: + go get -d . + +test: + go test -v ./... + +.PHONY: coverage dependencies test diff --git a/third_party/github.com/coreos/go-raft/README.md b/third_party/github.com/coreos/go-raft/README.md new file mode 100644 index 000000000..a41b251df --- /dev/null +++ b/third_party/github.com/coreos/go-raft/README.md @@ -0,0 +1,68 @@ +[![Stories in Ready](http://badge.waffle.io/benbjohnson/go-raft.png)](http://waffle.io/benbjohnson/go-raft) +go-raft +======= + +## Overview + +This is a Go implementation of the Raft distributed consensus protocol. +Raft is a protocol by which a cluster of nodes can maintain a replicated state machine. +The state machine is kept in sync through the use of a replicated log. + +For more details on Raft, you can read [In Search of an Understandable Consensus Algorithm](https://ramcloud.stanford.edu/wiki/download/attachments/11370504/raft.pdf) by Diego Ongaro and John Ousterhout. + + +## The Raft Protocol + +### Overview + +Maintaining state in a single process on a single server is easy. +Your process is a single point of authority so there are no conflicts when reading and writing state. +Even multi-threaded processes can rely on locks or coroutines to serialize access to the data. + +However, in a distributed system there is no single point of authority. +Servers can crash or the network between two machines can become unavailable or any number of other problems can occur. + +A distributed consensus protocol is used for maintaining a consistent state across multiple servers in a cluster. 
+Many distributed systems are built upon the Paxos protocol but Paxos can be difficult to understand and there are many gaps between Paxos and real-world implementations. + +An alternative is the [Raft distributed consensus protocol](https://ramcloud.stanford.edu/wiki/download/attachments/11370504/raft.pdf) by Diego Ongaro and John Ousterhout. +Raft is a protocol built with understandability as a primary tenet and it centers around two things: + +1. Leader Election +2. Replicated Log + +With these two constructs, you can build a system that can maintain state across multiple servers -- even in the event of multiple failures. + +### Leader Election + +The Raft protocol effectively works as a master-slave system whereby state changes are written to a single server in the cluster and are distributed out to the rest of the servers in the cluster. +This simplifies the protocol since there is only one data authority and conflicts will not have to be resolved. + +Raft ensures that there is only one leader at a time. +It does this by performing elections among the nodes in the cluster and requiring that a node must receive a majority of the votes in order to become leader. +For example, if you have 3 nodes in your cluster then a single node would need 2 votes in order to become the leader. +For a 5 node cluster, a server would need 3 votes to become leader. + +### Replicated Log + +To maintain state, a log of commands is maintained. +Each command makes a change to the state of the server and the command is deterministic. +By ensuring that this log is replicated identically between all the nodes in the cluster we can replicate the state at any point in time in the log by running each command sequentially. + +Replicating the log under normal conditions is done by sending an `AppendEntries` RPC from the leader to each of the other servers in the cluster (called Peers). 
+Each peer will append the entries from the leader through a 2-phase commit process which ensures that a majority of servers in the cluster have entries written to the log. + +For a more detailed explanation on the failover process and election terms please see the full paper describing the protocol: [In Search of an Understandable Consensus Algorithm](https://ramcloud.stanford.edu/wiki/download/attachments/11370504/raft.pdf) + + +## Project Status + +The go-raft library is feature complete but in alpha. +There is a reference implementation called [raftd](https://github.com/benbjohnson/raftd) that demonstrates how to use the library. + +The library will be considered experimental until it has significant production usage. +I'm writing the library for the purpose of including distributed processing in my behavioral analytics database called [Sky](https://github.com/skydb/sky). +However, I hope other projects can benefit from having a distributed consensus protocol so the go-raft library is available under MIT license. + +If you have a project that you're using go-raft in, please add it to this README and send a pull request so others can see implementation examples. +If you have any questions on implementing go-raft in your project, feel free to contact me on [GitHub](https://github.com/benbjohnson), [Twitter](https://twitter.com/benbjohnson) or by e-mail at [ben@skylandlabs.com](mailto:ben@skylandlabs.com). diff --git a/third_party/github.com/coreos/go-raft/append_entries_request.go b/third_party/github.com/coreos/go-raft/append_entries_request.go new file mode 100644 index 000000000..0d22ef9fd --- /dev/null +++ b/third_party/github.com/coreos/go-raft/append_entries_request.go @@ -0,0 +1,98 @@ +package raft + +import ( + "code.google.com/p/goprotobuf/proto" + "github.com/benbjohnson/go-raft/protobuf" + "io" + "io/ioutil" +) + +// The request sent to a server to append entries to the log. 
+type AppendEntriesRequest struct { + Term uint64 + PrevLogIndex uint64 + PrevLogTerm uint64 + CommitIndex uint64 + LeaderName string + Entries []*LogEntry +} + +// Creates a new AppendEntries request. +func newAppendEntriesRequest(term uint64, prevLogIndex uint64, prevLogTerm uint64, commitIndex uint64, leaderName string, entries []*LogEntry) *AppendEntriesRequest { + return &AppendEntriesRequest{ + Term: term, + PrevLogIndex: prevLogIndex, + PrevLogTerm: prevLogTerm, + CommitIndex: commitIndex, + LeaderName: leaderName, + Entries: entries, + } +} + +// Encodes the AppendEntriesRequest to a buffer. Returns the number of bytes +// written and any error that may have occurred. +func (req *AppendEntriesRequest) encode(w io.Writer) (int, error) { + + protoEntries := make([]*protobuf.ProtoAppendEntriesRequest_ProtoLogEntry, len(req.Entries)) + + for i, entry := range req.Entries { + protoEntries[i] = &protobuf.ProtoAppendEntriesRequest_ProtoLogEntry{ + Index: proto.Uint64(entry.Index), + Term: proto.Uint64(entry.Term), + CommandName: proto.String(entry.CommandName), + Command: entry.Command, + } + } + + pb := &protobuf.ProtoAppendEntriesRequest{ + Term: proto.Uint64(req.Term), + PrevLogIndex: proto.Uint64(req.PrevLogIndex), + PrevLogTerm: proto.Uint64(req.PrevLogTerm), + CommitIndex: proto.Uint64(req.CommitIndex), + LeaderName: proto.String(req.LeaderName), + Entries: protoEntries, + } + + p, err := proto.Marshal(pb) + if err != nil { + return -1, err + } + + return w.Write(p) +} + +// Decodes the AppendEntriesRequest from a buffer. Returns the number of bytes read and +// any error that occurs. 
+func (req *AppendEntriesRequest) decode(r io.Reader) (int, error) { + data, err := ioutil.ReadAll(r) + + if err != nil { + return -1, err + } + + totalBytes := len(data) + + pb := &protobuf.ProtoAppendEntriesRequest{} + if err := proto.Unmarshal(data, pb); err != nil { + return -1, err + } + + req.Term = pb.GetTerm() + req.PrevLogIndex = pb.GetPrevLogIndex() + req.PrevLogTerm = pb.GetPrevLogTerm() + req.CommitIndex = pb.GetCommitIndex() + req.LeaderName = pb.GetLeaderName() + + req.Entries = make([]*LogEntry, len(pb.Entries)) + + for i, entry := range pb.Entries { + req.Entries[i] = &LogEntry{ + Index: entry.GetIndex(), + Term: entry.GetTerm(), + CommandName: entry.GetCommandName(), + Command: entry.Command, + } + } + + return totalBytes, nil +} diff --git a/third_party/github.com/coreos/go-raft/append_entries_request_test.go b/third_party/github.com/coreos/go-raft/append_entries_request_test.go new file mode 100644 index 000000000..ef6732fc4 --- /dev/null +++ b/third_party/github.com/coreos/go-raft/append_entries_request_test.go @@ -0,0 +1,40 @@ +package raft + +import ( + "bytes" + "testing" +) + +func BenchmarkAppendEntriesRequestEncoding(b *testing.B) { + req, tmp := createTestAppendEntriesRequest(2000) + b.ResetTimer() + for i := 0; i < b.N; i++ { + var buf bytes.Buffer + req.encode(&buf) + } + b.SetBytes(int64(len(tmp))) +} + +func BenchmarkAppendEntriesRequestDecoding(b *testing.B) { + req, buf := createTestAppendEntriesRequest(2000) + b.ResetTimer() + for i := 0; i < b.N; i++ { + req.decode(bytes.NewReader(buf)) + } + b.SetBytes(int64(len(buf))) +} + +func createTestAppendEntriesRequest(entryCount int) (*AppendEntriesRequest, []byte) { + entries := make([]*LogEntry, 0) + for i := 0; i < entryCount; i++ { + command := &DefaultJoinCommand{Name: "localhost:1000"} + entry, _ := newLogEntry(nil, 1, 2, command) + entries = append(entries, entry) + } + req := newAppendEntriesRequest(1, 1, 1, 1, "leader", entries) + + var buf bytes.Buffer + req.encode(&buf) + + 
return req, buf.Bytes() +} diff --git a/third_party/github.com/coreos/go-raft/append_entries_response.go b/third_party/github.com/coreos/go-raft/append_entries_response.go new file mode 100644 index 000000000..ed0c29e24 --- /dev/null +++ b/third_party/github.com/coreos/go-raft/append_entries_response.go @@ -0,0 +1,70 @@ +package raft + +import ( + "code.google.com/p/goprotobuf/proto" + "github.com/benbjohnson/go-raft/protobuf" + "io" + "io/ioutil" +) + +// The response returned from a server appending entries to the log. +type AppendEntriesResponse struct { + Term uint64 + // the current index of the server + Index uint64 + Success bool + CommitIndex uint64 + peer string + append bool +} + +// Creates a new AppendEntries response. +func newAppendEntriesResponse(term uint64, success bool, index uint64, commitIndex uint64) *AppendEntriesResponse { + return &AppendEntriesResponse{ + Term: term, + Success: success, + Index: index, + CommitIndex: commitIndex, + } +} + +// Encodes the AppendEntriesResponse to a buffer. Returns the number of bytes +// written and any error that may have occurred. +func (resp *AppendEntriesResponse) encode(w io.Writer) (int, error) { + pb := &protobuf.ProtoAppendEntriesResponse{ + Term: proto.Uint64(resp.Term), + Index: proto.Uint64(resp.Index), + CommitIndex: proto.Uint64(resp.CommitIndex), + Success: proto.Bool(resp.Success), + } + p, err := proto.Marshal(pb) + if err != nil { + return -1, err + } + + return w.Write(p) +} + +// Decodes the AppendEntriesResponse from a buffer. Returns the number of bytes read and +// any error that occurs. 
+func (resp *AppendEntriesResponse) decode(r io.Reader) (int, error) { + data, err := ioutil.ReadAll(r) + + if err != nil { + return -1, err + } + + totalBytes := len(data) + + pb := &protobuf.ProtoAppendEntriesResponse{} + if err := proto.Unmarshal(data, pb); err != nil { + return -1, err + } + + resp.Term = pb.GetTerm() + resp.Index = pb.GetIndex() + resp.CommitIndex = pb.GetCommitIndex() + resp.Success = pb.GetSuccess() + + return totalBytes, nil +} diff --git a/third_party/github.com/coreos/go-raft/append_entries_response_test.go b/third_party/github.com/coreos/go-raft/append_entries_response_test.go new file mode 100644 index 000000000..038dcda76 --- /dev/null +++ b/third_party/github.com/coreos/go-raft/append_entries_response_test.go @@ -0,0 +1,34 @@ +package raft + +import ( + "bytes" + "testing" +) + +func BenchmarkAppendEntriesResponseEncoding(b *testing.B) { + req, tmp := createTestAppendEntriesResponse(2000) + b.ResetTimer() + for i := 0; i < b.N; i++ { + var buf bytes.Buffer + req.encode(&buf) + } + b.SetBytes(int64(len(tmp))) +} + +func BenchmarkAppendEntriesResponseDecoding(b *testing.B) { + req, buf := createTestAppendEntriesResponse(2000) + b.ResetTimer() + for i := 0; i < b.N; i++ { + req.decode(bytes.NewReader(buf)) + } + b.SetBytes(int64(len(buf))) +} + +func createTestAppendEntriesResponse(entryCount int) (*AppendEntriesResponse, []byte) { + resp := newAppendEntriesResponse(1, true, 1, 1) + + var buf bytes.Buffer + resp.encode(&buf) + + return resp, buf.Bytes() +} diff --git a/third_party/github.com/coreos/go-raft/command.go b/third_party/github.com/coreos/go-raft/command.go new file mode 100644 index 000000000..2c0495171 --- /dev/null +++ b/third_party/github.com/coreos/go-raft/command.go @@ -0,0 +1,92 @@ +package raft + +import ( + "bytes" + "encoding/json" + "fmt" + "io" + "reflect" +) + +//------------------------------------------------------------------------------ +// +// Globals +// 
+//------------------------------------------------------------------------------ + +var commandTypes map[string]Command + +func init() { + commandTypes = map[string]Command{} +} + +//------------------------------------------------------------------------------ +// +// Typedefs +// +//------------------------------------------------------------------------------ + +// A command represents an action to be taken on the replicated state machine. +type Command interface { + CommandName() string + Apply(server *Server) (interface{}, error) +} + +type CommandEncoder interface { + Encode(w io.Writer) error + Decode(r io.Reader) error +} + +//------------------------------------------------------------------------------ +// +// Functions +// +//------------------------------------------------------------------------------ + +//-------------------------------------- +// Instantiation +//-------------------------------------- + +// Creates a new instance of a command by name. +func newCommand(name string, data []byte) (Command, error) { + // Find the registered command. + command := commandTypes[name] + if command == nil { + return nil, fmt.Errorf("raft.Command: Unregistered command type: %s", name) + } + + // Make a copy of the command. + v := reflect.New(reflect.Indirect(reflect.ValueOf(command)).Type()).Interface() + copy, ok := v.(Command) + if !ok { + panic(fmt.Sprintf("raft: Unable to copy command: %s (%v)", command.CommandName(), reflect.ValueOf(v).Kind().String())) + } + + // If data for the command was passed in the decode it. + if data != nil { + if encoder, ok := copy.(CommandEncoder); ok { + if err := encoder.Decode(bytes.NewReader(data)); err != nil { + return nil, err + } + } else { + json.NewDecoder(bytes.NewReader(data)).Decode(copy) + } + } + + return copy, nil +} + +//-------------------------------------- +// Registration +//-------------------------------------- + +// Registers a command by storing a reference to an instance of it. 
+func RegisterCommand(command Command) { + if command == nil { + panic(fmt.Sprintf("raft: Cannot register nil")) + } else if commandTypes[command.CommandName()] != nil { + panic(fmt.Sprintf("raft: Duplicate registration: %s", command.CommandName())) + return + } + commandTypes[command.CommandName()] = command +} diff --git a/third_party/github.com/coreos/go-raft/debug.go b/third_party/github.com/coreos/go-raft/debug.go new file mode 100644 index 000000000..97e2bc772 --- /dev/null +++ b/third_party/github.com/coreos/go-raft/debug.go @@ -0,0 +1,116 @@ +package raft + +import ( + "log" + "os" +) + +//------------------------------------------------------------------------------ +// +// Variables +// +//------------------------------------------------------------------------------ + +const ( + Debug = 1 + Trace = 2 +) + +var logLevel int = 0 +var logger *log.Logger + +func init() { + logger = log.New(os.Stdout, "[raft]", log.Lmicroseconds) +} + +//------------------------------------------------------------------------------ +// +// Functions +// +//------------------------------------------------------------------------------ + +func LogLevel() int { + return logLevel +} + +func SetLogLevel(level int) { + logLevel = level +} + +//-------------------------------------- +// Warnings +//-------------------------------------- + +// Prints to the standard logger. Arguments are handled in the manner of +// fmt.Print. +func warn(v ...interface{}) { + logger.Print(v...) +} + +// Prints to the standard logger. Arguments are handled in the manner of +// fmt.Printf. +func warnf(format string, v ...interface{}) { + logger.Printf(format, v...) +} + +// Prints to the standard logger. Arguments are handled in the manner of +// fmt.Println. +func warnln(v ...interface{}) { + logger.Println(v...) +} + +//-------------------------------------- +// Basic debugging +//-------------------------------------- + +// Prints to the standard logger if debug mode is enabled. 
Arguments +// are handled in the manner of fmt.Print. +func debug(v ...interface{}) { + if logLevel >= Debug { + logger.Print(v...) + } +} + +// Prints to the standard logger if debug mode is enabled. Arguments +// are handled in the manner of fmt.Printf. +func debugf(format string, v ...interface{}) { + if logLevel >= Debug { + logger.Printf(format, v...) + } +} + +// Prints to the standard logger if debug mode is enabled. Arguments +// are handled in the manner of fmt.Println. +func debugln(v ...interface{}) { + if logLevel >= Debug { + logger.Println(v...) + } +} + +//-------------------------------------- +// Trace-level debugging +//-------------------------------------- + +// Prints to the standard logger if trace debugging is enabled. Arguments +// are handled in the manner of fmt.Print. +func trace(v ...interface{}) { + if logLevel >= Trace { + logger.Print(v...) + } +} + +// Prints to the standard logger if trace debugging is enabled. Arguments +// are handled in the manner of fmt.Printf. +func tracef(format string, v ...interface{}) { + if logLevel >= Trace { + logger.Printf(format, v...) + } +} + +// Prints to the standard logger if trace debugging is enabled. Arguments +// are handled in the manner of debugln. +func traceln(v ...interface{}) { + if logLevel >= Trace { + logger.Println(v...) 
+ } +} diff --git a/third_party/github.com/coreos/go-raft/http_transporter.go b/third_party/github.com/coreos/go-raft/http_transporter.go new file mode 100644 index 000000000..1125f91f5 --- /dev/null +++ b/third_party/github.com/coreos/go-raft/http_transporter.go @@ -0,0 +1,195 @@ +package raft + +import ( + "bytes" + "fmt" + "io" + "net/http" +) + +// Parts from this transporter were heavily influenced by Peter Bougon's +// raft implementation: https://github.com/peterbourgon/raft + +//------------------------------------------------------------------------------ +// +// Typedefs +// +//------------------------------------------------------------------------------ + +// An HTTPTransporter is a default transport layer used to communicate between +// multiple servers. +type HTTPTransporter struct { + DisableKeepAlives bool + prefix string + appendEntriesPath string + requestVotePath string +} + +type HTTPMuxer interface { + HandleFunc(string, func(http.ResponseWriter, *http.Request)) +} + +//------------------------------------------------------------------------------ +// +// Constructor +// +//------------------------------------------------------------------------------ + +// Creates a new HTTP transporter with the given path prefix. +func NewHTTPTransporter(prefix string) *HTTPTransporter { + return &HTTPTransporter{ + DisableKeepAlives: false, + prefix: prefix, + appendEntriesPath: fmt.Sprintf("%s%s", prefix, "/appendEntries"), + requestVotePath: fmt.Sprintf("%s%s", prefix, "/requestVote"), + } +} + +//------------------------------------------------------------------------------ +// +// Accessors +// +//------------------------------------------------------------------------------ + +// Retrieves the path prefix used by the transporter. +func (t *HTTPTransporter) Prefix() string { + return t.prefix +} + +// Retrieves the AppendEntries path. 
+func (t *HTTPTransporter) AppendEntriesPath() string { + return t.appendEntriesPath +} + +// Retrieves the RequestVote path. +func (t *HTTPTransporter) RequestVotePath() string { + return t.requestVotePath +} + +//------------------------------------------------------------------------------ +// +// Methods +// +//------------------------------------------------------------------------------ + +//-------------------------------------- +// Installation +//-------------------------------------- + +// Applies Raft routes to an HTTP router for a given server. +func (t *HTTPTransporter) Install(server *Server, mux HTTPMuxer) { + mux.HandleFunc(t.AppendEntriesPath(), t.appendEntriesHandler(server)) + mux.HandleFunc(t.RequestVotePath(), t.requestVoteHandler(server)) +} + +//-------------------------------------- +// Outgoing +//-------------------------------------- + +// Sends an AppendEntries RPC to a peer. +func (t *HTTPTransporter) SendAppendEntriesRequest(server *Server, peer *Peer, req *AppendEntriesRequest) *AppendEntriesResponse { + var b bytes.Buffer + if _, err := req.encode(&b); err != nil { + traceln("transporter.ae.encoding.error:", err) + return nil + } + + url := fmt.Sprintf("http://%s%s", peer.Name(), t.AppendEntriesPath()) + traceln(server.Name(), "POST", url) + + client := &http.Client{Transport: &http.Transport{DisableKeepAlives: t.DisableKeepAlives}} + httpResp, err := client.Post(url, "application/protobuf", &b) + if httpResp == nil || err != nil { + traceln("transporter.ae.response.error:", err) + return nil + } + defer httpResp.Body.Close() + + resp := &AppendEntriesResponse{} + if _, err = resp.decode(httpResp.Body); err != nil && err != io.EOF { + traceln("transporter.ae.decoding.error:", err) + return nil + } + + return resp +} + +// Sends a RequestVote RPC to a peer. 
+func (t *HTTPTransporter) SendVoteRequest(server *Server, peer *Peer, req *RequestVoteRequest) *RequestVoteResponse { + var b bytes.Buffer + if _, err := req.encode(&b); err != nil { + traceln("transporter.rv.encoding.error:", err) + return nil + } + + url := fmt.Sprintf("http://%s%s", peer.Name(), t.RequestVotePath()) + traceln(server.Name(), "POST", url) + + client := &http.Client{Transport: &http.Transport{DisableKeepAlives: t.DisableKeepAlives}} + httpResp, err := client.Post(url, "application/protobuf", &b) + if httpResp == nil || err != nil { + traceln("transporter.rv.response.error:", err) + return nil + } + defer httpResp.Body.Close() + + resp := &RequestVoteResponse{} + if _, err = resp.decode(httpResp.Body); err != nil && err != io.EOF { + traceln("transporter.rv.decoding.error:", err) + return nil + } + + return resp +} + +// Sends a SnapshotRequest RPC to a peer. +func (t *HTTPTransporter) SendSnapshotRequest(server *Server, peer *Peer, req *SnapshotRequest) *SnapshotResponse { + return nil +} + +// Sends a SnapshotRequest RPC to a peer. +func (t *HTTPTransporter) SendSnapshotRecoveryRequest(server *Server, peer *Peer, req *SnapshotRecoveryRequest) *SnapshotRecoveryResponse { + return nil +} + +//-------------------------------------- +// Incoming +//-------------------------------------- + +// Handles incoming AppendEntries requests. +func (t *HTTPTransporter) appendEntriesHandler(server *Server) http.HandlerFunc { + return func(w http.ResponseWriter, r *http.Request) { + traceln(server.Name(), "RECV /appendEntries") + + req := &AppendEntriesRequest{} + if _, err := req.decode(r.Body); err != nil { + http.Error(w, "", http.StatusBadRequest) + return + } + + resp := server.AppendEntries(req) + if _, err := resp.encode(w); err != nil { + http.Error(w, "", http.StatusInternalServerError) + return + } + } +} + +// Handles incoming RequestVote requests. 
+func (t *HTTPTransporter) requestVoteHandler(server *Server) http.HandlerFunc { + return func(w http.ResponseWriter, r *http.Request) { + traceln(server.Name(), "RECV /requestVote") + + req := &RequestVoteRequest{} + if _, err := req.decode(r.Body); err != nil { + http.Error(w, "", http.StatusBadRequest) + return + } + + resp := server.RequestVote(req) + if _, err := resp.encode(w); err != nil { + http.Error(w, "", http.StatusInternalServerError) + return + } + } +} diff --git a/third_party/github.com/coreos/go-raft/http_transporter_test.go b/third_party/github.com/coreos/go-raft/http_transporter_test.go new file mode 100644 index 000000000..3bd4a6d74 --- /dev/null +++ b/third_party/github.com/coreos/go-raft/http_transporter_test.go @@ -0,0 +1,153 @@ +package raft + +import ( + "fmt" + "net" + "net/http" + "sync" + "testing" + "time" +) + +// Ensure that we can start several servers and have them communicate. +func TestHTTPTransporter(t *testing.T) { + transporter := NewHTTPTransporter("/raft") + transporter.DisableKeepAlives = true + + servers := []*Server{} + f0 := func(server *Server, httpServer *http.Server) { + // Stop the leader and wait for an election. + server.Stop() + time.Sleep(testElectionTimeout * 2) + + if servers[1].State() != Leader && servers[2].State() != Leader { + t.Fatal("Expected re-election:", servers[1].State(), servers[2].State()) + } + server.Start() + } + f1 := func(server *Server, httpServer *http.Server) { + } + f2 := func(server *Server, httpServer *http.Server) { + } + runTestHttpServers(t, &servers, transporter, f0, f1, f2) +} + +// Starts multiple independent Raft servers wrapped with HTTP servers. +func runTestHttpServers(t *testing.T, servers *[]*Server, transporter *HTTPTransporter, callbacks ...func(*Server, *http.Server)) { + var wg sync.WaitGroup + httpServers := []*http.Server{} + listeners := []net.Listener{} + for i := range callbacks { + wg.Add(1) + port := 9000 + i + + // Create raft server. 
+ server := newTestServer(fmt.Sprintf("localhost:%d", port), transporter) + server.SetHeartbeatTimeout(testHeartbeatTimeout) + server.SetElectionTimeout(testElectionTimeout) + server.Start() + + defer server.Stop() + *servers = append(*servers, server) + + // Create listener for HTTP server and start it. + listener, err := net.Listen("tcp", fmt.Sprintf(":%d", port)) + if err != nil { + panic(err) + } + defer listener.Close() + listeners = append(listeners, listener) + + // Create wrapping HTTP server. + mux := http.NewServeMux() + transporter.Install(server, mux) + httpServer := &http.Server{Addr: fmt.Sprintf(":%d", port), Handler: mux} + httpServers = append(httpServers, httpServer) + go func() { httpServer.Serve(listener) }() + } + + // Setup configuration. + for _, server := range *servers { + if _, err := (*servers)[0].Do(&DefaultJoinCommand{Name: server.Name()}); err != nil { + t.Fatalf("Server %s unable to join: %v", server.Name(), err) + } + } + + // Wait for configuration to propagate. + time.Sleep(testHeartbeatTimeout * 2) + + // Execute all the callbacks at the same time. + for _i, _f := range callbacks { + i, f := _i, _f + go func() { + defer wg.Done() + f((*servers)[i], httpServers[i]) + }() + } + + // Wait until everything is done. + wg.Wait() +} + +func BenchmarkSpeed(b *testing.B) { + + transporter := NewHTTPTransporter("/raft") + transporter.DisableKeepAlives = true + + servers := []*Server{} + + for i := 0; i < 3; i++ { + port := 9000 + i + + // Create raft server. + server := newTestServer(fmt.Sprintf("localhost:%d", port), transporter) + server.SetHeartbeatTimeout(testHeartbeatTimeout) + server.SetElectionTimeout(testElectionTimeout) + server.Start() + + defer server.Stop() + servers = append(servers, server) + + // Create listener for HTTP server and start it. + listener, err := net.Listen("tcp", fmt.Sprintf(":%d", port)) + if err != nil { + panic(err) + } + defer listener.Close() + + // Create wrapping HTTP server. 
+ mux := http.NewServeMux() + transporter.Install(server, mux) + httpServer := &http.Server{Addr: fmt.Sprintf(":%d", port), Handler: mux} + + go func() { httpServer.Serve(listener) }() + } + + // Setup configuration. + for _, server := range servers { + (servers)[0].Do(&DefaultJoinCommand{Name: server.Name()}) + } + + c := make(chan bool) + + // Wait for configuration to propagate. + time.Sleep(testHeartbeatTimeout * 2) + + b.ResetTimer() + for n := 0; n < b.N; n++ { + for i := 0; i < 1000; i++ { + go send(c, servers[0]) + } + + for i := 0; i < 1000; i++ { + <-c + } + } +} + +func send(c chan bool, s *Server) { + for i := 0; i < 20; i++ { + s.Do(&NOPCommand{}) + } + c <- true +} diff --git a/third_party/github.com/coreos/go-raft/join_command.go b/third_party/github.com/coreos/go-raft/join_command.go new file mode 100644 index 000000000..74e14239d --- /dev/null +++ b/third_party/github.com/coreos/go-raft/join_command.go @@ -0,0 +1,28 @@ +package raft + +// Join command interface +type JoinCommand interface { + CommandName() string + Apply(server *Server) (interface{}, error) + NodeName() string +} + +// Join command +type DefaultJoinCommand struct { + Name string `json:"name"` +} + +// The name of the Join command in the log +func (c *DefaultJoinCommand) CommandName() string { + return "raft:join" +} + +func (c *DefaultJoinCommand) Apply(server *Server) (interface{}, error) { + err := server.AddPeer(c.Name) + + return []byte("join"), err +} + +func (c *DefaultJoinCommand) NodeName() string { + return c.Name +} diff --git a/third_party/github.com/coreos/go-raft/leave_command.go b/third_party/github.com/coreos/go-raft/leave_command.go new file mode 100644 index 000000000..c2a4923a0 --- /dev/null +++ b/third_party/github.com/coreos/go-raft/leave_command.go @@ -0,0 +1,27 @@ +package raft + +// Leave command interface +type LeaveCommand interface { + CommandName() string + Apply(server *Server) (interface{}, error) + NodeName() string +} + +// Leave command +type 
DefaultLeaveCommand struct { + Name string `json:"name"` +} + +// The name of the Leave command in the log +func (c *DefaultLeaveCommand) CommandName() string { + return "raft:leave" +} + +func (c *DefaultLeaveCommand) Apply(server *Server) (interface{}, error) { + err := server.RemovePeer(c.Name) + + return []byte("leave"), err +} +func (c *DefaultLeaveCommand) NodeName() string { + return c.Name +} diff --git a/third_party/github.com/coreos/go-raft/log.go b/third_party/github.com/coreos/go-raft/log.go new file mode 100644 index 000000000..4033e92f9 --- /dev/null +++ b/third_party/github.com/coreos/go-raft/log.go @@ -0,0 +1,610 @@ +package raft + +import ( + "bufio" + "code.google.com/p/goprotobuf/proto" + "errors" + "fmt" + "github.com/benbjohnson/go-raft/protobuf" + "io" + "os" + "sync" +) + +//------------------------------------------------------------------------------ +// +// Typedefs +// +//------------------------------------------------------------------------------ + +// A log is a collection of log entries that are persisted to durable storage. +type Log struct { + ApplyFunc func(Command) (interface{}, error) + file *os.File + path string + entries []*LogEntry + results []*logResult + commitIndex uint64 + mutex sync.RWMutex + startIndex uint64 // the index before the first entry in the Log entries + startTerm uint64 + pBuffer *proto.Buffer + pLogEntry *protobuf.ProtoLogEntry +} + +// The results of the applying a log entry. +type logResult struct { + returnValue interface{} + err error +} + +//------------------------------------------------------------------------------ +// +// Constructor +// +//------------------------------------------------------------------------------ + +// Creates a new log. 
+func newLog() *Log { + return &Log{ + entries: make([]*LogEntry, 0), + pBuffer: proto.NewBuffer(nil), + pLogEntry: &protobuf.ProtoLogEntry{}, + } +} + +//------------------------------------------------------------------------------ +// +// Accessors +// +//------------------------------------------------------------------------------ + +//-------------------------------------- +// Log Indices +//-------------------------------------- + +// The last committed index in the log. +func (l *Log) CommitIndex() uint64 { + l.mutex.RLock() + defer l.mutex.RUnlock() + return l.commitIndex +} + +// The current index in the log. +func (l *Log) currentIndex() uint64 { + l.mutex.RLock() + defer l.mutex.RUnlock() + + if len(l.entries) == 0 { + return l.startIndex + } + return l.entries[len(l.entries)-1].Index +} + +// The current index in the log without locking +func (l *Log) internalCurrentIndex() uint64 { + if len(l.entries) == 0 { + return l.startIndex + } + return l.entries[len(l.entries)-1].Index +} + +// The next index in the log. +func (l *Log) nextIndex() uint64 { + return l.currentIndex() + 1 +} + +// Determines if the log contains zero entries. +func (l *Log) isEmpty() bool { + l.mutex.RLock() + defer l.mutex.RUnlock() + return (len(l.entries) == 0) && (l.startIndex == 0) +} + +// The name of the last command in the log. +func (l *Log) lastCommandName() string { + l.mutex.RLock() + defer l.mutex.RUnlock() + if len(l.entries) > 0 { + if entry := l.entries[len(l.entries)-1]; entry != nil { + return entry.CommandName + } + } + return "" +} + +//-------------------------------------- +// Log Terms +//-------------------------------------- + +// The current term in the log. 
+func (l *Log) currentTerm() uint64 { + l.mutex.RLock() + defer l.mutex.RUnlock() + + if len(l.entries) == 0 { + return l.startTerm + } + return l.entries[len(l.entries)-1].Term +} + +//------------------------------------------------------------------------------ +// +// Methods +// +//------------------------------------------------------------------------------ + +//-------------------------------------- +// State +//-------------------------------------- + +// Opens the log file and reads existing entries. The log can remain open and +// continue to append entries to the end of the log. +func (l *Log) open(path string) error { + l.mutex.Lock() + defer l.mutex.Unlock() + + // Read all the entries from the log if one exists. + var readBytes int64 + + var err error + debugln("log.open.open ", path) + // open log file + l.file, err = os.OpenFile(path, os.O_RDWR, 0600) + l.path = path + + if err != nil { + // if the log file does not exist before + // we create the log file and set commitIndex to 0 + if os.IsNotExist(err) { + l.file, err = os.OpenFile(path, os.O_WRONLY|os.O_CREATE, 0600) + debugln("log.open.create ", path) + + return err + } + return err + } + debugln("log.open.exist ", path) + + // Read the file and decode entries. + for { + + // Instantiate log entry and decode into it. + entry, _ := newLogEntry(l, 0, 0, nil) + entry.Position, _ = l.file.Seek(0, os.SEEK_CUR) + + n, err := entry.decode(l.file) + if err != nil { + if err == io.EOF { + debugln("open.log.append: finish ") + } else { + if err = os.Truncate(path, readBytes); err != nil { + return fmt.Errorf("raft.Log: Unable to recover: %v", err) + } + } + break + } + + // Append entry. + l.entries = append(l.entries, entry) + debugln("open.log.append log index ", entry.Index) + + readBytes += int64(n) + } + l.results = make([]*logResult, len(l.entries)) + debugln("open.log.recovery number of log ", len(l.entries)) + return nil +} + +// Closes the log file. 
+func (l *Log) close() { + l.mutex.Lock() + defer l.mutex.Unlock() + + if l.file != nil { + l.file.Close() + l.file = nil + } + l.entries = make([]*LogEntry, 0) + l.results = make([]*logResult, 0) +} + +//-------------------------------------- +// Entries +//-------------------------------------- + +// Creates a log entry associated with this log. +func (l *Log) createEntry(term uint64, command Command) (*LogEntry, error) { + return newLogEntry(l, l.nextIndex(), term, command) +} + +// Retrieves an entry from the log. If the entry has been eliminated because +// of a snapshot then nil is returned. +func (l *Log) getEntry(index uint64) *LogEntry { + l.mutex.RLock() + defer l.mutex.RUnlock() + + if index <= l.startIndex || index > (l.startIndex+uint64(len(l.entries))) { + return nil + } + return l.entries[index-l.startIndex-1] +} + +// Checks if the log contains a given index/term combination. +func (l *Log) containsEntry(index uint64, term uint64) bool { + entry := l.getEntry(index) + return (entry != nil && entry.Term == term) +} + +// Retrieves a list of entries after a given index as well as the term of the +// index provided. A nil list of entries is returned if the index no longer +// exists because a snapshot was made. +func (l *Log) getEntriesAfter(index uint64, maxLogEntriesPerRequest uint64) ([]*LogEntry, uint64) { + l.mutex.Lock() + defer l.mutex.Unlock() + + // Return nil if index is before the start of the log. + if index < l.startIndex { + traceln("log.entriesAfter.before: ", index, " ", l.startIndex) + return nil, 0 + } + + // Return an error if the index doesn't exist. + if index > (uint64(len(l.entries)) + l.startIndex) { + panic(fmt.Sprintf("raft: Index is beyond end of log: %v %v", len(l.entries), index)) + } + + // If we're going from the beginning of the log then return the whole log. 
+ if index == l.startIndex { + traceln("log.entriesAfter.beginning: ", index, " ", l.startIndex) + return l.entries, l.startTerm + } + + traceln("log.entriesAfter.partial: ", index, " ", l.entries[len(l.entries)-1].Index) + + entries := l.entries[index-l.startIndex:] + length := len(entries) + + if uint64(length) < maxLogEntriesPerRequest { + // Determine the term at the given entry and return a subslice. + return entries, l.entries[index-1-l.startIndex].Term + } else { + return entries[:maxLogEntriesPerRequest], l.entries[index-1-l.startIndex].Term + } +} + +// Retrieves the return value and error for an entry. The result can only exist +// after the entry has been committed. +func (l *Log) getEntryResult(entry *LogEntry, clear bool) (interface{}, error) { + l.mutex.RLock() + defer l.mutex.RUnlock() + + if entry == nil { + panic("raft: Log entry required for error retrieval") + } + + // If a result exists for the entry then return it with its error. + if entry.Index > l.startIndex && entry.Index <= uint64(len(l.results)) { + if result := l.results[entry.Index-l.startIndex-1]; result != nil { + + // keep the records before remove it + returnValue, err := result.returnValue, result.err + + // Remove reference to result if it's being cleared after retrieval. + if clear { + result.returnValue = nil + } + + return returnValue, err + } + } + + return nil, nil +} + +//-------------------------------------- +// Commit +//-------------------------------------- + +// Retrieves the last index and term that has been committed to the log. +func (l *Log) commitInfo() (index uint64, term uint64) { + l.mutex.RLock() + defer l.mutex.RUnlock() + + // If we don't have any entries then just return zeros. + if l.commitIndex == 0 { + return 0, 0 + } + + // No new commit log after snapshot + if l.commitIndex == l.startIndex { + return l.startIndex, l.startTerm + } + + // Return the last index & term from the last committed entry. 
+ entry := l.entries[l.commitIndex-1-l.startIndex] + return entry.Index, entry.Term +} + +// Retrieves the last index and term that has been committed to the log. +func (l *Log) lastInfo() (index uint64, term uint64) { + l.mutex.RLock() + defer l.mutex.RUnlock() + + // If we don't have any entries then just return zeros. + if len(l.entries) == 0 { + return l.startIndex, l.startTerm + } + + // Return the last index & term + entry := l.entries[len(l.entries)-1] + return entry.Index, entry.Term +} + +// Updates the commit index +func (l *Log) updateCommitIndex(index uint64) { + l.mutex.Lock() + defer l.mutex.Unlock() + l.commitIndex = index +} + +// Updates the commit index and writes entries after that index to the stable storage. +func (l *Log) setCommitIndex(index uint64) error { + l.mutex.Lock() + defer l.mutex.Unlock() + + // this is not error any more after limited the number of sending entries + // commit up to what we already have + if index > l.startIndex+uint64(len(l.entries)) { + debugln("raft.Log: Commit index", index, "set back to ", len(l.entries)) + index = l.startIndex + uint64(len(l.entries)) + } + + // Do not allow previous indices to be committed again. 
+ + // This could happens, since the guarantee is that the new leader has up-to-dated + // log entires rather than has most up-to-dated committed index + + // For example, Leader 1 send log 80 to follower 2 and follower 3 + // follower 2 and follow 3 all got the new entries and reply + // leader 1 committed entry 80 and send reply to follower 2 and follower3 + // follower 2 receive the new committed index and update committed index to 80 + // leader 1 fail to send the committed index to follower 3 + // follower 3 promote to leader (server 1 and server 2 will vote, since leader 3 + // has up-to-dated the entries) + // when new leader 3 send heartbeat with committed index = 0 to follower 2, + // follower 2 should reply success and let leader 3 update the committed index to 80 + + if index < l.commitIndex { + return nil + } + + // Find all entries whose index is between the previous index and the current index. + for i := l.commitIndex + 1; i <= index; i++ { + entryIndex := i - 1 - l.startIndex + entry := l.entries[entryIndex] + + // Update commit index. + l.commitIndex = entry.Index + + // Decode the command. + command, err := newCommand(entry.CommandName, entry.Command) + if err != nil { + return err + } + + // Apply the changes to the state machine and store the error code. + returnValue, err := l.ApplyFunc(command) + l.results[entryIndex] = &logResult{returnValue: returnValue, err: err} + } + return nil +} + +// Set the commitIndex at the head of the log file to the current +// commit Index. This should be called after obtained a log lock +func (l *Log) flushCommitIndex() { + l.file.Seek(0, os.SEEK_SET) + fmt.Fprintf(l.file, "%8x\n", l.commitIndex) + l.file.Seek(0, os.SEEK_END) +} + +//-------------------------------------- +// Truncation +//-------------------------------------- + +// Truncates the log to the given index and term. This only works if the log +// at the index has not been committed. 
+func (l *Log) truncate(index uint64, term uint64) error { + l.mutex.Lock() + defer l.mutex.Unlock() + debugln("log.truncate: ", index) + + // Do not allow committed entries to be truncated. + if index < l.commitIndex { + debugln("log.truncate.before") + return fmt.Errorf("raft.Log: Index is already committed (%v): (IDX=%v, TERM=%v)", l.commitIndex, index, term) + } + + // Do not truncate past end of entries. + if index > l.startIndex+uint64(len(l.entries)) { + debugln("log.truncate.after") + return fmt.Errorf("raft.Log: Entry index does not exist (MAX=%v): (IDX=%v, TERM=%v)", len(l.entries), index, term) + } + + // If we're truncating everything then just clear the entries. + if index == l.startIndex { + debugln("log.truncate.clear") + l.file.Truncate(0) + l.file.Seek(0, os.SEEK_SET) + l.entries = []*LogEntry{} + } else { + // Do not truncate if the entry at index does not have the matching term. + entry := l.entries[index-l.startIndex-1] + if len(l.entries) > 0 && entry.Term != term { + debugln("log.truncate.termMismatch") + return fmt.Errorf("raft.Log: Entry at index does not have matching term (%v): (IDX=%v, TERM=%v)", entry.Term, index, term) + } + + // Otherwise truncate up to the desired entry. + if index < l.startIndex+uint64(len(l.entries)) { + debugln("log.truncate.finish") + position := l.entries[index-l.startIndex].Position + l.file.Truncate(position) + l.file.Seek(position, os.SEEK_SET) + l.entries = l.entries[0 : index-l.startIndex] + } + } + + return nil +} + +//-------------------------------------- +// Append +//-------------------------------------- + +// Appends a series of entries to the log. These entries are not written to +// disk until setCommitIndex() is called. +func (l *Log) appendEntries(entries []*LogEntry) error { + l.mutex.Lock() + defer l.mutex.Unlock() + + startPosition, _ := l.file.Seek(0, os.SEEK_CUR) + + w := bufio.NewWriter(l.file) + + var size int64 + var err error + // Append each entry but exit if we hit an error. 
+ for _, entry := range entries { + entry.log = l + if size, err = l.writeEntry(entry, w); err != nil { + return err + } + entry.Position = startPosition + startPosition += size + } + w.Flush() + + return nil +} + +// Writes a single log entry to the end of the log. This function does not +// obtain a lock and should only be used internally. Use AppendEntries() and +// AppendEntry() to use it externally. +func (l *Log) appendEntry(entry *LogEntry) error { + if l.file == nil { + return errors.New("raft.Log: Log is not open") + } + + // Make sure the term and index are greater than the previous. + if len(l.entries) > 0 { + lastEntry := l.entries[len(l.entries)-1] + if entry.Term < lastEntry.Term { + return fmt.Errorf("raft.Log: Cannot append entry with earlier term (%x:%x <= %x:%x)", entry.Term, entry.Index, lastEntry.Term, lastEntry.Index) + } else if entry.Term == lastEntry.Term && entry.Index <= lastEntry.Index { + return fmt.Errorf("raft.Log: Cannot append entry with earlier index in the same term (%x:%x <= %x:%x)", entry.Term, entry.Index, lastEntry.Term, lastEntry.Index) + } + } + + position, _ := l.file.Seek(0, os.SEEK_CUR) + + entry.Position = position + + // Write to storage. + if _, err := entry.encode(l.file); err != nil { + return err + } + + // Append to entries list if stored on disk. + l.entries = append(l.entries, entry) + l.results = append(l.results, nil) + + return nil +} + +// appendEntry with Buffered io +func (l *Log) writeEntry(entry *LogEntry, w io.Writer) (int64, error) { + if l.file == nil { + return -1, errors.New("raft.Log: Log is not open") + } + + // Make sure the term and index are greater than the previous. 
+ if len(l.entries) > 0 { + lastEntry := l.entries[len(l.entries)-1] + if entry.Term < lastEntry.Term { + return -1, fmt.Errorf("raft.Log: Cannot append entry with earlier term (%x:%x <= %x:%x)", entry.Term, entry.Index, lastEntry.Term, lastEntry.Index) + } else if entry.Term == lastEntry.Term && entry.Index <= lastEntry.Index { + return -1, fmt.Errorf("raft.Log: Cannot append entry with earlier index in the same term (%x:%x <= %x:%x)", entry.Term, entry.Index, lastEntry.Term, lastEntry.Index) + } + } + + // Write to storage. + size, err := entry.encode(w) + if err != nil { + return -1, err + } + + // Append to entries list if stored on disk. + l.entries = append(l.entries, entry) + l.results = append(l.results, nil) + + return int64(size), nil +} + +//-------------------------------------- +// Log compaction +//-------------------------------------- + +// compaction the log before index +func (l *Log) compact(index uint64, term uint64) error { + var entries []*LogEntry + + l.mutex.Lock() + defer l.mutex.Unlock() + + // nothing to compaction + // the index may be greater than the current index if + // we just recovery from on snapshot + if index >= l.internalCurrentIndex() { + entries = make([]*LogEntry, 0) + } else { + + // get all log entries after index + entries = l.entries[index-l.startIndex:] + } + + // create a new log file and add all the entries + file, err := os.OpenFile(l.path+".new", os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0600) + if err != nil { + return err + } + for _, entry := range entries { + position, _ := l.file.Seek(0, os.SEEK_CUR) + entry.Position = position + + if _, err = entry.encode(file); err != nil { + return err + } + } + // close the current log file + l.file.Close() + + // remove the current log file to .bak + err = os.Remove(l.path) + if err != nil { + return err + } + + // rename the new log file + err = os.Rename(l.path+".new", l.path) + if err != nil { + return err + } + l.file = file + + // compaction the in memory log + l.entries = 
entries + l.startIndex = index + l.startTerm = term + return nil +} diff --git a/third_party/github.com/coreos/go-raft/log_entry.go b/third_party/github.com/coreos/go-raft/log_entry.go new file mode 100644 index 000000000..a1a505c7d --- /dev/null +++ b/third_party/github.com/coreos/go-raft/log_entry.go @@ -0,0 +1,99 @@ +package raft + +import ( + "bytes" + "code.google.com/p/goprotobuf/proto" + "encoding/json" + "fmt" + "github.com/benbjohnson/go-raft/protobuf" + "io" +) + +// A log entry stores a single item in the log. +type LogEntry struct { + log *Log + Index uint64 + Term uint64 + CommandName string + Command []byte + Position int64 // position in the log file + commit chan bool +} + +// Creates a new log entry associated with a log. +func newLogEntry(log *Log, index uint64, term uint64, command Command) (*LogEntry, error) { + var buf bytes.Buffer + var commandName string + if command != nil { + commandName = command.CommandName() + if encoder, ok := command.(CommandEncoder); ok { + if err := encoder.Encode(&buf); err != nil { + return nil, err + } + } else { + json.NewEncoder(&buf).Encode(command) + } + } + + e := &LogEntry{ + log: log, + Index: index, + Term: term, + CommandName: commandName, + Command: buf.Bytes(), + commit: make(chan bool, 5), + } + + return e, nil +} + +// Encodes the log entry to a buffer. Returns the number of bytes +// written and any error that may have occurred. +func (e *LogEntry) encode(w io.Writer) (int, error) { + defer e.log.pBuffer.Reset() + e.log.pLogEntry.Index = proto.Uint64(e.Index) + e.log.pLogEntry.Term = proto.Uint64(e.Term) + e.log.pLogEntry.CommandName = proto.String(e.CommandName) + e.log.pLogEntry.Command = e.Command + + err := e.log.pBuffer.Marshal(e.log.pLogEntry) + if err != nil { + return -1, err + } + + if _, err = fmt.Fprintf(w, "%8x\n", len(e.log.pBuffer.Bytes())); err != nil { + return -1, err + } + + return w.Write(e.log.pBuffer.Bytes()) +} + +// Decodes the log entry from a buffer. 
Returns the number of bytes read and +// any error that occurs. +func (e *LogEntry) decode(r io.Reader) (int, error) { + + var length int + _, err := fmt.Fscanf(r, "%8x\n", &length) + if err != nil { + return -1, err + } + + data := make([]byte, length) + _, err = r.Read(data) + + if err != nil { + return -1, err + } + + pb := &protobuf.ProtoLogEntry{} + if err = proto.Unmarshal(data, pb); err != nil { + return -1, err + } + + e.Term = pb.GetTerm() + e.Index = pb.GetIndex() + e.CommandName = pb.GetCommandName() + e.Command = pb.Command + + return length, nil +} diff --git a/third_party/github.com/coreos/go-raft/log_test.go b/third_party/github.com/coreos/go-raft/log_test.go new file mode 100644 index 000000000..e890090c3 --- /dev/null +++ b/third_party/github.com/coreos/go-raft/log_test.go @@ -0,0 +1,232 @@ +package raft + +import ( + "io/ioutil" + "os" + "reflect" + "testing" +) + +//------------------------------------------------------------------------------ +// +// Tests +// +//------------------------------------------------------------------------------ + +//-------------------------------------- +// Append +//-------------------------------------- + +// Ensure that we can append to a new log. +func TestLogNewLog(t *testing.T) { + path := getLogPath() + log := newLog() + log.ApplyFunc = func(c Command) (interface{}, error) { + return nil, nil + } + if err := log.open(path); err != nil { + t.Fatalf("Unable to open log: %v", err) + } + defer log.close() + defer os.Remove(path) + + e, _ := newLogEntry(log, 1, 1, &testCommand1{Val: "foo", I: 20}) + if err := log.appendEntry(e); err != nil { + t.Fatalf("Unable to append: %v", err) + } + e, _ = newLogEntry(log, 2, 1, &testCommand2{X: 100}) + if err := log.appendEntry(e); err != nil { + t.Fatalf("Unable to append: %v", err) + } + e, _ = newLogEntry(log, 3, 2, &testCommand1{Val: "bar", I: 0}) + if err := log.appendEntry(e); err != nil { + t.Fatalf("Unable to append: %v", err) + } + + // Partial commit. 
+ if err := log.setCommitIndex(2); err != nil { + t.Fatalf("Unable to partially commit: %v", err) + } + if index, term := log.commitInfo(); index != 2 || term != 1 { + t.Fatalf("Invalid commit info [IDX=%v, TERM=%v]", index, term) + } + + // Full commit. + if err := log.setCommitIndex(3); err != nil { + t.Fatalf("Unable to commit: %v", err) + } + if index, term := log.commitInfo(); index != 3 || term != 2 { + t.Fatalf("Invalid commit info [IDX=%v, TERM=%v]", index, term) + } +} + +// Ensure that we can decode and encode to an existing log. +func TestLogExistingLog(t *testing.T) { + tmpLog := newLog() + e0, _ := newLogEntry(tmpLog, 1, 1, &testCommand1{Val: "foo", I: 20}) + e1, _ := newLogEntry(tmpLog, 2, 1, &testCommand2{X: 100}) + e2, _ := newLogEntry(tmpLog, 3, 2, &testCommand1{Val: "bar", I: 0}) + log, path := setupLog([]*LogEntry{e0, e1, e2}) + defer log.close() + defer os.Remove(path) + + // Validate existing log entries. + if len(log.entries) != 3 { + t.Fatalf("Expected 3 entries, got %d", len(log.entries)) + } + if log.entries[0].Index != 1 || log.entries[0].Term != 1 { + t.Fatalf("Unexpected entry[0]: %v", log.entries[0]) + } + if log.entries[1].Index != 2 || log.entries[1].Term != 1 { + t.Fatalf("Unexpected entry[1]: %v", log.entries[1]) + } + if log.entries[2].Index != 3 || log.entries[2].Term != 2 { + t.Fatalf("Unexpected entry[2]: %v", log.entries[2]) + } +} + +// Ensure that we can check the contents of the log by index/term. 
+func TestLogContainsEntries(t *testing.T) { + tmpLog := newLog() + e0, _ := newLogEntry(tmpLog, 1, 1, &testCommand1{Val: "foo", I: 20}) + e1, _ := newLogEntry(tmpLog, 2, 1, &testCommand2{X: 100}) + e2, _ := newLogEntry(tmpLog, 3, 2, &testCommand1{Val: "bar", I: 0}) + log, path := setupLog([]*LogEntry{e0, e1, e2}) + defer log.close() + defer os.Remove(path) + + if log.containsEntry(0, 0) { + t.Fatalf("Zero-index entry should not exist in log.") + } + if log.containsEntry(1, 0) { + t.Fatalf("Entry with mismatched term should not exist") + } + if log.containsEntry(4, 0) { + t.Fatalf("Out-of-range entry should not exist") + } + if !log.containsEntry(2, 1) { + t.Fatalf("Entry 2/1 should exist") + } + if !log.containsEntry(3, 2) { + t.Fatalf("Entry 2/1 should exist") + } +} + +// Ensure that we can recover from an incomplete/corrupt log and continue logging. +func TestLogRecovery(t *testing.T) { + tmpLog := newLog() + e0, _ := newLogEntry(tmpLog, 1, 1, &testCommand1{Val: "foo", I: 20}) + e1, _ := newLogEntry(tmpLog, 2, 1, &testCommand2{X: 100}) + f, _ := ioutil.TempFile("", "raft-log-") + + e0.encode(f) + e1.encode(f) + f.WriteString("CORRUPT!") + f.Close() + + log := newLog() + log.ApplyFunc = func(c Command) (interface{}, error) { + return nil, nil + } + if err := log.open(f.Name()); err != nil { + t.Fatalf("Unable to open log: %v", err) + } + defer log.close() + defer os.Remove(f.Name()) + + e, _ := newLogEntry(log, 3, 2, &testCommand1{Val: "bat", I: -5}) + if err := log.appendEntry(e); err != nil { + t.Fatalf("Unable to append: %v", err) + } + + // Validate existing log entries. 
+ if len(log.entries) != 3 { + t.Fatalf("Expected 3 entries, got %d", len(log.entries)) + } + if log.entries[0].Index != 1 || log.entries[0].Term != 1 { + t.Fatalf("Unexpected entry[0]: %v", log.entries[0]) + } + if log.entries[1].Index != 2 || log.entries[1].Term != 1 { + t.Fatalf("Unexpected entry[1]: %v", log.entries[1]) + } + if log.entries[2].Index != 3 || log.entries[2].Term != 2 { + t.Fatalf("Unexpected entry[2]: %v", log.entries[2]) + } +} + +//-------------------------------------- +// Append +//-------------------------------------- + +// Ensure that we can truncate uncommitted entries in the log. +func TestLogTruncate(t *testing.T) { + log, path := setupLog(nil) + if err := log.open(path); err != nil { + t.Fatalf("Unable to open log: %v", err) + } + + defer os.Remove(path) + + entry1, _ := newLogEntry(log, 1, 1, &testCommand1{Val: "foo", I: 20}) + if err := log.appendEntry(entry1); err != nil { + t.Fatalf("Unable to append: %v", err) + } + entry2, _ := newLogEntry(log, 2, 1, &testCommand2{X: 100}) + if err := log.appendEntry(entry2); err != nil { + t.Fatalf("Unable to append: %v", err) + } + entry3, _ := newLogEntry(log, 3, 2, &testCommand1{Val: "bar", I: 0}) + if err := log.appendEntry(entry3); err != nil { + t.Fatalf("Unable to append: %v", err) + } + if err := log.setCommitIndex(2); err != nil { + t.Fatalf("Unable to partially commit: %v", err) + } + + // Truncate committed entry. + if err := log.truncate(1, 1); err == nil || err.Error() != "raft.Log: Index is already committed (2): (IDX=1, TERM=1)" { + t.Fatalf("Truncating committed entries shouldn't work: %v", err) + } + // Truncate past end of log. + if err := log.truncate(4, 2); err == nil || err.Error() != "raft.Log: Entry index does not exist (MAX=3): (IDX=4, TERM=2)" { + t.Fatalf("Truncating past end-of-log shouldn't work: %v", err) + } + // Truncate entry with mismatched term. 
+ if err := log.truncate(2, 2); err == nil || err.Error() != "raft.Log: Entry at index does not have matching term (1): (IDX=2, TERM=2)" { + t.Fatalf("Truncating mismatched entries shouldn't work: %v", err) + } + // Truncate end of log. + if err := log.truncate(3, 2); !(err == nil && reflect.DeepEqual(log.entries, []*LogEntry{entry1, entry2, entry3})) { + t.Fatalf("Truncating end of log should work: %v\n\nEntries:\nActual: %v\nExpected: %v", err, log.entries, []*LogEntry{entry1, entry2, entry3}) + } + // Truncate at last commit. + if err := log.truncate(2, 1); !(err == nil && reflect.DeepEqual(log.entries, []*LogEntry{entry1, entry2})) { + t.Fatalf("Truncating at last commit should work: %v\n\nEntries:\nActual: %v\nExpected: %v", err, log.entries, []*LogEntry{entry1, entry2}) + } + + // Append after truncate + if err := log.appendEntry(entry3); err != nil { + t.Fatalf("Unable to append after truncate: %v", err) + } + + log.close() + + // Recovery the truncated log + log = newLog() + if err := log.open(path); err != nil { + t.Fatalf("Unable to open log: %v", err) + } + // Validate existing log entries. 
+ if len(log.entries) != 3 { + t.Fatalf("Expected 3 entries, got %d", len(log.entries)) + } + if log.entries[0].Index != 1 || log.entries[0].Term != 1 { + t.Fatalf("Unexpected entry[0]: %v", log.entries[0]) + } + if log.entries[1].Index != 2 || log.entries[1].Term != 1 { + t.Fatalf("Unexpected entry[1]: %v", log.entries[1]) + } + if log.entries[2].Index != 3 || log.entries[2].Term != 2 { + t.Fatalf("Unexpected entry[2]: %v", log.entries[2]) + } +} diff --git a/third_party/github.com/coreos/go-raft/nop_command.go b/third_party/github.com/coreos/go-raft/nop_command.go new file mode 100644 index 000000000..e3183cdd8 --- /dev/null +++ b/third_party/github.com/coreos/go-raft/nop_command.go @@ -0,0 +1,26 @@ +package raft + +import ( + "io" +) + +// NOP command +type NOPCommand struct { +} + +// The name of the NOP command in the log +func (c NOPCommand) CommandName() string { + return "raft:nop" +} + +func (c NOPCommand) Apply(server *Server) (interface{}, error) { + return nil, nil +} + +func (c NOPCommand) Encode(w io.Writer) error { + return nil +} + +func (c NOPCommand) Decode(r io.Reader) error { + return nil +} diff --git a/third_party/github.com/coreos/go-raft/peer.go b/third_party/github.com/coreos/go-raft/peer.go new file mode 100644 index 000000000..e7761dd97 --- /dev/null +++ b/third_party/github.com/coreos/go-raft/peer.go @@ -0,0 +1,271 @@ +package raft + +import ( + "sync" + "time" +) + +//------------------------------------------------------------------------------ +// +// Typedefs +// +//------------------------------------------------------------------------------ + +// A peer is a reference to another server involved in the consensus protocol. 
+type Peer struct { + server *Server + name string + prevLogIndex uint64 + mutex sync.RWMutex + stopChan chan bool + heartbeatTimeout time.Duration +} + +//------------------------------------------------------------------------------ +// +// Constructor +// +//------------------------------------------------------------------------------ + +// Creates a new peer. +func newPeer(server *Server, name string, heartbeatTimeout time.Duration) *Peer { + return &Peer{ + server: server, + name: name, + heartbeatTimeout: heartbeatTimeout, + } +} + +//------------------------------------------------------------------------------ +// +// Accessors +// +//------------------------------------------------------------------------------ + +// Retrieves the name of the peer. +func (p *Peer) Name() string { + return p.name +} + +// Sets the heartbeat timeout. +func (p *Peer) setHeartbeatTimeout(duration time.Duration) { + p.heartbeatTimeout = duration +} + +//-------------------------------------- +// Prev log index +//-------------------------------------- + +// Retrieves the previous log index. +func (p *Peer) getPrevLogIndex() uint64 { + p.mutex.RLock() + defer p.mutex.RUnlock() + return p.prevLogIndex +} + +// Sets the previous log index. +func (p *Peer) setPrevLogIndex(value uint64) { + p.mutex.Lock() + defer p.mutex.Unlock() + p.prevLogIndex = value +} + +//------------------------------------------------------------------------------ +// +// Methods +// +//------------------------------------------------------------------------------ + +//-------------------------------------- +// Heartbeat +//-------------------------------------- + +// Starts the peer heartbeat. +func (p *Peer) startHeartbeat() { + p.stopChan = make(chan bool, 1) + c := make(chan bool) + go p.heartbeat(c) + <-c +} + +// Stops the peer heartbeat. 
+func (p *Peer) stopHeartbeat() { + // here is a problem + // the previous stop is no buffer leader may get blocked + // when heartbeat returns at line 132 + // I make the channel with 1 buffer + // and try to panic here + select { + case p.stopChan <- true: + + default: + panic("[" + p.server.Name() + "] cannot stop [" + p.Name() + "] heartbeat") + } +} + +//-------------------------------------- +// Copying +//-------------------------------------- + +// Clones the state of the peer. The clone is not attached to a server and +// the heartbeat timer will not exist. +func (p *Peer) clone() *Peer { + p.mutex.Lock() + defer p.mutex.Unlock() + return &Peer{ + name: p.name, + prevLogIndex: p.prevLogIndex, + } +} + +//-------------------------------------- +// Heartbeat +//-------------------------------------- + +// Listens to the heartbeat timeout and flushes an AppendEntries RPC. +func (p *Peer) heartbeat(c chan bool) { + stopChan := p.stopChan + + c <- true + + debugln("peer.heartbeat: ", p.Name(), p.heartbeatTimeout) + + for { + select { + case <-stopChan: + debugln("peer.heartbeat.stop: ", p.Name()) + return + + case <-time.After(p.heartbeatTimeout): + debugln("peer.heartbeat.run: ", p.Name()) + prevLogIndex := p.getPrevLogIndex() + entries, prevLogTerm := p.server.log.getEntriesAfter(prevLogIndex, p.server.maxLogEntriesPerRequest) + + if p.server.State() != Leader { + return + } + + if entries != nil { + p.sendAppendEntriesRequest(newAppendEntriesRequest(p.server.currentTerm, prevLogIndex, prevLogTerm, p.server.log.CommitIndex(), p.server.name, entries)) + } else { + p.sendSnapshotRequest(newSnapshotRequest(p.server.name, p.server.lastSnapshot)) + } + } + } +} + +//-------------------------------------- +// Append Entries +//-------------------------------------- + +// Sends an AppendEntries request to the peer through the transport. 
+func (p *Peer) sendAppendEntriesRequest(req *AppendEntriesRequest) { + traceln("peer.flush.send: ", p.server.Name(), "->", p.Name(), " ", len(req.Entries)) + + resp := p.server.Transporter().SendAppendEntriesRequest(p.server, p, req) + if resp == nil { + debugln("peer.flush.timeout: ", p.server.Name(), "->", p.Name()) + return + } + traceln("peer.flush.recv: ", p.Name()) + + // If successful then update the previous log index. + p.mutex.Lock() + if resp.Success { + if len(req.Entries) > 0 { + p.prevLogIndex = req.Entries[len(req.Entries)-1].Index + + // if peer append a log entry from the current term + // we set append to true + if req.Entries[len(req.Entries)-1].Term == p.server.currentTerm { + resp.append = true + } + } + traceln("peer.flush.success: ", p.server.Name(), "->", p.Name(), "; idx =", p.prevLogIndex) + + // If it was unsuccessful then decrement the previous log index and + // we'll try again next time. + } else { + if resp.CommitIndex >= p.prevLogIndex { + + // we may miss a response from peer + // so maybe the peer has commited the logs we sent + // but we did not receive the success reply and did not increase + // the prevLogIndex + + p.prevLogIndex = resp.CommitIndex + + debugln("peer.flush.commitIndex: ", p.server.Name(), "->", p.Name(), " idx =", p.prevLogIndex) + } else if p.prevLogIndex > 0 { + // Decrement the previous log index down until we find a match. Don't + // let it go below where the peer's commit index is though. That's a + // problem. + p.prevLogIndex-- + // if it not enough, we directly decrease to the index of the + if p.prevLogIndex > resp.Index { + p.prevLogIndex = resp.Index + } + + debugln("peer.flush.decrement: ", p.server.Name(), "->", p.Name(), " idx =", p.prevLogIndex) + } + } + p.mutex.Unlock() + + // Attach the peer to resp, thus server can know where it comes from + resp.peer = p.Name() + // Send response to server for processing. 
+ p.server.send(resp) +} + +// Sends an Snapshot request to the peer through the transport. +func (p *Peer) sendSnapshotRequest(req *SnapshotRequest) { + debugln("peer.snap.send: ", p.name) + + resp := p.server.Transporter().SendSnapshotRequest(p.server, p, req) + if resp == nil { + debugln("peer.snap.timeout: ", p.name) + return + } + + debugln("peer.snap.recv: ", p.name) + + // If successful, the peer should have been to snapshot state + // Send it the snapshot! + if resp.Success { + p.sendSnapshotRecoveryRequest() + } else { + debugln("peer.snap.failed: ", p.name) + return + } + +} + +// Sends an Snapshot Recovery request to the peer through the transport. +func (p *Peer) sendSnapshotRecoveryRequest() { + req := newSnapshotRecoveryRequest(p.server.name, p.server.lastSnapshot) + debugln("peer.snap.recovery.send: ", p.name) + resp := p.server.Transporter().SendSnapshotRecoveryRequest(p.server, p, req) + if resp.Success { + p.prevLogIndex = req.LastIndex + } else { + debugln("peer.snap.recovery.failed: ", p.name) + return + } + // Send response to server for processing. 
+ p.server.send(&AppendEntriesResponse{Term: resp.Term, Success: resp.Success, append: (resp.Term == p.server.currentTerm)}) +} + +//-------------------------------------- +// Vote Requests +//-------------------------------------- + +// send VoteRequest Request +func (p *Peer) sendVoteRequest(req *RequestVoteRequest, c chan *RequestVoteResponse) { + debugln("peer.vote: ", p.server.Name(), "->", p.Name()) + req.peer = p + if resp := p.server.Transporter().SendVoteRequest(p.server, p, req); resp != nil { + debugln("peer.vote: recv", p.server.Name(), "<-", p.Name()) + resp.peer = p + c <- resp + } +} diff --git a/third_party/github.com/coreos/go-raft/protobuf/append_entries_request.pb.go b/third_party/github.com/coreos/go-raft/protobuf/append_entries_request.pb.go new file mode 100644 index 000000000..f7ef595d8 --- /dev/null +++ b/third_party/github.com/coreos/go-raft/protobuf/append_entries_request.pb.go @@ -0,0 +1,115 @@ +// Code generated by protoc-gen-go. +// source: append_entries_request.proto +// DO NOT EDIT! + +package protobuf + +import proto "code.google.com/p/goprotobuf/proto" +import json "encoding/json" +import math "math" + +// Reference proto, json, and math imports to suppress error if they are not otherwise used. 
+var _ = proto.Marshal +var _ = &json.SyntaxError{} +var _ = math.Inf + +type ProtoAppendEntriesRequest struct { + Term *uint64 `protobuf:"varint,1,req" json:"Term,omitempty"` + PrevLogIndex *uint64 `protobuf:"varint,2,req" json:"PrevLogIndex,omitempty"` + PrevLogTerm *uint64 `protobuf:"varint,3,req" json:"PrevLogTerm,omitempty"` + CommitIndex *uint64 `protobuf:"varint,4,req" json:"CommitIndex,omitempty"` + LeaderName *string `protobuf:"bytes,5,req" json:"LeaderName,omitempty"` + Entries []*ProtoAppendEntriesRequest_ProtoLogEntry `protobuf:"bytes,6,rep" json:"Entries,omitempty"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *ProtoAppendEntriesRequest) Reset() { *m = ProtoAppendEntriesRequest{} } +func (m *ProtoAppendEntriesRequest) String() string { return proto.CompactTextString(m) } +func (*ProtoAppendEntriesRequest) ProtoMessage() {} + +func (m *ProtoAppendEntriesRequest) GetTerm() uint64 { + if m != nil && m.Term != nil { + return *m.Term + } + return 0 +} + +func (m *ProtoAppendEntriesRequest) GetPrevLogIndex() uint64 { + if m != nil && m.PrevLogIndex != nil { + return *m.PrevLogIndex + } + return 0 +} + +func (m *ProtoAppendEntriesRequest) GetPrevLogTerm() uint64 { + if m != nil && m.PrevLogTerm != nil { + return *m.PrevLogTerm + } + return 0 +} + +func (m *ProtoAppendEntriesRequest) GetCommitIndex() uint64 { + if m != nil && m.CommitIndex != nil { + return *m.CommitIndex + } + return 0 +} + +func (m *ProtoAppendEntriesRequest) GetLeaderName() string { + if m != nil && m.LeaderName != nil { + return *m.LeaderName + } + return "" +} + +func (m *ProtoAppendEntriesRequest) GetEntries() []*ProtoAppendEntriesRequest_ProtoLogEntry { + if m != nil { + return m.Entries + } + return nil +} + +type ProtoAppendEntriesRequest_ProtoLogEntry struct { + Index *uint64 `protobuf:"varint,1,req" json:"Index,omitempty"` + Term *uint64 `protobuf:"varint,2,req" json:"Term,omitempty"` + CommandName *string `protobuf:"bytes,3,req" json:"CommandName,omitempty"` + Command []byte 
`protobuf:"bytes,4,opt" json:"Command,omitempty"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *ProtoAppendEntriesRequest_ProtoLogEntry) Reset() { + *m = ProtoAppendEntriesRequest_ProtoLogEntry{} +} +func (m *ProtoAppendEntriesRequest_ProtoLogEntry) String() string { return proto.CompactTextString(m) } +func (*ProtoAppendEntriesRequest_ProtoLogEntry) ProtoMessage() {} + +func (m *ProtoAppendEntriesRequest_ProtoLogEntry) GetIndex() uint64 { + if m != nil && m.Index != nil { + return *m.Index + } + return 0 +} + +func (m *ProtoAppendEntriesRequest_ProtoLogEntry) GetTerm() uint64 { + if m != nil && m.Term != nil { + return *m.Term + } + return 0 +} + +func (m *ProtoAppendEntriesRequest_ProtoLogEntry) GetCommandName() string { + if m != nil && m.CommandName != nil { + return *m.CommandName + } + return "" +} + +func (m *ProtoAppendEntriesRequest_ProtoLogEntry) GetCommand() []byte { + if m != nil { + return m.Command + } + return nil +} + +func init() { +} diff --git a/third_party/github.com/coreos/go-raft/protobuf/append_entries_request.proto b/third_party/github.com/coreos/go-raft/protobuf/append_entries_request.proto new file mode 100644 index 000000000..90790d13a --- /dev/null +++ b/third_party/github.com/coreos/go-raft/protobuf/append_entries_request.proto @@ -0,0 +1,18 @@ +package protobuf; + +message ProtoAppendEntriesRequest { + required uint64 Term=1; + required uint64 PrevLogIndex=2; + required uint64 PrevLogTerm=3; + required uint64 CommitIndex=4; + required string LeaderName=5; + + message ProtoLogEntry { + required uint64 Index=1; + required uint64 Term=2; + required string CommandName=3; + optional bytes Command=4; + } + + repeated ProtoLogEntry Entries=6; +} \ No newline at end of file diff --git a/third_party/github.com/coreos/go-raft/protobuf/append_entries_responses.pb.go b/third_party/github.com/coreos/go-raft/protobuf/append_entries_responses.pb.go new file mode 100644 index 000000000..30a990d5e --- /dev/null +++ 
b/third_party/github.com/coreos/go-raft/protobuf/append_entries_responses.pb.go @@ -0,0 +1,57 @@ +// Code generated by protoc-gen-go. +// source: append_entries_responses.proto +// DO NOT EDIT! + +package protobuf + +import proto "code.google.com/p/goprotobuf/proto" +import json "encoding/json" +import math "math" + +// Reference proto, json, and math imports to suppress error if they are not otherwise used. +var _ = proto.Marshal +var _ = &json.SyntaxError{} +var _ = math.Inf + +type ProtoAppendEntriesResponse struct { + Term *uint64 `protobuf:"varint,1,req" json:"Term,omitempty"` + Index *uint64 `protobuf:"varint,2,req" json:"Index,omitempty"` + CommitIndex *uint64 `protobuf:"varint,3,req" json:"CommitIndex,omitempty"` + Success *bool `protobuf:"varint,4,req" json:"Success,omitempty"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *ProtoAppendEntriesResponse) Reset() { *m = ProtoAppendEntriesResponse{} } +func (m *ProtoAppendEntriesResponse) String() string { return proto.CompactTextString(m) } +func (*ProtoAppendEntriesResponse) ProtoMessage() {} + +func (m *ProtoAppendEntriesResponse) GetTerm() uint64 { + if m != nil && m.Term != nil { + return *m.Term + } + return 0 +} + +func (m *ProtoAppendEntriesResponse) GetIndex() uint64 { + if m != nil && m.Index != nil { + return *m.Index + } + return 0 +} + +func (m *ProtoAppendEntriesResponse) GetCommitIndex() uint64 { + if m != nil && m.CommitIndex != nil { + return *m.CommitIndex + } + return 0 +} + +func (m *ProtoAppendEntriesResponse) GetSuccess() bool { + if m != nil && m.Success != nil { + return *m.Success + } + return false +} + +func init() { +} diff --git a/third_party/github.com/coreos/go-raft/protobuf/append_entries_responses.proto b/third_party/github.com/coreos/go-raft/protobuf/append_entries_responses.proto new file mode 100644 index 000000000..b6f793249 --- /dev/null +++ b/third_party/github.com/coreos/go-raft/protobuf/append_entries_responses.proto @@ -0,0 +1,8 @@ +package protobuf; + +message 
ProtoAppendEntriesResponse { + required uint64 Term=1; + required uint64 Index=2; + required uint64 CommitIndex=3; + required bool Success=4; +} \ No newline at end of file diff --git a/third_party/github.com/coreos/go-raft/protobuf/log_entry.pb.go b/third_party/github.com/coreos/go-raft/protobuf/log_entry.pb.go new file mode 100644 index 000000000..631928e8f --- /dev/null +++ b/third_party/github.com/coreos/go-raft/protobuf/log_entry.pb.go @@ -0,0 +1,57 @@ +// Code generated by protoc-gen-go. +// source: log_entry.proto +// DO NOT EDIT! + +package protobuf + +import proto "code.google.com/p/goprotobuf/proto" +import json "encoding/json" +import math "math" + +// Reference proto, json, and math imports to suppress error if they are not otherwise used. +var _ = proto.Marshal +var _ = &json.SyntaxError{} +var _ = math.Inf + +type ProtoLogEntry struct { + Index *uint64 `protobuf:"varint,1,req" json:"Index,omitempty"` + Term *uint64 `protobuf:"varint,2,req" json:"Term,omitempty"` + CommandName *string `protobuf:"bytes,3,req" json:"CommandName,omitempty"` + Command []byte `protobuf:"bytes,4,opt" json:"Command,omitempty"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *ProtoLogEntry) Reset() { *m = ProtoLogEntry{} } +func (m *ProtoLogEntry) String() string { return proto.CompactTextString(m) } +func (*ProtoLogEntry) ProtoMessage() {} + +func (m *ProtoLogEntry) GetIndex() uint64 { + if m != nil && m.Index != nil { + return *m.Index + } + return 0 +} + +func (m *ProtoLogEntry) GetTerm() uint64 { + if m != nil && m.Term != nil { + return *m.Term + } + return 0 +} + +func (m *ProtoLogEntry) GetCommandName() string { + if m != nil && m.CommandName != nil { + return *m.CommandName + } + return "" +} + +func (m *ProtoLogEntry) GetCommand() []byte { + if m != nil { + return m.Command + } + return nil +} + +func init() { +} diff --git a/third_party/github.com/coreos/go-raft/protobuf/log_entry.proto b/third_party/github.com/coreos/go-raft/protobuf/log_entry.proto new file mode 
100644 index 000000000..c63d86912 --- /dev/null +++ b/third_party/github.com/coreos/go-raft/protobuf/log_entry.proto @@ -0,0 +1,8 @@ +package protobuf; + +message ProtoLogEntry { + required uint64 Index=1; + required uint64 Term=2; + required string CommandName=3; + optional bytes Command=4; // for nop-command +} \ No newline at end of file diff --git a/third_party/github.com/coreos/go-raft/protobuf/request_vote_request.pb.go b/third_party/github.com/coreos/go-raft/protobuf/request_vote_request.pb.go new file mode 100644 index 000000000..dc5a2ee9a --- /dev/null +++ b/third_party/github.com/coreos/go-raft/protobuf/request_vote_request.pb.go @@ -0,0 +1,57 @@ +// Code generated by protoc-gen-go. +// source: request_vote_request.proto +// DO NOT EDIT! + +package protobuf + +import proto "code.google.com/p/goprotobuf/proto" +import json "encoding/json" +import math "math" + +// Reference proto, json, and math imports to suppress error if they are not otherwise used. +var _ = proto.Marshal +var _ = &json.SyntaxError{} +var _ = math.Inf + +type ProtoRequestVoteRequest struct { + Term *uint64 `protobuf:"varint,1,req" json:"Term,omitempty"` + LastLogIndex *uint64 `protobuf:"varint,2,req" json:"LastLogIndex,omitempty"` + LastLogTerm *uint64 `protobuf:"varint,3,req" json:"LastLogTerm,omitempty"` + CandidateName *string `protobuf:"bytes,4,req" json:"CandidateName,omitempty"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *ProtoRequestVoteRequest) Reset() { *m = ProtoRequestVoteRequest{} } +func (m *ProtoRequestVoteRequest) String() string { return proto.CompactTextString(m) } +func (*ProtoRequestVoteRequest) ProtoMessage() {} + +func (m *ProtoRequestVoteRequest) GetTerm() uint64 { + if m != nil && m.Term != nil { + return *m.Term + } + return 0 +} + +func (m *ProtoRequestVoteRequest) GetLastLogIndex() uint64 { + if m != nil && m.LastLogIndex != nil { + return *m.LastLogIndex + } + return 0 +} + +func (m *ProtoRequestVoteRequest) GetLastLogTerm() uint64 { + if m != nil && 
m.LastLogTerm != nil { + return *m.LastLogTerm + } + return 0 +} + +func (m *ProtoRequestVoteRequest) GetCandidateName() string { + if m != nil && m.CandidateName != nil { + return *m.CandidateName + } + return "" +} + +func init() { +} diff --git a/third_party/github.com/coreos/go-raft/protobuf/request_vote_request.proto b/third_party/github.com/coreos/go-raft/protobuf/request_vote_request.proto new file mode 100644 index 000000000..e729926ee --- /dev/null +++ b/third_party/github.com/coreos/go-raft/protobuf/request_vote_request.proto @@ -0,0 +1,8 @@ +package protobuf; + +message ProtoRequestVoteRequest { + required uint64 Term=1; + required uint64 LastLogIndex=2; + required uint64 LastLogTerm=3; + required string CandidateName=4; +} \ No newline at end of file diff --git a/third_party/github.com/coreos/go-raft/protobuf/request_vote_responses.pb.go b/third_party/github.com/coreos/go-raft/protobuf/request_vote_responses.pb.go new file mode 100644 index 000000000..16e0e582a --- /dev/null +++ b/third_party/github.com/coreos/go-raft/protobuf/request_vote_responses.pb.go @@ -0,0 +1,41 @@ +// Code generated by protoc-gen-go. +// source: request_vote_responses.proto +// DO NOT EDIT! + +package protobuf + +import proto "code.google.com/p/goprotobuf/proto" +import json "encoding/json" +import math "math" + +// Reference proto, json, and math imports to suppress error if they are not otherwise used. 
+var _ = proto.Marshal +var _ = &json.SyntaxError{} +var _ = math.Inf + +type ProtoRequestVoteResponse struct { + Term *uint64 `protobuf:"varint,1,req" json:"Term,omitempty"` + VoteGranted *bool `protobuf:"varint,2,req" json:"VoteGranted,omitempty"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *ProtoRequestVoteResponse) Reset() { *m = ProtoRequestVoteResponse{} } +func (m *ProtoRequestVoteResponse) String() string { return proto.CompactTextString(m) } +func (*ProtoRequestVoteResponse) ProtoMessage() {} + +func (m *ProtoRequestVoteResponse) GetTerm() uint64 { + if m != nil && m.Term != nil { + return *m.Term + } + return 0 +} + +func (m *ProtoRequestVoteResponse) GetVoteGranted() bool { + if m != nil && m.VoteGranted != nil { + return *m.VoteGranted + } + return false +} + +func init() { +} diff --git a/third_party/github.com/coreos/go-raft/protobuf/request_vote_responses.proto b/third_party/github.com/coreos/go-raft/protobuf/request_vote_responses.proto new file mode 100644 index 000000000..577491b61 --- /dev/null +++ b/third_party/github.com/coreos/go-raft/protobuf/request_vote_responses.proto @@ -0,0 +1,6 @@ +package protobuf; + +message ProtoRequestVoteResponse { + required uint64 Term=1; + required bool VoteGranted=2; +} \ No newline at end of file diff --git a/third_party/github.com/coreos/go-raft/protobuf/snapshot_recovery_request.pb.go b/third_party/github.com/coreos/go-raft/protobuf/snapshot_recovery_request.pb.go new file mode 100644 index 000000000..f580de6ab --- /dev/null +++ b/third_party/github.com/coreos/go-raft/protobuf/snapshot_recovery_request.pb.go @@ -0,0 +1,65 @@ +// Code generated by protoc-gen-go. +// source: snapshot_recovery_request.proto +// DO NOT EDIT! + +package protobuf + +import proto "code.google.com/p/goprotobuf/proto" +import json "encoding/json" +import math "math" + +// Reference proto, json, and math imports to suppress error if they are not otherwise used. 
+var _ = proto.Marshal +var _ = &json.SyntaxError{} +var _ = math.Inf + +type ProtoSnapshotRecoveryRequest struct { + LeaderName *string `protobuf:"bytes,1,req" json:"LeaderName,omitempty"` + LastIndex *uint64 `protobuf:"varint,2,req" json:"LastIndex,omitempty"` + LastTerm *uint64 `protobuf:"varint,3,req" json:"LastTerm,omitempty"` + Peers []string `protobuf:"bytes,4,rep" json:"Peers,omitempty"` + State []byte `protobuf:"bytes,5,req" json:"State,omitempty"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *ProtoSnapshotRecoveryRequest) Reset() { *m = ProtoSnapshotRecoveryRequest{} } +func (m *ProtoSnapshotRecoveryRequest) String() string { return proto.CompactTextString(m) } +func (*ProtoSnapshotRecoveryRequest) ProtoMessage() {} + +func (m *ProtoSnapshotRecoveryRequest) GetLeaderName() string { + if m != nil && m.LeaderName != nil { + return *m.LeaderName + } + return "" +} + +func (m *ProtoSnapshotRecoveryRequest) GetLastIndex() uint64 { + if m != nil && m.LastIndex != nil { + return *m.LastIndex + } + return 0 +} + +func (m *ProtoSnapshotRecoveryRequest) GetLastTerm() uint64 { + if m != nil && m.LastTerm != nil { + return *m.LastTerm + } + return 0 +} + +func (m *ProtoSnapshotRecoveryRequest) GetPeers() []string { + if m != nil { + return m.Peers + } + return nil +} + +func (m *ProtoSnapshotRecoveryRequest) GetState() []byte { + if m != nil { + return m.State + } + return nil +} + +func init() { +} diff --git a/third_party/github.com/coreos/go-raft/protobuf/snapshot_recovery_request.proto b/third_party/github.com/coreos/go-raft/protobuf/snapshot_recovery_request.proto new file mode 100644 index 000000000..000c54d48 --- /dev/null +++ b/third_party/github.com/coreos/go-raft/protobuf/snapshot_recovery_request.proto @@ -0,0 +1,9 @@ +package protobuf; + +message ProtoSnapshotRecoveryRequest { + required string LeaderName=1; + required uint64 LastIndex=2; + required uint64 LastTerm=3; + repeated string Peers=4; + required bytes State=5; +} \ No newline at end of 
file diff --git a/third_party/github.com/coreos/go-raft/protobuf/snapshot_recovery_response.pb.go b/third_party/github.com/coreos/go-raft/protobuf/snapshot_recovery_response.pb.go new file mode 100644 index 000000000..62081f5c1 --- /dev/null +++ b/third_party/github.com/coreos/go-raft/protobuf/snapshot_recovery_response.pb.go @@ -0,0 +1,49 @@ +// Code generated by protoc-gen-go. +// source: snapshot_recovery_response.proto +// DO NOT EDIT! + +package protobuf + +import proto "code.google.com/p/goprotobuf/proto" +import json "encoding/json" +import math "math" + +// Reference proto, json, and math imports to suppress error if they are not otherwise used. +var _ = proto.Marshal +var _ = &json.SyntaxError{} +var _ = math.Inf + +type ProtoSnapshotRecoveryResponse struct { + Term *uint64 `protobuf:"varint,1,req" json:"Term,omitempty"` + Success *bool `protobuf:"varint,2,req" json:"Success,omitempty"` + CommitIndex *uint64 `protobuf:"varint,3,req" json:"CommitIndex,omitempty"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *ProtoSnapshotRecoveryResponse) Reset() { *m = ProtoSnapshotRecoveryResponse{} } +func (m *ProtoSnapshotRecoveryResponse) String() string { return proto.CompactTextString(m) } +func (*ProtoSnapshotRecoveryResponse) ProtoMessage() {} + +func (m *ProtoSnapshotRecoveryResponse) GetTerm() uint64 { + if m != nil && m.Term != nil { + return *m.Term + } + return 0 +} + +func (m *ProtoSnapshotRecoveryResponse) GetSuccess() bool { + if m != nil && m.Success != nil { + return *m.Success + } + return false +} + +func (m *ProtoSnapshotRecoveryResponse) GetCommitIndex() uint64 { + if m != nil && m.CommitIndex != nil { + return *m.CommitIndex + } + return 0 +} + +func init() { +} diff --git a/third_party/github.com/coreos/go-raft/protobuf/snapshot_recovery_response.proto b/third_party/github.com/coreos/go-raft/protobuf/snapshot_recovery_response.proto new file mode 100644 index 000000000..41ff83d25 --- /dev/null +++ 
b/third_party/github.com/coreos/go-raft/protobuf/snapshot_recovery_response.proto @@ -0,0 +1,7 @@ +package protobuf; + +message ProtoSnapshotRecoveryResponse { + required uint64 Term=1; + required bool Success=2; + required uint64 CommitIndex=3; +} \ No newline at end of file diff --git a/third_party/github.com/coreos/go-raft/protobuf/snapshot_request.pb.go b/third_party/github.com/coreos/go-raft/protobuf/snapshot_request.pb.go new file mode 100644 index 000000000..510145748 --- /dev/null +++ b/third_party/github.com/coreos/go-raft/protobuf/snapshot_request.pb.go @@ -0,0 +1,49 @@ +// Code generated by protoc-gen-go. +// source: snapshot_request.proto +// DO NOT EDIT! + +package protobuf + +import proto "code.google.com/p/goprotobuf/proto" +import json "encoding/json" +import math "math" + +// Reference proto, json, and math imports to suppress error if they are not otherwise used. +var _ = proto.Marshal +var _ = &json.SyntaxError{} +var _ = math.Inf + +type ProtoSnapshotRequest struct { + LeaderName *string `protobuf:"bytes,1,req" json:"LeaderName,omitempty"` + LastIndex *uint64 `protobuf:"varint,2,req" json:"LastIndex,omitempty"` + LastTerm *uint64 `protobuf:"varint,3,req" json:"LastTerm,omitempty"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *ProtoSnapshotRequest) Reset() { *m = ProtoSnapshotRequest{} } +func (m *ProtoSnapshotRequest) String() string { return proto.CompactTextString(m) } +func (*ProtoSnapshotRequest) ProtoMessage() {} + +func (m *ProtoSnapshotRequest) GetLeaderName() string { + if m != nil && m.LeaderName != nil { + return *m.LeaderName + } + return "" +} + +func (m *ProtoSnapshotRequest) GetLastIndex() uint64 { + if m != nil && m.LastIndex != nil { + return *m.LastIndex + } + return 0 +} + +func (m *ProtoSnapshotRequest) GetLastTerm() uint64 { + if m != nil && m.LastTerm != nil { + return *m.LastTerm + } + return 0 +} + +func init() { +} diff --git a/third_party/github.com/coreos/go-raft/protobuf/snapshot_request.proto 
b/third_party/github.com/coreos/go-raft/protobuf/snapshot_request.proto new file mode 100644 index 000000000..2b7c3850f --- /dev/null +++ b/third_party/github.com/coreos/go-raft/protobuf/snapshot_request.proto @@ -0,0 +1,7 @@ +package protobuf; + +message ProtoSnapshotRequest { + required string LeaderName=1; + required uint64 LastIndex=2; + required uint64 LastTerm=3; +} \ No newline at end of file diff --git a/third_party/github.com/coreos/go-raft/protobuf/snapshot_response.pb.go b/third_party/github.com/coreos/go-raft/protobuf/snapshot_response.pb.go new file mode 100644 index 000000000..43c05dc61 --- /dev/null +++ b/third_party/github.com/coreos/go-raft/protobuf/snapshot_response.pb.go @@ -0,0 +1,33 @@ +// Code generated by protoc-gen-go. +// source: snapshot_response.proto +// DO NOT EDIT! + +package protobuf + +import proto "code.google.com/p/goprotobuf/proto" +import json "encoding/json" +import math "math" + +// Reference proto, json, and math imports to suppress error if they are not otherwise used. 
+var _ = proto.Marshal +var _ = &json.SyntaxError{} +var _ = math.Inf + +type ProtoSnapshotResponse struct { + Success *bool `protobuf:"varint,1,req" json:"Success,omitempty"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *ProtoSnapshotResponse) Reset() { *m = ProtoSnapshotResponse{} } +func (m *ProtoSnapshotResponse) String() string { return proto.CompactTextString(m) } +func (*ProtoSnapshotResponse) ProtoMessage() {} + +func (m *ProtoSnapshotResponse) GetSuccess() bool { + if m != nil && m.Success != nil { + return *m.Success + } + return false +} + +func init() { +} diff --git a/third_party/github.com/coreos/go-raft/protobuf/snapshot_response.proto b/third_party/github.com/coreos/go-raft/protobuf/snapshot_response.proto new file mode 100644 index 000000000..225c19208 --- /dev/null +++ b/third_party/github.com/coreos/go-raft/protobuf/snapshot_response.proto @@ -0,0 +1,5 @@ +package protobuf; + +message ProtoSnapshotResponse { + required bool Success=1; +} \ No newline at end of file diff --git a/third_party/github.com/coreos/go-raft/request_vote_request.go b/third_party/github.com/coreos/go-raft/request_vote_request.go new file mode 100644 index 000000000..c928f5f28 --- /dev/null +++ b/third_party/github.com/coreos/go-raft/request_vote_request.go @@ -0,0 +1,68 @@ +package raft + +import ( + "code.google.com/p/goprotobuf/proto" + "github.com/benbjohnson/go-raft/protobuf" + "io" + "io/ioutil" +) + +// The request sent to a server to vote for a candidate to become a leader. +type RequestVoteRequest struct { + peer *Peer + Term uint64 + LastLogIndex uint64 + LastLogTerm uint64 + CandidateName string +} + +// Creates a new RequestVote request. +func newRequestVoteRequest(term uint64, candidateName string, lastLogIndex uint64, lastLogTerm uint64) *RequestVoteRequest { + return &RequestVoteRequest{ + Term: term, + LastLogIndex: lastLogIndex, + LastLogTerm: lastLogTerm, + CandidateName: candidateName, + } +} + +// Encodes the RequestVoteRequest to a buffer. 
Returns the number of bytes +// written and any error that may have occurred. +func (req *RequestVoteRequest) encode(w io.Writer) (int, error) { + pb := &protobuf.ProtoRequestVoteRequest{ + Term: proto.Uint64(req.Term), + LastLogIndex: proto.Uint64(req.LastLogIndex), + LastLogTerm: proto.Uint64(req.LastLogTerm), + CandidateName: proto.String(req.CandidateName), + } + p, err := proto.Marshal(pb) + if err != nil { + return -1, err + } + + return w.Write(p) +} + +// Decodes the RequestVoteRequest from a buffer. Returns the number of bytes read and +// any error that occurs. +func (req *RequestVoteRequest) decode(r io.Reader) (int, error) { + data, err := ioutil.ReadAll(r) + + if err != nil { + return -1, err + } + + totalBytes := len(data) + + pb := &protobuf.ProtoRequestVoteRequest{} + if err = proto.Unmarshal(data, pb); err != nil { + return -1, err + } + + req.Term = pb.GetTerm() + req.LastLogIndex = pb.GetLastLogIndex() + req.LastLogTerm = pb.GetLastLogTerm() + req.CandidateName = pb.GetCandidateName() + + return totalBytes, nil +} diff --git a/third_party/github.com/coreos/go-raft/request_vote_response.go b/third_party/github.com/coreos/go-raft/request_vote_response.go new file mode 100644 index 000000000..d12004430 --- /dev/null +++ b/third_party/github.com/coreos/go-raft/request_vote_response.go @@ -0,0 +1,61 @@ +package raft + +import ( + "code.google.com/p/goprotobuf/proto" + "github.com/benbjohnson/go-raft/protobuf" + "io" + "io/ioutil" +) + +// The response returned from a server after a vote for a candidate to become a leader. +type RequestVoteResponse struct { + peer *Peer + Term uint64 + VoteGranted bool +} + +// Creates a new RequestVote response. +func newRequestVoteResponse(term uint64, voteGranted bool) *RequestVoteResponse { + return &RequestVoteResponse{ + Term: term, + VoteGranted: voteGranted, + } +} + +// Encodes the RequestVoteResponse to a buffer. Returns the number of bytes +// written and any error that may have occurred. 
+func (resp *RequestVoteResponse) encode(w io.Writer) (int, error) { + pb := &protobuf.ProtoRequestVoteResponse{ + Term: proto.Uint64(resp.Term), + VoteGranted: proto.Bool(resp.VoteGranted), + } + + p, err := proto.Marshal(pb) + if err != nil { + return -1, err + } + + return w.Write(p) +} + +// Decodes the RequestVoteResponse from a buffer. Returns the number of bytes read and +// any error that occurs. +func (resp *RequestVoteResponse) decode(r io.Reader) (int, error) { + data, err := ioutil.ReadAll(r) + + if err != nil { + return 0, err + } + + totalBytes := len(data) + + pb := &protobuf.ProtoRequestVoteResponse{} + if err = proto.Unmarshal(data, pb); err != nil { + return -1, err + } + + resp.Term = pb.GetTerm() + resp.VoteGranted = pb.GetVoteGranted() + + return totalBytes, nil +} diff --git a/third_party/github.com/coreos/go-raft/server.go b/third_party/github.com/coreos/go-raft/server.go new file mode 100644 index 000000000..074ca6f26 --- /dev/null +++ b/third_party/github.com/coreos/go-raft/server.go @@ -0,0 +1,1260 @@ +package raft + +import ( + "encoding/json" + "errors" + "fmt" + "hash/crc32" + "io" + "io/ioutil" + "os" + "path" + "sort" + "sync" + "time" +) + +//------------------------------------------------------------------------------ +// +// Constants +// +//------------------------------------------------------------------------------ + +const ( + Stopped = "stopped" + Follower = "follower" + Candidate = "candidate" + Leader = "leader" + Snapshotting = "snapshotting" +) + +const ( + MaxLogEntriesPerRequest = 2000 + NumberOfLogEntriesAfterSnapshot = 200 +) + +const ( + DefaultHeartbeatTimeout = 50 * time.Millisecond + DefaultElectionTimeout = 150 * time.Millisecond +) + +var stopValue interface{} + +//------------------------------------------------------------------------------ +// +// Errors +// +//------------------------------------------------------------------------------ + +var NotLeaderError = errors.New("raft.Server: Not current leader") 
+var DuplicatePeerError = errors.New("raft.Server: Duplicate peer") +var CommandTimeoutError = errors.New("raft: Command timeout") + +//------------------------------------------------------------------------------ +// +// Typedefs +// +//------------------------------------------------------------------------------ + +// A server is involved in the consensus protocol and can act as a follower, +// candidate or a leader. +type Server struct { + name string + path string + state string + transporter Transporter + context interface{} + currentTerm uint64 + + votedFor string + log *Log + leader string + peers map[string]*Peer + mutex sync.RWMutex + syncedPeer map[string]bool + + c chan *event + electionTimeout time.Duration + heartbeatTimeout time.Duration + + currentSnapshot *Snapshot + lastSnapshot *Snapshot + stateMachine StateMachine + maxLogEntriesPerRequest uint64 + + confFile *os.File +} + +// An event to be processed by the server's event loop. +type event struct { + target interface{} + returnValue interface{} + c chan error +} + +//------------------------------------------------------------------------------ +// +// Constructor +// +//------------------------------------------------------------------------------ + +// Creates a new server with a log at the given path. +func NewServer(name string, path string, transporter Transporter, stateMachine StateMachine, context interface{}) (*Server, error) { + if name == "" { + return nil, errors.New("raft.Server: Name cannot be blank") + } + if transporter == nil { + panic("raft: Transporter required") + } + + s := &Server{ + name: name, + path: path, + transporter: transporter, + stateMachine: stateMachine, + context: context, + state: Stopped, + peers: make(map[string]*Peer), + log: newLog(), + c: make(chan *event, 256), + electionTimeout: DefaultElectionTimeout, + heartbeatTimeout: DefaultHeartbeatTimeout, + maxLogEntriesPerRequest: MaxLogEntriesPerRequest, + } + + // Setup apply function. 
+ s.log.ApplyFunc = func(c Command) (interface{}, error) { + result, err := c.Apply(s) + return result, err + } + + return s, nil +} + +//------------------------------------------------------------------------------ +// +// Accessors +// +//------------------------------------------------------------------------------ + +//-------------------------------------- +// General +//-------------------------------------- + +// Retrieves the name of the server. +func (s *Server) Name() string { + return s.name +} + +// Retrieves the storage path for the server. +func (s *Server) Path() string { + return s.path +} + +// The name of the current leader. +func (s *Server) Leader() string { + return s.leader +} + +// Retrieves a copy of the peer data. +func (s *Server) Peers() map[string]*Peer { + s.mutex.Lock() + defer s.mutex.Unlock() + + peers := make(map[string]*Peer) + for name, peer := range s.peers { + peers[name] = peer.clone() + } + return peers +} + +// Retrieves the object that transports requests. +func (s *Server) Transporter() Transporter { + s.mutex.RLock() + defer s.mutex.RUnlock() + return s.transporter +} + +func (s *Server) SetTransporter(t Transporter) { + s.mutex.Lock() + defer s.mutex.Unlock() + s.transporter = t +} + +// Retrieves the context passed into the constructor. +func (s *Server) Context() interface{} { + return s.context +} + +// Retrieves the log path for the server. +func (s *Server) LogPath() string { + return path.Join(s.path, "log") +} + +// Retrieves the current state of the server. +func (s *Server) State() string { + s.mutex.RLock() + defer s.mutex.RUnlock() + return s.state +} + +// Sets the state of the server. +func (s *Server) setState(state string) { + s.mutex.Lock() + defer s.mutex.Unlock() + s.state = state + if state == Leader { + s.leader = s.Name() + } +} + +// Retrieves the current term of the server. +func (s *Server) Term() uint64 { + return s.currentTerm +} + +// Retrieves the current commit index of the server. 
+func (s *Server) CommitIndex() uint64 { + return s.log.commitIndex +} + +// Retrieves the name of the candidate this server voted for in this term. +func (s *Server) VotedFor() string { + return s.votedFor +} + +// Retrieves whether the server's log has no entries. +func (s *Server) IsLogEmpty() bool { + return s.log.isEmpty() +} + +// A list of all the log entries. This should only be used for debugging purposes. +func (s *Server) LogEntries() []*LogEntry { + return s.log.entries +} + +// A reference to the command name of the last entry. +func (s *Server) LastCommandName() string { + return s.log.lastCommandName() +} + +// Get the state of the server for debugging +func (s *Server) GetState() string { + s.mutex.RLock() + defer s.mutex.RUnlock() + return fmt.Sprintf("Name: %s, State: %s, Term: %v, Index: %v ", s.name, s.state, s.currentTerm, s.log.commitIndex) +} + +// Check if the server is promotable +func (s *Server) promotable() bool { + return s.log.currentIndex() > 0 +} + +//-------------------------------------- +// Membership +//-------------------------------------- + +// Retrieves the number of member servers in the consensus. +func (s *Server) MemberCount() int { + s.mutex.Lock() + defer s.mutex.Unlock() + return len(s.peers) + 1 +} + +// Retrieves the number of servers required to make a quorum. +func (s *Server) QuorumSize() int { + return (s.MemberCount() / 2) + 1 +} + +//-------------------------------------- +// Election timeout +//-------------------------------------- + +// Retrieves the election timeout. +func (s *Server) ElectionTimeout() time.Duration { + return s.electionTimeout +} + +// Sets the election timeout. +func (s *Server) SetElectionTimeout(duration time.Duration) { + s.electionTimeout = duration +} + +//-------------------------------------- +// Heartbeat timeout +//-------------------------------------- + +// Retrieves the heartbeat timeout. 
+func (s *Server) HeartbeatTimeout() time.Duration { + return s.heartbeatTimeout +} + +// Sets the heartbeat timeout. +func (s *Server) SetHeartbeatTimeout(duration time.Duration) { + s.mutex.Lock() + defer s.mutex.Unlock() + + s.heartbeatTimeout = duration + for _, peer := range s.peers { + peer.setHeartbeatTimeout(duration) + } +} + +//------------------------------------------------------------------------------ +// +// Methods +// +//------------------------------------------------------------------------------ + +//-------------------------------------- +// Initialization +//-------------------------------------- + +// Registers the built-in NOP, join and leave commands. +func init() { + RegisterCommand(&NOPCommand{}) + RegisterCommand(&DefaultJoinCommand{}) + RegisterCommand(&DefaultLeaveCommand{}) +} + +// Starts the server as a follower. +// If log entries exist then allow promotion to candidate if no AEs received. +// If no log entries exist then wait for AEs from another node. +// If no log entries exist and a self-join command is issued then +// immediately become leader and commit entry. + +func (s *Server) Start() error { + // Exit if the server is already running. + if s.state != Stopped { + return errors.New("raft.Server: Server already running") + } + + // Create the snapshot directory if it does not exist + os.Mkdir(path.Join(s.path, "snapshot"), 0700) + + // Initialize the log and load it up. + if err := s.log.open(s.LogPath()); err != nil { + s.debugln("raft: Log error: ", err) + return fmt.Errorf("raft: Initialization error: %s", err) + } + + if err := s.readConf(); err != nil { + s.debugln("raft: Conf file error: ", err) + return fmt.Errorf("raft: Initialization error: %s", err) + } + + // Update the term to the last term in the log. + _, s.currentTerm = s.log.lastInfo() + + s.setState(Follower) + + // If no log entries exist then + // 1. wait for AEs from another node + // 2.
wait for self-join command + // to set itself promotable + if !s.promotable() { + s.debugln("start as a new raft server") + + // If log entries exist then allow promotion to candidate + // if no AEs received. + } else { + s.debugln("start from previous saved state") + } + + go s.loop() + + return nil +} + +// Read the configuration for the server. +func (s *Server) readConf() error { + var err error + confPath := path.Join(s.path, "conf") + s.debugln("readConf.open ", confPath) + // open conf file + s.confFile, err = os.OpenFile(confPath, os.O_RDWR, 0600) + + if err != nil { + if os.IsNotExist(err) { + s.confFile, err = os.OpenFile(confPath, os.O_WRONLY|os.O_CREATE, 0600) + debugln("readConf.create ", confPath) + if err != nil { + return err + } + } + return err + } + + for { + var peerName string + _, err = fmt.Fscanf(s.confFile, "%s\n", &peerName) + + if err != nil { + if err == io.EOF { + s.debugln("server.peer.conf: finish") + return nil + } + return err + } + s.debugln("server.peer.conf.read: ", peerName) + + peer := newPeer(s, peerName, s.heartbeatTimeout) + + s.peers[peer.name] = peer + + } + + return nil +} + +// Shuts down the server. +func (s *Server) Stop() { + s.send(&stopValue) + s.mutex.Lock() + s.log.close() + s.mutex.Unlock() +} + +// Checks if the server is currently running. +func (s *Server) Running() bool { + s.mutex.RLock() + defer s.mutex.RUnlock() + return s.state != Stopped +} + +//-------------------------------------- +// Term +//-------------------------------------- + +// Sets the current term for the server. This is only used when an external +// current term is found. 
+func (s *Server) setCurrentTerm(term uint64, leaderName string, append bool) { + s.mutex.Lock() + defer s.mutex.Unlock() + + // update the term and clear vote for + if term > s.currentTerm { + s.state = Follower + s.currentTerm = term + s.leader = leaderName + s.votedFor = "" + return + } + + // discover new leader when candidate + // save leader name when follower + if term == s.currentTerm && s.state != Leader && append { + s.state = Follower + s.leader = leaderName + } + +} + +//-------------------------------------- +// Event Loop +//-------------------------------------- + +// ________ +// --|Snapshot| timeout +// | -------- ______ +// recover | ^ | | +// snapshot / | |snapshot | | +// higher | | v | recv majority votes +// term | -------- timeout ----------- ----------- +// |-> |Follower| ----------> | Candidate |--------------------> | Leader | +// -------- ----------- ----------- +// ^ higher term/ | higher term | +// | new leader | | +// |_______________________|____________________________________ | +// The main event loop for the server +func (s *Server) loop() { + defer s.debugln("server.loop.end") + + for { + state := s.State() + + s.debugln("server.loop.run ", state) + switch state { + case Follower: + s.followerLoop() + + case Candidate: + s.candidateLoop() + + case Leader: + s.leaderLoop() + + case Snapshotting: + s.snapshotLoop() + + case Stopped: + return + } + } +} + +// Sends an event to the event loop to be processed. The function will wait +// until the event is actually processed before returning. +func (s *Server) send(value interface{}) (interface{}, error) { + event := s.sendAsync(value) + err := <-event.c + return event.returnValue, err +} + +func (s *Server) sendAsync(value interface{}) *event { + event := &event{target: value, c: make(chan error, 1)} + s.c <- event + return event +} + +// The event loop that is run when the server is in a Follower state. +// Responds to RPCs from candidates and leaders. 
+// Converts to candidate if election timeout elapses without either: +// 1.Receiving valid AppendEntries RPC, or +// 2.Granting vote to candidate +func (s *Server) followerLoop() { + + s.setState(Follower) + timeoutChan := afterBetween(s.ElectionTimeout(), s.ElectionTimeout()*2) + + for { + var err error + update := false + select { + case e := <-s.c: + if e.target == &stopValue { + s.setState(Stopped) + } else if command, ok := e.target.(JoinCommand); ok { + //If no log entries exist and a self-join command is issued + //then immediately become leader and commit entry. + if s.log.currentIndex() == 0 && command.NodeName() == s.Name() { + s.debugln("selfjoin and promote to leader") + s.setState(Leader) + s.processCommand(command, e) + } else { + err = NotLeaderError + } + } else if req, ok := e.target.(*AppendEntriesRequest); ok { + e.returnValue, update = s.processAppendEntriesRequest(req) + } else if req, ok := e.target.(*RequestVoteRequest); ok { + e.returnValue, update = s.processRequestVoteRequest(req) + } else if req, ok := e.target.(*SnapshotRequest); ok { + e.returnValue = s.processSnapshotRequest(req) + } else { + err = NotLeaderError + } + + // Callback to event. + e.c <- err + + case <-timeoutChan: + + // only allow synced follower to promote to candidate + if s.promotable() { + s.setState(Candidate) + } else { + update = true + } + } + + // Converts to candidate if election timeout elapses without either: + // 1.Receiving valid AppendEntries RPC, or + // 2.Granting vote to candidate + if update { + timeoutChan = afterBetween(s.ElectionTimeout(), s.ElectionTimeout()*2) + } + + // Exit loop on state change. + if s.State() != Follower { + break + } + } +} + +// The event loop that is run when the server is in a Candidate state. +func (s *Server) candidateLoop() { + lastLogIndex, lastLogTerm := s.log.lastInfo() + s.leader = "" + + for { + // Increment current term, vote for self. 
+ s.currentTerm++ + s.votedFor = s.name + + // Send RequestVote RPCs to all other servers. + respChan := make(chan *RequestVoteResponse, len(s.peers)) + for _, peer := range s.peers { + go peer.sendVoteRequest(newRequestVoteRequest(s.currentTerm, s.name, lastLogIndex, lastLogTerm), respChan) + } + + // Wait for either: + // * Votes received from majority of servers: become leader + // * AppendEntries RPC received from new leader: step down. + // * Election timeout elapses without election resolution: increment term, start new election + // * Discover higher term: step down (§5.1) + votesGranted := 1 + timeoutChan := afterBetween(s.ElectionTimeout(), s.ElectionTimeout()*2) + timeout := false + + for { + // If we received enough votes then stop waiting for more votes. + s.debugln("server.candidate.votes: ", votesGranted, " quorum:", s.QuorumSize()) + if votesGranted >= s.QuorumSize() { + s.setState(Leader) + break + } + + // Collect votes from peers. + select { + case resp := <-respChan: + if resp.VoteGranted { + s.debugln("server.candidate.vote.granted: ", votesGranted) + votesGranted++ + } else if resp.Term > s.currentTerm { + s.debugln("server.candidate.vote.failed") + s.setCurrentTerm(resp.Term, "", false) + } else { + s.debugln("server.candidate.vote: denied") + } + + case e := <-s.c: + var err error + if e.target == &stopValue { + s.setState(Stopped) + } else if _, ok := e.target.(Command); ok { + err = NotLeaderError + } else if req, ok := e.target.(*AppendEntriesRequest); ok { + e.returnValue, _ = s.processAppendEntriesRequest(req) + } else if req, ok := e.target.(*RequestVoteRequest); ok { + e.returnValue, _ = s.processRequestVoteRequest(req) + } + + // Callback to event. 
+ e.c <- err + + case <-timeoutChan: + timeout = true + } + + // processing either an AER or an RVR can turn the server into a follower + // also break when timeout happens + if s.State() != Candidate || timeout { + break + } + } + + // break when we are not candidate + if s.State() != Candidate { + break + } + + // continue when timeout happened + } +} + +// The event loop that is run when the server is in a Leader state. +func (s *Server) leaderLoop() { + s.setState(Leader) + s.syncedPeer = make(map[string]bool) + logIndex, _ := s.log.lastInfo() + + // Update the peers prevLogIndex to leader's lastLogIndex and start heartbeat. + s.debugln("leaderLoop.set.PrevIndex to ", logIndex) + for _, peer := range s.peers { + peer.setPrevLogIndex(logIndex) + peer.startHeartbeat() + } + + go s.Do(NOPCommand{}) + + // Begin to collect response from followers + for { + var err error + select { + case e := <-s.c: + if e.target == &stopValue { + s.setState(Stopped) + } else if command, ok := e.target.(Command); ok { + s.processCommand(command, e) + continue + } else if req, ok := e.target.(*AppendEntriesRequest); ok { + e.returnValue, _ = s.processAppendEntriesRequest(req) + } else if resp, ok := e.target.(*AppendEntriesResponse); ok { + s.processAppendEntriesResponse(resp) + } else if req, ok := e.target.(*RequestVoteRequest); ok { + e.returnValue, _ = s.processRequestVoteRequest(req) + } + + // Callback to event. + e.c <- err + } + + // Exit loop on state change. + if s.State() != Leader { + break + } + } + + // Stop all peers.
+ for _, peer := range s.peers { + peer.stopHeartbeat() + } + s.syncedPeer = nil +} + +func (s *Server) snapshotLoop() { + s.setState(Snapshotting) + + for { + var err error + + e := <-s.c + + if e.target == &stopValue { + s.setState(Stopped) + } else if _, ok := e.target.(Command); ok { + err = NotLeaderError + } else if req, ok := e.target.(*AppendEntriesRequest); ok { + e.returnValue, _ = s.processAppendEntriesRequest(req) + } else if req, ok := e.target.(*RequestVoteRequest); ok { + e.returnValue, _ = s.processRequestVoteRequest(req) + } else if req, ok := e.target.(*SnapshotRecoveryRequest); ok { + e.returnValue = s.processSnapshotRecoveryRequest(req) + } + + // Callback to event. + e.c <- err + + // Exit loop on state change. + if s.State() != Snapshotting { + break + } + } +} + +//-------------------------------------- +// Commands +//-------------------------------------- + +// Attempts to execute a command and replicate it. The function will return +// when the command has been successfully committed or an error has occurred. + +func (s *Server) Do(command Command) (interface{}, error) { + return s.send(command) +} + +// Processes a command. +func (s *Server) processCommand(command Command, e *event) { + s.debugln("server.command.process") + + // Create an entry for the command in the log. + entry, err := s.log.createEntry(s.currentTerm, command) + + if err != nil { + s.debugln("server.command.log.entry.error:", err) + e.c <- err + return + } + + if err := s.log.appendEntry(entry); err != nil { + s.debugln("server.command.log.error:", err) + e.c <- err + return + } + + // Issue a callback for the entry once it's committed. + go func() { + // Wait for the entry to be committed. 
+ select { + case <-entry.commit: + var err error + s.debugln("server.command.commit") + e.returnValue, err = s.log.getEntryResult(entry, true) + e.c <- err + case <-time.After(time.Second): + s.debugln("server.command.timeout") + e.c <- CommandTimeoutError + } + }() + + // Issue an append entries response for the server. + resp := newAppendEntriesResponse(s.currentTerm, true, s.log.currentIndex(), s.log.CommitIndex()) + resp.append = true + resp.peer = s.Name() + + // this must be async + // sendAsync is not really async every time + // when the sending speed of the user is larger than + // the processing speed of the server, the buffered channel + // will be full. Then sendAsync will become sync, which will + // cause deadlock here. + // so we use a goroutine to avoid the deadlock + go s.sendAsync(resp) +} + +//-------------------------------------- +// Append Entries +//-------------------------------------- + +// Appends zero or more log entry from the leader to this server. +func (s *Server) AppendEntries(req *AppendEntriesRequest) *AppendEntriesResponse { + ret, _ := s.send(req) + resp, _ := ret.(*AppendEntriesResponse) + return resp +} + +// Processes the "append entries" request. +func (s *Server) processAppendEntriesRequest(req *AppendEntriesRequest) (*AppendEntriesResponse, bool) { + + s.traceln("server.ae.process") + + if req.Term < s.currentTerm { + s.debugln("server.ae.error: stale term") + return newAppendEntriesResponse(s.currentTerm, false, s.log.currentIndex(), s.log.CommitIndex()), false + } + + // Update term and leader. + s.setCurrentTerm(req.Term, req.LeaderName, true) + + // Reject if log doesn't contain a matching previous entry. + if err := s.log.truncate(req.PrevLogIndex, req.PrevLogTerm); err != nil { + s.debugln("server.ae.truncate.error: ", err) + return newAppendEntriesResponse(s.currentTerm, false, s.log.currentIndex(), s.log.CommitIndex()), true + } + + // Append entries to the log. 
+ if err := s.log.appendEntries(req.Entries); err != nil { + s.debugln("server.ae.append.error: ", err) + return newAppendEntriesResponse(s.currentTerm, false, s.log.currentIndex(), s.log.CommitIndex()), true + } + + // Commit up to the commit index. + if err := s.log.setCommitIndex(req.CommitIndex); err != nil { + s.debugln("server.ae.commit.error: ", err) + return newAppendEntriesResponse(s.currentTerm, false, s.log.currentIndex(), s.log.CommitIndex()), true + } + + // at this point the server has appended and committed all the log entries from the leader + + return newAppendEntriesResponse(s.currentTerm, true, s.log.currentIndex(), s.log.CommitIndex()), true +} + +// Processes the "append entries" response from the peer. This is only +// processed when the server is a leader. Responses received during other +// states are dropped. +func (s *Server) processAppendEntriesResponse(resp *AppendEntriesResponse) { + + // If we find a higher term then change to a follower and exit. + if resp.Term > s.currentTerm { + s.setCurrentTerm(resp.Term, "", false) + return + } + + // Ignore the response if it's not successful. + if !resp.Success { + return + } + + // if a peer successfully appended a log entry from the leader's term, + // we add it to the synced list + if resp.append == true { + s.syncedPeer[resp.peer] = true + } + + // Make sure a quorum of peers has synced before committing. + if len(s.syncedPeer) < s.QuorumSize() { + return + } + + // Determine the committed index that a majority has. + var indices []uint64 + indices = append(indices, s.log.currentIndex()) + for _, peer := range s.peers { + indices = append(indices, peer.getPrevLogIndex()) + } + sort.Sort(uint64Slice(indices)) + + // We can commit up to the index which the majority of the members have appended.
+ commitIndex := indices[s.QuorumSize()-1] + committedIndex := s.log.commitIndex + + if commitIndex > committedIndex { + s.log.setCommitIndex(commitIndex) + s.debugln("commit index ", commitIndex) + for i := committedIndex; i < commitIndex; i++ { + if entry := s.log.getEntry(i + 1); entry != nil { + // if the leader is a new one and the entry came from the + // old leader, the commit channel will be nil and no go routine + // is waiting from this channel + // if we try to send to it, the new leader will get stuck + if entry.commit != nil { + select { + case entry.commit <- true: + default: + panic("server unable to send signal to commit channel") + } + } + } + } + } +} + +//-------------------------------------- +// Request Vote +//-------------------------------------- + +// Requests a vote from a server. A vote can be obtained if the vote's term is +// at the server's current term and the server has not made a vote yet. A vote +// can also be obtained if the term is greater than the server's current term. +func (s *Server) RequestVote(req *RequestVoteRequest) *RequestVoteResponse { + ret, _ := s.send(req) + resp, _ := ret.(*RequestVoteResponse) + return resp +} + +// Processes a "request vote" request. +func (s *Server) processRequestVoteRequest(req *RequestVoteRequest) (*RequestVoteResponse, bool) { + + // If the request is coming from an old term then reject it. + if req.Term < s.currentTerm { + s.debugln("server.rv.error: stale term") + return newRequestVoteResponse(s.currentTerm, false), false + } + + s.setCurrentTerm(req.Term, "", false) + + // If we've already voted for a different candidate then don't vote for this candidate. + if s.votedFor != "" && s.votedFor != req.CandidateName { + s.debugln("server.rv.error: duplicate vote: ", req.CandidateName, + " already vote for ", s.votedFor) + return newRequestVoteResponse(s.currentTerm, false), false + } + + // If the candidate's log is not at least as up-to-date as our last log then don't vote. 
+ lastIndex, lastTerm := s.log.lastInfo() + if lastIndex > req.LastLogIndex || lastTerm > req.LastLogTerm { + s.debugln("server.rv.error: out of date log: ", req.CandidateName, + "Index :[", lastIndex, "]", " [", req.LastLogIndex, "]", + "Term :[", lastTerm, "]", " [", req.LastLogTerm, "]") + return newRequestVoteResponse(s.currentTerm, false), false + } + + // If we made it this far then cast a vote and reset our election time out. + s.debugln("server.rv.vote: ", s.name, " votes for", req.CandidateName, "at term", req.Term) + s.votedFor = req.CandidateName + + return newRequestVoteResponse(s.currentTerm, true), true +} + +//-------------------------------------- +// Membership +//-------------------------------------- + +// Adds a peer to the server. +func (s *Server) AddPeer(name string) error { + s.debugln("server.peer.add: ", name, len(s.peers)) + + // Do not allow peers to be added twice. + if s.peers[name] != nil { + return nil + } + + // Only add the peer if it doesn't have the same name. + if s.name != name { + _, err := fmt.Fprintln(s.confFile, name) + s.debugln("server.peer.conf.write: ", name) + if err != nil { + return err + } + peer := newPeer(s, name, s.heartbeatTimeout) + if s.State() == Leader { + peer.startHeartbeat() + } + s.peers[peer.name] = peer + } + + return nil +} + +// Removes a peer from the server. +func (s *Server) RemovePeer(name string) error { + s.debugln("server.peer.remove: ", name, len(s.peers)) + + // Ignore removal of the server itself. + if s.name == name { + return nil + } + // Return error if peer doesn't exist. + peer := s.peers[name] + if peer == nil { + return fmt.Errorf("raft: Peer not found: %s", name) + } + + // TODO: Flush entries to the peer first. + + // Stop peer and remove it. 
+ peer.stopHeartbeat() + + delete(s.peers, name) + + s.confFile.Truncate(0) + s.confFile.Seek(0, os.SEEK_SET) + + for peer := range s.peers { + _, err := fmt.Fprintln(s.confFile, peer) + if err != nil { + return err + } + } + + return nil +} + +//-------------------------------------- +// Log compaction +//-------------------------------------- + +// The background snapshot function: takes a snapshot once per second and never returns. +func (s *Server) Snapshot() { + for { + // TODO: change this... to something reasonable + time.Sleep(1 * time.Second) + + s.takeSnapshot() + } +} + +func (s *Server) takeSnapshot() error { + //TODO put a snapshot mutex + s.debugln("take Snapshot") + if s.currentSnapshot != nil { + return errors.New("handling snapshot") + } + + lastIndex, lastTerm := s.log.commitInfo() + + if lastIndex == 0 || lastTerm == 0 { + return errors.New("No logs") + } + + path := s.SnapshotPath(lastIndex, lastTerm) + + var state []byte + var err error + + if s.stateMachine != nil { + state, err = s.stateMachine.Save() + + if err != nil { + return err + } + + } else { + state = []byte{0} + } + + var peerNames []string + + for _, peer := range s.peers { + peerNames = append(peerNames, peer.Name()) + } + peerNames = append(peerNames, s.Name()) + + s.currentSnapshot = &Snapshot{lastIndex, lastTerm, peerNames, state, path} + + s.saveSnapshot() + + // We keep some log entries after the snapshot + // We do not want to send the whole snapshot + // to the slightly slow machines + if lastIndex-s.log.startIndex > NumberOfLogEntriesAfterSnapshot { + compactIndex := lastIndex - NumberOfLogEntriesAfterSnapshot + compactTerm := s.log.getEntry(compactIndex).Term + s.log.compact(compactIndex, compactTerm) + } + + return nil +} + +// Saves the current snapshot and makes it the last snapshot, removing the previous one if it differs.
+func (s *Server) saveSnapshot() error { + + if s.currentSnapshot == nil { + return errors.New("no snapshot to save") + } + + err := s.currentSnapshot.save() + + if err != nil { + return err + } + + tmp := s.lastSnapshot + s.lastSnapshot = s.currentSnapshot + + // delete the previous snapshot if there is any change + if tmp != nil && !(tmp.LastIndex == s.lastSnapshot.LastIndex && tmp.LastTerm == s.lastSnapshot.LastTerm) { + tmp.remove() + } + s.currentSnapshot = nil + return nil +} + +// Retrieves the snapshot file path for the given last index and last term. +func (s *Server) SnapshotPath(lastIndex uint64, lastTerm uint64) string { + return path.Join(s.path, "snapshot", fmt.Sprintf("%v_%v.ss", lastTerm, lastIndex)) +} + +func (s *Server) RequestSnapshot(req *SnapshotRequest) *SnapshotResponse { + ret, _ := s.send(req) + resp, _ := ret.(*SnapshotResponse) + return resp +} + +func (s *Server) processSnapshotRequest(req *SnapshotRequest) *SnapshotResponse { + + // If the follower’s log contains an entry at the snapshot’s last index with a term + // that matches the snapshot’s last term + // Then the follower already has all the information found in the snapshot + // and can reply false + + entry := s.log.getEntry(req.LastIndex) + + if entry != nil && entry.Term == req.LastTerm { + return newSnapshotResponse(false) + } + + s.setState(Snapshotting) + + return newSnapshotResponse(true) +} + +func (s *Server) SnapshotRecoveryRequest(req *SnapshotRecoveryRequest) *SnapshotRecoveryResponse { + ret, _ := s.send(req) + resp, _ := ret.(*SnapshotRecoveryResponse) + return resp +} + +func (s *Server) processSnapshotRecoveryRequest(req *SnapshotRecoveryRequest) *SnapshotRecoveryResponse { + + s.stateMachine.Recovery(req.State) + + // clear the peer map + s.peers = make(map[string]*Peer) + + // recover the cluster configuration + for _, peerName := range req.Peers { + s.AddPeer(peerName) + } + + //update term and index + s.currentTerm = req.LastTerm + + s.log.updateCommitIndex(req.LastIndex) + + snapshotPath :=
s.SnapshotPath(req.LastIndex, req.LastTerm) + + s.currentSnapshot = &Snapshot{req.LastIndex, req.LastTerm, req.Peers, req.State, snapshotPath} + + s.saveSnapshot() + + // clear the previous log entries + s.log.compact(req.LastIndex, req.LastTerm) + + return newSnapshotRecoveryResponse(req.LastTerm, true, req.LastIndex) + +} + +// Load a snapshot at restart +func (s *Server) LoadSnapshot() error { + dir, err := os.OpenFile(path.Join(s.path, "snapshot"), os.O_RDONLY, 0) + if err != nil { + + return err + } + + filenames, err := dir.Readdirnames(-1) + + if err != nil { + dir.Close() + panic(err) + } + + dir.Close() + if len(filenames) == 0 { + return errors.New("no snapshot") + } + + // not sure how many snapshot we should keep + sort.Strings(filenames) + snapshotPath := path.Join(s.path, "snapshot", filenames[len(filenames)-1]) + + // should not fail + file, err := os.OpenFile(snapshotPath, os.O_RDONLY, 0) + defer file.Close() + if err != nil { + panic(err) + } + + // TODO check checksum first + + var snapshotBytes []byte + var checksum uint32 + + n, err := fmt.Fscanf(file, "%08x\n", &checksum) + + if err != nil { + return err + } + + if n != 1 { + return errors.New("Bad snapshot file") + } + + snapshotBytes, _ = ioutil.ReadAll(file) + s.debugln(string(snapshotBytes)) + + // Generate checksum. 
+ byteChecksum := crc32.ChecksumIEEE(snapshotBytes) + + if uint32(checksum) != byteChecksum { + s.debugln(checksum, " ", byteChecksum) + return errors.New("bad snapshot file") + } + + err = json.Unmarshal(snapshotBytes, &s.lastSnapshot) + + if err != nil { + s.debugln("unmarshal error: ", err) + return err + } + + err = s.stateMachine.Recovery(s.lastSnapshot.State) + + if err != nil { + s.debugln("recovery error: ", err) + return err + } + + for _, peerName := range s.lastSnapshot.Peers { + s.AddPeer(peerName) + } + + s.log.startTerm = s.lastSnapshot.LastTerm + s.log.startIndex = s.lastSnapshot.LastIndex + s.log.updateCommitIndex(s.lastSnapshot.LastIndex) + + return err +} + +//-------------------------------------- +// Debugging +//-------------------------------------- + +func (s *Server) debugln(v ...interface{}) { + debugf("[%s Term:%d] %s", s.name, s.currentTerm, fmt.Sprintln(v...)) +} + +func (s *Server) traceln(v ...interface{}) { + tracef("[%s] %s", s.name, fmt.Sprintln(v...)) +} diff --git a/third_party/github.com/coreos/go-raft/server_test.go b/third_party/github.com/coreos/go-raft/server_test.go new file mode 100644 index 000000000..0410846a2 --- /dev/null +++ b/third_party/github.com/coreos/go-raft/server_test.go @@ -0,0 +1,504 @@ +package raft + +import ( + "fmt" + "reflect" + "strconv" + "sync" + "testing" + "time" +) + +//------------------------------------------------------------------------------ +// +// Tests +// +//------------------------------------------------------------------------------ + +//-------------------------------------- +// Request Vote +//-------------------------------------- + +// Ensure that we can request a vote from a server that has not voted. 
+func TestServerRequestVote(t *testing.T) {
+	srv := newTestServer("1", &testTransporter{})
+	srv.Start()
+
+	// The server joins itself before any vote is requested.
+	_, err := srv.Do(&DefaultJoinCommand{Name: srv.Name()})
+	if err != nil {
+		t.Fatalf("Server %s unable to join: %v", srv.Name(), err)
+	}
+	defer srv.Stop()
+
+	vote := srv.RequestVote(newRequestVoteRequest(1, "foo", 1, 0))
+	if accepted := vote.Term == 1 && vote.VoteGranted; !accepted {
+		t.Fatalf("Invalid request vote response: %v/%v", vote.Term, vote.VoteGranted)
+	}
+}
+
+// Ensure that a vote request is denied if it comes from an old term.
+func TestServerRequestVoteDeniedForStaleTerm(t *testing.T) {
+	srv := newTestServer("1", &testTransporter{})
+	srv.Start()
+
+	_, err := srv.Do(&DefaultJoinCommand{Name: srv.Name()})
+	if err != nil {
+		t.Fatalf("Server %s unable to join: %v", srv.Name(), err)
+	}
+
+	// Put the server ahead of the term used by the candidate below.
+	srv.currentTerm = 2
+	defer srv.Stop()
+
+	vote := srv.RequestVote(newRequestVoteRequest(1, "foo", 1, 0))
+	if denied := vote.Term == 2 && !vote.VoteGranted; !denied {
+		t.Fatalf("Invalid request vote response: %v/%v", vote.Term, vote.VoteGranted)
+	}
+
+	// NOTE(review): this only fails when BOTH the term changed and the server
+	// is not a follower; confirm whether the state is meant to be checked.
+	if !(srv.currentTerm == 2 || srv.State() == Follower) {
+		t.Fatalf("Server did not update term and demote: %v / %v", srv.currentTerm, srv.State())
+	}
+}
+
+// Ensure that a vote request is denied if we've already voted for a different candidate.
+func TestServerRequestVoteDeniedIfAlreadyVoted(t *testing.T) {
+	srv := newTestServer("1", &testTransporter{})
+	srv.Start()
+
+	_, err := srv.Do(&DefaultJoinCommand{Name: srv.Name()})
+	if err != nil {
+		t.Fatalf("Server %s unable to join: %v", srv.Name(), err)
+	}
+
+	srv.currentTerm = 2
+	defer srv.Stop()
+
+	// First candidate of term 2 gets the vote.
+	first := srv.RequestVote(newRequestVoteRequest(2, "foo", 1, 0))
+	if !(first.Term == 2 && first.VoteGranted) {
+		t.Fatalf("First vote should not have been denied")
+	}
+
+	// A different candidate in the same term must be turned down.
+	second := srv.RequestVote(newRequestVoteRequest(2, "bar", 1, 0))
+	if !(second.Term == 2 && !second.VoteGranted) {
+		t.Fatalf("Second vote should have been denied")
+	}
+}
+
+// Ensure that a vote request is approved if vote occurs in a new term.
+func TestServerRequestVoteApprovedIfAlreadyVotedInOlderTerm(t *testing.T) {
+	srv := newTestServer("1", &testTransporter{})
+	srv.Start()
+
+	_, err := srv.Do(&DefaultJoinCommand{Name: srv.Name()})
+	if err != nil {
+		t.Fatalf("Server %s unable to join: %v", srv.Name(), err)
+	}
+
+	time.Sleep(time.Millisecond * 100)
+
+	srv.currentTerm = 2
+	defer srv.Stop()
+
+	// Vote for "foo" in term 2.
+	vote := srv.RequestVote(newRequestVoteRequest(2, "foo", 2, 1))
+	if !(vote.Term == 2 && vote.VoteGranted && srv.VotedFor() == "foo") {
+		t.Fatalf("First vote should not have been denied")
+	}
+
+	// A request from the newer term 3 is granted to "bar".
+	vote = srv.RequestVote(newRequestVoteRequest(3, "bar", 2, 1))
+	if !(vote.Term == 3 && vote.VoteGranted && srv.VotedFor() == "bar") {
+		t.Fatalf("Second vote should have been approved")
+	}
+}
+
+// Ensure that a vote request is denied if the log is out of date.
+func TestServerRequestVoteDenyIfCandidateLogIsBehind(t *testing.T) {
+	scratch := newLog()
+	first, _ := newLogEntry(scratch, 1, 1, &testCommand1{Val: "foo", I: 20})
+	second, _ := newLogEntry(scratch, 2, 1, &testCommand2{X: 100})
+	third, _ := newLogEntry(scratch, 3, 2, &testCommand1{Val: "bar", I: 0})
+	seed := []*LogEntry{first, second, third}
+	srv := newTestServerWithLog("1", &testTransporter{}, seed)
+
+	// start as a follower with term 2 and index 3
+	srv.Start()
+	defer srv.Stop()
+
+	// request vote from term 3 with last log entry 2, 2
+	vote := srv.RequestVote(newRequestVoteRequest(3, "foo", 2, 2))
+	if !(vote.Term == 3 && !vote.VoteGranted) {
+		t.Fatalf("Stale index vote should have been denied [%v/%v]", vote.Term, vote.VoteGranted)
+	}
+
+	// request vote from term 2 with last log entry 2, 3
+	vote = srv.RequestVote(newRequestVoteRequest(2, "foo", 3, 2))
+	if !(vote.Term == 3 && !vote.VoteGranted) {
+		t.Fatalf("Stale term vote should have been denied [%v/%v]", vote.Term, vote.VoteGranted)
+	}
+
+	// request vote from term 3 with last log entry 2, 3
+	vote = srv.RequestVote(newRequestVoteRequest(3, "foo", 3, 2))
+	if !(vote.Term == 3 && vote.VoteGranted) {
+		t.Fatalf("Matching log vote should have been granted")
+	}
+
+	// request vote from term 3 with last log entry 2, 4
+	vote = srv.RequestVote(newRequestVoteRequest(3, "foo", 4, 2))
+	if !(vote.Term == 3 && vote.VoteGranted) {
+		t.Fatalf("Ahead-of-log vote should have been granted")
+	}
+}
+
+//--------------------------------------
+// Promotion
+//--------------------------------------
+
+// Ensure that we can self-promote a server to candidate, obtain votes and become a fearless leader.
+func TestServerPromoteSelf(t *testing.T) { + e0, _ := newLogEntry(newLog(), 1, 1, &testCommand1{Val: "foo", I: 20}) + server := newTestServerWithLog("1", &testTransporter{}, []*LogEntry{e0}) + + // start as a follower + server.Start() + + defer server.Stop() + + time.Sleep(2 * testElectionTimeout) + + if server.State() != Leader { + t.Fatalf("Server self-promotion failed: %v", server.State()) + } +} + +//Ensure that we can promote a server within a cluster to a leader. +func TestServerPromote(t *testing.T) { + lookup := map[string]*Server{} + transporter := &testTransporter{} + transporter.sendVoteRequestFunc = func(server *Server, peer *Peer, req *RequestVoteRequest) *RequestVoteResponse { + return lookup[peer.Name()].RequestVote(req) + } + transporter.sendAppendEntriesRequestFunc = func(server *Server, peer *Peer, req *AppendEntriesRequest) *AppendEntriesResponse { + return lookup[peer.Name()].AppendEntries(req) + } + servers := newTestCluster([]string{"1", "2", "3"}, transporter, lookup) + + servers[0].Start() + servers[1].Start() + servers[2].Start() + + time.Sleep(2 * testElectionTimeout) + + if servers[0].State() != Leader && servers[1].State() != Leader && servers[2].State() != Leader { + t.Fatalf("No leader elected: (%s, %s, %s)", servers[0].State(), servers[1].State(), servers[2].State()) + } + for _, server := range servers { + server.Stop() + } +} + +//-------------------------------------- +// Append Entries +//-------------------------------------- + +// Ensure we can append entries to a server. +func TestServerAppendEntries(t *testing.T) { + server := newTestServer("1", &testTransporter{}) + + server.SetHeartbeatTimeout(time.Second * 10) + server.Start() + defer server.Stop() + + // Append single entry. 
+ e, _ := newLogEntry(nil, 1, 1, &testCommand1{Val: "foo", I: 10}) + entries := []*LogEntry{e} + resp := server.AppendEntries(newAppendEntriesRequest(1, 0, 0, 0, "ldr", entries)) + if resp.Term != 1 || !resp.Success { + t.Fatalf("AppendEntries failed: %v/%v", resp.Term, resp.Success) + } + if index, term := server.log.commitInfo(); index != 0 || term != 0 { + t.Fatalf("Invalid commit info [IDX=%v, TERM=%v]", index, term) + } + + // Append multiple entries + commit the last one. + e1, _ := newLogEntry(nil, 2, 1, &testCommand1{Val: "bar", I: 20}) + e2, _ := newLogEntry(nil, 3, 1, &testCommand1{Val: "baz", I: 30}) + entries = []*LogEntry{e1, e2} + resp = server.AppendEntries(newAppendEntriesRequest(1, 1, 1, 1, "ldr", entries)) + if resp.Term != 1 || !resp.Success { + t.Fatalf("AppendEntries failed: %v/%v", resp.Term, resp.Success) + } + if index, term := server.log.commitInfo(); index != 1 || term != 1 { + t.Fatalf("Invalid commit info [IDX=%v, TERM=%v]", index, term) + } + + // Send zero entries and commit everything. + resp = server.AppendEntries(newAppendEntriesRequest(2, 3, 1, 3, "ldr", []*LogEntry{})) + if resp.Term != 2 || !resp.Success { + t.Fatalf("AppendEntries failed: %v/%v", resp.Term, resp.Success) + } + if index, term := server.log.commitInfo(); index != 3 || term != 1 { + t.Fatalf("Invalid commit info [IDX=%v, TERM=%v]", index, term) + } +} + +//Ensure that entries with stale terms are rejected. +func TestServerAppendEntriesWithStaleTermsAreRejected(t *testing.T) { + server := newTestServer("1", &testTransporter{}) + + server.Start() + + defer server.Stop() + server.currentTerm = 2 + + // Append single entry. 
+ e, _ := newLogEntry(nil, 1, 1, &testCommand1{Val: "foo", I: 10}) + entries := []*LogEntry{e} + resp := server.AppendEntries(newAppendEntriesRequest(1, 0, 0, 0, "ldr", entries)) + if resp.Term != 2 || resp.Success { + t.Fatalf("AppendEntries should have failed: %v/%v", resp.Term, resp.Success) + } + if index, term := server.log.commitInfo(); index != 0 || term != 0 { + t.Fatalf("Invalid commit info [IDX=%v, TERM=%v]", index, term) + } +} + +// Ensure that we reject entries if the commit log is different. +func TestServerAppendEntriesRejectedIfAlreadyCommitted(t *testing.T) { + server := newTestServer("1", &testTransporter{}) + server.Start() + + defer server.Stop() + + // Append single entry + commit. + e1, _ := newLogEntry(nil, 1, 1, &testCommand1{Val: "foo", I: 10}) + e2, _ := newLogEntry(nil, 2, 1, &testCommand1{Val: "foo", I: 15}) + entries := []*LogEntry{e1, e2} + resp := server.AppendEntries(newAppendEntriesRequest(1, 0, 0, 2, "ldr", entries)) + if resp.Term != 1 || !resp.Success { + t.Fatalf("AppendEntries failed: %v/%v", resp.Term, resp.Success) + } + + // Append entry again (post-commit). + e, _ := newLogEntry(nil, 2, 1, &testCommand1{Val: "bar", I: 20}) + entries = []*LogEntry{e} + resp = server.AppendEntries(newAppendEntriesRequest(1, 2, 1, 1, "ldr", entries)) + if resp.Term != 1 || resp.Success { + t.Fatalf("AppendEntries should have failed: %v/%v", resp.Term, resp.Success) + } +} + +// Ensure that we uncommitted entries are rolled back if new entries overwrite them. +func TestServerAppendEntriesOverwritesUncommittedEntries(t *testing.T) { + server := newTestServer("1", &testTransporter{}) + server.Start() + defer server.Stop() + + entry1, _ := newLogEntry(nil, 1, 1, &testCommand1{Val: "foo", I: 10}) + entry2, _ := newLogEntry(nil, 2, 1, &testCommand1{Val: "foo", I: 15}) + entry3, _ := newLogEntry(nil, 2, 2, &testCommand1{Val: "bar", I: 20}) + + // Append single entry + commit. 
+ entries := []*LogEntry{entry1, entry2} + resp := server.AppendEntries(newAppendEntriesRequest(1, 0, 0, 1, "ldr", entries)) + if resp.Term != 1 || !resp.Success || server.log.commitIndex != 1 || !reflect.DeepEqual(server.log.entries, []*LogEntry{entry1, entry2}) { + t.Fatalf("AppendEntries failed: %v/%v", resp.Term, resp.Success) + } + + // Append entry that overwrites the second (uncommitted) entry. + entries = []*LogEntry{entry3} + resp = server.AppendEntries(newAppendEntriesRequest(2, 1, 1, 2, "ldr", entries)) + if resp.Term != 2 || !resp.Success || server.log.commitIndex != 2 || !reflect.DeepEqual(server.log.entries, []*LogEntry{entry1, entry3}) { + t.Fatalf("AppendEntries should have succeeded: %v/%v", resp.Term, resp.Success) + } +} + +//-------------------------------------- +// Command Execution +//-------------------------------------- + +// Ensure that a follower cannot execute a command. +func TestServerDenyCommandExecutionWhenFollower(t *testing.T) { + server := newTestServer("1", &testTransporter{}) + server.Start() + defer server.Stop() + var err error + if _, err = server.Do(&testCommand1{Val: "foo", I: 10}); err != NotLeaderError { + t.Fatalf("Expected error: %v, got: %v", NotLeaderError, err) + } +} + +//-------------------------------------- +// Membership +//-------------------------------------- + +// Ensure that we can start a single server and append to its log. +func TestServerSingleNode(t *testing.T) { + server := newTestServer("1", &testTransporter{}) + if server.State() != Stopped { + t.Fatalf("Unexpected server state: %v", server.State()) + } + + server.Start() + + time.Sleep(testHeartbeatTimeout) + + // Join the server to itself. 
+ if _, err := server.Do(&DefaultJoinCommand{Name: "1"}); err != nil { + t.Fatalf("Unable to join: %v", err) + } + debugln("finish command") + + if server.State() != Leader { + t.Fatalf("Unexpected server state: %v", server.State()) + } + + server.Stop() + + if server.State() != Stopped { + t.Fatalf("Unexpected server state: %v", server.State()) + } +} + +// Ensure that we can start multiple servers and determine a leader. +func TestServerMultiNode(t *testing.T) { + // Initialize the servers. + var mutex sync.RWMutex + servers := map[string]*Server{} + + transporter := &testTransporter{} + transporter.sendVoteRequestFunc = func(server *Server, peer *Peer, req *RequestVoteRequest) *RequestVoteResponse { + mutex.RLock() + s := servers[peer.name] + mutex.RUnlock() + return s.RequestVote(req) + } + transporter.sendAppendEntriesRequestFunc = func(server *Server, peer *Peer, req *AppendEntriesRequest) *AppendEntriesResponse { + mutex.RLock() + s := servers[peer.name] + mutex.RUnlock() + return s.AppendEntries(req) + } + + disTransporter := &testTransporter{} + disTransporter.sendVoteRequestFunc = func(server *Server, peer *Peer, req *RequestVoteRequest) *RequestVoteResponse { + return nil + } + disTransporter.sendAppendEntriesRequestFunc = func(server *Server, peer *Peer, req *AppendEntriesRequest) *AppendEntriesResponse { + return nil + } + + var names []string + + n := 5 + + // add n servers + for i := 1; i <= n; i++ { + names = append(names, strconv.Itoa(i)) + } + + var leader *Server + for _, name := range names { + server := newTestServer(name, transporter) + defer server.Stop() + + mutex.Lock() + servers[name] = server + mutex.Unlock() + + if name == "1" { + leader = server + server.SetHeartbeatTimeout(testHeartbeatTimeout) + server.Start() + time.Sleep(testHeartbeatTimeout) + } else { + server.SetElectionTimeout(testElectionTimeout) + server.SetHeartbeatTimeout(testHeartbeatTimeout) + server.Start() + time.Sleep(testHeartbeatTimeout) + } + if _, err := 
leader.Do(&DefaultJoinCommand{Name: name}); err != nil { + t.Fatalf("Unable to join server[%s]: %v", name, err) + } + + } + time.Sleep(2 * testElectionTimeout) + + // Check that two peers exist on leader. + mutex.RLock() + if leader.MemberCount() != n { + t.Fatalf("Expected member count to be %v, got %v", n, leader.MemberCount()) + } + if servers["2"].State() == Leader || servers["3"].State() == Leader { + t.Fatalf("Expected leader should be 1: 2=%v, 3=%v\n", servers["2"].state, servers["3"].state) + } + mutex.RUnlock() + + for i := 0; i < 20; i++ { + retry := 0 + fmt.Println("Round ", i) + + num := strconv.Itoa(i%(len(servers)) + 1) + num_1 := strconv.Itoa((i+3)%(len(servers)) + 1) + toStop := servers[num] + toStop_1 := servers[num_1] + + // Stop the first server and wait for a re-election. + time.Sleep(2 * testElectionTimeout) + debugln("Disconnect ", toStop.Name()) + debugln("disconnect ", num, " ", num_1) + toStop.SetTransporter(disTransporter) + toStop_1.SetTransporter(disTransporter) + time.Sleep(2 * testElectionTimeout) + // Check that either server 2 or 3 is the leader now. 
+ //mutex.Lock() + + leader := 0 + + for key, value := range servers { + debugln("Play begin") + if key != num && key != num_1 { + if value.State() == Leader { + debugln("Found leader") + for i := 0; i < 10; i++ { + debugln("[Test] do ", value.Name()) + if _, err := value.Do(&testCommand2{X: 1}); err != nil { + break + } + debugln("[Test] Done") + } + debugln("Leader is ", value.Name(), " Index ", value.log.commitIndex) + } + debugln("Not Found leader") + } + } + for { + for key, value := range servers { + if key != num && key != num_1 { + if value.State() == Leader { + leader++ + } + debugln(value.Name(), " ", value.currentTerm, " ", value.state) + } + } + + if leader > 1 { + if retry < 300 { + debugln("retry") + retry++ + leader = 0 + time.Sleep(2 * testElectionTimeout) + continue + } + t.Fatalf("wrong leader number %v", leader) + } + if leader == 0 { + if retry < 300 { + retry++ + fmt.Println("retry 0") + leader = 0 + time.Sleep(2 * testElectionTimeout) + continue + } + t.Fatalf("wrong leader number %v", leader) + } + if leader == 1 { + break + } + } + + //mutex.Unlock() + + toStop.SetTransporter(transporter) + toStop_1.SetTransporter(transporter) + } + +} diff --git a/third_party/github.com/coreos/go-raft/snapshot.go b/third_party/github.com/coreos/go-raft/snapshot.go new file mode 100644 index 000000000..d35474f8a --- /dev/null +++ b/third_party/github.com/coreos/go-raft/snapshot.go @@ -0,0 +1,65 @@ +package raft + +import ( + //"bytes" + "encoding/json" + "fmt" + "hash/crc32" + "os" + "syscall" +) + +//------------------------------------------------------------------------------ +// +// Typedefs +// +//------------------------------------------------------------------------------ + +// the in memory SnapShot struct +// TODO add cluster configuration +type Snapshot struct { + LastIndex uint64 `json:"lastIndex"` + LastTerm uint64 `json:"lastTerm"` + // cluster configuration. 
+ Peers []string `json: "peers"` + State []byte `json: "state"` + Path string `json: "path"` +} + +// Save the snapshot to a file +func (ss *Snapshot) save() error { + // Write machine state to temporary buffer. + + // open file + file, err := os.OpenFile(ss.Path, os.O_CREATE|os.O_WRONLY, 0600) + + if err != nil { + return err + } + + defer file.Close() + + b, err := json.Marshal(ss) + + // Generate checksum. + checksum := crc32.ChecksumIEEE(b) + + // Write snapshot with checksum. + if _, err = fmt.Fprintf(file, "%08x\n", checksum); err != nil { + return err + } + + if _, err = file.Write(b); err != nil { + return err + } + + // force the change writting to disk + syscall.Fsync(int(file.Fd())) + return err +} + +// remove the file of the snapshot +func (ss *Snapshot) remove() error { + err := os.Remove(ss.Path) + return err +} diff --git a/third_party/github.com/coreos/go-raft/snapshot_recovery_request.go b/third_party/github.com/coreos/go-raft/snapshot_recovery_request.go new file mode 100644 index 000000000..2aa0c12e5 --- /dev/null +++ b/third_party/github.com/coreos/go-raft/snapshot_recovery_request.go @@ -0,0 +1,77 @@ +package raft + +import ( + "code.google.com/p/goprotobuf/proto" + "github.com/benbjohnson/go-raft/protobuf" + "io" + "io/ioutil" +) + +// The request sent to a server to start from the snapshot. +type SnapshotRecoveryRequest struct { + LeaderName string + LastIndex uint64 + LastTerm uint64 + Peers []string + State []byte +} + +//------------------------------------------------------------------------------ +// +// Constructors +// +//------------------------------------------------------------------------------ + +// Creates a new Snapshot request. 
+func newSnapshotRecoveryRequest(leaderName string, snapshot *Snapshot) *SnapshotRecoveryRequest { + return &SnapshotRecoveryRequest{ + LeaderName: leaderName, + LastIndex: snapshot.LastIndex, + LastTerm: snapshot.LastTerm, + Peers: snapshot.Peers, + State: snapshot.State, + } +} + +// Encodes the SnapshotRecoveryRequest to a buffer. Returns the number of bytes +// written and any error that may have occurred. +func (req *SnapshotRecoveryRequest) encode(w io.Writer) (int, error) { + pb := &protobuf.ProtoSnapshotRecoveryRequest{ + LeaderName: proto.String(req.LeaderName), + LastIndex: proto.Uint64(req.LastIndex), + LastTerm: proto.Uint64(req.LastTerm), + Peers: req.Peers, + State: req.State, + } + p, err := proto.Marshal(pb) + if err != nil { + return -1, err + } + + return w.Write(p) +} + +// Decodes the SnapshotRecoveryRequest from a buffer. Returns the number of bytes read and +// any error that occurs. +func (req *SnapshotRecoveryRequest) decode(r io.Reader) (int, error) { + data, err := ioutil.ReadAll(r) + + if err != nil { + return 0, err + } + + totalBytes := len(data) + + pb := &protobuf.ProtoSnapshotRequest{} + if err = proto.Unmarshal(data, pb); err != nil { + return -1, err + } + + req.LeaderName = pb.GetLeaderName() + req.LastIndex = pb.GetLastIndex() + req.LastTerm = pb.GetLastTerm() + req.Peers = req.Peers + req.State = req.State + + return totalBytes, nil +} diff --git a/third_party/github.com/coreos/go-raft/snapshot_recovery_response.go b/third_party/github.com/coreos/go-raft/snapshot_recovery_response.go new file mode 100644 index 000000000..14f8e0450 --- /dev/null +++ b/third_party/github.com/coreos/go-raft/snapshot_recovery_response.go @@ -0,0 +1,69 @@ +package raft + +import ( + "code.google.com/p/goprotobuf/proto" + "github.com/benbjohnson/go-raft/protobuf" + "io" + "io/ioutil" +) + +// The response returned from a server appending entries to the log. 
+type SnapshotRecoveryResponse struct { + Term uint64 + Success bool + CommitIndex uint64 +} + +//------------------------------------------------------------------------------ +// +// Constructors +// +//------------------------------------------------------------------------------ + +// Creates a new Snapshot response. +func newSnapshotRecoveryResponse(term uint64, success bool, commitIndex uint64) *SnapshotRecoveryResponse { + return &SnapshotRecoveryResponse{ + Term: term, + Success: success, + CommitIndex: commitIndex, + } +} + +// Encodes the SnapshotRecoveryResponse to a buffer. Returns the number of bytes +// written and any error that may have occurred. +func (req *SnapshotRecoveryResponse) encode(w io.Writer) (int, error) { + pb := &protobuf.ProtoSnapshotRecoveryResponse{ + Term: proto.Uint64(req.Term), + Success: proto.Bool(req.Success), + CommitIndex: proto.Uint64(req.CommitIndex), + } + p, err := proto.Marshal(pb) + if err != nil { + return -1, err + } + + return w.Write(p) +} + +// Decodes the SnapshotRecoveryResponse from a buffer. Returns the number of bytes read and +// any error that occurs. 
+func (req *SnapshotRecoveryResponse) decode(r io.Reader) (int, error) {
+	raw, err := ioutil.ReadAll(r)
+	if err != nil {
+		return 0, err
+	}
+
+	msg := new(protobuf.ProtoSnapshotRecoveryResponse)
+	if err = proto.Unmarshal(raw, msg); err != nil {
+		return -1, err
+	}
+
+	// Copy the decoded fields onto the receiver.
+	req.Term = msg.GetTerm()
+	req.Success = msg.GetSuccess()
+	req.CommitIndex = msg.GetCommitIndex()
+
+	return len(raw), nil
+}
diff --git a/third_party/github.com/coreos/go-raft/snapshot_request.go b/third_party/github.com/coreos/go-raft/snapshot_request.go
new file mode 100644
index 000000000..5d37b4ed8
--- /dev/null
+++ b/third_party/github.com/coreos/go-raft/snapshot_request.go
@@ -0,0 +1,70 @@
+package raft
+
+import (
+	"code.google.com/p/goprotobuf/proto"
+	"github.com/benbjohnson/go-raft/protobuf"
+	"io"
+	"io/ioutil"
+)
+
+// SnapshotRequest identifies a snapshot by its last index and last term and
+// names the leader that sends it.
+type SnapshotRequest struct {
+	LeaderName string
+	LastIndex  uint64
+	LastTerm   uint64
+}
+
+//------------------------------------------------------------------------------
+//
+// Constructors
+//
+//------------------------------------------------------------------------------
+
+// newSnapshotRequest builds a request for the given leader name, taking the
+// last index and last term from snapshot.
+func newSnapshotRequest(leaderName string, snapshot *Snapshot) *SnapshotRequest {
+	req := new(SnapshotRequest)
+	req.LeaderName = leaderName
+	req.LastIndex = snapshot.LastIndex
+	req.LastTerm = snapshot.LastTerm
+	return req
+}
+
+// encode marshals the request as a ProtoSnapshotRequest and writes it to w.
+// It returns the number of bytes written and any write error, or -1 and the
+// marshal error when the message cannot be marshaled.
+func (req *SnapshotRequest) encode(w io.Writer) (int, error) {
+	msg := &protobuf.ProtoSnapshotRequest{
+		LeaderName: proto.String(req.LeaderName),
+		LastIndex:  proto.Uint64(req.LastIndex),
+		LastTerm:   proto.Uint64(req.LastTerm),
+	}
+
+	buf, err := proto.Marshal(msg)
+	if err == nil {
+		return w.Write(buf)
+	}
+	return -1, err
+}
+
+// Decodes the SnapshotRequest from a buffer. Returns the number of bytes read and
+// any error that occurs.
+func (req *SnapshotRequest) decode(r io.Reader) (int, error) {
+	raw, err := ioutil.ReadAll(r)
+	if err != nil {
+		return 0, err
+	}
+
+	msg := new(protobuf.ProtoSnapshotRequest)
+	if err = proto.Unmarshal(raw, msg); err != nil {
+		return -1, err
+	}
+
+	// Copy the decoded fields onto the receiver.
+	req.LeaderName = msg.GetLeaderName()
+	req.LastIndex = msg.GetLastIndex()
+	req.LastTerm = msg.GetLastTerm()
+
+	return len(raw), nil
+}
diff --git a/third_party/github.com/coreos/go-raft/snapshot_response.go b/third_party/github.com/coreos/go-raft/snapshot_response.go
new file mode 100644
index 000000000..c3b9ae40d
--- /dev/null
+++ b/third_party/github.com/coreos/go-raft/snapshot_response.go
@@ -0,0 +1,61 @@
+package raft
+
+import (
+	"code.google.com/p/goprotobuf/proto"
+	"github.com/benbjohnson/go-raft/protobuf"
+	"io"
+	"io/ioutil"
+)
+
+// The response returned if the follower entered snapshot state
+type SnapshotResponse struct {
+	Success bool `json:"success"`
+}
+
+//------------------------------------------------------------------------------
+//
+// Constructors
+//
+//------------------------------------------------------------------------------
+
+// newSnapshotResponse builds a response carrying the given success flag.
+func newSnapshotResponse(success bool) *SnapshotResponse {
+	resp := new(SnapshotResponse)
+	resp.Success = success
+	return resp
+}
+
+// encode marshals the response as a ProtoSnapshotResponse and writes it to w.
+// It returns the number of bytes written and any write error, or -1 and the
+// marshal error when the message cannot be marshaled.
+func (resp *SnapshotResponse) encode(w io.Writer) (int, error) {
+	msg := &protobuf.ProtoSnapshotResponse{
+		Success: proto.Bool(resp.Success),
+	}
+
+	buf, err := proto.Marshal(msg)
+	if err == nil {
+		return w.Write(buf)
+	}
+	return -1, err
+}
+
+// Decodes the SnapshotResponse from a buffer. Returns the number of bytes read and
+// any error that occurs.
+func (resp *SnapshotResponse) decode(r io.Reader) (int, error) {
+	raw, err := ioutil.ReadAll(r)
+	if err != nil {
+		return 0, err
+	}
+
+	msg := new(protobuf.ProtoSnapshotResponse)
+	if err = proto.Unmarshal(raw, msg); err != nil {
+		return -1, err
+	}
+	resp.Success = msg.GetSuccess()
+
+	return len(raw), nil
+}
diff --git a/third_party/github.com/coreos/go-raft/sort.go b/third_party/github.com/coreos/go-raft/sort.go
new file mode 100644
index 000000000..bf4c303af
--- /dev/null
+++ b/third_party/github.com/coreos/go-raft/sort.go
@@ -0,0 +1,23 @@
+package raft
+
+//------------------------------------------------------------------------------
+//
+// Typedefs
+//
+//------------------------------------------------------------------------------
+
+type uint64Slice []uint64
+
+//------------------------------------------------------------------------------
+//
+// Functions
+//
+//------------------------------------------------------------------------------
+
+//--------------------------------------
+// uint64 (Len/Less/Swap, ascending order)
+//--------------------------------------
+
+func (p uint64Slice) Len() int           { return len(p) }
+func (p uint64Slice) Less(i, j int) bool { return p[i] < p[j] }
+func (p uint64Slice) Swap(i, j int)      { p[j], p[i] = p[i], p[j] }
diff --git a/third_party/github.com/coreos/go-raft/statemachine.go b/third_party/github.com/coreos/go-raft/statemachine.go
new file mode 100644
index 000000000..e59036cef
--- /dev/null
+++ b/third_party/github.com/coreos/go-raft/statemachine.go
@@ -0,0 +1,14 @@
+package raft
+
+//------------------------------------------------------------------------------
+//
+// Typedefs
+//
+//------------------------------------------------------------------------------
+
+// StateMachine is the interface through which the host application saves its
+// state as bytes and recovers it from previously saved bytes.
+type StateMachine interface {
+	Save() ([]byte, error)
+	Recovery([]byte) error
+}
diff --git
a/third_party/github.com/coreos/go-raft/test.go b/third_party/github.com/coreos/go-raft/test.go
new file mode 100644
index 000000000..606594bf7
--- /dev/null
+++ b/third_party/github.com/coreos/go-raft/test.go
@@ -0,0 +1,219 @@
+package raft
+
+import (
+	"fmt"
+	"io/ioutil"
+	"os"
+	"time"
+)
+
+// Timeouts applied to the servers created by newTestCluster.
+const (
+	testHeartbeatTimeout = 50 * time.Millisecond
+	testElectionTimeout  = 200 * time.Millisecond
+)
+
+// Registers the two test command types.
+func init() {
+	RegisterCommand(&testCommand1{})
+	RegisterCommand(&testCommand2{})
+}
+
+//------------------------------------------------------------------------------
+//
+// Helpers
+//
+//------------------------------------------------------------------------------
+
+//--------------------------------------
+// Logs
+//--------------------------------------
+
+// Returns a unique path in the temp directory at which no file exists. The
+// placeholder file that reserved the name is removed before returning.
+func getLogPath() string {
+	f, err := ioutil.TempFile("", "raft-log-")
+	if err != nil {
+		panic(err)
+	}
+	f.Close()
+	os.Remove(f.Name())
+	return f.Name()
+}
+
+// Writes the given entries to a new temp file, opens a log on that file and
+// returns the log together with the file path. Panics if the file cannot be
+// created or closed, or if the log cannot be opened.
+func setupLog(entries []*LogEntry) (*Log, string) {
+	f, err := ioutil.TempFile("", "raft-log-")
+	if err != nil {
+		panic(err)
+	}
+
+	for _, entry := range entries {
+		entry.encode(f)
+	}
+	if err := f.Close(); err != nil {
+		panic(err)
+	}
+
+	log := newLog()
+	// Applying a command is a no-op for logs created by this helper.
+	log.ApplyFunc = func(c Command) (interface{}, error) {
+		return nil, nil
+	}
+	if err := log.open(f.Name()); err != nil {
+		panic(err)
+	}
+	return log, f.Name()
+}
+
+//--------------------------------------
+// Servers
+//--------------------------------------
+
+// Creates a server whose data path is a fresh temp directory. Panics if the
+// directory cannot be created.
+func newTestServer(name string, transporter Transporter) *Server {
+	p, err := ioutil.TempDir("", "raft-server-")
+	if err != nil {
+		panic(err)
+	}
+	// A directory needs the execute bit to be usable, hence 0700, not 0644.
+	if err := os.MkdirAll(p, 0700); err != nil {
+		panic(err.Error())
+	}
+	// NOTE(review): the error from NewServer is discarded — confirm that is
+	// intended before turning it into a panic.
+	server, _ := NewServer(name, p, transporter, nil, nil)
+	return server
+}
+
+// Creates a test server and writes the given entries to the file at its log
+// path. The server is returned without being started.
+func newTestServerWithLog(name string, transporter Transporter, entries []*LogEntry) *Server {
+	server := newTestServer(name, transporter)
+	f, err := os.Create(server.LogPath())
+	if err != nil {
+		panic(err)
+	}
+
+	for _, entry := range entries {
+		entry.encode(f)
+	}
+	f.Close()
+	return server
+}
+
+// Creates and starts one server per name, each seeded with a single log
+// entry, and records it in lookup under its name. AddPeer is then called on
+// every server with the name of every server in the cluster, itself included.
+// Panics if a name is already present in lookup.
+func newTestCluster(names []string, transporter Transporter, lookup map[string]*Server) []*Server {
+	servers := []*Server{}
+	e0, _ := newLogEntry(newLog(), 1, 1, &testCommand1{Val: "foo", I: 20})
+
+	for _, name := range names {
+		if lookup[name] != nil {
+			panic(fmt.Sprintf("raft: Duplicate server in test cluster! %v", name))
+		}
+		// Use the requested name so that the server's own name matches the
+		// key it is stored under in lookup.
+		server := newTestServerWithLog(name, transporter, []*LogEntry{e0})
+		server.SetElectionTimeout(testElectionTimeout)
+		servers = append(servers, server)
+		lookup[name] = server
+	}
+	for _, server := range servers {
+		server.SetHeartbeatTimeout(testHeartbeatTimeout)
+		server.Start()
+		for _, peer := range servers {
+			server.AddPeer(peer.Name())
+		}
+	}
+	return servers
+}
+
+//--------------------------------------
+// Transporter
+//--------------------------------------
+
+// testTransporter is a Transporter whose behavior is supplied by the test
+// through one function field per request type.
+type testTransporter struct {
+	sendVoteRequestFunc             func(server *Server, peer *Peer, req *RequestVoteRequest) *RequestVoteResponse
+	sendAppendEntriesRequestFunc    func(server *Server, peer *Peer, req *AppendEntriesRequest) *AppendEntriesResponse
+	sendSnapshotRequestFunc         func(server *Server, peer *Peer, req *SnapshotRequest) *SnapshotResponse
+	sendSnapshotRecoveryRequestFunc func(server *Server, peer *Peer, req *SnapshotRecoveryRequest) *SnapshotRecoveryResponse
+}
+
+// SendVoteRequest delegates to sendVoteRequestFunc.
+func (t *testTransporter) SendVoteRequest(server *Server, peer *Peer, req *RequestVoteRequest) *RequestVoteResponse {
+	return t.sendVoteRequestFunc(server, peer, req)
+}
+
+// SendAppendEntriesRequest delegates to sendAppendEntriesRequestFunc.
+func (t *testTransporter) SendAppendEntriesRequest(server *Server, peer *Peer, req *AppendEntriesRequest) *AppendEntriesResponse {
+	return t.sendAppendEntriesRequestFunc(server, peer, req)
+}
+
+// SendSnapshotRequest delegates to sendSnapshotRequestFunc.
+func (t *testTransporter) SendSnapshotRequest(server *Server, peer *Peer, req *SnapshotRequest) *SnapshotResponse {
+	return t.sendSnapshotRequestFunc(server, peer, req)
+}
+
+// SendSnapshotRecoveryRequest delegates to sendSnapshotRecoveryRequestFunc.
+// It must call the function field, not itself, or it would recurse forever.
+func (t *testTransporter) SendSnapshotRecoveryRequest(server *Server, peer *Peer, req *SnapshotRecoveryRequest) *SnapshotRecoveryResponse {
+	return t.sendSnapshotRecoveryRequestFunc(server, peer, req)
+}
+
+// testStateMachine is a StateMachine backed by two function fields.
+type testStateMachine struct {
+	saveFunc     func() ([]byte, error)
+	recoveryFunc func([]byte) error
+}
+
+// Save delegates to saveFunc.
+func (sm *testStateMachine) Save() ([]byte, error) {
+	return sm.saveFunc()
+}
+
+// Recovery delegates to recoveryFunc.
+func (sm *testStateMachine) Recovery(state []byte) error {
+	return sm.recoveryFunc(state)
+}
+
+//--------------------------------------
+// Command1
+//--------------------------------------
+
+// testCommand1 is a command named "cmd_1" whose Apply does nothing.
+type testCommand1 struct {
+	Val string `json:"val"`
+	I   int    `json:"i"`
+}
+
+func (c *testCommand1) CommandName() string {
+	return "cmd_1"
+}
+
+func (c *testCommand1) Apply(server *Server) (interface{}, error) {
+	return nil, nil
+}
+
+//--------------------------------------
+// Command2
+//--------------------------------------
+
+// testCommand2 is a command named "cmd_2" whose Apply does nothing.
+type testCommand2 struct {
+	X int `json:"x"`
+}
+
+func (c *testCommand2) CommandName() string {
+	return "cmd_2"
+}
+
+func (c *testCommand2) Apply(server *Server) (interface{}, error) {
+	return nil, nil
+}
diff --git a/third_party/github.com/coreos/go-raft/time.go b/third_party/github.com/coreos/go-raft/time.go
new file mode 100644
index 000000000..cae863ccf
--- /dev/null
+++ b/third_party/github.com/coreos/go-raft/time.go
@@ -0,0 +1,17 @@
+package raft
+
+import (
+	"math/rand"
+	"time"
+)
+
+// Waits for a random time between two durations and sends the current time on
+// the returned channel.
+func afterBetween(min time.Duration, max time.Duration) <-chan time.Time {
+	rng := rand.New(rand.NewSource(time.Now().UnixNano()))
+	wait := min
+	if spread := max - min; spread > 0 {
+		wait += time.Duration(rng.Int63n(int64(spread)))
+	}
+	return time.After(wait)
+}
diff --git a/third_party/github.com/coreos/go-raft/timer.go b/third_party/github.com/coreos/go-raft/timer.go
new file mode 100644
index 000000000..d0c258a3e
--- /dev/null
+++ b/third_party/github.com/coreos/go-raft/timer.go
@@ -0,0 +1,186 @@
+package raft
+
+import (
+	"math/rand"
+	"sync"
+	"time"
+)
+
+//------------------------------------------------------------------------------
+//
+// Typedefs
+//
+//------------------------------------------------------------------------------
+
+// timer blocks a caller of start for a random duration between minDuration
+// and maxDuration, unless fire or stop ends the wait earlier.
+type timer struct {
+	fireChan chan time.Time // signalled by fire to end a wait early
+	stopChan chan bool      // signalled by stop to abort a wait
+	state    int            // one of STOPPED, READY, RUNNING
+
+	rand          *rand.Rand
+	minDuration   time.Duration
+	maxDuration   time.Duration
+	internalTimer *time.Timer // non-nil only while start is waiting
+
+	mutex sync.Mutex // guards state, the durations and internalTimer
+}
+
+// Timer states.
+const (
+	STOPPED = iota // stop was called; start returns false until ready is called
+	READY          // start may be called
+	RUNNING        // a start call is in progress
+)
+
+//------------------------------------------------------------------------------
+//
+// Constructors
+//
+//------------------------------------------------------------------------------
+
+// Creates a new timer in the READY state. Panics if either duration is
+// non-positive or if the minimum is greater than the maximum.
+func newTimer(minDuration time.Duration, maxDuration time.Duration) *timer {
+	switch {
+	case minDuration <= 0:
+		panic("raft: Non-positive minimum duration not allowed")
+	case maxDuration <= 0:
+		panic("raft: Non-positive maximum duration not allowed")
+	case minDuration > maxDuration:
+		panic("raft: Minimum duration cannot be greater than maximum duration")
+	}
+
+	// stopChan is buffered so that stop can leave its signal without waiting
+	// for a receiver; fireChan is unbuffered, so fire only gets through while
+	// start is receiving.
+	return &timer{
+		minDuration: minDuration,
+		maxDuration: maxDuration,
+		state:       READY,
+		rand:        rand.New(rand.NewSource(time.Now().UnixNano())),
+		stopChan:    make(chan bool, 1),
+		fireChan:    make(chan time.Time),
+	}
+}
+
+//------------------------------------------------------------------------------
+//
+// Accessors
+//
+//------------------------------------------------------------------------------
+
+// Sets both the minimum and the maximum duration of the timer to the given
+// value. The mutex is held because start reads the durations under it.
+func (t *timer) setDuration(duration time.Duration) {
+	t.mutex.Lock()
+	defer t.mutex.Unlock()
+
+	t.minDuration = duration
+	t.maxDuration = duration
+}
+
+//------------------------------------------------------------------------------
+//
+// Methods
+//
+//------------------------------------------------------------------------------
+
+// Checks if the timer is currently running. The state is read under the mutex
+// because start, stop and ready modify it under that same mutex.
+func (t *timer) running() bool {
+	t.mutex.Lock()
+	defer t.mutex.Unlock()
+
+	return t.state == RUNNING
+}
+
+// Stops the timer and signals a pending start call through the stop channel.
+func (t *timer) stop() {
+	t.mutex.Lock()
+	defer t.mutex.Unlock()
+
+	if t.internalTimer != nil {
+		t.internalTimer.Stop()
+	}
+
+	if t.state != STOPPED {
+		t.state = STOPPED
+		// The stop channel has a buffer of one and gets at most one signal
+		// before ready replaces it, so this send does not block.
+		t.stopChan <- true
+	}
+}
+
+// Change the state of timer to ready. Panics if the timer is running.
+func (t *timer) ready() {
+	t.mutex.Lock()
+	defer t.mutex.Unlock()
+
+	if t.state == RUNNING {
+		panic("Timer is already running")
+	}
+	t.state = READY
+	t.stopChan = make(chan bool, 1)
+	t.fireChan = make(chan time.Time)
+}
+
+// Fire at the timer. The signal is dropped unless start is waiting for it.
+func (t *timer) fire() {
+	t.mutex.Lock()
+	fireChan := t.fireChan // read under the mutex; ready replaces it
+	t.mutex.Unlock()
+	select {
+	case fireChan <- time.Now():
+	default:
+	}
+}
+
+// Start the timer, this func will be blocked until the timer:
+// (1) times out
+// (2) stopped
+// (3) fired
+// Return false if stopped.
+// Make sure the start func will not restart the stopped timer.
+func (t *timer) start() bool {
+	t.mutex.Lock()
+	if t.state != READY {
+		t.mutex.Unlock()
+		return false
+	}
+	t.state = RUNNING
+
+	d := t.minDuration
+	if t.maxDuration > t.minDuration {
+		d += time.Duration(t.rand.Int63n(int64(t.maxDuration - t.minDuration)))
+	}
+
+	// Take the channels while the mutex is still held: ready replaces them
+	// under the same mutex, so reading them later would be a data race.
+	t.internalTimer = time.NewTimer(d)
+	internalTimer, fireChan, stopChan := t.internalTimer, t.fireChan, t.stopChan
+	t.mutex.Unlock()
+
+	// Wait for the timer channel, stop channel or fire channel.
+	stopped := false
+	select {
+	case <-internalTimer.C:
+	case <-fireChan:
+	case <-stopChan:
+		stopped = true
+	}
+
+	// Clean up timer and state.
+	t.mutex.Lock()
+	t.internalTimer.Stop()
+	t.internalTimer = nil
+	if stopped {
+		t.state = STOPPED
+	} else if t.state == RUNNING {
+		t.state = READY
+	}
+	t.mutex.Unlock()
+
+	return !stopped
+}
diff --git a/third_party/github.com/coreos/go-raft/timer_test.go b/third_party/github.com/coreos/go-raft/timer_test.go
new file mode 100644
index 000000000..60cd746b2
--- /dev/null
+++ b/third_party/github.com/coreos/go-raft/timer_test.go
@@ -0,0 +1,89 @@
+package raft
+
+import (
+	"testing"
+	"time"
+)
+
+//------------------------------------------------------------------------------
+//
+// Tests
+//
+//------------------------------------------------------------------------------
+
+// Ensure that we can start an election timer and it will go off in the specified duration.
+func TestTimer(t *testing.T) {
+	timer := newTimer(5*time.Millisecond, 10*time.Millisecond)
+
+	// test timer start
+	for i := 0; i < 10; i++ {
+		start := time.Now()
+		timer.start()
+
+		duration := time.Since(start)
+		if duration > 12*time.Millisecond || duration < 5*time.Millisecond {
+			t.Fatal("Duration Error! ", duration)
+		}
+	}
+
+	// test timer stop
+	for i := 0; i < 100; i++ {
+		start := time.Now()
+		go stop(timer)
+		timer.start()
+
+		duration := time.Since(start)
+		if duration > 3*time.Millisecond {
+			t.Fatal("Duration Error! ", duration)
+		}
+
+		// ready the timer after stop it
+		timer.ready()
+	}
+
+	// test timer fire
+	for i := 0; i < 100; i++ {
+		start := time.Now()
+		go fire(timer)
+		timer.start()
+
+		duration := time.Since(start)
+		if duration > 3*time.Millisecond {
+			t.Fatal("Fire Duration Error! ", duration)
+		}
+	}
+
+	resp := make(chan bool)
+
+	// play with start and stop
+	// make sure we can stop timer
+	// in all the possible seq of start and stop
+	for i := 0; i < 100; i++ {
+		go stop(timer)
+		go start(timer, resp)
+		ret := <-resp
+		if ret != false {
+			t.Fatal("cannot stop timer!")
+		}
+		timer.ready()
+	}
+
+}
+
+// stop sleeps for one millisecond and then stops the timer.
+func stop(t *timer) {
+	time.Sleep(time.Millisecond)
+	t.stop()
+}
+
+// start sleeps for one millisecond, starts the timer and sends its result on resp.
+func start(t *timer, resp chan bool) {
+	time.Sleep(time.Millisecond)
+	resp <- t.start()
+}
+
+// fire sleeps for one millisecond and then fires the timer.
+func fire(t *timer) {
+	time.Sleep(time.Millisecond)
+	t.fire()
+}
diff --git a/third_party/github.com/coreos/go-raft/transporter.go b/third_party/github.com/coreos/go-raft/transporter.go
new file mode 100644
index 000000000..f7d51e527
--- /dev/null
+++ b/third_party/github.com/coreos/go-raft/transporter.go
@@ -0,0 +1,16 @@
+package raft
+
+//------------------------------------------------------------------------------
+//
+// Typedefs
+//
+//------------------------------------------------------------------------------
+
+// Transporter is the interface for allowing the host application to transport
+// requests to other nodes.
+type Transporter interface {
+	SendVoteRequest(server *Server, peer *Peer, req *RequestVoteRequest) *RequestVoteResponse
+	SendAppendEntriesRequest(server *Server, peer *Peer, req *AppendEntriesRequest) *AppendEntriesResponse
+	SendSnapshotRequest(server *Server, peer *Peer, req *SnapshotRequest) *SnapshotResponse
+	SendSnapshotRecoveryRequest(server *Server, peer *Peer, req *SnapshotRecoveryRequest) *SnapshotRecoveryResponse
+}
diff --git a/third_party/github.com/coreos/go-raft/z_test.go b/third_party/github.com/coreos/go-raft/z_test.go
new file mode 100644
index 000000000..cafdf8905
--- /dev/null
+++ b/third_party/github.com/coreos/go-raft/z_test.go
@@ -0,0 +1,13 @@
+package raft
+
+/*
+import (
+	"testing"
+	"time"
+)
+
+func TestGC(t *testing.T) {
+	<-time.After(500 * time.Millisecond)
+	panic("Oh god no!")
+}
+*/