mirror of
https://github.com/etcd-io/etcd.git
synced 2024-09-27 06:25:44 +00:00
commit
6ee5cd9105
191
Documentation/rfc/v3api.md
Normal file
191
Documentation/rfc/v3api.md
Normal file
@ -0,0 +1,191 @@
|
|||||||
|
## Design
|
||||||
|
|
||||||
|
1. Flatten binary key-value space
|
||||||
|
|
||||||
|
2. Keep the event history until compaction
|
||||||
|
- access to old version of keys
|
||||||
|
- user controlled history compaction
|
||||||
|
|
||||||
|
3. Support range query
|
||||||
|
- Pagination support with limit argument
|
||||||
|
- Support consistency guarantee across multiple range queries
|
||||||
|
|
||||||
|
4. Replace TTL key with Lease
|
||||||
|
- more efficient/ low cost keep alive
|
||||||
|
- a logical group of TTL keys
|
||||||
|
|
||||||
|
5. Replace CAS/CAD with multi-object Tnx
|
||||||
|
- MUCH MORE powerful and flexible
|
||||||
|
|
||||||
|
6. Support efficient watching with multiple ranges
|
||||||
|
|
||||||
|
7. RPC API supports the completed set of APIs.
|
||||||
|
- more efficient than JSON/HTTP
|
||||||
|
- additional tnx/lease support
|
||||||
|
|
||||||
|
8. HTTP API supports a subset of APIs.
|
||||||
|
- easy for people to try out etcd
|
||||||
|
- easy for people to write simple etcd application
|
||||||
|
|
||||||
|
|
||||||
|
## Protobuf Defined API
|
||||||
|
|
||||||
|
[protobuf](./v3api.proto)
|
||||||
|
|
||||||
|
### Examples
|
||||||
|
|
||||||
|
#### Put a key (foo=bar)
|
||||||
|
```
|
||||||
|
// A put is always successful
|
||||||
|
Put( PutRequest { key = foo, value = bar } )
|
||||||
|
|
||||||
|
PutResponse {
|
||||||
|
cluster_id = 0x1000,
|
||||||
|
member_id = 0x1,
|
||||||
|
index = 1,
|
||||||
|
raft_term = 0x1,
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Get a key (assume we have foo=bar)
|
||||||
|
```
|
||||||
|
Get ( RangeRequest { key = foo } )
|
||||||
|
|
||||||
|
RangeResponse {
|
||||||
|
cluster_id = 0x1000,
|
||||||
|
member_id = 0x1,
|
||||||
|
index = 1,
|
||||||
|
raft_term = 0x1,
|
||||||
|
kvs = {
|
||||||
|
{
|
||||||
|
key = foo,
|
||||||
|
value = bar,
|
||||||
|
create_index = 1,
|
||||||
|
mod_index = 1,
|
||||||
|
version = 1;
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Range over a key space (assume we have foo0=bar0… foo100=bar100)
|
||||||
|
```
|
||||||
|
Range ( RangeRequest { key = foo, end_key = foo80, limit = 30 } )
|
||||||
|
|
||||||
|
RangeResponse {
|
||||||
|
cluster_id = 0x1000,
|
||||||
|
member_id = 0x1,
|
||||||
|
index = 100,
|
||||||
|
raft_term = 0x1,
|
||||||
|
kvs = {
|
||||||
|
{
|
||||||
|
key = foo0,
|
||||||
|
value = bar0,
|
||||||
|
create_index = 1,
|
||||||
|
mod_index = 1,
|
||||||
|
version = 1;
|
||||||
|
},
|
||||||
|
...,
|
||||||
|
{
|
||||||
|
key = foo30,
|
||||||
|
value = bar30,
|
||||||
|
create_index = 30,
|
||||||
|
mod_index = 30,
|
||||||
|
version = 1;
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Finish a tnx (assume we have foo0=bar0, foo1=bar1)
|
||||||
|
```
|
||||||
|
Tnx(TnxRequest {
|
||||||
|
// mod_index of foo0 is equal to 1, mod_index of foo1 is greater than 1
|
||||||
|
compare = {
|
||||||
|
{compareType = equal, key = foo0, mod_index = 1},
|
||||||
|
{compareType = greater, key = foo1, mod_index = 1}}
|
||||||
|
},
|
||||||
|
// if the comparison succeeds, put foo2 = bar2
|
||||||
|
success = {PutRequest { key = foo2, value = success }},
|
||||||
|
// if the comparison fails, put foo2=fail
|
||||||
|
failure = {PutRequest { key = foo2, value = failure }},
|
||||||
|
)
|
||||||
|
|
||||||
|
TnxResponse {
|
||||||
|
cluster_id = 0x1000,
|
||||||
|
member_id = 0x1,
|
||||||
|
index = 3,
|
||||||
|
raft_term = 0x1,
|
||||||
|
succeeded = true,
|
||||||
|
responses = {
|
||||||
|
// response of PUT foo2=success
|
||||||
|
{
|
||||||
|
cluster_id = 0x1000,
|
||||||
|
member_id = 0x1,
|
||||||
|
index = 3,
|
||||||
|
raft_term = 0x1,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Watch on a key/range
|
||||||
|
|
||||||
|
```
|
||||||
|
Watch( WatchRequest{
|
||||||
|
key = foo,
|
||||||
|
end_key = fop, // prefix foo
|
||||||
|
start_index = 20,
|
||||||
|
end_index = 10000,
|
||||||
|
// server decided notification frequency
|
||||||
|
progress_notification = true,
|
||||||
|
}
|
||||||
|
… // this can be a watch request stream
|
||||||
|
)
|
||||||
|
|
||||||
|
// put (foo0=bar0) event at 3
|
||||||
|
WatchResponse {
|
||||||
|
cluster_id = 0x1000,
|
||||||
|
member_id = 0x1,
|
||||||
|
index = 3,
|
||||||
|
raft_term = 0x1,
|
||||||
|
event_type = put,
|
||||||
|
kv = {
|
||||||
|
key = foo0,
|
||||||
|
value = bar0,
|
||||||
|
create_index = 1,
|
||||||
|
mod_index = 1,
|
||||||
|
version = 1;
|
||||||
|
},
|
||||||
|
}
|
||||||
|
…
|
||||||
|
|
||||||
|
// a notification at 2000
|
||||||
|
WatchResponse {
|
||||||
|
cluster_id = 0x1000,
|
||||||
|
member_id = 0x1,
|
||||||
|
index = 2000,
|
||||||
|
raft_term = 0x1,
|
||||||
|
// nil event as notification
|
||||||
|
}
|
||||||
|
|
||||||
|
…
|
||||||
|
|
||||||
|
// put (foo0=bar3000) event at 3000
|
||||||
|
WatchResponse {
|
||||||
|
cluster_id = 0x1000,
|
||||||
|
member_id = 0x1,
|
||||||
|
index = 3000,
|
||||||
|
raft_term = 0x1,
|
||||||
|
event_type = put,
|
||||||
|
kv = {
|
||||||
|
key = foo0,
|
||||||
|
value = bar3000,
|
||||||
|
create_index = 1,
|
||||||
|
mod_index = 3000,
|
||||||
|
version = 2;
|
||||||
|
},
|
||||||
|
}
|
||||||
|
…
|
||||||
|
|
||||||
|
```
|
272
Documentation/rfc/v3api.proto
Normal file
272
Documentation/rfc/v3api.proto
Normal file
@ -0,0 +1,272 @@
|
|||||||
|
syntax = "proto3";
|
||||||
|
|
||||||
|
// Interface exported by the server.
|
||||||
|
service etcd {
|
||||||
|
// Range gets the keys in the range from the store.
|
||||||
|
rpc Range(RangeRequest) returns (RangeResponse) {}
|
||||||
|
|
||||||
|
// Put puts the given key into the store.
|
||||||
|
// A put request increases the index of the store,
|
||||||
|
// and generates one event in the event history.
|
||||||
|
rpc Put(PutRequest) returns (PutResponse) {}
|
||||||
|
|
||||||
|
// Delete deletes the given range from the store.
|
||||||
|
// A delete request increase the index of the store,
|
||||||
|
// and generates one event in the event history.
|
||||||
|
rpc DeleteRange(DeleteRangeRequest) returns (DeleteRangeResponse) {}
|
||||||
|
|
||||||
|
// Tnx processes all the requests in one transaction.
|
||||||
|
// A tnx request increases the index of the store,
|
||||||
|
// and generates events with the same index in the event history.
|
||||||
|
rpc Tnx(TnxRequest) returns (TnxResponse) {}
|
||||||
|
|
||||||
|
// Watch watches the events happening or happened in etcd. Both input and output
|
||||||
|
// are stream. One watch rpc can watch for multiple ranges and get a stream of
|
||||||
|
// events. The whole events history can be watched unless compacted.
|
||||||
|
rpc WatchRange(stream WatchRangeRequest) returns (stream WatchRangeResponse) {}
|
||||||
|
|
||||||
|
// Compact compacts the event history in etcd. User should compact the
|
||||||
|
// event history periodically, or it will grow infinitely.
|
||||||
|
rpc Compact(CompactionRequest) returns (CompactionResponse) {}
|
||||||
|
|
||||||
|
// LeaseCreate creates a lease. A lease has a TTL. The lease will expire if the
|
||||||
|
// server does not receive a keepAlive within TTL from the lease holder.
|
||||||
|
// All keys attached to the lease will be expired and deleted if the lease expires.
|
||||||
|
// The key expiration generates an event in event history.
|
||||||
|
rpc LeaseCreate(LeaseCreateRequest) returns (LeaseCreateResponse) {}
|
||||||
|
|
||||||
|
// LeaseRevoke revokes a lease. All the key attached to the lease will be expired and deleted.
|
||||||
|
rpc LeaseRevoke(LeaseRevokeRequest) returns (LeaseRevokeResponse) {}
|
||||||
|
|
||||||
|
// LeaseAttach attaches keys with a lease.
|
||||||
|
rpc LeaseAttach(LeaseAttachRequest) returns (LeaseAttachResponse) {}
|
||||||
|
|
||||||
|
// LeaseTnx likes Tnx. It has two addition success and failure LeaseAttachRequest list.
|
||||||
|
// If the Tnx is successful, then the success list will be executed. Or the failure list
|
||||||
|
// will be executed.
|
||||||
|
rpc LeaseTnx(LeaseTnxRequest) returns (LeaseTnxResponse) {}
|
||||||
|
|
||||||
|
// KeepAlive keeps the lease alive.
|
||||||
|
rpc LeaseKeepAlive(stream LeaseKeepAliveRequest) returns (stream LeaseKeepAliveResponse) {}
|
||||||
|
}
|
||||||
|
|
||||||
|
message ResponseHeader {
|
||||||
|
// an error type message?
|
||||||
|
optional string error = 1;
|
||||||
|
optional uint64 cluster_id = 2;
|
||||||
|
optional uint64 member_id = 3;
|
||||||
|
// index of the store when the request was applied.
|
||||||
|
optional int64 index = 4;
|
||||||
|
// term of raft when the request was applied.
|
||||||
|
optional uint64 raft_term = 5;
|
||||||
|
}
|
||||||
|
|
||||||
|
message RangeRequest {
|
||||||
|
// if the range_end is not given, the request returns the key.
|
||||||
|
optional bytes key = 1;
|
||||||
|
// if the range_end is given, it gets the keys in range [key, range_end).
|
||||||
|
optional bytes range_end = 2;
|
||||||
|
// limit the number of keys returned.
|
||||||
|
optional int64 limit = 3;
|
||||||
|
// the response will be consistent with previous request with same token if the token is
|
||||||
|
// given and is vaild.
|
||||||
|
optional bytes consistent_token = 4;
|
||||||
|
}
|
||||||
|
|
||||||
|
message RangeResponse {
|
||||||
|
optional ResponseHeader header = 1;
|
||||||
|
repeated KeyValue kvs = 2;
|
||||||
|
optional bytes consistent_token = 3;
|
||||||
|
}
|
||||||
|
|
||||||
|
message PutRequest {
|
||||||
|
optional bytes key = 1;
|
||||||
|
optional bytes value = 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
message PutResponse {
|
||||||
|
optional ResponseHeader header = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
message DeleteRangeRequest {
|
||||||
|
// if the range_end is not given, the request deletes the key.
|
||||||
|
optional bytes key = 1;
|
||||||
|
// if the range_end is given, it deletes the keys in range [key, range_end).
|
||||||
|
optional bytes range_end = 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
message DeleteRangeResponse {
|
||||||
|
optional ResponseHeader header = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
message RequestUnion {
|
||||||
|
oneof request {
|
||||||
|
RangeRequest request_range = 1;
|
||||||
|
PutRequest request_put = 2;
|
||||||
|
DeleteRangeRequest request_delete_range = 3;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
message ResponseUnion {
|
||||||
|
oneof response {
|
||||||
|
RangeResponse reponse_range = 1;
|
||||||
|
PutResponse response_put = 2;
|
||||||
|
DeleteRangeResponse response_delete_range = 3;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
message Compare {
|
||||||
|
enum CompareType {
|
||||||
|
EQUAL = 0;
|
||||||
|
GREATER = 1;
|
||||||
|
LESS = 2;
|
||||||
|
}
|
||||||
|
optional CompareType type = 1;
|
||||||
|
// key path
|
||||||
|
optional bytes key = 2;
|
||||||
|
oneof target {
|
||||||
|
// version of the given key
|
||||||
|
int64 version = 3;
|
||||||
|
// create index of the given key
|
||||||
|
int64 create_index = 4;
|
||||||
|
// last modified index of the given key
|
||||||
|
int64 mod_index = 5;
|
||||||
|
// value of the given key
|
||||||
|
bytes value = 6;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// First all the compare requests are processed.
|
||||||
|
// If all the compare succeed, all the success
|
||||||
|
// requests will be processed.
|
||||||
|
// Or all the failure requests will be processed and
|
||||||
|
// all the errors in the comparison will be returned.
|
||||||
|
|
||||||
|
// From google paxosdb paper:
|
||||||
|
// Our implementation hinges around a powerful primitive which we call MultiOp. All other database
|
||||||
|
// operations except for iteration are implemented as a single call to MultiOp. A MultiOp is applied atomically
|
||||||
|
// and consists of three components:
|
||||||
|
// 1. A list of tests called guard. Each test in guard checks a single entry in the database. It may check
|
||||||
|
// for the absence or presence of a value, or compare with a given value. Two different tests in the guard
|
||||||
|
// may apply to the same or different entries in the database. All tests in the guard are applied and
|
||||||
|
// MultiOp returns the results. If all tests are true, MultiOp executes t op (see item 2 below), otherwise
|
||||||
|
// it executes f op (see item 3 below).
|
||||||
|
// 2. A list of database operations called t op. Each operation in the list is either an insert, delete, or
|
||||||
|
// lookup operation, and applies to a single database entry. Two different operations in the list may apply
|
||||||
|
// to the same or different entries in the database. These operations are executed
|
||||||
|
// if guard evaluates to
|
||||||
|
// true.
|
||||||
|
// 3. A list of database operations called f op. Like t op, but executed if guard evaluates to false.
|
||||||
|
message TnxRequest {
|
||||||
|
repeated Compare compare = 1;
|
||||||
|
repeated RequestUnion success = 2;
|
||||||
|
repeated RequestUnion failure = 3;
|
||||||
|
}
|
||||||
|
|
||||||
|
message TnxResponse {
|
||||||
|
optional ResponseHeader header = 1;
|
||||||
|
optional bool succeeded = 2;
|
||||||
|
repeated ResponseUnion responses = 3;
|
||||||
|
}
|
||||||
|
|
||||||
|
message KeyValue {
|
||||||
|
optional bytes key = 1;
|
||||||
|
// mod_index is the last modified index of the key.
|
||||||
|
optional int64 create_index = 2;
|
||||||
|
optional int64 mod_index = 3;
|
||||||
|
// version is the version of the key. A deletion resets
|
||||||
|
// the version to zero and any modification of the key
|
||||||
|
// increases its version.
|
||||||
|
optional int64 version = 4;
|
||||||
|
optional bytes value = 5;
|
||||||
|
}
|
||||||
|
|
||||||
|
message WatchRangeRequest {
|
||||||
|
// if the range_end is not given, the request returns the key.
|
||||||
|
optional bytes key = 1;
|
||||||
|
// if the range_end is given, it gets the keys in range [key, range_end).
|
||||||
|
optional bytes range_end = 2;
|
||||||
|
// start_index is an optional index (including) to watch from. No start_index is "now".
|
||||||
|
optional int64 start_index = 3;
|
||||||
|
// end_index is an optional index (excluding) to end watch. No end_index is "forever".
|
||||||
|
optional int64 end_index = 4;
|
||||||
|
optional bool progress_notification = 5;
|
||||||
|
}
|
||||||
|
|
||||||
|
message WatchRangeResponse {
|
||||||
|
optional ResponseHeader header = 1;
|
||||||
|
repeated Event events = 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
message Event {
|
||||||
|
enum EventType {
|
||||||
|
PUT = 0;
|
||||||
|
DELETE = 1;
|
||||||
|
EXPIRE = 2;
|
||||||
|
}
|
||||||
|
optional EventType event_type = 1;
|
||||||
|
// a put event contains the current key-value
|
||||||
|
// a delete/expire event contains the previous
|
||||||
|
// key-value
|
||||||
|
optional KeyValue kv = 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
message CompactionRequest {
|
||||||
|
optional int64 index = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
message CompactionResponse {
|
||||||
|
optional ResponseHeader header = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
message LeaseCreateRequest {
|
||||||
|
// advisory ttl in seconds
|
||||||
|
optional int64 ttl = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
message LeaseCreateResponse {
|
||||||
|
optional ResponseHeader header = 1;
|
||||||
|
optional int64 lease_id = 2;
|
||||||
|
// server decided ttl in second
|
||||||
|
optional int64 ttl = 3;
|
||||||
|
optional string error = 4;
|
||||||
|
}
|
||||||
|
|
||||||
|
message LeaseRevokeRequest {
|
||||||
|
optional int64 lease_id = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
message LeaseRevokeResponse {
|
||||||
|
optional ResponseHeader header = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
message LeaseTnxRequest {
|
||||||
|
optional TnxRequest request = 1;
|
||||||
|
repeated LeaseAttachRequest success = 2;
|
||||||
|
repeated LeaseAttachRequest failure = 3;
|
||||||
|
}
|
||||||
|
|
||||||
|
message LeaseTnxResponse {
|
||||||
|
optional ResponseHeader header = 1;
|
||||||
|
optional TnxResponse response = 2;
|
||||||
|
repeated LeaseAttachResponse attach_responses = 3;
|
||||||
|
}
|
||||||
|
|
||||||
|
message LeaseAttachRequest {
|
||||||
|
optional int64 lease_id = 1;
|
||||||
|
optional bytes key = 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
message LeaseAttachResponse {
|
||||||
|
optional ResponseHeader header = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
message LeaseKeepAliveRequest {
|
||||||
|
optional int64 lease_id = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
message LeaseKeepAliveResponse {
|
||||||
|
optional ResponseHeader header = 1;
|
||||||
|
optional int64 lease_id = 2;
|
||||||
|
optional int64 ttl = 3;
|
||||||
|
}
|
Loading…
x
Reference in New Issue
Block a user