mirror of
https://github.com/etcd-io/etcd.git
synced 2024-09-27 06:25:44 +00:00
commit
6ee5cd9105
191
Documentation/rfc/v3api.md
Normal file
191
Documentation/rfc/v3api.md
Normal file
@ -0,0 +1,191 @@
|
||||
## Design
|
||||
|
||||
1. Flat binary key-value space
|
||||
|
||||
2. Keep the event history until compaction
|
||||
- access to old version of keys
|
||||
- user controlled history compaction
|
||||
|
||||
3. Support range query
|
||||
- Pagination support with limit argument
|
||||
- Support consistency guarantee across multiple range queries
|
||||
|
||||
4. Replace TTL key with Lease
|
||||
- more efficient / low-cost keep-alive
|
||||
- a logical group of TTL keys
|
||||
|
||||
5. Replace CAS/CAD with multi-object Tnx
|
||||
- MUCH MORE powerful and flexible
|
||||
|
||||
6. Support efficient watching with multiple ranges
|
||||
|
||||
7. RPC API supports the complete set of APIs.
|
||||
- more efficient than JSON/HTTP
|
||||
- additional tnx/lease support
|
||||
|
||||
8. HTTP API supports a subset of APIs.
|
||||
- easy for people to try out etcd
|
||||
- easy for people to write simple etcd applications
|
||||
|
||||
|
||||
## Protobuf Defined API
|
||||
|
||||
[protobuf](./v3api.proto)
|
||||
|
||||
### Examples
|
||||
|
||||
#### Put a key (foo=bar)
|
||||
```
|
||||
// A put is always successful
|
||||
Put( PutRequest { key = foo, value = bar } )
|
||||
|
||||
PutResponse {
|
||||
cluster_id = 0x1000,
|
||||
member_id = 0x1,
|
||||
index = 1,
|
||||
raft_term = 0x1,
|
||||
}
|
||||
```
|
||||
|
||||
#### Get a key (assume we have foo=bar)
|
||||
```
|
||||
Range( RangeRequest { key = foo } )
|
||||
|
||||
RangeResponse {
|
||||
cluster_id = 0x1000,
|
||||
member_id = 0x1,
|
||||
index = 1,
|
||||
raft_term = 0x1,
|
||||
kvs = {
|
||||
{
|
||||
key = foo,
|
||||
value = bar,
|
||||
create_index = 1,
|
||||
mod_index = 1,
|
||||
version = 1,
|
||||
},
|
||||
},
|
||||
}
|
||||
```
|
||||
|
||||
#### Range over a key space (assume we have foo0=bar0… foo100=bar100)
|
||||
```
|
||||
Range ( RangeRequest { key = foo, range_end = foo80, limit = 30 } )
|
||||
|
||||
RangeResponse {
|
||||
cluster_id = 0x1000,
|
||||
member_id = 0x1,
|
||||
index = 100,
|
||||
raft_term = 0x1,
|
||||
kvs = {
|
||||
{
|
||||
key = foo0,
|
||||
value = bar0,
|
||||
create_index = 1,
|
||||
mod_index = 1,
|
||||
version = 1,
|
||||
},
|
||||
...,
|
||||
{
|
||||
key = foo30,
|
||||
value = bar30,
|
||||
create_index = 30,
|
||||
mod_index = 30,
|
||||
version = 1,
|
||||
},
|
||||
},
|
||||
}
|
||||
```
|
||||
|
||||
#### Finish a tnx (assume we have foo0=bar0, foo1=bar1)
|
||||
```
|
||||
Tnx(TnxRequest {
  // mod_index of foo0 is equal to 1, mod_index of foo1 is greater than 1
  compare = {
    {compareType = equal, key = foo0, mod_index = 1},
    {compareType = greater, key = foo1, mod_index = 1},
  },
  // if the comparison succeeds, put foo2 = success
  success = {PutRequest { key = foo2, value = success }},
  // if the comparison fails, put foo2 = failure
  failure = {PutRequest { key = foo2, value = failure }},
})
|
||||
|
||||
TnxResponse {
|
||||
cluster_id = 0x1000,
|
||||
member_id = 0x1,
|
||||
index = 3,
|
||||
raft_term = 0x1,
|
||||
succeeded = true,
|
||||
responses = {
|
||||
// response of PUT foo2=success
|
||||
{
|
||||
cluster_id = 0x1000,
|
||||
member_id = 0x1,
|
||||
index = 3,
|
||||
raft_term = 0x1,
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
#### Watch on a key/range
|
||||
|
||||
```
|
||||
WatchRange( WatchRangeRequest {
|
||||
key = foo,
|
||||
range_end = fop, // prefix foo
|
||||
start_index = 20,
|
||||
end_index = 10000,
|
||||
// server decided notification frequency
|
||||
progress_notification = true,
|
||||
}
|
||||
… // this can be a watch request stream
|
||||
)
|
||||
|
||||
// put (foo0=bar0) event at 3
|
||||
WatchRangeResponse {
|
||||
cluster_id = 0x1000,
|
||||
member_id = 0x1,
|
||||
index = 3,
|
||||
raft_term = 0x1,
|
||||
event_type = put,
|
||||
kv = {
|
||||
key = foo0,
|
||||
value = bar0,
|
||||
create_index = 1,
|
||||
mod_index = 1,
|
||||
version = 1,
|
||||
},
|
||||
}
|
||||
…
|
||||
|
||||
// a notification at 2000
|
||||
WatchRangeResponse {
|
||||
cluster_id = 0x1000,
|
||||
member_id = 0x1,
|
||||
index = 2000,
|
||||
raft_term = 0x1,
|
||||
// nil event as notification
|
||||
}
|
||||
|
||||
…
|
||||
|
||||
// put (foo0=bar3000) event at 3000
|
||||
WatchRangeResponse {
|
||||
cluster_id = 0x1000,
|
||||
member_id = 0x1,
|
||||
index = 3000,
|
||||
raft_term = 0x1,
|
||||
event_type = put,
|
||||
kv = {
|
||||
key = foo0,
|
||||
value = bar3000,
|
||||
create_index = 1,
|
||||
mod_index = 3000,
|
||||
version = 2,
|
||||
},
|
||||
}
|
||||
…
|
||||
|
||||
```
|
272
Documentation/rfc/v3api.proto
Normal file
272
Documentation/rfc/v3api.proto
Normal file
@ -0,0 +1,272 @@
|
||||
syntax = "proto3";
|
||||
|
||||
// Interface exported by the server.
service etcd {
  // Range gets the keys in the range from the store.
  rpc Range(RangeRequest) returns (RangeResponse) {}

  // Put puts the given key into the store.
  // A put request increases the index of the store,
  // and generates one event in the event history.
  rpc Put(PutRequest) returns (PutResponse) {}

  // DeleteRange deletes the given range from the store.
  // A delete request increases the index of the store,
  // and generates one event in the event history.
  rpc DeleteRange(DeleteRangeRequest) returns (DeleteRangeResponse) {}

  // Tnx processes all the requests in one transaction.
  // A tnx request increases the index of the store,
  // and generates events with the same index in the event history.
  rpc Tnx(TnxRequest) returns (TnxResponse) {}

  // WatchRange watches the events happening or that have happened in etcd.
  // Both input and output are streams. One WatchRange rpc can watch multiple
  // ranges and get a stream of events. The whole event history can be watched
  // unless compacted.
  rpc WatchRange(stream WatchRangeRequest) returns (stream WatchRangeResponse) {}

  // Compact compacts the event history in etcd. User should compact the
  // event history periodically, or it will grow infinitely.
  rpc Compact(CompactionRequest) returns (CompactionResponse) {}

  // LeaseCreate creates a lease. A lease has a TTL. The lease will expire if the
  // server does not receive a keepAlive within TTL from the lease holder.
  // All keys attached to the lease will be expired and deleted if the lease expires.
  // The key expiration generates an event in event history.
  rpc LeaseCreate(LeaseCreateRequest) returns (LeaseCreateResponse) {}

  // LeaseRevoke revokes a lease. All the keys attached to the lease will be
  // expired and deleted.
  rpc LeaseRevoke(LeaseRevokeRequest) returns (LeaseRevokeResponse) {}

  // LeaseAttach attaches keys with a lease.
  rpc LeaseAttach(LeaseAttachRequest) returns (LeaseAttachResponse) {}

  // LeaseTnx is like Tnx. It has two additional LeaseAttachRequest lists:
  // success and failure. If the Tnx is successful, then the success list will
  // be executed. Otherwise the failure list will be executed.
  rpc LeaseTnx(LeaseTnxRequest) returns (LeaseTnxResponse) {}

  // LeaseKeepAlive keeps the lease alive. Both input and output are streams.
  rpc LeaseKeepAlive(stream LeaseKeepAliveRequest) returns (stream LeaseKeepAliveResponse) {}
}
|
||||
|
||||
// ResponseHeader is the metadata embedded as the `header` field of the
// response messages in this file.
message ResponseHeader {
  // an error type message?
  optional string error = 1;
  // NOTE(review): presumably the ID of the cluster that produced the
  // response — confirm.
  optional uint64 cluster_id = 2;
  // NOTE(review): presumably the ID of the member that produced the
  // response — confirm.
  optional uint64 member_id = 3;
  // index of the store when the request was applied.
  optional int64 index = 4;
  // term of raft when the request was applied.
  optional uint64 raft_term = 5;
}
|
||||
|
||||
// RangeRequest is the request of the Range rpc. It asks for a single key or
// for the keys in a range.
message RangeRequest {
  // if the range_end is not given, the request returns the key.
  optional bytes key = 1;
  // if the range_end is given, it gets the keys in range [key, range_end).
  optional bytes range_end = 2;
  // limit the number of keys returned.
  optional int64 limit = 3;
  // the response will be consistent with previous request with same token if the token is
  // given and is valid.
  optional bytes consistent_token = 4;
}
|
||||
|
||||
// RangeResponse is the response of the Range rpc.
message RangeResponse {
  optional ResponseHeader header = 1;
  // kvs is the list of key-values returned for the request.
  repeated KeyValue kvs = 2;
  // NOTE(review): presumably the token to send back as
  // RangeRequest.consistent_token to get a consistent follow-up read — confirm.
  optional bytes consistent_token = 3;
}
|
||||
|
||||
// PutRequest is the request of the Put rpc. It can also be executed inside a
// transaction as a member of RequestUnion.
message PutRequest {
  // key is the key to put into the store.
  optional bytes key = 1;
  // value is the value to store under the key.
  optional bytes value = 2;
}
|
||||
|
||||
// PutResponse is the response of the Put rpc. It carries only the header.
message PutResponse {
  optional ResponseHeader header = 1;
}
|
||||
|
||||
// DeleteRangeRequest is the request of the DeleteRange rpc. It deletes a
// single key or the keys in a range.
message DeleteRangeRequest {
  // if the range_end is not given, the request deletes the key.
  optional bytes key = 1;
  // if the range_end is given, it deletes the keys in range [key, range_end).
  optional bytes range_end = 2;
}
|
||||
|
||||
// DeleteRangeResponse is the response of the DeleteRange rpc. It carries only
// the header.
message DeleteRangeResponse {
  optional ResponseHeader header = 1;
}
|
||||
|
||||
// RequestUnion holds exactly one request. It is the element type of the
// success and failure lists of TnxRequest.
message RequestUnion {
  oneof request {
    RangeRequest request_range = 1;
    PutRequest request_put = 2;
    DeleteRangeRequest request_delete_range = 3;
  }
}
|
||||
|
||||
// ResponseUnion holds exactly one response. It is the element type of
// TnxResponse.responses, and its members mirror those of RequestUnion.
message ResponseUnion {
  oneof response {
    // spelled response_range to match response_put and
    // response_delete_range; the field number is unchanged.
    RangeResponse response_range = 1;
    PutResponse response_put = 2;
    DeleteRangeResponse response_delete_range = 3;
  }
}
|
||||
|
||||
// Compare is one comparison evaluated by a TnxRequest: an attribute of the
// given key is compared against the target value using the given type.
message Compare {
  // CompareType is the relation the key's attribute must have to the target,
  // e.g. GREATER with mod_index = 1 means "mod_index of the key is greater
  // than 1".
  enum CompareType {
    EQUAL = 0;
    GREATER = 1;
    LESS = 2;
  }
  optional CompareType type = 1;
  // key path
  optional bytes key = 2;
  // target selects which attribute of the key is compared and gives the
  // value to compare it with.
  oneof target {
    // version of the given key
    int64 version = 3;
    // create index of the given key
    int64 create_index = 4;
    // last modified index of the given key
    int64 mod_index = 5;
    // value of the given key
    bytes value = 6;
  }
}
|
||||
|
||||
// First all the compare requests are processed.
|
||||
// If all the compares succeed, all the success
|
||||
// requests will be processed.
|
||||
// Otherwise, all the failure requests will be processed and
|
||||
// all the errors in the comparison will be returned.
|
||||
|
||||
// From google paxosdb paper:
|
||||
// Our implementation hinges around a powerful primitive which we call MultiOp. All other database
|
||||
// operations except for iteration are implemented as a single call to MultiOp. A MultiOp is applied atomically
|
||||
// and consists of three components:
|
||||
// 1. A list of tests called guard. Each test in guard checks a single entry in the database. It may check
|
||||
// for the absence or presence of a value, or compare with a given value. Two different tests in the guard
|
||||
// may apply to the same or different entries in the database. All tests in the guard are applied and
|
||||
// MultiOp returns the results. If all tests are true, MultiOp executes t op (see item 2 below), otherwise
|
||||
// it executes f op (see item 3 below).
|
||||
// 2. A list of database operations called t op. Each operation in the list is either an insert, delete, or
|
||||
// lookup operation, and applies to a single database entry. Two different operations in the list may apply
|
||||
// to the same or different entries in the database. These operations are executed
|
||||
// if guard evaluates to
|
||||
// true.
|
||||
// 3. A list of database operations called f op. Like t op, but executed if guard evaluates to false.
|
||||
// TnxRequest is the request of the Tnx rpc: a list of compares guarding two
// alternative lists of requests.
message TnxRequest {
  // compare is the list of comparisons that are processed first.
  repeated Compare compare = 1;
  // success is the list of requests processed if all the compares succeed.
  repeated RequestUnion success = 2;
  // failure is the list of requests processed otherwise.
  repeated RequestUnion failure = 3;
}
|
||||
|
||||
// TnxResponse is the response of the Tnx rpc.
message TnxResponse {
  optional ResponseHeader header = 1;
  // NOTE(review): presumably true when all compares of the request
  // succeeded, i.e. the success list was processed — confirm.
  optional bool succeeded = 2;
  // responses holds the responses of the requests that were processed.
  repeated ResponseUnion responses = 3;
}
|
||||
|
||||
// KeyValue is a key, its value and its index/version bookkeeping as stored
// in the store.
message KeyValue {
  optional bytes key = 1;
  // create_index is the create index of the key.
  optional int64 create_index = 2;
  // mod_index is the last modified index of the key.
  optional int64 mod_index = 3;
  // version is the version of the key. A deletion resets
  // the version to zero and any modification of the key
  // increases its version.
  optional int64 version = 4;
  // value is the value stored under the key.
  optional bytes value = 5;
}
|
||||
|
||||
// WatchRangeRequest is one message of the WatchRange request stream. It asks
// to watch a single key or a range of keys.
message WatchRangeRequest {
  // if the range_end is not given, the request watches the key.
  optional bytes key = 1;
  // if the range_end is given, it watches the keys in range [key, range_end).
  optional bytes range_end = 2;
  // start_index is an optional index (including) to watch from. No start_index is "now".
  optional int64 start_index = 3;
  // end_index is an optional index (excluding) to end watch. No end_index is "forever".
  optional int64 end_index = 4;
  // NOTE(review): presumably asks the server to also send responses that
  // carry no event as progress notifications, at a frequency decided by the
  // server — confirm.
  optional bool progress_notification = 5;
}
|
||||
|
||||
// WatchRangeResponse is one message of the WatchRange response stream.
message WatchRangeResponse {
  optional ResponseHeader header = 1;
  // events is the list of events delivered by this response.
  repeated Event events = 2;
}
|
||||
|
||||
// Event is one entry of the event history: what happened to a key, together
// with the affected key-value.
message Event {
  // EventType is the kind of change the event records.
  enum EventType {
    PUT = 0;
    DELETE = 1;
    EXPIRE = 2;
  }
  optional EventType event_type = 1;
  // a put event contains the current key-value
  // a delete/expire event contains the previous
  // key-value
  optional KeyValue kv = 2;
}
|
||||
|
||||
// CompactionRequest is the request of the Compact rpc.
message CompactionRequest {
  // NOTE(review): presumably the index up to which the event history is
  // compacted — confirm, including whether the index itself is kept.
  optional int64 index = 1;
}
|
||||
|
||||
// CompactionResponse is the response of the Compact rpc. It carries only the
// header.
message CompactionResponse {
  optional ResponseHeader header = 1;
}
|
||||
|
||||
// LeaseCreateRequest is the request of the LeaseCreate rpc.
message LeaseCreateRequest {
  // advisory ttl in seconds
  optional int64 ttl = 1;
}
|
||||
|
||||
// LeaseCreateResponse is the response of the LeaseCreate rpc.
message LeaseCreateResponse {
  optional ResponseHeader header = 1;
  // lease_id is the ID of the created lease.
  optional int64 lease_id = 2;
  // server decided ttl in seconds
  optional int64 ttl = 3;
  // NOTE(review): ResponseHeader already has an error field; the relation
  // between the two is not specified here — confirm.
  optional string error = 4;
}
|
||||
|
||||
// LeaseRevokeRequest is the request of the LeaseRevoke rpc.
message LeaseRevokeRequest {
  // lease_id is the ID of the lease to revoke.
  optional int64 lease_id = 1;
}
|
||||
|
||||
// LeaseRevokeResponse is the response of the LeaseRevoke rpc. It carries only
// the header.
message LeaseRevokeResponse {
  optional ResponseHeader header = 1;
}
|
||||
|
||||
// LeaseTnxRequest is the request of the LeaseTnx rpc: a TnxRequest plus two
// lists of lease attachments.
message LeaseTnxRequest {
  // request is the transaction to process.
  optional TnxRequest request = 1;
  // success is the list of attachments executed if the Tnx is successful.
  repeated LeaseAttachRequest success = 2;
  // failure is the list of attachments executed otherwise.
  repeated LeaseAttachRequest failure = 3;
}
|
||||
|
||||
// LeaseTnxResponse is the response of the LeaseTnx rpc.
message LeaseTnxResponse {
  optional ResponseHeader header = 1;
  // response is the response of the embedded TnxRequest.
  optional TnxResponse response = 2;
  // attach_responses holds the responses of the LeaseAttachRequests that
  // were executed.
  repeated LeaseAttachResponse attach_responses = 3;
}
|
||||
|
||||
// LeaseAttachRequest is the request of the LeaseAttach rpc. It is also the
// element type of the success and failure lists of LeaseTnxRequest.
message LeaseAttachRequest {
  // lease_id is the ID of the lease to attach the key to.
  optional int64 lease_id = 1;
  // key is the key to attach.
  optional bytes key = 2;
}
|
||||
|
||||
// LeaseAttachResponse is the response of the LeaseAttach rpc. It carries only
// the header.
message LeaseAttachResponse {
  optional ResponseHeader header = 1;
}
|
||||
|
||||
// LeaseKeepAliveRequest is one message of the LeaseKeepAlive request stream.
message LeaseKeepAliveRequest {
  // lease_id is the ID of the lease to keep alive.
  optional int64 lease_id = 1;
}
|
||||
|
||||
// LeaseKeepAliveResponse is one message of the LeaseKeepAlive response
// stream.
message LeaseKeepAliveResponse {
  optional ResponseHeader header = 1;
  // lease_id is the ID of the lease the response refers to.
  optional int64 lease_id = 2;
  // NOTE(review): presumably the TTL of the lease in seconds after the
  // keep-alive, as in LeaseCreateResponse — confirm.
  optional int64 ttl = 3;
}
|
Loading…
x
Reference in New Issue
Block a user