mirror of
https://github.com/kaspanet/kaspad.git
synced 2025-03-30 15:08:33 +00:00
Implement a stability test to stress test the DAA algorithm (#1767)
* Copy over boilerplate and begin implementing TestDAA. * Implement a fairly reliable method of hashing at a certain hashrate. * Convert the DAA test to an application. * Start kaspad and make sure that hashrate throttling works with that as well. * Finish implementing testConstantHashRate. * Tidied up a bit. * Convert TestDAA back into a go test. * Reorganize TestDAA to be more like a traditional test. * Add sudden hashrate drop/jump tests. * Simplify targetHashNanosecondsFunction. * Improve progress logs. * Add more tests. * Remove the no-longer relevant `hashes` part of targetHashNanosecondsFunction. * Implement a constant hashrate increase test. * Implement a constant hashrate decrease test. * Give the correct run duration to the constant hashrate decrease test. * Add cooldowns to exponential functions. * Add run.sh to the DAA test. * Add a README. * Add `daa` to run-slow.sh. * Make go lint happy. * Fix the README's title. * Keep running tests even if one of them failed on high block rate deviation. * Fix hashrate peak/valley tests. * Preallocate arrays for hash and mining durations. * Add more statistics to the "mined block" log. * Make sure runDAATest stops when it's suppposed to. * Add a newline after "5 minute cooldown." * Fix variable names. * Rename totalElapsedTime to tatalElapsedDuration. * In measureMachineHashNanoseconds, generate a random nonce only once. * In runDAATest, add "DAA" to the start/finish log. * Remove --logdir from kaspadRunCommand. * In runDAATest, enlarge the nonce range to the entirety of uint64. * Explain what targetHashNanosecondsFunction is. * Move RunKaspadForTesting into common. * Rename runForDuration to loopForDuration. * Make go lint happy. * Extract fetchBlockForMining to a separate function. * Extract waitUntilTargetHashDurationHadElapsed to a separate function. * Extract pushHashDuration and pushMiningDuration to separate functions. * Extract logMinedBlockStatsAndUpdateStatFields to a separate function. 
* Extract submitMinedBlock to a separate function. * Extract tryNonceForMiningAndIncrementNonce to a separate function. * Add comments. * Use a rolling average instead of appending to an array for performance/accuracy. * Change a word in a comment. * Explain why we wait for five minutes at the end of the exponential increase/decrease tests. Co-authored-by: Svarog <feanorr@gmail.com>
This commit is contained in:
parent
2f272cd517
commit
487fab0e2b
53
stability-tests/common/run-kaspad.go
Normal file
53
stability-tests/common/run-kaspad.go
Normal file
@ -0,0 +1,53 @@
|
||||
package common
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"github.com/kaspanet/kaspad/domain/dagconfig"
|
||||
"os"
|
||||
"sync/atomic"
|
||||
"syscall"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// RunKaspadForTesting runs kaspad for testing purposes
|
||||
func RunKaspadForTesting(t *testing.T, testName string, rpcAddress string) func() {
|
||||
appDir, err := TempDir(testName)
|
||||
if err != nil {
|
||||
t.Fatalf("TempDir: %s", err)
|
||||
}
|
||||
|
||||
kaspadRunCommand, err := StartCmd("KASPAD",
|
||||
"kaspad",
|
||||
NetworkCliArgumentFromNetParams(&dagconfig.DevnetParams),
|
||||
"--appdir", appDir,
|
||||
"--rpclisten", rpcAddress,
|
||||
"--loglevel", "debug",
|
||||
)
|
||||
if err != nil {
|
||||
t.Fatalf("StartCmd: %s", err)
|
||||
}
|
||||
t.Logf("Kaspad started with --appdir=%s", appDir)
|
||||
|
||||
isShutdown := uint64(0)
|
||||
go func() {
|
||||
err := kaspadRunCommand.Wait()
|
||||
if err != nil {
|
||||
if atomic.LoadUint64(&isShutdown) == 0 {
|
||||
panic(fmt.Sprintf("Kaspad closed unexpectedly: %s. See logs at: %s", err, appDir))
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
return func() {
|
||||
err := kaspadRunCommand.Process.Signal(syscall.SIGTERM)
|
||||
if err != nil {
|
||||
t.Fatalf("Signal: %s", err)
|
||||
}
|
||||
err = os.RemoveAll(appDir)
|
||||
if err != nil {
|
||||
t.Fatalf("RemoveAll: %s", err)
|
||||
}
|
||||
atomic.StoreUint64(&isShutdown, 1)
|
||||
t.Logf("Kaspad stopped")
|
||||
}
|
||||
}
|
11
stability-tests/daa/README.md
Normal file
11
stability-tests/daa/README.md
Normal file
@ -0,0 +1,11 @@
|
||||
# DAA tool
|
||||
|
||||
This tool simulates various hashrate patterns to stress-test the DAA (Difficulty Adjustment Algorithm).
|
||||
|
||||
## Running
|
||||
|
||||
1. `go install` kaspad and daa.
|
||||
2. `cd run`
|
||||
3. `./run.sh`
|
||||
|
||||
|
37
stability-tests/daa/average_duration.go
Normal file
37
stability-tests/daa/average_duration.go
Normal file
@ -0,0 +1,37 @@
|
||||
package daa
|
||||
|
||||
import "time"
|
||||
|
||||
type averageDuration struct {
|
||||
average float64
|
||||
count uint64
|
||||
sampleSize uint64
|
||||
}
|
||||
|
||||
func newAverageDuration(sampleSize uint64) *averageDuration {
|
||||
return &averageDuration{
|
||||
average: 0,
|
||||
count: 0,
|
||||
sampleSize: sampleSize,
|
||||
}
|
||||
}
|
||||
|
||||
func (ad *averageDuration) add(duration time.Duration) {
|
||||
durationNanoseconds := float64(duration.Nanoseconds())
|
||||
|
||||
ad.count++
|
||||
if ad.count > ad.sampleSize {
|
||||
ad.count = ad.sampleSize
|
||||
}
|
||||
|
||||
if ad.count == 1 {
|
||||
ad.average = durationNanoseconds
|
||||
return
|
||||
}
|
||||
|
||||
ad.average = ad.average + ((durationNanoseconds - ad.average) / float64(ad.count))
|
||||
}
|
||||
|
||||
func (ad *averageDuration) toDuration() time.Duration {
|
||||
return time.Duration(ad.average)
|
||||
}
|
334
stability-tests/daa/daa_test.go
Normal file
334
stability-tests/daa/daa_test.go
Normal file
@ -0,0 +1,334 @@
|
||||
package daa
|
||||
|
||||
import (
|
||||
"github.com/kaspanet/kaspad/app/appmessage"
|
||||
"github.com/kaspanet/kaspad/domain/consensus/model/externalapi"
|
||||
"github.com/kaspanet/kaspad/domain/consensus/utils/pow"
|
||||
"github.com/kaspanet/kaspad/domain/dagconfig"
|
||||
"github.com/kaspanet/kaspad/infrastructure/network/rpcclient"
|
||||
"github.com/kaspanet/kaspad/stability-tests/common"
|
||||
"github.com/kaspanet/kaspad/util/difficulty"
|
||||
"math"
|
||||
"math/big"
|
||||
"math/rand"
|
||||
"os"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
// rpcAddress is the address the test's dedicated kaspad instance listens on for RPC.
const rpcAddress = "localhost:9000"

// miningAddress receives the coinbase rewards of blocks mined by this test (a devnet address).
const miningAddress = "kaspadev:qrcqat6l9zcjsu7swnaztqzrv0s7hu04skpaezxk43y4etj8ncwfkuhy0zmax"

// blockRateDeviationThreshold is the maximum tolerated deviation, in seconds,
// of the average block mining duration from the expected one second.
const blockRateDeviationThreshold = 0.5

// averageBlockRateSampleSize is the rolling-average window for block mining durations.
const averageBlockRateSampleSize = 60

// averageHashRateSampleSize is the rolling-average window for individual hash durations.
const averageHashRateSampleSize = 100_000
|
||||
|
||||
// TestDAA stress-tests kaspad's difficulty adjustment algorithm (DAA) by
// mining against a local kaspad instance under a series of simulated hashrate
// patterns (constant, sudden jumps/drops, peaks/valleys, periodic spikes, and
// exponential ramps) and checking, per scenario, that the average block rate
// stays within blockRateDeviationThreshold of the expected rate.
//
// The test is skipped unless the RUN_STABILITY_TESTS environment variable is
// set (see run/run.sh).
func TestDAA(t *testing.T) {
	if os.Getenv("RUN_STABILITY_TESTS") == "" {
		t.Skip()
	}

	// Calibrate against this machine so each scenario below can express its
	// simulated hashrate as a multiple of the machine's real per-hash time.
	machineHashNanoseconds := measureMachineHashNanoseconds(t)
	t.Logf("Machine hashes per second: %d", hashNanosecondsToHashesPerSecond(machineHashNanoseconds))

	tests := []struct {
		name        string
		runDuration time.Duration

		// targetHashNanosecondsFunction receives the duration of time between now and the start
		// of the run (moments before the first hash has been calculated). It returns the target
		// duration of a single hash operation in nanoseconds (greater return value = lower hash rate)
		targetHashNanosecondsFunction func(totalElapsedDuration time.Duration) int64
	}{
		{
			name:        "constant hash rate",
			runDuration: 10 * time.Minute,
			targetHashNanosecondsFunction: func(totalElapsedDuration time.Duration) int64 {
				return machineHashNanoseconds * 2
			},
		},
		{
			name:        "sudden hash rate drop",
			runDuration: 15 * time.Minute,
			targetHashNanosecondsFunction: func(totalElapsedDuration time.Duration) int64 {
				// High hashrate for 5 minutes, then a 5x drop
				if totalElapsedDuration < 5*time.Minute {
					return machineHashNanoseconds * 2
				}
				return machineHashNanoseconds * 10
			},
		},
		{
			name:        "sudden hash rate jump",
			runDuration: 15 * time.Minute,
			targetHashNanosecondsFunction: func(totalElapsedDuration time.Duration) int64 {
				// Low hashrate for 5 minutes, then a 5x jump
				if totalElapsedDuration < 5*time.Minute {
					return machineHashNanoseconds * 10
				}
				return machineHashNanoseconds * 2
			},
		},
		{
			name:        "hash rate peak",
			runDuration: 10 * time.Minute,
			targetHashNanosecondsFunction: func(totalElapsedDuration time.Duration) int64 {
				// A single one-minute burst of high hashrate mid-run
				if totalElapsedDuration > 4*time.Minute && totalElapsedDuration < 5*time.Minute {
					return machineHashNanoseconds * 2
				}
				return machineHashNanoseconds * 10
			},
		},
		{
			name:        "hash rate valley",
			runDuration: 10 * time.Minute,
			targetHashNanosecondsFunction: func(totalElapsedDuration time.Duration) int64 {
				// A single one-minute dip of low hashrate mid-run
				if totalElapsedDuration > 4*time.Minute && totalElapsedDuration < 5*time.Minute {
					return machineHashNanoseconds * 10
				}
				return machineHashNanoseconds * 2
			},
		},
		{
			name:        "periodic hash rate peaks",
			runDuration: 10 * time.Minute,
			targetHashNanosecondsFunction: func(totalElapsedDuration time.Duration) int64 {
				// Spike the hashrate during one second out of every 30
				if int(totalElapsedDuration.Seconds())%30 == 0 {
					return machineHashNanoseconds * 2
				}
				return machineHashNanoseconds * 10
			},
		},
		{
			name:        "periodic hash rate valleys",
			runDuration: 10 * time.Minute,
			targetHashNanosecondsFunction: func(totalElapsedDuration time.Duration) int64 {
				// Dip the hashrate during one second out of every 30
				if int(totalElapsedDuration.Seconds())%30 == 0 {
					return machineHashNanoseconds * 10
				}
				return machineHashNanoseconds * 2
			},
		},
		{
			name:        "constant exponential hash rate increase",
			runDuration: 15 * time.Minute,
			targetHashNanosecondsFunction: func(totalElapsedDuration time.Duration) int64 {
				fromHashNanoseconds := machineHashNanoseconds * 10
				toHashNanoseconds := machineHashNanoseconds * 2

				if totalElapsedDuration < 10*time.Minute {
					exponentialIncreaseDuration := 10 * time.Minute
					timeElapsedFraction := float64(totalElapsedDuration.Nanoseconds()) / float64(exponentialIncreaseDuration.Nanoseconds())

					// Exponentially interpolate from the slow per-hash time to
					// the fast one over exponentialIncreaseDuration
					return fromHashNanoseconds -
						int64(math.Pow(float64(fromHashNanoseconds-toHashNanoseconds), timeElapsedFraction))
				}

				// 5 minute cooldown. We expect the DAA to still be "catching up" at the end
				// of the exponential increase so, for the sake of testing, we wait a while for
				// the hash rate to stabilize

				return toHashNanoseconds
			},
		},
		{
			name:        "constant exponential hash rate decrease",
			runDuration: 15 * time.Minute,
			targetHashNanosecondsFunction: func(totalElapsedDuration time.Duration) int64 {
				fromHashNanoseconds := machineHashNanoseconds * 2
				toHashNanoseconds := machineHashNanoseconds * 10

				if totalElapsedDuration < 10*time.Minute {
					exponentialDecreaseDuration := 10 * time.Minute
					timeElapsedFraction := float64(totalElapsedDuration.Nanoseconds()) / float64(exponentialDecreaseDuration.Nanoseconds())

					// Exponentially interpolate from the fast per-hash time to
					// the slow one over exponentialDecreaseDuration
					return fromHashNanoseconds +
						int64(math.Pow(float64(toHashNanoseconds-fromHashNanoseconds), timeElapsedFraction))
				}

				// 5 minute cooldown. We expect the DAA to still be "catching up" at the end
				// of the exponential decrease so, for the sake of testing, we wait a while for
				// the hash rate to stabilize

				return toHashNanoseconds
			},
		},
	}

	for _, test := range tests {
		// runDAATest reports deviations via t.Errorf rather than t.Fatalf, so
		// one failing scenario does not stop the remaining scenarios
		runDAATest(t, test.name, test.runDuration, test.targetHashNanosecondsFunction)
	}
}
|
||||
|
||||
func measureMachineHashNanoseconds(t *testing.T) int64 {
|
||||
t.Logf("Measuring machine hash rate")
|
||||
defer t.Logf("Finished measuring machine hash rate")
|
||||
|
||||
genesisBlock := dagconfig.DevnetParams.GenesisBlock
|
||||
targetDifficulty := difficulty.CompactToBig(genesisBlock.Header.Bits())
|
||||
headerForMining := genesisBlock.Header.ToMutable()
|
||||
|
||||
machineHashesPerSecondMeasurementDuration := 10 * time.Second
|
||||
hashes := int64(0)
|
||||
nonce := rand.Uint64()
|
||||
loopForDuration(machineHashesPerSecondMeasurementDuration, func(isFinished *bool) {
|
||||
headerForMining.SetNonce(nonce)
|
||||
pow.CheckProofOfWorkWithTarget(headerForMining, targetDifficulty)
|
||||
hashes++
|
||||
nonce++
|
||||
})
|
||||
|
||||
return machineHashesPerSecondMeasurementDuration.Nanoseconds() / hashes
|
||||
}
|
||||
|
||||
// runDAATest mines blocks against a dedicated kaspad instance for runDuration,
// throttling the hash rate per targetHashNanosecondsFunction, and then asserts
// that the average block mining duration deviates from the expected one second
// by no more than blockRateDeviationThreshold. Failures are reported with
// t.Errorf so subsequent scenarios still run.
func runDAATest(t *testing.T, testName string, runDuration time.Duration,
	targetHashNanosecondsFunction func(totalElapsedDuration time.Duration) int64) {

	t.Logf("DAA TEST STARTED: %s", testName)
	defer t.Logf("DAA TEST FINISHED: %s", testName)

	// Each scenario gets a fresh kaspad (and thus a fresh DAG/difficulty state)
	tearDownKaspad := common.RunKaspadForTesting(t, "kaspad-daa-test", rpcAddress)
	defer tearDownKaspad()

	rpcClient, err := rpcclient.NewRPCClient(rpcAddress)
	if err != nil {
		t.Fatalf("NewRPCClient: %s", err)
	}

	// These variables are for gathering stats. Useful mostly for debugging
	averageHashDuration := newAverageDuration(averageHashRateSampleSize)
	averageMiningDuration := newAverageDuration(averageBlockRateSampleSize)
	previousDifficulty := float64(0)
	blocksMined := 0

	// Mine blocks the same way a CPU miner mines blocks until `runDuration` elapses
	startTime := time.Now()
	loopForDuration(runDuration, func(isFinished *bool) {
		templateBlock := fetchBlockForMining(t, rpcClient)
		targetDifficulty := difficulty.CompactToBig(templateBlock.Header.Bits())
		headerForMining := templateBlock.Header.ToMutable()

		// Try hashes until we find a valid block
		miningStartTime := time.Now()
		// Random starting nonce so runs explore different parts of the nonce space
		nonce := rand.Uint64()
		for {
			hashStartTime := time.Now()

			blockFound := tryNonceForMiningAndIncrementNonce(headerForMining, &nonce, targetDifficulty, templateBlock)
			if blockFound {
				break
			}

			// Throttle the hash rate by waiting until the target hash duration elapses
			waitUntilTargetHashDurationHadElapsed(startTime, hashStartTime, targetHashNanosecondsFunction)

			// Collect stats about hash rate
			hashDuration := time.Since(hashStartTime)
			averageHashDuration.add(hashDuration)

			// Exit early if the test is finished
			if *isFinished {
				return
			}
		}

		// Collect stats about block rate
		miningDuration := time.Since(miningStartTime)
		averageMiningDuration.add(miningDuration)

		logMinedBlockStatsAndUpdateStatFields(t, rpcClient, averageMiningDuration, averageHashDuration, startTime,
			miningDuration, &previousDifficulty, &blocksMined)

		// Exit early if the test is finished
		if *isFinished {
			return
		}

		submitMinedBlock(t, rpcClient, templateBlock)
	})

	// The DAA's goal is an average of one block per second; flag the scenario
	// if the observed average strays beyond the allowed threshold
	averageMiningDurationInSeconds := averageMiningDuration.toDuration().Seconds()
	expectedAverageMiningDurationInSeconds := float64(1)
	deviation := math.Abs(expectedAverageMiningDurationInSeconds - averageMiningDurationInSeconds)
	if deviation > blockRateDeviationThreshold {
		t.Errorf("Block rate deviation %f is higher than threshold %f. Want: %f, got: %f",
			deviation, blockRateDeviationThreshold, expectedAverageMiningDurationInSeconds, averageMiningDurationInSeconds)
	}
}
|
||||
|
||||
func fetchBlockForMining(t *testing.T, rpcClient *rpcclient.RPCClient) *externalapi.DomainBlock {
|
||||
getBlockTemplateResponse, err := rpcClient.GetBlockTemplate(miningAddress)
|
||||
if err != nil {
|
||||
t.Fatalf("GetBlockTemplate: %s", err)
|
||||
}
|
||||
templateBlock, err := appmessage.RPCBlockToDomainBlock(getBlockTemplateResponse.Block)
|
||||
if err != nil {
|
||||
t.Fatalf("RPCBlockToDomainBlock: %s", err)
|
||||
}
|
||||
return templateBlock
|
||||
}
|
||||
|
||||
func tryNonceForMiningAndIncrementNonce(headerForMining externalapi.MutableBlockHeader, nonce *uint64,
|
||||
targetDifficulty *big.Int, templateBlock *externalapi.DomainBlock) bool {
|
||||
|
||||
headerForMining.SetNonce(*nonce)
|
||||
if pow.CheckProofOfWorkWithTarget(headerForMining, targetDifficulty) {
|
||||
templateBlock.Header = headerForMining.ToImmutable()
|
||||
return true
|
||||
}
|
||||
|
||||
*nonce++
|
||||
return false
|
||||
}
|
||||
|
||||
func waitUntilTargetHashDurationHadElapsed(startTime time.Time, hashStartTime time.Time,
|
||||
targetHashNanosecondsFunction func(totalElapsedDuration time.Duration) int64) {
|
||||
|
||||
// Yielding a thread in Go takes up to a few milliseconds whereas hashing once
|
||||
// takes a few hundred nanoseconds, so we spin in place instead of e.g. calling time.Sleep()
|
||||
for {
|
||||
targetHashNanoseconds := targetHashNanosecondsFunction(time.Since(startTime))
|
||||
hashElapsedDurationNanoseconds := time.Since(hashStartTime).Nanoseconds()
|
||||
if hashElapsedDurationNanoseconds >= targetHashNanoseconds {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func logMinedBlockStatsAndUpdateStatFields(t *testing.T, rpcClient *rpcclient.RPCClient,
|
||||
averageMiningDuration *averageDuration, averageHashDurations *averageDuration,
|
||||
startTime time.Time, miningDuration time.Duration, previousDifficulty *float64, blocksMined *int) {
|
||||
|
||||
averageMiningDurationAsDuration := averageMiningDuration.toDuration()
|
||||
averageHashNanoseconds := averageHashDurations.toDuration().Nanoseconds()
|
||||
averageHashesPerSecond := hashNanosecondsToHashesPerSecond(averageHashNanoseconds)
|
||||
blockDAGInfoResponse, err := rpcClient.GetBlockDAGInfo()
|
||||
if err != nil {
|
||||
t.Fatalf("GetBlockDAGInfo: %s", err)
|
||||
}
|
||||
difficultyDelta := blockDAGInfoResponse.Difficulty - *previousDifficulty
|
||||
*previousDifficulty = blockDAGInfoResponse.Difficulty
|
||||
*blocksMined++
|
||||
t.Logf("Mined block. Took: %s, average block mining duration: %s, "+
|
||||
"average hashes per second: %d, difficulty delta: %f, time elapsed: %s, blocks mined: %d",
|
||||
miningDuration, averageMiningDurationAsDuration, averageHashesPerSecond, difficultyDelta, time.Since(startTime), *blocksMined)
|
||||
}
|
||||
|
||||
func submitMinedBlock(t *testing.T, rpcClient *rpcclient.RPCClient, block *externalapi.DomainBlock) {
|
||||
_, err := rpcClient.SubmitBlock(block)
|
||||
if err != nil {
|
||||
t.Fatalf("SubmitBlock: %s", err)
|
||||
}
|
||||
}
|
||||
|
||||
func hashNanosecondsToHashesPerSecond(hashNanoseconds int64) int64 {
|
||||
return time.Second.Nanoseconds() / hashNanoseconds
|
||||
}
|
||||
|
||||
// loopForDuration repeatedly calls runFunction on a background goroutine until
// duration elapses. The isFinished pointer handed to runFunction flips to true
// when time is up, letting a long-running iteration bail out early; the last
// in-flight iteration may still be executing when loopForDuration returns.
//
// NOTE(review): isFinished is written here and read by the goroutine with no
// synchronization — this is a data race under `go test -race`. Fixing it
// properly would require changing runFunction's signature (e.g. *atomic.Bool
// or a context), so it is only flagged here — TODO confirm and address.
func loopForDuration(duration time.Duration, runFunction func(isFinished *bool)) {
	isFinished := false
	go func() {
		for !isFinished {
			runFunction(&isFinished)
		}
	}()
	time.Sleep(duration)
	isFinished = true
}
|
13
stability-tests/daa/run/run.sh
Executable file
13
stability-tests/daa/run/run.sh
Executable file
@ -0,0 +1,13 @@
|
||||
#!/bin/bash

# Run the DAA stability test. The test skips itself unless RUN_STABILITY_TESTS
# is set. The 86400s (24h) timeout accommodates the many long-running
# hashrate scenarios.
RUN_STABILITY_TESTS=true go test ../ -v -timeout 86400s
TEST_EXIT_CODE=$?

echo "Exit code: $TEST_EXIT_CODE"

# Translate `go test`'s exit code into the PASSED/FAILED verdict expected by
# the stability-test harness (run-slow.sh).
if [ $TEST_EXIT_CODE -eq 0 ]; then
echo "daa test: PASSED"
exit 0
fi
echo "daa test: FAILED"
exit 1
@ -38,6 +38,10 @@ echo "Running many-tips"
|
||||
cd "${PROJECT_ROOT}/many-tips/run" && ./run.sh || failedTests+=("many-tips")
|
||||
echo "Done running many-tips"
|
||||
|
||||
echo "Running daa"
|
||||
cd "${PROJECT_ROOT}/daa/run" && ./run.sh || failedTests+=("daa")
|
||||
echo "Done running daa"
|
||||
|
||||
echo "Running reorg"
|
||||
cd "${PROJECT_ROOT}/reorg/run" && ./run-full-finality-window-reorg.sh || failedTests+=("reorg")
|
||||
echo "Done running reorg"
|
||||
|
Loading…
x
Reference in New Issue
Block a user