Merge pull request #221 from TheBestPessimist/tbp/google_rate_limit_exceeded

Tbp/google rate limit exceeded
gilbertchen
2017-10-16 19:48:06 -04:00
committed by GitHub

@@ -27,10 +27,11 @@ import (
 type GCDStorage struct {
     RateLimitedStorage
-    service     *drive.Service
-    idCache     map[string]string
-    idCacheLock *sync.Mutex
-    backoffs    []int
+    service         *drive.Service
+    idCache         map[string]string
+    idCacheLock     *sync.Mutex
+    backoffs        []float64
+    backoffsRetries []int
     isConnected     bool
     numberOfThreads int
@@ -45,11 +46,19 @@ type GCDConfig struct {
 }
 
 func (storage *GCDStorage) shouldRetry(threadIndex int, err error) (bool, error) {
+    const LIMIT_BACKOFF_TIME = 64
+    const MAX_NUMBER_OF_RETRIES = 15
+    minimumSleepRatio := 0.1
+    maximumSleepRatio := 0.2
+    minimumSleep := float64(storage.numberOfThreads) * minimumSleepRatio
+    maximumSleep := float64(storage.numberOfThreads) * maximumSleepRatio
+    rand.Seed(time.Now().UnixNano()) // unsure if this is needed
     retry := false
     message := ""
     if err == nil {
-        storage.backoffs[threadIndex] = 1
+        storage.backoffs[threadIndex] = computeInitialBackoff(minimumSleep, maximumSleep)
+        storage.backoffsRetries[threadIndex] = 0
         return false, nil
     } else if e, ok := err.(*googleapi.Error); ok {
         if 500 <= e.Code && e.Code < 600 {
@@ -62,8 +71,9 @@ func (storage *GCDStorage) shouldRetry(threadIndex int, err error) (bool, error)
             retry = true
         } else if e.Code == 403 {
             // User Rate Limit Exceeded
-            message = "User Rate Limit Exceeded"
+            message = e.Message // "User Rate Limit Exceeded"
             retry = true
         } else if e.Code == 401 {
             // Only retry on authorization error when storage has been connected before
             if storage.isConnected {
@@ -83,18 +93,37 @@ func (storage *GCDStorage) shouldRetry(threadIndex int, err error) (bool, error)
         retry = err.Temporary()
     }
-    if !retry || storage.backoffs[threadIndex] >= 256 {
-        storage.backoffs[threadIndex] = 1
+    if !retry || storage.backoffsRetries[threadIndex] >= MAX_NUMBER_OF_RETRIES {
+        LOG_INFO("GCD_RETRY", "Thread: %03d. Maximum number of retries reached. Backoff time: %.2f. Number of retries: %d", threadIndex, storage.backoffs[threadIndex], storage.backoffsRetries[threadIndex])
+        storage.backoffs[threadIndex] = computeInitialBackoff(minimumSleep, maximumSleep)
+        storage.backoffsRetries[threadIndex] = 0
         return false, err
     }
-    delay := float32(storage.backoffs[threadIndex]) * rand.Float32()
-    LOG_DEBUG("GCD_RETRY", "%s; retrying after %.2f seconds", message, delay)
-    time.Sleep(time.Duration(float32(storage.backoffs[threadIndex]) * float32(time.Second)))
-    storage.backoffs[threadIndex] *= 2
+    if storage.backoffs[threadIndex] < LIMIT_BACKOFF_TIME {
+        storage.backoffs[threadIndex] *= 2.0
+    } else {
+        storage.backoffs[threadIndex] = LIMIT_BACKOFF_TIME
+        storage.backoffsRetries[threadIndex] += 1
+    }
+    delay := storage.backoffs[threadIndex]*rand.Float64() + storage.backoffs[threadIndex]*rand.Float64()
+    LOG_DEBUG("GCD_RETRY", "Thread: %3d. Message: %s. Retrying after %6.2f seconds. Current backoff: %6.2f. Number of retries: %2d.", threadIndex, message, delay, storage.backoffs[threadIndex], storage.backoffsRetries[threadIndex])
+    time.Sleep(time.Duration(delay * float64(time.Second)))
     return true, nil
 }
+
+/*
+The random-value-in-a-range formula used below is explained at
+https://stackoverflow.com/questions/1527803/generating-random-whole-numbers-in-javascript-in-a-specific-range
+0.1 * (number of threads) is chosen as the minimum sleep time and
+0.2 * (number of threads) as the maximum sleep time for the first sleep of each
+thread's first backoff, so that neither program startup nor several threads
+retrying at once floods Google's API.
+*/
+func computeInitialBackoff(minimumSleep float64, maximumSleep float64) float64 {
+    return rand.Float64()*(maximumSleep-minimumSleep+1) + minimumSleep
+}
+
 func (storage *GCDStorage) convertFilePath(filePath string) string {
     if strings.HasPrefix(filePath, "chunks/") && strings.HasSuffix(filePath, ".fsl") {
         return "fossils/" + filePath[len("chunks/"):len(filePath)-len(".fsl")]
@@ -274,7 +303,12 @@ func CreateGCDStorage(tokenFile string, storagePath string, threads int) (storag
         numberOfThreads: threads,
         idCache:         make(map[string]string),
         idCacheLock:     &sync.Mutex{},
-        backoffs:        make([]int, threads),
+        backoffs:        make([]float64, threads),
+        backoffsRetries: make([]int, threads),
     }
+
+    for b := range storage.backoffs {
+        storage.backoffs[b] = 0.1 * float64(storage.numberOfThreads) // at the first error, we should still sleep some amount
+    }
 
     storagePathID, err := storage.getIDFromPath(0, storagePath)
@@ -295,7 +329,7 @@ func CreateGCDStorage(tokenFile string, storagePath string, threads int) (storag
                 return nil, err
             }
         } else if !isDir {
-            return nil, fmt.Errorf("%s/%s is not a directory", storagePath+"/"+dir)
+            return nil, fmt.Errorf("%s/%s is not a directory", storagePath, dir)
         } else {
             storage.idCache[dir] = dirID
         }
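
Editorial note, not part of this commit: the following is a minimal standalone sketch of how a single thread's backoff evolves under the new shouldRetry logic, assuming 4 threads and the same constants. The package main wrapper, the loop driver, and the printed trace are illustrative assumptions only; the initial-backoff formula, the doubling with the 64-second cap, and the jittered delay mirror the lines in the diff above.

package main

import (
    "fmt"
    "math/rand"
)

// Same formula as computeInitialBackoff in the patch (copied here so the sketch is self-contained).
func computeInitialBackoff(minimumSleep float64, maximumSleep float64) float64 {
    return rand.Float64()*(maximumSleep-minimumSleep+1) + minimumSleep
}

func main() {
    const LIMIT_BACKOFF_TIME = 64.0
    const MAX_NUMBER_OF_RETRIES = 15

    threads := 4
    minimumSleep := 0.1 * float64(threads) // first-sleep lower bound: 0.4s for 4 threads
    maximumSleep := 0.2 * float64(threads) // first-sleep upper bound: 0.8s for 4 threads

    backoff := computeInitialBackoff(minimumSleep, maximumSleep)
    retries := 0
    for attempt := 1; retries < MAX_NUMBER_OF_RETRIES; attempt++ {
        // Same order as shouldRetry: grow the backoff (doubling until the cap),
        // and count a retry only once the cap has been reached.
        if backoff < LIMIT_BACKOFF_TIME {
            backoff *= 2.0
        } else {
            backoff = LIMIT_BACKOFF_TIME
            retries++
        }
        // The sleep itself is jittered: the sum of two uniform draws in [0, backoff),
        // which averages out to the current backoff value. The sketch only prints it.
        delay := backoff*rand.Float64() + backoff*rand.Float64()
        fmt.Printf("attempt %2d  backoff %6.2f  delay %6.2f  capped retries %2d\n",
            attempt, backoff, delay, retries)
    }
}

With these numbers the first backoff lands somewhere in roughly [0.4, 1.8) seconds, doubles on every further failure until it is capped at LIMIT_BACKOFF_TIME (64 seconds), and once capped the thread is allowed up to MAX_NUMBER_OF_RETRIES (15) more attempts before shouldRetry gives up and returns the error.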