Merge pull request #221 from TheBestPessimist/tbp/google_rate_limit_exceeded

Tbp/google rate limit exceeded
This commit is contained in:
gilbertchen
2017-10-16 19:48:06 -04:00
committed by GitHub

View File

@@ -27,10 +27,11 @@ import (
type GCDStorage struct { type GCDStorage struct {
RateLimitedStorage RateLimitedStorage
service *drive.Service service *drive.Service
idCache map[string]string idCache map[string]string
idCacheLock *sync.Mutex idCacheLock *sync.Mutex
backoffs []int backoffs []float64
backoffsRetries []int
isConnected bool isConnected bool
numberOfThreads int numberOfThreads int
@@ -45,11 +46,19 @@ type GCDConfig struct {
} }
func (storage *GCDStorage) shouldRetry(threadIndex int, err error) (bool, error) { func (storage *GCDStorage) shouldRetry(threadIndex int, err error) (bool, error) {
const LIMIT_BACKOFF_TIME = 64
const MAX_NUMBER_OF_RETRIES = 15
minimumSleepRatio := 0.1
maximumSleepRatio := 0.2
minimumSleep := float64(storage.numberOfThreads) * minimumSleepRatio
maximumSleep := float64(storage.numberOfThreads) * maximumSleepRatio
rand.Seed(time.Now().UnixNano()) // unsure if this is needed
retry := false retry := false
message := "" message := ""
if err == nil { if err == nil {
storage.backoffs[threadIndex] = 1 storage.backoffs[threadIndex] = computeInitialBackoff(minimumSleep, maximumSleep)
storage.backoffsRetries[threadIndex] = 0
return false, nil return false, nil
} else if e, ok := err.(*googleapi.Error); ok { } else if e, ok := err.(*googleapi.Error); ok {
if 500 <= e.Code && e.Code < 600 { if 500 <= e.Code && e.Code < 600 {
@@ -62,8 +71,9 @@ func (storage *GCDStorage) shouldRetry(threadIndex int, err error) (bool, error)
retry = true retry = true
} else if e.Code == 403 { } else if e.Code == 403 {
// User Rate Limit Exceeded // User Rate Limit Exceeded
message = "User Rate Limit Exceeded" message = e.Message // "User Rate Limit Exceeded"
retry = true retry = true
} else if e.Code == 401 { } else if e.Code == 401 {
// Only retry on authorization error when storage has been connected before // Only retry on authorization error when storage has been connected before
if storage.isConnected { if storage.isConnected {
@@ -83,18 +93,37 @@ func (storage *GCDStorage) shouldRetry(threadIndex int, err error) (bool, error)
retry = err.Temporary() retry = err.Temporary()
} }
if !retry || storage.backoffs[threadIndex] >= 256 { if !retry || storage.backoffsRetries[threadIndex] >= MAX_NUMBER_OF_RETRIES {
storage.backoffs[threadIndex] = 1 LOG_INFO("GCD_RETRY", "Thread: %03d. Maximum number of retries reached. Backoff time: %.2f. Number of retries: %d", threadIndex, storage.backoffs[threadIndex], storage.backoffsRetries[threadIndex])
storage.backoffs[threadIndex] = computeInitialBackoff(minimumSleep, maximumSleep)
storage.backoffsRetries[threadIndex] = 0
return false, err return false, err
} }
delay := float32(storage.backoffs[threadIndex]) * rand.Float32() if storage.backoffs[threadIndex] < LIMIT_BACKOFF_TIME {
LOG_DEBUG("GCD_RETRY", "%s; retrying after %.2f seconds", message, delay) storage.backoffs[threadIndex] *= 2.0
time.Sleep(time.Duration(float32(storage.backoffs[threadIndex]) * float32(time.Second))) } else {
storage.backoffs[threadIndex] *= 2 storage.backoffs[threadIndex] = LIMIT_BACKOFF_TIME
storage.backoffsRetries[threadIndex] += 1
}
delay := storage.backoffs[threadIndex]*rand.Float64() + storage.backoffs[threadIndex]*rand.Float64()
LOG_DEBUG("GCD_RETRY", "Thread: %3d. Message: %s. Retrying after %6.2f seconds. Current backoff: %6.2f. Number of retries: %2d.", threadIndex, message, delay, storage.backoffs[threadIndex], storage.backoffsRetries[threadIndex])
time.Sleep(time.Duration(delay * float64(time.Second)))
return true, nil return true, nil
} }
// computeInitialBackoff returns a random initial backoff, in seconds, drawn
// uniformly from the range between minimumSleep and maximumSleep.
//
// The callers in this file pass 0.1*numberOfThreads as the minimum and
// 0.2*numberOfThreads as the maximum for the first sleep of the first backoff
// of each thread. Spreading the first sleep this way means that, both when the
// program starts and when several threads retry at once, the requests do not
// all hit Google at the same instant.
//
// The formula is the continuous form of the range technique described at
// https://stackoverflow.com/questions/1527803/generating-random-whole-numbers-in-javascript-in-a-specific-range
// The "+1" used there applies only to whole numbers (it makes the upper bound
// inclusive after flooring); for a float64 draw it would push the result up to
// one full second past maximumSleep, so it is intentionally omitted here.
//
// If maximumSleep is not greater than minimumSleep the range is empty and
// minimumSleep is returned unchanged.
func computeInitialBackoff(minimumSleep float64, maximumSleep float64) float64 {
	if maximumSleep <= minimumSleep {
		return minimumSleep
	}
	return rand.Float64()*(maximumSleep-minimumSleep) + minimumSleep
}
func (storage *GCDStorage) convertFilePath(filePath string) string { func (storage *GCDStorage) convertFilePath(filePath string) string {
if strings.HasPrefix(filePath, "chunks/") && strings.HasSuffix(filePath, ".fsl") { if strings.HasPrefix(filePath, "chunks/") && strings.HasSuffix(filePath, ".fsl") {
return "fossils/" + filePath[len("chunks/"):len(filePath)-len(".fsl")] return "fossils/" + filePath[len("chunks/"):len(filePath)-len(".fsl")]
@@ -274,7 +303,12 @@ func CreateGCDStorage(tokenFile string, storagePath string, threads int) (storag
numberOfThreads: threads, numberOfThreads: threads,
idCache: make(map[string]string), idCache: make(map[string]string),
idCacheLock: &sync.Mutex{}, idCacheLock: &sync.Mutex{},
backoffs: make([]int, threads), backoffs: make([]float64, threads),
backoffsRetries: make([]int, threads),
}
for b := range storage.backoffs {
storage.backoffs[b] = 0.1 * float64(storage.numberOfThreads) // at the first error, we should still sleep some amount
} }
storagePathID, err := storage.getIDFromPath(0, storagePath) storagePathID, err := storage.getIDFromPath(0, storagePath)
@@ -295,7 +329,7 @@ func CreateGCDStorage(tokenFile string, storagePath string, threads int) (storag
return nil, err return nil, err
} }
} else if !isDir { } else if !isDir {
return nil, fmt.Errorf("%s/%s is not a directory", storagePath+"/"+dir) return nil, fmt.Errorf("%s/%s is not a directory", storagePath, dir)
} else { } else {
storage.idCache[dir] = dirID storage.idCache[dir] = dirID
} }