From 0e00f6f74041a5fec528d1511095ea4974150239 Mon Sep 17 00:00:00 2001
From: Kitsune Ral <Kitsune-Ral@users.sf.net>
Date: Tue, 21 Mar 2017 19:01:35 +0900
Subject: Jobs retry on network and timeout errors

As of now, the retry logic (see BaseJob::finishJob() method) invokes the same network request several times with increasing timeouts and retry intervals. Some additional signals and accessors are also provided to control the behaviour from inheriting classes (see a notable example with SyncJob in the same commit) and clients (support of retries in Quaternion comes in a respective commit shortly).
---
 jobs/basejob.cpp | 85 ++++++++++++++++++++++++++++++++++++++++++++++----------
 jobs/basejob.h   | 40 +++++++++++++++++++++-----
 jobs/syncjob.cpp |  4 +--
 3 files changed, 105 insertions(+), 24 deletions(-)

(limited to 'jobs')

diff --git a/jobs/basejob.cpp b/jobs/basejob.cpp
index 5c1be0f0..fd70312e 100644
--- a/jobs/basejob.cpp
+++ b/jobs/basejob.cpp
@@ -18,6 +18,8 @@
 
 #include "basejob.h"
 
+#include <array>
+
 #include <QtNetwork/QNetworkAccessManager>
 #include <QtNetwork/QNetworkRequest>
 #include <QtNetwork/QNetworkReply>
@@ -63,6 +65,10 @@ class BaseJob::Private
         Status status;
 
         QTimer timer;
+        QTimer retryTimer;
+
+        size_t maxRetries = 3;
+        size_t retriesTaken = 0;
 };
 
 inline QDebug operator<<(QDebug dbg, const BaseJob* j)
@@ -76,12 +82,16 @@ BaseJob::BaseJob(ConnectionData* connection, JobHttpType type, QString name,
     : d(new Private(connection, type, endpoint, query, data, needsToken))
 {
     setObjectName(name);
+    d->timer.setSingleShot(true);
     connect (&d->timer, &QTimer::timeout, this, &BaseJob::timeout);
+    d->retryTimer.setSingleShot(true);
+    connect (&d->retryTimer, &QTimer::timeout, this, &BaseJob::start);
     qDebug() << this << "created";
 }
 
 BaseJob::~BaseJob()
 {
+    stop();
     qDebug() << this << "destroyed";
 }
 
@@ -143,10 +153,13 @@ void BaseJob::Private::sendRequest()
 
 void BaseJob::start()
 {
+    emit aboutToStart();
+    d->retryTimer.stop(); // In case we were counting down at the moment
     d->sendRequest();
     connect( d->reply.data(), &QNetworkReply::sslErrors, this, &BaseJob::sslErrors );
     connect( d->reply.data(), &QNetworkReply::finished, this, &BaseJob::gotReply );
-    d->timer.start( 120*1000 );
+    d->timer.start(getCurrentTimeout());
+    emit started();
 }
 
 void BaseJob::gotReply()
@@ -155,7 +168,7 @@ void BaseJob::gotReply()
     if (status().good())
         setStatus(parseReply(d->reply->readAll()));
 
-    finishJob(true);
+    finishJob();
 }
 
 BaseJob::Status BaseJob::checkReply(QNetworkReply* reply) const
@@ -198,33 +211,75 @@ BaseJob::Status BaseJob::parseJson(const QJsonDocument&)
     return Success;
 }
 
-void BaseJob::finishJob(bool emitResult)
+void BaseJob::stop()
 {
     d->timer.stop();
     if (!d->reply)
     {
-        qWarning() << this << "finishes with empty network reply";
+        qWarning() << this << "stopped with empty network reply";
     }
     else if (d->reply->isRunning())
     {
-        qWarning() << this << "finishes without ready network reply";
+        qWarning() << this << "stopped without ready network reply";
         d->reply->disconnect(this); // Ignore whatever comes from the reply
+        d->reply->abort();
+    }
+}
+
+void BaseJob::finishJob()
+{
+    stop();
+    if ((error() == NetworkError || error() == TimeoutError)
+            && d->retriesTaken < d->maxRetries)
+    {
+        const auto retryInterval = getNextRetryInterval();
+        ++d->retriesTaken;
+        qWarning() << this << "will take retry" << d->retriesTaken
+                   << "in" << retryInterval/1000 << "s";
+        d->retryTimer.start(retryInterval);
+        emit retryScheduled(d->retriesTaken, retryInterval);
+        return;
     }
 
-    // Notify those that are interested in any completion of the job (including killing)
+    // Notify those interested in any completion of the job (including killing)
     emit finished(this);
 
-    if (emitResult) {
-        emit result(this);
-        if (error())
-            emit failure(this);
-        else
-            emit success(this);
-    }
+    emit result(this);
+    if (error())
+        emit failure(this);
+    else
+        emit success(this);
 
     deleteLater();
 }
 
+BaseJob::duration_t BaseJob::getCurrentTimeout() const
+{
+    static const std::array<int, 4> timeouts = { 90, 90, 120, 120 };
+    return timeouts[std::min(d->retriesTaken, timeouts.size() - 1)] * 1000;
+}
+
+BaseJob::duration_t BaseJob::getNextRetryInterval() const
+{
+    static const std::array<int, 3> intervals = { 5, 10, 30 };
+    return intervals[std::min(d->retriesTaken, intervals.size() - 1)] * 1000;
+}
+
+BaseJob::duration_t BaseJob::millisToRetry() const
+{
+    return d->retryTimer.isActive() ? d->retryTimer.remainingTime() : 0;
+}
+
+size_t BaseJob::maxRetries() const
+{
+    return d->maxRetries;
+}
+
+void BaseJob::setMaxRetries(size_t newMaxRetries)
+{
+    d->maxRetries = newMaxRetries;
+}
+
 BaseJob::Status BaseJob::status() const
 {
     return d->status;
@@ -256,13 +311,13 @@ void BaseJob::setStatus(int code, QString message)
 
 void BaseJob::abandon()
 {
-    finishJob(false);
+    deleteLater();
 }
 
 void BaseJob::timeout()
 {
     setStatus( TimeoutError, "The job has timed out" );
-    finishJob(true);
+    finishJob();
 }
 
 void BaseJob::sslErrors(const QList<QSslError>& errors)
diff --git a/jobs/basejob.h b/jobs/basejob.h
index e54580bd..83ba46fa 100644
--- a/jobs/basejob.h
+++ b/jobs/basejob.h
@@ -36,6 +36,7 @@ namespace QMatrixClient
     class BaseJob: public QObject
     {
             Q_OBJECT
+            Q_PROPERTY(size_t maxRetries READ maxRetries WRITE setMaxRetries)
         public:
             /* Just in case, the values are compatible with KJob
              * (which BaseJob used to inherit from). */
@@ -107,12 +108,26 @@ namespace QMatrixClient
                     QString message;
             };
 
+            using duration_t = int; // milliseconds
+
         public:
             BaseJob(ConnectionData* connection, JobHttpType type, QString name,
                     QString endpoint, Query query = Query(), Data data = Data(),
                     bool needsToken = true);
             virtual ~BaseJob();
 
+            Status status() const;
+            int error() const;
+            virtual QString errorString() const;
+
+            size_t maxRetries() const;
+            void setMaxRetries(size_t newMaxRetries);
+
+            Q_INVOKABLE duration_t getCurrentTimeout() const;
+            Q_INVOKABLE duration_t getNextRetryInterval() const;
+            Q_INVOKABLE duration_t millisToRetry() const;
+
+        public slots:
             void start();
 
             /**
@@ -124,11 +139,21 @@ namespace QMatrixClient
              */
             void abandon();
 
-            Status status() const;
-            int error() const;
-            virtual QString errorString() const;
-
         signals:
+            /** The job is about to send a network request */
+            void aboutToStart();
+
+            /** The job has sent a network request */
+            void started();
+
+            /**
+             * The previous network request has failed; the next attempt will
+             * be done in the specified time
+             * @param nextAttempt the 1-based number of attempt (will always be more than 1)
+             * @param inMilliseconds the interval after which the next attempt will be taken
+             */
+            void retryScheduled(size_t nextAttempt, int inMilliseconds);
+
             /**
              * Emitted when the job is finished, in any case. It is used to notify
              * observers that the job is terminated and that progress can be hidden.
@@ -181,7 +206,7 @@ namespace QMatrixClient
             void setRequestData(const Data& data);
 
             /**
-             * Used by gotReply() slot to check the received reply for general
+             * Used by gotReply() to check the received reply for general
              * issues such as network errors or access denial.
              * Returning anything except NoError/Success prevents
              * further parseReply()/parseJson() invocation.
@@ -212,7 +237,7 @@ namespace QMatrixClient
              * @see parseReply
              */
             virtual Status parseJson(const QJsonDocument&);
-            
+
             void setStatus(Status s);
             void setStatus(int code, QString message);
 
@@ -224,7 +249,8 @@ namespace QMatrixClient
             void gotReply();
 
         private:
-            void finishJob(bool emitResult);
+            void stop();
+            void finishJob();
 
             class Private;
             QScopedPointer<Private> d;
diff --git a/jobs/syncjob.cpp b/jobs/syncjob.cpp
index cec9595f..e3421d35 100644
--- a/jobs/syncjob.cpp
+++ b/jobs/syncjob.cpp
@@ -21,8 +21,6 @@
 #include <QtCore/QJsonArray>
 #include <QtCore/QDebug>
 
-#include "../connectiondata.h"
-
 using namespace QMatrixClient;
 
 class SyncJob::Private
@@ -50,6 +48,8 @@ SyncJob::SyncJob(ConnectionData* connection,
     if( !since.isEmpty() )
         query.addQueryItem("since", since);
     setRequestQuery(query);
+
+    setMaxRetries(std::numeric_limits<int>::max());
 }
 
 SyncJob::~SyncJob()
-- 
cgit v1.2.3