/*
 * Copyright 2018 Bloomberg Finance LP
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef INCLUDED_BUILDBOXWORKER_WORKER
#define INCLUDED_BUILDBOXWORKER_WORKER

#include <buildboxcommon_casclient.h>
#include <buildboxcommon_commandline.h>
#include <buildboxcommon_connectionoptions.h>
#include <buildboxcommon_localexecutionclient.h>
#include <buildboxcommon_protos.h>
#include <buildboxcommon_standardoutputstreamer.h>
#include <buildboxcommon_temporaryfile.h>
#include <buildboxcommonmetrics_metricsconfigtype.h>

#include <build/bazel/remote/execution/v2/remote_execution.grpc.pb.h>
#include <google/devtools/remoteworkers/v1test2/bots.grpc.pb.h>

#include <sys/types.h>

#include <chrono>
#include <condition_variable>
#include <map>
#include <memory>
#include <mutex>
#include <set>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

namespace buildboxworker {

// Shorthand namespace: makes the names declared in
// `google::devtools::remoteworkers::v1test2` reachable as `proto::<name>`
// (this header uses, e.g., `proto::Bots`, `proto::BotSession`,
// `proto::BotStatus` and `proto::Lease`).
namespace proto {
using namespace google::devtools::remoteworkers::v1test2;
} // namespace proto

// Makes the names declared in `build::bazel::remote::execution::v2` usable
// unqualified inside `buildboxworker` (presumably this is where
// `ExecutedActionMetadata` below comes from -- confirm).
using namespace build::bazel::remote::execution::v2;

// Partial execution metadata obtained from the runner, keyed by lease ID.
using RunnerPartialExecutionMetadata =
    std::unordered_map<std::string /*leaseId*/, ExecutedActionMetadata>;

// Forward declaration; `Worker` declares this class as a friend.
class SubprocessGuard;

/**
 * Worker that connects to the Bots and CAS servers and runs the jobs it is
 * handed through leases (see `runWorker()`).
 */
class Worker {
    // Grants `SubprocessGuard` access to the non-public members.
    friend class SubprocessGuard;

  public:
    virtual ~Worker() = default;

    // Default-initializes every member (using the in-class initializers
    // where they are present).
    Worker() = default;

    // NOTE(review): presumably configures the worker from the parsed command
    // line `cml`, the bot identifier `botId` and `metricsConfig`; the
    // definition is not part of this header -- confirm there.
    Worker(const buildboxcommon::CommandLine &cml, const std::string &botId,
           buildboxcommon::buildboxcommonmetrics::MetricsConfigType
               *metricsConfig);

    // NOTE(review): presumably returns `true` when the supplied
    // configuration is usable; the checks performed live in the definition.
    bool validateConfiguration();

    /**
     * Create and initialize the `LocalExecutionClient` instance.
     */
    void initLocalExecutionClient();

    /**
     * Connect to the Bots and CAS servers and run jobs until
     * `d_stopAfterJobs` reaches 0.
     */
    void runWorker();

    /**
     * Allows verifying that a runner CLI specification is correct. I.e., that
     * the path to the binary is valid, that it supports all the given options,
     * and that we are setting all the required ones.
     *
     * For that it generates a command with `buildRunnerCommand()` and appends
     * the `--validate-parameters` option to it.
     *
     * Returns whether the call succeeded.
     */
    bool testRunnerCommand();

    // Spawn a subprocess to execute the command for a lease and return the
    // resulting `buildboxcommon::Operation`.
    //
    // NOTE(review): an earlier revision of this comment said that the pid is
    // recorded in `d_activeJobsToSubprocesses` and that -1 is returned on
    // failure. That member does not exist in this class and the return type
    // is an `Operation`, so how failures are reported must be checked
    // against the definition.
    //
    // This method should be called only from the worker thread for now due to
    // the thread management mechanism.
    virtual buildboxcommon::Operation spawnSubprocessForLease(
        const std::string &leaseId, const buildboxcommon::Action &action,
        const std::string &stdoutFile, const std::string &stderrFile,
        const std::string &partialExecutionMetadataFile);

    // Decrements the thread counter and issues a notification.
    void decrementThreadCount();

    // Getter (presumably for `d_maxWaitTime` -- confirm in the definition).
    const std::chrono::microseconds &maxWaitTime() const;

    // Untrack the subprocess related to a lease.
    // The subprocess is assumed to be already killed or signaled.
    // It returns true if the subprocess was tracked and successfully removed.
    bool untrackLeaseSubprocess(const std::string &leaseId);

  protected: // (Allow access to test fixtures)
    // NOLINTBEGIN (cppcoreguidelines-non-private-member-variables-in-classes)
    // This no lint can be removed by making the test fixture a friend class
    // and defining accessors in there
    // https://google.github.io/googletest/advanced.html#testing-private-code
    buildboxcommon::ConnectionOptions d_botsServer;
    buildboxcommon::ConnectionOptions d_casServer;
    buildboxcommon::ConnectionOptions d_logStreamServer;
    buildboxcommon::ConnectionOptions d_assetServer;

    std::shared_ptr<proto::Bots::StubInterface> d_botsStub;
    std::shared_ptr<buildboxcommon::CASClient> d_casClient;
    std::string d_botID;
    std::vector<std::pair<std::string, std::string>> d_platform;

    int d_maxConcurrentJobs = 0;

    // The worker will stop running after executing this many jobs. A
    // negative value means that it will never stop.
    // NOTE(review): this comment used to call the negative value "the
    // default", but the in-class initializer below is 0; the effective
    // default is presumably assigned elsewhere (e.g. by the constructor that
    // takes the command line) -- confirm.
    int d_stopAfterJobs = 0;

    std::string d_runnerCommand = "buildbox-run";
    std::vector<std::string> d_extraRunArgs;

    std::string d_logLevel;
    std::string d_logDirectory;
    std::string d_configFileName;

    // The group pids of all spawned subprocesses (set to their pids)
    std::set<pid_t> d_subprocessPgid;
    proto::BotStatus d_botStatus = proto::BotStatus::OK;

    // Consecutive error tracking for worker health management
    int d_consecutiveErrorsThreshold = 0;   // From command line (0 = disabled)
    int d_consecutiveErrorCount = 0;        // Current consecutive error count
    bool d_markedUnhealthyByErrors = false; // Persistent UNHEALTHY flag

    // Disk usage health check configuration
    // Vector of (path, threshold_percentage) pairs
    std::vector<std::pair<std::string, int>> d_diskUsageChecks;

    // Read all the runner metadata tmp files and return a map of lease IDs
    // to partial execution action metadata. This also stores the lease ID in
    // the auxiliary metadata field.
    RunnerPartialExecutionMetadata getPartialExecutionMetadataFromRunner();

    // Default time between `UpdateBotSession()` calls:
    static const std::chrono::microseconds s_defaultWaitTime;
    // Max. time between `UpdateBotSession()` calls:
    std::chrono::microseconds d_maxWaitTime{};

    proto::BotSession d_session;

    std::unique_ptr<buildboxcommon::LocalExecutionClient> d_execClient;

    std::mutex d_sessionMutex;

    // Notified when a worker thread finishes a job.
    std::condition_variable d_sessionCondition;

    // Total number of detached threads.
    int d_detachedThreadCount = 0;

    // Keep track of Lease IDs that have been assigned to a worker.
    // Lease IDs are added when the while-loop spawns worker threads and
    // are removed by the worker threads when they're done.
    std::set<std::string> d_activeJobs;

    // Mapping Lease IDs of active jobs to operation names.
    // The key set of this map should be a subset of `d_activeJobs`
    // because of the gap between when the job is marked as active and when the
    // subprocess is created
    std::map<std::string, std::string> d_activeJobsToOperations;

    // Mapping Lease IDs of active jobs to the tmp file containing the
    // associated partial execution metadata. This provides a link for
    // the worker to retrieve metadata from the runner.
    std::map<std::string /*LeaseID*/, std::string /*tmpFilePath*/>
        d_activeJobsToPartialExecutionMetadataTmpFile;

    // Stores the `ExecuteOperationMetadata` messages that were sent as
    // metadata in the last `UpdateBotSession()` call (it is overwritten
    // periodically). This should be accessed under the session mutex.
    // The entries are indexed by `ExecuteOperationMetadata.action_digest()`.
    std::unordered_multimap<buildboxcommon::Digest,
                            buildboxcommon::ExecuteOperationMetadata>
        d_sessionExecuteOperationMetadata;

    // NOLINTEND (cppcoreguidelines-non-private-member-variables-in-classes)

    // In order for the worker to enable the streaming of standard outputs, the
    // execution server can specify an endpoint by attaching an
    // `ExecuteOperationMetadata` message `m` under name
    // "executeoperationmetadata-bin" with:
    //
    //  * `m.action_digest()` equal to the digest of the pertinent action in
    //  the `Lease`, and
    //
    //  * `m.stdout_stream_name()` pointing to a ByteStream resource name.

    // Returns the time to wait for a job. (That time is based on the value of
    // `BotSession::expire_time` and the availability of active jobs.)
    // `currentTime` defaults to the system clock's current time.
    std::chrono::system_clock::time_point
    calculateWaitTime(const std::chrono::system_clock::time_point currentTime =
                          std::chrono::system_clock::now()) const;

    // Run worker without bot session for testing purposes
    void runWorkerWithoutBotSession();

    // Functions to process the session's leases.
    void processLeases(bool *skipPollDelay);
    void processPendingLease(proto::Lease *lease, bool *skipPollDelay);
    void processActiveLease(const proto::Lease &lease);
    void processCancelledLease(const proto::Lease &lease);

  private:
    /**
     * Execute the specified action.
     */
    google::rpc::Status executeAction(
        const std::string &leaseId, const buildboxcommon::Action &action,
        const std::string &stdoutStream, const std::string &stderrStream,
        buildboxcommon::ActionResult *actionResult);

    // NOTE(review): presumably the entry point of the thread that works on
    // the lease `leaseId` -- confirm in the definition.
    void workerThread(const std::string &leaseId);

    // Searches for the Action corresponding to the given lease.
    // If found, exports the Action to the file, writes its digest and
    // returns true.
    // Otherwise, returns false or throws `std::runtime_error` on I/O errors
    // writing the action to the file.
    // If the lease is not found it will also decrement the thread count and
    // wake up other threads.
    bool fetchAction(const std::string &leaseId,
                     buildboxcommon::Action *action,
                     buildboxcommon::Digest *actionDigest);

    // Update the lease with the status of its execution. If `status.ok()`,
    // ActionResult is added to the lease.
    // It also decrements the thread count and removes the lease from the
    // active jobs list.
    void
    storeActionResultInLease(const std::string &leaseId,
                             const google::rpc::Status &leaseStatus,
                             const buildboxcommon::ActionResult &actionResult);

    // Keep track of Lease IDs that have been accepted by the worker, but
    // whose acceptance has not yet been acknowledged by the server (so we
    // haven't actually started work on them yet).
    std::set<std::string> d_jobsPendingAck;

    // When set, the worker should immediately update the bot session.
    bool d_botSessionUpdate = false;

    // Register the signal handler. On errors `exit(1)`.
    static void registerSignals();

    // Set the platform properties for `d_session`.
    void setPlatformProperties();

    // Set the worker configuration values for `d_session`.
    void setWorkerConfig();

    // Returns whether the main loop in `runWorker()` should keep going.
    bool hasJobsToProcess() const;

    // If connected to a LogStream server when resource names were specified,
    // start streaming the contents of the standard outputs. If not, this
    // function has no effect.
    // (When `LOGSTREAM_DEBUG` is defined, launches `d_logstreamDebugCommand`.)
    void setUpStreamingIfNeeded(
        const std::string &stdoutStream, const std::string &stdoutFilePath,
        const std::string &stderrStream, const std::string &stderrFilePath,
        std::unique_ptr<buildboxcommon::StandardOutputStreamer>
            *stdoutStreamer,
        std::unique_ptr<buildboxcommon::StandardOutputStreamer>
            *stderrStreamer);

    // Call `stop()` and log the result for pointers to running instances.
    // For nullptrs it will have no effect.
    void stopStreamingIfNeeded(
        const std::unique_ptr<buildboxcommon::StandardOutputStreamer>
            &stdoutStreamer,
        const std::unique_ptr<buildboxcommon::StandardOutputStreamer>
            &stderrStreamer);

    // Send a SIGTERM to all tracked subprocess groups.
    void shutdownTrackedSubprocessPgids();

    void logSuppliedParameters() const;

    // Given an action digest, searches
    // `d_sessionExecuteOperationMetadata` for an entry that refers to that
    // action. If one is found, returns a pair (`stdout_stream_name`,
    // `stderr_stream_name`).
    // NOTE(review): the previous comment said that "an empty string" is
    // returned when no entry is found; as the return type is a pair this is
    // presumably a pair of empty strings -- confirm in the definition.
    std::pair<std::string, std::string> standardOutputsStreamNames(
        const buildboxcommon::Digest &actionDigest) const;

    // Helper function to terminate the subprocess related to a lease.
    // The caller thread should already hold the mutex.
    // It returns true if the subprocess exists and SIGTERM is successfully
    // sent.
    bool terminateJobSubprocess(const std::string &leaseId);

    // Check disk usage for all configured paths.
    // Returns true if all paths have sufficient space, false otherwise.
    bool checkDiskUsageHealth();

#ifdef LOGSTREAM_DEBUG
    std::string d_logstreamDebugCommand;
#endif
};

} // namespace buildboxworker
#endif
