# Settings for safe local mode development
spark {
# local[...], yarn or mesos://...
master = "local[4]"
# client or cluster deployment
submit.deployMode = "client"
# For more details see https://github.com/spark-jobserver/spark-jobserver/tree/master/doc/supervise-mode.md
driver.supervise = false
# spark web UI port
webUrlPort = 8080
jobserver {
port = 8090
bind-address = "0.0.0.0"
# If true, a separate JVM is forked for each Spark context
context-per-jvm = false
# Number of job results to keep per JobResultActor/context
job-result-cache-size = 5000
kill-context-on-supervisor-down = false
manager-initialization-timeout = 40s
# For cluster mode, the following modes can be used to set
# akka.remote.netty.tcp.hostname
# - akka - Let Akka decide based on hostname
# - manual - SJS uses whatever value is specified in hostname.
# - auto - Use SPARK_LOCAL_IP or automatically find externally accessible IPv4
network-address-resolver = "akka"
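# Illustrative "manual" setup (hypothetical address): set network-address-resolver = "manual"
# and hardcode hostname = "10.0.0.5" in the akka.remote.netty.tcp section below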
dao-timeout = 6s
# Cleans up jobs, job configs and contexts in final state older than this duration
dao-cleanup-after = 72h
# At jobserver startup, a cleanup class can be provided which cleans the dao.
# Currently, only ZK implementation is provided.
# Empty string means disabled, full class path means enabled.
# Note: For H2, migrations should be used.
startup_dao_cleanup_class = ""
# Automatically load a set of jars at startup time. Key is the appName, value is the path/URL.
# job-bin-paths {
# test = /path/to/my/test.jar
# }
# Cache binary on upload.
# If set to false, the binary will be cached on job start only.
cache-on-upload = true
daorootdir = "/tmp/jobserver-dao"
binarydao {
class = spark.jobserver.io.SqlBinaryObjectsDAO
}
metadatadao {
class = spark.jobserver.io.MetaDataSqlDAO
}
datadao {
# storage directory for files that are uploaded to the server
# via POST/data commands
rootdir = /tmp/spark-jobserver/upload
}
zookeeperdao {
dir = "jobserver/db"
connection-string = "localhost:2181"
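# Illustrative 3-node ensemble (hypothetical hosts): connection-string = "zk1:2181,zk2:2181,zk3:2181"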
curator {
retries = 3
# settings below are chosen for a cluster with 3 ZooKeeper nodes
sleepMsBetweenRetries = 1000
connectionTimeoutMs = 2350
sessionTimeoutMs = 10000
}
autopurge = false
autopurge_after_hours = 168
}
# To load up job jars on startup, place them here,
# with the app name as the key and the path to the jar as the value
# job-bin-paths {
# test = ../job-server-tests/target/scala-2.10/job-server-tests_2.10-0.5.3-SNAPSHOT.jar
# }
sqldao {
# Slick database driver, full classpath
slick-driver = slick.jdbc.H2Profile
# JDBC driver, full classpath
jdbc-driver = org.h2.Driver
# Directory where binaries are cached and default H2 driver stores its data
rootdir = /tmp/spark-jobserver/sqldao/data
# Full JDBC URL / init string, along with username and password. Note: the H2 file path needs to match rootdir above.
# Substitutions may be used when launching job-server, but leave them out of this default config or tests won't pass
jdbc {
url = "jdbc:h2:file:/tmp/spark-jobserver/sqldao/data/h2-db"
user = ""
password = ""
}
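# Illustrative example for an external PostgreSQL database instead of H2
# (hypothetical host and credentials; the matching JDBC driver jar must be on the classpath,
# and slick-driver / jdbc-driver above must be changed accordingly):
# slick-driver = slick.jdbc.PostgresProfile
# jdbc-driver = org.postgresql.Driver
# jdbc {
#   url = "jdbc:postgresql://db-host:5432/spark_jobserver"
#   user = "jobserver"
#   password = "secret"
# }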
# DB connection pool settings
dbcp {
enabled = false
maxactive = 20
maxidle = 10
initialsize = 10
}
}
# Start embedded H2 Server (useful in cluster deployments)
startH2Server = false
# Health check class to be invoked for the healthz API
healthcheck = spark.jobserver.util.APIHealthCheck
# The ask pattern timeout for the API
short-timeout = 3 s
# Timeout for the job server to wait while creating contexts
context-creation-timeout = 60 s
# Timeout for the job server to wait while creating named objects
named-object-creation-timeout = 60 s
# Number of jobs that can be run simultaneously per context
# If not set, defaults to number of cores on machine where jobserver is running
max-jobs-per-context = 8
# In YARN deployments, timeout for the job server to wait while creating contexts
yarn-context-creation-timeout = 40 s
# - Empty list of master means HA mode is not enabled.
# - Specify the masters as a comma separated list of format IP:PORT,IP2:PORT
# - Try to keep the PORT for master equal to the config setting akka.remote.netty.tcp.port
# - On all masters, the order should be the same for the following property
# - Starting master nodes is out of scope for jobserver; an external entity should start all nodes
# - HA setup is best suited with an HA DAO layer. Currently, a combined DAO with HDFS + ZK is preferable.
# - HA setup can also be configured for client/cluster/supervise mode
# - For supervise mode, akka.remote.netty.tcp.port setting should be hardcoded. See doc/supervise-mode.md
# - For cluster mode, see doc/cluster.md and configure this property
# - For client mode, use hardcoded akka.remote.netty.tcp.port instead of random
ha {
masters = []
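# Example (hypothetical addresses): masters = ["10.0.0.1:2552", "10.0.0.2:2552"]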
}
}
# predefined Spark contexts
# Below is an example, but do not uncomment it. Everything defined here is carried over to
# deploy-time configs, so they will be created in all environments. :(
contexts {
# abc-demo {
# num-cpu-cores = 4 # Number of cores to allocate. Required.
# memory-per-node = 1024m # Executor memory per node, -Xmx style e.g. 512m, 1G, etc.
# }
# define additional contexts here
}
# Default settings for ad hoc as well as manually created contexts
# You can add any Spark config params here, for example, spark.mesos.coarse = true
context-settings {
num-cpu-cores = 4 # Number of cores to allocate. Required.
memory-per-node = 512m # Executor memory per node, -Xmx style e.g. 512m, 1G, etc.
# A zero-arg class implementing spark.jobserver.context.SparkContextFactory
# Determines the type of jobs that can run in a SparkContext
context-factory = spark.jobserver.context.DefaultSparkContextFactory
# By default Hive support is enabled for SparkSession in jobserver.
# This property can be used to disable Hive support.
spark.session.hive.enabled = true
streaming {
# Default batch interval for Spark Streaming contexts in milliseconds
batch_interval = 1000
# if true, stops gracefully by waiting for the processing of all received data to be completed
stopGracefully = true
# if true, stops the SparkContext with the StreamingContext. The underlying SparkContext will be
# stopped regardless of whether the StreamingContext has been started.
stopSparkContext = true
}
# URIs of jars to be loaded into the classpath for this context, given either as a list of strings or as a single comma-separated string
# dependent-jar-uris = ["file:///some/path/present/in/each/mesos/slave/somepackage.jar"]
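# or, equivalently, as a single comma-separated string (illustrative paths):
# dependent-jar-uris = "file:///some/path/a.jar,file:///some/path/b.jar"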
# Timeout for forked JVM to spin up and acquire resources
forked-jvm-init-timeout = 30s
# Timeout for SupervisorActor to wait for forked (separate JVM) contexts to initialize
context-init-timeout = 60s
passthrough {
spark.driver.allowMultipleContexts = true # Ignore the multiple-contexts exception related to SPARK-2243
}
# This adds configuration to the underlying Hadoop configuration in the Spark Context
# hadoop {
#   mapreduce.framework.name = "FooFramework"
# }
# Settings for configuring an instance of PythonContextFactory
python {
# Any locations of python libraries to be included on the PYTHONPATH when running Python jobs
paths = []
# The shell command to run when launching the subprocess for python jobs
# Please note: Spark2 currently only supports Python < 3.8.
executable = "python3"
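# Illustrative entries (hypothetical locations): paths = ["/opt/python/libs", "/usr/local/lib/py-jobs"]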
}
# All the above sections have higher precedence because those properties are added directly to
# SparkConf.
# The following properties are added directly to SparkLauncher (spark-submit). All
# launcher-specific properties should be added here; they can be overridden when creating a
# context, e.g.
# curl -X POST "localhost:8090/contexts/test-context?launcher.spark.driver.memory=512m"
launcher {
spark.driver.memory = 1g
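# Any other spark-submit property can be added here as well, e.g. (illustrative):
# spark.driver.cores = 2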
}
}
}
# Flyway locations to scan recursively for migrations
flyway.locations = "db/h2/migration"
flyway.initOnMigrate = false
akka {
# Use SLF4J/logback for deployed environment logging
loggers = ["akka.event.slf4j.Slf4jLogger"]
actor {
provider = "akka.cluster.ClusterActorRefProvider"
warn-about-java-serializer-usage = off
serializers {
customStartJobSerializer = "spark.jobserver.util.StartJobSerializer"
}
serialization-bindings {
"spark.jobserver.JobManagerActor$StartJob" = customStartJobSerializer
}
}
remote {
log-remote-lifecycle-events = off
netty.tcp {
hostname = "127.0.0.1"
port = 0
send-buffer-size = 20 MiB
receive-buffer-size = 20 MiB
# This controls the maximum message size, including job results, that can be sent
maximum-frame-size = 10 MiB
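# If large job results exceed this limit it may need to be raised, e.g. (illustrative):
# maximum-frame-size = 100 MiB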
}
}
cluster {
auto-down-unreachable-after = 20s
metrics.enabled = off
failure-detector.acceptable-heartbeat-pause = 3s
failure-detector.threshold = 12.0
allow-weakly-up-members = "off"
}
cluster.singleton {
singleton-name = "context-supervisor"
role = "supervisor"
}
cluster.singleton-proxy {
singleton-name = ${akka.cluster.singleton.singleton-name}
role = ${akka.cluster.singleton.role}
singleton-identification-interval = 1s
buffer-size = 1000
}
coordinated-shutdown {
run-by-jvm-shutdown-hook = "off"
}
}
custom-downing {
stable-after = 20s
down-removal-margin = 1s
oldest-auto-downing {
preferred-oldest-member-role = ${akka.cluster.singleton.role}
}
}
# check the reference.conf at https://doc.akka.io/docs/akka-http/current/configuration.html for all defined settings
akka.http.server {
## uncomment the next lines to make this an HTTPS example
# ssl-encryption = on
# keystore = "/some/path/server-keystore.jks"
# keystorePW = "changeit"
# see http://docs.oracle.com/javase/7/docs/technotes/guides/security/StandardNames.html#SSLContext for more examples
# typical values are either SSL or TLS
encryptionType = "SSL"
keystoreType = "JKS"
# key manager factory provider
provider = "SunX509"
#
## Client Authentication (optional): activated upon providing a truststore file
# truststore = "/some/path/server-truststore.jks"
# truststorePW = "changeit"
truststore-type = "JKS"
truststore-provider = "SunX509"
#
# ssl engine provider protocols
enabledProtocols = ["SSLv3", "TLSv1"]
idle-timeout = 60 s
request-timeout = 40 s
pipelining-limit = 2 # for maximum performance (prevents StopReading / ResumeReading messages to the IOBridge)
# Needed for HTTP/1.0 requests with missing Host headers
default-host-header = "spray.io:8765"
# Increase this in order to upload bigger job jars
parsing.max-content-length = 30m
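# Illustrative jar upload that this limit applies to (hypothetical app name and jar):
# curl -X POST localhost:8090/binaries/my-app -H "Content-Type: application/java-archive" --data-binary @my-app.jar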
}
access-control {
# note that ssl-encryption should also be on when authentication/authorization is on, so that
# passwords cannot be sniffed
# Authentication provider class. Implementations have to extend SJSAuthenticator
provider = spark.jobserver.auth.AllowAllAccessControl
# Timeout for the job server to wait for authentication requests
auth-timeout = 10 s
shiro {
# absolute path to shiro config file, including file name
config.path = "/some/path/shiro.ini"
# when authentication is on, this flag controls whether the authenticated user
# is always used as the proxy-user when starting contexts
# (note that this overrides the spark.proxy.user parameter that may be provided to the POST /contexts/new-context command)
use-as-proxy-user = off
}
keycloak {
authServerUrl = "https://example.com/auth"
realmName = "realm"
client = "client"
clientSecret = "clientSecret"
}
# If turned on, access-control for authorized users is cached to improve performance. While a user is cached, the
# provided password is *not* validated, which introduces a potential security risk.
use-cache = false
akka.http.caching.lfu-cache {
max-capacity = 512
initial-capacity = 16
time-to-live = 15 minutes
time-to-idle = 1 minutes
}
}