apache · beliefer · Jan 2, 2020 · Jan 2, 2020 · Jan 2, 2020 · Jan 3, 2020
diff --git a/.gitattributes b/.gitattributes
@@ -1,2 +1,7 @@
 *.bat text eol=crlf
 *.cmd text eol=crlf
+*.java text eol=lf
+*.scala text eol=lf
+*.xml text eol=lf
+*.py text eol=lf
+*.R text eol=lf
diff --git a/.github/PULL_REQUEST_TEMPLATE b/.github/PULL_REQUEST_TEMPLATE
@@ -1,12 +1,42 @@
-## What changes were proposed in this pull request?
+<!--
+Thanks for sending a pull request!  Here are some tips for you:
+  1. If this is your first time, please read our contributor guidelines: https://spark.apache.org/contributing.html
+  2. Ensure you have added or run the appropriate tests for your PR: https://spark.apache.org/developer-tools.html
+  3. If the PR is unfinished, add '[WIP]' in your PR title, e.g., '[WIP][SPARK-XXXX] Your PR title ...'.
+  4. Be sure to keep the PR description updated to reflect all changes.
+  5. Please write your PR title to summarize what this PR proposes.
+  6. If possible, provide a concise example to reproduce the issue for a faster review.
+-->
 
-(Please fill in changes proposed in this fix)
+### What changes were proposed in this pull request?
+<!--
+Please clarify what changes you are proposing. The purpose of this section is to outline the changes and how this PR fixes the issue. 
+If possible, please consider writing useful notes for better and faster reviews in your PR. See the examples below.
+  1. If you refactor code in a way that changes classes, showing the class hierarchy will help reviewers.
+  2. If you fix some SQL features, you can provide some references of other DBMSes.
+  3. If there is design documentation, please add the link.
+  4. If there is a discussion in the mailing list, please add the link.
+-->
 
 
-## How was this patch tested?
+### Why are the changes needed?
+<!--
+Please clarify why the changes are needed. For instance,
+  1. If you propose a new API, clarify the use case for a new API.
+  2. If you fix a bug, you can clarify why it is a bug.
+-->
 
-(Please explain how this patch was tested. E.g. unit tests, integration tests, manual tests)
 
+### Does this PR introduce any user-facing change?
+<!--
+If yes, please clarify the previous behavior and the change this PR proposes - provide the console output, description and/or an example to show the behavior difference if possible.
+If no, write 'No'.
+-->
 
-(If this patch involves UI changes, please attach a screenshot; otherwise, remove this)
 
+### How was this patch tested?
+<!--
+If tests were added, say they were added here. Please make sure to add some test cases that check the changes thoroughly including negative and positive cases if possible.
+If it was tested in a way different from regular unit tests, please clarify how you tested step by step, ideally copy and paste-able, so that other reviewers can test and check, and so that future contributors can verify it.
+If tests were not added, please describe why they were not added and/or why it was difficult to add.
+-->
diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml
@@ -0,0 +1,119 @@
+name: master  # name of this workflow
+
+on:  # events that start this workflow
+  push:  # commits pushed to the master branch
+    branches:
+    - master
+  pull_request:  # pull requests whose base branch is master
+    branches:
+    - master
+
+jobs:
+  build:  # Builds Spark with Maven (tests skipped) once per JDK/Hadoop/Hive combination of the matrix.
+
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        java: [ '1.8', '11' ]
+        hadoop: [ 'hadoop-2.7', 'hadoop-3.2' ]
+        hive: [ 'hive-1.2', 'hive-2.3' ]
+        exclude:  # combinations removed from the matrix
+        - java: '11'  # no JDK 11 + hive-1.2 build
+          hive: 'hive-1.2'
+        - hadoop: 'hadoop-3.2'  # no hadoop-3.2 + hive-1.2 build
+          hive: 'hive-1.2'
+    name: Build Spark - JDK${{ matrix.java }}/${{ matrix.hadoop }}/${{ matrix.hive }}
+
+    steps:
+    - uses: actions/checkout@v2  # pinned to a release tag rather than a moving branch
+    # We split caches because GitHub Action Cache has a 400MB-size limit.
+    - uses: actions/cache@v1
+      with:
+        path: build
+        key: build-${{ hashFiles('**/pom.xml') }}  # key changes whenever any pom.xml changes
+        restore-keys: |
+          build-
+    - uses: actions/cache@v1  # one cache per top-level group directory of the local Maven repository
+      with:
+        path: ~/.m2/repository/com
+        key: ${{ matrix.java }}-${{ matrix.hadoop }}-maven-com-${{ hashFiles('**/pom.xml') }}
+        restore-keys: |
+          ${{ matrix.java }}-${{ matrix.hadoop }}-maven-com-
+    - uses: actions/cache@v1
+      with:
+        path: ~/.m2/repository/org
+        key: ${{ matrix.java }}-${{ matrix.hadoop }}-maven-org-${{ hashFiles('**/pom.xml') }}
+        restore-keys: |
+          ${{ matrix.java }}-${{ matrix.hadoop }}-maven-org-
+    - uses: actions/cache@v1
+      with:
+        path: ~/.m2/repository/net
+        key: ${{ matrix.java }}-${{ matrix.hadoop }}-maven-net-${{ hashFiles('**/pom.xml') }}
+        restore-keys: |
+          ${{ matrix.java }}-${{ matrix.hadoop }}-maven-net-
+    - uses: actions/cache@v1
+      with:
+        path: ~/.m2/repository/io
+        key: ${{ matrix.java }}-${{ matrix.hadoop }}-maven-io-${{ hashFiles('**/pom.xml') }}
+        restore-keys: |
+          ${{ matrix.java }}-${{ matrix.hadoop }}-maven-io-
+    - name: Set up JDK ${{ matrix.java }}
+      uses: actions/setup-java@v1
+      with:
+        java-version: ${{ matrix.java }}
+    - name: Build with Maven  # the script ends by deleting org/apache/spark from the local Maven repository
+      run: |
+        export MAVEN_OPTS="-Xmx2g -XX:ReservedCodeCacheSize=1g -Dorg.slf4j.simpleLogger.defaultLogLevel=WARN"
+        export MAVEN_CLI_OPTS="--no-transfer-progress"
+        mkdir -p ~/.m2
+        ./build/mvn $MAVEN_CLI_OPTS -DskipTests -Pyarn -Pmesos -Pkubernetes -Phive -P${{ matrix.hive }} -Phive-thriftserver -P${{ matrix.hadoop }} -Phadoop-cloud -Djava.version=${{ matrix.java }} install
+        rm -rf ~/.m2/repository/org/apache/spark
+
+
+  lint:  # Runs the Scala, Java and Python linters plus the license and dependency checks.
+    runs-on: ubuntu-latest
+    name: Linters (Java/Scala/Python), licenses, dependencies
+    steps:
+    - uses: actions/checkout@v2  # pinned to a release tag rather than a moving branch
+    - uses: actions/setup-java@v1
+      with:
+        java-version: '11'
+    - uses: actions/setup-python@v1
+      with:
+        python-version: '3.x'  # any Python 3.x release
+        architecture: 'x64'
+    - name: Scala
+      run: ./dev/lint-scala
+    - name: Java
+      run: ./dev/lint-java
+    - name: Python  # installs flake8, sphinx and numpy before running the Python linter
+      run: |
+        pip install flake8 sphinx numpy
+        ./dev/lint-python
+    - name: License
+      run: ./dev/check-license
+    - name: Dependencies
+      run: ./dev/test-dependencies.sh
+
+  lintr:  # Installs R and lintr, builds and installs SparkR, then runs the R linter.
+    runs-on: ubuntu-latest
+    name: Linter (R)
+    steps:
+    - uses: actions/checkout@v2  # pinned to a release tag rather than a moving branch
+    - uses: actions/setup-java@v1
+      with:
+        java-version: '11'
+    - name: install R  # adds the CRAN apt source; its key is piped to "apt-key add -" (the "-" reads the key from stdin)
+      run: |
+        echo 'deb https://cloud.r-project.org/bin/linux/ubuntu bionic-cran35/' | sudo tee -a /etc/apt/sources.list
+        curl -sL "https://keyserver.ubuntu.com/pks/lookup?op=get&search=0xE298A3A825C0D65DFD57CBB651716619E084DAB9" | sudo apt-key add -
+        sudo apt-get update
+        sudo apt-get install -y r-base r-base-dev libcurl4-openssl-dev
+    - name: install R packages  # CRAN packages first, then lintr v2.0.0 from GitHub
+      run: |
+        sudo Rscript -e "install.packages(c('curl', 'xml2', 'httr', 'devtools', 'testthat', 'knitr', 'rmarkdown', 'roxygen2', 'e1071', 'survival'), repos='https://cloud.r-project.org/')"
+        sudo Rscript -e "devtools::install_github('jimhester/lintr@v2.0.0')"
+    - name: package and install SparkR
+      run: ./R/install-dev.sh
+    - name: lint-r
+      run: ./dev/lint-r
diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml
@@ -0,0 +1,24 @@
+name: Close stale PRs  # name of this workflow
+
+on:
+  schedule:
+  - cron: "0 0 * * *"  # minute 0, hour 0, every day
+
+jobs:
+  stale:  # Runs actions/stale with the PR message and day thresholds configured below.
+    runs-on: ubuntu-latest
+    steps:
+    - uses: actions/stale@v1.1.0
+      with:
+        repo-token: ${{ secrets.GITHUB_TOKEN }}
+        stale-pr-message: >
+          We're closing this PR because it hasn't been updated in a while.
+          This isn't a judgement on the merit of the PR in any way. It's just
+          a way of keeping the PR queue manageable.
+
+          If you'd like to revive this PR, please reopen it and ask a
+          committer to remove the Stale tag!
+        days-before-stale: 100  # presumably days without activity before a PR is marked stale - confirm against actions/stale docs
+        # Setting this to 0 is the same as setting it to 1.
+        # See: https://github.com/actions/stale/issues/28
+        days-before-close: 0
diff --git a/.gitignore b/.gitignore
@@ -24,6 +24,8 @@
 R-unit-tests.log
 R/unit-tests.out
 R/cran-check.out
+R/pkg/vignettes/sparkr-vignettes.html
+R/pkg/tests/fulltests/Rplots.pdf
 build/*.jar
 build/apache-maven*
 build/scala*
@@ -41,9 +43,12 @@ dependency-reduced-pom.xml
 derby.log
 dev/create-release/*final
 dev/create-release/*txt
+dev/pr-deps/
 dist/
-docs/_site
+docs/_site/
 docs/api
+sql/docs
+sql/site
 lib_managed/
 lint-r-report.log
 log/
@@ -56,17 +61,25 @@ project/plugins/project/build.properties
 project/plugins/src_managed/
 project/plugins/target/
 python/lib/pyspark.zip
+python/.eggs/
+python/deps
+python/docs/_site/
+python/test_coverage/coverage_data
+python/test_coverage/htmlcov
+python/pyspark/python
 reports/
 scalastyle-on-compile.generated.xml
 scalastyle-output.xml
 scalastyle.txt
 spark-*-bin-*.tgz
+spark-resources/
 spark-tests.log
 src_managed/
 streaming-tests.log
 target/
 unit-tests.log
 work/
+docs/.jekyll-metadata
 
 # For Hive
 TempStatsStore/
@@ -84,3 +97,6 @@ spark-warehouse/
 *.Rproj.*
 
 .Rproj.user
+
+# For SBT
+.jvmopts
diff --git a/.travis.yml b/.travis.yml
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -1,12 +1,12 @@
 ## Contributing to Spark
 
 *Before opening a pull request*, review the 
-[Contributing to Spark wiki](https://cwiki.apache.org/confluence/display/SPARK/Contributing+to+Spark). 
+[Contributing to Spark guide](https://spark.apache.org/contributing.html). 
 It lists steps that are required before creating a PR. In particular, consider:
 
 - Is the change important and ready enough to ask the community to spend time reviewing?
 - Have you searched for existing, related JIRAs and pull requests?
-- Is this a new feature that can stand alone as a [third party project](https://cwiki.apache.org/confluence/display/SPARK/Third+Party+Projects) ?
+- Is this a new feature that can stand alone as a [third party project](https://spark.apache.org/third-party-projects.html) ?
 - Is the change being proposed clearly explained and motivated?
 
 When you contribute code, you affirm that the contribution is your original work and that you