diff --git a/core/pom.xml b/core/pom.xml
index fc42f48973fe9..262a3320db106 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -381,35 +381,6 @@
     <outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>
     <testOutputDirectory>target/scala-${scala.binary.version}/test-classes</testOutputDirectory>
     <plugins>
-
-      <plugin>
-        <groupId>org.apache.maven.plugins</groupId>
-        <artifactId>maven-antrun-plugin</artifactId>
-        <executions>
-          <execution>
-            <phase>generate-resources</phase>
-            <goals>
-              <goal>run</goal>
-            </goals>
-          </execution>
-        </executions>
-        <configuration>
-          <tasks>
-            <unzip src="../python/lib/py4j-0.8.2.1-src.zip" dest="../python/build" />
-          </tasks>
-        </configuration>
-      </plugin>
-      <plugin>
-        <artifactId>maven-clean-plugin</artifactId>
-        <configuration>
-          <filesets>
-            <fileset>
-              <directory>${basedir}/../python/build</directory>
-            </fileset>
-          </filesets>
-          <verbose>true</verbose>
-        </configuration>
-      </plugin>
       <plugin>
         <groupId>org.apache.maven.plugins</groupId>
         <artifactId>maven-dependency-plugin</artifactId>
@@ -438,24 +409,6 @@
-
-    <resources>
-      <resource>
-        <directory>src/main/resources</directory>
-      </resource>
-      <resource>
-        <directory>../python</directory>
-        <includes>
-          <include>pyspark/*.py</include>
-        </includes>
-      </resource>
-      <resource>
-        <directory>../python/build</directory>
-        <includes>
-          <include>py4j/*.py</include>
-        </includes>
-      </resource>
-    </resources>
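
The two blocks removed above were the build-time plumbing that put Python sources on the core jar's classpath: an antrun execution that unzipped the bundled py4j sources into python/build during generate-resources, and <resources> entries that embedded pyspark/*.py and py4j/*.py into the jar itself. After this patch those files are no longer classpath resources. A minimal sketch of how one could observe the difference, assuming a Spark assembly on the classpath; the resource path is illustrative, not part of the patch:

```scala
object ResourceCheck {
  def main(args: Array[String]): Unit = {
    // Under the old poms this resolved from the jar's embedded pyspark/*.py
    // resources; with the new layout it should come back null.
    val res = getClass.getResourceAsStream("/pyspark/context.py")
    println(if (res == null) "not on classpath (new layout)" else "embedded in jar (old layout)")
    if (res != null) res.close()
  }
}
```
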
diff --git a/mllib/pom.xml b/mllib/pom.xml
index a3c57ae26000b..0c07ca1a62fd3 100644
--- a/mllib/pom.xml
+++ b/mllib/pom.xml
@@ -141,16 +141,5 @@
     <outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>
     <testOutputDirectory>target/scala-${scala.binary.version}/test-classes</testOutputDirectory>
-    <resources>
-      <resource>
-        <directory>../python</directory>
-        <includes>
-          <include>pyspark/mllib/*.py</include>
-          <include>pyspark/mllib/stat/*.py</include>
-          <include>pyspark/ml/*.py</include>
-          <include>pyspark/ml/param/*.py</include>
-        </includes>
-      </resource>
-    </resources>
diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala
index 186345af0e60e..1b87e4e98bd83 100644
--- a/project/SparkBuild.scala
+++ b/project/SparkBuild.scala
@@ -168,7 +168,7 @@ object SparkBuild extends PomBuild {
     /* Enable Assembly for all assembly projects */
     assemblyProjects.foreach(enable(Assembly.settings))

-    /* Package pyspark artifacts in the main assembly. */
+    /* Package pyspark artifacts in a separate zip file for YARN. */
     enable(PySparkAssembly.settings)(assembly)

     /* Enable unidoc only for the root spark project */
@@ -373,22 +373,15 @@ object PySparkAssembly {
   import java.util.zip.{ZipOutputStream, ZipEntry}

   lazy val settings = Seq(
-    unmanagedJars in Compile += { BuildCommons.sparkHome / "python/lib/py4j-0.8.2.1-src.zip" },
     // Use a resource generator to copy all .py files from python/pyspark into a managed directory
     // to be included in the assembly. We can't just add "python/" to the assembly's resource dir
     // list since that will copy unneeded / unwanted files.
     resourceGenerators in Compile <+= resourceManaged in Compile map { outDir: File =>
       val src = new File(BuildCommons.sparkHome, "python/pyspark")
-
       val zipFile = new File(BuildCommons.sparkHome , "python/lib/pyspark.zip")
       zipFile.delete()
       zipRecursive(src, zipFile)
-
-      val dst = new File(outDir, "pyspark")
-      if (!dst.isDirectory()) {
-        require(dst.mkdirs())
-      }
-      copy(src, dst)
+      Seq[File]()
     }
   )
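
The generator now builds python/lib/pyspark.zip purely as a side effect and returns Seq[File](), so sbt packages nothing from it into the assembly. For contrast, a minimal sketch of the sbt 0.13 resource-generator contract, using the same operators as the patch; the generated file name is hypothetical, not part of this change:

```scala
// build.sbt fragment, sbt 0.13 syntax. Whatever Seq[File] a generator returns
// is exactly what sbt packages as resources; returning Seq[File]() (as the
// patch does above) keeps the zip side effect in the build while adding
// nothing to the jar.
resourceGenerators in Compile <+= resourceManaged in Compile map { outDir: File =>
  val marker = outDir / "pyspark-build.properties" // hypothetical generated file
  IO.write(marker, "pysparkZip=built\n")
  Seq(marker) // these files end up in the artifact
}
```
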
@@ -416,42 +409,11 @@ object PySparkAssembly {
           output.write(buf, 0, n)
         }
       }
+      output.closeEntry()
       in.close()
     }
   }

-  private def copy(src: File, dst: File): Seq[File] = {
-    src.listFiles().flatMap { f =>
-      val child = new File(dst, f.getName())
-      if (f.isDirectory()) {
-        child.mkdir()
-        copy(f, child)
-      } else if (f.getName().endsWith(".py")) {
-        var in: Option[FileInputStream] = None
-        var out: Option[FileOutputStream] = None
-        try {
-          in = Some(new FileInputStream(f))
-          out = Some(new FileOutputStream(child))
-
-          val bytes = new Array[Byte](1024)
-          var read = 0
-          while (read >= 0) {
-            read = in.get.read(bytes)
-            if (read > 0) {
-              out.get.write(bytes, 0, read)
-            }
-          }
-
-          Some(child)
-        } finally {
-          in.foreach(_.close())
-          out.foreach(_.close())
-        }
-      } else {
-        None
-      }
-    }
-  }
 }

 object Unidoc {
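
The added output.closeEntry() makes each putNextEntry/write cycle explicitly finish its entry. Strictly, ZipOutputStream.putNextEntry closes any still-open entry and close() finishes the last one, so this reads as belt-and-braces rather than a correctness fix; pairing the calls is still the documented lifecycle. A self-contained sketch of the pattern (output path and entry names are illustrative):

```scala
import java.io.FileOutputStream
import java.util.zip.{ZipEntry, ZipOutputStream}

object EntryLifecycle {
  def main(args: Array[String]): Unit = {
    val out = new ZipOutputStream(new FileOutputStream("/tmp/demo.zip"))
    try {
      for ((name, body) <- Seq("a.txt" -> "alpha", "b.txt" -> "beta")) {
        out.putNextEntry(new ZipEntry(name)) // start an entry
        out.write(body.getBytes("UTF-8"))    // write its data
        out.closeEntry()                     // finish it before the next one
      }
    } finally {
      out.close() // writes the central directory
    }
  }
}
```
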
diff --git a/sql/core/pom.xml b/sql/core/pom.xml
index 7d274a73e079f..ffe95bb49188f 100644
--- a/sql/core/pom.xml
+++ b/sql/core/pom.xml
@@ -103,13 +103,5 @@
     <outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>
     <testOutputDirectory>target/scala-${scala.binary.version}/test-classes</testOutputDirectory>
-    <resources>
-      <resource>
-        <directory>../../python</directory>
-        <includes>
-          <include>pyspark/sql/*.py</include>
-        </includes>
-      </resource>
-    </resources>
diff --git a/streaming/pom.xml b/streaming/pom.xml
index 5ca55a4f680bb..5ab7f4472c38b 100644
--- a/streaming/pom.xml
+++ b/streaming/pom.xml
@@ -105,13 +105,5 @@
-    <resources>
-      <resource>
-        <directory>../python</directory>
-        <includes>
-          <include>pyspark/streaming/*.py</include>
-        </includes>
-      </resource>
-    </resources>
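
Taken together, the pom changes remove every per-module <resources> block that used to scatter .py files across the core, mllib, sql/core, and streaming jars; all of those files are now expected to travel in python/lib/pyspark.zip. A hedged spot-check of the generated archive; the entry prefixes are assumptions derived from the <include> patterns deleted above:

```scala
import java.util.zip.ZipFile
import scala.collection.JavaConverters._

object CheckPySparkZip {
  def main(args: Array[String]): Unit = {
    // Prefixes mirror the <include> patterns removed from the poms.
    val expected = Seq("pyspark/", "pyspark/mllib/", "pyspark/sql/", "pyspark/streaming/")
    val zip = new ZipFile("python/lib/pyspark.zip")
    try {
      val names = zip.entries().asScala.map(_.getName).toSet
      expected.foreach { p =>
        val hit = names.exists(n => n.startsWith(p) && n.endsWith(".py"))
        println(s"$p -> ${if (hit) "present" else "MISSING"}")
      }
    } finally {
      zip.close()
    }
  }
}
```
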