Commit

Merge remote-tracking branch 'upstream/master' into scriptTransform
gatorsmile committed Feb 29, 2016
2 parents 0931295 + 6dfc4a7 commit c1f31d1
Showing 117 changed files with 43 additions and 9 deletions.
File renamed without changes.
File renamed without changes.
File renamed without changes.
4 changes: 2 additions & 2 deletions dev/sparktestsupport/modules.py
@@ -473,11 +473,11 @@ def __hash__(self):
dependencies=[],
source_file_regexes=[
"yarn/",
- "network/yarn/",
+ "common/network-yarn/",
],
sbt_test_goals=[
"yarn/test",
- "network-yarn/test",
+ "common/network-yarn/test",
],
test_tags=[
"org.apache.spark.tags.ExtendedYarnTest"
2 changes: 1 addition & 1 deletion docs/job-scheduling.md
@@ -88,7 +88,7 @@ In YARN mode, start the shuffle service on each `NodeManager` as follows:
1. Build Spark with the [YARN profile](building-spark.html). Skip this step if you are using a
pre-packaged distribution.
2. Locate the `spark-<version>-yarn-shuffle.jar`. This should be under
- `$SPARK_HOME/network/yarn/target/scala-<version>` if you are building Spark yourself, and under
+ `$SPARK_HOME/common/network-yarn/target/scala-<version>` if you are building Spark yourself, and under
`lib` if you are using a distribution.
3. Add this jar to the classpath of all `NodeManager`s in your cluster.
4. In the `yarn-site.xml` on each node, add `spark_shuffle` to `yarn.nodemanager.aux-services`,
@@ -148,7 +148,8 @@ List<String> buildClassPath(String appClassPath) throws IOException {
String scala = getScalaVersion();
List<String> projects = Arrays.asList("core", "repl", "mllib", "graphx",
"streaming", "tools", "sql/catalyst", "sql/core", "sql/hive", "sql/hive-thriftserver",
- "yarn", "launcher", "network/common", "network/shuffle", "network/yarn");
+ "yarn", "launcher",
+ "common/network-common", "common/network-shuffle", "common/network-yarn");
if (prependClasses) {
if (!isTesting) {
System.err.println(
2 changes: 1 addition & 1 deletion make-distribution.sh
@@ -169,7 +169,7 @@ cp "$SPARK_HOME"/assembly/target/scala*/*assembly*hadoop*.jar "$DISTDIR/lib/"
cp "$SPARK_HOME"/examples/target/scala*/spark-examples*.jar "$DISTDIR/lib/"
# This will fail if the -Pyarn profile is not provided
# In this case, silence the error and ignore the return code of this command
- cp "$SPARK_HOME"/network/yarn/target/scala*/spark-*-yarn-shuffle.jar "$DISTDIR/lib/" &> /dev/null || :
+ cp "$SPARK_HOME"/common/network-yarn/target/scala*/spark-*-yarn-shuffle.jar "$DISTDIR/lib/" &> /dev/null || :

# Copy example sources (needed for python and SQL)
mkdir -p "$DISTDIR/examples/src/main"
6 changes: 3 additions & 3 deletions pom.xml
@@ -87,13 +87,13 @@

<modules>
<module>common/sketch</module>
+ <module>common/network-common</module>
+ <module>common/network-shuffle</module>
<module>tags</module>
<module>core</module>
<module>graphx</module>
<module>mllib</module>
<module>tools</module>
- <module>network/common</module>
- <module>network/shuffle</module>
<module>streaming</module>
<module>sql/catalyst</module>
<module>sql/core</module>
@@ -2442,7 +2442,7 @@
<id>yarn</id>
<modules>
<module>yarn</module>
- <module>network/yarn</module>
+ <module>common/network-yarn</module>
</modules>
</profile>

@@ -85,7 +85,7 @@ public final void readBytes(int total, ColumnVector c, int rowId) {
for (int i = 0; i < total; i++) {
// Bytes are stored as a 4-byte little endian int. Just read the first byte.
// TODO: consider pushing this in ColumnVector by adding a readBytes with a stride.
- c.putByte(rowId + i, buffer[offset]);
+ c.putByte(rowId + i, Platform.getByte(buffer, offset));
offset += 4;
}
}
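
For context on the one-line change above, here is a minimal, self-contained Java sketch. It is not Spark code: BYTE_ARRAY_OFFSET and getByte below are stand-ins for Spark's Platform.BYTE_ARRAY_OFFSET and Platform.getByte, and it assumes the vectorized reader tracks `offset` as an absolute, base-relative offset. Under that assumption, indexing the byte array directly with such an offset reads the wrong byte, while a base-aware accessor reads the byte that was actually stored.

// Minimal sketch (not Spark code): BYTE_ARRAY_OFFSET and getByte are stand-ins for
// Spark's Platform helpers. With an absolute, base-relative offset, plain array
// indexing reads from the wrong position; the base-aware accessor does not.
public class ReadBytesOffsetSketch {
  // Stand-in for Platform.BYTE_ARRAY_OFFSET; 16 is typical on 64-bit JVMs (illustrative only).
  static final int BYTE_ARRAY_OFFSET = 16;

  // Stand-in for Platform.getByte(Object, long): interprets `offset` relative to the array base.
  static byte getByte(byte[] array, int offset) {
    return array[offset - BYTE_ARRAY_OFFSET];
  }

  public static void main(String[] args) {
    // Ten byte values stored as 4-byte little-endian ints, as in Parquet's PLAIN encoding;
    // the buffer is padded so the buggy direct read stays in bounds for this demo.
    byte[] buffer = new byte[10 * 4 + BYTE_ARRAY_OFFSET];
    for (int i = 0; i < 10; i++) {
      buffer[i * 4] = (byte) (i + 1); // the first byte of each little-endian int holds the value
    }

    int offset = BYTE_ARRAY_OFFSET; // absolute offset, as the reader maintains it
    for (int i = 0; i < 10; i++) {
      byte direct = buffer[offset];           // old code: shifted by BYTE_ARRAY_OFFSET, wrong byte
      byte viaBase = getByte(buffer, offset); // fixed code: the byte written for value i
      System.out.printf("i=%d direct=%d viaBase=%d%n", i, direct, viaBase);
      offset += 4;
    }
  }
}

The new test added to ParquetHadoopFsRelationSuite below exercises the same path end to end by writing a ByteType column to Parquet and reading it back.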
@@ -175,4 +175,37 @@ class ParquetHadoopFsRelationSuite extends HadoopFsRelationTest {
}
}
}

test("SPARK-13537: Fix readBytes in VectorizedPlainValuesReader") {
withTempPath { file =>
val path = file.getCanonicalPath

val schema = new StructType()
.add("index", IntegerType, nullable = false)
.add("col", ByteType, nullable = true)

val data = Seq(Row(1, -33.toByte), Row(2, 0.toByte), Row(3, -55.toByte), Row(4, 56.toByte),
Row(5, 127.toByte), Row(6, -44.toByte), Row(7, 23.toByte), Row(8, -95.toByte),
Row(9, 127.toByte), Row(10, 13.toByte))

val rdd = sqlContext.sparkContext.parallelize(data)
val df = sqlContext.createDataFrame(rdd, schema).orderBy("index").coalesce(1)

df.write
.mode("overwrite")
.format(dataSourceName)
.option("dataSchema", df.schema.json)
.save(path)

val loadedDF = sqlContext
.read
.format(dataSourceName)
.option("dataSchema", df.schema.json)
.schema(df.schema)
.load(path)
.orderBy("index")

checkAnswer(loadedDF, df)
}
}
}
