Skip to content

Commit

Permalink
remove local hive tests and add more tests
Browse files Browse the repository at this point in the history
  • Loading branch information
mengxr committed Nov 3, 2014
1 parent 39f19e0 commit 4e84fce
Showing 1 changed file with 11 additions and 28 deletions.
39 changes: 11 additions & 28 deletions python/pyspark/sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -472,6 +472,9 @@ def fromJson(cls, json):
UDT = getattr(m, pyClass)
return UDT()

def __eq__(self, other):
    """Return True iff ``other`` is an instance of the exact same class.

    Equality is purely by type — presumably UDT instances carry no
    per-instance state that should participate in comparison (TODO:
    confirm against the UDT subclasses).
    """
    return type(self) == type(other)

def __ne__(self, other):
    # Python 2 does not derive __ne__ from __eq__, so without this,
    # `a != b` would fall back to identity and disagree with `a == b`.
    return not self.__eq__(other)

def __hash__(self):
    # Keep the eq/hash contract: objects that compare equal (same type)
    # must hash equal.  Also required under Python 3, which sets
    # __hash__ to None when a class defines __eq__ without __hash__.
    return hash(type(self))


_all_primitive_types = dict((v.typeName(), v)
for v in globals().itervalues()
Expand Down Expand Up @@ -947,13 +950,20 @@ def _verify_type(obj, dataType):
Traceback (most recent call last):
...
ValueError:...
>>> from pyspark.tests import ExamplePoint, ExamplePointUDT
>>> _verify_type(ExamplePoint(1.0, 2.0), ExamplePointUDT())
>>> _verify_type([1.0, 2.0], ExamplePointUDT()) # doctest: +IGNORE_EXCEPTION_DETAIL
Traceback (most recent call last):
...
ValueError:...
"""
# all objects are nullable
if obj is None:
return

if isinstance(dataType, UserDefinedType):
# TODO: check UDT
if not (hasattr(obj, '__UDT__') and obj.__UDT__ == dataType):
raise ValueError("%r is not an instance of type %r" % (obj, dataType))
return

_type = type(dataType)
Expand Down Expand Up @@ -1621,33 +1631,6 @@ def hql(self, hqlQuery):

class LocalHiveContext(HiveContext):

"""Starts up an instance of hive where metadata is stored locally.
An in-process metadata data is created with data stored in ./metadata.
Warehouse data is stored in in ./warehouse.
# >>> import os
# >>> hiveCtx = LocalHiveContext(sc)
# >>> try:
# ... supress = hiveCtx.sql("DROP TABLE src")
# ... except Exception:
# ... pass
# >>> kv1 = os.path.join(os.environ["SPARK_HOME"],
# ... 'examples/src/main/resources/kv1.txt')
# >>> supress = hiveCtx.sql(
# ... "CREATE TABLE IF NOT EXISTS src (key INT, value STRING)")
# >>> supress = hiveCtx.sql("LOAD DATA LOCAL INPATH '%s' INTO TABLE src"
# ... % kv1)
# >>> results = hiveCtx.sql("FROM src SELECT value"
# ... ).map(lambda r: int(r.value.split('_')[1]))
# >>> num = results.count()
# >>> reduce_sum = results.reduce(lambda x, y: x + y)
# >>> num
# 500
# >>> reduce_sum
# 130091
"""

def __init__(self, sparkContext, sqlContext=None):
HiveContext.__init__(self, sparkContext, sqlContext)
warnings.warn("LocalHiveContext is deprecated. "
Expand Down

0 comments on commit 4e84fce

Please sign in to comment.