Some tests fail with newer versions of pandas. The last working version is pandas=0.20.3.
With newer versions, test_dataframe_join_mismatch_cats fails for the 'left', 'right', and 'outer' join types.
Below is the error output in case anyone else runs into this issue:
============================= test session starts ==============================
platform linux -- Python 3.5.5, pytest-3.4.2, py-1.5.2, pluggy-0.6.0
rootdir: /home/jenkins/workspace/mike-wendt-pygdf, inifile:
collected 565 items
pygdf/tests/test_applymap.py ........ [ 1%]
pygdf/tests/test_binops.py ............................................. [ 9%]
........................................................................ [ 22%]
.............................................................. [ 33%]
pygdf/tests/test_categorical.py ........... [ 35%]
pygdf/tests/test_cuda_apply.py ......................................... [ 42%]
......................... [ 46%]
pygdf/tests/test_dataframe.py ..................... [ 50%]
pygdf/tests/test_factorize.py ... [ 50%]
pygdf/tests/test_gpu_arrow_parser.py ... [ 51%]
pygdf/tests/test_groupby.py ............................. [ 56%]
pygdf/tests/test_index.py ..... [ 57%]
pygdf/tests/test_joining.py ..................FF.F [ 61%]
pygdf/tests/test_label_encode.py ............ [ 63%]
pygdf/tests/test_multi.py .... [ 64%]
pygdf/tests/test_numpy_interop.py .......... [ 66%]
pygdf/tests/test_onehot.py .... [ 66%]
pygdf/tests/test_pandas_interop.py .... [ 67%]
pygdf/tests/test_pickling.py ..... [ 68%]
pygdf/tests/test_query.py .............................................. [ 76%]
....................................... [ 83%]
pygdf/tests/test_serialize.py ssssss [ 84%]
pygdf/tests/test_settings.py . [ 84%]
pygdf/tests/test_sorting.py ............................................ [ 92%]
........ [ 93%]
pygdf/tests/test_sparse_df.py ... [ 94%]
pygdf/tests/test_stats.py ....................... [ 98%]
pygdf/tests/test_unaops.py ......... [100%]
=================================== FAILURES ===================================
___________________ test_dataframe_join_mismatch_cats[left] ____________________
how = 'left'
@pytest.mark.parametrize('how', ['left', 'right', 'inner', 'outer'])
def test_dataframe_join_mismatch_cats(how):
pdf1 = pd.DataFrame({"join_col": ["a", "b", "c", "d", "e"],
"data_col_left": [10, 20, 30, 40, 50]})
pdf2 = pd.DataFrame({"join_col": ["c", "e", "f"],
"data_col_right": [6, 7, 8]})
pdf1["join_col"] = pdf1["join_col"].astype("category")
pdf2["join_col"] = pdf2["join_col"].astype("category")
gdf1 = DataFrame.from_pandas(pdf1)
gdf2 = DataFrame.from_pandas(pdf2)
gdf1 = gdf1.set_index("join_col")
gdf2 = gdf2.set_index("join_col")
pdf1 = pdf1.set_index('join_col')
pdf2 = pdf2.set_index('join_col')
join_gdf = gdf1.join(gdf2, how=how)
join_pdf = pdf1.join(pdf2, how=how)
got = join_gdf.to_pandas()
expect = join_pdf.fillna(-1) # note: pygdf join doesn't mask NA
expect.data_col_right = expect.data_col_right.astype(np.int64)
expect.data_col_left = expect.data_col_left.astype(np.int64)
pd.util.testing.assert_frame_equal(got, expect, check_names=False,
check_index_type=False,
# For inner joins, pandas return weird categories.
> check_categorical=how != 'inner')
pygdf/tests/test_joining.py:161:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
../../.conda/envs/pygdf_dev/lib/python3.5/site-packages/pandas/util/testing.py:1364: in assert_frame_equal
obj='{obj}.index'.format(obj=obj))
../../.conda/envs/pygdf_dev/lib/python3.5/site-packages/pandas/util/testing.py:941: in assert_index_equal
obj='{obj} category'.format(obj=obj))
../../.conda/envs/pygdf_dev/lib/python3.5/site-packages/pandas/util/testing.py:1055: in assert_categorical_equal
_check_isinstance(left, right, Categorical)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
left = [a, b, c, d, e]
Categories (5, object): [a, b, c, d, e]
right = array(['a', 'b', 'c', 'd', 'e'], dtype=object)
cls = <class 'pandas.core.categorical.Categorical'>
def _check_isinstance(left, right, cls):
"""
Helper method for our assert_* methods that ensures that
the two objects being compared have the right type before
proceeding with the comparison.
Parameters
----------
left : The first object being compared.
right : The second object being compared.
cls : The class type to check against.
Raises
------
AssertionError : Either `left` or `right` is not an instance of `cls`.
"""
err_msg = "{name} Expected type {exp_type}, found {act_type} instead"
cls_name = cls.__name__
if not isinstance(left, cls):
raise AssertionError(err_msg.format(name=cls_name, exp_type=cls,
act_type=type(left)))
if not isinstance(right, cls):
raise AssertionError(err_msg.format(name=cls_name, exp_type=cls,
> act_type=type(right)))
E AssertionError: Categorical Expected type <class 'pandas.core.categorical.Categorical'>, found <class 'numpy.ndarray'> instead
../../.conda/envs/pygdf_dev/lib/python3.5/site-packages/pandas/util/testing.py:253: AssertionError
___________________ test_dataframe_join_mismatch_cats[right] ___________________
how = 'right'
@pytest.mark.parametrize('how', ['left', 'right', 'inner', 'outer'])
def test_dataframe_join_mismatch_cats(how):
pdf1 = pd.DataFrame({"join_col": ["a", "b", "c", "d", "e"],
"data_col_left": [10, 20, 30, 40, 50]})
pdf2 = pd.DataFrame({"join_col": ["c", "e", "f"],
"data_col_right": [6, 7, 8]})
pdf1["join_col"] = pdf1["join_col"].astype("category")
pdf2["join_col"] = pdf2["join_col"].astype("category")
gdf1 = DataFrame.from_pandas(pdf1)
gdf2 = DataFrame.from_pandas(pdf2)
gdf1 = gdf1.set_index("join_col")
gdf2 = gdf2.set_index("join_col")
pdf1 = pdf1.set_index('join_col')
pdf2 = pdf2.set_index('join_col')
join_gdf = gdf1.join(gdf2, how=how)
join_pdf = pdf1.join(pdf2, how=how)
got = join_gdf.to_pandas()
expect = join_pdf.fillna(-1) # note: pygdf join doesn't mask NA
expect.data_col_right = expect.data_col_right.astype(np.int64)
expect.data_col_left = expect.data_col_left.astype(np.int64)
pd.util.testing.assert_frame_equal(got, expect, check_names=False,
check_index_type=False,
# For inner joins, pandas return weird categories.
> check_categorical=how != 'inner')
pygdf/tests/test_joining.py:161:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
../../.conda/envs/pygdf_dev/lib/python3.5/site-packages/pandas/util/testing.py:1364: in assert_frame_equal
obj='{obj}.index'.format(obj=obj))
../../.conda/envs/pygdf_dev/lib/python3.5/site-packages/pandas/util/testing.py:941: in assert_index_equal
obj='{obj} category'.format(obj=obj))
../../.conda/envs/pygdf_dev/lib/python3.5/site-packages/pandas/util/testing.py:1055: in assert_categorical_equal
_check_isinstance(left, right, Categorical)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
left = [c, e, f]
Categories (3, object): [c, e, f]
right = array(['c', 'e', 'f'], dtype=object)
cls = <class 'pandas.core.categorical.Categorical'>
def _check_isinstance(left, right, cls):
"""
Helper method for our assert_* methods that ensures that
the two objects being compared have the right type before
proceeding with the comparison.
Parameters
----------
left : The first object being compared.
right : The second object being compared.
cls : The class type to check against.
Raises
------
AssertionError : Either `left` or `right` is not an instance of `cls`.
"""
err_msg = "{name} Expected type {exp_type}, found {act_type} instead"
cls_name = cls.__name__
if not isinstance(left, cls):
raise AssertionError(err_msg.format(name=cls_name, exp_type=cls,
act_type=type(left)))
if not isinstance(right, cls):
raise AssertionError(err_msg.format(name=cls_name, exp_type=cls,
> act_type=type(right)))
E AssertionError: Categorical Expected type <class 'pandas.core.categorical.Categorical'>, found <class 'numpy.ndarray'> instead
../../.conda/envs/pygdf_dev/lib/python3.5/site-packages/pandas/util/testing.py:253: AssertionError
___________________ test_dataframe_join_mismatch_cats[outer] ___________________
how = 'outer'
@pytest.mark.parametrize('how', ['left', 'right', 'inner', 'outer'])
def test_dataframe_join_mismatch_cats(how):
pdf1 = pd.DataFrame({"join_col": ["a", "b", "c", "d", "e"],
"data_col_left": [10, 20, 30, 40, 50]})
pdf2 = pd.DataFrame({"join_col": ["c", "e", "f"],
"data_col_right": [6, 7, 8]})
pdf1["join_col"] = pdf1["join_col"].astype("category")
pdf2["join_col"] = pdf2["join_col"].astype("category")
gdf1 = DataFrame.from_pandas(pdf1)
gdf2 = DataFrame.from_pandas(pdf2)
gdf1 = gdf1.set_index("join_col")
gdf2 = gdf2.set_index("join_col")
pdf1 = pdf1.set_index('join_col')
pdf2 = pdf2.set_index('join_col')
join_gdf = gdf1.join(gdf2, how=how)
join_pdf = pdf1.join(pdf2, how=how)
got = join_gdf.to_pandas()
expect = join_pdf.fillna(-1) # note: pygdf join doesn't mask NA
expect.data_col_right = expect.data_col_right.astype(np.int64)
expect.data_col_left = expect.data_col_left.astype(np.int64)
pd.util.testing.assert_frame_equal(got, expect, check_names=False,
check_index_type=False,
# For inner joins, pandas return weird categories.
> check_categorical=how != 'inner')
pygdf/tests/test_joining.py:161:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
../../.conda/envs/pygdf_dev/lib/python3.5/site-packages/pandas/util/testing.py:1364: in assert_frame_equal
obj='{obj}.index'.format(obj=obj))
../../.conda/envs/pygdf_dev/lib/python3.5/site-packages/pandas/util/testing.py:941: in assert_index_equal
obj='{obj} category'.format(obj=obj))
../../.conda/envs/pygdf_dev/lib/python3.5/site-packages/pandas/util/testing.py:1055: in assert_categorical_equal
_check_isinstance(left, right, Categorical)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
left = [a, b, c, d, e, f]
Categories (6, object): [a, b, c, d, e, f]
right = array(['a', 'b', 'c', 'd', 'e', 'f'], dtype=object)
cls = <class 'pandas.core.categorical.Categorical'>
def _check_isinstance(left, right, cls):
"""
Helper method for our assert_* methods that ensures that
the two objects being compared have the right type before
proceeding with the comparison.
Parameters
----------
left : The first object being compared.
right : The second object being compared.
cls : The class type to check against.
Raises
------
AssertionError : Either `left` or `right` is not an instance of `cls`.
"""
err_msg = "{name} Expected type {exp_type}, found {act_type} instead"
cls_name = cls.__name__
if not isinstance(left, cls):
raise AssertionError(err_msg.format(name=cls_name, exp_type=cls,
act_type=type(left)))
if not isinstance(right, cls):
raise AssertionError(err_msg.format(name=cls_name, exp_type=cls,
> act_type=type(right)))
E AssertionError: Categorical Expected type <class 'pandas.core.categorical.Categorical'>, found <class 'numpy.ndarray'> instead
../../.conda/envs/pygdf_dev/lib/python3.5/site-packages/pandas/util/testing.py:253: AssertionError
=============== 3 failed, 556 passed, 6 skipped in 61.06 seconds ===============
With the merge of #120 this is closed
A couple of different projects I'm working on use Pandas 0.23 features.
I understand that #120 fixes the broken test, but are there any ongoing efforts to support Pandas 0.23+ with PyGDF?
Reopening this as we need to support Pandas 0.23+.
Resolved by #668
Most helpful comment
A couple of different projects I'm working on use Pandas 0.23 features.
I understand that #120 fixes the broken test, but are there any ongoing efforts to support Pandas 0.23+ with PyGDF?