Skip to content

Commit 7f5f5ac

Browse files
committed
CLN: Clean pandas compat
Ensure concat works correctly. Move testing out of compat.
1 parent e3e927f commit 7f5f5ac

19 files changed

+177
-153
lines changed

linearmodels/compat/pandas.py

Lines changed: 25 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,27 @@
1+
from distutils.version import LooseVersion
2+
3+
import pandas as pd
4+
5+
PD_LT_023 = LooseVersion(pd.__version__) < LooseVersion('0.23')
6+
7+
8+
def concat(*args, **kwargs):
    """
    Shim around pandas concat that passes sort only if allowed

    Parameters
    ----------
    *args
        Positional arguments forwarded unchanged to ``pd.concat``.
    **kwargs
        Keyword arguments forwarded to ``pd.concat``. The ``sort`` keyword
        is adjusted according to the module-level ``PD_LT_023`` flag.

    Returns
    -------
    object
        Whatever ``pd.concat`` returns for the forwarded arguments.

    Notes
    -----
    If ``PD_LT_023`` is True, ``sort`` is never forwarded: it is removed
    when the caller supplied it and it is not injected otherwise.
    If ``PD_LT_023`` is False, ``sort`` defaults to True when the caller
    did not supply it; an explicit value is passed through untouched.
    """
    # Work on a copy so the keyword handling below never touches the
    # mapping that was built for this call.
    kwargs = dict(kwargs)
    if PD_LT_023:
        # Drop ``sort`` unconditionally.  Previously a call without ``sort``
        # fell through to the default-injecting branch and forwarded
        # ``sort=True`` even though this flag says it must not be passed.
        kwargs.pop('sort', None)
    else:
        # Only fill in a default; respect an explicit caller choice.
        kwargs.setdefault('sort', True)

    return pd.concat(*args, **kwargs)
23+
24+
125
try:
226
from pandas.api.types import (is_numeric_dtype, is_categorical,
327
is_string_dtype, is_categorical_dtype,
@@ -24,11 +48,6 @@ def is_string_like(obj):
2448
is_categorical, is_categorical_dtype,
2549
is_datetime64_any_dtype, is_string_like)
2650

27-
try:
28-
from pandas.testing import assert_frame_equal, assert_series_equal
29-
except ImportError:
30-
from pandas.util.testing import assert_frame_equal, assert_series_equal
31-
3251
__all__ = ['is_string_dtype', 'is_numeric_dtype', 'is_categorical',
3352
'is_string_like', 'is_categorical_dtype', 'is_datetime64_any_dtype',
34-
'assert_frame_equal', 'assert_series_equal']
53+
'concat']

linearmodels/iv/data.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88

99
from linearmodels.compat.pandas import (is_categorical, is_categorical_dtype,
1010
is_numeric_dtype, is_string_dtype,
11-
is_string_like)
11+
is_string_like, concat)
1212

1313
dim_err = '{0} has too many dims. Maximum is 2, actual is {1}'
1414
type_err = 'Only ndarrays, DataArrays and Series and DataFrames are supported'
@@ -25,7 +25,7 @@ def convert_columns(s, drop_first):
2525
def expand_categoricals(x, drop_first):
2626
if x.shape[1] == 0:
2727
return x
28-
return pd.concat([convert_columns(x[c], drop_first) for c in x.columns], axis=1)
28+
return concat([convert_columns(x[c], drop_first) for c in x.columns], axis=1)
2929

3030

3131
class IVData(object):

linearmodels/panel/data.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
from linearmodels.compat.pandas import (is_categorical,
99
is_datetime64_any_dtype,
1010
is_numeric_dtype, is_string_dtype,
11-
is_string_like)
11+
is_string_like, concat)
1212
from linearmodels.utility import ensure_unique_column, panel_to_frame
1313

1414
__all__ = ['PanelData']
@@ -89,7 +89,7 @@ def convert_columns(s, drop_first):
8989

9090

9191
def expand_categoricals(x, drop_first):
92-
return pd.concat([convert_columns(x[c], drop_first) for c in x.columns], axis=1)
92+
return concat([convert_columns(x[c], drop_first) for c in x.columns], axis=1)
9393

9494

9595
class PanelData(object):

linearmodels/tests/asset_pricing/test_formulas.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
import numpy as np
2-
import pandas as pd
32
import pytest
3+
from pandas.testing import assert_frame_equal
44

55
from linearmodels.asset_pricing.model import (LinearFactorModel,
66
LinearFactorModelGMM,
77
TradedFactorModel)
8-
from linearmodels.compat.pandas import assert_frame_equal
8+
from linearmodels.compat.pandas import concat
99
from linearmodels.tests.asset_pricing._utility import generate_data
1010

1111
FORMULA_FACTORS = 'factor_1 + factor_2 + factor_3'
@@ -29,7 +29,7 @@ def non_traded_model(request):
2929
def data(request):
3030
premia = np.array([.1, .1, .1])
3131
out = generate_data(nportfolio=10, output='pandas', alpha=True, premia=premia)
32-
out['joined'] = pd.concat([out.factors, out.portfolios], 1)
32+
out['joined'] = concat([out.factors, out.portfolios], 1)
3333
return out
3434

3535

linearmodels/tests/iv/test_data.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
except ImportError:
1313
MISSING_XARRAY = True
1414

15-
from linearmodels.compat.pandas import assert_frame_equal, assert_series_equal
15+
from pandas.testing import assert_frame_equal, assert_series_equal
1616
from linearmodels.iv.data import IVData
1717

1818

linearmodels/tests/iv/test_formulas.py

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
11
import numpy as np
2-
import pandas as pd
32
import pytest
43
from numpy.testing import assert_allclose, assert_equal
4+
from pandas import DataFrame, Categorical
5+
from pandas.testing import assert_frame_equal
56

6-
from linearmodels.compat.pandas import assert_frame_equal
7+
from linearmodels.compat.pandas import concat
78
from linearmodels.formula import iv_2sls, iv_gmm, iv_gmm_cue, iv_liml
89
from linearmodels.iv import IV2SLS, IVGMM, IVGMMCUE, IVLIML
910

@@ -49,7 +50,7 @@ def data():
4950
y = x @ params + e
5051
cols = ['y'] + ['x' + str(i) for i in range(1, 6)]
5152
cols += ['z' + str(i) for i in range(1, 4)]
52-
data = pd.DataFrame(np.c_[y, x, z], columns=cols)
53+
data = DataFrame(np.c_[y, x, z], columns=cols)
5354
data['Intercept'] = 1.0
5455
data['weights'] = np.random.chisquare(10, size=data.shape[0]) / 10
5556
return data
@@ -164,8 +165,8 @@ def test_categorical(model_and_func):
164165
y = np.random.randn(1000)
165166
x1 = np.random.randn(1000)
166167
d = np.random.randint(0, 4, 1000)
167-
d = pd.Categorical(d)
168-
data = pd.DataFrame({'y': y, 'x1': x1, 'd': d})
168+
d = Categorical(d)
169+
data = DataFrame({'y': y, 'x1': x1, 'd': d})
169170
data['Intercept'] = 1.0
170171
model, func = model_and_func
171172
mod = model.from_formula(formula, data)
@@ -199,7 +200,7 @@ def test_formula_function(data, model_and_func):
199200
dep = data.y
200201
exog = [data[['Intercept']], sigmoid(data[['x3']]), data[['x4']],
201202
np.exp(data[['x5']])]
202-
exog = pd.concat(exog, 1)
203+
exog = concat(exog, 1)
203204
endog = data[['x1', 'x2']]
204205
instr = data[['z1', 'z2', 'z3']]
205206
mod = model(dep, exog, endog, instr)
@@ -220,7 +221,7 @@ def test_predict_formula_function(data, model_and_func):
220221

221222
exog = [data[['Intercept']], sigmoid(data[['x3']]), data[['x4']],
222223
np.exp(data[['x5']])]
223-
exog = pd.concat(exog, 1)
224+
exog = concat(exog, 1)
224225
endog = data[['x1', 'x2']]
225226
pred = res.predict(exog, endog)
226227
pred2 = res.predict(data=data)

linearmodels/tests/iv/test_results.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
1-
import pandas as pd
21
import pytest
32
from numpy.testing import assert_allclose
3+
from pandas import DataFrame
4+
from pandas.testing import assert_series_equal
45

5-
from linearmodels.compat.pandas import assert_series_equal
66
from linearmodels.iv.data import IVData
77
from linearmodels.iv.model import IV2SLS, IVGMM, IVGMMCUE, IVLIML
88
from linearmodels.tests.iv._utility import generate_data
@@ -65,12 +65,12 @@ def test_fitted_predict(data, model):
6565
assert_series_equal(res.idiosyncratic, res.resids)
6666
y = mod.dependent.pandas
6767
expected = y.values - res.resids.values[:, None]
68-
expected = pd.DataFrame(expected, y.index, ['fitted_values'])
68+
expected = DataFrame(expected, y.index, ['fitted_values'])
6969
assert_frame_similar(expected, res.fitted_values)
7070
assert_allclose(expected, res.fitted_values)
7171
pred = res.predict()
7272
nobs = res.resids.shape[0]
73-
assert isinstance(pred, pd.DataFrame)
73+
assert isinstance(pred, DataFrame)
7474
assert pred.shape == (nobs, 1)
7575
pred = res.predict(idiosyncratic=True, missing=True)
7676
nobs = IVData(data.dep).pandas.shape[0]

linearmodels/tests/panel/_utility.py

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
import numpy as np
2-
import pandas as pd
32
from numpy.random import standard_normal
43
from numpy.testing import assert_allclose
4+
from pandas import DataFrame, Categorical, get_dummies, date_range
5+
from pandas.testing import assert_frame_equal, assert_series_equal
56

67
from linearmodels.compat.numpy import lstsq
78
from linearmodels.panel.data import PanelData
@@ -13,38 +14,37 @@
1314
MISSING_XARRAY = False
1415
except ImportError:
1516
MISSING_XARRAY = True
16-
from linearmodels.compat.pandas import assert_frame_equal, assert_series_equal
1717
from linearmodels.utility import AttrDict
1818

1919
datatypes = ['numpy', 'pandas']
2020
if not MISSING_XARRAY:
2121
datatypes += ['xarray']
2222

2323

24-
def lsdv(y: pd.DataFrame, x: pd.DataFrame, has_const=False, entity=False, time=False,
24+
def lsdv(y: DataFrame, x: DataFrame, has_const=False, entity=False, time=False,
2525
general=None):
2626
nvar = x.shape[1]
2727
temp = x.reset_index()
2828
cat_index = temp.index
2929
if entity:
30-
cat = pd.Categorical(temp.iloc[:, 0])
30+
cat = Categorical(temp.iloc[:, 0])
3131
cat.index = cat_index
32-
dummies = pd.get_dummies(cat, drop_first=has_const)
33-
x = pd.DataFrame(np.c_[x.values, dummies.values.astype(np.float64)],
32+
dummies = get_dummies(cat, drop_first=has_const)
33+
x = DataFrame(np.c_[x.values, dummies.values.astype(np.float64)],
3434
index=x.index,
3535
columns=list(x.columns) + list(dummies.columns))
3636
if time:
37-
cat = pd.Categorical(temp.iloc[:, 1])
37+
cat = Categorical(temp.iloc[:, 1])
3838
cat.index = cat_index
39-
dummies = pd.get_dummies(cat, drop_first=(has_const or entity))
40-
x = pd.DataFrame(np.c_[x.values, dummies.values.astype(np.float64)],
39+
dummies = get_dummies(cat, drop_first=(has_const or entity))
40+
x = DataFrame(np.c_[x.values, dummies.values.astype(np.float64)],
4141
index=x.index,
4242
columns=list(x.columns) + list(dummies.columns))
4343
if general is not None:
44-
cat = pd.Categorical(general)
44+
cat = Categorical(general)
4545
cat.index = cat_index
46-
dummies = pd.get_dummies(cat, drop_first=(has_const or entity or time))
47-
x = pd.DataFrame(np.c_[x.values, dummies.values.astype(np.float64)],
46+
dummies = get_dummies(cat, drop_first=(has_const or entity or time))
47+
x = DataFrame(np.c_[x.values, dummies.values.astype(np.float64)],
4848
index=x.index,
4949
columns=list(x.columns) + list(dummies.columns))
5050
w = np.ones_like(y)
@@ -92,9 +92,9 @@ def generate_data(missing, datatype, const=False, ntk=(971, 7, 5), other_effects
9292

9393
if datatype in ('pandas', 'xarray'):
9494
entities = ['firm' + str(i) for i in range(n)]
95-
time = pd.date_range('1-1-1900', periods=t, freq='A-DEC')
95+
time = date_range('1-1-1900', periods=t, freq='A-DEC')
9696
var_names = ['x' + str(i) for i in range(k)]
97-
# y = pd.DataFrame(y, index=time, columns=entities)
97+
# y = DataFrame(y, index=time, columns=entities)
9898
y = panel_to_frame(y[None], items=['y'], major_axis=time, minor_axis=entities, swap=True)
9999
w = panel_to_frame(w[None], items=['w'], major_axis=time, minor_axis=entities, swap=True)
100100
w = w.reindex(y.index)

linearmodels/tests/panel/results/generate-panel-data.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import numpy as np
22
import pandas as pd
33

4+
from linearmodels.compat.pandas import concat
45
from linearmodels.panel.data import PanelData
56
from linearmodels.utility import panel_to_frame
67

@@ -30,7 +31,7 @@
3031
y = PanelData(y)
3132
w = PanelData(w)
3233

33-
z = pd.concat([x.dataframe, y.dataframe, w.dataframe], 1)
34+
z = concat([x.dataframe, y.dataframe, w.dataframe], 1)
3435
final_index = pd.MultiIndex.from_product([minor, major])
3536
final_index.levels[0].name = 'firm'
3637
z = z.reindex(final_index)

0 commit comments

Comments
 (0)