Skip to content

Commit 1345f1b

Browse files
committed
BUG: Fix small bugs due to singleton dropping
Correct indexing of the cluster variable. Ensure collecting effects always returns the expected type. Refactor duplicated and wrong code. Fix dummy creation to ensure the dtype can handle the data. Extend test data generation to handle sparse panels. Add extended tests of singleton dropping.
1 parent 535401c commit 1345f1b

File tree

5 files changed

+62
-20
lines changed

5 files changed

+62
-20
lines changed

.travis.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ matrix:
4747
- PANDAS=0.22
4848
- XARRAY=0.10
4949
- STATSMODELS=0.9
50-
- python: 3.6
50+
- python: 3.7
5151
env:
5252
- PYTHON=3.7
5353
- NUMPY=1.15
@@ -56,7 +56,7 @@ matrix:
5656
- XARRAY=0.10
5757
- STATSMODELS=0.9
5858
- OPENBLAS=1
59-
- python: 3.6
59+
- python: 3.7
6060
env:
6161
- PYTHON=3.7
6262
- USE_PYPI=true

linearmodels/panel/model.py

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -566,7 +566,7 @@ def _choose_cov(self, cov_type, **cov_config):
566566
else:
567567
clusters = pd.DataFrame(group_ids)
568568
if self._singleton_index is not None and clusters is not None:
569-
clusters = clusters.loc(~self._singleton_index)
569+
clusters = clusters.loc[~self._singleton_index]
570570

571571
cov_config_upd['clusters'] = np.asarray(clusters) if clusters is not None else clusters
572572

@@ -780,6 +780,8 @@ def __init__(self, dependent, exog, *, weights=None, entity_effects=False, time_
780780
self._drop_singletons()
781781

782782
def _collect_effects(self):
783+
if not self._has_effect:
784+
return np.empty((self.dependent.shape[0], 0))
783785
effects = []
784786
if self.entity_effects:
785787
effects.append(np.asarray(self.dependent.entity_ids).squeeze())
@@ -792,8 +794,7 @@ def _collect_effects(self):
792794
return np.column_stack(effects)
793795

794796
def _drop_singletons(self):
795-
has_effects = self.entity_effects or self.time_effects or self.other_effects is not None
796-
if self._singletons or not has_effects:
797+
if self._singletons or not self._has_effect:
797798
return
798799
effects = self._collect_effects()
799800
retain = in_2core_graph(effects)
@@ -1187,12 +1188,7 @@ def _determine_df_adjustment(self, cov_type, **cov_config):
11871188
if clusters is None: # No clusters
11881189
return True
11891190

1190-
effects = [self._other_effect_cats] if self.other_effects else []
1191-
if self.entity_effects:
1192-
effects.append(self.dependent.entity_ids)
1193-
if self.time_effects:
1194-
effects.append(self.dependent.time_ids)
1195-
effects = np.column_stack(effects)
1191+
effects = self._collect_effects()
11961192
if num_effects == 1:
11971193
return not self._is_effect_nested(effects, clusters)
11981194
return True # Default case for 2-way -- not completely clear

linearmodels/panel/utility.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ def dummy_matrix(cats, format='csc', drop='first', drop_all=False):
4848
rows = np.arange(nobs)
4949
ucats, inverse = np.unique(codes[:, i], return_inverse=True)
5050
ncategories = len(ucats)
51-
bits = min([i for i in (8, 16, 32, 64) if i - 1 > np.log2(ncategories)])
51+
bits = min([i for i in (8, 16, 32, 64) if i - 1 > np.log2(ncategories + total_dummies)])
5252
replacements = np.arange(ncategories, dtype='int{:d}'.format(bits))
5353
cols = replacements[inverse]
5454
if i == 0 and not drop_all:
@@ -91,10 +91,10 @@ def _remove_node(node, meta, orig_dest):
9191
node : int
9292
ID of the node to remove
9393
meta : ndarray
94-
Array with rows containins node, count, and address where
95-
address is used to find the first occurence in orig_desk
94+
Array with rows containing node, count, and address where
95+
address is used to find the first occurrence in orig_dest
9696
orig_dest : ndarray
97-
Array with rows containins origin and destination nodes
97+
Array with rows containing origin and destination nodes
9898
9999
Returns
100100
-------
@@ -145,10 +145,10 @@ def _drop_singletons(meta, orig_dest):
145145
Parameters
146146
----------
147147
meta : ndarray
148-
Array with rows containins node, count, and address where
149-
address is used to find the first occurence in orig_desk
148+
Array with rows containing node, count, and address where
149+
address is used to find the first occurrence in orig_dest
150150
orig_dest : ndarray
151-
Array with rows containins origin and destination nodes
151+
Array with rows containing origin and destination nodes
152152
"""
153153
for i in range(meta.shape[0]):
154154
if meta[i, 1] == 1:

linearmodels/tests/panel/_utility.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,8 @@ def lsdv(y: DataFrame, x: DataFrame, has_const=False, entity=False, time=False,
5757
return params[:nvar]
5858

5959

60-
def generate_data(missing, datatype, const=False, ntk=(971, 7, 5), other_effects=0, rng=None):
60+
def generate_data(missing, datatype, const=False, ntk=(971, 7, 5), other_effects=0, rng=None,
61+
num_cats=4):
6162
if rng is None:
6263
np.random.seed(12345)
6364
else:
@@ -75,7 +76,13 @@ def generate_data(missing, datatype, const=False, ntk=(971, 7, 5), other_effects
7576
else:
7677
cats = ['cat.' + str(i) for i in range(other_effects)]
7778
if other_effects:
78-
c = np.random.randint(0, 4, (other_effects, t, n))
79+
if not isinstance(num_cats, list):
80+
num_cats = [num_cats] * other_effects
81+
c = []
82+
for i in range(other_effects):
83+
nc = num_cats[i]
84+
c.append(np.random.randint(0, nc, (1, t, n)))
85+
c = np.concatenate(c, 0)
7986

8087
vcats = ['varcat.' + str(i) for i in range(2)]
8188
vc2 = np.ones((2, t, 1)) @ np.random.randint(0, n // 2, (2, 1, n))

linearmodels/tests/panel/test_panel_ols.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,17 @@ def large_data(request):
3636
return generate_data(missing, datatype, const=const, ntk=(51, 71, 5), other_effects=2)
3737

3838

39+
singleton_ids = [i for i, p in zip(ids, perms) if p[1] == 'pandas' and not p[-1]]
40+
singleton_perms = [p for p in perms if p[1] == 'pandas' and not p[-1]]
41+
42+
43+
@pytest.fixture(params=singleton_perms, ids=singleton_ids)
44+
def singleton_data(request):
45+
missing, datatype, const = request.param
46+
return generate_data(missing, datatype, const=const, ntk=(91, 15, 5), other_effects=2,
47+
num_cats=[5 * 91, 15])
48+
49+
3950
perms = list(product(missing, datatypes))
4051
ids = list(map(lambda s: '-'.join(map(str, s)), perms))
4152

@@ -1118,3 +1129,31 @@ def test_masked_singleton_removal():
11181129
mod = PanelOLS(y, x, singletons=False, entity_effects=True, time_effects=True)
11191130
res = mod.fit()
11201131
assert res.nobs == 6
1132+
1133+
1134+
def test_singleton_removal_other_effects(data):
1135+
mod_keep = PanelOLS(data.y, data.x, weights=data.w, other_effects=data.c, singletons=True)
1136+
res_keep = mod_keep.fit()
1137+
1138+
mod = PanelOLS(data.y, data.x, weights=data.w, other_effects=data.c, singletons=False)
1139+
res = mod.fit(cov_type='clustered', clusters=data.vc1)
1140+
1141+
assert res.nobs <= res_keep.nobs
1142+
1143+
1144+
@pytest.mark.slow
1145+
@pytest.mark.parametrize('other_effects', [1, 2])
1146+
def test_singleton_removal_mixed(singleton_data, other_effects):
1147+
if other_effects == 1:
1148+
other_effects = PanelData(singleton_data.c).dataframe.iloc[:, [0]]
1149+
elif other_effects == 2:
1150+
other_effects = singleton_data.c
1151+
mod = PanelOLS(singleton_data.y, singleton_data.x,
1152+
other_effects=other_effects)
1153+
res_keep = mod.fit(use_lsmr=True)
1154+
1155+
mod = PanelOLS(singleton_data.y, singleton_data.x,
1156+
other_effects=other_effects, singletons=False)
1157+
res = mod.fit(cov_type='clustered', clusters=singleton_data.vc2, use_lsmr=True)
1158+
assert_allclose(res_keep.params, res.params)
1159+
assert res.nobs <= res_keep.nobs

0 commit comments

Comments
 (0)