1
+ from typing import Iterator
2
+
1
3
import pandas as pd
2
4
import numpy as np
3
5
# from lenskit.crossfold import partition_rows
4
- from lenskit .splitting import crossfold_records
5
- from lenskit .data import from_interactions_df
6
+ from lenskit .splitting import crossfold_records , crossfold_users , SampleFrac , TTSplit
7
+ from lenskit .data import from_interactions_df , Dataset
6
8
7
9
8
10
def validation_split (data : pd .DataFrame , strategie : str = 'user_based' , num_folds : int = 1 ,
@@ -99,7 +101,8 @@ def row_based_validation_split(data: pd.DataFrame, num_folds: int = 1, frac: flo
99
101
return fold_indices
100
102
101
103
102
- def user_based_validation_split (data : pd .DataFrame , num_folds : int = 1 , frac : float = 0.25 , random_state = 42 ) -> dict :
104
+ def user_based_validation_split (data : Dataset , num_folds : int = 1 , frac : float = 0.25 , random_state = 42 ) -> Iterator [
105
+ TTSplit ]:
103
106
"""
104
107
Returns a dictionary with the indices of the train and validation split for the given data.
105
108
The dictionary has the following structure:
@@ -131,6 +134,8 @@ def user_based_validation_split(data: pd.DataFrame, num_folds: int = 1, frac: fl
131
134
dict
132
135
dictionary with the indices of the train and validation split for the given data.
133
136
"""
137
+
138
+ """
134
139
# initialize a dictionary with the indices of the train and validation split for the given data
135
140
fold_indices = {i: {"train": np.array([]), "validation": np.array([])} for i in
136
141
range(num_folds)}
@@ -150,6 +155,12 @@ def user_based_validation_split(data: pd.DataFrame, num_folds: int = 1, frac: fl
150
155
num_folds=num_folds)
151
156
152
157
return fold_indices
158
+ """
159
+
160
+ splits = crossfold_users (data = data , partitions = num_folds , method = SampleFrac (0.25 ))
161
+
162
+ return splits
163
+
153
164
154
165
155
166
def __holdout_validation_split (fold_indices : dict , data : pd .DataFrame , frac : float , random_state = 42 ):
0 commit comments