Skip to content

Commit ad24076

Browse files
committed
creating integration test cases
1 parent 928b826 commit ad24076

File tree

6 files changed

+89
-3
lines changed

6 files changed

+89
-3
lines changed

data/download_data.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ function parse_yaml {
1919
}
2020

2121
# setting important variables
22-
eval $(parse_yaml ../credentials.yaml "CONFIG_")
22+
eval $(parse_yaml ../config/credentials.yaml "CONFIG_")
2323

2424
# defining important variables
2525
export KAGGLE_USERNAME="$CONFIG_KAGGLE_USERNAME"

src/model/inference.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,8 +62,12 @@ def predict(self, x: np.ndarray, transform_to_str: bool = True) -> np.ndarray:
6262
"""
6363
prediction = self.model.predict(x)
6464

65+
print(prediction.shape)
66+
6567
if transform_to_str:
6668
prediction = label_encoder.inverse_transform(prediction)
69+
else:
70+
prediction = np.max(prediction, axis=1)
6771

6872
logger.info(f"Prediction: {prediction}.")
6973
return prediction

tests/integration/__init__.py

Whitespace-only changes.
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
import pathlib
2+
3+
import pandas as pd
4+
import numpy as np
5+
6+
from src.config.settings import general_settings
7+
from src.config.model import model_settings
8+
from src.data.processing import data_processing_inference, load_dataset
9+
10+
11+
# loading the raw dataset that was used to train the model
12+
# Loaded once at import time; the test below works on a copy of it.
dataset = load_dataset(
    path=general_settings.DATA_PATH.joinpath(general_settings.RAW_FILE_NAME)
)
18+
19+
def test_data_processing_pipeline():
    """
    Integration check for the complete data processing pipeline.

    Removes the target column from a copy of the raw dataset, sends the
    remaining columns through ``data_processing_inference`` and verifies
    the type and the number of columns of what comes back.
    """
    features_frame = dataset.copy().drop(columns=general_settings.TARGET_COLUMN)
    processed = data_processing_inference(dataframe=features_frame)

    assert isinstance(features_frame, pd.DataFrame)
    assert isinstance(processed, np.ndarray)

    # One output column is expected per configured model feature.
    n_columns = processed.shape[1]
    assert n_columns == len(model_settings.FEATURES)
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
import pathlib
2+
3+
import pandas as pd
4+
import numpy as np
5+
6+
from src.config.settings import general_settings
7+
from src.config.model import model_settings
8+
from src.data.processing import data_processing_inference, load_dataset
9+
from src.model.inference import ModelServe
10+
11+
# loading the raw dataset that was used to train the model
12+
# Loaded once at import time; the test below works on a copy of it.
dataset = load_dataset(
    path=general_settings.DATA_PATH.joinpath(general_settings.RAW_FILE_NAME)
)
18+
19+
def test_model_inference_pipeline():
    """
    Testing the integration of the entire model inference pipeline.

    The target column is dropped from a copy of the raw dataset, the rest
    is run through ``data_processing_inference`` and the resulting array is
    passed to a ``ModelServe`` instance built from the configured model
    name, flavor and version. Only the numeric prediction path
    (``transform_to_str=False``) is exercised: the container type, the
    number of rows and the dtype of the predictions are checked.
    """
    _dataset = dataset.copy()
    _dataset = _dataset.drop(columns=general_settings.TARGET_COLUMN)

    X = data_processing_inference(dataframe=_dataset)

    assert isinstance(_dataset, pd.DataFrame)
    assert isinstance(X, np.ndarray)
    assert X.shape[1] == len(model_settings.FEATURES)

    loaded_model = ModelServe(
        model_name=model_settings.MODEL_NAME,
        model_flavor=model_settings.MODEL_FLAVOR,
        model_version=model_settings.VERSION,
    )
    loaded_model.load()

    assert loaded_model.model is not None

    predictions = loaded_model.predict(X, transform_to_str=False)

    assert isinstance(predictions, np.ndarray)
    assert predictions.shape[0] == X.shape[0]
    # Compare the dtype directly. Checking isinstance() against
    # type(np.dtype("float64")) is vacuous on NumPy releases where every
    # dtype object is a plain ``numpy.dtype`` instance.
    assert predictions.dtype == np.float64

    # FIXME: the string-label path (transform_to_str=True) is not covered yet.
    # When enabling it, note that the original draft of these checks used
    # ``isinstance(type(predictions[0]), str)``, which can never be true, and
    # referenced ``List`` without importing it.
    # predictions = loaded_model.predict(X, transform_to_str=True)
    #
    # assert len(predictions) == X.shape[0]
    # assert isinstance(predictions[0], str)

tests/unit/test_model_functions.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ def test_model_performance() -> None:
104104
)
105105
y_train = np.max(y_train, axis=1)
106106

107-
train_predictions = np.max(loaded_model.predict(X_train, transform_to_str=False), axis=1)
107+
train_predictions = loaded_model.predict(X_train, transform_to_str=False)
108108
train_score = f1_score(y_true=y_train, y_pred=train_predictions, average="weighted")
109109

110110
X_valid = load_feature(
@@ -117,7 +117,7 @@ def test_model_performance() -> None:
117117
)
118118
y_valid = np.max(y_valid, axis=1)
119119

120-
valid_predictions = np.max(loaded_model.predict(X_valid, transform_to_str=False), axis=1)
120+
valid_predictions = loaded_model.predict(X_valid, transform_to_str=False)
121121
valid_score = f1_score(y_true=y_valid, y_pred=valid_predictions, average="weighted")
122122

123123
assert train_score == train_score

0 commit comments

Comments
 (0)