Skip to content

Commit 91f1b0b

Browse files
author
Tomasz-Kluczkowski
committed
Add get_value_sum and tests.
Clean up code a bit.
1 parent 179aea4 commit 91f1b0b

File tree

2 files changed

+27
-20
lines changed

2 files changed

+27
-20
lines changed

data_extractor.py

Lines changed: 17 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,22 @@
1-
from websites.resources.data import WEBSITES
2-
3-
41
class DataExtractor:
52
"""
6-
Use to extract, cleanse and amend incorrect website data collection.
3+
Use to extract, cleanse, sum and amend an incorrect collection of website data.
74
"""
85
def __init__(self, data):
96
self.data = data
107

118
def find_items(self, value=4):
129
"""
13-
Find and return a new list of items where key "value" is greater than or equal to parameter value. Default = 4.
10+
Find and return a new list of items where key "value" is greater than or equal to parameter value.
11+
:param value: int, value to find items for, default = 4.
1412
:return: list(dict), list of dictionaries matching the above filtering rule.
1513
"""
1614
return [item for item in self.data if item.get('value') and item.get('value') >= value]
1715

1816
def amend_domain_values(self, prefix='www.'):
1917
"""
2018
Fixes missing parts of the domain names.
21-
:param prefix: str, prefix to add to the domain name. Default = 'www'.
19+
:param prefix: str, prefix to add to the domain name, default = 'www.'.
2220
:return: amended: list(dict), amended list of web records.
2321
"""
2422
amended = []
@@ -38,19 +36,19 @@ def cleanse_data(self):
3836
for item in self.data:
3937
url = item.get('url')
4038
secure = item.get('secure')
41-
# https marked as secure = False
42-
if url and url.startswith('https:') and not item.get('secure'):
43-
item['secure'] = True
44-
# http marked as secure = True
45-
elif url and url.startswith('http:') and item.get('secure'):
46-
item['secure'] = False
39+
if url:
40+
# https marked as secure = False
41+
if url.startswith('https:') and not secure:
42+
item['secure'] = True
43+
# http marked as secure = True
44+
elif url.startswith('http:') and secure:
45+
item['secure'] = False
4746
amended.append(item)
4847
return amended
4948

50-
51-
52-
# data_extractor = DataExtractor(WEBSITES)
53-
# print(data_extractor.amend_domain_values())
54-
# print(data_extractor.find_items(4))
55-
# print(len(data_extractor.find_items(4)))
56-
# print(data_extractor.cleanse_data())
49+
def get_value_sum(self):
50+
"""
51+
Returns the sum of the 'value' entries of all items in the data set; items without a 'value' key count as 0.
52+
:return: int, sum of all value keys in the data set.
53+
"""
54+
return sum([item.get('value', 0) for item in self.data])

tests/test_data_extractor.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
1-
import pytest
1+
# Tests are written for the pytest framework.
2+
# To install: pip install pytest
3+
# To run the tests: pytest
24

35
from data_extractor import DataExtractor
46
from websites.resources.data import WEBSITES
@@ -172,3 +174,10 @@ def test_cleanse_data(self):
172174
]
173175
_data_extractor = DataExtractor(test_data)
174176
assert _data_extractor.cleanse_data() == expected
177+
178+
def test_get_value_sum(self):
179+
assert data_extractor.get_value_sum() == 23
180+
181+
def test_get_value_sum_empty_data_set(self):
182+
_data_extractor = DataExtractor([])
183+
assert _data_extractor.get_value_sum() == 0

0 commit comments

Comments
 (0)