Skip to content

Commit cc3411e

Browse files
author
Tomasz-Kluczkowski
committed
Add get_value_sum and tests.
Clean up code a bit.
1 parent 179aea4 commit cc3411e

File tree

2 files changed

+22
-16
lines changed

2 files changed

+22
-16
lines changed

data_extractor.py

Lines changed: 12 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,22 @@
1-
from websites.resources.data import WEBSITES
2-
3-
41
class DataExtractor:
52
"""
6-
Use to extract, cleanse and amend incorrect website data collection.
3+
Used to extract, cleanse, sum and amend an incorrect collection of website data.
74
"""
85
def __init__(self, data):
96
self.data = data
107

118
def find_items(self, value=4):
129
"""
13-
Find and return a new list of items where key "value" is greater than or equal to parameter value. Default = 4.
10+
Find and return a new list of items where key "value" is greater than or equal to parameter value.
11+
:param value: int, value to find items for, default = 4.
1412
:return: list(dict), list of dictionaries matching the above filtering rule.
1513
"""
1614
return [item for item in self.data if item.get('value') and item.get('value') >= value]
1715

1816
def amend_domain_values(self, prefix='www.'):
1917
"""
2018
Fixes missing parts of the domain names.
21-
:param prefix: str, prefix to add to the domain name. Default = 'www'.
19+
:param prefix: str, prefix to add to the domain name, default = 'www.'.
2220
:return: amended: list(dict), amended list of web records.
2321
"""
2422
amended = []
@@ -39,18 +37,17 @@ def cleanse_data(self):
3937
url = item.get('url')
4038
secure = item.get('secure')
4139
# https marked as secure = False
42-
if url and url.startswith('https:') and not item.get('secure'):
40+
if url and url.startswith('https:') and not secure:
4341
item['secure'] = True
4442
# http marked as secure = True
45-
elif url and url.startswith('http:') and item.get('secure'):
43+
elif url and url.startswith('http:') and secure:
4644
item['secure'] = False
4745
amended.append(item)
4846
return amended
4947

50-
51-
52-
# data_extractor = DataExtractor(WEBSITES)
53-
# print(data_extractor.amend_domain_values())
54-
# print(data_extractor.find_items(4))
55-
# print(len(data_extractor.find_items(4)))
56-
# print(data_extractor.cleanse_data())
48+
def get_value_sum(self):
49+
"""
50+
Returns the sum of the 'value' entries of all items in the data set; items without a 'value' key count as 0.
51+
:return: int, sum of the 'value' entries of all items in the data set.
52+
"""
53+
return sum([item.get('value', 0) for item in self.data])

tests/test_data_extractor.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
1-
import pytest
1+
# Tests are written for the pytest framework.
2+
# use: pip install pytest
3+
# to test: pytest
24

35
from data_extractor import DataExtractor
46
from websites.resources.data import WEBSITES
@@ -172,3 +174,10 @@ def test_cleanse_data(self):
172174
]
173175
_data_extractor = DataExtractor(test_data)
174176
assert _data_extractor.cleanse_data() == expected
177+
178+
def test_get_value_sum(self):
179+
assert data_extractor.get_value_sum() == 23
180+
181+
def test_get_value_sum_empty_data_set(self):
182+
_data_extractor = DataExtractor([])
183+
assert _data_extractor.get_value_sum() == 0

0 commit comments

Comments
 (0)