Skip to content

Commit 71997eb

Browse files
author
Kartik Prabhu
committed
For implied name: replace img with its alt text, but do not fall back to its src. Update tests.
1 parent ab25aa8 commit 71997eb

File tree

4 files changed

+16
-9
lines changed

4 files changed

+16
-9
lines changed

mf2py/dom_helpers.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ def get_descendents(node):
6969
if isinstance(desc, bs4.Tag):
7070
yield desc
7171

72-
def get_textContent(el, replace_img=False, base_url=''):
72+
def get_textContent(el, replace_img=False, img_to_src=True, base_url=''):
7373
""" Get the text content of an element, replacing images by alt or src
7474
"""
7575

@@ -78,7 +78,7 @@ def get_textContent(el, replace_img=False, base_url=''):
7878
P_BREAK_BEFORE = 1
7979
P_BREAK_AFTER = 0
8080

81-
def text_collection(el, replace_img=False, base_url=''):
81+
def text_collection(el, replace_img=False, img_to_src=True, base_url=''):
8282
# returns array of strings or integers
8383

8484
items = []
@@ -103,7 +103,7 @@ def text_collection(el, replace_img=False, base_url=''):
103103

104104
elif el.name == 'img' and replace_img:
105105
value = el.get('alt')
106-
if value is None:
106+
if value is None and img_to_src:
107107
value = el.get('src')
108108
if value is not None:
109109
value = urljoin(base_url, value)
@@ -117,7 +117,7 @@ def text_collection(el, replace_img=False, base_url=''):
117117
else:
118118
for child in el.children:
119119

120-
child_items = text_collection(child, replace_img, base_url)
120+
child_items = text_collection(child, replace_img, img_to_src, base_url)
121121
items.extend(child_items)
122122

123123
if el.name == 'p':
@@ -127,7 +127,7 @@ def text_collection(el, replace_img=False, base_url=''):
127127

128128
return items
129129

130-
results = [t for t in text_collection(el, replace_img, base_url) if t is not '']
130+
results = [t for t in text_collection(el, replace_img, img_to_src, base_url) if t is not '']
131131

132132
if results:
133133
# remove <space> if it is first and last or if it is preceded by a <space> or <int> or followed by a <int>

mf2py/implied_properties.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -70,8 +70,9 @@ def non_empty(val):
7070
return text_type(prop_value)
7171

7272
# use text if all else fails
73-
# don't replace images in implied name (https://github.com/microformats/microformats2-parsing/issues/35)
74-
return get_textContent(el, base_url=base_url)
73+
# replace images with alt but not with src in implied name
74+
# proposal: https://github.com/microformats/microformats2-parsing/issues/35#issuecomment-393615508
75+
return get_textContent(el, replace_img=True, img_to_src=False, base_url=base_url)
7576

7677

7778
def photo(el, dict_class, img_with_alt, base_url=''):

test/examples/implied_properties/implied_properties.html

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111

1212
<a class="h-card" href="http://tommorris.org/"><img src="http://tommorris.org/photo.png" alt="" />Tom Morris</a>
1313

14+
<a class="h-card" href="http://tommorris.org/"><img src="http://tommorris.org/photo.png"/>Tom Morris</a>
15+
1416
<a class="h-card" href="http://tommorris.org/"><img src="http://tommorris.org/photo.png" alt="Tom Morris" /></a>
1517

1618
<img class="h-card" src="http://tommorris.org/photo.png" alt="Tom Morris" />

test/test_parser.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -431,7 +431,7 @@ def test_nested_values():
431431
def test_implied_name():
432432
result = parse_fixture("implied_properties/implied_properties.html")
433433

434-
for i in range(6):
434+
for i in range(7):
435435
assert_equal(result["items"][i]["properties"]["name"][0], "Tom Morris")
436436

437437

@@ -474,8 +474,12 @@ def test_implied_nested_photo():
474474
result = parse_fixture("implied_properties/implied_properties.html", url="http://bar.org")
475475
assert_equal(result["items"][2]["properties"]["photo"][0],
476476
"http://tommorris.org/photo.png")
477+
assert_equal(result["items"][3]["properties"]["photo"][0],
478+
"http://tommorris.org/photo.png")
479+
assert_equal(result["items"][4]["properties"]["photo"][0],
480+
"http://tommorris.org/photo.png")
477481
# src="" is relative to the base url
478-
assert_equal(result["items"][5]["properties"]["photo"][0],
482+
assert_equal(result["items"][6]["properties"]["photo"][0],
479483
"http://bar.org")
480484

481485

0 commit comments

Comments
 (0)