Skip to content

Commit 9a84d56

Browse files
authored
Merge pull request #106 from kartikprabhu/master
new version 1.1.1 - looks good to me
2 parents dda3a59 + 71997eb commit 9a84d56

35 files changed

+1387
-263
lines changed

CHANGELOG.md

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,26 @@
11
# Change Log
22
All notable changes to this project will be documented in this file.
33

4+
## 1.1.1 - 2018-06-15
5+
6+
- streamline backcompat to use JSON only.
7+
- fix multiple mf1 root rel-tag parsing
8+
- correct url and photo for hreview.
9+
- add rules for nested hreview. update backcompat to use multiple matches in old properties.
10+
- fix `rel-tag` to `p-category` conversion so that other classes are not lost.
11+
- use original authored html for `e-*` parsing in backcompat
12+
- make classes and rels into deduplicated arrays whose order is not significant (they are sorted alphabetically).
13+
- only use class names for mf2 which follow the naming rules
14+
- fix `parse` method to use default html parser.
15+
- always use the first value for attributes for rels.
16+
- correct AM/PM conversion in datetime value class pattern.
17+
- add ordinal date parsing to datetime value class pattern. ordinal dates are normalised to YYYY-MM-DD
18+
- remove hack for html tag classes since that is fixed in new BeautifulSoup
19+
- better whitespace algorithm for `name` and `html.value` parsing
20+
- experimental flag for including `alt` in `u-photo` parsing
21+
- parse a copy of the BeautifulSoup object given by the user, so that the original document is not modified
22+
- bump version to 1.1.1
23+
424
## 1.1.0 - 2018-03-16
525

626
- bump version to 1.1.0 since it is a "major" change

README.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,10 @@ Filter by microformat type
5454

5555
p.to_dict(filter_by_type="h-entry")
5656
p.to_json(filter_by_type="h-entry")
57+
58+
Experimental features
59+
---------------------
60+
- pass the optional argument `img_with_alt=True` to either the `Parser` object or to the `parse` method to enable parsing of the `alt` attribute of `<img>` tags according to [issue: image alt text is lost during parsing](https://github.com/microformats/microformats2-parsing/issues/2). By default this is `False` to be backwards compatible.
5761

5862
Frontends
5963
-------------
@@ -68,3 +72,4 @@ Contributions
6872
We welcome contributions and bug reports via Github, and on the microformats wiki.
6973

7074
We try to follow the [IndieWebCamp code of conduct](http://indiewebcamp.com/code-of-conduct). Please be respectful of other contributors, and forge a spirit of positive co-operation without discrimination or disrespect.
75+

mf2py/backcompat-rules/hentry.json

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,5 +35,13 @@
3535
"longitude": [
3636
"p-longitude"
3737
]
38+
},
39+
"rels": {
40+
"bookmark": [
41+
"u-url"
42+
],
43+
"tag": [
44+
"p-category"
45+
]
3846
}
39-
}
47+
}

mf2py/backcompat-rules/hfeed.json

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,5 +18,10 @@
1818
"title": [
1919
"p-name"
2020
]
21+
},
22+
"rels": {
23+
"tag": [
24+
"p-category"
25+
]
2126
}
22-
}
27+
}

mf2py/backcompat-rules/hproduct.json

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,11 +26,10 @@
2626
],
2727
"review": [
2828
"p-review",
29-
"h-review",
30-
"e-description"
29+
"h-review"
3130
],
3231
"fn": [
3332
"p-name"
3433
]
3534
}
36-
}
35+
}

mf2py/backcompat-rules/hrecipe.json

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,14 @@
3030
],
3131
"ingredient": [
3232
"p-ingredient"
33+
],
34+
"category": [
35+
"p-category"
36+
]
37+
},
38+
"rels": {
39+
"tag": [
40+
"p-category"
3341
]
3442
}
35-
}
43+
}

mf2py/backcompat-rules/hreview.json

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,13 @@
1717
"h-card"
1818
],
1919
"url": [
20+
"p-item",
21+
"h-item",
2022
"u-url"
2123
],
2224
"photo": [
25+
"p-item",
26+
"h-item",
2327
"u-photo"
2428
],
2529
"best": [
@@ -35,6 +39,26 @@
3539
],
3640
"summary": [
3741
"p-name"
42+
],
43+
"item vcard": [
44+
"p-item",
45+
"vcard"
46+
],
47+
"item vevent": [
48+
"p-item",
49+
"vevent"
50+
],
51+
"item hproduct": [
52+
"p-item",
53+
"hproduct"
54+
]
55+
},
56+
"rels": {
57+
"self bookmark": [
58+
"u-url"
59+
],
60+
"tag": [
61+
"p-category"
3862
]
3963
}
40-
}
64+
}

mf2py/backcompat-rules/vcard.json

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,12 @@
9797
],
9898
"organization-name": [
9999
"p-organization-name"
100+
],
101+
"title": [
102+
"p-job-title"
103+
],
104+
"role": [
105+
"p-role"
100106
]
101107
}
102-
}
108+
}

mf2py/backcompat.py

Lines changed: 73 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,8 @@
44
"""
55

66
from __future__ import unicode_literals, print_function
7-
from .dom_helpers import get_descendents
7+
from .dom_helpers import get_children
8+
from .mf_helpers import unordered_list
89
from . import mf2_classes
910
import bs4
1011
import copy
@@ -18,11 +19,8 @@
1819
else:
1920
from urllib.parse import unquote
2021

21-
# Classic Root Classname map
22-
CLASSIC_ROOT_MAP = {}
23-
24-
# Classic Root properties map
25-
CLASSIC_PROPERTY_MAP = {}
22+
# Classic map
23+
_CLASSIC_MAP = {}
2624

2725
# populate backcompat rules from JSON files
2826

@@ -34,86 +32,93 @@
3432
with codecs.open(file_path, 'r', 'utf-8') as f:
3533
rules = json.load(f)
3634

37-
CLASSIC_ROOT_MAP[root] = rules['type'][0]
38-
CLASSIC_PROPERTY_MAP[root] = rules['properties']
39-
40-
41-
42-
def root(classes):
43-
"""get all backcompat root classnames
44-
"""
45-
return [c for c in classes if c in CLASSIC_ROOT_MAP]
35+
_CLASSIC_MAP[root] = rules
4636

4737

48-
def make_classes_rule(old_class, new_classes):
38+
def _make_classes_rule(old_classes, new_classes):
4939
"""Builds a rule for augmenting an mf1 class with its mf2
5040
equivalent(s).
5141
"""
5242
def f(child, **kwargs):
43+
child_original = child.original or copy.copy(child)
5344
child_classes = child.get('class', [])
54-
if old_class in child_classes:
55-
child_classes += [c for c in new_classes
56-
if c not in child_classes]
45+
if all(cl in child_classes for cl in old_classes):
46+
child_classes.extend([cl for cl in new_classes if cl not in child_classes])
5747
child['class'] = child_classes
58-
return f
59-
60-
61-
# The RULES map has a list of rules for each root class type.
62-
# We'll build the vast majority of it from the CLASSIC_PROPERTY_MAP
63-
RULES = dict(
64-
(old_root, [make_classes_rule(old_class, new_classes)
65-
for old_class, new_classes in properties.items()])
66-
for old_root, properties in CLASSIC_PROPERTY_MAP.items())
67-
68-
69-
def rel_bookmark_to_url_rule(child, **kwargs):
70-
"""rel=bookmark gets augmented with class="u-url
71-
"""
72-
child_classes = child.get('class', [])
73-
if ('bookmark' in child.get('rel', [])
74-
and 'u-url' not in child_classes):
75-
child_classes.append('u-url')
76-
child['class'] = child_classes
7748

49+
# if any new class is e-* attach original to parse originally authored HTML
50+
if mf2_classes.embedded(child_classes) and child.original is None:
51+
child.original = child_original
52+
return f
7853

79-
def rel_tag_to_category_rule(child, **kwargs):
54+
def _rel_tag_to_category_rule(child, html_parser, **kwargs):
8055
"""rel=tag converts to p-category using a special transformation (the
81-
category becomes the tag href's last path segment). This rule adds a new
82-
data tag so that
83-
<a rel="tag" href="http://example.com/tags/cat"></a> gets augmented with
56+
category becomes the tag href's last path segment). This rule adds a new data tag so that
57+
<a rel="tag" href="http://example.com/tags/cat"></a> gets replaced with
8458
<data class="p-category" value="cat"></data>
8559
"""
60+
61+
href = child.get('href', '')
8662
rels = child.get('rel', [])
87-
classes = child.get('class', [])
88-
if ('tag' in rels and child.get('href')
89-
and 'p-category' not in classes
90-
and 'u-category' not in classes):
91-
segments = [seg for seg in child.get('href').split('/') if seg]
63+
if 'tag' in rels and href:
64+
segments = [seg for seg in href.split('/') if seg]
9265
if segments:
93-
data = bs4.BeautifulSoup('<data></data>').data
94-
# use mf1 class here so it doesn't get removed later
95-
data['class'] = ['category']
66+
if html_parser:
67+
soup = bs4.BeautifulSoup('', features=html_parser)
68+
else:
69+
soup = bs4.BeautifulSoup('')
70+
71+
data = soup.new_tag('data')
72+
# this does not use what's given in the JSON
73+
# but that is not a problem currently
74+
# use mf1 class so it doesn't get removed later
75+
data['class'] = ['p-category']
9676
data['value'] = unquote(segments[-1])
97-
child.parent.append(data)
77+
child.insert_before(data)
78+
# remove tag from rels to avoid repeat
79+
child['rel'] = [r for r in rels if r != 'tag']
9880

9981

100-
# Augment with special rules
101-
RULES['hentry'] += [
102-
rel_bookmark_to_url_rule,
103-
rel_tag_to_category_rule,
104-
]
82+
def _make_rels_rule(old_rels, new_classes, html_parser):
83+
"""Builds a rule for augmenting an mf1 rel with its mf2 class equivalent(s).
84+
"""
85+
86+
# need to special case rel=tag as it operates differently
87+
88+
def f(child, **kwargs):
89+
child_rels = child.get('rel', [])
90+
child_classes = child.get('class', [])
91+
if all(r in child_rels for r in old_rels):
92+
if 'tag' in old_rels:
93+
_rel_tag_to_category_rule(child, html_parser, **kwargs)
94+
else:
95+
child_classes.extend([cl for cl in new_classes if cl not in child_classes])
96+
child['class'] = child_classes
97+
return f
10598

106-
def apply_rules(el):
107-
"""add modern classnames for older mf1 classnames
10899

109-
returns a copy of el and does not modify the original
100+
def _get_rules(old_root, html_parser):
101+
""" for given mf1 root get the rules as a list of functions to act on children """
102+
103+
class_rules = [_make_classes_rule(old_classes.split(), new_classes)
104+
for old_classes, new_classes in _CLASSIC_MAP[old_root].get('properties', {}).items()]
105+
rel_rules = [_make_rels_rule(old_rels.split(), new_classes, html_parser)
106+
for old_rels, new_classes in _CLASSIC_MAP[old_root].get('rels', {}).items()]
107+
108+
return class_rules + rel_rules
109+
110+
def root(classes):
111+
"""get all backcompat root classnames
110112
"""
113+
return unordered_list([c for c in classes if c in _CLASSIC_MAP])
111114

112-
el_copy = copy.copy(el)
115+
def apply_rules(el, html_parser):
116+
"""add modern classnames for older mf1 classnames
117+
"""
113118

114119
def apply_prop_rules_to_children(parent, rules):
115120

116-
for child in (c for c in parent.children if isinstance(c, bs4.Tag)):
121+
for child in get_children(parent):
117122
classes = child.get('class',[])
118123
# find existing mf2 properties if any and delete them
119124
mf2_props = mf2_classes.property_classes(classes)
@@ -129,19 +134,19 @@ def apply_prop_rules_to_children(parent, rules):
129134

130135

131136
# add mf2 root equivalent
132-
classes = el_copy.get('class', [])
137+
classes = el.get('class', [])
133138
old_roots = root(classes)
134139
for old_root in old_roots:
135-
new_root = CLASSIC_ROOT_MAP[old_root]
136-
if new_root not in classes:
137-
el_copy['class'].append(new_root)
140+
new_roots = _CLASSIC_MAP[old_root]['type']
141+
classes.extend(new_roots)
142+
el['class'] = classes
138143

139144

140145
# add mf2 prop equivalent to descendents and remove existing mf2 props
141146
rules = []
142147
for old_root in old_roots:
143-
rules.extend(RULES.get(old_root,[]))
148+
rules.extend(_get_rules(old_root, html_parser))
144149

145-
apply_prop_rules_to_children(el_copy, rules)
150+
apply_prop_rules_to_children(el, rules)
146151

147-
return el_copy
152+
return el

0 commit comments

Comments
 (0)