4
4
"""
5
5
6
6
from __future__ import unicode_literals , print_function
7
- from .dom_helpers import get_descendents
7
+ from .dom_helpers import get_children
8
+ from .mf_helpers import unordered_list
8
9
from . import mf2_classes
9
10
import bs4
10
11
import copy
18
19
else :
19
20
from urllib .parse import unquote
20
21
21
- # Classic Root Classname map
22
- CLASSIC_ROOT_MAP = {}
23
-
24
- # Classic Root properties map
25
- CLASSIC_PROPERTY_MAP = {}
22
+ # Classic map
23
+ _CLASSIC_MAP = {}
26
24
27
25
# populate backcompat rules from JSON files
28
26
34
32
with codecs .open (file_path , 'r' , 'utf-8' ) as f :
35
33
rules = json .load (f )
36
34
37
- CLASSIC_ROOT_MAP [root ] = rules ['type' ][0 ]
38
- CLASSIC_PROPERTY_MAP [root ] = rules ['properties' ]
39
-
40
-
41
-
42
- def root (classes ):
43
- """get all backcompat root classnames
44
- """
45
- return [c for c in classes if c in CLASSIC_ROOT_MAP ]
35
+ _CLASSIC_MAP [root ] = rules
46
36
47
37
48
- def make_classes_rule ( old_class , new_classes ):
38
def _make_classes_rule(old_classes, new_classes):
    """Build a rule that augments mf1 class(es) with their mf2 equivalent(s).

    Args:
        old_classes: list of mf1 class names. The rule fires only when every
            one of them is present in the child's ``class`` attribute.
        new_classes: list of mf2 class names to add when the rule fires;
            names already present on the child are not added again.

    Returns:
        A function ``f(child, **kwargs)`` that mutates ``child`` in place and
        returns None. Extra keyword arguments are accepted and ignored.
    """
    def f(child, **kwargs):
        child_classes = child.get('class', [])
        if not all(cl in child_classes for cl in old_classes):
            return

        # Snapshot the element before its classes are touched. The copy is
        # only taken once the rule is known to match, so non-matching
        # children (the common case) are not copied at all.
        child_original = child.original or copy.copy(child)

        child_classes.extend([cl for cl in new_classes if cl not in child_classes])
        child['class'] = child_classes

        # if any new class is e-* attach original to parse originally authored HTML
        # NOTE(review): nesting was reconstructed from an indentation-stripped
        # listing; confirm this check belongs inside the matching branch.
        if mf2_classes.embedded(child_classes) and child.original is None:
            child.original = child_original
    return f
78
53
79
- def rel_tag_to_category_rule (child , ** kwargs ):
54
+ def _rel_tag_to_category_rule (child , html_parser , ** kwargs ):
80
55
"""rel=tag converts to p-category using a special transformation (the
81
- category becomes the tag href's last path segment). This rule adds a new
82
- data tag so that
83
- <a rel="tag" href="http://example.com/tags/cat"></a> gets augmented with
56
+ category becomes the tag href's last path segment). This rule adds a new data tag so that
57
+ <a rel="tag" href="http://example.com/tags/cat"></a> gets replaced with
84
58
<data class="p-category" value="cat"></data>
85
59
"""
60
+
61
+ href = child .get ('href' , '' )
86
62
rels = child .get ('rel' , [])
87
- classes = child .get ('class' , [])
88
- if ('tag' in rels and child .get ('href' )
89
- and 'p-category' not in classes
90
- and 'u-category' not in classes ):
91
- segments = [seg for seg in child .get ('href' ).split ('/' ) if seg ]
63
+ if 'tag' in rels and href :
64
+ segments = [seg for seg in href .split ('/' ) if seg ]
92
65
if segments :
93
- data = bs4 .BeautifulSoup ('<data></data>' ).data
94
- # use mf1 class here so it doesn't get removed later
95
- data ['class' ] = ['category' ]
66
+ if html_parser :
67
+ soup = bs4 .BeautifulSoup ('' , features = html_parser )
68
+ else :
69
+ soup = bs4 .BeautifulSoup ('' )
70
+
71
+ data = soup .new_tag ('data' )
72
+ # this does not use what's given in the JSON
73
+ # but that is not a problem currently
74
+ # use mf1 class so it doesn't get removed later
75
+ data ['class' ] = ['p-category' ]
96
76
data ['value' ] = unquote (segments [- 1 ])
97
- child .parent .append (data )
77
+ child .insert_before (data )
78
+ # remove tag from rels to avoid repeat
79
+ child ['rel' ] = [r for r in rels if r != 'tag' ]
98
80
99
81
100
- # Augment with special rules
101
- RULES ['hentry' ] += [
102
- rel_bookmark_to_url_rule ,
103
- rel_tag_to_category_rule ,
104
- ]
82
+ def _make_rels_rule (old_rels , new_classes , html_parser ):
83
+ """Builds a rule for augmenting an mf1 rel with its mf2 class equivalent(s).
84
+ """
85
+
86
+ # need to special case rel=tag as it operates differently
87
+
88
+ def f (child , ** kwargs ):
89
+ child_rels = child .get ('rel' , [])
90
+ child_classes = child .get ('class' , [])
91
+ if all (r in child_rels for r in old_rels ):
92
+ if 'tag' in old_rels :
93
+ _rel_tag_to_category_rule (child , html_parser , ** kwargs )
94
+ else :
95
+ child_classes .extend ([cl for cl in new_classes if cl not in child_classes ])
96
+ child ['class' ] = child_classes
97
+ return f
105
98
106
- def apply_rules (el ):
107
- """add modern classnames for older mf1 classnames
108
99
109
- returns a copy of el and does not modify the original
100
def _get_rules(old_root, html_parser):
    """For a given mf1 root, get the rules as a list of functions to act on children.

    Args:
        old_root: classic root class name; must be a key of ``_CLASSIC_MAP``.
        html_parser: parser name passed through to the rel rules.

    Returns:
        A new list holding the class rules (from the root's 'properties'
        entry) followed by the rel rules (from its 'rels' entry). Each key of
        those entries is split on whitespace into a list of names that must
        all be present for the rule to fire.

    Raises:
        KeyError: if ``old_root`` is not in ``_CLASSIC_MAP``.
    """
    root_rules = _CLASSIC_MAP[old_root]

    class_rules = [
        _make_classes_rule(old_classes.split(), new_classes)
        for old_classes, new_classes in root_rules.get('properties', {}).items()
    ]
    rel_rules = [
        _make_rels_rule(old_rels.split(), new_classes, html_parser)
        for old_rels, new_classes in root_rules.get('rels', {}).items()
    ]

    return class_rules + rel_rules
109
+
110
def root(classes):
    """Get all backcompat root classnames found in ``classes``.

    A name counts as a backcompat root when it is a key of ``_CLASSIC_MAP``.
    The matching names are passed through ``unordered_list`` before being
    returned (presumably to de-duplicate them -- confirm in mf_helpers).
    """
    classic_roots = [name for name in classes if name in _CLASSIC_MAP]
    return unordered_list(classic_roots)
111
114
112
- el_copy = copy .copy (el )
115
+ def apply_rules (el , html_parser ):
116
+ """add modern classnames for older mf1 classnames
117
+ """
113
118
114
119
def apply_prop_rules_to_children (parent , rules ):
115
120
116
- for child in ( c for c in parent . children if isinstance ( c , bs4 . Tag ) ):
121
+ for child in get_children ( parent ):
117
122
classes = child .get ('class' ,[])
118
123
# find existing mf2 properties if any and delete them
119
124
mf2_props = mf2_classes .property_classes (classes )
@@ -129,19 +134,19 @@ def apply_prop_rules_to_children(parent, rules):
129
134
130
135
131
136
# add mf2 root equivalent
132
- classes = el_copy .get ('class' , [])
137
+ classes = el .get ('class' , [])
133
138
old_roots = root (classes )
134
139
for old_root in old_roots :
135
- new_root = CLASSIC_ROOT_MAP [old_root ]
136
- if new_root not in classes :
137
- el_copy ['class' ]. append ( new_root )
140
+ new_roots = _CLASSIC_MAP [old_root ][ 'type' ]
141
+ classes . extend ( new_roots )
142
+ el ['class' ] = classes
138
143
139
144
140
145
# add mf2 prop equivalent to descendents and remove existing mf2 props
141
146
rules = []
142
147
for old_root in old_roots :
143
- rules .extend (RULES . get (old_root ,[] ))
148
+ rules .extend (_get_rules (old_root , html_parser ))
144
149
145
- apply_prop_rules_to_children (el_copy , rules )
150
+ apply_prop_rules_to_children (el , rules )
146
151
147
- return el_copy
152
+ return el
0 commit comments