@@ -3,16 +3,11 @@ import std/httpclient
3
3
import std/ os
4
4
import std/ sequtils
5
5
import std/ strutils
6
- import std/ sugar
7
- import std/ osproc
8
- import std/ streams
9
6
import std/ hashes
10
7
# import chame/minidom
11
- import std/ htmlparser
12
8
import std/ xmltree
13
9
import std/ strtabs
14
-
15
- import pretty
10
+ import pkg/ htmlparser
16
11
17
12
type HtmlLoader* = ref object of Agent
18
13
@@ -119,7 +114,7 @@ proc loadPage*(loader: HtmlLoader, url: string) {.slot.} =
119
114
if " submission" in getAttr(elem, " class" ):
120
115
if sub != nil : subs.add(move sub)
121
116
sub = TableSubmission(subTr: elem)
122
-
117
+
123
118
if sub != nil and elem.attrs == nil :
124
119
sub.subTextTr = elem
125
120
@@ -166,7 +161,7 @@ proc loadPage*(loader: HtmlLoader, url: string) {.slot.} =
166
161
submission.subText.comments = parseInt(txts[0 ])
167
162
168
163
submissions.add(submission)
169
-
164
+
170
165
# for sub in submissions:
171
166
# echo $sub
172
167
emit loader.htmlDone(submissions)
@@ -192,7 +187,7 @@ proc loadPageMarkdown*(loader: HtmlLoader, url: string) {.slot.} =
192
187
else :
193
188
let client = newHttpClient(timeout= 1_000 )
194
189
let res = client.get(url)
195
-
190
+
196
191
197
192
when true :
198
193
let document = parseHTML(res.body)
@@ -208,11 +203,11 @@ proc loadPageMarkdown*(loader: HtmlLoader, url: string) {.slot.} =
208
203
" html2markdown" ,
209
204
options= {poUsePath, poStdErrToStdOut}
210
205
)
211
-
206
+
212
207
# Write HTML to stdin of html2markdown
213
208
process.inputStream.write(res.body)
214
209
process.inputStream.close()
215
-
210
+
216
211
# Read markdown from stdout
217
212
markdown = process.outputStream.readAll()
218
213
process.close()
@@ -221,7 +216,7 @@ proc loadPageMarkdown*(loader: HtmlLoader, url: string) {.slot.} =
221
216
echo " error running html2markdown: " , $ err.getStackTrace()
222
217
markdown = " error running html2markdown:\n " & $ err.msg
223
218
markdown.add " try installing html2markdown: https://github.com/JohannesKaufmann/html-to-markdown"
224
-
219
+
225
220
when isMainModule :
226
221
echo " markdown:\n " , markdown
227
222
@@ -241,4 +236,3 @@ when isMainModule:
241
236
242
237
let m = HtmlLoader()
243
238
m.loadPageMarkdown(" https://example.com" )
244
-
0 commit comments