Skip to content

Commit bc01391

Browse files
TLINDEN and Thomas von Dein
authored
Fix ad condition parsing (#118)
* fix #117: use the details slice and a predefined set of conditions to properly extract the condition
* also added the type part of the detail content (original German label: "Art")
---------
Co-authored-by: Thomas von Dein <tom@vondein.org>
1 parent cd3d00a commit bc01391

File tree

10 files changed

+40
-14
lines changed

10 files changed

+40
-14
lines changed

ad.go

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,9 @@ type Ad struct {
3131
Title string `goquery:"h1"`
3232
Slug string
3333
ID string
34-
Condition string `goquery:".addetailslist--detail--value,text"`
34+
Details []string `goquery:".addetailslist--detail--value,text"`
35+
Condition string // post processed from details
36+
Type string // post processed from details
3537
Category string
3638
CategoryTree []string `goquery:".breadcrump-link,text"`
3739
Price string `goquery:"h2#viewad-price"`
@@ -56,6 +58,9 @@ func (ad *Ad) LogValue() slog.Value {
5658
)
5759
}
5860

61+
// static set of conditions available, used for post processing details
62+
var CONDITIONS = []string{"Neu", "Gut", "Sehr Gut", "In Ordnung"}
63+
5964
// check for completeness. I declared these fields to be mandatory
6065
// (though I really don't know if they really are). I consider images
6166
// and meta optional. So, if either of the checked fields here is

config.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,17 +34,17 @@ import (
3434
)
3535

3636
const (
37-
VERSION string = "0.3.13"
37+
VERSION string = "0.3.14"
3838
Baseuri string = "https://www.kleinanzeigen.de"
3939
Listuri string = "/s-bestandsliste.html"
4040
Defaultdir string = "."
4141

4242
DefaultTemplate string = "Title: {{.Title}}\nPrice: {{.Price}}\nId: {{.ID}}\n" +
43-
"Category: {{.Category}}\nCondition: {{.Condition}}\n" +
43+
"Category: {{.Category}}\nCondition: {{.Condition}}\nType: {{.Type}}\n" +
4444
"Created: {{.Created}}\nExpire: {{.Expire}}\n\n{{.Text}}\n"
4545

4646
DefaultTemplateWin string = "Title: {{.Title}}\r\nPrice: {{.Price}}\r\nId: {{.ID}}\r\n" +
47-
"Category: {{.Category}}\r\nCondition: {{.Condition}}\r\n" +
47+
"Category: {{.Category}}\r\nCondition: {{.Condition}}\r\nType: {{.Type}}\r\n" +
4848
"Created: {{.Created}}\r\nExpires: {{.Expire}}\r\n\r\n{{.Text}}\r\n"
4949

5050
DefaultUserAgent string = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " +

example.conf

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ outdir = "test"
2323
#Id: {{.Id}}
2424
#Category: {{.Category}}
2525
#Condition: {{.Condition}}
26+
#Type: {{.Type}}
2627
#Created: {{.Created}}
2728

2829
#{{.Text}}

go.mod

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ require (
2323

2424
require (
2525
github.com/PuerkitoBio/goquery v1.5.1 // indirect
26+
github.com/alecthomas/repr v0.4.0 // indirect
2627
github.com/andybalholm/cascadia v1.1.0 // indirect
2728
github.com/fatih/color v1.16.0 // indirect
2829
github.com/fsnotify/fsnotify v1.7.0 // indirect

go.sum

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@ astuart.co/goq v1.0.0/go.mod h1:+fokcnFrO8Pw2fj8drdStJvzoMFebJH69rw8IC21rno=
33
github.com/PuerkitoBio/goquery v1.5.0/go.mod h1:qD2PgZ9lccMbQlc7eEOjaeRlFQON7xY8kdmcsrnKqMg=
44
github.com/PuerkitoBio/goquery v1.5.1 h1:PSPBGne8NIUWw+/7vFBV+kG2J/5MOjbzc7154OaKCSE=
55
github.com/PuerkitoBio/goquery v1.5.1/go.mod h1:GsLWisAFVj4WgDibEWF4pvYnkVQBpKBKeU+7zCJoLcc=
6+
github.com/alecthomas/repr v0.4.0 h1:GhI2A8MACjfegCPVq9f1FLvIBS+DrQ2KQBFZP1iFzXc=
7+
github.com/alecthomas/repr v0.4.0/go.mod h1:Fr0507jx4eOXV7AlPV6AVZLYrLIuIeSOWtW57eE/O/4=
68
github.com/andybalholm/cascadia v1.0.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
79
github.com/andybalholm/cascadia v1.1.0 h1:BuuO6sSfQNFRu1LppgbD25Hr2vLYW25JvxHs5zzsLTo=
810
github.com/andybalholm/cascadia v1.1.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=

kleingebaeck.1

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,7 @@
133133
.\" ========================================================================
134134
.\"
135135
.IX Title "KLEINGEBAECK 1"
136-
.TH KLEINGEBAECK 1 "2024-02-10" "1" "User Commands"
136+
.TH KLEINGEBAECK 1 "2025-02-06" "1" "User Commands"
137137
.\" For nroff, turn off justification. Always turn off hyphenation; it makes
138138
.\" way too many mistakes in technical documents.
139139
.if n .ad l
@@ -174,7 +174,7 @@ well. We use \s-1TOML\s0 as our configuration language. See
174174
.PP
175175
Format is pretty simple:
176176
.PP
177-
.Vb 11
177+
.Vb 12
178178
\& user = 1010101
179179
\& loglevel = verbose
180180
\& outdir = "test"
@@ -185,6 +185,7 @@ Format is pretty simple:
185185
\& Id: {{.ID}}
186186
\& Category: {{.Category}}
187187
\& Condition: {{.Condition}}
188+
\& Type: {{.Type}}
188189
\& Created: {{.Created}}
189190
\&
190191
\& {{.Text}}
@@ -267,12 +268,13 @@ variables as the ad name template above.
267268
.PP
268269
This is the default template:
269270
.PP
270-
.Vb 7
271+
.Vb 8
271272
\& Title: {{.Title}}
272273
\& Price: {{.Price}}
273274
\& Id: {{.ID}}
274275
\& Category: {{.Category}}
275276
\& Condition: {{.Condition}}
277+
\& Type: {{.Type}}
276278
\& Created: {{.Created}}
277279
\& Expire: {{.Expire}}
278280
\&

kleingebaeck.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ CONFIGURATION
4646
Id: {{.ID}}
4747
Category: {{.Category}}
4848
Condition: {{.Condition}}
49+
Type: {{.Type}}
4950
Created: {{.Created}}
5051
5152
{{.Text}}
@@ -111,6 +112,7 @@ TEMPLATES
111112
Id: {{.ID}}
112113
Category: {{.Category}}
113114
Condition: {{.Condition}}
115+
Type: {{.Type}}
114116
Created: {{.Created}}
115117
Expire: {{.Expire}}
116118

kleingebaeck.pod

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ Format is pretty simple:
4646
Id: {{.ID}}
4747
Category: {{.Category}}
4848
Condition: {{.Condition}}
49+
Type: {{.Type}}
4950
Created: {{.Created}}
5051

5152
{{.Text}}
@@ -131,6 +132,7 @@ This is the default template:
131132
Id: {{.ID}}
132133
Category: {{.Category}}
133134
Condition: {{.Condition}}
135+
Type: {{.Type}}
134136
Created: {{.Created}}
135137
Expire: {{.Expire}}
136138

main_test.go

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -256,14 +256,15 @@ type AdConfig struct {
256256
Images []string // files in ./t/
257257
}
258258

259+
// used to generate ad listings returned by httpmock using templates
259260
var adsrc = []AdConfig{
260261
{
261262
Title: "First Ad",
262263
ID: "1", Price: "5€",
263264
Category: "Klimbim",
264265
Text: "Thing to sale",
265266
Slug: "first-ad",
266-
Condition: "works",
267+
Condition: "Sehr Gut",
267268
Created: "Yesterday",
268269
Images: []string{"t/1.jpg", "t/2.jpg"},
269270
},
@@ -273,7 +274,7 @@ var adsrc = []AdConfig{
273274
Category: "Kram",
274275
Text: "Thing to sale",
275276
Slug: "second-ad",
276-
Condition: "works",
277+
Condition: "Gut",
277278
Created: "Yesterday",
278279
Images: []string{"t/1.jpg", "t/2.jpg"},
279280
},
@@ -284,7 +285,7 @@ var adsrc = []AdConfig{
284285
Category: "Kuddelmuddel",
285286
Text: "Thing to sale",
286287
Slug: "third-ad",
287-
Condition: "works",
288+
Condition: "In Ordnung",
288289
Created: "Yesterday",
289290
Images: []string{"t/1.jpg", "t/2.jpg"},
290291
},
@@ -295,7 +296,7 @@ var adsrc = []AdConfig{
295296
Category: "Krempel",
296297
Text: "Thing to sale",
297298
Slug: "fourth-ad",
298-
Condition: "works",
299+
Condition: "Neu",
299300
Created: "Yesterday",
300301
Images: []string{"t/1.jpg", "t/2.jpg"},
301302
},
@@ -306,7 +307,7 @@ var adsrc = []AdConfig{
306307
Category: "Kladderadatsch",
307308
Text: "Thing to sale",
308309
Slug: "fifth-ad",
309-
Condition: "works",
310+
Condition: "Sehr Gut",
310311
Created: "Yesterday",
311312
Images: []string{"t/1.jpg", "t/2.jpg"},
312313
},
@@ -317,7 +318,7 @@ var adsrc = []AdConfig{
317318
Category: "Klunker",
318319
Text: "Thing to sale",
319320
Slug: "sixth-ad",
320-
Condition: "works",
321+
Condition: "Sehr Gut",
321322
Created: "Yesterday",
322323
Images: []string{"t/1.jpg", "t/2.jpg"},
323324
},
@@ -328,7 +329,7 @@ var adsrc = []AdConfig{
328329
Category: "Klunker",
329330
Text: "Thing to sale",
330331
Slug: "seventh-ad",
331-
Condition: "works",
332+
Condition: "Sehr Gut",
332333
Created: "Yesterday",
333334
Images: []string{"t/1.png", "t/1.gif", "t/1.webp", "t/1.jpg"},
334335
},

scrape.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import (
2222
"fmt"
2323
"log/slog"
2424
"path/filepath"
25+
"slices"
2526
"strconv"
2627
"strings"
2728
"time"
@@ -124,6 +125,15 @@ func ScrapeAd(fetch *Fetcher, uri string) error {
124125
return fmt.Errorf("could not extract ad data from page, got empty struct")
125126
}
126127

128+
for _, detail := range advertisement.Details {
129+
if slices.Contains(CONDITIONS, detail) {
130+
advertisement.Condition = detail
131+
} else {
132+
advertisement.Type = detail
133+
}
134+
135+
}
136+
127137
advertisement.CalculateExpire()
128138

129139
// prepare ad dir name

0 commit comments

Comments
 (0)