Skip to content

Commit c41c7ae

Browse files
always quit driver and only get lidl sublinks with most value
1 parent afb1cda commit c41c7ae

File tree

2 files changed

+14
-8
lines changed

2 files changed

+14
-8
lines changed

src/main/kotlin/com/stefanbratanov/sofiasupermarketsapi/brochure/FantasticoBrochureDownloader.kt

Lines changed: 5 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -54,10 +54,9 @@ class FantasticoBrochureDownloader(@Value("\${fantastico.url}") private val url:
5454
val htmlDoc = getHtmlDocument(url)
5555

5656
val driver = ChromeDriver(options)
57-
5857
val waitDriver = WebDriverWait(driver, Duration.ofSeconds(30))
5958

60-
val brochures =
59+
return try {
6160
htmlDoc
6261
.select("div.brochure-container.first div.hold-options")
6362
.filter {
@@ -81,6 +80,7 @@ class FantasticoBrochureDownloader(@Value("\${fantastico.url}") private val url:
8180

8281
// loading the flipping book
8382
for (attempt in 1..3) try {
83+
log.info("Loading flipping book from {}", flippingBookUrl)
8484
driver.get(flippingBookUrl)
8585
break
8686
} catch (_: TimeoutException) {
@@ -110,10 +110,9 @@ class FantasticoBrochureDownloader(@Value("\${fantastico.url}") private val url:
110110

111111
Brochure(downloadPath, dateRange?.first, dateRange?.second)
112112
}
113-
114-
driver.quit()
115-
116-
return brochures
113+
} finally {
114+
driver.quit()
115+
}
117116
}
118117

119118
private fun extractDateRange(description: String?): Pair<LocalDate?, LocalDate?>? {

src/main/kotlin/com/stefanbratanov/sofiasupermarketsapi/links/LidlSublinksScraper.kt

Lines changed: 9 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -12,14 +12,21 @@ import org.springframework.stereotype.Component
1212
@Component
1313
class LidlSublinksScraper(@Value("\${lidl.base.url}") private val baseUrl: URL) : SublinksScraper {
1414

15+
private val sublinksToAccept =
16+
listOf("niska-tsena-visoko-kachestvo".toRegex(), "lidl-plus".toRegex())
17+
1518
override fun getSublinks(): List<URL> {
1619
log.info("Scraping {} for sublinks", baseUrl)
1720

1821
return getHtmlDocument(baseUrl)
1922
.select("li.AHeroStageItems__Item > a")
20-
.map {
23+
.mapNotNull {
2124
val href = it.attr("href")
22-
baseUrl.toURI().resolve(href).toURL()
25+
if (sublinksToAccept.none { rgx -> href.contains(rgx) }) {
26+
null
27+
} else {
28+
baseUrl.toURI().resolve(href).toURL()
29+
}
2330
}
2431
.distinct()
2532
}

0 commit comments

Comments
 (0)