Skip to content

Commit 6a1b157

Browse files
committed
feat: Scrape any KitchenOwl instance
Closes #483
1 parent: 79b82dd · commit: 6a1b157

File tree

1 file changed

+57
-11
lines changed

1 file changed

+57
-11
lines changed

backend/app/service/recipe_scraping.py

Lines changed: 57 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,4 @@
1+
import hashlib
12
import re
23
from recipe_scrapers import scrape_html
34
from recipe_scrapers._exceptions import SchemaOrgException
@@ -9,39 +10,75 @@
910
from app.models import Recipe, Item, Household
1011

1112

12-
def scrapePublic(url: str, household: Household) -> dict | None:
13+
def scrapePublic(url: str, html: str, household: Household) -> dict | None:
1314
try:
14-
scraper = scrape_html(None, url, online=True, supported_only=False, wild_mode=True)
15+
scraper = scrape_html(html, url, supported_only=False, wild_mode=True)
1516
except:
1617
return None
1718
recipe = Recipe()
1819
recipe.name = scraper.title()
1920
try:
2021
recipe.time = int(scraper.total_time())
21-
except (NotImplementedError, ValueError, TypeError, AttributeError, SchemaOrgException):
22+
except (
23+
NotImplementedError,
24+
ValueError,
25+
TypeError,
26+
AttributeError,
27+
SchemaOrgException,
28+
):
2229
pass
2330
try:
2431
recipe.cook_time = int(scraper.cook_time())
25-
except (NotImplementedError, ValueError, TypeError, AttributeError, SchemaOrgException):
32+
except (
33+
NotImplementedError,
34+
ValueError,
35+
TypeError,
36+
AttributeError,
37+
SchemaOrgException,
38+
):
2639
pass
2740
try:
2841
recipe.prep_time = int(scraper.prep_time())
29-
except (NotImplementedError, ValueError, TypeError, AttributeError, SchemaOrgException):
42+
except (
43+
NotImplementedError,
44+
ValueError,
45+
TypeError,
46+
AttributeError,
47+
SchemaOrgException,
48+
):
3049
pass
3150
try:
3251
yields = re.search(r"\d*", scraper.yields())
3352
if yields:
3453
recipe.yields = int(yields.group())
35-
except (NotImplementedError, ValueError, TypeError, AttributeError, SchemaOrgException):
54+
except (
55+
NotImplementedError,
56+
ValueError,
57+
TypeError,
58+
AttributeError,
59+
SchemaOrgException,
60+
):
3661
pass
3762
description = ""
3863
try:
3964
description = scraper.description() + "\n\n"
40-
except (NotImplementedError, ValueError, TypeError, AttributeError, SchemaOrgException):
65+
except (
66+
NotImplementedError,
67+
ValueError,
68+
TypeError,
69+
AttributeError,
70+
SchemaOrgException,
71+
):
4172
pass
4273
try:
4374
description = description + scraper.instructions()
44-
except (NotImplementedError, ValueError, TypeError, AttributeError, SchemaOrgException):
75+
except (
76+
NotImplementedError,
77+
ValueError,
78+
TypeError,
79+
AttributeError,
80+
SchemaOrgException,
81+
):
4582
pass
4683
recipe.description = description
4784
recipe.photo = scraper.image()
@@ -114,11 +151,20 @@ def scrape(url: str, household: Household) -> dict | None:
114151
return scrapeLocal(int(localMatch.group(2)), household)
115152

116153
kitchenowlMatch = re.fullmatch(
117-
r"(https:\/\/app\.kitchenowl\.org)\/recipe\/(\d+)", url
154+
r"(https?:\/\/app\.kitchenowl\.org|.+)\/recipe\/(\d+)", url
118155
)
119-
if kitchenowlMatch:
156+
if kitchenowlMatch and url.startswith("https://app.kitchenowl.org/"):
157+
return scrapeKitchenOwl(
158+
url, "https://app.kitchenowl.org/api", int(kitchenowlMatch.group(2))
159+
)
160+
161+
res = requests.get(url=url)
162+
if res.status_code != requests.codes.ok:
163+
return None
164+
165+
if kitchenowlMatch and hashlib.sha256(res.text.encode()).hexdigest() == "3fc2629051e92fa54c26cf5e44efac1014eb89a2eb46dd644dae4f3db5cd3eaa":
120166
return scrapeKitchenOwl(
121167
url, kitchenowlMatch.group(1) + "/api", int(kitchenowlMatch.group(2))
122168
)
123169

124-
return scrapePublic(url, household)
170+
return scrapePublic(url, res.text, household)

0 commit comments

Comments
 (0)