|
| 1 | +import hashlib |
1 | 2 | import re
|
2 | 3 | from recipe_scrapers import scrape_html
|
3 | 4 | from recipe_scrapers._exceptions import SchemaOrgException
|
|
9 | 10 | from app.models import Recipe, Item, Household
|
10 | 11 |
|
11 | 12 |
|
12 |
| -def scrapePublic(url: str, household: Household) -> dict | None: |
| 13 | +def scrapePublic(url: str, html: str, household: Household) -> dict | None: |
13 | 14 | try:
|
14 |
| - scraper = scrape_html(None, url, online=True, supported_only=False, wild_mode=True) |
| 15 | + scraper = scrape_html(html, url, supported_only=False, wild_mode=True) |
15 | 16 | except:
|
16 | 17 | return None
|
17 | 18 | recipe = Recipe()
|
18 | 19 | recipe.name = scraper.title()
|
19 | 20 | try:
|
20 | 21 | recipe.time = int(scraper.total_time())
|
21 |
| - except (NotImplementedError, ValueError, TypeError, AttributeError, SchemaOrgException): |
| 22 | + except ( |
| 23 | + NotImplementedError, |
| 24 | + ValueError, |
| 25 | + TypeError, |
| 26 | + AttributeError, |
| 27 | + SchemaOrgException, |
| 28 | + ): |
22 | 29 | pass
|
23 | 30 | try:
|
24 | 31 | recipe.cook_time = int(scraper.cook_time())
|
25 |
| - except (NotImplementedError, ValueError, TypeError, AttributeError, SchemaOrgException): |
| 32 | + except ( |
| 33 | + NotImplementedError, |
| 34 | + ValueError, |
| 35 | + TypeError, |
| 36 | + AttributeError, |
| 37 | + SchemaOrgException, |
| 38 | + ): |
26 | 39 | pass
|
27 | 40 | try:
|
28 | 41 | recipe.prep_time = int(scraper.prep_time())
|
29 |
| - except (NotImplementedError, ValueError, TypeError, AttributeError, SchemaOrgException): |
| 42 | + except ( |
| 43 | + NotImplementedError, |
| 44 | + ValueError, |
| 45 | + TypeError, |
| 46 | + AttributeError, |
| 47 | + SchemaOrgException, |
| 48 | + ): |
30 | 49 | pass
|
31 | 50 | try:
|
32 | 51 | yields = re.search(r"\d*", scraper.yields())
|
33 | 52 | if yields:
|
34 | 53 | recipe.yields = int(yields.group())
|
35 |
| - except (NotImplementedError, ValueError, TypeError, AttributeError, SchemaOrgException): |
| 54 | + except ( |
| 55 | + NotImplementedError, |
| 56 | + ValueError, |
| 57 | + TypeError, |
| 58 | + AttributeError, |
| 59 | + SchemaOrgException, |
| 60 | + ): |
36 | 61 | pass
|
37 | 62 | description = ""
|
38 | 63 | try:
|
39 | 64 | description = scraper.description() + "\n\n"
|
40 |
| - except (NotImplementedError, ValueError, TypeError, AttributeError, SchemaOrgException): |
| 65 | + except ( |
| 66 | + NotImplementedError, |
| 67 | + ValueError, |
| 68 | + TypeError, |
| 69 | + AttributeError, |
| 70 | + SchemaOrgException, |
| 71 | + ): |
41 | 72 | pass
|
42 | 73 | try:
|
43 | 74 | description = description + scraper.instructions()
|
44 |
| - except (NotImplementedError, ValueError, TypeError, AttributeError, SchemaOrgException): |
| 75 | + except ( |
| 76 | + NotImplementedError, |
| 77 | + ValueError, |
| 78 | + TypeError, |
| 79 | + AttributeError, |
| 80 | + SchemaOrgException, |
| 81 | + ): |
45 | 82 | pass
|
46 | 83 | recipe.description = description
|
47 | 84 | recipe.photo = scraper.image()
|
@@ -114,11 +151,20 @@ def scrape(url: str, household: Household) -> dict | None:
|
114 | 151 | return scrapeLocal(int(localMatch.group(2)), household)
|
115 | 152 |
|
116 | 153 | kitchenowlMatch = re.fullmatch(
|
117 |
| - r"(https:\/\/app\.kitchenowl\.org)\/recipe\/(\d+)", url |
| 154 | + r"(https?:\/\/app\.kitchenowl\.org|.+)\/recipe\/(\d+)", url |
118 | 155 | )
|
119 |
| - if kitchenowlMatch: |
| 156 | + if kitchenowlMatch and url.startswith("https://app.kitchenowl.org/"): |
| 157 | + return scrapeKitchenOwl( |
| 158 | + url, "https://app.kitchenowl.org/api", int(kitchenowlMatch.group(2)) |
| 159 | + ) |
| 160 | + |
| 161 | + res = requests.get(url=url) |
| 162 | + if res.status_code != requests.codes.ok: |
| 163 | + return None |
| 164 | + |
| 165 | + if kitchenowlMatch and hashlib.sha256(res.text.encode()).hexdigest() == "3fc2629051e92fa54c26cf5e44efac1014eb89a2eb46dd644dae4f3db5cd3eaa": |
120 | 166 | return scrapeKitchenOwl(
|
121 | 167 | url, kitchenowlMatch.group(1) + "/api", int(kitchenowlMatch.group(2))
|
122 | 168 | )
|
123 | 169 |
|
124 |
| - return scrapePublic(url, household) |
| 170 | + return scrapePublic(url, res.text, household) |
0 commit comments