Skip to content

Commit 5ee5665

Browse files
committed
Merge branch '1.0.x' into 1.1.x
2 parents ddb969e + 329b57b commit 5ee5665

File tree

3 files changed

+125
-2
lines changed

3 files changed

+125
-2
lines changed
Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
from lxml import etree
2+
from tracardi.service.plugin.domain.register import Plugin, Spec, MetaData, Documentation, PortDoc, Form, FormGroup, \
3+
FormField, FormComponent
4+
from tracardi.service.plugin.domain.result import Result
5+
from tracardi.service.plugin.domain.config import PluginConfig
6+
from tracardi.service.plugin.runner import ActionRunner
7+
from pydantic import field_validator
8+
from tracardi.domain.profile import Profile
9+
from tracardi.service.tracardi_http_client import HttpClient
10+
11+
from bs4 import BeautifulSoup
12+
13+
class Configuration(PluginConfig):
    """Settings for the BeautifulSoup action.

    `html` is used at run time as the key passed to the dot accessor to
    obtain the markup; `method` names the BeautifulSoup operation to apply.
    """

    # Reference used to fetch the HTML from the dot accessor.
    html: str
    # Operation to perform on the parsed document.
    method: str = "get_text"

    @field_validator('html')
    @classmethod
    def html_must_not_be_empty(cls, value):
        """Return `value` unchanged, or raise ValueError when it is blank."""
        if value.strip():
            return value
        raise ValueError("HTML must not be empty.")
23+
24+
def validate(config: dict):
    """Build a Configuration from the raw `config` mapping.

    Any validation error raised while constructing Configuration
    propagates to the caller.
    """
    settings = Configuration(**config)
    return settings
26+
27+
class BeautifulSoupAction(ActionRunner):
    """Workflow action that extracts plain text from HTML with BeautifulSoup."""

    config: Configuration

    async def set_up(self, init):
        """Validate the raw `init` mapping and keep it as `self.config`."""
        self.config = validate(init)

    async def run(self, payload: dict, in_edge=None) -> Result:
        """Parse the referenced HTML and return its text.

        Returns a Result on port "result" with {'text': ...} on success, or
        on port "error" with {"message": ...} when any step raises.
        """
        accessor = self._get_dot_accessor(payload)

        try:
            # config.html is a reference resolved through the dot accessor,
            # not the markup itself.
            markup = accessor[self.config.html]
            document = BeautifulSoup(markup, 'html.parser')

            # Only "get_text" is implemented; anything else is reported
            # through the error port by the handler below.
            if self.config.method != "get_text":
                raise ValueError(f"Unsupported method: {self.config.method}")

            return Result(port='result', value={'text': document.get_text()})

        except Exception as e:
            return Result(value={"message": str(e)}, port="error")
54+
55+
def register() -> Plugin:
    """Return the Plugin record describing BeautifulSoupAction.

    The record declares the action's module and class, its input and
    output ports, the default configuration (`init`), the configuration
    form, and the metadata/documentation attached to the plugin.
    """
    return Plugin(
        start=False,
        spec=Spec(
            module=__name__,
            className=BeautifulSoupAction.__name__,
            inputs=["payload"],
            outputs=["result", "error"],
            version="1.0.4",
            init={
                "html": "",
                # Matches Configuration's default so the "method" form field
                # below has a corresponding initial value.
                "method": "get_text",
            },
            form=Form(groups=[
                FormGroup(
                    name="Beautiful Soup configuration",
                    fields=[
                        FormField(
                            id="html",
                            name="HTML",
                            description="The HTML to be converted.",
                            component=FormComponent(type="dotPath", props={
                                "label": "HTML"
                            }),
                        ),
                        FormField(
                            id="method",
                            name="Method",
                            description="The BeautifulSoup method to apply to the HTML",
                            component=FormComponent(type="select", props={
                                "label": "Method",
                                "items": {
                                    "get_text": "get_text",
                                }
                            })
                        ),
                    ]),
            ]),
            license="MIT",
            author="Matt Cameron",
            manual="beautifulsoup",
        ),
        metadata=MetaData(
            name='BeautifulSoup',
            desc='Converts HTML to text.',
            icon='BeautifulSoup',
            group=['Data Processing'],
            documentation=Documentation(
                inputs={
                    "payload": PortDoc(desc="This port takes payload object.")
                },
                outputs={
                    # Previously described a Sitemap service response, which
                    # this action never produces.
                    "result": PortDoc(desc="Returns the text extracted from the HTML."),
                    "error": PortDoc(desc="Returns error message if plugin fails.")
                }
            )
        )
    )
113+

tracardi/process_engine/action/v1/connectors/html/fetch/plugin.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ async def run(self, payload: dict, in_edge=None) -> Result:
5555

5656
async with client.request(
5757
method=self.config.method,
58-
url=str(self.config.url),
58+
url=str(dot[self.config.url]),
5959
headers=headers,
6060
cookies=cookies,
6161
ssl=self.config.ssl_check,

tracardi/service/setup/setup_plugins.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -791,7 +791,17 @@
791791
},
792792
resource=None),
793793
),
794-
794+
795+
"tracardi.process_engine.action.v1.beautifulsoup_action": PluginMetadata(
796+
test=PluginTest(
797+
init={
798+
'HTML': '',
799+
'method': 'get_text',
800+
801+
},
802+
resource=None),
803+
),
804+
795805
}
796806

797807
if License.has_service(SCHEDULER):

0 commit comments

Comments
 (0)