Skip to content

Commit 9c1a97d

Browse files
committed
Implement fastrp endpoint
1 parent 3f66571 commit 9c1a97d

File tree

5 files changed

+1037
-0
lines changed

5 files changed

+1037
-0
lines changed
Lines changed: 374 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,374 @@
1+
from __future__ import annotations
2+
3+
from abc import ABC, abstractmethod
4+
from typing import Any, List, Optional, Union
5+
6+
from pandas import DataFrame
7+
from pydantic import BaseModel, ConfigDict
8+
from pydantic.alias_generators import to_camel
9+
10+
from ...graph.graph_object import Graph
11+
from .estimation_result import EstimationResult
12+
13+
14+
class FastRPEndpoints(ABC):
    """
    Abstract base class defining the API for the FastRP (Fast Random Projection) algorithm.

    FastRP is a node embedding algorithm that creates vector representations of nodes
    by combining random projections and iterative neighbor aggregation.

    Concrete subclasses must implement every execution mode declared here:
    ``mutate``, ``stats``, ``stream``, ``write`` and ``estimate``.
    """

    @abstractmethod
    def mutate(
        self,
        G: Graph,
        mutate_property: str,
        embedding_dimension: int,
        iteration_weights: Optional[List[float]] = None,
        normalization_strength: Optional[float] = None,
        node_self_influence: Optional[float] = None,
        property_ratio: Optional[float] = None,
        feature_properties: Optional[List[str]] = None,
        relationship_types: Optional[List[str]] = None,
        node_labels: Optional[List[str]] = None,
        sudo: Optional[bool] = None,
        log_progress: Optional[bool] = None,
        username: Optional[str] = None,
        concurrency: Optional[Any] = None,
        job_id: Optional[Any] = None,
        relationship_weight_property: Optional[str] = None,
        random_seed: Optional[Any] = None,
    ) -> FastRPMutateResult:
        """
        Executes the FastRP algorithm and writes the results back to the graph as a node property.

        Parameters
        ----------
        G : Graph
            The graph to run the algorithm on
        mutate_property : str
            The property name to store the FastRP embeddings for each node
        embedding_dimension : int
            The dimension of the generated embeddings
        iteration_weights : Optional[List[float]], default=None
            Weights for each iteration. Controls the influence of each iteration on the final embedding
        normalization_strength : Optional[float], default=None
            The normalization strength parameter controls how much the embedding is normalized
        node_self_influence : Optional[float], default=None
            The influence of the node's own features on its embedding
        property_ratio : Optional[float], default=None
            The ratio of node properties to use in the embedding
        feature_properties : Optional[List[str]], default=None
            List of node properties to use as features in the embedding
        relationship_types : Optional[List[str]], default=None
            The relationship types used to select relationships for this algorithm run
        node_labels : Optional[List[str]], default=None
            The node labels used to select nodes for this algorithm run
        sudo : Optional[bool], default=None
            Override memory estimation limits
        log_progress : Optional[bool], default=None
            Whether to log progress
        username : Optional[str], default=None
            The username to attribute the procedure run to
        concurrency : Optional[Any], default=None
            The number of concurrent threads
        job_id : Optional[Any], default=None
            An identifier for the job
        relationship_weight_property : Optional[str], default=None
            The property name that contains weight
        random_seed : Optional[Any], default=None
            Random seed for reproducible results

        Returns
        -------
        FastRPMutateResult
            Algorithm metrics and statistics
        """

    @abstractmethod
    def stats(
        self,
        G: Graph,
        embedding_dimension: int,
        iteration_weights: Optional[List[float]] = None,
        normalization_strength: Optional[float] = None,
        node_self_influence: Optional[float] = None,
        property_ratio: Optional[float] = None,
        feature_properties: Optional[List[str]] = None,
        relationship_types: Optional[List[str]] = None,
        node_labels: Optional[List[str]] = None,
        sudo: Optional[bool] = None,
        log_progress: Optional[bool] = None,
        username: Optional[str] = None,
        concurrency: Optional[Any] = None,
        job_id: Optional[Any] = None,
        relationship_weight_property: Optional[str] = None,
        random_seed: Optional[Any] = None,
    ) -> FastRPStatsResult:
        """
        Executes the FastRP algorithm and returns result statistics without writing the result to Neo4j.

        Parameters
        ----------
        G : Graph
            The graph to run the algorithm on
        embedding_dimension : int
            The dimension of the generated embeddings
        iteration_weights : Optional[List[float]], default=None
            Weights for each iteration. Controls the influence of each iteration on the final embedding
        normalization_strength : Optional[float], default=None
            The normalization strength parameter controls how much the embedding is normalized
        node_self_influence : Optional[float], default=None
            The influence of the node's own features on its embedding
        property_ratio : Optional[float], default=None
            The ratio of node properties to use in the embedding
        feature_properties : Optional[List[str]], default=None
            List of node properties to use as features in the embedding
        relationship_types : Optional[List[str]], default=None
            The relationship types used to select relationships for this algorithm run
        node_labels : Optional[List[str]], default=None
            The node labels used to select nodes for this algorithm run
        sudo : Optional[bool], default=None
            Override memory estimation limits
        log_progress : Optional[bool], default=None
            Whether to log progress
        username : Optional[str], default=None
            The username to attribute the procedure run to
        concurrency : Optional[Any], default=None
            The number of concurrent threads
        job_id : Optional[Any], default=None
            An identifier for the job
        relationship_weight_property : Optional[str], default=None
            The property name that contains weight
        random_seed : Optional[Any], default=None
            Random seed for reproducible results

        Returns
        -------
        FastRPStatsResult
            Algorithm statistics
        """

    @abstractmethod
    def stream(
        self,
        G: Graph,
        embedding_dimension: int,
        iteration_weights: Optional[List[float]] = None,
        normalization_strength: Optional[float] = None,
        node_self_influence: Optional[float] = None,
        property_ratio: Optional[float] = None,
        feature_properties: Optional[List[str]] = None,
        relationship_types: Optional[List[str]] = None,
        node_labels: Optional[List[str]] = None,
        sudo: Optional[bool] = None,
        log_progress: Optional[bool] = None,
        username: Optional[str] = None,
        concurrency: Optional[Any] = None,
        job_id: Optional[Any] = None,
        relationship_weight_property: Optional[str] = None,
        random_seed: Optional[Any] = None,
    ) -> DataFrame:
        """
        Executes the FastRP algorithm and returns the results as a stream.

        Parameters
        ----------
        G : Graph
            The graph to run the algorithm on
        embedding_dimension : int
            The dimension of the generated embeddings
        iteration_weights : Optional[List[float]], default=None
            Weights for each iteration. Controls the influence of each iteration on the final embedding
        normalization_strength : Optional[float], default=None
            The normalization strength parameter controls how much the embedding is normalized
        node_self_influence : Optional[float], default=None
            The influence of the node's own features on its embedding
        property_ratio : Optional[float], default=None
            The ratio of node properties to use in the embedding
        feature_properties : Optional[List[str]], default=None
            List of node properties to use as features in the embedding
        relationship_types : Optional[List[str]], default=None
            The relationship types used to select relationships for this algorithm run
        node_labels : Optional[List[str]], default=None
            The node labels used to select nodes for this algorithm run
        sudo : Optional[bool], default=None
            Override memory estimation limits
        log_progress : Optional[bool], default=None
            Whether to log progress
        username : Optional[str], default=None
            The username to attribute the procedure run to
        concurrency : Optional[Any], default=None
            The number of concurrent threads
        job_id : Optional[Any], default=None
            An identifier for the job
        relationship_weight_property : Optional[str], default=None
            The property name that contains weight
        random_seed : Optional[Any], default=None
            Random seed for reproducible results

        Returns
        -------
        DataFrame
            DataFrame with node IDs and their FastRP embeddings
        """

    @abstractmethod
    def write(
        self,
        G: Graph,
        write_property: str,
        embedding_dimension: int,
        iteration_weights: Optional[List[float]] = None,
        normalization_strength: Optional[float] = None,
        node_self_influence: Optional[float] = None,
        property_ratio: Optional[float] = None,
        feature_properties: Optional[List[str]] = None,
        relationship_types: Optional[List[str]] = None,
        node_labels: Optional[List[str]] = None,
        sudo: Optional[bool] = None,
        log_progress: Optional[bool] = None,
        username: Optional[str] = None,
        concurrency: Optional[Any] = None,
        job_id: Optional[Any] = None,
        relationship_weight_property: Optional[str] = None,
        random_seed: Optional[Any] = None,
        write_concurrency: Optional[int] = None,
    ) -> FastRPWriteResult:
        """
        Executes the FastRP algorithm and writes the results to Neo4j.

        Parameters
        ----------
        G : Graph
            The graph to run the algorithm on
        write_property : str
            The property name to write the FastRP embeddings for each node
        embedding_dimension : int
            The dimension of the generated embeddings
        iteration_weights : Optional[List[float]], default=None
            Weights for each iteration. Controls the influence of each iteration on the final embedding
        normalization_strength : Optional[float], default=None
            The normalization strength parameter controls how much the embedding is normalized
        node_self_influence : Optional[float], default=None
            The influence of the node's own features on its embedding
        property_ratio : Optional[float], default=None
            The ratio of node properties to use in the embedding
        feature_properties : Optional[List[str]], default=None
            List of node properties to use as features in the embedding
        relationship_types : Optional[List[str]], default=None
            The relationship types used to select relationships for this algorithm run
        node_labels : Optional[List[str]], default=None
            The node labels used to select nodes for this algorithm run
        sudo : Optional[bool], default=None
            Override memory estimation limits
        log_progress : Optional[bool], default=None
            Whether to log progress
        username : Optional[str], default=None
            The username to attribute the procedure run to
        concurrency : Optional[Any], default=None
            The number of concurrent threads
        job_id : Optional[Any], default=None
            An identifier for the job
        relationship_weight_property : Optional[str], default=None
            The property name that contains weight
        random_seed : Optional[Any], default=None
            Random seed for reproducible results
        write_concurrency : Optional[int], default=None
            The number of concurrent threads used for writing

        Returns
        -------
        FastRPWriteResult
            Algorithm metrics and statistics
        """

    @abstractmethod
    def estimate(
        self,
        G: Union[Graph, dict[str, Any]],
        embedding_dimension: int,
        iteration_weights: Optional[List[float]] = None,
        normalization_strength: Optional[float] = None,
        node_self_influence: Optional[float] = None,
        property_ratio: Optional[float] = None,
        feature_properties: Optional[List[str]] = None,
        relationship_types: Optional[List[str]] = None,
        node_labels: Optional[List[str]] = None,
        concurrency: Optional[Any] = None,
        relationship_weight_property: Optional[str] = None,
        random_seed: Optional[Any] = None,
    ) -> EstimationResult:
        """
        Returns an estimation of the memory consumption for that procedure.

        Parameters
        ----------
        G : Union[Graph, dict[str, Any]]
            The graph to run the algorithm on or a dictionary representing the graph.
        embedding_dimension : int
            The dimension of the generated embeddings
        iteration_weights : Optional[List[float]], default=None
            Weights for each iteration. Controls the influence of each iteration on the final embedding
        normalization_strength : Optional[float], default=None
            The normalization strength parameter controls how much the embedding is normalized
        node_self_influence : Optional[float], default=None
            The influence of the node's own features on its embedding
        property_ratio : Optional[float], default=None
            The ratio of node properties to use in the embedding
        feature_properties : Optional[List[str]], default=None
            List of node properties to use as features in the embedding
        relationship_types : Optional[List[str]], default=None
            The relationship types used to select relationships for this algorithm run
        node_labels : Optional[List[str]], default=None
            The node labels used to select nodes for this algorithm run
        concurrency : Optional[Any], default=None
            The number of concurrent threads
        relationship_weight_property : Optional[str], default=None
            The property name that contains weight
        random_seed : Optional[Any], default=None
            Random seed for reproducible results

        Returns
        -------
        EstimationResult
            Memory estimation details
        """
338+
339+
340+
class FastRPMutateResult(BaseModel):
    """
    Result model returned by the ``mutate`` mode of FastRP.

    Field aliases are generated with pydantic's ``to_camel`` alias generator.
    """

    # Derive a camelCase alias for every snake_case field declared below.
    model_config = ConfigDict(alias_generator=to_camel)

    pre_processing_millis: int
    compute_millis: int
    mutate_millis: int
    node_properties_written: int
    configuration: dict[str, Any]

    def __getitem__(self, item: str) -> Any:
        """
        Return the value stored under ``item`` in the instance dictionary.

        Raises ``KeyError`` when ``item`` is not a key of that dictionary.
        """
        instance_values = vars(self)
        return instance_values[item]
351+
352+
353+
class FastRPStatsResult(BaseModel):
    """
    Result model returned by the ``stats`` mode of FastRP.

    Field aliases are generated with pydantic's ``to_camel`` alias generator.
    """

    # Derive a camelCase alias for every snake_case field declared below.
    model_config = ConfigDict(alias_generator=to_camel)

    pre_processing_millis: int
    compute_millis: int
    configuration: dict[str, Any]

    def __getitem__(self, item: str) -> Any:
        """
        Return the value stored under ``item`` in the instance dictionary.

        Raises ``KeyError`` when ``item`` is not a key of that dictionary.
        """
        instance_values = vars(self)
        return instance_values[item]
362+
363+
364+
class FastRPWriteResult(BaseModel):
    """
    Result model returned by the ``write`` mode of FastRP.

    Field aliases are generated with pydantic's ``to_camel`` alias generator.
    """

    # Derive a camelCase alias for every snake_case field declared below.
    model_config = ConfigDict(alias_generator=to_camel)

    pre_processing_millis: int
    compute_millis: int
    write_millis: int
    node_properties_written: int
    configuration: dict[str, Any]

    def __getitem__(self, item: str) -> Any:
        """
        Return the value stored under ``item`` in the instance dictionary.

        Raises ``KeyError`` when ``item`` is not a key of that dictionary.
        """
        instance_values = vars(self)
        return instance_values[item]

0 commit comments

Comments
 (0)