Skip to content

Commit 3d01461

Browse files
committed
Add hashgnn endpoints
1 parent 9c1a97d commit 3d01461

File tree

7 files changed

+901
-2
lines changed

7 files changed

+901
-2
lines changed
Lines changed: 281 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,281 @@
1+
from __future__ import annotations
2+
3+
from abc import ABC, abstractmethod
4+
from typing import Any, Dict, List, Optional, Union
5+
6+
from pandas import DataFrame
7+
from pydantic import BaseModel, ConfigDict
8+
from pydantic.alias_generators import to_camel
9+
10+
from ...graph.graph_object import Graph
11+
from .estimation_result import EstimationResult
12+
13+
14+
class HashGNNEndpoints(ABC):
    """
    Abstract base class for the HashGNN node embedding algorithm endpoints.

    Concrete subclasses provide the ``mutate``, ``stream``, ``write`` and
    ``estimate`` execution modes declared here.
    """

    @abstractmethod
    def mutate(
        self,
        G: Graph,
        iterations: int,
        embedding_density: int,
        mutate_property: str,
        output_dimension: Optional[int] = None,
        neighbor_influence: Optional[float] = None,
        generate_features: Optional[Dict[str, Any]] = None,
        binarize_features: Optional[Dict[str, Any]] = None,
        heterogeneous: Optional[bool] = None,
        feature_properties: Optional[List[str]] = None,
        random_seed: Optional[int] = None,
        relationship_types: Optional[List[str]] = None,
        node_labels: Optional[List[str]] = None,
        sudo: Optional[bool] = None,
        log_progress: Optional[bool] = None,
        username: Optional[str] = None,
        concurrency: Optional[Any] = None,
        job_id: Optional[Any] = None,
    ) -> HashGNNMutateResult:
        """
        Executes the HashGNN algorithm and writes the results back to the graph as a node property.

        Parameters
        ----------
        G : Graph
            The graph to run the algorithm on
        iterations : int
            The number of iterations to run the algorithm
        embedding_density : int
            The number of features to sample per node in each iteration
        mutate_property : str
            The name of the node property to store the embeddings
        output_dimension : Optional[int], default=None
            If given, the embeddings are projected randomly into this many dimensions
        neighbor_influence : Optional[float], default=None
            Controls how often the features of neighboring nodes are sampled
            relative to the node's own features
        generate_features : Optional[Dict[str, Any]], default=None
            Configuration map (keys ``dimension`` and ``densityLevel``) for generating
            random binary input features; intended for runs without ``feature_properties``
        binarize_features : Optional[Dict[str, Any]], default=None
            Configuration map (keys ``dimension`` and ``threshold``) for transforming
            continuous input features into binary features
        heterogeneous : Optional[bool], default=None
            Whether different relationship types should be treated differently
        feature_properties : Optional[List[str]], default=None
            The names of the node properties to use as input features
        random_seed : Optional[int], default=None
            Seed for random number generation to ensure reproducible results
        relationship_types : Optional[List[str]], default=None
            The relationship types used to select relationships for this algorithm run
        node_labels : Optional[List[str]], default=None
            The node labels used to select nodes for this algorithm run
        sudo : Optional[bool], default=None
            Override memory estimation limits
        log_progress : Optional[bool], default=None
            Whether to log progress
        username : Optional[str], default=None
            The username to attribute the procedure run to
        concurrency : Optional[Any], default=None
            The number of concurrent threads
        job_id : Optional[Any], default=None
            An identifier for the job

        Returns
        -------
        HashGNNMutateResult
            Algorithm metrics and statistics
        """

    @abstractmethod
    def stream(
        self,
        G: Graph,
        iterations: int,
        embedding_density: int,
        output_dimension: Optional[int] = None,
        neighbor_influence: Optional[float] = None,
        generate_features: Optional[Dict[str, Any]] = None,
        binarize_features: Optional[Dict[str, Any]] = None,
        heterogeneous: Optional[bool] = None,
        feature_properties: Optional[List[str]] = None,
        random_seed: Optional[int] = None,
        relationship_types: Optional[List[str]] = None,
        node_labels: Optional[List[str]] = None,
        sudo: Optional[bool] = None,
        log_progress: Optional[bool] = None,
        username: Optional[str] = None,
        concurrency: Optional[Any] = None,
        job_id: Optional[Any] = None,
    ) -> DataFrame:
        """
        Executes the HashGNN algorithm and returns the results as a stream.

        Parameters
        ----------
        G : Graph
            The graph to run the algorithm on
        iterations : int
            The number of iterations to run the algorithm
        embedding_density : int
            The number of features to sample per node in each iteration
        output_dimension : Optional[int], default=None
            If given, the embeddings are projected randomly into this many dimensions
        neighbor_influence : Optional[float], default=None
            Controls how often the features of neighboring nodes are sampled
            relative to the node's own features
        generate_features : Optional[Dict[str, Any]], default=None
            Configuration map (keys ``dimension`` and ``densityLevel``) for generating
            random binary input features; intended for runs without ``feature_properties``
        binarize_features : Optional[Dict[str, Any]], default=None
            Configuration map (keys ``dimension`` and ``threshold``) for transforming
            continuous input features into binary features
        heterogeneous : Optional[bool], default=None
            Whether different relationship types should be treated differently
        feature_properties : Optional[List[str]], default=None
            The names of the node properties to use as input features
        random_seed : Optional[int], default=None
            Seed for random number generation to ensure reproducible results
        relationship_types : Optional[List[str]], default=None
            The relationship types used to select relationships for this algorithm run
        node_labels : Optional[List[str]], default=None
            The node labels used to select nodes for this algorithm run
        sudo : Optional[bool], default=None
            Override memory estimation limits
        log_progress : Optional[bool], default=None
            Whether to log progress
        username : Optional[str], default=None
            The username to attribute the procedure run to
        concurrency : Optional[Any], default=None
            The number of concurrent threads
        job_id : Optional[Any], default=None
            An identifier for the job

        Returns
        -------
        DataFrame
            DataFrame with node IDs and their embeddings
        """

    @abstractmethod
    def write(
        self,
        G: Graph,
        iterations: int,
        embedding_density: int,
        write_property: str,
        output_dimension: Optional[int] = None,
        neighbor_influence: Optional[float] = None,
        generate_features: Optional[Dict[str, Any]] = None,
        binarize_features: Optional[Dict[str, Any]] = None,
        heterogeneous: Optional[bool] = None,
        feature_properties: Optional[List[str]] = None,
        relationship_types: Optional[List[str]] = None,
        node_labels: Optional[List[str]] = None,
        sudo: Optional[bool] = None,
        log_progress: Optional[bool] = None,
        username: Optional[str] = None,
        concurrency: Optional[Any] = None,
        job_id: Optional[Any] = None,
        write_concurrency: Optional[int] = None,
        random_seed: Optional[int] = None,
    ) -> HashGNNWriteResult:
        """
        Executes the HashGNN algorithm and writes the results back to the database.

        Parameters
        ----------
        G : Graph
            The graph to run the algorithm on
        iterations : int
            The number of iterations to run the algorithm
        embedding_density : int
            The number of features to sample per node in each iteration
        write_property : str
            The name of the node property to write the embeddings to
        output_dimension : Optional[int], default=None
            If given, the embeddings are projected randomly into this many dimensions
        neighbor_influence : Optional[float], default=None
            Controls how often the features of neighboring nodes are sampled
            relative to the node's own features
        generate_features : Optional[Dict[str, Any]], default=None
            Configuration map (keys ``dimension`` and ``densityLevel``) for generating
            random binary input features; intended for runs without ``feature_properties``
        binarize_features : Optional[Dict[str, Any]], default=None
            Configuration map (keys ``dimension`` and ``threshold``) for transforming
            continuous input features into binary features
        heterogeneous : Optional[bool], default=None
            Whether different relationship types should be treated differently
        feature_properties : Optional[List[str]], default=None
            The names of the node properties to use as input features
        relationship_types : Optional[List[str]], default=None
            The relationship types used to select relationships for this algorithm run
        node_labels : Optional[List[str]], default=None
            The node labels used to select nodes for this algorithm run
        sudo : Optional[bool], default=None
            Override memory estimation limits
        log_progress : Optional[bool], default=None
            Whether to log progress
        username : Optional[str], default=None
            The username to attribute the procedure run to
        concurrency : Optional[Any], default=None
            The number of concurrent threads
        job_id : Optional[Any], default=None
            An identifier for the job
        write_concurrency : Optional[int], default=None
            The number of concurrent threads used for writing
        random_seed : Optional[int], default=None
            Seed for random number generation to ensure reproducible results

        Returns
        -------
        HashGNNWriteResult
            Algorithm metrics and statistics
        """

    @abstractmethod
    def estimate(
        self,
        G: Union[Graph, Dict[str, Any]],
        iterations: int,
        embedding_density: int,
        output_dimension: Optional[int] = None,
        neighbor_influence: Optional[float] = None,
        generate_features: Optional[Dict[str, Any]] = None,
        binarize_features: Optional[Dict[str, Any]] = None,
        heterogeneous: Optional[bool] = None,
        feature_properties: Optional[List[str]] = None,
        random_seed: Optional[int] = None,
    ) -> EstimationResult:
        """
        Returns an estimation of the memory consumption for running the algorithm.

        Parameters
        ----------
        G : Union[Graph, Dict[str, Any]]
            The graph to run the algorithm on or a dictionary representing the graph.
        iterations : int
            The number of iterations to run the algorithm
        embedding_density : int
            The number of features to sample per node in each iteration
        output_dimension : Optional[int], default=None
            If given, the embeddings are projected randomly into this many dimensions
        neighbor_influence : Optional[float], default=None
            Controls how often the features of neighboring nodes are sampled
            relative to the node's own features
        generate_features : Optional[Dict[str, Any]], default=None
            Configuration map (keys ``dimension`` and ``densityLevel``) for generating
            random binary input features; intended for runs without ``feature_properties``
        binarize_features : Optional[Dict[str, Any]], default=None
            Configuration map (keys ``dimension`` and ``threshold``) for transforming
            continuous input features into binary features
        heterogeneous : Optional[bool], default=None
            Whether different relationship types should be treated differently
        feature_properties : Optional[List[str]], default=None
            The names of the node properties to use as input features
        random_seed : Optional[int], default=None
            Seed for random number generation to ensure reproducible results

        Returns
        -------
        EstimationResult
            The estimated cost of running the algorithm
        """
246+
247+
248+
class HashGNNMutateResult(BaseModel):
    """
    Pydantic model holding the outcome of a HashGNN run in mutate mode.

    Fields are declared in snake_case; through ``alias_generator=to_camel``
    pydantic derives a camelCase alias for each of them (for example
    ``node_count`` -> ``nodeCount``).
    """

    model_config = ConfigDict(alias_generator=to_camel)

    # NOTE(review): the *_millis fields look like durations in milliseconds
    # for the respective phases - confirm against the procedure output.
    node_count: int
    node_properties_written: int
    pre_processing_millis: int
    compute_millis: int
    mutate_millis: int
    configuration: Dict[str, Any]

    def __getitem__(self, item: str) -> Any:
        """
        Allow dictionary-style access, e.g. ``result["node_count"]``.

        The lookup goes straight to the instance ``__dict__``, so an unknown
        name raises ``KeyError``.
        """
        return vars(self)[item]
264+
265+
266+
class HashGNNWriteResult(BaseModel):
    """
    Pydantic model holding the outcome of a HashGNN run in write mode.

    Fields are declared in snake_case; through ``alias_generator=to_camel``
    pydantic derives a camelCase alias for each of them (for example
    ``write_millis`` -> ``writeMillis``).
    """

    model_config = ConfigDict(alias_generator=to_camel)

    # NOTE(review): the *_millis fields look like durations in milliseconds
    # for the respective phases - confirm against the procedure output.
    node_count: int
    node_properties_written: int
    pre_processing_millis: int
    compute_millis: int
    write_millis: int
    configuration: Dict[str, Any]

    def __getitem__(self, item: str) -> Any:
        """
        Allow dictionary-style access, e.g. ``result["write_millis"]``.

        The lookup goes straight to the instance ``__dict__``, so an unknown
        name raises ``KeyError``.
        """
        return vars(self)[item]

0 commit comments

Comments
 (0)