@@ -67,14 +67,23 @@ def find_parent_process(output_id: str, mongodb: Database) -> Optional[dict[str,
67
67
68
68
69
69
def get_biosample_input_ids (
70
- input_id : str , mongodb : Database , results : set [str ], sampled_portions : set [str ]
70
+ input_id : str ,
71
+ mongodb : Database ,
72
+ results : set [str ],
73
+ sampled_portions : set [str ],
74
+ direct_input : bool ,
71
75
) -> set [Any ]:
72
76
"""
73
77
Given an input ID return all biosample objects that are included in the input resource.
74
78
75
79
OmicsProcessing objects can take Biosamples or ProcessedSamples as inputs. Work needs to be done
76
80
to determine which biosamples make up a given ProcessedSample. This function recursively tries
77
81
to determine those Biosamples.
82
+
83
+ As a side effect, a set of `sampled_portion` values gets populated. Whether or not a processed
84
+ sample's `sampled_portion`s get added to the set is driven by the `direct_input` parameter. Only
85
+ processed samples that are direct inputs to a data generation will have their
86
+ `sampled_portion`s added.
78
87
"""
79
88
# Base case, the input is already a biosample
80
89
biosample_collection : Collection = mongodb ["biosample_set" ]
@@ -92,15 +101,16 @@ def get_biosample_input_ids(
92
101
processed_sample = query [0 ]
93
102
processed_sample_id = processed_sample ["id" ]
94
103
sampled_portion = set (processed_sample .get ("sampled_portion" , []))
95
- if sampled_portion :
104
+ # only store sampled portion values for immediate input to a data generation
105
+ if direct_input and sampled_portion :
96
106
sampled_portions .update (sampled_portion )
97
107
98
108
# Recursive case. For processed samples find the process that created it,
99
109
# and check the inputs of that process.
100
110
parent_process = find_parent_process (processed_sample_id , mongodb )
101
111
if parent_process :
102
112
for parent_input_id in parent_process ["has_input" ]:
103
- get_biosample_input_ids (parent_input_id , mongodb , results , sampled_portions )
113
+ get_biosample_input_ids (parent_input_id , mongodb , results , sampled_portions , False )
104
114
return results
105
115
106
116
@@ -145,7 +155,7 @@ def load_omics_processing(db: Session, obj: Dict[str, Any], mongodb: Database, l
145
155
sampled_portions : set [str ] = set ()
146
156
for input_id in input_ids :
147
157
biosample_input_ids .union (
148
- get_biosample_input_ids (input_id , mongodb , biosample_input_ids , sampled_portions )
158
+ get_biosample_input_ids (input_id , mongodb , biosample_input_ids , sampled_portions , True )
149
159
)
150
160
if sampled_portions :
151
161
obj ["sampled_portions" ] = list (sampled_portions )
0 commit comments