@@ -66,13 +66,24 @@ def find_parent_process(output_id: str, mongodb: Database) -> Optional[dict[str,
66
66
return None
67
67
68
68
69
- def get_biosample_input_ids (input_id : str , mongodb : Database , results : set ) -> set [Any ]:
69
+ def get_biosample_input_ids (
70
+ input_id : str ,
71
+ mongodb : Database ,
72
+ results : set [str ],
73
+ sampled_portions : set [str ],
74
+ direct_input : bool ,
75
+ ) -> set [Any ]:
70
76
"""
71
77
Given an input ID return all biosample objects that are included in the input resource.
72
78
73
79
OmicsProcessing objects can take Biosamples or ProcessedSamples as inputs. Work needs to be done
74
80
to determine which biosamples make up a given ProcessedSample. This function recursively tries
75
81
to determine those Biosamples.
82
+
83
+ As a side effect, a set of `sampled_portion` values gets populated. Whether or not a processed
84
+ sample's `sampled_portion`s get added to the set is driven by the `direct_input` parameter. Only
85
+ processed samples which are inputs directly to a data generation will have their
86
+ `sampled_portion`s added.
76
87
"""
77
88
# Base case, the input is already a biosample
78
89
biosample_collection : Collection = mongodb ["biosample_set" ]
@@ -87,25 +98,32 @@ def get_biosample_input_ids(input_id: str, mongodb: Database, results: set) -> s
87
98
if not query :
88
99
return results
89
100
90
- processed_sample_id = query [0 ]["id" ]
101
+ processed_sample = query [0 ]
102
+ processed_sample_id = processed_sample ["id" ]
103
+ sampled_portion = set (processed_sample .get ("sampled_portion" , []))
104
+ # only store sampled portion values for immediate input to a data generation
105
+ if direct_input and sampled_portion :
106
+ sampled_portions .update (sampled_portion )
91
107
92
108
# Recursive case. For processed samples find the process that created it,
93
109
# and check the inputs of that process.
94
110
parent_process = find_parent_process (processed_sample_id , mongodb )
95
111
if parent_process :
96
112
for parent_input_id in parent_process ["has_input" ]:
97
- get_biosample_input_ids (parent_input_id , mongodb , results )
113
+ get_biosample_input_ids (parent_input_id , mongodb , results , sampled_portions , False )
98
114
return results
99
115
100
116
101
- def get_configuration_name (mongodb : Database , configuration_id : str , config_map ) -> Optional [str ]:
117
def get_configuration_property(
    mongodb: Database, configuration_id: str, key: str, config_map
) -> Optional[str]:
    """
    Return the value stored under `key` on a configuration record.

    Records are looked up by `id` in the `configuration_set` collection and memoized in
    `config_map` (keyed by configuration ID), so repeated requests for different properties
    of the same configuration only hit the database once. Failed lookups are memoized too.

    :param mongodb: database handle exposing the `configuration_set` collection
    :param configuration_id: ID of the configuration record; `None` is tolerated because
        callers pass the result of `obj.pop(..., None)`
    :param key: name of the property to read from the record (e.g. "name")
    :param config_map: caller-owned cache mapping configuration ID to record (or `None`)
    :return: the property value, or `None` if there is no ID, no matching record, or the
        record does not carry `key`
    """
    # No configuration referenced: skip the lookup entirely. Querying {"id": None} would
    # also match any document that has no `id` field, which is never what we want.
    if configuration_id is None:
        return None

    config_set = "configuration_set"
    if configuration_id in config_map:
        config_record = config_map[configuration_id]
    else:
        config_record = mongodb[config_set].find_one({"id": configuration_id})
        # Cache misses as well so an unknown ID is only queried once.
        config_map[configuration_id] = config_record

    if not config_record:
        return None
    # Not every configuration carries every property (e.g. "polarity_mode"), so a missing
    # key yields None instead of raising KeyError.
    return config_record.get(key)
109
127
110
128
111
129
def get_poolable_replicate_manifest (
@@ -134,8 +152,13 @@ def load_omics_processing(db: Session, obj: Dict[str, Any], mongodb: Database, l
134
152
logger = get_logger (__name__ )
135
153
input_ids : list [str ] = obj .pop ("has_input" , ["" ])
136
154
biosample_input_ids : set [str ] = set ()
155
+ sampled_portions : set [str ] = set ()
137
156
for input_id in input_ids :
138
- biosample_input_ids .union (get_biosample_input_ids (input_id , mongodb , biosample_input_ids ))
157
+ biosample_input_ids .union (
158
+ get_biosample_input_ids (input_id , mongodb , biosample_input_ids , sampled_portions , True )
159
+ )
160
+ if sampled_portions :
161
+ obj ["sampled_portions" ] = list (sampled_portions )
139
162
140
163
obj ["biosample_inputs" ] = []
141
164
biosample_input_objects = []
@@ -161,14 +184,20 @@ def load_omics_processing(db: Session, obj: Dict[str, Any], mongodb: Database, l
161
184
162
185
# Get configuration info
163
186
mass_spec_config_id = obj .pop ("has_mass_spectrometry_configuration" , None )
164
- mass_spec_config_name = get_configuration_name (mongodb , mass_spec_config_id , config_map )
187
+ mass_spec_config_name = get_configuration_property (
188
+ mongodb , mass_spec_config_id , "name" , config_map
189
+ )
190
+ mass_spec_polarity_mode = get_configuration_property (
191
+ mongodb , mass_spec_config_id , "polarity_mode" , config_map
192
+ )
165
193
if mass_spec_config_name :
166
194
obj ["mass_spectrometry_configuration_name" ] = mass_spec_config_name
167
195
obj ["mass_spectrometry_configuration_id" ] = mass_spec_config_id
196
+ obj ["mass_spectrometry_config_polarity_mode" ] = mass_spec_polarity_mode
168
197
169
198
chromatography_config_id = obj .pop ("has_chromatography_configuration" , None )
170
- chromatography_config_name = get_configuration_name (
171
- mongodb , chromatography_config_id , config_map
199
+ chromatography_config_name = get_configuration_property (
200
+ mongodb , chromatography_config_id , "name" , config_map
172
201
)
173
202
if chromatography_config_name :
174
203
obj ["chromatography_configuration_name" ] = chromatography_config_name
0 commit comments