3
3
import click
4
4
from pymongo import MongoClient
5
5
from pymongo .collection import Collection
6
+ from pymongo .cursor import Cursor
6
7
from sqlalchemy .orm import Session
7
8
8
9
from nmdc_server import models
@@ -101,10 +102,21 @@ def load(db: Session, function_limit=None, skip_annotation=False):
101
102
)
102
103
db .commit ()
103
104
105
+ """
106
+ nmdc:ReadQcAnalysis
107
+ nmdc:MagsAnalysis
108
+ nmdc:MetabolomicsAnalysis
109
+ nmdc:MetagenomeSequencing
110
+ nmdc:ReadBasedTaxonomyAnalysis
111
+ nmdc:MetagenomeAssembly
112
+ nmdc:MetagenomeAnnotation
113
+ nmdc:NomAnalysis
114
+ """
115
+
104
116
logger .info ("Loading metabolomics analysis..." )
105
117
pipeline .load (
106
118
db ,
107
- mongodb ["metabolomics_analysis_activity_set " ].find (),
119
+ mongodb ["workflow_execution_set " ].find ({ "type" : "nmdc:MetabolomicsAnalysis" } ),
108
120
pipeline .load_metabolomics_analysis ,
109
121
WorkflowActivityTypeEnum .metabolomics_analysis .value ,
110
122
)
@@ -113,7 +125,7 @@ def load(db: Session, function_limit=None, skip_annotation=False):
113
125
logger .info ("Loading read based analysis..." )
114
126
pipeline .load (
115
127
db ,
116
- mongodb ["read_based_taxonomy_analysis_activity_set " ].find (),
128
+ mongodb ["workflow_execution_set " ].find ({ "type" : "nmdc:ReadBasedTaxonomyAnalysis" } ),
117
129
pipeline .load_read_based_analysis ,
118
130
WorkflowActivityTypeEnum .read_based_analysis .value ,
119
131
)
@@ -122,15 +134,15 @@ def load(db: Session, function_limit=None, skip_annotation=False):
122
134
logger .info ("Loading metatranscriptome activities..." )
123
135
pipeline .load (
124
136
db ,
125
- mongodb ["metatranscriptome_activity_set " ].find (),
137
+ mongodb ["workflow_execution_set " ].find ({ "type" : "nmdc:MetatranscriptomeAnalysis" } ),
126
138
pipeline .load_metatranscriptome ,
127
139
WorkflowActivityTypeEnum .metatranscriptome .value ,
128
140
)
129
141
130
142
logger .info ("Loading NOM analysis..." )
131
143
pipeline .load (
132
144
db ,
133
- mongodb ["nom_analysis_activity_set " ].find (),
145
+ mongodb ["workflow_execution_set " ].find ({ "type" : "nmdc:NomAnalysis" } ),
134
146
pipeline .load_nom_analysis ,
135
147
WorkflowActivityTypeEnum .nom_analysis .value ,
136
148
)
@@ -139,7 +151,7 @@ def load(db: Session, function_limit=None, skip_annotation=False):
139
151
logger .info ("Loading MAGs..." )
140
152
pipeline .load (
141
153
db ,
142
- mongodb ["mags_activity_set " ].find (),
154
+ mongodb ["workflow_execution_set " ].find ({ "type" : "nmdc:MagsAnalysis" } ),
143
155
pipeline .load_mags ,
144
156
WorkflowActivityTypeEnum .mags_analysis .value ,
145
157
)
@@ -154,13 +166,14 @@ def load(db: Session, function_limit=None, skip_annotation=False):
154
166
155
167
# This has historically been fast, but it is only for the progress bar.
156
168
# It can be removed if it becomes slow.
157
- count = mongodb ["metagenome_annotation_activity_set" ].estimated_document_count ()
158
- iterator = paginate_cursor (
159
- mongodb ["metagenome_annotation_activity_set" ],
160
- page_size = 1 , # prevent cursor from timing out
161
- no_cursor_timeout = True ,
169
+ annotation_activities = list (
170
+ mongodb ["workflow_execution_set" ].find (
171
+ {"type" : "nmdc:MetagenomeAnnotation" }, batch_size = 100
172
+ )
162
173
)
163
- with click .progressbar (iterator , length = count ) as bar :
174
+ # TODO test this and make sure it works as expected
175
+ # NOTE: this undoes the earlier pagination; list() loads all matching documents into memory.
176
+ with click .progressbar (annotation_activities , length = len (annotation_activities )) as bar :
164
177
pipeline .load (
165
178
db ,
166
179
bar ,
@@ -180,7 +193,7 @@ def load(db: Session, function_limit=None, skip_annotation=False):
180
193
logger .info ("Loading read qc..." )
181
194
pipeline .load (
182
195
db ,
183
- mongodb ["read_qc_analysis_activity_set " ].find (),
196
+ mongodb ["workflow_execution_set " ].find ({ "type" : "nmdc:ReadQcAnalysis" } ),
184
197
pipeline .load_reads_qc ,
185
198
WorkflowActivityTypeEnum .reads_qc .value ,
186
199
)
@@ -190,7 +203,8 @@ def load(db: Session, function_limit=None, skip_annotation=False):
190
203
logger .info ("Loading metaproteomic analysis..." )
191
204
pipeline .load (
192
205
db ,
193
- mongodb ["metaproteomics_analysis_activity_set" ].find (
206
+ mongodb ["workflow_execution_set" ].find (
207
+ {"type" : "nmdc:MetaproteomicAnalysis" },
194
208
no_cursor_timeout = True ,
195
209
),
196
210
pipeline .load_mp_analysis ,
@@ -207,7 +221,7 @@ def load(db: Session, function_limit=None, skip_annotation=False):
207
221
logger .info ("Loading metagenome assembly..." )
208
222
pipeline .load (
209
223
db ,
210
- mongodb ["metagenome_assembly_set " ].find (),
224
+ mongodb ["workflow_execution_set " ].find ({ "type" : "nmdc:MetagenomeAssembly" } ),
211
225
pipeline .load_mg_assembly ,
212
226
WorkflowActivityTypeEnum .metagenome_assembly .value ,
213
227
)
0 commit comments