37
37
from invenio_pidstore .models import PersistentIdentifier
38
38
from invenio_records_files .api import Record
39
39
from invenio_records_files .models import RecordsBuckets
40
+ from invenio_sipstore .models import SIPMetadataType
40
41
from sqlalchemy .orm .attributes import flag_modified
41
42
42
43
from cernopendata .modules .records .minters .docid import \
43
44
cernopendata_docid_minter
44
45
from cernopendata .modules .records .minters .recid import \
45
46
cernopendata_recid_minter
46
47
48
+ from .sip_utils import (
49
+ handle_sipstore_record_file_index ,
50
+ handle_sipstore_record_file ,
51
+ sip_record ,
52
+ )
47
53
48
54
def get_jsons_from_dir (dir ):
49
55
"""Get JSON files inside a dir."""
@@ -55,8 +61,9 @@ def get_jsons_from_dir(dir):
55
61
return res
56
62
57
63
58
- def handle_record_files (data , bucket , files , skip_files ):
64
+ def handle_record_files (data , bucket , files , skip_files , skip_sips ):
59
65
"""Handles record files."""
66
+ sip_files = []
60
67
for file in files :
61
68
if skip_files :
62
69
break
@@ -89,45 +96,80 @@ def handle_record_files(data, bucket, files, skip_files):
89
96
str (e )))
90
97
continue
91
98
99
+ if not skip_sips :
100
+ if file .get ("type" , None ) == "index.json" :
101
+ sip_files += handle_sipstore_record_file_index (f )
92
102
93
- def create_record (schema , data , files , skip_files ):
103
+ return sip_files
104
+
105
+
106
def handle_sip_files(files, skip_files, skip_sips):
    """Collect SIP contents for the index files listed in ``files``.

    Every entry is validated, then entries whose ``type`` is
    ``"index.json"`` are looked up with ``FileInstance.get_by_uri`` and,
    when found, passed to ``handle_sipstore_record_file_index``.

    :param files: iterable of file description dicts; each one must
        provide the ``uri``, ``size`` and ``checksum`` keys.
    :param skip_files: when true nothing is processed and an empty list
        is returned.
    :param skip_sips: when true the entries are still validated but no
        SIP content is collected.
    :returns: list accumulated from the results of
        ``handle_sipstore_record_file_index``.
    :raises AssertionError: if an entry lacks one of the required keys.
    """
    # Loop-invariant: with ``skip_files`` set no entry is ever inspected.
    if skip_files:
        return []

    sip_files = []
    for file in files:
        # Explicit check instead of ``assert`` so that the validation is
        # not stripped when Python runs with ``-O``; the exception type is
        # kept for backward compatibility.
        missing = [key for key in ('uri', 'size', 'checksum')
                   if key not in file]
        if missing:
            raise AssertionError(
                'file entry is missing required key(s): {0}'.format(
                    ', '.join(missing)))

        # Nothing can be collected for this entry, so skip the lookup.
        if skip_sips or file.get("type") != "index.json":
            continue

        f = FileInstance.get_by_uri(file["uri"])
        if f:
            sip_files += handle_sipstore_record_file_index(f)

    return sip_files
122
+
123
+
124
def create_record(schema, data, files, skip_files, skip_sips):
    """Create a new record, mint its recid and optionally handle its files.

    :param schema: value stored under the ``$schema`` key of ``data``.
    :param data: record metadata dict; it is modified in place.
    :param files: file descriptions forwarded to ``handle_record_files``.
    :param skip_files: when true no bucket is created and no file is
        handled.
    :param skip_sips: forwarded to ``handle_record_files``.
    :returns: tuple ``(pid, record, sip_files_content)`` where ``pid`` is
        the value returned by ``cernopendata_recid_minter`` and
        ``sip_files_content`` is an empty list when files are skipped.
    """
    record_uuid = uuid.uuid4()
    pid = cernopendata_recid_minter(record_uuid, data)

    data['$schema'] = schema
    record = Record.create(data, id_=record_uuid)

    # Bind the result up front: it used to be assigned only inside the
    # branch below, so ``skip_files`` made the return statement fail with
    # an UnboundLocalError.
    sip_files_content = []
    if not skip_files:
        bucket = Bucket.create()
        sip_files_content = handle_record_files(
            data, bucket, files, skip_files, skip_sips)

        RecordsBuckets.create(
            record=record.model, bucket=bucket)

    return pid, record, sip_files_content
106
140
107
141
108
- def update_record (pid , schema , data , files , skip_files ):
142
def update_record(pid, schema, data, files, skip_files, skip_sips):
    """Update the metadata of the record behind ``pid``.

    :param pid: persistent identifier; its ``object_uuid`` selects the
        record to load.
    :param schema: not used by this function.
    :param data: dict applied to the record with ``record.update``.
    :param files: file descriptions forwarded to ``handle_sip_files``.
    :param skip_files: when true ``handle_sip_files`` is not called.
    :param skip_sips: forwarded to ``handle_sip_files``.
    :returns: tuple ``(record, sip_files_content)``; the second item is an
        empty list when files are skipped.
    """
    # NOTE: the record's bucket is neither emptied nor re-created here;
    # only the SIP contents of the listed files are gathered.
    record = Record.get_record(pid.object_uuid)
    record.update(data)

    if skip_files:
        return record, []

    return record, handle_sip_files(files, skip_files, skip_sips)
131
173
132
174
133
175
def create_doc (data , schema ):
@@ -156,6 +198,8 @@ def fixtures():
156
198
@fixtures .command ()
157
199
@click .option ('--skip-files' , is_flag = True , default = False ,
158
200
help = 'Skip loading of files' )
201
+ @click .option ('--skip-sips' , is_flag = True , default = False ,
202
+ help = 'Skip create/update of SIPs' )
159
203
@click .option ('files' , '--file' , '-f' , multiple = True ,
160
204
type = click .Path (exists = True ),
161
205
help = 'Path to the file(s) to be loaded. If not provided, all'
@@ -165,8 +209,9 @@ def fixtures():
165
209
@click .option ('--mode' , required = True , type = click .Choice (
166
210
['insert' , 'replace' , 'insert-or-replace' ]))
167
211
@with_appcontext
168
- def records (skip_files , files , profile , mode ):
212
+ def records (skip_files , skip_sips , files , profile , mode ):
169
213
"""Load all records."""
214
+
170
215
if profile :
171
216
import cProfile
172
217
import pstats
@@ -187,31 +232,34 @@ def records(skip_files, files, profile, mode):
187
232
else :
188
233
record_json = glob .glob (os .path .join (data , '*.json' ))
189
234
235
+
190
236
for filename in record_json :
191
237
# name = filename.split('/')[-1]
192
238
# if name.startswith('opera'):
193
239
# click.echo('Skipping opera records ...')
194
240
# continue
241
+
195
242
click .echo ('Loading records from {0} ...' .format (filename ))
196
243
with open (filename , 'rb' ) as source :
197
244
for data in json .load (source ):
198
-
199
245
if not data :
200
246
click .echo ('IGNORING a possibly broken or corrupted '
201
247
'record entry in file {0} ...' .format (filename ))
202
248
continue
203
249
204
250
files = data .get ('files' , [])
205
251
252
+ pid = None
206
253
if mode == 'insert-or-replace' :
207
254
try :
208
255
pid = PersistentIdentifier .get ('recid' , data ['recid' ])
209
256
if pid :
210
- record = update_record (
211
- pid , schema , data , files , skip_files )
257
+ record , sip_files_content = update_record (
258
+ pid , schema , data , files , skip_files , skip_sips )
212
259
action = 'updated'
213
260
except PIDDoesNotExistError :
214
- record = create_record (schema , data , files , skip_files )
261
+ pid , record , sip_files_content = create_record (
262
+ schema , data , files , skip_files , skip_sips )
215
263
action = 'inserted'
216
264
elif mode == 'insert' :
217
265
try :
@@ -223,7 +271,8 @@ def records(skip_files, files, profile, mode):
223
271
data .get ('recid' )), err = True )
224
272
return
225
273
except PIDDoesNotExistError :
226
- record = create_record (schema , data , files , skip_files )
274
+ pid , record , sip_files_content = create_record (
275
+ schema , data , files , skip_files , skip_sips )
227
276
action = 'inserted'
228
277
else :
229
278
try :
@@ -234,13 +283,20 @@ def records(skip_files, files, profile, mode):
234
283
'cannot replace it.' .format (
235
284
data .get ('recid' )), err = True )
236
285
return
237
- record = update_record (
238
- pid , schema , data , files , skip_files )
286
+ record , sip_files_content = update_record (
287
+ pid , schema , data , files , skip_files , skip_sips )
239
288
action = 'updated'
240
289
290
+
291
+
241
292
if not skip_files :
242
293
record .files .flush ()
243
294
record .commit ()
295
+
296
+ if not skip_sips :
297
+ sip_record (pid , record , sip_files_content , action )
298
+ # sip_record(pid, record, ''.join(sip_files_content), action)
299
+
244
300
db .session .commit ()
245
301
click .echo (
246
302
'Record recid {0} {1}.' .format (
@@ -462,3 +518,49 @@ def pids():
462
518
db .session .add (record )
463
519
db .session .commit ()
464
520
db .session .expunge_all ()
521
+
522
+
523
+
524
@fixtures.command()
@with_appcontext
def sipmetadata():
    """Load sipmetadata types."""
    # One row per SIP metadata type: (title, name, JSON schema path).
    # Every type is registered with the "json" format.
    specs = (
        ("CERN Open Data Record JSON", "record-json",
         'records/record-v1.0.0.json'),
        ("CERN Open Data Docs JSON", "docs-json",
         'records/docs-v1.0.0.json'),
        ("CERN Open Data Glossary JSON", "glossary-json",
         'records/glossary-term-v1.0.0.json'),
        ("BagIt Archiver metadata", "bagit",
         'sipstore/bagit-v1.0.0.json'),
    )

    # Schema paths are turned into URLs by the invenio-jsonschemas extension.
    to_url = current_app.extensions['invenio-jsonschemas'].path_to_url
    data = [
        {
            "title": title,
            "name": name,
            "format": "json",
            "schema": to_url(schema_path),
        }
        for title, name, schema_path in specs
    ]

    click.secho('Loading SIP metadata types...', fg='blue')
    with click.progressbar(data) as types:
        # Add all types inside one nested transaction, then commit.
        with db.session.begin_nested():
            for metadata_type in types:
                db.session.add(SIPMetadataType(**metadata_type))
        db.session.commit()
    click.secho('SIP metadata types loaded!', fg='green')
566
+
0 commit comments