Skip to content

Commit 11d833e

Browse files
committed
further cleanup:
- consolidate UserUploadFile into SeedFile (can be split again if needed)
- add get_absolute_presigned_url() to UserFile for convenience in getting the absolute URL directly
1 parent bc11e52 commit 11d833e

File tree

4 files changed

+65
-82
lines changed

4 files changed

+65
-82
lines changed

backend/btrixcloud/crawlconfigs.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -282,7 +282,9 @@ async def add_crawl_config(
282282

283283
if config_in.config.seedFileId:
284284
# Validate file with that id exists
285-
seed_file = await self.file_ops.get_file(config_in.config.seedFileId, org)
285+
seed_file = await self.file_ops.get_seed_file(
286+
config_in.config.seedFileId, org
287+
)
286288

287289
# Validate seeds not set
288290
if config_in.config.seeds:
@@ -504,7 +506,7 @@ async def update_crawl_config(
504506

505507
if update.config and update.config.seedFileId:
506508
# Validate file with that id exists
507-
seed_file = await self.file_ops.get_file(update.config.seedFileId, org)
509+
seed_file = await self.file_ops.get_seed_file(update.config.seedFileId, org)
508510

509511
# Validate seeds not set
510512
if update.config.seeds or (
@@ -636,7 +638,7 @@ async def update_crawl_config(
636638
and update.config.seedFileId is None
637639
):
638640
try:
639-
await self.file_ops.delete_user_file(
641+
await self.file_ops.delete_seed_file(
640642
orig_crawl_config.config.seedFileId, org
641643
)
642644
except HTTPException:
@@ -976,7 +978,7 @@ async def make_inactive_or_delete(
976978
if not crawlconfig.crawlAttemptCount:
977979
if crawlconfig.config and crawlconfig.config.seedFileId:
978980
try:
979-
await self.file_ops.delete_user_file(
981+
await self.file_ops.delete_seed_file(
980982
crawlconfig.config.seedFileId, org
981983
)
982984
except HTTPException:
@@ -1127,7 +1129,7 @@ async def run_now_internal(
11271129
status_code=400, detail="seed_file_not_supported_by_crawler"
11281130
)
11291131

1130-
seed_file_out = await self.file_ops.get_file_out(
1132+
seed_file_out = await self.file_ops.get_seed_file_out(
11311133
crawlconfig.config.seedFileId, org
11321134
)
11331135
seed_file_url = seed_file_out.path

backend/btrixcloud/file_uploads.py

Lines changed: 23 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,7 @@
1212
import pymongo
1313

1414
from .models import (
15-
UserUploadFile,
16-
UserUploadFileOut,
15+
SeedFileOut,
1716
SeedFile,
1817
UserFile,
1918
UserFilePreparer,
@@ -88,36 +87,36 @@ async def get_file_raw(
8887

8988
return res
9089

91-
async def get_file(
90+
async def get_seed_file(
9291
self,
9392
file_id: UUID,
9493
org: Optional[Organization] = None,
9594
type_: Optional[str] = None,
96-
) -> UserUploadFile:
95+
) -> SeedFile:
9796
"""Get file by UUID"""
9897
file_raw = await self.get_file_raw(file_id, org, type_)
99-
return UserUploadFile.from_dict(file_raw)
98+
return SeedFile.from_dict(file_raw)
10099

101-
async def get_file_out(
100+
async def get_seed_file_out(
102101
self,
103102
file_id: UUID,
104103
org: Optional[Organization] = None,
105104
type_: Optional[str] = None,
106105
headers: Optional[dict] = None,
107-
) -> UserUploadFileOut:
106+
) -> SeedFileOut:
108107
"""Get file output model by UUID"""
109-
user_file = await self.get_file(file_id, org, type_)
108+
user_file = await self.get_seed_file(file_id, org, type_)
110109
return await user_file.get_file_out(org, self.storage_ops, headers)
111110

112-
async def list_user_files(
111+
async def list_seed_files(
113112
self,
114113
org: Organization,
115114
page_size: int = DEFAULT_PAGE_SIZE,
116115
page: int = 1,
117116
sort_by: str = "created",
118117
sort_direction: int = -1,
119118
headers: Optional[dict] = None,
120-
) -> Tuple[list[UserUploadFileOut], int]:
119+
) -> Tuple[list[SeedFileOut], int]:
121120
"""list all user-uploaded files"""
122121
# pylint: disable=too-many-locals
123122

@@ -171,7 +170,7 @@ async def list_user_files(
171170

172171
user_files = []
173172
for res in items:
174-
file_ = UserUploadFile.from_dict(res)
173+
file_ = SeedFile.from_dict(res)
175174
file_out = await file_.get_file_out(org, self.storage_ops, headers)
176175
user_files.append(file_out)
177176

@@ -278,12 +277,13 @@ async def _parse_seed_info_from_file(
278277
first_seed = ""
279278
seed_count = 0
280279

281-
file_out = await file_obj.get_file_out(org, self.storage_ops)
282-
print("PATH", file_out.path)
280+
file_url = await file_obj.get_absolute_presigned_url(
281+
org, self.storage_ops, None
282+
)
283283

284284
with tempfile.TemporaryFile() as fp:
285285
async with aiohttp.ClientSession() as session:
286-
async with session.get(file_out.path) as resp:
286+
async with session.get(file_url) as resp:
287287
async for chunk in resp.content.iter_chunked(CHUNK_SIZE):
288288
fp.write(chunk)
289289

@@ -299,11 +299,11 @@ async def _parse_seed_info_from_file(
299299

300300
return first_seed, seed_count
301301

302-
async def delete_user_file(
302+
async def delete_seed_file(
303303
self, file_id: UUID, org: Organization
304304
) -> Dict[str, bool]:
305305
"""Delete user-uploaded file from storage and db"""
306-
file = await self.get_file(file_id, org)
306+
file = await self.get_seed_file(file_id, org)
307307

308308
# Make sure seed file isn't currently referenced by any workflows
309309
if file.type == "seedFile":
@@ -358,7 +358,7 @@ async def cleanup_unused_seed_files(self):
358358

359359
try:
360360
org = await self.org_ops.get_org_by_id(file_dict["oid"])
361-
await self.delete_user_file(file_id, org)
361+
await self.delete_seed_file(file_id, org)
362362
print(f"Deleted unused seed file {file_id}", flush=True)
363363
# pylint: disable=broad-exception-caught
364364
except Exception as err:
@@ -401,7 +401,7 @@ async def upload_seedfile_stream(
401401

402402
# pylint: disable=too-many-arguments
403403
@router.get("", response_model=PaginatedUserFileResponse)
404-
async def list_user_files(
404+
async def list_seed_files(
405405
request: Request,
406406
org: Organization = Depends(org_viewer_dep),
407407
pageSize: int = DEFAULT_PAGE_SIZE,
@@ -410,7 +410,7 @@ async def list_user_files(
410410
sortDirection: int = -1,
411411
):
412412
# pylint: disable=duplicate-code
413-
user_files, total = await ops.list_user_files(
413+
user_files, total = await ops.list_seed_files(
414414
org,
415415
page_size=pageSize,
416416
page=page,
@@ -420,17 +420,17 @@ async def list_user_files(
420420
)
421421
return paginated_format(user_files, total, page, pageSize)
422422

423-
@router.get("/{file_id}", response_model=UserUploadFileOut)
424-
async def get_user_file(
423+
@router.get("/{file_id}", response_model=SeedFileOut)
424+
async def get_seed_file(
425425
file_id: UUID, request: Request, org: Organization = Depends(org_viewer_dep)
426426
):
427-
return await ops.get_file_out(file_id, org, headers=dict(request.headers))
427+
return await ops.get_seed_file_out(file_id, org, headers=dict(request.headers))
428428

429429
@router.delete("/{file_id}", response_model=SuccessResponse)
430430
async def delete_user_file(
431431
file_id: UUID, org: Organization = Depends(org_crawl_dep)
432432
):
433-
return await ops.delete_user_file(file_id, org)
433+
return await ops.delete_seed_file(file_id, org)
434434

435435
if org_ops.router:
436436
org_ops.router.include_router(router)

backend/btrixcloud/models.py

Lines changed: 34 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -1146,16 +1146,14 @@ def prepare_filename(self, filename):
11461146

11471147
# ============================================================================
11481148

1149-
### USER-UPLOADED IMAGES ###
1149+
### USER-UPLOADED FILES ###
11501150

11511151

11521152
# ============================================================================
1153-
class UserFileOut(BaseModel):
1154-
"""output for user-uploaded file as stored on other document
1153+
class PublicUserFileOut(BaseModel):
1154+
"""public output for user-uploaded file stored on other document
11551155
1156-
Used for collection thumbnails.
1157-
Should merge with UserUploadFile models below (used to store files in
1158-
distinct files mongo collection) eventually.
1156+
Public User Upload File (used for collection thumbnails).
11591157
Conforms to Data Resource Spec.
11601158
"""
11611159

@@ -1164,38 +1162,28 @@ class UserFileOut(BaseModel):
11641162
hash: str
11651163
size: int
11661164

1167-
originalFilename: str
11681165
mime: str
1169-
userid: UUID
1170-
userName: str
1171-
created: datetime
11721166

11731167

11741168
# ============================================================================
1175-
class PublicUserFileOut(BaseModel):
1176-
"""public output for user-uploaded file stored on other document
1177-
1178-
Used for collection thumbnails.
1179-
Should merge with UserUploadFile models below (used to store files in
1180-
distinct files mongo collection) eventually.
1169+
class UserFileOut(PublicUserFileOut):
1170+
"""output for user-uploaded file as stored on other document,
1171+
additional non-public fields included
11811172
Conforms to Data Resource Spec.
11821173
"""
11831174

1184-
name: str
1185-
path: str
1186-
hash: str
1187-
size: int
1188-
1175+
originalFilename: str
11891176
mime: str
1177+
userid: UUID
1178+
userName: str
1179+
created: datetime
11901180

11911181

11921182
# ============================================================================
11931183
class UserFile(BaseFile):
11941184
"""User-uploaded file stored on another mongo document
11951185
1196-
Used for collection thumbnails.
1197-
Should merge with UserUploadFile models below (used to store files in
1198-
distinct files mongo collection) eventually.
1186+
Base user uploaded file (currently used for collection thumbnails).
11991187
Conforms to Data Resource Spec.
12001188
"""
12011189

@@ -1205,16 +1193,20 @@ class UserFile(BaseFile):
12051193
userName: str
12061194
created: datetime
12071195

1196+
async def get_absolute_presigned_url(
1197+
self, org, storage_ops, headers: Optional[dict]
1198+
) -> str:
1199+
"""Get presigned URL as absolute URL"""
1200+
presigned_url, _ = await storage_ops.get_presigned_url(org, self)
1201+
return storage_ops.resolve_relative_access_path(presigned_url, headers) or ""
1202+
12081203
async def get_file_out(
12091204
self, org, storage_ops, headers: Optional[dict] = None
12101205
) -> UserFileOut:
12111206
"""Get UserFileOut with new presigned url"""
1212-
presigned_url, _ = await storage_ops.get_presigned_url(org, self)
1213-
presigned_url = storage_ops.resolve_relative_access_path(presigned_url, headers)
1214-
12151207
return UserFileOut(
12161208
name=self.filename,
1217-
path=presigned_url or "",
1209+
path=await self.get_absolute_presigned_url(org, storage_ops, headers),
12181210
hash=self.hash,
12191211
size=self.size,
12201212
originalFilename=self.originalFilename,
@@ -1228,12 +1220,9 @@ async def get_public_file_out(
12281220
self, org, storage_ops, headers: Optional[dict] = None
12291221
) -> PublicUserFileOut:
12301222
"""Get PublicUserFileOut with new presigned url"""
1231-
presigned_url, _ = await storage_ops.get_presigned_url(org, self)
1232-
presigned_url = storage_ops.resolve_relative_access_path(presigned_url, headers)
1233-
12341223
return PublicUserFileOut(
12351224
name=self.filename,
1236-
path=presigned_url or "",
1225+
path=await self.get_absolute_presigned_url(org, storage_ops, headers),
12371226
hash=self.hash,
12381227
size=self.size,
12391228
mime=self.mime,
@@ -1281,8 +1270,8 @@ def get_user_file(
12811270

12821271

12831272
# ============================================================================
1284-
class UserUploadFileOut(UserFileOut):
1285-
"""Output model for all user-uploaded files stored in files mongo collection"""
1273+
class SeedFileOut(UserFileOut):
1274+
"""Output model for user-uploaded seed files"""
12861275

12871276
id: UUID
12881277
oid: UUID
@@ -1293,26 +1282,26 @@ class UserUploadFileOut(UserFileOut):
12931282

12941283

12951284
# ============================================================================
1296-
class UserUploadFile(UserFile, BaseMongoModel):
1297-
"""User-uploaded file saved in files mongo collection"""
1285+
class SeedFile(UserFile, BaseMongoModel):
1286+
"""Stores user-uploaded files in 'file_uploads' mongo collection
1287+
Used with crawl workflows
1288+
"""
1289+
1290+
type: Literal["seedFile"] = "seedFile"
12981291

12991292
id: UUID
13001293
oid: UUID
1301-
type: str
13021294

13031295
firstSeed: Optional[str] = None
13041296
seedCount: Optional[int] = None
13051297

13061298
async def get_file_out(
13071299
self, org, storage_ops, headers: Optional[dict] = None
1308-
) -> UserUploadFileOut:
1309-
"""Get UserUploadFileOut with new presigned url"""
1310-
presigned_url, _ = await storage_ops.get_presigned_url(org, self)
1311-
presigned_url = storage_ops.resolve_relative_access_path(presigned_url, headers)
1312-
1313-
return UserUploadFileOut(
1300+
) -> SeedFileOut:
1301+
"""Get SeedFileOut with new presigned url"""
1302+
return SeedFileOut(
13141303
name=self.filename,
1315-
path=presigned_url or "",
1304+
path=await self.get_absolute_presigned_url(org, storage_ops, headers),
13161305
hash=self.hash,
13171306
size=self.size,
13181307
originalFilename=self.originalFilename,
@@ -1328,13 +1317,6 @@ async def get_file_out(
13281317
)
13291318

13301319

1331-
# ============================================================================
1332-
class SeedFile(UserUploadFile):
1333-
"""Seed file for crawl workflows"""
1334-
1335-
type: Literal["seedFile"] = "seedFile"
1336-
1337-
13381320
# ============================================================================
13391321

13401322
### PAGES ###
@@ -3061,7 +3043,7 @@ class PaginatedUserOutResponse(PaginatedResponse):
30613043
class PaginatedUserFileResponse(PaginatedResponse):
30623044
"""Response model for user-uploaded files (e.g. seed files)"""
30633045

3064-
items: List[UserUploadFileOut]
3046+
items: List[SeedFileOut]
30653047

30663048

30673049
# ============================================================================

backend/btrixcloud/storages.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,6 @@
5656
PresignedUrl,
5757
SuccessResponse,
5858
User,
59-
UserUploadFile,
6059
)
6160

6261
from .utils import slug_from_name, dt_now, get_origin
@@ -630,7 +629,7 @@ async def get_presigned_urls_bulk(
630629
return urls, now + self.signed_duration_delta
631630

632631
async def delete_file_object(
633-
self, org: Organization, crawlfile: Union[BaseFile, UserUploadFile]
632+
self, org: Organization, crawlfile: Union[BaseFile]
634633
) -> bool:
635634
"""delete crawl file from storage."""
636635
return await self._delete_file(org, crawlfile.filename, crawlfile.storage)

0 commit comments

Comments
 (0)