Skip to content

Commit 93f31a9

Browse files
feat: fetch datasets from resp. dataset repo branch; enable it on vercel
1 parent 92c41d1 commit 93f31a9

File tree

9 files changed

+88
-26
lines changed

9 files changed

+88
-26
lines changed

.env.example

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,10 @@ SYNC_DESTINATION=123.456.789.123:~/nextclade
3232
DATA_FULL_DOMAIN=https://data.master.clades.nextstrain.org/v3
3333
# DATA_FULL_DOMAIN=http://localhost:27722
3434

35+
# If enabled, Nextclade Web will first attempt to fetch datasets from the corresponding GitHub branch. If this attempt
36+
# fails, it will use `DATA_FULL_DOMAIN` as usual.
37+
DATA_TRY_GITHUB_BRANCH=0
38+
3539
# Directory path (relative to the root of the project) from which local data server takes the data.
3640
# Useful for local testing on new datasets. See: https://github.com/neherlab/nextclade_data
3741
# It is recommended to keep the `nextclade_data` git repo in a sibling directory of `nextclade` git repo.

packages_rs/nextclade-web/config/next/lib/getEnvVars.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ export function getEnvVars() {
1010
const DOMAIN = getDomain()
1111
const DOMAIN_STRIPPED = DOMAIN.replace('https://', '').replace('http://', '')
1212
const DATA_FULL_DOMAIN = getenv('DATA_FULL_DOMAIN')
13+
const DATA_TRY_GITHUB_BRANCH = getenv('DATA_TRY_GITHUB_BRANCH')
1314

1415
const common = {
1516
BABEL_ENV,
@@ -20,6 +21,7 @@ export function getEnvVars() {
2021
DOMAIN,
2122
DOMAIN_STRIPPED,
2223
DATA_FULL_DOMAIN,
24+
DATA_TRY_GITHUB_BRANCH,
2325
}
2426

2527
if (PRODUCTION) {

packages_rs/nextclade-web/config/next/next.config.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ const {
4646
DOMAIN,
4747
DOMAIN_STRIPPED,
4848
DATA_FULL_DOMAIN,
49+
DATA_TRY_GITHUB_BRANCH,
4950
} = getEnvVars()
5051

5152
const BRANCH_NAME = getGitBranch()
@@ -61,6 +62,7 @@ const clientEnv = {
6162
DOMAIN,
6263
DOMAIN_STRIPPED,
6364
DATA_FULL_DOMAIN,
65+
DATA_TRY_GITHUB_BRANCH,
6466
BLOCK_SEARCH_INDEXING: DOMAIN === RELEASE_URL ? '0' : '1',
6567
}
6668

packages_rs/nextclade-web/src/constants.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@ export const URL_GITHUB_COMMITS = 'https://github.com/nextstrain/nextclade/commi
3131
export const URL_CLADE_SCHEMA_REPO = 'https://github.com/nextstrain/ncov-clades-schema/'
3232
export const URL_CLADE_SCHEMA_SVG = 'https://raw.githubusercontent.com/nextstrain/ncov-clades-schema/master/clades.svg'
3333

34+
export const URL_GITHUB_DATA_RAW = 'https://raw.githubusercontent.com/nextstrain/nextclade_data' as const
35+
3436
export const SUPPORT_EMAIL = 'hello@nextstrain.org'
3537

3638
export const TWITTER_USERNAME_RAW = 'nextstrain' as const

packages_rs/nextclade-web/src/io/axiosFetch.ts

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,3 +85,14 @@ export async function axiosHead<TData = unknown>(
8585

8686
return res.data as TData
8787
}
88+
89+
/**
 * Best-effort variant of `axiosHead`: performs the same request, but never rejects.
 *
 * @param url - URL to send the request to (forwarded to `axiosHead` as is)
 * @param options - optional axios request configuration (forwarded to `axiosHead` as is)
 * @returns whatever `axiosHead` resolves to, or `undefined` if `axiosHead` throws for any reason
 */
export async function axiosHeadOrUndefined<TData = unknown>(
  url: string | undefined,
  options?: AxiosRequestConfig,
): Promise<TData | undefined> {
  let result: TData | undefined
  try {
    result = await axiosHead<TData>(url, options)
  } catch {
    // Failure is an expected outcome here: callers use this function as a probe
    result = undefined
  }
  return result
}

packages_rs/nextclade-web/src/io/fetchDatasets.ts

Lines changed: 63 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
1+
/* eslint-disable prefer-destructuring */
12
import type { ParsedUrlQuery } from 'querystring'
23
import { findSimilarStrings } from 'src/helpers/string'
4+
import { axiosHeadOrUndefined } from 'src/io/axiosFetch'
5+
import { isGithubUrlOrShortcut, parseGitHubRepoUrlOrShortcut } from 'src/io/fetchSingleDatasetFromGithub'
36

47
import { Dataset } from 'src/types'
58
import {
@@ -10,9 +13,11 @@ import {
1013
} from 'src/io/fetchDatasetsIndex'
1114
import { getQueryParamMaybe } from 'src/io/getQueryParamMaybe'
1215
import { useRecoilValue, useSetRecoilState } from 'recoil'
13-
import { datasetCurrentAtom, datasetsAtom, datasetServerUrlAtom, datasetUpdatedAtom } from 'src/state/dataset.state'
16+
import { datasetCurrentAtom, datasetsAtom, datasetUpdatedAtom } from 'src/state/dataset.state'
1417
import { useQuery } from 'react-query'
1518
import { isNil } from 'lodash'
19+
import urljoin from 'url-join'
20+
import { URL_GITHUB_DATA_RAW } from 'src/constants'
1621

1722
export async function getDatasetFromUrlParams(urlQuery: ParsedUrlQuery, datasets: Dataset[]) {
1823
// Retrieve dataset-related URL params and try to find a dataset based on these params
@@ -41,8 +46,60 @@ export async function getDatasetFromUrlParams(urlQuery: ParsedUrlQuery, datasets
4146
return dataset
4247
}
4348

44-
export async function initializeDatasets(urlQuery: ParsedUrlQuery, datasetServerUrlDefault: string) {
45-
const datasetServerUrl = getQueryParamMaybe(urlQuery, 'dataset-server') ?? datasetServerUrlDefault
49+
/**
 * Probes the default data repository on GitHub for a branch named after `process.env.BRANCH_NAME`.
 *
 * The probe is a request for `<raw GitHub URL>/<branch>/data_output/index.json` made through
 * `axiosHeadOrUndefined`.
 *
 * @returns `<raw GitHub URL>/<branch>` when the probe yields a truthy result; `undefined` when
 * `BRANCH_NAME` is unset or empty, or when the probe yields a falsy result
 */
export async function getGithubDatasetServerUrl(): Promise<string | undefined> {
  // Accessed as a full `process.env.X` expression on purpose (hence no destructuring)
  const branch = process.env.BRANCH_NAME

  if (branch) {
    const branchRootUrl = urljoin(URL_GITHUB_DATA_RAW, branch)

    // NOTE(review): `axiosHead` appears to resolve to the response body. A HEAD response carries no
    // body, so this value may be falsy even when the file exists — TODO confirm.
    const probeResult = await axiosHeadOrUndefined(urljoin(branchRootUrl, 'data_output', 'index.json'))

    if (probeResult) {
      // NOTE(review): the index is probed under `data_output/`, but the branch root is returned —
      // TODO confirm this is what consumers of the dataset server URL expect.
      return branchRootUrl
    }
  }

  return undefined
}
64+
65+
/**
 * Converts a root-relative path (a string starting with a single `/`) into an absolute URL on the
 * origin of the current page. Any other input is returned unchanged.
 *
 * When `window` is not defined there is no origin to resolve against, so the input is returned
 * unchanged as well.
 *
 * @param url - absolute URL or root-relative path
 * @returns absolute URL when the conversion applies, otherwise `url` as given
 */
export function toAbsoluteUrl(url: string): string {
  // `url.slice(0)` returns the entire string, so comparing it with '/' only ever matched the bare
  // root path. Check the first character instead. Protocol-relative URLs (`//host/path`) already
  // name a host and are left untouched.
  const isRootRelative = url.startsWith('/') && !url.startsWith('//')

  if (typeof window !== 'undefined' && isRootRelative) {
    return urljoin(window.location.origin, url)
  }

  return url
}
71+
72+
/**
 * Decides which dataset server URL the app should use.
 *
 * Candidates, in order of priority:
 *  1. `dataset-server` URL query parameter formatted as a GitHub URL or GitHub URL shortcut:
 *     converted to a `raw.githubusercontent.com` URL and used without any checking.
 *  2. The corresponding branch of the default data repo on GitHub. Tried when requested explicitly
 *     with `dataset-server=gh` or `dataset-server=github`, or when no `dataset-server` parameter is
 *     given and the `DATA_TRY_GITHUB_BRANCH` env var is `'1'`. Used only if
 *     `getGithubDatasetServerUrl()` finds it.
 *  3. Any other value of the `dataset-server` URL query parameter.
 *  4. The `DATA_FULL_DOMAIN` env var, or `'/'` if it is not set.
 *
 * @param urlQuery - parsed query parameters of the current page URL
 * @returns dataset server URL; a root-relative path is converted to an absolute URL by `toAbsoluteUrl`
 */
export async function getDatasetServerUrl(urlQuery: ParsedUrlQuery): Promise<string> {
  // Get dataset URL from query URL params.
  const requestedUrl = getQueryParamMaybe(urlQuery, 'dataset-server')

  // If the URL is formatted as a GitHub URL or as a GitHub URL shortcut, use it without any checking
  if (requestedUrl && isGithubUrlOrShortcut(requestedUrl)) {
    const { owner, repo, branch, path } = await parseGitHubRepoUrlOrShortcut(requestedUrl)
    return urljoin('https://raw.githubusercontent.com', owner, repo, branch, path)
  }

  // `gh` and `github` are keywords requesting the GitHub branch lookup, not server URLs. They must
  // never be used as a URL themselves, so that a failed lookup falls back to the default server.
  const isGithubBranchKeyword = !!requestedUrl && ['gh', 'github'].includes(requestedUrl)
  let datasetServerUrl = isGithubBranchKeyword ? undefined : requestedUrl

  // If requested to try GitHub-hosted datasets either using `DATA_TRY_GITHUB_BRANCH` env var (e.g. from
  // `.env` file), or using `&dataset-server=gh` or `&dataset-server=github` URL parameters, then check if the
  // corresponding branch in the default data repo on GitHub contains an `index.json` file. And if yes, use it.
  // The env var only applies when no dataset server is requested in the URL: an explicit request wins.
  const datasetServerTryGithubBranch =
    isGithubBranchKeyword || (!requestedUrl && process.env.DATA_TRY_GITHUB_BRANCH === '1')
  if (datasetServerTryGithubBranch) {
    datasetServerUrl = (await getGithubDatasetServerUrl()) ?? datasetServerUrl
  }

  // If none of the above, use hardcoded default URL (from `.env` file).
  // If the URL happens to be a relative path, then convert to absolute URL (on the app's current host).
  return toAbsoluteUrl(datasetServerUrl ?? process.env.DATA_FULL_DOMAIN ?? '/')
}
100+
101+
export async function initializeDatasets(urlQuery: ParsedUrlQuery) {
102+
const datasetServerUrl = await getDatasetServerUrl(urlQuery)
46103

47104
const datasetsIndexJson = await fetchDatasetsIndex(datasetServerUrl)
48105

@@ -57,11 +114,10 @@ export async function initializeDatasets(urlQuery: ParsedUrlQuery, datasetServer
57114
/** Refetch dataset index periodically and update the local copy of it */
58115
export function useUpdatedDatasetIndex() {
59116
const setDatasetsState = useSetRecoilState(datasetsAtom)
60-
const datasetServerUrl = useRecoilValue(datasetServerUrlAtom)
61117
useQuery(
62118
'refetchDatasetIndex',
63119
async () => {
64-
const { currentDataset: _, ...datasetsState } = await initializeDatasets({}, datasetServerUrl)
120+
const { currentDataset: _, ...datasetsState } = await initializeDatasets({})
65121
setDatasetsState(datasetsState)
66122
},
67123
{
@@ -89,10 +145,9 @@ export function useUpdatedDataset() {
89145
'currentDatasetState',
90146
async () => {
91147
const path = datasetCurrent?.path
92-
const refAccession = datasetCurrent?.attributes.reference.value
93148
const updatedAt = datasetCurrent?.version?.updatedAt
94-
if (!isNil(refAccession) && !isNil(updatedAt)) {
95-
const candidateDatasets = filterDatasets(datasets, path, refAccession)
149+
if (!isNil(updatedAt)) {
150+
const candidateDatasets = filterDatasets(datasets, path)
96151
const updatedDataset = candidateDatasets.find((candidate) => {
97152
const candidateTag = candidate.version?.updatedAt
98153
return candidateTag && candidateTag > updatedAt

packages_rs/nextclade-web/src/pages/_app.tsx

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ import { SEO } from 'src/components/Common/SEO'
4848
import { Plausible } from 'src/components/Common/Plausible'
4949
import i18n, { changeLocale, getLocaleWithKey } from 'src/i18n/i18n'
5050
import { theme } from 'src/theme'
51-
import { datasetCurrentAtom, datasetsAtom, datasetServerUrlAtom } from 'src/state/dataset.state'
51+
import { datasetCurrentAtom, datasetsAtom } from 'src/state/dataset.state'
5252
import { ErrorBoundary } from 'src/components/Error/ErrorBoundary'
5353
import { PreviewWarning } from 'src/components/Common/PreviewWarning'
5454

@@ -99,8 +99,7 @@ export function RecoilStateInitializer() {
9999
return datasetInfo
100100
}
101101

102-
const datasetServerUrlDefault = await getPromise(datasetServerUrlAtom)
103-
return initializeDatasets(urlQuery, datasetServerUrlDefault)
102+
return initializeDatasets(urlQuery)
104103
})
105104
.catch((error) => {
106105
// Dataset error is fatal and we want error to be handled in the ErrorBoundary

packages_rs/nextclade-web/src/state/dataset.state.ts

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
import { isNil } from 'lodash'
22
import { atom, DefaultValue, selector } from 'recoil'
3-
import urljoin from 'url-join'
43

54
import type { Dataset } from 'src/types'
65
// import { GENE_OPTION_NUC_SEQUENCE } from 'src/constants'
@@ -10,20 +9,6 @@ import { persistAtom } from 'src/state/persist/localStorage'
109
import { isDefaultValue } from 'src/state/utils/isDefaultValue'
1110
import { areDatasetsEqual } from 'src/types'
1211

13-
export function getDefaultDatasetServer(): string {
14-
let datasetServerUrl = process.env.DATA_FULL_DOMAIN ?? '/'
15-
// Add HTTP Origin if datasetServerUrl is a relative path (start with '/')
16-
if (typeof window !== 'undefined' && datasetServerUrl.slice(0) === '/') {
17-
datasetServerUrl = urljoin(window.location.origin, datasetServerUrl)
18-
}
19-
return datasetServerUrl
20-
}
21-
22-
export const datasetServerUrlAtom = atom<string>({
23-
key: 'datasetServerUrl',
24-
default: getDefaultDatasetServer(),
25-
})
26-
2712
export interface Datasets {
2813
datasets: Dataset[]
2914
}

scripts/build_on_vercel.sh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,8 @@ sed -i'' "s|PROD_ENABLE_TYPE_CHECKS=1|PROD_ENABLE_TYPE_CHECKS=0|g" .env
115115
sed -i'' "s|PROD_ENABLE_ESLINT=1|PROD_ENABLE_ESLINT=0|g" .env
116116
sed -i'' "s|PROD_ENABLE_STYLELINT=1|PROD_ENABLE_STYLELINT=0|g" .env
117117
118+
sed -i'' "s|DATA_TRY_GITHUB_BRANCH=0|DATA_TRY_GITHUB_BRANCH=1|g" .env
119+
118120
cd packages_rs/nextclade-web
119121
120122
yarn install --frozen-lockfile

0 commit comments

Comments
 (0)