Skip to content

Commit 1ec08e6

Browse files
authored
Merge pull request #357 from theislab/scrinshot_probe_designer
adapter tutorials to new framework
2 parents 1ea8b70 + 3c6ffe2 commit 1ec08e6

File tree

4 files changed

+1481
-345
lines changed

4 files changed

+1481
-345
lines changed
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
#######################
2+
### BASIC PARAMETERS ###
3+
#######################
4+
5+
### General parameters
6+
dir_output: output_genomic_region_generator_ncbi # name of the directory where the output files will be written
7+
8+
### Parameters for genome and gene annotation
9+
source: ncbi # required: indicate that ncbi annotation should be used
10+
source_params:
11+
taxon: vertebrate_mammalian # required: taxon of the species, valid taxa are: archaea, bacteria, fungi, invertebrate, mitochondrion, plant, plasmid, plastid, protozoa, vertebrate_mammalian, vertebrate_other, viral
12+
species: Homo_sapiens # required: species name in NCBI download format, e.g. 'Homo_sapiens' for human; see https://ftp.ncbi.nlm.nih.gov/genomes/refseq/ for available species names
13+
annotation_release: 110 # required: release number of annotation e.g. '109' or '109.20211119' or 'current' to use most recent annotation release. Check out release numbers for NCBI at ftp.ncbi.nlm.nih.gov/refseq/H_sapiens/annotation/annotation_releases/
14+
15+
### Parameters for sequences generation
16+
# List of genomic regions that should be generated, set the genomic regions you want to generate to True
17+
genomic_regions:
18+
gene: false
19+
exon: true
20+
exon_exon_junction: true
21+
cds: false
22+
intron: false
23+
24+
# If exon_exon_junction is set to true, specify the block size, i.e. +/- "block_size" bp around the junction
25+
# Hint: it does not make sense to set the block size larger than the maximum oligo length
26+
exon_exon_junction_block_size: 50
Lines changed: 165 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,165 @@
1+
#######################
2+
### USER PARAMETERS ###
3+
#######################
4+
5+
### General parameters
6+
### -----------------------------------------------
7+
n_jobs: 4 # number of cores used to run the pipeline; also sets the number of regions that should be stored in cache to 2*n_jobs + 1. If memory consumption of the pipeline is too high, reduce this number; if a lot of RAM is available, increase this number to decrease runtime
8+
dir_output: output_scrinshot_probe_designer # name of the directory where the output files will be written
9+
write_intermediate_steps: true # if true, writes the oligo sequences after each step of the pipeline into a csv file
10+
11+
### Parameters for probe sequences generation
12+
### -----------------------------------------------
13+
file_regions: my_genes.txt # file with a list of the genes used to generate the oligo sequences, leave empty if all the genes are used
14+
files_fasta_probe_database: # fasta file with sequences from which the oligos should be generated. Hint: use the genomic_region_generator pipeline to create fasta files of genomic regions of interest
15+
- output_genomic_region_generator_ncbi/annotation/exon_annotation_source-NCBI_species-Homo_sapiens_annotation_release-110_genome_assemly-GRCh38.p14.fna
16+
- output_genomic_region_generator_ncbi/annotation/exon_exon_junction_annotation_source-NCBI_species-Homo_sapiens_annotation_release-110_genome_assemly-GRCh38.p14.fna
17+
probe_length_min: 40 #min length of oligos
18+
probe_length_max: 45 #max length of oligos
19+
20+
### Parameters for the property filters, i.e. properties that the sequences should fulfill
21+
### -----------------------------------------------
22+
## target probe sequence
23+
probe_GC_content_min: 40 # minimum GC content of oligos
24+
probe_GC_content_max: 60 # maximum GC content of oligos
25+
probe_Tm_min: 65 # minimum melting temperature of oligos
26+
probe_Tm_max: 75 # maximum melting temperature of oligos
27+
homopolymeric_base_n: # minimum number of nucleotides to consider it a homopolymeric run per base
28+
A: 5
29+
T: 5
30+
C: 5
31+
G: 5
32+
## padlock arms
33+
arm_Tm_dif_max: 2 # maximum melting temperature difference of both arms (difference shouldn't be higher than 5! But range is not super important, the lower the better)
34+
arm_length_min: 10 # minimum length of each arm
35+
arm_Tm_min: 50 # minimum melting temperature of each arm
36+
arm_Tm_max: 60 # maximum melting temperature of each arm
37+
## detection oligos
38+
min_thymines: 2 # minimal number of Thymines in detection oligo.
39+
detect_oligo_length_min: 15 # minimum length of detection probe
40+
detect_oligo_length_max: 40 # maximum length of detection probe
41+
42+
### Parameters for the specificity filters
43+
### -----------------------------------------------
44+
files_fasta_reference_database: # fasta file with sequences used as reference for the specificity filters. Hint: use the genomic_region_generator pipeline to create fasta files of genomic regions of interest
45+
- output_genomic_region_generator_ncbi/annotation/exon_annotation_source-NCBI_species-Homo_sapiens_annotation_release-110_genome_assemly-GRCh38.p14.fna
46+
- output_genomic_region_generator_ncbi/annotation/exon_exon_junction_annotation_source-NCBI_species-Homo_sapiens_annotation_release-110_genome_assemly-GRCh38.p14.fna
47+
ligation_region_size: 5 # size of the seed region around the ligation site for blast seed region filter; set to 0 if ligation region should not be considered for blast search
48+
49+
### Parameters for set selection
50+
### -----------------------------------------------
51+
probe_isoform_weight: 2 # weight of the isoform consensus of the probe in the efficiency score
52+
probe_GC_content_opt: 50 # max and min values are defined above
53+
probe_GC_weight: 1 # weight of the GC content of the probe in the efficiency score
54+
probe_Tm_opt: 70 # max and min values are defined above
55+
probe_Tm_weight: 1 # weight of the Tm of the probe in the efficiency score
56+
57+
probeset_size_min: 3 # minimum size of probe sets (in case there exist no set of the optimal size) -> genes with less oligos will be filtered out and stored in regions_with_insufficient_oligos_for_db_probes
58+
probeset_size_opt: 5 # optimal size of probe sets
59+
distance_between_probes: 0 # how much overlap should be allowed between oligos, e.g. if oligos can overlap x bases choose -x, if oligos can be next to one another choose 0, if oligos should be x bases apart choose x
60+
n_sets: 100 # maximum number of sets to generate
61+
62+
### Parameters for final sequence design
63+
### -----------------------------------------------
64+
U_distance: 5 # preferred minimal distance between U(racils)
65+
detect_oligo_Tm_opt: 56 # optimal melting temperature of detection probe
66+
top_n_sets: 3 #maximum number of sets to report in padlock_probes.yaml and "padlock_probes_order.yaml"
67+
68+
############################
69+
### DEVELOPER PARAMETERS ###
70+
############################
71+
72+
### Parameters for the specificity filters
73+
### -----------------------------------------------
74+
# Specificity filter with BlastN
75+
specificity_blastn_search_parameters:
76+
perc_identity: 80
77+
strand: "minus" # this parameter is fixed, if reference is whole genome, consider using "both"
78+
word_size: 10
79+
dust: "no"
80+
soft_masking: "false"
81+
max_target_seqs: 10
82+
max_hsps: 1000
83+
specificity_blastn_hit_parameters:
84+
coverage: 50 # can be turned into min_alignment_length
85+
86+
# Crosshybridization filter with BlastN
87+
cross_hybridization_blastn_search_parameters:
88+
perc_identity: 80
89+
strand: "minus" # this parameter is fixed
90+
word_size: 10
91+
dust: "no"
92+
soft_masking: "false"
93+
max_target_seqs: 10
94+
cross_hybridization_blastn_hit_parameters:
95+
coverage: 80 # can be turned into min_alignment_length
96+
97+
98+
### Parameters for the Oligo set selection
99+
### -----------------------------------------------
100+
max_graph_size: 5000 # maximum number of oligos that are taken into consideration in the last step (5000 -> ~5GB, 2500 -> ~1GB)
101+
102+
103+
### Parameters for Melting Temperature
104+
### -----------------------------------------------
105+
# The melting temperature is used in 2 different stages (property filters and padlock detection probe design), where a few parameters are shared and the others differ.
106+
# parameters for melting temperature -> for more information on parameters, see: https://biopython.org/docs/1.75/api/Bio.SeqUtils.MeltingTemp.html#Bio.SeqUtils.MeltingTemp.Tm_NN
107+
## target probe
108+
Tm_parameters_probe:
109+
check: true #default
110+
strict: true #default
111+
c_seq: null #default
112+
shift: 0 #default
113+
nn_table: DNA_NN3 # Allawi & SantaLucia (1997)
114+
tmm_table: DNA_TMM1 #default
115+
imm_table: DNA_IMM1 #default
116+
de_table: DNA_DE1 #default
117+
dnac1: 50 #[nM]
118+
dnac2: 0 #[nM]
119+
selfcomp: false #default
120+
saltcorr: 7 # Owczarzy et al. (2008)
121+
Na: 39 #[mM]
122+
K: 75 #[mM]
123+
Tris: 20 #[mM]
124+
Mg: 10 #[mM]
125+
dNTPs: 0 #[mM] default
126+
127+
Tm_chem_correction_param_probe:
128+
DMSO: 0 #default
129+
fmd: 20
130+
DMSOfactor: 0.75 #default
131+
fmdfactor: 0.65 #default
132+
fmdmethod: 1 #default
133+
GC: null #default
134+
135+
Tm_salt_correction_param_probe: null # if salt correction desired, please add parameters below
136+
137+
## detection oligo
138+
Tm_parameters_detection_oligo:
139+
check: true #default
140+
strict: true #default
141+
c_seq: null #default
142+
shift: 0 #default
143+
nn_table: DNA_NN3 # Allawi & SantaLucia (1997)
144+
tmm_table: DNA_TMM1 #default
145+
imm_table: DNA_IMM1 #default
146+
de_table: DNA_DE1 #default
147+
dnac1: 50 #[nM]
148+
dnac2: 0 #[nM]
149+
selfcomp: false #default
150+
saltcorr: 7 # Owczarzy et al. (2008)
151+
Na: 39 #[mM]
152+
K: 0 #[mM] default
153+
Tris: 0 #[mM] default
154+
Mg: 0 #[mM] default
155+
dNTPs: 0 #[mM] default
156+
157+
Tm_chem_correction_param_detection_oligo:
158+
DMSO: 0 #default
159+
fmd: 30
160+
DMSOfactor: 0.75 #default
161+
fmdfactor: 0.65 #default
162+
fmdmethod: 1 #default
163+
GC: null #default
164+
165+
Tm_salt_correction_param_detection_oligo: null # if salt correction desired, please add parameters below

0 commit comments

Comments
 (0)