Skip to content

Commit 11e4dbf

Browse files
authored
Merge pull request #5 from Lattice-Automation/modernize3
Modernize, simplify
2 parents b29cb7f + dd185f6 commit 11e4dbf

File tree

137 files changed

+4496
-2314
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

137 files changed

+4496
-2314
lines changed

.gitignore

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,11 +24,13 @@ dist
2424
*.gz
2525
*.zip
2626

27-
# test output files
28-
test/output
29-
3027
# doc site build output
3128
docs/_site
3229

3330
# mypy
3431
.mypy_cache
32+
33+
# go mod vendor
34+
vendor
35+
36+
.vscode

Dockerfile

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
FROM golang:1.18.3
2+
3+
RUN apt-get update && \
4+
apt-get install -y --no-install-recommends \
5+
ncbi-blast+ \
6+
primer3
7+
8+
WORKDIR $HOME/src
9+
ADD . .
10+
RUN go install ./cmd/repp
11+
ENTRYPOINT ["repp"]

Makefile

Lines changed: 24 additions & 88 deletions
Original file line numberDiff line numberDiff line change
@@ -1,97 +1,33 @@
1-
LOCAL_BIN=/usr/local/bin
2-
APP=${LOCAL_BIN}/repp
3-
APP_DATA=$${HOME}/.repp
4-
SETTINGS=./config/config.yaml
5-
6-
NAME=repp
7-
VERSION=0.1.0
8-
9-
DIST_WIN_ZIP=${NAME}_windows_${VERSION}.zip
10-
DIST_SRC=${NAME}_src_${VERSION}
11-
DIST_SRC_TAR=${DIST_SRC}.tar.gz
12-
13-
PLATFORM:=$(shell uname)
14-
15-
.PHONY: test dist docs
16-
.DEFAULT_GOAL: build
17-
18-
ifeq ($(PLATFORM),Windows_NT)
19-
$(error Windows not supported via make)
20-
endif
21-
22-
build:
23-
go get -d
24-
env GOOS=linux go build -o ./bin/linux -v
25-
env GOOS=darwin go build -o ./bin/darwin -v
26-
env GOOS=windows go build -o ./bin/repp.exe -v
1+
.PHONY: build
2+
build: fmt lint
3+
go mod tidy
4+
go mod vendor
5+
go build -o ./bin/repp ./cmd/repp
276

7+
.PHONY: install
288
install:
29-
mkdir -p $(APP_DATA)
30-
31-
cp $(SETTINGS) $(APP_DATA)/config.yaml
32-
cp -r ./vendor/primer3_config $(APP_DATA)
33-
cp -r ./assets/addgene/db/** $(APP_DATA)
34-
cp -r ./assets/igem/db/** $(APP_DATA)
35-
cp -r ./assets/dnasu/db/** $(APP_DATA)
36-
cp ./assets/snapgene/features.tsv $(APP_DATA)
37-
cp ./assets/neb/enzymes.tsv $(APP_DATA)
38-
39-
ifeq ($(PLATFORM),Linux)
40-
install ./bin/linux $(APP)
41-
install -C ./vendor/linux/blastn $(LOCAL_BIN)
42-
install -C ./vendor/linux/ntthal $(LOCAL_BIN)
43-
install -C ./vendor/linux/primer3_core $(LOCAL_BIN)
44-
install -C ./vendor/linux/blastdbcmd $(LOCAL_BIN)
45-
endif
9+
go install ./cmd/repp
4610

47-
ifeq ($(PLATFORM),Darwin)
48-
install ./bin/darwin $(APP)
49-
install -C ./vendor/darwin/blastn $(LOCAL_BIN)
50-
install -C ./vendor/darwin/ntthal $(LOCAL_BIN)
51-
install -C ./vendor/darwin/primer3_core $(LOCAL_BIN)
52-
install -C ./vendor/darwin/blastdbcmd $(LOCAL_BIN)
53-
endif
11+
.PHONY: image
12+
image:
13+
docker build -t jjtimmons/repp:latest .
5414

55-
windows: build
56-
cd scripts && makensis windows_installer.nsi
15+
image/push: image
16+
docker push jjtimmons/repp:latest
5717

58-
all: build install
18+
.PHONY: test
19+
test:
20+
go test -timeout 200s ./internal/...
5921

60-
dbs:
61-
cd assets && sh makeblastdbs.sh
62-
63-
uninstall: clean
64-
rm $(APP)
65-
rm -rf $(APP_DATA)
66-
67-
test: all
68-
go test -timeout 200s ./internal/repp
69-
70-
dist-dir:
71-
mkdir -p ${DIST_SRC}
72-
rsync -r --delete\
73-
--exclude={'.git','dist','test','scripts','bin/repp_install.exe','bin/repp.exe','vendor/windows','assets/addgene/addgene.json','assets/dnasu/DNASU*','assets/igem/xml*','assets/neb/*/'}\
74-
. ${DIST_SRC}
75-
tar -czf ${DIST_SRC_TAR} ${DIST_SRC}
76-
rm -rf ${DIST_SRC}
77-
78-
dist: windows dist-dir
79-
cp ./README.md ./docs/index.md
80-
81-
zip ${DIST_WIN_ZIP} ./bin/repp_install.exe
82-
83-
scp ${DIST_SRC_TAR} jjtimmons@frs.sourceforge.net:/home/frs/project/repplasmid/
84-
scp ${DIST_WIN_ZIP} jjtimmons@frs.sourceforge.net:/home/frs/project/repplasmid/
22+
.PHONY: docs
23+
docs:
24+
cd docs && make
8525

86-
rm ${DIST_SRC_TAR}
87-
rm ${DIST_WIN_ZIP}
26+
docs/serve: docs
27+
cd docs && make serve
8828

89-
docs:
90-
go run . docs
91-
cp README.md ./docs/index.md
92-
cd docs && bundle exec just-the-docs rake search:init
93-
find ./docs -name *make* -type f -exec sed -i -e 's/\/Users\/josh/~/g' {} \;
94-
rm ./docs/*-e
29+
fmt:
30+
gofmt -l ./internal
9531

96-
docs-dev: docs
97-
cd docs && bundle exec jekyll serve
32+
lint:
33+
golangci-lint run

README.md

Lines changed: 56 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -1,51 +1,76 @@
11
<img src="https://user-images.githubusercontent.com/13923102/72353248-b90d7680-36b1-11ea-8714-3249a887b156.png" width="650" margin="0 auto 10px auto" />
22

3-
`REPP` is a tool for DNA assembly. It takes a target plasmid and finds the least expensive combination of fragments from user and public repositories to create it via Gibson Assembly.
3+
`repp` is a tool for DNA assembly. It takes a target plasmid and finds the least expensive combination of fragments from user and public repositories to create it via Gibson Assembly.
44

55
Biologists profit when they can re-use DNA during plasmid design: it enables cheaper designs and faster builds. But parsing through all re-usable DNA is completely infeasible. For example, there are over 75,000 plasmids in Addgene -- the likelihood of knowing the best combination and ordering of sub-sequences from Addgene for a given plasmid design is low.
66

7-
`REPP` enables such plasmid design. It turns plasmid specifications into designs using the least expensive design with both existing DNA fragments (PCR) and newly synthesized DNA fragments. Plasmids are specifiable using their target sequence, features, or sub-fragments.
7+
`repp` does such plasmid design. It turns specifications into assembly plans that use the least expensive combination of existing (PCR) and newly synthesized DNA fragments. Target plasmids are specifiable using their target sequence, features, or sub-fragments.
88

99
## Publication
1010

11-
We published a paper about REPP in PLOS One: [Timmons, J.J. & Densmore D. Repository-based plasmid design. PLOS One.](https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0223935) We used it to build thousands of plasmids from iGEM and Addgene and showed that it reduced the cost of plasmid design as compared to synthesis.
11+
We published a paper about `repp` in PLOS One: [Timmons, J.J. & Densmore D. Repository-based plasmid design. PLOS One.](https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0223935) We used it to build thousands of plasmids from iGEM and Addgene and showed that it reduced the cost of plasmid design as compared to synthesis.
12+
13+
## Examples
14+
15+
See [/examples](/examples) for sample input and output from `repp`.
16+
17+
<br>
18+
19+
![https://user-images.githubusercontent.com/13923102/72355113-d55ee280-36b4-11ea-8663-f5759cd7597b.png](https://user-images.githubusercontent.com/13923102/72355113-d55ee280-36b4-11ea-8663-f5759cd7597b.png)
20+
21+
## Documentation
22+
23+
See [the docs](https://lattice-automation.github.io/repp/) or `--help` for any command.
1224

1325
## Installation
1426

15-
Download links are available at SourceForge: [https://sourceforge.net/projects/repplasmid/files/](https://sourceforge.net/projects/repplasmid/files/)
27+
### From Docker
1628

17-
### MacOS/Linux
29+
Run `repp` via Docker:
1830

19-
```bash
20-
wget -O repp_src_0.1.0.tar.gz 'https://sourceforge.net/projects/repplasmid/files/repp_src_0.1.0.tar.gz/download'
21-
tar xzf repp_src_0.1.0.tar.gz
22-
cd repp_src_0.1.0
23-
make install
31+
```sh
32+
mkdir -p $HOME/.repp
33+
alias repp="docker run -i --rm --mount type=bind,src=$HOME/.repp,dst=/root/.repp jjtimmons/repp:latest"
34+
repp --help
2435
```
2536

26-
### Windows
37+
### From Source
2738

28-
1. Download the most recent `repp_windows.*.zip` from [SourceForge](https://sourceforge.net/projects/repplasmid/files/)
29-
2. Unzip
30-
3. Run `repp_install.exe`
39+
Note: `repp` depends on:
3140

32-
## Documentation
41+
- [`Go >= 1.18.0`](https://go.dev/doc/install) for compilation
42+
- [BLAST+](https://blast.ncbi.nlm.nih.gov/Blast.cgi?PAGE_TYPE=BlastDocs&DOC_TYPE=Download) for sequence alignment
43+
- [Primer3](https://github.com/primer3-org/primer3) for hairpin detection and off-target primer-binding detection
3344

34-
See [the docs](https://lattice-automation.github.io/repp/) or use `--help` on any command.
45+
```sh
46+
git clone https://github.com/Lattice-Automation/repp.git
47+
cd repp
48+
make install
49+
```
3550

36-
## Examples
51+
## Sequence Databases
3752

38-
See [/examples](/examples) to see input/output from REPP.
53+
`repp` uses sequence databases for plasmid assembly. Each database is added from a FASTA file, along with a name and the cost per plasmid from that source.
3954

40-
<br>
55+
Some existing FASTA files are maintained in our S3 bucket [`repp`](https://s3.console.aws.amazon.com/s3/buckets/repp?region=us-east-1&tab=objects). Below is a snippet for downloading and installing each via the [AWS CLI](https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html):
4156

42-
![https://user-images.githubusercontent.com/13923102/72355113-d55ee280-36b4-11ea-8663-f5759cd7597b.png](https://user-images.githubusercontent.com/13923102/72355113-d55ee280-36b4-11ea-8663-f5759cd7597b.png)
57+
```sh
58+
for db in igem addgene dnasu; do
59+
aws s3 cp "s3://repp/$db.fa.gz" .
60+
gzip -d "$db.fa.gz"
61+
done
62+
63+
# add sequence DBs with the cost of ordering a plasmid from each source
64+
repp add database --name igem --cost 0.0 < igem.fa
65+
repp add database --name addgene --cost 65.0 < addgene.fa
66+
repp add database --name dnasu --cost 55.0 < dnasu.fa
67+
```
4368

4469
## Plasmid Design
4570

4671
### Sequence
4772

48-
To design a plasmid based on its expected sequence save it to a FASTA or Genbank file. For example:
73+
To design a plasmid based on its target sequence, save it to a FASTA file. For example:
4974

5075
```
5176
>2ndVal_mScarlet-I
@@ -55,20 +80,20 @@ CAACCTTACCAGAGGGCGCCCCAGCTGGCAATTCCGACGTCTAAGAAACCATTATTATCA...
5580
Then call `repp make sequence` to design it. The following example uses Addgene and a local BLAST database `parts_library.fa` as fragment sources:
5681

5782
```bash
58-
repp make sequence --in "./2ndVal_mScarlet-I.fa" --addgene --dbs "parts_library.fa"
83+
repp make sequence --in "./2ndVal_mScarlet-I.fa" --dbs "addgene,parts_library.fa"
5984
```
6085

6186
### Features
6287

63-
To design a plasmid based on the features it should contain, specify the features by name. By default, these should refer to features that are in REPP's feature database (`~/.repp/features.tsv`). Features can also refer to fragments, as in the following example where a plasmid is specified by its constituent list of iGEM parts:
88+
To design a plasmid based on the features it should contain, specify the features by name. By default, these should refer to features that are in `repp`'s feature database (`~/.repp/features.tsv`). Features can also refer to fragments, as in the following example where a plasmid is specified by its constituent list of iGEM parts:
6489

6590
```bash
66-
repp make features "BBa_R0062,BBa_B0034,BBa_C0040,BBa_B0010,BBa_B0012" --backbone pSB1C3 --enzymes "EcoRI,PstI" --igem
91+
repp make features "BBa_R0062,BBa_B0034,BBa_C0040,BBa_B0010,BBa_B0012" --backbone pSB1C3 --enzymes "EcoRI,PstI" --dbs igem
6792
```
6893

6994
### Fragments
7095

71-
To design a plasmid from its constiuent fragments, save them to a multi-FASTA.
96+
To design a plasmid from its constituent fragments, save them to a multi-FASTA.
7297

7398
```txt
7499
>GFP
@@ -83,25 +108,11 @@ And call the file from `repp make fragments`:
83108
repp make fragments --in "./fragments.fa" --out "plasmid.json"
84109
```
85110

86-
### Databases
87-
88-
`REPP` includes three embedded databases from large public repositories: [Addgene](https://www.addgene.org/), [iGEM](http://parts.igem.org/Main_Page), and [DNASU](https://dnasu.org/DNASU/Home.do). Each embedded database and its file path after installation are as follows:
89-
90-
- Addgene, `--addgene`, `~/.repp/addgene`
91-
- DNASU, `--dnasu`, `~/.repp/dnasu`
92-
- iGEM, `--igem`, `~/.repp/igem`
93-
94-
Users can also use their or their lab's fragment databases through the `--dbs` as a list of comma-separated fragment [BLAST databases](https://www.ncbi.nlm.nih.gov/books/NBK279688/). An example of a plasmid design using Addgene, DNASU, and multiple user-defined BLAST repositories is below:
95-
96-
```bash
97-
repp make sequence --in "./2ndVal_mScarlet-I.fa" --addgene --dnasu --dbs "proteins.fa,backbones.fa"
98-
```
99-
100111
### Configuration
101112

102-
The default settings file used by `REPP` is in `~/.repp/config.yaml`. The maximum number of fragments in an assembly, the minimum overlap between adjacent fragments, and cost curves for synthesis are all defined there. Editing this file directly will change the default values used during plasmid designs. For more details, see [configuration](https://jjtimmons.github.io/repp/configuration).
113+
The default settings file used by `repp` is in `~/.repp/config.yaml`. The maximum number of fragments in an assembly, the minimum overlap between adjacent fragments, and cost curves for synthesis are all defined there. Editing this file directly will change the default values used during plasmid design. For more details, see [configuration](https://lattice-automation.github.io/repp/configuration).
103114

104-
To overwrite some `REPP` settings on a per-design basis, create another YAML file:
115+
To overwrite some `repp` settings on a per-design basis, create another YAML file:
105116

106117
```yaml
107118
# custom_settings.yaml
@@ -115,22 +126,22 @@ synthetic-fragment-cost:
115126
And reference it during plasmid design:
116127
117128
```bash
118-
repp make sequence --in "./2ndVal_mScarlet-I.fa" --addgene --settings "./custom_settings.yaml"
129+
repp make sequence --in "./2ndVal_mScarlet-I.fa" --dbs addgene --settings "./custom_settings.yaml"
119130
```
120131

121132
### Backbones and Enzymes
122133

123-
The plasmid sequence in the input file is designed as a circular plasmid by default. In other words, REPP assumes that the sequence includes an insert sequence as well as a backbone. To use the sequence in the input file as an insert sequence but another fragment as a backbone, use the `--backbone` and `--enzymes` command in combination. This will lookup `--backbone` in the fragment databases and digest it with the enzyme selected through the `--enzymes` flag. The linearized backbone will be concatenated to the insert sequence. For example, to insert a `GFP_CDS` sequence into iGEM's `pSB1A3` backbone after linearizing it with `PstI` and `EcoRI`:
134+
The plasmid sequence in the input file is designed as a circular plasmid by default. In other words, `repp` assumes that the sequence includes an insert sequence as well as a backbone. To use the sequence in the input file as an insert sequence but another fragment as a backbone, use the `--backbone` and `--enzymes` flags in combination. This will look up the `--backbone` fragment in the fragment databases and digest it with the enzymes selected through the `--enzymes` flag. The linearized backbone will be concatenated to the insert sequence. For example, to insert a `GFP_CDS` sequence into iGEM's `pSB1A3` backbone after linearizing it with `PstI` and `EcoRI`:
124135

125136
```bash
126-
repp make sequence --in "./GFP_CDS.fa" --addgene --igem --backbone pSB1A3 --enzymes "PstI,EcoRI"
137+
repp make sequence --in "./GFP_CDS.fa" --dbs addgene,igem --backbone pSB1A3 --enzymes "PstI,EcoRI"
127138
```
128139

129140
The largest linearized fragment post-digestion with all enzymes is used as the backbone in the Gibson Assembly.
130141

131142
### Output
132143

133-
`REPP` saves plasmid designs to JSON files at the path specified through the `--out` flag. Below is an abbreviated example of plasmid design output:
144+
`repp` saves plasmid designs to JSON files at the path specified through the `--out` flag. Below is an abbreviated example of plasmid design output:
134145

135146
```json
136147
{

0 commit comments

Comments
 (0)