Lattice-Automation
diff --git a/‎.gitignore
Lines changed: 5 additions & 3 deletions b/‎.gitignore
Lines changed: 5 additions & 3 deletions
diff --git a/‎Dockerfile
Lines changed: 11 additions & 0 deletions b/‎Dockerfile
Lines changed: 11 additions & 0 deletions
diff --git a/‎Makefile
Lines changed: 24 additions & 88 deletions b/‎Makefile
Lines changed: 24 additions & 88 deletions
diff --git a/‎README.md
Lines changed: 56 additions & 45 deletions b/‎README.md
Lines changed: 56 additions & 45 deletions
@@ -24,11 +24,13 @@ dist
 *.gz
 *.zip
 
-# test output files
-test/output
-
 # doc site build output
 docs/_site
 
 # mypy
 .mypy_cache
+
+# go mod vendor
+vendor
+
+.vscode
@@ -0,0 +1,11 @@
+FROM golang:1.18.3
+
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+        ncbi-blast+ primer3 && \
+    rm -rf /var/lib/apt/lists/*
+
+WORKDIR $HOME/src
+COPY . .
+RUN go install ./cmd/repp
+ENTRYPOINT ["repp"]
@@ -1,97 +1,33 @@
-LOCAL_BIN=/usr/local/bin
-APP=${LOCAL_BIN}/repp
-APP_DATA=$${HOME}/.repp
-SETTINGS=./config/config.yaml
-
-NAME=repp
-VERSION=0.1.0
-
-DIST_WIN_ZIP=${NAME}_windows_${VERSION}.zip
-DIST_SRC=${NAME}_src_${VERSION}
-DIST_SRC_TAR=${DIST_SRC}.tar.gz
-
-PLATFORM:=$(shell uname)
-
-.PHONY: test dist docs
-.DEFAULT_GOAL: build
-
-ifeq ($(PLATFORM),Windows_NT)
-	$(error Windows not supported via make)
-endif
-
-build:
-	go get -d
-	env GOOS=linux go build -o ./bin/linux -v
-	env GOOS=darwin go build -o ./bin/darwin -v
-	env GOOS=windows go build -o ./bin/repp.exe -v
+.PHONY: build fmt lint
+build: fmt lint
+	go mod tidy
+	go mod vendor
+	go build -o ./bin/repp ./cmd/repp
 
+.PHONY: install
 install:
-	mkdir -p $(APP_DATA)
-
-	cp $(SETTINGS) $(APP_DATA)/config.yaml
-	cp -r ./vendor/primer3_config $(APP_DATA) 
-	cp -r ./assets/addgene/db/** $(APP_DATA) 
-	cp -r ./assets/igem/db/** $(APP_DATA)
-	cp -r ./assets/dnasu/db/** $(APP_DATA)
-	cp ./assets/snapgene/features.tsv $(APP_DATA)
-	cp ./assets/neb/enzymes.tsv $(APP_DATA)
-
-ifeq ($(PLATFORM),Linux)
-	install ./bin/linux $(APP)
-	install -C ./vendor/linux/blastn $(LOCAL_BIN)
-	install -C ./vendor/linux/ntthal $(LOCAL_BIN)
-	install -C ./vendor/linux/primer3_core $(LOCAL_BIN)
-	install -C ./vendor/linux/blastdbcmd $(LOCAL_BIN)
-endif
+	go install ./cmd/repp
 
-ifeq ($(PLATFORM),Darwin)
-	install ./bin/darwin $(APP)
-	install -C ./vendor/darwin/blastn $(LOCAL_BIN)
-	install -C ./vendor/darwin/ntthal $(LOCAL_BIN)
-	install -C ./vendor/darwin/primer3_core $(LOCAL_BIN)
-	install -C ./vendor/darwin/blastdbcmd $(LOCAL_BIN)
-endif
+.PHONY: image image/push
+image:
+	docker build -t jjtimmons/repp:latest .
 
-windows: build
-	cd scripts && makensis windows_installer.nsi
+image/push: image
+	docker push jjtimmons/repp:latest
 
-all: build install
+.PHONY: test
+test:
+	go test -timeout 200s ./internal/...
 
-dbs:
-	cd assets && sh makeblastdbs.sh
-
-uninstall: clean
-	rm $(APP)
-	rm -rf $(APP_DATA)
-
-test: all
-	go test -timeout 200s ./internal/repp
-
-dist-dir:
-	mkdir -p ${DIST_SRC}
-	rsync -r --delete\
-	 --exclude={'.git','dist','test','scripts','bin/repp_install.exe','bin/repp.exe','vendor/windows','assets/addgene/addgene.json','assets/dnasu/DNASU*','assets/igem/xml*','assets/neb/*/'}\
-	 . ${DIST_SRC}
-	tar -czf ${DIST_SRC_TAR} ${DIST_SRC}
-	rm -rf ${DIST_SRC}
-
-dist: windows dist-dir
-	cp ./README.md ./docs/index.md
-
-	zip ${DIST_WIN_ZIP} ./bin/repp_install.exe
-
-	scp ${DIST_SRC_TAR} jjtimmons@frs.sourceforge.net:/home/frs/project/repplasmid/
-	scp ${DIST_WIN_ZIP} jjtimmons@frs.sourceforge.net:/home/frs/project/repplasmid/
+.PHONY: docs docs/serve
+docs:
+	$(MAKE) -C docs
 
-	rm ${DIST_SRC_TAR}
-	rm ${DIST_WIN_ZIP}
+docs/serve: docs
+	$(MAKE) -C docs serve
 
-docs:
-	go run . docs
-	cp README.md ./docs/index.md
-	cd docs && bundle exec just-the-docs rake search:init
-	find ./docs -name *make* -type f -exec sed -i -e 's/\/Users\/josh/~/g' {} \;
-	rm ./docs/*-e
+fmt:
+	gofmt -l ./internal
 
-docs-dev: docs 
-	cd docs && bundle exec jekyll serve
+lint:
+	golangci-lint run
@@ -1,51 +1,76 @@
 <img src="https://user-images.githubusercontent.com/13923102/72353248-b90d7680-36b1-11ea-8714-3249a887b156.png" width="650" margin="0 auto 10px auto" />
 
-`REPP` is a tool for DNA assembly. It takes a target plasmid and finds the least expensive combination of fragments from user and public repositories to create it via Gibson Assembly.
+`repp` is a tool for DNA assembly. It takes a target plasmid and finds the least expensive combination of fragments from user and public repositories to create it via Gibson Assembly.
 
 Biologists profit when they can re-use DNA during plasmid design: it enables cheaper designs and faster builds. But parsing through all re-usable DNA is completely infeasible. For example, there are over 75,000 plasmids in Addgene -- the likelihood of knowing the best combination and ordering of sub-sequences from Addgene for a given plasmid design is low.
 
-`REPP` enables such plasmid design. It turns plasmid specifications into designs using the least expensive design with both existing DNA fragments (PCR) and newly synthesized DNA fragments. Plasmids are specifiable using their target sequence, features, or sub-fragments.
+`repp` does such plasmid design. It turns specifications into assembly plans that use the least expensive combination of existing (PCR) and newly synthesized DNA fragments. Target plasmids are specifiable using their target sequence, features, or sub-fragments.
 
 ## Publication
 
-We published a paper about REPP in PLOS One: [Timmons, J.J. & Densmore D. Repository-based plasmid design. PLOS One.](https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0223935) We used it to build thousands of plasmids from iGEM and Addgene and showed that it reduced the cost of plasmid design as compared to synthesis.
+We published a paper about `repp` in PLOS One: [Timmons, J.J. & Densmore D. Repository-based plasmid design. PLOS One.](https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0223935) We used it to build thousands of plasmids from iGEM and Addgene and showed that it reduced the cost of plasmid design as compared to synthesis.
+
+## Examples
+
+See [/examples](/examples) for sample input and output from `repp`.
+
+<br>
+
+![https://user-images.githubusercontent.com/13923102/72355113-d55ee280-36b4-11ea-8663-f5759cd7597b.png](https://user-images.githubusercontent.com/13923102/72355113-d55ee280-36b4-11ea-8663-f5759cd7597b.png)
+
+## Documentation
+
+See [the docs](https://lattice-automation.github.io/repp/) or run any command with `--help`.
 
 ## Installation
 
-Download links are available at SourceForge: [https://sourceforge.net/projects/repplasmid/files/](https://sourceforge.net/projects/repplasmid/files/)
+### From Docker
 
-### MacOS/Linux
+Run `repp` via Docker:
 
-```bash
-wget -O repp_src_0.1.0.tar.gz 'https://sourceforge.net/projects/repplasmid/files/repp_src_0.1.0.tar.gz/download'
-tar xzf repp_src_0.1.0.tar.gz
-cd repp_src_0.1.0
-make install
+```sh
+mkdir -p $HOME/.repp
+alias repp="docker run -i --rm --mount type=bind,src=$HOME/.repp,dst=/root/.repp jjtimmons/repp:latest"
+repp --help
 ```
 
-### Windows
+### From Source
 
-1. Download the most recent `repp_windows.*.zip` from [SourceForge](https://sourceforge.net/projects/repplasmid/files/)
-2. Unzip
-3. Run `repp_install.exe`
+Note: `repp` depends on:
 
-## Documentation
+- [`Go >= 1.18.0`](https://go.dev/doc/install) for compilation
+- [BLAST+](https://blast.ncbi.nlm.nih.gov/Blast.cgi?PAGE_TYPE=BlastDocs&DOC_TYPE=Download) for sequence alignment
+- [Primer3](https://github.com/primer3-org/primer3) for hairpin detection and off-target primer-binding detection
 
-See [the docs](https://lattice-automation.github.io/repp/) or use `--help` on any command.
+```sh
+git clone https://github.com/Lattice-Automation/repp.git
+cd repp
+make install
+```
 
-## Examples
+## Sequence Databases
 
-See [/examples](/examples) to see input/output from REPP.
+`repp` uses sequence databases for plasmid assembly. Each database is added as a FASTA file along with a name and the cost of ordering a plasmid from that source.
 
-<br>
+Some existing FASTA files are maintained in our S3 bucket [`repp`](https://s3.console.aws.amazon.com/s3/buckets/repp?region=us-east-1&tab=objects). Below is a snippet for downloading and installing each via the [AWS CLI](https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html):
 
-![https://user-images.githubusercontent.com/13923102/72355113-d55ee280-36b4-11ea-8663-f5759cd7597b.png](https://user-images.githubusercontent.com/13923102/72355113-d55ee280-36b4-11ea-8663-f5759cd7597b.png)
+```sh
+for db in igem addgene dnasu; do
+  aws s3 cp "s3://repp/$db.fa.gz" .
+  gzip -d "$db.fa.gz"
+done
+
+# add sequence DBs with the cost of ordering a plasmid from each source
+repp add database --name igem --cost 0.0 < igem.fa
+repp add database --name addgene --cost 65.0 < addgene.fa
+repp add database --name dnasu --cost 55.0 < dnasu.fa
+```
 
 ## Plasmid Design
 
 ### Sequence
 
-To design a plasmid based on its expected sequence save it to a FASTA or Genbank file. For example:
+To design a plasmid based on its target sequence, save it to a FASTA file. For example:
 
 ```
 >2ndVal_mScarlet-I
@@ -55,20 +80,20 @@ CAACCTTACCAGAGGGCGCCCCAGCTGGCAATTCCGACGTCTAAGAAACCATTATTATCA...
 Then call `repp make sequence` to design it. The following example uses Addgene and a local BLAST database `parts_library.fa` as fragment sources:
 
 ```bash
-repp make sequence --in "./2ndVal_mScarlet-I.fa" --addgene --dbs "parts_library.fa"
+repp make sequence --in "./2ndVal_mScarlet-I.fa" --dbs "addgene,parts_library.fa"
 ```
 
 ### Features
 
-To design a plasmid based on the features it should contain, specify the features by name. By default, these should refer to features that are in REPP's feature database (`~/.repp/features.tsv`). Features can also refer to fragments, as in the following example where a plasmid is specified by its constituent list of iGEM parts:
+To design a plasmid based on the features it should contain, specify the features by name. By default, these should refer to features that are in `repp`'s feature database (`~/.repp/features.tsv`). Features can also refer to fragments, as in the following example where a plasmid is specified by its constituent list of iGEM parts:
 
 ```bash
-repp make features "BBa_R0062,BBa_B0034,BBa_C0040,BBa_B0010,BBa_B0012" --backbone pSB1C3 --enzymes "EcoRI,PstI" --igem
+repp make features "BBa_R0062,BBa_B0034,BBa_C0040,BBa_B0010,BBa_B0012" --backbone pSB1C3 --enzymes "EcoRI,PstI" --dbs igem
 ```
 
 ### Fragments
 
-To design a plasmid from its constiuent fragments, save them to a multi-FASTA.
+To design a plasmid from its constituent fragments, save them to a multi-FASTA.
 
 ```txt
 >GFP
@@ -83,25 +108,11 @@ And call the file from `repp make fragments`:
 repp make fragments --in "./fragments.fa" --out "plasmid.json"
 ```
 
-### Databases
-
-`REPP` includes three embedded databases from large public repositories: [Addgene](https://www.addgene.org/), [iGEM](http://parts.igem.org/Main_Page), and [DNASU](https://dnasu.org/DNASU/Home.do). Each embedded database and its file path after installation are as follows:
-
-- Addgene, `--addgene`, `~/.repp/addgene`
-- DNASU, `--dnasu`, `~/.repp/dnasu`
-- iGEM, `--igem`, `~/.repp/igem`
-
-Users can also use their or their lab's fragment databases through the `--dbs` as a list of comma-separated fragment [BLAST databases](https://www.ncbi.nlm.nih.gov/books/NBK279688/). An example of a plasmid design using Addgene, DNASU, and multiple user-defined BLAST repositories is below:
-
-```bash
-repp make sequence --in "./2ndVal_mScarlet-I.fa" --addgene --dnasu --dbs "proteins.fa,backbones.fa"
-```
-
 ### Configuration
 
-The default settings file used by `REPP` is in `~/.repp/config.yaml`. The maximum number of fragments in an assembly, the minimum overlap between adjacent fragments, and cost curves for synthesis are all defined there. Editing this file directly will change the default values used during plasmid designs. For more details, see [configuration](https://jjtimmons.github.io/repp/configuration).
+The default settings file used by `repp` is in `~/.repp/config.yaml`. The maximum number of fragments in an assembly, the minimum overlap between adjacent fragments, and cost curves for synthesis are all defined there. Editing this file directly will change the default values used during plasmid designs. For more details, see [configuration](https://lattice-automation.github.io/repp/configuration).
 
-To overwrite some `REPP` settings on a per-design basis, create another YAML file:
+To overwrite some `repp` settings on a per-design basis, create another YAML file:
 
 ```yaml
 # custom_settings.yaml
@@ -115,22 +126,22 @@ synthetic-fragment-cost:
 And reference it during plasmid design:
 
 ```bash
-repp make sequence --in "./2ndVal_mScarlet-I.fa" --addgene --settings "./custom_settings.yaml"
+repp make sequence --in "./2ndVal_mScarlet-I.fa" --dbs addgene --settings "./custom_settings.yaml"
 ```
 
 ### Backbones and Enzymes
 
-The plasmid sequence in the input file is designed as a circular plasmid by default. In other words, REPP assumes that the sequence includes an insert sequence as well as a backbone. To use the sequence in the input file as an insert sequence but another fragment as a backbone, use the `--backbone` and `--enzymes` command in combination. This will lookup `--backbone` in the fragment databases and digest it with the enzyme selected through the `--enzymes` flag. The linearized backbone will be concatenated to the insert sequence. For example, to insert a `GFP_CDS` sequence into iGEM's `pSB1A3` backbone after linearizing it with `PstI` and `EcoRI`:
+The plasmid sequence in the input file is designed as a circular plasmid by default. In other words, `repp` assumes that the sequence includes an insert sequence as well as a backbone. To use the sequence in the input file as an insert sequence but another fragment as a backbone, use the `--backbone` and `--enzymes` flags in combination. This will look up `--backbone` in the fragment databases and digest it with the enzymes selected through the `--enzymes` flag. The linearized backbone will be concatenated to the insert sequence. For example, to insert a `GFP_CDS` sequence into iGEM's `pSB1A3` backbone after linearizing it with `PstI` and `EcoRI`:
 
 ```bash
-repp make sequence --in "./GFP_CDS.fa" --addgene --igem --backbone pSB1A3 --enzymes "PstI,EcoRI"
+repp make sequence --in "./GFP_CDS.fa" --dbs addgene,igem --backbone pSB1A3 --enzymes "PstI,EcoRI"
 ```
 
 The largest linearized fragment post-digestion with all enzymes is used as the backbone in the Gibson Assembly.
 
 ### Output
 
-`REPP` saves plasmid designs to JSON files at the path specified through the `--out` flag. Below is an abbreviated example of plasmid design output:
+`repp` saves plasmid designs to JSON files at the path specified through the `--out` flag. Below is an abbreviated example of plasmid design output:
 
 ```json
 {