Skip to content

Commit 734fc4a

Browse files
author
Tyler Burdsall
authored
Merge pull request #9 from iamtheburd/include-boost
v1.3.0 - Add support for larger sets of data with Boost
2 parents 38842d0 + 3bdbd4f commit 734fc4a

File tree

14 files changed

+726
-354
lines changed

14 files changed

+726
-354
lines changed

.gitignore

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
build/*.o
1+
build/*/*.o
2+
*.obj
23
combigen
34
combigen.exe
4-
combigen.obj
5-
*.txt
5+
*.txt

Makefile

Lines changed: 25 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,36 @@
11
CXX = g++
2-
CXXFLAGS = -Wall -O2 -std=c++11
2+
CXXFLAGS = -Wall -O2 -std=c++14
3+
LIBFLAGS =
4+
BOOSTFLAGS = -DUSE_BOOST
35
PREFIX = /usr/local
4-
COMBIGEN_DIR = ./src
6+
COMBIGENDIR = ./src
7+
COMBIGENFILE = combigen.cpp
8+
BUILDDIR = release
59

6-
all: combigen
10+
all: main
711

8-
combigen: combigen.o
9-
$(CXX) $(CXXFLAGS) build/combigen.o -o combigen
12+
main: cli_functions.o combigen.o main.o
13+
# Link all three objects from the same build/$(BUILDDIR) directory the compile rules write to.
$(CXX) $(CXXFLAGS) build/$(BUILDDIR)/main.o build/$(BUILDDIR)/combigen.o build/$(BUILDDIR)/cli_functions.o -o combigen $(LIBFLAGS)
1014

11-
combigen.o: $(COMBIGEN_DIR)/combigen.cpp $(COMBIGEN_DIR)/combigen.h
12-
$(CXX) $(CXXFLAGS) $(COMBIGEN_DIR)/combigen.cpp -c -o build/combigen.o
15+
main.o: $(COMBIGENDIR)/main.cpp
16+
$(CXX) $(CXXFLAGS) $(COMBIGENDIR)/main.cpp -c -o build/$(BUILDDIR)/main.o
17+
18+
combigen.o: $(COMBIGENDIR)/combigen.cpp $(COMBIGENDIR)/combigen.h
19+
$(CXX) $(CXXFLAGS) $(COMBIGENDIR)/$(COMBIGENFILE) -c -o build/$(BUILDDIR)/combigen.o
20+
21+
cli_functions.o: $(COMBIGENDIR)/cli_functions.cpp $(COMBIGENDIR)/combigen.h
22+
$(CXX) $(CXXFLAGS) $(COMBIGENDIR)/cli_functions.cpp -c -o build/$(BUILDDIR)/cli_functions.o
23+
24+
.PHONY: perf
25+
perf: CXXFLAGS += $(BOOSTFLAGS)
26+
perf: LIBFLAGS += -lboost_random
27+
perf: COMBIGENFILE = boost_functions.cpp
28+
perf: BUILDDIR = perf
29+
perf: main
1330

1431
.PHONY: clean
1532
clean:
16-
@rm -f build/*.o combigen
33+
@rm -f build/*/*.o combigen
1734

1835

1936
.PHONY: install

README.md

Lines changed: 62 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
An efficient CLI tool to generate possible combinations written in C++
33

44
## Introduction
5-
Combigen aims to assist with data generation and exploration. Given a `.json` input where each key contains an array of string values, combigen can either generate every possible combination or a random subset of the possible combinations. It aims to be memory-efficient while maintaining high-performance. This can be especially useful when large amounts of data are needed for statistical analysis or mock data in an application.
5+
Combigen aims to assist with data generation and exploration. Given a `.json` input where each key contains an array of string values (or simply an array of string arrays), combigen can either generate every possible combination or a random, evenly-distributed subset of the possible combinations. It aims to be memory-efficient while maintaining high-performance. This can be especially useful when large amounts of data are needed for statistical analysis or mock data in an application.
66

77
It supports output as `.csv` and `.json`.
88

@@ -41,8 +41,38 @@ Usage: combigen [options]
4141
-v Display version number
4242
```
4343

44-
## Installation
44+
## Prerequisites
45+
### Linux/UNIX/Cygwin
46+
**Required:**
47+
* make
48+
* g++ (capable of compiling to the C++14 standard or higher)
4549

50+
**Optional:**
51+
* [Boost](https://www.boost.org), in case you are working with large sets of data
52+
53+
If you need to install Boost, I recommend utilizing your distro's package manager:
54+
55+
#### Debian/Ubuntu
56+
`$ sudo apt install libboost-all-dev`
57+
58+
#### Fedora
59+
`$ sudo dnf install boost`
60+
61+
#### Arch/Manjaro/Antergos
62+
`$ sudo pacman -Sy boost`
63+
64+
65+
### Windows
66+
**Required:**
67+
* Visual Studio 2015 or higher
68+
69+
**Optional:**
70+
* [Boost](https://www.boost.org), in case you are working with large sets of data.
71+
I recommend downloading the precompiled libraries and placing them somewhere easy to remember on your machine.
72+
73+
74+
## Building From Source and Installing
75+
Note: for Windows, if you do not want to/don't have the ability to compile from the source files you can go to the [Release](https://github.com/iamtheburd/combigen/releases) page and directly download the `combigen.exe` binary from there. This also has the added benefit of being compiled with the Boost libraries already.
4676

4777
### Linux/UNIX
4878

@@ -58,6 +88,12 @@ $ git clone --recurse-submodules -j8 https://github.com/iamtheburd/combigen.git
5888
$ make
5989
```
6090

91+
If you need support for larger sets of data (and have Boost installed), instead build with `make perf`:
92+
93+
```
94+
$ make perf
95+
```
96+
6197
3. Install:
6298

6399
```
@@ -66,10 +102,9 @@ $ sudo make install
66102

67103
### Windows
68104

105+
1. Download Visual Studio 2015+ and install.
69106

70-
1. Download Visual Studio and install first
71-
72-
2. Clone the repository to some directory
107+
2. Clone the repository to some directory using the above command
73108

74109
3. Open up the Developer Command Prompt (can usually be found by searching in the Start menu)
75110

@@ -78,16 +113,21 @@ $ sudo make install
78113
5. Build the file:
79114

80115
```
81-
> cl src\combigen.cpp /EHsc /O2
116+
> cl /EHsc /O2 src\cli_functions.cpp src\combigen.cpp src\main.cpp /Fe".\combigen.exe"
117+
```
118+
119+
Alternatively, if you need support for large sets of data (and have Boost installed somewhere on your machine), run this command instead. Ensure you fill in the proper path to your Boost directory (this example assumes Boost 1.68.0 installed):
120+
121+
```
122+
> cl /EHsc /DUSE_BOOST /O2 /I C:\path\to\boost_1_68_0 src\cli_functions.cpp src\boost_functions.cpp src\main.cpp /Fe".\combigen.exe" /link /LIBPATH:C:\path\to\boost_1_68_0\lib64-msvc-14.1
82123
```
83124

84125
6. Place the resulting `combigen.exe` wherever you desire
85126

86-
Alternatively, you can also check out the [Releases](https://github.com/iamtheburd/combigen/releases) tab and directly download the `combigen.exe` from there.
87127

88128
## Usage
89129

90-
Using the example `.json` data provided, here are some examples showcasing some features:
130+
Using the example `combinations.json` data provided, here are some examples showcasing some features:
91131

92132
### Input
93133

@@ -116,6 +156,16 @@ $ combigen -i example_data/combinations.json -r 50000 > output.txt # Generate 5
116156
# and store them in output.txt
117157
```
118158

159+
### Large Sets of Data
160+
161+
To demonstrate how `combigen` can even work with large sets of data (when compiled with the Boost library) we can use the example `large_bits.json` file. Unlike the above example data, this file only contains an array of string arrays. In this set of data, the maximum size is equivalent to 3 ^ 256. We can still find the last entry (max size - 1):
162+
163+
```
164+
$ combigen -i example_data/large_bits.json -n 139008452377144732764939786789661303114218850808529137991604824430036072629766435941001769154109609521811665540548899435520
165+
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2
166+
$
167+
```
168+
119169

120170
### Types
121171

@@ -140,8 +190,8 @@ $
140190
You can also change the delimiter with the `-d` flag:
141191

142192
```
143-
$ combigen -i example_data/combinations.json -r 3 -k -d "|" # Generate 3 random combinations, display the keys,
144-
# and set the delimiter to ||
193+
$ combigen -i example_data/combinations.json -r 3 -k -d "||" # Generate 3 random combinations, display the keys,
194+
# and set the delimiter to ||
145195
Age||First Name||Last Name||Number of Children||Number of Pets||Primary Desktop OS||Primary Mobile Phone OS||Residence||State/Territory
146196
20||Samantha||Harris||3||4||Windows||Other||RV||GA
147197
25||Matthew||Thomas||2||0||Windows||Other||Town Home||IL
@@ -267,6 +317,8 @@ Combigen uses the following open-source libraries:
267317

268318
* [skandhurkat/Getopt-for-Visual-Studio](https://github.com/skandhurkat/Getopt-for-Visual-Studio) - Port of the MinGW version of `getopt.h` so that the CLI works on Windows
269319

320+
* [Boost](https://www.boost.org) - For operating with incredibly large sets of data that push the limits of an `unsigned long long`.
321+
270322

271323
## Contributing
272324
Pull-requests are always welcome

build/perf/.gitkeep

Whitespace-only changes.

build/release/.gitkeep

Whitespace-only changes.

doc/combigen.1

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
.\" Manpage for combigen
22
.\" Send an email to tylerburdsall@gmail.com for questions or concerns regarding this man page
3-
.TH man 1 "05 Jun 2018" "1.2.2" "combigen man page"
3+
.TH man 1 "28 Nov 2018" "1.3.0" "combigen man page"
44
.SH NAME
55
combigen \- efficiently generate combinations
66
.SH SYNOPSIS
@@ -19,6 +19,7 @@ Usage: combigen [options]
1919
-i <input> Take the given .json file as input. Otherwise, input will come
2020
from stdin.
2121
Example: "{ "foo": [ "a", "b", "c" ], "bar": [ "1", "2" ] }"
22+
Or: "[ ["1", "2"], ["3", "4", "a", "b"] ]"
2223

2324
-t <type> Output type (csv or json). Defaults to csv
2425

example_data/large_bits.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
[["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", 
"1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], 
["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"]]

src/boost_functions.cpp

Lines changed: 158 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,158 @@
1+
/* boost_functions.cpp
2+
*
3+
* Copyright (C) 2018 Tyler Burdsall
4+
*
5+
* This program is free software: you can redistribute it and/or modify
6+
* it under the terms of the GNU General Public License as published by
7+
* the Free Software Foundation, either version 3 of the License, or
8+
* (at your option) any later version.
9+
*
10+
* This program is distributed in the hope that it will be useful,
11+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
12+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13+
* GNU General Public License for more details.
14+
*
15+
* You should have received a copy of the GNU General Public License
16+
* along with this program. If not, see <http://www.gnu.org/licenses/>.
17+
*/
18+
19+
#ifndef BOOST_FUNCTIONS
20+
#define BOOST_FUNCTIONS
21+
22+
#include "combigen.h"
23+
24+
// Forward declare functions from cli_functions.h
25+
const void output_result(const vector<string> &result, const generation_args &args, const bool &for_optimization);
26+
const void display_help(void);
27+
const void display_csv_keys(const vector<string> &keys, const string &delim);
28+
29+
const void generate_random_samples_performance_mode(const generation_args &args)
30+
{
31+
const vector<vector<string>> results = lazy_cartesian_product::boost_generate_samples(args.pc.combinations, args.sample_size);
32+
if (!args.display_json)
33+
{
34+
if (args.display_keys)
35+
{
36+
display_csv_keys(args.pc.keys, args.delim);
37+
}
38+
}
39+
else
40+
{
41+
cout << "[\n";
42+
}
43+
for( const vector<string> &row: results)
44+
{
45+
output_result(row, args, true);
46+
if (args.display_json && &row != &results.back())
47+
{
48+
cout << ",";
49+
}
50+
}
51+
if (args.display_json)
52+
{
53+
cout << "]\n";
54+
}
55+
}
56+
57+
const void parse_args(const generation_args &args)
58+
{
59+
const uint1024_t max_size = lazy_cartesian_product::boost_compute_max_size(args.pc.combinations);
60+
if (args.generate_all_combinations)
61+
{
62+
generate_all(max_size, args);
63+
exit(0);
64+
}
65+
else
66+
{
67+
const uint1024_t sample_size(args.sample_size);
68+
if (sample_size == 0 && args.entry_at_provided && !args.generate_all_combinations)
69+
{
70+
const uint1024_t entry_at(args.entry_at);
71+
vector<string> result = lazy_cartesian_product::boost_entry_at(args.pc.combinations, args.entry_at);
72+
output_result(result, args, false);
73+
exit(0);
74+
}
75+
else if (sample_size >= 0)
76+
{
77+
const uint1024_t n(args.sample_size);
78+
if (n > max_size)
79+
{
80+
cerr << "ERROR: Sample size cannot be greater than maximum possible combinations\n";
81+
exit(-1);
82+
}
83+
if (args.perf_mode)
84+
{
85+
generate_random_samples_performance_mode(args);
86+
}
87+
else
88+
{
89+
vector<uint1024_t> range = lazy_cartesian_product::boost_generate_random_indices(args.sample_size, max_size);
90+
generate_random_samples(range, args);
91+
}
92+
exit(0);
93+
}
94+
else
95+
{
96+
display_help();
97+
exit(-1);
98+
}
99+
}
100+
}
101+
102+
const void generate_random_samples(const vector<uint1024_t> &range, const generation_args &args)
103+
{
104+
if (!args.display_json)
105+
{
106+
if (args.display_keys)
107+
{
108+
display_csv_keys(args.pc.keys, args.delim);
109+
}
110+
}
111+
else
112+
{
113+
cout << "[\n";
114+
}
115+
for (const uint1024_t &i: range)
116+
{
117+
vector<string> result = lazy_cartesian_product::boost_entry_at(args.pc.combinations, i.convert_to<string>());
118+
output_result(result, args, true);
119+
if (args.display_json && &i != &range.back())
120+
{
121+
cout << ",";
122+
}
123+
}
124+
if (args.display_json)
125+
{
126+
cout << "]\n";
127+
}
128+
}
129+
130+
const void generate_all(const uint1024_t &max_size, const generation_args &args)
131+
{
132+
if (!args.display_json)
133+
{
134+
if (args.display_keys)
135+
{
136+
display_csv_keys(args.pc.keys, args.delim);
137+
}
138+
}
139+
else
140+
{
141+
cout << "[\n";
142+
}
143+
const uint1024_t last = max_size - 1;
144+
for (uint1024_t i = 0; i < max_size; ++i)
145+
{
146+
vector<string> result = lazy_cartesian_product::boost_entry_at(args.pc.combinations, i.convert_to<string>());
147+
output_result(result, args, true);
148+
if (args.display_json && i != last)
149+
{
150+
cout << ",";
151+
}
152+
}
153+
if (args.display_json)
154+
{
155+
cout << "]\n";
156+
}
157+
}
158+
#endif

0 commit comments

Comments
 (0)