diff --git a/data_python.csv.txt b/data_python.csv.txt new file mode 100644 index 0000000..02ff1f1 --- /dev/null +++ b/data_python.csv.txt @@ -0,0 +1,23 @@ +"0", "1", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "1", "2", "2", "2", "2", "1", + "1", "1", "0", "2", "1", "2", "0", "2", "0", "1", "2", "1", "2", "0", "2", "0", "2", "2", "0", "2", "2", "2", + "0", "0", "1", "0", "1", "2", "2", "2", "2", "0", "2", "2", "2", "1", "2", "0", "1", "0", "0", "2", "1", "2", + "1", "1", "1", "0", "2", "0", "0", "1", "1", "2", "0", "1", "2", "0", "1", "0", "2", "1", "0", "0", "2", "0", + "1", "0", "2", "1", "2", "1", "1", "2", "1", "2", "2", "2", "1", "2", "1", "2", "1", "1", "1", "2", "2", "1", + "2", "2", "1", "2", "2", "2", "2", "2", "2", "2", "0", "0", "0", "1", "1", "1", "2", "1", "0", "1", "0", "1", + "0", "1", "2", "0", "2", "2", "1", "0", "0", "1", "1", "2", "2", "0", "2", "0", "0", "0", "2", "2", "2", "2", + "2", "1", "2", "2", "2", "2", "2", "1", "2", "1", "2", "1", "2", "0", "2", "2", "2", "2", "2", "2", "2", "2", + "0", "2", "1", "2", "1", "1", "1", "2", "2", "2", "2", "2", "2", "2", "0", "2", "2", "2", "2", "2", "1", "2", + "1", "2", "1", "2", "0", "2", "0", "1", "2", "0", "1", "0", "1", "1", "2", "2", "2", "2", "2", "2", "2", "2", + "2", "1", "0", "0", "1", "2", "1", "0", "2", "2", "1", "2", "2", "2", "1", "0", "1", "2", "2", "2", "1", "0", + "1", "0", "2", "2", "1", "2", "2", "2", "1", "2", "2", "2", "2", "0", "2", "0", "1", "1", "2", "0", "0", "2", + "2", "2", "1", "1", "0", "0", "1", "2", "1", "2", "1", "0", "2", "0", "2", "2", "0", "0", "0", "1", "0", "1", + "1", "1", "2", "2", "0", "1", "2", "2", "2", "0", "1", "1", "2", "2", "0", "1", "2", "2", "2", "2", "2", "2", + "0", "1", "2", "2", "0", "2", "0", "2", "2", "2", "1", "2", "2", "2", "1", "1", "1", "1", "2", "0", "0", "0", + "2", "2", "1", "1", "2", "1", "0", "2", "1", "1", "1", "0", "1", "2", "1", "2", "1", "2", "2", "2", "0", "2", + "0", "0", "2", "2", "2", "2", "2", "2", "1", "0", "1", "1", "1", "2", "1", "2", "2", "2", "2", "2", "1", "1", + "2", "2", "2", "2", "2", "2", "0", "1", "2", "0", "1", "2", "1", "2", "0", "2", "1", "0", "2", "2", "0", "2", + "2", "0", "2", "2", "2", "2", "0", "2", "2", "2", "1", "2", "0", "2", "1", "2", "2", "2", "1", "2", "2", "2", + "0", "0", "2", "1", "2", "2", "2", "2", "2", "2", "2", "1", "2", "2", "2", "0", "2", "2", "1", "2", "2", "2", + "2", "1", "2", "0", "2", "1", "2", "2", "0", "1", "0", "1", "2", "1", "0", "2", "2", "2", "1", "0", "1", "0", + "2", "1", "2", "2", "2", "0", "2", "1", "2", "2", "0", "1", "2", "0", "0", "1", "0", "1", "1", "1", "2", "1", + "0", "1", "2", "1", "2", "2", "0", "0", "0", "2", "1", "1", "2", "2", "1", "2" \ No newline at end of file diff --git a/main.cpp b/main.cpp index f302b03..f524f7c 100644 --- a/main.cpp +++ b/main.cpp @@ -112,19 +112,55 @@ void experimental_testing() { // } } +void rristro_results_variations() { + cupaal::MarkovModel_Matrix model; + model.states = {0, 1}; + model.labels = {"0", "1", "2"}; + model.observations = {{"0", "1", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "1", "2", "2", "2", "2", "1", + "1", "1", "0", "2", "1", "2", "0", "2", "0", "1", "2", "1", "2", "0", "2", "0", "2", "2", "0", "2", "2", "2", + "0", "0", "1", "0", "1", "2", "2", "2", "2", "0", "2", "2", "2", "1", "2", "0", "1", "0", "0", "2", "1", "2", + "1", "1", "1", "0", "2", "0", "0", "1", "1", "2", "0", "1", "2", "0", "1", "0", "2", "1", "0", "0", "2", "0", + "1", "0", "2", "1", "2", "1", "1", "2", "1", "2", "2", "2", "1", "2", "1", "2", "1", "1", "1", "2", "2", "1", + "2", "2", "1", "2", "2", "2", "2", "2", "2", "2", "0", "0", "0", "1", "1", "1", "2", "1", "0", "1", "0", "1", + "0", "1", "2", "0", "2", "2", "1", "0", "0", "1", "1", "2", "2", "0", "2", "0", "0", "0", "2", "2", "2", "2", + "2", "1", "2", "2", "2", "2", "2", "1", "2", "1", "2", "1", "2", "0", "2", "2", "2", "2", "2", "2", "2", "2", + "0", "2", "1", "2", "1", "1", "1", "2", "2", "2", "2", "2", "2", "2", "0", "2", "2", "2", "2", "2", "1", "2", + "1", "2", "1", "2", "0", "2", "0", "1", "2", "0", "1", "0", "1", "1", "2", "2", "2", "2", "2", "2", "2", "2", + "2", "1", "0", "0", "1", "2", "1", "0", "2", "2", "1", "2", "2", "2", "1", "0", "1", "2", "2", "2", "1", "0", + "1", "0", "2", "2", "1", "2", "2", "2", "1", "2", "2", "2", "2", "0", "2", "0", "1", "1", "2", "0", "0", "2", + "2", "2", "1", "1", "0", "0", "1", "2", "1", "2", "1", "0", "2", "0", "2", "2", "0", "0", "0", "1", "0", "1", + "1", "1", "2", "2", "0", "1", "2", "2", "2", "0", "1", "1", "2", "2", "0", "1", "2", "2", "2", "2", "2", "2", + "0", "1", "2", "2", "0", "2", "0", "2", "2", "2", "1", "2", "2", "2", "1", "1", "1", "1", "2", "0", "0", "0", + "2", "2", "1", "1", "2", "1", "0", "2", "1", "1", "1", "0", "1", "2", "1", "2", "1", "2", "2", "2", "0", "2", + "0", "0", "2", "2", "2", "2", "2", "2", "1", "0", "1", "1", "1", "2", "1", "2", "2", "2", "2", "2", "1", "1", + "2", "2", "2", "2", "2", "2", "0", "1", "2", "0", "1", "2", "1", "2", "0", "2", "1", "0", "2", "2", "0", "2", + "2", "0", "2", "2", "2", "2", "0", "2", "2", "2", "1", "2", "0", "2", "1", "2", "2", "2", "1", "2", "2", "2", + "0", "0", "2", "1", "2", "2", "2", "2", "2", "2", "2", "1", "2", "2", "2", "0", "2", "2", "1", "2", "2", "2", + "2", "1", "2", "0", "2", "1", "2", "2", "0", "1", "0", "1", "2", "1", "0", "2", "2", "2", "1", "0", "1", "0", + "2", "1", "2", "2", "2", "0", "2", "1", "2", "2", "0", "1", "2", "0", "0", "1", "0", "1", "1", "1", "2", "1", + "0", "1", "2", "1", "2", "2", "0", "0", "0", "2", "1", "1", "2", "2", "1", "2"}}; + + double labeling[] = {1.0/9, 3.0/9, 5.0/9, 2.0/12, 4.0/12, 6.0/12}; + double mat[] = {0.5, 0.5, 0.5, 0.5}; + double init[] = {0.5, 0.5}; + model.labelling_matrix = labeling; + model.transition_matrix = mat; + model.initial_distribution_vector = init; + model.print_calculations = false; + const std::map index_map = {{"0", 0}, {"1", 1}, {"2", 2}}; + model.label_index_map = index_map; + + for (int i; i <= 100; i++) { + model = baum_welch_matrix(model); + } +} + int main(int argc, char *argv[]) { // storm::utility::setUp(); // storm::settings::initializeAll("CuPAAL", "CuPAAL"); DdManager *gbm = Cudd_Init(0, 0,CUDD_UNIQUE_SLOTS,CUDD_CACHE_SLOTS, 0); - cupaal::MarkovModel_Matrix model; - model.states = {0, 1, 2, 3, 4}; - model.labels = {"a", "b"}; - model.observations.push_back({"a", "b", "b", "a", "a", "b", "a"}); - model.initialize_model_parameters_randomly(); - model.print_model_parameters(); - model.print_calculations = true; - const auto result = baum_welch_matrix(model); + rristro_results_variations(); Cudd_Quit(gbm); exit(EXIT_SUCCESS); diff --git a/src/cupaal/baum_welch.cpp b/src/cupaal/baum_welch.cpp index 1a289dc..44bc1d1 100644 --- a/src/cupaal/baum_welch.cpp +++ b/src/cupaal/baum_welch.cpp @@ -40,9 +40,6 @@ void cupaal::MarkovModel_Matrix::initialize_model_parameters_randomly(const int labelling_matrix[s * labels.size() + l] = labelling_probabilities[l]; } } - if (!observations.empty() && labelling_matrix != nullptr) { - calculate_omega(); - } } void cupaal::MarkovModel_Matrix::print_model_parameters() const { @@ -73,81 +70,63 @@ void cupaal::MarkovModel_Matrix::print_model_parameters() const { } std::cout << std::endl; } - - std::cout << "Omega Matrix:" << std::endl; - for (int s = 0; s < states.size(); s++) { - for (int l = 0; l < observations[0].size(); l++) { - std::cout << omega_matrix[s * observations[0].size() + l] << " "; - } - std::cout << std::endl; - } -} - -void cupaal::MarkovModel_Matrix::calculate_omega() { - if (observations.empty()) { - std::cout << "You should supply some observations before trying to calculate omega." << std::endl; - return; - } - - omega_matrix = static_cast(safe_malloc( - sizeof(probability), states.size() * observations[0].size())); - - for (const auto &trace: observations) { - for (int s = 0; s < states.size(); s++) { - for (int l = 0; l < observations[0].size(); l++) { - const auto label = trace[l]; - const auto label_index = label_index_map[label]; - omega_matrix[s * observations[0].size() + l] = labelling_matrix[label_index + s * labels.size()]; - } - } - } } std::vector cupaal::forward_matrix(const MarkovModel_Matrix &model) { const unsigned long number_of_states = model.states.size(); // Allocate alpha - std::vector alpha((number_of_states + 1) * model.observations[0].size(), 0.0); + std::vector alpha(number_of_states * model.observations[0].size(), 0.0); // Base case: t = 0 - for (int t = 0; t < number_of_states; t++) { - alpha[t] = model.initial_distribution_vector[t]; + for (int s = 0; s < number_of_states; s++) { + alpha[s] = model.initial_distribution_vector[s] * + model.labelling_matrix[s * model.labels.size() + model.label_index_map.at(model.observations[0][0])]; } // Case: 0 < t <= n_obs - for (int t = 0; t < model.observations[0].size(); t++) { + for (int t = 1; t < model.observations[0].size(); t++) { for (int s = 0; s < number_of_states; s++) { - double temp = 0.0; - for (int ss = 0; ss < number_of_states; ss++) { - temp += model.transition_matrix[ss * number_of_states + s] * model.omega_matrix[ - t * number_of_states + ss] * alpha[t * number_of_states + ss]; + double temporary_sum = 0; + for (int s_prime = 0; s_prime < number_of_states; s_prime++) { + temporary_sum += alpha[(t - 1) * number_of_states + s_prime] * model.transition_matrix[ + s_prime * number_of_states + s]; } - alpha[(t + 1) * number_of_states + s] = temp; + alpha[t * number_of_states + s] = model.labelling_matrix[ + s * model.labels.size() + model.label_index_map.at( + model.observations[0][t])] * temporary_sum; } } return alpha; } + std::vector cupaal::backward_matrix(const MarkovModel_Matrix &model) { const unsigned long number_of_states = model.states.size(); // Allocate beta + Base case: t = n_obs - std::vector beta((number_of_states + 1) * model.observations[0].size(), 1.0); + std::vector beta(number_of_states * model.observations[0].size(), 1.0); // Case: 0 <= t < n_obs - for (unsigned long t = model.observations[0].size(); t > 0; t--) { + for (int t = model.observations[0].size() - 2; t >= 0; t--) { for (int s = 0; s < number_of_states; s++) { - double temp = 0.0; - for (int ss = 0; ss < number_of_states; ss++) { - temp += beta[t * number_of_states + ss] * model.transition_matrix[s * number_of_states + ss]; + double temporary_sum = 0.0; + for (int s_prime = 0; s_prime < number_of_states; s_prime++) { + // auto label_result = model.labelling_matrix[s_prime * model.labels.size() + model.label_index_map.at(model.observations[0][t])]; + temporary_sum += beta[(t + 1) * number_of_states + s_prime] * + model.transition_matrix[s * number_of_states + s_prime] * + model.labelling_matrix[s_prime * model.labels.size() + model.label_index_map.at( + model.observations[0][t + 1])]; } - beta[(t - 1) * number_of_states + s] = model.omega_matrix[(t - 1) * number_of_states + s] * temp; + // beta[(t - 1) * number_of_states + s] = model.omega_matrix[(t - 1) * number_of_states + s] * temp; + beta[t * number_of_states + s] = temporary_sum; } } return beta; } -std::vector cupaal::gamma_matrix(const MarkovModel_Matrix &model, const std::vector &alpha, const std::vector &beta) { +std::vector cupaal::gamma_matrix(const MarkovModel_Matrix &model, const std::vector &alpha, + const std::vector &beta) { const unsigned long number_of_states = model.states.size(); const unsigned long number_of_obs = model.observations[0].size(); @@ -165,38 +144,39 @@ std::vector cupaal::gamma_matrix(const MarkovModel_Matrix &model, c // Compute gamma values for time t for (unsigned long s = 0; s < number_of_states; s++) { gamma[t * number_of_states + s] = - (alpha[t * number_of_states + s] * beta[t * number_of_states + s]) / normalization; + (alpha[t * number_of_states + s] * beta[t * number_of_states + s]) / normalization; } } return gamma; } -std::vector cupaal::xi_matrix(const MarkovModel_Matrix &model, const std::vector &alpha, const std::vector &beta) { +std::vector cupaal::xi_matrix(const MarkovModel_Matrix &model, const std::vector &alpha, + const std::vector &beta) { const unsigned long number_of_states = model.states.size(); const unsigned long number_of_obs = model.observations[0].size(); - std::vector xi(number_of_states * number_of_states * (number_of_obs - 1), 0.0); - for (unsigned long t = 0; t < number_of_obs - 1; t++) { + + for (unsigned long t = 0; t < (number_of_obs - 1); t++) { double normalization = 0.0; // Compute normalization factor for time t for (unsigned long i = 0; i < number_of_states; i++) { for (unsigned long j = 0; j < number_of_states; j++) { - normalization += alpha[t * number_of_states + i] * - model.transition_matrix[i * number_of_states + j] * - model.omega_matrix[(t + 1) * number_of_states + j] * - beta[(t + 1) * number_of_states + j]; + xi[t * model.states.size() * model.states.size() + i * model.states.size() + j] = + alpha[t * number_of_states + i] * + model.transition_matrix[i * number_of_states + j] * + model.labelling_matrix[j * model.labels.size() + model.label_index_map.at( + model.observations[0][t + 1])] * + beta[(t + 1) * number_of_states + j]; + + normalization += xi[t * model.states.size() * model.states.size() + i * model.states.size() + j]; } } // Compute xi values for time t for (unsigned long i = 0; i < number_of_states; i++) { for (unsigned long j = 0; j < number_of_states; j++) { - xi[t * number_of_states * number_of_states + i * number_of_states + j] = - (alpha[t * number_of_states + i] * - model.transition_matrix[i * number_of_states + j] * - model.omega_matrix[(t + 1) * number_of_states + j] * - beta[(t + 1) * number_of_states + j]) / normalization; + xi[t * model.states.size() * model.states.size() + i * model.states.size() + j] /= normalization; } } } @@ -204,6 +184,54 @@ std::vector cupaal::xi_matrix(const MarkovModel_Matrix &model, cons return xi; } +void cupaal::update_matrix(const MarkovModel_Matrix &model, const std::vector &alpha, const std::vector &beta) { + const unsigned long number_of_states = model.states.size(); + const unsigned long number_of_obs = model.observations[0].size(); + const auto gamma = gamma_matrix(model, alpha, beta); + const auto xi = xi_matrix(model, alpha, beta); + + // Update initial distribution + for (unsigned long s = 0; s < number_of_states; s++) { + model.initial_distribution_vector[s] = gamma[s]; //maybe add observation size + } + + // Update trasition matrix + for (unsigned long i = 0; i < number_of_states; i++) { + for (unsigned long j = 0; j < number_of_states; j++) { + double numerator = 0.0; + double denominator = 0.0; + + // Compute the numerator and denominator for transition probabilities + for (unsigned long t = 0; t < number_of_obs - 1; t++) { + numerator += xi[t * number_of_states * number_of_states + i * number_of_states + j]; + denominator += gamma[t * number_of_states + i]; + } + + model.transition_matrix[i * number_of_states + j] = numerator / denominator; + } + } + + // Update emission matrix + for (unsigned long s = 0; s < number_of_states; s++) { + for (unsigned long l = 0; l < model.labels.size(); l++) { + double numerator = 0.0; + double denominator = 0.0; + + // Compute the numerator and denominator for emission probabilities + for (unsigned long t = 0; t < number_of_obs; t++) { + // in case of multiple sequences of observations, change model.observations + if (model.observations[0][t] == model.labels[l]) { // Match observation + numerator += gamma[t * number_of_states + s]; + } + denominator += gamma[t * number_of_states + s]; + } + + model.labelling_matrix[s * model.labels.size() + l] = numerator / denominator; + + } + } +} + cupaal::MarkovModel_Matrix cupaal::baum_welch_matrix(const MarkovModel_Matrix &model) { const auto alpha = forward_matrix(model); const auto beta = backward_matrix(model); @@ -255,7 +283,7 @@ cupaal::MarkovModel_Matrix cupaal::baum_welch_matrix(const MarkovModel_Matrix &m std::cout << std::endl; } } - + update_matrix(model, alpha, beta); return model; } diff --git a/src/cupaal/baum_welch.h b/src/cupaal/baum_welch.h index 65177cb..e6fb68f 100644 --- a/src/cupaal/baum_welch.h +++ b/src/cupaal/baum_welch.h @@ -22,7 +22,6 @@ namespace cupaal { probability *labelling_matrix; // labelling matrix probability *transition_matrix; // P probability *initial_distribution_vector; // pi - probability *omega_matrix; // omega void initialize_model_parameters_randomly(int seed = 0); @@ -38,6 +37,7 @@ namespace cupaal { extern std::vector gamma_matrix(const MarkovModel_Matrix &model, const std::vector &alpha, const std::vector &beta); extern std::vector xi_matrix(const MarkovModel_Matrix &model, const std::vector &alpha, const std::vector &beta); + extern void update_matrix(const MarkovModel_Matrix &model, const std::vector &alpha, const std::vector &beta); extern MarkovModel_Matrix baum_welch_matrix(const MarkovModel_Matrix &model);