|
| 1 | +// Andrew Naplavkov |
| 2 | + |
| 3 | +#ifndef STEP_EDIT_DISTANCE_HPP |
| 4 | +#define STEP_EDIT_DISTANCE_HPP |
| 5 | + |
| 6 | +#include <algorithm> |
| 7 | +#include <optional> |
| 8 | +#include <step/detail/common.hpp> |
| 9 | +#include <step/detail/hirschberg.hpp> |
| 10 | +#include <utility> |
| 11 | + |
| 12 | +namespace step { |
| 13 | +namespace edit_distance { |
| 14 | +namespace detail { |
| 15 | + |
/**
 * Pair every element of [first, last) with either `val` or an empty optional.
 *
 * Exactly one position is paired with `val`: the first element that compares
 * equal to `val`, or the first element of the range when nothing matches.
 * Every other position is paired with std::nullopt.
 *
 * The chosen position is tracked by iterator, not by value, so further
 * elements that merely compare equal to the chosen one are NOT paired with
 * `val` again.
 *
 * @param first   start of the range to walk (traversed twice: find + emit)
 * @param last    end of the range
 * @param val     value to place against exactly one element of the range
 * @param result  output iterator receiving one `op(...)` result per element
 * @param op      callable invoked as `op(item, std::optional<T>)`
 * @return        the output iterator advanced past the last written element
 */
template <typename ForwardIt, typename T, typename OutputIt, typename BinaryOp>
auto align(ForwardIt first,
           ForwardIt last,
           const T& val,
           OutputIt result,
           BinaryOp op)
{
    auto match = std::find(first, last, val);
    if (match == last)
        match = first; // no equal element: pair `val` with the first one
    for (auto it = first; it != last; ++it) {
        const auto& item = *it;
        *result = op(item, it == match ? std::optional{val} : std::nullopt);
        ++result;
    }
    return result;
}
| 30 | + |
| 31 | +struct dynamic_programming { |
| 32 | + /// @see https://en.wikipedia.org/wiki/Wagner–Fischer_algorithm |
| 33 | + template <typename RandomIt1, typename RandomIt2> |
| 34 | + auto make_last_row(RandomIt1 first1, |
| 35 | + RandomIt1 last1, |
| 36 | + RandomIt2 first2, |
| 37 | + RandomIt2 last2) const |
| 38 | + { |
| 39 | + auto size1 = std::distance(first1, last1); |
| 40 | + auto size2 = std::distance(first2, last2); |
| 41 | + ring_table<size_t, 2> tbl(size2 + 1); |
| 42 | + for (size_t l = 0; l <= size1; ++l) |
| 43 | + for (size_t r = 0; r <= size2; ++r) { |
| 44 | + if (l == 0) |
| 45 | + tbl[l][r] = r; |
| 46 | + else if (r == 0) |
| 47 | + tbl[l][r] = l; |
| 48 | + else if (first1[l - 1] == first2[r - 1]) |
| 49 | + tbl[l][r] = tbl[l - 1][r - 1]; |
| 50 | + else |
| 51 | + tbl[l][r] = 1 + std::min({tbl[l][r - 1], // insert |
| 52 | + tbl[l - 1][r], // remove |
| 53 | + tbl[l - 1][r - 1]}); // replace |
| 54 | + } |
| 55 | + return std::move(tbl[size1]); |
| 56 | + } |
| 57 | + |
| 58 | + bool operator()(size_t lhs, size_t rhs) const { return lhs < rhs; } |
| 59 | + |
| 60 | + template <typename RandomIt1, typename RandomIt2, typename OutputIt> |
| 61 | + OutputIt trivial_align(RandomIt1 first1, |
| 62 | + RandomIt1 last1, |
| 63 | + RandomIt2 first2, |
| 64 | + RandomIt2 last2, |
| 65 | + OutputIt result) const |
| 66 | + { |
| 67 | + if (first1 == last1) |
| 68 | + return std::transform(first2, last2, result, [&](const auto& item) { |
| 69 | + return std::make_pair(std::nullopt, item); |
| 70 | + }); |
| 71 | + else if (first2 == last2) |
| 72 | + return std::transform(first1, last1, result, [&](const auto& item) { |
| 73 | + return std::make_pair(item, std::nullopt); |
| 74 | + }); |
| 75 | + else if (std::next(first1) == last1) |
| 76 | + return align(first2, last2, *first1, result, make_reverse_pair{}); |
| 77 | + else // std::next(first2) == last2 |
| 78 | + return align(first1, last1, *first2, result, make_pair{}); |
| 79 | + } |
| 80 | +}; |
| 81 | + |
| 82 | +} // namespace detail |
| 83 | + |
| 84 | +/** |
| 85 | + * Find the optimal sequence alignment between two strings. Optimality is |
| 86 | + * measured with the Levenshtein distance, defined to be the sum of the costs of |
| 87 | + * insertions, replacements, deletions, and null actions needed to change one |
| 88 | + * string into the other. |
| 89 | + * |
| 90 | + * Time complexity: O(N*M), space complexity O(min(N,M)), where: |
| 91 | + * N = std::distance(first1, last1), M = std::distance(first2, last2). |
| 92 | + */ |
| 93 | +template <typename RandomIt1, typename RandomIt2, typename OutputIt> |
| 94 | +OutputIt align(RandomIt1 first1, |
| 95 | + RandomIt1 last1, |
| 96 | + RandomIt2 first2, |
| 97 | + RandomIt2 last2, |
| 98 | + OutputIt result) |
| 99 | +{ |
| 100 | + return hirschberg::align( |
| 101 | + first1, last1, first2, last2, result, detail::dynamic_programming{}); |
| 102 | +} |
| 103 | + |
| 104 | +template <typename RandomRng1, typename RandomRng2, typename OutputIt> |
| 105 | +OutputIt align(const RandomRng1& rng1, const RandomRng2& rng2, OutputIt result) |
| 106 | +{ |
| 107 | + return edit_distance::align(std::begin(rng1), |
| 108 | + std::end(rng1), |
| 109 | + std::begin(rng2), |
| 110 | + std::end(rng2), |
| 111 | + result); |
| 112 | +} |
| 113 | + |
| 114 | +} // namespace edit_distance |
| 115 | +} // namespace step |
| 116 | + |
| 117 | +#endif // STEP_EDIT_DISTANCE_HPP |
0 commit comments