fix detection of suffix/prefix changes for name-changes
This commit is contained in:
@@ -10,6 +10,7 @@
|
||||
|
||||
#include <algorithm>
|
||||
#include <string>
|
||||
#include <tuple>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
@@ -25,17 +26,81 @@ namespace guidance
|
||||
// Name Change Logic
|
||||
// Used both during Extraction as well as during Post-Processing
|
||||
|
||||
inline std::pair<std::string, std::string> getPrefixAndSuffix(const std::string &data)
|
||||
inline std::string longest_common_substring(const std::string &lhs, const std::string &rhs)
|
||||
{
|
||||
const auto suffix_pos = data.find_last_of(' ');
|
||||
if (suffix_pos == std::string::npos)
|
||||
return {};
|
||||
if (lhs.empty() || rhs.empty())
|
||||
return "";
|
||||
|
||||
const auto prefix_pos = data.find_first_of(' ');
|
||||
auto result = std::make_pair(data.substr(0, prefix_pos), data.substr(suffix_pos + 1));
|
||||
boost::to_lower(result.first);
|
||||
boost::to_lower(result.second);
|
||||
return result;
|
||||
// array for dynamic programming
|
||||
std::vector<std::vector<std::uint32_t>> dp(lhs.size(),
|
||||
std::vector<std::uint32_t>(rhs.size(), 0));
|
||||
|
||||
// to remember the best location
|
||||
std::uint32_t best = 0;
|
||||
std::uint32_t best_pos = 0;
|
||||
|
||||
for (std::uint32_t i = 0; i < lhs.size(); ++i)
|
||||
{
|
||||
for (std::uint32_t j = 0; j < rhs.size(); ++j)
|
||||
{
|
||||
if (lhs[i] == rhs[j])
|
||||
{
|
||||
dp[i][j] = (i == 0 || j == 0) ? 1 : (dp[i - 1][j - 1] + 1);
|
||||
if (dp[i][j] > best)
|
||||
{
|
||||
best = dp[i][j];
|
||||
best_pos = i + 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// the best position marks the end of the string
|
||||
return lhs.substr(best_pos - best, best);
|
||||
}
|
||||
|
||||
inline auto decompose(const std::string &lhs, const std::string &rhs)
|
||||
{
|
||||
auto const lcs = longest_common_substring(lhs, rhs);
|
||||
|
||||
// trim spaces, transform to lower
|
||||
const auto trim = [](auto str) {
|
||||
boost::to_lower(str);
|
||||
auto front = str.find_first_not_of(" ");
|
||||
|
||||
if (front == std::string::npos)
|
||||
return str;
|
||||
|
||||
auto back = str.find_last_not_of(" ");
|
||||
return str.substr(front, back - front + 1);
|
||||
};
|
||||
|
||||
if (lcs.empty())
|
||||
{
|
||||
std::string empty = "";
|
||||
return std::make_tuple(trim(lhs), trim(rhs), empty, empty);
|
||||
}
|
||||
|
||||
// find the common substring in both
|
||||
auto lhs_pos = lhs.find(lcs);
|
||||
auto rhs_pos = rhs.find(lcs);
|
||||
|
||||
BOOST_ASSERT(lhs_pos + lcs.size() <= lhs.size());
|
||||
BOOST_ASSERT(rhs_pos + lcs.size() <= rhs.size());
|
||||
|
||||
// prefixes
|
||||
std::string lhs_prefix = (lhs_pos > 0) ? lhs.substr(0, lhs_pos) : "";
|
||||
std::string rhs_prefix = (rhs_pos > 0) ? rhs.substr(0, rhs_pos) : "";
|
||||
|
||||
// suffices
|
||||
std::string lhs_suffix = lhs.substr(lhs_pos + lcs.size());
|
||||
std::string rhs_suffix = rhs.substr(rhs_pos + lcs.size());
|
||||
|
||||
lhs_prefix = trim(std::move(lhs_prefix));
|
||||
lhs_suffix = trim(std::move(lhs_suffix));
|
||||
rhs_prefix = trim(std::move(rhs_prefix));
|
||||
rhs_suffix = trim(std::move(rhs_suffix));
|
||||
|
||||
return std::make_tuple(lhs_prefix, lhs_suffix, rhs_prefix, rhs_suffix);
|
||||
}
|
||||
|
||||
// Note: there is an overload without suffix checking below.
|
||||
@@ -64,51 +129,24 @@ inline bool requiresNameAnnounced(const std::string &from_name,
|
||||
const auto name_is_contained =
|
||||
boost::starts_with(from_name, to_name) || boost::starts_with(to_name, from_name);
|
||||
|
||||
const auto checkForPrefixOrSuffixChange = [](
|
||||
const std::string &first, const std::string &second, const SuffixTable &suffix_table) {
|
||||
const auto checkForPrefixOrSuffixChange =
|
||||
[](const std::string &first, const std::string &second, const SuffixTable &suffix_table) {
|
||||
std::string first_prefix, first_suffix, second_prefix, second_suffix;
|
||||
std::tie(first_prefix, first_suffix, second_prefix, second_suffix) =
|
||||
decompose(first, second);
|
||||
|
||||
const auto first_prefix_and_suffixes = getPrefixAndSuffix(first);
|
||||
const auto second_prefix_and_suffixes = getPrefixAndSuffix(second);
|
||||
const auto checkTable = [&](const std::string &str) {
|
||||
// workaround for cucumber tests:
|
||||
if (str.length() == 1 && (first.length() == 2 || second.length() == 2))
|
||||
return false;
|
||||
|
||||
// reverse strings, get suffices and reverse them to get prefixes
|
||||
const auto checkTable = [&](const std::string &str) {
|
||||
return str.empty() || suffix_table.isSuffix(str);
|
||||
return str.empty() || suffix_table.isSuffix(str);
|
||||
};
|
||||
|
||||
return checkTable(first_prefix) && checkTable(first_suffix) &&
|
||||
checkTable(second_prefix) && checkTable(second_suffix);
|
||||
};
|
||||
|
||||
const auto getOffset = [](const std::string &str) -> std::size_t {
|
||||
if (str.empty())
|
||||
return 0;
|
||||
else
|
||||
return str.length() + 1;
|
||||
};
|
||||
|
||||
const bool is_prefix_change = [&]() -> bool {
|
||||
if (!checkTable(first_prefix_and_suffixes.first))
|
||||
return false;
|
||||
if (!checkTable(second_prefix_and_suffixes.first))
|
||||
return false;
|
||||
return !first.compare(getOffset(first_prefix_and_suffixes.first),
|
||||
std::string::npos,
|
||||
second,
|
||||
getOffset(second_prefix_and_suffixes.first),
|
||||
std::string::npos);
|
||||
}();
|
||||
|
||||
const bool is_suffix_change = [&]() -> bool {
|
||||
if (!checkTable(first_prefix_and_suffixes.second))
|
||||
return false;
|
||||
if (!checkTable(second_prefix_and_suffixes.second))
|
||||
return false;
|
||||
return !first.compare(0,
|
||||
first.length() - getOffset(first_prefix_and_suffixes.second),
|
||||
second,
|
||||
0,
|
||||
second.length() - getOffset(second_prefix_and_suffixes.second));
|
||||
}();
|
||||
|
||||
return is_prefix_change || is_suffix_change;
|
||||
};
|
||||
|
||||
const auto is_suffix_change = checkForPrefixOrSuffixChange(from_name, to_name, suffix_table);
|
||||
const auto names_are_equal = from_name == to_name || name_is_contained || is_suffix_change;
|
||||
const auto name_is_removed = !from_name.empty() && to_name.empty();
|
||||
|
||||
Reference in New Issue
Block a user