xaizek / zograscope (License: AGPLv3 only) (since 2018-12-07)
Mainly a syntax-aware diff that also provides a number of additional tools.
<root> / src / tooling / Grepper.hpp (3ccfc3c2339fd4203b7cd4a380fae083fd851aaa) (6,327B) (mode 100644) [raw]
// Copyright (C) 2018 xaizek <xaizek@posteo.net>
//
// This file is part of zograscope.
//
// zograscope is free software: you can redistribute it and/or modify
// it under the terms of version 3 of the GNU Affero General Public License as
// published by the Free Software Foundation.
//
// zograscope is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with zograscope.  If not, see <http://www.gnu.org/licenses/>.

#ifndef ZOGRASCOPE_TOOLING_GREPPER_HPP_
#define ZOGRASCOPE_TOOLING_GREPPER_HPP_

#include <boost/algorithm/string/predicate.hpp>
#include <boost/utility/string_ref.hpp>

#include <cassert>

#include <regex>
#include <string>
#include <utility>
#include <vector>

#include "tree.hpp"

// Finds specified pattern of tokens in a tree.
class Grepper
{
    class Expr;

public:
    // Constructs grepper for the specified pattern.
    Grepper(const std::vector<std::string> &pattern = {});

public:
    // Matches leafs of `node` and invokes `handler` on full match.  Returns
    // `true` if at something was matched or pattern is empty, `false`
    // otherwise.  `handler` must be callable as if it has
    // `void handler(std::vector<Node *> nodes)` signature.
    template <typename F>
    bool grep(Node *node, F &&handler);

    // Checks for empty pattern.
    bool empty() const;

    // Retrieves number of nodes of specified type seen.
    int getSeen() const;
    // Retrieves number of nodes of specified type which were matched.
    int getMatched() const;

private:
    // Implementation of `grep` that calls itself recursively after `grep` did
    // necessary preparations.
    template <typename F>
    bool grepImpl(Node *node, F &&handler);

    // Visits single node.  Returns `true` on finding full match, `false`
    // otherwise.
    template <typename F>
    bool handle(Node *node, F &&handler);

private:
    std::vector<Expr> pattern; // Expressions to match against.
    std::vector<Node *> match; // Currently matched nodes.

    int seen;    // Number of nodes of specified type seen.
    int matched; // Number of nodes of specified which were matched.
};

// Single token matching expression.
class Grepper::Expr
{
    // Type of the expression.
    enum class Type
    {
        Exact,     // x or /^x$/ -- matches `x`
        Prefix,    // /^x/       -- matches anything that starts with `x`
        Suffix,    // /x$/       -- matches anything that ends with `x`
        Substring, // /x/        -- matches anything that contains `x`
        Regexp,    // //x/       -- matches `x` regular expression
        Wildcard,  // //         -- matches anything
    };

public:
    // Parses an expression to initialize an instance.
    explicit Expr(std::string expr);

public:
    // Checks whether given string matches the expression.
    bool matches(boost::string_ref str) const;

private:
    std::string text;  // Text to match against (if not a regexp).
    std::regex regexp; // Compiled regexp for regexp type.
    Type type;         // Type of the expression.
};

inline
Grepper::Expr::Expr(std::string expr) : text(std::move(expr))
{
    if (text.length() < 2U || text.front() != '/' || text.back() != '/') {
        type = Type::Exact;
    } else if (text.length() == 2U) {
        type = Type::Wildcard;
    } else {
        text.erase(0, 1);
        text.pop_back();
        if (text.front() == '^' && text.back() == '$') {
            type = Type::Exact;
            text.erase(0, 1);
            text.pop_back();
        } else if (text.front() == '/') {
            type = Type::Regexp;
            text.erase(0, 1);
            regexp.assign(text);
        } else if (text.front() == '^') {
            type = Type::Prefix;
            text.erase(0, 1);
        } else if (text.back() == '$') {
            type = Type::Suffix;
            text.pop_back();
        } else {
            type = Type::Substring;
        }
    }
}

inline bool
Grepper::Expr::matches(boost::string_ref str) const
{
    switch (type) {
        case Type::Exact:     return (str == text);
        case Type::Prefix:    return boost::starts_with(str, text);
        case Type::Suffix:    return boost::ends_with(str, text);
        case Type::Substring: return boost::contains(str, text);
        case Type::Regexp:    return std::regex_match(str.begin(), str.end(),
                                                      regexp);
        case Type::Wildcard:  return true;
    }
    assert(false && "Type has impossible value.");
    return false;
}

inline
Grepper::Grepper(const std::vector<std::string> &pattern)
    : pattern(pattern.cbegin(), pattern.cend()), seen(0), matched(0)
{
}

template <typename F>
inline bool
Grepper::grep(Node *node, F &&handler)
{
    if (empty()) {
        return true;
    }

    match.clear();
    return grepImpl(node, std::forward<F>(handler));
}

template <typename F>
inline bool
Grepper::grepImpl(Node *node, F &&handler)
{
    if (node->next != nullptr) {
        return grepImpl(node->next, std::forward<F>(handler));
    }

    if (node->children.empty() && node->leaf) {
        return handle(node, std::forward<F>(handler));
    }

    bool foundMatch = false;
    for (Node *child : node->children) {
        if (!child->leaf || child->next != nullptr) {
            foundMatch |= grepImpl(child, std::forward<F>(handler));
        } else {
            foundMatch |= handle(child, std::forward<F>(handler));
        }
    }
    return foundMatch;
}

template <typename F>
inline bool
Grepper::handle(Node *node, F &&handler)
{
    ++seen;

    if (!pattern[match.size()].matches(node->spelling)) {
        match.clear();
        return false;
    }

    match.push_back(node);
    if (match.size() == pattern.size()) {
        ++matched;

        handler(match);
        match.clear();

        return true;
    }

    return false;
}

inline bool
Grepper::empty() const
{
    return pattern.empty();
}

inline int
Grepper::getSeen() const
{
    return seen;
}

inline int
Grepper::getMatched() const
{
    return matched;
}

#endif // ZOGRASCOPE_TOOLING_GREPPER_HPP_
Hints

Before first commit, do not forget to setup your git environment:
git config --global user.name "your_name_here"
git config --global user.email "your@email_here"

Clone this repository using HTTP(S):
git clone https://code.reversed.top/user/xaizek/zograscope

Clone this repository using ssh (do not forget to upload a key first):
git clone ssh://rocketgit@code.reversed.top/user/xaizek/zograscope

You are allowed to anonymously push to this repository.
This means that your pushed commits will automatically be transformed into a pull request:
... clone the repository ...
... make some changes and some commits ...
git push origin master