xaizek / zograscope (License: AGPLv3 only) (since 2018-12-07)
Mainly a syntax-aware diff that also provides a number of additional tools.
<root> / src / tooling / Config.cpp (fae056eefedc2f2fa2d1140249db303022e91451) (11KiB) (mode 100644) [raw]
// Copyright (C) 2021 xaizek <xaizek@posteo.net>
//
// This file is part of zograscope.
//
// zograscope is free software: you can redistribute it and/or modify
// it under the terms of version 3 of the GNU Affero General Public License as
// published by the Free Software Foundation.
//
// zograscope is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with zograscope.  If not, see <http://www.gnu.org/licenses/>.

#include "Config.hpp"

#include <boost/algorithm/string/trim.hpp>
#include <boost/filesystem/operations.hpp>
#include <boost/filesystem/path.hpp>
#include <boost/utility/string_ref.hpp>

#include <cassert>

#include <algorithm>
#include <fstream>
#include <iterator>
#include <regex>
#include <stdexcept>
#include <string>
#include <utility>

#include "utils/strings.hpp"

namespace fs = boost::filesystem;

// Forward declarations of file-local helpers that are defined after the
// members of Config further down in this file.
static bool pathIsInSubtree(const fs::path &root, const fs::path &path);
static fs::path normalizePath(const fs::path &path);
static fs::path makeRelativePath(fs::path base, fs::path path);
static bool isGlob(const std::string &expr);
static std::string globToRegex(boost::string_ref glob);
static Attrs extractAttrs(const std::vector<boost::string_ref> &parts);
static Attrs & operator+=(Attrs &lhs, const Attrs &rhs);

// Name of the directory whose presence marks the root and which contains the
// configuration files.
static const char ConfigDirName[] = ".zs";
// Name of the file with exclusion rules inside the configuration directory.
static const char ExcludeFileName[] = "exclude";
// Name of the file with attribute rules inside the configuration directory.
static const char AttributesFileName[] = "attributes";
// Name of the attribute that sets tabulation width.
static const char TabWidthAttr[] = "tab-size";
// Name of the attribute that sets the language.
static const char LangAttr[] = "lang";

// Type of match expression.  MatchExpr's constructor picks `Glob` when the
// expression contains glob metacharacters and `Exact` otherwise.
enum class MatchType
{
    Exact, // Exact match (plain string comparison).
    Glob,  // Glob expression (implemented as a regexp).
};

// Single match expression.  It's either an exact string or a glob and can be
// flagged as an exception (leading `!`), as applying only to directories or as
// applying only to the last entry of a path (when it contains no slashes).
class Config::MatchExpr
{
public:
    // Recognizes expression and prepares for matching against it.  Leading `!`
    // marks the expression as an exception and leading `/` is dropped, both
    // are removed before the rest is stored.  `directoryOnly` limits matches
    // to directories.
    MatchExpr(std::string expr, bool directoryOnly);

public:
    // Checks for a match.  `relative` is the path to match when the expression
    // contained a slash, otherwise `filename` is matched.  `isDir` tells
    // whether the path names a directory.
    bool matches(const std::string &relative,
                 const std::string &filename,
                 bool isDir) const;
    // Checks whether this is an exception from other rules.
    bool isException() const
    { return exception; }

private:
    std::regex regexp;  // Regular-expression match (used for Glob type).
    std::string exact;  // String to match against exactly (for Exact type).
    MatchType type;     // Type of the expression.
    bool filenameOnly;  // Matches only against tail entry of a path.
    bool directoryOnly; // Matches only directories.
    bool exception;     // Whether this rule defines exception.
};

// Parses a single expression.
//
// Recognized syntax:
//  - absence of `/` anywhere makes the expression match only the last entry
//    of a path;
//  - leading `!` marks the expression as an exception and is removed;
//  - leading `/` (after optional `!`) is removed;
//  - presence of glob metacharacters turns the rest into a regular expression,
//    otherwise the rest is compared literally.
//
// An empty expression (including one that becomes empty after removing the
// prefixes) is accepted and stored as an empty exact string.
//
// NOTE(review): converting a malformed glob to std::regex throws
// std::regex_error, which isn't handled here.
Config::MatchExpr::MatchExpr(std::string expr, bool directoryOnly) :
    directoryOnly(directoryOnly)
{
    // Must be computed before the leading slash is dropped below.
    filenameOnly = (expr.find('/') == std::string::npos);

    // Calling front() on an empty string is undefined, hence explicit checks.
    exception = (!expr.empty() && expr.front() == '!');
    if (exception) {
        expr.erase(expr.begin());
    }

    // We don't need a special flag for this syntax, `filenameOnly` being
    // `false` is enough.
    const bool rootOnly = (!expr.empty() && expr.front() == '/');
    if (rootOnly) {
        expr.erase(expr.begin());
    }

    if (isGlob(expr)) {
        regexp = globToRegex(expr);
        type = MatchType::Glob;
    } else {
        exact = std::move(expr);
        type = MatchType::Exact;
    }
}

// Tests a path against this expression.  Directory-only expressions never
// match non-directories.  The string being tested is `filename` for
// expressions without slashes and `relative` for all the others.
bool
Config::MatchExpr::matches(const std::string &relative,
                           const std::string &filename,
                           bool isDir) const
{
    if (!isDir && directoryOnly) {
        return false;
    }

    const std::string &subject = (filenameOnly ? filename : relative);

    if (type == MatchType::Exact) {
        return (subject == exact);
    }
    if (type == MatchType::Glob) {
        // The whole string must match, not just a part of it.
        return std::regex_match(subject, regexp);
    }

    assert(false && "Unhandled type.");
    return false;
}

// Remembers current directory, looks for a configuration directory starting
// from it and loads configuration if one was found.
Config::Config(const boost::filesystem::path &currDir) : currDir(currDir)
{
    const fs::path configDir = discoverRoot();
    if (configDir.empty()) {
        // No configuration directory was found, nothing to load.
        return;
    }

    loadConfigDir(configDir);
}

// Defaulted here rather than in the header.  NOTE(review): presumably because
// MatchExpr is defined only in this file and destroying members that hold it
// needs the complete type -- confirm against Config.hpp.
Config::~Config() = default;

// Walks from the current directory towards the top of the file system looking
// for a directory that contains the configuration directory.  On success stores
// the containing directory in `rootDir` and returns path to the configuration
// directory.  Returns an empty path if nothing was found.
fs::path
Config::discoverRoot()
{
    for (fs::path dir = currDir; !dir.empty(); dir = dir.parent_path()) {
        fs::path candidate = dir / ConfigDirName;
        if (fs::is_directory(candidate)) {
            rootDir = dir;
            return candidate;
        }
    }
    return fs::path();
}

// Loads exclusion and attribute rules from files of the configuration
// directory.  A file that can't be opened contributes no rules.
//
// In both files surrounding whitespace (including `\r` of CRLF line endings)
// is stripped from each line, then empty lines and lines that start with `#`
// are skipped.
//
// NOTE(review): constructing a MatchExpr from a malformed glob throws
// std::regex_error, which propagates out of this function.
void
Config::loadConfigDir(const fs::path &configDir)
{
    fs::path excludeFilePath = configDir / ExcludeFileName;
    fs::path attributesFilePath = configDir / AttributesFileName;

    std::ifstream excludeFile(excludeFilePath.string());
    for (std::string line; std::getline(excludeFile, line); ) {
        // Without trimming, a trailing `\r` would both hide the trailing slash
        // and become part of the expression.
        boost::trim_if(line, boost::is_any_of("\r\n \t"));
        if (line.empty() || line.front() == '#') {
            continue;
        }

        // Trailing slash must be checked on the raw line, before the path is
        // normalized.
        bool directoryOnly = (line.back() == '/');
        excludeRules.emplace_back(normalizePath(line).string(),
                                  directoryOnly);
    }

    std::ifstream attrsFile(attributesFilePath.string());
    for (std::string line; std::getline(attrsFile, line); ) {
        boost::trim_if(line, boost::is_any_of("\r\n \t"));
        if (line.empty() || line.front() == '#') {
            continue;
        }

        // First part is the expression, the rest are `name=value` attributes.
        std::vector<boost::string_ref> parts = split(line, ' ');
        if (parts.empty()) {
            continue;
        }

        MatchExpr expr(normalizePath(parts[0].to_string()).string(),
                       /*directoryOnly=*/false);
        // Exceptions make no sense for attributes and are dropped.
        if (!expr.isException()) {
            attrRules.emplace_back(std::move(expr), extractAttrs(parts));
        }
    }
}

// Checks whether a directory is allowed by exclusion rules.
bool
Config::shouldVisitDirectory(const std::string &path) const
{
    const bool isDir = true;
    return isAllowed(path, isDir);
}

// Checks whether a file is allowed by exclusion rules.
bool
Config::shouldProcessFile(const std::string &path) const
{
    const bool isDir = false;
    return isAllowed(path, isDir);
}

bool
Config::isAllowed(const std::string &path, bool isDir) const
{
    fs::path canonicPath = normalizePath(fs::absolute(path, currDir));

    if (!pathIsInSubtree(rootDir, canonicPath)) {
        return true;
    }

    fs::path relPath = makeRelativePath(rootDir, canonicPath);
    std::string relative = relPath.string();
    std::string filename = relPath.filename().string();

    auto it = std::find_if(excludeRules.cbegin(), excludeRules.cend(),
                           [&] (const MatchExpr &expr) {
        return !expr.isException() && expr.matches(relative, filename, isDir);
    });
    if (it == excludeRules.cend()) {
        return true;
    }

    it = std::find_if(it, excludeRules.cend(), [&] (const MatchExpr &expr) {
        return expr.isException() && expr.matches(relative, filename, isDir);
    });
    return (it != excludeRules.cend());
}

// Computes attributes of a file.  Starts with tabulation width of 4 and, for
// paths inside the root, merges in attributes of every matching rule in the
// order in which rules are stored, so later rules override earlier ones.
Attrs
Config::lookupAttrs(const std::string &path) const
{
    Attrs attrs;
    attrs.tabWidth = 4;

    const fs::path absolutePath = normalizePath(fs::absolute(path, currDir));
    if (pathIsInSubtree(rootDir, absolutePath)) {
        const fs::path insideRoot = makeRelativePath(rootDir, absolutePath);
        const std::string relative = insideRoot.string();
        const std::string filename = insideRoot.filename().string();

        for (const auto &rule : attrRules) {
            const auto &pattern = rule.first;
            if (pattern.matches(relative, filename, /*isDir=*/false)) {
                attrs += rule.second;
            }
        }
    }

    return attrs;
}

// Checks that entries of the root form a prefix of entries of the path, which
// means that the path is the root itself or something below it.
static bool
pathIsInSubtree(const fs::path &root, const fs::path &path)
{
    auto pathIt = path.begin();
    for (const fs::path &rootEntry : root) {
        // Path is either shorter than the root or diverges from it.
        if (pathIt == path.end() || !(rootEntry == *pathIt)) {
            return false;
        }
        ++pathIt;
    }
    return true;
}

// Lexically normalizes a path by dropping `.` entries and resolving `..`
// entries against preceding ones.  File system is not consulted.
//
// `..` entries that have nothing to cancel out are handled as follows:
//  - at the start of a relative path (or right after another retained `..`)
//    they are kept, so `../../a` stays as is instead of turning into `a`;
//  - directly after the root they are ignored, so the root is never lost.
//
// NOTE(review): a trailing slash of the input is apparently not preserved
// (depends on how Boost iterates a trailing separator), which is why callers
// that care about it examine the string before normalizing -- confirm for the
// Boost version in use.
static fs::path
normalizePath(const fs::path &path)
{
    fs::path result;
    for (const fs::path &entry : path) {
        if (entry == "..") {
            if (result.empty() || result.filename() == "..") {
                // Nothing to go up from, keep the entry.
                result /= entry;
            } else if (result.has_relative_path()) {
                result = result.parent_path();
            }
            // Otherwise result is a bare root and its parent is itself.
        } else if (entry != ".") {
            result /= entry;
        }
    }
    return result;
}

// Builds a path that leads from the base directory to the path: one `..` per
// entry of the base that isn't shared with the path followed by entries of the
// path that aren't shared with the base.
static fs::path
makeRelativePath(fs::path base, fs::path path)
{
    fs::path::iterator baseIt = base.begin();
    fs::path::iterator pathIt = path.begin();

    // Skip common leading entries.
    for (; baseIt != base.end() && pathIt != path.end(); ++baseIt, ++pathIt) {
        if (!(*pathIt == *baseIt)) {
            break;
        }
    }

    fs::path relative;
    for (; baseIt != base.end(); ++baseIt) {
        relative /= "..";
    }
    for (; pathIt != path.end(); ++pathIt) {
        relative /= *pathIt;
    }
    return relative;
}

// Checks whether expression contains any of the characters that start glob
// constructs: `[`, `?` or `*`.
static bool
isGlob(const std::string &expr)
{
    for (const char ch : expr) {
        if (ch == '[' || ch == '?' || ch == '*') {
            return true;
        }
    }
    return false;
}

// Converts the glob into equivalent regular expression.
//
// Translation rules:
//  - `?` matches any single character except `/`;
//  - `*` matches any number of characters except `/`;
//  - `[...]` is passed through as a bracket expression, with leading `!` or `^`
//    turned into `^` (negation);
//  - backslash and the character after it are copied as is;
//  - characters that are special for regular expressions but not for globs
//    get escaped.
//
// Inside of a bracket expression `?` and `*` have no special meaning and are
// copied literally, so that `[*?]` matches a star or a question mark.
//
// NOTE(review): unterminated `[` or a trailing backslash yields a string that
// isn't a valid regular expression.
static std::string
globToRegex(boost::string_ref glob)
{
    boost::string_ref charsToEscape = "^.$()|+{";

    std::string regexp;

    // Both helpers append to the result and consume one input character.
    auto toChar = [&regexp, &glob](char ch) {
        regexp += ch;
        glob.remove_prefix(1U);
    };
    auto toStr = [&regexp, &glob](const char str[]) {
        regexp += str;
        glob.remove_prefix(1U);
    };

    bool startedCharClass = false; // Previous character opened `[...]`.
    bool inCharClass = false;      // Currently between `[` and `]`.

    while (!glob.empty()) {
        bool newCharClass = startedCharClass;
        startedCharClass = false;

        char ch = glob.front();
        if (newCharClass && (ch == '!' || ch == '^')) {
            toChar('^');
        } else if (ch == '\\') {
            toChar('\\');
            if (!glob.empty()) {
                toChar(glob.front());
            }
        } else if (!inCharClass && ch == '?') {
            toStr("[^/]");
        } else if (!inCharClass && ch == '*') {
            toStr("[^/]*");
        } else {
            if (inCharClass) {
                // Unescaped `]` terminates the bracket expression.
                inCharClass = (ch != ']');
            } else if (ch == '[') {
                inCharClass = true;
                startedCharClass = true;
            }

            if (charsToEscape.find(ch) != boost::string_ref::npos) {
                regexp += '\\';
            }
            toChar(ch);
        }
    }

    return regexp;
}

// Extracts attributes from parts of a line of an attribute file.  The first
// part is the match expression and is skipped, every other part is expected to
// have the form `name=value`.
//
// Recognized attributes:
//  - `tab-size` -- positive integer, values that aren't such are ignored;
//  - `lang` -- stored as is.
//
// Unknown attributes are ignored.  Returns attributes that were set, fields
// that weren't mentioned keep values they have in a default-initialized Attrs.
static Attrs
extractAttrs(const std::vector<boost::string_ref> &parts)
{
    Attrs attrs;

    // Indexing from 1 skips the expression and is safe for an empty vector.
    for (std::size_t i = 1U; i < parts.size(); ++i) {
        std::string name, value;
        std::tie(name, value) = splitAt(parts[i].to_string(), '=');

        if (name == TabWidthAttr) {
            try {
                std::size_t pos;
                int width = std::stoi(value, &pos);
                // The whole value must be a number.
                if (pos == value.length() && width > 0) {
                    attrs.tabWidth = width;
                }
            } catch (const std::logic_error &) {
                // std::stoi() reports non-numeric and out-of-range values via
                // std::invalid_argument and std::out_of_range.  A bad value in
                // one line shouldn't prevent loading of the configuration.
            }
        } else if (name == LangAttr) {
            attrs.lang = value;
        }
    }

    return attrs;
}

// Overrides attributes on the left with those attributes on the right that are
// set: language when it's not empty and tabulation width when it's positive.
static Attrs &
operator+=(Attrs &lhs, const Attrs &rhs)
{
    const bool overridesLang = !rhs.lang.empty();
    const bool overridesTabWidth = (rhs.tabWidth > 0);

    if (overridesLang) {
        lhs.lang = rhs.lang;
    }
    if (overridesTabWidth) {
        lhs.tabWidth = rhs.tabWidth;
    }
    return lhs;
}
Hints

Before your first commit, do not forget to set up your git environment:
git config --global user.name "your_name_here"
git config --global user.email "your@email_here"

Clone this repository using HTTP(S):
git clone https://code.reversed.top/user/xaizek/zograscope

Clone this repository using ssh (do not forget to upload a key first):
git clone ssh://rocketgit@code.reversed.top/user/xaizek/zograscope

You are allowed to anonymously push to this repository.
This means that your pushed commits will automatically be transformed into a pull request:
... clone the repository ...
... make some changes and some commits ...
git push origin master