xaizek / zograscope (License: AGPLv3 only) (since 2018-12-07)
Mainly a syntax-aware diff that also provides a number of additional tools.
<root> / src / ts / TSTransformer.cpp (752427d23ae4d32aca7b115c607a5480aa871700) (6,124B) (mode 100644) [raw]
// Copyright (C) 2021 xaizek <xaizek@posteo.net>
//
// This file is part of zograscope.
//
// zograscope is free software: you can redistribute it and/or modify
// it under the terms of version 3 of the GNU Affero General Public License as
// published by the Free Software Foundation.
//
// zograscope is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with zograscope.  If not, see <http://www.gnu.org/licenses/>.

#include "TSTransformer.hpp"

#include <cassert>
#include <cstdint>
#include <iostream>
#include <limits>
#include <memory>
#include <stdexcept>
#include <string>

#include <boost/utility/string_ref.hpp>
#include "tree_sitter/api.h"

#include "utils/strings.hpp"
#include "TreeBuilder.hpp"
#include "types.hpp"

static bool isSeparator(Type type);

// Initializes members from the arguments; no parsing happens here.
//
// contents   - source text that transform() parses.
// tsLanguage - tree-sitter language handed to the parser.
// tb         - tree builder that receives the nodes and the root.
// stypes     - maps tree-sitter node type names to SType values.
// types      - maps tree-sitter node type names to Type values.
// badNodes   - node type names of leaves that are dropped from the output.
// tabWidth   - tabulation width forwarded to updatePosition().
// debug      - enables collecting and printing of unmapped node types.
TSTransformer::TSTransformer(
    const std::string &contents,
    const TSLanguage &tsLanguage,
    TreeBuilder &tb,
    const std::unordered_map<std::string, SType> &stypes,
    const std::unordered_map<std::string, Type> &types,
    const std::unordered_set<std::string> &badNodes,
    int tabWidth,
    bool debug
)
    : contents(contents),
      tsLanguage(tsLanguage),
      tb(tb),
      stypes(stypes),
      types(types),
      badNodes(badNodes),
      tabWidth(tabWidth),
      debug(debug)
{
}

// Parses the contents with tree-sitter using the configured language and
// converts the resulting syntax tree into nodes of the tree builder, whose
// root is set to the node made for the root of the syntax tree.
//
// Throws std::runtime_error if the input doesn't fit into the 32-bit length
// accepted by tree-sitter, if the parser refuses the language or if no tree
// was produced.
//
// In debug mode, descriptions of nodes for which no SType or Type mapping was
// found during the traversal are printed to the standard output at the end.
void
TSTransformer::transform()
{
    // tree-sitter takes input length as uint32_t, so a larger input would be
    // silently truncated by the implicit conversion.
    if (contents.size() > std::numeric_limits<uint32_t>::max()) {
        throw std::runtime_error("Input is too large to be parsed");
    }

    std::unique_ptr<TSParser, void(*)(TSParser *)> parser(ts_parser_new(),
                                                          &ts_parser_delete);
    // The language can be rejected (e.g., it was generated by an incompatible
    // version of tree-sitter), report that explicitly instead of failing
    // later with a generic parsing error.
    if (!ts_parser_set_language(parser.get(), &tsLanguage)) {
        throw std::runtime_error("Failed to set language of the parser");
    }

    std::unique_ptr<TSTree, void(*)(TSTree *)> tree(
        ts_parser_parse_string(parser.get(), nullptr, contents.c_str(),
                               static_cast<uint32_t>(contents.size())),
        &ts_tree_delete
    );
    if (tree == nullptr) {
        throw std::runtime_error("Failed to build a tree");
    }

    // Start tracking of location from the very beginning of the input.
    position = 0;
    line = 1;
    col = 1;

    tb.setRoot(visit(ts_tree_root_node(tree.get()), Type::Other));

    if (debug) {
        for (const std::string &type : badSTypes) {
            std::cout << "(TSTransformer) No SType for: " << type << '\n';
        }
        for (const std::string &type : badTypes) {
            std::cout << "(TSTransformer) No Type for: " << type << '\n';
        }
    }
}

// Makes an internal node for the given tree-sitter node and fills it with
// children: those without children of their own are processed as leaves by
// visitLeaf(), the rest are visited recursively and appended to the node.
//
// defType is the type passed down to leaves; it's replaced by the type mapped
// to this node's kind when such mapping exists.
//
// In debug mode, a description of the node is remembered if its kind has no
// SType mapping.
//
// Returns the newly created node.
PNode *
TSTransformer::visit(const TSNode &node, Type defType)
{
    const char *const nodeKind = ts_node_type(node);

    SType nodeSType = {};
    const auto stypeIt = stypes.find(nodeKind);
    if (stypeIt != stypes.end()) {
        nodeSType = stypeIt->second;
    } else if (debug) {
        // Remember the kind along with the text of the node.
        const uint32_t from = ts_node_start_byte(node);
        const uint32_t to = ts_node_end_byte(node);

        std::string description = nodeKind;
        description += ": `";
        description.append(contents.c_str() + from, to - from);
        description += '`';
        badSTypes.insert(description);
    }

    const auto typeIt = types.find(nodeKind);
    if (typeIt != types.end()) {
        defType = typeIt->second;
    }

    PNode *const parent = tb.addNode({}, nodeSType);

    const uint32_t total = ts_node_child_count(node);
    for (uint32_t idx = 0; idx != total; ++idx) {
        const TSNode child = ts_node_child(node, idx);

        if (ts_node_child_count(child) != 0) {
            tb.append(parent, visit(child, defType));
            continue;
        }

        // A leaf gets the SType mapped to its own kind, if any.
        const auto leafIt = stypes.find(ts_node_type(child));
        const SType leafSType = (leafIt == stypes.end()) ? SType{}
                                                         : leafIt->second;
        visitLeaf(leafSType, parent, child, defType);
    }

    return parent;
}

// Appends a node for the leaf to pnode unless the kind of the leaf is listed
// among bad nodes, in which case nothing is done.
//
// The current line and column are first advanced over the text between the
// end of the previously processed leaf and the start of this one, the node is
// created at that location and then the location is advanced past the leaf.
//
// The type of the leaf is the one determined from its kind, with defType used
// when that yields Type::Other.  A leaf with the default SType whose type is
// a separator receives the SType registered under the "separator" key.
void
TSTransformer::visitLeaf(SType stype,
                         PNode *pnode,
                         const TSNode &leaf,
                         Type defType)
{
    if (badNodes.count(ts_node_type(leaf)) != 0) {
        return;
    }

    uint32_t from = ts_node_start_byte(leaf);
    uint32_t to = ts_node_end_byte(leaf);
    const std::uint32_t len = to - from;

    // Skip over whatever precedes the leaf and wasn't part of any leaf.
    boost::string_ref gap(contents.c_str() + position, from - position);
    updatePosition(gap, tabWidth, line, col);

    const Type ownType = determineType(leaf);
    const Type type = (ownType == Type::Other) ? defType : ownType;

    if (stype == SType{}) {
        if (isSeparator(type)) {
            stype = stypes.at("separator");
        }
    }

    PNode *const node = tb.addNode(Text{from, len, 0, 0,
                                        static_cast<int>(type)},
                                   Location{line, col, 0, 0}, stype);
    tb.append(pnode, node);

    // Move past the text of the leaf itself.
    boost::string_ref body(contents.c_str() + from, len);
    updatePosition(body, tabWidth, line, col);
    position = to;
}

// Looks up the type mapped to the kind of the node.  Returns Type::Other when
// there is no mapping, in which case a description of the node (its kind and
// text) is also remembered if debug mode is on.
Type
TSTransformer::determineType(const TSNode &node)
{
    const char *const kind = ts_node_type(node);

    const auto found = types.find(kind);
    if (found == types.cend()) {
        if (debug) {
            const uint32_t from = ts_node_start_byte(node);
            const uint32_t to = ts_node_end_byte(node);

            std::string description = kind;
            description += ": `";
            description.append(contents.c_str() + from, to - from);
            description += '`';
            badTypes.insert(description);
        }
        return Type::Other;
    }

    return found->second;
}

// Checks whether tokens of the given type are treated as separators.  Every
// enumeration item is listed explicitly and there is no default label, so the
// assertion below is reached only for a value that isn't handled here.
static bool
isSeparator(Type type)
{
    switch (type) {
        // Types that are not separators.
        case Type::Virtual:
        case Type::Functions:
        case Type::UserTypes:
        case Type::Identifiers:
        case Type::Specifiers:
        case Type::Directives:
        case Type::Comments:
        case Type::StrConstants:
        case Type::IntConstants:
        case Type::FPConstants:
        case Type::CharConstants:
        case Type::NonInterchangeable:
            return false;

        // Types that are separators, Type::Other is among them.
        case Type::Jumps:
        case Type::Types:
        case Type::LeftBrackets:
        case Type::RightBrackets:
        case Type::Comparisons:
        case Type::Operators:
        case Type::LogicalOperators:
        case Type::Assignments:
        case Type::Keywords:
        case Type::Other:
            return true;
    }

    assert(false && "Unhandled enumeration item");
    return false;
}
Hints

Before your first commit, do not forget to set up your git environment:
git config --global user.name "your_name_here"
git config --global user.email "your@email_here"

Clone this repository using HTTP(S):
git clone https://code.reversed.top/user/xaizek/zograscope

Clone this repository using ssh (do not forget to upload a key first):
git clone ssh://rocketgit@code.reversed.top/user/xaizek/zograscope

You are allowed to anonymously push to this repository.
This means that your pushed commits will automatically be transformed into a pull request:
... clone the repository ...
... make some changes and some commits ...
git push origin master