// Read a tabular cross-reference file generated by ctags, then read a list of
// html files generated by Vim's TOhtml command on C++ code. Link words
// in the html files to cross-references from ctags.
//
// Usage:
//   linkify [tags file] [html files]...
//
// Still plenty of holes:
//   - unnecessarily linking definition location to itself
//   - can't detect strings in spite of attempt to support them below, because
//     Vim's generated html turns quotes into html entities
//   - distinguishing function and variable names
//   - distinguishing Mu code in C++ files
//   - distinguishing between function overloads
//   - if there are duplicate tags we aren't smart enough to distinguish between
//     them yet, so we simply don't add any link at all
//   - but even that's not perfect, because sometimes the tags file has a
//     single definition but there are still multiple overloads (say I defined
//     'clear()' on some type, and it's already defined on STL classes)
//   - ctags misses some symbols in layered code

#include<assert.h>

#include<map>
using std::map;

#include<string>
using std::string;

#include<iostream>
using std::istream;
using std::cout;
using std::cerr;

#include<sstream>
using std::istringstream;
using std::ostringstream;

#include<fstream>
using std::ifstream;
using std::ofstream;

#include <cctype>
#include <locale>
using std::isspace; // unicode-aware

// Location of a symbol's definition, as loaded from the tags file.
//
// read_tags() flags a symbol that occurs more than once by setting line_num to
// -1 and clearing filename; replace_tags_in_file() emits no link for a symbol
// whose filename is empty.
struct syminfo {
  string filename;  // file holding the definition; empty => never linked
  int line_num;  // line number read from the tags file; 0 until filled in
  syminfo() :line_num(0) {}
};
// Return true if at least one more character can be read from 'in'.
//
// Peeks without consuming anything. Hitting end-of-file yields false. Any
// other stream failure trips the assert; when assertions are compiled out the
// function reports "no data" instead, so that 'while (has_data(in))' loops in
// callers terminate rather than spinning on a broken stream.
bool has_data(istream& in) {
  in.peek();
  if (in.eof()) return false;
  assert(in);
  return !in.fail();
}
// Return true if 's' begins with 'pat'.
// An empty 'pat' is a prefix of every string, including the empty string.
bool starts_with(const string& s, const string& pat) {
  // compare() clamps the requested length to what 's' actually holds, so an
  // 's' shorter than 'pat' simply compares unequal.
  return s.compare(0, pat.size(), pat) == 0;
}
// Rewrite 's' in place so that every '<' becomes "&lt;" and every '>' becomes
// "&gt;". No other characters are touched.
void encode_some_html_entities(string& s) {
  string::size_type pos = 0;
  while ((pos = s.find_first_of("<>", pos)) != string::npos) {
    const string replacement = (s[pos] == '<') ? "&lt;" : "&gt;";
    s.replace(pos, 1, replacement);
    // resume after the inserted entity so its characters are not rescanned
    pos += replacement.size();
  }
}
void read_tags(const string& filename, map<string, syminfo>& info) {
|
|
ifstream in(filename.c_str());
|
|
//? cerr << "reading " << filename << '\n';
|
|
string dummy;
|
|
while (has_data(in)) {
|
|
string symbol; in >> symbol;
|
|
if (symbol == "operator") {
|
|
// unsupported
|
|
getline(in, dummy); // skip
|
|
continue;
|
|
}
|
|
encode_some_html_entities(symbol);
|
|
//? cerr << symbol << '\n';
|
|
if (info.find(symbol) != info.end()) {
|
|
info[symbol].line_num = -1;
|
|
info[symbol].filename.clear();
|
|
}
|
|
else {
|
|
in >> dummy;
|
|
in >> info[symbol].line_num;
|
|
in >> info[symbol].filename;
|
|
}
|
|
getline(in, dummy); // skip rest of line
|
|
//? cerr << symbol << ": " << info[symbol].filename << ':' << info[symbol].line_num << '\n';
|
|
}
|
|
in.close();
|
|
}
|
|
|
|
void replace_tags_in_file(const string& filename, const map<string, syminfo>& info) {
|
|
//? cerr << info.size() << " symbols\n";
|
|
ifstream in(filename.c_str());
|
|
ofstream out((filename+".out").c_str());
|
|
while (has_data(in)) {
|
|
// send lines that don't start with '<span' straight through
|
|
string line;
|
|
getline(in, line);
|
|
if (!starts_with(line, "<span ")) {
|
|
out << line << '\n';
|
|
}
|
|
else {
|
|
static int span_size = string("</span>").size();
|
|
int skip_first_span = line.find("</span>") + span_size;
|
|
out << line.substr(0, skip_first_span);
|
|
istringstream in2(line.substr(skip_first_span));
|
|
in2 >> std::noskipws;
|
|
while (has_data(in2)) {
|
|
if (isspace(in2.peek())) {
|
|
//? cerr << "space\n";
|
|
char c; in2 >> c;
|
|
out << c;
|
|
}
|
|
// within a line, send straight through all characters inside '<..>'
|
|
else if (in2.peek() == '<') {
|
|
//? cerr << "tag\n";
|
|
char c = '\0';
|
|
while (in2 >> c) {
|
|
//? cerr << "span: " << c << '\n';
|
|
out << c;
|
|
if (c == '>') break;
|
|
}
|
|
//? cerr << "end tag\n";
|
|
}
|
|
else {
|
|
// send straight through all characters inside strings (handling escapes)
|
|
char c = in2.get();
|
|
if (c == '"') {
|
|
//? cerr << "string\n";
|
|
out << c;
|
|
while (in2 >> c) {
|
|
out << c;
|
|
if (c == '\\') {
|
|
in2 >> c; out << c;
|
|
}
|
|
else if (c == '"') {
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
else if (c == '\'') {
|
|
//? cerr << "character\n";
|
|
out << c;
|
|
while (in2 >> c) {
|
|
out << c;
|
|
if (c == '\\') {
|
|
in2 >> c; out << c;
|
|
}
|
|
else if (c == '\'') {
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
// send straight through any characters after '//' (comments)
|
|
else if (c == '#') {
|
|
//? cerr << "comment\n";
|
|
out << c;
|
|
while (in2 >> c) out << c;
|
|
}
|
|
// send straight through any characters after '//' (comments)
|
|
else if (c == '/' && in2.peek() == '/') {
|
|
//? cerr << "comment\n";
|
|
out << c;
|
|
while (in2 >> c) out << c;
|
|
}
|
|
else {
|
|
//? cerr << "rest\n";
|
|
if (c == ',' || c == ':') {
|
|
out << c;
|
|
continue;
|
|
}
|
|
ostringstream out2;
|
|
out2 << c;
|
|
while (in2 >> c) {
|
|
if (isspace(c) || c == '<' || c == '"' || c == '\'' || c == '/' || c == ',' || c == ':') { // keep sync'd with other clauses above
|
|
in2.putback(c);
|
|
break;
|
|
}
|
|
out2 << c;
|
|
}
|
|
string symbol = out2.str();
|
|
if (symbol == "equal" || symbol == "index" || symbol == "put-index" || symbol == "length") {
|
|
//? cerr << " blacklisted\n";
|
|
out << symbol;
|
|
}
|
|
else if (info.find(symbol) == info.end()) {
|
|
//? cerr << " no info\n";
|
|
out << symbol;
|
|
}
|
|
else {
|
|
const syminfo& s = info.find(symbol)->second;
|
|
if (s.filename.empty()) {
|
|
//? cerr << " empty info\n";
|
|
out << symbol;
|
|
}
|
|
else {
|
|
//? cerr << " link\n";
|
|
out << "<a href='" << s.filename << ".html#L" << s.line_num << "'>" << symbol << "</a>";
|
|
}
|
|
}
|
|
} // end rest
|
|
}
|
|
} // done parsing line
|
|
out << '\n';
|
|
}
|
|
}
|
|
in.close(); out.close();
|
|
}
|
|
|
|
int main(int argc, const char* argv[]) {
|
|
map<string, syminfo> info;
|
|
read_tags(argv[1], info);
|
|
for (int i = 2; i < argc; ++i)
|
|
replace_tags_in_file(argv[i], info);
|
|
return 0;
|
|
}
|