panthema / 2016 / 0114-diploma-thesis / bispanning-graph-framework-v0.5 / misc / analyze-source_pl (Download File)
#!/usr/bin/perl -w
################################################################################
# misc/analyze-source.pl
#
# Copyright (C) 2014-2015 Timo Bingmann <tb@panthema.net>
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along with
# this program.  If not, see <http://www.gnu.org/licenses/>.
################################################################################

# print multiple email addresses
my $email_multimap = 0;

# launch emacsen for each error
my $launch_emacs = 0;

# write changes to files (dangerous!)
my $write_changes = 0;

# function testing whether to uncrustify a path
sub filter_uncrustify($) {
    my ($path) = @_;

    return 1;
}

use strict;
use warnings;
use Text::Diff;
use File::stat;
use List::Util qw(min);

my %include_list;
my %include_map;
my %authormap;

my @source_filelist;

sub expect_error($$$$) {
    my ($path,$ln,$str,$expect) = @_;

    print("Bad header line $ln in $path\n");
    print("Expected $expect\n");
    print("Got      $str\n");

    system("emacsclient -n $path") if $launch_emacs;
}

sub expect($$\@$) {
    my ($path,$ln,$data,$expect) = @_;

    if ($$data[$ln] ne $expect) {
        expect_error($path,$ln,$$data[$ln],$expect);
        # insert line with expected value
        splice(@$data, $ln, 0, $expect);
    }
}

sub expectr($$\@$$) {
    my ($path,$ln,$data,$expect,$replace_regex) = @_;

    if ($$data[$ln] ne $expect) {
        expect_error($path,$ln,$$data[$ln],$expect);
        # replace line with expected value if like regex
        if ($$data[$ln] =~ m/$replace_regex/) {
            $$data[$ln] = $expect;
        } else {
            splice(@$data, $ln, 0, $expect);
        }
    }
}

sub expect_re($$\@$) {
    my ($path,$ln,$data,$expect) = @_;

    if ($$data[$ln] !~ m/$expect/) {
        expect_error($path, $ln, $$data[$ln], "/$expect/");
    }
}

sub removeif($$\@$) {
    my ($path,$ln,$data,$expect) = @_;

    if ($$data[$ln] eq $expect) {
        # insert line with expected value
        splice(@$data, $ln, 1);
    }
}

# check equality of two arrays
sub array_equal {
    my ($a1ref,$a2ref) = @_;

    my @a1 = @{$a1ref};
    my @a2 = @{$a2ref};

    return 0 if scalar(@a1) != scalar(@a2);

    for my $i (0..scalar(@a1)-1) {
        return 0 if $a1[$i] ne $a2[$i];
    }

    return 1;
}

# run $text through a external pipe (@program)
sub filter_program {
    my $text = shift;
    my @program = @_;

    # fork and read output
    my $child1 = open(my $fh, "-|") // die("$0: fork: $!");
    if ($child1 == 0) {
        # fork and print text
        my $child2 = open(STDIN, "-|") // die("$0: fork: $!");
        if ($child2 == 0) {
            print $text;
            exit;
        }
        else {
            exec(@program) or die("$0: exec: $!");
        }
    }
    else {
        my @output = <$fh>;
        close($fh) or warn("$0: close: $!");
        return @output;
    }
}

sub process_inline_perl {
    my @data = @_;

    for(my $i = 0; $i < @data; ++$i)
    {
        if ($data[$i] =~ /\[\[\[perl/) {
            # try to find the following ]]] line
            my $j = $i + 1;
            while ($j < @data && $data[$j] !~ /\]\]\]/) {
                $j++;
            }

            # no ]]] found.
            die("[[[perl is missing closing ]]]") if $j >= @data;

            # extract lines
            my $prog = join("", @data[$i+1..$j-1]);

            # evaluate the program
            my $output;
            {
                # return STDOUT to $output
                open local(*STDOUT), '>', \$output or die $!;
                eval $prog;
            }
            if ($@) {
                print "Perl inline: -------------\n";
                print "$prog\n";
                print "--------------------------\n";
                die("failed with $@");
            }
            #print "output: ".$output."\n";

            # try to find the following [[[end]]] line
            my $k = $j + 1;
            while ($k < @data && $data[$k] !~ /\[\[\[end\]\]\]/ && $data[$k] !~ /\[\[\[perl/) {
                $k++;
            }

            # not found: insert [[[end]]]
            if ($data[$k] !~ /\[\[\[end\]\]\]/) {
                $k = $j + 1;
                $output .= "\n// [[[end]]]";
            }

            my @output = split(/\n/, $output);
            $output[$_] .= "\n" foreach (0..@output-1);

            splice(@data, $j + 1, $k - ($j + 1), @output);
        }
    }

    return @data;
}

sub process_cpp {
    my ($path) = @_;

    #print "$path\n";

    # check permissions
    my $st = stat($path) or die("Cannot stat() file $path: $!");
    if ($st->mode & 0133) {
        print("Wrong mode ".sprintf("%o", $st->mode)." on $path\n");
        if ($write_changes) {
            chmod(0644, $path) or die("Cannot chmod() file $path: $!");
        }
    }

    # read file
    open(F, $path) or die("Cannot read file $path: $!");
    my @data = <F>;
    close(F);

    unless ($path =~ /\.dox$/) {
        push(@source_filelist, $path);
    }

    my @origdata = @data;

    # first check whether there are [[[perl lines and execute them
    @data = process_inline_perl(@data);

    # put all #include lines into the includelist+map
    for my $i (0...@data-1)
    {
        if ($data[$i] =~ m!\s*#\s*include\s*[<"](\S+)[">]!) {
            $include_list{$1}{$path} = 1;
            $include_map{$path}{$1} = 1;
        }
    }

    # check source header
    my $i = 0;
    if ($data[$i] =~ m!// -.*- mode:!) { ++$i; } # skip emacs mode line

    expect($path, $i, @data, "/".('*'x79)."\n"); ++$i;
    expectr($path, $i, @data, " * $path\n", qr/^ \* /); ++$i;
    expect($path, $i, @data, " *\n"); ++$i;

    # skip over custom file comments
    my $j = $i;
    while ($data[$i] !~ /^ \* Copyright/) {
        expect_re($path, $i, @data, '^ \*( .*|$)$');
        if (++$i >= @data) {
            $i = $j; # restore read position
            last;
        }
    }

    # read authors
    while ($data[$i] =~ /^ \* Copyright \(C\) ([0-9-]+(, [0-9-]+)*) (?<name>[^0-9<]+)( <(?<mail>[^>]+)>)?\n/) {
        #print "Author: $+{name} - $+{mail}\n";
        $authormap{$+{name}}{$+{mail} || ""} = 1;
        die unless ++$i < @data;
    }

    # otherwise check license
    expect($path, $i, @data, " *\n"); ++$i;
    expectr($path, $i, @data, " * This program is free software: you can redistribute it and/or modify it under\n", qr/^ \*/); ++$i;
    expectr($path, $i, @data, " * the terms of the GNU General Public License as published by the Free Software\n", qr/^ \*/); ++$i;
    expectr($path, $i, @data, " * Foundation, either version 3 of the License, or (at your option) any later\n", qr/^ \*/); ++$i;
    expectr($path, $i, @data, " * version.\n", qr/^ \*/); ++$i;
    expectr($path, $i, @data, " *\n", qr/^ \*/); ++$i;
    expectr($path, $i, @data, " * This program is distributed in the hope that it will be useful, but WITHOUT\n", qr/^ \*/); ++$i;
    expectr($path, $i, @data, " * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS\n", qr/^ \*/); ++$i;
    expectr($path, $i, @data, " * FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more\n", qr/^ \*/); ++$i;
    expectr($path, $i, @data, " * details.\n", qr/^ \*/); ++$i;
    expectr($path, $i, @data, " *\n", qr/^ \*/); ++$i;
    expectr($path, $i, @data, " * You should have received a copy of the GNU General Public License along with\n", qr/^ \*/); ++$i;
    expectr($path, $i, @data, " * this program.  If not, see <http://www.gnu.org/licenses/>.\n", qr/^ \*/); ++$i;
    expect($path, $i, @data, " ".('*'x78)."/\n"); ++$i;

    # check include guard name
    if ($path =~ m!\.(h|h.in|hpp)$!)
    {
        expect($path, $i, @data, "\n"); ++$i;

        # construct include guard macro name: PROGRAM_FILE_NAME_HEADER
        my $guard = $path;
        $guard =~ s!src/!!;
        $guard =~ tr!/-!__!;
        $guard =~ s!\.h(pp)?(\.in)?$!!;
        $guard = "BISPANNING_".uc($guard)."_HEADER";
        #print $guard."\n";x

        expectr($path, $i, @data, "#ifndef $guard\n", qr/^#ifndef /); ++$i;
        expectr($path, $i, @data, "#define $guard\n", qr/^#define /); ++$i;

        my $n = scalar(@data)-1;
        expectr($path, $n-2, @data, "#endif // !$guard\n", qr/^#endif /);
    }

    # check terminating /****/ comment
    {
        my $n = scalar(@data)-1;
        if ($data[$n] !~ m!^/\*{78}/$!) {
            push(@data, "\n");
            push(@data, "/".('*'x78)."/\n");
        }
    }

    for(my $i = 0; $i < @data-1; ++$i) {
        # check for @-style doxygen commands
        if ($data[$i] =~ m!\@(param|tparam|return|result|brief|details|c|i)\s!) {
            print("found \@-style doxygen command in $path:$i\n");
        }

        # check for assert() in test cases
        if ($path =~ /^tests/) {
            if ($data[$i] =~ m/(?<!static_)assert\(/) {
                print("found assert() in test $path:$i\n");
            }
        }

        # check for NULL -> replace with nullptr.
        if ($data[$i] =~ s/\bNULL\b/nullptr/g) {
            print("replacing NULL in $path:$i\n");
        }

        if ($path !~ /^swig/) {
            # check for typedef, issue warnings.
            #if ($data[$i] =~ m/\btypedef\b/) {
            if ($data[$i] =~ s/\btypedef\b(.+)\b(\w+);$/using $2 = $1;/) {
                print("found typedef in $path:$i\n");
            }
        }
    }

    # run uncrustify if in filter
    if (filter_uncrustify($path))
    {
        my $data = join("", @data);
        @data = filter_program($data, "uncrustify", "-q", "-c", "misc/uncrustify.cfg", "-l", "CPP");

        # manually add blank line after "namespace xyz {" and before "} // namespace xyz"
        my @namespace;
        for(my $i = 0; $i < @data-1; ++$i)
        {
            if ($data[$i] =~ m!^namespace (\S+) {!) {
                push(@namespace, $1);
            }
            elsif ($data[$i] =~ m!^namespace {!) {
                push(@namespace, "");
            }
            elsif ($data[$i] =~ m!^}\s+//\s+namespace\s+(\S+)\s*$!) {
                if (@namespace == 0) {
                    print "$path\n";
                    print "    NAMESPACE UNBALANCED! @ $i\n";
                    #system("emacsclient -n $path");
                }
                else {
                    # quiets wrong uncrustify indentation
                    $data[$i] =~ s!}\s+//\s+namespace!} // namespace!;
                    expectr($path, $i, @data, "} // namespace ".$namespace[-1]."\n",
                            qr!^}\s+//\s+namespace!);
                }
                if ($data[$i-1] !~ m!^}\s+// namespace!) {
                    splice(@data, $i, 0, "\n"); ++$i;
                }
                pop(@namespace);
            }
            elsif ($data[$i] =~ m!^}\s+//\s+namespace\s*$!) {
                if (@namespace == 0) {
                    print "$path\n";
                    print "    NAMESPACE UNBALANCED! @ $i\n";
                    #system("emacsclient -n $path");
                }
                else {
                    # quiets wrong uncrustify indentation
                    $data[$i] =~ s!}\s+//\s+namespace!} // namespace!;
                    expectr($path, $i, @data, "} // namespace\n",
                            qr!^}\s+//\s+namespace!);
                }
                if ($data[$i-1] !~ m!^}\s+// namespace!) {
                    splice(@data, $i, 0, "\n"); ++$i;
                }
                pop(@namespace);
            }
        }
        if (@namespace != 0) {
            print "$path\n";
            print "    NAMESPACE UNBALANCED!\n";
            #system("emacsclient -n $path");
        }

        # change misformatted [ = ] of lambdas to [=]
        for(my $i = 0; $i < @data-1; ++$i) {
            $data[$i] =~ s/\[ = \]/[=]/g;
            $data[$i] =~ s/\[ ([=&]),/[$1,/g;
        }
    }

    return if array_equal(\@data, \@origdata);

    print "$path\n";
    print diff(\@origdata, \@data);
    #system("emacsclient -n $path");

    if ($write_changes)
    {
        open(F, "> $path") or die("Cannot write $path: $!");
        print(F join("", @data));
        close(F);
    }
}

sub process_pl_cmake {
    my ($path) = @_;

    # check permissions
    if ($path =~ /\.(pl|sh)$/) {
        my $st = stat($path) or die("Cannot stat() file $path: $!");
        if (($st->mode & 0777) != 0755) {
            print("Wrong mode ".sprintf("%o", $st->mode)." on $path\n");
            if ($write_changes) {
                chmod(0755, $path) or die("Cannot chmod() file $path: $!");
            }
        }
    }
    else {
        my $st = stat($path) or die("Cannot stat() file $path: $!");
        if (($st->mode & 0777) != 0644) {
            print("Wrong mode ".sprintf("%o", $st->mode)." on $path\n");
            if ($write_changes) {
                chmod(0644, $path) or die("Cannot chmod() file $path: $!");
            }
        }
    }

    # read file
    open(F, $path) or die("Cannot read file $path: $!");
    my @data = <F>;
    close(F);

    my @origdata = @data;

    # check source header
    my $i = 0;
    if ($data[$i] =~ m/#!/) { ++$i; } # bash line
    expect($path, $i, @data, ('#'x80)."\n"); ++$i;
    expectr($path, $i, @data, "# $path\n", qr/^# /); ++$i;
    expect($path, $i, @data, "#\n"); ++$i;

    # skip over custom file comments
    my $j = $i;
    while ($data[$i] !~ /^#{80}/) {
        expect_re($path, $i, @data, '^#( .*|$)');
        if (++$i >= @data) {
            $i = $j; # restore read position
            last;
        }
    }

    expect($path, $i, @data, ('#'x80)."\n"); ++$i;

    # check terminating ####### comment
    {
        my $n = scalar(@data)-1;
        if ($data[$n] !~ m!^#{80}$!) {
            push(@data, "\n");
            push(@data, ("#"x80)."\n");
        }
    }

    return if array_equal(\@data, \@origdata);

    print "$path\n";
    print diff(\@origdata, \@data);
    #system("emacsclient -n $path");

    if ($write_changes)
    {
        open(F, "> $path") or die("Cannot write $path: $!");
        print(F join("", @data));
        close(F);
    }
}

### Main ###

foreach my $arg (@ARGV) {
    if ($arg eq "-w") { $write_changes = 1; }
    elsif ($arg eq "-e") { $launch_emacs = 1; }
    elsif ($arg eq "-m") { $email_multimap = 1; }
    else {
        print "Unknown parameter: $arg\n";
    }
}

(-e "src/CMakeLists.txt")
    or die("Please run this script in the Thrill source base directory.");

# check uncrustify's version:
my ($uncrustver) = filter_program("", "uncrustify", "--version");
($uncrustver eq "uncrustify 0.61\n")
    or die("Requires uncrustify 0.61 to run correctly.");

use File::Find;
my @filelist;
find(sub { !-d && push(@filelist, $File::Find::name) }, ".");

foreach my $file (@filelist)
{
    $file =~ s!./!! or die("File does not start ./");

    if ($file =~ m!^(b[0-9]*|build.*)(\/|$)!) {
        # skip common build directory names
    }
    elsif ($file =~ m!^nauty25r5!) {
        # skip external libraries
    }
    elsif ($file =~ /\.(h|cpp|hpp|h\.in|dox)$/) {
        process_cpp($file);
    }
    elsif ($file =~ /\.p[lm]$/) {
        process_pl_cmake($file);
    }
    elsif ($file =~ /\.(sh|awk)$/) {
        process_pl_cmake($file);
    }
    elsif ($file =~ m!(^|/)CMakeLists\.txt$!) {
        process_pl_cmake($file);
    }
    elsif ($file =~ /^doc\/images\/.*\.pdf$/) {
        # use pdf2svg to convert pdfs to svgs for doxygen.
        process_doc_images_pdf($file);
    }
    elsif ($file =~ /^doc\/images\/.*\.svg$/) {
    }
    # recognize further files
    elsif ($file =~ m!^\.git/!) {
    }
    elsif ($file =~ m!^misc/!) {
    }
    elsif ($file =~ m!CPPLINT\.cfg$!) {
    }
    elsif ($file =~ m!^doxygen-html/!) {
    }
    elsif ($file =~ m!^tests/.*\.(dat|plot)$!) {
        # data files of tests
    }
    # skip all additional files in source root
    elsif ($file =~ m!^[^/]+$!) {
    }
    else {
        print "Unknown file type $file\n";
    }
}

# print authors to AUTHORS
print "Writing AUTHORS:\n";
open(A, "> AUTHORS");
foreach my $a (sort keys %authormap)
{
    my $mail = $authormap{$a};
    if ($email_multimap) {
        $mail = join(",", sort keys %{$mail});
    }
    else {
        $mail = (sort keys(%{$mail}))[0]; # pick first
    }
    $mail = $mail ? " <$mail>" : "";

    print "  $a$mail\n";
    print A "$a$mail\n";
}
close(A);

# check include_list for C-style headers
{

    my @cheaders = qw(assert.h ctype.h errno.h fenv.h float.h inttypes.h
                      limits.h locale.h math.h signal.h stdarg.h stddef.h
                      stdlib.h stdio.h string.h time.h);

    foreach my $ch (@cheaders)
    {
        next if !$include_list{$ch};
        print "Replace c-style header $ch in\n";
        print "    [".join(",", sort keys %{$include_list{$ch}}). "]\n";
    }
}

# print includes
if (0)
{
    print "Writing include_map:\n";
    foreach my $inc (sort keys %include_map)
    {
        print "$inc => ".scalar(keys %{$include_map{$inc}})." [";
        print join(",", sort keys %{$include_map{$inc}}). "]\n";
    }
}

# try to find cycles in includes
if (1)
{
    my %graph = %include_map;

    # Tarjan's Strongly Connected Components Algorithm
    # https://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm

    my $index = 0;
    my @S = [];
    my %vi; # vertex info

    sub strongconnect {
        my ($v) = @_;

        # Set the depth index for v to the smallest unused index
        $vi{$v}{index} = $index;
        $vi{$v}{lowlink} = $index++;
        push(@S, $v);
        $vi{$v}{onStack} = 1;

        # Consider successors of v
        foreach my $w (keys %{$graph{$v}}) {
            if (!defined $vi{$w}{index}) {
                # Successor w has not yet been visited; recurse on it
                strongconnect($w);
                $vi{$w}{lowlink} = min($vi{$v}{lowlink}, $vi{$w}{lowlink});
            }
            elsif ($vi{$w}{onStack}) {
                # Successor w is in stack S and hence in the current SCC
                $vi{$v}{lowlink} = min($vi{$v}{lowlink}, $vi{$w}{index})
            }
        }

        # If v is a root node, pop the stack and generate an SCC
        if ($vi{$v}{lowlink} == $vi{$v}{index}) {
            # start a new strongly connected component
            my @SCC;
            my $w;
            do {
                $w = pop(@S);
                $vi{$w}{onStack} = 0;
                # add w to current strongly connected component
                push(@SCC, $w);
            } while ($w ne $v);
            # output the current strongly connected component (only if it is not
            # a singleton)
            if (@SCC != 1) {
                print "-------------------------------------------------";
                print "Found cycle:\n";
                print "    $_\n" foreach @SCC;
                print "end cycle\n";
            }
        }
    }

    foreach my $v (keys %graph) {
        next if defined $vi{$v}{index};
        strongconnect($v);
    }
}

# run cpplint.py
{
    my @lintlist;

    foreach my $path (@source_filelist)
    {
        #next if $path =~ /exclude/;

        push(@lintlist, $path);
    }

    system("cpplint.py", "--counting=total", "--extensions=h,c,cc,hpp,cpp", @lintlist);
}

################################################################################