2019-12-29 19:01:16 +01:00
|
|
|
#!/usr/bin/env perl
|
|
|
|
|
|
|
|
=head1 NAME
|
|
|
|
|
|
|
|
git-md-toc - generate the table of content from the Markdown file(s)
|
|
|
|
|
|
|
|
=head1 SYNOPSIS
|
|
|
|
|
|
|
|
git-md-toc [OPTIONS]
|
|
|
|
|
|
|
|
=head1 DESCRIPTION
|
|
|
|
|
|
|
|
Read an input and generate the table of content (TOC) based on the
|
|
|
|
markup of the file, which is assumed to be Markdown formatted. The outcome is
|
|
|
|
also formatted as Markdown.
|
|
|
|
|
|
|
|
If no file is specified, the file C<README.md> is assumed when STDIN is
a terminal; otherwise the input is read from STDIN.
|
|
|
|
|
|
|
|
The following HTML comments are recognized in a special way and handled
|
|
|
|
as the markers to insert a new TOC or update an existing one.
|
|
|
|
|
|
|
|
=over 4
|
|
|
|
|
|
|
|
=item C<< <!-- md-toc --> >>
|
|
|
|
|
|
|
|
marks the place in the document where a new TOC is to be inserted.
|
|
|
|
|
|
|
|
=item C<< <!-- md-toc-begin --> >>, C<< <!-- md-toc-end --> >>
|
|
|
|
|
|
|
|
mark the beginning and the end of an existing TOC.
|
|
|
|
|
|
|
|
=back
|
|
|
|
|
|
|
|
Note that these markers themselves must start at the left edge
|
|
|
|
of the lines where they are placed. This rule does not apply to the
|
|
|
|
content within.
|
|
|
|
|
|
|
|
The updated TOC always ends with a double new line to separate it from
|
|
|
|
the further text below. For the same reason, if the TOC is preceded by
|
|
|
|
some text above, a double new line is prepended to the TOC.
|
|
|
|
|
|
|
|
=head1 OPTIONS
|
|
|
|
|
|
|
|
=over 4
|
|
|
|
|
|
|
|
=item B<-h>, B<--help>
|
|
|
|
|
|
|
|
Print this help and exit.
|
|
|
|
|
|
|
|
=item B<-t> I<TITLE>, B<--title>=I<TITLE>
|
|
|
|
|
|
|
|
Set the title for the table of content. If I<TITLE> is omitted, the default
value C<Table of Content> is assumed. If this option is not given at all,
the TOC is generated without a title.
|
|
|
|
|
|
|
|
=item B<-l> I<LEVEL>, B<--level>=I<LEVEL>
|
|
|
|
|
|
|
|
Set the header level used for the TOC title. Available values are C<1>
|
|
|
|
to C<6>. The default value is C<1>.
|
|
|
|
|
|
|
|
=item B<-u>, B<--update>
|
|
|
|
|
|
|
|
Update the file with the new table of content. It works even when reading
|
|
|
|
from STDIN. In this case the outcome will be printed to STDOUT.
|
|
|
|
|
|
|
|
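=item B<-x> I<DEPTH>, B<--maxdepth>=I<DEPTH>

Skip the headers of a level deeper than I<DEPTH>. The default value is C<6>.

=item B<-m> I<DEPTH>, B<-d> I<DEPTH>, B<--mindepth>=I<DEPTH>

Skip the headers of a level shallower than I<DEPTH>. The indentation of
the TOC entries is counted from this level. The default value is C<1>.

=item B<-f>, B<--filename>

Prefix the target of every TOC link with the name of the processed file.

=back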
|
|
|
|
|
|
|
|
=head1 SEE ALSO
|
|
|
|
|
|
|
|
=head2 Syntax specification
|
|
|
|
|
|
|
|
L<https://daringfireball.net/projects/markdown/>
|
|
|
|
|
|
|
|
=head2 Perl implementations
|
|
|
|
|
|
|
|
L<https://metacpan.org/pod/Text::Markdown>
|
|
|
|
|
|
|
|
L<https://metacpan.org/pod/Text::MultiMarkdown>
|
|
|
|
|
|
|
|
=head2 Some other implementations
|
|
|
|
|
|
|
|
L<https://github.com/ekalinin/github-markdown-toc>
|
|
|
|
|
|
|
|
L<https://github.com/ekalinin/github-markdown-toc.go>
|
|
|
|
|
|
|
|
L<https://github.com/frnmst/md-toc>
|
|
|
|
|
|
|
|
L<https://github.com/eGavr/toc-md>
|
|
|
|
|
|
|
|
=head1 AUTHORS
|
|
|
|
|
|
|
|
Ildar Shaimordanov E<lt>F<ildar.shaimordanov@gmail.com>E<gt>
|
|
|
|
|
|
|
|
=head1 COPYRIGHT
|
|
|
|
|
|
|
|
Copyright (c) 2019 Ildar Shaimordanov. All rights reserved.
|
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify it
|
|
|
|
under the same terms as Perl itself.
|
|
|
|
|
|
|
|
=cut
|
|
|
|
|
|
|
|
use strict;
|
|
|
|
use warnings;
|
|
|
|
|
|
|
|
use Getopt::Long qw( :config no_ignore_case bundling );
|
|
|
|
use Pod::Usage;
|
|
|
|
|
|
|
|
# Title used when -t/--title is given without a value
my $toc_default_title = "Table of Content";

# Values controlled from the command line, with their defaults
my $toc_title;          # no title line unless -t/--title is given
my $toc_level = 1;      # header level of the TOC title
my $update;             # write the TOC back into the document
my $max_depth = 6;      # deepest header level listed in the TOC
my $min_depth = 1;      # shallowest header level listed in the TOC
my $use_filename = 0;   # prefix the link targets with the file name

# Parse the command line; any parsing failure ends the program with
# the exit code 1.
GetOptions(
    # print the whole POD as the help text
    "h|help" => sub {
        pod2usage({ -verbose => 2, -noperldoc => 1 });
    },

    # the value is optional; an empty or missing one selects the default
    "t|title:s" => sub {
        my ( undef, $value ) = @_;
        $toc_title = $value ? $value : $toc_default_title;
    },

    # dying inside the handler is reported by Getopt::Long as
    # an option error
    "l|level=i" => sub {
        my ( undef, $value ) = @_;
        $toc_level = $value;
        die "Integer expected in range [1..6]\n"
            unless $toc_level =~ /^[1-6]$/;
    },

    "u|update"       => \$update,
    "x|maxdepth=i"   => \$max_depth,
    "m|d|mindepth=i" => \$min_depth,
    "f|filename"     => \$use_filename,
) or exit 1;

# Hmm... No arguments. Let's take README.md or STDIN
unless ( @ARGV ) {
    @ARGV = ( -t 0 ) ? "README.md" : "-";
}
|
|
|
|
|
|
|
|
# =========================================================================
|
|
|
|
|
|
|
|
# HTML comments serving as the TOC markers: the placeholder for a new TOC
# and the opening and closing marks put around the generated TOC.
my ( $md_toc, $md_toc_begin, $md_toc_end ) = (
    "<!-- md-toc -->",
    "<!-- md-toc-begin -->",
    "<!-- md-toc-end -->",
);

# Name of the input being processed; assigned for every processed file
my $filename;
|
|
|
|
|
2019-12-29 19:01:16 +01:00
|
|
|
# Process every input in turn: collect the headers of the document, build
# the TOC and either print it to STDOUT or, in the update mode, write the
# document back with the TOC put in place of the TOC markers.
foreach my $path ( @ARGV ) {

    # skip README.md unless it is the only file explicitly specified
    # (checked before reading, so a skipped file is never opened)
    next if @ARGV > 1 and $path eq 'README.md';

    # "-" stands for STDIN on reading and for STDOUT on writing
    my $is_stdio = $path eq '-';

    # $filename is the file-level variable used to prefix the links
    $filename = $path;

    # Slurp the whole input. The three-argument open keeps file names
    # holding mode characters (">", "|", ...) from being misinterpreted.
    my $orig_text;
    if ( $is_stdio ) {
        local $/;
        $orig_text = <STDIN>;
    } else {
        open my $in, '<', $path
            or die "Unable to open for reading: $path: $!\n";
        local $/;
        $orig_text = <$in>;
        close $in;
    }
    $orig_text = "" unless defined $orig_text;

    # The headers are looked up in a copy with the misleading parts
    # removed; the original text is kept intact for the update.
    my $clean_text = $orig_text;

    # skip code fencing
    $clean_text =~ s{
        (?:\A|\n) [ \t]* ``` .*? \n [ \t]* ```
    }{}msgx;

    # skip non-empty TOC blocks
    $clean_text =~ s{
        (?:\A|\n)
        <!-- [ \t]+ md-toc-begin [ \t]+ --> [ \t\r]* \n
        [\s\S]*? \n
        <!-- [ \t]+ md-toc-end [ \t]+ --> [ \t\r]*
        (?=\n)
    }{}msgx;

    # how many times every generated anchor has been seen so far
    my %count = ();

    my @toc = ( $md_toc_begin );
    push @toc, "#" x $toc_level . " $toc_title" if $toc_title;

    while ( $clean_text =~ m{
        (?:\A|\n)
        [ ]{0,3}
        (?:
            # atx-style headers H1-H6
            ( [#]{1,6} ) [ \t]+ ( .+? ) (?: [ \t]+ [#]* )?
        |
            # setext-style headers H1
            ( \S[^\r\n]*? ) [ \t\r]* \n [ \t]* ( [=] )+
        |
            # setext-style header H2
            ( (?![-]+)|[^\r\n]+? ) [ \t\r]* \n [ \t]* ( [-] )+
        )
        [ \t\r]*
        (?=\n)
    }mgx ) {

        # Copy the captures at once: the matches below overwrite them.
        my ( $atx_marks, $atx_title, $h1_title, $h1_rule, $h2_title, $h2_rule )
            = ( $1, $2, $3, $4, $5, $6 );

        # $depth is zero-based: 0 for H1, 1 for H2 and so on. The
        # branches test definedness, not truth, so that a header
        # titled "0" is not lost.
        my $depth;
        my $title;

        if ( defined $atx_marks and defined $atx_title ) {
            $depth = length($atx_marks) - 1;
            $title = $atx_title;
        } elsif ( defined $h1_rule and defined $h1_title ) {
            $depth = 0;
            $title = $h1_title;
        } elsif ( defined $h2_rule and defined $h2_title ) {
            $depth = 1;
            $title = $h2_title;
        }

        # The H2 pattern can match with an empty title (a blank line
        # followed by a line of dashes); this is not a header.
        next unless defined $title and length $title;

        # The indentation is counted from the minimal depth.
        # NOTE(review): one space per level is used here; CommonMark
        # nests a list item only when it is indented to the content
        # column of its parent (two spaces for "* ") -- confirm the
        # intended width of the indentation.
        my $relative_depth = $depth - ( $min_depth - 1 );
        my $indent = " " x ( $relative_depth < 0 ? 0 : $relative_depth );

        my $anchor;

        if ( $title =~ m/\[([^]]+)\]\(([^)]+)\)/ ) {
            # handle link titles: the entry gets the text of the link
            # and points to its URL
            ( $title, $anchor ) = ( $1, $2 );
        } else {
            $anchor = lc $title;
            $anchor =~ s/\s/-/g;
            $anchor =~ s/[^\w-]//g;
            $anchor = '#' . $anchor;

            # Repeated anchors get the suffixes -1, -2, ... starting
            # from the second occurrence. Every header is counted,
            # including those filtered out by the depth below.
            my $seen = $count{$anchor}++;
            $anchor .= "-$seen" if $seen;
        }

        next if $depth < $min_depth - 1 or $depth > $max_depth - 1;

        my $target = $use_filename ? "$filename$anchor" : $anchor;
        push @toc, "$indent* [$title]($target)";
    }

    push @toc, $md_toc_end;

    my $toc = join "\n", @toc;

    unless ( $update ) {
        print "$toc\n";
        next;
    }

    # Put the TOC in place of the placeholder or of the existing TOC
    # block. The lines between the begin and end markers are consumed
    # one whole line at a time, which matches the same text as the former
    # nested lazy quantifier but leaves the engine no ambiguous splits
    # to backtrack over when the end marker is missing.
    $orig_text =~ s{
        (?: (\A) | [\r\n]+ )
        (?:
            <!-- [ \t]+ md-toc [ \t]+ --> [ \t\r]*
        |
            <!-- [ \t]+ md-toc-begin [ \t]+ --> [ \t\r]* \n
            (?: [^\n]* \n )*?
            <!-- [ \t]+ md-toc-end [ \t]+ --> [ \t\r]*
        )
        (?: (\Z) | [\r\n]+ )
    }{
        ( $1 // "\n\n" ) . $toc . "\n\n";
    }emgx;

    warn "Updating $path\n";

    if ( $is_stdio ) {
        print STDOUT $orig_text;
    } else {
        open my $out, '>', $path
            or die "Unable to open for writing: $path: $!\n";
        print {$out} $orig_text;
        # buffered write errors surface at close
        close $out
            or die "Unable to write: $path: $!\n";
    }
}
|
|
|
|
|
|
|
|
# =========================================================================
|
|
|
|
|
|
|
|
# EOF
|