mirror of
				https://github.com/pragma-/pbot.git
				synced 2025-11-04 08:37:24 +01:00 
			
		
		
		
	
		
			
				
	
	
		
			284 lines
		
	
	
		
			6.1 KiB
		
	
	
	
		
			Perl
		
	
	
		
			Executable File
		
	
	
		
			Vendored
		
	
	
	
			
		
		
	
	
			284 lines
		
	
	
		
			6.1 KiB
		
	
	
	
		
			Perl
		
	
	
		
			Executable File
		
	
	
		
			Vendored
		
	
	
	
#!/usr/bin/env perl
 | 
						|
 | 
						|
=head1 NAME
 | 
						|
 | 
						|
git-md-toc - generate the table of content from the Markdown file(s)
 | 
						|
 | 
						|
=head1 SYNOPSIS
 | 
						|
 | 
						|
  git-md-toc [OPTIONS]
 | 
						|
 | 
						|
=head1 DESCRIPTION
 | 
						|
 | 
						|
Read an input and generate the table of content (TOC) based on the
 | 
						|
markup of the file, which is assumed to be Markdown formatted. The outcome is
 | 
						|
also formatted as Markdown.
 | 
						|
 | 
						|
If no file is specified, the file C<README.md> is assumed; when STDIN is not a terminal, STDIN is read instead.
 | 
						|
 | 
						|
The following HTML comments are recognized in a special way and handled
 | 
						|
as the markers to insert new TOC or update existing one.
 | 
						|
 | 
						|
=over 4
 | 
						|
 | 
						|
=item C<< <!-- md-toc --> >>
 | 
						|
 | 
						|
is used to point the place in the document where to put a new TOC.
 | 
						|
 | 
						|
=item C<< <!-- md-toc-begin --> >>, C<< <!-- md-toc-end --> >>
 | 
						|
 | 
						|
are used to point the beginning and end of the existing TOC.
 | 
						|
 | 
						|
=back
 | 
						|
 | 
						|
Note that the markers themselves must start at the left edge of the
lines where they are placed. This rule does not apply to the content
between them.
 | 
						|
 | 
						|
The updated TOC always ends with a double newline to separate it from
the text below. For the same reason, if the TOC is preceded by some
text, a double newline is prepended to the TOC.
 | 
						|
 | 
						|
=head1 OPTIONS
 | 
						|
 | 
						|
=over 4
 | 
						|
 | 
						|
=item B<-h>, B<--help>
 | 
						|
 | 
						|
Print this help and exit.
 | 
						|
 | 
						|
=item B<-t> I<TITLE>, B<--title>=I<TITLE>
 | 
						|
 | 
						|
Set the title for the table of content. If the option is given without a
value, the default title C<Table of Content> is used. Without this option
the TOC has no title.
 | 
						|
 | 
						|
=item B<-l> I<LEVEL>, B<--level>=I<LEVEL>
 | 
						|
 | 
						|
Set the header level used for the TOC title. Available values are C<1>
 | 
						|
to C<6>. The default value is C<1>.
 | 
						|
 | 
						|
=item B<-u>, B<--update>
 | 
						|
 | 
						|
Update the file with the new table of content. It works even when reading
 | 
						|
from STDIN. In this case the outcome will be printed to STDOUT.
 | 
						|
 | 
						|
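=item B<-x> I<LEVEL>, B<--maxdepth>=I<LEVEL>

Set the deepest header level included in the TOC. The default value is C<6>.

=item B<-m> I<LEVEL>, B<-d> I<LEVEL>, B<--mindepth>=I<LEVEL>

Set the shallowest header level included in the TOC. The default value is
C<1>.

=item B<-f>, B<--filename>

Prepend the name of the processed file to every link in the TOC.

=back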
 | 
						|
 | 
						|
=head1 SEE ALSO
 | 
						|
 | 
						|
=head2 Syntax specification
 | 
						|
 | 
						|
L<https://daringfireball.net/projects/markdown/>
 | 
						|
 | 
						|
=head2 Perl implementations
 | 
						|
 | 
						|
L<https://metacpan.org/pod/Text::Markdown>
 | 
						|
 | 
						|
L<https://metacpan.org/pod/Text::MultiMarkdown>
 | 
						|
 | 
						|
=head2 Some other implementations
 | 
						|
 | 
						|
L<https://github.com/ekalinin/github-markdown-toc>
 | 
						|
 | 
						|
L<https://github.com/ekalinin/github-markdown-toc.go>
 | 
						|
 | 
						|
L<https://github.com/frnmst/md-toc>
 | 
						|
 | 
						|
L<https://github.com/eGavr/toc-md>
 | 
						|
 | 
						|
=head1 AUTHORS
 | 
						|
 | 
						|
Ildar Shaimordanov E<lt>F<ildar.shaimordanov@gmail.com>E<gt>
 | 
						|
 | 
						|
=head1 COPYRIGHT
 | 
						|
 | 
						|
Copyright (c) 2019 Ildar Shaimordanov. All rights reserved.
 | 
						|
 | 
						|
This program is free software; you can redistribute it and/or modify it
 | 
						|
under the same terms as Perl itself.
 | 
						|
 | 
						|
=cut
 | 
						|
 | 
						|
use strict;
 | 
						|
use warnings;
 | 
						|
 | 
						|
use Getopt::Long qw( :config no_ignore_case bundling );
 | 
						|
use Pod::Usage;
 | 
						|
 | 
						|
# Command-line settings and their defaults.
my $toc_default_title = "Table of Content";
my $toc_level         = 1;
my $max_depth         = 6;
my $min_depth         = 1;
my $use_filename      = 0;

# Both stay undefined unless the matching option is given.
my ( $toc_title, $update );

my $options_ok = GetOptions(
  # print the embedded POD as the help text
  "h|help"	=> sub {
    pod2usage({ -verbose => 2, -noperldoc => 1 });
  },

  # the value is optional; an empty (or false) value selects the default title
  "t|title:s"	=> sub {
    my ( undef, $value ) = @_;
    $toc_title = $value ? $value : $toc_default_title;
  },

  # the value is stored first and validated afterwards
  "l|level=i"	=> sub {
    my ( undef, $value ) = @_;
    $toc_level = $value;
    die "Integer expected in range [1..6]\n"
      unless $toc_level =~ /^[1-6]$/;
  },

  "u|update"	=> \$update,
  "x|maxdepth=i" => \$max_depth,
  "m|d|mindepth=i" => \$min_depth,
  "f|filename" => \$use_filename,
);

exit 1 unless $options_ok;

# No arguments: take README.md when STDIN is a terminal, otherwise read STDIN
unless ( @ARGV ) {
  @ARGV = -t 0 ? "README.md" : "-";
}
 | 
						|
 | 
						|
# =========================================================================
 | 
						|
 | 
						|
# HTML comment markers: the insertion point for a new TOC, and the
# begin/end pair that wraps a generated TOC.
my ( $md_toc, $md_toc_begin, $md_toc_end ) =
  map { "<!-- $_ -->" } qw( md-toc md-toc-begin md-toc-end );

# Name of the input currently being processed; assigned inside the loop
# over @ARGV.
my $filename;
 | 
						|
 | 
						|
# Process every input named in @ARGV: collect the document headers into a
# Markdown list of links, then either print that TOC or splice it into the
# document at the TOC markers and write the document back.
#
# Reads the file-scoped settings $toc_title, $toc_level, $update,
# $min_depth, $max_depth, $use_filename, $md_toc_begin and $md_toc_end,
# and assigns the file-scoped $filename.
foreach my $path ( @ARGV ) {
  $filename = $path;

  # skip README.md unless it is the only file explicitly specified;
  # decided before opening so that a skipped file is never read
  next if @ARGV > 1 and $filename eq 'README.md';

  # "-" stands for STDIN when reading and for STDOUT when writing
  my $is_stdio = $path eq '-';

  my $orig_text;
  {
    my $in;
    if ( $is_stdio ) {
      $in = \*STDIN;
    } else {
      # 3-arg open: the name is taken literally, never as a mode or a pipe
      open $in, '<', $path
        or die "Unable to open for reading: $path: $!\n";
    }

    # slurp the whole input at once
    local $/;
    $orig_text = <$in>;

    # an exhausted handle (e.g. STDIN named twice) yields undef
    $orig_text = '' unless defined $orig_text;

    close $in unless $is_stdio;
  }

  # headers are searched in a copy; the original text is kept for the update
  my $clean_text = $orig_text;

  # skip code fencing
  $clean_text =~ s{
    (?:\A|\n) [ \t]* ``` .*? \n [ \t]* ```
  }{}msgx;

  # skip non-empty TOC blocks
  $clean_text =~ s{
    (?:\A|\n)
    <!-- [ \t]+ md-toc-begin [ \t]+ --> [ \t\r]* \n
    [\s\S]*? \n
    <!-- [ \t]+ md-toc-end   [ \t]+ --> [ \t\r]*
    (?=\n)
  }{}msgx;

  # how many times each generated anchor has been seen so far
  my %count = ();

  my @toc = ( $md_toc_begin );
  push @toc, "#" x $toc_level . " $toc_title" if $toc_title;

  while ( $clean_text =~ m{
      (?:\A|\n)
      [ ]{0,3}
      (?:
      # atx-style headers H1-H6
      ( [#]{1,6} ) [ \t]+ ( .+? ) (?: [ \t]+ [#]* )?
      |
      # setext-style headers H1
      ( \S[^\r\n]*? ) [ \t\r]* \n [ \t]* ( [=] )+
      |
      # setext-style header H2
      ( (?![-]+)|[^\r\n]+? ) [ \t\r]* \n [ \t]* ( [-] )+
      )
      [ \t\r]*
      (?=\n)
    }mgx ) {

    # copy the captures right away: the matches below overwrite $1, $2, ...
    my ( $atx_marks, $atx_title, $h1_title, $h2_title ) = ( $1, $2, $3, $5 );

    # zero-based header depth: 0 for H1, 1 for H2, ...
    my $depth;
    my $title;

    # definedness, not truth, selects the branch, so a title of "0" is kept
    if ( defined $atx_marks ) {
      $depth = length($atx_marks) - 1;
      $title = $atx_title;
    } elsif ( defined $h1_title ) {
      $depth = 0;
      $title = $h1_title;
    } elsif ( defined $h2_title ) {
      $depth = 1;
      $title = $h2_title;
    }

    # the H2 alternative may capture an empty title (a bare run of dashes)
    next unless defined $title and length $title;

    my $anchor;

    # handle link titles: the link text becomes the title, its URL the target
    if ( $title =~ m/\[([^]]+)\]\(([^)]+)\)/ ) {
      ( $title, $anchor ) = ( $1, $2 );
    } else {
      $anchor =  lc $title;
      $anchor =~ s/\s/-/g;
      $anchor =~ s/[^\w-]//g;
      $anchor = '#' . $anchor;

      $count{$anchor}++;

      # the first occurrence gets no suffix, later ones get -1, -2, ...
      $anchor .= ( 1 - $count{$anchor} or "" );
    }

    # anchors are counted above even for headers filtered out here
    next if $depth < $min_depth - 1 or $depth > $max_depth - 1;

    # two spaces per level below the minimum depth; never negative here
    my $indent = "  " x ( $depth - ( $min_depth - 1 ) );

    my $target = $use_filename ? "$filename$anchor" : $anchor;
    push @toc, "$indent* [$title]($target)";
  }

  push @toc, $md_toc_end;

  my $toc = join "\n", @toc;

  unless ( $update ) {
    print "$toc\n";
    next;
  }

  # replace the insertion marker or an existing TOC block; $1 is defined
  # only when the marker sits at the very start of the text
  $orig_text =~ s{
    (?: (\A) | [\r\n]+ )
    (?:
    <!-- [ \t]+ md-toc [ \t]+ --> [ \t\r]*
    |
    <!-- [ \t]+ md-toc-begin [ \t]+ --> [ \t\r]* \n
    (?: [\s\S]*? \n )*?
    <!-- [ \t]+ md-toc-end   [ \t]+ --> [ \t\r]*
    )
    (?: (\Z) | [\r\n]+ )
  }{
    ( $1 // "\n\n" ) . $toc . "\n\n";
  }emgx;

  warn "Updating $path\n";

  my $out;
  if ( $is_stdio ) {
    $out = \*STDOUT;
  } else {
    open $out, '>', $path
      or die "Unable to open for writing: $path: $!\n";
  }

  print {$out} $orig_text
    or die "Unable to write: $path: $!\n";

  # buffered write errors surface at close time
  unless ( $is_stdio ) {
    close $out
      or die "Unable to close after writing: $path: $!\n";
  }
}
 | 
						|
 | 
						|
# =========================================================================
 | 
						|
 | 
						|
# EOF
 |