
135 lines
3.5 KiB
Raw Normal View History

2020-07-31 11:08:27 +01:00
package main
import (
flag ""
2020-07-31 11:08:27 +01:00
2020-08-31 23:27:50 +01:00
// version is the release string printed after "html2gmi " when the
// -v/--version flag is given.
var version = "0.2.6"
2020-07-31 11:08:27 +01:00
var (
output = flag.StringP("output", "o", "", "Output path. Otherwise uses stdout")
input = flag.StringP("input", "i", "", "Input path. Otherwise uses stdin")
citationStart = flag.IntP("citationStart", "c", 1, "Start citations from this index")
citationMarkers = flag.BoolP("citationMarkers", "m", false, "Use footnote style citation markers")
numberedLinks = flag.BoolP("numberedLinks", "n", false, "Number the links")
prettyTables = flag.BoolP("prettyTables", "t", false, "Pretty tables - works with most simple tables")
emitImagesAsLinks = flag.BoolP("emitImagesAsLinks", "e", false, "Emit links to included images")
linkEmitFrequency = flag.IntP("linkEmitFrequency", "l", 2, "Emit gathered links through the document after this number of paragraphs")
verFlag = flag.BoolP("version", "v", false, "Find out what version of html2gmi you're running")
2020-07-31 11:08:27 +01:00
// check is the program's fail-fast error helper: a nil error is a no-op,
// any other error aborts the program.
//
// NOTE(review): the statement inside the if was not visible in the reviewed
// copy of this file; panic(e) is a reconstruction - confirm against the
// repository.
func check(e error) {
	if e != nil {
		panic(e)
	}
}
2020-07-31 11:08:27 +01:00
func saveFile(contents []byte, path string) {
d1 := contents
err := ioutil.WriteFile(path, d1, 0644)
2020-07-31 11:08:27 +01:00
// readStdin reads standard input to its end and returns it as a string.
//
// Input is decoded one rune at a time. Reading stops at io.EOF or at the
// first read error; because the signature has no error result, a non-EOF
// error is reported on stderr and whatever was read so far is returned.
func readStdin() string {
	reader := bufio.NewReader(os.Stdin) // default buffer size is 4096 bytes
	var runes []rune

	for {
		r, _, err := reader.ReadRune()
		if err != nil {
			// Stop on every error, not just io.EOF: otherwise a failing
			// reader would keep the loop spinning and appending zero runes.
			if err != io.EOF {
				fmt.Fprintln(os.Stderr, "html2gmi: reading stdin:", err)
			}
			break
		}
		runes = append(runes, r)
	}

	return string(runes)
}
2020-07-31 11:08:27 +01:00
func getInput() (string, error) {
var inputHtml string
info, err := os.Stdin.Stat()
if *input != "" {
//get the input file from the command line
dat, err := ioutil.ReadFile(*input)
inputHtml = string(dat)
} else if info.Mode()&os.ModeNamedPipe != 0 {
// we have a pipe input
inputHtml = readStdin()
} else {
//we shouldn't get here
return "", errors.New("invalid option for input - use -i <path> or pipe to stdin")
return inputHtml, nil
2020-07-31 11:08:27 +01:00
func main() {
var inputHtml string
if *verFlag {
fmt.Println("html2gmi " + version)
//get the input from commandline or stdin
inputHtml, err := getInput()
//convert html to gmi
options := html2gemini.NewOptions()
options.PrettyTables = *prettyTables
options.CitationStart = *citationStart
options.LinkEmitFrequency = *linkEmitFrequency
options.CitationMarkers = *citationMarkers
options.NumberedLinks = *numberedLinks
options.EmitImagesAsLinks = *emitImagesAsLinks
//dont use an extra line to separate header from body, but
//do separate each row visually
options.PrettyTablesOptions.HeaderLine = false
options.PrettyTablesOptions.RowLine = true
//use slightly nicer Unicode borders, otherwise can use +,|,-
//options.PrettyTablesOptions.CenterSeparator = "┼"
//options.PrettyTablesOptions.ColumnSeparator = "│"
//options.PrettyTablesOptions.RowSeparator = "─"
//pretty tables option is somewhat experimental
//and the column positions not always correct
//so use invisible borders of spaces for now
options.PrettyTablesOptions.CenterSeparator = " "
options.PrettyTablesOptions.ColumnSeparator = " "
options.PrettyTablesOptions.RowSeparator = " "
2020-08-30 21:39:29 +01:00
ctx := html2gemini.NewTraverseContext(*options)
2020-08-30 21:39:29 +01:00
text, err := html2gemini.FromString(inputHtml, *ctx)
//process the output
if *output == "" {
fmt.Print(text + "\n") //terminate with a new line
} else {
//save to the specified output
gmiBytes := []byte(text + "\n")
saveFile(gmiBytes, *output)
2020-07-31 11:08:27 +01:00