From 846bf5aae0a1e338a5054b290c367c49ec8122cf Mon Sep 17 00:00:00 2001 From: Johannes Rothe Date: Wed, 14 Sep 2022 21:59:21 +0200 Subject: [PATCH] Fix some of the custom Confluence HTML --- go.mod | 5 +++- go.sum | 2 ++ main.go | 80 ++++++++++++++++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 82 insertions(+), 5 deletions(-) diff --git a/go.mod b/go.mod index 4400047..ed65f94 100644 --- a/go.mod +++ b/go.mod @@ -4,4 +4,7 @@ go 1.18 require github.com/alexflint/go-arg v1.4.3 -require github.com/alexflint/go-scalar v1.1.0 // indirect +require ( + github.com/alexflint/go-scalar v1.1.0 // indirect + golang.org/x/net v0.0.0-20220909164309-bea034e7d591 // indirect +) diff --git a/go.sum b/go.sum index 9ef1991..d2783b1 100644 --- a/go.sum +++ b/go.sum @@ -11,6 +11,8 @@ github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+ github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +golang.org/x/net v0.0.0-20220909164309-bea034e7d591 h1:D0B/7al0LLrVC8aWF4+oxpv/m8bc7ViFfVS8/gXGdqI= +golang.org/x/net v0.0.0-20220909164309-bea034e7d591/go.mod h1:YDH+HFinaLZZlnHAfSS6ZXJJ9M9t4Dl22yv3iI2vPwk= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/main.go b/main.go index b697528..d202e32 100644 --- a/main.go +++ b/main.go @@ -1,6 +1,7 @@ package main import ( + "bytes" "encoding/base64" "encoding/json" "fmt" @@ -8,9 +9,11 @@ import ( "log" "net/http" "os" + "strings" "text/template" "github.com/alexflint/go-arg" + "golang.org/x/net/html" ) type NodeData struct { @@ -50,10 +53,13 @@ func (n *Node) writeIndex(w io.Writer) { - + + +
{{ template "tree" . }} +
`) @@ -104,12 +110,15 @@ func writeHTML(n *Node) { - + + - ` +
` + html += "

" + n.Data.Title + "

" html += n.Data.Body - html += "" + html += "
" + html = fixHTML(html) _, err = f.Write([]byte(html)) if err != nil { log.Fatalf("Error writing file for ID %v", n.Data.Id) @@ -411,6 +420,69 @@ func createPageTree(s *spaceResult) *Node { return &root } +func fixHTML(body string) string { + doc, err := html.Parse(strings.NewReader(body)) + if err != nil { + log.Fatal(err) + } + var f func(*html.Node) + f = func(n *html.Node) { + // fix attachment links + if n.Type == html.ElementNode && n.Data == "a" { + // get the correct name + var filename string + for _, a := range n.Attr { + if a.Key == "data-linked-resource-default-alias" { + filename = a.Val + break + } + } + // link to the correct file + for _, a := range n.Attr { + if a.Key == "data-linked-resource-type" { + if a.Val == "attachment" { + n.Attr = []html.Attribute{{Key: "href", Val: filename}} + // remove broken preview images from links, add filename instead + n.RemoveChild(n.FirstChild) + node := html.Node{Data: filename, Type: html.TextNode} + n.AppendChild(&node) + } + } + } + } + // fix images + if n.Type == html.ElementNode && n.Data == "img" { + // get the correct name + var filename string + for _, a := range n.Attr { + if a.Key == "data-linked-resource-default-alias" { + filename = a.Val + break + } + } + // link to the correct file + for _, a := range n.Attr { + if a.Key == "data-linked-resource-type" { + if a.Val == "attachment" { + n.Attr = []html.Attribute{{Key: "src", Val: filename}} + } + } + } + } + // fix tables + if n.Type == html.ElementNode && n.Data == "table" { + n.Attr = []html.Attribute{{Key: "class", Val: "table table-bordered align-top"}} + } + for c := n.FirstChild; c != nil; c = c.NextSibling { + f(c) + } + } + f(doc) + var b bytes.Buffer + html.Render(&b, doc) + return b.String() +} + func main() { var args Arguments arg.MustParse(&args)