// Command main exports the pages of a Confluence space as static HTML files.
//
// It writes <spacekey>/index.html containing a nested list of all pages and
// one <spacekey>/content/<page id>.html file per page. With --attachments the
// attachments of every page are stored next to the page files.
package main

import (
	"bytes"
	"encoding/base64"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"log"
	"net/http"
	"os"
	"path/filepath"
	"strings"
	"text/template"
	"time"

	"github.com/alexflint/go-arg"
	"golang.org/x/net/html"
)

const (
	// contentDir is the directory, relative to the export root, that holds
	// the page files and their attachments. The index template links into it.
	contentDir = "content"

	// requestTimeout bounds a single HTTP request including reading the body.
	// It is generous because attachments are fetched through the same client.
	requestTimeout = 5 * time.Minute
)

// httpClient is shared by all requests so connections can be reused.
var httpClient = &http.Client{Timeout: requestTimeout}

// indexTemplate renders the page tree as nested lists. Every entry links to
// content/<id>.html, which is where writeHTML stores the page.
const indexTemplate = `{{ define "tree" }}{{ if gt (len .Children) 0 }}<ul>
{{ range .Children }}<li><a href="content/{{ .Data.Id }}.html">{{ html .Data.Title }}</a>{{ template "tree" . }}</li>
{{ end }}</ul>
{{ end }}{{ end }}<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
</head>
<body>
{{ template "tree" . }}
</body>
</html>
`

// NodeData holds what is known about a single page.
type NodeData struct {
	Id    string
	Title string
	Body  string
}

// Node is one page in the page tree together with its child pages.
type Node struct {
	Children []*Node
	Data     NodeData
}

// Arguments are the command line arguments parsed by go-arg.
type Arguments struct {
	Baseurl         string `arg:"required" help:"Base URL of the Confluence instance (required)"`
	Spacekey        string `arg:"required" help:"Spacekey to export (required)"`
	User            string `arg:"required" help:"User used for authentication (required)"`
	Token           string `arg:"required" help:"Token used for authentication (required)"`
	WithAttachments bool   `arg:"-a,--attachments" help:"Download attachments"`
}

// writeIndex renders the tree below n as an HTML index into w.
// Errors are reported on stdout; nothing is written if the template is broken.
func (n *Node) writeIndex(w io.Writer) {
	tmpl, err := template.New("root").Parse(indexTemplate)
	if err != nil {
		fmt.Printf("ERROR creating template: %v\n", err)
		// tmpl is nil here; executing it would panic.
		return
	}
	if err := tmpl.Execute(w, n); err != nil {
		fmt.Printf("ERROR executing template: %v\n", err)
	}
}

// getAllChildren returns every descendant of n in pre-order
// (a node is listed before its own children).
func (n *Node) getAllChildren() []*Node {
	var children []*Node
	for _, child := range n.Children {
		children = append(children, child)
		children = append(children, child.getAllChildren()...)
	}
	return children
}

// getChildWithID looks for a direct child of n with the given page ID.
// When no such child exists it returns an empty node and false.
func (n *Node) getChildWithID(id string) (node *Node, hasChild bool) {
	for _, c := range n.Children {
		if c.Data.Id == id {
			return c, true
		}
	}
	return &Node{}, false
}

// addChild appends child to the direct children of n.
func (n *Node) addChild(child *Node) {
	n.Children = append(n.Children, child)
}

// childOrNew returns the direct child of n with the given ID, creating and
// attaching it first when it does not exist yet.
func (n *Node) childOrNew(id, title string) *Node {
	if existing, ok := n.getChildWithID(id); ok {
		return existing
	}
	child := &Node{Data: NodeData{Id: id, Title: title}}
	n.addChild(child)
	return child
}

// writeHTML stores the page held by n as content/<id>.html relative to the
// current working directory (main changes into the export root beforehand).
// The body is wrapped in a minimal document and passed through fixHTML.
// Any failure terminates the program.
func writeHTML(n *Node) {
	path := filepath.Join(contentDir, n.Data.Id+".html")
	log.Printf("Writing file %v containing page: %v", path, n.Data.Title)

	var page strings.Builder
	page.WriteString("<!DOCTYPE html>\n<html>\n<head>\n<meta charset=\"utf-8\">\n<title>")
	// The title is plain text from the API and must not be parsed as markup.
	page.WriteString(template.HTMLEscapeString(n.Data.Title))
	page.WriteString("</title>\n</head>\n<body>\n")
	page.WriteString(n.Data.Body)
	page.WriteString("\n</body>\n</html>\n")

	if err := os.WriteFile(path, []byte(fixHTML(page.String())), 0644); err != nil {
		log.Fatalf("Error writing file for ID %v: %v", n.Data.Id, err)
	}
}

// spaceResult mirrors the JSON answer of the "content of a space" endpoint
// requested by getSpaceChildren.
type spaceResult struct {
	Results []struct {
		ID        string `json:"id"`
		Type      string `json:"type"`
		Status    string `json:"status"`
		Title     string `json:"title"`
		Ancestors []struct {
			ID                  string `json:"id"`
			Type                string `json:"type"`
			Status              string `json:"status"`
			Title               string `json:"title"`
			MacroRenderedOutput struct {
			} `json:"macroRenderedOutput"`
			Extensions struct {
				Position int `json:"position"`
			} `json:"extensions"`
			Expandable struct {
				Container           string `json:"container"`
				Metadata            string `json:"metadata"`
				Restrictions        string `json:"restrictions"`
				History             string `json:"history"`
				Body                string `json:"body"`
				Version             string `json:"version"`
				Descendants         string `json:"descendants"`
				Space               string `json:"space"`
				ChildTypes          string `json:"childTypes"`
				Operations          string `json:"operations"`
				SchedulePublishDate string `json:"schedulePublishDate"`
				Children            string `json:"children"`
				Ancestors           string `json:"ancestors"`
			} `json:"_expandable"`
			Links struct {
				Self   string `json:"self"`
				Tinyui string `json:"tinyui"`
				Editui string `json:"editui"`
				Webui  string `json:"webui"`
			} `json:"_links"`
		} `json:"ancestors"`
		MacroRenderedOutput struct {
		} `json:"macroRenderedOutput"`
		Extensions struct {
			Position int `json:"position"`
		} `json:"extensions"`
		Expandable struct {
			ChildTypes          string `json:"childTypes"`
			Container           string `json:"container"`
			Metadata            string `json:"metadata"`
			Operations          string `json:"operations"`
			SchedulePublishDate string `json:"schedulePublishDate"`
			Children            string `json:"children"`
			Restrictions        string `json:"restrictions"`
			History             string `json:"history"`
			Body                string `json:"body"`
			Version             string `json:"version"`
			Descendants         string `json:"descendants"`
			Space               string `json:"space"`
		} `json:"_expandable"`
		Links struct {
			Self   string `json:"self"`
			Tinyui string `json:"tinyui"`
			Editui string `json:"editui"`
			Webui  string `json:"webui"`
		} `json:"_links"`
	} `json:"results"`
	Start int `json:"start"`
	Limit int `json:"limit"`
	Size  int `json:"size"`
	Links struct {
		Base    string `json:"base"`
		Context string `json:"context"`
		Next    string `json:"next"`
		Self    string `json:"self"`
	} `json:"_links"`
}

// contentResult mirrors the JSON answer of the content endpoint requested by
// getContentFromID (with body.view expanded).
type contentResult struct {
	ID                  string `json:"id"`
	Type                string `json:"type"`
	Status              string `json:"status"`
	Title               string `json:"title"`
	MacroRenderedOutput struct {
	} `json:"macroRenderedOutput"`
	Body struct {
		View struct {
			Value          string `json:"value"`
			Representation string `json:"representation"`
			Expandable     struct {
				Webresource     string `json:"webresource"`
				EmbeddedContent string `json:"embeddedContent"`
				MediaToken      string `json:"mediaToken"`
				Content         string `json:"content"`
			} `json:"_expandable"`
		} `json:"view"`
		Expandable struct {
			Editor              string `json:"editor"`
			AtlasDocFormat      string `json:"atlas_doc_format"`
			ExportView          string `json:"export_view"`
			StyledView          string `json:"styled_view"`
			Dynamic             string `json:"dynamic"`
			Storage             string `json:"storage"`
			Editor2             string `json:"editor2"`
			AnonymousExportView string `json:"anonymous_export_view"`
		} `json:"_expandable"`
	} `json:"body"`
	Extensions struct {
		Position int `json:"position"`
	} `json:"extensions"`
	Expandable struct {
		ChildTypes          string `json:"childTypes"`
		Container           string `json:"container"`
		Metadata            string `json:"metadata"`
		Operations          string `json:"operations"`
		SchedulePublishDate string `json:"schedulePublishDate"`
		Children            string `json:"children"`
		Restrictions        string `json:"restrictions"`
		History             string `json:"history"`
		Ancestors           string `json:"ancestors"`
		Version             string `json:"version"`
		Descendants         string `json:"descendants"`
		Space               string `json:"space"`
	} `json:"_expandable"`
	Links struct {
		Editui     string `json:"editui"`
		Webui      string `json:"webui"`
		Context    string `json:"context"`
		Self       string `json:"self"`
		Tinyui     string `json:"tinyui"`
		Collection string `json:"collection"`
		Base       string `json:"base"`
	} `json:"_links"`
}

// attachmentResult mirrors the JSON answer of the child/attachment endpoint
// requested by downloadAttachmentsForPage.
type attachmentResult struct {
	Results []struct {
		ID                  string `json:"id"`
		Type                string `json:"type"`
		Status              string `json:"status"`
		Title               string `json:"title"`
		MacroRenderedOutput struct {
		} `json:"macroRenderedOutput"`
		Metadata struct {
			MediaType string `json:"mediaType"`
		} `json:"metadata"`
		Extensions struct {
			MediaType            string `json:"mediaType"`
			FileSize             int    `json:"fileSize"`
			Comment              string `json:"comment"`
			MediaTypeDescription string `json:"mediaTypeDescription"`
			FileID               string `json:"fileId"`
			CollectionName       string `json:"collectionName"`
		} `json:"extensions"`
		Expandable struct {
			ChildTypes          string `json:"childTypes"`
			Container           string `json:"container"`
			Operations          string `json:"operations"`
			SchedulePublishDate string `json:"schedulePublishDate"`
			Children            string `json:"children"`
			Restrictions        string `json:"restrictions"`
			History             string `json:"history"`
			Ancestors           string `json:"ancestors"`
			Body                string `json:"body"`
			Version             string `json:"version"`
			Descendants         string `json:"descendants"`
			Space               string `json:"space"`
		} `json:"_expandable"`
		Links struct {
			Webui    string `json:"webui"`
			Self     string `json:"self"`
			Download string `json:"download"`
		} `json:"_links"`
	} `json:"results"`
	Start int `json:"start"`
	Limit int `json:"limit"`
	Size  int `json:"size"`
	Links struct {
		Base    string `json:"base"`
		Context string `json:"context"`
		Self    string `json:"self"`
	} `json:"_links"`
}

// decodeJSON unmarshals data into v.
//
// A field whose JSON type does not match the Go type is skipped rather than
// treated as fatal: json.Unmarshal still fills every other field in that case,
// and the result structs declare many fields this program never reads.
// Malformed JSON is reported as an error.
func decodeJSON(data []byte, v interface{}) error {
	err := json.Unmarshal(data, v)
	var typeErr *json.UnmarshalTypeError
	if errors.As(err, &typeErr) {
		return nil
	}
	return err
}

// getRequest performs an authenticated GET on url and returns the response
// body. User and token from args are sent as HTTP basic authentication.
// Any status other than 200 OK is returned as an error.
func getRequest(args Arguments, url string) ([]byte, error) {
	req, err := http.NewRequest(http.MethodGet, url, nil)
	if err != nil {
		return nil, err
	}
	b64Auth := base64.StdEncoding.EncodeToString([]byte(args.User + ":" + args.Token))
	req.Header.Add("Authorization", "Basic "+b64Auth)

	resp, err := httpClient.Do(req)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("GET %s: unexpected status %s", url, resp.Status)
	}

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("GET %s: reading body: %w", url, err)
	}
	return body, nil
}

// getSpaceChildren lists all pages of the space named in args, including the
// ancestors of each page.
//
// The server may return fewer entries than the requested limit, so the
// "next" link of every answer is followed until there is none. The returned
// value carries the metadata of the first answer and the results of all of
// them. Any failure terminates the program.
func getSpaceChildren(args Arguments) spaceResult {
	url := args.Baseurl + "/wiki/rest/api/space/" + args.Spacekey + "/content/page?expand=ancestors&limit=9999"

	var all spaceResult
	for batchNo := 0; url != ""; batchNo++ {
		body, err := getRequest(args, url)
		if err != nil {
			log.Fatal(err)
		}
		var batch spaceResult
		if err := decodeJSON(body, &batch); err != nil {
			log.Fatalf("Failed to decode page list: %v", err)
		}

		if batchNo == 0 {
			all = batch
		} else {
			all.Results = append(all.Results, batch.Results...)
		}

		url = ""
		// An empty batch ends the loop even if a next link is present, so a
		// misbehaving server cannot keep the export running forever.
		if batch.Links.Next != "" && len(batch.Results) > 0 {
			// NOTE(review): the next link is assumed to be relative to
			// _links.base — confirm against the Confluence REST API docs.
			base := batch.Links.Base
			if base == "" {
				base = args.Baseurl + "/wiki"
			}
			url = base + batch.Links.Next
		}
	}

	all.Size = len(all.Results)
	all.Links.Next = ""
	return all
}

// getContentFromID fetches the page with the given ID with its rendered body
// (body.view) expanded. Any failure terminates the program.
func getContentFromID(args Arguments, id string) contentResult {
	url := args.Baseurl + "/wiki/rest/api/content/" + id + "?expand=body.view"
	body, err := getRequest(args, url)
	if err != nil {
		log.Fatal(err)
	}
	var result contentResult
	if err := decodeJSON(body, &result); err != nil {
		log.Fatalf("Failed to decode content of page %v: %v", id, err)
	}
	return result
}

// downloadAttachmentsForPage downloads the attachments listed for page into
// the content directory, next to the page files, using the attachment title
// as file name. fixHTML rewrites attachment links to that bare file name.
// Any failure terminates the program.
func downloadAttachmentsForPage(args Arguments, page *Node) {
	listURL := args.Baseurl + "/wiki/rest/api/content/" + page.Data.Id + "/child/attachment"
	body, err := getRequest(args, listURL)
	if err != nil {
		log.Fatal(err)
	}
	var result attachmentResult
	if err := decodeJSON(body, &result); err != nil {
		log.Fatalf("Failed to decode attachments of page %v: %v", page.Data.Id, err)
	}

	for i := range result.Results {
		att := &result.Results[i]

		// The title comes from the server; keep only its last path element so
		// it cannot point outside the content directory.
		filename := filepath.Base(att.Title)
		if filename == "." || filename == ".." || filename == string(filepath.Separator) {
			log.Printf("Skipping attachment with unusable name %q", att.Title)
			continue
		}

		log.Printf("Writing attachment: %v", filename)
		downloadURL := args.Baseurl + "/wiki/" + strings.TrimPrefix(att.Links.Download, "/")
		data, err := getRequest(args, downloadURL)
		if err != nil {
			log.Fatalf("Failed to download file: %v", err)
		}
		// Write only after the download succeeded so no empty file is left
		// behind, and without keeping a file handle open across iterations.
		if err := os.WriteFile(filepath.Join(contentDir, filename), data, 0644); err != nil {
			log.Fatalf("Failed to write file: %v", err)
		}
	}

	// TODO polish html, replace img attachment links, table styling
	// In the HTML the href values pointing to the files have to be changed:
	// href or img class
	// data-linked-resource-type="attachment"
	// data-linked-resource-default-alias="image-20210609-123740.png"
	// example with image: ROBODEVOPS/pages/3036938774.html
	// example with href: ROBODEVOPS/pages/2165571587.html
}

// createPageTree builds the page hierarchy from the flat list in s.
//
// Pages without ancestors are skipped, and the first ancestor of every page
// is not represented as a node; its children hang directly below the returned
// synthetic root. A page that already exists in the tree because it appeared
// as an ancestor of another page is not added a second time.
func createPageTree(s *spaceResult) *Node {
	root := &Node{Data: NodeData{Title: "root"}}
	for i := range s.Results {
		page := &s.Results[i]

		// ignore the root node
		if len(page.Ancestors) == 0 {
			continue
		}

		// Walk down from the root, creating missing branches on the way.
		// The first ancestor is skipped.
		parent := root
		for _, ancestor := range page.Ancestors[1:] {
			parent = parent.childOrNew(ancestor.ID, ancestor.Title)
		}

		// create leaf node
		parent.childOrNew(page.ID, page.Title)
	}
	return root
}

// attachmentAlias reports the value of data-linked-resource-default-alias of
// n. ok is true only when n is marked as an attachment through
// data-linked-resource-type and the alias is not empty.
func attachmentAlias(n *html.Node) (alias string, ok bool) {
	isAttachment := false
	for _, a := range n.Attr {
		switch a.Key {
		case "data-linked-resource-default-alias":
			if alias == "" {
				alias = a.Val
			}
		case "data-linked-resource-type":
			if a.Val == "attachment" {
				isAttachment = true
			}
		}
	}
	return alias, isAttachment && alias != ""
}

// fixHTML rewrites a page so it works as a local file:
//
//   - attachment links get the attachment file name as href and as link text
//   - attachment images get the attachment file name as src
//   - tables get the classes "table table-bordered align-top"
//
// All other attributes of rewritten elements are dropped. The result is the
// re-rendered document. A parse or render failure terminates the program.
func fixHTML(body string) string {
	doc, err := html.Parse(strings.NewReader(body))
	if err != nil {
		log.Fatal(err)
	}

	var walk func(*html.Node)
	walk = func(n *html.Node) {
		if n.Type == html.ElementNode {
			switch n.Data {
			case "a":
				// fix attachment links
				if filename, ok := attachmentAlias(n); ok {
					n.Attr = []html.Attribute{{Key: "href", Val: filename}}
					// remove broken preview images from links, add filename instead
					if n.FirstChild != nil {
						n.RemoveChild(n.FirstChild)
					}
					n.AppendChild(&html.Node{Type: html.TextNode, Data: filename})
				}
			case "img":
				// fix images
				if filename, ok := attachmentAlias(n); ok {
					n.Attr = []html.Attribute{{Key: "src", Val: filename}}
				}
			case "table":
				// fix tables
				n.Attr = []html.Attribute{{Key: "class", Val: "table table-bordered align-top"}}
			}
		}
		for c := n.FirstChild; c != nil; c = c.NextSibling {
			walk(c)
		}
	}
	walk(doc)

	var b bytes.Buffer
	if err := html.Render(&b, doc); err != nil {
		log.Fatal(err)
	}
	return b.String()
}

// main exports the space given on the command line into a directory named
// after the space key, created below the current working directory.
func main() {
	var args Arguments
	arg.MustParse(&args)

	result := getSpaceChildren(args)
	root := createPageTree(&result)

	if err := os.MkdirAll(filepath.Join(args.Spacekey, contentDir), 0755); err != nil {
		log.Fatalf("Failed to create output directory: %v", err)
	}
	// All later paths (index.html, content/...) are relative to the export root.
	if err := os.Chdir(args.Spacekey); err != nil {
		log.Fatalf("Failed to enter output directory: %v", err)
	}

	fname := "index.html"
	f, err := os.Create(fname)
	if err != nil {
		log.Fatalf("Failed to open %v: %v", fname, err)
	}
	root.writeIndex(f)
	// Close explicitly: the log.Fatal calls further down would skip a defer.
	if err := f.Close(); err != nil {
		log.Fatalf("Failed to write %v: %v", fname, err)
	}

	for _, page := range root.getAllChildren() {
		content := getContentFromID(args, page.Data.Id)
		page.Data.Body = content.Body.View.Value
		writeHTML(page)
		if args.WithAttachments {
			downloadAttachmentsForPage(args, page)
		}
	}
}