513 lines
15 KiB
Go
Raw Normal View History

2022-09-05 22:38:38 +02:00
package main
import (
2022-09-14 21:59:21 +02:00
"bytes"
2022-09-05 22:38:38 +02:00
"encoding/base64"
"encoding/json"
2022-09-06 22:13:42 +02:00
"fmt"
2022-09-05 22:38:38 +02:00
"io"
"log"
"net/http"
2022-09-06 22:13:42 +02:00
"os"
2022-09-14 21:59:21 +02:00
"strings"
2022-09-06 22:13:42 +02:00
"text/template"
2022-09-05 22:38:38 +02:00
"github.com/alexflint/go-arg"
2022-09-14 21:59:21 +02:00
"golang.org/x/net/html"
2022-09-05 22:38:38 +02:00
)
// NodeData is the payload carried by every Node of the page tree.
type NodeData struct {
	// Id is the page ID; it is used to build the page's file name and the
	// link to it in the index ("content/<Id>.html").
	Id string
	// Title is the page title shown in the index and as the page heading.
	Title string
	// Body is the HTML body of the page; main fills it in from the content
	// API response before the page is written.
	Body string
}
type Node struct {
2022-09-06 22:13:42 +02:00
Children []*Node
Data NodeData
2022-09-05 22:38:38 +02:00
}
2022-09-11 21:54:05 +02:00
// Arguments holds the command line options; main populates it through
// arg.MustParse.
type Arguments struct {
	// Baseurl is the base URL of the Confluence instance.
	Baseurl string `arg:"required" help:"Base URL of the Confluence instance (required)"`
	// Spacekey is the key of the space to export; it is also used as the
	// name of the output directory.
	Spacekey string `arg:"required" help:"Spacekey to export (required)"`
	// User and Token are sent as HTTP basic auth credentials.
	User  string `arg:"required" help:"User used for authentication (required)"`
	Token string `arg:"required" help:"Token used for authentication (required)"`
	// WithAttachments enables downloading the attachments of every page.
	WithAttachments bool `arg:"-a,--attachments" help:"Download attachments"`
}
func (n *Node) writeIndex(w io.Writer) {
2022-09-06 22:13:42 +02:00
tmpl, err := template.New("root").Parse(`
{{ define "tree" }}
{{ if gt (len .Children) 0 }}
<ul>
{{ range $index, $element := .Children}}
<li>
2022-09-14 20:24:55 +02:00
<a href="content/{{$element.Data.Id}}.html">{{ $element.Data.Title }}</a>
2022-09-06 22:13:42 +02:00
{{ template "tree" $element }}
</li>
{{ end }}
</ul>
{{ end }}
{{ end }}
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8" />
2022-09-14 21:59:21 +02:00
<meta name="viewport" content="width=device-width, initial-scale=1">
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.2.1/dist/css/bootstrap.min.css" rel="stylesheet" integrity="sha384-iYQeCzEYFbKjA/T2uDLTpkwGzCiq6soy8tYaI1GyVh/UjpbCx/TYkiZhlZB6+fzT" crossorigin="anonymous">
</head>
<body>
2022-09-14 21:59:21 +02:00
<div class="container">
{{ template "tree" . }}
2022-09-14 21:59:21 +02:00
</div>
</body>
2022-09-06 22:13:42 +02:00
</html>
`)
if err != nil {
fmt.Println("ERROR creating template")
2022-09-05 22:38:38 +02:00
}
2022-09-06 22:13:42 +02:00
err = tmpl.Execute(w, n)
if err != nil {
fmt.Printf("ERROR executing template: %v", err)
2022-09-05 22:38:38 +02:00
}
}
2022-09-11 21:54:05 +02:00
func (n *Node) getAllChildren() []*Node {
var children []*Node
for _, child := range n.Children {
children = append(children, child)
if len(child.Children) > 0 {
children = append(children, child.getAllChildren()...)
}
}
2022-09-11 21:54:05 +02:00
return children
}
2022-09-05 22:38:38 +02:00
func (n *Node) getChildWithID(id string) (node *Node, hasChild bool) {
2022-09-06 22:13:42 +02:00
for _, c := range n.Children {
if c.Data.Id == id {
2022-09-05 22:38:38 +02:00
return c, true
}
}
empty := Node{}
return &empty, false
}
func (n *Node) addChild(child *Node) {
2022-09-06 22:13:42 +02:00
n.Children = append(n.Children, child)
2022-09-05 22:38:38 +02:00
}
func writeHTML(n *Node) {
2022-09-14 20:24:55 +02:00
os.Chdir("content")
f, err := os.Create(n.Data.Id + ".html")
if err != nil {
log.Fatalf("Error creating file for ID %v", n.Data.Id)
}
defer f.Close()
log.Printf("Writing file %v containing page: %v", f.Name(), n.Data.Title)
html := `
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8" />
2022-09-14 21:59:21 +02:00
<meta name="viewport" content="width=device-width, initial-scale=1">
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.2.1/dist/css/bootstrap.min.css" rel="stylesheet" integrity="sha384-iYQeCzEYFbKjA/T2uDLTpkwGzCiq6soy8tYaI1GyVh/UjpbCx/TYkiZhlZB6+fzT" crossorigin="anonymous">
</head>
<body>
2022-09-14 21:59:21 +02:00
<div class="container">`
html += "<h1>" + n.Data.Title + "</h1>"
html += n.Data.Body
2022-09-14 21:59:21 +02:00
html += "</div></body></html>"
html = fixHTML(html)
_, err = f.Write([]byte(html))
if err != nil {
log.Fatalf("Error writing file for ID %v", n.Data.Id)
}
}
2022-09-05 22:38:38 +02:00
// spaceResultExtensions is the "extensions" object of a page entry.
type spaceResultExtensions struct {
	Position int `json:"position"`
}

// spaceResultPageLinks is the "_links" object of a page entry.
type spaceResultPageLinks struct {
	Self   string `json:"self"`
	Tinyui string `json:"tinyui"`
	Editui string `json:"editui"`
	Webui  string `json:"webui"`
}

// spaceResultAncestor is one element of a page's "ancestors" array.
type spaceResultAncestor struct {
	ID                  string                `json:"id"`
	Type                string                `json:"type"`
	Status              string                `json:"status"`
	Title               string                `json:"title"`
	MacroRenderedOutput struct{}              `json:"macroRenderedOutput"`
	Extensions          spaceResultExtensions `json:"extensions"`
	Expandable          struct {
		Container           string `json:"container"`
		Metadata            string `json:"metadata"`
		Restrictions        string `json:"restrictions"`
		History             string `json:"history"`
		Body                string `json:"body"`
		Version             string `json:"version"`
		Descendants         string `json:"descendants"`
		Space               string `json:"space"`
		ChildTypes          string `json:"childTypes"`
		Operations          string `json:"operations"`
		SchedulePublishDate string `json:"schedulePublishDate"`
		Children            string `json:"children"`
		Ancestors           string `json:"ancestors"`
	} `json:"_expandable"`
	Links spaceResultPageLinks `json:"_links"`
}

// spaceResultPage is one element of the "results" array: a page together
// with its chain of ancestors.
type spaceResultPage struct {
	ID                  string                `json:"id"`
	Type                string                `json:"type"`
	Status              string                `json:"status"`
	Title               string                `json:"title"`
	Ancestors           []spaceResultAncestor `json:"ancestors"`
	MacroRenderedOutput struct{}              `json:"macroRenderedOutput"`
	Extensions          spaceResultExtensions `json:"extensions"`
	Expandable          struct {
		ChildTypes          string `json:"childTypes"`
		Container           string `json:"container"`
		Metadata            string `json:"metadata"`
		Operations          string `json:"operations"`
		SchedulePublishDate string `json:"schedulePublishDate"`
		Children            string `json:"children"`
		Restrictions        string `json:"restrictions"`
		History             string `json:"history"`
		Body                string `json:"body"`
		Version             string `json:"version"`
		Descendants         string `json:"descendants"`
		Space               string `json:"space"`
	} `json:"_expandable"`
	Links spaceResultPageLinks `json:"_links"`
}

// spaceResult is the JSON document that getSpaceChildren decodes: the list
// of pages of a space plus the paging information of the response.
type spaceResult struct {
	Results []spaceResultPage `json:"results"`
	Start   int               `json:"start"`
	Limit   int               `json:"limit"`
	Size    int               `json:"size"`
	Links   struct {
		Base    string `json:"base"`
		Context string `json:"context"`
		Next    string `json:"next"`
		Self    string `json:"self"`
	} `json:"_links"`
}
2022-09-11 21:54:05 +02:00
// contentResultView is the "body.view" object: the rendered page body.
type contentResultView struct {
	Value          string `json:"value"`
	Representation string `json:"representation"`
	Expandable     struct {
		Webresource     string `json:"webresource"`
		EmbeddedContent string `json:"embeddedContent"`
		MediaToken      string `json:"mediaToken"`
		Content         string `json:"content"`
	} `json:"_expandable"`
}

// contentResultBody is the "body" object of a content response.
type contentResultBody struct {
	View       contentResultView `json:"view"`
	Expandable struct {
		Editor              string `json:"editor"`
		AtlasDocFormat      string `json:"atlas_doc_format"`
		ExportView          string `json:"export_view"`
		StyledView          string `json:"styled_view"`
		Dynamic             string `json:"dynamic"`
		Storage             string `json:"storage"`
		Editor2             string `json:"editor2"`
		AnonymousExportView string `json:"anonymous_export_view"`
	} `json:"_expandable"`
}

// contentResultLinks is the "_links" object of a content response.
type contentResultLinks struct {
	Editui     string `json:"editui"`
	Webui      string `json:"webui"`
	Context    string `json:"context"`
	Self       string `json:"self"`
	Tinyui     string `json:"tinyui"`
	Collection string `json:"collection"`
	Base       string `json:"base"`
}

// contentResult is the JSON document that getContentFromID decodes for a
// single page. main reads Body.View.Value from it as the page body.
type contentResult struct {
	ID                  string            `json:"id"`
	Type                string            `json:"type"`
	Status              string            `json:"status"`
	Title               string            `json:"title"`
	MacroRenderedOutput struct{}          `json:"macroRenderedOutput"`
	Body                contentResultBody `json:"body"`
	Extensions          struct {
		Position int `json:"position"`
	} `json:"extensions"`
	Expandable struct {
		ChildTypes          string `json:"childTypes"`
		Container           string `json:"container"`
		Metadata            string `json:"metadata"`
		Operations          string `json:"operations"`
		SchedulePublishDate string `json:"schedulePublishDate"`
		Children            string `json:"children"`
		Restrictions        string `json:"restrictions"`
		History             string `json:"history"`
		Ancestors           string `json:"ancestors"`
		Version             string `json:"version"`
		Descendants         string `json:"descendants"`
		Space               string `json:"space"`
	} `json:"_expandable"`
	Links contentResultLinks `json:"_links"`
}
2022-09-14 20:24:55 +02:00
// attachmentResultExtensions is the "extensions" object of an attachment.
type attachmentResultExtensions struct {
	MediaType            string `json:"mediaType"`
	FileSize             int    `json:"fileSize"`
	Comment              string `json:"comment"`
	MediaTypeDescription string `json:"mediaTypeDescription"`
	FileID               string `json:"fileId"`
	CollectionName       string `json:"collectionName"`
}

// attachmentResultItem is one element of the "results" array. The download
// code uses Title as the local file name and Links.Download as the source.
type attachmentResultItem struct {
	ID                  string   `json:"id"`
	Type                string   `json:"type"`
	Status              string   `json:"status"`
	Title               string   `json:"title"`
	MacroRenderedOutput struct{} `json:"macroRenderedOutput"`
	Metadata            struct {
		MediaType string `json:"mediaType"`
	} `json:"metadata"`
	Extensions attachmentResultExtensions `json:"extensions"`
	Expandable struct {
		ChildTypes          string `json:"childTypes"`
		Container           string `json:"container"`
		Operations          string `json:"operations"`
		SchedulePublishDate string `json:"schedulePublishDate"`
		Children            string `json:"children"`
		Restrictions        string `json:"restrictions"`
		History             string `json:"history"`
		Ancestors           string `json:"ancestors"`
		Body                string `json:"body"`
		Version             string `json:"version"`
		Descendants         string `json:"descendants"`
		Space               string `json:"space"`
	} `json:"_expandable"`
	Links struct {
		Webui    string `json:"webui"`
		Self     string `json:"self"`
		Download string `json:"download"`
	} `json:"_links"`
}

// attachmentResult is the JSON document that downloadAttachmentsForPage
// decodes: the attachments of one page plus paging information.
type attachmentResult struct {
	Results []attachmentResultItem `json:"results"`
	Start   int                    `json:"start"`
	Limit   int                    `json:"limit"`
	Size    int                    `json:"size"`
	Links   struct {
		Base    string `json:"base"`
		Context string `json:"context"`
		Self    string `json:"self"`
	} `json:"_links"`
}
2022-09-11 21:54:05 +02:00
func getRequest(args Arguments, url string) ([]byte, error) {
b64Auth := base64.StdEncoding.EncodeToString([]byte(args.User + ":" + args.Token))
2022-09-05 22:38:38 +02:00
client := &http.Client{}
req, err := http.NewRequest(http.MethodGet, url, nil)
if err != nil {
2022-09-11 21:54:05 +02:00
return nil, err
2022-09-05 22:38:38 +02:00
}
req.Header.Add("Authorization", "Basic "+b64Auth)
resp, err := client.Do(req)
if err != nil {
2022-09-11 21:54:05 +02:00
return nil, err
2022-09-05 22:38:38 +02:00
}
if resp.StatusCode != http.StatusOK {
2022-09-11 21:54:05 +02:00
return nil, err
}
if err != nil {
return nil, err
2022-09-05 22:38:38 +02:00
}
body, err := io.ReadAll(resp.Body)
2022-09-11 21:54:05 +02:00
if err != nil {
return nil, err
}
return body, nil
}
func getSpaceChildren(args Arguments) spaceResult {
url := args.Baseurl + "/wiki/rest/api/space/" + args.Spacekey + "/content/page?expand=ancestors&limit=9999"
body, err := getRequest(args, url)
2022-09-05 22:38:38 +02:00
if err != nil {
log.Fatal(err)
}
var result spaceResult
json.Unmarshal(body, &result)
return result
}
2022-09-11 21:54:05 +02:00
// getContentFromID fetches the page with the given id with its rendered
// body ("body.view") expanded and returns the decoded response.
//
// A failed request terminates the program. A response that cannot be
// decoded completely is logged and whatever could be decoded is returned,
// so that one broken page does not abort the export of all others.
func getContentFromID(args Arguments, id string) contentResult {
	url := args.Baseurl + "/wiki/rest/api/content/" + id + "?expand=body.view"
	body, err := getRequest(args, url)
	if err != nil {
		log.Fatal(err)
	}

	var result contentResult
	if err := json.Unmarshal(body, &result); err != nil {
		log.Printf("Failed to decode content of page %v: %v", id, err)
	}
	return result
}
2022-09-14 20:24:55 +02:00
// downloadAttachmentsForPage downloads every attachment listed for page
// into the current working directory, one file per attachment, named after
// the attachment title.
//
// A failed request or file write terminates the program. If the attachment
// list cannot be decoded the problem is logged and only the attachments
// that could be decoded are downloaded.
//
// NOTE(review): only the first response of the attachment listing is
// processed; confirm whether pages with many attachments need paging.
func downloadAttachmentsForPage(args Arguments, page *Node) {
	listURL := args.Baseurl + "/wiki/rest/api/content/" + page.Data.Id + "/child/attachment"
	body, err := getRequest(args, listURL)
	if err != nil {
		log.Fatal(err)
	}

	var result attachmentResult
	if err := json.Unmarshal(body, &result); err != nil {
		log.Printf("Failed to decode attachment list of page %v: %v", page.Data.Id, err)
	}

	for i := range result.Results {
		att := &result.Results[i]
		filename := attachmentFileName(att.Title)
		log.Printf("Writing attachment: %v", filename)

		// Download first and write the file in one step afterwards: no file
		// handle stays open across iterations and a failed download does
		// not leave an empty file behind.
		data, err := getRequest(args, args.Baseurl+"/wiki/"+att.Links.Download)
		if err != nil {
			log.Fatalf("Failed to download file: %v", err)
		}
		if err := os.WriteFile(filename, data, 0666); err != nil {
			log.Fatalf("Failed to write file: %v", err)
		}
	}
	// TODO polish html, replace img attachment links, table styling
	// the hrefs in the HTML must be changed to point to the files
	// href or img class
	// data-linked-resource-type="attachment"
	// data-linked-resource-default-alias="image-20210609-123740.png"
	// example with image: ROBODEVOPS/pages/3036938774.html
	// example with href: ROBODEVOPS/pages/2165571587.html
}

// attachmentFileName strips any directory part from an attachment title so
// that the file is always created inside the current directory.
func attachmentFileName(title string) string {
	if i := strings.LastIndexAny(title, `/\`); i >= 0 {
		return title[i+1:]
	}
	return title
}
2022-09-11 21:54:05 +02:00
func createPageTree(s *spaceResult) *Node {
2022-09-05 22:38:38 +02:00
var parent *Node
2022-09-06 22:13:42 +02:00
root := Node{Data: NodeData{Title: "root"}}
2022-09-05 22:38:38 +02:00
for _, page := range s.Results {
parent = &root
// ignore the root node
if len(page.Ancestors) == 0 {
continue
}
// create branches from ancestors
for i, ancestor := range page.Ancestors {
// skip root node
if i == 0 {
continue
}
node, hasChild := parent.getChildWithID(ancestor.ID)
if hasChild {
parent = node
continue
}
2022-09-06 22:13:42 +02:00
n := Node{Data: NodeData{Id: ancestor.ID, Title: ancestor.Title}}
2022-09-05 22:38:38 +02:00
parent.addChild(&n)
parent = &n
}
// create leaf node
2022-09-06 22:13:42 +02:00
n := Node{Data: NodeData{Id: page.ID, Title: page.Title}}
2022-09-05 22:38:38 +02:00
parent.addChild(&n)
}
2022-09-11 21:54:05 +02:00
return &root
}
2022-09-14 21:59:21 +02:00
// fixHTML parses body as an HTML document, rewrites it for offline use and
// returns the re-rendered document:
//
//   - <a> elements that reference an attachment get their attributes
//     replaced by an href to the local file, and their first child (the
//     broken preview image) is replaced by the file name as link text;
//   - <img> elements that reference an attachment get their attributes
//     replaced by a src pointing to the local file;
//   - every <table> gets its attributes replaced by the Bootstrap table
//     classes.
//
// Elements that are marked as attachments but carry no file name are left
// untouched, since there is nothing to link to. Parse and render errors
// terminate the program.
func fixHTML(body string) string {
	doc, err := html.Parse(strings.NewReader(body))
	if err != nil {
		log.Fatal(err)
	}

	var walk func(*html.Node)
	walk = func(n *html.Node) {
		if n.Type == html.ElementNode {
			switch n.Data {
			case "a":
				if filename, ok := attachmentAlias(n); ok {
					n.Attr = []html.Attribute{{Key: "href", Val: filename}}
					// RemoveChild panics when handed a nil child, which is
					// what FirstChild is for an empty link.
					if n.FirstChild != nil {
						n.RemoveChild(n.FirstChild)
					}
					n.AppendChild(&html.Node{Type: html.TextNode, Data: filename})
				}
			case "img":
				if filename, ok := attachmentAlias(n); ok {
					n.Attr = []html.Attribute{{Key: "src", Val: filename}}
				}
			case "table":
				n.Attr = []html.Attribute{{Key: "class", Val: "table table-bordered align-top"}}
			}
		}
		for c := n.FirstChild; c != nil; c = c.NextSibling {
			walk(c)
		}
	}
	walk(doc)

	var b bytes.Buffer
	if err := html.Render(&b, doc); err != nil {
		log.Fatal(err)
	}
	return b.String()
}

// attachmentAlias reports the local file name of the attachment that n
// references. ok is false unless n is marked as an attachment
// (data-linked-resource-type="attachment") and carries a non-empty
// data-linked-resource-default-alias attribute.
func attachmentAlias(n *html.Node) (filename string, ok bool) {
	isAttachment := false
	for _, a := range n.Attr {
		switch a.Key {
		case "data-linked-resource-default-alias":
			if filename == "" {
				filename = a.Val
			}
		case "data-linked-resource-type":
			if a.Val == "attachment" {
				isAttachment = true
			}
		}
	}
	return filename, isAttachment && filename != ""
}
2022-09-11 21:54:05 +02:00
func main() {
var args Arguments
arg.MustParse(&args)
result := getSpaceChildren(args)
root := createPageTree(&result)
fname := "index.html"
2022-09-14 20:24:55 +02:00
os.Mkdir(args.Spacekey, 0744)
os.Mkdir(args.Spacekey+"/content", 0744)
os.Chdir(args.Spacekey)
2022-09-06 22:13:42 +02:00
f, err := os.Create(fname)
if err != nil {
log.Fatalf("Failed to open %v", fname)
}
defer f.Close()
root.writeIndex(f)
2022-09-05 22:38:38 +02:00
2022-09-11 21:54:05 +02:00
pages := root.getAllChildren()
for _, page := range pages {
content := getContentFromID(args, page.Data.Id)
page.Data.Body = content.Body.View.Value
writeHTML(page)
2022-09-14 20:24:55 +02:00
if args.WithAttachments {
downloadAttachmentsForPage(args, page)
}
2022-09-05 22:38:38 +02:00
}
}