-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy patharticle.go
More file actions
executable file
·105 lines (90 loc) · 2.17 KB
/
article.go
File metadata and controls
executable file
·105 lines (90 loc) · 2.17 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
package main
import (
"fmt"
"io"
"os"
"regexp"
"strings"
)
// article collects the text belonging to one page and writes it to an
// output stream.
type article struct {
	// out is the destination currently being written; nil while no output
	// is open.
	out io.WriteCloser
	// paragraphs buffers text added by accumulate until flush writes it out.
	paragraphs []string
	// pageHeader and headline hold the tag names that handle compares
	// against to decide when to start a new output and when to flush.
	pageHeader, headline string
}
// fillDefaults selects "h2" as the page-header tag and "h3" as the headline
// tag. handle replaces these with "h1"/"h2" once it encounters an "h1" tag.
func (a *article) fillDefaults() {
	a.pageHeader, a.headline = "h2", "h3"
}
// handle processes one tag/value pair.
//
// An "h1" tag switches the article to treating "h1" as the page header and
// "h2" as the headline. After that, with value trimmed of surrounding
// whitespace:
//   - a page-header tag closes the current output, logs the header to
//     stdout and opens a new output named after the header text;
//   - a headline tag flushes the buffered paragraphs and then buffers the
//     headline text;
//   - any other tag (e.g. <p>) just buffers the text.
func (a *article) handle(tag, value string) {
	if tag == "h1" {
		a.pageHeader, a.headline = "h1", "h2"
	}

	text := strings.TrimSpace(value)

	if tag == a.pageHeader {
		a.close()
		fmt.Printf(" %s: %s\n", tag, text)
		a.renew(text)
		return
	}

	// A headline starts a new section, so write out what came before it.
	if tag == a.headline {
		a.flush()
	}
	a.accumulate(text)
}
// renew opens a new output for the page header text in value. The file name
// is derived from value by decideFileName and logged to stdout; the header
// text itself becomes the first line of the new output, followed by a blank
// line.
func (a *article) renew(value string) {
	name := decideFileName(value)
	fmt.Printf(" -> %s\n", name)

	writer := createWriter(name)
	a.out = writer
	fmt.Fprintf(writer, "%s\n\n", value)
}
// regexDateTitle finds three dot-separated groups of digits, e.g. the
// "31.12.1999" in a page header.
var regexDateTitle = regexp.MustCompile(`(\d+)\.(\d+)\.(\d+)`)

// decideFileName builds a ".txt" file name from the first dotted date found
// in value. The three digit groups are emitted in reverse order (third,
// second, first) with format widths 4, 2 and 2 — presumably turning a
// day.month.year date into a year-month-day name.
//
// It panics when value contains no such date.
func decideFileName(value string) string {
	match := regexDateTitle.FindStringSubmatch(value)
	if len(match) == 4 {
		first, second, third := match[1], match[2], match[3]
		return fmt.Sprintf("%04s%02s%02s.txt", third, second, first)
	}
	panic(fmt.Sprintf("Illegal date format in page header: %v", value))
}
// createWriter creates (or truncates) the file fileName and returns it for
// writing. It panics with the underlying error if the file cannot be
// created.
func createWriter(fileName string) io.WriteCloser {
	file, err := os.Create(fileName)
	if err == nil {
		return file
	}
	panic(err)
}
// accumulate buffers value as a paragraph for the next flush. Text is
// dropped when no output is open or when value is empty.
func (a *article) accumulate(value string) {
	if a.out == nil || value == "" {
		return
	}
	a.paragraphs = append(a.paragraphs, value)
}
// Patterns used by flush to tidy up paragraph text.
var (
	// regexExtraSpace matches a run of one or more spaces.
	regexExtraSpace = regexp.MustCompile(` +`)
	// regexMissingSpace matches a full stop directly followed by an
	// upper-case letter (A-Z, Ä, Ö, Å).
	regexMissingSpace = regexp.MustCompile(`(\.)([A-ZÄÖÅ])`)
	// regexFullStop matches a full stop, one space and an upper-case letter
	// (A-Z, Ä, Ö, Å).
	regexFullStop = regexp.MustCompile(`(\.) ([A-ZÄÖÅ])`)
)
// flush writes the buffered paragraphs to the current output and empties
// the buffer. It does nothing when no output is open or nothing is buffered.
//
// The paragraphs are written twice, each pass introduced by a "----" line:
// first verbatim, then cleaned up so that runs of spaces are collapsed, a
// space is inserted after a full stop that is directly followed by an
// upper-case letter, and each such sentence boundary is turned into a
// break of three newlines.
func (a *article) flush() {
	pending := a.paragraphs
	if a.out == nil || len(pending) == 0 {
		return
	}

	const separator = "----"

	// First pass: the text exactly as it was collected.
	fmt.Fprintln(a.out, separator)
	for i := range pending {
		fmt.Fprintf(a.out, "%s\n\n", pending[i])
	}

	// Second pass: normalised spacing, one sentence per chunk. The three
	// replacements must run in this order, innermost first.
	fmt.Fprintln(a.out, separator)
	for i := range pending {
		text := regexFullStop.ReplaceAllString(
			regexMissingSpace.ReplaceAllString(
				regexExtraSpace.ReplaceAllString(pending[i], " "),
				"$1 $2"),
			"$1\n\n\n$2")
		fmt.Fprintf(a.out, "%s\n\n\n", text)
	}

	a.paragraphs = nil
}
// close flushes any buffered paragraphs and closes the current output, if
// one is open. Afterwards a.out is nil, so calling close again is a no-op.
//
// It panics if closing the output fails: a Close error on a freshly written
// file can mean data never reached disk, and the rest of this file reports
// I/O failures (see createWriter) by panicking as well.
func (a *article) close() {
	a.flush()
	if a.out == nil {
		return
	}

	// Clear the field first so the article is left without an output even
	// when Close fails.
	out := a.out
	a.out = nil
	if err := out.Close(); err != nil {
		panic(err)
	}
}