From cfc0376b93bbd0fc30fda615d48436722c76fed9 Mon Sep 17 00:00:00 2001 From: jackchuka Date: Fri, 6 Feb 2026 11:35:17 +0900 Subject: [PATCH] feat(rule): link check to include frontmatter links --- internal/integration/link_test.go | 7 + internal/parser/extractor.go | 24 +++ internal/parser/parser.go | 1 + internal/parser/types.go | 1 + internal/rules/link.go | 5 +- internal/rules/link_test.go | 154 ++++++++++++++++++ .../invalid_frontmatter_blocked_domain.md | 9 + 7 files changed, 200 insertions(+), 1 deletion(-) create mode 100644 testdata/links/invalid_frontmatter_blocked_domain.md diff --git a/internal/integration/link_test.go b/internal/integration/link_test.go index 2761f01..86ec2f4 100644 --- a/internal/integration/link_test.go +++ b/internal/integration/link_test.go @@ -47,6 +47,13 @@ func TestLinkValidation(t *testing.T) { ShouldPass: false, ExpectedRule: "link", }, + { + Name: "blocked domain in frontmatter URL", + FilePath: testdataDir + "links/invalid_frontmatter_blocked_domain.md", + SchemaPath: testdataDir + "links/.mdschema.yml", + ShouldPass: false, + ExpectedRule: "link", + }, } runTestCases(t, testCases) diff --git a/internal/parser/extractor.go b/internal/parser/extractor.go index 588d8b2..146f6a7 100644 --- a/internal/parser/extractor.go +++ b/internal/parser/extractor.go @@ -48,6 +48,30 @@ func extractCodeBlock(node *ast.FencedCodeBlock, content []byte) *CodeBlock { } } +// extractFrontmatterLinks returns a Link for each top-level string value in data that starts with # or an http/https scheme prefix. Non-string values are skipped, every Link is positioned at line 1, column 1, and result order follows map iteration. 
+func extractFrontmatterLinks(data map[string]any) []*Link { + if data == nil { + return nil + } + + var links []*Link + for _, value := range data { + str, ok := value.(string) + if !ok { + continue + } + if strings.HasPrefix(str, "#") || strings.HasPrefix(str, "http://") || strings.HasPrefix(str, "https://") { + links = append(links, &Link{ + URL: str, + IsInternal: isInternalLink(str), + Line: 1, + Column: 1, + }) + } + } + return links +} + func extractLink(node *ast.Link, content []byte) *Link { // Use ast.Walk to recursively extract all text (handles emphasis, code, etc.) var textBuf bytes.Buffer diff --git a/internal/parser/parser.go b/internal/parser/parser.go index f71a894..d82ed6a 100644 --- a/internal/parser/parser.go +++ b/internal/parser/parser.go @@ -56,6 +56,7 @@ func (p *Parser) Parse(path string, content []byte) (*Document, error) { frontMatter = &FrontMatter{ Format: "yaml", Data: metaData, + Links: extractFrontmatterLinks(metaData), } } diff --git a/internal/parser/types.go b/internal/parser/types.go index 06e284a..6512f3f 100644 --- a/internal/parser/types.go +++ b/internal/parser/types.go @@ -96,6 +96,7 @@ type FrontMatter struct { Format string // "yaml" or "toml" Content string Data map[string]any + Links []*Link } // LineLocatable is implemented by elements that have a line position diff --git a/internal/rules/link.go b/internal/rules/link.go index 0d2f23b..fa12417 100644 --- a/internal/rules/link.go +++ b/internal/rules/link.go @@ -42,8 +42,11 @@ func (r *LinkValidationRule) ValidateWithContext(ctx *vast.Context) []Violation linkRule := ctx.Schema.Links - // Collect all links from the document (not just schema-matched sections) + // Collect links from the document tree, plus any extracted from frontmatter links := r.collectAllLinks(ctx.Tree.Document.Root) + if fm := ctx.Tree.Document.FrontMatter; fm != nil { + links = append(links, fm.Links...) 
+ } // Get document directory for relative path resolution docDir := filepath.Dir(ctx.Tree.Document.Path) diff --git a/internal/rules/link_test.go b/internal/rules/link_test.go index 2f61b87..1418b96 100644 --- a/internal/rules/link_test.go +++ b/internal/rules/link_test.go @@ -454,6 +454,160 @@ func TestLinkValidationRootRelativePath(t *testing.T) { } } +func TestLinkValidationFrontmatterBlockedDomain(t *testing.T) { + p := parser.New() + doc, err := p.Parse("test.md", []byte("---\nrepo: https://blocked.com/repo\n---\n\n# Title\n")) + if err != nil { + t.Fatalf("Parse() error: %v", err) + } + + s := &schema.Schema{ + Links: &schema.LinkRule{ + BlockedDomains: []string{"blocked.com"}, + }, + } + + ctx := vast.NewContext(doc, s, "") + rule := NewLinkValidationRule() + violations := rule.ValidateWithContext(ctx) + + if len(violations) == 0 { + t.Fatal("Expected violation for frontmatter URL to blocked domain") + } + + found := false + for _, v := range violations { + if strings.Contains(v.Message, "blocked domain") { + found = true + break + } + } + if !found { + t.Errorf("Expected violation mentioning blocked domain, got: %v", violations) + } +} + +func TestLinkValidationFrontmatterAllowedDomains(t *testing.T) { + p := parser.New() + doc, err := p.Parse("test.md", []byte("---\nrepo: https://notallowed.com/repo\n---\n\n# Title\n")) + if err != nil { + t.Fatalf("Parse() error: %v", err) + } + + s := &schema.Schema{ + Links: &schema.LinkRule{ + AllowedDomains: []string{"github.com"}, + }, + } + + ctx := vast.NewContext(doc, s, "") + rule := NewLinkValidationRule() + violations := rule.ValidateWithContext(ctx) + + if len(violations) == 0 { + t.Fatal("Expected violation for frontmatter URL not in allowed domains") + } +} + +func TestLinkValidationFrontmatterAllowedDomainPass(t *testing.T) { + p := parser.New() + doc, err := p.Parse("test.md", []byte("---\nrepo: https://github.com/user/repo\n---\n\n# Title\n")) + if err != nil { + t.Fatalf("Parse() error: %v", err) + } + 
+ s := &schema.Schema{ + Links: &schema.LinkRule{ + AllowedDomains: []string{"github.com"}, + }, + } + + ctx := vast.NewContext(doc, s, "") + rule := NewLinkValidationRule() + violations := rule.ValidateWithContext(ctx) + + if len(violations) != 0 { + t.Errorf("Expected no violations for allowed domain, got %d: %v", len(violations), violations) + } +} + +func TestLinkValidationFrontmatterNonStringSkipped(t *testing.T) { + p := parser.New() + doc, err := p.Parse("test.md", []byte("---\ncount: 123\n---\n\n# Title\n")) + if err != nil { + t.Fatalf("Parse() error: %v", err) + } + + s := &schema.Schema{ + Links: &schema.LinkRule{ + BlockedDomains: []string{"blocked.com"}, + }, + } + + ctx := vast.NewContext(doc, s, "") + rule := NewLinkValidationRule() + violations := rule.ValidateWithContext(ctx) + + if len(violations) != 0 { + t.Errorf("Expected no violations for non-URL frontmatter value, got %d", len(violations)) + } +} + +func TestLinkValidationFrontmatterInternalAnchor(t *testing.T) { + p := parser.New() + doc, err := p.Parse("test.md", []byte("---\nref: \"#nonexistent\"\n---\n\n# Title\n")) + if err != nil { + t.Fatalf("Parse() error: %v", err) + } + + s := &schema.Schema{ + Links: &schema.LinkRule{ + ValidateInternal: true, + }, + } + + ctx := vast.NewContext(doc, s, "") + rule := NewLinkValidationRule() + violations := rule.ValidateWithContext(ctx) + + if len(violations) == 0 { + t.Fatal("Expected violation for broken anchor in frontmatter") + } + + found := false + for _, v := range violations { + if strings.Contains(v.Message, "nonexistent") && strings.Contains(v.Message, "does not exist") { + found = true + break + } + } + if !found { + t.Errorf("Expected violation mentioning broken anchor, got: %v", violations) + } +} + +func TestLinkValidationFrontmatterValidAnchor(t *testing.T) { + p := parser.New() + doc, err := p.Parse("test.md", []byte("---\nref: \"#details\"\n---\n\n# Title\n\n## Details\n\nSome content.\n")) + if err != nil { + t.Fatalf("Parse() 
error: %v", err) + } + + s := &schema.Schema{ + Links: &schema.LinkRule{ + ValidateInternal: true, + }, + } + + ctx := vast.NewContext(doc, s, "") + rule := NewLinkValidationRule() + violations := rule.ValidateWithContext(ctx) + + if len(violations) != 0 { + t.Errorf("Expected no violations for valid anchor in frontmatter, got %d: %v", len(violations), violations) + } +} + func TestLinkValidationRootRelativePathBroken(t *testing.T) { tmpDir := t.TempDir() diff --git a/testdata/links/invalid_frontmatter_blocked_domain.md b/testdata/links/invalid_frontmatter_blocked_domain.md new file mode 100644 index 0000000..a73f2f6 --- /dev/null +++ b/testdata/links/invalid_frontmatter_blocked_domain.md @@ -0,0 +1,9 @@ +--- +repo: https://blocked-domain.com/user/repo +--- + +# Test Document + +## Introduction + +This tests frontmatter URL validation against blocked domains.