diff --git a/README.md b/README.md index ca5e5ac..afc8442 100644 --- a/README.md +++ b/README.md @@ -333,6 +333,7 @@ GLOBAL OPTIONS: --sqlnullstr Adds a Null{{ENUM}} type for marshalling a nullable string value to sql. If sqlnullint is specified too, it will be Null{{ENUM}}Str (default: false) --template value, -t value [ --template value, -t value ] Additional template file(s) to generate enums. Use more than one flag for more files. Templates will be executed in alphabetical order. --alias value, -a value [ --alias value, -a value ] Adds or replaces aliases for a non alphanumeric value that needs to be accounted for. [Format should be "key:value,key2:value2", or specify multiple entries, or both!] + --initialism value [ --initialism value ] Initialism(s) to keep fully uppercased in generated const names (e.g., HTTP,URL,ID). Repeatable. --mustparse Adds a Must version of the Parse that will panic on failure. (default: false) --forcelower Forces a camel cased comment to generate lowercased names. (default: false) --forceupper Forces a camel cased comment to generate uppercased names. (default: false) @@ -344,6 +345,23 @@ GLOBAL OPTIONS: --version, -v print the version ``` +### Initialism notes + +- `--initialism` affects generated const identifiers only. It does not modify enum string values. +- `--forcelower` and `--forceupper` control enum string values; they are separate from `--initialism`. +- Initialism rewriting runs after `--alias` replacements and after snake_case to CamelCase conversion. +- With `--nocamel`, initialisms in underscore-separated segments may not be rewritten because CamelCase conversion is skipped. +- Rewriting currently runs on the full identifier (including prefix/type-derived segments), not only on the enum value segment. + +Example: + +```go +// ENUM(created) +type UserId int +``` + +With `--initialism ID`, the const becomes `UserIDCreated` (the `Id` in the type-derived prefix is rewritten too). + ### Syntax The parser looks for comments on your type defs and parse the enum declarations from it. diff --git a/generator/generator.go b/generator/generator.go index cac315a..30e4594 100644 --- a/generator/generator.go +++ b/generator/generator.go @@ -32,10 +32,11 @@ type Generator struct { BuildDate string BuiltBy string GeneratorConfig - t *template.Template - knownTemplates map[string]*template.Template - fileSet *token.FileSet - userTemplateNames []string + t *template.Template + knownTemplates map[string]*template.Template + fileSet *token.FileSet + userTemplateNames []string + initialismReplacements map[string]string } // Enum holds data for a discovered enum in the parsed source @@ -85,6 +86,15 @@ func NewGeneratorWithConfig(config GeneratorConfig) *Generator { GeneratorConfig: config, } + // Precompute initialism replacements from title-cased token to uppercased token. + if len(config.Initialisms) > 0 { + title := cases.Title(language.Und, cases.NoLower) + g.initialismReplacements = make(map[string]string, len(config.Initialisms)) + for _, initialism := range config.Initialisms { + g.initialismReplacements[title.String(strings.ToLower(initialism))] = initialism + } + } + funcs := sprig.TxtFuncMap() funcs["stringify"] = Stringify @@ -142,6 +152,102 @@ func ParseAliases(aliases []string) (map[string]string, error) { return aliasMap, nil } +// ParseInitialisms parses and validates initialism entries from CLI input. +// Each entry can be comma-separated. Initialisms must be all uppercase ASCII letters. +func ParseInitialisms(entries []string) ([]string, error) { + seen := make(map[string]struct{}) + var result []string + + for _, entry := range entries { + parts := strings.Split(entry, ",") + for _, part := range parts { + initialism := strings.TrimSpace(part) + if initialism == "" { + continue + } + for _, r := range initialism { + if r < 'A' || r > 'Z' { + return nil, fmt.Errorf("invalid initialism %q: must be all uppercase ASCII letters", initialism) + } + } + if _, ok := seen[initialism]; !ok { + seen[initialism] = struct{}{} + result = append(result, initialism) + } + } + } + + return result, nil +} + +// applyInitialisms rewrites identifier tokens that match configured initialisms. +// For example, with initialism "HTTP", token "Http" becomes "HTTP". +func (g *Generator) applyInitialisms(name string) string { + if len(g.initialismReplacements) == 0 { + return name + } + + tokens := splitIdentifierTokens(name) + if len(tokens) == 0 { + return name + } + + var builder strings.Builder + builder.Grow(len(name)) + for _, token := range tokens { + if replacement, ok := g.initialismReplacements[token]; ok { + builder.WriteString(replacement) + continue + } + builder.WriteString(token) + } + + return builder.String() +} + +func splitIdentifierTokens(value string) []string { + if value == "" { + return nil + } + + runes := []rune(value) + start := 0 + tokens := make([]string, 0, len(runes)) + + for i := 1; i < len(runes); i++ { + if shouldSplitToken(runes, i) { + tokens = append(tokens, string(runes[start:i])) + start = i + } + } + + tokens = append(tokens, string(runes[start:])) + return tokens +} + +func shouldSplitToken(runes []rune, index int) bool { + prev := runes[index-1] + curr := runes[index] + + if prev == '_' || curr == '_' { + return true + } + if unicode.IsDigit(prev) && !unicode.IsDigit(curr) { + return true + } + if !unicode.IsDigit(prev) && unicode.IsDigit(curr) { + return true + } + if unicode.IsLower(prev) && unicode.IsUpper(curr) { + return true + } + if unicode.IsUpper(prev) && unicode.IsUpper(curr) && index+1 < len(runes) && unicode.IsLower(runes[index+1]) { + return true + } + + return false +} + // GenerateFromFile is responsible for orchestrating the Code generation. It results in a byte array // that can be written to any file desired. It has already had goimports run on the code before being returned. func (g *Generator) GenerateFromFile(inputFile string) ([]byte, error) { @@ -378,6 +484,7 @@ func (g *Generator) parseEnum(ts *ast.TypeSpec) (*Enum, error) { if !g.LeaveSnakeCase { prefixedName = snakeToCamelCase(prefixedName) } + prefixedName = g.applyInitialisms(prefixedName) } ev := EnumValue{Name: name, RawName: rawName, PrefixedName: prefixedName, ValueStr: valueStr, ValueInt: data, Comment: comment} diff --git a/generator/generator_test.go b/generator/generator_test.go index 23f2264..daf7395 100644 --- a/generator/generator_test.go +++ b/generator/generator_test.go @@ -531,6 +531,7 @@ func TestNewGeneratorWithConfig(t *testing.T) { JSONPkg: "custom/json", Prefix: "TestPrefix", BuildTags: []string{"tag1", "tag2"}, + Initialisms: []string{"HTTP", "URL"}, NoComments: true, Values: true, } @@ -546,6 +547,7 @@ func TestNewGeneratorWithConfig(t *testing.T) { assert.Equal(t, config.JSONPkg, g.JSONPkg) assert.Equal(t, config.Prefix, g.Prefix) assert.Equal(t, config.BuildTags, g.BuildTags) + assert.Equal(t, config.Initialisms, g.Initialisms) assert.Equal(t, config.NoComments, g.NoComments) assert.Equal(t, config.Values, g.Values) @@ -638,6 +640,7 @@ func TestAllOptionsIntegration(t *testing.T) { WithJsonPkg("custom/json"), WithNoComments(), WithBuildTags("integration", "test"), + WithInitialisms("HTTP", "URL"), ) assert.True(t, g.SQLInt) @@ -645,6 +648,7 @@ func TestAllOptionsIntegration(t *testing.T) { assert.Equal(t, "custom/json", g.JSONPkg) assert.True(t, g.NoComments) assert.Equal(t, []string{"integration", "test"}, g.BuildTags) + assert.Equal(t, []string{"HTTP", "URL"}, g.Initialisms) } // TestGeneratorConfigWithTemplates tests NewGeneratorWithConfig with templates @@ -989,3 +993,317 @@ type Greek string assert.Contains(t, outputStr, "var ErrInvalidGreek") assert.Contains(t, outputStr, "lookupSqlIntGreek") } + +func TestInitialismParsing(t *testing.T) { + tests := map[string]struct { + input []string + result []string + err string + }{ + "no initialisms": { + result: nil, + }, + "single entry": { + input: []string{"HTTP"}, + result: []string{"HTTP"}, + }, + "comma separated": { + input: []string{"HTTP,URL,ID"}, + result: []string{"HTTP", "URL", "ID"}, + }, + "multiple flags": { + input: []string{"HTTP", "URL,ID", "API"}, + result: []string{"HTTP", "URL", "ID", "API"}, + }, + "deduplication": { + input: []string{"HTTP,HTTP,URL"}, + result: []string{"HTTP", "URL"}, + }, + "invalid lowercase": { + input: []string{"Http"}, + err: `invalid initialism "Http": must be all uppercase ASCII letters`, + }, + "invalid number": { + input: []string{"H2"}, + err: `invalid initialism "H2": must be all uppercase ASCII letters`, + }, + "empty entries ignored": { + input: []string{"HTTP,,URL"}, + result: []string{"HTTP", "URL"}, + }, + } + + for name, tc := range tests { + t.Run(name, func(t *testing.T) { + result, err := ParseInitialisms(tc.input) + if tc.err != "" { + require.Error(t, err) + require.EqualError(t, err, tc.err) + } else { + require.NoError(t, err) + require.Equal(t, tc.result, result) + } + }) + } +} + +func TestWithInitialisms(t *testing.T) { + config := &GeneratorConfig{} + option := WithInitialisms("HTTP", "URL") + option(config) + assert.Equal(t, []string{"HTTP", "URL"}, config.Initialisms) + + // Test appending + option2 := WithInitialisms("API") + option2(config) + assert.Equal(t, []string{"HTTP", "URL", "API"}, config.Initialisms) +} + +func TestInitialismsInGeneration(t *testing.T) { + input := `package test + +// ENUM( +// get_http_url, +// post_api_request, +// fetch_html_id, +// ) +type Method int +` + g := NewGenerator(WithInitialisms("HTTP", "URL", "API", "ID", "HTML")) + f, err := parser.ParseFile(g.fileSet, "test.go", input, parser.ParseComments) + require.NoError(t, err) + + output, err := g.Generate(f) + require.NoError(t, err) + + outputStr := string(output) + // Verify initialisms are fully uppercased in const names + assert.Contains(t, outputStr, "MethodGetHTTPURL") + assert.Contains(t, outputStr, "MethodPostAPIRequest") + assert.Contains(t, outputStr, "MethodFetchHTMLID") + // Verify string values are NOT affected (stored in _MethodName concatenation) + assert.Contains(t, outputStr, "get_http_urlpost_api_requestfetch_html_id") +} + +func TestInitialismsKfeaturesStyle(t *testing.T) { + input := `package test + +// ENUM( +// bpf_lsm, +// btf, +// bpf_tracing, +// ima, +// ) +type Feature int +` + g := NewGenerator(WithInitialisms("BPF", "LSM", "BTF", "IMA")) + f, err := parser.ParseFile(g.fileSet, "test.go", input, parser.ParseComments) + require.NoError(t, err) + + output, err := g.Generate(f) + require.NoError(t, err) + + outputStr := string(output) + assert.Contains(t, outputStr, "FeatureBPFLSM") + assert.Contains(t, outputStr, "FeatureBTF") + assert.Contains(t, outputStr, "FeatureBPFTracing") + assert.Contains(t, outputStr, "FeatureIMA") +} + +func TestInitialismsWithLeaveSnakeCase(t *testing.T) { + input := `package test + +// ENUM(get_http_url) +type Method int +` + g := NewGenerator(WithoutSnakeToCamel(), WithInitialisms("HTTP", "URL")) + f, err := parser.ParseFile(g.fileSet, "test.go", input, parser.ParseComments) + require.NoError(t, err) + + output, err := g.Generate(f) + require.NoError(t, err) + + outputStr := string(output) + // With nocamel, snakeToCamelCase is skipped. cases.Title only uppercases + // the first rune of the entire rawName, so underscore-separated parts after + // the first remain lowercase. applyInitialisms finds no title-cased matches, + // so initialisms have no effect in nocamel mode. + assert.Contains(t, outputStr, "MethodGet_http_url") +} + +func TestInitialismsWithNoPrefix(t *testing.T) { + input := `package test + +// ENUM(http_url) +type Method int +` + g := NewGenerator(WithNoPrefix(), WithInitialisms("HTTP", "URL")) + f, err := parser.ParseFile(g.fileSet, "test.go", input, parser.ParseComments) + require.NoError(t, err) + + output, err := g.Generate(f) + require.NoError(t, err) + + outputStr := string(output) + assert.Contains(t, outputStr, "HTTPURL") +} + +func TestInitialismsWithStringEnum(t *testing.T) { + input := `package test + +// ENUM(http_api, rest_url) +type Endpoint string +` + g := NewGenerator(WithInitialisms("HTTP", "API", "URL", "REST")) + f, err := parser.ParseFile(g.fileSet, "test.go", input, parser.ParseComments) + require.NoError(t, err) + + output, err := g.Generate(f) + require.NoError(t, err) + + outputStr := string(output) + assert.Contains(t, outputStr, "EndpointHTTPAPI") + assert.Contains(t, outputStr, "EndpointRESTURL") + // String values unchanged + assert.Contains(t, outputStr, `"http_api"`) + assert.Contains(t, outputStr, `"rest_url"`) +} + +func TestInitialismOrdering(t *testing.T) { + input := `package test + +// ENUM(ide, id_value) +type Thing int +` + // ID and IDE overlap should resolve by exact token match. + g := NewGenerator(WithInitialisms("ID", "IDE")) + f, err := parser.ParseFile(g.fileSet, "test.go", input, parser.ParseComments) + require.NoError(t, err) + + output, err := g.Generate(f) + require.NoError(t, err) + + outputStr := string(output) + assert.Contains(t, outputStr, "ThingIDE") + assert.Contains(t, outputStr, "ThingIDValue") +} + +func TestInitialismsDoNotReplaceSubstringsInsideTokens(t *testing.T) { + input := `package test + +// ENUM(apiary, ideology, id_value, api_id) +type Thing int +` + g := NewGenerator(WithInitialisms("API", "IDE", "ID")) + f, err := parser.ParseFile(g.fileSet, "test.go", input, parser.ParseComments) + require.NoError(t, err) + + output, err := g.Generate(f) + require.NoError(t, err) + + outputStr := string(output) + assert.Contains(t, outputStr, "ThingApiary") + assert.Contains(t, outputStr, "ThingIdeology") + assert.Contains(t, outputStr, "ThingIDValue") + assert.Contains(t, outputStr, "ThingAPIID") + assert.NotContains(t, outputStr, "ThingAPIary") + assert.NotContains(t, outputStr, "ThingIDEology") +} + +func TestShouldSplitToken(t *testing.T) { + tests := map[string]struct { + value string + index int + expected bool + }{ + "current rune underscore": { + value: "A_B", + index: 1, + expected: true, + }, + "previous rune underscore": { + value: "A_B", + index: 2, + expected: true, + }, + "digit to letter": { + value: "2A", + index: 1, + expected: true, + }, + "letter to digit": { + value: "A2", + index: 1, + expected: true, + }, + "lower to upper": { + value: "aB", + index: 1, + expected: true, + }, + "upper run before trailing lower": { + value: "HTTPServer", + index: 4, // P|S where next is lower e + expected: true, + }, + "upper run at end": { + value: "HTTP", + index: 3, + expected: false, + }, + "upper to lower": { + value: "Ab", + index: 1, + expected: false, + }, + "digit to digit": { + value: "22", + index: 1, + expected: false, + }, + "lower to lower": { + value: "ab", + index: 1, + expected: false, + }, + } + + for name, tc := range tests { + t.Run(name, func(t *testing.T) { + runes := []rune(tc.value) + require.GreaterOrEqual(t, tc.index, 1) + require.Less(t, tc.index, len(runes)) + assert.Equal(t, tc.expected, shouldSplitToken(runes, tc.index)) + }) + } +} + +func TestSplitIdentifierTokens(t *testing.T) { + tests := map[string]struct { + value string + expected []string + }{ + "empty string": { + value: "", + expected: nil, + }, + "underscore boundaries": { + value: "API_ID", + expected: []string{"API", "_", "ID"}, + }, + "digit boundaries": { + value: "V2API3ID", + expected: []string{"V", "2", "API", "3", "ID"}, + }, + "camel boundaries": { + value: "MyHTTPServer", + expected: []string{"My", "HTTP", "Server"}, + }, + } + + for name, tc := range tests { + t.Run(name, func(t *testing.T) { + assert.Equal(t, tc.expected, splitIdentifierTokens(tc.value)) + }) + } +} diff --git a/generator/options.go b/generator/options.go index df08594..62d3c4a 100644 --- a/generator/options.go +++ b/generator/options.go @@ -23,6 +23,7 @@ type GeneratorConfig struct { ForceUpper bool `json:"force_upper"` NoComments bool `json:"no_comments"` NoParse bool `json:"no_parse"` + Initialisms []string `json:"initialisms"` BuildTags []string `json:"build_tags"` ReplacementNames map[string]string `json:"replacement_names"` TemplateFileNames []string `json:"template_file_names"` @@ -212,3 +213,11 @@ func WithNoParse() Option { g.NoParse = true } } + +// WithInitialisms configures initialisms whose const-name segments should be fully uppercased +// (e.g., "HTTP" makes the generator produce "HTTP" instead of "Http" in const names). +func WithInitialisms(initialisms ...string) Option { + return func(g *GeneratorConfig) { + g.Initialisms = append(g.Initialisms, initialisms...) + } +} diff --git a/main.go b/main.go index a149bc4..be609a7 100644 --- a/main.go +++ b/main.go @@ -42,6 +42,7 @@ type rootT struct { Ptr bool TemplateFileNames cli.StringSlice Aliases cli.StringSlice + Initialisms cli.StringSlice BuildTags cli.StringSlice MustParse bool ForceLower bool @@ -188,6 +189,11 @@ func main() { Usage: "Adds or replaces aliases for a non alphanumeric value that needs to be accounted for. [Format should be \"key:value,key2:value2\", or specify multiple entries, or both!]", Destination: &argv.Aliases, }, + &cli.StringSliceFlag{ + Name: "initialism", + Usage: "Initialism(s) to keep fully uppercased in generated const names (e.g., HTTP,URL,ID). Repeatable.", + Destination: &argv.Initialisms, + }, &cli.BoolFlag{ Name: "mustparse", Usage: "Adds a Must version of the Parse that will panic on failure.", @@ -240,6 +246,10 @@ func main() { if err != nil { return err } + initialisms, err := generator.ParseInitialisms(argv.Initialisms.Value()) + if err != nil { + return err + } for _, fileOption := range argv.FileNames.Value() { // Build configuration structure @@ -281,6 +291,7 @@ func main() { ForceUpper: argv.ForceUpper, NoComments: argv.NoComments, NoParse: argv.NoParse, + Initialisms: initialisms, BuildTags: argv.BuildTags.Value(), ReplacementNames: aliases, TemplateFileNames: templateFileNames,