-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdotenv.zig
More file actions
1683 lines (1489 loc) · 62.8 KB
/
dotenv.zig
File metadata and controls
1683 lines (1489 loc) · 62.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
const std = @import("std");
/// Options controlling how env data is parsed and how parse errors are reported.
pub const ParseOptions = struct {
    /// The logging function used when printing errors.
    /// Set this to `NopLogFn` to disable logging.
    log_fn: fn (comptime format: []const u8, args: anytype) void = DefaultLogFn,
    /// Predicate deciding whether a byte is valid as the first character of a key.
    is_valid_first_key_char_fn: fn (self: Self, char: u8) bool = DefaultIsValidFirstKeyChar,
    /// Predicate deciding whether a byte is valid as any later character of a key.
    is_valid_key_char_fn: fn (self: Self, char: u8) bool = DefaultIsValidKeyChar,
    /// How many characters to print after the point at which a parse error occurred.
    /// This cap only matters when there is no newline within the next
    /// `max_error_line_peek` characters.
    max_error_line_peek: usize = 100,

    const Self = @This();

    /// The default logging function.
    /// At comptime it emits a compile log statement; at runtime it prints to stderr
    /// through `std.debug.print`.
    pub const DefaultLogFn = struct {
        fn log(comptime format: []const u8, args: anytype) void {
            if (!@inComptime()) {
                std.debug.print(format, args);
            } else {
                @compileLog(std.fmt.comptimePrint(format, args));
            }
        }
    }.log;

    /// No-op logging function.
    /// This does NOT log to @compileLog and does not cause a compile error directly.
    /// At comptime, the parsing function still returns an error that can be handled.
    pub const NopLogFn = struct {
        fn log(comptime _: []const u8, _: anytype) void {}
    }.log;

    /// The default check for the first character of a key: matches [a-zA-Z_].
    /// Logs a message through `log_fn` when the character is rejected.
    pub const DefaultIsValidFirstKeyChar = struct {
        fn check(self: Self, char: u8) bool {
            if (std.ascii.isAlphabetic(char) or char == '_') return true;
            self.log_fn("First character for key should be [a-zA-Z_]; got: `{c}`\n", .{char});
            return false;
        }
    }.check;

    /// The default check for any non-first character of a key: matches [a-zA-Z0-9_].
    /// Logs a message through `log_fn` when the character is rejected.
    pub const DefaultIsValidKeyChar = struct {
        fn check(self: Self, char: u8) bool {
            if (std.ascii.isAlphanumeric(char) or char == '_') return true;
            self.log_fn("Key can only contain [a-zA-Z0-9_]; got: `{c}`\n", .{char});
            return false;
        }
    }.check;

    /// Internal helper: invokes `is_valid_first_key_char_fn` with these options.
    fn is_valid_first_key_char(self: Self, char: u8) bool {
        const check = self.is_valid_first_key_char_fn;
        return check(self, char);
    }

    /// Internal helper: invokes `is_valid_key_char_fn` with these options.
    fn is_valid_key_char(self: Self, char: u8) bool {
        const check = self.is_valid_key_char_fn;
        return check(self, char);
    }
};
/// Errors specific to parsing keys
pub const ParseKeyError = error{
/// Returned when the first character of a key (or substitution key) is rejected by
/// `ParseOptions.is_valid_first_key_char_fn` (by default: anything that is not a-zA-Z or '_')
InvalidFirstKeyChar,
/// Returned when a subsequent character in a key (or substitution key) is rejected by
/// `ParseOptions.is_valid_key_char_fn` (by default: anything that is not a-zA-Z0-9 or '_')
/// Also returned when the character immediately after optional whitespace following the key is not '=' (e.g., KEY?=value)
InvalidKeyChar,
/// Returned when EOF is reached before finding '=' after parsing a key
UnexpectedEndOfFile,
};
/// Errors specific to parsing values (includes key errors and allocator errors)
pub const ParseValueError = error{
/// Returned when EOF is reached inside a quoted value (' or ") without a closing quote
UnterminatedQuote,
/// Returned in double-quoted values when an escape sequence is invalid:
/// - \x followed by non-hex digits (0-9a-fA-F), including partial (e.g., \xG or \xGG where G invalid)
/// - \ followed by an unrecognized character (not \\, \", \$, \n, \r, \t, \v, \f, \x)
InvalidEscapeSequence,
/// Returned when parsing a substitution ${KEY} and EOF is reached before finding the closing '}'
UnterminatedSubstitutionBlock,
/// Returned after parsing a value (quoted or unquoted), when skipping trailing whitespace,
/// and encountering a non-newline, non-'#' character (e.g., extra text after closing quote like `"value" extra`)
UnexpectedCharacter,
/// Returned when expanding a substitution ${KEY} and no prior key named KEY exists in the map
SubstitutionKeyNotFound,
} || ParseKeyError || std.mem.Allocator.Error;
/// Every error the file-based loaders (`load`, `loadFrom`) can return:
/// value/key parse errors plus the errors from opening and reading the file.
pub const ParseError = ParseValueError || std.fs.File.OpenError || std.fs.File.ReadError;
/// Read and parse the `.env` file (resolved against the current working directory)
/// into an `EnvType` hashmap.
/// Caller owns the returned hashmap.
pub fn load(allocator: std.mem.Allocator, comptime options: ParseOptions) ParseError!EnvType {
    const default_file_name = ".env";
    return loadFrom(default_file_name, allocator, options);
}
/// Read and parse the env file at `file_name` (opened relative to the current working
/// directory) into an `EnvType` hashmap.
/// The file is closed before parsing begins; the temporary read buffer is released once
/// the result has been built.
/// Caller owns the returned hashmap.
pub fn loadFrom(file_name: []const u8, allocator: std.mem.Allocator, comptime options: ParseOptions) ParseError!EnvType {
    const contents = read: {
        var env_file = try std.fs.cwd().openFile(file_name, .{});
        // Runs on both the success and the error path of the read below.
        defer env_file.close();
        break :read try env_file.readToEndAlloc(allocator, std.math.maxInt(usize));
    };
    defer allocator.free(contents);
    return loadFromData(contents, allocator, options);
}
/// Parse the provided data string into an `EnvType` hashmap.
/// The intermediate parsing `HashMap` is released before returning.
/// Caller owns the `data` memory and the returned hashmap.
pub fn loadFromData(data: []const u8, allocator: std.mem.Allocator, comptime options: ParseOptions) ParseValueError!EnvType {
    const Parser = GetParser(options);
    var parsed = try Parser.parse(data, allocator);
    defer parsed.deinit();
    return EnvType.fromHashMap(&parsed);
}
/// Read and parse the `.env` file to a ComptimeEnvType (actually a hashmap and NOT StaticStringMap).
/// The whole load is evaluated at comptime.
pub fn loadComptime(options: ParseOptions) ParseValueError!ComptimeEnvType {
    const result = comptime loadFromComptime(".env", options);
    return result;
}
/// Embed the env file `file_name` via `@embedFile` and parse it at comptime into a
/// ComptimeEnvType (actually a hashmap and NOT StaticStringMap).
pub fn loadFromComptime(file_name: []const u8, options: ParseOptions) ParseValueError!ComptimeEnvType {
    const result = comptime loadFromDataComptime(@embedFile(file_name), options);
    return result;
}
/// Parse the provided data string into a ComptimeEnvType (actually a hashmap and NOT StaticStringMap).
/// Parsing allocates through `comptime_allocator`.
pub fn loadFromDataComptime(file_data: []const u8, options: ParseOptions) ParseValueError!ComptimeEnvType {
    const Parser = GetParser(options);
    var parsed = try Parser.parse(file_data, comptime_allocator);
    return comptime ComptimeEnvType.fromHashMap(&parsed);
}
/// An allocator that can only be used during comptime evaluation.
/// This is taken from https://github.com/ziglang/zig/issues/1291
pub const comptime_allocator: std.mem.Allocator = struct {
const allocator: std.mem.Allocator = .{
// No allocator state is needed; none of the vtable functions read the context pointer.
.ptr = undefined,
.vtable = &.{
.alloc = comptimeAlloc,
.resize = comptimeResize,
.remap = comptimeRemap,
.free = comptimeFree,
},
};
/// Allocate `len` bytes by declaring a fresh array with the requested alignment.
/// Reaching this at runtime is treated as unreachable.
fn comptimeAlloc(_: *anyopaque, len: usize, alignment: std.mem.Alignment, _: usize) ?[*]u8 {
if (!@inComptime()) unreachable;
// The array length must be comptime-known, which is why this only works at comptime.
var bytes: [len]u8 align(alignment.toByteUnits()) = undefined;
return &bytes;
}
/// In-place resizing is never supported.
fn comptimeResize(_: *anyopaque, _: []u8, _: std.mem.Alignment, _: usize, _: usize) bool {
// Always returning false here ensures that callsites make new allocations that fit
// better, avoiding wasted .cdata and .data memory.
return false;
}
/// Remapping is never supported.
fn comptimeRemap(_: *anyopaque, _: []u8, _: std.mem.Alignment, _: usize, _: usize) ?[*]u8 {
// Always returning null here ensures that callsites make new allocations that fit
// better, avoiding wasted .cdata and .data memory.
return null;
}
/// Freeing is a no-op.
fn comptimeFree(_: *anyopaque, _: []u8, _: std.mem.Alignment, _: usize) void {
// Global variables are garbage-collected by the linker.
}
}.allocator;
/// HashMap implementation used internally while parsing.
/// It is also used for key replacement (`${...}`) lookups.
///
/// This is a bare-bones linear-probing table. It uses 8 bits for the fingerprint,
/// unlike the 7 in Zig's standard hashmap, because entries are never removed and
/// therefore no tombstones are required.
///
/// Rationale from the original author: the standard implementation does not work at
/// comptime and carries tombstones that are not needed here. Using it would require a
/// context variant to avoid a new allocation per value, resulting in roughly the same
/// amount of code. This implementation also works at comptime, and converting it to
/// `ComptimeEnvType` / `EnvType` does not need rehashing.
///
/// Keys are stored as (index, length) pairs into `keys_string`; values are stored as
/// (index, length) pairs into `values_string`.
pub const HashMap = struct {
    const Size = u32;
    /// A substring described by its start index and length inside a backing string.
    pub const String = packed struct { idx: usize, len: usize };
    pub const KV = struct { key: []const u8, value: []const u8 };
    const default_max_load_percentage = std.hash_map.default_max_load_percentage;

    /// The string that all key `String`s index into (not owned by the map).
    keys_string: []const u8,
    /// The string containing the concatenated values.
    values_string: std.ArrayList(u8) = .{},
    /// Start of the single allocated block: the key slots.
    _keys: [*]String = &.{},
    /// The value slots; these come right after the keys.
    _values: [*]String = &.{},
    /// Per-slot fingerprints at the end of the allocated block; 0 means unused.
    _meta: [*]u8 = &.{},
    /// Length of the keys, values, and meta arrays.
    cap: Size = 0,
    /// How many elements are in use.
    size: Size = 0,
    /// Remaining insertion budget; compared against `size` in `grow` to decide when to resize.
    available: Size = 0,
    /// The allocator that stores everything.
    allocator: std.mem.Allocator,
    /// Total length of all distinct keys.
    /// NOTE: this is not the same as `keys_string.len`, as `keys_string` contains unused parts.
    keys_string_len: usize = 0,

    pub inline fn keys(self: *const @This()) []String {
        return self._keys[0..self.cap];
    }

    pub inline fn values(self: *const @This()) []String {
        return self._values[0..self.cap];
    }

    pub inline fn meta(self: *const @This()) []u8 {
        return self._meta[0..self.cap];
    }

    /// Create a map with room for at least `cap` slots, rounded up to a power of two.
    /// A `cap` of 0 is treated as 1. If rounding up overflows `Size`, the capacity
    /// falls back to 16.
    pub fn init(keys_string: []const u8, cap: Size, allocator: std.mem.Allocator) !@This() {
        @setEvalBranchQuota(1000_000);
        // `ceilPowerOfTwo` asserts a non-zero input, so clamp a zero request up to 1.
        const c = std.math.ceilPowerOfTwo(Size, @max(cap, 1)) catch 16;
        // One block: `c` key slots, then `c` value slots, then `c` fingerprint bytes.
        const mem = try allocator.alignedAlloc(u8, std.mem.Alignment.of(String), (2 * @sizeOf(String) + 1) * c);
        // Only the fingerprints need initialising: 0 marks a slot as unused.
        @memset(mem[2 * c * @sizeOf(String) ..], 0);
        return .{
            .keys_string = keys_string,
            ._keys = @alignCast(@ptrCast(mem.ptr)),
            ._values = @alignCast(@ptrCast(mem[c * @sizeOf(String) ..].ptr)),
            ._meta = mem[2 * c * @sizeOf(String) ..].ptr,
            .cap = c,
            .available = c * default_max_load_percentage / 100,
            .allocator = allocator,
        };
    }

    /// Returns the hash of `key` and its non-zero 8-bit fingerprint (the top hash byte,
    /// with 0 remapped to 1 because 0 marks an unused slot).
    fn getHFP(key: []const u8) std.meta.Tuple(&.{ u64, u8 }) {
        const h = std.hash_map.StringContext.hash(undefined, key);
        const fp: u8 = @intCast(h >> 56);
        return .{ h, if (fp == 0) 1 else fp };
    }

    fn hashString(self: *const @This(), string: String) u64 {
        return std.hash_map.StringContext.hash(undefined, self.keys_string[string.idx..][0..string.len]);
    }

    fn eqlString(self: *const @This(), string: String, other: []const u8) bool {
        return std.mem.eql(u8, self.keys_string[string.idx..][0..string.len], other);
    }

    /// Linear probe starting at `hash`; returns either the slot holding `key` or the
    /// first unused slot. Requires `cap > 0` and at least one unused slot.
    fn getIndex(self: *const @This(), fingerprint: u8, hash: u64, key: []const u8) usize {
        var i: usize = @intCast(hash & (self.cap - 1));
        while (self.meta()[i] != 0) : (i = (i + 1) & (self.cap - 1)) {
            if (self.meta()[i] == fingerprint and self.eqlString(self.keys()[i], key)) break;
        }
        return i;
    }

    /// Look up `key`; returns the value slice inside `values_string`, or null if absent.
    pub fn get(self: *const @This(), key: []const u8) ?[]const u8 {
        @setEvalBranchQuota(1000_000);
        // A map that never allocated has no slots; probing would underflow `cap - 1`.
        if (self.cap == 0) return null;
        const hash, const fingerprint = getHFP(key);
        const i = self.getIndex(fingerprint, hash, key);
        if (self.meta()[i] == 0) return null;
        const v = self.values()[i];
        return self.values_string.items[v.idx..][0..v.len];
    }

    /// Insert `key` -> `value`, or overwrite the value if the key already exists.
    /// `key` indexes into `keys_string`; `value` indexes into `values_string`.
    pub fn put(self: *@This(), key: String, value: String) !void {
        @setEvalBranchQuota(1000_000);
        try self.grow();
        const kstr = self.keys_string[key.idx..][0..key.len];
        const hash, const fingerprint = getHFP(kstr);
        const i = self.getIndex(fingerprint, hash, kstr);
        if (self.meta()[i] == 0) {
            // New key: claim the slot and update the bookkeeping.
            self.meta()[i] = fingerprint;
            self.keys()[i] = key;
            self.size += 1;
            self.available -= 1;
            self.keys_string_len += key.len;
        }
        self.values()[i] = value;
    }

    /// Reallocate into a larger table when the insertion budget is used up.
    fn grow(old: *@This()) !void {
        @setEvalBranchQuota(1000_000);
        if (old.available > old.size) return;
        var self = try init(old.keys_string, if (old.size == 0) 16 else old.size * 2, old.allocator);
        self.values_string = old.values_string;
        self.size = old.size;
        self.keys_string_len = old.keys_string_len;
        for (old.meta(), old.keys(), old.values()) |m, k, v| {
            if (m == 0) continue;
            const kstr = self.keys_string[k.idx..][0..k.len];
            const hash, _ = getHFP(kstr);
            // Keys are already unique, so only an unused slot has to be found.
            var i: usize = @intCast(hash & (self.cap - 1));
            while (self.meta()[i] != 0) : (i = (i + 1) & (self.cap - 1)) {}
            self.meta()[i] = m;
            self.keys()[i] = k;
            self.values()[i] = v;
        }
        old.allocator.free(old.allocation());
        old.* = self;
    }

    /// The whole allocated block (keys + values + meta) as a byte slice.
    fn allocation(self: *@This()) []align(@alignOf(String)) u8 {
        return @as([*]align(@alignOf(String)) u8, @alignCast(@ptrCast(self._keys)))[0 .. (2 * @sizeOf(String) + 1) * self.cap];
    }

    /// Free the slot block and the values string; the map must not be used afterwards.
    pub fn deinit(self: *@This()) void {
        self.allocator.free(self.allocation());
        self._keys = undefined;
        self._values = undefined;
        self._meta = undefined;
        self.values_string.deinit(self.allocator);
        self.values_string = undefined;
    }
};
/// A type to store the parsed data at comptime. It uses `const` everything to bypass the
/// "runtime values can't hold a reference to a comptime variable" error.
///
/// Unlike `HashMap`, the string data has no unused sections (see the comment in
/// `fromHashMap`). Each bucket stores only the key's index and length: a value starts
/// right after its key and ends at the `key_idx` of the following bucket.
pub const ComptimeEnvType = struct {
    const Self = @This();
    const Size = u32;
    pub const KV = HashMap.KV;
    pub const Bucket = struct {
        key_idx: KeyIdxType,
        key_len: KeyLenType,
        const KeyIdxType = std.meta.Int(.unsigned, @min(@bitSizeOf(usize), 40));
        const KeyLenType = std.meta.Int(.unsigned, if (@bitSizeOf(usize) < 40) @bitSizeOf(usize) else 24);
    };

    /// key+value strings concatenated together
    data: []const u8 = &.{},
    /// Buckets of the hashmap (`cap + 1` entries; the extra one marks the end of `data`)
    _buckets: [*]const Bucket = &.{},
    /// Metadata (fingerprints) of the hashmap; 0 means the slot is unused
    _meta: [*]const u8 = &.{},
    /// Capacity of the hashmap
    cap: Size = 0,
    /// How many elements are in use
    size: Size = 0,

    /// Create a new ComptimeEnvType from a HashMap, keeping every entry in the same slot.
    pub fn fromHashMap(comptime hm: *HashMap) @This() {
        @setEvalBranchQuota(1000_000);
        comptime {
            var self: @This() = .{ .cap = hm.cap, .size = hm.size };
            var buckets_v: []const Bucket = &.{};
            var meta_v: []const u8 = &.{};
            var last_exists = false;
            for (hm.meta(), hm.keys(), hm.values()) |m, k, v| {
                meta_v = meta_v ++ &[_]u8{m};
                if (m == 0) {
                    if (last_exists) {
                        // The previous slot is used, so this bucket's `key_idx` marks
                        // where the previous value ends.
                        buckets_v = buckets_v ++ &[_]Bucket{.{ .key_idx = @intCast(self.data.len), .key_len = undefined }};
                    } else {
                        buckets_v = buckets_v ++ &[_]Bucket{undefined};
                    }
                    last_exists = false;
                } else {
                    const ks = hm.keys_string[k.idx..][0..k.len];
                    const vs = hm.values_string.items[v.idx..][0..v.len];
                    buckets_v = buckets_v ++ &[_]Bucket{.{ .key_idx = @intCast(self.data.len), .key_len = @intCast(ks.len) }};
                    // We re-append to the data because if any key was overwritten, there would be
                    // an unused value string that would not get GC'd (last tested with zig-0.15.1)
                    self.data = self.data ++ ks ++ vs;
                    last_exists = true;
                }
            }
            std.debug.assert(buckets_v.len == self.cap);
            std.debug.assert(meta_v.len == self.cap);
            // Trailing bucket: gives the value in the last slot an end position.
            buckets_v = buckets_v ++ &[_]Bucket{.{ .key_idx = @intCast(self.data.len), .key_len = undefined }};
            self._buckets = buckets_v.ptr;
            self._meta = meta_v.ptr;
            return self;
        }
    }

    /// Iterator over the key/value pairs, in slot order.
    pub const Iterator = struct {
        map: *const Self,
        i: usize = 0,

        /// Returns the next used entry, or null once every slot has been visited.
        pub fn next(it: *Iterator) ?KV {
            if (it.i >= it.map.capacity()) return null;
            while (it.i < it.map.capacity()) {
                defer it.i += 1;
                if (it.map.meta()[it.i] == 0) continue;
                const bucket = it.map.buckets()[it.i];
                const next_bucket = it.map.buckets()[it.i + 1];
                return .{
                    .key = it.map.data[@intCast(bucket.key_idx)..][0..@intCast(bucket.key_len)],
                    .value = it.map.data[0..@intCast(next_bucket.key_idx)][@intCast(bucket.key_idx)..][@intCast(bucket.key_len)..],
                };
            }
            return null;
        }
    };

    pub fn iterator(self: *const @This()) Iterator {
        return .{ .map = self };
    }

    pub inline fn count(self: *const @This()) usize {
        return self.size;
    }

    pub inline fn capacity(self: *const @This()) usize {
        return self.cap;
    }

    pub inline fn buckets(self: *const @This()) []const Bucket {
        return self._buckets[0 .. self.cap + 1];
    }

    pub inline fn meta(self: *const @This()) []const u8 {
        return self._meta[0..self.cap];
    }

    /// Look up `key` with a key that does not have to be comptime-known.
    /// Returns the value slice inside `data`, or null if the key is absent.
    pub fn getRuntime(self: *const @This(), key: []const u8) ?[]const u8 {
        // An empty map (cap == 0) has no slots; probing would underflow `cap - 1`.
        if (self.cap == 0) return null;
        const hash, const fingerprint = HashMap.getHFP(key);
        var i: usize = @intCast(hash & (self.cap - 1));
        while (self.meta()[i] != 0) : (i = (i + 1) & (self.cap - 1)) {
            const bucket = self.buckets()[i];
            if (self.meta()[i] == fingerprint and std.mem.eql(u8, key, self.data[@intCast(bucket.key_idx)..][0..@intCast(bucket.key_len)])) {
                const next = self.buckets()[i + 1];
                return self.data[0..@intCast(next.key_idx)][@intCast(bucket.key_idx)..][@intCast(bucket.key_len)..];
            }
        }
        return null;
    }

    /// Look up `key` at comptime; returns the value or null if the key is absent.
    pub fn get(comptime self: *const @This(), comptime key: []const u8) ?[]const u8 {
        return comptime self.getRuntime(key);
    }

    /// Invalidate the map's pointers. No memory is released here.
    pub fn deinit(comptime self: *@This()) void {
        self.data = undefined;
        self._buckets = undefined;
        self._meta = undefined;
    }
};
/// A type to store the parsed data at runtime. Stores buckets + metadata + key + value strings
/// in a single allocation laid out as:
/// `[cap + 1 buckets][cap metadata bytes][concatenated key + value strings]`.
///
/// Uses the same bucket scheme as `ComptimeEnvType`: a value starts right after its key
/// and ends at the `key_idx` of the following bucket.
pub const EnvType = struct {
    const Size = u32;
    pub const KV = HashMap.KV;
    pub const Bucket = ComptimeEnvType.Bucket;

    /// This is a mid-way pointer: before it are the buckets, at it are the `cap` metadata
    /// bytes, and after those the concatenated key + value strings
    _meta: [*]u8 = &.{},
    /// This is the size of the key + value strings region
    _data_size: usize = 0,
    /// This is the size of the buckets / metadata region
    cap: Size = 0,
    /// How many elements are in use
    size: Size = 0,

    /// Build an EnvType from `hm`, allocating with `hm.allocator` and copying all keys
    /// and values, so the result does not reference `hm` or its strings afterwards.
    /// Caller owns the hashmap and must release it with `deinit` using the same allocator.
    pub fn fromHashMap(hm: *HashMap) !@This() {
        const allocation_size = (hm.cap + 1) * @sizeOf(Bucket) + hm.cap + (hm.keys_string_len + hm.values_string.items.len);
        const allocation = try hm.allocator.alignedAlloc(u8, std.mem.Alignment.of(Bucket), allocation_size);
        const retval: @This() = .{
            ._meta = allocation[(hm.cap + 1) * @sizeOf(Bucket) ..].ptr,
            ._data_size = hm.keys_string_len + hm.values_string.items.len,
            .size = hm.size,
            .cap = hm.cap,
        };
        const buckets_v = @constCast(retval.buckets());
        const meta_v = @constCast(retval.meta());
        const data_v = @constCast(retval.data());
        var data_idx: usize = 0;
        var last_exists = false;
        for (hm.meta(), hm.keys(), hm.values(), 0..) |m, k, v, i| {
            meta_v[i] = m;
            if (m == 0) {
                if (last_exists) {
                    // The previous slot is used, so this bucket's `key_idx` marks where
                    // the previous value ends.
                    buckets_v[i] = .{ .key_idx = @intCast(data_idx), .key_len = undefined };
                }
                last_exists = false;
            } else {
                const ks = hm.keys_string[k.idx..][0..k.len];
                const vs = hm.values_string.items[v.idx..][0..v.len];
                buckets_v[i] = .{ .key_idx = @intCast(data_idx), .key_len = @intCast(ks.len) };
                @memcpy(data_v[data_idx..][0..ks.len], ks);
                data_idx += ks.len;
                @memcpy(data_v[data_idx..][0..vs.len], vs);
                data_idx += vs.len;
                last_exists = true;
            }
        }
        // Trailing bucket: gives the value in the last slot an end position.
        buckets_v[buckets_v.len - 1] = .{ .key_idx = @intCast(data_idx), .key_len = undefined };
        return retval;
    }

    /// Iterator over the key/value pairs. It stores pointers to the buckets, metadata, and key+value strings
    /// and not the hashmap itself because, since the hashmap contains a mid-way pointer, it would take more
    /// cycles to derive the buckets and key+value strings from the hashmap.
    pub const Iterator = struct {
        /// Buckets of the hashmap (`cap + 1` entries)
        buckets: [*]const Bucket,
        /// Metadata of the hashmap
        meta: [*]const u8,
        /// key+value strings concatenated together
        data: [*]const u8,
        /// Capacity of the hashmap
        cap: Size,
        /// The current index
        i: Size = 0,

        /// Returns the next used entry, or null once every slot has been visited.
        pub fn next(it: *Iterator) ?KV {
            while (it.i < it.cap) {
                defer it.i += 1;
                if (it.meta[it.i] == 0) continue;
                const bucket = it.buckets[it.i];
                // The bucket array has `cap + 1` entries, so `i + 1` is valid even for
                // the last slot (`i == cap - 1`).
                const next_bucket = it.buckets[it.i + 1];
                return .{
                    .key = it.data[@intCast(bucket.key_idx)..][0..@intCast(bucket.key_len)],
                    .value = it.data[0..@intCast(next_bucket.key_idx)][@intCast(bucket.key_idx)..][@intCast(bucket.key_len)..],
                };
            }
            return null;
        }
    };

    pub fn iterator(self: *const @This()) Iterator {
        return .{ .buckets = self.buckets().ptr, .meta = self.meta().ptr, .data = self.data().ptr, .cap = self.cap };
    }

    /// The concatenated key + value strings region.
    pub inline fn data(self: *const @This()) []const u8 {
        return self._meta[self.cap..][0..self._data_size];
    }

    pub inline fn count(self: *const @This()) usize {
        return self.size;
    }

    pub inline fn capacity(self: *const @This()) usize {
        return self.cap;
    }

    /// The `cap + 1` buckets, located immediately before `_meta` in the allocation.
    pub inline fn buckets(self: *const @This()) []const Bucket {
        return @as([*]const Bucket, @ptrCast(@alignCast((self._meta - @as(usize, (self.cap + 1) * @sizeOf(Bucket))))))[0 .. self.cap + 1];
    }

    pub inline fn meta(self: *const @This()) []const u8 {
        return self._meta[0..self.cap];
    }

    /// Look up `key`; returns the value slice inside this map's data, or null if absent.
    pub fn get(self: *const @This(), key: []const u8) ?[]const u8 {
        // An empty map (cap == 0) has no slots; probing would underflow `cap - 1`.
        if (self.cap == 0) return null;
        const hash, const fingerprint = HashMap.getHFP(key);
        var i: usize = @intCast(hash & (self.cap - 1));
        const buckets_v = self.buckets();
        const data_v = self.data();
        while (self.meta()[i] != 0) : (i = (i + 1) & (self.cap - 1)) {
            const bucket = buckets_v[i];
            if (self.meta()[i] == fingerprint and std.mem.eql(u8, key, data_v[@intCast(bucket.key_idx)..][0..@intCast(bucket.key_len)])) {
                const next = buckets_v[i + 1];
                return data_v[0..@intCast(next.key_idx)][@intCast(bucket.key_idx)..][@intCast(bucket.key_len)..];
            }
        }
        return null;
    }

    /// Release the backing memory and invalidate this map.
    /// `allocator` must be the allocator the map was created with (the source
    /// `HashMap`'s allocator in `fromHashMap`).
    pub fn deinit(self: *@This(), allocator: std.mem.Allocator) void {
        const buckets_ptr = @as([*]align(@alignOf(Bucket)) u8, @ptrCast(@constCast(self.buckets().ptr)));
        const allocation_size = (self.cap + 1) * @sizeOf(Bucket) + self.cap + self._data_size;
        allocator.free(buckets_ptr[0..allocation_size]);
        self._meta = undefined;
        self._data_size = undefined;
    }
};
/// Returns true when `c` equals any byte in the comptime-known set `chars`.
/// The comparison is done on a vector with one lane per candidate byte.
/// (Original author's note: not sure if this is that good of an idea.)
fn isOneOf(c: u8, comptime chars: []const u8) bool {
    const Lanes = @Vector(chars.len, u8);
    const candidates: Lanes = chars[0..chars.len].*;
    const probe: Lanes = @splat(c);
    // A lane is true exactly where the candidate byte equals `c`.
    return @reduce(.Or, candidates == probe);
}
/// Returns the two-character escape sequence for `c` (backslash, newline, carriage
/// return, tab, vertical tab, form feed), or null when `c` has no escape.
/// A pointer is returned so that it is trivially convertible to a string slice;
/// this is fine because string literals are statically stored in the binary.
fn escaped(c: u8) ?*const [2]u8 {
    const Entry = struct { raw: u8, text: *const [2]u8 };
    const table = [_]Entry{
        .{ .raw = '\\', .text = "\\\\" },
        .{ .raw = '\n', .text = "\\n" },
        .{ .raw = '\r', .text = "\\r" },
        .{ .raw = '\t', .text = "\\t" },
        .{ .raw = '\x0B', .text = "\\v" },
        .{ .raw = '\x0C', .text = "\\f" },
    };
    for (table) |entry| {
        if (entry.raw == c) return entry.text;
    }
    return null;
}
/// Lookup table for decoding hex characters, indexed by `char - '0'`.
/// Only the entries for 0-9, A-F and a-f are initialised; the entries for the
/// characters in between are left undefined.
const HEX_DECODE_ARRAY = blk: {
    var table: ['f' - '0' + 1]u8 = undefined;
    for ("0123456789", 0..) |digit, value| table[digit - '0'] = value;
    for ("ABCDEF", "abcdef", 10..) |upper, lower, value| {
        table[upper - '0'] = value;
        table[lower - '0'] = value;
    }
    break :blk table;
};
/// Decode a single hex digit character into its numeric value via `HEX_DECODE_ARRAY`.
/// `char` must be a valid hex digit (0-9, A-F, a-f): bytes below '0' underflow the
/// index computation and bytes above 'f' index past the end of the table.
inline fn decodeHex(char: u8) u8 {
    const table_index = char - @as(usize, '0');
    return HEX_DECODE_ARRAY[table_index];
}
pub fn GetParser(options: ParseOptions) type {
return struct {
map: HashMap,
at: usize = 0,
line: usize = 0,
line_start: usize = 0,
/// Returns true if the whole string has been parsed
fn done(self: *@This()) bool {
return self.at >= self.map.keys_string.len;
}
/// Returns the current character
fn current(self: *@This()) ?u8 {
if (self.done()) return null;
return self.map.keys_string[self.at];
}
/// Returns the current character, assert that it exists
fn last(self: *@This()) u8 {
std.debug.assert(self.at != 0);
return self.map.keys_string[self.at - 1];
}
/// Returns the current character and advances the pointer
fn take(self: *@This()) ?u8 {
if (self.done()) return null;
self.at += 1;
return self.last();
}
/// Returns the current character (as u9) and advances the pointer
fn takeU9(self: *@This()) u9 {
return self.take() orelse 0x100;
}
/// Skips all characters until the given character is found
fn skipUpto(self: *@This(), comptime end: u8) void {
self.skipUptoAny(std.fmt.comptimePrint("{c}", .{end}));
}
/// Skips all characters until any of the given characters is found
fn skipUptoAny(self: *@This(), comptime end: []const u8) void {
while (self.at < self.map.keys_string.len and !isOneOf(self.current().?, end)) {
self.at += 1;
}
}
/// Skips all characters that belong to the given chars set
fn skipAny(self: *@This(), comptime chars: []const u8) void {
while (self.at < self.map.keys_string.len and isOneOf(self.current().?, chars)) {
self.at += 1;
}
}
/// Returns the current character as a slice (for printing)
fn currentAsSlice(self: *@This()) []const u8 {
std.debug.assert(self.at < self.map.keys_string.len);
return self.map.keys_string[self.at..][0..1];
}
/// Prints the error marker at the current position
fn printErrorMarker(self: *@This()) void {
const at = self.at;
self.map.keys_string = self.map.keys_string[0.. @min(self.at + options.max_error_line_peek, self.map.keys_string.len)];
self.skipUpto('\n');
options.log_fn(":{d}:{d}\n{s}\n", .{self.line, at - self.line_start, self.map.keys_string[self.line_start..self.at]});
if (@inComptime()) {
options.log_fn((" " ** @as(usize, at - self.line_start - 1)) ++ "^\n", .{});
} else {
for (1..at - self.line_start) |_| {
options.log_fn(" ", .{});
}
options.log_fn("^\n", .{});
}
}
/// Parses the key
fn parseKey(self: *@This()) ParseKeyError!?HashMap.String {
// Skip any whitespace / comment lines, break at first non-whitespace character
while (true) {
self.skipAny(" \t\x0B\r\x0C");
const c = self.current() orelse return null;
if (c == '#') {
self.skipUpto('\n');
_ = self.take();
} else if (c == '\n') {
self.line += 1;
self.line_start = self.at;
_ = self.take();
} else break;
}
const start = self.at; // starting index of our key in the string
// ensure first key char is valid
if (!options.is_valid_first_key_char(self.take().?)) {
self.at -= 1;
options.log_fn("Invalid first character `{s}` for key at ", .{escaped(self.current().?) orelse self.currentAsSlice()});
self.printErrorMarker();
return ParseKeyError.InvalidFirstKeyChar;
}
// Consume key chars untile we encounter something unexpected
while (self.current()) |c| {
if (isOneOf(c, " \t\x0B=")) { // The key is done
break;
} else if (!options.is_valid_key_char(c)) { // Parse the key character
options.log_fn("Invalid character `{s}` while parsing key at ", .{escaped(c) orelse self.currentAsSlice()});
self.printErrorMarker();
return ParseKeyError.InvalidKeyChar;
}
self.at += 1;
} else {
options.log_fn("Unexpected end of file while parsing key at ", .{});
self.at = start;
self.printErrorMarker();
return ParseKeyError.UnexpectedEndOfFile;
}
// Index and length of the key inside the provided data (self.map.keys_string)
const retval: HashMap.String = .{ .idx = @intCast(start), .len = @intCast(self.at - start) };
// consume whitespace, then the = character
self.skipAny(" \t\x0B");
const end_char = self.take() orelse {
options.log_fn("Unexpected end of file, expected `=` ", .{});
self.printErrorMarker();
return ParseKeyError.UnexpectedEndOfFile;
};
// There must be an = character. orelse we return an error
if (end_char == '=') return retval;
options.log_fn("Got unexpected `{s}`, expected `=` ", .{escaped(end_char) orelse self.currentAsSlice()});
self.printErrorMarker();
return ParseKeyError.InvalidKeyChar;
}
fn parseValue(self: *@This()) ParseValueError!void {
self.skipAny(" \t\x0B"); // skip any preceding whitespace
if (self.current()) |c| {
return switch (c) { // check if the value is quoted or unquoted
'\'' => self.parseQuotedValue('\''),
'"' => self.parseQuotedValue('"'),
'#' => {
self.skipUpto('\n');
_ = self.take();
return;
},
else => self.parseQuotedValue(null),
};
} else return;
}
/// This function is called when out value is quted and we wanna remove the trailing whitespace
fn trimResultEnd(self: *@This()) void {
while (self.map.values_string.items.len > 0 and isOneOf(self.map.values_string.items[self.map.values_string.items.len - 1], " \t\x0B\r\x0C")) {
self.map.values_string.items.len -= 1;
}
}
/// Parses one value and appends its decoded bytes to `self.map.values_string`.
///
/// Despite the name this handles every kind of value, selected at comptime through `quote_char`:
/// * `'\''` - single-quoted: no `${...}` substitution; `\\` and `\'` are unescaped, a backslash
///            followed by a newline yields just the newline, other escapes are kept verbatim.
/// * `'"'`  - double-quoted: `${...}` substitution plus the escapes
///            `\\ \n \r \t \v \f \xHH \$ \"`; any other escape is an error.
/// * `null` - unquoted: `${...}` substitution; `\\` and `\$` are unescaped, a backslash followed by
///            a newline yields just the newline, other escapes are kept verbatim. The value ends
///            at a newline, a `#` or the end of the data, and its trailing whitespace is trimmed.
///
/// For a quoted value the cursor must be on the opening quote. After the closing quote only
/// whitespace, a `#` comment, a newline or the end of the data may follow.
///
/// Errors: `UnterminatedQuote`, `InvalidEscapeSequence`, `UnterminatedSubstitutionBlock`,
/// `InvalidFirstKeyChar`, `InvalidKeyChar`, `SubstitutionKeyNotFound`, `UnexpectedCharacter`,
/// plus whatever appending to `values_string` can fail with.
fn parseQuotedValue(self: *@This(), comptime quote_char: ?u8) ParseValueError!void {
    if (quote_char) |qc| std.debug.assert(qc == self.take().?);
    // Everything appended from here on belongs to this value; trimming must never go below this.
    const value_start = self.map.values_string.items.len;
    // This is used for logging only
    const quote_string = if (quote_char) |c| comptime std.fmt.comptimePrint(" quoted({c})", .{c}) else "";
    // A labeled switch drives the state machine instead of a while loop.
    // Zig cannot switch on ?u8, so the next byte is widened to a u9 where 0x100 means
    // "no more data"; every prong either continues with the next byte or leaves the switch.
    blk: switch (self.takeU9()) {
        0x100 => { // the data has been exhausted
            // An unquoted value simply ends with the data
            if (quote_char == null) break :blk;
            // A closed quote leaves the switch through `break :blk`, so reaching this prong
            // means the quote was never closed.
            options.log_fn("Unexpected end of file while parsing a{s} value at ", .{quote_string});
            self.printErrorMarker();
            return ParseValueError.UnterminatedQuote;
        },
        '\\' => { // Escape handling depends on the (comptime known) quoting mode
            switch (if (quote_char) |c| @as(u9, c) else 0x100) { // switch on the quote_char
                0x100 => switch (self.takeU9()) {
                    0x100 => continue :blk 0x100,
                    '\\', '$' => |c| try self.map.values_string.append(self.map.allocator, @intCast(c)),
                    '\n' => { // line continuation: keep the newline, drop the backslash
                        self.line += 1;
                        self.line_start = self.at;
                        try self.map.values_string.append(self.map.allocator, '\n');
                    },
                    else => |c| try self.map.values_string.appendSlice(self.map.allocator, &[_]u8{'\\', @intCast(c)}),
                },
                '\'' => switch (self.takeU9()) {
                    0x100 => continue :blk 0x100,
                    '\\', '\'' => |c| try self.map.values_string.append(self.map.allocator, @intCast(c)),
                    '\n' => {
                        self.line += 1;
                        self.line_start = self.at;
                        try self.map.values_string.append(self.map.allocator, '\n');
                    },
                    else => |c| try self.map.values_string.appendSlice(self.map.allocator, &[_]u8{'\\', @intCast(c)}),
                },
                '"' => switch (self.takeU9()) {
                    0x100 => continue :blk 0x100,
                    '\\' => try self.map.values_string.append(self.map.allocator, '\\'),
                    'n' => try self.map.values_string.append(self.map.allocator, '\n'),
                    'r' => try self.map.values_string.append(self.map.allocator, '\r'),
                    't' => try self.map.values_string.append(self.map.allocator, '\t'),
                    'v' => try self.map.values_string.append(self.map.allocator, '\x0B'),
                    'f' => try self.map.values_string.append(self.map.allocator, '\x0C'),
                    'x' => { // \xHH: exactly two hex digits
                        const hexa = self.take() orelse continue :blk 0x100;
                        const hexb = self.take() orelse continue :blk 0x100;
                        if (!std.ascii.isHex(hexa) or !std.ascii.isHex(hexb)) {
                            options.log_fn("Invalid hex escape sequence `\\x{s}{s}` in a{s} value at ", .{
                                escaped(hexa) orelse self.map.keys_string[self.at - 2..][0..1],
                                escaped(hexb) orelse self.map.keys_string[self.at - 1..][0..1],
                                quote_string,
                            });
                            // Point the error marker at the first offending digit
                            self.at -= if (!std.ascii.isHex(hexa)) 2 else 1;
                            self.printErrorMarker();
                            return ParseValueError.InvalidEscapeSequence;
                        }
                        try self.map.values_string.append(self.map.allocator, @intCast((decodeHex(hexa) << 4) | decodeHex(hexb)));
                    },
                    inline '$', '\"' => |c| try self.map.values_string.append(self.map.allocator, c),
                    else => |c_u9| { // Always an error: double quotes require proper escaping
                        options.log_fn("Unexpected escape sequence `\\{s}` in a{s} value at ", .{
                            escaped(@intCast(c_u9)) orelse self.currentAsSlice(), quote_string
                        });
                        self.at -= 1;
                        self.printErrorMarker();
                        return ParseValueError.InvalidEscapeSequence;
                    }
                },
                else => unreachable,
            }
            continue :blk self.takeU9();
        },
        '$' => { // Try to parse the ${...} block
            const next = self.takeU9();
            // Single quotes never substitute, and a `$` not followed by `{` is a plain character
            if (quote_char == '\'' or next != '{') {
                try self.map.values_string.append(self.map.allocator, '$');
                continue :blk next;
            }
            const start = self.at; // Start of the key, we don't strip whitespace in the block
            if (!options.is_valid_first_key_char(self.take() orelse {
                options.log_fn("Unexpected end of file while parsing {{}} in a{s} value at ", .{quote_string});
                self.printErrorMarker();
                return ParseValueError.UnterminatedSubstitutionBlock;
            })) {
                self.at -= 1;
                options.log_fn("Invalid first character `{s}` for key at ", .{escaped(self.current().?) orelse self.currentAsSlice()});
                self.printErrorMarker();
                return ParseKeyError.InvalidFirstKeyChar;
            }
            // Consume key characters up to and including the closing `}`
            while (self.current()) |c| {
                if (c == '}') {
                    self.at += 1;
                    break;
                }
                if (!options.is_valid_key_char(c)) {
                    options.log_fn("Invalid character `{c}` while parsing key at ", .{c});
                    self.printErrorMarker();
                    return ParseKeyError.InvalidKeyChar;
                }
                self.at += 1;
            } else {
                options.log_fn("Unexpected end of file while parsing key for {{}} in a{s} value at ", .{quote_string});
                self.printErrorMarker();
                return ParseValueError.UnterminatedSubstitutionBlock;
            }
            const key = self.map.keys_string[start..self.at - 1]; // without the closing `}`
            const val = self.map.get(key) orelse {
                options.log_fn("Substitution key `{s}` not found in map; at ", .{key});
                self.at = start;
                self.printErrorMarker();
                return ParseValueError.SubstitutionKeyNotFound;
            };
            // `val` may point into `values_string` itself. Growing the list can move its storage
            // and leave `val` dangling, so reserve the room first and look the value up again
            // before copying it to the end.
            try self.map.values_string.ensureUnusedCapacity(self.map.allocator, val.len);
            self.map.values_string.appendSliceAssumeCapacity(self.map.get(key).?);
            continue :blk self.takeU9();
        },
        '\n' => {
            self.line += 1;
            self.line_start = self.at;
            if (quote_char == null) { // a newline ends an unquoted value
                self.trimValueEnd(value_start);
                return;
            }
            try self.map.values_string.append(self.map.allocator, '\n');
            continue :blk self.takeU9();
        },
        else => |c_9| { // default case
            const c: u8 = @intCast(c_9);
            if (quote_char) |qc| {
                if (c == qc) break :blk; // closing quote
            } else if (c == '#') { // a comment ends an unquoted value
                self.skipUpto('\n');
                self.trimValueEnd(value_start);
                return;
            }
            try self.map.values_string.append(self.map.allocator, c);
            continue :blk self.takeU9();
        },
    }
    if (quote_char == null) self.trimValueEnd(value_start);
    // Only whitespace, a comment or a newline may follow the value on the same line
    self.skipAny(" \t\x0B\r\x0C");
    const c = self.take() orelse return;
    if (c == '\n') {
        // The newline has been consumed, so the line bookkeeping has to follow
        self.line += 1;
        self.line_start = self.at;
        return;
    }
    if (c != '#') {
        options.log_fn("Unexpected character `{c}` in a{s} value at ", .{c, quote_string});
        self.printErrorMarker();
        return ParseValueError.UnexpectedCharacter;
    }
    self.skipUpto('\n');
    if (self.take()) |consumed| {
        if (consumed == '\n') {
            self.line += 1;
            self.line_start = self.at;
        }
    }
}
/// Removes trailing whitespace from the value currently being built, never shrinking
/// `values_string` below `value_start`: the bytes in front of it belong to earlier values.
fn trimValueEnd(self: *@This(), value_start: usize) void {
    const buf = &self.map.values_string;
    while (buf.items.len > value_start and isOneOf(buf.items[buf.items.len - 1], " \t\x0B\r\x0C")) {
        buf.items.len -= 1;
    }
}
/// Parses `data` into a `HashMap`; the same code path serves runtime and comptime callers.
///
/// Keys and values are read alternately until `parseKey` reports that no key is left. Each value
/// is recorded as an index/length pair into the map's `values_string` buffer.
/// On any error the partially built map is released before the error is returned.
fn parse(data: []const u8, allocator: std.mem.Allocator) ParseValueError!HashMap {
    @setEvalBranchQuota(1000_000);
    var parser: @This() = .{ .map = try .init(data, 32, allocator) };
    errdefer parser.map.deinit();
    while (true) {
        const key = (try parser.parseKey()) orelse break;
        // Whatever `parseValue` appends past this point is the value of `key`
        const value_start = parser.map.values_string.items.len;
        try parser.parseValue();
        const value_len = parser.map.values_string.items.len - value_start;
        try parser.map.put(key, .{ .idx = value_start, .len = value_len });
    }
    return parser.map;
}
};
}
//------
// Tests
//------
// Reference every declaration reachable from this file so that nested declarations
// (and the tests they contain) are analyzed when the test suite is built.
test {
std.testing.refAllDeclsRecursive(@This());
}
// Sample dotenv input used by the `loadFrom` test. It covers comments, overriding a key,
// `${...}` substitution, single- and double-quoted values, escape sequences, a multi-line
// quoted value and an unquoted value continued across lines with a trailing backslash.
const ENV_TEST_STRING_1: []const u8 =
\\ # This is a comment
\\NOTHING=# This is also a comment so NOTHING should be an empty string
\\NOTHING = "" # You can override values, this is still an empty string
\\HOSTNAME = localhost
\\PORT = 8080
\\URL = http://${HOSTNAME}:${PORT}
\\FALLBACK = "${NOTHING}"
\\LITERAL = '${This Will Not Be Substitutes}'
\\ESCAPE_SEQUENCES = "\xff\n\r\v\f"
\\# 5 = 6 #this will cause an error if uncommented
\\MULTILINE_VALUE = "Multi
\\line
\\ value"
\\UNQUOTED_MULTILINE = Multi\
\\line\
\\ value # comments are allowed here but not after the `\`
;
test loadFrom {
var parsed = try loadFromData(ENV_TEST_STRING_1, std.testing.allocator, .{});
defer parsed.deinit(std.testing.allocator);
// var iter = parsed.iterator();
// while (iter.next()) |kv| {
// std.debug.print("`{s}`: `{s}`\n", .{ENV_TEST_STRING_1[kv.key_ptr.*.idx..][0..kv.key_ptr.*.len], kv.value_ptr.*});
// }
try std.testing.expectEqualStrings("", parsed.get("NOTHING").?);
try std.testing.expectEqualStrings("localhost", parsed.get("HOSTNAME").?);
try std.testing.expectEqualStrings("8080", parsed.get("PORT").?);
try std.testing.expectEqualStrings("http://localhost:8080", parsed.get("URL").?);
try std.testing.expectEqualStrings("", parsed.get("FALLBACK").?);
try std.testing.expectEqualStrings("${This Will Not Be Substitutes}", parsed.get("LITERAL").?);