Conversation
| Assert.Single(document.Chapters); | ||
| Assert.Single(document.Chapters[^1].Sections); | ||
| Assert.Single(document.Chapters[^1].Sections); | ||
| Assert.IsType<Paragraph>(document.Chapters[^1].Sections[^1].Elements[^1]); | ||
| if (document.Chapters[^1].Sections[^1].Elements[^1] is Paragraph paragraph) |
There was a problem hiding this comment.
| Assert.Single(document.Chapters); | |
| Assert.Single(document.Chapters[^1].Sections); | |
| Assert.Single(document.Chapters[^1].Sections); | |
| Assert.IsType<Paragraph>(document.Chapters[^1].Sections[^1].Elements[^1]); | |
| if (document.Chapters[^1].Sections[^1].Elements[^1] is Paragraph paragraph) | |
| var chapter = Assert.Single(document.Chapters); | |
| var section = Assert.Single(chapter.Sections); | |
| var paragraph = Assert.IsType<Paragraph>(section.Elements[^1]); |
| Assert.Single(document.Chapters); | ||
| Assert.Single(document.Chapters[^1].Sections); | ||
| Assert.Equal(expectedParagraphs.Count, document.Chapters[^1].Sections[^1].Elements.Count); | ||
| foreach ((var expectedParagraph, var actualElement) in expectedParagraphs.Zip(document.Chapters[^1].Sections[^1].Elements)) |
| } | ||
|
|
||
| [Theory] | ||
| [MemberData(nameof(ProcessChildrenlayout1TestCases))] |
There was a problem hiding this comment.
html, expectedParagraphText, expectedScriptText, expectedClassNameにすればInlineDataでいけると思います
There was a problem hiding this comment.
ProcessChildrenlayout2TestCases も InlineData にした方が良いですか?
| { | ||
| get { return new EpubDocument("", "", "", Guid.NewGuid()) { Chapters = [new Chapter() { Sections = [new Section("") { Elements = [new Paragraph()] }] }] }; } | ||
| } |
There was a problem hiding this comment.
ラムダ式を使うと簡単にかけます
| { | |
| get { return new EpubDocument("", "", "", Guid.NewGuid()) { Chapters = [new Chapter() { Sections = [new Section("") { Elements = [new Paragraph()] }] }] }; } | |
| } | |
| => new("", "", "", Guid.NewGuid()) | |
| { | |
| Chapters = [ | |
| new() { | |
| Sections = [new("") { Elements = [new Paragraph()] }] | |
| } | |
| ] | |
| }; |
| var section = Assert.Single(chapter.Sections); | ||
| Assert.Equal(expectedParagraphs.Count, document.Chapters[^1].Sections[^1].Elements.Count); | ||
| Assert.All(expectedParagraphs.Zip(document.Chapters[^1].Sections[^1].Elements), v => |
There was a problem hiding this comment.
document.Chapters[^1].Sections[^1] == sectionだと思います
There was a problem hiding this comment.
classのメンバの値が等しいことを == で確認できますか?
There was a problem hiding this comment.
そもそもsectionsの要素がsectionだけであることをAssert.Singleで示しているのでいちいちインデクサを使う必要はないということです。
| internal class httpClientFactory : IHttpClientFactory | ||
| { | ||
| public HttpClient CreateClient(string name) | ||
| { | ||
| return httpClient; | ||
| } | ||
|
|
||
| private static readonly HttpClient httpClient = new HttpClient(); | ||
|
|
||
| } |
| var config = Configuration.Default.WithDefaultLoader(); | ||
| using var context = BrowsingContext.New(config); | ||
| var doc = await context.OpenAsync(request => request.Content(html)); | ||
| var mainText = doc.DocumentElement.LastElementChild?.LastElementChild as IHtmlDivElement; | ||
| if (mainText == null) | ||
| Assert.Fail(); |
There was a problem hiding this comment.
こんな感じはどうでしょうか?ここでは準備段階なのでnullになるかどうかはこちらの入力値で変更できます。よってnullチェックは必要ありません。
var parser = new HtmlParser();
var docs = parser.ParseDocument(html).Body!.Children.Single();
| // レイアウト2.1 1行だけの字下げ | ||
| (@"<div class=""jisage_3"" style=""margin-left: 3em"">text<br /></div><br>", [new Paragraph() { Text = "text", ClassName = "jisage_3", ScriptLine = new ScriptLine("text", "", "") }], [("jisage", (1, 3))]), | ||
| // レイアウト2.2 ブロックでの字下げ | ||
| (@"<div class=""jisage_3"" style=""margin-left: 3em"">text1<br />text2<br /></div><br>", [new Paragraph() { Text = "text1", ClassName = "jisage_3", ScriptLine = new ScriptLine("text1", "", "") }, new Paragraph() { Text = "text2", ClassName = "jisage_3", ScriptLine = new ScriptLine("text2", "", "") },], [("jisage", (1, 3))]), | ||
| // レイアウト2.3 凹凸の複雑な字下げ | ||
| (@"<div class=""burasage"" style=""margin-left: 3em; text_indent: -1em;"">Long Text</div>", [new Paragraph() { Text = "Long Text", ClassName = "jisage_3 text_indent_-1" }], [("jisage", (1, 3)), ("text_indent", (-1, 0))]), | ||
| // レイアウト2.4 は特定の書き方について述べていないので省略。 | ||
| // レイアウト2.5 地付き | ||
| (@"<div class=""chitsuki_0"" style=""text-align:right; margin-right: 0em"">text</div>", [new Paragraph() { Text = "text", ClassName = "chitsuki_0", ScriptLine = new ScriptLine("text", "", "") }], [("chitsuki", (0, 0))]), | ||
|
|
||
|
|
||
| // </div>の後の<br />がないパターン | ||
| (@"<div class=""jisage_3"" style=""margin-left: 3em"">text<br /></div>", [new Paragraph() { Text = "text", ClassName = "jisage_3", ScriptLine = new ScriptLine("text", "", "") }], [("jisage", (1, 3))]), | ||
| // </div>の前の<br />がないパターン | ||
| (@"<div class=""burasage"" style=""margin-left: 1em; text_indent: -1em;"">text</div>", [new Paragraph() { Text = "text", ClassName = "jisage_3 text_indent_-1", ScriptLine = new ScriptLine("text", "", "") }], [("jisage", (1, 3)), ("text_indent", (-1, 0))]), |
There was a problem hiding this comment.
見づらいので改行を増やしてください。new()を使えばより短くできると思います。
| var config = Configuration.Default.WithDefaultLoader(); | ||
| using var context = BrowsingContext.New(config); | ||
| var doc = await context.OpenAsync(request => request.Content(html)); | ||
| var mainText = doc.QuerySelector(".main_text") as IHtmlDivElement; | ||
| if (mainText == null) | ||
| Assert.Fail(); | ||
| var document = EmptySingleParagraph; | ||
| _scrapingAozoraService._Classes().Clear(); |
|
|
||
| [Theory] | ||
| [MemberData(nameof(ProcessChildrenlayout2TestCases))] | ||
| public async void ProcessChildrenLayout2Test(string html, IReadOnlyCollection<Paragraph> expectedParagraphs, IEnumerable<(string key, (int min, int max) value)> expectedDictionary) |
There was a problem hiding this comment.
確かに厳密にはReadonlyにするほうが良いですが、範囲が限定的なので配列にしたほうが見やすいと思います。
| { | ||
| using var context = BrowsingContext.New(Configuration.Default); | ||
| using var doc = await context.OpenAsync(req => req.Content(input)); | ||
| Assert.NotNull(doc.ParentElement); |
| /// </summary> | ||
| /// <param name="document">書き込むEpubDocument</param> | ||
| /// <param name="mainText">class = "main_text" なdiv要素</param> | ||
| internal void ProcessMainText(EpubDocument document, IHtmlDivElement mainText) |
There was a problem hiding this comment.
divである必要はありますか?キャストが増えてしまう割に大した制約が増えないのでいらないと思います
| /// <param name="element">処理を行う要素</param> | ||
| /// <param name="appliedClasses">適用されるclassのリスト</param> | ||
| /// <param name="scrapingInfo"></param> | ||
| internal void ProcessChildren(EpubDocument document, IElement element, string appliedClasses, ref int headingId, SplittedLineBuilder paragraphLineBuilder, SplittedLineBuilder scriptLineLineBuilder, Dictionary<string, (int min, int max)> classes) |
No description provided.