-
Notifications
You must be signed in to change notification settings - Fork 0
Merge Updates for ArchiveWeb.page 0.15 #7
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
PR Reviewer Guide 🔍 Here are some key observations to aid the review process:
|
| if ( | ||
| !this.gzip && | ||
| this.markers.WARC_PAYLOAD && | ||
| record.warcType !== "request" && | ||
| (chunks.length === 5 || chunks.length === 4) | ||
| ) { | ||
| if (chunks.length === 5) { | ||
| yield chunks[0]; | ||
| yield chunks[1]; | ||
| yield chunks[2]; | ||
| yield this.markers.WARC_PAYLOAD; | ||
| if (chunks[3].length) { | ||
| yield chunks[3]; | ||
| yield this.markers.WARC_PAYLOAD; | ||
| } | ||
| yield chunks[4]; | ||
| } else { | ||
| yield chunks[0]; | ||
| yield chunks[1]; | ||
| yield this.markers.WARC_PAYLOAD; | ||
| if (chunks[2].length) { | ||
| yield chunks[2]; | ||
| yield this.markers.WARC_PAYLOAD; | ||
| } | ||
| yield chunks[3]; | ||
| } | ||
| } else { | ||
| for (const chunk of chunks) { | ||
| yield chunk; | ||
| } | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Suggestion: The current code has a potential issue with marker placement. When inserting the WARC_PAYLOAD marker after chunks[2] in the 5-chunk case, you should check if chunks[3] has length before yielding the marker, similar to how you're doing it in the 4-chunk case. [possible issue, importance: 7]
| if ( | |
| !this.gzip && | |
| this.markers.WARC_PAYLOAD && | |
| record.warcType !== "request" && | |
| (chunks.length === 5 || chunks.length === 4) | |
| ) { | |
| if (chunks.length === 5) { | |
| yield chunks[0]; | |
| yield chunks[1]; | |
| yield chunks[2]; | |
| yield this.markers.WARC_PAYLOAD; | |
| if (chunks[3].length) { | |
| yield chunks[3]; | |
| yield this.markers.WARC_PAYLOAD; | |
| } | |
| yield chunks[4]; | |
| } else { | |
| yield chunks[0]; | |
| yield chunks[1]; | |
| yield this.markers.WARC_PAYLOAD; | |
| if (chunks[2].length) { | |
| yield chunks[2]; | |
| yield this.markers.WARC_PAYLOAD; | |
| } | |
| yield chunks[3]; | |
| } | |
| } else { | |
| for (const chunk of chunks) { | |
| yield chunk; | |
| } | |
| } | |
| if ( | |
| !this.gzip && | |
| this.markers.WARC_PAYLOAD && | |
| record.warcType !== "request" && | |
| (chunks.length === 5 || chunks.length === 4) | |
| ) { | |
| if (chunks.length === 5) { | |
| yield chunks[0]; | |
| yield chunks[1]; | |
| yield chunks[2]; | |
| if (chunks[3].length) { | |
| yield this.markers.WARC_PAYLOAD; | |
| yield chunks[3]; | |
| yield this.markers.WARC_PAYLOAD; | |
| } | |
| yield chunks[4]; | |
| } else { | |
| yield chunks[0]; | |
| yield chunks[1]; | |
| yield this.markers.WARC_PAYLOAD; | |
| if (chunks[2].length) { | |
| yield chunks[2]; | |
| yield this.markers.WARC_PAYLOAD; | |
| } | |
| yield chunks[3]; | |
| } | |
| } else { | |
| for (const chunk of chunks) { | |
| yield chunk; | |
| } | |
| } |
| async *emitRecord( | ||
| record: WARCRecord, | ||
| doDigest: boolean, | ||
| output: { length: number; digest?: string }, | ||
| ) { | ||
| const opts = { gzip: this.gzip, digest: this.digestOpts }; | ||
| const s = new WARCSerializer(record, opts); | ||
|
|
||
| const chunks = []; | ||
| if (doDigest) { | ||
| this.recordHasher!.init(); | ||
| } | ||
|
|
||
| for await (const chunk of s) { | ||
| if (doDigest) { | ||
| this.recordHasher!.update(chunk as Uint8Array); | ||
| } | ||
| chunks.push(chunk); | ||
| output.length += chunk.length; | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Suggestion: The code assumes that chunk is a Uint8Array when updating the recordHasher, but it doesn't verify this. Since the type signature of the generator allows for both string and Uint8Array, you should ensure proper type handling before updating the hasher to prevent potential runtime errors. [possible issue, importance: 8]
| async *emitRecord( | |
| record: WARCRecord, | |
| doDigest: boolean, | |
| output: { length: number; digest?: string }, | |
| ) { | |
| const opts = { gzip: this.gzip, digest: this.digestOpts }; | |
| const s = new WARCSerializer(record, opts); | |
| const chunks = []; | |
| if (doDigest) { | |
| this.recordHasher!.init(); | |
| } | |
| for await (const chunk of s) { | |
| if (doDigest) { | |
| this.recordHasher!.update(chunk as Uint8Array); | |
| } | |
| chunks.push(chunk); | |
| output.length += chunk.length; | |
| } | |
| async *emitRecord( | |
| record: WARCRecord, | |
| doDigest: boolean, | |
| output: { length: number; digest?: string }, | |
| ) { | |
| const opts = { gzip: this.gzip, digest: this.digestOpts }; | |
| const s = new WARCSerializer(record, opts); | |
| const chunks = []; | |
| if (doDigest) { | |
| this.recordHasher!.init(); | |
| } | |
| for await (const chunk of s) { | |
| if (doDigest) { | |
| if (typeof chunk === "string") { | |
| this.recordHasher!.update(encoder.encode(chunk)); | |
| } else { | |
| this.recordHasher!.update(chunk as Uint8Array); | |
| } | |
| } | |
| chunks.push(chunk); | |
| output.length += chunk.length; | |
| } |
| async createWARCRecord(resource: DLResourceEntry) { | ||
| let url = resource.url; | ||
| const date = new Date(resource.ts).toISOString(); | ||
| resource.timestamp = getTSMillis(date); | ||
| const httpHeaders = resource.respHeaders || {}; | ||
| const warcVersion = this.warcVersion; | ||
|
|
||
| const pageId = resource.pageId; | ||
|
|
||
| let payload: Uint8Array | null | undefined = resource.payload; | ||
| let type: "response" | "request" | "resource" | "revisit"; | ||
|
|
||
| let refersToUrl, refersToDate; | ||
| let refersToDigest; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Suggestion: The type variable is declared but not initialized with a default value. If none of the conditions in the function that set this variable are met, it will remain undefined, potentially causing runtime errors when used later in the function. [possible issue, importance: 6]
| async createWARCRecord(resource: DLResourceEntry) { | |
| let url = resource.url; | |
| const date = new Date(resource.ts).toISOString(); | |
| resource.timestamp = getTSMillis(date); | |
| const httpHeaders = resource.respHeaders || {}; | |
| const warcVersion = this.warcVersion; | |
| const pageId = resource.pageId; | |
| let payload: Uint8Array | null | undefined = resource.payload; | |
| let type: "response" | "request" | "resource" | "revisit"; | |
| let refersToUrl, refersToDate; | |
| let refersToDigest; | |
| async createWARCRecord(resource: DLResourceEntry) { | |
| let url = resource.url; | |
| const date = new Date(resource.ts).toISOString(); | |
| resource.timestamp = getTSMillis(date); | |
| const httpHeaders = resource.respHeaders || {}; | |
| const warcVersion = this.warcVersion; | |
| const pageId = resource.pageId; | |
| let payload: Uint8Array | null | undefined = resource.payload; | |
| let type: "response" | "request" | "resource" | "revisit" = "response"; // Default value |
| const { coll } = await this.prepareColl(collId, request); | ||
|
|
||
| if (await ipfsRemove(coll)) { | ||
| await this.collections.updateMetadata(coll.name, coll.config.metadata); | ||
| return { removed: true }; | ||
| } | ||
|
|
||
| return { removed: false }; | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Suggestion: The method is missing error handling. If ipfsRemove(coll) throws an exception, it will propagate up and potentially crash the service worker. Add a try-catch block to handle potential errors. [possible issue, importance: 7]
| const { coll } = await this.prepareColl(collId, request); | |
| if (await ipfsRemove(coll)) { | |
| await this.collections.updateMetadata(coll.name, coll.config.metadata); | |
| return { removed: true }; | |
| } | |
| return { removed: false }; | |
| } | |
| async ipfsRemove(request: Request, collId: string) { | |
| const { coll } = await this.prepareColl(collId, request); | |
| try { | |
| if (await ipfsRemove(coll)) { | |
| await this.collections.updateMetadata(coll.name, coll.config.metadata); | |
| return { removed: true }; | |
| } | |
| return { removed: false }; | |
| } catch (error) { | |
| console.error("Error removing IPFS:", error); | |
| return { removed: false, error: "ipfs_remove_failed" }; | |
| } | |
| } |
| if (range && !range.startsWith("bytes 0-")) { | ||
| console.log("skip range request: " + range); | ||
| return; | ||
| } | ||
|
|
||
| const status = response.status; | ||
| const statusText = response.statusText; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Suggestion: The current range check is redundant since you've already checked if the range doesn't start with "bytes 0-" earlier in the function. This creates a potential inconsistency where a request might pass the first check but fail the second, leading to confusing behavior. [general, importance: 5]
| if (range && !range.startsWith("bytes 0-")) { | |
| console.log("skip range request: " + range); | |
| return; | |
| } | |
| const status = response.status; | |
| const statusText = response.statusText; | |
| if (range) { | |
| // Range was already checked at the beginning of the function | |
| // This ensures we only process ranges starting with "bytes 0-" | |
| const expectedRange = `bytes 0-${payload.length - 1}/${payload.length}`; | |
| if (range !== expectedRange) { | |
| console.log("skip range request: " + range); | |
| return; | |
| } | |
| } |
| const width = 1920; | ||
| const height = 1080; | ||
|
|
||
| // @ts-expect-error: ignore param | ||
| await this.send("Emulation.setDeviceMetricsOverride", {width, height, deviceScaleFactor: 0, mobile: false}); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Suggestion: The deviceScaleFactor is set to 0, which is an invalid value. According to Chrome DevTools Protocol, this should be a positive number where 0 is not allowed. This could lead to unexpected rendering issues in screenshots. [possible issue, importance: 8]
| const width = 1920; | |
| const height = 1080; | |
| // @ts-expect-error: ignore param | |
| await this.send("Emulation.setDeviceMetricsOverride", {width, height, deviceScaleFactor: 0, mobile: false}); | |
| async saveScreenshot(pageInfo: any) { | |
| // View Screenshot | |
| const width = 1920; | |
| const height = 1080; | |
| // @ts-expect-error: ignore param | |
| await this.send("Emulation.setDeviceMetricsOverride", {width, height, deviceScaleFactor: 1, mobile: false}); | |
| // @ts-expect-error: ignore param | |
| const resp = await this.send("Page.captureScreenshot", {format: "png"}); | |
| const payload = Buffer.from(resp.data, "base64"); | |
| const blob = new Blob([payload], {type: "image/png"}); | |
| await this.send("Emulation.clearDeviceMetricsOverride"); | |
| const mime = "image/png"; |
| const ecdsaImportParams = { | ||
| name: "ECDSA", | ||
| namedCurve: "P-384", | ||
| }; | ||
|
|
||
| const extractable = true; | ||
| const usage = ["sign", "verify"] as KeyUsage[]; | ||
|
|
||
| const ecdsaSignParams = { | ||
| name: "ECDSA", | ||
| hash: "SHA-256", | ||
| }; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Suggestion: The code uses "SHA-256" for the hash algorithm with ECDSA P-384, which is a mismatch. For P-384 curves, it's recommended to use SHA-384 to match the security level of the curve. Using a weaker hash algorithm reduces the overall security of the signature. [security, importance: 9]
| const ecdsaImportParams = { | |
| name: "ECDSA", | |
| namedCurve: "P-384", | |
| }; | |
| const extractable = true; | |
| const usage = ["sign", "verify"] as KeyUsage[]; | |
| const ecdsaSignParams = { | |
| name: "ECDSA", | |
| hash: "SHA-256", | |
| }; | |
| async sign(string: string, created: string): Promise<DataSignature> { | |
| let keyPair: CryptoKeyPair; | |
| let keys = await this.loadKeys(); | |
| const ecdsaImportParams = { | |
| name: "ECDSA", | |
| namedCurve: "P-384", | |
| }; | |
| const extractable = true; | |
| const usage = ["sign", "verify"] as KeyUsage[]; | |
| const ecdsaSignParams = { | |
| name: "ECDSA", | |
| hash: "SHA-384", | |
| }; |
PR Type
Enhancement
Description
Added screenshot and PDF capture capabilities
Added Flash content archiving via Ruffle
Fixed TextEncoder initialization
Updated UI for archiving settings
Improved IPFS integration
Changes walkthrough 📝
8 files:
- Update autopilot UI color and text
- Add screenshot, PDF, and Flash archiving capabilities
- Add service worker API for WACZ file handling
- Add WACZ file generation and download functionality
- Add IPFS integration utilities
- Add cryptographic signing capabilities for archives
- Add recording proxy for capturing web content
- Update UI with new archiving options

1 file:
- Fix TypeScript formatting

2 files:
- Add TypeScript declarations for global constants
- Update imports for new module structure

1 file:
- Fix download URL generation

1 file:
- Bump version to 0.15.0 and update dependencies