Improved CR preservation logic - now based on entropy

This commit is contained in:
n1474335 2019-12-13 14:45:13 +00:00
parent 813a151524
commit fd390bc61b
4 changed files with 69 additions and 32 deletions

View File

@ -591,6 +591,44 @@ class Utils {
return utf8 ? Utils.byteArrayToUtf8(arr) : Utils.byteArrayToChars(arr);
}
/**
* Calculates the Shannon entropy for a given set of data.
*
* @param {Uint8Array|ArrayBuffer} input
* @returns {number}
*/
static calculateShannonEntropy(data) {
if (data instanceof ArrayBuffer) {
data = new Uint8Array(data);
}
const prob = [],
occurrences = new Array(256).fill(0);
// Count occurrences of each byte in the input
let i;
for (i = 0; i < data.length; i++) {
occurrences[data[i]]++;
}
// Store probability list
for (i = 0; i < occurrences.length; i++) {
if (occurrences[i] > 0) {
prob.push(occurrences[i] / data.length);
}
}
// Calculate Shannon entropy
let entropy = 0,
p;
for (i = 0; i < prob.length; i++) {
p = prob[i];
entropy += p * Math.log(p) / Math.log(2);
}
return -entropy;
}
/**
* Parses CSV data and returns it as a two dimensional array or strings.

View File

@ -491,6 +491,15 @@
</select>
</div>
<div class="form-group option-item">
<label for="preserveCR" class="bmd-label-floating"> Preserve carriage returns (0x0d)</label>
<select class="form-control" option="preserveCR" id="preserveCR" data-toggle="tooltip" data-placement="bottom" data-offset="-10%" data-html="true" title="HTML textareas don't support carriage returns, so if we want to preserve them in our input, we have to disable editing.<br><br>The default option is to only do this for high-entropy inputs, but you can force the choice using this dropdown.">
<option value="entropy">For high-entropy inputs</option>
<option value="always">Always</option>
<option value="never">Never</option>
</select>
</div>
<div class="form-group option-item">
<label for="errorTimeout" class="bmd-label-floating">Operation error timeout in ms (0 for never)</label>
<input type="number" class="form-control" option="errorTimeout" id="errorTimeout">
@ -575,13 +584,6 @@
Keep the current tab in sync between the input and output
</label>
</div>
<div class="checkbox option-item">
<label for="preserveCR" data-toggle="tooltip" data-placement="right" data-html="true" title="As HTML textareas don't support carriage returns, editing input must be turned off to preserve them.<br><br>When this option is enabled, editing is disabled for pasted text that contains carriage returns. Otherwise, editing will remain enabled but carriage returns will not be preserved.">
<input type="checkbox" option="preserveCR" id="preserveCR">
Preserve carriage returns when pasting an input
</label>
</div>
</div>
<div class="modal-footer">
<button type="button" class="btn btn-secondary" id="reset-options">Reset options to default</button>

View File

@ -54,8 +54,7 @@ function main() {
autoMagic: true,
imagePreview: true,
syncTabs: true,
preserveCR: true,
userSetCR: false
preserveCR: "entropy"
};
document.removeEventListener("DOMContentLoaded", main, false);

View File

@ -860,31 +860,29 @@ class InputWaiter {
if (input.indexOf("\r") < 0) return false;
const optionsStr = "This behaviour can be changed in the <a href='#' onclick='document.getElementById(\"options\").click()'>Options pane</a>";
if (!this.app.options.userSetCR) {
// User has not set a CR preference yet
let preserve = await new Promise(function(resolve, reject) {
this.app.confirm(
"Carriage Return Detected",
"A <a href='https://wikipedia.org/wiki/Carriage_return'>carriage return</a> (<code>\\r</code>, <code>0x0d</code>) was detected in your input. As HTML textareas <a href='https://html.spec.whatwg.org/multipage/form-elements.html#the-textarea-element'>can't display carriage returns</a>, editing must be turned off to preserve them. <br>Alternatively, you can enable editing but your carriage returns will not be preserved.<br><br>This preference will be saved but can be toggled in the options pane.",
"Preserve Carriage Returns",
"Enable Editing", resolve, this);
}.bind(this));
if (preserve === undefined) {
// The confirm pane was closed without picking a specific choice
this.app.alert(`Not preserving carriage returns.\n${optionsStr}`, 5000);
preserve = false;
}
this.manager.options.updateOption("preserveCR", preserve);
this.manager.options.updateOption("userSetCR", true);
} else {
if (this.app.options.preserveCR) {
this.app.alert(`A carriage return (\\r, 0x0d) was detected in your input, so editing has been disabled to preserve it.<br>${optionsStr}`, 10000);
} else {
this.app.alert(`A carriage return (\\r, 0x0d) was detected in your input. Editing is remaining enabled, but carriage returns will not be preserved.<br>${optionsStr}`, 10000);
}
const preserveStr = `A carriage return (\\r, 0x0d) was detected in your input. To preserve it, editing has been disabled.<br>${optionsStr}`;
const dontPreserveStr = `A carriage return (\\r, 0x0d) was detected in your input. It has not been preserved.<br>${optionsStr}`;
switch (this.app.options.preserveCR) {
case "always":
this.app.alert(preserveStr, 6000);
return true;
case "never":
this.app.alert(dontPreserveStr, 6000);
return false;
}
return this.app.options.preserveCR;
// Only preserve for high-entropy inputs
const data = Utils.strToArrayBuffer(input);
const entropy = Utils.calculateShannonEntropy(data);
if (entropy > 6) {
this.app.alert(preserveStr, 6000);
return true;
}
this.app.alert(dontPreserveStr, 6000);
return false;
}
/**