diff --git a/DustMite/dustmite.d b/DustMite/dustmite.d
index ec7abf262..e8876b7f9 100644
--- a/DustMite/dustmite.d
+++ b/DustMite/dustmite.d
@@ -24,7 +24,7 @@ import std.process;
 import std.random;
 import std.range;
 import std.regex;
-import std.stdio;
+import std.stdio : stdout, stderr, File;
 import std.string;
 import std.typecons;
 
@@ -33,26 +33,33 @@ import splitter;
 alias Splitter = splitter.Splitter;
 
 // Issue 314 workarounds
-alias std.string.join join;
-alias std.string.startsWith startsWith;
+alias join = std.string.join;
+alias startsWith = std.algorithm.searching.startsWith;
 
-string dir, resultDir, tester, globalCache;
-string dirSuffix(string suffix) { return (dir.absolutePath().buildNormalizedPath() ~ "." ~ suffix).relativePath(); }
+string dir, resultDir, tmpDir, tester, globalCache;
+string dirSuffix(string suffix, Flag!q{temp} temp)
+{
+	return (
+		(temp && tmpDir ? tmpDir.buildPath(dir.baseName) : dir)
+		.absolutePath().buildNormalizedPath() ~ "." ~ suffix
+	).relativePath();
+}
 
 size_t maxBreadth;
 size_t origDescendants;
 int tests, maxSteps = -1;
 bool foundAnything;
 bool noSave, trace, noRedirect, doDump, whiteout;
+RemoveRule[] rejectRules;
 string strategy = "inbreadth";
 
 struct Times { StopWatch total, load, testSave, resultSave, apply, lookaheadApply, lookaheadWaitThread, lookaheadWaitProcess, test, clean, globalCache, misc; }
 Times times;
 static this() { times.total.start(); times.misc.start(); }
-void measure(string what)(scope void delegate() p)
+T measure(string what, T)(scope T delegate() p)
 {
 	times.misc.stop(); mixin("times."~what~".start();");
-	p();
-	mixin("times."~what~".stop();"); times.misc.start();
+	scope(exit) { mixin("times."~what~".stop();"); times.misc.start(); }
+	return p();
 }
 
 struct Reduction
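Note on the `measure` hunk above: the helper now forwards the delegate's return value, and the `scope(exit)` guard stops the timer even if the delegate throws. A minimal usage sketch — this exact shape appears in the lookahead code further down in this patch:

	auto result = measure!"lookaheadWaitThread"({
		return reapThread(slot);
	});

Previously, with the `void`-only signature, call sites had to smuggle results out through a captured local.
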
"\0" ~ arg : arg) + .array(); getopt(args, "force", &force, "reduceonly|reduce-only", (string opt, string value) { removeRules ~= RemoveRule(Regex!char.init, value, true); }, "remove" , (string opt, string value) { removeRules ~= RemoveRule(regex(value, "mg"), null, true); }, "noremove|no-remove" , (string opt, string value) { removeRules ~= RemoveRule(regex(value, "mg"), null, false); }, + "reject" , (string opt, string value) { rejectRules ~= RemoveRule(regex(value, "mg"), null, true); }, "strip-comments", &stripComments, "whiteout|white-out", &whiteout, "coverage", &coverageDir, @@ -173,6 +182,7 @@ int main(string[] args) "split", &splitRules, "dump", &doDump, "dump-html", &dumpHtml, + "dump-json", &dumpJson, "times", &showTimes, "noredirect|no-redirect", &noRedirect, "cache", &globalCache, // for research @@ -180,11 +190,16 @@ int main(string[] args) "nosave|no-save", &noSave, // for research "nooptimize|no-optimize", &noOptimize, // for research "tab-width", &tabWidth, + "temp-dir", &tmpDir, "max-steps", &maxSteps, // for research / benchmarking "i|in-place", &inPlace, + "json", &readJson, "h|help", &showHelp, + "man", &openWiki, "V|version", &showVersion, ); + foreach (ref arg; args) + arg.skipOver("\0"); // Undo getopt hack if (showVersion) { @@ -195,7 +210,14 @@ int main(string[] args) enum source = import("source"); else enum source = "upstream"; - writeln("DustMite build ", __DATE__, " (", source, "), built with ", __VENDOR__, " ", __VERSION__); + stdout.writeln("DustMite build ", __DATE__, " (", source, "), built with ", __VENDOR__, " ", __VERSION__); + if (args.length == 1) + return 0; + } + + if (openWiki) + { + browse("https://github.com/CyberShadow/DustMite/wiki"); if (args.length == 1) return 0; } @@ -204,8 +226,7 @@ int main(string[] args) { stderr.writef(q"EOS Usage: %s [OPTION]... PATH TESTER -PATH should be a directory containing a clean copy of the file-set to reduce. -A file path can also be specified. NAME.EXT will be treated like NAME/NAME.EXT. +PATH should contain a clean copy of the file-set to reduce. TESTER should be a shell command which returns 0 for a correct reduction, and anything else otherwise. Supported options: @@ -216,6 +237,8 @@ Supported options: (may be used multiple times) --no-remove REGEXP Do not reduce blocks containing REGEXP (may be used multiple times) + --reject REGEXP Reject reductions which cause REGEXP to occur in output + (may be used multiple times) --strip-comments Attempt to remove comments from source code --white-out Replace deleted text with spaces to preserve line numbers --coverage DIR Load .lst files corresponding to source files from DIR @@ -227,7 +250,9 @@ Supported options: --split MASK:MODE Parse and reduce files specified by MASK using the given splitter. Can be repeated. 
MODE must be one of: %-(%s, %) + --json Load PATH as a JSON file (same syntax as --dump-json) --no-redirect Don't redirect stdout/stderr streams of test command + --temp-dir Write and run reduction candidates in this directory -j[N] Use N look-ahead processes (%d by default) EOS", args[0], splitterNames, totalCPUs); @@ -242,10 +267,12 @@ EOS"); stderr.write(q"EOS -h, --help Show this message Less interesting options: + --man Launch the project wiki web page in a web browser -V, --version Show program version --strategy STRAT Set strategy (careful/lookback/pingpong/indepth/inbreadth) - --dump Dump parsed tree to DIR.dump file - --dump-html Dump parsed tree to DIR.html file + --dump Dump parsed tree to PATH.dump file + --dump-html Dump parsed tree to PATH.html file + --dump-json Dump parsed tree to PATH.json file --times Display verbose spent time breakdown --cache DIR Use DIR as persistent disk cache (in addition to memory cache) @@ -278,23 +305,34 @@ EOS"); bool isDotName(string fn) { return fn.startsWith(".") && !(fn=="." || fn==".."); } - bool suspiciousFilesFound; - if (!force && isDir(dir)) + if (!readJson && !force && dir.exists && dir.isDir()) + { + bool suspiciousFilesFound; foreach (string path; dirEntries(dir, SpanMode.breadth)) if (isDotName(baseName(path)) || isDotName(baseName(dirName(path))) || extension(path)==".o" || extension(path)==".obj" || extension(path)==".exe") { stderr.writeln("Warning: Suspicious file found: ", path); suspiciousFilesFound = true; } - if (suspiciousFilesFound) - stderr.writeln("You should use a clean copy of the source tree.\nIf it was your intention to include this file in the file-set to be reduced,\nyou can use --force to silence this message."); + if (suspiciousFilesFound) + stderr.writeln("You should use a clean copy of the source tree.\nIf it was your intention to include this file in the file-set to be reduced,\nyou can use --force to silence this message."); + } ParseRule parseSplitRule(string rule) { auto p = rule.lastIndexOf(':'); - enforce(p > 0, "Invalid parse rule: " ~ rule); - auto pattern = rule[0..p]; - auto splitterName = rule[p+1..$]; + string pattern, splitterName; + if (p < 0) + { + pattern = "*"; + splitterName = rule; + } + else + { + enforce(p > 0, "Invalid parse rule: " ~ rule); + pattern = rule[0 .. p]; + splitterName = rule[p + 1 .. $]; + } auto splitterIndex = splitterNames.countUntil(splitterName); enforce(splitterIndex >= 0, "Unknown splitter: " ~ splitterName); return ParseRule(pattern, cast(Splitter)splitterIndex); @@ -304,7 +342,10 @@ EOS"); ParseOptions parseOptions; parseOptions.stripComments = stripComments; - parseOptions.mode = obfuscate ? ParseOptions.Mode.words : ParseOptions.Mode.source; + parseOptions.mode = + readJson ? ParseOptions.Mode.json : + obfuscate ? 
ParseOptions.Mode.words : + ParseOptions.Mode.source; parseOptions.rules = splitRules.map!parseSplitRule().array(); parseOptions.tabWidth = tabWidth; measure!"load"({root = loadFiles(dir, parseOptions);}); @@ -324,13 +365,15 @@ EOS"); resetProgress(root); if (doDump) - dumpSet(root, dirSuffix("dump")); + dumpSet(root, dirSuffix("dump", No.temp)); if (dumpHtml) - dumpToHtml(root, dirSuffix("html")); + dumpToHtml(root, dirSuffix("html", No.temp)); + if (dumpJson) + dumpToJson(root, dirSuffix("json", No.temp)); if (tester is null) { - writeln("No tester specified, exiting"); + stderr.writeln("No tester specified, exiting"); return 0; } @@ -338,10 +381,10 @@ EOS"); resultDir = dir; else { - resultDir = dirSuffix("reduced"); + resultDir = dirSuffix("reduced", No.temp); if (resultDir.exists) { - writeln("Hint: read https://github.com/CyberShadow/DustMite/wiki#result-directory-already-exists"); + stderr.writeln("Hint: read https://github.com/CyberShadow/DustMite/wiki#result-directory-already-exists"); throw new Exception("Result directory already exists"); } } @@ -355,20 +398,20 @@ EOS"); version (Posix) { if (testerFile.exists && (testerFile.getAttributes() & octal!111) == 0) - writeln("Hint: test program seems to be a non-executable file, try: chmod +x " ~ testerFile.escapeShellFileName()); + stderr.writeln("Hint: test program seems to be a non-executable file, try: chmod +x " ~ testerFile.escapeShellFileName()); } if (!testerFile.exists && tester.exists) - writeln("Hint: test program path should be relative to the source directory, try " ~ + stderr.writeln("Hint: test program path should be relative to the source directory, try " ~ tester.absolutePath.relativePath(dir.absolutePath).escapeShellFileName() ~ " instead of " ~ tester.escapeShellFileName()); if (!noRedirect) - writeln("Hint: use --no-redirect to see test script output"); - writeln("Hint: read https://github.com/CyberShadow/DustMite/wiki#initial-test-fails"); + stderr.writeln("Hint: use --no-redirect to see test script output"); + stderr.writeln("Hint: read https://github.com/CyberShadow/DustMite/wiki#initial-test-fails"); throw new Exception("Initial test fails: " ~ nullResult.reason); } } - lookaheadProcesses = new Lookahead[lookaheadCount]; + lookaheadProcessSlots = new LookaheadSlot[lookaheadCount]; foundAnything = false; string resultAdjective; @@ -397,20 +440,20 @@ EOS"); { if (noSave) measure!"resultSave"({safeSave(root, resultDir);}); - writefln("Done in %s tests and %s; %s version is in %s", tests, duration, resultAdjective, resultDir); + stderr.writefln("Done in %s tests and %s; %s version is in %s", tests, duration, resultAdjective, resultDir); } else { - writeln("Hint: read https://github.com/CyberShadow/DustMite/wiki#reduced-to-empty-set"); - writefln("Done in %s tests and %s; %s to empty set", tests, duration, resultAdjective); + stderr.writeln("Hint: read https://github.com/CyberShadow/DustMite/wiki#reduced-to-empty-set"); + stderr.writefln("Done in %s tests and %s; %s to empty set", tests, duration, resultAdjective); } } else - writefln("Done in %s tests and %s; no reductions found", tests, duration); + stderr.writefln("Done in %s tests and %s; no reductions found", tests, duration); if (showTimes) foreach (i, t; times.tupleof) - writefln("%s: %s", times.tupleof[i].stringof, times.tupleof[i].peek()); + stderr.writefln("%s: %s", times.tupleof[i].stringof, times.tupleof[i].peek()); return 0; } @@ -494,7 +537,8 @@ void recalculate(Entity root) e.deadHash.put(c.isWhite ? 
c : ' '); } - putString(e.filename); + if (e.file) + putString(e.file.name); putString(e.head); void addDependents(R)(R range, bool fresh) @@ -602,7 +646,7 @@ void recalculate(Entity root) return; } - inFile |= e.isFile; + inFile |= e.file !is null; assert(e.hash.length == e.deadHash.length); @@ -618,7 +662,8 @@ void recalculate(Entity root) auto start = pos; - putString(e.filename); + if (e.file) + putString(e.file.name); putString(e.head); foreach (c; e.children) passWO(c, inFile); @@ -778,7 +823,7 @@ struct ReductionIterator // Try next reduction type type = Reduction.Type.Concat; - if (e.isFile) + if (e.file) return; // Try this else { @@ -971,7 +1016,7 @@ bool nextAddress(ref size_t[] address, Entity root, bool descend) class LevelStrategy : IterativeStrategy { - bool levelChanged; + bool levelChanged; // We found some reductions while traversing this level bool invalid; override int getDepth() { return cast(int)address.length; } @@ -1084,22 +1129,18 @@ final class LookbackStrategy : LevelStrategy if (!nextInLevel()) { // End of level - if (levelChanged) - { - setLevel(currentLevel ? currentLevel - 1 : 0); - } - else - if (setLevel(maxLevel + 1)) - { - maxLevel = currentLevel; - } - else + auto nextLevel = levelChanged + ? currentLevel ? currentLevel - 1 : 0 + : maxLevel + 1; + if (!setLevel(nextLevel)) { if (iterationChanged) nextIteration(); else done = true; } + else + maxLevel = max(maxLevel, currentLevel); } } } @@ -1119,12 +1160,10 @@ final class PingPongStrategy : LevelStrategy if (!nextInLevel()) { // End of level - if (levelChanged) - { - setLevel(currentLevel ? currentLevel - 1 : 0); - } - else - if (!setLevel(currentLevel + 1)) + auto nextLevel = levelChanged + ? currentLevel ? currentLevel - 1 : 0 + : currentLevel + 1; + if (!setLevel(nextLevel)) { if (iterationChanged) nextIteration(); @@ -1202,12 +1241,12 @@ void reduceByStrategy(Strategy strategy) if (lastIteration != strategy.getIteration()) { - writefln("############### ITERATION %d ################", strategy.getIteration()); + stderr.writefln("############### ITERATION %d ################", strategy.getIteration()); lastIteration = strategy.getIteration(); } if (lastDepth != strategy.getDepth()) { - writefln("============= Depth %d =============", strategy.getDepth()); + stderr.writefln("============= Depth %d =============", strategy.getDepth()); lastDepth = strategy.getDepth(); } if (lastProgressGeneration != strategy.progressGeneration) @@ -1259,7 +1298,7 @@ void obfuscate(ref Entity root, bool keepLength) foreach (f; root.children) { - foreach (entity; parseToWords(f.filename) ~ f.children) + foreach (entity; parseToWords(f.file ? f.file.name : null) ~ f.children) if (entity.head.length && !isDigit(entity.head[0])) if (entity.head !in wordSet) { @@ -1373,20 +1412,24 @@ void dump(Writer)(Entity root, Writer writer) if (e.dead) { if (inFile && e.contents.length) - writer.handleText(e.contents[e.filename.length .. $]); + writer.handleText(e.contents[(e.file ? e.file.name : null).length .. 
$]); } else - if (!inFile && e.isFile) + if (!inFile && e.file) { - writer.handleFile(e.filename); + writer.handleFile(e.file); foreach (c; e.children) dumpEntity!true(c); } else { if (inFile && e.head.length) writer.handleText(e.head); - foreach (c; e.children) - dumpEntity!inFile(c); + if (inFile) + foreach (c; e.children) + dumpEntity!inFile(c); + else // Create files in reverse order, so that directories' timestamps get set last + foreach_reverse (c; e.children) + dumpEntity!inFile(c); if (inFile && e.tail.length) writer.handleText(e.tail); } } @@ -1398,59 +1441,194 @@ static struct FastWriter(Next) /// Accelerates Writer interface by bulking conti { Next next; immutable(char)* start, end; - void finish() + + private void flush() { if (start != end) next.handleText(start[0 .. end - start]); start = end = null; } - void handleFile(string s) + + void handleFile(const(Entity.FileProperties)* fileProperties) { - finish(); - next.handleFile(s); + flush(); + next.handleFile(fileProperties); } + void handleText(string s) { if (s.ptr != end) { - finish(); + flush(); start = s.ptr; } end = s.ptr + s.length; } - ~this() { finish(); } + + void finish() + { + flush(); + next.finish(); + } } -void save(Entity root, string savedir) +// Workaround for https://issues.dlang.org/show_bug.cgi?id=23683 +// Remove when moving to a DMD version incorporating a fix +version (Windows) { - safeDelete(savedir); - safeMkdir(savedir); + import core.sys.windows.winbase; + import core.sys.windows.winnt; + import std.windows.syserror; - static struct DiskWriter + alias AliasSeq(Args...) = Args; + alias FSChar = WCHAR; + void setTimes(const(char)[] name, + SysTime accessTime, + SysTime modificationTime) { - string dir; + auto namez = (name ~ "\0").to!(FSChar[]).ptr; - File o; - typeof(o.lockingBinaryWriter()) binaryWriter; + import std.datetime.systime : SysTimeToFILETIME; + const ta = SysTimeToFILETIME(accessTime); + const tm = SysTimeToFILETIME(modificationTime); + alias defaults = + AliasSeq!(FILE_WRITE_ATTRIBUTES, + 0, + null, + OPEN_EXISTING, + FILE_ATTRIBUTE_NORMAL | + FILE_ATTRIBUTE_DIRECTORY | + FILE_FLAG_BACKUP_SEMANTICS, + HANDLE.init); + auto h = CreateFileW(namez, defaults); - void handleFile(string fn) - { - static Appender!(char[]) pathBuf; - pathBuf.clear(); - pathBuf.put(dir.chainPath(fn)); - auto path = pathBuf.data; - if (!exists(dirName(path))) - safeMkdir(dirName(path)); + wenforce(h != INVALID_HANDLE_VALUE, "CreateFileW: " ~ name); + + scope(exit) + wenforce(CloseHandle(h), "CloseHandle: " ~ name); + + wenforce(SetFileTime(h, null, &ta, &tm), "SetFileTime: " ~ name); + } +} + +static struct DiskWriter +{ + string dir; + + const(Entity.FileProperties)* fileProperties; + // Regular files + File o; + typeof(o.lockingBinaryWriter()) binaryWriter; + // Symlinks + Appender!(char[]) symlinkBuf; + + @property const(char)[] currentFilePath() + { + static Appender!(char[]) pathBuf; + pathBuf.clear(); + pathBuf.put(dir.chainPath(fileProperties.name)); + return pathBuf.data; + } + + void handleFile(const(Entity.FileProperties)* fileProperties) + { + finish(); + + this.fileProperties = fileProperties; + scope(failure) this.fileProperties = null; + + auto path = currentFilePath; + if (!exists(dirName(path))) + safeMkdir(dirName(path)); // TODO make directories nested instead + + if (attrIsSymlink(fileProperties.mode.get(0))) + symlinkBuf.clear(); + else + if (attrIsDir(fileProperties.mode.get(0))) + {} + else // regular file + { o.open(cast(string)path, "wb"); binaryWriter = o.lockingBinaryWriter; } + } - void 
handleText(string s) + void handleText(string s) + { + if (attrIsSymlink(fileProperties.mode.get(0))) + symlinkBuf.put(s); + else + if (attrIsDir(fileProperties.mode.get(0))) + enforce(s.length == 0, "Directories cannot have contents"); + else // regular file { + assert(o.isOpen); binaryWriter.put(s); } } + + void finish() + { + if (fileProperties) + { + scope(exit) fileProperties = null; + + auto path = currentFilePath; + + if (attrIsSymlink(fileProperties.mode.get(0))) + symlink(symlinkBuf.data, path); + else + if (attrIsDir(fileProperties.mode.get(0))) + mkdirRecurse(path); + else // regular file + { + assert(o.isOpen); + binaryWriter = typeof(binaryWriter).init; + o.close(); + o = File.init; // Avoid crash on Windows + } + + if (!fileProperties.mode.isNull) + { + auto mode = fileProperties.mode.get(); + if (!attrIsSymlink(mode)) + setAttributes(path, mode); + } + if (!fileProperties.times.isNull) + setTimes(path, fileProperties.times.get()[0], fileProperties.times.get()[1]); + } + } +} + +struct MemoryWriter +{ + char[] buf; + size_t pos; + + void handleFile(const(Entity.FileProperties)* fileProperties) {} + + void handleText(string s) + { + auto end = pos + s.length; + if (buf.length < end) + { + buf.length = end; + buf.length = buf.capacity; + } + buf[pos .. end] = s; + pos = end; + } + + void reset() { pos = 0; } + char[] data() { return buf[0 .. pos]; } +} + +void save(Entity root, string savedir) +{ + safeDelete(savedir); + safeMkdir(savedir); + FastWriter!DiskWriter writer; writer.next.dir = savedir; dump(root, &writer); @@ -1518,7 +1696,7 @@ bool tryReduction(ref Entity root, Reduction r) if (newRoot is root) { assert(r.type != Reduction.Type.None); - writeln(r, " => N/A"); + stderr.writeln(r, " => N/A"); return false; } if (test(newRoot, [r]).success) @@ -1606,7 +1784,8 @@ Entity applyReductionImpl(Entity origRoot, ref Reduction r) { auto fa = rootAddress.children[i]; auto f = edit(fa); - f.filename = applyReductionToPath(f.filename, r); + if (f.file) + f.file.name = applyReductionToPath(f.file.name, r); foreach (j, const word; f.children) if (word.head == r.from) edit(fa.children[j]).head = r.to; @@ -1660,7 +1839,7 @@ Entity applyReductionImpl(Entity origRoot, ref Reduction r) { if (e.dead) return; - if (e.isFile) + if (e.file) { // Skip noRemove files, except when they are the target // (in which case they will keep their contents after the reduction). 
@@ -1850,10 +2029,10 @@ RoundRobinCache!(ReductionCacheKey, Entity) reductionCache; Entity applyReduction(Entity origRoot, ref Reduction r) { - if (lookaheadProcesses.length) + if (lookaheadProcessSlots.length) { if (!reductionCache.keys) - reductionCache.requireSize(1 + lookaheadProcesses.length); + reductionCache.requireSize(1 + lookaheadProcessSlots.length); auto cacheKey = ReductionCacheKey(origRoot, r); return reductionCache.get(cacheKey, applyReductionImpl(origRoot, r)); @@ -1894,10 +2073,10 @@ void autoRetry(scope void delegate() fun, lazy const(char)[] operation) } catch (Exception e) { - writeln("Error while attempting to " ~ operation ~ ": " ~ e.msg); + stderr.writeln("Error while attempting to " ~ operation ~ ": " ~ e.msg); import core.thread; Thread.sleep(dur!"seconds"(1)); - writeln("Retrying..."); + stderr.writeln("Retrying..."); } } @@ -1955,14 +2134,15 @@ void saveResult(Entity root) measure!"resultSave"({safeSave(root, resultDir);}); } -struct Lookahead +struct LookaheadSlot { + bool active; Thread thread; shared Pid pid; string testdir; EntityHash digest; } -Lookahead[] lookaheadProcesses; +LookaheadSlot[] lookaheadProcessSlots; TestResult[EntityHash] lookaheadResults; @@ -2002,10 +2182,14 @@ struct TestResult lookahead, diskCache, ramCache, + reject, + error, } Source source; int status; + string error; + string reason() { final switch (source) @@ -2022,6 +2206,10 @@ struct TestResult return "Test result was cached on disk as " ~ (success ? "success" : "failure"); case Source.ramCache: return "Test result was cached in memory as " ~ (success ? "success" : "failure"); + case Source.reject: + return "Test result was rejected by a --reject rule"; + case Source.error: + return "Error: " ~ error; } } } @@ -2031,7 +2219,7 @@ TestResult test( Reduction[] reductions, /// For display purposes only ) { - writef("%-(%s, %) => ", reductions); stdout.flush(); + stderr.writef("%-(%s, %) => ", reductions); stdout.flush(); EntityHash digest = root.hash; @@ -2041,7 +2229,7 @@ TestResult test( if (cacheResult) { // Note: as far as I can see, a cache hit for a positive reduction is not possible (except, perhaps, for a no-op reduction) - writeln(*cacheResult ? "Yes" : "No", " (cached)"); + stderr.writeln(*cacheResult ? 
"Yes" : "No", " (cached)"); return TestResult(*cacheResult, TestResult.Source.ramCache); } auto result = fallback; @@ -2062,13 +2250,13 @@ TestResult test( measure!"globalCache"({ found = exists(cacheBase~"0"); }); if (found) { - writeln("No (disk cache)"); + stderr.writeln("No (disk cache)"); return TestResult(false, TestResult.Source.diskCache); } measure!"globalCache"({ found = exists(cacheBase~"1"); }); if (found) { - writeln("Yes (disk cache)"); + stderr.writeln("Yes (disk cache)"); return TestResult(true, TestResult.Source.diskCache); } auto result = fallback; @@ -2085,34 +2273,57 @@ TestResult test( { // Handle existing lookahead jobs - TestResult reap(ref Lookahead process, int status) + Nullable!TestResult reapThread(ref LookaheadSlot slot) { - scope(success) process = Lookahead.init; - safeDelete(process.testdir); - if (process.thread) - process.thread.join(/*rethrow:*/true); - return lookaheadResults[process.digest] = TestResult(status == 0, TestResult.Source.lookahead, status); + try + { + slot.thread.join(/*rethrow:*/true); + slot.thread = null; + return typeof(return)(); + } + catch (Exception e) + { + scope(success) slot = LookaheadSlot.init; + safeDelete(slot.testdir); + auto result = TestResult(false, TestResult.Source.error); + result.error = e.msg; + lookaheadResults[slot.digest] = result; + return typeof(return)(result); + } + } + + TestResult reapProcess(ref LookaheadSlot slot, int status) + { + scope(success) slot = LookaheadSlot.init; + safeDelete(slot.testdir); + if (slot.thread) + reapThread(slot); // should be null + return lookaheadResults[slot.digest] = TestResult(status == 0, TestResult.Source.lookahead, status); } - foreach (ref process; lookaheadProcesses) - if (process.thread) + foreach (ref slot; lookaheadProcessSlots) // Reap threads + if (slot.thread) { debug (DETERMINISTIC_LOOKAHEAD) - { - process.thread.join(/*rethrow:*/true); - process.thread = null; - } + reapThread(slot); + else + if (!slot.thread.isRunning) + reapThread(slot); + } - auto pid = cast()atomicLoad(process.pid); + foreach (ref slot; lookaheadProcessSlots) // Reap processes + if (slot.active) + { + auto pid = cast()atomicLoad(slot.pid); if (pid) { debug (DETERMINISTIC_LOOKAHEAD) - reap(process, pid.wait()); + reapProcess(slot, pid.wait()); else { auto waitResult = pid.tryWait(); if (waitResult.terminated) - reap(process, waitResult.status); + reapProcess(slot, waitResult.status); } } } @@ -2132,8 +2343,8 @@ TestResult test( size_t numSteps; - foreach (ref process; lookaheadProcesses) - while (!process.thread && !predictionTree.empty) + foreach (ref slot; lookaheadProcessSlots) + while (!slot.active && !predictionTree.empty) { auto state = predictionTree.front; predictionTree.removeFront(); @@ -2141,7 +2352,7 @@ TestResult test( retryIter: if (state.iter.done) continue; - reductionCache.requireSize(lookaheadProcesses.length + ++numSteps); + reductionCache.requireSize(lookaheadProcessSlots.length + ++numSteps); auto reduction = state.iter.front; Entity newRoot; measure!"lookaheadApply"({ newRoot = state.iter.root.applyReduction(reduction); }); @@ -2154,7 +2365,7 @@ TestResult test( auto digest = newRoot.hash; double prediction; - if (digest in cache || digest in lookaheadResults || lookaheadProcesses[].canFind!(p => p.thread && p.digest == digest)) + if (digest in cache || digest in lookaheadResults || lookaheadProcessSlots[].canFind!(p => p.thread && p.digest == digest)) { if (digest in cache) prediction = cache[digest] ? 
1 : 0; @@ -2166,25 +2377,26 @@ TestResult test( } else { - process.digest = digest; + slot.active = true; + slot.digest = digest; static int counter; - process.testdir = dirSuffix("lookahead.%d".format(counter++)); + slot.testdir = dirSuffix("lookahead.%d".format(counter++), Yes.temp); // Saving and process creation are expensive. // Don't block the main thread, use a worker thread instead. - static void runThread(Entity newRoot, ref Lookahead process, string tester) + static void runThread(Entity newRoot, ref LookaheadSlot slot, string tester) { - process.thread = new Thread({ - save(newRoot, process.testdir); + slot.thread = new Thread({ + save(newRoot, slot.testdir); auto nul = File(nullFileName, "w+"); - auto pid = spawnShell(tester, nul, nul, nul, null, Config.none, process.testdir); - atomicStore(process.pid, cast(shared)pid); + auto pid = spawnShell(tester, nul, nul, nul, null, Config.none, slot.testdir); + atomicStore(slot.pid, cast(shared)pid); }); - process.thread.start(); + slot.thread.start(); } - runThread(newRoot, process, tester); + runThread(newRoot, slot, tester); prediction = state.predictor.predict(); } @@ -2209,26 +2421,35 @@ TestResult test( auto plookaheadResult = digest in lookaheadResults; if (plookaheadResult) { - writeln(plookaheadResult.success ? "Yes" : "No", " (lookahead)"); + stderr.writeln(plookaheadResult.success ? "Yes" : "No", " (lookahead)"); return *plookaheadResult; } - foreach (ref process; lookaheadProcesses) + foreach (ref slot; lookaheadProcessSlots) { - if (process.thread && process.digest == digest) + if (slot.active && slot.digest == digest) { // Current test is already being tested in the background, wait for its result. // Join the thread first, to guarantee that there is a pid - measure!"lookaheadWaitThread"({ process.thread.join(/*rethrow:*/true); }); - process.thread = null; + if (slot.thread) + { + auto result = measure!"lookaheadWaitThread"({ + return reapThread(slot); + }); + if (!result.isNull) + { + stderr.writefln("%s (lookahead-wait: %s)", result.get().success ? "Yes" : "No", result.get().source); + return result.get(); + } + } - auto pid = cast()atomicLoad(process.pid); + auto pid = cast()atomicLoad(slot.pid); int exitCode; measure!"lookaheadWaitProcess"({ exitCode = pid.wait(); }); - auto result = reap(process, exitCode); - writeln(result.success ? "Yes" : "No", " (lookahead-wait)"); + auto result = reapProcess(slot, exitCode); + stderr.writeln(result.success ? "Yes" : "No", " (lookahead-wait)"); return result; } } @@ -2237,29 +2458,91 @@ TestResult test( return fallback; } + TestResult testReject(lazy TestResult fallback) + { + if (rejectRules.length) + { + bool defaultReject = !rejectRules.front.remove; + + bool scan(Entity e) + { + if (e.file) + { + static MemoryWriter writer; + writer.reset(); + dump(e, &writer); + + static bool[] removeCharBuf; + if (removeCharBuf.length < writer.data.length) + removeCharBuf.length = writer.data.length; + auto removeChar = removeCharBuf[0 .. writer.data.length]; + removeChar[] = defaultReject; + + foreach (ref rule; rejectRules) + if (rule.regexp !is Regex!char.init) + foreach (m; writer.data.matchAll(rule.regexp)) + { + auto start = m.hit.ptr - writer.data.ptr; + auto end = start + m.hit.length; + removeChar[start .. 
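Note on the `--reject` machinery above: `testReject` renders each file of the candidate reduction through `MemoryWriter` and marks the characters matched by each rule, mirroring how `applyNoRemoveRules` marks characters below; if any character ends up marked, the candidate is reported as a failure without ever running the tester. A hypothetical invocation (directory and script names are made up):

	dustmite --reject 'internal compiler error' src ./check.sh

This refuses any candidate whose written-out file contents would still contain the text "internal compiler error".
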
@@ -2323,20 +2606,20 @@
 	// don't remove anything except what's specified by the rule.
 	bool defaultRemove = !removeRules.front.remove;
 
-	auto files = root.isFile ? [root] : root.children;
+	auto files = root.file ? [root] : root.children;
 
 	foreach (f; files)
 	{
-		assert(f.isFile);
+		assert(f.file);
 
 		// Check file name
 		bool removeFile = defaultRemove;
 		foreach (rule; removeRules)
 		{
 			if (
-				(rule.shellGlob && f.filename.globMatch(rule.shellGlob))
+				(rule.shellGlob && f.file.name.globMatch(rule.shellGlob))
 				||
-				(rule.regexp !is Regex!char.init && f.filename.match(rule.regexp))
+				(rule.regexp !is Regex!char.init && f.file.name.match(rule.regexp))
 			)
 				removeFile = rule.remove;
 		}
@@ -2359,6 +2642,7 @@
 				return true;
 			auto start = s.ptr - f.contents.ptr;
 			auto end = start + s.length;
+			assert(start <= end && end <= f.contents.length, "String is not a slice of the file");
 			return removeChar[start .. end].all;
 		}
@@ -2406,10 +2690,10 @@
 {
 	void scanFile(Entity f)
 	{
-		auto fn = buildPath(dir, setExtension(baseName(f.filename), "lst"));
+		auto fn = buildPath(dir, setExtension(baseName(f.file.name), "lst"));
 		if (!exists(fn))
 			return;
 
-		writeln("Loading coverage file ", fn);
+		stderr.writeln("Loading coverage file ", fn);
 
 		static bool covered(string line)
 		{
@@ -2451,7 +2735,7 @@
 
 	void scanFiles(Entity e)
 	{
-		if (e.isFile)
+		if (e.file)
 			scanFile(e);
 		else
 			foreach (c; e.children)
@@ -2492,7 +2776,8 @@
 	void convertRef(ref EntityRef r)
 	{
 		assert(r.entity && !r.address);
-		r.address = addresses[r.entity.id];
+		r.address = addresses.get(r.entity.id, null);
+		assert(r.address, "Dependent not in tree");
 		r.entity = null;
 	}
@@ -2597,7 +2882,7 @@
 				f.write(
 					" ",
 					e.redirect ? "-> " ~ text(findEntityEx(root, e.redirect).entity.id) ~ " " : "",
-					e.isFile ? e.filename ? printableFN(e.filename) ~ " " : null : e.head ? printable(e.head) ~ " " : null,
+					e.file ? e.file.name ? printableFN(e.file.name) ~ " " : null : e.head ? printable(e.head) ~ " " : null,
 					e.tail ? printable(e.tail) ~ " " : null,
 					e.comment ? "/* " ~ e.comment ~ " */ " : null,
 					"]"
 				);
@@ -2606,7 +2891,7 @@
 			else
 			{
 				f.writeln(e.comment ? " // " ~ e.comment : null);
-				if (e.isFile) f.writeln(prefix, " ", printableFN(e.filename));
+				if (e.file) f.writeln(prefix, " ", printableFN(e.file.name));
 				if (e.head) f.writeln(prefix, " ", printable(e.head));
 				foreach (c; e.children)
 					print(c, depth+1);
@@ -2654,10 +2939,10 @@
 
 	void dump(Entity e)
 	{
-		if (e.isFile)
+		if (e.file)
 		{
 			buf.put("<h1>");
-			dumpText(e.filename);
+			dumpText(e.file.name);
 			buf.put("</h1><pre>");
 			foreach (c; e.children)
 				dump(c);
@@ -2683,6 +2968,61 @@ EOT");
 	std.file.write(fn, buf.data());
 }
 
+void dumpToJson(Entity root, string fn)
+{
+	import std.json : JSONValue;
+
+	bool[const(Address)*] needLabel;
+
+	void scan(Entity e, const(Address)* addr)
+	{
+		foreach (dependent; e.dependents)
+		{
+			assert(dependent.address);
+			needLabel[dependent.address] = true;
+		}
+		foreach (i, child; e.children)
+			scan(child, addr.child(i));
+	}
+	scan(root, &rootAddress);
+
+	JSONValue toJson(Entity e, const(Address)* addr)
+	{
+		JSONValue[string] o;
+
+		if (e.file)
+			o["filename"] = e.file.name;
+
+		if (e.head.length)
+			o["head"] = e.head;
+		if (e.children.length)
+			o["children"] = e.children.length.iota.map!(i =>
+				toJson(e.children[i], addr.child(i))
+			).array;
+		if (e.tail.length)
+			o["tail"] = e.tail;
+
+		if (e.noRemove)
+			o["noRemove"] = true;
+
+		if (addr in needLabel)
+			o["label"] = e.id.to!string;
+		if (e.dependents.length)
+			o["dependents"] = e.dependents.map!((ref dependent) =>
+				root.findEntity(dependent.address).entity.id.to!string
+			).array;
+
+		return JSONValue(o);
+	}
+
+	auto jsonDoc = JSONValue([
+		"version" : JSONValue(1),
+		"root" : toJson(root, &rootAddress),
+	]);
+
+	std.file.write(fn, jsonDoc.toPrettyString());
+}
+
 // void dumpText(string fn, ref Reduction r = nullReduction)
 // {
 // 	auto f = File(fn, "wt");
@@ -2694,7 +3034,7 @@ version(testsuite)
 shared static this()
 {
 	import core.runtime;
-	"../cov".mkdir.collectException();
-	dmd_coverDestPath("../cov");
+	"../../cov".mkdir.collectException();
+	dmd_coverDestPath("../../cov");
 	dmd_coverSetMerge(true);
 }
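For reference, a small document in the shape that `dumpToJson` above writes and `loadJson` (in splitter.d below) accepts — an illustrative sketch, not tool output; the file name and code fragments are made up. Per the loader, an entity's "dependents" array names, by their "label" values, the entities that are removed together with it:

	{
	    "version": 1,
	    "root": {
	        "children": [
	            {
	                "filename": "example.d",
	                "children": [
	                    { "head": "int x;\n", "dependents": ["use-of-x"] },
	                    { "head": "int y = x;\n", "label": "use-of-x" },
	                    { "head": "void main() {}\n", "noRemove": true }
	                ]
	            }
	        ]
	    }
	}
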
diff --git a/DustMite/polyhash.d b/DustMite/polyhash.d
index 5fa9766f6..13ab910d1 100644
--- a/DustMite/polyhash.d
+++ b/DustMite/polyhash.d
@@ -290,8 +290,8 @@ if (is(T : long) && T.sizeof >= 2)
 		asm
 		{
 			"`~x86SignedOpPrefix!T~`mul`~x86SizeOpSuffix!T~` %3"
-			: "=a" low, "=d" high
-			: "a" a, "rm" b;
+			: "=a"(low), "=d"(high)
+			: "a"(a), "rm"(b);
 		}
 	`);
 	return typeof(return)(low, high);
@@ -363,8 +363,8 @@ if (is(T : long) && T.sizeof >= 2 && is(L == LongInt!T))
 		asm
 		{
 			"`~x86SignedOpPrefix!T~`div`~x86SizeOpSuffix!T~` %4"
-			: "=a" quotient, "=d" remainder
-			: "a" low, "d" high, "rm" b;
+			: "=a"(quotient), "=d"(remainder)
+			: "a"(low), "d"(high), "rm"(b);
 		}
 	`);
 	return typeof(return)(quotient, remainder);
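The polyhash.d hunks above only migrate GDC's extended inline assembler operands from the old `"=a" low` spelling to the parenthesized `"=a"(low)` form; the generated code is unchanged. A self-contained sketch of the same syntax, using a hypothetical GDC-only helper (not part of this patch):

	version (GNU)
	uint mulHigh(uint a, uint b)
	{
		uint low, high;
		asm
		{
			// edx:eax = eax * b; %0=low, %1=high, %2=a, %3=b
			"mull %3"
			: "=a"(low), "=d"(high)
			: "a"(a), "rm"(b);
		}
		return high;
	}
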
diff --git a/DustMite/splitter.d b/DustMite/splitter.d
index ab5da91cc..be8d5bf37 100644
--- a/DustMite/splitter.d
+++ b/DustMite/splitter.d
@@ -8,14 +8,18 @@ import std.ascii;
 import std.algorithm;
 import std.array;
 import std.conv;
+import std.datetime.systime;
 import std.exception;
 import std.file;
 import std.functional;
 import std.path;
 import std.range;
+import std.stdio : File, stdin;
 import std.string;
 import std.traits;
 import std.stdio : stderr;
+import std.typecons;
+import std.utf : byChar;
 
 import polyhash;
@@ -65,8 +69,15 @@ final class Entity
 	Entity[] children;  /// This node's children nodes, e.g. the statements of the statement block.
 	string tail;        /// This node's "tail", e.g. "}" for a statement block.
 
-	string filename, contents;
-	@property bool isFile() { return filename != ""; }
+	string contents;
+
+	struct FileProperties
+	{
+		string name;                 /// Relative to the reduction root
+		Nullable!uint mode;          /// OS-specific (std.file.getAttributes)
+		Nullable!(SysTime[2]) times; /// Access and modification times
+	}
+	FileProperties* file; /// If non-null, this node represents a file
 
 	bool isPair;        /// Internal hint for --dump output
 	bool noRemove;      /// Don't try removing this entity (children OK)
@@ -133,22 +144,18 @@ private: // Used during parsing only
 	debug string[] comments; /// Used to debug the splitter
 }
 
-enum Mode
-{
-	source,
-	words,     /// split identifiers, for obfuscation
-}
-
 enum Splitter
 {
 	files,     /// Load entire files only
 	lines,     /// Split by line ends
+	null_,     /// Split by the \0 (NUL) character
 	words,     /// Split by whitespace
 	D,         /// Parse D source code
 	diff,      /// Unified diffs
 	indent,    /// Indentation (Python, YAML...)
+	lisp,      /// Lisp and similar languages
 }
-immutable string[] splitterNames = [EnumMembers!Splitter].map!(e => e.text().toLower()).array();
+immutable string[] splitterNames = [EnumMembers!Splitter].map!(e => e.text().toLower().chomp("_")).array();
 
 struct ParseRule
 {
@@ -158,7 +165,12 @@ struct ParseRule
 
 struct ParseOptions
 {
-	enum Mode { source, words }
+	enum Mode
+	{
+		source,
+		words,     /// split identifiers, for obfuscation
+		json,
+	}
 
 	bool stripComments;
 	ParseRule[] rules;
@@ -166,21 +178,22 @@ struct ParseOptions
 	uint tabWidth;
 }
 
+version (Posix) {} else
+{
+	// Non-POSIX symlink stubs
+	string readLink(const(char)[]) { throw new Exception("Sorry, symbolic links are only supported on POSIX systems"); }
+	void symlink(const(char)[], const(char)[]) { throw new Exception("Sorry, symbolic links are only supported on POSIX systems"); }
+}
+
 /// Parse the given file/directory.
-/// For files, modifies path to be the base name for .test / .reduced directories.
+/// For files, modifies `path` to be the base name for .test / .reduced directories.
 Entity loadFiles(ref string path, ParseOptions options)
 {
-	if (isFile(path))
-	{
-		auto filePath = path;
-		path = stripExtension(path);
-		return loadFile(filePath.baseName(), filePath, options);
-	}
-	else
+	if (path != "-" && !path.isSymlink && path.exists && path.isDir)
 	{
 		auto set = new Entity();
-		foreach (string entry; dirEntries(path, SpanMode.breadth).array.sort!((a, b) => a.name < b.name))
-			if (isFile(entry))
+		foreach (string entry; dirEntries(path, SpanMode.breadth, /*followSymlink:*/false).array.sort!((a, b) => a.name < b.name))
+			if (isSymlink(entry) || isFile(entry) || isDir(entry))
 			{
 				assert(entry.startsWith(path));
 				auto name = entry[path.length+1..$];
@@ -188,6 +201,16 @@ Entity loadFiles(ref string path, ParseOptions options)
 			}
 		return set;
 	}
+	else
+	{
+		auto realPath = path;
+		string name; // For Entity.filename
+		if (path == "-" || path == "/dev/stdin")
+			name = path = "stdin";
+		else
+			name = realPath.baseName();
+		return loadFile(name, realPath, options);
+	}
 }
 
 enum BIN_SIZE = 2;
@@ -239,61 +262,117 @@ enum BIN_SIZE = 2;
 immutable ParseRule[] defaultRules = [
 	{ "*.d"    , Splitter.D     },
 	{ "*.di"   , Splitter.D     },
+
+	{ "*.diff" , Splitter.diff  },
 	{ "*.patch", Splitter.diff  },
+
+	{ "*.lisp" , Splitter.lisp  },
+	{ "*.cl"   , Splitter.lisp  },
+	{ "*.lsp"  , Splitter.lisp  },
+	{ "*.el"   , Splitter.lisp  },
+
 	{ "*"      , Splitter.files },
 ];
 
+void[] readFile(File f)
+{
+	import std.range.primitives : put;
+	auto result = appender!(ubyte[]);
+	auto size = f.size;
+	if (size <= uint.max)
+		result.reserve(cast(size_t)size);
+	put(result, f.byChunk(64 * 1024));
+	return result.data;
+}
+
 Entity loadFile(string name, string path, ParseOptions options)
 {
-	stderr.writeln("Loading ", path);
+	auto base = name.baseName();
+	Splitter splitterType = chain(options.rules, defaultRules).find!(rule => base.globMatch(rule.pattern)).front.splitter;
+
+	Nullable!uint mode;
+	if (path != "-")
+	{
+		mode = getLinkAttributes(path);
+		if (attrIsSymlink(mode.get()) || attrIsDir(mode.get()))
+			splitterType = Splitter.files;
+	}
+
+	stderr.writeln("Loading ", path, " [", splitterType, "]");
+	auto contents =
+		attrIsSymlink(mode.get(0)) ? path.readLink() :
+		attrIsDir(mode.get(0)) ? null :
+		cast(string)readFile(path == "-" ? stdin : File(path, "rb"));
+
+	if (options.mode == ParseOptions.Mode.json)
+		return loadJson(contents);
+
 	auto result = new Entity();
-	result.filename = name.replace(`\`, `/`);
-	result.contents = cast(string)read(path);
+	result.file = new Entity.FileProperties;
+	result.file.name = name.replace(dirSeparator, `/`);
+	result.file.mode = mode;
+	if (!mode.isNull() && !attrIsSymlink(mode.get()) && path != "-")
+	{
+		SysTime accessTime, modificationTime;
+		getTimes(path, accessTime, modificationTime);
+		result.file.times = [accessTime, modificationTime];
+	}
+	result.contents = contents;
 
-	auto base = name.baseName();
-	foreach (rule; chain(options.rules, defaultRules))
-		if (base.globMatch(rule.pattern))
-		{
-			final switch (rule.splitter)
+	final switch (splitterType)
+	{
+		case Splitter.files:
+			result.children = [new Entity(result.contents, null, null)];
+			break;
+		case Splitter.lines:
+			result.children = parseToLines(result.contents);
+			break;
+		case Splitter.words:
+			result.children = parseToWords(result.contents);
+			break;
+		case Splitter.null_:
+			result.children = parseToNull(result.contents);
+			break;
+		case Splitter.D:
+			if (result.contents.startsWith("Ddoc"))
+				goto case Splitter.files;
+
+			DSplitter splitter;
+			if (options.stripComments)
+				result.contents = splitter.stripComments(result.contents);
+
+			final switch (options.mode)
 			{
-				case Splitter.files:
-					result.children = [new Entity(result.contents, null, null)];
-					return result;
-				case Splitter.lines:
-					result.children = parseToLines(result.contents);
-					return result;
-				case Splitter.words:
-					result.children = parseToWords(result.contents);
-					return result;
-				case Splitter.D:
-				{
-					if (result.contents.startsWith("Ddoc"))
-						goto case Splitter.files;
+				case ParseOptions.Mode.json:
+					assert(false);
+				case ParseOptions.Mode.source:
+					result.children = splitter.parse(result.contents);
+					break;
+				case ParseOptions.Mode.words:
+					result.children = splitter.parseToWords(result.contents);
+					break;
+			}
+			break;
+		case Splitter.diff:
+			result.children = parseDiff(result.contents);
+			break;
+		case Splitter.indent:
+			result.children = parseIndent(result.contents, options.tabWidth);
+			break;
+		case Splitter.lisp:
+			result.children = parseLisp(result.contents);
+			break;
+	}
 
-					DSplitter splitter;
-					if (options.stripComments)
-						result.contents = splitter.stripComments(result.contents);
+	debug
+	{
+		string resultContents;
+		void walk(Entity[] entities) { foreach (e; entities) { resultContents ~= e.head; walk(e.children); resultContents ~= e.tail; }}
+		walk(result.children);
+		assert(result.contents == resultContents, "Contents mismatch after splitting:\n" ~ resultContents);
+	}
 
-					final switch (options.mode)
-					{
-						case ParseOptions.Mode.source:
-							result.children = splitter.parse(result.contents);
-							return result;
-						case ParseOptions.Mode.words:
-							result.children = splitter.parseToWords(result.contents);
-							return result;
-					}
-				}
-				case Splitter.diff:
-					result.children = parseDiff(result.contents);
-					return result;
-				case Splitter.indent:
-					result.children = parseIndent(result.contents, options.tabWidth);
-					return result;
-			}
-		}
-	assert(false); // default * rule should match everything
+	return result;
 }
 
 // *****************************************************************************************************************************************************************************
 
@@ -866,6 +945,49 @@ struct DSplitter
 		}
 	}
 
+	// Join together module names. We should not attempt to reduce "import std.stdio" to "import std" (or "import stdio").
+	static void postProcessImports(ref Entity[] entities)
+	{
+		if (entities.length && entities[0].head.strip == "import" && !entities[0].children.length && !entities[0].tail.length)
+			foreach (entity; entities[1 .. $])
+			{
+				static void visit(Entity entity)
+				{
+					static bool isValidModuleName(string s) { return s.byChar.all!(c => isWordChar(c) || isWhite(c) || c == '.'); }
+					static bool canBeMerged(Entity entity)
+					{
+						return
+							isValidModuleName(entity.head) &&
+							entity.children.all!(child => canBeMerged(child)) &&
+							isValidModuleName(entity.tail);
+					}
+
+					if (canBeMerged(entity))
+					{
+						auto root = entity;
+						// Link all ancestors to the root, and in reverse, therefore making them inextricable.
+						void link(Entity entity)
+						{
+							entity.dependents ~= EntityRef(root);
+							// root.dependents ~= EntityRef(entity);
+							foreach (child; entity.children)
+								link(child);
+						}
+						foreach (child; entity.children)
+							link(child);
+					}
+					else
+					{
+						foreach (child; entity.children)
+							visit(child);
+					}
+				}
+
+				foreach (child; entity.children)
+					visit(child);
+			}
+	}
+
 	static void postProcessDependency(ref Entity[] entities)
 	{
 		if (entities.length < 2)
@@ -1014,7 +1136,7 @@ struct DSplitter
 		{
 			if (parenKeywordTokens.canFind(entities[i].token))
 			{
-				auto pparen = firstHead(entities[i+1]);
+				auto pparen = firstNonEmpty(entities[i+1]);
 				if (pparen
 				 && *pparen !is entities[i+1]
 				 && pparen.token == tokenLookup!"(")
@@ -1086,6 +1208,7 @@ struct DSplitter
 			postProcessRecursive(e.children);
 
 		postProcessSimplify(entities);
+		postProcessImports(entities);
 		postProcessTemplates(entities);
 		postProcessDependency(entities);
 		postProcessBlockKeywords(entities);
@@ -1222,16 +1345,18 @@ struct DSplitter
 		postProcessArgs(entities);
 	}
 
-	static Entity* firstHead(ref return Entity e)
+	static Entity* firstNonEmpty(ref return Entity e)
 	{
 		if (e.head.length)
 			return &e;
 		foreach (ref c; e.children)
 		{
-			auto r = firstHead(c);
+			auto r = firstNonEmpty(c);
 			if (r)
 				return r;
 		}
+		if (e.tail.length)
+			return &e;
 		return null;
 	}
@@ -1265,6 +1390,7 @@ Entity[] parseSplit(alias fun)(string text)
 
 alias parseToWords = parseSplit!isNotAlphaNum;
 alias parseToLines = parseSplit!isNewline;
+alias parseToNull = parseSplit!(c => c == '\0');
 
 /// Split s on end~start, preserving end and start on each chunk
 private string[] split2(string end, string start)(string s)
@@ -1295,9 +1421,45 @@ unittest
 	assert(split2!("]", "[")("[foo] [bar]") == ["[foo] [bar]"]);
 }
 
+// From ae.utils.array
+template skipWhile(alias pred)
+{
+	T[] skipWhile(T)(ref T[] source, bool orUntilEnd = false)
+	{
+		enum bool isSlice = is(typeof(pred(source[0..1])));
+		enum bool isElem  = is(typeof(pred(source[0]   )));
+		static assert(isSlice || isElem, "Can't skip " ~ T.stringof ~ " until " ~ pred.stringof);
+		static assert(isSlice != isElem, "Ambiguous types for skipWhile: " ~ T.stringof ~ " and " ~ pred.stringof);
+
+		foreach (i; 0 .. source.length)
+		{
+			bool match;
+			static if (isSlice)
+				match = pred(source[i .. $]);
+			else
+				match = pred(source[i]);
+			if (!match)
+			{
+				auto result = source[0..i];
+				source = source[i .. $];
+				return result;
+			}
+		}
+
+		if (orUntilEnd)
+		{
+			auto result = source;
+			source = null;
+			return result;
+		}
+		else
+			return null;
+	}
+}
+
 Entity[] parseDiff(string s)
 {
-	return s
+	auto entities = s
 		.split2!("\n", "diff ")
 		.map!(
 			(string file)
@@ -1308,54 +1470,387 @@ Entity[] parseDiff(string s)
 		)
 		.array
 	;
+
+	// If a word occurs only in two or more (but not all) hunks,
+	// create dependency nodes which make DustMite try reducing these
+	// hunks simultaneously.
+	{
+		auto allHunks = entities.map!(entity => entity.children).join;
+		auto hunkWords = allHunks
+			.map!(hunk => hunk.head)
+			.map!((text) {
+				bool[string] words;
+				while (text.length)
+				{
+					alias isWordChar = c => isAlphaNum(c) || c == '_';
+					text.skipWhile!(not!isWordChar)(true);
+					auto word = text.skipWhile!isWordChar(true);
+					if (word.length)
+						words[word] = true;
+				}
+				return words;
+			})
+			.array;
+
+		auto allWords = hunkWords
+			.map!(words => words.byPair)
+			.joiner
+			.assocArray;
+		string[bool[]] sets; // Deduplicated sets of hunks to try to remove at once
+		foreach (word; allWords.byKey)
+		{
+			immutable bool[] hunkHasWord = hunkWords.map!(c => !!(word in c)).array.assumeUnique;
+			auto numHunksWithWord = hunkHasWord.count!(b => b);
+			if (numHunksWithWord > 1 && numHunksWithWord < allHunks.length)
+				sets[hunkHasWord] = word;
+		}
+
+		foreach (set, word; sets)
+		{
+			auto e = new Entity();
+			debug e.comments ~= word;
+			e.dependents ~= allHunks.length.iota
+				.filter!(i => set[i])
+				.map!(i => EntityRef(allHunks[i]))
+				.array;
+			entities ~= e;
+		}
+	}
+
+	return entities;
+}
+
+size_t getIndent(string line, uint tabWidth, size_t lastIndent)
+{
+	size_t indent = 0;
+charLoop:
+	foreach (c; line)
+		switch (c)
+		{
+			case ' ':
+				indent++;
+				break;
+			case '\t':
+				indent += tabWidth;
+				break;
+			case '\r':
+			case '\n':
+				// Treat empty (whitespace-only) lines as belonging to the
+				// immediately higher (most-nested) block.
+				indent = lastIndent;
+				break charLoop;
+			default:
+				break charLoop;
+		}
+	return indent;
 }
 
 Entity[] parseIndent(string s, uint tabWidth)
 {
 	Entity[] root;
-	Entity[]*[] stack;
+	Entity[] stack;
 
 	foreach (line; s.split2!("\n", ""))
 	{
-		size_t indent = 0;
-	charLoop:
-		foreach (c; line)
-			switch (c)
-			{
-				case ' ':
-					indent++;
-					break;
-				case '\t':
-					indent += tabWidth;
-					break;
-				case '\r':
-				case '\n':
-					// Treat empty (whitespace-only) lines as belonging to the
-					// immediately higher (most-nested) block.
-					indent = stack.length;
-					break charLoop;
-				default:
-					break charLoop;
-			}
-
+		auto indent = getIndent(line, tabWidth, stack.length);
 		auto e = new Entity(line);
 		foreach_reverse (i; 0 .. min(indent, stack.length)) // non-inclusively up to indent
 			if (stack[i])
 			{
-				*stack[i] ~= e;
+				stack[i].children ~= e;
 				goto parentFound;
 			}
 		root ~= e;
 	parentFound:
 		stack.length = indent + 1;
-		stack[indent] = &e.children;
+		stack[indent] = new Entity;
+		e.children ~= stack[indent];
 	}
 
 	return root;
 }
 
+Entity[] parseLisp(string s)
+{
+	// leaf head: token (non-whitespace)
+	// leaf tail: whitespace
+	// non-leaf head: "(" and any whitespace
+	// non-leaf tail: ")" and any whitespace
+
+	size_t i;
+
+	size_t last;
+	scope(success) assert(last == s.length, "Incomplete slice");
+	string slice(void delegate() advance)
+	{
+		assert(last == i, "Non-contiguous slices");
+		auto start = i;
+		advance();
+		last = i;
+		return s[start .. i];
+	}
+
+	/// How many characters did `advance` move forward by?
+	size_t countAdvance(void delegate() advance)
+	{
+		auto start = i;
+		advance();
+		return i - start;
+	}
+
+	void advanceWhitespace()
+	{
+		while (i < s.length)
+		{
+			switch (s[i])
+			{
+				case ' ':
+				case '\t':
+				case '\r':
+				case '\n':
+				case '\f':
+				case '\v':
+					i++;
+					continue;
+
+				case ';':
+					i++;
+					while (i < s.length && s[i] != '\n')
+						i++;
+					continue;
+
+				default:
+					return; // stop
+			}
+			assert(false); // unreachable
+		}
+	}
+
+	void advanceToken()
+	{
+		assert(countAdvance(&advanceWhitespace) == 0);
+		assert(i < s.length);
+
+		switch (s[i])
+		{
+			case '(':
+			case ')':
+			case '[':
+			case ']':
+				assert(false);
+			case '"':
+				i++;
+				while (i < s.length)
+				{
+					switch (s[i])
+					{
+						case '"':
+							i++;
+							return; // stop
+
+						case '\\':
+							i++;
+							if (i < s.length)
+								i++;
+							continue;
+
+						default:
+							i++;
+							continue;
+					}
+					assert(false); // unreachable
+				}
+				break;
+			default:
+				while (i < s.length)
+				{
+					switch (s[i])
+					{
+						case ' ':
+						case '\t':
+						case '\r':
+						case '\n':
+						case '\f':
+						case '\v':
+						case ';':
+
+						case '"':
+						case '(':
+						case ')':
+						case '[':
+						case ']':
+							return; // stop
+
+						case '\\':
+							i++;
+							if (i < s.length)
+								i++;
+							continue;
+
+						default:
+							i++;
+							continue;
+					}
+					assert(false); // unreachable
+				}
+				break;
+		}
+	}
+
+	void advanceParen(char paren)
+	{
+		assert(i < s.length && s[i] == paren);
+		i++;
+		advanceWhitespace();
+	}
+
+	Entity[] parse(bool topLevel)
+	{
+		Entity[] result;
+		if (topLevel) // Handle reading whitespace at top-level
+		{
+			auto ws = slice(&advanceWhitespace);
+			if (ws.length)
+				result ~= new Entity(ws);
+		}
+
+		Entity parseParen(char open, char close)
+		{
+			auto entity = new Entity(slice({ advanceParen(open); }));
+			entity.children = parse(false);
+			if (i < s.length)
+				entity.tail = slice({ advanceParen(close); });
+			return entity;
+		}
+
+		while (i < s.length)
+		{
+			switch (s[i])
+			{
+				case '(':
+					result ~= parseParen('(', ')');
+					continue;
+				case '[':
+					result ~= parseParen('[', ']');
+					continue;
+
+				case ')':
+				case ']':
+					if (!topLevel)
+						break;
+					result ~= new Entity(slice({ advanceParen(s[i]); }));
+					continue;
+
+				default:
+					result ~= new Entity(
+						slice(&advanceToken),
+						null,
+						slice(&advanceWhitespace),
+					);
+					continue;
+			}
+			break;
+		}
+		return result;
+	}
+
+	return parse(true);
+}
+
 private:
 
+Entity loadJson(string contents)
+{
+	import std.json : JSONValue, parseJSON;
+
+	auto jsonDoc = parseJSON(contents);
+	enforce(jsonDoc["version"].integer == 1, "Unknown JSON version");
+
+	// Pass 1: calculate the total size of all data.
+	// --no-remove and some optimizations require that entity strings
+	// are arranged in contiguous memory.
+	size_t totalSize;
+	void scanSize(ref JSONValue v)
+	{
+		if (auto p = "head" in v.object)
+			totalSize += p.str.length;
+		if (auto p = "children" in v.object)
+			p.array.each!scanSize();
+		if (auto p = "tail" in v.object)
+			totalSize += p.str.length;
+	}
+	scanSize(jsonDoc["root"]);
+
+	auto buf = new char[totalSize];
+	size_t pos = 0;
+
+	Entity[string] labeledEntities;
+	JSONValue[][Entity] entityDependents;
+
+	// Pass 2: Create the entity tree
+	Entity parse(ref JSONValue v)
+	{
+		auto e = new Entity;
+
+		if (auto p = "filename" in v.object)
+		{
+			e.file = new Entity.FileProperties;
+			e.file.name = p.str.buildNormalizedPath;
+			enforce(e.file.name.length &&
+				!e.file.name.isAbsolute &&
+				!e.file.name.pathSplitter.canFind(`..`),
+				"Invalid filename in JSON file: " ~ p.str);
+		}
+
+		if (auto p = "head" in v.object)
+		{
+			auto end = pos + p.str.length;
+			buf[pos .. end] = p.str;
+			e.head = buf[pos .. end].assumeUnique;
+			pos = end;
+		}
+		if (auto p = "children" in v.object)
+			e.children = p.array.map!parse.array;
+		if (auto p = "tail" in v.object)
+		{
+			auto end = pos + p.str.length;
+			buf[pos .. end] = p.str;
+			e.tail = buf[pos .. end].assumeUnique;
+			pos = end;
+		}
+
+		if (auto p = "noRemove" in v.object)
+			e.noRemove = (){
+				if (*p == JSONValue(true)) return true;
+				if (*p == JSONValue(false)) return false;
+				throw new Exception("noRemove is not a boolean");
+			}();
+
+		if (auto p = "label" in v.object)
+		{
+			enforce(p.str !in labeledEntities, "Duplicate label in JSON file: " ~ p.str);
+			labeledEntities[p.str] = e;
+		}
+		if (auto p = "dependents" in v.object)
+			entityDependents[e] = p.array;
+
+		return e;
+	}
+	auto root = parse(jsonDoc["root"]);
+
+	// Pass 3: Resolve dependents
+	foreach (e, dependents; entityDependents)
+		e.dependents = dependents
+			.map!((ref d) => labeledEntities
+				.get(d.str, null)
+				.enforce("Unknown label in dependents: " ~ d.str)
+				.EntityRef
+			)
+			.array;
+
+	return root;
+}
+
 bool isNewline(char c) { return c == '\r' || c == '\n'; }
 alias isNotAlphaNum = not!isAlphaNum;
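Not part of the patch — a sketch of how `parseLisp` above carves up its input, with the expected values derived by tracing the parser (head holds the token or opening paren plus trailing whitespace, tail the closing paren plus any whitespace and comments):

	unittest
	{
		auto entities = parseLisp("(foo (bar) baz) ; comment\n");
		assert(entities.length == 1);
		assert(entities[0].head == "(");             // "(" plus following whitespace, if any
		assert(entities[0].tail == ") ; comment\n"); // ")" plus whitespace and comment
		assert(entities[0].children.length == 3);    // foo, (bar), baz
		assert(entities[0].children[0].head == "foo");
		assert(entities[0].children[0].tail == " ");
	}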