From 21a23b8530d07572c144fcac0310fe91079a96ff Mon Sep 17 00:00:00 2001
From: Sebastian Wilzbach <seb@wilzba.ch>
Date: Tue, 20 Dec 2016 22:02:45 +0100
Subject: [PATCH] Add has_public_example and tests_extractor

---
 README.md                                     |  20 +-
 styles/.gitignore                             |   4 +
 styles/dub.sdl                                |  16 ++
 styles/dub.selections.json                    |   6 +
 styles/has_public_example.d                   | 206 ++++++++++++++++++
 .../tests_extractor.d                         | 103 +++++----
 styles/utils.d                                | 109 +++++++++
 7 files changed, 415 insertions(+), 49 deletions(-)
 create mode 100644 styles/.gitignore
 create mode 100644 styles/dub.sdl
 create mode 100644 styles/dub.selections.json
 create mode 100644 styles/has_public_example.d
 rename phobos_tests_extractor.d => styles/tests_extractor.d (67%)
 mode change 100755 => 100644
 create mode 100644 styles/utils.d

diff --git a/README.md b/README.md
index 6f3e7fd556..4b03060aed 100644
--- a/README.md
+++ b/README.md
@@ -19,9 +19,10 @@ dget                   | Internal | D source code downloader.
 dman                   | Public   | D documentation lookup tool.
 dustmite               | Public   | [Test case minimization tool](https://github.com/CyberShadow/DustMite/wiki).
 get_dlibcurl32         | Internal | Win32 libcurl downloader/converter.
-phobos_tests_extractor | Internal | Extracts public unittests from Phobos (requires DUB)
+has_public_example     | Internal | Checks public functions for public examples (requires DUB)
 rdmd                   | Public   | [D build tool](http://dlang.org/rdmd.html).
 rdmd_test              | Internal | rdmd test suite.
+tests_extractor        | Internal | Extracts public unittests (requires DUB)
 tolf                   | Internal | Line endings converter.
 
 To report a problem or browse the list of open bugs, please visit the
@@ -29,3 +30,20 @@ To report a problem or browse the list of open bugs, please visit the
 
 For a list and descriptions of D development tools, please visit the
 [D wiki](http://wiki.dlang.org/Development_tools).
+
+Running DUB tools
+-----------------
+
+Some tools require D's package manager DUB.
+By default DUB builds a binary and executes it:
+
+```
+dub --root styles -c has_public_example
+```
+
+Remember that when programs are run via DUB, you need to pass in `--` before
+the program's arguments, e.g. `dub --root styles -c has_public_example -- -i ../phobos/std/algorithm`.
+
+For more information, please see [DUB's documentation][dub-doc].
+
+[dub-doc]: https://code.dlang.org/docs/commandline
diff --git a/styles/.gitignore b/styles/.gitignore
new file mode 100644
index 0000000000..367bd43faa
--- /dev/null
+++ b/styles/.gitignore
@@ -0,0 +1,4 @@
+.dub
+has_public_example
+tests_extractor
+out
diff --git a/styles/dub.sdl b/styles/dub.sdl
new file mode 100644
index 0000000000..66b9080631
--- /dev/null
+++ b/styles/dub.sdl
@@ -0,0 +1,16 @@
+dependency "libdparse" version="~>0.7.0-beta.2"
+name "styles"
+targetType "executable"
+sourceFiles "utils.d"
+
+configuration "has_public_example" {
+	name "has_public_example"
+	targetName "has_public_example"
+	sourceFiles "has_public_example.d"
+}
+
+configuration "tests_extractor" {
+	name "tests_extractor"
+	targetName "tests_extractor"
+	sourceFiles "tests_extractor.d"
+}
diff --git a/styles/dub.selections.json b/styles/dub.selections.json
new file mode 100644
index 0000000000..5758f0b2aa
--- /dev/null
+++ b/styles/dub.selections.json
@@ -0,0 +1,6 @@
+{
+	"fileVersion": 1,
+	"versions": {
+		"libdparse": "0.7.0-beta.2"
+	}
+}
diff --git a/styles/has_public_example.d b/styles/has_public_example.d
new file mode 100644
index 0000000000..13a00f0bc7
--- /dev/null
+++ b/styles/has_public_example.d
@@ -0,0 +1,206 @@
+/*
+ * Checks that all functions have a public example
+ *
+ * Copyright (C) 2016 by D Language Foundation
+ *
+ * Distributed under the Boost Software License, Version 1.0.
+ *    (See accompanying file LICENSE_1_0.txt or copy at
+ *          http://www.boost.org/LICENSE_1_0.txt)
+*/
+// Written in the D programming language.
+
+import dparse.ast;
+import std.algorithm;
+import std.experimental.logger;
+import std.range;
+import std.stdio;
+import utils;
+
+bool hadError; // set by TestVisitor.triggerError; main exits with status 1 when it is true
+
+class TestVisitor : ASTVisitor
+{
+    // Walks a module's top-level declarations and reports public functions that have no documented unittest.
+    this(string fileName, ubyte[] sourceCode)
+    {
+        this.fileName = fileName;
+        this.sourceCode = sourceCode;
+    }
+
+    alias visit = ASTVisitor.visit;
+
+    override void visit(const Module mod)
+    {
+        FunctionDeclaration lastFun; // most recent public function still waiting for a unittest
+        bool hasPublicUnittest; // whether an accepted unittest was seen since lastFun was set
+
+        foreach (decl; mod.declarations)
+        {
+            if (!isPublic(decl.attributes)) // private/protected/package declarations are skipped entirely
+                continue;
+
+            if (decl.functionDeclaration !is null)
+            {
+                if (hasDitto(decl.functionDeclaration)) // dittoed functions leave lastFun untouched
+                    continue;
+
+                if (lastFun !is null && !hasPublicUnittest) // the previous function never got its unittest
+                    triggerError(lastFun);
+
+                lastFun = cast(FunctionDeclaration) decl.functionDeclaration;
+                //debug {
+                    //lastFun.name.text.writeln;
+                //}
+                hasPublicUnittest = false;
+                continue;
+            }
+
+            if (decl.unittest_ !is null)
+            {
+                hasPublicUnittest |= validate(lastFun, decl); // one accepted unittest is enough
+                continue;
+            }
+
+            // ignore dittoed template declarations
+            if (decl.templateDeclaration !is null)
+                if (hasDitto(decl.templateDeclaration))
+                    continue;
+
+            // ignore dittoed struct declarations
+            if (decl.structDeclaration !is null)
+                if (hasDitto(decl.structDeclaration))
+                    continue;
+
+            // ran into struct or something else -> reset
+            if (lastFun !is null && !hasPublicUnittest)
+                triggerError(lastFun);
+
+            lastFun = null;
+        }
+
+        if (lastFun !is null && !hasPublicUnittest) // the last function of the module has no successor to trigger the check
+            triggerError(lastFun);
+    }
+
+private:
+    string fileName;
+    ubyte[] sourceCode;
+    /// Writes "file:line name has no public unittest" to stderr and sets the global hadError flag.
+    void triggerError(const FunctionDeclaration decl)
+    {
+        stderr.writefln("%s:%d %s has no public unittest", fileName, decl.name.line, decl.name.text);
+        hadError = true;
+    }
+    /// Returns: true if there is no pending function or if the unittest in `decl` has a ddoc header.
+    bool validate(const FunctionDeclaration lastFun, const Declaration decl)
+    {
+        // ignore module header unittest blocks or already validated functions
+        if (lastFun is null)
+            return true;
+
+        if (!hasUnittestDdocHeader(sourceCode, decl))
+            return false;
+
+        return true;
+    }
+    /// Returns: true if the declaration's comment is exactly "ditto" or "Ditto".
+    bool hasDitto(Decl)(const Decl decl)
+    {
+        if (decl.comment is null)
+            return false;
+
+        if (decl.comment == "ditto")
+            return true;
+
+        if (decl.comment == "Ditto")
+            return true;
+
+        return false;
+    }
+    /// Returns: false if any attribute is `private`, `protected` or `package`, true otherwise.
+    bool isPublic(const Attribute[] attrs)
+    {
+        import dparse.lexer : tok;
+        import std.algorithm.searching : any;
+        import std.algorithm.iteration : map;
+
+        enum tokPrivate = tok!"private", tokProtected = tok!"protected", tokPackage = tok!"package";
+
+        if (attrs !is null)
+            if (attrs.map!`a.attribute`.any!(x => x == tokPrivate || x == tokProtected || x == tokPackage))
+                return false;
+
+        return true; // no restricting attribute found (or no attributes at all)
+    }
+}
+
+void parseFile(string fileName)
+{
+    // Lexes and parses `fileName`, then checks the resulting module with TestVisitor.
+    import dparse.lexer;
+    import dparse.parser : parseModule;
+    import dparse.rollback_allocator : RollbackAllocator;
+    import std.array : uninitializedArray;
+    import std.conv : to; // used below; do not rely on another module's private import
+
+    auto inFile = File(fileName, "r");
+    if (inFile.size == 0) // rawRead throws on an empty buffer, so stop after warning
+        return warningf("%s is empty", inFile.name);
+    ubyte[] sourceCode = uninitializedArray!(ubyte[])(to!size_t(inFile.size));
+    inFile.rawRead(sourceCode);
+    LexerConfig config;
+    auto cache = StringCache(StringCache.defaultBucketCount);
+    auto tokens = getTokensForParser(sourceCode, config, &cache);
+    RollbackAllocator rba;
+    auto m = parseModule(tokens.array, fileName, &rba);
+    auto visitor = new TestVisitor(fileName, sourceCode);
+    visitor.visit(m);
+}
+
+void main(string[] args)
+{
+    import std.file;
+    import std.getopt;
+    import std.path : asNormalizedPath;
+    // Entry point: parses the options, collects the .d files and checks each of them.
+    string inputDir;
+    string ignoredFilesStr;
+
+    auto helpInfo = getopt(args, config.required,
+            "inputdir|i", "Folder to start the recursive search for unittest blocks (can be a single file)", &inputDir,
+            "ignore", "Comma-separated list of files to exclude (partial matching is supported)", &ignoredFilesStr);
+
+    if (helpInfo.helpWanted)
+    {
+        return defaultGetoptPrinter(`has_public_example
+Searches the input directory recursively to ensure that all public functions
+have a public unittest block, i.e. a unittest block that is annotated
+with a ddoc comment (///, /** or /++).
+`, helpInfo.options);
+    }
+    // the input may be a directory (searched recursively for .d files) or a single file
+    inputDir = inputDir.asNormalizedPath.array;
+
+    DirEntry[] files;
+
+    if (inputDir.isFile)
+    {
+        files = [DirEntry(inputDir)];
+        inputDir = ".";
+    }
+    else
+    {
+        files = dirEntries(inputDir, SpanMode.depth).filter!(
+                a => a.name.endsWith(".d") && !a.name.canFind(".git")).array;
+    }
+
+    auto ignoringFiles = ignoredFilesStr.split(",");
+    // check every file whose name contains none of the --ignore entries
+    foreach (file; files)
+        if (!ignoringFiles.any!(x => file.name.canFind(x)))
+            file.name.parseFile;
+    // signal failure to the caller if any function was reported
+    import core.stdc.stdlib : exit;
+    if (hadError)
+        exit(1);
+}
diff --git a/phobos_tests_extractor.d b/styles/tests_extractor.d
old mode 100755
new mode 100644
similarity index 67%
rename from phobos_tests_extractor.d
rename to styles/tests_extractor.d
index 6747302e1c..234cbbbeab
--- a/phobos_tests_extractor.d
+++ b/styles/tests_extractor.d
@@ -1,8 +1,3 @@
-#!/usr/bin/env dub
-/+ dub.sdl:
-name "check_phobos"
-dependency "libdparse" version="~>0.7.0-beta.2"
-+/
 /*
  * Parses all public unittests that are visible on dlang.org
  * (= annotated with three slashes)
@@ -23,9 +18,9 @@ import std.experimental.logger;
 import std.file;
 import std.path;
 import std.range;
-import std.regex;
 import std.stdio;
-import std.string;
+
+import utils;
 
 class TestVisitor : ASTVisitor
 {
@@ -33,25 +28,40 @@ class TestVisitor : ASTVisitor
     ubyte[] sourceCode;
     string moduleName;
 
-    this(string outFileName, string moduleName, ubyte[] sourceCode)
+    this(File outFile, ubyte[] sourceCode)
     {
-        this.outFile = File(outFileName, "w");
-        this.moduleName = moduleName;
+        this.outFile = outFile; // already-opened File supplied by the caller
         this.sourceCode = sourceCode;
     }
 
     alias visit = ASTVisitor.visit;
 
-    override void visit(const Unittest u)
+    override void visit(const Module m)
     {
-        // scan the previous line for ddoc header
-        auto prevLine = sourceCode[0 .. u.location].retro;
-        prevLine.findSkip("\n"); // skip forward to the previous line
-        auto ddocCommentSlashes = prevLine.until('\n').count('/');
+        if (m.moduleDeclaration !is null) // prefer the name given by the `module` statement
+        {
+            moduleName = m.moduleDeclaration.moduleName.identifiers.map!(i => i.text).join(".");
+        }
+        else
+        {
+            // fallback: convert the file path to its module path, e.g. std/uni.d -> std.uni (NOTE(review): this reads outFile.name, i.e. the output file's path - confirm that is intended)
+            moduleName = outFile.name.replace(".d", "").replace(dirSeparator, ".").replace(".package", "");
+        }
+        m.accept(this); // continue with the module's children
+    }
 
-        // only look for comments annotated with three slashes (///)
-        if (ddocCommentSlashes != 3)
-            return;
+    override void visit(const Declaration decl)
+    {
+        // print documented unittests ...
+        if (decl.unittest_ !is null && hasUnittestDdocHeader(sourceCode, decl))
+            print(decl.unittest_);
+        // ... and keep descending, otherwise unittests nested in structs, classes or templates are never reached
+        decl.accept(this);
+    }
+
+private:
+    void print(const Unittest u)
+    {
 
         // write the origin source code line
         outFile.writefln("// Line %d", u.line);
@@ -74,35 +84,29 @@ class TestVisitor : ASTVisitor
     }
 }
 
-void parseTests(string fileName, string moduleName, string outFileName)
+void parseFile(File inFile, File outFile)
 {
     import dparse.lexer;
-    import dparse.parser;
-    import dparse.rollback_allocator;
+    import dparse.parser : parseModule;
+    import dparse.rollback_allocator : RollbackAllocator;
     import std.array : uninitializedArray;
 
-    assert(exists(fileName));
-
-    File f = File(fileName);
-
-    if (f.size == 0)
-    {
-        warningf("%s is empty", fileName);
-        return;
-    }
+    if (inFile.size == 0) // rawRead throws on an empty buffer, so stop after warning
+        return warningf("%s is empty", inFile.name);
 
-    ubyte[] sourceCode = uninitializedArray!(ubyte[])(to!size_t(f.size));
-    f.rawRead(sourceCode);
+    ubyte[] sourceCode = uninitializedArray!(ubyte[])(to!size_t(inFile.size));
+    inFile.rawRead(sourceCode);
     LexerConfig config;
-    StringCache cache = StringCache(StringCache.defaultBucketCount);
+    auto cache = StringCache(StringCache.defaultBucketCount);
     auto tokens = getTokensForParser(sourceCode, config, &cache);
+    // parse the token stream and let TestVisitor write the documented unittests to outFile
     RollbackAllocator rba;
-    Module m = parseModule(tokens.array, fileName, &rba);
-    auto visitor = new TestVisitor(outFileName, moduleName, sourceCode);
+    auto m = parseModule(tokens.array, inFile.name, &rba);
+    auto visitor = new TestVisitor(outFile, sourceCode);
     visitor.visit(m);
 }
 
-void parseFile(string inputDir, string fileName, string outputDir, string modulePrefix = "")
+void parseFileDir(string inputDir, string fileName, string outputDir)
 {
     import std.path : buildPath, dirSeparator, buildNormalizedPath;
 
@@ -116,20 +120,16 @@ void parseFile(string inputDir, string fileName, string outputDir, string module
             fileNameNormalized[0 .. dirSeparator.length] == dirSeparator)
         fileNameNormalized = fileNameNormalized[dirSeparator.length .. $];
 
-    // convert the file path to its module path, e.g. std/uni.d -> std.uni
-    string moduleName = modulePrefix ~ fileNameNormalized.replace(".d", "")
-                                                         .replace(dirSeparator, ".")
-                                                         .replace(".package", "");
-
     // convert the file path to a nice output file, e.g. std/uni.d -> std_uni.d
     string outName = fileNameNormalized.replace(dirSeparator, "_");
 
-    parseTests(fileName, moduleName, buildPath(outputDir, outName));
+    parseFile(File(fileName, "r"), File(buildPath(outputDir, outName), "w"));
 }
 
 void main(string[] args)
 {
     import std.getopt;
+    import std.variant : Algebraic, visit;
 
     string inputDir;
     string outputDir = "./out";
@@ -138,8 +138,7 @@ void main(string[] args)
 
     auto helpInfo = getopt(args, config.required,
             "inputdir|i", "Folder to start the recursive search for unittest blocks (can be a single file)", &inputDir,
-            "outputdir|o", "Folder to which the extracted test files should be saved", &outputDir,
-            "moduleprefix", "Module prefix to use for all files (e.g. std.algorithm)", &modulePrefix,
+            "outputdir|o", "Folder to which the extracted test files should be saved (stdout for a single file)", &outputDir,
             "ignore", "Comma-separated list of files to exclude (partial matching is supported)", &ignoredFilesStr);
 
     if (helpInfo.helpWanted)
@@ -153,7 +152,7 @@ to in the output directory.
     }
 
     inputDir = inputDir.asNormalizedPath.array;
-    outputDir = outputDir.asNormalizedPath.array;
+    Algebraic!(string, File) outputLocation = cast(string) outputDir.asNormalizedPath.array;
 
     if (!exists(outputDir))
         mkdir(outputDir);
@@ -168,6 +167,11 @@ to in the output directory.
     {
         files = [DirEntry(inputDir)];
         inputDir = ".";
+        // for single files use stdout by default
+        if (outputDir == "./out")
+        {
+            outputLocation = stdout;
+        }
     }
     else
     {
@@ -181,12 +185,15 @@ to in the output directory.
     {
         if (!ignoringFiles.any!(x => file.name.canFind(x)))
         {
-            writeln("parsing ", file);
-            parseFile(inputDir, file, outputDir, modulePrefix);
+            stderr.writeln("parsing ", file);
+            outputLocation.visit!(
+                (string outputFolder) => parseFileDir(inputDir, file, outputFolder),
+                (File outputFile) => parseFile(File(file.name, "r"), outputFile),
+            );
         }
         else
         {
-            writeln("ignoring ", file);
+            stderr.writeln("ignoring ", file);
         }
     }
 }
diff --git a/styles/utils.d b/styles/utils.d
new file mode 100644
index 0000000000..6d97a6ccaf
--- /dev/null
+++ b/styles/utils.d
@@ -0,0 +1,109 @@
+/*
+ * Shared methods between style checkers
+ *
+ * Copyright (C) 2016 by D Language Foundation
+ *
+ * Distributed under the Boost Software License, Version 1.0.
+ *    (See accompanying file LICENSE_1_0.txt or copy at
+ *          http://www.boost.org/LICENSE_1_0.txt)
+*/
+// Written in the D programming language.
+
+import dparse.ast;
+import std.algorithm;
+import std.conv : to;
+import std.experimental.logger;
+import std.range;
+import std.stdio : File;
+
+/// Returns: whether the unittest in `decl` is directly preceded by a ddoc
+/// comment, i.e. a `///` line or a block comment opened with `/**` or `/++`.
+bool hasUnittestDdocHeader(ubyte[] sourceCode, const Declaration decl)
+{
+    import std.ascii : whitespace;
+
+    // Start of the unittest including any attributes written in front of it.
+    const Unittest test = decl.unittest_;
+    const size_t start = getAttributesStartLocation(sourceCode, decl.attributes, test.location);
+
+    // Walk backwards from there, skipping whitespace, to the last real character.
+    auto reversed = sourceCode[0 .. start].retro.find!(c => whitespace.countUntil(c) < 0);
+
+    // Exactly three slashes on that preceding line are taken as a `///` comment.
+    if (reversed.until('\n').count('/') == 3)
+        return true;
+
+    // Without a comment attached to the unittest there is no block comment to inspect.
+    if (test.comment is null)
+        return false;
+
+    // The text is reversed here, so a closing `*/` or `+/` shows up as `/*` or `/+`.
+    // Only the documenting forms `/**` and `/++` make the unittest public.
+    auto closing = reversed.take(2);
+    if (closing.equal("/*"))
+        return isDdocCommentLexer!'*'(reversed.drop(2));
+    if (closing.equal("/+"))
+        return isDdocCommentLexer!'+'(reversed.drop(2));
+    return false;
+}
+
+private auto isDdocCommentLexer(char symbol, Range)(Range r)
+{
+    // `r` delivers the comment backwards (its closing two characters already
+    // removed by the caller), so the opening `/` is met after the symbols
+    // that follow it in the source.
+    size_t run; // length of the current uninterrupted run of `symbol`
+
+    foreach (ch; r)
+    {
+        if (ch == symbol)
+            run++;
+        else if (ch != '/')
+            run = 0;
+        else if (run > 0)
+            return run > 1; // `/` right before the run: ddoc needs at least two symbols
+    }
+
+    // ran out of input without meeting an opening sequence
+    warning("invalid comment structure detected");
+    return false;
+}
+
+size_t getAttributesStartLocation(ubyte[] sourceCode, const Attribute[] attrs, size_t firstPos)
+{
+    // Returns the source offset at which the attributes written in front of `firstPos` begin.
+    import dparse.lexer : tok;
+    if (attrs.length == 0)
+        return firstPos;
+    // shortcut if atAttribute is the first attribute
+    if (attrs[0].atAttribute !is null)
+        return min(firstPos, attrs[0].atAttribute.startLocation);
+
+    foreach_reverse (attr; attrs)
+    {
+        if (attr.atAttribute !is null)
+            firstPos = min(firstPos, attr.atAttribute.startLocation);
+        // if an attribute is defined we can safely jump over it
+        if (attr.attribute.type != tok!"")
+        {
+            auto str = tokenRep(attr.attribute);
+            auto whitespaceLength = sourceCode[0 .. firstPos].retro.countUntil(str.retro);
+            if (whitespaceLength >= 0) // countUntil yields -1 when the token text is not found
+                firstPos -= str.length + whitespaceLength;
+        }
+    }
+    return firstPos;
+}
+
+// from dparse.formatter
+import dparse.lexer : str, Token, IdType;
+
+string tokenRep(Token t) // textual form of a token
+{
+    return t.text.length ? t.text : tokenRep(t.type); // use the token's own text if it has any, else derive it from the type
+}
+
+string tokenRep(IdType t) // textual form of a token type
+{
+    return t ? str(t) : ""; // a zero type id yields the empty string
+}