From 811c9150cc69b38706eb378670005fda10b8cacf Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Fri, 12 Apr 2024 08:06:41 +0000 Subject: [PATCH] Deployed 5e3e865 with MkDocs version: 1.5.3 --- 404.html | 23 + ADRs/0001-adrs/index.html | 23 + ADRs/0002-code-platform/index.html | 23 + ADRs/0003-ci-cd/index.html | 23 + .../0004-software-hosting-platform/index.html | 23 + ADRs/0005-python-tooling/index.html | 23 + ADRs/0006-agile-tooling/index.html | 23 + ADRs/0007-commit-convention/index.html | 23 + .../index.html | 23 + ADRs/0010-container-registry/index.html | 23 + ADRs/0011-researcher-in-residence/index.html | 23 + About/contact/index.html | 23 + About/team/index.html | 23 + Projects/TAD/index.html | 25 +- Projects/TAD/reporting_standard/index.html | 2132 +++++++++++++++++ Projects/TAD/reporting_standards/index.html | 25 +- Projects/TAD/tools/index.html | 23 + Way-of-Working/Code-Reviews/index.html | 23 + Way-of-Working/Contributing/index.html | 23 + Way-of-Working/Onboarding/Accounts/index.html | 23 + .../Onboarding/Dev-machine/index.html | 23 + Way-of-Working/Onboarding/index.html | 23 + Way-of-Working/Principles/index.html | 23 + Way-of-Working/UbiquitousLanguage/index.html | 23 + index.html | 23 + search/search_index.json | 2 +- sitemap.xml.gz | Bin 127 -> 127 bytes 27 files changed, 2687 insertions(+), 3 deletions(-) create mode 100644 Projects/TAD/reporting_standard/index.html diff --git a/404.html b/404.html index 16e177b3..02a01293 100644 --- a/404.html +++ b/404.html @@ -785,6 +785,8 @@ + + @@ -832,6 +834,27 @@ +
diff --git a/ADRs/0001-adrs/index.html b/ADRs/0001-adrs/index.html
index 02f3388c..fd9ef838 100644
diff --git a/ADRs/0002-code-platform/index.html b/ADRs/0002-code-platform/index.html
index 4ef833c1..7d1f2cd7 100644
diff --git a/ADRs/0003-ci-cd/index.html b/ADRs/0003-ci-cd/index.html
index 903789db..d737c947 100644
diff --git a/ADRs/0004-software-hosting-platform/index.html b/ADRs/0004-software-hosting-platform/index.html
index 9aca74b9..bec20f3e 100644
diff --git a/ADRs/0005-python-tooling/index.html b/ADRs/0005-python-tooling/index.html
index c0e9c719..3936ff36 100644
diff --git a/ADRs/0006-agile-tooling/index.html b/ADRs/0006-agile-tooling/index.html
index 07be87e0..86db4b3a 100644
diff --git a/ADRs/0007-commit-convention/index.html b/ADRs/0007-commit-convention/index.html
index b1cdd7b2..e89e0ab3 100644
diff --git a/ADRs/0008-architectural-diagram-tooling/index.html b/ADRs/0008-architectural-diagram-tooling/index.html
index d400cb04..f113aa82 100644
diff --git a/ADRs/0010-container-registry/index.html b/ADRs/0010-container-registry/index.html
index 86585b45..a03a42e4 100644
diff --git a/ADRs/0011-researcher-in-residence/index.html b/ADRs/0011-researcher-in-residence/index.html
index 046a841a..aa672ef3 100644
diff --git a/About/contact/index.html b/About/contact/index.html
index 3ddb278c..40660d8b 100644
diff --git a/About/team/index.html b/About/team/index.html
index e4f4f749..b5bc364d 100644
diff --git a/Projects/TAD/index.html b/Projects/TAD/index.html
index 79baf392..5962a857 100644
diff --git a/Projects/TAD/reporting_standard/index.html b/Projects/TAD/reporting_standard/index.html
new file mode 100644
index 00000000..6675f492
--- /dev/null
+++ b/Projects/TAD/reporting_standard/index.html

    TAD Reporting Standard

    +

    Version: 0.1a1

    +

    This document describes the Transparency of Algorithmic Decision making (TAD) Reporting Standard.

    +

    For reproducibility, governance, auditing and sharing of algorithmic systems it is essential to have a +reporting standard so that information about an algorithmic system can be shared. This reporting standard +describes how information about the different phases of an algorithm's life cycle can be reported. +It contains, among other things, descriptive information combined with information about the technical +tests and assessments applied.

    +
    +

    Disclaimer

    +

    The TAD Reporting Standard is work in progress. This means that the current standard is probably suboptimal +and will change significantly in future versions.

    +
    +

    Introduction

    +

Inspired by Model Cards for Model Reporting and Papers with Code Model Index, this standard extends, with a few deviations 1 2 3 4, the Hugging Face model card metadata specification to allow for:

    +
      +
1. More fine-grained information on performance metrics, by extending the metrics field from the Hugging Face metadata specification.
2. Capturing additional measurements on fairness and bias, which can be partitioned into bar-plot-like measurements (such as mean absolute SHAP values) and graph-plot-like measurements (such as partial dependence). This is achieved by defining a new field measurements.
3. Capturing assessments (such as IAMA and ALTAI). This is achieved by defining a new field assessments.
    +

    Following Hugging Face, this proposed standard will be written in yaml.

    +

This standard does not contain all fields present in the Hugging Face metadata specification. The fields that are optional in the Hugging Face specification and specific to the Hugging Face interface are omitted.

    +

Another difference is that we divide our implementation into three separate parts.

    +
      +
1. system_card, containing information about a group of ML-models which accomplish a specific task.
2. model_card, containing information about a specific data science model.
3. assessment_card, containing information about a regulatory assessment.
    +
    +

    Include statements

    +

These model_cards and assessment_cards can be included verbatim into a system_card, or referenced with an !include statement. This allows minimal cards to remain compact in a single file, while extensive cards can be split up for readability and maintainability. Our standard allows !include to be used anywhere.
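For illustration, a minimal system_card using !include might look as follows (all file names and values here are hypothetical):

schema_version: "0.1a1"                    # hypothetical example values throughout
name: CatDetector
models:
- !include cat_classifier_model.yaml       # model_card kept in a separate file
assessments:
- !include iama.yaml                       # assessment_card kept in a separate file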

    +
    +

    Specification of the standard

    +

The standard will be written in yaml. Example yaml files are given in the next section. The standard defines three cards: a system_card, a model_card and an assessment_card. A system_card contains information about an algorithmic system. It can have multiple models, and each of these models should have a model_card. Regulatory assessments can be processed in an assessment_card. Note that model_cards and assessment_cards can be included directly into the system_card or as separate yaml files with help of a yaml-include mechanism. For clarity the latter is preferred and is also used in the examples in the next section.

    +

    system_card

    +

    A system_card contains the following information.

    +
      +
1. schema_version (REQUIRED, string). Version of the schema used, for example "0.1a1".
2. name (OPTIONAL, string). Name used to describe the system.
3. upl (OPTIONAL, string). If this algorithm is part of a product offered by the Dutch Government, it should contain a URI from the Uniform Product List.
4. owners (list). There can be multiple owners. For each owner the following fields are present.
    1. oin (OPTIONAL, string). If applicable, the Organisatie-identificatienummer (OIN).
    2. organization (OPTIONAL, string). Name of the organization that owns the model. If oin is NOT provided, this field is REQUIRED.
    3. name (OPTIONAL, string). Name of a contact person within the organization.
    4. email (OPTIONAL, string). Email address of the contact person or organization.
    5. role (OPTIONAL, string). Role of the contact person. This field should only be set when the name field is set.
5. description (OPTIONAL, string). A short description of the system.
6. labels (OPTIONAL, list). This field allows storing meta information about a system. There can be multiple labels. For each label the following fields are present.
    1. name (OPTIONAL, string). Name of the label.
    2. value (OPTIONAL, string). Value of the label.
7. status (OPTIONAL, string). The status of the system. For example, the status can be "production".
8. publication_category (OPTIONAL, enum[string]). The publication category of the algorithm, chosen from ["high_risk", "other"].
9. begin_date (OPTIONAL, string). The first date the system was used. Date should be given in ISO 8601 format, i.e. YYYY-MM-DD.
10. end_date (OPTIONAL, string). The last date the system was used. Date should be given in ISO 8601 format, i.e. YYYY-MM-DD.
11. goal_and_impact (OPTIONAL, string). The purpose of the system and the impact it has on citizens and companies.
12. considerations (OPTIONAL, string). The pros and cons of using the system.
13. risk_management (OPTIONAL, string). Description of the risks associated with the system.
14. human_intervention (OPTIONAL, string). A description of to what extent there is human involvement in the system.
15. legal_base (OPTIONAL, list). If there exists a legal base for the process the system is embedded in, this field can be filled in with the relevant laws. There can be multiple legal bases. For each legal base the following fields are present.
    1. name (OPTIONAL, string). Name of the law.
    2. link (OPTIONAL, string). URI pointing towards the contents of the law.
16. used_data (OPTIONAL, string). An overview of the data that is used in the system.
17. technical_design (OPTIONAL, string). Description of how the system works.
18. external_providers (OPTIONAL, list[string]). Names of external providers, if relevant. There can be multiple external providers.
19. references (OPTIONAL, list[string]). Additional reference URIs that point to information about the system and are relevant.
    +

    1. Models

    +
      +
1. models (OPTIONAL, list[ModelCard]). A list of model cards (as defined below) or !includes of a yaml file containing a model card. This model card can for example be a model card described in the next section or a model card from Hugging Face. There can be multiple model cards, meaning multiple models are used.
    +

    2. Assessments

    +
      +
1. assessments (OPTIONAL, list[AssessmentCard]). A list of assessment cards (as defined below) or !includes of a yaml file containing an assessment card. This assessment card is an assessment card described in the next section. There can be multiple assessment cards, meaning multiple assessments were performed.
    +

    model_card

    +

    A model_card contains the following information.

    +
      +
1. language (OPTIONAL, list[string]). If relevant, the natural languages the model supports, in ISO 639. There can be multiple languages.
2. license (REQUIRED, string). Any license from the open source license list 1. If the license is NOT present in the license list, this field must be set to "other" and the following two fields will be REQUIRED.
    1. license_name (string). An id for the license.
    2. license_link (string). A link to a file of that name inside the repo, or a URL to a remote file containing the license contents.
3. tags (OPTIONAL, list[string]). Tags with keywords to describe the project. There can be multiple tags.
4. owners (list). There can be multiple owners. For each owner the following fields are present.
    1. oin (OPTIONAL, string). If applicable, the Organisatie-identificatienummer (OIN).
    2. organization (OPTIONAL, string). Name of the organization that owns the model. If oin is NOT provided, this field is REQUIRED.
    3. name (OPTIONAL, string). Name of a contact person within the organization.
    4. email (OPTIONAL, string). Email address of the contact person or organization.
    5. role (OPTIONAL, string). Role of the contact person. This field should only be set when the name field is set.
    +

    1. Model Index

    +

    There can be multiple models. For each model the following fields are present.

    +
      +
1. name (REQUIRED, string). The name of the model.
2. model (REQUIRED, string). A URI pointing to a repository containing the model file.
3. artifacts (OPTIONAL, list[string]). A list of URIs where each URI refers to a relevant model artifact that cannot be captured by any other field but is relevant to the model.
4. parameters (list). There can be multiple parameters. For each parameter the following fields are present.
    1. name (REQUIRED, string). The name of the parameter, for example "epochs".
    2. dtype (OPTIONAL, string). The datatype of the parameter, for example "int".
    3. value (OPTIONAL, string). The value of the parameter, for example 100.
    4. labels (list). This field allows storing meta information about a parameter. There can be multiple labels. For each label the following fields are present.
        1. name (OPTIONAL, string). The name of the label.
        2. dtype (OPTIONAL, string). The datatype of the label. If name is set, this field is REQUIRED.
        3. value (OPTIONAL, string). The value of the label. If name is set, this field is REQUIRED.
5. results (list). There can be multiple results. For each result the following fields are present.
    1. task (OPTIONAL, list).
        1. task_type (REQUIRED, string). The task of the model, for example "object-classification".
        2. task_name (OPTIONAL, string). A pretty name for the model task, for example "Object Classification".
    2. datasets (list). There can be multiple datasets 2. For each dataset the following fields are present.
        1. type (REQUIRED, string). The type of the dataset; can be a dataset id from Hugging Face datasets or any other link to a repository containing the dataset 3, for example "common_voice".
        2. name (REQUIRED, string). A pretty name for the dataset, for example "Common Voice (French)".
        3. split (OPTIONAL, string). The split of the dataset, for example "train".
        4. features (OPTIONAL, list[string]). List of feature names.
        5. revision (OPTIONAL, string). Version of the dataset, for example 5503434ddd753f426f4b38109466949a1217c2bb.
    3. metrics (list). There can be multiple metrics. For each metric the following fields are present.
        1. type (REQUIRED, string). A metric id from Hugging Face metrics 4, for example accuracy.
        2. name (REQUIRED, string). A descriptive name of the metric. For example, "false positive rate" is not a descriptive name, but "training false positive rate w.r.t class x" is.
        3. dtype (REQUIRED, string). The data type of the metric, for example float.
        4. value (REQUIRED, string). The value of the metric.
        5. labels (list). This field allows storing meta information about a metric. Metrics can for example be computed on subgroups of specific features: one can compute the accuracy for examples where the feature "gender" is set to "male". There can be multiple subgroups, which means that the metric is computed on the intersection of those subgroups (a sketch of such a metric follows this list). There can be multiple labels. For each label the following fields are present.
            1. name (OPTIONAL, string). The name of the feature. For example: "gender".
            2. type (OPTIONAL, string). The type of the label; can for example be set to "feature" or "output_class". If name is set, this field is REQUIRED.
            3. dtype (OPTIONAL, string). The datatype of the feature, for example float. If name is set, this field is REQUIRED.
            4. value (OPTIONAL, string). The value of the feature, for example "male". If name is set, this field is REQUIRED.
    4. measurements.
        1. bar_plots (list). The purpose of this field is to capture bar-plot-like measurements, for example SHAP values. There can be multiple bar plots. For each bar plot the following fields are present.
            1. type (REQUIRED, string). The type of bar plot, for example "SHAP".
            2. name (OPTIONAL, string). A pretty name for the plot, for example "Mean Absolute SHAP Values".
            3. results (list). The contents of the bar plot. A result represents a bar. There can be multiple results. For each result the following fields are present.
                1. name (REQUIRED, string). The name of the bar.
                2. value (REQUIRED, float). The value of the corresponding bar.
        2. graph_plots (list). The purpose of this field is to capture graph-plot-like measurements, such as partial dependence plots. There can be multiple graph plots. For each graph plot the following fields are present.
            1. type (REQUIRED, string). The type of the graph plot, for example "partial_dependence".
            2. name (OPTIONAL, string). A pretty name of the graph, for example "Partial Dependence Plot".
            3. results (list). Results contains the graph plot data. Each graph can depend on a specific output class and feature. There can be multiple results. For each result the following fields are present.
                1. class (OPTIONAL, string/int/float/bool). The output class name that the graph corresponds to. This field is not always present.
                2. feature (REQUIRED, string). The feature the graph corresponds to. This is required, since all relevant graphs depend on features.
                3. data (list).
                    1. x_value (REQUIRED, float). The x-value of the graph.
                    2. y_value (REQUIRED, float). The y-value of the graph.
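To illustrate the subgroup semantics of metric labels described above, the following sketch (with hypothetical values) records an accuracy computed on the intersection of the subgroups gender "male" and age "21":

metrics:
- type: accuracy                           # metric id from Hugging Face metrics
  name: test accuracy w.r.t. gender:male and age:21
  dtype: float
  value: "0.87"                            # hypothetical value
  labels:
  - name: gender                           # first subgroup
    type: feature
    dtype: string
    value: male
  - name: age                              # second subgroup; the metric applies to the intersection
    type: feature
    dtype: string
    value: "21"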

    assessment_card

    +

    An assessment_card contains the following information.

    +
      +
1. name (REQUIRED, string). The name of the assessment.
2. date (REQUIRED, string). The date at which the assessment is completed. Date should be given in ISO 8601 format, i.e. YYYY-MM-DD.
3. contents (list). There can be multiple items in contents. For each item the following fields are present:
    1. question (REQUIRED, string). A question.
    2. answer (REQUIRED, string). An answer.
    3. remarks (OPTIONAL, string). A field to put relevant discussion remarks in.
    4. authors (list). There can be multiple names. For each name the following field is present.
        1. name (OPTIONAL, string). The name of the author of the question.
    5. timestamp (OPTIONAL, string). A timestamp of the date and time of the answer.
    +

    Example

    +

    System Card

    +
schema_version: {system_card_version}                   # Required. Example: "0.1a1".
    +name: {system_name}                                     # Optional. Example: "AangifteVertrekBuitenland"
    +upl: {upl_uri}                                          # Optional. Example: https://standaarden.overheid.nl/owms/terms/AangifteVertrekBuitenland
    +owners:
    +- oin: {oin}                                            # Optional. Example: 00000001003214345000
    +  organization: {organization_name}                     # Optional if oin is provided, Required otherwise. Example: BZK
    +  name: {owner_name}                                    # Optional. Example: John Doe
    +  email: {owner_email}                                  # Optional. Example: johndoe@email.com
    +  role: {owner_role}                                    # Optional. Example: Data Scientist.
    +description: {system_description}                       # Optional. Short description of the system.
    +labels:                                                 # Optional labels to store metadata about the system.
    +- name: {label_name}                                    # Optional.
    +  value: {label_value}                                  # Optional.
    +status: {system_status}                                 # Optional. Example "production".
+publication_category: {system_publication_cat}          # Optional. Example: "high_risk".
+begin_date: {system_begin_date}                         # Optional. Example: 2025-01-01.
+end_date: {system_end_date}                             # Optional. Example: 2025-12-01.
    +goal_and_impact: {system_goal_and_impact}               # Optional. Goal and impact of the system.
    +considerations: {system_considerations}                 # Optional. Considerations about the system.
    +risk_management: {system_risk_management}               # Optional. Description of risks associated with the system.
+human_intervention: {system_human_intervention}         # Optional. Description of human involvement in the system.
    +legal_base:
    +- name: {law_name}                                      # Optional. Example: "AVG".
    +  link: {law_uri}                                       # Optional. Example: "https://eur-lex.europa.eu/legal-content/NL/TXT/HTML/?uri=CELEX:31995L0046".
    +used_data: {system_used_data}                           # Optional. Description of the data used by the system.
    +technical_design: {technical_design}                    # Optional. Description of the technical design of the system.
    +external_providers:
    +- {system_external_provider}                            # Optional. Reference to used external providers.
    +references:
    +- {reference_uri}                                       # Optional. Example: URI to codebase.
    +
    +models:
    + - !include {model_card_uri}                            # Optional. Example: cat_classifier_model.yaml.
    +
    +assessments:
+- !include {assessment_card_uri}                        # Optional. Example: iama.yaml.
    +
    +

    Model Card

    +
    language:
+  - {lang_0}                                            # Optional. Example: nl.
    +license: {licence}                                      # Required. Example: Apache-2.0 or any license SPDX ID from https://opensource.org/license or "other".
    +license_name: {licence_name}                            # Optional if license != other, Required otherwise. Example: 'my-license-1.0'
    +license_link: {license_link}                            # Optional if license != other, Required otherwise. Specify "LICENSE" or "LICENSE.md" to link to a file of that name inside the repo, or a URL to a remote file.
    +tags:
    +- {tag_0}                                               # Optional. Example: audio
    +- {tag_1}                                               # Optional. Example: automatic-speech-recognition
    +owners:
    +- organization: {organization_name}                     # Required. Example: BZK
    +  oin: {oin}                                            # Optional. Example: 00000001003214345000
    +  name: {owner_name}                                    # Optional. Example: John Doe
    +  email: {owner_email}                                  # Optional. Example: johndoe@email.com
    +  role: {owner_role}                                    # Optional. Example: Data Scientist.
    +
    +model-index:
    +- name: {model_id}                                      # Required. Example: CatClassifier.
    +  model: {model_uri}                                    # Required. URI to a repository containing the model file.
+  artifacts:
    +  - {model_artifact}                                    # Optional. URI to relevant model artifacts, if applicable.
    +  parameters:
    +  - name: {parameter_name}                              # Optional. Example: "epochs".
    +    dtype: {parameter_dtype}                            # Optional. Example "int".
    +    value: {parameter_value}                            # Optional. Example 100.
    +    labels:
    +      - name: {label_name}                              # Optional. Example: "gender".
+        dtype: {label_dtype}                            # Optional. Example: "string".
    +        value: {label_value}                            # Optional. Example: "female".
    +  results:
    +  - task:
    +      type: {task_type}                                 # Required. Example: image-classification.
    +      name: {task_name}                                 # Optional. Example: Image Classification.
    +    datasets:
    +      - type: {dataset_type}                            # Required. Example: common_voice. Link to a repository containing the dataset
    +        name: {dataset_name}                            # Required. Example: "Common Voice (French)". A pretty name for the dataset.
    +        split: {split}                                  # Optional. Example "train".
    +        features:
    +         - {feature_name}                               # Optional. Example: "gender".
    +        revision: {dataset_version}                     # Optional. Example: 5503434ddd753f426f4b38109466949a1217c2bb
    +    metrics:
    +    - type: {metric_type}                               # Required. Example: false-positive-rate. Use metric id from https://hf.co/metrics.
    +      name: {metric_name}                               # Required. Example: "FPR wrt class 0 restricted to feature gender:0 and age:21".
    +      dtype: {metric_dtype}                             # Required. Example: "float".
    +      value: {metric_value}                             # Required. Example: 0.75.
    +      labels:
    +        - name: {label_name}                            # Optional. Example: "gender".
+          type: {label_type}                            # Optional. Example: "feature".
+          dtype: {label_dtype}                          # Optional. Example: "string".
    +          value: {label_value}                          # Optional. Example: "female".
    +    measurements:
    +      # Bar plots should be able to capture SHAP and Robustness Toolbox from AI Verify.
    +      bar_plots:
    +      - type: {measurement_type}                        # Required. Example: "SHAP".
    +        name: {measurement_name}                        # Optional. Example: "Mean Absolute Shap Values".
    +        results:
    +        - name: {bar_name}                              # Required. The name of a bar.
    +          value: {bar_value}                            # Required. The corresponding value.
+      # Graph plots should be able to capture graph-based measurements such as partial dependence and accumulated local effects.
    +      graph_plots:
    +      - type: {measurement_type}                        # Required. Example: "partial_dependence".
    +        name: {measurement_name}                        # Optional. Example: "Partial Dependence Plot".
+        # Results store the graph plot data. So far all plots are dependent on a combination of a specific class (sometimes) and feature (always).
    +        # For example partial dependence plots are made for each feature and class.
    +        results:
    +         - class: {class_name}                          # Optional. Name of the output class the graph depends on.
    +           feature: {feature_name}                      # Required. Name of the feature the graph depends on.
    +           data:
    +            - x_value: {x_value}                        # Required. The x value of the graph data.
    +              y_value: {y_value}                        # Required. The y value of the graph data.
    +
    +

    Assessment Card

    +
    name: {assessment_name}                               # Required. Example: IAMA.
+date: {assessment_date}                               # Required. Example: 2025-03-25.
    +contents:
    +  - question: {question_text}                         # Required. Example: "Question 1: ...".
    +    answer: {answer_text}                             # Required. Example: "Answer: ...".
    +    remarks: {remarks_text}                           # Optional. Example: "Remarks: ...".
    +    authors:                                          # Optional. Example: "['John', 'Peter']".
    +      - name: {author_name}
    +    timestamp: {timestamp}                            # Optional. Example: 1711630721.
    +
    +

    Schema

    +

    JSON schema will be added when we publish the first beta version.
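As an indication of the direction only, a schema fragment for a few system_card fields could look as follows (rendered in yaml; the structure below is our assumption, not the published schema):

$schema: https://json-schema.org/draft/2020-12/schema
type: object
required: [schema_version]                 # the only REQUIRED system_card field
properties:
  schema_version: {type: string}           # e.g. "0.1a1"
  name: {type: string}
  publication_category:
    enum: [high_risk, other]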

    +
    +
    +
      +
1. Deviation from the Hugging Face specification is in the license field. Hugging Face only accepts license ids from the Hugging Face license list, while we accept any license from the Open Source License List.
2. Deviation from the Hugging Face specification is in the model_index:results:dataset field. Hugging Face only accepts one dataset, while we accept a list of datasets.
3. Deviation from the Hugging Face specification is in the dataset type field. Hugging Face only accepts dataset ids from Hugging Face datasets, while we also allow for any URL pointing to the dataset.
4. For this extension to work, relevant metrics (such as, for example, false positive rate) have to be added to the Hugging Face metrics; possibly this can be done in our organizational namespace.
    +
    + + + + + + + + + + + + + + + + + + + + + + +
diff --git a/Projects/TAD/reporting_standards/index.html b/Projects/TAD/reporting_standards/index.html
index 61546bf6..70d658b7 100644
diff --git a/Projects/TAD/tools/index.html b/Projects/TAD/tools/index.html
index c0e678dc..b127074e 100644
diff --git a/Way-of-Working/Code-Reviews/index.html b/Way-of-Working/Code-Reviews/index.html
index e8ca6258..8b5494c0 100644
diff --git a/Way-of-Working/Contributing/index.html b/Way-of-Working/Contributing/index.html
index 3b9a6830..bf481941 100644
diff --git a/Way-of-Working/Onboarding/Accounts/index.html b/Way-of-Working/Onboarding/Accounts/index.html
index 07470ec2..83a3bde3 100644
diff --git a/Way-of-Working/Onboarding/Dev-machine/index.html b/Way-of-Working/Onboarding/Dev-machine/index.html
index 4147e68f..d92db5ec 100644
diff --git a/Way-of-Working/Onboarding/index.html b/Way-of-Working/Onboarding/index.html
index 37e5b0b8..41c78fc3 100644
diff --git a/Way-of-Working/Principles/index.html b/Way-of-Working/Principles/index.html
index e3e18f44..bd0722be 100644
diff --git a/Way-of-Working/UbiquitousLanguage/index.html b/Way-of-Working/UbiquitousLanguage/index.html
index ac2b3943..3466e446 100644
diff --git a/index.html b/index.html
index 43bfb27b..542c5ecc 100644
diff --git a/search/search_index.json b/search/search_index.json
index 022c05b9..02ec669a 100644
--- a/search/search_index.json
+++ b/search/search_index.json
@@ -1 +1 @@
-{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"]},"docs":[{"location":"","title":"Home","text":"

    Here we are documenting the processes and work of the AI Validation Team at the Ministry of the Interior and Kingdom Relations in The Netherlands.

    We are a team of mostly engineers at a policy department.

    "},{"location":"#contribute","title":"Contribute","text":"

    Read our guide on how to contribute.

    "},{"location":"#contact","title":"Contact","text":"

    Our contact details are here.

    "},{"location":"ADRs/0001-adrs/","title":"ADR-0001 ADRs","text":""},{"location":"ADRs/0001-adrs/#context","title":"Context","text":"

    In modern software development practices, the use of Architecture Decision Records (ADRs) has become increasingly common. ADRs are documents that capture important architectural decisions made during the development process. These decisions play a crucial role in guiding the development team and ensuring consistency and coherence in the architecture of the software system.

    "},{"location":"ADRs/0001-adrs/#assumptions","title":"Assumptions","text":"
    1. ADRs provide a structured way to document and communicate architectural decisions.
    2. Publishing ADRs publicly fosters transparency and facilitates collaboration among team members and stakeholders.
    3. ADRs help in onboarding new team members by providing insights into past decisions and their rationale.
    "},{"location":"ADRs/0001-adrs/#decision","title":"Decision","text":"

    We will utilize ADRs in our team to document and communicate architectural decisions effectively. Furthermore, we will publish these ADRs publicly to promote transparency and facilitate collaboration.

    "},{"location":"ADRs/0001-adrs/#template","title":"Template","text":"

    Use the template below to add an ADR:

    # ADR-XXXX Title\n\n## Context\n\nWhat is the issue that we're seeing that is motivating this decision or change?\n\n## Assumptions\n\nAnything that could cause problems if untrue now or later. (optional)\n\n## Decision\n\nWhat is the change that we're proposing and/or doing?\n\n## Risks\n\nAnything that could cause malfunction, delay, or other negative impacts. (optional)\n\n## Consequences\n\nWhat becomes easier or more difficult to do because of this change?\n\n## More Information\n\nProvide additional evidence/confidence for the decision outcome\nLinks to other decisions and resources might here appear as well. (optional)\n
    "},{"location":"ADRs/0002-code-platform/","title":"ADR-0002 Code Platform","text":""},{"location":"ADRs/0002-code-platform/#context","title":"Context","text":"

In the landscape of software development, the choice of coding platform significantly impacts developer productivity, collaboration, and code quality. It's crucial to evaluate and select a coding platform that aligns with our development needs and fosters efficient workflows.

    "},{"location":"ADRs/0002-code-platform/#assumptions","title":"Assumptions","text":"

    The following assumptions are made:

    "},{"location":"ADRs/0002-code-platform/#decision","title":"Decision","text":"

    After careful consideration and evaluation of various options like GitHub, GitLab and BitBucket, we propose adopting GitHub as our primary coding platform. The decision is based on the following factors:

Costs: There are currently no costs associated with using GitHub for our use cases.

    Features and Functionality: GitHub offers a comprehensive set of features essential for modern software development and collaboration with external teams, including version control, code review, issue tracking, continuous integration, and deployment automation.

    Security: GitHub offers a complete set of security features essential to secure development like dependency management and security scanning.

Community and Ecosystem: GitHub boasts a vibrant community and ecosystem, facilitating knowledge sharing, collaboration, and access to third-party tools and services that can enhance our development workflows. Within our organization we have easy access to the team managing the GitHub organization.

    Usability and User Experience: A user-friendly interface and intuitive workflows are essential for maximizing developer productivity and minimizing onboarding time. GitHub offers a streamlined user experience and customizable workflows that align with our team's preferences and practices.

    "},{"location":"ADRs/0002-code-platform/#risks","title":"Risks","text":"

Currently, the MinBZK organization on GitHub does not have many members, indicating that our team is an early adopter of the platform within the organization. This might impact our features due to cost constraints.

    "},{"location":"ADRs/0002-code-platform/#consequences","title":"Consequences","text":"

If we choose another tool in the future, we will need to migrate our codebase and potentially rewrite some GitHub-specific features that cannot be used in another tool.

    "},{"location":"ADRs/0002-code-platform/#more-information","title":"More Information","text":"

    Alternatives considered:

    "},{"location":"ADRs/0003-ci-cd/","title":"ADR-0003 CI/CD Tooling","text":""},{"location":"ADRs/0003-ci-cd/#context","title":"Context","text":"

    Our development team wants to implement a CI/CD solution to streamline the build, testing, and deployment workflows of our software products. Currently, our codebase resides on GitHub, and we leverage Kubernetes as our chosen orchestration platform, managed by the DigiLab platform team.

    "},{"location":"ADRs/0003-ci-cd/#decision","title":"Decision","text":"

We will use the following tools for our CI/CD pipeline:

    "},{"location":"ADRs/0003-ci-cd/#consequences","title":"Consequences","text":"

    GitHub Actions aligns with our existing infrastructure, ensuring seamless integration with our codebase and minimizing operational overhead. GitHub Actions' specific syntax for CI results in vendor lock-in, necessitating significant effort to migrate to an alternative CI system in the future.

Flux, being a GitOps operator for Kubernetes, offers a declarative approach to managing deployments, enhancing reliability and repeatability within our Kubernetes ecosystem.

    "},{"location":"ADRs/0004-software-hosting-platform/","title":"ADR-0004 Software hosting platform","text":""},{"location":"ADRs/0004-software-hosting-platform/#context","title":"Context","text":"

    Our team recognizes the necessity of a platform to run our software, as our local machines lack the capacity to handle certain workloads effectively. We have evaluated several options available to us:

    1. Digilab Kubernetes
    2. Logius Kubernetes
    3. SSC-ICT VMs
    4. ODC Noord
    "},{"location":"ADRs/0004-software-hosting-platform/#assumptions","title":"Assumptions","text":"

    We operate under the following assumptions:

    "},{"location":"ADRs/0004-software-hosting-platform/#decision","title":"Decision","text":"

    We will use Digilab Kubernetes for our workloads.

    "},{"location":"ADRs/0004-software-hosting-platform/#consequences","title":"Consequences","text":"

    By choosing Digilab Kubernetes, we gain access to a namespace within their managed Kubernetes cluster. However, it's important to note that Digilab does not provide any guarantees regarding the availability of the cluster. Should our software require higher availability assurances, we may need to explore alternative solutions.

    "},{"location":"ADRs/0005-python-tooling/","title":"ADR-0005 Python coding standard and tools","text":""},{"location":"ADRs/0005-python-tooling/#context","title":"Context","text":"

    In modern software development, maintaining code quality is crucial for readability, maintainability, and collaboration. Python, being a dynamically typed language, requires robust tooling to ensure code consistency and type safety. Manual enforcement of coding standards is time-consuming and error-prone. Hence, adopting automated tooling to streamline this process is imperative.

    "},{"location":"ADRs/0005-python-tooling/#decision","title":"Decision","text":"

    We will use these standards and tools for our own projects:

When working with external projects these coding standards will not always be possible, but we will try to integrate them as much as possible.

    "},{"location":"ADRs/0005-python-tooling/#consequences","title":"Consequences","text":"

    Improved Code Quality: Adoption of these tools will lead to improved code quality, consistency, and maintainability across the project.

    Enhanced Developer Productivity: Automated code formatting and static type checking will reduce manual effort and free developers to focus more on coding logic rather than formatting and type-related issues.

    Reduced Bug Incidence: Static typing and linting will catch potential bugs and issues early in the development process, reducing the likelihood of runtime errors and debugging efforts.

    Standardized Development Workflow: By integrating pre-commit hooks, the development workflow will be standardized, ensuring that all developers follow the same code quality standards.

    "},{"location":"ADRs/0006-agile-tooling/","title":"ADR-0006 Agile tooling","text":""},{"location":"ADRs/0006-agile-tooling/#context","title":"Context","text":"

    Our development team wants to enhance transparency and productivity in our software development processes. We are using GitHub for version control and collaboration. However, to further streamline our process, there is a need to incorporate tooling for managing the effort of our team.

    "},{"location":"ADRs/0006-agile-tooling/#decision","title":"Decision","text":"

We will use GitHub Projects as our agile process tool.

    "},{"location":"ADRs/0006-agile-tooling/#consequences","title":"Consequences","text":"

GitHub Projects seamlessly integrates with our existing GitHub repositories, allowing us to manage our Agile processes within the same ecosystem where our code resides. This integration eliminates the need for additional third-party tools, simplifying our workflow.

    "},{"location":"ADRs/0007-commit-convention/","title":"ADR-0007 Commit convention","text":""},{"location":"ADRs/0007-commit-convention/#context","title":"Context","text":"

    In software development, maintaining clear and consistent commit message conventions is crucial for effective collaboration, code review, and project management. Commit messages serve as a form of documentation, helping developers understand the changes introduced by each commit without having to analyze the code diff extensively.

    "},{"location":"ADRs/0007-commit-convention/#decision","title":"Decision","text":"

    A commit message must follow the following rules:

1. The subject line (first line) MUST be no longer than 50 characters
2. The subject line MUST be in the imperative mood
3. A sentence MUST start with a capitalized first word
4. The subject line MUST not end with punctuation
5. The body line length SHOULD be restricted to 72 characters
6. The body MUST be separated from the subject line by a blank line if used
7. The body SHOULD be used to explain what and why, not how.
8. The body COULD end with a ticket number
9. The subject line COULD include a ticket number in the following format

    \\<ref>-\\<ticketnumber>: subject line

    An example of a commit message:

    Fix foo to enable bar

    or

    AB-1234: Fix foo to enable bar

    or

    Fix foo to enable bar

    This fixes the broken behavior of component abc caused by problem xyz.

    If we contribute to projects not started by us we try to follow the above standard unless a specific convention is obvious or required by the project.

    "},{"location":"ADRs/0007-commit-convention/#consequences","title":"Consequences","text":"

    In some repositories Conventional Commits are used. This ADR does not follow conventional commits.

    "},{"location":"ADRs/0008-architectural-diagram-tooling/","title":"ADR-0008 Architectural Diagram Tooling","text":""},{"location":"ADRs/0008-architectural-diagram-tooling/#context","title":"Context","text":"

    To communicate our designs in a graphical manner, it is of importance to draw architectural diagrams. For this we use tooling, that supports us in our work. We need to have something that is written so that it can be processed by both people and machine, and we want to have version control on our diagrams.

    "},{"location":"ADRs/0008-architectural-diagram-tooling/#decision","title":"Decision","text":"

We will write our architectural diagrams in Markdown-like (.mmd) files using the Mermaid syntax; to edit these diagrams one can use the various plugins. For each project where it is needed, we will add the diagrams to the repository of the subject. The level of detail we will provide in the diagrams is according to the C4-model metamodel on architecture diagramming.

    "},{"location":"ADRs/0008-architectural-diagram-tooling/#consequences","title":"Consequences","text":"

    Standardized Workflow: By maintaining architecture as code, it will be standardized in our workflow.

    Version control on diagrams: By using version control, we will be able to collaborate easier on the diagrams, and we will be able to see the history of them.

    Diagrams are in .md format: By storing our diagrams next to our code, it will be where you need it the most.

    "},{"location":"ADRs/0010-container-registry/","title":"ADR-0010 Container Registry","text":""},{"location":"ADRs/0010-container-registry/#context","title":"Context","text":"

    Containers allow us to package and run applications in a standardized and portable way. To be able to (re)use and share images, they need to be stored in a registry that is accessible by others.

    There are many container registries. During research the following registries have been noted:

    Docker Hub, GitHub Container Registry, Amazon Elastic Container Registry (ECR), Azure Container Registry (ACR), Google Artifact Registry (GAR), Red Hat Quay, GitLab Container Registry, Harbor, Sonatype Nexus Repository Manager, JFrog Artifactory.

    "},{"location":"ADRs/0010-container-registry/#assumptions","title":"Assumptions","text":""},{"location":"ADRs/0010-container-registry/#decision","title":"Decision","text":"

    We will use GitHub Container Registry.

    This aligns best with the previously made choices for GitHub as a code repository and CI/CD workflow.

    "},{"location":"ADRs/0010-container-registry/#risks","title":"Risks","text":"

    Traditionally, Docker Hub has been the place to publish images. Therefore, our images may be more difficult to discover.

    The following assumptions are not (directly) covered by the chosen registry:

    "},{"location":"ADRs/0010-container-registry/#consequences","title":"Consequences","text":"

By using GitHub Container Registry we have a container registry we can use both internally as well as share with others. This has low impact; we can always move to another registry, since the Open Container Initiative format is standardized.

    "},{"location":"ADRs/0010-container-registry/#more-information","title":"More Information","text":"

    The following sites have been consulted:

    "},{"location":"ADRs/0011-researcher-in-residence/","title":"ADR-0011 Researcher in Residence","text":""},{"location":"ADRs/0011-researcher-in-residence/#context","title":"Context","text":"

    The AI validation team works transparently. Working with public funds warrants transparency toward the public. Additionally, being transparent aligns with the team's mission of increasing the transparency of public organizations. In line with this reasoning, it is important to be open to researchers interested in the work of the AI validation team. Allowing researchers to conduct research within the team contributes to transparency and enables external perspectives and feedback to be incorporated into the team's work.

    "},{"location":"ADRs/0011-researcher-in-residence/#assumptions","title":"Assumptions","text":""},{"location":"ADRs/0011-researcher-in-residence/#decision","title":"Decision","text":"

    We have decided to include a researcher in residence as a member of our team.

    The researcher in residence takes the following form:

    The following conditions apply to the researcher in residence.

    "},{"location":"ADRs/0011-researcher-in-residence/#risks","title":"Risks","text":"

Risks around a potential chilling effect (team members not feeling free to express themselves) are mitigated by the conditions we impose. In light of the aforementioned form and conditions, we see no further significant risks.

    "},{"location":"ADRs/0011-researcher-in-residence/#consequences","title":"Consequences","text":"

    Including a researcher in residence makes it easier for them to conduct research within both the team and the wider organization where the AI validation team operates. This benefits the quality of the research findings and the feedback provided to the team and organization.

    "},{"location":"About/contact/","title":"Contact","text":"

    Contact us at ai-validatie@minbzk.nl.

    "},{"location":"About/team/","title":"Our Team","text":""},{"location":"About/team/#robbert-bos","title":"Robbert Bos","text":"

    Product Owner

    Robbert has been on a mission for over 15 years to enhance the transparency and collaboration within AI projects. Before joining this team, he founded several data science and tech companies (partly) dedicated to this cause. Robbert is passionate about solving complex problems where he connects business needs with technology and involves others in how these solutions can improve their work.

    robbertbos

    Robbert Bos

    "},{"location":"About/team/#lucas-haitsma","title":"Lucas Haitsma","text":"

    Researcher in Residence

Lucas is a PhD candidate conducting research into the regulation and governance of algorithmic discrimination by supervision and enforcement organizations. Lucas is our Researcher in Residence.

    Lucas Haitsma

    rug.nl

    "},{"location":"About/team/#berry-den-hartog","title":"Berry den Hartog","text":"

    Engineer

    Berry is a software engineer passionate about problem-solving and system optimization, with expertise in Go, Python, and C++. Specialized in architecting high-volume data processing systems and implementing Lean-Agile and DevOps practices. Experienced in managing end-to-end processes from hardware provisioning to software deployment and release.

    berrydenhartog

    Berry den Hartog

    "},{"location":"About/team/#anne-schuth","title":"Anne Schuth","text":"

    Engineering Manager

    Anne used to be a Machine Learning Engineering Manager at Spotify and previously held roles at DPG Media, Blendle, and Google AI. He holds a PhD from the University of Amsterdam.

    anneschuth

    Anne Schuth

    anneschuth.nl

    "},{"location":"About/team/#christopher-spelt","title":"Christopher Spelt","text":"

    Engineer

    After graduating in pure mathematics, Christopher transitioned into machine learning. He is passionate about solving complex problems, especially those that have a societal impact. His expertise lies in math and machine learning theory, and he is skilled in Python.

    ChristopherSpelt

    Christopher Spelt

    "},{"location":"About/team/#willy-tadema","title":"Willy Tadema","text":"

    AI Ethics Lead

    Willy specializes in AI governance, AI risk management, AI assurance and ethics-by-design. She is an advocate of AI standards and a member of several ethics committees.

    FrieseWoudloper

    Willy Tadema

    "},{"location":"About/team/#robbert-uittenbroek","title":"Robbert Uittenbroek","text":"

    Engineer

    Robbert is a highly enthusiastic full-stack engineer with a Bachelor's degree in Computer Science from the Hanze University of Applied Sciences in Groningen. He is passionate about building secure, compliant, and ethical solutions, and thrives in collaborative environments. Robbert is eager to leverage his skills and knowledge to help shape and propel the future of IT within the government.

    uittenbroekrobbert

    Robbert Uittenbroek

    "},{"location":"About/team/#laurens-weijs","title":"Laurens Weijs","text":"

    Engineer

    Laurens is a passionate guy with a love for innovation and doing things differently. With a background in Econometrics and Computer Science he loves to tackle the IT challenges of the Government by helping other people through extensive knowledge sharing on stage, building neural networks himself, or building a strong community.

    laurensWe

    Laurens Weijs

    "},{"location":"Projects/TAD/","title":"Transparency of algorithmic decision making","text":"

    This document contains a checklist with requirements for tools we could use to help with the transparency of algorithmic decision making.

    The requirements are based on:

    "},{"location":"Projects/TAD/#overview-of-requirements","title":"Overview of requirements","text":"

    The requirements have been given a priority based on the MoSCoW scale to allow for tool comparison.

    "},{"location":"Projects/TAD/#functionality","title":"Functionality","text":"Requirement Priority The tool allows users to conduct technical tests on algorithms or models, including assessments of performance, bias, and fairness. To facilitate these tests, users can input relevant datasets, M The tool allows users to choose which tests to perform. M The tool allows users to fill out questionnaires to conduct impact assessments for AI. For example IAMA or ALTAI. M The tool can generate a human readable report. M The tools works with a standardized report format, that it can read, write, and update. M The tool supports plugin functionality so additional tests can be added easily. S The tool allows to create custom reports based on components. S It is possible to add custom components for reports. S The tool provides detailed logging, including tracking of different model versions, changes in impact assessments, and technical test results for individual runs. S The tool supports saving progress. S The tool can be used on an isolated system without an internet connection. S The tool offers options to discuss and document conversations. For example, to converse about technical tests or to collaborate on impact assessments. C The tool operates with complete data privacy; it does not share any data or logging information. C The tool allows extension of report formats functionality. C The tool can be integrated in a CI/CD flow. C The tool can be offered as a (cloud) service where no local installation is required. C It is possible to define and automate workflows for repetitive tasks. C The tool offers pre-built connectors or low-code/no-code integration options to simplify the integration process. C"},{"location":"Projects/TAD/#reliability","title":"Reliability","text":"Requirement Priority The tool operates consistently and reliably, meaning it delivers the same expected results every time you use it. M The tool recovers automatically from common failures. S The tool recovers from failures quickly, minimizing data loss, for example by automatically saving intermediate test progress results. S The tool handles errors gracefully and informs users of any issues. S The tool provides clear error messages and instructions for troubleshooting. S"},{"location":"Projects/TAD/#usability","title":"Usability","text":"Requirement Priority The tool possess a clean, intuitive, and visually appealing UI that follows industry standards. S The tool provides clear and consistent navigation, making it easy for users to find what they need. S The tool is responsive and provides instant feedback. S The user interface is multilingual and supports at least English. S The tool offers keyboard shortcuts for efficient interaction. C The user interface can easily be translated into other languages. C"},{"location":"Projects/TAD/#help-documentation","title":"Help & Documentation","text":"Requirement Priority The tool provides comprehensive online help documentation with searchable functionalities. S The tool offers context-sensitive help within the application. C The online documentation includes video tutorials and training materials for ease of learning. C The project provides readily available customer support through various channels (e.g., email, phone, online chat) to address user inquiries and troubleshoot issues. C"},{"location":"Projects/TAD/#performance-efficiency","title":"Performance Efficiency","text":"Requirement Priority The tool operates efficiently and minimize resource utilization. 
M The tool responds to user actions instantly. M The tool is scalable to accommodate increased user base and data volume. S"},{"location":"Projects/TAD/#maintainability","title":"Maintainability","text":"Requirement Priority The tool is easy to modify and maintain. M The tool adheres to industry coding standards and best practices to ensure code quality and maintainability. M The code is written in a common, widely adopted and supported and actively used and maintained programming language. M The project provides version control for code changes and rollback capabilities. M The project is open source. M It is possible to contribute to the source. S The system is modular, allowing for easy modification of individual components. S Diagnostic tools are available to identify and troubleshoot issues. S"},{"location":"Projects/TAD/#security","title":"Security","text":"Requirement Priority The tool must protect data and system from unauthorized access, use, disclosure, disruption, modification, or destruction. M Regular security audits and penetration testing are conducted. S The tool enforce authorization controls based on user roles and permissions, restricting access to sensitive data and functionalities. C Data encryption is used for sensitive information at rest and in transit. C The project allows for regular security audits and penetration testing to identify vulnerabilities and ensure system integrity. C The tool implements backup functionality to ensure data availability in case of incidents. C"},{"location":"Projects/TAD/#compatibility","title":"Compatibility","text":"Requirement Priority The tool is compatible with existing systems and infrastructure. M The tool supports industry-standard data formats and protocols. M The tool operates seamlessly on supported operating systems and hardware platforms. S The tool supports commonly used data formats (e.g., CSV, Excel, JSON) for easy data exchange with other systems and tools. S The tool integrates with existing security solutions. C"},{"location":"Projects/TAD/#accessibility","title":"Accessibility","text":"Requirement Priority The tool is accessible to users with disabilities, following relevant accessibility standards (e.g., WCAG). S"},{"location":"Projects/TAD/#portability","title":"Portability","text":"Requirement Priority The tool support a range of operating systems (e.g., Windows, macOS, Linux) commonly used within an organization. S The tool minimizes dependencies on specific hardware or software configurations, promoting flexibility. S The tool offers a cloud-based deployment option or be compatible with cloud environments for scalability and accessibility. S The tool adheres to relevant cloud security standards and best practices. S"},{"location":"Projects/TAD/#deployment","title":"Deployment","text":"Requirement Priority The tool has an easy and user-friendly installation and configuration process. S The tool has on-premise or cloud-based deployment options to cater to different organizational needs and infrastructure. S"},{"location":"Projects/TAD/#legal-compliance","title":"Legal & Compliance","text":"Requirement Priority It is clear how the tool is funded to avoid improper influence due to conflicts of interest M The tool is compliant with relevant legal and regulatory requirements. S The tool adheres to (local) data privacy regulations like GDPR, ensuring the protection of user data. S The tool implements appropriate security measures to comply with industry regulations and standards. 
S The tool is licensed for use within the organization according to the terms and conditions of the license agreement. S The tool respects intellectual property rights and avoid copyright infringement issues. S"},{"location":"Projects/TAD/reporting_standards/","title":"TAD Reporting standards","text":"

    This document assesses standards that standardize the way algorithm assessments can be captured.

    "},{"location":"Projects/TAD/reporting_standards/#background","title":"Background","text":"

    There are many algorithm assessments (e.g. IAMA, HUIDERIA), technical tests on performance (e.g. accuracy, TP, FP, F1) and on fairness and bias of algorithms (e.g. SHAP), and reporting formats available. The goal is to standardize the way these different assessments and tests can be captured.

    "},{"location":"Projects/TAD/reporting_standards/#available-standards","title":"Available standards","text":""},{"location":"Projects/TAD/reporting_standards/#model-cards","title":"Model Cards","text":"

    The most interesting existing capturing methods all seem to be based on Model Cards for Model Reporting, which are:

    \"Short documents accompanying trained machine learning models that provide benchmarked evaluation in a variety of conditions, such as across different cultural, demographic, or phenotypic groups (e.g., race, geographic location, sex, Fitzpatrick skin type) and intersectional groups (e.g., age and race, or sex and Fitzpatrick skin type) that are relevant to the intended application domains. Model cards also disclose the context in which models are intended to be used, details of the performance evaluation procedures, and other relevant information\", proposed by Google. Note that \"The proposed set of sections\" in the Model Cards paper \"are intended to provide relevant details to consider, but are not intended to be complete or exhaustive, and may be tailored depending on the model, context, and stakeholders.\"

    Many companies implement their own version of Model Cards, for example Meta System Cards and the tools mentioned in the next section.

    "},{"location":"Projects/TAD/reporting_standards/#automatic-model-card-generation","title":"Automatic model card generation","text":"

    There exist tools to (semi-)automatically generate model cards:

    1. Model Card Generator by the US Census Bureau. Basic UI to create model cards and export them to markdown; also has a command line tool.
    2. Model Card Toolkit by Google. Automation only comes from integration with TensorFlow Extended and ML Metadata.
    3. VerifyML. Based on the Google toolkit, but extended to include specific tests on fairness and bias. Technical tests can be added by users, and the model card schema (in protobuf) can be extended by users.
    4. Experimental Model Cards Tool by Hugging Face. This is Hugging Face's implementation of the Google paper and provides information on the models available on their platform. The writing tool guides users through their model card and allows for up- and downloading to and from markdown (see the metadata sketch below).
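
    Since several of these tools build on the Hugging Face model card format, a minimal sketch of its YAML front-matter metadata may help for orientation. All values below are placeholders, not taken from any real model:

    ```yaml
    # Illustrative Hugging Face model card metadata (placeholder values)
    language: en
    license: apache-2.0
    model-index:
      - name: example-model
        results:
          - task:
              type: text-classification
            dataset:
              name: example-dataset
              type: example-dataset
            metrics:
              - type: accuracy
                value: 0.92
    ```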
    "},{"location":"Projects/TAD/reporting_standards/#other-standards","title":"Other standards","text":"

    A landscape analysis of ML documentation tools has been performed by Hugging Face and provides a good overview of the current landscape.

    Another interesting standard is the Algorithmic Transparency Recording Standard of the United Kingdom Government, which can be found here.

    "},{"location":"Projects/TAD/reporting_standards/#proposal","title":"Proposal","text":"

    We need a standard that captures algorithmic assessments and technical tests on models and datasets. The idea of model cards can serve as a guiding theoretical principle on how to implement such a standard. More specifically, we can draw inspiration from the existing model card schemas and implementations of VerifyML and Hugging Face. We note the following:

    1. Neither of these two standards captures algorithmic assessments.
    2. Only VerifyML has a specific format to capture some technical tests.

    Hence in any case we need to extend one of these standards. We propose to:

    1. Assess and compare these two standards.
    2. Choose the most appropriate one to extend.
    3. Extend (and possibly adjust) this standard into our own standard (in the form of a basic schema) that allows for capturing algorithmic assessments and standardizes the way technical tests can be captured.
    "},{"location":"Projects/TAD/tools/","title":"Research of tools for transparency of algorithmic decision making","text":"

    In our ongoing research on AI validation and transparency, we are seeking tools to support assessments. Ideal tools would combine various technical tests with checklists and questionnaires and have the ability to generate reports in both human-friendly and machine-exchangeable formats.

    This document contains a list of tools we have found and may want to investigate further.

    "},{"location":"Projects/TAD/tools/#ai-verify","title":"AI Verify","text":"

    AI Verify is an AI governance testing framework and software toolkit that validates the performance of AI systems against a set of internationally recognised principles through standardised tests, and is consistent with international AI governance frameworks such as those from the European Union, the OECD and Singapore.

    Links: AI Verify Homepage, AI Verify documentation, AI Verify Github.

    "},{"location":"Projects/TAD/tools/#to-investigate-further","title":"To investigate further","text":""},{"location":"Projects/TAD/tools/#verifyml","title":"VerifyML","text":"

    What is it? VerifyML is an opinionated, open-source toolkit and workflow to help companies implement human-centric AI practices. It seems pretty much equivalent to AI Verify.

    Why interesting? The functionality of this toolkit seems to match closely with that of AI Verify. It has a "git and code first approach" and offers automatic generation of model cards.

    Remarks The code seems to have been last updated 2 years ago.

    Links: VerifyML, VerifyML GitHub

    "},{"location":"Projects/TAD/tools/#ibm-research-360-toolkit","title":"IBM Research 360 Toolkit","text":"

    What is it? Open-source Python libraries that support interpretability and explainability of datasets and machine learning models. The most relevant toolkits are AI Fairness 360 and AI Explainability 360.

    Why interesting? Seems to encompass extensive fairness and explainability tests. Codebase seems to be active.

    Remarks It comes as Python and R libraries.

    Links: AI Fairness 360 Github, AI Explainability 360 Github.

    "},{"location":"Projects/TAD/tools/#hollisticai","title":"Hollisticai","text":"

    What is it? Open source tool to assess and improve the trustworthiness of AI systems. Offers tools to measure and mitigate bias across numerous tasks. Will be extended to include tools for efficacy, robustness, privacy and explainability.

    Why interesting? Although it is not entirely clear what exactly this tool does (see Remarks), it does seem (according to their website) to provide reports on bias and fairness. The GitHub repo does not seem to include any report-generating code, but mainly technical tests. Here is an example in which bias is measured in a classification model.

    Remarks The website seems to suggest the possibility to generate reports, but this is not directly reflected in the codebase. Possibly reports are only available with some sort of licensed product?

    Links: Hollisticai homepage, Hollisticai Github.

    "},{"location":"Projects/TAD/tools/#interesting-to-mention","title":"Interesting to mention","text":""},{"location":"Projects/TAD/tools/#the-fate-system","title":"The FATE system","text":"

    A paper by TNO about the FATE system. The acronym stands for "FAir, Transparent and Explainable Decision Making."

    Tools mentioned include some of the above: Aequitas, AI Fairness 360, Dalex, Fairlearn, Responsibly, and What-If-Tool.

    Links: Paper, Article, Microsoft links.

    "},{"location":"Way-of-Working/Code-Reviews/","title":"Code reviews","text":"

    The purpose of a code review is to ensure the quality and readability of a change, and that all requirements from the ticket have been met, before it gets merged into the main codebase. Additionally, code reviews are a communication tool: they allow team members to stay aware of changes being made.

    Code reviews involve having a team member examine the changes made by another team member and give feedback or ask questions if needed.

    "},{"location":"Way-of-Working/Code-Reviews/#creating-a-pull-request","title":"Creating a Pull Request","text":"

    We use GitHub pull requests (PR) for code reviews. You can make a draft PR if your work is still in progress. When you are done you can remove the draft status. A team member may start reviewing when the PR does not have a draft status.

    For team ADRs, at least 3 approving reviews are required; if the ADR can be expected to be controversial, all team members should approve.

    A team ADR is an ADR made in the ai-validation repository.

    All other PRs need at least 1 approving review, but can have more reviewers if desired (by either reviewer or author).

    "},{"location":"Way-of-Working/Code-Reviews/#review-process","title":"Review process","text":"

    By default the code owner, indicated in the CODEOWNERS file, will be requested to review. For us this is the GitHub team AI-validation. If the PR creator wants a specific team member to review, the PR creator should add that team member explicitly in the reviewers section of the PR. A message will be posted in Mattermost for PRs. A reviewer then indicates with an emoji reaction that they are looking at the PR.

    If the reviewer has suggestions or comments, the PR creator can fix those or reply to the suggestions. When the creator of the PR thinks they are done with the feedback, they must re-request a review from the person that did the review. The reviewer must then look at the changes and approve or add more comments. This process continues until the reviewer agrees that all is correct and approves the PR.

    Once the PR is approved, the reviewer checks whether the branch is in sync with the main branch before merging. If not, the reviewer rebases the branch. Once the branch is in sync with main, the reviewer merges the PR and checks whether the deployment is successful. If the deployment is not successful, the reviewer fixes it. If the PR needs more than one review, the last approving reviewer merges the PR.

    "},{"location":"Way-of-Working/Contributing/","title":"Contributing to AI Validation","text":"

    First off, thanks for taking the time to contribute! \u2764\ufe0f

    All types of contributions are encouraged and valued. See the Table of Contents for different ways to help and details about how this project handles them. Please make sure to read the relevant section before making your contribution. It will make it a lot easier for us maintainers and smooth out the experience for all involved. The community looks forward to your contributions. \ud83c\udf89

    "},{"location":"Way-of-Working/Contributing/#table-of-contents","title":"Table of Contents","text":""},{"location":"Way-of-Working/Contributing/#code-of-conduct","title":"Code of Conduct","text":"

    This project and everyone participating in it is governed by the Code of Conduct. By participating, you are expected to uphold this code. Please report unacceptable behavior to ai-validatie@minbzk.nl.

    "},{"location":"Way-of-Working/Contributing/#i-have-a-question","title":"I Have a Question","text":"

    Before you ask a question, it is best to search for existing Issues that might help you. In case you have found a suitable issue and still need clarification, you can write your question in this issue.

    If you then still feel the need to ask a question and need clarification, we recommend the following:

    We will then take care of the issue as soon as possible.

    "},{"location":"Way-of-Working/Contributing/#i-want-to-contribute","title":"I Want To Contribute","text":""},{"location":"Way-of-Working/Contributing/#legal-notice","title":"Legal Notice","text":"

    When contributing to this project, you must agree that you have authored 100% of the content, that you have the necessary rights to the content and that the content you contribute may be provided under the project license.

    "},{"location":"Way-of-Working/Contributing/#reporting-bugs","title":"Reporting Bugs","text":""},{"location":"Way-of-Working/Contributing/#before-submitting-a-bug-report","title":"Before Submitting a Bug Report","text":"

    A good bug report shouldn't leave others needing to chase you up for more information. Therefore, we ask you to investigate carefully, collect information and describe the issue in detail in your report. Please complete the following steps in advance to help us fix any potential bug as fast as possible.

    "},{"location":"Way-of-Working/Contributing/#how-do-i-submit-a-good-bug-report","title":"How Do I Submit a Good Bug Report?","text":"

    You must never report security-related issues, vulnerabilities or bugs that include sensitive information to the issue tracker, or elsewhere in public. Instead, sensitive bugs must be sent by email to ai-validatie@minbzk.nl.

    We use GitHub issues to track bugs and errors. If you run into an issue with the project:

    Once it's filed:

    "},{"location":"Way-of-Working/Contributing/#suggesting-enhancements","title":"Suggesting Enhancements","text":"

    This section guides you through submitting an enhancement suggestion for this project, including completely new features and minor improvements. Following these guidelines will help maintainers and the community to understand your suggestion and find related suggestions.

    "},{"location":"Way-of-Working/Contributing/#before-submitting-an-enhancement","title":"Before Submitting an Enhancement","text":""},{"location":"Way-of-Working/Contributing/#how-do-i-submit-a-good-enhancement-suggestion","title":"How Do I Submit a Good Enhancement Suggestion?","text":"

    Enhancement suggestions are tracked as GitHub issues.

    "},{"location":"Way-of-Working/Contributing/#styleguides","title":"Styleguides","text":""},{"location":"Way-of-Working/Contributing/#commit-messages","title":"Commit Messages","text":"

    We have commit message conventions: Commit convention

    "},{"location":"Way-of-Working/Contributing/#markdown-lint","title":"Markdown Lint","text":"

    We use markdownlint to standardize our Markdown: MarkDown lint.
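
    A minimal sketch of what such a configuration could look like, assuming markdownlint is configured through a `.markdownlint.yaml` file; the specific rule choices shown are illustrative, not our mandated set:

    ```yaml
    # .markdownlint.yaml (illustrative example)
    default: true      # enable all rules by default
    MD013:
      line_length: 120 # relax the default 80-character line length
    MD033: false       # allow inline HTML where the site generator needs it
    ```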

    "},{"location":"Way-of-Working/Contributing/#pre-commit","title":"Pre-commit","text":"

    We use Pre-commit to enable standardization: Pre-commit.
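
    A minimal sketch of a `.pre-commit-config.yaml`, assuming hooks for whitespace hygiene and Markdown linting; the exact repositories and `rev` pins are illustrative assumptions:

    ```yaml
    # .pre-commit-config.yaml (illustrative example)
    repos:
      - repo: https://github.com/pre-commit/pre-commit-hooks
        rev: v4.5.0                  # pin hooks to a released tag
        hooks:
          - id: trailing-whitespace  # strip trailing spaces
          - id: end-of-file-fixer    # ensure files end with a newline
      - repo: https://github.com/igorshubovych/markdownlint-cli
        rev: v0.39.0
        hooks:
          - id: markdownlint         # lint Markdown files
    ```

    After cloning, run pre-commit install once so the hooks run automatically on every commit.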

    "},{"location":"Way-of-Working/Principles/","title":"Our Principles","text":"
    1. Our strong trust in the government and the dedication of people at all levels within the government organization is the basis of our actions.
    2. The interests of the citizen and society take precedence in all our activities.
    3. Learning and knowledge sharing are central: we encourage team members to take on tasks that are new or less familiar to them.
    4. Existing knowledge, policies, and proven methods are actively reused and shared.
    5. We strive for maximum openness and transparency in all our processes.
    6. We prefer the use and creation of Open Source Software.
    7. Our team members can choose to work anonymously.
    8. We treat each other with respect.
    9. Collaboration is essential to our success; we actively seek collaboration with both public and private partners.
    "},{"location":"Way-of-Working/UbiquitousLanguage/","title":"Ubiquitous Language","text":"

    For clarity and consistency, this document defines some terms used within our team where the meaning differs from their usage in Data Science or Computer Science, and terms that are otherwise worth mentioning.

    For a full reference for Machine Learning, we recommend ML Fundamentals from Google.

    "},{"location":"Way-of-Working/Onboarding/","title":"Onboarding","text":""},{"location":"Way-of-Working/Onboarding/Accounts/","title":"Accounts","text":""},{"location":"Way-of-Working/Onboarding/Accounts/#mattermost-chat","title":"Mattermost Chat","text":"

    Make sure you have installed Mattermost, then follow these steps.

    "},{"location":"Way-of-Working/Onboarding/Accounts/#webex","title":"Webex","text":"

    Make sure you have installed Webex, then follow these steps.

    "},{"location":"Way-of-Working/Onboarding/Accounts/#github","title":"Github","text":"

    Create or use your existing GitHub account.

    "},{"location":"Way-of-Working/Onboarding/Accounts/#collaboration-space","title":"Collaboration Space","text":""},{"location":"Way-of-Working/Onboarding/Accounts/#open-up-your-calendar","title":"Open up your calendar","text":""},{"location":"Way-of-Working/Onboarding/Accounts/#shared-email","title":"Shared email","text":""},{"location":"Way-of-Working/Onboarding/Accounts/#bookmark","title":"Bookmark","text":"

    Bookmark these links in your browser:

    "},{"location":"Way-of-Working/Onboarding/Dev-machine/","title":"Setting up your Dev Machine","text":"

    We are assuming your dev machine is a Mac. This guide is rather opinionated; feel free to have your own opinion, and feel free to contribute! Contributing can be done by clicking "edit" at the top right and making a pull request on this repository.

    "},{"location":"Way-of-Working/Onboarding/Dev-machine/#things-that-should-have-been-default-on-mac","title":"Things that should have been default on Mac","text":""},{"location":"Way-of-Working/Onboarding/Dev-machine/#citrix-workspace","title":"Citrix workspace","text":""},{"location":"Way-of-Working/Onboarding/Dev-machine/#communication","title":"Communication","text":""},{"location":"Way-of-Working/Onboarding/Dev-machine/#terminal-and-shell","title":"Terminal and shell","text":""},{"location":"Way-of-Working/Onboarding/Dev-machine/#coding","title":"Coding","text":""}]} \ No newline at end of file +{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"]},"docs":[{"location":"","title":"Home","text":"

    Here we are documenting the processes and work of the AI Validation Team at the Ministry of the Interior and Kingdom Relations in The Netherlands.

    We are a team of mostly engineers at a policy department.

    "},{"location":"#contribute","title":"Contribute","text":"

    Read our guide on how to contribute.

    "},{"location":"#contact","title":"Contact","text":"

    Our contact details are here.

    "},{"location":"ADRs/0001-adrs/","title":"ADR-0001 ADRs","text":""},{"location":"ADRs/0001-adrs/#context","title":"Context","text":"

    In modern software development practices, the use of Architecture Decision Records (ADRs) has become increasingly common. ADRs are documents that capture important architectural decisions made during the development process. These decisions play a crucial role in guiding the development team and ensuring consistency and coherence in the architecture of the software system.

    "},{"location":"ADRs/0001-adrs/#assumptions","title":"Assumptions","text":"
    1. ADRs provide a structured way to document and communicate architectural decisions.
    2. Publishing ADRs publicly fosters transparency and facilitates collaboration among team members and stakeholders.
    3. ADRs help in onboarding new team members by providing insights into past decisions and their rationale.
    "},{"location":"ADRs/0001-adrs/#decision","title":"Decision","text":"

    We will utilize ADRs in our team to document and communicate architectural decisions effectively. Furthermore, we will publish these ADRs publicly to promote transparency and facilitate collaboration.

    "},{"location":"ADRs/0001-adrs/#template","title":"Template","text":"

    Use the template below to add an ADR:

    # ADR-XXXX Title\n\n## Context\n\nWhat is the issue that we're seeing that is motivating this decision or change?\n\n## Assumptions\n\nAnything that could cause problems if untrue now or later. (optional)\n\n## Decision\n\nWhat is the change that we're proposing and/or doing?\n\n## Risks\n\nAnything that could cause malfunction, delay, or other negative impacts. (optional)\n\n## Consequences\n\nWhat becomes easier or more difficult to do because of this change?\n\n## More Information\n\nProvide additional evidence/confidence for the decision outcome.\nLinks to other decisions and resources might appear here as well. (optional)\n
    "},{"location":"ADRs/0002-code-platform/","title":"ADR-0002 Code Platform","text":""},{"location":"ADRs/0002-code-platform/#context","title":"Context","text":"

    In the landscape of software development, the choice of coding platform significantly impacts developer productivity, collaboration, and code quality. It's crucial to evaluate and select a coding platform that aligns with our development needs and fosters efficient workflows.

    "},{"location":"ADRs/0002-code-platform/#assumptions","title":"Assumptions","text":"

    The following assumptions are made:

    "},{"location":"ADRs/0002-code-platform/#decision","title":"Decision","text":"

    After careful consideration and evaluation of various options like GitHub, GitLab and BitBucket, we propose adopting GitHub as our primary coding platform. The decision is based on the following factors:

    Costs: There are currently no costs associated with using GitHub for our use cases.

    Features and Functionality: GitHub offers a comprehensive set of features essential for modern software development and collaboration with external teams, including version control, code review, issue tracking, continuous integration, and deployment automation.

    Security: GitHub offers a complete set of security features essential to secure development like dependency management and security scanning.

    Community and Ecosystem: GitHub boasts a vibrant community and ecosystem, facilitating knowledge sharing, collaboration, and access to third-party tools and services that can enhance our development workflows. Within our organization we have easy access to the team managing the GitHub organization.

    Usability and User Experience: A user-friendly interface and intuitive workflows are essential for maximizing developer productivity and minimizing onboarding time. GitHub offers a streamlined user experience and customizable workflows that align with our team's preferences and practices.

    "},{"location":"ADRs/0002-code-platform/#risks","title":"Risks","text":"

    Currently the MinBZK organization on GitHub does not have a lot of members, which indicates that our team is an early adopter of the platform within the organization. This might impact our features due to cost constraints.

    "},{"location":"ADRs/0002-code-platform/#consequences","title":"Consequences","text":"

    If we choose another tool in the future, we will need to migrate our codebase and potentially rewrite some GitHub-specific features that cannot be used in another tool.

    "},{"location":"ADRs/0002-code-platform/#more-information","title":"More Information","text":"

    Alternatives considered:

    "},{"location":"ADRs/0003-ci-cd/","title":"ADR-0003 CI/CD Tooling","text":""},{"location":"ADRs/0003-ci-cd/#context","title":"Context","text":"

    Our development team wants to implement a CI/CD solution to streamline the build, testing, and deployment workflows of our software products. Currently, our codebase resides on GitHub, and we leverage Kubernetes as our chosen orchestration platform, managed by the DigiLab platform team.

    "},{"location":"ADRs/0003-ci-cd/#decision","title":"Decision","text":"

    We will use the following tools for CI/CD pipeline:

    "},{"location":"ADRs/0003-ci-cd/#consequences","title":"Consequences","text":"

    GitHub Actions aligns with our existing infrastructure, ensuring seamless integration with our codebase and minimizing operational overhead. GitHub Actions' specific syntax for CI results in vendor lock-in, necessitating significant effort to migrate to an alternative CI system in the future.

    Flux, being a GitOps operator for Kubernetes, offers a declarative approach to managing deployments, enhancing reliability and repeatability within our Kubernetes ecosystem.
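
    As a hedged illustration of this setup, a minimal GitHub Actions workflow could look as follows; this is a sketch assuming a Python codebase with a pytest test suite, not our actual pipeline definition:

    ```yaml
    # .github/workflows/ci.yml (illustrative example)
    name: CI
    on:
      push:
        branches: [main]
      pull_request:
    jobs:
      test:
        runs-on: ubuntu-latest
        steps:
          - uses: actions/checkout@v4
          - uses: actions/setup-python@v5
            with:
              python-version: "3.11"
          - run: pip install -r requirements.txt  # assumes a requirements.txt
          - run: pytest                           # assumes a pytest test suite
    ```

    Deployment itself stays declarative: Flux watches the repository and reconciles the Kubernetes manifests, so the CI workflow does not need cluster credentials.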

    "},{"location":"ADRs/0004-software-hosting-platform/","title":"ADR-0004 Software hosting platform","text":""},{"location":"ADRs/0004-software-hosting-platform/#context","title":"Context","text":"

    Our team recognizes the necessity of a platform to run our software, as our local machines lack the capacity to handle certain workloads effectively. We have evaluated several options available to us:

    1. Digilab Kubernetes
    2. Logius Kubernetes
    3. SSC-ICT VMs
    4. ODC Noord
    "},{"location":"ADRs/0004-software-hosting-platform/#assumptions","title":"Assumptions","text":"

    We operate under the following assumptions:

    "},{"location":"ADRs/0004-software-hosting-platform/#decision","title":"Decision","text":"

    We will use Digilab Kubernetes for our workloads.

    "},{"location":"ADRs/0004-software-hosting-platform/#consequences","title":"Consequences","text":"

    By choosing Digilab Kubernetes, we gain access to a namespace within their managed Kubernetes cluster. However, it's important to note that Digilab does not provide any guarantees regarding the availability of the cluster. Should our software require higher availability assurances, we may need to explore alternative solutions.

    "},{"location":"ADRs/0005-python-tooling/","title":"ADR-0005 Python coding standard and tools","text":""},{"location":"ADRs/0005-python-tooling/#context","title":"Context","text":"

    In modern software development, maintaining code quality is crucial for readability, maintainability, and collaboration. Python, being a dynamically typed language, requires robust tooling to ensure code consistency and type safety. Manual enforcement of coding standards is time-consuming and error-prone. Hence, adopting automated tooling to streamline this process is imperative.

    "},{"location":"ADRs/0005-python-tooling/#decision","title":"Decision","text":"

    We will use these standards and tools for our own projects:

    When working with external projects these coding standards will not always be possible, but we will try to integrate them as much as possible.

    "},{"location":"ADRs/0005-python-tooling/#consequences","title":"Consequences","text":"

    Improved Code Quality: Adoption of these tools will lead to improved code quality, consistency, and maintainability across the project.

    Enhanced Developer Productivity: Automated code formatting and static type checking will reduce manual effort and free developers to focus more on coding logic rather than formatting and type-related issues.

    Reduced Bug Incidence: Static typing and linting will catch potential bugs and issues early in the development process, reducing the likelihood of runtime errors and debugging efforts.

    Standardized Development Workflow: By integrating pre-commit hooks, the development workflow will be standardized, ensuring that all developers follow the same code quality standards.

    "},{"location":"ADRs/0006-agile-tooling/","title":"ADR-0006 Agile tooling","text":""},{"location":"ADRs/0006-agile-tooling/#context","title":"Context","text":"

    Our development team wants to enhance transparency and productivity in our software development processes. We are using GitHub for version control and collaboration. However, to further streamline our process, there is a need to incorporate tooling for managing the effort of our team.

    "},{"location":"ADRs/0006-agile-tooling/#decision","title":"Decision","text":"

    We will use GitHub Projects as our agile process tool.

    "},{"location":"ADRs/0006-agile-tooling/#consequences","title":"Consequences","text":"

    GitHub Projects seamlessly integrates with our existing GitHub repositories, allowing us to manage our Agile processes within the same ecosystem where our code resides. This integration eliminates the need for additional third-party tools, simplifying our workflow.

    "},{"location":"ADRs/0007-commit-convention/","title":"ADR-0007 Commit convention","text":""},{"location":"ADRs/0007-commit-convention/#context","title":"Context","text":"

    In software development, maintaining clear and consistent commit message conventions is crucial for effective collaboration, code review, and project management. Commit messages serve as a form of documentation, helping developers understand the changes introduced by each commit without having to analyze the code diff extensively.

    "},{"location":"ADRs/0007-commit-convention/#decision","title":"Decision","text":"

    A commit message must follow the following rules:

    1. The subject line (first line) MUST be no longer than 50 characters
    2. The subject line MUST be in the imperative mood
    3. A sentence MUST start with a capitalized word
    4. The subject line MUST not end with punctuation
    5. The body line length SHOULD be restricted to 72 characters
    6. The body MUST be separated from the subject line by a blank line, if used
    7. The body SHOULD be used to explain what and why, not how
    8. The body COULD end with a ticket number
    9. The subject line COULD include a ticket number in the following format

    \\<ref>-\\<ticketnumber>: subject line

    An example of a commit message:

    Fix foo to enable bar

    or

    AB-1234: Fix foo to enable bar

    or

    Fix foo to enable bar

    This fixes the broken behavior of component abc caused by problem xyz.

    If we contribute to projects not started by us, we try to follow the above standard unless a specific convention is obvious or required by the project.

    "},{"location":"ADRs/0007-commit-convention/#consequences","title":"Consequences","text":"

    Some repositories use Conventional Commits. This ADR does not follow Conventional Commits.

    "},{"location":"ADRs/0008-architectural-diagram-tooling/","title":"ADR-0008 Architectural Diagram Tooling","text":""},{"location":"ADRs/0008-architectural-diagram-tooling/#context","title":"Context","text":"

    To communicate our designs in a graphical manner, it is important to draw architectural diagrams. For this we use tooling that supports us in our work. We need something that is written so that it can be processed by both people and machines, and we want to have version control on our diagrams.

    "},{"location":"ADRs/0008-architectural-diagram-tooling/#decision","title":"Decision","text":"

    We will write our architectural diagrams in Markdown-like files (.mmd) using the Mermaid syntax; to edit these diagrams one can use the various plugins. For each project where it is needed, we will add the diagrams to the repository of the subject. The level of detail we will provide in the diagrams follows the C4-model metamodel on architecture diagramming.

    "},{"location":"ADRs/0008-architectural-diagram-tooling/#consequences","title":"Consequences","text":"

    Standardized Workflow: By maintaining architecture as code, diagramming becomes a standardized part of our workflow.

    Version control on diagrams: By using version control, we will be able to collaborate easier on the diagrams, and we will be able to see the history of them.

    Diagrams are in .mmd format: By storing our diagrams next to our code, they will be where you need them the most.

    "},{"location":"ADRs/0010-container-registry/","title":"ADR-0010 Container Registry","text":""},{"location":"ADRs/0010-container-registry/#context","title":"Context","text":"

    Containers allow us to package and run applications in a standardized and portable way. To be able to (re)use and share images, they need to be stored in a registry that is accessible by others.

    There are many container registries. During research the following registries have been noted:

    Docker Hub, GitHub Container Registry, Amazon Elastic Container Registry (ECR), Azure Container Registry (ACR), Google Artifact Registry (GAR), Red Hat Quay, GitLab Container Registry, Harbor, Sonatype Nexus Repository Manager, JFrog Artifactory.

    "},{"location":"ADRs/0010-container-registry/#assumptions","title":"Assumptions","text":""},{"location":"ADRs/0010-container-registry/#decision","title":"Decision","text":"

    We will use GitHub Container Registry.

    This aligns best with the previously made choices for GitHub as a code repository and CI/CD workflow.
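
    A hedged sketch of how an image could be published to this registry from CI, consistent with the GitHub Actions choice in ADR-0003; the image name is a placeholder and the exact workflow is illustrative:

    ```yaml
    # .github/workflows/publish.yml (illustrative example)
    name: Publish image
    on:
      push:
        tags: ["v*"]
    jobs:
      publish:
        runs-on: ubuntu-latest
        permissions:
          contents: read
          packages: write   # allow pushing to GitHub Container Registry
        steps:
          - uses: actions/checkout@v4
          - uses: docker/login-action@v3
            with:
              registry: ghcr.io
              username: ${{ github.actor }}
              password: ${{ secrets.GITHUB_TOKEN }}
          - uses: docker/build-push-action@v5
            with:
              push: true
              tags: ghcr.io/minbzk/example-image:${{ github.ref_name }}  # placeholder image name
    ```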

    "},{"location":"ADRs/0010-container-registry/#risks","title":"Risks","text":"

    Traditionally, Docker Hub has been the place to publish images. Therefore, our images may be more difficult to discover.

    The following assumptions are not (directly) covered by the chosen registry:

    "},{"location":"ADRs/0010-container-registry/#consequences","title":"Consequences","text":"

    By using GitHub Container Registry we have a container registry that we can use internally as well as share with others. This choice has low impact: we can always move to another registry, since the Open Container Initiative format is standardized.

    "},{"location":"ADRs/0010-container-registry/#more-information","title":"More Information","text":"

    The following sites have been consulted:

    "},{"location":"ADRs/0011-researcher-in-residence/","title":"ADR-0011 Researcher in Residence","text":""},{"location":"ADRs/0011-researcher-in-residence/#context","title":"Context","text":"

    The AI validation team works transparently. Working with public funds warrants transparency toward the public. Additionally, being transparent aligns with the team's mission of increasing the transparency of public organizations. In line with this reasoning, it is important to be open to researchers interested in the work of the AI validation team. Allowing researchers to conduct research within the team contributes to transparency and enables external perspectives and feedback to be incorporated into the team's work.

    "},{"location":"ADRs/0011-researcher-in-residence/#assumptions","title":"Assumptions","text":""},{"location":"ADRs/0011-researcher-in-residence/#decision","title":"Decision","text":"

    We have decided to include a researcher in residence as a member of our team.

    The researcher in residence takes the following form:

    The following conditions apply to the researcher in residence.

    "},{"location":"ADRs/0011-researcher-in-residence/#risks","title":"Risks","text":"

    Risks around a potential chilling effect (team members not feeling free to express themselves) are mitigated by the conditions we impose. In light of the aforementioned form and conditions, we see no further significant risks.

    "},{"location":"ADRs/0011-researcher-in-residence/#consequences","title":"Consequences","text":"

    Including a researcher in residence makes it easier for them to conduct research within both the team and the wider organization where the AI validation team operates. This benefits the quality of the research findings and the feedback provided to the team and organization.

    "},{"location":"About/contact/","title":"Contact","text":"

    Contact us at ai-validatie@minbzk.nl.

    "},{"location":"About/team/","title":"Our Team","text":""},{"location":"About/team/#robbert-bos","title":"Robbert Bos","text":"

    Product Owner

    Robbert has been on a mission for over 15 years to enhance the transparency and collaboration within AI projects. Before joining this team, he founded several data science and tech companies (partly) dedicated to this cause. Robbert is passionate about solving complex problems where he connects business needs with technology and involves others in how these solutions can improve their work.

    robbertbos

    Robbert Bos

    "},{"location":"About/team/#lucas-haitsma","title":"Lucas Haitsma","text":"

    Researcher in Residence

    Lucas is a PhD candidate conducting research into the regulation and governance of algorithmic discrimination by supervision and enforcement organizations. Lucas is our Researcher in Residence.

    Lucas Haitsma

    rug.nl

    "},{"location":"About/team/#berry-den-hartog","title":"Berry den Hartog","text":"

    Engineer

    Berry is a software engineer passionate about problem-solving and system optimization, with expertise in Go, Python, and C++. Specialized in architecting high-volume data processing systems and implementing Lean-Agile and DevOps practices. Experienced in managing end-to-end processes from hardware provisioning to software deployment and release.

    berrydenhartog

    Berry den Hartog

    "},{"location":"About/team/#anne-schuth","title":"Anne Schuth","text":"

    Engineering Manager

    Anne used to be a Machine Learning Engineering Manager at Spotify and previously held roles at DPG Media, Blendle, and Google AI. He holds a PhD from the University of Amsterdam.

    anneschuth

    Anne Schuth

    anneschuth.nl

    "},{"location":"About/team/#christopher-spelt","title":"Christopher Spelt","text":"

    Engineer

    After graduating in pure mathematics, Christopher transitioned into machine learning. He is passionate about solving complex problems, especially those that have a societal impact. His expertise lies in math and machine learning theory, and he is skilled in Python.

    ChristopherSpelt

    Christopher Spelt

    "},{"location":"About/team/#willy-tadema","title":"Willy Tadema","text":"

    AI Ethics Lead

    Willy specializes in AI governance, AI risk management, AI assurance and ethics-by-design. She is an advocate of AI standards and a member of several ethics committees.

    FrieseWoudloper

    Willy Tadema

    "},{"location":"About/team/#robbert-uittenbroek","title":"Robbert Uittenbroek","text":"

    Engineer

    Robbert is a highly enthusiastic full-stack engineer with a Bachelor's degree in Computer Science from the Hanze University of Applied Sciences in Groningen. He is passionate about building secure, compliant, and ethical solutions, and thrives in collaborative environments. Robbert is eager to leverage his skills and knowledge to help shape and propel the future of IT within the government.

    uittenbroekrobbert

    Robbert Uittenbroek

    "},{"location":"About/team/#laurens-weijs","title":"Laurens Weijs","text":"

    Engineer

    Laurens is a passionate guy with a love for innovation and doing things differently. With a background in Econometrics and Computer Science he loves to tackle the IT challenges of the Government by helping other people through extensive knowledge sharing on stage, building neural networks himself, or building a strong community.

    laurensWe

    Laurens Weijs

    "},{"location":"Projects/TAD/","title":"Transparency of algorithmic decision making","text":"

    This document contains a checklist with requirements for tools we could use to help with the transparency of algorithmic decision making.

    The requirements are based on:

    "},{"location":"Projects/TAD/#overview-of-requirements","title":"Overview of requirements","text":"

    The requirements have been given a priority based on the MoSCoW scale to allow for tool comparison.

    "},{"location":"Projects/TAD/#functionality","title":"Functionality","text":"Requirement Priority The tool allows users to conduct technical tests on algorithms or models, including assessments of performance, bias, and fairness. To facilitate these tests, users can input relevant datasets, M The tool allows users to choose which tests to perform. M The tool allows users to fill out questionnaires to conduct impact assessments for AI. For example IAMA or ALTAI. M The tool can generate a human readable report. M The tools works with a standardized report format, that it can read, write, and update. M The tool supports plugin functionality so additional tests can be added easily. S The tool allows to create custom reports based on components. S It is possible to add custom components for reports. S The tool provides detailed logging, including tracking of different model versions, changes in impact assessments, and technical test results for individual runs. S The tool supports saving progress. S The tool can be used on an isolated system without an internet connection. S The tool offers options to discuss and document conversations. For example, to converse about technical tests or to collaborate on impact assessments. C The tool operates with complete data privacy; it does not share any data or logging information. C The tool allows extension of report formats functionality. C The tool can be integrated in a CI/CD flow. C The tool can be offered as a (cloud) service where no local installation is required. C It is possible to define and automate workflows for repetitive tasks. C The tool offers pre-built connectors or low-code/no-code integration options to simplify the integration process. C"},{"location":"Projects/TAD/#reliability","title":"Reliability","text":"Requirement Priority The tool operates consistently and reliably, meaning it delivers the same expected results every time you use it. M The tool recovers automatically from common failures. S The tool recovers from failures quickly, minimizing data loss, for example by automatically saving intermediate test progress results. S The tool handles errors gracefully and informs users of any issues. S The tool provides clear error messages and instructions for troubleshooting. S"},{"location":"Projects/TAD/#usability","title":"Usability","text":"Requirement Priority The tool possess a clean, intuitive, and visually appealing UI that follows industry standards. S The tool provides clear and consistent navigation, making it easy for users to find what they need. S The tool is responsive and provides instant feedback. S The user interface is multilingual and supports at least English. S The tool offers keyboard shortcuts for efficient interaction. C The user interface can easily be translated into other languages. C"},{"location":"Projects/TAD/#help-documentation","title":"Help & Documentation","text":"Requirement Priority The tool provides comprehensive online help documentation with searchable functionalities. S The tool offers context-sensitive help within the application. C The online documentation includes video tutorials and training materials for ease of learning. C The project provides readily available customer support through various channels (e.g., email, phone, online chat) to address user inquiries and troubleshoot issues. C"},{"location":"Projects/TAD/#performance-efficiency","title":"Performance Efficiency","text":"Requirement Priority The tool operates efficiently and minimize resource utilization. 
M The tool responds to user actions instantly. M The tool is scalable to accommodate increased user base and data volume. S"},{"location":"Projects/TAD/#maintainability","title":"Maintainability","text":"Requirement Priority The tool is easy to modify and maintain. M The tool adheres to industry coding standards and best practices to ensure code quality and maintainability. M The code is written in a common, widely adopted and supported and actively used and maintained programming language. M The project provides version control for code changes and rollback capabilities. M The project is open source. M It is possible to contribute to the source. S The system is modular, allowing for easy modification of individual components. S Diagnostic tools are available to identify and troubleshoot issues. S"},{"location":"Projects/TAD/#security","title":"Security","text":"Requirement Priority The tool must protect data and system from unauthorized access, use, disclosure, disruption, modification, or destruction. M Regular security audits and penetration testing are conducted. S The tool enforce authorization controls based on user roles and permissions, restricting access to sensitive data and functionalities. C Data encryption is used for sensitive information at rest and in transit. C The project allows for regular security audits and penetration testing to identify vulnerabilities and ensure system integrity. C The tool implements backup functionality to ensure data availability in case of incidents. C"},{"location":"Projects/TAD/#compatibility","title":"Compatibility","text":"Requirement Priority The tool is compatible with existing systems and infrastructure. M The tool supports industry-standard data formats and protocols. M The tool operates seamlessly on supported operating systems and hardware platforms. S The tool supports commonly used data formats (e.g., CSV, Excel, JSON) for easy data exchange with other systems and tools. S The tool integrates with existing security solutions. C"},{"location":"Projects/TAD/#accessibility","title":"Accessibility","text":"Requirement Priority The tool is accessible to users with disabilities, following relevant accessibility standards (e.g., WCAG). S"},{"location":"Projects/TAD/#portability","title":"Portability","text":"Requirement Priority The tool support a range of operating systems (e.g., Windows, macOS, Linux) commonly used within an organization. S The tool minimizes dependencies on specific hardware or software configurations, promoting flexibility. S The tool offers a cloud-based deployment option or be compatible with cloud environments for scalability and accessibility. S The tool adheres to relevant cloud security standards and best practices. S"},{"location":"Projects/TAD/#deployment","title":"Deployment","text":"Requirement Priority The tool has an easy and user-friendly installation and configuration process. S The tool has on-premise or cloud-based deployment options to cater to different organizational needs and infrastructure. S"},{"location":"Projects/TAD/#legal-compliance","title":"Legal & Compliance","text":"Requirement Priority It is clear how the tool is funded to avoid improper influence due to conflicts of interest M The tool is compliant with relevant legal and regulatory requirements. S The tool adheres to (local) data privacy regulations like GDPR, ensuring the protection of user data. S The tool implements appropriate security measures to comply with industry regulations and standards. 
S The tool is licensed for use within the organization according to the terms and conditions of the license agreement. S The tool respects intellectual property rights and avoid copyright infringement issues. S"},{"location":"Projects/TAD/reporting_standard/","title":"TAD Reporting Standard","text":"

    Version: 0.1a1

    This document describes the Transparency of Algorithmic Decision making (TAD) Reporting Standard.

    For reproducibility, governance, auditing and sharing of algorithmic systems it is essential to have a reporting standard so that information about an algorithmic system can be shared. This reporting standard describes how information about the different phases of an algorithm's life cycle can be reported. It contains, among other things, descriptive information combined with information about the technical tests and assessments applied.

    Disclaimer

    The TAD Reporting Standard is work in progress. This means that the current standard is probably suboptimal and will change significantly in future versions.

    "},{"location":"Projects/TAD/reporting_standard/#introduction","title":"Introduction","text":"

    Inspired by Model Cards for Model Reporting and Papers with Code Model Index, this standard almost extends the Hugging Face model card metadata specification (footnotes 1 2 3 4 describe the deviations) to allow for:

    1. More fine-grained information on performance metrics, by extending the metrics field from the Hugging Face metadata specification.
    2. Capturing additional measurements on fairness and bias, which can be partitioned into bar-plot-like measurements (such as mean absolute SHAP values) and graph-plot-like measurements (such as partial dependence). This is achieved by defining a new field measurements; a short sketch follows this list.
    3. Capturing assessments (such as IAMA and ALTAI). This is achieved by defining a new field assessments.
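
    As an illustration of the second extension, here is a hedged sketch of a measurements fragment as it could appear inside a model result (the field is specified in detail below; the feature names and values are invented):

    measurements:
      bar_plots:
      - type: SHAP                                          # the kind of bar plot measurement
        name: Mean Absolute SHAP Values
        results:                                            # one entry per bar
        - name: age                                         # invented feature name
          value: 0.23
        - name: income                                      # invented feature name
          value: 0.11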

    Following Hugging Face, this proposed standard will be written in YAML.

    This standard does not contain all fields present in the Hugging Face metadata specification. The fields that are optional in the Hugging Face specification and specific to the Hugging Face interface are omitted.

    Another difference is that we divide our implementation into three separate parts.

    1. system_card, containing information about a group of ML-models which accomplish a specific task.
    2. model_card, containing information about a specific data science model.
    3. assessment_card, containing information about a regulatory assessment.

    Include statements

    These model_cards and assessment_cards can be included verbatim in a system_card, or referenced with an !include statement. This allows minimal cards to remain compact in a single file, while extensive cards can be split up for readability and maintainability. Our standard allows the !include to be used anywhere.
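
    As a minimal sketch of this mechanism (the file names are hypothetical, and the !include tag presupposes a YAML loader with include support, which plain YAML parsers do not provide out of the box):

    # system_card.yaml
    schema_version: \"0.1a1\"
    name: Example System
    models:
    - !include example_model_card.yaml                      # contents of the model card are substituted here
    assessments:
    - !include example_iama.yaml                            # kept in a separate file for readability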

    "},{"location":"Projects/TAD/reporting_standard/#specification-of-the-standard","title":"Specification of the standard","text":"

    The standard will be written in YAML. Example YAML files are given in the next section. The standard defines three cards: a system_card, a model_card and an assessment_card. A system_card contains information about an algorithmic system. It can have multiple models and each of these models should have a model_card. Regulatory assessments can be processed in an assessment_card. Note that model_cards and assessment_cards can be included directly in the system_card or as separate YAML files with the help of a YAML include mechanism. For clarity the latter is preferred, and it is also used in the examples in the next section.

    "},{"location":"Projects/TAD/reporting_standard/#system_card","title":"system_card","text":"

    A system_card contains the following information.

    1. schema_version (REQUIRED, string). Version of the schema used, for example \"0.1a1\".
    2. name (OPTIONAL, string). Name used to describe the system.
    3. upl (OPTIONAL, string). If this algorithm is part of a product offered by the Dutch Government, it should contain a URI from the Uniform Product List.
    4. owners (list). There can be multiple owners. For each owner the following fields are present.

      1. oin (OPTIONAL, string). If applicable the Organisatie-identificatienummer (OIN).
      2. organization (OPTIONAL, string). Name of the organization that owns the model. If oin is NOT provided this field is REQUIRED.
      3. name (OPTIONAL, string). Name of a contact person within the organisation.
      4. email (OPTIONAL, string). Email address of the contact person or organization.
      5. role (OPTIONAL, string). Role of the contact person. This field should only be set when the name field is set.
    5. description (OPTIONAL, string). A short description of the system.

    6. labels (OPTIONAL, list). This field allows storing meta information about a system. There can be multiple labels. For each label the following fields are present.

      1. name (OPTIONAL, string). Name of the label.
      2. value (OPTIONAL, string). Value of the label.
    7. status (OPTIONAL, string). The status of the system. For example, the status can be \"production\".

    8. publication_category (OPTIONAL, enum[string]). The publication category of the algorithm should be chosen from [\"high_risk\", \"other\"].
    9. begin_date (OPTIONAL, string). The first date the system was used. Date should be given in ISO 8601 format, i.e. YYYY-MM-DD.
    10. end_date (OPTIONAL, string). The last date the system was used. Date should be given in ISO 8601 format, i.e. YYYY-MM-DD.
    11. goal_and_impact (OPTIONAL, string). The purpose of the system and the impact it has on citizens and companies.
    12. considerations (OPTIONAL, string). The pros and cons of using the system.
    13. risk_management (OPTIONAL, string). Description of the risks associated with the system.
    14. human_intervention (OPTIONAL, string). A description of to what extent there is human involvement in the system.
    15. legal_base (OPTIONAL, list). If there exists a legal base for the process the system is embedded in, this field can be filled in with the relevant laws. There can be multiple legal bases. For each legal base the following fields are present.
      1. name (OPTIONAL, string). Name of the law.
      2. link (OPTIONAL, string). URI pointing towards the contents of the law.
    16. used_data (OPTIONAL, string). An overview of the data that is used in the system.
    17. technical_design (OPTIONAL, string). Description on how the system works.
    18. external_providers (OPTIONAL, list[string]). Name of an external provider, if relevant. There can be multiple external providers.
    19. references (OPTIONAL, list[string]). Additional reference URIs that point to relevant information about the system.
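
    Under the reading that schema_version is the only REQUIRED field of a system_card, a minimal card could be as small as the following sketch (all values are invented):

    schema_version: \"0.1a1\"
    name: Example System                                    # optional
    description: A short description of the system.         # optional
    begin_date: 2024-01-01                                  # optional, ISO 8601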
    "},{"location":"Projects/TAD/reporting_standard/#1-models","title":"1. Models","text":"
    1. models (OPTIONAL, list[ModelCard]). A list of model cards (as defined below) or !includes of YAML files containing a model card. Such a model card can, for example, be a model card as described in the next section or a model card from Hugging Face. There can be multiple model cards, meaning multiple models are used.
    "},{"location":"Projects/TAD/reporting_standard/#2-assessments","title":"2. Assessments","text":"
    1. assessments (OPTIONAL, list[AssessmentCard]). A list of assessment cards (as defined below) or !includes of YAML files containing an assessment card. This assessment card is an assessment card as described in the next section. There can be multiple assessment cards, meaning multiple assessments were performed.
    "},{"location":"Projects/TAD/reporting_standard/#model_card","title":"model_card","text":"

    A model_card contains the following information.

    1. language (OPTIONAL, list[string]). If relevant, the natural languages the model supports in ISO 639. There can be multiple languages.
    2. license (REQUIRED, string). Any license from the open source license list 1. If the license is NOT present in the license list, this field must be set to 'other' and the following two fields are REQUIRED.

      1. license_name (string). An id for the license.
      2. license_link (string). A link to a file of that name inside the repo, or a URL to a remote file containing the license contents.
    3. tags (OPTIONAL, list[string]). Tags with keywords to describe the project. There can be multiple tags.

    4. owners (list). There can be multiple owners. For each owner the following fields are present.

      1. oin (OPTIONAL, string). If applicable the Organisatie-identificatienummer (OIN).
      2. organization (OPTIONAL, string). Name of the organization that owns the model. If oin is NOT provided this field is REQUIRED.
      3. name (OPTIONAL, string). Name of a contact person within the organisation.
      4. email (OPTIONAL, string). Email address of the contact person or organization.
      5. role (OPTIONAL, string). Role of the contact person. This field should only be set when the name field is set.
    "},{"location":"Projects/TAD/reporting_standard/#1-model-index","title":"1. Model Index","text":"

    There can be multiple models. For each model the following fields are present.

    1. name (REQUIRED, string). The name of the model.
    2. model (REQUIRED, string). A URI pointing to a repository containing the model file.
    3. artifacts (OPTIONAL, list[string]). A list of URIs, where each URI refers to a relevant model artifact that cannot be captured by any other field but is relevant to the model.
    4. parameters (list). There can be multiple parameters. For each parameter the following fields are present.

      1. name (REQUIRED, string). The name of the parameter, for example \"epochs\".
      2. dtype (OPTIONAL, string). The datatype of the parameter, for example \"int\".
      3. value (OPTIONAL, string). The value of the parameter, for example 100.
      4. labels (list). This field allows storing meta information about a parameter. There can be multiple labels. For each label the following fields are present.

        1. name (OPTIONAL, string). The name of the label.
        2. dtype (OPTIONAL, string). The datatype of the label. If name is set, this field is REQUIRED.
        3. value (OPTIONAL, string). The value of the label. If name is set, this field is REQUIRED.
    5. results (list). There can be multiple results. For each result the following fields are present.

      1. task (OPTIONAL, list).

        1. task_type (REQUIRED, string). The task of the model, for example \"object-classification\".
        2. task_name (OPTIONAL, string). A pretty name for the model task, for example \"Object Classification\".
      2. datasets (list). There can be multiple datasets 2. For each dataset the following fields are present.

        1. type (REQUIRED, string). The type of the dataset; can be a dataset id from Hugging Face datasets or any other link to a repository containing the dataset 3, for example \"common_voice\".
        2. name (REQUIRED, string). A pretty name for the dataset, for example \"Common Voice (French)\".
        3. split (OPTIONAL, string). The split of the dataset, for example \"train\".
        4. features (OPTIONAL, list[string]). List of feature names.
        5. revision (OPTIONAL, string). Version of the dataset, for example 5503434ddd753f426f4b38109466949a1217c2bb.
      3. metrics (list). There can be multiple metrics. For each metric the following fields are present.

        1. type (REQUIRED, string). A metric id from Hugging Face metrics 4, for example accuracy.
        2. name (REQUIRED, string). A descriptive name of the metric. For example \"false positive rate\" is not a descriptive name, but \"training false positive rate w.r.t class x\" is.
        3. dtype (REQUIRED, string). The data type of the metric, for example float.
        4. value (REQUIRED, string). The value of the metric.
        5. labels (list). This field allows storing meta information about a metric. For example, metrics can be computed on subgroups of specific features: one can compute the accuracy for examples where the feature \"gender\" is set to \"male\". There can be multiple subgroups, which means that the metric is computed on the intersection of those subgroups; a short sketch follows this list. There can be multiple labels. For each label the following fields are present.

          1. name (OPTIONAL, string). The name of the feature. For example: \"gender\".
          2. type (OPTIONAL, string). The type of the label. Can for example be set to \"feature\" or \"output_class\". If name is set, this field is REQUIRED.
          3. dtype (OPTIONAL, string). The datatype of the feature, for example float. If name is set, this field is REQUIRED.
          4. value (OPTIONAL, string). The value of the feature. If name is set, this field is REQUIRED. For example: \"male\".
      4. measurements.

        1. bar_plots (list). The purpose of this field is to capture bar-plot-like measurements, for example SHAP values. There can be multiple bar plots. For each bar plot the following fields are present.

          1. type (REQUIRED, string). The type of bar plot, for example \"SHAP\".
          2. name (OPTIONAL, string). A pretty name for the plot, for example \"Mean Absolute SHAP Values\".
          3. results (list). The contents of the bar plot. A result represents a bar. There can be multiple results. For each result the following fields are present.
            1. name (REQUIRED, string). The name of the bar.
            2. value (REQUIRED, float). The value of the corresponding bar.
        2. graph_plots (list). The purpose of this field is to capture graph-plot-like measurements, such as partial dependence plots. There can be multiple graph plots. For each graph plot the following fields are present.

          1. type (REQUIRED, string). The type of the graph plot, for example \"partial_dependence\".
          2. name (OPTIONAL, string). A pretty name for the graph, for example \"Partial Dependence Plot\".
          3. results (list). Results contains the graph plot data. Each graph can depend on a specific output class and feature. There can be multiple results. For each result the following fields are present.
            1. class (OPTIONAL, string/int/float/bool). The output class name that the graph corresponds to. This field is not always present.
            2. feature (REQUIRED, string). The feature the graph corresponds to. This is required, since all relevant graphs are dependent on features.
            3. data (list). For each data point the following fields are present.
              1. x_value (REQUIRED, float). The $x$-value of the graph.
              2. y_value (REQUIRED, float). The $y$-value of the graph.
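
    To make the subgroup labels on metrics concrete, here is a hedged sketch of a single metric entry computed on the subgroup where the feature \"gender\" equals \"male\" (the metric value and all names are invented):

    metrics:
    - type: accuracy                                        # metric id, see footnote 4
      name: test accuracy w.r.t. the subgroup gender:male
      dtype: float
      value: \"0.84\"                                       # invented value
      labels:
      - name: gender                                        # the feature that defines the subgroup
        type: feature
        dtype: string
        value: male

    Adding a second label entry (for example age with value 21) would restrict the metric to the intersection of both subgroups.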
    "},{"location":"Projects/TAD/reporting_standard/#assessment_card","title":"assessment_card","text":"

    An assessment_card contains the following information.

    1. name (REQUIRED, string). The name of the assessment.
    2. date (REQUIRED, string). The date on which the assessment was completed. Date should be given in ISO 8601 format, i.e. YYYY-MM-DD.
    3. contents (list). There can be multiple items in contents. For each item the following fields are present:

      1. question (REQUIRED, string). A question.
      2. answer (REQUIRED, string). An answer.
      3. remarks (OPTIONAL, string). A field to put relevant discussion remarks in.
      4. authors. There can be multiple names. For each name the following field is present.
        1. name (OPTIONAL, string). The name of the author of the question.
      5. timestamp (OPTIONAL, string). A timestamp of the date and time of the answer.
    "},{"location":"Projects/TAD/reporting_standard/#example","title":"Example","text":""},{"location":"Projects/TAD/reporting_standard/#system-card","title":"System Card","text":"
    schema_version: {schema_version}                        # Required. Example: \"0.1a1\"
    name: {system_name}                                     # Optional. Example: \"AangifteVertrekBuitenland\"
    upl: {upl_uri}                                          # Optional. Example: https://standaarden.overheid.nl/owms/terms/AangifteVertrekBuitenland
    owners:
    - oin: {oin}                                            # Optional. Example: 00000001003214345000
      organization: {organization_name}                     # Optional if oin is provided, Required otherwise. Example: BZK
      name: {owner_name}                                    # Optional. Example: John Doe
      email: {owner_email}                                  # Optional. Example: johndoe@email.com
      role: {owner_role}                                    # Optional. Example: Data Scientist.
    description: {system_description}                       # Optional. Short description of the system.
    labels:                                                 # Optional labels to store metadata about the system.
    - name: {label_name}                                    # Optional.
      value: {label_value}                                  # Optional.
    status: {system_status}                                 # Optional. Example: \"production\".
    publication_category: {system_publication_cat}          # Optional. Example: \"high_risk\".
    begin_date: {system_begin_date}                         # Optional. Example: 2025-01-01.
    end_date: {system_end_date}                             # Optional. Example: 2025-12-01.
    goal_and_impact: {system_goal_and_impact}               # Optional. Goal and impact of the system.
    considerations: {system_considerations}                 # Optional. Considerations about the system.
    risk_management: {system_risk_management}               # Optional. Description of risks associated with the system.
    human_intervention: {system_human_intervention}         # Optional. Description of human involvement in the system.
    legal_base:
    - name: {law_name}                                      # Optional. Example: \"AVG\".
      link: {law_uri}                                       # Optional. Example: \"https://eur-lex.europa.eu/legal-content/NL/TXT/HTML/?uri=CELEX:31995L0046\".
    used_data: {system_used_data}                           # Optional. Description of the data used by the system.
    technical_design: {technical_design}                    # Optional. Description of the technical design of the system.
    external_providers:
    - {system_external_provider}                            # Optional. Reference to used external providers.
    references:
    - {reference_uri}                                       # Optional. Example: URI to codebase.

    models:
    - !include {model_card_uri}                             # Optional. Example: cat_classifier_model.yaml.

    assessments:
    - !include {assessment_card_uri}                        # Optional. Example: iama.yaml.
    "},{"location":"Projects/TAD/reporting_standard/#model-card","title":"Model Card","text":"
    language:
    - {lang_0}                                              # Optional. Example: nl.
    license: {licence}                                      # Required. Example: Apache-2.0 or any license SPDX ID from https://opensource.org/license or \"other\".
    license_name: {licence_name}                            # Optional if license != other, Required otherwise. Example: 'my-license-1.0'
    license_link: {license_link}                            # Optional if license != other, Required otherwise. Specify \"LICENSE\" or \"LICENSE.md\" to link to a file of that name inside the repo, or a URL to a remote file.
    tags:
    - {tag_0}                                               # Optional. Example: audio
    - {tag_1}                                               # Optional. Example: automatic-speech-recognition
    owners:
    - organization: {organization_name}                     # Required. Example: BZK
      oin: {oin}                                            # Optional. Example: 00000001003214345000
      name: {owner_name}                                    # Optional. Example: John Doe
      email: {owner_email}                                  # Optional. Example: johndoe@email.com
      role: {owner_role}                                    # Optional. Example: Data Scientist.

    model-index:
    - name: {model_id}                                      # Required. Example: CatClassifier.
      model: {model_uri}                                    # Required. URI to a repository containing the model file.
      artifacts:
      - {model_artifact}                                    # Optional. URI to relevant model artifacts, if applicable.
      parameters:
      - name: {parameter_name}                              # Optional. Example: \"epochs\".
        dtype: {parameter_dtype}                            # Optional. Example: \"int\".
        value: {parameter_value}                            # Optional. Example: 100.
        labels:
        - name: {label_name}                                # Optional. Example: \"gender\".
          dtype: {label_dtype}                              # Optional. Example: \"string\".
          value: {label_value}                              # Optional. Example: \"female\".
      results:
      - task:
          type: {task_type}                                 # Required. Example: image-classification.
          name: {task_name}                                 # Optional. Example: Image Classification.
        datasets:
        - type: {dataset_type}                              # Required. Example: common_voice. Link to a repository containing the dataset.
          name: {dataset_name}                              # Required. Example: \"Common Voice (French)\". A pretty name for the dataset.
          split: {split}                                    # Optional. Example: \"train\".
          features:
          - {feature_name}                                  # Optional. Example: \"gender\".
          revision: {dataset_version}                       # Optional. Example: 5503434ddd753f426f4b38109466949a1217c2bb
        metrics:
        - type: {metric_type}                               # Required. Example: false-positive-rate. Use metric id from https://hf.co/metrics.
          name: {metric_name}                               # Required. Example: \"FPR wrt class 0 restricted to feature gender:0 and age:21\".
          dtype: {metric_dtype}                             # Required. Example: \"float\".
          value: {metric_value}                             # Required. Example: 0.75.
          labels:
          - name: {label_name}                              # Optional. Example: \"gender\".
            type: {label_type}                              # Optional. Example: \"feature\".
            dtype: {label_dtype}                            # Optional. Example: \"string\".
            value: {label_value}                            # Optional. Example: \"female\".
        measurements:
          # Bar plots should be able to capture SHAP and Robustness Toolbox results from AI Verify.
          bar_plots:
          - type: {measurement_type}                        # Required. Example: \"SHAP\".
            name: {measurement_name}                        # Optional. Example: \"Mean Absolute SHAP Values\".
            results:
            - name: {bar_name}                              # Required. The name of a bar.
              value: {bar_value}                            # Required. The corresponding value.
          # Graph plots should be able to capture graph-based measurements such as partial dependence and accumulated local effects.
          graph_plots:
          - type: {measurement_type}                        # Required. Example: \"partial_dependence\".
            name: {measurement_name}                        # Optional. Example: \"Partial Dependence Plot\".
            # Results store the graph plot data. So far all plots depend on a specific feature (always) and output class (sometimes).
            # For example, partial dependence plots are made for each feature and class.
            results:
            - class: {class_name}                           # Optional. Name of the output class the graph depends on.
              feature: {feature_name}                       # Required. Name of the feature the graph depends on.
              data:
              - x_value: {x_value}                          # Required. The x value of the graph data.
                y_value: {y_value}                          # Required. The y value of the graph data.
    "},{"location":"Projects/TAD/reporting_standard/#assessment-card","title":"Assessment Card","text":"
    name: {assessment_name}                                 # Required. Example: IAMA.
    date: {assessment_date}                                 # Required. Example: 2025-03-25.
    contents:
    - question: {question_text}                             # Required. Example: \"Question 1: ...\".
      answer: {answer_text}                                 # Required. Example: \"Answer: ...\".
      remarks: {remarks_text}                               # Optional. Example: \"Remarks: ...\".
      authors:                                              # Optional. Example: \"['John', 'Peter']\".
      - name: {author_name}
      timestamp: {timestamp}                                # Optional. Example: 1711630721.
    "},{"location":"Projects/TAD/reporting_standard/#schema","title":"Schema","text":"

    A JSON schema will be added when we publish the first beta version.

    1. Deviation from the Hugging Face specification is in the license field. Hugging Face only accepts license ids from the Hugging Face license list, while we accept any license from the Open Source License List.

    2. Deviation from the Hugging Face specification is in the model-index:results:datasets field. Hugging Face only accepts one dataset, while we accept a list of datasets.

    3. Deviation from the Hugging Face specification is in the dataset type field. Hugging Face only accepts dataset ids from Hugging Face datasets, while we also allow any URL pointing to the dataset.

    4. For this extension to work, relevant metrics (such as, for example, the false positive rate) have to be added to the Hugging Face metrics; possibly this can be done in our organizational namespace.

    "},{"location":"Projects/TAD/reporting_standards/","title":"TAD Reporting standards","text":"

    This document assesses existing standards for capturing algorithm assessments in a standardized way.

    "},{"location":"Projects/TAD/reporting_standards/#background","title":"Background","text":"

    There are many algorithm assessments (e.g. IAMA, HUIDERIA), technical tests on performance (e.g. accuracy, TP, FP, F1), tests on fairness and bias of algorithms (e.g. SHAP), and reporting formats available. The goal is to standardize the way these different assessments and tests can be captured.

    "},{"location":"Projects/TAD/reporting_standards/#available-standards","title":"Available standards","text":""},{"location":"Projects/TAD/reporting_standards/#model-cards","title":"Model Cards","text":"

    The most interesting existing capturing methods all seem to be based on Model Cards for Model Reporting, which are:

    \"Short documents accompanying trained machine learning models that provide benchmarked evaluation in a variety of conditions, such as across different cultural, demographic, or phenotypic groups (e.g., race, geographic location, sex, Fitzpatrick skin type) and intersectional groups (e.g., age and race, or sex and Fitzpatrick skin type) that are relevant to the intended application domains. Model cards also disclose the context in which models are intended to be used, details of the performance evaluation procedures, and other relevant information\", proposed by Google. Note that \"The proposed set of sections\" in the Model Cards paper \"are intended to provide relevant details to consider, but are not intended to be complete or exhaustive, and may be tailored depending on the model, context, and stakeholders.\"

    Many companies implement their own version of Model Cards, for example Meta System Cards and the tools mentioned in the next section.

    "},{"location":"Projects/TAD/reporting_standards/#automatic-model-card-generation","title":"Automatic model card generation","text":"

    There exist tools to (semi-)automatically generate model cards:

    1. Model Card Generator by the US Census Bureau. Basic UI to create model cards and export them to markdown; also has a command line tool.
    2. Model Card Toolkit by Google. Automation only comes from integration with TensorFlow Extended and ML Metadata.
    3. VerifyML. Based on the Google toolkit, but extended to include specific tests on fairness and bias. Technical tests can be added by users and the model card schema (in protobuf) can be extended by users.
    4. Experimental Model Cards Tool by Hugging Face. This is the implementation of the Google paper by Hugging Face and provides information on the models available on their platform. The writing tool guides users through their model card and allows for uploading and downloading to and from markdown.
    "},{"location":"Projects/TAD/reporting_standards/#other-standards","title":"Other standards","text":"

    A landscape analysis of ML documentation tools has been performed by Hugging Face and provides a good overview of the current state of the field.

    Another interesting standard is the Algorithmic Transparency Recording Standard of the United Kingdom Government, which can be found here.

    "},{"location":"Projects/TAD/reporting_standards/#proposal","title":"Proposal","text":"

    We need a standard that captures algorithmic assessments and technical tests on models and datasets. The idea of model cards can serve as a guiding theoretical principle on how to implement such a standard. More specifically, we can draw inspiration from the existing model card schemas and implementations of VerifyML and Hugging Face. We note the following:

    1. Neither of these two standards captures algorithmic assessments.
    2. Only VerifyML has a specific format to capture some technical tests.

    Hence, in any case, we need to extend one of these standards. We propose to:

    1. Assess and compare these two standards.
    2. Choose the most appropriate one to extend.
    3. Extend (and possibly adjust) this standard into our own standard (in the form of a basic schema) that allows for capturing algorithmic assessments and standardizes the way technical tests are captured.
    "},{"location":"Projects/TAD/tools/","title":"Research of tools for transparency of algorithmic decision making","text":"

    In our ongoing research on AI validation and transparency, we are seeking tools to support assessments. Ideal tools would combine various technical tests with checklists and questionnaires and have the ability to generate reports in both human-friendly and machine-exchangeable formats.

    This document contains a list of tools we have found and may want to investigate further.

    "},{"location":"Projects/TAD/tools/#ai-verify","title":"AI Verify","text":"

    AI Verify is an AI governance testing framework and software toolkit that validates the performance of AI systems against a set of internationally recognised principles through standardised tests, and is consistent with international AI governance frameworks such as those from the European Union, the OECD and Singapore.

    Links: AI Verify Homepage, AI Verify documentation, AI Verify Github.

    "},{"location":"Projects/TAD/tools/#to-investigate-further","title":"To investigate further","text":""},{"location":"Projects/TAD/tools/#verifyml","title":"VerifyML","text":"

    What is it? VerifyML is an opinionated, open-source toolkit and workflow to help companies implement human-centric AI practices. It seems pretty much equivalent to AI Verify.

    Why interesting? The functionality of this toolkit seems to match that of AI Verify closely. It has a \"git and code first approach\" and offers automatic generation of model cards.

    Remarks The code seems to have been last updated two years ago.

    Links: VerifyML, VerifyML GitHub

    "},{"location":"Projects/TAD/tools/#ibm-research-360-toolkit","title":"IBM Research 360 Toolkit","text":"

    What is it? Open source Python libraries that support interpretability and explainability of datasets and machine learning models. The most relevant toolkits are AI Fairness 360 and AI Explainability 360.

    Why interesting? Seems to encompass extensive fairness and explainability tests. The codebase seems to be active.

    Remarks It comes as Python and R libraries.

    Links: AI Fairness 360 Github, AI Explainability 360 Github.

    "},{"location":"Projects/TAD/tools/#hollisticai","title":"Hollisticai","text":"

    What is it? Open source tool to assess and improve the trustworthiness of AI systems. Offers tools to measure and mitigate bias across numerous tasks. Will be extended to include tools for efficacy, robustness, privacy and explainability.

    Why interesting? Although it is not entirely clear what exactly this tool does (see Remarks), it does seem (according to their website) to provide reports on bias and fairness. The GitHub repo does not seem to include any report-generating code, but mainly technical tests. Here is an example in which bias is measured in a classification model.

    Remarks The website seems to suggest the possibility to generate reports, but this is not directly reflected in the codebase. Possibly reports are only available with some sort of licensed product?

    Links: Hollisticai homepage, Hollisticai Github.

    "},{"location":"Projects/TAD/tools/#interesting-to-mention","title":"Interesting to mention","text":""},{"location":"Projects/TAD/tools/#the-fate-system","title":"The FATE system","text":"

    Paper by TNO about the FATE system. The acronym stands for \"FAir, Transparent and Explainable Decision Making.\"

    Tools mentioned include some of the above: Aequitas, AI Fairness 360, Dalex, Fairlearn, Responsibly, and What-If-Tool.

    Links: Paper, Article, Microsoft links.

    "},{"location":"Way-of-Working/Code-Reviews/","title":"Code reviews","text":"

    The purpose of a code review is to ensure the quality and readability of a change, and that all requirements from the ticket have been met, before it gets merged into the main codebase. Additionally, code reviews are a communication tool; they allow team members to stay aware of changes being made.

    Code reviews involve having a team member examine the changes made by another team member and give feedback or ask questions if needed.

    "},{"location":"Way-of-Working/Code-Reviews/#creating-a-pull-request","title":"Creating a Pull Request","text":"

    We use GitHub pull requests (PR) for code reviews. You can make a draft PR if your work is still in progress. When you are done you can remove the draft status. A team member may start reviewing when the PR does not have a draft status.

    For team ADRs at least 3 approving reviews are required; if the ADR can be expected to be controversial, all team members should approve.

    A team ADR is an ADR made in the ai-validation repository.

    All other PRs need at least 1 approving review, but can have more reviewers if desired (by either reviewer or author).

    "},{"location":"Way-of-Working/Code-Reviews/#review-process","title":"Review process","text":"

    By default the code owners, indicated in the CODEOWNERS file, will be requested to review. For us this is the GitHub team AI-validation. If the PR creator wants a specific team member to review, the PR creator should add that team member specifically in the reviewers section of the PR. A message will be posted in Mattermost for each PR. A reviewer then indicates with an emoji reaction that they are looking at the PR.

    If the reviewer has suggestions or comments, the PR creator can fix those or reply to the suggestions. When the creator of the PR thinks they have addressed the feedback, they must re-request a review from the person that did the review. The reviewer must then look at the changes and approve or add more comments. This process continues until the reviewer agrees that all is correct and approves the PR.

    Once the review is approved the reviewer checks if the branch is in sync with the main branch before merging. If not, the reviewer rebases the branch. Once the branch is in sync with main the reviewer merges the PR and checks if the deployment is successful. If the deployment is not successful the reviewer fixes it. If the PR needs more than one review, the last accepting reviewer merges the PR.

    "},{"location":"Way-of-Working/Contributing/","title":"Contributing to AI Validation","text":"

    First off, thanks for taking the time to contribute! \u2764\ufe0f

    All types of contributions are encouraged and valued. See the Table of Contents for different ways to help and details about how this project handles them. Please make sure to read the relevant section before making your contribution. It will make it a lot easier for us maintainers and smooth out the experience for all involved. The community looks forward to your contributions. \ud83c\udf89

    "},{"location":"Way-of-Working/Contributing/#table-of-contents","title":"Table of Contents","text":""},{"location":"Way-of-Working/Contributing/#code-of-conduct","title":"Code of Conduct","text":"

    This project and everyone participating in it is governed by the Code of Conduct. By participating, you are expected to uphold this code. Please report unacceptable behavior to ai-validatie@minbzk.nl.

    "},{"location":"Way-of-Working/Contributing/#i-have-a-question","title":"I Have a Question","text":"

    Before you ask a question, it is best to search for existing Issues that might help you. In case you have found a suitable issue and still need clarification, you can write your question in this issue.

    If you then still feel the need to ask a question and need clarification, we recommend the following:

    We will then take care of the issue as soon as possible.

    "},{"location":"Way-of-Working/Contributing/#i-want-to-contribute","title":"I Want To Contribute","text":""},{"location":"Way-of-Working/Contributing/#legal-notice","title":"Legal Notice","text":"

    When contributing to this project, you must agree that you have authored 100% of the content, that you have the necessary rights to the content and that the content you contribute may be provided under the project license.

    "},{"location":"Way-of-Working/Contributing/#reporting-bugs","title":"Reporting Bugs","text":""},{"location":"Way-of-Working/Contributing/#before-submitting-a-bug-report","title":"Before Submitting a Bug Report","text":"

    A good bug report shouldn't leave others needing to chase you up for more information. Therefore, we ask you to investigate carefully, collect information and describe the issue in detail in your report. Please complete the following steps in advance to help us fix any potential bug as fast as possible.

    "},{"location":"Way-of-Working/Contributing/#how-do-i-submit-a-good-bug-report","title":"How Do I Submit a Good Bug Report?","text":"

    You must never report security-related issues, vulnerabilities or bugs that include sensitive information to the issue tracker, or elsewhere in public. Instead, sensitive bugs must be sent by email to ai-validatie@minbzk.nl.

    We use GitHub issues to track bugs and errors. If you run into an issue with the project:

    Once it's filed:

    "},{"location":"Way-of-Working/Contributing/#suggesting-enhancements","title":"Suggesting Enhancements","text":"

    This section guides you through submitting an enhancement suggestion for this project, including completely new features and minor improvements. Following these guidelines will help maintainers and the community to understand your suggestion and find related suggestions.

    "},{"location":"Way-of-Working/Contributing/#before-submitting-an-enhancement","title":"Before Submitting an Enhancement","text":""},{"location":"Way-of-Working/Contributing/#how-do-i-submit-a-good-enhancement-suggestion","title":"How Do I Submit a Good Enhancement Suggestion?","text":"

    Enhancement suggestions are tracked as GitHub issues.

    "},{"location":"Way-of-Working/Contributing/#styleguides","title":"Styleguides","text":""},{"location":"Way-of-Working/Contributing/#commit-messages","title":"Commit Messages","text":"

    We have commit message conventions: Commit convention

    "},{"location":"Way-of-Working/Contributing/#markdown-lint","title":"Markdown Lint","text":"

    We use markdownlint to standardize markdown: MarkDown lint.

    "},{"location":"Way-of-Working/Contributing/#pre-commit","title":"Pre-commit","text":"

    We use Pre-commit to enable standardization: Pre-commit.

    "},{"location":"Way-of-Working/Principles/","title":"Our Principles","text":"
    1. Our strong trust in the government and the dedication of people at all levels within the government organization is the basis of our actions.
    2. The interests of the citizen and society take precedence in all our activities.
    3. Learning and knowledge sharing are central: we encourage team members to take on tasks that are new or less familiar to them.
    4. Existing knowledge, policies, and proven methods are actively reused and shared.
    5. We strive for maximum openness and transparency in all our processes.
    6. We prefer the use and creation of Open Source Software.
    7. Our team members can choose to work anonymously.
    8. We treat each other with respect.
    9. Collaboration is essential to our success; we actively seek collaboration with both public and private partners.
    "},{"location":"Way-of-Working/UbiquitousLanguage/","title":"Ubiquitous Language","text":"

    For clarity and consistency, this document defines some terms used within our team where the meaning differs between Data Science and Computer Science, as well as terms that are for any other reason worth mentioning.

    For a full reference for Machine Learning, we recommend ML Fundamentals from Google.

    "},{"location":"Way-of-Working/Onboarding/","title":"Onboarding","text":""},{"location":"Way-of-Working/Onboarding/Accounts/","title":"Accounts","text":""},{"location":"Way-of-Working/Onboarding/Accounts/#mattermost-chat","title":"Mattermost Chat","text":"

    Make sure you have installed Mattermost, then follow these steps.

    "},{"location":"Way-of-Working/Onboarding/Accounts/#webex","title":"Webex","text":"

    Make sure you have installed Webex, then follow these steps.

    "},{"location":"Way-of-Working/Onboarding/Accounts/#github","title":"Github","text":"

    Create or use your existing Github account.

    "},{"location":"Way-of-Working/Onboarding/Accounts/#collaboration-space","title":"Collaboration Space","text":""},{"location":"Way-of-Working/Onboarding/Accounts/#open-up-your-calendar","title":"Open up your calendar","text":""},{"location":"Way-of-Working/Onboarding/Accounts/#shared-email","title":"Shared email","text":""},{"location":"Way-of-Working/Onboarding/Accounts/#bookmark","title":"Bookmark","text":"

    Bookmark these links in your browser:

    "},{"location":"Way-of-Working/Onboarding/Dev-machine/","title":"Setting up your Dev Machine","text":"

    We are assuming your dev machine is a Mac. This guide is rather opinionated; feel free to have your own opinion, and feel free to contribute! Contributing can be done by clicking \"edit\" at the top right and making a pull request on this repository.

    "},{"location":"Way-of-Working/Onboarding/Dev-machine/#things-that-should-have-been-default-on-mac","title":"Things that should have been default on Mac","text":""},{"location":"Way-of-Working/Onboarding/Dev-machine/#citrix-workspace","title":"Citrix workspace","text":""},{"location":"Way-of-Working/Onboarding/Dev-machine/#communication","title":"Communication","text":""},{"location":"Way-of-Working/Onboarding/Dev-machine/#terminal-and-shell","title":"Terminal and shell","text":""},{"location":"Way-of-Working/Onboarding/Dev-machine/#coding","title":"Coding","text":""}]} \ No newline at end of file diff --git a/sitemap.xml.gz b/sitemap.xml.gz index 4864414004c1be8698dd36754104b289b6578d88..e5904ac3e50a8a8e19b1dd3a7f4f41d6753e00d7 100644 GIT binary patch delta 13 Ucmb=gXP58h;F$1QVj_D503G54V*mgE delta 13 Ucmb=gXP58h;Mm+LHj%vo035;uA^-pY