CDCgov · dthoward96 · Apr 11, 2024 · Feb 9, 2024 · Feb 9, 2024 · Feb 9, 2024
diff --git a/.github/workflows/GHCR_docker.yml b/.github/workflows/GHCR_docker.yml
@@ -0,0 +1,46 @@
+# Builds the repository's Docker image and publishes it to the GitHub
+# Container Registry (ghcr.io) whenever master is pushed.
+name: Create and publish docker image to GHCR
+
+on:
+  push:
+    branches: [ "master" ]
+
+env:
+  REGISTRY: ghcr.io
+  IMAGE_NAME: ${{ github.repository }}
+
+jobs:
+  build-and-push-image:
+    runs-on: ubuntu-latest
+    # packages: write lets the job's GITHUB_TOKEN push to the registry.
+    permissions:
+      contents: read
+      packages: write
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Log into container registry
+        uses: docker/login-action@343f7c4344506bcbf9b4de18042ae17996df046d
+        with:
+          registry: ${{ env.REGISTRY }}
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      # Produces the tags and labels consumed by the build step below.
+      - name: Extract Docker metadata
+        id: meta
+        uses: docker/metadata-action@96383f45573cb7f253c731d3b3ab81c87ef81934
+        with:
+          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
+          tags: type=ref,event=branch
+
+      - name: Build and push Docker image
+        uses: docker/build-push-action@0565240e2d4ab88bba5387d719585280857ece09
+        with:
+          context: .
+          push: true
+          tags: ${{ steps.meta.outputs.tags }}
+          labels: ${{ steps.meta.outputs.labels }}
diff --git a/.github/workflows/codeql-analysis.yml → .github/workflows/codeql.yml b/.github/workflows/codeql-analysis.yml → .github/workflows/codeql.yml
@@ -13,60 +13,72 @@ name: "CodeQL"
 
 on:
   push:
-    branches: [ master ]
+    branches: [ "master" ]
   pull_request:
-    # The branches below must be a subset of the branches above
-    branches: [ master ]
+    branches: [ "master" ]
   schedule:
-    - cron: '40 12 * * 5'
+    - cron: '43 3 * * 5'
 
 jobs:
   analyze:
     name: Analyze
-    runs-on: ubuntu-latest
+    # Runner size impacts CodeQL analysis time. To learn more, please see:
+    #   - https://gh.io/recommended-hardware-resources-for-running-codeql
+    #   - https://gh.io/supported-runners-and-hardware-resources
+    #   - https://gh.io/using-larger-runners
+    # Consider using larger runners for possible analysis time improvements.
+    runs-on: ${{ (matrix.language == 'swift' && 'macos-latest') || 'ubuntu-latest' }}
+    timeout-minutes: ${{ (matrix.language == 'swift' && 120) || 360 }}
     permissions:
+      # required for all workflows
+      security-events: write
+
+      # only required for workflows in private repositories
       actions: read
       contents: read
-      security-events: write
 
     strategy:
       fail-fast: false
       matrix:
         language: [ 'python' ]
-        # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ]
+        # CodeQL supports [ 'c-cpp', 'csharp', 'go', 'java-kotlin', 'javascript-typescript', 'python', 'ruby', 'swift' ]
+        # Use only 'java-kotlin' to analyze code written in Java, Kotlin or both
+        # Use only 'javascript-typescript' to analyze code written in JavaScript, TypeScript or both
         # Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support
 
     steps:
     - name: Checkout repository
-      uses: actions/checkout@v3
+      uses: actions/checkout@v4
 
     # Initializes the CodeQL tools for scanning.
     - name: Initialize CodeQL
-      uses: github/codeql-action/init@v2
+      uses: github/codeql-action/init@v3
       with:
         languages: ${{ matrix.language }}
         # If you wish to specify custom queries, you can do so here or in a config file.
         # By default, queries listed here will override any specified in a config file.
         # Prefix the list here with "+" to use these queries and those in the config file.
-        
-        # Details on CodeQL's query packs refer to : https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs
+
+        # For more details on CodeQL's query packs, refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs
         # queries: security-extended,security-and-quality
 
-        
-    # Autobuild attempts to build any compiled languages  (C/C++, C#, or Java).
+
+    # Autobuild attempts to build any compiled languages (C/C++, C#, Go, Java, or Swift).
     # If this step fails, then you should remove it and run the build manually (see below)
     - name: Autobuild
-      uses: github/codeql-action/autobuild@v2
+      uses: github/codeql-action/autobuild@v3
 
     # ℹ️ Command-line programs to run using the OS shell.
     # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun
 
-    #   If the Autobuild fails above, remove it and uncomment the following three lines. 
+    #   If the Autobuild fails above, remove it and uncomment the following three lines.
     #   modify them (or add more) to build your code if your project, please refer to the EXAMPLE below for guidance.
 
     # - run: |
-    #   echo "Run, Build Application using script"
-    #   ./location_of_script_within_repo/buildscript.sh
+    #     echo "Run, Build Application using script"
+    #     ./location_of_script_within_repo/buildscript.sh
 
     - name: Perform CodeQL Analysis
-      uses: github/codeql-action/analyze@v2
+      uses: github/codeql-action/analyze@v3
+      with:
+        category: "/language:${{matrix.language}}"
diff --git a/.github/workflows/docker_test_build.yml b/.github/workflows/docker_test_build.yml
@@ -0,0 +1,20 @@
+# Checks that the Dockerfile at the repository root still builds on pushes
+# and pull requests targeting master. The image is built but not pushed.
+name: Build test Docker image
+
+on:
+  push:
+    branches: [ "master" ]
+  pull_request:
+    branches: [ "master" ]
+
+jobs:
+
+  build:
+
+    runs-on: ubuntu-latest
+
+    steps:
+    - uses: actions/checkout@v4
+    - name: Build the Docker image
+      run: docker build . --file Dockerfile
diff --git a/.github/workflows/python-package-mamba.yml b/.github/workflows/python-package-mamba.yml
diff --git a/README.Rmd b/README.Rmd
@@ -30,10 +30,20 @@ github_pages_url <- description$GITHUB_PAGES
 
 **General Disclaimer**: This repository was created for use by CDC programs to collaborate on public health related projects in support of the [CDC mission](https://www.cdc.gov/about/organization/mission.htm).  GitHub is not hosted by the CDC, but is a third party website used by CDC and its partners to share information and collaborate on software. CDC use of GitHub does not imply an endorsement of any one particular service, product, or enterprise.
 
+# [Documentation](`r github_pages_url`/index.html)
+
 ## Overview
 
 ``r program`` is a Python program that is developed to automate the process of generating necessary submission files and batch uploading them to <ins>NCBI archives</ins> (such as **BioSample**, **SRA**, and **Genbank**) and <ins>GISAID databases</ins> (e.g. **EpiFlu** and **EpiCoV**). Presently, the pipeline is capable of uploading **Influenza A Virus** (FLU) and **SARS-COV-2** (COV) data. However, the dynamic nature of this pipeline can allow for additional uploads of other organisms in future updates or requests.
 
+## Contacts
+
+| Role       | Contact |
+| ---------- | ------- |
+| Creator    | [Dakota Howard](https://github.com/dthoward96), [Reina Chau](https://github.com/rchau88) |
+| Maintainer | [Dakota Howard](https://github.com/dthoward96) |
+| Back-Up    | [Reina Chau](https://github.com/rchau88), [Brian Lee](https://github.com/leebrian) |
+
 ## Prerequisites
 
 - **NCBI Submissions**
@@ -93,6 +103,10 @@ Before submitters can perform a batch submission using ``r program``, they must
 - [How to run seqsender with Compose](`r github_pages_url`/articles/compose_installation.html)
 - [How to run seqsender with Singularity](`r github_pages_url`/articles/singularity_installation.html)
 
+## Code Attributions
+
+Dakota Howard and Reina Chau wrote the majority of the code base, with input and testing from [colleagues](`r github_pages_url`/authors.html).
+
 ## Public Domain Standard Notice
 
 This repository constitutes a work of the United States Government and is not subject to domestic copyright protection under 17 USC § 105. This repository is in the public domain within the United States, and copyright and related rights in the work worldwide are waived through the [CC0 1.0 Universal public domain dedication](https://creativecommons.org/publicdomain/zero/1.0/). All contributions to this repository will be released under the CC0 dedication. By submitting a pull request you are agreeing to comply with this waiver of copyright interest.

diff --git a/README.md b/README.md
@@ -1,14 +1,19 @@
 
 <!-- ![build](https://github.com/montilab/cadra/workflows/rcmdcheck/badge.svg) -->
+
 <!-- ![GitHub issues](https://img.shields.io/github/issues/montilab/cadra) -->
+
 <!-- ![GitHub last commit](https://img.shields.io/github/last-commit/montilab/cadra) -->
+
 <p style="font-size: 16px;">
+
 <em>Public Database Submission Pipeline</em>
+
 </p>
 
 **Beta Version**: 1.1.0. This pipeline is currently in Beta testing, and
 issues could appear during submission. Please use it at your own risk.
-Feedback and suggestions are welcome!
+Feedback and suggestions are welcome\!
 
 **General Disclaimer**: This repository was created for use by CDC
 programs to collaborate on public health related projects in support of
@@ -18,6 +23,8 @@ CDC and its partners to share information and collaborate on software.
 CDC use of GitHub does not imply an endorsement of any one particular
 service, product, or enterprise.
 
+# [Documentation](https://cdcgov.github.io/seqsender/index.html)
+
 ## Overview
 
 `seqsender` is a Python program that is developed to automate the
@@ -29,9 +36,17 @@ A Virus** (FLU) and **SARS-COV-2** (COV) data. However, the dynamic
 nature of this pipeline can allow for additional uploads of other
 organisms in future updates or requests.
 
+## Contacts
+
+| Role       | Contact                                                                                  |
+| ---------- | ---------------------------------------------------------------------------------------- |
+| Creator    | [Dakota Howard](https://github.com/dthoward96), [Reina Chau](https://github.com/rchau88) |
+| Maintainer | [Dakota Howard](https://github.com/dthoward96)                                           |
+| Back-Up    | [Reina Chau](https://github.com/rchau88), [Brian Lee](https://github.com/leebrian)       |
+
 ## Prerequisites
 
-- **NCBI Submissions**
+  - **NCBI Submissions**
 
 `seqsender` utilizes an UI-Less Data Submission Protocol to bulk upload
 submission files (e.g., *submission.xml*, *submission.zip*, etc.) to
@@ -63,11 +78,11 @@ FTP on the command line. Before attempting to submit a submission using
     <a href="mailto:[email protected]">[email protected]</a>
     to discuss requirements for submissions.
 
-5.  Coordinate a NCBI namespace name (**spuid_namespace**) that will be
+5.  Coordinate a NCBI namespace name (**spuid\_namespace**) that will be
     used with Submitter Provided Unique Identifiers (**spuid**) in the
-    submission. The liaison of **spuid_namespace** and **spuid** is used
-    to report back assigned accessions as well as for cross-linking
-    objects within submission. The values of **spuid_namespace** are up
+    submission. The liaison of **spuid\_namespace** and **spuid** is
+    used to report back assigned accessions as well as for cross-linking
+    objects within submission. The values of **spuid\_namespace** are up
     to the submitter to decide but they must be unique and
     well-coordinated prior to make a submission. For more information
     about these two fields, see
@@ -78,7 +93,9 @@ FTP on the command line. Before attempting to submit a submission using
     [GENBANK](https://cdcgov.github.io/seqsender/articles/genbank_submission.html#metadata)
     metadata requirements.
 
-- **GISAID Submissions**
+<!-- end list -->
+
+  - **GISAID Submissions**
 
 `seqsender` makes use of GISAID’s Command Line Interface tools to bulk
 uploading meta- and sequence-data to GISAID databases. Presently, the
@@ -145,14 +162,20 @@ prepared and stored in a submission directory of choice.
 
 ## Quick Start
 
-- [How to run seqsender
-  locally](https://cdcgov.github.io/seqsender/articles/local_installation.html)
-- [How to run seqsender with
-  Docker](https://cdcgov.github.io/seqsender/articles/docker_installation.html)
-- [How to run seqsender with
-  Compose](https://cdcgov.github.io/seqsender/articles/compose_installation.html)
-- [How to run seqsender with
-  Singularity](https://cdcgov.github.io/seqsender/articles/singularity_installation.html)
+  - [How to run seqsender
+    locally](https://cdcgov.github.io/seqsender/articles/local_installation.html)
+  - [How to run seqsender with
+    Docker](https://cdcgov.github.io/seqsender/articles/docker_installation.html)
+  - [How to run seqsender with
+    Compose](https://cdcgov.github.io/seqsender/articles/compose_installation.html)
+  - [How to run seqsender with
+    Singularity](https://cdcgov.github.io/seqsender/articles/singularity_installation.html)
+
+## Code Attributions
+
+Dakota Howard and Reina Chau wrote the majority of the code base, with
+input and testing from
+[colleagues](https://cdcgov.github.io/seqsender/authors.html).
 
 ## Public Domain Standard Notice
 

diff --git a/create.py b/create.py
@@ -111,8 +111,6 @@ def create_submission_xml(organism, database, submission_name, config_dict, meta
 	comment.text = config_dict["Description"]["Comment"]
 	# Description info including organization and contact info
 	organization = etree.SubElement(description, "Organization", type=config_dict["Description"]["Organization"]["@type"], role=config_dict["Description"]["Organization"]["@role"])
-	if config_dict["Description"]["Organization"]["@org_id"]:
-		organization.set("org_id", config_dict["Description"]["Organization"]["@org_id"])
 	org_name = etree.SubElement(organization, "Name")
 	org_name.text = config_dict["Description"]["Organization"]["Name"]
 	if "GENBANK" not in database:
@@ -401,7 +399,7 @@ def create_genbank_files(organism, config_dict, metadata, fasta_file, submission
 	# Retrieve the source df"
 	source_df = metadata.filter(regex="^gb-seq_id$|^src-|^ncbi-spuid$|^ncbi-bioproject$|^organism$|^collection_date$").copy()
 	source_df.columns = source_df.columns.str.replace("src-","").str.strip()
-	source_df = source_df.rename(columns = {"gb-seq_id":"Sequence_ID", "collection_date":"Collection_date", "ncbi-spuid":"strain"})
+	source_df = source_df.rename(columns = {"gb-seq_id":"Sequence_ID", "collection_date":"Collection_date"})
 	# Add BioProject if available
 	if "ncbi-bioproject" in source_df:
 		source_df = source_df.rename(columns={"ncbi-bioproject": "BioProject"})

diff --git a/docker-compose.yaml b/docker-compose.yaml
@@ -6,18 +6,11 @@ x-data-volumes:
   source: $HOME/Github/Testings/seqsender
   target: /data
 
-x-seqsender-code:
-  &seqsender-code
-  type: bind
-  source: $HOME/Github/seqsender
-  target: /seqsender
-
 services:
   seqsender: 
     container_name: seqsender
-    image: cdcgov/seqsender-dev:latest
+    image: cdcgov/seqsender:latest
     restart: always
     volumes: 
       - *data-volume
-      - *seqsender-code
     command: tail -f /dev/null