Skip to content

Commit 954d364

Browse files
author
DavidUnderdown
committed
update cover date regex, and latest version of CR Cards schemas
1 parent 657b26b commit 954d364

File tree

3 files changed

+39
-35
lines changed

3 files changed

+39
-35
lines changed

example-schemas/ADM_362-technical-acquisition-with-minimal-transcription.csvs

Lines changed: 19 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,21 @@
11
version 1.0
22
@totalColumns 42
3-
/********************************************************************************
4-
*Schema: ADM_363-technical-acquisition-with-minimal-transcription.csvs *
5-
*Authors: Nicki Welch *
6-
* David Underdown *
7-
*Purpose: To capture metadata about the digitisation of the ADM 363 series *
8-
* Primarily technical metadata, but with a minimal amount of *
9-
* transcription to verify that the records may be publicly released *
10-
* after receipt by The National Archives *
11-
*Revision: 1.0 first release *
12-
* 1.1 update as some official numbers only single digit *
13-
* 1.2 allow M as official number prefix too *
14-
* 1.3 further additions to prefixes, L, S, SS, SSX *
15-
* 1.4 allow for * and ? in official number *
16-
********************************************************************************/
3+
/*-------------------------------------------------------------------------------
4+
|Schema: ADM_362-technical-acquisition-with-minimal-transcription.csvs |
5+
|Authors: Nicki Welch |
6+
| David Underdown |
7+
|Purpose: To capture metadata about the digitisation of the ADM 362 series |
8+
| Primarily technical metadata, but with a minimal amount of |
9+
| transcription to verify that the records may be publicly released |
10+
| after receipt by The National Archives |
11+
|Revision: 1.0 first release |
12+
| 1.1 update as some official numbers only single digit |
13+
| 1.2 allow M as official number prefix too |
14+
| 1.3 further additions to prefixes, L, S, SS, SSX |
15+
| 1.4 allow for * and ? in official number |
16+
| 1.5 further prefixes MX, KX, JX, and longer volume number |
17+
| 1.6 add explicit check that checksum is not that for a 0 byte file |
18+
-------------------------------------------------------------------------------*/
1719
batch_code: length(10) regex("^ADM362B([0-9]{3})$")
1820
department: (is("ADM") if($file_path/notEmpty,in($file_path) and in($resource_uri)))
1921
series: is("362") and if($file_path/notEmpty,in($file_path) and in($resource_uri))
@@ -22,7 +24,7 @@ item: ((positiveInteger unique($piece,$item,$ordinal)) or empty) if($file_path/n
2224
ordinal: if($item/empty,empty,unique($item,$ordinal))
2325
file_uuid: if($ordinal/empty,empty,uuid4 unique)
2426
file_path: uri if($ordinal/empty,empty,unique fileExists regex("^file:\/\/\/ADM_362\/[0-9]{1,5}\/[0-9]{1,5}\/[0-9]{1,4}_.+\.jp2$"))
25-
file_checksum: if($ordinal/empty,empty,checksum(file($file_path),"SHA-256"))
27+
file_checksum: if($ordinal/empty,empty,isNot("e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855") and checksum(file($file_path),"SHA-256"))
2628
resource_uri: if($ordinal/notEmpty,uri starts("http://datagov.nationalarchives.gov.uk/66/"))
2729
scan_operator: if($ordinal/empty,empty,length(1,12) regex("^[0-9a-zA-Z]{1,12}$"))
2830
scan_id: if($ordinal/empty,empty,length(1,12) regex("^[0-9a-zA-Z_]{1,12}$"))
@@ -51,8 +53,8 @@ image_deskew_operator: if($ordinal/empty,empty,if($image_deskew/is("yes"),regex(
5153
image_deskew_timestamp: if($ordinal/empty,empty,if($image_deskew/is("yes"),xDateTime,is("")))
5254
QA-code: regex("^[0-9/,]{1,2}$") @optional
5355
comments: regex("[\w\s,\.]+") @optional
54-
transcribed_volume_number: if($item/empty,regex("[0-9A-Z\-\s]{1,15}"),is(""))
56+
transcribed_volume_number: if($item/empty,regex("[0-9A-Z\-\s]{1,19}"),is(""))
5557
transcribed_birth_date_day: if(($ordinal/empty and $item/notEmpty),regex("^\*|([0\?][1-9\?])|([1-2\?][0-9\?])|([3\?][0-1\?])$"),is(""))
5658
transcribed_birth_date_month: if(($ordinal/empty and $item/notEmpty),is("*") or is("?") or is("January") or is("February") or is("March") or is("April") or is("May") or is("June") or is("July") or is("August") or is("September") or is("October") or is("November") or is("December"), is(""))
5759
transcribed_birth_date_year: if(($ordinal/empty and $item/notEmpty),regex("^1[7-9][0-9\?]{2}|\*$"),is(""))
58-
transcribed_official_number: if(($ordinal/empty and $item/notEmpty),regex("^([FJKLMS]|SS|SSX)[/*/?0-9]{1,6}$"),is(""))
60+
transcribed_official_number: if(($ordinal/empty and $item/notEmpty),regex("^([FJKLMS]|MX|JX|KX|SS|SSX)[/*/?0-9]{1,6}$"),is(""))

example-schemas/ADM_363-technical-acquisition-with-minimal-transcription.csvs

Lines changed: 19 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,21 @@
11
version 1.0
22
@totalColumns 42
3-
/********************************************************************************
4-
*Schema: ADM_363-technical-acquisition-with-minimal-transcription.csvs *
5-
*Authors: Nicki Welch *
6-
* David Underdown *
7-
*Purpose: To capture metadata about the digitisation of the ADM 363 series *
8-
* Primarily technical metadata, but with a minimal amount of *
9-
* transcription to verify that the records may be publicly released *
10-
* after receipt by The National Archives *
11-
*Revision: 1.0 first release *
12-
* 1.1 update as some official numbers only single digit *
13-
* 1.2 allow M as official number prefix too *
14-
* 1.3 further additions to prefixes, L, S, SS, SSX *
15-
* 1.4 allow for * and ? in official number *
16-
********************************************************************************/
3+
/*-------------------------------------------------------------------------------
4+
|Schema: ADM_363-technical-acquisition-with-minimal-transcription.csvs |
5+
|Authors: Nicki Welch |
6+
| David Underdown |
7+
|Purpose: To capture metadata about the digitisation of the ADM 363 series |
8+
| Primarily technical metadata, but with a minimal amount of |
9+
| transcription to verify that the records may be publicly released |
10+
| after receipt by The National Archives |
11+
|Revision: 1.0 first release |
12+
| 1.1 update as some official numbers only single digit |
13+
| 1.2 allow M as official number prefix too |
14+
| 1.3 further additions to prefixes, L, S, SS, SSX |
15+
| 1.4 allow for * and ? in official number |
16+
| 1.5 further prefixes MX, KX, JX, and longer volume number |
17+
| 1.6 add explicit check that checksum is not that for a 0 byte file |
18+
-------------------------------------------------------------------------------*/
1719
batch_code: length(10) regex("^ADM36[23]B([0-9]{3})$")
1820
department: (is("ADM") if($file_path/notEmpty,in($file_path) and in($resource_uri)))
1921
series: is("363") and if($file_path/notEmpty,in($file_path) and in($resource_uri))
@@ -22,7 +24,7 @@ item: ((positiveInteger unique($piece,$item,$ordinal)) or empty) if($file_path/n
2224
ordinal: if($item/empty,empty,unique($item,$ordinal))
2325
file_uuid: if($ordinal/empty,empty,uuid4 unique)
2426
file_path: uri if($ordinal/empty,empty,unique fileExists regex("^file:\/\/\/ADM_363\/[0-9]{1,5}\/[0-9]{1,5}\/[0-9]{1,4}_.+\.jp2$"))
25-
file_checksum: if($ordinal/empty,empty,checksum(file($file_path),"SHA-256"))
27+
file_checksum: if($ordinal/empty,empty,isNot("e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855") and checksum(file($file_path),"SHA-256"))
2628
resource_uri: if($ordinal/notEmpty,uri starts("http://datagov.nationalarchives.gov.uk/66/"))
2729
scan_operator: if($ordinal/empty,empty,length(1,12) regex("^[0-9a-zA-Z]{1,12}$"))
2830
scan_id: if($ordinal/empty,empty,length(1,12) regex("^[0-9a-zA-Z_]{1,12}$"))
@@ -51,8 +53,8 @@ image_deskew_operator: if($ordinal/empty,empty,if($image_deskew/is("yes"),regex(
5153
image_deskew_timestamp: if($ordinal/empty,empty,if($image_deskew/is("yes"),xDateTime,is("")))
5254
QA-code: regex("^[0-9/,]{1,2}$") @optional
5355
comments: regex("[\w\s,\.]+") @optional
54-
transcribed_volume_number: if($item/empty,regex("[0-9A-Z\-\s]{1,15}"),is(""))
56+
transcribed_volume_number: if($item/empty,regex("[0-9A-Z\-\s]{1,19}"),is(""))
5557
transcribed_birth_date_day: if(($ordinal/empty and $item/notEmpty),regex("^\*|([0\?][1-9\?])|([1-2\?][0-9\?])|([3\?][0-1\?])$"),is(""))
5658
transcribed_birth_date_month: if(($ordinal/empty and $item/notEmpty),is("*") or is("?") or is("January") or is("February") or is("March") or is("April") or is("May") or is("June") or is("July") or is("August") or is("September") or is("October") or is("November") or is("December"), is(""))
5759
transcribed_birth_date_year: if(($ordinal/empty and $item/notEmpty),regex("^1[7-9][0-9\?]{2}|\*$"),is(""))
58-
transcribed_official_number: if(($ordinal/empty and $item/notEmpty),regex("^([FJKLMS]|SS|SSX)[/*/?0-9]{1,6}$"),is(""))
60+
transcribed_official_number: if(($ordinal/empty and $item/notEmpty),regex("^([FJKLMS]|MX|JX|KX|SS|SSX)[/*/?0-9]{1,6}$"),is(""))

example-schemas/tech_acq_metadata_v1_WO95Y14B000.csvs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ ordinal: range(1,160) and in($file_path) unique($department,$division,$series,$s
4747
//the combination of fields indicated should be unique within the file
4848
description: not("")
4949
//description is a fairly free-form field, but must not be empty
50-
date: regex("^19(14|15|16|17|18|19|20|21|22|23) (Jan|Feb|Mar|Apr|May|June|July|Aug|Sept|Oct|Nov|Dec)( [1-3][0-9]|[1-9])?( - 19(14|15|16|17|18|19|20|21|22|23) (Jan|Feb|Mar|Apr|May|June|July|Aug|Sept|Oct|Nov|Dec)( [1-3][0-9]|[1-9])?)?$")
50+
date: regex("^19(14|15|16|17|18|19|20|21|22|23) (Jan|Feb|Mar|April|Apr|May|June|Jun|July|Jul|Aug|Sept|Sep|Oct|Nov|Dec)( [1-3][0-9]| [1-9])?((( - )|-|-|( - ))(19(14|15|16|17|18|19|20|21|22|23) )?([1-3][0-9] |[1-9] )?(Jan|Feb|Mar|April|Apr|May|June|Jun|July|Jul|Aug|Sept|Sep|Oct|Nov|Dec)( [1-3][0-9]| [1-9])?)?$")
5151
//dates according to The National Archives' cataloguing standards, expected to be a range for this project, but may be relaxed
5252
file_uuid: uuid4 unique
5353
//must be a version 4 uuid, and the value must be unique within the file. uuids must be lower case.

0 commit comments

Comments
 (0)