From 13245558fd099e336264903e6ca1a9ff1e04ba0e Mon Sep 17 00:00:00 2001 From: William Palin <bill@free.law> Date: Thu, 4 Apr 2024 10:38:15 -0400 Subject: [PATCH 1/2] feat(laws.json): Upgrade laws.json Add a few permutations to US Code Provide extraction of sections separated --- reporters_db/data/laws.json | 11 ++++++++--- reporters_db/data/regexes.json | 2 ++ 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/reporters_db/data/laws.json b/reporters_db/data/laws.json index 44d30776..d05b3b4c 100644 --- a/reporters_db/data/laws.json +++ b/reporters_db/data/laws.json @@ -5534,12 +5534,16 @@ "1 U.S.C. sec. 1", "1 U.S.C. Sections 1-2", "1 USC S. 1-2", - "1 U.S. Code §1" + "1 U.S. Code §1", + "21, United States Code, Section 853", + "18, United States Code, Section 3500", + "18, United States Code, Section 981(a)(l)(C)" ], "jurisdiction": "United States", "name": "United States Code; United States Code Annotated; United States Code Service; Gould’s United States Code Unannotated", "regexes": [ - "(?P<title>\\d+)\\s+$reporter\\s+$section_marker\\s*$law_section" + "(?P<title>\\d+),?\\s+$reporter,?\\s+$section_marker\\s*$law_section", + "(?P<title>\\d+),?\\s+$reporter,?\\s+$section_marker\\s*$law_sections" ], "start": null, "variations": [ @@ -5548,7 +5552,8 @@ "U.S.C.A.", "U.S.C.S.", "U.S.C.U.", - "U.S. 
Code", + "United States Code" ] } ], diff --git a/reporters_db/data/regexes.json b/reporters_db/data/regexes.json index 8ee32e89..427b4dab 100644 --- a/reporters_db/data/regexes.json +++ b/reporters_db/data/regexes.json @@ -37,6 +37,8 @@ "month": "(?P<month>[A-Z][a-z]+\\.?)", "section": "(?P<section>\\d+(?:[\\-.:]\\d+){,3})", "section#": "Section like 1-2-3, 1.2.3, or 1:2-3.4", + "sections": "(?P<sections>\\d+(?:\\((?:[a-zA-Z]{1}|\\d{1,2})\\))+)", + "sections#": "Sections 81(a)(l)(C) or 81(a) - to capture section and subsections in parentheses", "subject": "(?P<subject>$law_subject_word(?: $law_subject_word| &){,4})", "subject#": "One to five word statute subject like 'Parks Rec. & Hist. Preserv.', 'Not-for-Profit Corp.', 'Alt. County Gov’t', 'R.R.'", "subject_word": "[A-Z][.\\-'A-Za-z]*", From 43a1778b6685ad2201aec53a740f148d8fec0288 Mon Sep 17 00:00:00 2001 From: William Palin <bill@free.law> Date: Thu, 4 Apr 2024 10:49:14 -0400 Subject: [PATCH 2/2] fix(usc): Improve regex for section --- reporters_db/data/laws.json | 3 +-- reporters_db/data/regexes.json | 6 ++---- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/reporters_db/data/laws.json b/reporters_db/data/laws.json index d05b3b4c..1230d7d9 100644 --- a/reporters_db/data/laws.json +++ b/reporters_db/data/laws.json @@ -5542,8 +5542,7 @@ "jurisdiction": "United States", "name": "United States Code; United States Code Annotated; United States Code Service; Gould’s United States Code Unannotated", "regexes": [ - "(?P<title>\\d+),?\\s+$reporter,?\\s+$section_marker\\s*$law_section", - "(?P<title>\\d+),?\\s+$reporter,?\\s+$section_marker\\s*$law_sections" + "(?P<title>\\d+),?\\s+$reporter,?\\s+$section_marker\\s*$law_section" ], "start": null, "variations": [ diff --git a/reporters_db/data/regexes.json b/reporters_db/data/regexes.json index 427b4dab..7b4bfddd 100644 --- a/reporters_db/data/regexes.json +++ b/reporters_db/data/regexes.json @@ -35,10 +35,8 @@ "#": "Regexes used in laws.json", "day": 
"(?P<day>\\d{1,2}),?", "month": "(?P<month>[A-Z][a-z]+\\.?)", - "section": "(?P<section>\\d+(?:[\\-.:]\\d+){,3})", - "section#": "Section like 1-2-3, 1.2.3, or 1:2-3.4", - "sections": "(?P<sections>\\d+(?:\\((?:[a-zA-Z]{1}|\\d{1,2})\\))+)", - "sections#": "Sections 81(a)(l)(C) or 81(a) - to capture section and subsections in parentheses", + "section": "(?P<section>(?:\\d+(?:[\\-.:]\\d+){,3})|(?:\\d+(?:\\((?:[a-zA-Z]{1}|\\d{1,2})\\))+))", + "section#": "Section like 1-2-3, 1.2.3, or 1:2-3.4 or 81(a)(2)(F) or 81(a)", "subject": "(?P<subject>$law_subject_word(?: $law_subject_word| &){,4})", "subject#": "One to five word statute subject like 'Parks Rec. & Hist. Preserv.', 'Not-for-Profit Corp.', 'Alt. County Gov’t', 'R.R.'", "subject_word": "[A-Z][.\\-'A-Za-z]*",