Skip to content

Commit

Permalink
Merge pull request #67 from psych-ds/fix_fileio
Browse files Browse the repository at this point in the history
Refactor file reading and JSON-LD processing
  • Loading branch information
bleonar5 authored Sep 27, 2024
2 parents 70f5489 + e9dc782 commit a0c51f0
Show file tree
Hide file tree
Showing 9 changed files with 38 additions and 96 deletions.
60 changes: 2 additions & 58 deletions src/files/deno.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ import { psychDSFile, issueInfo } from '../types/file.ts'
import { FileTree } from '../types/filetree.ts'
import { requestReadPermission } from '../setup/requestPermissions.ts'
import { readPsychDSIgnore, FileIgnoreRules } from './ignore.ts'
import jsonld from "jsonld";

/**
* Thrown when a text file is decoded as UTF-8 but contains UTF-16 characters
Expand All @@ -27,15 +26,13 @@ export class psychDSFileDeno implements psychDSFile {
path: string
expanded: object
issueInfo: issueInfo[]
fileText: string
#fileInfo?: Deno.FileInfo
#datasetAbsPath: string

constructor(datasetPath: string, filePath: string, ignore: FileIgnoreRules) {
this.#datasetAbsPath = datasetPath
this.path = filePath
this.name = path.basename(filePath)
this.fileText = ''
this.expanded = {}
this.issueInfo = []
this.#ignore = ignore
Expand Down Expand Up @@ -124,27 +121,7 @@ export async function _readFileTree(
const name = path.basename(relativePath)
const tree = new FileTree(relativePath, name, parent)

if(!parent){
for await (const dirEntry of Deno.readDir(path.join(rootPath,relativePath))){
if(dirEntry.isFile && dirEntry.name === "dataset_description.json"){
const file = new psychDSFileDeno(
rootPath,
path.join(relativePath, dirEntry.name),
ignore,
)

file.fileText = (await file.text())
.replaceAll('https://schema.org','http://schema.org')
.replaceAll('https://www.schema.org','http://www.schema.org')

const json = await JSON.parse(file.fileText)

if('@context' in json){
context = json['@context'] as object
}
}
}
}


for await (const dirEntry of Deno.readDir(path.join(rootPath, relativePath))) {
Expand All @@ -154,45 +131,12 @@ export async function _readFileTree(
path.join(relativePath, dirEntry.name),
ignore,
)
//store text of file for later. This was added to accommodate browser version
file.fileText = (await file.text())
.replaceAll('https://schema.org','http://schema.org')
.replaceAll('https://www.schema.org','http://www.schema.org')

// For .psychdsignore, read in immediately and add the rules
if (dirEntry.name === '.psychdsignore') {
ignore.add(readPsychDSIgnore(file))
}
if (dirEntry.name.endsWith('.json')) {
let json = {}
let exp = []
try{
json = await JSON.parse(file.fileText)
if (context && !dirEntry.name.endsWith('dataset_description.json')){
json = {
...json,
'@context': context
}
}
}
catch(_error){
file.issueInfo.push({
key: 'InvalidJsonFormatting'
})
}

try{
exp = await jsonld.expand(json)
if (exp.length > 0)
file.expanded = exp[0]
}
catch(error){
file.issueInfo.push({
key: 'InvalidJsonldSyntax',
evidence: `${error.message.split(';')[1]}`
})
}
ignore.add(await readPsychDSIgnore(file))
}

tree.files.push(file)
}
if (dirEntry.isDirectory) {
Expand Down
4 changes: 2 additions & 2 deletions src/files/ignore.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import { psychDSFile } from '../types/file.ts'
import { ignore, Ignore } from '../deps/ignore.ts'

export function readPsychDSIgnore(file: psychDSFile) {
const value = file.fileText
export async function readPsychDSIgnore(file: psychDSFile) {
const value = await file.text()
if (value) {
const lines = value.split('\n')
return lines
Expand Down
4 changes: 2 additions & 2 deletions src/schema/applyRules.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,12 @@ async function setupTest(path: string, fileName: string) {

let dsContext;
if (ddFile) {
const description = ddFile.expanded;
const description = await ddFile.text()
.then(JSON.parse)
dsContext = new psychDSContextDataset({datasetPath: path} as ValidatorOptions, ddFile, description);
}

const file = new psychDSFileDeno(path, fileName, ignore);
file.fileText = await file.text();

const context = new psychDSContext(fileTree, file, issues, dsContext);
await context.asyncLoads();
Expand Down
13 changes: 6 additions & 7 deletions src/schema/applyRules.ts
Original file line number Diff line number Diff line change
Expand Up @@ -191,11 +191,10 @@ import { psychDSFile } from '../types/file.ts';
//loop through all the fields found in dataset_metadata.yaml, along with their requirement levels
for (const [key, requirement] of Object.entries(rule.fields)) {
const severity = getFieldSeverity(requirement, context)
//@ts-expect-error: metadata presence assumed
const keyName = `${rule.namespace}${key}`
const keyName = `https://schema.org/${key}`
//expandedSidecar represents the metadata object with all contexts added, e.g. the "name" field becomes the "https://schema.org/name" field.
//we add this schema.org namespace to keyName to account for this.
if (severity && severity !== 'ignore' && !(keyName in context.sidecar)) {
if (severity && severity !== 'ignore' && !(keyName in context.expandedSidecar)) {
if (requirement.issue?.code && requirement.issue?.message) {
context.issues.add({
key: requirement.issue.code,
Expand Down Expand Up @@ -228,13 +227,13 @@ import { psychDSFile } from '../types/file.ts';
schema: GenericSchema,
issues: SchemaOrgIssues
){
const schemaNamespace = 'http://schema.org/'
const schemaNamespace = 'https://schema.org/'
//@type is required in the root object of the metadata file
if ("@type" in context.sidecar){
if ("@type" in context.expandedSidecar){
//@type for the root object must be schema.org/Dataset
//TODO: Check whether it is even valid JSON-LD to have more than one value assigned for @type.
//If it is valid, it should be accounted for.
if ((context.sidecar['@type'] as string[])[0] !== `${schemaNamespace}Dataset`){
if ((context.expandedSidecar['@type'] as string[])[0] !== `${schemaNamespace}Dataset`){
let issueFile: psychDSFile
if(Object.keys(context.metadataProvenance).includes('@type'))
issueFile = context.metadataProvenance['@type']
Expand All @@ -261,7 +260,7 @@ import { psychDSFile } from '../types/file.ts';
return
}
//collect issues recursively for all keys and values in root object
issues = _schemaCheck(context.sidecar, context, schema, '',schemaNamespace,issues)
issues = _schemaCheck(context.expandedSidecar, context, schema, '',schemaNamespace,issues)
logSchemaIssues(context,issues)
}

Expand Down
19 changes: 10 additions & 9 deletions src/schema/context.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@ Deno.test({
)
let dsContext: psychDSContextDataset = new psychDSContextDataset()
if (ddFile) {
const description = ddFile.expanded
const description = await ddFile.text()
.then(JSON.parse)
dsContext = new psychDSContextDataset({datasetPath:PATH} as ValidatorOptions, ddFile,description)
}
await t.step('file sidecar overwrites directory sidecar', async() => {
Expand All @@ -34,8 +35,8 @@ Deno.test({
const context = new psychDSContext(fileTree, file, issues,dsContext)

await context.loadSidecar(fileTree)
if("http://schema.org/key" in context.sidecar){
assertEquals(context.sidecar['http://schema.org/key'],[{"@value":"value"}])}
if("https://schema.org/key" in context.expandedSidecar){
assertEquals(context.expandedSidecar['https://schema.org/key'],[{"@value":"value"}])}
else
assertEquals(1,2)
})
Expand All @@ -47,8 +48,8 @@ Deno.test({
const context = new psychDSContext(fileTree, file, issues,dsContext)

await context.loadSidecar(fileTree)
if("http://schema.org/key" in context.sidecar)
assertEquals(context.sidecar['http://schema.org/key'],[{"@value":"value2"}])
if("https://schema.org/key" in context.expandedSidecar)
assertEquals(context.expandedSidecar['https://schema.org/key'],[{"@value":"value2"}])
else
assertEquals(1,2)
})
Expand All @@ -60,7 +61,7 @@ Deno.test({
const context = new psychDSContext(fileTree, file, issues,dsContext)

await context.loadSidecar(fileTree)
assertEquals("http://schema.org/name" in context.sidecar,true)
assertEquals("https://schema.org/name" in context.expandedSidecar,true)
})

await t.step('no context in sidecar', async() => {
Expand All @@ -78,10 +79,10 @@ Deno.test({
}

const context = new psychDSContext(noCtxFileTree, file, issues,dsContext)
if("@context" in context.sidecar)
delete context.sidecar['@context']
if("@context" in context.expandedSidecar)
delete context.expandedSidecar['@context']

await context.loadSidecar(noCtxFileTree)
assertEquals("http://schema.org/name" in context.sidecar,false)
assertEquals("https://schema.org/name" in context.expandedSidecar,false)
})
}})
28 changes: 14 additions & 14 deletions src/schema/context.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import {
Context,
ContextDataset
} from '../types/context.ts'
import { IssueFile } from '../types/issues.ts'
import { psychDSFile } from '../types/file.ts'
import { FileTree } from '../types/filetree.ts'
import { ColumnsMap } from '../types/columns.ts'
Expand All @@ -10,6 +11,7 @@ import {
import { parseCSV,csvIssue } from '../files/csv.ts'
import { ValidatorOptions } from '../setup/options.ts'
import { logger } from '../utils/logger.ts'
import jsonld from 'jsonld'


export class psychDSContextDataset implements ContextDataset {
Expand Down Expand Up @@ -84,10 +86,6 @@ import {
this.suggestedColumns = []
}

// deno-lint-ignore no-explicit-any
get json(): Promise<Record<string, any>> {
return JSON.parse(this.file.fileText)
}
/** Path of the file wrapped by this context, read straight from `this.file.path`. */
get path(): string {
return this.file.path
}
Expand Down Expand Up @@ -149,10 +147,12 @@ import {
}

if (validSidecars.length === 1) {
this.sidecar = { ...this.sidecar, ...validSidecars[0].expanded }
const validSidecarJson = await validSidecars[0].text()
.then(JSON.parse)
this.sidecar = { ...this.sidecar, ...validSidecarJson }
//keep record of which keys in the metadata object came from which file,
//so they can be properly identified when issues arise
Object.keys(validSidecars[0].expanded).forEach((key) => {
Object.keys(validSidecarJson).forEach((key) => {
const baseKey = key.split('/').at(-1) as string
this.metadataProvenance[baseKey] = validSidecars[0]
})
Expand All @@ -167,7 +167,7 @@ import {
//Moved getExpandedSidecar to the end of loadSidecar since it is asynchronous, must run after
//the rest of loadSidecar, and is necessary for loadValidColumns. The previous implementation had them
//all running in parallel, which caused issues.
this.expandedSidecar = {}//await this.getExpandedSidecar()
this.expandedSidecar = await this.getExpandedSidecar()
this.loadValidColumns()
}
}
Expand All @@ -179,15 +179,15 @@ import {
return
}
//TODO:possibly redundant (could maybe be stored in one place)
const nameSpace = "http://schema.org/"
const nameSpace = "https://schema.org/"
//if there's no variableMeasured property, then the valid column headers cannot be determined
if(!(`${nameSpace}variableMeasured`in this.sidecar)){
if(!(`${nameSpace}variableMeasured`in this.expandedSidecar)){
return
}

let validColumns :string[] = []

for(const variable of this.sidecar[`${nameSpace}variableMeasured`] as object[]){
for(const variable of this.expandedSidecar[`${nameSpace}variableMeasured`] as object[]){
//jsonld.expand turns string values in json into untyped objects with @value keys
if('@value' in variable)
validColumns = [...validColumns,variable['@value'] as string]
Expand All @@ -213,7 +213,7 @@ import {
}
let result
try{
result = await parseCSV(this.file.fileText)
result = await parseCSV(await this.file.text())
}
catch(error){
logger.warning(
Expand Down Expand Up @@ -247,7 +247,7 @@ import {

})
}
/*

async getExpandedSidecar(){
try{
//account for possibility of both http and https in metadata context
Expand All @@ -265,7 +265,7 @@ import {
//in addition to adding the appropriate namespace (e.g. http://schema.org)
//to all keys within the json, it also throws a variety of errors for improper JSON-LD syntax,
//which mostly all pertain to improper usages of privileged @____ keywords
const exp = [] as string[]//await jsonld.expand(this.sidecar)
const exp = await jsonld.expand(this.sidecar)
if(!exp[0])
return {}
else
Expand All @@ -285,7 +285,7 @@ import {
})
return {}
}
}*/
}

async asyncLoads() {
await Promise.allSettled([
Expand Down
1 change: 0 additions & 1 deletion src/types/context.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,5 @@ export interface Context {
validColumns: object
suggestedColumns: string[]
metadataProvenance: Record<string,psychDSFile>
json: object

}
2 changes: 0 additions & 2 deletions src/types/file.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,6 @@ export interface psychDSFile {
ignored: boolean
// ReadableStream to file raw contents
stream: ReadableStream<Uint8Array>
// string storage of file contents
fileText: string
// object for temporarily storing issues with jsonld before issue object is created in validate()
issueInfo: issueInfo[]
// slot to hold expanded version of jsonld
Expand Down
3 changes: 2 additions & 1 deletion src/validators/psychds.ts
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,8 @@ export async function validate(
let dsContext
if (ddFile) {
try{
const description = ddFile.expanded
const description = await ddFile.text()
.then(JSON.parse)
dsContext = new psychDSContextDataset(options, ddFile,description)
}
catch(_error){
Expand Down

0 comments on commit a0c51f0

Please sign in to comment.