Document toolbox

Proposed Schemas with Value Domains Specified

Introduction

Now that users are going to start putting data into the system, we need a way to communicate to them which fields are mandatory, which are optional, and what value domains are allowed. Please add your thoughts to this wiki page as the month progresses and we work with our Scientists.

JSON Schema

The JsonSchema is an emerging standard similar to DTDs for XML.

See type descriptors here http://tools.ietf.org/html/draft-zyp-json-schema-03#section-5

Query Results Schema

{
  "type": "object",
  "properties": {
    "results": {
      "type": "array",
      "items": {
        "type": "object"
      }
    },
    "totalNumberOfResults": {
      "type": "number"
    }
  }
}

Project Schema

The JsonSchema is an emerging standard similar to DTDs for XML.

{
  "type": "object",
  "properties": {
    "accessControlList": {
      "type": "string"
    },
    "annotations": {
      "type": "string"
    },
    "creationDate": {
      "type": "number"
    },
    "creator": {
      "type": "string"
    },
    "description": {
      "type": "string"
    },
    "etag": {
      "type": "string"
    },
    "id": {
      "type": "string"
    },
    "name": {
      "type": "string"
    },
    "parentId": {
      "type": "string"
    },
    "uri": {
      "type": "string"
    }
  }
}

'creator' needs to be changed to 'createdBy' so that we can query upon it

Dataset Schema

The JsonSchema is an emerging standard similar to DTDs for XML.

{
  "type": "object",
  "properties": {
    "accessControlList": {
      "type": "string"
    },
    "annotations": {
      "type": "string"
    },
    "creationDate": {
      "type": "number"
    },
    "creator": {
      "type": "string"
    },
    "description": {
      "type": "string"
    },
    "etag": {
      "type": "string"
    },
    "hasClinicalData": {
      "type": "boolean"
    },
    "hasExpressionData": {
      "type": "boolean"
    },
    "hasGeneticData": {
      "type": "boolean"
    },
    "id": {
      "type": "string"
    },
    "layers": {
      "type": "string"
    },
    "locations": {
      "type": "string"
    },
    "name": {
      "type": "string"
    },
    "parentId": {
      "type": "string"
    },
    "releaseDate": {
      "type": "number"
    },
    "status": {
      "type": "string"
    },
    "uri": {
      "type": "string"
    },
    "version": {
      "type": "string"
    }
  }
}

'creator' needs to be changed to 'createdBy' so that we can query upon it

Layer Schema

The JsonSchema is an emerging standard similar to DTDs for XML.

{
  "properties": {
    "accessControlList": {
      "description": "the uri to access the ACL for this Layer",
      "type": "string",
      "format": "uri"
    },
    "annotations": {
      "description": "SYSTEM CONTROLLED, the uri to access the annotations for this Layer",
      "type": "string",
      "format": "uri"
    },
    "creationDate": {
      "description": "SYSTEM CONTROLLED, the creation date of this metadata in Synapse",
      "type": "integer",
      "format": "utc-millisec"
    },
    "description": {
      "description": "a narrative blurb of text about this layer",
      "type": "string",
      "format": "text"
    },
    "etag": {
      "description": "SYSTEM CONTROLLED, a value used for optimistic concurrency control as a way to help prevent simultaneous updates of a resource from overwriting each other",
      "type": "string"
    },
    "id": {
      "description": "SYSTEM CONTROLLED, the Synapse identifier for this entity",
      "type": "string",
      "format": "text"
    },
    "locations": {
      "description": "SYSTEM CONTROLLED, the uri to access the location metadata regarding where the data for this layer is actually stored",
      "type": "string",
      "format": "uri"
    },
    "name": {
      "description": "the display name of this layer",
      "type": "string",
      "format": "text",
      "required": true
    },
    "numSamples": {
      "description": "the number of samples in this layer",
      "type": "integer",
      "minimum": 0
    },
    "parentId": {
      "description": "the Synapse identifier for this entity, all layers must have a parent",
      "type": "string",
      "format": "text",
      "required": true
    },
    "platform": {
      "description": "the platform upon which the samples were processed (primary ontology: Ontology Of Biomedical Investigation)",
      "type": "string",
      "format": ["ontology:OBI", "ontology:SageBioCustom"]
    },
    "previews": {
      "description": "SYSTEM CONTROLLED, the uri to access the previews for this Layer",
      "type": "string",
      "format": "uri"
    },
    "processingFacility": {
      "description": "the ontological term best-fitting the facility in which this data was processed, (primary ontology: Ontology Of Biomedical Investigation)",
      "type": "string",
      "format": ["ontology:OBI", "ontology:SageBioCustom"]
    },
    "publicationDate": {
      "description": "the publication date of the notable paper for the data in this layer",
      "type": "integer",
      "format": "utc-millisec"
    },
    "qcBy": {
      "description": "the Synapse username of the person who QCed this layer",
      "type": "string",
      "format": "text"
    },
    "qcDate": {
      "description": "the date upon which this Layer was QCed",
      "type": "number",
      "format": "utc-millisec"
    },
    "releaseNotes": {
      "description": "free text regarding any release notes associated with this layer",
      "type": "string",
      "format": "text"
    },
    "status": {
      "type": "string",
      "enum": ["QCed", "Curated", "Raw", "Unknown"],
      "default": "Unknown"
    },
    "tissueType": {
      "description": "the ontological term best-fitting the tissue type found in this layer, (primary ontology: Foundational Model Of Anatomy)",
      "type": "string",
      "format": ["ontology:FMA", "ontology:SageBioCustom"]
    },
    "type": {
      "description": "the high-level type of data this layer holds",
      "type": "string",
      "enum": ["C", "E", "G"],
      "required": true
    },
    "uri": {
      "description": "SYSTEM CONTROLLED, the uri to access this Layer",
      "type": "string",
      "format": "uri"
    },
    "version": {
      "description": "the Synapse version of this Layer",
      "type": "string",
      "pattern": "^\\d+\\.\\d+\\.\\d+$"
    }
  },
  "type": "object"
}

More suggestions:

  • Xa: Layers should list both who did the curation and who did the QC, and on what date. Right now this info resides in the dataset, but different folks might curate different layers. More: we should say "processedBy" and agree that for all data we will have a curated layer, a QCed layer, etc., and use references to refer to the layer that was input to the process step.
  • Matt: We also need a new field for "platformVendor", but this value should come from a controlled vocabulary. (The analogy here is that this is the make of the car.) This set should include:
    • Affymetrix
    • Agilent
    • Illumina
    • Perlegen
    • Nimblegen
    • Custom
  • Matt: We also need to think more carefully about the values we are using for "platform". BioConductor has a list of these that we might use as our controlled vocabulary. (The analogy here is that this is the model of the car.)
  • Adam: nothing should be called "clinical" layer or "C" since that is too specific. It should be "phenotypic" layer or "P".

Layer Preview Schema

The JsonSchema is an emerging standard similar to DTDs for XML.

{
  "type": "object",
  "properties": {
    "accessControlList": {
      "type": "string"
    },
    "annotations": {
      "type": "string"
    },
    "creationDate": {
      "type": "number"
    },
    "etag": {
      "type": "string"
    },
    "headers": {
      "type": "array",
      "items": {
        "type": "string"
      }
    },
    "id": {
      "type": "string"
    },
    "name": {
      "type": "string"
    },
    "parentId": {
      "type": "string"
    },
    "previewBlob": {
      "type": "array",
      "items": {
        "type": "string"
      }
    },
    "previewString": {
      "type": "string"
    },
    "rows": {
      "type": "array",
      "items": {
        "type": "object"
      }
    },
    "uri": {
      "type": "string"
    }
  }
}

Dataset or Layer Locations Schema

The JsonSchema is an emerging standard similar to DTDs for XML.

{
  "type": "object",
  "properties": {
    "accessControlList": {
      "type": "string"
    },
    "annotations": {
      "type": "string"
    },
    "creationDate": {
      "type": "number"
    },
    "etag": {
      "type": "string"
    },
    "id": {
      "type": "string"
    },
    "md5sum": {
      "type": "string"
    },
    "name": {
      "type": "string"
    },
    "parentId": {
      "type": "string"
    },
    "path": {
      "type": "string"
    },
    "type": {
      "type": "string"
    },
    "uri": {
      "type": "string"
    }
  }
}

Annotations Schema

The JsonSchema is an emerging standard similar to DTDs for XML.

{
  "type": "object",
  "properties": {
    "blobAnnotations": {
      "type": "object"
    },
    "creationDate": {
      "type": "number"
    },
    "dateAnnotations": {
      "type": "object"
    },
    "doubleAnnotations": {
      "type": "object"
    },
    "etag": {
      "type": "string"
    },
    "id": {
      "type": "string"
    },
    "longAnnotations": {
      "type": "object"
    },
    "stringAnnotations": {
      "type": "object"
    },
    "uri": {
      "type": "string"
    }
  }
}

Access Control List Schema

The JsonSchema is an emerging standard similar to DTDs for XML.

{
  "type": "object",
  "properties": {
    "createdBy": {
      "type": "string"
    },
    "creationDate": {
      "type": "number"
    },
    "etag": {
      "type": "string"
    },
    "id": {
      "type": "string"
    },
    "modifiedBy": {
      "type": "string"
    },
    "modifiedOn": {
      "type": "number"
    },
    "resourceAccess": {
      "type": "array",
      "items": {
        "type": "object",
        "properties": {
          "accessType": {
            "type": "array",
            "items": {
              "type": "string"
            }
          },
          "id": {
            "type": "string"
          },
          "userGroupId": {
            "type": "string"
          }
        }
      }
    },
    "resourceId": {
      "type": "string"
    },
    "uri": {
      "type": "string"
    }
  }
}