Engineering Audit Resources
Data warehouse queries, documentation, and handling
Restriction change of state
#select t1.ID, t1.IS_CONTROLLED, t1.IS_RESTRICTED, t1.IS_PUBLIC, t2.IS_CONTROLLED, t2.IS_RESTRICTED, t2.IS_PUBLIC
select t1.*, t2.*
from (
select ns2.*
from NODE_SNAPSHOT ns2
join (
# most recent snapshot
select ns1.ID, max(ns1.TIMESTAMP)
from NODE_SNAPSHOT ns1
group by ns1.ID
) nsmax1 on nsmax1.ID=ns2.ID
) t1
join (
select ns2.*
from NODE_SNAPSHOT ns2
join (
# snapshot a month ago
select ns1.ID, max(ns1.TIMESTAMP)
from NODE_SNAPSHOT ns1
where ns1.TIMESTAMP < unix_timestamp('2019-09-01 00:00:00')*1000
group by ns1.ID
) nsmax1 on nsmax1.ID=ns2.ID
) t2 on t2.ID=t1.ID and t2.VERSION_NUMBER=t1.VERSION_NUMBER
where not (t1.IS_PUBLIC = t2.IS_PUBLIC and t1.IS_CONTROLLED = t2.IS_CONTROLLED and t1.IS_RESTRICTED = t2.IS_RESTRICTED)
limit 100
;
Export the
Restriction change of state
table to a spread sheet to create a pivot table summary of the number of access control changes by project. Include this summary in the report.Contact the project owner or community manager of each different project on the list to notify them that their files have been identified as anomalies through a regular Synapse audit.
For any responses that indicate inadvertent or inappropriately permissive access control changes, create a ticket within the Governance Jira space for investigation of a privacy incident.
Top downloaders
# top 20 downloaders by count(filehandle_id)
select fhdr.USER_ID, count(*) as c
from FILE_HANDLE_DOWNLOAD_RECORD fhdr
where fhdr.TIMESTAMP between unix_timestamp('2019-07-01 00:00:00')*1000 and unix_timestamp('2019-09-10 00:00:00')*1000
group by fhdr.USER_ID
order by c desc
limit 20;
Contact the account holder of each account returned by this query with a prompt like the following:
Your Synapse account has been identified during a routine Synapse audit as having accessed a large number of files in the last six months. This activity may be expected due to how you use Synapse, or may be the result of a compromised or shared account.ÂPlease reply to this email message to confirm that you are not aware of a breach of your Synapse credentials and that you have not shared them with anyone else.Â
Summarize the responses for the report.
For any responses that indicate loss of control of account credentials, create a ticket within the Governance Jira space for investigation of a privacy incident.
MD5 duplicates
create table auditdb.fhd_detail2 as
select ls.ID, ls.VERSION_NUMBER, ls.NAME,
ls.PROJECT_ID, ls.PARENT_ID, ls.BENEFACTOR_ID,
ls.IS_PUBLIC, ls.IS_CONTROLLED, ls.IS_RESTRICTED,
ls.FILE_HANDLE_ID, md5c.CONTENT_MD5, md5c.c as DUP_COUNT
from auditdb.latest_snapshot_202003 ls
join warehouse.FILE_HANDLE_RECORD fhr on fhr.ID=ls.FILE_HANDLE_ID
join auditdb.fhr_md5_count md5c on md5c.CONTENT_MD5=fhr.CONTENT_MD5
where ls.IS_PUBLIC=1 and (ls.IS_CONTROLLED=1 or ls.IS_RESTRICTED=1)
select fhdd.CONTENT_MD5 as MD5, fhdd.PROJECT_ID as SOURCE_PROJECT, fhdd.ID as SOURCE_ID, fhdd.VERSION_NUMBER as SOURCE_VERSION, fhdd.IS_PUBLIC as SOURCE_IS_PUBLIC, fhdd.IS_CONTROLLED as SOURCE_IS_CONTROLLED, fhdd.IS_RESTRICTED as SOURCE_IS_RESTRICTED,
ls.PROJECT_ID as DUP_PROJECT, ls.ID as DUP_ID, ls.CREATED_BY, ls.IS_PUBLIC as DUP_IS_PUBLIC, ls.IS_CONTROLLED as DUP_IS_CONTROLLED, ls.IS_RESTRICTED as DUP_IS_RESTRICTED
from auditdb.fhd_detail2 fhdd
join FILE_HANDLE_RECORD fhr on fhr.CONTENT_MD5=fhdd.CONTENT_MD5
join auditdb.latest_snapshot_202003 ls on ls.FILE_HANDLE_ID=fhr.ID
where fhdd.FILE_HANDLE_ID <> fhr.ID and ls.IS_PUBLIC = 1 and (fhdd.IS_CONTROLLED <> ls.IS_CONTROLLED or fhdd.IS_RESTRICTED <> ls.IS_RESTRICTED)
Export the project summary table from the
MD5 duplicates
tableContact the file owner on the list to notify them that their files have been identified as anomalies through a regular Synapse audit.
For any responses that indicate proliferation of files beyond intended, create a ticket within the Governance Jira space for investigation of a privacy incident.