...
Code Block |
---|
~>cat phaseMapper.sh #!/bin/sh RESULT_BUCKET=s3://sagetest-YourUsername/results while read S3_INPUT_FILE; do echo input to process ${S3_INPUT_FILE} 1>&2 # For debugging purposes, print out the files cached for us ls -la 1>&2 # Parse the s3 file path to get the file name LOCAL_INPUT_FILE=$(echo ${S3_INPUT_FILE} | perl -pe 'if (/^((s3[n]?):\/)?\/?([^:\/\s]+)((\/\w+)*\/)([\w\-\.]+[^#?\s]+)(.*)?(#[\w\-]+)?$/) {print "$6\n"};' | head -1) # Download the file from S3 echo hadoop fs -get ${S3_INPUT_FILE} ${LOCAL_INPUT_FILE} 1>&2 hadoop fs -get ${S3_INPUT_FILE} ${LOCAL_INPUT_FILE} 1>&2 # Run phase processing ./phase ${LOCAL_INPUT_FILE} ${LOCAL_INPUT_FILE}_out 100 1 100 # Upload the output files ls -la ${LOCAL_INPUT_FILE}*_out* 1>&2 for f in ${LOCAL_INPUT_FILE}*_out* do echo hadoop fs -put $f ${RESULT_BUCKET}/$LOCAL_INPUT_FILE/$f 1>&2 hadoop fs -put $f ${RESULT_BUCKET}/$LOCAL_INPUT_FILE/$f 1>&2 done echo processed ${S3_INPUT_FILE} 1>&2 echo 1>&2 echo 1>&2 done exit 0 |
...