#!/bin/bash
#
# merge.sh - build one merged index at <outfile> from the data on stdin.
#
# Origin: commit 2784977e4613260a3e40a1e6de412ce276e50467 ("Initial commit"),
# Kevin Wallace, Thu, 9 May 2019 00:16:02 -0700.
#
# Usage: merge.sh outfile        (must be run from the directory that
#                                 contains nutrimatic/bin/)
#
# Three things run concurrently inside a scratch directory created in the
# current working directory:
#
#   1. make-index (foreground) consumes stdin. It is expected to write
#      index/shard.NNNNN.index files - that is the naming the copier polls
#      for. When it exits, index/EOF is created.
#   2. The copier moves shard N into merge/ as soon as shard N+1 (or EOF)
#      exists, i.e. once shard N can no longer be the one being written.
#      After the last shard it moves the EOF marker across as well.
#   3. The merger repeatedly merges the smallest files in merge/ (at most
#      MAXMERGE at a time) with merge-indexes until, after EOF, a single
#      file is left; that file becomes <scratch>/merged.
#
# Exit status is non-zero if any stage fails or the run is interrupted; the
# scratch directory is removed on every exit path.

# Kept out of the shebang so it also applies when run as `bash merge.sh`.
set -e

if [[ $# -ne 1 ]]; then
  echo >&2 "usage: $0 [outfile]"
  exit 1
fi

OUT=$1
# Absolute paths, because the merger below changes directory before use.
MERGE_INDEXES=$(realpath nutrimatic/bin/merge-indexes)
MAKE_INDEX=$(realpath nutrimatic/bin/make-index)

# merge-indexes FDs: stdin, stdout, stderr, [in...], out
# fatal assumption: we'll never exceed ARG_MAX
fd_limit=$(ulimit -n)
if [[ $fd_limit == unlimited ]]; then
  # No numeric limit to derive a batch size from; use a conventional default.
  fd_limit=1024
fi
MAXMERGE=$((fd_limit - 4))
if ((MAXMERGE < 2)); then
  # Merging fewer than two files per pass could never reduce the file count.
  echo >&2 "$0: open-file limit ($fd_limit) is too low to merge indexes"
  exit 1
fi
CUTOFF=1

# Deliberately NOT called TMPDIR: assigning to an exported TMPDIR would leak
# this relative path into every child process that creates temporary files.
WORK_DIR=$(mktemp -d mergetmp.XXXXX)
MERGE_DIR=$WORK_DIR/merge
INDEX_DIR=$WORK_DIR/index
mkdir -p "$MERGE_DIR" "$INDEX_DIR"

copy_pid=
merge_pid=

# EXIT handler: stop any worker that is still running and remove the scratch
# directory. The script's exit status is left untouched.
cleanup() {
  [[ -z $copy_pid ]] || kill "$copy_pid" 2>/dev/null || true
  [[ -z $merge_pid ]] || kill "$merge_pid" 2>/dev/null || true
  rm -rf -- "$WORK_DIR"
}

# INT/TERM handler: terminate the whole process group (workers and whatever
# they are currently running), then exit through cleanup.
on_signal() {
  # `kill 0` also signals this shell; ignore it so we survive to clean up.
  trap '' INT TERM
  kill 0 2>/dev/null || true
  wait 2>/dev/null || true
  exit 1
}

# --- merger ---------------------------------------------------------------
(
  cd "$MERGE_DIR"
  shopt -s nullglob
  m=0
  while true; do
    # Sample the EOF marker *before* listing, so that when eof=1 the listing
    # is guaranteed to contain every shard that will ever arrive.
    eof=0
    if [[ -e EOF ]]; then
      eof=1
    fi

    # Expand the globs first: running `ls` with no operands would list the
    # directory itself and could return the EOF marker as an input file.
    candidates=(*.index *.merge)
    files=()
    if ((${#candidates[@]} > 0)); then
      # ls -S sorts largest first, so tail keeps the MAXMERGE smallest files.
      while IFS= read -r f; do
        files+=("$f")
      done < <(ls -S -- "${candidates[@]}" | tail -n "$MAXMERGE")
    fi

    if ((eof == 0 && ${#files[@]} < 2)); then
      sleep 1
      continue
    fi
    if ((${#files[@]} == 0)); then
      echo >&2 "$0: input ended but there are no index shards to merge"
      exit 1
    fi

    out=shard.$(printf '%05d' "$m").merge
    m=$((m + 1))
    echo "$out: ${files[*]}"
    # A failed merge must abort: otherwise its partial output would be fed
    # into the next pass or delivered as the final result.
    if ! "$MERGE_INDEXES" "$CUTOFF" "${files[@]}" "$out"; then
      echo >&2 "$0: merge-indexes failed while writing $out"
      exit 1
    fi
    rm -- "${files[@]}"

    # After EOF, a pass that took fewer than MAXMERGE files took all of
    # them, so $out is the only file left.
    if ((eof == 1 && ${#files[@]} < MAXMERGE)); then
      mv -- "$out" ../merged
      exit 0
    fi
  done
) &
merge_pid=$!

# --- copier ---------------------------------------------------------------
(
  n=0
  eof=$INDEX_DIR/EOF
  while true; do
    cur=$INDEX_DIR/shard.$(printf '%05d' "$n").index
    n=$((n + 1))
    next=$INDEX_DIR/shard.$(printf '%05d' "$n").index
    # $cur is treated as complete once its successor or the EOF marker exists.
    while [[ ! -e $next && ! -e $eof ]]; do
      sleep 1
    done
    echo "$cur"
    mv -- "$cur" "$MERGE_DIR/"
    if [[ ! -e $next && -e $eof ]]; then
      # That was the last shard; tell the merger no more input is coming.
      mv -- "$eof" "$MERGE_DIR/EOF"
      exit 0
    fi
  done
) &
copy_pid=$!

# Installed after the workers are forked, so the handlers belong to this
# shell only.
trap cleanup EXIT
trap on_signal INT TERM

# consumes stdin
"$MAKE_INDEX" "$INDEX_DIR/shard"
touch "$INDEX_DIR/EOF"

# Clear each pid once reaped so cleanup never signals a recycled pid.
wait "$copy_pid"
copy_pid=
wait "$merge_pid"
merge_pid=

mv -- "$WORK_DIR/merged" "$OUT"
exit 0