diff options
Diffstat (limited to 'merge.sh')
-rwxr-xr-x | merge.sh | 75 |
1 files changed, 75 insertions, 0 deletions
#!/bin/bash
#
# merge.sh - build index shards from stdin and merge them into one file.
#
# Usage: merge.sh outfile   (index input is read from stdin)
#
# Three things run concurrently:
#   * make-index (foreground) reads stdin and writes shard.NNNNN.index files
#     into $INDEXDIR, then this script drops an EOF marker there.
#   * the copy stage moves each finished shard (and finally the EOF marker)
#     from $INDEXDIR into $MERGEDIR.
#   * the merge stage repeatedly merges the smallest files in $MERGEDIR until
#     a single file remains, which becomes the output.

# Options live here rather than on the shebang so that they also apply when
# the script is started as `bash merge.sh`.
set -euo pipefail

if (( $# != 1 )); then
  echo >&2 "usage: $0 [outfile]"
  exit 1
fi

OUT="$1"
# Absolute paths, because the merge stage changes directory before using them.
MERGE_INDEXES=$(realpath nutrimatic/bin/merge-indexes)
MAKE_INDEX=$(realpath nutrimatic/bin/make-index)

# Fail before consuming stdin if either tool is missing.
for tool in "$MERGE_INDEXES" "$MAKE_INDEX"; do
  if [[ ! -x "$tool" ]]; then
    echo >&2 "$0: cannot execute $tool"
    exit 1
  fi
done

# merge-indexes FDs: stdin, stdout, stderr, [in...], out
# fatal assumption: we'll never exceed ARG_MAX
fd_limit=$(ulimit -n)
if [[ "$fd_limit" =~ ^[0-9]+$ ]]; then
  MAXMERGE=$((fd_limit - 4))
else
  # `ulimit -n` printed something non-numeric (e.g. "unlimited"); fall back to
  # a budget based on a conventional 1024-descriptor limit.
  MAXMERGE=1020
fi
if (( MAXMERGE < 2 )); then
  echo >&2 "$0: open file limit ($fd_limit) is too low to merge anything"
  exit 1
fi

# First argument handed to merge-indexes.
# NOTE(review): presumably a frequency cutoff - confirm against merge-indexes.
CUTOFF=1

# Deliberately not named TMPDIR: that variable is honoured by mktemp and other
# tools, and overwriting it with a relative path would break them after a cd.
WORKDIR=$(mktemp -d mergetmp.XXXXX)
MERGEDIR=$WORKDIR/merge
INDEXDIR=$WORKDIR/index

copy_pid=
merge_pid=

# Stop any background stage that is still running and remove the work
# directory. Installed on EXIT so it runs on success and on every failure.
# A merge-indexes process already started by the merge stage is not signalled
# here and may run to completion on its own.
cleanup() {
  local pid
  for pid in "$copy_pid" "$merge_pid"; do
    if [[ -n "$pid" ]]; then
      kill "$pid" 2>/dev/null || true
    fi
  done
  rm -rf -- "$WORKDIR"
}

# INT/TERM: clean up, then signal the whole process group so that children of
# the background stages stop too. The traps are cleared first because
# `kill 0` also signals this shell.
on_signal() {
  trap - EXIT INT TERM
  cleanup
  kill 0
  exit 1
}

trap cleanup EXIT
trap on_signal INT TERM

mkdir -p "$MERGEDIR" "$INDEXDIR"

# --- merge stage -----------------------------------------------------------
(
  trap - EXIT INT TERM   # the parent owns cleanup
  cd "$MERGEDIR"
  shopt -s nullglob      # an unmatched glob expands to nothing
  m=0
  while true; do
    # Sample EOF *before* listing: the copy stage moves EOF in last, so once
    # it is visible every shard is already present in this directory.
    eof=0
    if [[ -e EOF ]]; then
      eof=1
    fi

    # The smallest $MAXMERGE files (ls -S sorts largest first). ls is only
    # run when something matched; with no operands it would list the whole
    # directory, EOF marker included.
    candidates=(*.index *.merge)
    files=()
    if (( ${#candidates[@]} > 0 )); then
      while IFS= read -r f; do
        files+=("$f")
      done < <(ls -S -- "${candidates[@]}" | tail -n "$MAXMERGE")
    fi

    # Before EOF, wait until there are at least two files worth merging.
    if (( eof == 0 && ${#files[@]} < 2 )); then
      sleep 1
      continue
    fi
    if (( ${#files[@]} == 0 )); then
      echo >&2 "$0: no index shards to merge"
      exit 1
    fi

    out=$(printf 'shard.%05d.merge' "$m")
    m=$((m + 1))
    echo "$out: ${files[*]}"
    # A failed merge aborts this stage (set -e) instead of being retried
    # forever or having a partial output promoted to the final result.
    "$MERGE_INDEXES" "$CUTOFF" "${files[@]}" "$out"
    rm -- "${files[@]}"

    # After EOF, fewer than $MAXMERGE inputs means this pass consumed every
    # remaining file, so $out is the final result.
    if (( eof == 1 && ${#files[@]} < MAXMERGE )); then
      mv -- "$out" ../merged
      exit 0
    fi
  done
) &
merge_pid=$!

# --- copy stage ------------------------------------------------------------
(
  trap - EXIT INT TERM   # the parent owns cleanup
  n=0
  while true; do
    cur=$(printf '%s/shard.%05d.index' "$INDEXDIR" "$n")
    n=$((n + 1))
    next=$(printf '%s/shard.%05d.index' "$INDEXDIR" "$n")
    eof=$INDEXDIR/EOF
    # Shard $cur is only moved once the next shard or the EOF marker exists.
    # NOTE(review): presumably make-index finishes one shard before creating
    # the next - confirm.
    while [[ ! -e "$next" && ! -e "$eof" ]]; do
      sleep 1
    done
    echo "$cur"
    mv -- "$cur" "$MERGEDIR/"
    if [[ ! -e "$next" && -e "$eof" ]]; then
      # That was the last shard; hand the EOF marker to the merge stage.
      mv -- "$eof" "$MERGEDIR/EOF"
      exit 0
    fi
  done
) &
copy_pid=$!

# consumes stdin
"$MAKE_INDEX" "$INDEXDIR/shard"
touch "$INDEXDIR/EOF"

# A failing stage makes `wait` return non-zero, which aborts the script under
# set -e; the EXIT trap then stops the other stage and removes $WORKDIR.
# Each PID is cleared once reaped so cleanup never signals a recycled PID.
wait "$copy_pid"
copy_pid=
wait "$merge_pid"
merge_pid=

mv -- "$WORKDIR/merged" "$OUT"
exit 0