#!/bin/bash
#
# Build a single merged index at the path given as the only argument.
#
# Three activities run concurrently:
#   * make-index (foreground) consumes stdin and is expected to write
#     shard.NNNNN.index files under $INDEXDIR.
#   * The "copier" worker moves each shard into $MERGEDIR once the next
#     shard (or the EOF marker) exists -- presumably the signal that
#     make-index has finished writing the current one.
#   * The "merger" worker repeatedly runs merge-indexes over the smallest
#     files in $MERGEDIR until, after EOF, one file remains; that file
#     becomes $WORKDIR/merged and is finally moved to the output path.
#
# Paths under $WORKDIR are relative to the starting directory, so the main
# shell must not change directory.

# Set here rather than on the shebang so it also applies under `bash script`.
set -e

if [ $# != 1 ]; then
  echo >&2 "usage: $0 [outfile]"
  exit 1
fi
OUT="$1"

MERGE_INDEXES=$(realpath nutrimatic/bin/merge-indexes)
MAKE_INDEX=$(realpath nutrimatic/bin/make-index)

# merge-indexes FDs: stdin, stdout, stderr, [in...], out
# fatal assumption: the argument list never exceeds ARG_MAX
fd_limit=$(ulimit -n)
case "$fd_limit" in
  # "unlimited" (or anything non-numeric) would silently evaluate to 0 in
  # arithmetic; fall back to an arbitrary conservative cap instead.
  '' | *[!0-9]*) fd_limit=1024 ;;
esac
MAXMERGE=$((fd_limit - 4))
if [ "$MAXMERGE" -lt 2 ]; then
  # Merging fewer than two inputs at a time can never reduce the file count.
  echo >&2 "$0: open file limit ($fd_limit) is too low to merge indexes"
  exit 1
fi
CUTOFF=1

merge_pid=
copy_pid=

# Deliberately not named TMPDIR: that variable is conventionally read by
# other programs to locate their temp directory, and this path is relative.
WORKDIR=$(mktemp -d mergetmp.XXXXX)
MERGEDIR=$WORKDIR/merge
INDEXDIR=$WORKDIR/index

# Runs on every exit path: stops workers that are still alive, removes the
# scratch directory, and preserves the exit status. A merge-indexes process
# started by a killed worker is not signalled here and may run to completion.
cleanup() {
  local status=$?
  local pid
  trap - EXIT
  trap '' INT TERM
  for pid in $copy_pid $merge_pid; do
    kill "$pid" 2>/dev/null || true
    wait "$pid" 2>/dev/null || true
  done
  rm -rf -- "$WORKDIR"
  exit "$status"
}

# On INT/TERM, signal the whole process group (workers and whatever they are
# running). The signals are ignored first so the group-wide TERM does not
# re-enter this handler. $1 is the exit status to report.
on_signal() {
  trap '' INT TERM
  kill -TERM 0 2>/dev/null || true
  exit "$1"
}

# Subshells started below do not inherit these traps, so a worker exiting
# does not trigger cleanup.
trap cleanup EXIT
trap 'on_signal 130' INT
trap 'on_signal 143' TERM

mkdir -p "$MERGEDIR" "$INDEXDIR"

# Merger worker.
(
  cd "$MERGEDIR"
  shopt -s nullglob
  m=0
  while true; do
    # Sample EOF *before* listing files: the copier moves EOF in last, so
    # if EOF is visible here, every shard is already in this directory.
    eof=0
    if [ -e EOF ]; then
      eof=1
    fi

    # Glob first so an empty match never turns into a bare `ls`, which
    # would list the whole directory (including the EOF marker).
    candidates=(*.index *.merge)
    files=()
    if [ ${#candidates[@]} -gt 0 ]; then
      # ls -S sorts largest first, so tail keeps the smallest files. Names
      # are shard.NNNNN.{index,merge}, so word splitting is safe here.
      files=($(ls -S -- "${candidates[@]}" | tail -n "$MAXMERGE"))
    fi

    if [ $eof -eq 0 ] && [ ${#files[@]} -lt 2 ]; then
      sleep 1
      continue
    fi
    if [ ${#files[@]} -eq 0 ]; then
      echo >&2 "$0: no index shards to merge"
      exit 1
    fi

    out=shard.$(printf '%05d' $m).merge
    m=$((m+1))
    echo "$out: ${files[*]}"
    # Abort on failure: continuing would leave a partial $out behind that
    # the next pass would pick up as an input.
    if ! "$MERGE_INDEXES" "$CUTOFF" "${files[@]}" "$out"; then
      echo >&2 "$0: merge-indexes failed while writing $out"
      exit 1
    fi
    rm -- "${files[@]}"

    # Fewer than MAXMERGE inputs after EOF means nothing was left out of
    # this pass, so $out is the final result.
    if [ $eof -eq 1 ] && [ ${#files[@]} -lt "$MAXMERGE" ]; then
      mv "$out" ../merged
      exit 0
    fi
  done
) &
merge_pid=$!

# Copier worker.
(
  n=0
  while true; do
    cur=$INDEXDIR/shard.$(printf '%05d' $n).index
    n=$((n+1))
    next=$INDEXDIR/shard.$(printf '%05d' $n).index
    eof=$INDEXDIR/EOF
    # Hold the current shard back until the next one or EOF shows up.
    while [ ! -e "$next" ] && [ ! -e "$eof" ]; do
      sleep 1
    done
    if [ ! -e "$cur" ]; then
      echo >&2 "$0: expected shard $cur was never produced"
      exit 1
    fi
    echo "$cur"
    mv "$cur" "$MERGEDIR/"
    if [ ! -e "$next" ] && [ -e "$eof" ]; then
      # Last shard handed over; tell the merger no more are coming.
      mv "$eof" "$MERGEDIR/EOF"
      exit 0
    fi
  done
) &
copy_pid=$!

# consumes stdin
"$MAKE_INDEX" "$INDEXDIR/shard"
touch "$INDEXDIR/EOF"

# A failing worker makes wait return non-zero, which aborts via set -e and
# the EXIT trap. PIDs are cleared once reaped so cleanup does not signal them.
wait "$copy_pid"
copy_pid=
wait "$merge_pid"
merge_pid=

mv "$WORKDIR/merged" "$OUT"
exit 0