blob: 6f764de7c8462e8b1e2db77da9e4c07a5cc5ac37 (
plain) (
tree)
|
|
#!/bin/bash -e
#
# Builds one merged index file from the data on stdin.
#
# This first section validates the command line, locates the helper
# binaries, derives the merge fan-in from the open-file limit and creates the
# scratch directories used by the rest of the script.
#
# Usage: <script> outfile      (input is consumed from stdin by make-index)

# Options on the shebang line are dropped when the script is started as
# `bash script`; enable errexit explicitly so a failing step always aborts.
set -e

if [ $# != 1 ]; then
  echo >&2 "usage: $0 [outfile]"
  exit 1
fi
OUT="$1"

# Helpers are resolved relative to the current directory, so the script has to
# be started from the directory that contains nutrimatic/.
MERGE_INDEXES=$(realpath nutrimatic/bin/merge-indexes)
MAKE_INDEX=$(realpath nutrimatic/bin/make-index)
for tool in "$MERGE_INDEXES" "$MAKE_INDEX"; do
  # Fail here, before any background job is started, rather than deep inside
  # one of the worker loops.
  if [[ ! -x "$tool" ]]; then
    echo >&2 "$0: missing or non-executable helper: $tool"
    exit 1
  fi
done

# merge-indexes FDs: stdin, stdout, stderr, [in...], out
# fatal assumption: the resulting argument list never exceeds ARG_MAX
nofile=$(ulimit -n)
if [[ ! "$nofile" =~ ^[0-9]+$ ]]; then
  # `ulimit -n` can print "unlimited"; arithmetic on that would silently give
  # a negative fan-in, so fall back to a conventional descriptor limit.
  nofile=1024
fi
MAXMERGE=$((nofile - 4))
if ((MAXMERGE < 2)); then
  # With a fan-in below 2 a merge pass can never reduce the number of files.
  echo >&2 "$0: open file limit ($nofile) is too low to merge indexes"
  exit 1
fi

# Passed as the first argument to merge-indexes.
CUTOFF=1

# Scratch tree, created in the current directory (template without a slash).
# NOTE(review): this assigns the conventional TMPDIR variable; if TMPDIR is
# exported by the caller, child processes will see this relative path.
TMPDIR=$(mktemp -d mergetmp.XXXXX)
MERGEDIR=$TMPDIR/merge
INDEXDIR=$TMPDIR/index
mkdir -p "$MERGEDIR" "$INDEXDIR"
# Background merger.
#
# Runs inside $MERGEDIR and repeatedly combines the smallest index files
# (*.index delivered by the copier, *.merge produced by earlier passes) into a
# new shard.NNNNN.merge file via merge-indexes, deleting the inputs after each
# successful pass.  Once the EOF marker has appeared and a pass consumed fewer
# than MAXMERGE files, that pass's output is the final result and is moved to
# ../merged.  Exits non-zero if a merge fails or no input ever arrives.
(
  cd "$MERGEDIR" || exit 1
  # Only affects this subshell: unmatched patterns expand to nothing.
  shopt -s nullglob
  m=0
  while true; do
    # Sample the EOF marker *before* listing files.  The copier moves EOF in
    # last, so if it is visible here every shard is already in place.
    eof=0
    if [[ -e EOF ]]; then
      eof=1
    fi

    candidates=(*.index *.merge)
    if ((${#candidates[@]} == 0)); then
      # Never call ls without operands: it would list the directory itself
      # and hand the EOF marker to merge-indexes as if it were an index.
      if ((eof == 1)); then
        echo >&2 "merge: input ended without any index files to merge"
        exit 1
      fi
      sleep 1
      continue
    fi

    # ls -S sorts largest first, so tail keeps the (up to) MAXMERGE smallest
    # files.  The names are generated (shard.NNNNN.index / shard.NNNNN.merge)
    # and contain no whitespace, so word splitting the listing is safe.
    files=($(ls -S -- "${candidates[@]}" | tail -n "$MAXMERGE"))
    if ((eof == 0 && ${#files[@]} < 2)); then
      # Nothing worth merging yet; wait for more shards.
      sleep 1
      continue
    fi

    printf -v out 'shard.%05d.merge' "$m"
    m=$((m + 1))
    echo "$out: ${files[*]}"

    if ! "$MERGE_INDEXES" "$CUTOFF" "${files[@]}" "$out"; then
      # Stop instead of looping: a partial $out must never be picked up as an
      # input of a later pass or shipped as the final result.
      echo >&2 "merge: $MERGE_INDEXES failed while writing $out"
      rm -f -- "$out"
      exit 1
    fi
    rm -- "${files[@]}"

    if ((eof == 1 && ${#files[@]} < MAXMERGE)); then
      # Input is finished and this pass took everything that was left.
      mv -- "$out" ../merged
      exit 0
    fi
  done
) &
merge_pid=$!
# Background copier.
#
# Hands shards from $INDEXDIR over to $MERGEDIR one at a time, in numeric
# order.  Shard n is only moved once shard n+1 or the EOF marker exists, i.e.
# once something newer than it has shown up (presumably make-index writes its
# shards sequentially, so that means shard n is finished -- confirm against
# make-index).  After the last shard the EOF marker itself is forwarded so the
# merger knows no more input will arrive.
(
  n=0
  eof=$INDEXDIR/EOF
  while true; do
    printf -v cur '%s/shard.%05d.index' "$INDEXDIR" "$n"
    n=$((n + 1))
    printf -v next '%s/shard.%05d.index' "$INDEXDIR" "$n"

    # Block until either the following shard or the end marker appears.
    until [[ -e "$next" || -e "$eof" ]]; do
      sleep 1
    done

    if [[ ! -e "$cur" ]]; then
      # Happens when the end marker appears but the expected shard was never
      # written (e.g. no shards at all).  Fail loudly instead of relying on
      # errexit, and never forward EOF after a failed hand-over.
      echo >&2 "copy: expected shard $cur was never written"
      exit 1
    fi

    echo "$cur"
    mv -- "$cur" "$MERGEDIR/" || exit 1

    if [[ ! -e "$next" && -e "$eof" ]]; then
      # That was the final shard: pass the end marker on and finish.
      mv -- "$eof" "$MERGEDIR/EOF" || exit 1
      exit 0
    fi
  done
) &
copy_pid=$!
# Error-path cleanup: stop the background loops if they are still running and
# remove the scratch tree.  Installed on EXIT so that any failing step below
# does not leave the loops polling forever or leak the temporary directory.
# Note: only the loop subshells are signalled; a helper they are currently
# running is not.
cleanup() {
  kill "$copy_pid" "$merge_pid" 2>/dev/null || true
  rm -rf -- "${TMPDIR:?}"
}

# Signal-path cleanup.  $1 is the exit status to report (128 + signal number).
on_signal() {
  trap - EXIT      # everything is handled here; do not run cleanup as well
  trap '' INT TERM # `kill 0` below also signals this shell; do not re-enter
  rm -rf -- "${TMPDIR:?}" || true
  kill 0           # take down every process in this process group
  exit "$1"
}

trap cleanup EXIT
trap 'on_signal 130' INT
trap 'on_signal 143' TERM

# consumes stdin
"$MAKE_INDEX" "$INDEXDIR/shard" || exit

# Tell the copier that no further shards will be written.
touch "$INDEXDIR/EOF" || exit

# Both loops exit non-zero on failure; propagate that status.
wait "$copy_pid" || exit
wait "$merge_pid" || exit

# Both jobs are reaped, so their PIDs must not be signalled any more.  From
# here on a failure also keeps the scratch tree, which still holds the merged
# result if the final move does not succeed.
trap - EXIT
mv -- "$TMPDIR/merged" "$OUT" || exit
rm -r -- "$TMPDIR"
exit 0
|