summaryrefslogblamecommitdiff
path: root/merge.sh
blob: 6f764de7c8462e8b1e2db77da9e4c07a5cc5ac37 (plain) (tree)










































































                                                                                       
#!/bin/bash -e
#
# Usage: merge.sh outfile < input
#
# Feeds stdin to nutrimatic/bin/make-index and combines the index shards it
# writes into a single file, outfile, using nutrimatic/bin/merge-indexes.
# Both tools are resolved relative to the current directory, and the scratch
# directory (mergetmp.XXXXX) is created there as well.

# Keep errexit active even when the script is started as `bash merge.sh`,
# in which case the options on the shebang line are ignored.
set -e

if [[ $# -ne 1 ]]; then
	# outfile is mandatory, so it is not shown in brackets.
	echo >&2 "usage: $0 outfile"
	exit 1
fi

OUT="$1"
MERGE_INDEXES=$(realpath nutrimatic/bin/merge-indexes)
MAKE_INDEX=$(realpath nutrimatic/bin/make-index)

# merge-indexes FDs: stdin, stdout, stderr, [in...], out
# fatal assumption: we'll never exceed ARG_MAX
fd_limit=$(ulimit -n)
if [[ $fd_limit == unlimited ]]; then
	# `ulimit -n` may print "unlimited", which is not a number; fall back to
	# a conventional default so the arithmetic below stays meaningful.
	fd_limit=1024
fi
# Four descriptors are reserved: stdin, stdout, stderr and the output file.
MAXMERGE=$((fd_limit - 4))
if ((MAXMERGE < 2)); then
	# Merging fewer than two files per pass can never reduce the file count.
	echo >&2 "$0: open file limit ($fd_limit) is too low to merge indexes"
	exit 1
fi

# Passed as the first argument to merge-indexes.
CUTOFF=1

# The scratch directory name is relative and generated by mktemp, so it
# contains no whitespace or shell metacharacters.
TMPDIR=$(mktemp -d mergetmp.XXXXX)
MERGEDIR=$TMPDIR/merge
INDEXDIR=$TMPDIR/index
mkdir -p -- "$MERGEDIR" "$INDEXDIR"

# Merge worker (background subshell).
#
# Polls $MERGEDIR once a second. Each pass merges up to $MAXMERGE of the
# smallest *.index / *.merge files into a new shard.NNNNN.merge and removes
# the inputs. Once the EOF marker is present and a pass consumed fewer than
# $MAXMERGE files (i.e. everything that was left), that output is renamed to
# ../merged and the worker exits 0. Any failure exits non-zero.
(
	cd "$MERGEDIR" || exit 1
	# Unmatched globs must expand to nothing rather than to themselves.
	shopt -s nullglob
	m=0
	while true; do
		# Sample the EOF marker before listing: the marker is moved in after
		# the last shard, so if it is visible here the listing is complete.
		eof=0
		if [[ -e EOF ]]; then
			eof=1
		fi

		# Only call ls when something matched; with no operands ls would list
		# the whole directory, including the EOF marker itself.
		candidates=(*.index *.merge)
		files=()
		if ((${#candidates[@]} > 0)); then
			# ls -S prints largest first, so tail keeps the smallest files.
			while IFS= read -r f; do
				files+=("$f")
			done < <(ls -S -- "${candidates[@]}" | tail -n "$MAXMERGE")
		fi

		if ((eof == 0 && ${#files[@]} < 2)); then
			sleep 1
			continue
		fi
		if ((${#files[@]} == 0)); then
			echo >&2 "$0: no index shards to merge"
			exit 1
		fi

		out=shard.$(printf '%05d' "$m").merge
		m=$((m + 1))
		echo "$out: ${files[*]}"
		# Stop on failure: a partial $out would otherwise match *.merge and be
		# fed back in as input on the next pass.
		if ! "$MERGE_INDEXES" "$CUTOFF" "${files[@]}" "$out"; then
			echo >&2 "$0: merge-indexes failed while writing $out"
			exit 1
		fi
		rm -- "${files[@]}" || exit 1

		# Fewer than MAXMERGE inputs after EOF means nothing else was left
		# behind, so this output is the final result.
		if ((eof == 1 && ${#files[@]} < MAXMERGE)); then
			mv -- "$out" ../merged || exit 1
			exit 0
		fi
	done
) &
merge_pid=$!

# Copy worker (background subshell).
#
# Walks shard.00000.index, shard.00001.index, ... in $INDEXDIR and moves each
# one into $MERGEDIR once the following shard or the EOF marker exists. After
# the last shard has been moved, the EOF marker is moved across too and the
# worker exits 0.
# NOTE(review): this presumes make-index names its output
# <prefix>.NNNNN.index and finishes one shard before starting the next --
# confirm against make-index.
(
	eof=$INDEXDIR/EOF
	n=0
	while true; do
		cur=$INDEXDIR/shard.$(printf '%05d' "$n").index
		n=$((n + 1))
		next=$INDEXDIR/shard.$(printf '%05d' "$n").index

		# Wait until the current shard can no longer be the one being written.
		until [[ -e $next || -e $eof ]]; do
			sleep 1
		done

		# E.g. empty input: EOF appeared but the expected shard never did.
		if [[ ! -e $cur ]]; then
			echo >&2 "$0: expected index shard $cur was never produced"
			exit 1
		fi

		echo "$cur"
		mv -- "$cur" "$MERGEDIR/" || exit 1

		# Re-check $next: it may have appeared while the wait loop was ending.
		if [[ ! -e $next && -e $eof ]]; then
			mv -- "$eof" "$MERGEDIR/EOF" || exit 1
			exit 0
		fi
	done
) &
copy_pid=$!

# INT/TERM: remove the scratch directory and terminate the whole process
# group (workers and their children).
on_signal() {
	# Restore default handlers first: `kill 0` also signals this shell, and
	# with the trap still installed the handler would be re-entered.
	trap - INT TERM EXIT
	rm -rf -- "${TMPDIR:?}"
	kill 0
	exit 1
}

# Any other non-zero exit (make-index or a worker failed): stop the workers,
# which would otherwise keep polling forever, and drop the scratch directory.
on_exit() {
	local status=$?
	trap - EXIT
	if ((status != 0)); then
		# Workers may already have exited; that is not an error here.
		kill "$copy_pid" "$merge_pid" 2>/dev/null || true
		rm -rf -- "${TMPDIR:?}"
	fi
	exit "$status"
}

trap on_signal INT TERM
trap on_exit EXIT

# consumes stdin
"$MAKE_INDEX" "$INDEXDIR/shard" || exit
# Tell the copy worker that no further shards will appear.
touch "$INDEXDIR/EOF" || exit

# Both workers must succeed before the result is published.
wait "$copy_pid" || exit
wait "$merge_pid" || exit
mv -- "$TMPDIR/merged" "$OUT" || exit
rm -r -- "$TMPDIR"
exit 0