# Append /usr/local/bin to the search path seen by the recipes below.
PATH := $(PATH):/usr/local/bin

# Remove a target whose recipe exits non-zero, so a truncated download or a
# half-written output is never mistaken for an up-to-date file on the next run.
.DELETE_ON_ERROR:

# Default goal: refresh every "latest" version marker and index symlink.
.PHONY: all
all: enwiktionary-latest.version
all: enwiktionary-latest.index
all: enwiki-latest.version
all: enwiki-latest.index
all: phraser-words500K-latest.index
all: phraser-phrases5M-latest.index

# Discard the cached version markers and rebuild, forcing a fresh upstream
# version check.  $(MAKE) (not a literal "make") keeps -j/-n and command-line
# variables flowing into the sub-make.
.PHONY: newest
newest:
	rm -f *.version && $(MAKE)

# Discard every *.index file and rebuild.
.PHONY: reindex
reindex:
	rm -f *.index && $(MAKE)

# Refresh everything, then copy the indexes to the remote host.
.PHONY: sync
sync: newest
	rsync -av *.index tilde:public_html/pzzl.org/nut/

# For each file family, delete all but the two lexically-last files.
# "xargs -r" skips rm entirely when there is nothing to delete; without it rm
# is run with no operands, fails, and aborts the whole target.
# The two phraser word lists are trimmed separately: a combined
# "phraser-*.txt" glob sorts every phrases5M file before every words500K file,
# so it would delete all phrases5M lists and keep only words500K ones.
.PHONY: trim
trim:
	ls enwiki-*-snap.index | sort | head -n -2 | xargs -rt rm
	ls enwiktionary-*-snap.index | sort | head -n -2 | xargs -rt rm
	ls enwiki-*-pages-articles.xml.bz2 | sort | head -n -2 | xargs -rt rm
	ls enwiktionary-*-pages-articles.xml.bz2 | sort | head -n -2 | xargs -rt rm
	ls phraser-words500K-*-snap.index | sort | head -n -2 | xargs -rt rm
	ls phraser-phrases5M-*-snap.index | sort | head -n -2 | xargs -rt rm
	ls phraser-words500K-*.txt | sort | head -n -2 | xargs -rt rm
	ls phraser-phrases5M-*.txt | sort | head -n -2 | xargs -rt rm

# Initialise the git submodule whose path is the stem (e.g. "nutrimatic").
%/.git:
	git submodule update --init $*

# Build the nutrimatic tools by running its build script inside the submodule.
nutrimatic/bin: nutrimatic/.git
	cd nutrimatic && ./build.py

# Record the latest Wikimedia dump identifier for en<stem> (wiki, wiktionary):
# fetch the dump's RSS feed, take the channel link text, and keep only what
# follows its last "/".
en%-latest.version:
	wget -O - https://dumps.wikimedia.org/en$*/latest/en$*-latest-pages-articles.xml.bz2-rss.xml |\
	  xpath -e '//rss/channel/link/text()' |\
	  sed 's/^.*\///' > $@

# Download the enwiki articles dump for the version given by the stem.
enwiki-%-pages-articles.xml.bz2:
	wget -O $@ https://dumps.wikimedia.org/enwiki/$*/enwiki-$*-pages-articles.xml.bz2

# Extract text from the enwiki dump and feed it to merge.sh.
# nutrimatic/bin is order-only: it is a directory, and its mtime changes
# whenever its contents do, which must not force this long step to re-run.
enwiki-%-snap.merge: enwiki-%-pages-articles.xml.bz2 | nutrimatic/bin  # ~1h
	wikiextractor -q -o- enwiki-$*-pages-articles.xml.bz2 |\
	  ./merge.sh $@

# Download the enwiktionary articles dump for the version given by the stem.
enwiktionary-%-pages-articles.xml.bz2:
	wget -O $@ https://dumps.wikimedia.org/enwiktionary/$*/enwiktionary-$*-pages-articles.xml.bz2

# Extract text from the enwiktionary dump and feed it to merge.sh.
enwiktionary-%-snap.merge: enwiktionary-%-pages-articles.xml.bz2 | nutrimatic/bin
	wikiextractor -q -o- enwiktionary-$*-pages-articles.xml.bz2 |\
	  ./merge.sh $@

# Turn a .merge result into the final snapshot index with merge-indexes.
en%-snap.index: en%-snap.merge | nutrimatic/bin
	nutrimatic/bin/merge-indexes 5 en$*-snap.merge $@

# Build the snapshot index named by the recorded version, then point the
# "latest" symlink at it.  The version is read by the shell from the
# prerequisite, and an empty version file is rejected instead of producing a
# bogus "<name>--snap.index" goal.
%-latest.index: %-latest.version
	version=$$(cat $<) && test -n "$$version" && \
	  $(MAKE) $*-$$version-snap.index && \
	  ln -sf $*-$$version-snap.index $@

# Record the upstream words_500K.txt version: the last-modified response
# header reformatted by date(1) as YYYYmmddHHMMSS.
phraser-words500K-latest.version:
	date -d \
	  "$$(curl -I https://lahosken.san-francisco.ca.us/frivolity/prog/phraser/words_500K.txt |\
	      awk -F': ' '/^last-modified: / { print $$2 }')" \
	  +"%Y%m%d%H%M%S" > $@

# Record the upstream phrases_5M.txt version the same way.
phraser-phrases5M-latest.version:
	date -d \
	  "$$(curl -I https://lahosken.san-francisco.ca.us/frivolity/prog/phraser/phrases_5M.txt |\
	      awk -F': ' '/^last-modified: / { print $$2 }')" \
	  +"%Y%m%d%H%M%S" > $@

# Download the word list; the stem only names the local copy (the URL is fixed).
phraser-words500K-%.txt:
	wget -O $@ https://lahosken.san-francisco.ca.us/frivolity/prog/phraser/words_500K.txt

# Download the phrase list; the stem only names the local copy (the URL is fixed).
phraser-phrases5M-%.txt:
	wget -O $@ https://lahosken.san-francisco.ca.us/frivolity/prog/phraser/phrases_5M.txt

# Print tab-separated field 2 of each input line field-1 times and pipe the
# result to merge.sh.
phraser-words500K-%-snap.index: phraser-words500K-%.txt | nutrimatic/bin  # ~ 3m
	awk -F'\t' '{ for (i=0; i < $$1; i++) print $$2 }' phraser-words500K-$*.txt | ./merge.sh $@

# Same expansion for the phrase list.
phraser-phrases5M-%-snap.index: phraser-phrases5M-%.txt | nutrimatic/bin  # ~ 10m
	awk -F'\t' '{ for (i=0; i < $$1; i++) print $$2 }' phraser-phrases5M-$*.txt | ./merge.sh $@