# Makefile -- download the latest English Wikipedia (enwiki) and Wiktionary
# (enwiktionary) dumps from dumps.wikimedia.org and turn them into index files
# using the nutrimatic and wikiextractor git submodules.
#
# Provenance: commit 2784977e4613260a3e40a1e6de412ce276e50467
# ("Initial commit", Kevin Wallace, 2019-05-09).
#
# File naming used throughout:
#   <wiki>-latest.version        text file holding the newest dump id
#   <wiki>-<version>-snap.merge  output of ./merge.sh for one dump
#   <wiki>-<version>-snap.index  output of nutrimatic/bin/merge-indexes
#   <wiki>-latest.index          symlink to the newest <wiki>-<version>-snap.index

PATH := $(PATH):/usr/local/bin

# Remove a target whose recipe failed, so a half-written file is never
# mistaken for an up-to-date one on the next run.
.DELETE_ON_ERROR:

# Default goal: make sure the version stamp and the "latest" index exist for
# both wikis.
.PHONY: all
all: enwiktionary-latest.version
all: enwiktionary-latest.index
all: enwiki-latest.version
all: enwiki-latest.index

# Forget the cached dump versions, re-query them, and build whatever is new.
# $(MAKE) (not a literal `make`) so -j / -n / command-line variables propagate.
.PHONY: newest
newest:
	rm -f *.version && $(MAKE)

# Throw away every index (snapshots and "latest" symlinks) and rebuild.
.PHONY: reindex
reindex:
	rm -f *.index && $(MAKE)

# Refresh to the newest dumps, then upload all index files.
.PHONY: sync
sync: newest
	rsync -av *.index tilde:public_html/pzzl.org/nut/

# Delete all but the two lexically-last snapshot indexes of each wiki.
# `rm -f` keeps this from failing when there are two or fewer snapshots and
# nothing is left to delete. (`head -n -2` requires GNU head.)
.PHONY: trim
trim:
	ls enwiki-*-snap.index | sort | head -n -2 | xargs rm -f
	ls enwiktionary-*-snap.index | sort | head -n -2 | xargs rm -f

# Check out a git submodule on demand; $* is the submodule directory.
%/.git:
	git submodule update --init $*

# Build the nutrimatic tools via the submodule's own build script.
nutrimatic/bin: nutrimatic/.git
	cd nutrimatic && ./build.py

# No recipe: the script is expected to appear once the submodule is checked out.
wikiextractor/WikiExtractor.py: wikiextractor/.git

# Fetch the id of the newest dump of wiki $* from its RSS feed: take the
# channel <link> and keep only what follows the last '/'.
# The pipeline's exit status is sed's alone, so a failed download would
# otherwise leave an empty version file behind; write to a temporary file,
# insist it is non-empty, and only then move it into place.
%-latest.version:
	wget -O - https://dumps.wikimedia.org/$*/latest/$*-latest-pages-articles.xml.bz2-rss.xml |\
		xpath '//rss/channel/link/text()' |\
		sed 's/^.*\///' > $@.tmp
	test -s $@.tmp || { rm -f $@.tmp; exit 1; }
	mv -f $@.tmp $@

# Stream one enwiki dump ($* is the dump version) through bzip2 and
# WikiExtractor into ./merge.sh, which is handed the target name.
# nutrimatic/bin is a directory, so it is an order-only prerequisite: it must
# exist, but its changing mtime must not trigger a re-download.
# Original note: ~7h30m (presumably the run time).
# NOTE(review): without `pipefail` a mid-stream wget/bzip2 failure is only
# caught if ./merge.sh itself exits non-zero -- confirm.
enwiki-%-snap.merge: wikiextractor/WikiExtractor.py | nutrimatic/bin
	wget -O - https://dumps.wikimedia.org/enwiki/$*/enwiki-$*-pages-articles.xml.bz2 |\
		bzip2 -d |\
		python wikiextractor/WikiExtractor.py -q -o- - |\
		./merge.sh $@

# Same pipeline for an enwiktionary dump.
# Original note: ~1h (presumably the run time).
enwiktionary-%-snap.merge: wikiextractor/WikiExtractor.py | nutrimatic/bin
	wget -O - https://dumps.wikimedia.org/enwiktionary/$*/enwiktionary-$*-pages-articles.xml.bz2 |\
		bzip2 -d |\
		python wikiextractor/WikiExtractor.py -q -o- - |\
		./merge.sh $@

# Produce a snapshot index from its .merge file with nutrimatic's
# merge-indexes (the literal 5 is passed through unchanged from the original).
# The tools directory is order-only for the same reason as above; use
# `make reindex` to force a rebuild after changing the tools.
%-snap.index: %-snap.merge | nutrimatic/bin
	nutrimatic/bin/merge-indexes 5 $< $@

# Build the snapshot index named by the version file, then point the
# <wiki>-latest.index symlink at it. The version is read once by the shell at
# run time, and an empty value is rejected rather than producing a bogus
# "<wiki>--snap.index" goal.
%-latest.index: %-latest.version
	v=$$(cat $<) && test -n "$$v" && \
		$(MAKE) $*-$$v-snap.index && \
		ln -sf $*-$$v-snap.index $@