diff options
author | Kevin Wallace <kevin@wallace.seattle.wa.us> | 2019-05-09 00:16:02 -0700 |
---|---|---|
committer | Kevin Wallace <kevin@wallace.seattle.wa.us> | 2019-05-09 00:18:46 -0700 |
commit | 2784977e4613260a3e40a1e6de412ce276e50467 (patch) | |
tree | 9d5adc0b75996943803d8c594361de70fb8efe5b /Makefile |
Initial commit
Diffstat (limited to 'Makefile')
-rw-r--r-- | Makefile | 58 |
1 files changed, 58 insertions, 0 deletions
# Makefile: downloads Wikimedia dumps (enwiki, enwiktionary), extracts their
# text with WikiExtractor, and builds index files using the tools under
# nutrimatic/bin.
#
# Main entry points:
#   make           build/refresh the *-latest.version and *-latest.index files
#   make newest    forget the cached version files and rebuild
#   make reindex   remove every *.index file and rebuild
#   make sync      `newest`, then rsync the *.index files to the remote host
#   make trim      delete all but the two newest snapshot indexes per wiki

# Append /usr/local/bin to the search path used by the recipes below.
PATH := $(PATH):/usr/local/bin

.PHONY: all
all: enwiktionary-latest.version
all: enwiktionary-latest.index
all: enwiki-latest.version
all: enwiki-latest.index

# Drop the cached version files so they are fetched again, then rebuild.
# $(MAKE) (rather than a literal `make`) keeps -j/-n and command-line
# variable overrides working across the recursive invocation.
.PHONY: newest
newest:
	rm -f *.version && $(MAKE)

# Remove every index (snapshots and the -latest symlinks), then rebuild.
.PHONY: reindex
reindex:
	rm -f *.index && $(MAKE)

# Refresh everything, then copy the index files to the remote host.
.PHONY: sync
sync: newest
	rsync -av *.index tilde:public_html/pzzl.org/nut/

# Keep only the two lexically-last snapshot indexes for each wiki.
# NOTE(review): `head -n -2` (negative count) is a GNU coreutils extension,
# and `xargs rm` is still invoked when nothing is selected for deletion.
.PHONY: trim
trim:
	ls enwiki-*-snap.index | sort | head -n -2 | xargs rm
	ls enwiktionary-*-snap.index | sort | head -n -2 | xargs rm

# Initialise the git submodule whose directory name is the stem.
%/.git:
	git submodule update --init $*

# Build the nutrimatic tools after the submodule has been checked out.
nutrimatic/bin: nutrimatic/.git
	cd nutrimatic && ./build.py

# No recipe: the script is expected to appear once the submodule is
# initialised.
wikiextractor/WikiExtractor.py: wikiextractor/.git

# Record the current dump identifier for wiki `$*`: fetch the dump's RSS feed,
# take the channel <link> text, and keep only what follows its last slash.
# NOTE(review): presumably that last path component is the dump date; confirm.
%-latest.version:
	wget -O - https://dumps.wikimedia.org/$*/latest/$*-latest-pages-articles.xml.bz2-rss.xml |\
	xpath '//rss/channel/link/text()' |\
	sed 's/^.*\///' > $@

# Stream the enwiki dump for snapshot `$*` through bzip2 and WikiExtractor
# into ./merge.sh, which is handed the target name.
enwiki-%-snap.merge: nutrimatic/bin wikiextractor/WikiExtractor.py
	# ~7h30m
	wget -O - https://dumps.wikimedia.org/enwiki/$*/enwiki-$*-pages-articles.xml.bz2 |\
	bzip2 -d |\
	python wikiextractor/WikiExtractor.py -q -o- - |\
	./merge.sh $@

# Same pipeline as above, for the enwiktionary dump.
enwiktionary-%-snap.merge: nutrimatic/bin wikiextractor/WikiExtractor.py
	# ~1h
	wget -O - https://dumps.wikimedia.org/enwiktionary/$*/enwiktionary-$*-pages-articles.xml.bz2 |\
	bzip2 -d |\
	python wikiextractor/WikiExtractor.py -q -o- - |\
	./merge.sh $@

# Produce the snapshot index from its .merge counterpart.
%-snap.index: %-snap.merge nutrimatic/bin
	nutrimatic/bin/merge-indexes 5 $*-snap.merge $@

# Build the snapshot index named by the recorded version, then point the
# -latest symlink at it. The version file ($<) is read by the shell when the
# recipe runs, and $(MAKE) propagates flags to the sub-make.
%-latest.index: %-latest.version
	$(MAKE) $*-$$(cat $<)-snap.index
	ln -sf $*-$$(cat $<)-snap.index $@