summaryrefslogtreecommitdiff
path: root/Makefile
diff options
context:
space:
mode:
authorKevin Wallace <kevin@wallace.seattle.wa.us>2019-05-09 00:16:02 -0700
committerKevin Wallace <kevin@wallace.seattle.wa.us>2019-05-09 00:18:46 -0700
commit2784977e4613260a3e40a1e6de412ce276e50467 (patch)
tree9d5adc0b75996943803d8c594361de70fb8efe5b /Makefile
Initial commit
Diffstat (limited to 'Makefile')
-rw-r--r--Makefile58
1 files changed, 58 insertions, 0 deletions
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..3c95593
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,58 @@
+# Extend the command search path with /usr/local/bin. It is appended last,
+# so directories already on PATH keep precedence.
+PATH := $(PATH):/usr/local/bin
+
+# Default goal: for each wiki, look up the latest dump version and make sure
+# the matching "-latest.index" exists.
+.PHONY: all
+all: enwiktionary-latest.version enwiktionary-latest.index \
+     enwiki-latest.version enwiki-latest.index
+
+# Discard the cached "*.version" files so the latest dump versions are looked
+# up again, then rebuild the default goal.
+# $(MAKE) (rather than a literal `make`) keeps -n, -j/jobserver and
+# command-line variables working across the recursive invocation.
+.PHONY: newest
+newest:
+	rm -f *.version && $(MAKE)
+
+# Remove every "*.index" file (snapshot indexes and "-latest" links alike) and
+# rebuild the default goal.
+# $(MAKE) (rather than a literal `make`) keeps -n, -j/jobserver and
+# command-line variables working across the recursive invocation.
+.PHONY: reindex
+reindex:
+	rm -f *.index && $(MAKE)
+
+# Run `newest` first, then copy every "*.index" file in this directory to the
+# remote host `tilde` with rsync in archive mode.
+.PHONY: sync
+sync: newest
+	rsync --archive --verbose *.index tilde:public_html/pzzl.org/nut/
+
+# For each wiki, delete all snapshot indexes except the two whose names sort
+# last (the two newest, assuming the snapshot part of the name is a date).
+# `head -n -2` (all but the last two lines) is a GNU coreutils extension.
+# `rm -f` is used so that an empty list -- two or fewer snapshots present --
+# is not an error: GNU xargs still runs the command once with no arguments,
+# and plain `rm` would then fail with "missing operand".
+.PHONY: trim
+trim:
+	ls enwiki-*-snap.index | sort | head -n -2 | xargs rm -f
+	ls enwiktionary-*-snap.index | sort | head -n -2 | xargs rm -f
+
+# Any target of the form <dir>/.git is produced by initialising and updating
+# the git submodule located at <dir>.
+%/.git:
+	git submodule update --init $(@D)
+
+# Once the nutrimatic submodule is checked out, run its build script from
+# inside the submodule directory (presumably this populates nutrimatic/bin --
+# confirm against build.py).
+nutrimatic/bin: nutrimatic/.git
+	cd $(@D) && ./build.py
+
+# No recipe: this only records that WikiExtractor.py depends on the
+# wikiextractor submodule checkout (wikiextractor/.git, built by the %/.git
+# rule). The script itself is presumably part of that checkout.
+wikiextractor/WikiExtractor.py: wikiextractor/.git
+
+# Record the version of the latest dump of wiki $* (e.g. enwiki).
+# Fetches the dump's RSS feed, extracts the channel <link> text and keeps
+# only what follows the last "/".
+# The result is written to a temporary file and renamed only if it is
+# non-empty: the pipeline's exit status is that of `sed` alone, so a failed
+# download would otherwise leave an empty file that looks up to date.
+%-latest.version:
+	wget -O - https://dumps.wikimedia.org/$*/latest/$*-latest-pages-articles.xml.bz2-rss.xml |\
+	  xpath '//rss/channel/link/text()' |\
+	  sed 's/^.*\///' > $@.tmp
+	test -s $@.tmp || { rm -f $@.tmp; exit 1; }
+	mv -f $@.tmp $@
+
+# Stream the enwiki articles dump for snapshot $* through bzip2 and
+# WikiExtractor into ./merge.sh, which is handed the target name and is
+# expected to create it.  Takes roughly 7h30m.
+# nutrimatic/bin is a directory, so it is an order-only prerequisite: it must
+# exist, but a change to its timestamp (which moves whenever its contents
+# change) must not force this long job to run again.
+enwiki-%-snap.merge: wikiextractor/WikiExtractor.py | nutrimatic/bin
+	wget -O - https://dumps.wikimedia.org/enwiki/$*/enwiki-$*-pages-articles.xml.bz2 |\
+	  bzip2 -d |\
+	  python wikiextractor/WikiExtractor.py -q -o- - |\
+	  ./merge.sh $@
+
+# Stream the enwiktionary articles dump for snapshot $* through bzip2 and
+# WikiExtractor into ./merge.sh, which is handed the target name and is
+# expected to create it.  Takes roughly 1h.
+# nutrimatic/bin is a directory, so it is an order-only prerequisite: it must
+# exist, but a change to its timestamp (which moves whenever its contents
+# change) must not force this job to run again.
+enwiktionary-%-snap.merge: wikiextractor/WikiExtractor.py | nutrimatic/bin
+	wget -O - https://dumps.wikimedia.org/enwiktionary/$*/enwiktionary-$*-pages-articles.xml.bz2 |\
+	  bzip2 -d |\
+	  python wikiextractor/WikiExtractor.py -q -o- - |\
+	  ./merge.sh $@
+
+# Turn a snapshot's .merge file into its final .index by running nutrimatic's
+# merge-indexes with a first argument of 5 (presumably a minimum-count
+# threshold -- confirm against the nutrimatic documentation).
+# nutrimatic/bin is a directory, so it is an order-only prerequisite: it must
+# exist, but its timestamp alone must not trigger a rebuild.
+%-snap.index: %-snap.merge | nutrimatic/bin
+	nutrimatic/bin/merge-indexes 5 $< $@
+
+# Point <wiki>-latest.index at the snapshot index named in
+# <wiki>-latest.version, building that snapshot index first if needed.
+# The snapshot name is only known once the .version file exists, so the
+# snapshot index is built by a recursive make. $(MAKE) is used so that -n,
+# -j/jobserver and command-line variables carry over.
+# The version is read by the shell at run time ($$(cat $<)), where $< is the
+# .version prerequisite.
+%-latest.index: %-latest.version
+	$(MAKE) $*-$$(cat $<)-snap.index
+	ln -sf $*-$$(cat $<)-snap.index $@