blob: 0ce706db99a6bb9d423880ae656c9710d71ab99a [file] [log] [blame]
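Listing pages through the MediaWiki API returns at most 500 results per
query (hi Patrick). To get a list of all pages in a larger wiki, we need to
run repeated queries, each one starting (apfrom) from the last title
returned by the previous one, until no new title comes back.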
Source: https://github.com/moy/Git-Mediawiki/issues/32
Author: anarcat https://github.com/anarcat
diff --git a/contrib/mw-to-git/git-remote-mediawiki.perl b/contrib/mw-to-git/git-remote-mediawiki.perl
index 8dd74a9..f2ce311 100755
--- a/contrib/mw-to-git/git-remote-mediawiki.perl
+++ b/contrib/mw-to-git/git-remote-mediawiki.perl
@@ -259,16 +259,37 @@ sub get_mw_tracked_categories {
sub get_mw_all_pages {
my $pages = shift;
# No user-provided list, get the list of pages from the API.
- my $mw_pages = $mediawiki->list({
+ # A single allpages query is capped by the server, so walk the wiki in
+ # batches: each query restarts (via apfrom) at the last title seen.
+ my $query = {
action => 'query',
list => 'allpages',
aplimit => 'max'
- });
- if (!defined($mw_pages)) {
+ };
+ my $curpage;
+ my $oldpage = '';
+ while (1) {
+ if (defined($curpage)) {
+ # The last title did not advance: the previous batch was the final one.
+ if ($oldpage eq $curpage) {
+ last;
+ }
+ $query->{apfrom} = $curpage;
+ $oldpage = $curpage;
+ }
+ my $mw_pages = $mediawiki->list($query);
+ if (!defined($mw_pages)) {
fatal_mw_error("get the list of wiki pages");
- }
- foreach my $page (@{$mw_pages}) {
+ }
+ # An empty batch leaves $curpage unchanged; on a wiki with no pages it
+ # would stay undefined and the loop would never end, so stop here.
+ if (!@{$mw_pages}) {
+ last;
+ }
+ foreach my $page (@{$mw_pages}) {
$pages->{$page->{title}} = $page;
+ $curpage = $page->{title};
+ }
}
return;
}