| Searching for pages using the MediaWiki API returns at most 500 results |
| (hi Patrick). To get a list of all pages in a larger wiki, we need to run |
| repeated searches... |
| |
| Source: https://github.com/moy/Git-Mediawiki/issues/32 |
| Author: anarcat https://github.com/anarcat |
| |
| diff --git a/contrib/mw-to-git/git-remote-mediawiki.perl b/contrib/mw-to-git/git-remote-mediawiki.perl |
| index 8dd74a9..f2ce311 100755 |
| --- a/contrib/mw-to-git/git-remote-mediawiki.perl |
| +++ b/contrib/mw-to-git/git-remote-mediawiki.perl |
| @@ -259,16 +259,29 @@ sub get_mw_tracked_categories { |
| sub get_mw_all_pages { |
| my $pages = shift; |
| # No user-provided list, get the list of pages from the API. |
| - my $mw_pages = $mediawiki->list({ |
| + my $query = { |
| action => 'query', |
| list => 'allpages', |
| aplimit => 'max' |
| - }); |
| - if (!defined($mw_pages)) { |
| + }; |
| + my $curpage; |
| + my $oldpage = ''; |
| + while (1) { |
| + if (defined($curpage)) { |
| + if ($oldpage eq $curpage) { |
| + last; |
| + } |
| + $query->{apfrom} = $curpage; |
| + $oldpage = $curpage; |
| + } |
| + my $mw_pages = $mediawiki->list($query); |
| + if (!defined($mw_pages)) { |
| fatal_mw_error("get the list of wiki pages"); |
| - } |
| - foreach my $page (@{$mw_pages}) { |
| + } |
| + foreach my $page (@{$mw_pages}) { |
| $pages->{$page->{title}} = $page; |
| + $curpage = $page->{title}; |
| + } |
| } |
| return; |
| } |