###########################################################################
# WikiGateway.pm:
#    Exposes a Wiki XML-RPC API for wikis which don't support it themselves
#
#  maintained by Bayle Shanks
#
# Copyright 2003 Bayle Shanks and L. M. Orchard. 
#
# Based on usemod_xmlrpc.cgi by l.m.orchard <deus_x@pobox.com> 
# http://www.decafbad.com
#
# also using some code by David Jacoby
#
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details, published at 
# http://www.gnu.org/copyleft/gpl.html
#
# You may also redistribute or modify it under the terms of Perl's Artistic
# License, at your option. 
#
###########################################################################

# API notes:
#  Each of these functions requires two more arguments than in the API.
#  They are $url and $type, and they precede all of the usual argument to each
#  fn. 
#
#  $url is the base URL of the wiki server that you are interfacing with, 
#  for example 
#    http://www.usemod.com/cgi-bin/mb.pl
#
#  $type is the type of the wiki server that you are interfacing with. Here
#  are the options:
#
#  * 'usemod1': UseMod 1.0
#  * 'Usemod1NoModwiki': close to UseMod 1.0, but without ModWiki



package WikiGateway;
# actually I'd like to see this as Wiki::WikiGatway eventually



## TODO: currently only getRecentChanges cares about "type". Everything else just assumes you are "usemod1".

###########################################################################
### Parameters
###########################################################################

### Set this if you are using the library as an XMLRPC web service
###   (the effect is only to encode some return values in an XMLRPC way)
my $XMLRPC = 0;

## "persistent" variables
## set by wikiGatewayTargetWiki
my $SERVER_URL;
my $SERVER_TYPE;


use strict;
use Date::Parse;
#use LWP::Simple; # for some reason this has to be moved down below or it doesn't work as a web service

use XMLRPC::Lite;


sub setXMLRPC {
    $XMLRPC = @_[0];
}

##### new function, added by bayle
sub wikiGatewayTargetWiki {
# TODO: use "shift" to make the argument block more readable:
    my ($pkg, $url, $type);
    if ($XMLRPC) {
	($pkg, $url, $type) = @_;
    }
    else {
	($url, $type) = @_;
    }

    $SERVER_URL = $url;
    $SERVER_TYPE = $type;

#TODO: return this info if no args

}




###########################################################################
### Utility subroutines
###########################################################################

################################
# sub base64IfXMLRPC           #
################################
sub base64IfXMLRPC {
    if ($XMLRPC)
    {
	$SOAP::Constants::DO_NOT_USE_XML_PARSER = 1;
	use XMLRPC::Transport::HTTP;
	return SOAP::Data->type(base64 => @_[0]);
    }
    else {
	return @_[0];
    }
}

###########################################################################
### Wiki XML-RPC API Methods
###########################################################################

###########################################################################
# * array getRecentChanges ( Date timestamp ):
#      Get list of changed pages since timestamp, which should be in
#      UTC. The result is an array, where each element is a struct:
#         * name (string) :
#              Name of the page. The name is UTF-8 with URL encoding
#              to make it ASCII.
#         * lastModified (date) :
#              Date of last modification, in UTC.
#         * author (string) :
#              Name of the author (if available). Again, name is
#              UTF-8 with URL encoding.
#         * version (int) :
#              Current version.
###########################################################################




################################
# sub getRC_ModWiki            #
################################

sub getRC_ModWiki {
######  used if the wiki server supports ModWiki
    my ($pkg, $url, $type, $date);
    if ($XMLRPC) {
	($pkg, $date) = @_;
	($url, $type) = ($SERVER_URL, $SERVER_TYPE); 
    }
    else {
	($url, $type, $date) = @_;
    }
##################
# INIT
##################


    use LWP::Simple; 

    if ($XMLRPC)
    {
	$SOAP::Constants::DO_NOT_USE_XML_PARSER = 1;
	use XMLRPC::Transport::HTTP;
    }

    use XML::RSS;
    my $time = str2time($date);

    my $MODWIKI_URI = 'http://purl.org/rss/1.0/modules/wiki/';
    my @changes = ();

##################
# THIS IS WRONG
##################

    my $days = 5;  # todo: implement $date handling
    
##################
# FETCH RSS FROM WIKI SERVER  
##################


    my $URL = $url."?action=rss&days=5";
    my $rssString = get $URL;
    

##################
# PARSE RSS   
##################
    
    my $rss = new XML::RSS;
    $rss->add_module(prefix=>'modwiki', uri=>'http://purl.org/rss/1.0/modules/wiki/');

# do I need to add dc?
#    $rss->add_module(prefix=>'modwiki', uri=>'http://purl.org/rss/1.0/modules/wiki/');
    $rss->parse($rssString);
    


##################
# ITERATE THROUGH EACH RSS ITEM, 
# AND STORE THE RELEVANT FIELDS INTO RETURN STRUCT   
##################
# notes:
#  not sure if this is the right date format...
#  why didn't this work:
#		   author       => $item->{dc}->{'contributor'},
#  i also added an 'importance' field here

    foreach my $item (@{$rss->{'items'}}) {
	my $pageinfo =
	{
		name         => $item->{'title'},
		author       => $item->{'http://www.w3.org/1999/02/22-rdf-syntax-ns#'}->{'value'},
		lastModified => $item->{dc}->{'date'},
		comment       => $item->{'description'}, 
		version      => $item->{$MODWIKI_URI}->{'version'},
		importance => $item->{$MODWIKI_URI}->{'importance'},
		#debug => $item
	    };
  

##### Special encoding for xmlrpc
	if ($XMLRPC)
	{
	    $pageinfo->{lastModified} = SOAP::Data->type(dateTime => ($pageinfo->{lastModified}));  
	}
	
	push @changes, $pageinfo;      
    }
    
    return \@changes;
}


################################
# sub getRC_Usemod1NoModwiki   #
################################

sub getRC_Usemod1NoModwiki
{
    ##### called if the wiki type passed in is "usemod1NoModWiki"
    ##### a screen-scraping alternative for UseMod
    #####  (probably is or can be adapted to be compatible with pre-1.0 UseMod
    #####   which usually does not support RSS)

    ##### this fn was based on David Jacoby's code
    my ($pkg, $url, $type, $date);
    if ($XMLRPC) {
	($pkg, $date) = @_;
	($url, $type) = ($SERVER_URL, $SERVER_TYPE); 
    }
    else {
	($url, $type, $date) = @_;
    }

##################
# THIS IS WRONG
##################
	my $days = 5;  

##################
# INIT
##################
	my $time = str2time($date);
	my @changes = ();
	my $pagename;
	my $comment;
	my ($author, $authorID, $authorIP);

##################
# FETCH RSS FROM WIKI SERVER  
##################
	my $URL = $url."?action=rc&days=5";
	my $rcPage = get $URL;


##################
# FIND THE LINES WITH THE CHANGES ON THEM  
##################

### split into lines

	my @output = split /\n/ , $rcPage ;
	@output = grep /^<li>/ , @output ;


####### filter for only the lines with the actual changes on them 
	my @secondary_output;

	for my $line ( @output ) {
	    next if $line !~ /^<li>/ ;
	    next if $line !~ /diff/ ;
	    push @secondary_output , $line ;
	}

	@output = @secondary_output ;

##################
# ITERATE OVER THE CHANGE LINES AND PROCESS THEM  
##################

# notes:
# David used to have this code in here:
#     $line    =~ /(<a href="([^?]*?\?([A-Za-z0-9=&]+))">)/ ; 
# (perhaps that will be needed for older version of UseMod?)

	for my $line ( @output ) {
	    next if $line !~ /^<li>/ ;

  #### parse pagename
  ####
	    $line    =~ /(<a href="([^?]*?\?action=browse&.*?&id=([A-Za-z0-9]+))">)/ ; 
	    my $anchor   = $1 ;
	    my $link     = $2 ; 
	    my $pagename = $3 ;

  #### parse comment field
  ####
	    $line       =~ /strong>\[([^<]+)\]<\/strong/ ;
	    my $comment = $1 ;
	    $comment    = undef if $comment eq $anchor ;

  #### parse author 
  ####
  
    # when the user is logged in, UseMod displays the author's username
    #  but it also has a popup with their ID# and IP
    # when the user is not logged in, UseMod displays the IP in the author slot

	    $line =~ / \. \. \. \. \. (.*)/;
	    $author = $1;
	    my $regexp = '<a href="[^"]*" title="ID (.*?) from ([^"]*)">([^>]*)<\/a>';
	    if ($author =~ /$regexp/)
	    {
		$authorID = $1;
		$authorIP = $2;
		$author = $3;
	    }
	    else {
		$authorIP = $author;
	    }
	    

# TODO: implement time handling
###   this code (from Orchard) might be useful when doing so

	    my ($sec, $min, $hr, $dd, $mm, $yy, $wd, $yd, $isdst) =
		localtime($time);
	    $yy += 1900; $mm++;
	    my $last_modified = sprintf('%04d%02d%02dT%02d:%02d:%02d',
					$yy,$mm,$dd,$hr,$min,$sec);


##################
# STORE INFO INTO RETURN STRUCT  
##################


## is there no comment field in the previous version of WikiXmlRpcInterface ?!? 
## well, i'm adding it

# TODO:
#      lastModified => SOAP::Data->type(dateTime => $last_modified),
#      version      => 1 # version not implemented yet
	    my $pageinfo =
	    {
		name         => qq($pagename),
		author       => qq($author),
		comment       => qq($comment), 
	    };
  
	    push @changes, $pageinfo;
	    
	}

	return \@changes;

}


################################
# sub getRecentChanges   #
################################
sub getRecentChanges {

    my ($pkg, $url, $type, $date);
    if ($XMLRPC) {
	($pkg, $date) = @_;
	($url, $type) = ($SERVER_URL, $SERVER_TYPE); 
    }
    else {
	($url, $type, $date) = @_;
    }


	#### Dispatches based on $type, the type of the wiki server

# note that I left out the "last SWITCH;"s here
      SWITCH: {
	  if ($type eq "usemod1")          {return getRC_ModWiki(@_);}
	  if ($type eq "usemod1NoModWiki") {return getRC_Usemod1NoModwiki(@_);}
	  if ($type eq "moinmoin1")        {return XMLRPC::Lite
						-> proxy("$url?action=xmlrpc")
						-> call('getRecentChanges')
						-> result;}
      }
}


###########################################################################
# * int getRPCVersionSupported():
#      Returns 1 with this version of the Wiki API.
###########################################################################

sub getRPCVersionSupported {
	return 1;
}

###########################################################################
# * base64 getPage( String pagename ):
#     Get the raw Wiki text of page, latest version. Page name must be
#     UTF-8, with URL encoding. Returned value is a binary object,
#     with UTF-8 encoded page data.
###########################################################################

sub getPage {
    my ($pkg, $url, $type, $pagename);
    if ($XMLRPC) {
	($pkg, $pagename) = @_;
	($url, $type) = ($SERVER_URL, $SERVER_TYPE); 
    }
    else {
	($url, $type, $pagename) = @_;
    }

  SWITCH: {
      if ($type eq "moinmoin1")        {return XMLRPC::Lite
					    -> proxy("$url?action=xmlrpc")
					    -> call('getPage',$pagename)
					    -> result;}
      if (($type eq "usemod1") || ($type eq "usemod1NoModWiki")) 
      {

	  use LWP::Simple;
	  my $pagetext = get $url."?action=browse&id=$pagename&raw=1"; 

	  return base64IfXMLRPC($pagetext);
      }
  }
}

###########################################################################
#  * base64 getPageVersion( String pagename, int version ):
#     Get the raw Wiki text of page. Returns UTF-8, expects UTF-8 with
#     URL encoding.
###########################################################################

sub getPageVersion {
    my ($pkg, $url, $type, $pagename, $version);
    if ($XMLRPC) {
	($pkg, $pagename, $version) = @_;
	($url, $type) = ($SERVER_URL, $SERVER_TYPE); 
    }
    else {
	($url, $type, $pagename, $version) = @_;
    }

  SWITCH: {
      if ($type eq "moinmoin1")        {return XMLRPC::Lite
					    -> proxy("$url?action=xmlrpc")
					    -> call('getPageVersion',$pagename,$version)
					    -> result;}
      if (($type eq "usemod1") || ($type eq "usemod1NoModWiki")) 
      {

	  use LWP::Simple;
	  my $pagetext = get $url."?action=browse&id=$pagename&revision=version&raw=1"; 

	  return base64IfXMLRPC($pagetext);

      }
  }
}

###########################################################################
#  * base64 getPageHTML( String pagename ):
#      Return page in rendered HTML. Returns UTF-8, expects UTF-8 with
#      URL encoding.
###########################################################################

sub getPageHTML {
    my ($pkg, $url, $type, $pagename);
    if ($XMLRPC) {
	($pkg, $pagename) = @_;
	($url, $type) = ($SERVER_URL, $SERVER_TYPE); 
    }
    else {
	($url, $type, $pagename) = @_;
    }

  SWITCH: {
      if ($type eq "moinmoin1")        {return XMLRPC::Lite
					    -> proxy("$url?action=xmlrpc")
					    -> call('getPageHTML',$pagename)
					    -> result;}
      if (($type eq "usemod1") || ($type eq "usemod1NoModWiki")) 
      {

	  use LWP::Simple;
	  my $pagetext = get $url."?action=browse&id=$pagename&embed=1"; 

	  return base64IfXMLRPC($pagetext);
      }
  }
}

###########################################################################
#  * base64 getPageHTMLVersion( String pagename, int version ):
#      Return page in rendered HTML, UTF-8.
###########################################################################

sub getPageHTMLVersion {
    my ($pkg, $url, $type, $pagename, $version);
    if ($XMLRPC) {
	($pkg, $pagename, $version) = @_;
	($url, $type) = ($SERVER_URL, $SERVER_TYPE); 
    }
    else {
	($url, $type, $pagename, $version) = @_;
    }

  SWITCH: {
      if ($type eq "moinmoin1")        {return XMLRPC::Lite
					    -> proxy("$url?action=xmlrpc")
					    -> call('getPageHTMLVersion',$pagename,$version)
					    -> result;}
      if (($type eq "usemod1") || ($type eq "usemod1NoModWiki")) 
      {
	  use LWP::Simple;
	  my $pagetext = get $url."?action=browse&id=$pagename&version=$version&embed=1"; 
    
	  return base64IfXMLRPC($pagetext);
      }
  }
}

###########################################################################
#  * array getAllPages():
#      Returns a list of all pages. The result is an array of strings,
#      again UTF-8 in URL encoding.
###########################################################################

sub getAllPages {
    my ($pkg, $url, $type);
    if ($XMLRPC) {
	($pkg) = @_;
	($url, $type) = ($SERVER_URL, $SERVER_TYPE); 
    }
    else {
	($url, $type) = @_;
    }

      SWITCH: {
	  if ($type eq "moinmoin1")        {return XMLRPC::Lite
						-> proxy("$url?action=xmlrpc")
						-> call('getAllPages')
						-> result;}
	  if (($type eq "usemod1") || ($type eq "usemod1NoModWiki")) 
	  {
      
	      #### FETCH PAGE FROM WIKI SERVER
	      use LWP::Simple;
	      my $index = get $url.'?action=index&embed=1';

	      #### CUT OFF HEADER AND FOOTER
	      $index =~ /.*pages found:<\/h2>(.*)<hr .*/is;
	      # maybe it would be less brittle to grep for the "FORM" ?
	      $index = $1;

	      #### PROCESS THE REST
	      $index =~ s/.... <a href="/<a href="/gi;
	      $index =~ s/<a href="[^>]*>(.*)<\/a>/$1/gi;
    $index =~ s/\n//g;
    my @lines = split /<br>/,$index;

    return \@lines;
}
}
}

###########################################################################
#  * struct getPageInfo( string pagename ) :
#      returns a struct with elements
#          * name (string): the canonical page name, URL-encoded UTF-8.
#          * lastModified (date): Last modification date, UTC.
#          * author (string): author name, URL-encoded UTF-8.
#          * version (int): current version
###########################################################################
##### NOT IMPLEMENTED YET
###########################################################################

sub getPageInfo {
    my ($pkg, $url, $type, $pagename);
    if ($XMLRPC) {
	($pkg, $pagename) = @_;
	($url, $type) = ($SERVER_URL, $SERVER_TYPE); 
    }
    else {
	($url, $type, $pagename) = @_;
    }

  SWITCH: {
      if ($type eq "moinmoin1")        {return XMLRPC::Lite
					    -> proxy("$url?action=xmlrpc")
					    -> call('getPageInfo',$pagename)
					    -> result;}
      if (($type eq "usemod1") || ($type eq "usemod1NoModWiki")) 
      {
	  {

# here's what was in Orchard's code:
#	my ($meta, $pagetext);
#	my $pageinfo = {};

#		return
#		  {
#		   name         => $pagename,
#		   lastModified => SOAP::Data->type(dateTime => $last_modified),#		   author       => $extra{name} || $host,
#		   version      => $extra{revision}
#		  };
#	}
	      return {};
	  }
      }
  }
}

###########################################################################
#  * struct getPageInfoVersion( string pagename, int version ) :
#      returns a struct just like plain getPageInfo(), but this time
#      for a specific version.
###########################################################################
##### NOT IMPLEMENTED YET
###########################################################################

sub getPageInfoVersion {
    my ($pkg, $url, $type, $pagename, $version);
    if ($XMLRPC) {
	($pkg, $pagename, $version) = @_;
	($url, $type) = ($SERVER_URL, $SERVER_TYPE); 
    }
    else {
	($url, $type, $pagename, $version) = @_;
    }

  SWITCH: {
      if ($type eq "moinmoin1")        {return XMLRPC::Lite
					    -> proxy("$url?action=xmlrpc")
					    -> call('getPageInfoVersion',$pagename,$version)
					    -> result;}
      if (($type eq "usemod1") || ($type eq "usemod1NoModWiki")) 
      {

#see getPageInfo for something close to what Orchard had here
	return {};
    }
  }
}

###########################################################################
###	    * array listLinks( string pagename ): Lists all links for a given
###        page. The returned array contains structs, with the following
###        elements
###          * name (string) : The page name or URL the link is to.
###          * type (int) : The link type. Zero (0) for internal Wiki
###             link, one (1) for external link (URL - image link,
###             whatever).
###########################################################################
##### NOT IMPLEMENTED YET
###########################################################################
sub listLinks {
    my ($pkg, $url, $type, $pagename);
    if ($XMLRPC) {
	($pkg, $pagename) = @_;
	($url, $type) = ($SERVER_URL, $SERVER_TYPE); 
    }
    else {
	($url, $type, $pagename) = @_;
    }
#is this just for outgoing links? why do we need this?
# bayle: i think there should also be something to get the "full link list" and the reverse links

  SWITCH: {
      if ($type eq "moinmoin1")        {return XMLRPC::Lite
					    -> proxy("$url?action=xmlrpc")
					    -> call('listLinks',$pagename)
					    -> result;}
      if (($type eq "usemod1") || ($type eq "usemod1NoModWiki")) 
      {

#see getPageInfo for something close to what Orchard had here



# here's what orchard had:
#	my @int_links = &UseModWiki::GetPageLinks($pagename, 1, 0, 0);
#	my @ext_links = &UseModWiki::GetPageLinks($pagename, 0, 1, 1);

#	my @links_out;
#	push @links_out, map { {name => $_, type => 0} } @int_links;
#	push @links_out, map { {name => $_, type => 1} } @ext_links;
	
#	return \@links_out;

    }
  }
}




###########################################################################
### * boolean wiki.putPage( String pagename, base64 text ): Set the
###    text of a page, returning true on success
###########################################################################

sub putPage_moinmoin1 {
    my ($url, $type, $pagename, $pagetext) = @_;


##################
# INIT
##################
	use LWP::Simple;
	use HTML::Form;
	use LWP::UserAgent;
	my $ua = LWP::UserAgent->new;
	$ua->agent("$0/0.1 " . $ua->agent);

##################
# FETCH POST FORM FROM WIKI SERVER
##################
	my $URL = $url.$pagename.'?action=edit';	
	my $writeFormPage = get $URL;

##################
# FILL IN THE FORM
##################
	my $form = HTML::Form->parse($writeFormPage, $URL);
	defined($form) or return "Can't even get wiki edit form at $URL";
	$form->value('savetext',$pagetext);

#### to create a fake edit conflict for testing purposes, uncomment this line
#### and then edit something from a different IP and username immediately
#### before running this method
#	$form->value('oldtime',0);

##################
# SUBMIT IT
##################
	my $req = $form->click('button_save');
	my $res = $ua->request($req);

##################
# CHECK FOR EDIT CONFLICT
##################

### NOT IMPLEMENTED FOR MOINMOIN YET

	if ($res->as_string =~ /<H1>Edit Conflict!<\/H1>/i)
	{
	    return "edit conflict";
	}

	if (!($res->is_redirect))
	{
	    ### if there is no edit conflict, UseMod returns a redirect.
	    ### so maybe if we reach this point, something is wrong and we
	    ###    should fail and report and unknown problem.
	    ### i dunno.

#	    return "not redirect";
	}
	
#todo: error checking
# how else would you do error checking? UseMod doesn't give you a clear
# "success" indicator, unless you want to check the changed page to make
# sure your text is there (but what if someone else changed the page in the meantime?)

	if ($@) { return $@; }

	if ($XMLRPC)
	{
	    $SOAP::Constants::DO_NOT_USE_XML_PARSER = 1;
	    use XMLRPC::Transport::HTTP;
	    return SOAP::Data->type(boolean => 1);
	}
	else {
	    return 1;
	}

    
}

sub putPage {
    my ($pkg, $url, $type, $pagename, $pagetext);
    if ($XMLRPC) {
	($pkg, $pagename, $pagetext) = @_;
	($url, $type) = ($SERVER_URL, $SERVER_TYPE); 
    }
    else {
	($url, $type, $pagename, $pagetext) = @_;
    }

  SWITCH: {
      if ($type eq "moinmoin1")        {
	  return putPage_moinmoin1($url, $type, $pagename, $pagetext);
      }
      if (($type eq "usemod1") || ($type eq "usemod1NoModWiki")) 
      {


##################
# INIT
##################
	use LWP::Simple;
	use HTML::Form;
	use LWP::UserAgent;
	my $ua = LWP::UserAgent->new;
	$ua->agent("$0/0.1 " . $ua->agent);

##################
# FETCH POST FORM FROM WIKI SERVER
##################
	my $URL = $url.'?action=edit&id='.$pagename;	
	my $writeFormPage = get $URL;

##################
# FILL IN THE FORM
##################
	my $form = HTML::Form->parse($writeFormPage, $URL);
	defined($form) or return "Can't even get wiki edit form at $URL";
	$form->value('text',$pagetext);

#### to create a fake edit conflict for testing purposes, uncomment this line
#### and then edit something from a different IP and username immediately
#### before running this method
#	$form->value('oldtime',0);

##################
# SUBMIT IT
##################
	my $req = $form->click('Save');
	my $res = $ua->request($req);

##################
# CHECK FOR EDIT CONFLICT
##################
	if ($res->as_string =~ /<H1>Edit Conflict!<\/H1>/i)
	{
	    return "edit conflict";
	}

	if (!($res->is_redirect))
	{
	    ### if there is no edit conflict, UseMod returns a redirect.
	    ### so maybe if we reach this point, something is wrong and we
	    ###    should fail and report and unknown problem.
	    ### i dunno.

#	    return "not redirect";
	}
	
#todo: error checking
# how else would you do error checking? UseMod doesn't give you a clear
# "success" indicator, unless you want to check the changed page to make
# sure your text is there (but what if someone else changed the page in the meantime?)

	if ($@) { return $@; }

	if ($XMLRPC)
	{
	    $SOAP::Constants::DO_NOT_USE_XML_PARSER = 1;
	    use XMLRPC::Transport::HTTP;
	    return SOAP::Data->type(boolean => 1);
	}
	else {
	    return 1;
	}

    }
  }

}


1;