| xmlformat is distributed under a BSD-style license. This license |
| applies to the entire xmlformat distribution, with the exception of |
| the REX parser (described below). |
| |
| Copyright (c) 2004, Kitebird, LLC. All rights reserved. |
| |
| Redistribution and use in source and binary forms, with or without |
| modification, are permitted provided that the following conditions |
| are met: |
| |
| 1. Redistributions of source code must retain the above copyright |
| notice, this list of conditions and the following disclaimer. |
| |
| 2. Redistributions in binary form must reproduce the above copyright |
| notice, this list of conditions and the following disclaimer in the |
| documentation and/or other materials provided with the distribution. |
| |
| 3. Neither the name of Kitebird nor the names of its contributors may |
| be used to endorse or promote products derived from this software |
| without specific prior written permission. |
| |
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
| INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
| POSSIBILITY OF SUCH DAMAGE. |
| |
| ---------------------------------------------------------------------- |
| The REX parser |
| |
| xmlformat contains code based on the REX parser, which is Copyright (c) 1998, |
| Robert D. Cameron. REX is described in this document: |
| |
| http://www.cs.sfu.ca/~cameron/REX.html |
| |
| The document contains a Perl implementation of REX: |
| |
| --- begin REX code --- |
| # REX/Perl 1.0 |
| # Robert D. Cameron "REX: XML Shallow Parsing with Regular Expressions", |
| # Technical Report TR 1998-17, School of Computing Science, Simon Fraser |
| # University, November, 1998. |
| # Copyright (c) 1998, Robert D. Cameron. |
| # The following code may be freely used and distributed provided that |
| # this copyright and citation notice remains intact and that modifications |
| # or additions are clearly identified. |
| |
| $TextSE = "[^<]+"; |
| $UntilHyphen = "[^-]*-"; |
| $Until2Hyphens = "$UntilHyphen(?:[^-]$UntilHyphen)*-"; |
| $CommentCE = "$Until2Hyphens>?"; |
| $UntilRSBs = "[^\\]]*](?:[^\\]]+])*]+"; |
| $CDATA_CE = "$UntilRSBs(?:[^\\]>]$UntilRSBs)*>"; |
| $S = "[ \\n\\t\\r]+"; |
| $NameStrt = "[A-Za-z_:]|[^\\x00-\\x7F]"; |
| $NameChar = "[A-Za-z0-9_:.-]|[^\\x00-\\x7F]"; |
| $Name = "(?:$NameStrt)(?:$NameChar)*"; |
| $QuoteSE = "\"[^\"]*\"|'[^']*'"; |
| $DT_IdentSE = "$S$Name(?:$S(?:$Name|$QuoteSE))*"; |
| $MarkupDeclCE = "(?:[^\\]\"'><]+|$QuoteSE)*>"; |
| $S1 = "[\\n\\r\\t ]"; |
| $UntilQMs = "[^?]*\\?+"; |
| $PI_Tail = "\\?>|$S1$UntilQMs(?:[^>?]$UntilQMs)*>"; |
| $DT_ItemSE = |
| "<(?:!(?:--$Until2Hyphens>|[^-]$MarkupDeclCE)|\\?$Name(?:$PI_Tail))|%$Name;|$S"; |
| $DocTypeCE = "$DT_IdentSE(?:$S)?(?:\\[(?:$DT_ItemSE)*](?:$S)?)?>?"; |
| $DeclCE = |
| "--(?:$CommentCE)?|\\[CDATA\\[(?:$CDATA_CE)?|DOCTYPE(?:$DocTypeCE)?"; |
| $PI_CE = "$Name(?:$PI_Tail)?"; |
| $EndTagCE = "$Name(?:$S)?>?"; |
| $AttValSE = "\"[^<\"]*\"|'[^<']*'"; |
| $ElemTagCE = "$Name(?:$S$Name(?:$S)?=(?:$S)?(?:$AttValSE))*(?:$S)?/?>?"; |
| $MarkupSPE = |
| "<(?:!(?:$DeclCE)?|\\?(?:$PI_CE)?|/(?:$EndTagCE)?|(?:$ElemTagCE)?)"; |
| $XML_SPE = "$TextSE|$MarkupSPE"; |
| |
| |
| sub ShallowParse { |
| my($XML_document) = @_; |
| return $XML_document =~ /$XML_SPE/g; |
| } |
| --- end REX code --- |
| |
| The Perl and Ruby implementations of xmlformat each contain a parser |
| that is based on the preceding code. These parsers are essentially the |
| same as the REX code, with the exception of changes to variable and |
| function names. |
| |