sf2bz.sh
author ali@yendor.vm.bytemark.co.uk
Thu Nov 16 08:30:26 2006 +0000 (2006-11-16)
changeset 0 ae7b3fa753dc
permissions -rw-r--r--
First cut. Distinctly raw around the edges:
* Assumes it will be running in /home/ali/wk/slashem/web.scripts
* Assumes cache directory will be in topdir
* No build system (simple compiling and linking against libxml2)
* No configure system (e.g., tagsoup)
* Output XML untested
* Doesn't set bugzilla maintainer or exporter
* Handling of artifact priorities and resolution is suspect
     1 #!/bin/sh
     2 if [ $# -ne 2 ]; then
     3     echo "Usage: sf2bz <sf-project-export>.xml <bz-import>.xml" >&2
     4     exit 1
     5 fi
     6 input=$1
     7 output=$2
     8 topdir=/home/ali/wk/slashem/web.scripts
     9 cd $topdir
    10 cache_dir=$topdir/cache
    11 tagsoup=tagsoup/tagsoup-1.0.1.jar
    12 #tmpdir=`mktemp -d /tmp/XXXXXXXX` || exit 1
    13 tmpdir=/tmp/sf2bz; rm -rf $tmpdir; mkdir $tmpdir
    14 mkdir -p $cache_dir/sf/artifacts
    15 # Sourceforge's export is pretty good, but misses a few things.
    16 # Deal with one of these (descriptions for attached files) here.
    17 echo '<?xml version="1.0"?>' > $tmpdir/descriptions.xml
    18 echo '<artifacts xmlns:xi="http://www.w3.org/2001/XInclude">' \
    19   >> $tmpdir/descriptions.xml
    20 ./get_attached_files $input | cut -s "-d " -f 1,2 | sort -n | uniq | \
    21   while read atid aid; do
    22     mkdir -p $cache_dir/sf/artifacts/$atid
    23     if [ ! -r $cache_dir/sf/artifacts/$atid/$aid.html ]; then
    24 	wget -q -O $cache_dir/sf/artifacts/$atid/$aid.html \
    25 	  "http://sourceforge.net/tracker/index.php?func=detail&aid=$aid&group_id=9746&atid=$atid"
    26     fi
    27     mkdir -p $cache_dir/artifacts/$atid
    28     if [ ! -r $cache_dir/artifacts/$atid/$aid.xhtml ]; then
    29 	java -jar $tagsoup $cache_dir/sf/artifacts/$atid/$aid.html \
    30 	  > $cache_dir/artifacts/$atid/$aid.xhtml
    31     fi
    32     echo "<xi:include href=\"$cache_dir/artifacts/$atid/$aid.xhtml\"/>" >> $tmpdir/descriptions.xml
    33 done
    34 echo '</artifacts>' >> $tmpdir/descriptions.xml
    35 xsltproc --xinclude attachment_descs.xsl $tmpdir/descriptions.xml \
    36   > $tmpdir/attachment-descriptions.xml
    37 # Some details of attachments are included in the project export.
    38 xsltproc attachment_details.xsl $input > $tmpdir/attachment-details.xml
    39 # Then convert the attachments to XML (as required by bugzilla)
    40 # pulling in the various bits of information we have gathered.
    41 mkdir -p $cache_dir/sf/attachments $cache_dir/attachments
    42 mkdir -p $tmpdir/jail
    43 ./get_attached_files artifacts.xml | while read atid aid file_id; do
    44     if [ ! -r $cache_dir/sf/attachments/$file_id ]; then
    45 	wget -N -O $cache_dir/sf/attachments/$file_id "http://sourceforge.net/tracker/download.php?group_id=9746&atid=$atid&file_id=$file_id&aid=$aid"
    46     fi
    47     if [ ! -r $cache_dir/attachments/$file_id.xml ]; then
    48 	type=`file -bi $cache_dir/sf/attachments/$file_id | sed 's/;.*//'`
    49 	case "$type" in
    50 	    text/*)
    51 		(cd $tmpdir/jail; patch --dry-run -s -f \
    52 		  < $cache_dir/sf/attachments/$file_id > /dev/null 2>&1)
    53 		if [ $? -lt 2 ]; then
    54 		    pflag="-p"
    55 		else
    56 		    pflag=""
    57 		fi
    58 		;;
    59 	    *)
    60 	        pflag=""
    61 		;;
    62 	esac
    63 	./xml_attached_file $pflag -i $file_id -t $type \
    64 	  $cache_dir/sf/attachments/$file_id > $tmpdir/$file_id.1.xml
    65 	xsltproc attachment_gather.xsl $tmpdir/$file_id.1.xml \
    66 	  > $tmpdir/$file_id.2.xml
    67 	xsltproc --xinclude attachment_post.xsl $tmpdir/$file_id.2.xml \
    68 	  > $cache_dir/attachments/$file_id.xml
    69     fi
    70 done
    71 # We can't get hold of descriptions of deleted attachments, so we
    72 # keep a copy of the file in case we need it. Ideally, we'd make sure
    73 # that we only add descriptions, but that's for another day.
    74 cp $tmpdir/attachment-descriptions.xml $topdir
    75 xsltproc get_users.xsl $input > $tmpdir/raw_users.xml
    76 echo '<artifacts xmlns:xi="http://www.w3.org/2001/XInclude">' \
    77   >> $tmpdir/users.1.xml
    78 for file in $cache_dir/artifacts/*/*.xhtml; do
    79     echo "<xi:include href=\"$file\"/>" >> $tmpdir/users.1.xml
    80 done
    81 echo "<xi:include href=\"raw_users.xml\"/>" >> $tmpdir/users.1.xml
    82 echo '</artifacts>' >> $tmpdir/users.1.xml
    83 xsltproc --xinclude user_names.xsl $tmpdir/users.1.xml > $tmpdir/users.2.xml
    84 mkdir -p $cache_dir/sf/users $cache_dir/users
    85 echo '<sf_users xmlns:xi="http://www.w3.org/2001/XInclude">' \
    86   >> $tmpdir/users.3.xml
    87 xsltproc unknown_users.xsl $tmpdir/users.2.xml | while read user; do
    88     case $user in
    89 	*@users.sourceforge.net)
    90 	    user_id=`echo $user | sed 's/@users.sourceforge.net$//'`
    91 	    if [ ! -r $cache_dir/sf/users/$user_id.html ]; then
    92 		wget -N -O $cache_dir/sf/users/$user_id.html \
    93 		  "http://sourceforge.net/users/$user_id/"
    94 	    fi
    95 	    if [ ! -r $cache_dir/users/$user_id.xhtml ]; then
    96 		java -jar $tagsoup $cache_dir/sf/users/$user_id.html \
    97 		  > $cache_dir/users/$user_id.xhtml
    98 	    fi
    99 	    echo "<xi:include href=\"$cache_dir/users/$user_id.xhtml\"/>" \
   100 	      >> $tmpdir/users.3.xml
   101     esac
   102 done
   103 echo "<xi:include href=\"users.2.xml\"/>" >> $tmpdir/users.3.xml
   104 echo '</sf_users>' >> $tmpdir/users.3.xml
   105 xsltproc --xinclude user_names.xsl $tmpdir/users.3.xml > users.xml
   106 ln -s $cache_dir/attachments $tmpdir
   107 xsltproc bugzilla.xsl $input > artifacts.tmp.xml
   108 xsltproc --xinclude bugzilla_post.xsl artifacts.tmp.xml > $output
   109 #rm -rf $tmpdir artifacts.tmp.xml