sf2bz.sh
author ali@yendor.vm.bytemark.co.uk
Thu Nov 16 08:30:26 2006 +0000 (2006-11-16)
changeset 0 ae7b3fa753dc
permissions -rw-r--r--
First cut. Distinctly raw around the edges:
* Assumes it will be running in /home/ali/wk/slashem/web.scripts
* Assumes cache directory will be in topdir
* No build system (simple compiling and linking against libxml2)
* No configure system (e.g., tagsoup)
* Output XML untested
* Doesn't set bugzilla maintainer or exporter
* Handling of artifact priorities and resolution is suspect
#!/bin/sh
# sf2bz: convert a SourceForge project export into a Bugzilla import file.
#
# Usage: sf2bz <sf-project-export>.xml <bz-import>.xml
#
# The script changes into $topdir before doing any work, so relative
# input/output paths are resolved against $topdir, not against the
# directory the caller was in.
if [ $# -ne 2 ]; then
    echo "Usage: sf2bz <sf-project-export>.xml <bz-import>.xml" >&2
    exit 1
fi
input=$1
output=$2

# Helper programs and stylesheets are invoked relative to this directory,
# so failing to enter it must stop the run instead of carrying on in
# whatever directory we happened to start in.
topdir=/home/ali/wk/slashem/web.scripts
cd "$topdir" || {
    echo "sf2bz: cannot change directory to $topdir" >&2
    exit 1
}
cache_dir=$topdir/cache
tagsoup=tagsoup/tagsoup-1.0.1.jar

# Scratch directory, recreated from scratch on every run.
# NOTE(review): a fixed path is used instead of the mktemp call below;
# presumably other parts of the tool chain rely on it -- confirm before
# switching.
#tmpdir=`mktemp -d /tmp/XXXXXXXX` || exit 1
tmpdir=/tmp/sf2bz
rm -rf -- "$tmpdir"
# Plain mkdir (no -p) fails if the path still exists or was recreated by
# someone else after the rm; refuse to continue in that case.
mkdir "$tmpdir" || exit 1
mkdir -p "$cache_dir/sf/artifacts"
# Sourceforge's export is pretty good, but misses a few things.
# Deal with one of these (descriptions for attached files) here.
#
# For every distinct (tracker id, artifact id) pair reported by
# get_attached_files, fetch the artifact's detail page, convert it to
# XHTML with tagsoup, and reference it from an XInclude wrapper document
# that attachment_descs.xsl then processes.
#
# Both the raw page and its XHTML form are cached under $cache_dir. Each
# is written to a ".part" file first and renamed into place only on
# success; otherwise the file created by wget -O or by the shell
# redirection would make a failed attempt look like a cache hit forever.
{
    echo '<?xml version="1.0"?>'
    echo '<artifacts xmlns:xi="http://www.w3.org/2001/XInclude">'
} > "$tmpdir/descriptions.xml"
./get_attached_files "$input" | cut -s "-d " -f 1,2 | sort -n | uniq |
    while read -r atid aid; do
        raw_html=$cache_dir/sf/artifacts/$atid/$aid.html
        xhtml=$cache_dir/artifacts/$atid/$aid.xhtml
        mkdir -p "$cache_dir/sf/artifacts/$atid" "$cache_dir/artifacts/$atid"

        if [ ! -r "$raw_html" ]; then
            if ! wget -q -O "$raw_html.part" \
              "http://sourceforge.net/tracker/index.php?func=detail&aid=$aid&group_id=9746&atid=$atid"
            then
                echo "sf2bz: cannot fetch artifact $aid (tracker $atid); skipped" >&2
                rm -f "$raw_html.part"
                continue
            fi
            mv "$raw_html.part" "$raw_html"
        fi

        if [ ! -r "$xhtml" ]; then
            if ! java -jar "$tagsoup" "$raw_html" > "$xhtml.part"; then
                echo "sf2bz: tagsoup failed on $raw_html; skipped" >&2
                rm -f "$xhtml.part"
                continue
            fi
            mv "$xhtml.part" "$xhtml"
        fi

        echo "<xi:include href=\"$xhtml\"/>" >> "$tmpdir/descriptions.xml"
    done
echo '</artifacts>' >> "$tmpdir/descriptions.xml"
xsltproc --xinclude attachment_descs.xsl "$tmpdir/descriptions.xml" \
    > "$tmpdir/attachment-descriptions.xml"
# Some details of attachments are included in the project export.
xsltproc attachment_details.xsl "$input" > "$tmpdir/attachment-details.xml"

# Then convert the attachments to XML (as required by bugzilla)
# pulling in the various bits of information we have gathered.
#
# Each attachment is downloaded once into $cache_dir/sf/attachments and
# its converted form is kept in $cache_dir/attachments. Downloads and
# conversions are staged in temporary files and moved into the cache only
# on success, so that a failure is retried on the next run instead of
# being mistaken for a cached result.
mkdir -p "$cache_dir/sf/attachments" "$cache_dir/attachments"
mkdir -p "$tmpdir/jail"
# The attachment list comes from the export named on the command line,
# the same file the rest of the script reads.
./get_attached_files "$input" | while read -r atid aid file_id; do
    raw=$cache_dir/sf/attachments/$file_id
    xml=$cache_dir/attachments/$file_id.xml

    if [ ! -r "$raw" ]; then
        # No -N here: wget's timestamping has no effect together with -O.
        if ! wget -O "$raw.part" "http://sourceforge.net/tracker/download.php?group_id=9746&atid=$atid&file_id=$file_id&aid=$aid"
        then
            echo "sf2bz: cannot fetch attachment $file_id; skipped" >&2
            rm -f "$raw.part"
            continue
        fi
        mv "$raw.part" "$raw"
    fi

    # Already converted on an earlier run.
    [ -r "$xml" ] && continue

    # MIME type as reported by file(1), with any ";charset=..." removed.
    type=$(file -bi "$raw" | sed 's/;.*//')

    # Text attachments are test-fed to patch (dry run, inside an empty
    # scratch directory). patch exits with 0 or 1 when it could read the
    # input as a patch and with 2 on more serious trouble; in the former
    # case -p is passed on to xml_attached_file.
    pflag=
    case "$type" in
        text/*)
            (
                cd "$tmpdir/jail" || exit 2
                patch --dry-run -s -f < "$raw" > /dev/null 2>&1
            )
            if [ $? -lt 2 ]; then
                pflag="-p"
            fi
            ;;
    esac

    # $pflag is deliberately unquoted: when empty it must vanish rather
    # than be passed as an empty argument.
    if ./xml_attached_file $pflag -i "$file_id" -t "$type" "$raw" \
          > "$tmpdir/$file_id.1.xml" &&
       xsltproc attachment_gather.xsl "$tmpdir/$file_id.1.xml" \
          > "$tmpdir/$file_id.2.xml" &&
       xsltproc --xinclude attachment_post.xsl "$tmpdir/$file_id.2.xml" \
          > "$xml.part"
    then
        mv "$xml.part" "$xml"
    else
        echo "sf2bz: cannot convert attachment $file_id; skipped" >&2
        rm -f "$xml.part"
    fi
done
# We can't get hold of descriptions of deleted attachments, so we
# keep a copy of the file in case we need it. Ideally, we'd make sure
# that we only add descriptions, but that's for another day.
cp "$tmpdir/attachment-descriptions.xml" "$topdir"

# First pass over users: feed every cached artifact page plus the users
# extracted from the export (raw_users.xml, resolved relative to the
# wrapper document in $tmpdir) through user_names.xsl.
xsltproc get_users.xsl "$input" > "$tmpdir/raw_users.xml"
{
    echo '<artifacts xmlns:xi="http://www.w3.org/2001/XInclude">'
    for file in "$cache_dir"/artifacts/*/*.xhtml; do
        # With no cached pages the pattern is left unexpanded; do not
        # emit an include for a file that does not exist.
        [ -e "$file" ] || continue
        echo "<xi:include href=\"$file\"/>"
    done
    echo "<xi:include href=\"raw_users.xml\"/>"
    echo '</artifacts>'
} > "$tmpdir/users.1.xml"
xsltproc --xinclude user_names.xsl "$tmpdir/users.1.xml" > "$tmpdir/users.2.xml"

# Second pass: for each address that unknown_users.xsl reports and that
# ends in @users.sourceforge.net, fetch and tidy the SourceForge user
# page, then run user_names.xsl again over those pages plus the
# first-pass result. Pages are cached; ".part" staging keeps a failed
# download or conversion from being mistaken for a cache hit.
mkdir -p "$cache_dir/sf/users" "$cache_dir/users"
echo '<sf_users xmlns:xi="http://www.w3.org/2001/XInclude">' \
    > "$tmpdir/users.3.xml"
xsltproc unknown_users.xsl "$tmpdir/users.2.xml" | while read -r user; do
    case $user in
        *@users.sourceforge.net)
            user_id=${user%@users.sourceforge.net}
            raw_html=$cache_dir/sf/users/$user_id.html
            xhtml=$cache_dir/users/$user_id.xhtml

            if [ ! -r "$raw_html" ]; then
                # No -N here: wget's timestamping has no effect with -O.
                if ! wget -O "$raw_html.part" \
                  "http://sourceforge.net/users/$user_id/"
                then
                    echo "sf2bz: cannot fetch user page for $user_id; skipped" >&2
                    rm -f "$raw_html.part"
                    continue
                fi
                mv "$raw_html.part" "$raw_html"
            fi

            if [ ! -r "$xhtml" ]; then
                if ! java -jar "$tagsoup" "$raw_html" > "$xhtml.part"; then
                    echo "sf2bz: tagsoup failed on $raw_html; skipped" >&2
                    rm -f "$xhtml.part"
                    continue
                fi
                mv "$xhtml.part" "$xhtml"
            fi

            echo "<xi:include href=\"$xhtml\"/>" >> "$tmpdir/users.3.xml"
            ;;
    esac
done
echo "<xi:include href=\"users.2.xml\"/>" >> "$tmpdir/users.3.xml"
echo '</sf_users>' >> "$tmpdir/users.3.xml"
xsltproc --xinclude user_names.xsl "$tmpdir/users.3.xml" > users.xml
# Final assembly of the Bugzilla import file.
#
# The cached attachment XML directory is made reachable as
# $tmpdir/attachments before the stylesheets run.
# NOTE(review): presumably the XIncludes resolved by the post-processing
# pass point there -- confirm against bugzilla.xsl / bugzilla_post.xsl.
ln -s "$cache_dir/attachments" "$tmpdir"

# Two passes: bugzilla.xsl turns the export into artifacts.tmp.xml, then
# bugzilla_post.xsl (with XInclude processing) writes the final output.
# The second pass runs only if the first succeeded, so a failed
# transformation neither overwrites $output nor hides behind a zero exit
# status.
xsltproc bugzilla.xsl "$input" > artifacts.tmp.xml &&
    xsltproc --xinclude bugzilla_post.xsl artifacts.tmp.xml > "$output"

# Cleanup is left disabled, which keeps the intermediate files around.
#rm -rf $tmpdir artifacts.tmp.xml