1 #!/usr/bin/env python
   2 """
   3     12_to_13.py - migration from < moin--main--patch-248 to >= patch 249
   4     * convert event-log from iso8859-1 to config.charset (utf-8) encoding
   5 
   6     Steps for a successful migration to utf-8:
   7         1. stop your wiki and make a backup
   8         2. make a copy of the wiki's "data" directory to your working dir
   9         3. clean up your working copy of the data dir:
  10             a. if you use CVS or GNU arch remove stuff like CVS/, .cvsignore
  11                or .arch-ids/ etc.
  12             b. remove *.pickle (used by moin for caching some information,
  13                will be re-created automatically), especially:
  14                    I. data/user/userdict.pickle
  15                    II. data/dicts.pickle
  16             c. if you used symlinks in data/text or elsewhere, remove them
        4. make sure that from_encoding and to_encoding match your needs (see
  18            beginning of script below and config.charset in moin_config.py) and
  19            run python2.3 12_to_13_mig6.py from your working dir
  20         5. if there was no error, you will find:
  21             data.pre-mig6 (the script renames your data directory copy to that name)
  22             data (result, converted to utf-8)
  23         6. verify conversion results (number of pages, size of logs, attachments,
  24            number of backup copies) - everything should be reasonable before
  25            you proceed. Usually the file size gets larger when converting from
  26            iso8859-1 (or other non-unicode charset) to utf-8 except if your
  27            content is ASCII-only, then it will keep its size.
  28         7. copy additional files from data.pre-mig6 to data (maybe intermaps, logs,
  29            etc.). Be aware that the file contents AND file names of wiki content
  30            may have changed, so DO NOT copy the cache/ directory, but let
  31            the wiki recreate it.
  32         8. replace the data directory your wiki uses with the data directory
  33            you created by previous steps. DO NOT simply copy the converted stuff
  34            into the original or you will duplicate pages and create chaos!
  35         9. test it. if something has gone wrong, you still have your backup.
  36 
  37 
  38         10. if you use dictionaries for spellchecking, you have to convert them
  39             to config.charset, too. Remove your dict.cache before re-starting
  40             your wiki.
  41 
  42     @copyright: 2004 Thomas Waldmann
  43     @license: GPL, see COPYING for details
  44 """
  45 
  46 #from_encoding = 'iso8859-1'
  47 from_encoding = 'utf-8'
  48 to_encoding = 'utf-8'
  49 
  50 import os.path, sys, shutil, urllib
  51 
  52 sys.path.insert(0, '../../..')
  53 from MoinMoin import wikiutil
  54 
  55 from migutil import opj, listdir, copy_file, copy_dir
  56 
  57 errorcount = 0
  58 
  59 def convert_string(str, enc_from, enc_to):
  60     #print str
  61     global errorcount
  62     try:
  63         return str.decode(enc_from).encode(enc_to)
  64     except:
  65         #print sys.exc_info()
  66         errorcount +=1
  67         return "ERROR"
  68     #return str.decode(enc_from).encode(enc_to)
  69 
  70 def convert_eventlog(fname_from, fname_to, enc_from, enc_to):
  71     print "%s -> %s" % (fname_from, fname_to)
  72     file_from = open(fname_from)
  73     file_to = open(fname_to, "w")
  74 
  75     for line in file_from:
  76         line = line.replace('\r','')
  77         line = line.replace('\n','')
  78         fields = line.split('\t')
  79         kvpairs = fields[2]
  80         kvpairs = kvpairs.split('&')
  81         kvlist = []
  82         for kvpair in kvpairs:
  83             key, val = kvpair.split('=')
  84             key = urllib.unquote(key)
  85             val = urllib.unquote(val)
  86             key = convert_string(key, enc_from, enc_to)
  87             val = convert_string(val, enc_from, enc_to)
  88             if("ERROR"==key or "ERROR"==val):
  89                 # setp by setp find error log line..
  90                 # not log in new format log file..
  91                 pass
  92             else:
  93                 key = urllib.quote(key)
  94                 val = urllib.quote(val)
  95                 kvlist.append("%s=%s" % (key,val))
  96             #key = urllib.quote(key)
  97             #val = urllib.quote(val)
  98             #kvlist.append("%s=%s" % (key,val))
  99         fields[2] = '&'.join(kvlist)
 100         line = '\t'.join(fields) + '\n'
 101         file_to.write(line)
 102 
 103     file_to.close()
 104     file_from.close()
 105     st=os.stat(fname_from)
 106     os.utime(fname_to, (st.st_atime,st.st_mtime))
 107 
 108 
 109 origdir = 'data.pre-mig6'
 110 
 111 try:
 112     os.rename('data', origdir)
 113     pass
 114 except OSError:
 115     print "You need to be in the directory where your copy of the 'data' directory is located."
 116     sys.exit(1)
 117 
 118 copy_dir(origdir, 'data')
 119 os.remove(opj('data','event-log')) # old format
 120 convert_eventlog(opj(origdir, 'event-log'), opj('data', 'event-log'), from_encoding, to_encoding)
 121 #Zoomq::050205 add for jump out Unicode error skip log that bad line
 122 print "mig6 finished but maybe there is %d lines log not transition for some unicode ERROR!!"%errorcount