各種SKK辞書パッケージを公開 (http://www.self-core.org/~atty/pub/skkdic/)

http://openlab.jp/skk/dic-ja.html で公開されている辞書をすべてipk化してみました。自動で~/.uimに設定を追加するようにしたかったのですが、まだIMKit-uimが複数辞書に対応していないので見送りました。これからも本家の更新にあわせて随時公開していきます。

はじめてPythonでマジメに書いてみたので、貼ってみる。

#!/usr/pkg/bin/python
# vim: set tabstop=8 softtabstop=4 shiftwidth=4 expandtab smarttab:

import os
import time
import shelve
import logging
import httplib
import urllib
import tarfile
from email.Utils import parsedate
from StringIO import StringIO
from copy import copy

# Host name and base path of the site the dictionaries are fetched from.
DICT_SITE_HOST = 'openlab.jp'
DICT_SITE_DIR = '/skk/skk/dic/'

# Directory the finished .ipk files are written to (must end with '/',
# because file names are appended by plain string concatenation).
OUTPUT_DIR = "/home/atty/public_html/pub/skkdic/"

# When true, the Last-Modified comparison against the cache is skipped and
# every dictionary is rebuilt on each run.
FORCE_BUILD = True

# Text of the package's ./control file.  The #{PACKAGE}, #{VERSION} and
# #{DICTIONARY} markers are substituted with str.replace() when a package
# is built.
# NOTE(review): the Maintainer line ends in a bare space; it looks as if an
# e-mail address was stripped from it -- confirm.
IPKG_CONTROL_TEMPLATE = (
    "Package: #{PACKAGE}\n"
    "Priority: optional\n"
    "Section: utils\n"
    "Version: #{VERSION}\n"
    "Architecture: all\n"
    "Maintainer: AGAWA Koji \n"
    "Depends:\n"
    "Description: SKK Dictionary (#{DICTIONARY})\n"
    " This package provided #{DICTIONARY}.\n"
)

# Root logger: timestamped messages on the StreamHandler's default stream.
logger = logging.getLogger()
# The root logger's own threshold must be lowered as well; setting only the
# handler level leaves the logger at its default (WARNING), which discards
# every log.info() call before it reaches the handler.
logger.setLevel(logging.INFO)
handler = logging.StreamHandler()
handler.setLevel(logging.INFO)
formatter = logging.Formatter('%(asctime)s [%(levelname)s] %(name)s: %(message)s')
handler.setFormatter(formatter)
logger.addHandler(handler)

def _plain_normpath(path):
    """Return `path` unchanged except that os.sep is turned into '/'."""
    if os.sep == '/':
        return path
    return path.replace(os.sep, '/')

# Replace the tarfile module's normpath hook with the function above.
# NOTE(review): presumably done so that member names such as './opt' keep
# their leading './' in the archives -- confirm against the tarfile version
# in use.
tarfile.normpath = _plain_normpath

def adddir(tar, dir):
    inf = tarfile.TarInfo()
    inf.name = dir
    inf.mode = tarfile.S_IFDIR | 0755;
    inf.uid, inf.gid = (500, 500)
    inf.size = 0
    inf.mtime = time.mktime(time.localtime())
    inf.type = tarfile.DIRTYPE
    inf.uname, inf.gname = ('zaurus', 'qpe')

    tar.addfile(inf)

def dic_retrive(dic_name):
    """Open `dic_name` on the dictionary site for download.

    A HEAD request is made first to obtain the Last-Modified time and the
    Content-Length.  Unless FORCE_BUILD is set, a file whose Last-Modified
    time is not newer than the one stored in `cache` is skipped.

    Returns a tuple (file_object, size_in_bytes, last_modified_time_tuple),
    or None when the file is unchanged, when the HEAD request does not
    answer 200, when a required header is missing or unparsable, or when
    the download cannot be opened.  The caller is responsible for closing
    the returned file object.
    """
    log = logging.getLogger(dic_name + ' NET')

    # The two zip-code dictionaries live in a subdirectory of the site.
    subdir = ''
    if dic_name in ('SKK-JISYO.zipcode', 'SKK-JISYO.office.zipcode'):
        subdir = 'zipcode/'
    dic_path = DICT_SITE_DIR + subdir + dic_name

    conn = httplib.HTTPConnection(DICT_SITE_HOST)
    try:
        conn.request('HEAD', dic_path)
        res = conn.getresponse()
        status, reason = res.status, res.reason
        lastmod_header = res.getheader('Last-Modified')
        length_header = res.getheader('Content-Length')
    finally:
        # Only the headers are needed; do not leave the socket open.
        conn.close()

    if status != 200:
        log.warning('HEAD failed (%d %s)', status, reason)
        return None

    # Do not hand a missing header to parsedate().
    lastmod = None
    if lastmod_header:
        lastmod = parsedate(lastmod_header)
    if not lastmod:
        log.warning("couldn't get Last-Modified time")
        return None

    # The size goes into the tar header later, so it must be a valid number.
    try:
        size = int(length_header)
    except (TypeError, ValueError):
        log.warning("couldn't get Content-Length")
        return None

    if not FORCE_BUILD and dic_name in cache:
        if time.mktime(cache[dic_name]) >= time.mktime(lastmod):
            log.info('not modified')
            return None

    dic_url = 'http://' + DICT_SITE_HOST + dic_path
    try:
        dic_file = urllib.urlopen(dic_url)
    except IOError:
        log.warning('GET failed (%s)', dic_url)
        return None

    # Record the timestamp only once the download could be opened; otherwise
    # a failed GET would make the next run report 'not modified'.
    cache[dic_name] = lastmod

    return (dic_file, size, lastmod)

def make_dic_ipkg(dic_name):
    
    log = logging.getLogger(dic_name)

    log.info('---- ' + dic_name)
    dic_file = dic_retrive(dic_name)
    if not dic_file:
        log.warning('retreive failed')
        return None

    tarinfo_template = tarfile.TarInfo()
    tarinfo_template.mode = tarfile.S_IFREG | 0644;
    tarinfo_template.uid, tarinfo_template.gid = (500, 500)
    tarinfo_template.mtime = time.mktime(time.localtime())
    tarinfo_template.uname, tarinfo_template.gname = ('zaurus', 'qpe')

    ipk_name = 'skkdic-' + dic_name[10:].lower().replace('_', '-') \
            .replace('.', '-')
    ipk_version = time.strftime('%Y%m%d-1', dic_file[2])
    ipk_filename = ipk_name + '_' + ipk_version + '_all.ipk'

    ipk_data_file = StringIO()
    tar = tarfile.open('./data.tar.gz', 'w:gz', ipk_data_file)
    tar.Posix = True
    adddir(tar, '.')
    adddir(tar, './opt')
    adddir(tar, './opt/QtPalmtop')
    adddir(tar, './opt/QtPalmtop/share')
    adddir(tar, './opt/QtPalmtop/share/skk')
    inf = copy(tarinfo_template)
    inf.name = './opt/QtPalmtop/share/skk/' + dic_name
    inf.size = dic_file[1]
    inf.mtime = int(time.mktime(dic_file[2]))
    tar.addfile(inf, dic_file[0])
    if dic_name == 'SKK-JISYO.edict':
        f = dic_retrive('edict_doc.txt')
        if not f:
            log.warning('retreive failed')
            return None
        inf.name = './opt/QtPalmtop/share/skk/edict_doc.txt'
        inf.size = f[1]
        inf.mtime = int(time.mktime(f[2]))
        tar.addfile(inf, f[0])
    tar.close()

    cntl = IPKG_CONTROL_TEMPLATE.replace('#{PACKAGE}', ipk_name)
    cntl = cntl.replace('#{VERSION}', ipk_version)
    cntl = cntl.replace('#{DICTIONARY}', dic_name)
    control_file = StringIO(cntl)
    ipk_control_file = StringIO()
    tar = tarfile.open('./control.tar.gz', 'w:gz', ipk_control_file)
    tar.Posix = True
    adddir(tar, '.')
    inf = tarinfo_template
    inf.name = './control'
    inf.size = len(cntl)
    tar.addfile(inf, control_file)
    tar.close()

    ipk_debian_binary_file = StringIO('2.0\n')

    ipk_file = file(OUTPUT_DIR + ipk_filename, 'wb')
    tar = tarfile.open(ipk_filename, 'w:gz', ipk_file)
    tar.Posix = True

    inf.name = './debian-binary'
    inf.size = len(ipk_debian_binary_file.getvalue())
    ipk_debian_binary_file.seek(0)
    tar.addfile(inf, ipk_debian_binary_file)

    inf.name = './data.tar.gz'
    inf.size = len(ipk_data_file.getvalue())
    ipk_data_file.seek(0)
    tar.addfile(inf, ipk_data_file)

    inf.name = './control.tar.gz'
    inf.size = len(ipk_control_file.getvalue())
    ipk_control_file.seek(0)
    tar.addfile(inf, ipk_control_file)

    tar.close()
    ipk_file.close()

# main
# Names of the dictionaries to package, one per line, in build order.
DIC_LIST = """
    SKK-JISYO.S
    SKK-JISYO.M
    SKK-JISYO.ML
    SKK-JISYO.L
    SKK-JISYO.JIS2
    SKK-JISYO.JIS3_4
    SKK-JISYO.pubdic+
    SKK-JISYO.edict
    SKK-JISYO.geo
    SKK-JISYO.zipcode
    SKK-JISYO.office.zipcode
    SKK-JISYO.assoc
    SKK-JISYO.okinawa
    SKK-JISYO.law
    SKK-JISYO.jinmei
    SKK-JISYO.china_taiwan
    SKK-JISYO.mazegaki
""".split()

# Persistent map from file name to the Last-Modified time seen for it;
# dic_retrive() reads and updates it through this module-level name.
cache = shelve.open('cache')
try:
    for dic in DIC_LIST:
        make_dic_ipkg(dic)
finally:
    # Close the shelf even when a dictionary raises, so the timestamps
    # recorded so far are not lost.
    cache.close()

うーん、汚い。

ファイルオブジェクトがよく抽象化されていて、テンポラリファイルを作る必要が無いのに感動した。