各種SKK辞書パッケージを公開 (http://www.self-core.org/~atty/pub/skkdic/)
http://openlab.jp/skk/dic-ja.htmlで公開されている辞書をすべてipk化してみました。自動で~/.uimに設定を追加するようにしたかったのですが、まだIMKit-uimが複数辞書に対応していないので見送りました。これからも本家の更新にあわせて随時公開していきます。
はじめてPythonでマジメに書いてみたので、貼ってみる。
#!/usr/pkg/bin/python
# vim: set tabstop=8 softtabstop=4 shiftwidth=4 expandtab smarttab:
"""Build ipkg (.ipk) packages from the SKK dictionaries on openlab.jp.

For every dictionary in DIC_LIST the script issues an HTTP HEAD request to
read the Last-Modified time and size, downloads the dictionary, and wraps it
in an .ipk archive (debian-binary + data.tar.gz + control.tar.gz) written to
OUTPUT_DIR.  Everything is assembled in memory; no temporary files are used.

This is Python 2 code (httplib, StringIO, urllib.urlopen).
"""

import os
import time
import shelve
import logging
import httplib
import urllib
import tarfile
from email.Utils import parsedate
from StringIO import StringIO
from copy import copy

DICT_SITE_HOST = "openlab.jp"
DICT_SITE_DIR = "/skk/skk/dic/"
# When true, packages are rebuilt even if the cached Last-Modified time says
# the dictionary has not changed.
FORCE_BUILD = True
OUTPUT_DIR = '/home/atty/public_html/pub/skkdic/'

# Control file of the package; the #{...} placeholders are substituted in
# make_dic_ipkg().  Continuation lines of Description start with a space.
# NOTE(review): the Maintainer e-mail address appears to have been lost from
# the pasted source -- restore it before publishing packages.
IPKG_CONTROL_TEMPLATE = """\
Package: #{PACKAGE}
Priority: optional
Section: utils
Version: #{VERSION}
Architecture: all
Maintainer: AGAWA Koji
Depends:
Description: SKK Dictionary (#{DICTIONARY})
 This package provided #{DICTIONARY}.
"""

logger = logging.getLogger()
# The root logger defaults to WARNING; without lowering it the INFO-level
# handler below would never receive the log.info() messages.
logger.setLevel(logging.INFO)
handler = logging.StreamHandler()
handler.setLevel(logging.INFO)
formatter = logging.Formatter(
    '%(asctime)s [%(levelname)s] %(name)s: %(message)s')
handler.setFormatter(formatter)
logger.addHandler(handler)

# Replace tarfile's path normalisation with one that only converts the path
# separator.  Presumably this keeps the leading './' of the member names
# intact -- TODO confirm against the tarfile version in use.
if os.sep != '/':
    tarfile.normpath = lambda path: path.replace(os.sep, '/')
else:
    tarfile.normpath = lambda path: path


def adddir(tar, dir):
    """Add an empty directory entry named *dir* to the open TarFile *tar*.

    The entry is mode 0755, owned by uid/gid 500 ('zaurus'/'qpe') and
    stamped with the current time.
    """
    inf = tarfile.TarInfo()
    inf.name = dir
    inf.mode = tarfile.S_IFDIR | 0o755
    inf.uid, inf.gid = (500, 500)
    inf.size = 0
    inf.mtime = time.mktime(time.localtime())
    inf.type = tarfile.DIRTYPE
    inf.uname, inf.gname = ('zaurus', 'qpe')
    tar.addfile(inf)


def dic_retrive(dic_name):
    """Open the remote dictionary *dic_name* for download.

    A HEAD request is made first to obtain the Last-Modified time and the
    Content-Length.  Unless FORCE_BUILD is set, a dictionary whose cached
    timestamp is not older than the server's is skipped.

    Returns a tuple ``(file_object, size_in_bytes, last_modified)`` where
    *last_modified* is the time tuple produced by ``parsedate``, or None
    when the dictionary is unchanged or could not be retrieved.  The caller
    is responsible for closing the returned file object.
    """
    log = logging.getLogger(dic_name + ' NET')

    subdir = ''
    if dic_name in ('SKK-JISYO.zipcode', 'SKK-JISYO.office.zipcode'):
        subdir = 'zipcode/'
    path = DICT_SITE_DIR + subdir + dic_name

    conn = httplib.HTTPConnection(DICT_SITE_HOST)
    try:
        conn.request('HEAD', path)
        res = conn.getresponse()
        status, reason = res.status, res.reason
        lastmod_header = res.getheader('Last-Modified')
        length_header = res.getheader('Content-Length')
    finally:
        # Only the headers are needed; do not leak the connection.
        conn.close()

    if status != 200:
        log.warning('HEAD failed (%d %s)', status, reason)
        return None

    # Guard against a missing header before handing it to parsedate().
    lastmod = None
    if lastmod_header:
        lastmod = parsedate(lastmod_header)
    if not lastmod:
        log.warning("couldn't get Last-Modified time")
        return None

    if not FORCE_BUILD and dic_name in cache:
        if time.mktime(cache[dic_name]) >= time.mktime(lastmod):
            log.info('not modified')
            return None

    try:
        size = int(length_header)
    except (TypeError, ValueError):
        log.warning("couldn't get Content-Length")
        return None

    dic_url = 'http://' + DICT_SITE_HOST + path
    try:
        dic_file = urllib.urlopen(dic_url)
    except IOError:
        log.warning('GET failed (%s)', dic_url)
        return None

    # Record the timestamp only once the download has been opened, so that a
    # failed GET is retried on the next run instead of looking up to date.
    cache[dic_name] = lastmod
    return (dic_file, size, lastmod)


def _open_tar_gz(name, fileobj):
    """Open a gzip-compressed tar archive called *name* writing to *fileobj*."""
    tar = tarfile.open(name, 'w:gz', fileobj)
    # NOTE(review): TarFile's documented attribute is lowercase ``posix``;
    # this capitalised assignment looks like a no-op.  It is kept as written
    # because changing it could alter the archive format -- confirm.
    tar.Posix = True
    return tar


def _member(template, name, size, mtime=None):
    """Return a copy of the TarInfo *template* describing one regular file."""
    inf = copy(template)
    inf.name = name
    inf.size = size
    if mtime is not None:
        inf.mtime = mtime
    return inf


def _add_buffer(tar, template, name, buf):
    """Add the whole content of the StringIO *buf* to *tar* as *name*."""
    size = len(buf.getvalue())
    buf.seek(0)
    tar.addfile(_member(template, name, size), buf)


def _build_data_tar(dic_name, dic_file, template, log):
    """Build data.tar.gz in memory; return its StringIO, or None on failure.

    *dic_file* is the ``(file_object, size, last_modified)`` tuple returned
    by dic_retrive().  The dictionary is installed below
    ./opt/QtPalmtop/share/skk/.
    """
    skk_dir = './opt/QtPalmtop/share/skk'
    dic_stream, dic_size, dic_lastmod = dic_file

    data_buf = StringIO()
    tar = _open_tar_gz('./data.tar.gz', data_buf)
    try:
        for directory in ('.', './opt', './opt/QtPalmtop',
                          './opt/QtPalmtop/share', skk_dir):
            adddir(tar, directory)

        tar.addfile(
            _member(template, skk_dir + '/' + dic_name, dic_size,
                    int(time.mktime(dic_lastmod))),
            dic_stream)

        if dic_name == 'SKK-JISYO.edict':
            # The edict dictionary ships together with its documentation.
            # NOTE(review): when FORCE_BUILD is off and edict_doc.txt is
            # reported as 'not modified', dic_retrive() returns None and the
            # edict package is not built at all.
            doc = dic_retrive('edict_doc.txt')
            if not doc:
                log.warning('retreive failed')
                return None
            try:
                tar.addfile(
                    _member(template, skk_dir + '/edict_doc.txt', doc[1],
                            int(time.mktime(doc[2]))),
                    doc[0])
            finally:
                doc[0].close()
    finally:
        tar.close()
    return data_buf


def _build_control_tar(control_text, template):
    """Build control.tar.gz in memory and return its StringIO."""
    control_buf = StringIO()
    tar = _open_tar_gz('./control.tar.gz', control_buf)
    try:
        adddir(tar, '.')
        tar.addfile(_member(template, './control', len(control_text)),
                    StringIO(control_text))
    finally:
        tar.close()
    return control_buf


def make_dic_ipkg(dic_name):
    """Download the dictionary *dic_name* and write its .ipk to OUTPUT_DIR.

    The package is named ``skkdic-<suffix>`` where <suffix> is the part of
    *dic_name* after 'SKK-JISYO.', lower-cased with '_' and '.' turned into
    '-'.  The version is the dictionary's Last-Modified date followed by
    '-1'.  Returns None in every case; failures are reported through the
    logger.
    """
    log = logging.getLogger(dic_name)
    log.info('---- ' + dic_name)

    dic_file = dic_retrive(dic_name)
    if not dic_file:
        log.warning('retreive failed')
        return None

    try:
        # Attributes shared by every regular-file member of the archives.
        tarinfo_template = tarfile.TarInfo()
        tarinfo_template.mode = tarfile.S_IFREG | 0o644
        tarinfo_template.uid, tarinfo_template.gid = (500, 500)
        tarinfo_template.mtime = time.mktime(time.localtime())
        tarinfo_template.uname, tarinfo_template.gname = ('zaurus', 'qpe')

        # dic_name[10:] strips the 10-character 'SKK-JISYO.' prefix.
        suffix = dic_name[10:].lower().replace('_', '-').replace('.', '-')
        ipk_name = 'skkdic-' + suffix
        ipk_version = time.strftime('%Y%m%d-1', dic_file[2])
        ipk_filename = ipk_name + '_' + ipk_version + '_all.ipk'

        ipk_data_file = _build_data_tar(dic_name, dic_file,
                                        tarinfo_template, log)
        if ipk_data_file is None:
            return None
    finally:
        # The download stream is no longer needed once data.tar.gz exists.
        dic_file[0].close()

    cntl = IPKG_CONTROL_TEMPLATE.replace('#{PACKAGE}', ipk_name)
    cntl = cntl.replace('#{VERSION}', ipk_version)
    cntl = cntl.replace('#{DICTIONARY}', dic_name)
    ipk_control_file = _build_control_tar(cntl, tarinfo_template)

    ipk_debian_binary_file = StringIO('2.0\n')

    ipk_file = open(OUTPUT_DIR + ipk_filename, 'wb')
    try:
        tar = _open_tar_gz(ipk_filename, ipk_file)
        try:
            _add_buffer(tar, tarinfo_template, './debian-binary',
                        ipk_debian_binary_file)
            _add_buffer(tar, tarinfo_template, './data.tar.gz',
                        ipk_data_file)
            _add_buffer(tar, tarinfo_template, './control.tar.gz',
                        ipk_control_file)
        finally:
            tar.close()
    finally:
        ipk_file.close()


# main
DIC_LIST = [
    'SKK-JISYO.S',
    'SKK-JISYO.M',
    'SKK-JISYO.ML',
    'SKK-JISYO.L',
    'SKK-JISYO.JIS2',
    'SKK-JISYO.JIS3_4',
    'SKK-JISYO.pubdic+',
    'SKK-JISYO.edict',
    'SKK-JISYO.geo',
    'SKK-JISYO.zipcode',
    'SKK-JISYO.office.zipcode',
    'SKK-JISYO.assoc',
    'SKK-JISYO.okinawa',
    'SKK-JISYO.law',
    'SKK-JISYO.jinmei',
    'SKK-JISYO.china_taiwan',
    'SKK-JISYO.mazegaki',
]

# Maps a dictionary name to the Last-Modified time tuple of its last download.
cache = shelve.open('cache')
try:
    for dic in DIC_LIST:
        make_dic_ipkg(dic)
finally:
    # Flush the cache even when one of the builds raises.
    cache.close()
うーん、汚い。
ファイルオブジェクトがよく抽象化されていて、テンポラリファイルを作る必要が無いのに感動した。