User:Wmrwiki/导入xml文件/py
< User:Wmrwiki | 导入xml文件
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from xml.dom.minidom import *
import datetime
def create():
    """Return a new DOM document holding an empty ``<mediawiki>`` root.

    The root element carries the namespace, schema-location, version and
    language attributes of the MediaWiki XML export format, version 0.3.
    Pages are added to it afterwards.
    """
    # Attributes are applied in this exact order.
    root_attributes = (
        ('xmlns', 'http://www.mediawiki.org/xml/export-0.3/'),
        ('xmlns:xsi', 'http://www.w3.org/2001/XMLSchema-instance'),
        ('xsi:schemaLocation',
         'http://www.mediawiki.org/xml/export-0.3/ http://www.mediawiki.org/xml/export-0.3.xsd'),
        ('version', '0.3'),
        ('xml:lang', 'zh'),
    )

    document = Document()
    root = document.createElement('mediawiki')
    for attr_name, attr_value in root_attributes:
        root.setAttribute(attr_name, attr_value)
    document.appendChild(root)
    return document
def _text_element(doc, tag, value):
    """Create a ``<tag>`` element whose only child is a text node of *value*."""
    element = doc.createElement(tag)
    element.appendChild(doc.createTextNode(value))
    return element


def append(doc, title, contributor, text, time=None):
    """Append a single-revision ``<page>`` to the ``<mediawiki>`` element.

    The generated structure is::

        <page>
          <title>title</title>
          <revision>
            <timestamp>time</timestamp>
            <contributor><username>contributor</username></contributor>
            <text>text</text>
          </revision>
        </page>

    Parameters:
        doc: DOM document containing at least one ``<mediawiki>`` element;
            the page is added to the first one found.
        title: page title string.
        contributor: user name recorded for the revision.
        text: revision text (stored as a text node, so markup characters
            are escaped on serialization).
        time: revision timestamp string.  When omitted (``None``), the
            current UTC time formatted as ``YYYY-MM-DDTHH:MM:SSZ`` is used.

    Raises:
        IndexError: if *doc* contains no ``<mediawiki>`` element.
    """
    # Resolve the default per call; a default expression in the signature
    # would be evaluated only once, when the function is defined.
    if time is None:
        time = datetime.datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%SZ')

    contributor_el = doc.createElement('contributor')
    contributor_el.appendChild(_text_element(doc, 'username', contributor))

    revision = doc.createElement('revision')
    revision.appendChild(_text_element(doc, 'timestamp', time))
    revision.appendChild(contributor_el)
    revision.appendChild(_text_element(doc, 'text', text))

    page = doc.createElement('page')
    page.appendChild(_text_element(doc, 'title', title))
    page.appendChild(revision)

    doc.getElementsByTagName('mediawiki')[0].appendChild(page)
def write(doc, fn, encoding='utf8'):
    """Serialize *doc* as XML and save it to the file *fn*.

    Parameters:
        doc: DOM document (or node) providing ``toxml``.
        fn: path of the output file; an existing file is overwritten.
        encoding: encoding passed to ``toxml``; it is also declared in the
            XML header.  If ``None``, ``toxml`` returns text, which is
            stored as UTF-8 (the XML default encoding).
    """
    # Serialize before opening the file so that a serialization failure
    # does not leave behind a truncated output file.
    data = doc.toxml(encoding)
    if not isinstance(data, bytes):
        data = data.encode('utf-8')

    # toxml(encoding) yields encoded bytes, so the file must be opened in
    # binary mode; the with-block closes it even if the write fails.
    with open(fn, 'wb') as f:
        f.write(data)
def main():
    """Build a demo dump with two identical sample pages and save it.

    The result is written to ``to_import.xml`` in the current directory.
    """
    dump = create()
    # Title, contributor and text shared by both sample pages.
    sample_page = (u'Wikisource:沙盒’', u'Liangent', u'新的<>测\n试')
    for _ in range(2):
        append(dump, *sample_page)
    write(dump, 'to_import.xml')
# Generate the sample dump only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()