# Source code for apps.utils.docxhelper

# -*- coding: utf-8 -*-
"""
This module contains classes and functions for generating
DocX documents from HTML used for exporting reports.

:subtitle:`Function definitions:`
"""
from datetime import datetime
import cStringIO
from django.http import HttpResponse
from docx import paragraph, relationshiplist, nsprefixes, newdocument,\
    coreproperties, savedocx, contenttypes, appproperties, websettings,\
    wordrelationships
from HTMLParser import HTMLParser
from django.template import loader


class HTMLToDocXParser(HTMLParser):
    '''
    Class for parsing HTML to Docx

    Text is collected in ``current_data``. Whenever the set of active
    styles changes, the collected text is stored in ``current_paragraph``
    as a ``(text, style)`` run, where ``style`` is a string made of the
    active style tags (e.g. ``'bi'``). Paragraph boundaries (``<p>`` /
    ``</p>``) pass the collected runs to ``paragraph()`` and append the
    result to ``body``.
    '''
    # accepted styles
    styles = ('b', 'i', 'u')
    current_style = ''
    current_data = ''
    body = None
    # The list of runs is created per instance in __init__ / set_body.
    # A list literal here would be shared between all parser instances.
    current_paragraph = None

    def __init__(self, *args, **kwargs):
        """
        Create the parser with its own, empty list of runs

        Args:
            - args, kwargs: passed through unchanged to HTMLParser
        """
        # Explicit base call instead of super(): the Python 2 HTMLParser
        # is an old-style class, which super() does not support.
        HTMLParser.__init__(self, *args, **kwargs)
        self.current_paragraph = []

    def set_body(self, body):
        """
        Initialize the DocX document by setting the body

        Args:
            - body: the DocX body
        """
        self.body = body
        self.current_paragraph = []
        self.current_data = ''
        self.current_style = ''

    def _append_run(self):
        """
        Store the pending text with the active styles as a run
        """
        self.current_paragraph.append(
            (self.current_data, str(self.current_style)))

    def _flush_paragraph(self):
        """
        Append the collected runs to the body as one paragraph
        and start a new, empty list of runs
        """
        self.body.append(paragraph(self.current_paragraph))
        self.current_paragraph = []

    def handle_starttag(self, tag, attrs):
        """
        Handles a starttag in the HTML. Converts a predefined
        list of tags to the corresponding DocX definitions.

        Args:
            - tag: the name of the HTML tag
            - attrs: optional attrs coupled to the HTML tag (unused)
        """
        if tag == 'br':
            # A bare <br> only triggers handle_starttag, never
            # handle_endtag, so the line break has to be added here.
            self.current_data = self.current_data + '\n'

        if tag in self.styles and tag not in self.current_style:
            # Close the run written in the previous style before
            # switching the new style on.
            if self.current_data not in (None, ''):
                self._append_run()
            self.current_data = ''
            self.current_style = self.current_style + tag

        if tag == 'p' and self.current_paragraph != []:  # pragma: no cover
            # A new paragraph starts while runs are still pending:
            # write them out as their own paragraph first.
            self._append_run()
            self._flush_paragraph()
            # Clear the text that was just written, otherwise it would be
            # repeated at the start of the paragraph that is being opened.
            self.current_data = ''

    def handle_startendtag(self, tag, attrs):
        """
        Handles a self-closing tag such as <br/>.

        Behaves like a starttag directly followed by an endtag, except
        for 'br': both handlers add a line break for that tag, so only
        the starttag handler is called to keep it at one line break.

        Args:
            - tag: the name of the HTML tag
            - attrs: optional attrs coupled to the HTML tag
        """
        self.handle_starttag(tag, attrs)
        if tag != 'br':
            self.handle_endtag(tag)

    def handle_endtag(self, tag):
        """
        Handles a endtag in the HTML. Converts a predefined
        list of tags to the corresponding DocX definitions:

        Args:
            - tag: the name of the HTML tag
        """
        if tag == 'br':
            # Stray </br>: treated as a line break as well.
            self.current_data = self.current_data + '\n'

        if tag == 'p':
            # Only ever set to False here; nothing in this class reads it.
            self.open_paragraph = False
            # Write a paragraph when there are stored runs or pending
            # text; an empty <p></p> produces nothing.
            if (self.current_paragraph != []
                    or self.current_data not in (None, '')):
                self._append_run()
                self._flush_paragraph()
            self.current_data = ''

        if tag in self.styles and tag in self.current_style:
            # Close the run written in this style and switch it off.
            self._append_run()
            self.current_data = ''
            self.current_style = self.current_style.replace(tag, '')

    def handle_data(self, data):
        """
        Adds text to the DocX document

        Args:
            - data: the data/text to add to the document
        """
        self.current_data = self.current_data + data

    def handle_entityref(self, name):
        """
        Handle special HTML entities

        Args:
            - name: the name of the HTML entity
        """
        if name == 'nbsp':
            # Non-breaking spaces are exported as plain spaces.
            data = ' '
        else:
            data = self.unescape('&' + name + ';')
        self.current_data += data
def convertHtmlToDocX(sourceHtml):
    '''
    Wrapper function for converting HTML to DocX

    Creates a new document, lets HTMLToDocXParser fill its body from the
    HTML, saves the document into an in-memory buffer and returns the
    buffer content as a file download.

    Args:
        - sourceHtml: the HTML to convert

    Returns:
        A response instance with the DocX document included
        (content type 'application/docx', sent as attachment
        "remote_care_export.docx")
    '''
    output = cStringIO.StringIO()
    relationships = relationshiplist()
    document = newdocument()

    # The parser appends the paragraphs to the <w:body> element.
    docx_body = document.xpath(
        '/w:document/w:body', namespaces=nsprefixes)[0]
    html_parser = HTMLToDocXParser()
    html_parser.set_body(docx_body)
    html_parser.feed(sourceHtml)

    core_properties = coreproperties(
        title='Remote care DocX export',
        subject='',
        creator='Remote Care',
        keywords=[''])

    savedocx(
        document,
        core_properties,
        appproperties(),
        contenttypes(),
        websettings(),
        wordrelationships(relationships),
        output)

    response = HttpResponse(
        output.getvalue(), content_type='application/docx')
    response['Content-Disposition'] = (
        'attachment; filename="remote_care_export.docx"')
    return response
def render_to_DocX(request, body):
    '''
    Shortcut function for rendering the HTML template and create DocX

    Renders 'utils/export/default_docx.html' with the full name of the
    requesting user, the current date and time and the given body, and
    converts the resulting HTML with convertHtmlToDocX.

    Args:
        - request: the initial request
        - body: the body with the text to be exported

    Returns:
        A response instance with the DocX document included
    '''
    docx_template = loader.get_template('utils/export/default_docx.html')
    rendered_html = docx_template.render(
        {
            'username': request.user.full_name,
            'date': datetime.today().strftime("%d %B %Y %H:%M:%S"),
            'body': body,
        },
        request)
    return convertHtmlToDocX(rendered_html)