Pillow: ImageFont Default font does not handle unicode characters > 256

Created on 2 Oct 2017 · 7 Comments · Source: python-pillow/Pillow

Here is file cluster.py

from PIL import ImageDraw,Image
from math import sqrt
import random

def readfile(filename):
    """Parse a tab-separated data file into row names, column names and values.

    The first line holds the column titles; its first cell is discarded.
    Every following line holds a row name in its first cell followed by
    numeric cells.

    Args:
        filename: Path of the tab-separated file to read.

    Returns:
        A tuple ``(rownames, colnames, data)`` where ``data`` is a list
        holding one list of floats per data line.

    Raises:
        IndexError: If the file contains no lines.
        ValueError: If a data cell cannot be converted to ``float``.
    """
    # Read inside a with-block so the handle is closed as soon as the lines
    # are in memory (the bare open() call used before never closed it).
    with open(filename) as f:
        lines = list(f)

    # First line is the column titles
    colnames = lines[0].strip().split('\t')[1:]
    rownames = []
    data = []
    for line in lines[1:]:
        p = line.strip().split('\t')
        # First column in each row is the rowname
        rownames.append(p[0])
        # The data for this row is the remainder of the row
        data.append([float(x) for x in p[1:]])
    return (rownames, colnames, data)


def pearson(v1, v2):
    """Return a Pearson-based distance between two vectors.

    The result is ``1.0 - r`` where ``r`` is the Pearson correlation
    coefficient of ``v1`` and ``v2``, so perfectly correlated vectors give
    0.0.  When the denominator of ``r`` is zero, 0 is returned instead.
    """
    n = len(v1)

    # Plain totals of each vector
    total_a = sum(v1)
    total_b = sum(v2)

    # Totals of the squared elements
    squares_a = sum([a ** 2 for a in v1])
    squares_b = sum([b ** 2 for b in v2])

    # Total of the element-wise products
    products = sum([v1[k] * v2[k] for k in range(n)])

    # Numerator and denominator of r
    numerator = products - total_a * total_b / n
    spread_a = squares_a - pow(total_a, 2) / n
    spread_b = squares_b - pow(total_b, 2) / n
    denominator = sqrt(spread_a * spread_b)
    if denominator == 0:
        return 0
    return 1.0 - numerator / denominator

class bicluster:
    """Node of a hierarchical-clustering tree.

    The constructor simply stores its arguments:
        vec: the vector held by this node.
        left, right: child nodes, or None.
        distance: value stored for this node (default 0.0).
        id: identifier stored for this node (default None).
    """

    def __init__(self, vec, left=None, right=None, distance=0.0, id=None):
        self.vec = vec
        self.left, self.right = left, right
        self.distance = distance
        self.id = id

def hcluster(rows, distance=pearson):
    """Agglomeratively cluster ``rows`` into a binary tree of biclusters.

    Starting from one leaf per row (id = row index), the two closest
    clusters under ``distance`` are repeatedly replaced by a new node that
    holds the element-wise average of their vectors, until a single root
    remains.  Merged nodes receive the ids -1, -2, ... in creation order.

    Returns the root ``bicluster``.
    """
    # Cache of pairwise distances, keyed by the two cluster ids
    cache = {}
    next_id = -1

    # Every row starts out as its own cluster
    clust = [bicluster(vec, id=idx) for idx, vec in enumerate(rows)]

    while len(clust) > 1:
        best_i, best_j = 0, 1
        closest = distance(clust[0].vec, clust[1].vec)

        # Scan every unordered pair for the smallest distance
        for i, first in enumerate(clust):
            for j in range(i + 1, len(clust)):
                second = clust[j]
                key = (first.id, second.id)
                # Only compute a distance the first time a pair is seen
                if key not in cache:
                    cache[key] = distance(first.vec, second.vec)
                d = cache[key]

                if d < closest:
                    closest = d
                    best_i, best_j = i, j

        a, b = clust[best_i], clust[best_j]

        # The merged vector is the element-wise mean of the chosen pair
        width = len(clust[0].vec)
        mergevec = [(a.vec[k] + b.vec[k]) / 2.0 for k in range(width)]

        merged = bicluster(mergevec, left=a, right=b, distance=closest,
                           id=next_id)

        # Ids of clusters that were not in the original set are negative
        next_id -= 1
        # Delete the higher index first so the lower index stays valid
        del clust[best_j]
        del clust[best_i]
        clust.append(merged)

    return clust[0]


def printclust(clust, labels=None, n=0):
    """Print the cluster tree as an indented text hierarchy.

    Args:
        clust: Node to print; must expose ``id``, ``left`` and ``right``.
        labels: Optional sequence indexed by leaf id.  When omitted, the
            numeric id of each leaf is printed instead.
        n: Current nesting depth, used as the number of indent spaces.
    """
    # Indent on the same line as the node text.  ``print (' '),`` is
    # Python 2 syntax; under Python 3 it emits a newline per level.
    print(' ' * n, end='')
    if clust.id < 0:
        # negative id means that this is a branch
        print('-')
    elif labels is None:
        # non-negative id means that this is an endpoint
        print(clust.id)
    else:
        print(labels[clust.id])

    # now print the left and right branches
    if clust.left is not None:
        printclust(clust.left, labels=labels, n=n + 1)
    if clust.right is not None:
        printclust(clust.right, labels=labels, n=n + 1)
   # use the recursion to get the height of the given cluster
def getheight(clust):
    """Return the number of endpoints (leaves) under ``clust``.

    A node whose ``left`` and ``right`` are both None is an endpoint and
    counts as 1; a branch counts the endpoints of both children.  The tree
    is walked with an explicit stack instead of recursion so that very deep
    trees do not exceed the interpreter's recursion limit.
    """
    leaves = 0
    pending = [clust]
    while pending:
        node = pending.pop()
        # Identity checks: a custom __eq__ must not influence the None test
        if node.left is None and node.right is None:
            leaves += 1
        else:
            pending.append(node.left)
            pending.append(node.right)
    return leaves


def getdepth(clust):
    """Return the largest accumulated ``distance`` from ``clust`` to a leaf.

    An endpoint (no children) has depth 0.  A branch adds its own
    ``distance`` to the deeper of its two subtrees.
    """
    # Identity test is the idiomatic None check and ignores custom __eq__
    if clust.left is None and clust.right is None:
        return 0

    # The depth of a branch is the greater of its two sides
    # plus its own distance
    return max(getdepth(clust.left), getdepth(clust.right)) + clust.distance


def drawdendrogram(clust, labels, jpeg='clusters.jpg'):
    """Render the cluster tree as a dendrogram and save it as a JPEG.

    The image is 1200 pixels wide and 20 pixels tall per endpoint, with a
    white background and red connecting lines.

    Args:
        clust: Root node of the tree to draw.
        labels: Sequence indexed by endpoint id, giving the text drawn at
            each endpoint.
        jpeg: Path of the image file to write.
    """
    # height and width
    h = getheight(clust) * 20
    w = 1200
    depth = getdepth(clust)

    # width is fixed, so scale distances accordingly.  A tree whose total
    # depth is 0 (a single endpoint, or only zero-distance merges) would
    # otherwise divide by zero; such a tree has no horizontal extent.
    scaling = float(w - 150) / depth if depth else 0.0

    # Create a new image with a white background
    img = Image.new('RGB', (w, h), (255, 255, 255))
    draw = ImageDraw.Draw(img)

    # Short stub leading into the root node
    draw.line((0, h / 2, 10, h / 2), fill=(255, 0, 0))

    # Draw the first node
    drawnode(draw, clust, 10, h / 2, scaling, labels=labels)
    img.save(jpeg, 'JPEG')


def drawnode(draw, clust, x, y, scaling, labels):
    """Draw ``clust`` at ``(x, y)`` and recursively draw its subtree.

    Endpoints (non-negative id) are drawn as their label text.  Branches
    are drawn as a vertical red line with two horizontal red lines whose
    length is the node's ``distance`` multiplied by ``scaling``, followed
    by both children.
    """
    if clust.id >= 0:
        # Endpoint: write the item label next to the node position
        draw.text((x + 5, y - 7), labels[clust.id], (0, 0, 0))
        return

    red = (255, 0, 0)

    # Each subtree gets 20 pixels of vertical room per endpoint
    h1 = getheight(clust.left) * 20
    h2 = getheight(clust.right) * 20
    top = y - (h1 + h2) / 2
    bottom = y + (h1 + h2) / 2
    upper_y = top + h1 / 2
    lower_y = bottom - h2 / 2

    # Line length
    ll = clust.distance * scaling
    child_x = x + ll

    # Vertical line from this cluster to its children
    draw.line((x, upper_y, x, lower_y), fill=red)
    # Horizontal line to the left item
    draw.line((x, upper_y, child_x, upper_y), fill=red)
    # Horizontal line to the right item
    draw.line((x, lower_y, child_x, lower_y), fill=red)

    # Recurse into the left and right nodes
    drawnode(draw, clust.left, child_x, upper_y, scaling, labels)
    drawnode(draw, clust.right, child_x, lower_y, scaling, labels)

And here is file excute.py

# Driver script: cluster the rows of blogdata.txt and save the dendrogram.
import cluster
# readfile returns (row names, column names, data rows)
blognames, words, data = cluster.readfile('C:/Users/Bui/Documents/blogdata.txt')
cluts = cluster.hcluster(data)
# The row names are used as the endpoint labels of the drawing
cluster.drawdendrogram(cluts, blognames,jpeg= 'blogclust.jpg')

When I run excute.py I get the following errors:

C:\Users\Bui\AppData\Local\Programs\Python\Python36-32\python.exe C:/Users/Bui/PycharmProjects/helloworld/excute.py
Traceback (most recent call last):
  File "C:\Users\Bui\AppData\Local\Programs\Python\Python36-32\lib\site-packages\PIL\ImageDraw.py", line 217, in text
    mask, offset = font.getmask2(text, self.fontmode, *args, **kwargs)
AttributeError: 'ImageFont' object has no attribute 'getmask2'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "C:/Users/Bui/PycharmProjects/helloworld/excute.py", line 4, in <module>
    cluster.drawdendrogram(cluts, blognames,jpeg= 'blogclust.jpg')
  File "C:\Users\Bui\PycharmProjects\helloworld\cluster.py", line 158, in drawdendrogram
    labels = labels
  File "C:\Users\Bui\PycharmProjects\helloworld\cluster.py", line 203, in drawnode
    labels
  File "C:\Users\Bui\PycharmProjects\helloworld\cluster.py", line 203, in drawnode
    labels
  File "C:\Users\Bui\PycharmProjects\helloworld\cluster.py", line 203, in drawnode
    labels
  File "C:\Users\Bui\PycharmProjects\helloworld\cluster.py", line 195, in drawnode
    labels
  File "C:\Users\Bui\PycharmProjects\helloworld\cluster.py", line 203, in drawnode
    labels
  File "C:\Users\Bui\PycharmProjects\helloworld\cluster.py", line 203, in drawnode
    labels
  File "C:\Users\Bui\PycharmProjects\helloworld\cluster.py", line 203, in drawnode
    labels
  File "C:\Users\Bui\PycharmProjects\helloworld\cluster.py", line 195, in drawnode
    labels
  File "C:\Users\Bui\PycharmProjects\helloworld\cluster.py", line 203, in drawnode
    labels
  File "C:\Users\Bui\PycharmProjects\helloworld\cluster.py", line 203, in drawnode
    labels
  File "C:\Users\Bui\PycharmProjects\helloworld\cluster.py", line 203, in drawnode
    labels
  [Previous line repeated 3 more times]
  File "C:\Users\Bui\PycharmProjects\helloworld\cluster.py", line 195, in drawnode
    labels
  File "C:\Users\Bui\PycharmProjects\helloworld\cluster.py", line 207, in drawnode
    draw.text((x + 5, y - 7), labels[clust.id], (0, 0, 0))
  File "C:\Users\Bui\AppData\Local\Programs\Python\Python36-32\lib\site-packages\PIL\ImageDraw.py", line 221, in text
    mask = font.getmask(text, self.fontmode, *args, **kwargs)
  File "C:\Users\Bui\AppData\Local\Programs\Python\Python36-32\lib\site-packages\PIL\ImageFont.py", line 113, in getmask
    return self.font.getmask(text, mode)
UnicodeEncodeError: 'latin-1' codec can't encode character '\u2013' in position 9: ordinal not in range(256)

Process finished with exit code 1

Please help me fix it.

All 7 comments

@scsmla You've got a lot of stuff going on there unrelated to Pillow.

Please read these and come up with a minimal, complete, and verifiable example:

Thank you.

UnicodeEncodeError: 'latin-1' codec can't encode character '\u2013' in position 9: ordinal not in range(256)

You need to specify a font that can handle unicode. The default built-in bitmap font is latin-1 only, as it only has a potential for 256 characters.

Minimal repro:

from PIL import Image, ImageDraw
# U+2013 lies outside latin-1, which is all the default bitmap font covers
text = u'\u2013'
img = Image.new('RGB', (10,10))
draw = ImageDraw.Draw(img)
# No font is given, so the default font is used and this call raises
# UnicodeEncodeError
draw.text((0,0), text, "white")

Working:

from PIL import Image, ImageDraw, ImageFont
text = u'\u2013'
img = Image.new('RGB', (10,10))
draw = ImageDraw.Draw(img)
# Load a TrueType font.  The path is relative -- presumably to a Pillow
# source checkout; substitute any .ttf file available on your system.
font = ImageFont.truetype('Tests/fonts/DejaVuSans.ttf')
# Passing font= explicitly avoids the latin-1-only default font
draw.text((0,0), text, "white", font=font)

@wiredfool I tried your code, but it is still not working. Could you please check it again for me?

In what way is the code not working?

I get the same error as with the previous version of my code. This time I tried the font "arialbd.ttf".
Here is the updated cluster.py:

# -*- coding: utf-8 -*-
from PIL import Image, ImageDraw, ImageFont


def readfile(filename):
    """Load a tab-separated table of numbers with row and column headers.

    Args:
        filename: Path of the file to read.  Its first line supplies the
            column names (the first cell is dropped); each later line
            supplies a row name followed by that row's numeric cells.

    Returns:
        ``(rownames, colnames, data)``, where ``data`` holds one list of
        floats per data line.

    Raises:
        IndexError: If the file contains no lines.
        ValueError: If a data cell cannot be converted to ``float``.
    """
    rownames = []
    data = []

    # The with-block closes the handle once the lines are read; the bare
    # open() call used previously left it open.
    with open(filename) as source:
        lines = source.readlines()

    # First line is the column titles
    colnames = lines[0].strip().split('\t')[1:]
    for line in lines[1:]:
        # First column is the row name, the remainder is that row's data
        name, *values = line.strip().split('\t')
        rownames.append(name)
        data.append([float(x) for x in values])
    return rownames, colnames, data


from math import sqrt


def pearson(v1, v2):
    """Return ``1.0 - r`` for the Pearson correlation ``r`` of two vectors.

    Perfectly correlated inputs therefore yield 0.0.  When the denominator
    of ``r`` evaluates to zero, 0 is returned.
    """
    count = len(v1)
    indices = range(count)

    # Sums, sums of squares and the sum of products
    sum_x = sum(v1)
    sum_y = sum(v2)
    sum_xx = sum([pow(x, 2) for x in v1])
    sum_yy = sum([pow(y, 2) for y in v2])
    sum_xy = sum([v1[i] * v2[i] for i in indices])

    # Numerator of r, then the product under the square root
    top = sum_xy - (sum_x * sum_y / count)
    under_root = (sum_xx - pow(sum_x, 2) / count) * (sum_yy - pow(sum_y, 2) / count)
    bottom = sqrt(under_root)

    return 0 if bottom == 0 else 1.0 - top / bottom


class bicluster:
    """One node of the cluster tree; the constructor stores its arguments.

    ``vec`` is the node's vector, ``left``/``right`` are child nodes or
    None, ``distance`` defaults to 0.0 and ``id`` defaults to None.
    """

    def __init__(
        self,
        vec,
        left=None,
        right=None,
        distance=0.0,
        id=None,
    ):
        self.id = id
        self.vec = vec
        self.distance = distance
        self.left = left
        self.right = right


def hcluster(rows, distance=pearson):
    """Build a binary cluster tree from ``rows`` by repeated pair merging.

    Each row starts as a leaf ``bicluster`` whose id is its row index.
    While more than one cluster remains, the pair with the smallest
    ``distance`` is replaced by a new node whose vector is the element-wise
    average of the pair; new nodes are numbered -1, -2, ... as they are
    created.  The last remaining cluster is returned.
    """
    seen = {}
    new_id = -1

    def _lookup(a, b):
        # Distances are cached by id pair and computed only on first use
        pair = (a.id, b.id)
        if pair not in seen:
            seen[pair] = distance(a.vec, b.vec)
        return seen[pair]

    # Clusters are initially just the rows
    clust = [bicluster(rows[r], id=r) for r in range(len(rows))]

    while len(clust) > 1:
        lowestpair = (0, 1)
        closest = distance(clust[0].vec, clust[1].vec)

        # Visit every pair (i < j), keeping the smallest distance found
        total = len(clust)
        for i in range(total):
            for j in range(i + 1, total):
                d = _lookup(clust[i], clust[j])
                if d < closest:
                    closest, lowestpair = d, (i, j)

        lo, hi = lowestpair
        left, right = clust[lo], clust[hi]

        # Average the two chosen vectors position by position
        mergevec = [
            (left.vec[k] + right.vec[k]) / 2.0
            for k in range(len(clust[0].vec))
        ]

        newcluster = bicluster(mergevec, left=left, right=right,
                               distance=closest, id=new_id)

        # Cluster ids that weren't in the original set are negative
        new_id -= 1
        # Pop the larger index first so the smaller one is still correct
        clust.pop(hi)
        clust.pop(lo)
        clust.append(newcluster)

    return clust[0]


def printclust(clust, labels=None, n=0):
    """Print the cluster tree, one node per line, indented by depth.

    Args:
        clust: Node to print; must expose ``id``, ``left`` and ``right``.
        labels: Optional sequence indexed by endpoint id.  Without it the
            numeric id of each endpoint is printed.
        n: Nesting depth of ``clust``; one leading space is printed per
            level.
    """
    if clust.id < 0:
        # negative id means that this is a branch
        text = '-'
    elif labels is None:
        # non-negative id means that this is an endpoint
        text = clust.id
    else:
        text = labels[clust.id]

    # Put the indent on the same line as the node.  Calling print(' ') once
    # per level emitted a newline each time instead of indenting.
    print(' ' * n + str(text))

    # now print the left and right branches
    for child in (clust.left, clust.right):
        if child is not None:
            printclust(child, labels=labels, n=n + 1)


def getheight(clust):
    """Count the endpoints (nodes without children) below ``clust``.

    An endpoint contributes 1 and a branch contributes the total of both
    of its sides.  An explicit stack replaces recursion so that a very
    deep tree cannot exceed the interpreter's recursion limit.
    """
    count = 0
    stack = [clust]
    while stack:
        current = stack.pop()
        left, right = current.left, current.right
        # Is this an endpoint? Then it adds exactly 1 to the height
        if left is None and right is None:
            count += 1
            continue
        # Otherwise both sides still have to be counted
        stack.extend((left, right))
    return count


def getdepth(clust):
    """Return the total ``distance`` along the deepest path below ``clust``.

    An endpoint has depth 0.  A branch has the depth of its deeper side
    plus its own ``distance``.
    """
    # Use identity, not ==, so a custom __eq__ cannot affect the None test
    is_endpoint = clust.left is None and clust.right is None
    if is_endpoint:
        return 0

    # The depth of a branch is the greater of its two sides
    # plus its own distance
    deepest_side = max(getdepth(clust.left), getdepth(clust.right))
    return deepest_side + clust.distance


def drawdendrogram(clust, labels, jpeg='clusters.jpg', font=None):
    """Render the cluster tree as a dendrogram and save it as a JPEG.

    The image is 1200 pixels wide and 20 pixels tall per endpoint, with a
    white background and red connecting lines.

    Args:
        clust: Root node of the tree to draw.
        labels: Sequence indexed by endpoint id, giving the text drawn at
            each endpoint.
        jpeg: Path of the image file to write.
        font: Optional font object used for the labels.  When omitted,
            'arialbd.ttf' is loaded with ``ImageFont.truetype``.

    Raises:
        OSError: If ``font`` is omitted and 'arialbd.ttf' cannot be read.
    """
    # height and width
    h = getheight(clust) * 20
    w = 1200
    depth = getdepth(clust)

    # width is fixed, so scale distances accordingly.  A tree of total
    # depth 0 (a single endpoint, or only zero-distance merges) would
    # otherwise divide by zero.
    scaling = float(w - 150) / depth if depth else 0.0

    # The default bitmap font only covers latin-1, so labels are drawn with
    # a TrueType font that is handed down to drawnode.
    if font is None:
        font = ImageFont.truetype('arialbd.ttf')

    # Create a new image with a white background, sized for the whole tree
    # (a 10x10 canvas left over from a pasted test snippet cannot hold it)
    img = Image.new('RGB', (w, h), (255, 255, 255))
    draw = ImageDraw.Draw(img)

    draw.line((0, h / 2, 10, h / 2), fill=(255, 0, 0))

    # Draw the first node
    drawnode(draw, clust, 10, (h / 2), scaling, labels, font=font)
    img.save(jpeg, 'JPEG')


def drawnode(draw, clust, x, y, scaling, labels, font=None):
    """Draw ``clust`` at ``(x, y)`` and recursively draw its subtree.

    Args:
        draw: Drawing object providing ``line`` and ``text``.
        clust: Node to draw; a negative id marks a branch.
        x, y: Position of the node.
        scaling: Factor converting a node's ``distance`` to a line length.
        labels: Sequence indexed by endpoint id.
        font: Optional font passed to ``draw.text``; None selects the
            drawing object's default font.
    """
    if clust.id < 0:
        h1 = getheight(clust.left) * 20
        h2 = getheight(clust.right) * 20
        top = y - (h1 + h2) / 2
        bottom = y + (h1 + h2) / 2
        # Line length
        ll = clust.distance * scaling
        # Vertical line from this cluster to children
        draw.line((x, top + h1 / 2, x, bottom - h2 / 2), fill=(255, 0, 0))

        # Horizontal line to left item
        draw.line((x, top + h1 / 2, x + ll, top + h1 / 2), fill=(255, 0, 0))

        # Horizontal line to right item
        draw.line((x, bottom - h2 / 2, x + ll, bottom - h2 / 2), fill=(255, 0, 0))

        # Call the function to draw the left and right nodes
        drawnode(draw, clust.left, x + ll, top + h1 / 2, scaling, labels,
                 font=font)
        drawnode(draw, clust.right, x + ll, bottom - h2 / 2, scaling, labels,
                 font=font)
    else:
        # If this is an endpoint, draw the item label in the chosen font
        draw.text((x + 5, y - 7), labels[clust.id], fill=(0, 0, 0), font=font)

Nothing changed in the excute.py file.

@scsmla: Once again, you've pasted 150 lines of code without isolating the part that's going wrong or the exception that it raises.

The core issue that you were having can be condensed down to the 4 or 5 line repro that I posted, which attempts to render one character from your file onto a small image. It fails on the default font, it passes with a truetype font. Pasting that directly into your larger script just obscures any error that you might have, and if it does work, will render only that one character on a small canvas.

So, from this, I can guess at two possibilities:

1) You aren't getting the font loaded and you're effectively passing None in as the font, which still falls back to the default font
2) You've actually got a different error.

UnicodeEncodeError: 'latin-1' codec can't encode character '\u2013' in position 9: ordinal not in range(256)

You need to specify a font that can handle unicode. The default built-in bitmap font is latin-1 only, as it only has a potential for 256 characters.

Minimal repro:

from PIL import Image, ImageDraw
# U+2013 is not a latin-1 character
text = u'\u2013'
img = Image.new('RGB', (10,10))
draw = ImageDraw.Draw(img)
# Drawn with the default font (no font= argument); this is the failing call
draw.text((0,0), text, "white")

Working:

from PIL import Image, ImageDraw, ImageFont
text = u'\u2013'
img = Image.new('RGB', (10,10))
draw = ImageDraw.Draw(img)
# Relative path to a TrueType font -- presumably inside a Pillow source
# checkout; point it at any .ttf file you have.
font = ImageFont.truetype('Tests/fonts/DejaVuSans.ttf')
# Same call as the repro, but with an explicit TrueType font
draw.text((0,0), text, "white", font=font)

works for me

Was this page helpful?
0 / 5 - 0 ratings

Related issues

damianmoore picture damianmoore · 4 Comments

hxzhao527 picture hxzhao527 · 4 Comments

steph-ben picture steph-ben · 4 Comments

vytisb picture vytisb · 4 Comments

thinrhino picture thinrhino · 3 Comments