Here is file cluster.py
from PIL import ImageDraw,Image
from math import sqrt
import random
def readfile(filename):
lines = [line for line in open(filename)]
# First line is the column titles
colnames = lines[0].strip().split('\t')[1:]
rownames = []
data = []
for line in lines[1:]:
p = line.strip().split('\t')
# First column in each row is the rowname
rownames.append(p[0])
# The data for this row is the remainder of the row
data.append([float(x) for x in p[1:]])
return (rownames, colnames, data)
def pearson(v1, v2):
# Simple sums
sum1 = sum(v1)
sum2 = sum(v2)
# Sums of the squares
sum1Sq = sum([pow(v, 2) for v in v1])
sum2Sq = sum([pow(v, 2) for v in v2])
# Sum of the products
pSum = sum([v1[i] * v2[i] for i in range(len(v1))])
# Calculate r (Pearson score)
num = pSum - sum1 * sum2 / len(v1)
den = sqrt((sum1Sq - pow(sum1, 2) / len(v1)) * (sum2Sq - pow(sum2, 2)
/ len(v1)))
if den == 0:
return 0
return 1.0 - num / den
class bicluster:
def __init__(
self,
vec,
left=None,
right=None,
distance=0.0,
id=None,
):
self.left = left
self.right = right
self.vec = vec
self.id = id
self.distance = distance
def hcluster(rows, distance=pearson):
distances = {}
currentclustid = -1
# Clusters are initially just the rows
clust = [bicluster(rows[i], id = i) for i in range(len(rows))]
while len(clust) > 1:
lowestpair = (0, 1)
closest = distance(clust[0].vec, clust[1].vec)
# loop through every pair looking for the smallest distance
for i in range(len(clust)):
for j in range(i + 1, len(clust)):
# distances is the cache of distance calculations
if (clust[i].id, clust[j].id) not in distances:
distances[(clust[i].id, clust[j].id)] = \
distance(clust[i].vec, clust[j].vec)
d = distances[(clust[i].id, clust[j].id)]
if d < closest:
closest = d
lowestpair = (i, j)
# calculate the average of the two clusters
mergevec = [(clust[lowestpair[0]].vec[i] + clust[lowestpair[1]].vec[i])
/ 2.0 for i in range(len(clust[0].vec))]
# create the new cluster
newcluster = bicluster(mergevec, left=clust[lowestpair[0]],
right=clust[lowestpair[1]], distance=closest,
id=currentclustid)
# cluster ids that weren't in the original set are negative
currentclustid -= 1
del clust[lowestpair[1]]
del clust[lowestpair[0]]
clust.append(newcluster)
return clust[0]
def printclust(clust, labels=None, n=0):
# indent to make a hierarchy layout
for i in range(n):
print (' '),
if clust.id < 0:
# negative id means that this is branch
print ('-')
else:
# positive id means that this is an endpoint
if labels == None:
print (clust.id)
else:
print (labels[clust.id])
# now print the right and left branches
if clust.left != None:
printclust(clust.left, labels=labels, n=n + 1)
if clust.right != None:
printclust(clust.right, labels=labels, n=n + 1)
# use the recursion to get the height of the given cluster
def getheight(clust):
# Is this an endpoint? Then the height is just 1
if clust.left == None and clust.right == None:
return 1
# Otherwise the height is the same of the heights of
# each branch
return getheight(clust.left) + getheight(clust.right)
def getdepth(clust):
# The distance of an endpoint is 0.0
if clust.left == None and clust.right == None:
return 0
# The distance of a branch is the greater of its two sides
# plus its own distance
return max(getdepth(clust.left), getdepth(clust.right)) + clust.distance
def drawdendrogram(clust, labels, jpeg='clusters.jpg'):
# height and width
h = getheight(clust) * 20
w = 1200
depth = getdepth(clust)
# width is fixed, so scale distances accordingly
scaling = float(w - 150) / depth
# Create a new image with a white background
img = Image.new('RGB', (w, h), (255, 255, 255))
draw = ImageDraw.Draw(img)
draw.line((0, h / 2, 10, h / 2), fill=(255, 0, 0))
# Draw the first node
drawnode(
draw,
clust,
10,
h / 2,
scaling,
labels = labels
)
img.save(jpeg, 'JPEG')
def drawnode(
draw,
clust,
x,
y,
scaling,
labels
):
if clust.id < 0:
h1 = getheight(clust.left) * 20
h2 = getheight(clust.right) * 20
top = y - (h1 + h2) / 2
bottom = y + (h1 + h2) / 2
# Line length
ll = clust.distance * scaling
# Vertical line from this cluster to children
draw.line((x, top + h1 / 2, x, bottom - h2 / 2), fill=(255, 0, 0))
# Horizontal line to left item
draw.line((x, top + h1 / 2, x + ll, top + h1 / 2), fill=(255, 0, 0))
# Horizontal line to right item
draw.line((x, bottom - h2 / 2, x + ll, bottom - h2 / 2), fill=(255, 0,
0))
# Call the function to draw the left and right nodes
drawnode(
draw,
clust.left,
x + ll,
top + h1 / 2,
scaling,
labels
)
drawnode(
draw,
clust.right,
x + ll,
bottom - h2 / 2,
scaling,
labels
)
else:
# If this is an endpoint, draw the item label
draw.text((x + 5, y - 7), labels[clust.id], (0, 0, 0))
And here is file excute.py
import cluster
blognames, words, data = cluster.readfile('C:/Users/Bui/Documents/blogdata.txt')
cluts = cluster.hcluster(data)
cluster.drawdendrogram(cluts, blognames,jpeg= 'blogclust.jpg')
When run excute.py i got the following errors:
C:\Users\Bui\AppData\Local\Programs\Python\Python36-32\python.exe C:/Users/Bui/PycharmProjects/helloworld/excute.py
Traceback (most recent call last):
File "C:\Users\Bui\AppData\Local\Programs\Python\Python36-32\lib\site-packages\PIL\ImageDraw.py", line 217, in text
mask, offset = font.getmask2(text, self.fontmode, *args, **kwargs)
AttributeError: 'ImageFont' object has no attribute 'getmask2'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:/Users/Bui/PycharmProjects/helloworld/excute.py", line 4, in <module>
cluster.drawdendrogram(cluts, blognames,jpeg= 'blogclust.jpg')
File "C:\Users\Bui\PycharmProjects\helloworld\cluster.py", line 158, in drawdendrogram
labels = labels
File "C:\Users\Bui\PycharmProjects\helloworld\cluster.py", line 203, in drawnode
labels
File "C:\Users\Bui\PycharmProjects\helloworld\cluster.py", line 203, in drawnode
labels
File "C:\Users\Bui\PycharmProjects\helloworld\cluster.py", line 203, in drawnode
labels
File "C:\Users\Bui\PycharmProjects\helloworld\cluster.py", line 195, in drawnode
labels
File "C:\Users\Bui\PycharmProjects\helloworld\cluster.py", line 203, in drawnode
labels
File "C:\Users\Bui\PycharmProjects\helloworld\cluster.py", line 203, in drawnode
labels
File "C:\Users\Bui\PycharmProjects\helloworld\cluster.py", line 203, in drawnode
labels
File "C:\Users\Bui\PycharmProjects\helloworld\cluster.py", line 195, in drawnode
labels
File "C:\Users\Bui\PycharmProjects\helloworld\cluster.py", line 203, in drawnode
labels
File "C:\Users\Bui\PycharmProjects\helloworld\cluster.py", line 203, in drawnode
labels
File "C:\Users\Bui\PycharmProjects\helloworld\cluster.py", line 203, in drawnode
labels
[Previous line repeated 3 more times]
File "C:\Users\Bui\PycharmProjects\helloworld\cluster.py", line 195, in drawnode
labels
File "C:\Users\Bui\PycharmProjects\helloworld\cluster.py", line 207, in drawnode
draw.text((x + 5, y - 7), labels[clust.id], (0, 0, 0))
File "C:\Users\Bui\AppData\Local\Programs\Python\Python36-32\lib\site-packages\PIL\ImageDraw.py", line 221, in text
mask = font.getmask(text, self.fontmode, *args, **kwargs)
File "C:\Users\Bui\AppData\Local\Programs\Python\Python36-32\lib\site-packages\PIL\ImageFont.py", line 113, in getmask
return self.font.getmask(text, mode)
UnicodeEncodeError: 'latin-1' codec can't encode character '\u2013' in position 9: ordinal not in range(256)
Process finished with exit code 1
Please help me to fix it
@scsmla You've got a lot of stuff going on there unrelated to Pillow.
Please read these and to come up with a minimal, complete, and verifiable example:
Thank you.
UnicodeEncodeError: 'latin-1' codec can't encode character '\u2013' in position 9: ordinal not in range(256)
You need to specify a font that can handle unicode. The default built-in bitmap font is latin-1 only, as it only has a potential for 256 characters.
Minimal repro:
from PIL import Image, ImageDraw
text = u'\u2013'
img = Image.new('RGB', (10,10))
draw = ImageDraw.Draw(img)
draw.text((0,0), text, "white")
Working:
from PIL import Image, ImageDraw, ImageFont
text = u'\u2013'
img = Image.new('RGB', (10,10))
draw = ImageDraw.Draw(img)
font = ImageFont.truetype('Tests/fonts/DejaVuSans.ttf')
draw.text((0,0), text, "white", font=font)
@wiredfool I tried the your code but it is still not working. Please check for me again
In what way is the code not working?
The same error as the previous edition of my code. In here I tried the font "arialbd.ttf"
Here is the cluster.py
# -*- coding: utf-8 -*-
from PIL import Image, ImageDraw, ImageFont
def readfile(filename):
lines = [line for line in open(filename)]
# First line is the column titles
colnames = lines[0].strip().split('\t')[1:]
rownames = []
data = []
for line in lines[1:]:
p = line.strip().split('\t')
# First column in each row is the rowname
rownames.append(p[0])
# The data for this row is the remainder of the row
data.append([float(x) for x in p[1:]])
return rownames, colnames, data
from math import sqrt
def pearson(v1, v2):
# Simple sums
sum1 = sum(v1)
sum2 = sum(v2)
# Sums of the squares
sum1Sq = sum([pow(v, 2) for v in v1])
sum2Sq = sum([pow(v, 2) for v in v2])
# Sum of the products
pSum = sum([v1[i] * v2[i] for i in range(len(v1))])
# Calculate r (Pearson score)
num = pSum - (sum1 * sum2 / len(v1))
den = sqrt((sum1Sq - pow(sum1, 2) / len(v1)) * (sum2Sq - pow(sum2, 2) / len(v1)))
if den == 0: return 0
return 1.0 - num / den
class bicluster:
def __init__(self, vec, left=None, right=None, distance=0.0, id=None):
self.left = left
self.right = right
self.vec = vec
self.id = id
self.distance = distance
def hcluster(rows, distance=pearson):
distances = {}
currentclustid = -1
# Clusters are initially just the rows
clust = [bicluster(rows[i], id=i) for i in range(len(rows))]
while len(clust) > 1:
lowestpair = (0, 1)
closest = distance(clust[0].vec, clust[1].vec)
# loop through every pair looking for the smallest distance
for i in range(len(clust)):
for j in range(i + 1, len(clust)):
# distances is the cache of distance calculations
if (clust[i].id, clust[j].id) not in distances:
distances[(clust[i].id, clust[j].id)] = distance(clust[i].vec, clust[j].vec)
d = distances[(clust[i].id, clust[j].id)]
if d < closest:
closest = d
lowestpair = (i, j)
# calculate the average of the two clusters
mergevec = [
(clust[lowestpair[0]].vec[i] + clust[lowestpair[1]].vec[i]) / 2.0
for i in range(len(clust[0].vec))]
# create the new cluster
newcluster = bicluster(mergevec, left=clust[lowestpair[0]],
right=clust[lowestpair[1]],
distance=closest, id=currentclustid)
# cluster ids that weren't in the original set are negative
currentclustid -= 1
del clust[lowestpair[1]]
del clust[lowestpair[0]]
clust.append(newcluster)
return clust[0]
def printclust(clust, labels=None, n=0):
# indent to make a hierarchy layout
for i in range(n):
print(' ')
if clust.id < 0:
# negative id means that this is branch
print('-')
else:
# positive id means that this is an endpoint
if labels == None:
print(clust.id)
else:
print(labels[clust.id])
# now print the right and left branches
if clust.left != None: printclust(clust.left, labels=labels, n=n + 1)
if clust.right != None: printclust(clust.right, labels=labels, n=n + 1)
def getheight(clust):
# Is this an endpoint? Then the height is just 1
if clust.left == None and clust.right == None:
return 1
# Otherwise the height is the same of the heights of
# each branch
return getheight(clust.left) + getheight(clust.right)
def getdepth(clust):
# The distance of an endpoint is 0.0
if clust.left == None and clust.right == None:
return 0
# The distance of a branch is the greater of its two sides
# plus its own distance
return max(getdepth(clust.left), getdepth(clust.right)) + clust.distance
def drawdendrogram(clust, labels, jpeg='clusters.jpg'):
# height and width
h = getheight(clust) * 20
w = 1200
depth = getdepth(clust)
# width is fixed, so scale distances accordingly
scaling = float(w - 150) / depth
# Create a new image with a white background
img = Image.new('RGB', (10,10))
draw = ImageDraw.Draw(img)
text = u'\u2013'
font = ImageFont.truetype('arialbd.ttf')
draw.text((0,0),text,"white",font=font)
draw.line((0, h / 2, 10, h / 2), fill=(255, 0, 0))
# Draw the first node
drawnode(draw, clust, 10, (h / 2), scaling, labels)
img.save(jpeg, 'JPEG')
def drawnode(draw, clust, x, y, scaling, labels):
if clust.id < 0:
h1 = getheight(clust.left) * 20
h2 = getheight(clust.right) * 20
top = y - (h1 + h2) / 2
bottom = y + (h1 + h2) / 2
# Line length
ll = clust.distance * scaling
# Vertical line from this cluster to children
draw.line((x, top + h1 / 2, x, bottom - h2 / 2), fill=(255, 0, 0))
# Horizontal line to left item
draw.line((x, top + h1 / 2, x + ll, top + h1 / 2), fill=(255, 0, 0))
# Horizontal line to right item
draw.line((x, bottom - h2 / 2, x + ll, bottom - h2 / 2), fill=(255, 0, 0))
# Call the function to draw the left and right nodes
drawnode(draw, clust.left, x + ll, top + h1 / 2, scaling, labels)
drawnode(draw, clust.right, x + ll, bottom - h2 / 2, scaling, labels)
else:
# If this is an endpoint, draw the item label
draw.text((x + 5, y - 7), labels[clust.id], (0, 0, 0))
Nothing changes in excute.py file
@scsmla: Once again, you've pasted 150 lines of code without isolating the part that's going wrong or the exception that it raises.
The core issue that you were having can be condensed down to the 4 or 5 line repro that I posted, which attempts to render one character from your file onto a small image. It fails on the default font, it passes with a truetype font. Pasting that directly into your larger script just obscures any error that you might have, and if it does work, will render only that one character on a small canvas.
So, from this, I can guess at two possibilities:
1) You aren't getting the font loaded and you're effectively passing None in as the font, which still falls back to the default font
2) You've actually got a different error.
UnicodeEncodeError: 'latin-1' codec can't encode character '\u2013' in position 9: ordinal not in range(256)You need to specify a font that can handle unicode. The default built-in bitmap font is latin-1 only, as it only has a potential for 256 characters.
Minimal repro:
from PIL import Image, ImageDraw text = u'\u2013' img = Image.new('RGB', (10,10)) draw = ImageDraw.Draw(img) draw.text((0,0), text, "white")Working:
from PIL import Image, ImageDraw, ImageFont text = u'\u2013' img = Image.new('RGB', (10,10)) draw = ImageDraw.Draw(img) font = ImageFont.truetype('Tests/fonts/DejaVuSans.ttf') draw.text((0,0), text, "white", font=font)
works for me