[英]How to set bin coordinates for a scatter plot
我想返回占據特定區域的散點數。通常,我會使用 2dhistogram 和 pcolormesh 來做到這一點。
但是,如果我想設置尺寸不規則、不構成網格的 bin 坐標,我該怎麼做?
下面是我的數據集的一個例子。
import matplotlib.pyplot as plt
import matplotlib as mpl
import math
import numpy as np
# Two point sets ("red" and "blue"): each coordinate array is 400 rows of
# 10 random integers in [0, 80). The four draws stay in x1, y1, x2, y2 order.
x1 = np.random.randint(80, size=(400, 10))
y1 = np.random.randint(80, size=(400, 10))
x2 = np.random.randint(80, size=(400, 10))
y2 = np.random.randint(80, size=(400, 10))

fig, ax = plt.subplots()
ax.grid(False)

# Only the first row of each data set is drawn.
plt.scatter(x1[0], y1[0], c='r', zorder=2)
plt.scatter(x2[0], y2[0], c='b', zorder=2)

# Geometry of the bin outlines: two arc centres, the arc opening angle and
# the centre of the surrounding circle.
ang1 = 0, 50
ang2 = 100, 50
angle = math.degrees(math.acos(5.5/9.15))
xy = 50, 50

Halfway = mpl.lines.Line2D((50, 50), (0, 100), c='white')
arc1 = mpl.patches.Arc(
    ang1, 65, 100, angle=0, theta2=angle, theta1=360-angle, lw=2
)
arc2 = mpl.patches.Arc(
    ang2, 65, 100, angle=0, theta2=180+angle, theta1=180-angle, lw=2
)
Oval = mpl.patches.Ellipse(xy, 100, 100, lw=3, alpha=0.1)

ax.add_line(Halfway)
for outline in (arc1, arc2, Oval):
    ax.add_patch(outline)

# Number the four regions the points should be sorted into.
for (label_x, label_y), label in (
    ((15, 75), '1'),
    ((35, 90), '2'),
    ((65, 90), '3'),
    ((85, 75), '4'),
):
    plt.text(label_x, label_y, label, fontsize=8)

ax.autoscale()
plt.draw()
我要設置的 bin 標記為 1-4。是否可以設置坐標來返回這些 bin?
如果我可以設置這些坐標,那么我想返回每個散點占據的 bin。 輸出:
更新:
如果我想要一個在散點圖中每一行的每個 bin 中顯示 xy 的導出,我會寫出(x1[0], y1[0])
並轉置數據以返回:
1 2 3 4
0 [(x,y),(x,y)] [(x,y),(x,y)] [(x,y),(x,y)] [(x,y),(x,y)]
然后我將(x1[0], y1[0])
更改為(x1[1], y1[1])
以獲取第二行數據。
1 2 3 4
1 [(x,y),(x,y)] [(x,y),(x,y)] [(x,y),(x,y)] [(x,y),(x,y)]
然后我會結合這些來創建:
1 2 3 4
0 [(x,y),(x,y)] [(x,y),(x,y)] [(x,y),(x,y)] [(x,y),(x,y)]
1 [(x,y),(x,y)] [(x,y),(x,y)] [(x,y),(x,y)] [(x,y),(x,y)]
我有 1000 行,所以我試圖創建一種方法來使用整個(x1, y1)
為每行數據生成每個 bin 中的坐標。
預期輸出:
1 2 3 4
0 [(x,y),(x,y)] [(x,y),(x,y)] [(x,y),(x,y)] [(x,y),(x,y)]
1 [(x,y),(x,y)] [(x,y),(x,y)] [(x,y),(x,y)] [(x,y),(x,y)]
2 [(x,y),(x,y)] [(x,y),(x,y)] [(x,y),(x,y)] [(x,y),(x,y)]
3 [(x,y),(x,y)] [(x,y),(x,y)] [(x,y),(x,y)] [(x,y),(x,y)]
4 [(x,y),(x,y)] [(x,y),(x,y)] [(x,y),(x,y)] [(x,y),(x,y)]
5....
6....
如果我嘗試(x1, y1)
我得到錯誤:
err = (arc_vertices[:,0] - x)**2 + (arc_vertices[:,1] - y)**2
ValueError: operands could not be broadcast together with shapes (70,) (10,)
我真的對這種方法不滿意。 用數據的 x 坐標計算點的 y 坐標落在曲線上的位置似乎更好。
這種方法的工作原理類似,但使用弧的有限頂點:
# Push each arc's vertices through the inverse of the axes' data transform so
# they can be compared with the scatter coordinates.
arc1v = ax.transData.inverted().transform(arc1.get_verts())
arc2v = ax.transData.inverted().transform(arc2.get_verts())

for px, py in zip(x1[0], y1[0]):
    # Squared distance from this point to every vertex of arc1.
    sq_dist = (arc1v[:, 0] - px) ** 2 + (arc1v[:, 1] - py) ** 2
    closest = (arc1v[sq_dist == min(sq_dist)])[0]

    # Connect the point to its nearest arc vertex (illustration only).
    ax.add_line(mpl.lines.Line2D((px, closest[0]), (py, closest[1])))

    # Up-triangle when the point lies right of that vertex, down otherwise.
    side_marker = '^' if px > closest[0] else 'v'
    ax.scatter(px, py, marker=side_marker, s=100, c='k', zorder=1)
這段代碼會用向下的三角形“標記”位於(左)曲線左側的點,並用向上的三角形標記位於其右側的點。圖形上的線段指向曲線上最近的已定義頂點,僅供說明之用。
您也可以對另一條曲線執行同樣的操作,而 bin 2/3 之間的劃分很簡單(只需比較 x 坐標)。
這是一個更完整的答案:
import matplotlib.pyplot as plt
import matplotlib as mpl
import math
import numpy as np
BIN_23_X = 50  # The separator between bin 2 and 3

# Two point sets ("red" and "blue"): each coordinate array is 400 rows of
# 10 random integers in [0, 80). The four draws stay in x1, y1, x2, y2 order.
x1 = np.random.randint(80, size=(400, 10))
y1 = np.random.randint(80, size=(400, 10))
x2 = np.random.randint(80, size=(400, 10))
y2 = np.random.randint(80, size=(400, 10))

fig, ax = plt.subplots()
ax.grid(False)

# Only the first row of each data set is drawn.
plt.scatter(x1[0], y1[0], c='r', zorder=2)
plt.scatter(x2[0], y2[0], c='b', zorder=2)

# Geometry of the bin outlines: two arc centres, the arc opening angle and
# the centre of the surrounding circle.
ang1 = 0, 50
ang2 = 100, 50
angle = math.degrees(math.acos(5.5/9.15))
xy = 50, 50

# Vertical line drawn at the bin 2 / bin 3 separator.
Halfway = mpl.lines.Line2D((BIN_23_X, BIN_23_X), (0, 100), c='white')
arc1 = mpl.patches.Arc(
    ang1, 65, 100, angle=0, theta2=angle, theta1=360-angle, lw=2
)
arc2 = mpl.patches.Arc(
    ang2, 65, 100, angle=0, theta2=180+angle, theta1=180-angle, lw=2
)
Oval = mpl.patches.Ellipse(xy, 100, 100, lw=3, alpha=0.1)

ax.add_line(Halfway)
for outline in (arc1, arc2, Oval):
    ax.add_patch(outline)

# Number the four regions the points are sorted into.
for (label_x, label_y), label in (
    ((15, 75), '1'),
    ((35, 90), '2'),
    ((65, 90), '3'),
    ((85, 75), '4'),
):
    plt.text(label_x, label_y, label, fontsize=8)
# Classification helpers
def get_nearest_arc_vert(x, y, arc_vertices):
    """Return the vertex in ``arc_vertices`` closest to the point ``(x, y)``.

    Args:
        x: Scalar x coordinate of the query point.
        y: Scalar y coordinate of the query point.
        arc_vertices: Array-like of shape ``(N, 2)`` holding one ``(x, y)``
            vertex per row. It must contain at least one vertex.

    Returns:
        The ``[x, y]`` row of ``arc_vertices`` nearest to the query point.
        When several vertices are equally near, the first one is returned.
    """
    # Accept plain sequences of (x, y) pairs as well as ndarrays.
    arc_vertices = np.asarray(arc_vertices)
    # Squared Euclidean distance is enough to rank the vertices, so the
    # square root is skipped.
    err = (arc_vertices[:, 0] - x) ** 2 + (arc_vertices[:, 1] - y) ** 2
    # argmin picks the first minimum in one vectorised pass.
    return arc_vertices[np.argmin(err)]
# Vertex arrays for the two arcs, passed through the inverse of the axes' data
# transform so they can be compared with the scatter coordinates.
# NOTE(review): presumably get_verts() yields display-space vertices, which is
# why the inverse data transform is applied — confirm against Matplotlib docs.
arc1v = ax.transData.inverted().transform(arc1.get_verts())
arc2v = ax.transData.inverted().transform(arc2.get_verts())
def classify_pointset(vx, vy):
    """Sort the points ``(vx[i], vy[i])`` into bins 1-4.

    A point goes to bin 1 when its x is smaller than the x of its nearest
    ``arc1v`` vertex, and to bin 4 when its x is larger than the x of its
    nearest ``arc2v`` vertex. Everything else is split at ``BIN_23_X``:
    bin 2 to the left, bin 3 on the line or to the right.

    Args:
        vx: Iterable of x coordinates.
        vy: Iterable of y coordinates, paired element-wise with ``vx``.

    Returns:
        A dict keyed by bin number (1-4); each value is the list of
        ``(x, y)`` tuples that landed in that bin, in input order.
    """
    bins = {1: [], 2: [], 3: [], 4: []}
    for point in zip(vx, vy):
        x, y = point
        arc1_x, _ = get_nearest_arc_vert(x, y, arc1v)
        arc2_x, _ = get_nearest_arc_vert(x, y, arc2v)

        if x < arc1_x:
            # Left of arc1.
            bin_key = 1
        elif x > arc2_x:
            # Right of arc2.
            bin_key = 4
        elif x < BIN_23_X:
            # Between the arcs, left of the separator.
            bin_key = 2
        else:
            # Between the arcs; points on the separator count as bin 3.
            bin_key = 3
        bins[bin_key].append(point)
    return bins
# Classify the first row of each data set.
bins_red = classify_pointset(x1[0], y1[0])
bins_blue = classify_pointset(x2[0], y2[0])

# Print every (bin number, points) pair, red first and then blue.
for heading, colour_bins in (("Red:", bins_red), ("Blue:", bins_blue)):
    print(heading)
    for entry in colour_bins.items():
        print(" ", entry)

# "Annotate" classifications: a larger triangle is drawn behind each point,
# with marker shape and colour chosen by the bin the point was sorted into.
annotation_styles = {
    1: ('^', 'k'),
    2: ('v', 'k'),
    3: ('^', 'y'),
    4: ('v', 'y'),
}
for bin_number, (shape, colour) in annotation_styles.items():
    for (x, y) in (bins_red[bin_number] + bins_blue[bin_number]):
        ax.scatter(x, y, marker=shape, s=100, c=colour, zorder=1)

ax.autoscale()
plt.draw()
plt.show()
產生:
在這里,點被“注釋”,它們后面的形狀對應於它們被分類到哪些箱:
Bin     Anno. Color   Triangle Pointing
-------------------------------------------
Bin 1   Black         Up
Bin 2   Black         Down
Bin 3   Yellow        Up
Bin 4   Yellow        Down
該代碼還會顯示分類結果(classify_pointset 的輸出是一個字典,以 bin 編號 (1-4) 為鍵,值是在該 bin 中找到的點的坐標):
Red:
  (1, [(14, 30), (4, 18), (12, 48)])
  (2, [(49, 41)])
  (3, [(62, 79), (50, 7), (68, 19), (71, 1), (59, 27), (77, 0)])
  (4, [])
Blue:
  (1, [(20, 74), (11, 17), (12, 75)])
  (2, [(41, 19), (30, 15)])
  (3, [(61, 75)])
  (4, [(79, 73), (69, 58), (76, 34), (78, 65)])
您不必以圖形方式注釋圖形,它只是用於說明,您可以只使用由classify_pointset
( bins_red
和bins_blue
)返回的字典。
以下代碼生成一個列表列表(仍為 1 索引),因此您可以通過訪問all_points[1]
找到 bin 1 中的所有點(紅色和藍色)。 all_points
列表中的第一個元素(索引 0)是None
,因為我們將列表保持為 1 索引。
# Merge the red and blue points of each bin into one list per bin. Index 0 is
# a None placeholder so that all_points[n] holds the points of bin n.
all_points = [None] + [bins_red[n] + bins_blue[n] for n in (1, 2, 3, 4)]

# Just for display.
for bin_key, bin_points in enumerate(all_points):
    print(bin_key, bin_points)
輸出:
0 None
1 [(1, 8), (16, 72), (23, 67), (12, 19), (24, 51), (24, 47), (15, 23), (18, 51)]
2 [(39, 75), (35, 27), (48, 55), (45, 53), (45, 22)]
3 [(66, 58), (55, 64), (70, 1), (71, 15), (73, 3), (71, 75)]
4 [(74, 62)]
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.