[英]issue with data not being written to file using python dataframe
I'm trying to create a GeoJSON file from an input txt file that I have.我正在尝试使用我拥有的输入 txt 文件创建一个 geojson 文件。 This is the code that I have, but there seem to be errors in it.
这是我拥有的代码,但似乎有错误。
import pandas as pd
import geopandas as gpd
from shapely.geometry import LineString
import io
# Column names for the headerless input: four fixed fields followed by up to
# four free-form tag values per point.
col = ['lat','long','pointID','WAYID','tag2','tag3','tag4','tag5']
# Inline sample input, one point per line:
#   lat=<value>,long=<value>,<pointID>,<WAYID>,<tag>,<tag>,...
# NOTE(review): the row for pointID 106563 carries nine fields while only eight
# column names are declared -- confirm how read_csv treats the extra field.
data = '''lat=1.3218368,long=103.9364834,107244,190637,shelter,yes,highway,footway
lat=1.3208156,long=103.9365417,106940,190637,highway,footway
lat=1.3206226,long=103.9367689,107034,190637,highway,footway
lat=1.3202877,long=103.9345338,106640,190637,shelter,yes,highway,footway
lat=1.3235089,long=103.9344606,107148,190637,highway,footway,shelter,yes
lat=1.3207544,long=103.9370296,107041,190637,highway,footway
lat=1.3218821,long=103.9364744,107243,190637,shelter,yes,highway,footway
lat=1.3202255,long=103.9365788,106947,190888,shelter,yes,highway,footway
lat=1.3219285,long=103.9367017,107242,190637,shelter,yes,highway,footway
lat=1.3203222,long=103.936561,106946,190637,shelter,yes,highway,footway
lat=1.320661,long=103.936842,107036,190637,highway,footway
lat=1.3205415,long=103.9339101,106642,190888,shelter,yes,highway,footway
lat=1.3207378,long=103.9371016,107043,190637,shelter,yes,highway,footway
lat=1.3237604,long=103.933684,106563,190637,shelter,yes,highway,footway,random
lat=1.3237205,long=103.9355026,107115,190637,highway,footway,shelter,yes
lat=1.321643,long=103.9364707,107241,190637,shelter,yes,highway,footway
lat=1.3202778,long=103.9363223,106945,190888,shelter,yes,highway,footway
lat=1.3216271,long=103.9363887,107240,190637,shelter,yes,highway,footway'''
# Load the text as a DataFrame (replace io.StringIO(data) with the file name to
# read from disk). The converters strip the "lat=" / "long=" prefixes and parse
# the remainder as float while loading.
df = pd.read_csv(io.StringIO(data), names=col, sep=',', engine='python', converters={'lat': lambda x: float(x.split('=')[1]), 'long': lambda x: float(x.split('=')[1])})
# Alternative: read the same format from a text file instead of the inline sample.
#df = pd.read_csv("latlong.txt", names=col, sep=',', engine='python', converters={'lat': lambda x: float(x.split('=')[1]), 'long': lambda x: float(x.split('=')[1])})
# Wrap the DataFrame in a GeoDataFrame with one point per row (x = long, y = lat).
gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.long, df.lat))
# Group the points by WAYID and join each group's points into one LineString.
# Only the 'geometry' column is selected before apply(), so after reset_index()
# the result holds just WAYID and geometry -- the tag columns are not carried over.
gdf = gdf.groupby(['WAYID'])['geometry'].apply(lambda x: LineString(x.tolist())).reset_index()
# Serialise the grouped result to a JSON string.
jsonLoad = gdf.to_json()
If the above works, this part will then write the result out in the exact file format that I want.如果上述方法有效,那么这部分会将其生成为我想要的确切文件格式。
import json
from geojson import Point, Feature, dump
# Save the data to the file.
# Parse the GeoJSON string back into Python objects so it can be pretty-printed
# and written out again.
parsed = json.loads(jsonLoad)
print(json.dumps(parsed, indent=4, sort_keys=True))
with open('myfile.geojson', 'w') as f:
    # The write must be indented under `with`; otherwise the script fails with
    # an IndentationError before anything is written.
    dump(parsed, f, indent=1)
I'm trying to get the points grouped according to their WAYID, but in the resulting GeoJSON file I don't see tags like shelter,yes,highway,footway attached to the features, and I don't understand why they are not being stored in the GeoJSON.
我试图让它们根据各自的 WAYID 分组,但是当我得到结果文件时,在 geoJSON 中我没有看到像 shelter,yes,highway,footway 这样的标签附加到要素上,我不明白为什么它们没有存储在 geoJSON 中?
For example this is my generated file,例如这是我生成的文件,
{
"features": [
{
"geometry": {
"coordinates": [
[
103.9364834,
1.3218368
],
[
103.9365417,
1.3208156
],
[
103.9367689,
1.3206226
],
[
103.9345338,
1.3202877
],
[
103.9344606,
1.3235089
],
[
103.9370296,
1.3207544
],
[
103.9364744,
1.3218821
],
[
103.9367017,
1.3219285
],
[
103.936561,
1.3203222
],
[
103.936842,
1.320661
],
[
103.9371016,
1.3207378
],
[
103.933684,
1.3237604
],
[
103.9355026,
1.3237205
],
[
103.9364707,
1.321643
],
[
103.9363887,
1.3216271
]
],
"type": "LineString"
},
"id": "0",
"properties": {
"WAYID": 190637
},
"type": "Feature"
},
{
"geometry": {
"coordinates": [
[
103.9365788,
1.3202255
],
[
103.9339101,
1.3205415
],
[
103.9363223,
1.3202778
]
],
"type": "LineString"
},
"id": "1",
"properties": {
"WAYID": 190888
},
"type": "Feature"
}
],
"type": "FeatureCollection"
}
But under properties I would have expected to see the rest of the tag columns, such as 'tag2','tag3','tag4','tag5'.
但在 properties 下,我希望看到其余的标签列,如 'tag2','tag3','tag4','tag5'。
What am I missing here?我在这里缺少什么? Would appreciate if someone could tell me why this is happening, thank you!
如果有人能告诉我为什么会这样,将不胜感激,谢谢!
EDIT:编辑:
For the same issue, if I change a couple of data lines to have more tags eg:对于同样的问题,如果我更改几条数据线以获得更多标签,例如:
import pandas as pd
import geopandas as gpd
from shapely.geometry import LineString
import io
# Column names for the headerless input: four fixed fields followed by up to
# nineteen free-form tag values (tag2 .. tag20) per point.
col = ['lat','long','pointID','WAYID','tag2','tag3','tag4','tag5','tag6','tag7','tag8','tag9','tag10','tag11','tag12','tag13','tag14','tag15','tag16','tag17','tag18','tag19','tag20']
# Inline sample input, one point per line:
#   lat=<value>,long=<value>,<pointID>,<WAYID>,<tag>,<tag>,...
# The longest rows here carry eighteen tag values, so tag20 is never filled.
data = '''lat=1.3218368,long=103.9364834,107244,190637,shelter,yes,highway,footway
lat=1.3208156,long=103.9365417,106940,190637,highway,footway
lat=1.3206226,long=103.9367689,107034,190637,highway,footway
lat=1.3202877,long=103.9345338,106640,190637,shelter,yes,highway,footway
lat=1.3235089,long=103.9344606,107148,190637,highway,footway,shelter,yes
lat=1.3207544,long=103.9370296,107041,190637,highway,footway
lat=1.3218821,long=103.9364744,107243,190637,shelter,yes,highway,footway
lat=1.3202255,long=103.9365788,106947,190888,shelter,yes,highway,footway
lat=1.3219285,long=103.9367017,107242,190637,shelter,yes,highway,footway
lat=1.3203222,long=103.936561,106946,190637,shelter,yes,highway,footway
lat=1.320661,long=103.936842,107036,190637,highway,footway
lat=1.3205415,long=103.9339101,106642,190888,shelter,yes,highway,footway
lat=1.3207378,long=103.9371016,107043,190637,shelter,yes,highway,footway
lat=1.3237604,long=103.933684,106563,190637,shelter,yes,highway,footway
lat=1.3237205,long=103.9355026,107115,190637,highway,footway,shelter,yes
lat=1.321643,long=103.9364707,107241,190637,shelter,yes,highway,footway
lat=1.3224845,long=103.9332554,106525,116692201,addr:housenumber,4,residential,BLOCK,addr:country,AG,building:levels,14,footway,sidewalk,addr:street,Random South Avenue 10,addr:postcode,460004,building,residential,addr:city,Boo
lat=1.3217691,long=103.9348351,106119,190571,highway,footway
lat=1.323215,long=103.9330919,106524,116692204,addr:housenumber,23,residential,BLOCK,addr:country,AG,building:levels,14,addr:street,Random Street Name 1,addr:postcode,460011,building,residential,addr:city,Boo
lat=1.3202778,long=103.9363223,106945,190888,shelter,yes,highway,footway
lat=1.3216271,long=103.9363887,107240,190637,shelter,yes,highway,footway'''
# Load the text as a DataFrame (replace io.StringIO(data) with the file name to
# read from disk). The converters strip the "lat=" / "long=" prefixes and parse
# the remainder as float while loading.
df = pd.read_csv(io.StringIO(data), names=col, sep=',', engine='python', converters={'lat': lambda x: float(x.split('=')[1]), 'long': lambda x: float(x.split('=')[1])})
# Alternative: read the same format from a text file instead of the inline sample.
#df = pd.read_csv("latlongRemoveComma.txt", names=col, sep=',', engine='python', converters={'lat': lambda x: float(x.split('=')[1]), 'long': lambda x: float(x.split('=')[1])})
# Wrap the DataFrame in a GeoDataFrame with one point per row (x = long, y = lat).
gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.long, df.lat))
# Group by WAYID plus every tag column and join each group's points into a
# LineString.
# NOTE: pandas groupby drops rows whose group key contains NaN by default
# (dropna=True). Every sample row leaves at least tag20 empty, so all rows are
# discarded and the result has no features.
gdf = gdf.groupby(['WAYID','tag2','tag3','tag4','tag5','tag6','tag7','tag8','tag9','tag10','tag11','tag12','tag13','tag14','tag15','tag16','tag17','tag18','tag19','tag20'])['geometry'].apply(lambda x: LineString(x.tolist())).reset_index()
# Serialise the grouped result to a JSON string.
jsonLoad = gdf.to_json()
It returns an empty dataset, but I want the points to be grouped mainly by WAYID, with all the respective tags attached.
它返回一个空数据集,但我希望它主要按 WAYID 分组,并附加所有相应的标签。
{
"features": [],
"type": "FeatureCollection"
}
The problem is that when you group by WAYID alone and keep only the geometry column, all the other columns are dropped. That is why all the tags vanish. Group by the tag columns as well, like this:
问题来自这样一个事实,即当您仅按 WAYID 分组时,您会丢弃所有其他列。这就是所有标签消失的原因。 这样做:
import pandas as pd
import geopandas as gpd
from shapely.geometry import LineString
import io
# Column names for the headerless input: four fixed fields followed by up to
# four free-form tag values per point.
col = ['lat','long','pointID','WAYID','tag2','tag3','tag4','tag5']
# Inline sample input, one point per line:
#   lat=<value>,long=<value>,<pointID>,<WAYID>,<tag>,<tag>,...
data = '''lat=1.3218368,long=103.9364834,107244,190637,shelter,yes,highway,footway
lat=1.3208156,long=103.9365417,106940,190637,highway,footway
lat=1.3206226,long=103.9367689,107034,190637,highway,footway
lat=1.3202877,long=103.9345338,106640,190637,shelter,yes,highway,footway
lat=1.3235089,long=103.9344606,107148,190637,highway,footway,shelter,yes
lat=1.3207544,long=103.9370296,107041,190637,highway,footway
lat=1.3218821,long=103.9364744,107243,190637,shelter,yes,highway,footway
lat=1.3202255,long=103.9365788,106947,190888,shelter,yes,highway,footway
lat=1.3219285,long=103.9367017,107242,190637,shelter,yes,highway,footway
lat=1.3203222,long=103.936561,106946,190637,shelter,yes,highway,footway
lat=1.320661,long=103.936842,107036,190637,highway,footway
lat=1.3205415,long=103.9339101,106642,190888,shelter,yes,highway,footway
lat=1.3207378,long=103.9371016,107043,190637,shelter,yes,highway,footway
lat=1.3237604,long=103.933684,106563,190637,shelter,yes,highway,footway
lat=1.3237205,long=103.9355026,107115,190637,highway,footway,shelter,yes
lat=1.321643,long=103.9364707,107241,190637,shelter,yes,highway,footway
lat=1.3202778,long=103.9363223,106945,190888,shelter,yes,highway,footway
lat=1.3216271,long=103.9363887,107240,190637,shelter,yes,highway,footway'''
# Load the text as a DataFrame (replace io.StringIO(data) with the file name to
# read from disk). The converters strip the "lat=" / "long=" prefixes and parse
# the remainder as float while loading.
df = pd.read_csv(io.StringIO(data), names=col, sep=',', engine='python', converters={'lat': lambda x: float(x.split('=')[1]), 'long': lambda x: float(x.split('=')[1])})
# Alternative: read the same format from a text file instead of the inline sample.
#df = pd.read_csv("latlong.txt", names=col, sep=',', engine='python', converters={'lat': lambda x: float(x.split('=')[1]), 'long': lambda x: float(x.split('=')[1])})
# Wrap the DataFrame in a GeoDataFrame with one point per row (x = long, y = lat).
gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.long, df.lat))
# Group by WAYID and the four tag columns so the tags survive as properties,
# joining each group's points into a LineString.
# NOTE: this yields one feature per distinct (WAYID, tags) combination, so a
# single WAYID can be split into several LineStrings. pandas groupby also drops
# rows whose key contains NaN by default (dropna=True), so points that carry
# only two tags are left out of the result.
gdf = gdf.groupby(['WAYID', 'tag2', 'tag3', 'tag4', 'tag5'])['geometry'].apply(lambda x: LineString(x.tolist())).reset_index()
# Serialise the grouped result to a JSON string.
jsonLoad = gdf.to_json()
and run your code again.并再次运行您的代码。 This will return:
这将返回:
{
"features": [
{
"geometry": {
"coordinates": [
[
103.9344606,
1.3235089
],
[
103.9355026,
1.3237205
]
],
"type": "LineString"
},
"id": "0",
"properties": {
"WAYID": 190637,
"tag2": "highway",
"tag3": "footway",
"tag4": "shelter",
"tag5": "yes"
},
"type": "Feature"
},
{
"geometry": {
"coordinates": [
[
103.9364834,
1.3218368
],
[
103.9345338,
1.3202877
],
[
103.9364744,
1.3218821
],
[
103.9367017,
1.3219285
],
[
103.936561,
1.3203222
],
[
103.9371016,
1.3207378
],
[
103.933684,
1.3237604
],
[
103.9364707,
1.321643
],
[
103.9363887,
1.3216271
]
],
"type": "LineString"
},
"id": "1",
"properties": {
"WAYID": 190637,
"tag2": "shelter",
"tag3": "yes",
"tag4": "highway",
"tag5": "footway"
},
"type": "Feature"
},
{
"geometry": {
"coordinates": [
[
103.9365788,
1.3202255
],
[
103.9339101,
1.3205415
],
[
103.9363223,
1.3202778
]
],
"type": "LineString"
},
"id": "2",
"properties": {
"WAYID": 190888,
"tag2": "shelter",
"tag3": "yes",
"tag4": "highway",
"tag5": "footway"
},
"type": "Feature"
}
],
"type": "FeatureCollection"
}
UPDATE :
更新:
Do this to keep all the columns:这样做以保留所有列:
# Build a GeoDataFrame with one point per row (x = long, y = lat).
gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.long, df.lat))
# Sort by pointID, then collapse each WAYID to a single row using first().
# Every column is retained, but each way is now represented by one Point
# (taken from its lowest pointID) rather than a LineString of all its points.
gdf = gdf.sort_values("pointID").groupby("WAYID", as_index=False).first()
# Serialise the result to a JSON string.
jsonLoad = gdf.to_json()
and running your code again gives:并再次运行您的代码给出:
{
"features": [
{
"geometry": {
"coordinates": [
103.9348351,
1.3217691
],
"type": "Point"
},
"id": "0",
"properties": {
"WAYID": 190571,
"lat": 1.3217691,
"long": 103.9348351,
"pointID": 106119,
"tag10": null,
"tag11": null,
"tag12": null,
"tag13": null,
"tag14": null,
"tag15": null,
"tag16": null,
"tag17": null,
"tag18": null,
"tag19": null,
"tag2": "highway",
"tag20": null,
"tag3": "footway",
"tag4": null,
"tag5": null,
"tag6": null,
"tag7": null,
"tag8": null,
"tag9": null
},
"type": "Feature"
},
{
"geometry": {
"coordinates": [
103.933684,
1.3237604
],
"type": "Point"
},
"id": "1",
"properties": {
"WAYID": 190637,
"lat": 1.3237604,
"long": 103.933684,
"pointID": 106563,
"tag10": null,
"tag11": null,
"tag12": null,
"tag13": null,
"tag14": null,
"tag15": null,
"tag16": null,
"tag17": null,
"tag18": null,
"tag19": null,
"tag2": "shelter",
"tag20": null,
"tag3": "yes",
"tag4": "highway",
"tag5": "footway",
"tag6": null,
"tag7": null,
"tag8": null,
"tag9": null
},
"type": "Feature"
},
{
"geometry": {
"coordinates": [
103.9339101,
1.3205415
],
"type": "Point"
},
"id": "2",
"properties": {
"WAYID": 190888,
"lat": 1.3205415,
"long": 103.9339101,
"pointID": 106642,
"tag10": null,
"tag11": null,
"tag12": null,
"tag13": null,
"tag14": null,
"tag15": null,
"tag16": null,
"tag17": null,
"tag18": null,
"tag19": null,
"tag2": "shelter",
"tag20": null,
"tag3": "yes",
"tag4": "highway",
"tag5": "footway",
"tag6": null,
"tag7": null,
"tag8": null,
"tag9": null
},
"type": "Feature"
},
{
"geometry": {
"coordinates": [
103.9332554,
1.3224845
],
"type": "Point"
},
"id": "3",
"properties": {
"WAYID": 116692201,
"lat": 1.3224845,
"long": 103.9332554,
"pointID": 106525,
"tag10": "footway",
"tag11": "sidewalk",
"tag12": "addr:street",
"tag13": "Random South Avenue 10",
"tag14": "addr:postcode",
"tag15": "460004",
"tag16": "building",
"tag17": "residential",
"tag18": "addr:city",
"tag19": "Boo",
"tag2": "addr:housenumber",
"tag20": null,
"tag3": "4",
"tag4": "residential",
"tag5": "BLOCK",
"tag6": "addr:country",
"tag7": "AG",
"tag8": "building:levels",
"tag9": 14.0
},
"type": "Feature"
},
{
"geometry": {
"coordinates": [
103.9330919,
1.323215
],
"type": "Point"
},
"id": "4",
"properties": {
"WAYID": 116692204,
"lat": 1.323215,
"long": 103.9330919,
"pointID": 106524,
"tag10": "addr:street",
"tag11": "Random Street Name 1",
"tag12": "addr:postcode",
"tag13": "460011",
"tag14": "building",
"tag15": "residential",
"tag16": "addr:city",
"tag17": "Boo",
"tag18": null,
"tag19": null,
"tag2": "addr:housenumber",
"tag20": null,
"tag3": "23",
"tag4": "residential",
"tag5": "BLOCK",
"tag6": "addr:country",
"tag7": "AG",
"tag8": "building:levels",
"tag9": 14.0
},
"type": "Feature"
}
],
"type": "FeatureCollection"
}
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.