簡體   English   中英

使用 Python DataFrame 時數據未被寫入文件的問題

[英]issue with data not being written to file using python dataframe

我正在嘗試使用我擁有的輸入 txt 文件創建一個 geojson 文件。 這是我擁有的代碼,但似乎有錯誤。

import pandas as pd
import geopandas as gpd
from shapely.geometry import LineString
import io

col = ['lat','long','pointID','WAYID','tag2','tag3','tag4','tag5']
data = '''lat=1.3218368,long=103.9364834,107244,190637,shelter,yes,highway,footway
lat=1.3208156,long=103.9365417,106940,190637,highway,footway
lat=1.3206226,long=103.9367689,107034,190637,highway,footway
lat=1.3202877,long=103.9345338,106640,190637,shelter,yes,highway,footway
lat=1.3235089,long=103.9344606,107148,190637,highway,footway,shelter,yes
lat=1.3207544,long=103.9370296,107041,190637,highway,footway
lat=1.3218821,long=103.9364744,107243,190637,shelter,yes,highway,footway
lat=1.3202255,long=103.9365788,106947,190888,shelter,yes,highway,footway
lat=1.3219285,long=103.9367017,107242,190637,shelter,yes,highway,footway
lat=1.3203222,long=103.936561,106946,190637,shelter,yes,highway,footway
lat=1.320661,long=103.936842,107036,190637,highway,footway
lat=1.3205415,long=103.9339101,106642,190888,shelter,yes,highway,footway
lat=1.3207378,long=103.9371016,107043,190637,shelter,yes,highway,footway
lat=1.3237604,long=103.933684,106563,190637,shelter,yes,highway,footway,random
lat=1.3237205,long=103.9355026,107115,190637,highway,footway,shelter,yes
lat=1.321643,long=103.9364707,107241,190637,shelter,yes,highway,footway
lat=1.3202778,long=103.9363223,106945,190888,shelter,yes,highway,footway
lat=1.3216271,long=103.9363887,107240,190637,shelter,yes,highway,footway'''

def _after_equals(field):
    """Return the numeric part of a 'name=value' field as a float."""
    return float(field.split('=')[1])


def _to_linestring(points):
    """Join the point geometries of one group into a single LineString."""
    return LineString(points.tolist())


# Parse the inline CSV text; to read from disk, pass a filename such as
# "latlong.txt" in place of io.StringIO(data). The converters strip the
# "lat=" / "long=" prefixes while the columns are being loaded.
df = pd.read_csv(
    io.StringIO(data),
    names=col,
    sep=',',
    engine='python',
    converters={'lat': _after_equals, 'long': _after_equals},
)

# One point geometry per input row, built from the (long, lat) columns.
row_points = gpd.points_from_xy(df.long, df.lat)
gdf = gpd.GeoDataFrame(df, geometry=row_points)

# Collapse each WAYID into one LineString. Only the 'geometry' column is
# selected before apply(), so after reset_index() the frame holds WAYID and
# geometry alone -- none of the tag columns are carried into the result.
gdf = gdf.groupby(['WAYID'])['geometry'].apply(_to_linestring).reset_index()

# GeoJSON text of the grouped frame.
jsonLoad = gdf.to_json()

如果上述方法有效,那么這部分會將其生成為我想要的確切文件格式。

import json
from geojson import Point, Feature, dump
# Decode the GeoJSON text so it can be both pretty-printed and re-serialized.
parsed = json.loads(jsonLoad)

# Show a human-readable copy on stdout (keys sorted, 4-space indent).
pretty = json.dumps(parsed, indent=4, sort_keys=True)
print(pretty)

# Write the same FeatureCollection to disk with geojson's dump().
with open('myfile.geojson', 'w') as f:
    dump(parsed, f, indent=1)

我試圖讓這些點根據它們的 WAYID 分組,但是在生成的 GeoJSON 文件中,我沒有看到像 shelter,yes,highway,footway 這樣的標簽附加在要素上。我不明白為什麼這些標簽沒有被存儲到 GeoJSON 中?

例如這是我生成的文件,

{
    "features": [
        {
            "geometry": {
                "coordinates": [
                    [
                        103.9364834,
                        1.3218368
                    ],
                    [
                        103.9365417,
                        1.3208156
                    ],
                    [
                        103.9367689,
                        1.3206226
                    ],
                    [
                        103.9345338,
                        1.3202877
                    ],
                    [
                        103.9344606,
                        1.3235089
                    ],
                    [
                        103.9370296,
                        1.3207544
                    ],
                    [
                        103.9364744,
                        1.3218821
                    ],
                    [
                        103.9367017,
                        1.3219285
                    ],
                    [
                        103.936561,
                        1.3203222
                    ],
                    [
                        103.936842,
                        1.320661
                    ],
                    [
                        103.9371016,
                        1.3207378
                    ],
                    [
                        103.933684,
                        1.3237604
                    ],
                    [
                        103.9355026,
                        1.3237205
                    ],
                    [
                        103.9364707,
                        1.321643
                    ],
                    [
                        103.9363887,
                        1.3216271
                    ]
                ],
                "type": "LineString"
            },
            "id": "0",
            "properties": {
                "WAYID": 190637
            },
            "type": "Feature"
        },
        {
            "geometry": {
                "coordinates": [
                    [
                        103.9365788,
                        1.3202255
                    ],
                    [
                        103.9339101,
                        1.3205415
                    ],
                    [
                        103.9363223,
                        1.3202778
                    ]
                ],
                "type": "LineString"
            },
            "id": "1",
            "properties": {
                "WAYID": 190888
            },
            "type": "Feature"
        }
    ],
    "type": "FeatureCollection"
}

但在properties下,我希望看到其余的標簽列,如'tag2','tag3','tag4','tag5'

我在這里缺少什么? 如果有人能告訴我為什么會這樣,將不勝感激,謝謝!

編輯:

對於同樣的問題,如果我更改其中幾行數據,讓它們帶有更多標簽,例如:

import pandas as pd
import geopandas as gpd
from shapely.geometry import LineString
import io

col = ['lat','long','pointID','WAYID','tag2','tag3','tag4','tag5','tag6','tag7','tag8','tag9','tag10','tag11','tag12','tag13','tag14','tag15','tag16','tag17','tag18','tag19','tag20']
data = '''lat=1.3218368,long=103.9364834,107244,190637,shelter,yes,highway,footway
lat=1.3208156,long=103.9365417,106940,190637,highway,footway
lat=1.3206226,long=103.9367689,107034,190637,highway,footway
lat=1.3202877,long=103.9345338,106640,190637,shelter,yes,highway,footway
lat=1.3235089,long=103.9344606,107148,190637,highway,footway,shelter,yes
lat=1.3207544,long=103.9370296,107041,190637,highway,footway
lat=1.3218821,long=103.9364744,107243,190637,shelter,yes,highway,footway
lat=1.3202255,long=103.9365788,106947,190888,shelter,yes,highway,footway
lat=1.3219285,long=103.9367017,107242,190637,shelter,yes,highway,footway
lat=1.3203222,long=103.936561,106946,190637,shelter,yes,highway,footway
lat=1.320661,long=103.936842,107036,190637,highway,footway
lat=1.3205415,long=103.9339101,106642,190888,shelter,yes,highway,footway
lat=1.3207378,long=103.9371016,107043,190637,shelter,yes,highway,footway
lat=1.3237604,long=103.933684,106563,190637,shelter,yes,highway,footway
lat=1.3237205,long=103.9355026,107115,190637,highway,footway,shelter,yes
lat=1.321643,long=103.9364707,107241,190637,shelter,yes,highway,footway
lat=1.3224845,long=103.9332554,106525,116692201,addr:housenumber,4,residential,BLOCK,addr:country,AG,building:levels,14,footway,sidewalk,addr:street,Random South Avenue 10,addr:postcode,460004,building,residential,addr:city,Boo
lat=1.3217691,long=103.9348351,106119,190571,highway,footway
lat=1.323215,long=103.9330919,106524,116692204,addr:housenumber,23,residential,BLOCK,addr:country,AG,building:levels,14,addr:street,Random Street Name 1,addr:postcode,460011,building,residential,addr:city,Boo
lat=1.3202778,long=103.9363223,106945,190888,shelter,yes,highway,footway
lat=1.3216271,long=103.9363887,107240,190637,shelter,yes,highway,footway'''

def _after_equals(field):
    """Return the numeric part of a 'name=value' field as a float."""
    return float(field.split('=')[1])


def _to_linestring(points):
    """Join the point geometries of one group into a single LineString."""
    return LineString(points.tolist())


# Parse the inline CSV text; to read from disk, pass a filename such as
# "latlongRemoveComma.txt" in place of io.StringIO(data). The converters strip
# the "lat=" / "long=" prefixes while the columns are being loaded.
df = pd.read_csv(
    io.StringIO(data),
    names=col,
    sep=',',
    engine='python',
    converters={'lat': _after_equals, 'long': _after_equals},
)

# One point geometry per input row, built from the (long, lat) columns.
row_points = gpd.points_from_xy(df.long, df.lat)
gdf = gpd.GeoDataFrame(df, geometry=row_points)

# Group on WAYID together with every tag column (tag2 .. tag20).
# NOTE(review): pandas' groupby drops rows whose key contains NaN by default
# (dropna=True). No sample row fills all of tag2..tag20, so presumably every
# row is discarded here, which would explain the empty FeatureCollection.
group_keys = ['WAYID'] + [f'tag{i}' for i in range(2, 21)]
gdf = gdf.groupby(group_keys)['geometry'].apply(_to_linestring).reset_index()

# GeoJSON text of the grouped frame.
jsonLoad = gdf.to_json()

它返回一個空數據集,但我希望它主要按WAYID分組,並附加所有相應的標簽

{
    "features": [],
    "type": "FeatureCollection"
}

問題在於:當您只按 WAYID 分組並且只選取 geometry 列時,其他所有列都會被丟棄。這就是所有標簽消失的原因。請改為這樣做:

import pandas as pd
import geopandas as gpd
from shapely.geometry import LineString
import io

col = ['lat','long','pointID','WAYID','tag2','tag3','tag4','tag5']
data = '''lat=1.3218368,long=103.9364834,107244,190637,shelter,yes,highway,footway
lat=1.3208156,long=103.9365417,106940,190637,highway,footway
lat=1.3206226,long=103.9367689,107034,190637,highway,footway
lat=1.3202877,long=103.9345338,106640,190637,shelter,yes,highway,footway
lat=1.3235089,long=103.9344606,107148,190637,highway,footway,shelter,yes
lat=1.3207544,long=103.9370296,107041,190637,highway,footway
lat=1.3218821,long=103.9364744,107243,190637,shelter,yes,highway,footway
lat=1.3202255,long=103.9365788,106947,190888,shelter,yes,highway,footway
lat=1.3219285,long=103.9367017,107242,190637,shelter,yes,highway,footway
lat=1.3203222,long=103.936561,106946,190637,shelter,yes,highway,footway
lat=1.320661,long=103.936842,107036,190637,highway,footway
lat=1.3205415,long=103.9339101,106642,190888,shelter,yes,highway,footway
lat=1.3207378,long=103.9371016,107043,190637,shelter,yes,highway,footway
lat=1.3237604,long=103.933684,106563,190637,shelter,yes,highway,footway
lat=1.3237205,long=103.9355026,107115,190637,highway,footway,shelter,yes
lat=1.321643,long=103.9364707,107241,190637,shelter,yes,highway,footway
lat=1.3202778,long=103.9363223,106945,190888,shelter,yes,highway,footway
lat=1.3216271,long=103.9363887,107240,190637,shelter,yes,highway,footway'''

def _after_equals(field):
    """Return the numeric part of a 'name=value' field as a float."""
    return float(field.split('=')[1])


def _to_linestring(points):
    """Join the point geometries of one group into a single LineString."""
    return LineString(points.tolist())


# Parse the inline CSV text; to read from disk, pass a filename such as
# "latlong.txt" in place of io.StringIO(data). The converters strip the
# "lat=" / "long=" prefixes while the columns are being loaded.
df = pd.read_csv(
    io.StringIO(data),
    names=col,
    sep=',',
    engine='python',
    converters={'lat': _after_equals, 'long': _after_equals},
)

# One point geometry per input row, built from the (long, lat) columns.
row_points = gpd.points_from_xy(df.long, df.lat)
gdf = gpd.GeoDataFrame(df, geometry=row_points)

# Group on WAYID plus the four tag columns so the tags survive as properties.
# Each distinct (WAYID, tag2..tag5) combination becomes its own LineString, so
# a single way can be split across several features.
# NOTE(review): groupby drops NaN keys by default (dropna=True), so rows with
# an empty tag4/tag5 presumably fall out of the result -- confirm if needed.
group_keys = ['WAYID', 'tag2', 'tag3', 'tag4', 'tag5']
gdf = gdf.groupby(group_keys)['geometry'].apply(_to_linestring).reset_index()

# GeoJSON text of the grouped frame.
jsonLoad = gdf.to_json()

並再次運行您的代碼。 這將返回

{
    "features": [
        {
            "geometry": {
                "coordinates": [
                    [
                        103.9344606,
                        1.3235089
                    ],
                    [
                        103.9355026,
                        1.3237205
                    ]
                ],
                "type": "LineString"
            },
            "id": "0",
            "properties": {
                "WAYID": 190637,
                "tag2": "highway",
                "tag3": "footway",
                "tag4": "shelter",
                "tag5": "yes"
            },
            "type": "Feature"
        },
        {
            "geometry": {
                "coordinates": [
                    [
                        103.9364834,
                        1.3218368
                    ],
                    [
                        103.9345338,
                        1.3202877
                    ],
                    [
                        103.9364744,
                        1.3218821
                    ],
                    [
                        103.9367017,
                        1.3219285
                    ],
                    [
                        103.936561,
                        1.3203222
                    ],
                    [
                        103.9371016,
                        1.3207378
                    ],
                    [
                        103.933684,
                        1.3237604
                    ],
                    [
                        103.9364707,
                        1.321643
                    ],
                    [
                        103.9363887,
                        1.3216271
                    ]
                ],
                "type": "LineString"
            },
            "id": "1",
            "properties": {
                "WAYID": 190637,
                "tag2": "shelter",
                "tag3": "yes",
                "tag4": "highway",
                "tag5": "footway"
            },
            "type": "Feature"
        },
        {
            "geometry": {
                "coordinates": [
                    [
                        103.9365788,
                        1.3202255
                    ],
                    [
                        103.9339101,
                        1.3205415
                    ],
                    [
                        103.9363223,
                        1.3202778
                    ]
                ],
                "type": "LineString"
            },
            "id": "2",
            "properties": {
                "WAYID": 190888,
                "tag2": "shelter",
                "tag3": "yes",
                "tag4": "highway",
                "tag5": "footway"
            },
            "type": "Feature"
        }
    ],
    "type": "FeatureCollection"
}

更新

這樣做以保留所有列:


# One point geometry per input row, built from the (long, lat) columns.
row_points = gpd.points_from_xy(df.long, df.lat)
gdf = gpd.GeoDataFrame(df, geometry=row_points)

# Order by pointID, then keep one record per WAYID. GroupBy.first() takes the
# first non-null entry of each column within a group, so every column is
# retained -- but the geometry is then a single Point per WAYID, not a
# LineString covering the whole way.
by_point_id = gdf.sort_values("pointID")
gdf = by_point_id.groupby("WAYID", as_index=False).first()

# GeoJSON text of the reduced frame.
jsonLoad = gdf.to_json()

並再次運行您的代碼給出:

{
    "features": [
        {
            "geometry": {
                "coordinates": [
                    103.9348351,
                    1.3217691
                ],
                "type": "Point"
            },
            "id": "0",
            "properties": {
                "WAYID": 190571,
                "lat": 1.3217691,
                "long": 103.9348351,
                "pointID": 106119,
                "tag10": null,
                "tag11": null,
                "tag12": null,
                "tag13": null,
                "tag14": null,
                "tag15": null,
                "tag16": null,
                "tag17": null,
                "tag18": null,
                "tag19": null,
                "tag2": "highway",
                "tag20": null,
                "tag3": "footway",
                "tag4": null,
                "tag5": null,
                "tag6": null,
                "tag7": null,
                "tag8": null,
                "tag9": null
            },
            "type": "Feature"
        },
        {
            "geometry": {
                "coordinates": [
                    103.933684,
                    1.3237604
                ],
                "type": "Point"
            },
            "id": "1",
            "properties": {
                "WAYID": 190637,
                "lat": 1.3237604,
                "long": 103.933684,
                "pointID": 106563,
                "tag10": null,
                "tag11": null,
                "tag12": null,
                "tag13": null,
                "tag14": null,
                "tag15": null,
                "tag16": null,
                "tag17": null,
                "tag18": null,
                "tag19": null,
                "tag2": "shelter",
                "tag20": null,
                "tag3": "yes",
                "tag4": "highway",
                "tag5": "footway",
                "tag6": null,
                "tag7": null,
                "tag8": null,
                "tag9": null
            },
            "type": "Feature"
        },
        {
            "geometry": {
                "coordinates": [
                    103.9339101,
                    1.3205415
                ],
                "type": "Point"
            },
            "id": "2",
            "properties": {
                "WAYID": 190888,
                "lat": 1.3205415,
                "long": 103.9339101,
                "pointID": 106642,
                "tag10": null,
                "tag11": null,
                "tag12": null,
                "tag13": null,
                "tag14": null,
                "tag15": null,
                "tag16": null,
                "tag17": null,
                "tag18": null,
                "tag19": null,
                "tag2": "shelter",
                "tag20": null,
                "tag3": "yes",
                "tag4": "highway",
                "tag5": "footway",
                "tag6": null,
                "tag7": null,
                "tag8": null,
                "tag9": null
            },
            "type": "Feature"
        },
        {
            "geometry": {
                "coordinates": [
                    103.9332554,
                    1.3224845
                ],
                "type": "Point"
            },
            "id": "3",
            "properties": {
                "WAYID": 116692201,
                "lat": 1.3224845,
                "long": 103.9332554,
                "pointID": 106525,
                "tag10": "footway",
                "tag11": "sidewalk",
                "tag12": "addr:street",
                "tag13": "Random South Avenue 10",
                "tag14": "addr:postcode",
                "tag15": "460004",
                "tag16": "building",
                "tag17": "residential",
                "tag18": "addr:city",
                "tag19": "Boo",
                "tag2": "addr:housenumber",
                "tag20": null,
                "tag3": "4",
                "tag4": "residential",
                "tag5": "BLOCK",
                "tag6": "addr:country",
                "tag7": "AG",
                "tag8": "building:levels",
                "tag9": 14.0
            },
            "type": "Feature"
        },
        {
            "geometry": {
                "coordinates": [
                    103.9330919,
                    1.323215
                ],
                "type": "Point"
            },
            "id": "4",
            "properties": {
                "WAYID": 116692204,
                "lat": 1.323215,
                "long": 103.9330919,
                "pointID": 106524,
                "tag10": "addr:street",
                "tag11": "Random Street Name 1",
                "tag12": "addr:postcode",
                "tag13": "460011",
                "tag14": "building",
                "tag15": "residential",
                "tag16": "addr:city",
                "tag17": "Boo",
                "tag18": null,
                "tag19": null,
                "tag2": "addr:housenumber",
                "tag20": null,
                "tag3": "23",
                "tag4": "residential",
                "tag5": "BLOCK",
                "tag6": "addr:country",
                "tag7": "AG",
                "tag8": "building:levels",
                "tag9": 14.0
            },
            "type": "Feature"
        }
    ],
    "type": "FeatureCollection"
}

暫無
暫無

聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

 
粵ICP備18138465號  © 2020-2024 STACKOOM.COM