简体   繁体   中英

issue with parsing data from txt file using python

I am trying to read in the data set below, but I keep getting errors when parsing it. I want to create a GeoJSON file that looks like this:

"type": "FeatureCollection",
    "features": [
        {
            "id": "0",
            "type": "Feature",
            "properties": {
                "description": "PlaceBC",
                "name": "A"
            },
            "geometry": {
                "type": "LineString",
                "coordinates": [
                    [
                        103.9364834,
                        1.3218368
                    ],
                    [
                        103.9364744,
                        1.3218821
                    ],
                    [
                        103.9367017,
                        1.3219285
                    ],
                    [
                        103.9364707,
                        1.321643
                    ],
                    [
                        103.9363887,
                        1.3216271
                    ],
                    [
                        103.9344606,
                        1.3235089
                    ],
                    [
                        103.9355026,
                        1.3237205
                    ],
                    [
                        103.934106,
                        1.3217046
                    ]
                ]
            }
        }

The real data comes from a txt file, but here I'm using a sample set for quick testing.

import pandas as pd
import geopandas as gpd
from shapely.geometry import LineString
import io

# Column names for the headerless input, in the order the fields appear per row.
col = ['lat','long','name','description']
# Inline sample of the text file: "lat=<value>,long=<value>,<name>,<description>".
data = '''lat=1.3218368,long=103.9364834,107244,Place BC
lat=1.3218821,long=103.9364744,107243,Place BC
lat=1.3219285,long=103.9367017,107242,Place BC
lat=1.321643,long=103.9364707,107241,Place BC
lat=1.3216271,long=103.9363887,107240,Place BC
lat=1.3235089,long=103.9344606,107148,Place BC
lat=1.3237205,long=103.9355026,107115,Place BC
lat=1.3217046,long=103.934106,107065,Place BC
lat=1.3203204,long=103.9366324,107053,Place BC
lat=1.3206557,long=103.9373536,107052,Place BC
lat=1.3206271,long=103.9374192,107051,Place BC
lat=1.3205511,long=103.9371742,107050,Place BC
lat=1.3206044,long=103.9375056,107049,Place BC
lat=1.3207561,long=103.9371863,107048,Place BC
lat=1.3204307,long=103.9368537,107047,Place BC
lat=1.3204877,long=103.9368389,107046,Place BC
lat=1.3205465,long=103.9368269,107045,Place BC
lat=1.320612,long=103.9368246,107044,Place BC'''

# Load the CSV as a dataframe (replace io.StringIO(data) with the csv filename).
# The converters strip the "lat=" / "long=" prefix and parse the rest as float
# while the file is being read.
df = pd.read_csv(io.StringIO(data), names=col, sep=',', engine='python', converters={'lat': lambda x: float(x.split('=')[1]), 'long': lambda x: float(x.split('=')[1])})
# To read the data from the text file instead:
# df = pd.read_csv("latlong.txt", names=col, sep=',', engine='python', converters={'lat': lambda x: float(x.split('=')[1]), 'long': lambda x: float(x.split('=')[1])})


# Load the dataframe as a geodataframe; points are built as (x=long, y=lat).
gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.long, df.lat))
# Group by name and description, converting each group's points to a LineString.
gdf = gdf.groupby(['name', 'description'])['geometry'].apply(lambda x: LineString(x.tolist())).reset_index()

gdf.to_json()

These are the errors that I am getting:

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
~\AppData\Roaming\Python\Python39\site-packages\shapely\speedups\_speedups.pyx in shapely.speedups._speedups.geos_linestring_from_py()

AttributeError: 'list' object has no attribute '__array_interface__'

During handling of the above exception, another exception occurred:

ValueError                                Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_14056/627086265.py in <module>
     32 gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.long, df.lat))
     33 #groupby on name and description, while converting the grouped geometries to a LineString
---> 34 gdf = gdf.groupby(['name', 'description'])['geometry'].apply(lambda x: LineString(x.tolist())).reset_index()
     35 
     36 gdf.to_json()

~\AppData\Roaming\Python\Python39\site-packages\pandas\core\groupby\generic.py in apply(self, func, *args, **kwargs)
    221     )
    222     def apply(self, func, *args, **kwargs):
--> 223         return super().apply(func, *args, **kwargs)
    224 
    225     @doc(_agg_template, examples=_agg_examples_doc, klass="Series")

~\AppData\Roaming\Python\Python39\site-packages\pandas\core\groupby\groupby.py in apply(self, func, *args, **kwargs)
   1273         with option_context("mode.chained_assignment", None):
   1274             try:
-> 1275                 result = self._python_apply_general(f, self._selected_obj)
   1276             except TypeError:
   1277                 # gh-20949

~\AppData\Roaming\Python\Python39\site-packages\pandas\core\groupby\groupby.py in _python_apply_general(self, f, data)
   1307             data after applying f
   1308         """
-> 1309         keys, values, mutated = self.grouper.apply(f, data, self.axis)
   1310 
   1311         return self._wrap_applied_output(

~\AppData\Roaming\Python\Python39\site-packages\pandas\core\groupby\ops.py in apply(self, f, data, axis)
    839             # group might be modified
    840             group_axes = group.axes
--> 841             res = f(group)
    842             if not _is_indexed_like(res, group_axes, axis):
    843                 mutated = True

~\AppData\Local\Temp/ipykernel_14056/627086265.py in <lambda>(x)
     32 gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.long, df.lat))
     33 #groupby on name and description, while converting the grouped geometries to a LineString
---> 34 gdf = gdf.groupby(['name', 'description'])['geometry'].apply(lambda x: LineString(x.tolist())).reset_index()
     35 
     36 gdf.to_json()

~\AppData\Roaming\Python\Python39\site-packages\shapely\geometry\linestring.py in __init__(self, coordinates)
     46         BaseGeometry.__init__(self)
     47         if coordinates is not None:
---> 48             self._set_coords(coordinates)
     49 
     50     @property

~\AppData\Roaming\Python\Python39\site-packages\shapely\geometry\linestring.py in _set_coords(self, coordinates)
     95     def _set_coords(self, coordinates):
     96         self.empty()
---> 97         ret = geos_linestring_from_py(coordinates)
     98         if ret is not None:
     99             self._geom, self._ndim = ret

~\AppData\Roaming\Python\Python39\site-packages\shapely\speedups\_speedups.pyx in shapely.speedups._speedups.geos_linestring_from_py()

ValueError: LineStrings must have at least 2 coordinate tuples

I'm not sure what's causing the error while I'm parsing the data. Please help :(

Could someone tell me what's the mistake here? Thank you!!!

In each group ('name', 'description'), there is only one Point, so it's not enough to create a LineString. If you only group by the description column, it's possible to create a LineString:

# Group on 'description' alone so that all points sharing a description land in
# one group, giving LineString enough points to build a line.
out = gdf.groupby('description')['geometry'].apply(LineString)
print(out)

# Output:
description
Place BC    LINESTRING (103.93648 1.32184, 103.93647 1.321...
Name: geometry, dtype: geometry

The result can then be plotted:
import matplotlib.pyplot as plt

# Draw the grouped geometry, then display the matplotlib figure.
out.plot()
plt.show()

线串

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM