[英]Convert nested JSON to CSV
我正在将超过 10 个级别的嵌套 JSON object 转换为 C# .NET 中的 CSV 文件。
我一直在使用JavaScriptSerializer().Deserialize<ObjectA>(json)
或XmlNode xml = (XmlDocument)JsonConvert.DeserializeXmlNode(json)
来分解 object。有了这些对象,我可以进一步写入 CSV 文件。 然而现在JSON object进一步扩大。 大多数数据并没有真正被使用,所以我更喜欢原始数据转储。
有没有更简单的方法,可以在不声明结构的情况下直接把数据转储为 CSV 格式?
样品 JSON
{
"F1":1,
"F2":2,
"F3":[
{
"E1":3,
"E2":4
},
{
"E1":5,
"E2":6
},
{
"E1":7,
"E2":8,
"E3":[
{
"D1":9,
"D2":10
}
]
}
]
}
而我预期的 CSV output 是
F1,F2,E1,E2,D1,D2
1,2
1,2,3,4
1,2,5,6
1,2,7,8,9,10
您的需求中存在不一致:您希望为带有子项的根对象生成一行,却不希望为同样带有子项的 "F3[2]" 对象生成一行。所以听起来您的规则是:“只要对象至少有一个原始值属性,并且该对象是根对象、或者它没有任何‘至少含有一个原始值属性’的后代对象,就为它输出一行”。这有点棘手,但可以使用 LINQ to JSON 实现:
var root = JObject.Parse(json);

// Column titles: every property whose value is a primitive (JValue), grouped by
// name so each title appears once, in the order it is first encountered.
var primitiveGroups = root.DescendantsAndSelf()
    .OfType<JProperty>()
    .Where(prop => prop.Value is JValue)
    .GroupBy(prop => prop.Name)
    .ToList();
string[] columns = primitiveGroups.Select(group => group.Key).ToArray();

// Objects that sit above some primitive-valued property of a descendant object.
// Skip(1) drops the object that directly owns the property, leaving only its ancestors.
var objectsWithValuedDescendants = primitiveGroups
    .SelectMany(group => group)
    .SelectMany(prop => prop.AncestorsAndSelf().OfType<JObject>().Skip(1))
    .ToHashSet();

// Resolves one cell: walk from the object up to the root and take the first
// primitive value stored under the given column name (null when none exists).
Func<JObject, string, string> nearestValue = (start, column) => start
    .AncestorsAndSelf()
    .OfType<JObject>()
    .Select(scope => scope[column])
    .Where(token => token is JValue)
    .Select(token => (string)token)
    .FirstOrDefault();

// Data rows: one per object that owns at least one primitive value and is either
// the root or has no descendant object owning primitive values.
var rows =
    from candidate in root.DescendantsAndSelf().OfType<JObject>()
    where candidate.PropertyValues().OfType<JValue>().Any()
    where candidate == root || !objectsWithValuedDescendants.Contains(candidate)
    select columns
        .Select(column => nearestValue(candidate, column))
        .Reverse() // Reverse / SkipWhile / Reverse trims the trailing nulls.
        .SkipWhile(cell => cell == null)
        .Reverse();

// Header line first, then the data rows, each joined with commas.
var lines = new[] { columns }.Concat(rows).Select(cells => string.Join(",", cells));
var csv = string.Join("\n", lines);
Console.WriteLine(csv);
其中用到了以下扩展方法:
/// <summary>
/// Extension helpers for <see cref="IEnumerable{T}"/> sequences.
/// </summary>
public static class EnumerableExtensions
{
    // See http://stackoverflow.com/questions/3471899/how-to-convert-linq-results-to-hashset-or-hashedset
    /// <summary>
    /// Copies the elements of <paramref name="source"/> into a new <see cref="HashSet{T}"/>.
    /// </summary>
    /// <typeparam name="T">Element type of the sequence.</typeparam>
    /// <param name="source">Sequence whose elements populate the set.</param>
    /// <returns>A new set built from the elements of <paramref name="source"/>.</returns>
    public static HashSet<T> ToHashSet<T>(this IEnumerable<T> source) => new HashSet<T>(source);
}
哪个输出:
F1,F2,E1,E2,D1,D2
1,2
1,2,3,4
1,2,5,6
1,2,7,8,9,10
我写了下面这段代码,对我来说运行正常。这里我们把对象树的完整路径(面包屑)以 prop_prop 的形式保存为列标题,并把 JArray 属性中的对象以 prop1 这样带索引的形式保存为列标题:
/// <summary>
/// Flattens a (possibly nested) <see cref="JObject"/> into a single-level dictionary whose keys
/// are the underscore-joined path of property names ("parent_child"). Elements of an array of
/// objects get the zero-based element index appended to the array's key ("items0_name").
/// </summary>
/// <param name="jObject">The object to flatten.</param>
/// <param name="result">Dictionary that receives the flattened entries; it is also the return value.</param>
/// <param name="field">Key prefix accumulated so far; null or empty at the root.</param>
/// <returns>The same <paramref name="result"/> instance, populated.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="jObject"/> or <paramref name="result"/> is null.
/// </exception>
/// <remarks>
/// The node kind is decided from the token's runtime type rather than by re-serialising the value
/// and catching parse failures. Consequently a string value that merely contains JSON text is
/// stored as a plain string instead of being expanded, and an array that mixes objects with other
/// tokens is stored only as its raw JSON text, with no partially flattened elements left behind.
/// </remarks>
public Dictionary<string, string> ComplexJsonToDictionary(JObject jObject, Dictionary<string, string> result, string field)
{
    if (jObject == null)
    {
        throw new ArgumentNullException(nameof(jObject));
    }
    if (result == null)
    {
        throw new ArgumentNullException(nameof(result));
    }

    foreach (var property in jObject.Properties())
    {
        var endField = string.IsNullOrEmpty(field) ? property.Name : field + "_" + property.Name;
        var value = property.Value;

        if (value is JObject nestedObject)
        {
            // Flatten into a fresh dictionary, then merge so collisions follow AddOrOverride semantics.
            result.AddOrOverride(ComplexJsonToDictionary(nestedObject, new Dictionary<string, string>(), endField));
            continue;
        }

        if (value is JArray array)
        {
            // Classify the array in one pass. An empty array counts as "all objects"
            // and therefore contributes no entries.
            var allObjects = true;
            var allPrimitives = true;
            foreach (var item in array)
            {
                allObjects &= item is JObject;
                allPrimitives &= item is JValue;
            }

            if (allObjects)
            {
                var index = 0;
                foreach (var item in array)
                {
                    result.AddOrOverride(
                        ComplexJsonToDictionary((JObject)item, new Dictionary<string, string>(), endField + index++));
                }
            }
            else if (allPrimitives)
            {
                // Array of scalars: collapse into one comma-separated cell.
                result.Add(endField, string.Join(",", array.Values<string>()));
            }
            else
            {
                // Mixed or nested arrays cannot be flattened meaningfully; keep the raw JSON text.
                result.Add(endField, value.ToString());
            }
            continue;
        }

        // Primitive value (or any other token kind): store its textual form.
        result.Add(endField, value.ToString());
    }
    return result;
}
感谢您的努力,如果合适,请写评论。
将 JSON 解析得到的 JObject 传给下面的静态扩展方法,它会返回展平(flatten)后的 JObject 数组,然后再转换为 CSV。部分源码取自 Stack Overflow 等其他来源,但我没有保存具体出处。
/// <summary>
/// Converts a JSON document into CSV lines. The JSON is flattened into one JObject per output row
/// by <c>JsonParserExtensions.FlattenJsonGetJObjects</c>; the header line is taken from the
/// property names of the first flattened row and lower-cased.
/// </summary>
/// <param name="jsonData">
/// JSON text. A top-level array is wrapped in a synthetic <c>appendRoot</c> property so it can be
/// parsed as an object; the resulting <c>appendRoot_</c> prefix is stripped from the header.
/// </param>
/// <returns>
/// The header line followed by one line per flattened row, or an empty list when the input
/// deserialises to nothing or produces no rows.
/// </returns>
/// <exception cref="System.ArgumentNullException"><paramref name="jsonData"/> is null.</exception>
/// <remarks>
/// Every row is projected onto the header columns by property name, so a row whose properties
/// differ from the header in name or order stays aligned; columns the row lacks are emitted as
/// empty cells. Properties that do not appear in the first row are not written.
/// </remarks>
public static IEnumerable<string> JsonToCsvRowsWithHierarchyHeaders(string jsonData)
{
    if (jsonData == null)
    {
        throw new System.ArgumentNullException(nameof(jsonData));
    }

    var csvRows = new List<string>();

    // A bare top-level array cannot be deserialised as a JObject, so wrap it in a named root.
    var trimmed = jsonData.Trim();
    if (trimmed.Length > 0 && trimmed[0] == '[' && trimmed[trimmed.Length - 1] == ']')
    {
        jsonData = "{\"appendRoot\":" + jsonData + "}";
    }

    var jObject = JsonConvert.DeserializeObject<JObject>(jsonData);
    if (jObject == null)
    {
        // Nothing was deserialised (for example blank input): there are no rows to emit.
        return csvRows;
    }

    var flattenJObjects = JsonParserExtensions.FlattenJsonGetJObjects(jObject).ToList();
    if (flattenJObjects.Count == 0)
    {
        return csvRows;
    }

    var headerCells = flattenJObjects[0].Children<JProperty>().Select(x => x.Name).ToList();
    // Invariant lower-casing keeps the header stable regardless of the current culture.
    csvRows.Add(string.Join(delimeter, headerCells).Replace("appendRoot_", "").ToLowerInvariant());

    foreach (var flattenJObject in flattenJObjects)
    {
        var cells = headerCells.Select(cell =>
        {
            // Direct lookup by exact property name instead of scanning the row's properties.
            var property = flattenJObject.Property(cell);
            return property == null
                ? string.Empty
                : CheckAndUpdateRowCellValueTextQualifier(
                    JsonConvert.DeserializeObject<string>(
                        property.Value.ToString(Newtonsoft.Json.Formatting.None)));
        });
        csvRows.Add(string.Join(delimeter, cells));
    }

    return csvRows;
}
/// <summary>
/// Applies CSV text qualification to a single cell value: when the value contains a comma, a
/// double quote, or a line break, it is wrapped in double quotes and every embedded double quote
/// is doubled. Other values, including null and empty, are returned unchanged.
/// </summary>
/// <param name="value">Raw cell text; may be null.</param>
/// <returns>The value, quoted and escaped when required.</returns>
/// <remarks>
/// Values containing a double quote used to be returned untouched, which produced malformed
/// lines whenever such a value also contained a comma or a line break.
/// </remarks>
private static string CheckAndUpdateRowCellValueTextQualifier(string value)
{
    const string q = "\"";

    if (string.IsNullOrEmpty(value))
    {
        return value;
    }

    // Only these characters force the cell to be qualified.
    if (value.IndexOfAny(new[] { ',', '"', '\n', '\r' }) < 0)
    {
        return value;
    }

    // Double embedded quotes so the surrounding qualifier stays unambiguous.
    return q + value.Replace(q, q + q) + q;
}
}
/// <summary>
/// Helpers that flatten a nested <see cref="JObject"/> into a sequence of single-level
/// <see cref="JObject"/> rows. Nested property names are prefixed with their parent's name
/// ("parent_child"), and arrays are expanded so that each element yields its own row.
/// </summary>
public static class JsonParserExtensions
{
/// <summary>
/// Flattens <paramref name="jObject"/> into one or more single-level objects.
/// </summary>
/// <param name="jObject">The object to flatten.</param>
/// <param name="parentName">
/// When not null, every property of <paramref name="jObject"/> is first renamed to
/// "<paramref name="parentName"/>_name".
/// </param>
/// <returns>
/// A deferred sequence of flattened objects: the primitive fields joined with the flattened
/// nested objects, then combined with the objects derived from arrays.
/// </returns>
public static IEnumerable<JObject> FlattenJsonGetJObjects(JObject jObject, string parentName = null)
{
if (!(parentName is null))
jObject = RenamePropertiesByHierarchyName(jObject, parentName);
// Partition the properties by the exact runtime type name of their value.
var fields = jObject.Properties().Where(p => p.Value.GetType().Name == "JValue").ToList();
var objects = jObject.Properties().Where(p => p.Value.GetType().Name == "JObject").ToList();
var arrays = jObject.Properties().Where(p => p.Value.GetType().Name == "JArray").ToList();
// Only homogeneous arrays are handled. An array mixing element types lands in neither list,
// while an empty array satisfies both All(...) checks and lands in both.
var objectsArray = arrays.Where(array => array.Value.All(elements => elements.GetType().Name == "JObject")).ToList();
var valuesArray = arrays.Where(array => array.Value.All(elements => elements.GetType().Name == "JValue")).ToList();
var nestedObjects = ProcessNestedObjects(objects);
// Prepend the primitive fields to every flattened nested object, or use the fields alone
// when there are no nested objects.
var joinedInnerObjects = nestedObjects.Any()
? nestedObjects.Select(innerObject => JoinJObject(new JObject(fields), innerObject))
: new List<JObject> { new JObject(fields) };
var arraysObjectList = GetJObjectsFromArrayOfJProperties(objectsArray);
var arraysValueList = GetJObjectsFromArrayOfValues(valuesArray);
// Combine every base object with the array-derived objects.
var joinedAll = joinedInnerObjects.SelectMany(inner => JoinMultipleJObjects(arraysObjectList, arraysValueList, inner));
return joinedAll;
}
/// <summary>
/// Flattens the object-valued properties in <paramref name="jObjects"/>: each value's
/// properties are prefixed with the owning property's name, the results are merged into a
/// single object, and any objects and arrays found inside are expanded in turn.
/// </summary>
/// <param name="jObjects">Properties whose values are objects; may be null or empty.</param>
/// <returns>The flattened objects, or an empty list when there is nothing to process.</returns>
public static List<JObject> ProcessNestedObjects(List<JProperty> jObjects)
{
var processNestedObjects = new List<JObject>();
var renamedJObjects = jObjects?.Select(obj => RenamePropertiesByHierarchyName(obj.Value.ToObject<JObject>(), obj.Name)).ToList();
// Null or empty input: nothing to flatten.
if (!(renamedJObjects?.Count > 0)) return processNestedObjects;
// Merge all renamed objects into one.
var renamed = renamedJObjects.Aggregate((acc, next) => JoinJObject(acc, next));
var nestedObjects = renamed.Properties().Where(p => p.Value.GetType().Name == "JObject").ToList();
var nestedArrays = renamed.Properties().Where(p => p.Value.GetType().Name == "JArray").ToList();
var nestedObjectsArray = nestedArrays.Where(array => array.Value.All(elements => elements.GetType().Name == "JObject")).ToList();
var nestedValuesArray = nestedArrays.Where(array => array.Value.All(elements => elements.GetType().Name == "JValue")).ToList();
// Strip the container-valued properties so that 'renamed' keeps only the remaining ones;
// the containers are expanded separately below.
nestedArrays.ForEach(p => renamed.Remove(p.Name));
nestedObjects.ForEach(p => renamed.Remove(p.Name));
var nestedObjectList = new List<JObject>();
var nestedMultipleObjectList = new List<JObject>();
// Recursively flatten each nested object, using its property name as the prefix.
// NOTE(review): each listJObjects is the deferred sequence returned by FlattenJsonGetJObjects,
// so Count() followed by AddRange/First enumerates it more than once.
foreach (var listJObjects in nestedObjects.Select(innerObject => FlattenJsonGetJObjects(innerObject.Value.ToObject<JObject>(), innerObject.Name)).ToList())
{
if (listJObjects.Count() > 1)
nestedMultipleObjectList.AddRange(listJObjects);
else
nestedObjectList.Add(listJObjects.First());
}
var jObjectsFromArrayOfJProperties = GetJObjectsFromArrayOfJProperties(nestedObjectsArray);
var jObjectsFromArrayOfValues = GetJObjectsFromArrayOfValues(nestedValuesArray);
// Nested objects that flattened to a single object are folded into the merged object.
var aggregate = nestedObjectList.Aggregate(renamed, (acc, next) => JoinJObject(acc, next));
// Nested objects that flattened to several objects each produce their own copy of the aggregate.
var groupedNestedObjects = (nestedMultipleObjectList.Any()) ? nestedMultipleObjectList.Select(nested => JoinJObject(aggregate, nested))
: new List<JObject> { aggregate };
var groupedNestedObjectsList = groupedNestedObjects.Select(groupedNested => JoinMultipleJObjects(jObjectsFromArrayOfJProperties, jObjectsFromArrayOfValues, groupedNested));
processNestedObjects.AddRange(groupedNestedObjectsList.SelectMany(e => e));
return processNestedObjects;
}
/// <summary>
/// Combines a base object with the objects derived from arrays of objects and from arrays of values.
/// </summary>
/// <param name="nestedArraysObjectList">Objects derived from arrays of objects.</param>
/// <param name="nestedArraysValueList">Single-property objects derived from arrays of values.</param>
/// <param name="groupedNestedObjects">The base object joined into every result.</param>
/// <returns>
/// The base joined with each array object (or the base alone when there are none), and, when
/// value objects exist, every such result further joined with every value object.
/// </returns>
public static List<JObject> JoinMultipleJObjects(List<JObject> nestedArraysObjectList, List<JObject> nestedArraysValueList, JObject groupedNestedObjects)
{
var result = new List<JObject>();
var joined = new List<JObject>();
if (!nestedArraysObjectList.Any())
joined.Add(groupedNestedObjects);
else
nestedArraysObjectList.ForEach(e => joined.Add(JoinJObject(groupedNestedObjects, e)));
// Pair every value object with every joined object when value objects exist.
result.AddRange(nestedArraysValueList.Any()
? nestedArraysValueList
.SelectMany(value => joined, (value, joinedItem) => JoinJObject(joinedItem, value)).ToList()
: joined);
return result;
}
/// <summary>
/// Expands array-of-object properties into flattened objects. Each element's properties are
/// prefixed with the array property's name and the element is flattened; the results of
/// successive array properties are combined pairwise.
/// </summary>
/// <param name="nestedJProperties">Properties whose values are arrays of objects.</param>
/// <returns>The combined flattened objects; empty when no array yields any object.</returns>
public static List<JObject> GetJObjectsFromArrayOfJProperties(List<JProperty> nestedJProperties)
{
var fromArrayOfJProperties = new List<JObject>();
foreach (var jProperty in nestedJProperties)
{
var nestedArraysObjectList = new List<JObject>();
var name = jProperty.Name;
var jPropertyValue = jProperty.Value;
// NOTE(review): the null-conditional here is not matched below; if jPropertyValue were null,
// renamedObjects would be null and the following Select would throw.
var renamedObjects = jPropertyValue?.Select(obj => RenamePropertiesByHierarchyName(obj.ToObject<JObject>(), name)).ToList();
foreach (var jObjects in renamedObjects.Select(innerObject => FlattenJsonGetJObjects(innerObject.ToObject<JObject>())))
{
nestedArraysObjectList.AddRange(jObjects);
}
// Results already accumulated from earlier arrays: pair each of them with each new object.
if (fromArrayOfJProperties.Any() && nestedArraysObjectList.Any())
fromArrayOfJProperties = nestedArraysObjectList
.SelectMany(nested => fromArrayOfJProperties, (current, joined) => JoinJObject(joined, current)).ToList();
// Nothing accumulated yet: seed the accumulator with this array's objects.
if (!fromArrayOfJProperties.Any())
fromArrayOfJProperties.AddRange(nestedArraysObjectList);
}
return fromArrayOfJProperties;
}
/// <summary>
/// Turns every element of every array-of-values property into its own object holding a single
/// property named after the array.
/// </summary>
/// <param name="nestedValuesArray">Properties whose values are arrays of primitive values.</param>
/// <returns>One single-property object per array element.</returns>
public static List<JObject> GetJObjectsFromArrayOfValues(List<JProperty> nestedValuesArray)
{
return (from innerArray in nestedValuesArray let name = innerArray.Name let values = innerArray.Value from innerValue in values select new JObject(new JProperty(name, innerValue.ToObject<JValue>()))).ToList();
}
/// <summary>
/// Builds a new object whose properties are those of <paramref name="jObject"/> renamed to
/// "<paramref name="hierarchyName"/>_name", keeping their values.
/// </summary>
/// <param name="jObject">Object whose properties are renamed.</param>
/// <param name="hierarchyName">Prefix placed before each property name, followed by an underscore.</param>
/// <returns>A new object with the prefixed property names.</returns>
public static JObject RenamePropertiesByHierarchyName(JObject jObject, string hierarchyName)
{
var properties = jObject.Properties().ToList().Select(p => new JProperty($"{hierarchyName}_{p.Name}", p.Value));
return new JObject(properties);
}
/// <summary>
/// Creates a new object containing the properties of <paramref name="parentJObject"/> followed
/// by the properties of <paramref name="innerObject"/>.
/// </summary>
/// <param name="parentJObject">Object whose properties are added first.</param>
/// <param name="innerObject">Object whose properties are added second.</param>
/// <returns>The combined object.</returns>
public static JObject JoinJObject(JObject parentJObject, JObject innerObject)
{
// NOTE(review): behaviour when both objects share a property name is not handled here — confirm.
var joinJObject = new JObject
{
parentJObject.Properties(),
innerObject.Properties()
};
return joinJObject;
}
}
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.