簡體   English   中英


[英]Get first 140 characters of string with special case

我有一個字符串,需要把它的長度限制在140個字符以內,但代碼裡得到的字符串通常超過140個字符。 字符串由一組以逗號分隔的值組成,每個值的格式為Mxxxx,其中x可以是任何數字,位數不固定。 所以值既可能是M1,也可能是M281。




"M5903, M6169, M6753, M619, M6169, M6753, M6919, M6169, M6753, M919, M6169, M6753, M6919, M6169, M6753, M6919, M6169, M6753, M919, M6169, M6753, M6919, M669, M6753, M6919, M69, M6753, M6919, M6169, M63, M6919, M6169, M6753, M6919, M619, M653, M6919, M66, M6753, M19, M6169, M6753, M6919, M6169, M6753, M6919, M6169, M6753, M6919, M6169, M6753, M619"


"M5903, M6169, M6753, M619, M6169, M6753, M6919, M6169, M6753, M919, M6169, M6753, M6919, M6169, M6753, M6919, M6169, M6753, M919, M6169, M6753, M6919, M669, M6753, M6919, M69, M6753, M6919, M6169, M63, M69"

最後一個值本應是M6919,但它被截斷成了M69。


可能有很多方法可以實現這一目標。 我可以使用if或switch/case語句來判斷:如果第二個字符串的第一個字母不是'M',就說明該值被拆分了,應該把它從第一個字符串中刪除。但是有人有更清晰的解決方案嗎?

/// <summary>
/// Replaces every ", " separator with a single space and limits the result to
/// 140 characters without cutting the last value in half.
/// </summary>
/// <param name="settlementsString">Values separated by ", ", e.g. "M5903, M6169".</param>
/// <returns>
/// The space-separated values that fit completely within 140 characters. If the
/// first 140 characters contain no separator at all, the plain 140-character cut
/// is returned.
/// </returns>
private static string CreateSettlmentStringsForUnstructuredField(string settlementsString)
{
    const int maxLength = 140;
    string returnSettlementsString = settlementsString.Replace(", ", " ");

    if (returnSettlementsString.Length <= maxLength)
    {
        return returnSettlementsString;
    }

    // The cut is clean when the first dropped character is the separator itself.
    if (returnSettlementsString[maxLength] == ' ')
    {
        return returnSettlementsString.Substring(0, maxLength);
    }

    // Otherwise the last value was split in two: back up to the separator before it.
    int lastSeparator = returnSettlementsString.LastIndexOf(' ', maxLength - 1);
    return lastSeparator < 0
        ? returnSettlementsString.Substring(0, maxLength)
        : returnSettlementsString.Substring(0, lastSeparator);
}


// Keep at most 140 characters of input, dropping a trailing entry that would be cut in half.
string result;
if (input.Length > 140)
{
    result = input.Substring(0, 140);

    // will ensure that we don't omit the last complete word if the 141st character is a comma
    if (input[140] != ',')
    {
        int lastComma = result.LastIndexOf(',');

        // Without any comma there is no entry boundary to fall back to, so keep the hard cut.
        if (lastComma >= 0)
        {
            result = result.Substring(0, lastComma);
        }
    }
}
else
{
    result = input;
}

如果總長度更大,它只需要前140個字符。 然后它搜索逗號的最后一個索引並獲取所有字符, 直到這個逗號。

最好的辦法是將字符串拆分為“單詞”,然后使用字符串生成器重新組合它們。 未經測試的原始代碼看起來像;

/// <summary>
/// Splits a comma-separated list of values into consecutive chunks of at most
/// 140 characters, never breaking a value across two chunks.
/// </summary>
/// <param name="settlementsString">Comma-separated values, e.g. "M234, M456, M452".</param>
/// <returns>
/// The chunks in input order; values inside a chunk are joined with ", " and a
/// chunk has no trailing separator. A single value longer than 140 characters
/// is returned as its own (oversized) chunk.
/// </returns>
public IEnumerable<string> SplitSettlementStrings(string settlementsString)
{
    const int maxLength = 140;
    const string separator = ", ";

    var sb = new StringBuilder();
    foreach (var word in WordsFrom(settlementsString))
    {
        if (sb.Length > 0 && sb.Length + separator.Length + word.Length > maxLength)
        {
            // we'd overflow the 140 char limit, so return this fragment and continue;
            yield return sb.ToString();
            sb.Clear();
        }

        if (sb.Length > 0)
        {
            sb.Append(separator);
        }

        // The word that did not fit starts the next chunk instead of being dropped.
        sb.Append(word);
    }

    if (sb.Length > 0)
    {
        // we may have content left in the string builder
        yield return sb.ToString();
    }
}


 /// <summary>
 /// Breaks a comma-separated string into its individual values.
 /// </summary>
 /// <param name="settlementsString">Comma-separated values.</param>
 /// <returns>The values with surrounding whitespace removed; empty entries are skipped.</returns>
 public IEnumerable<string> WordsFrom(string settlementsString)
 {
     // split on commas, then trim to remove whitespace;
     return settlementsString.Split(',').Select(x => x.Trim()).Where(x => x.Length > 0);
 }


 // Materialize the lazily produced chunks into an array.
 var settlementStringsIn140CharLengths = SplitSettlementStrings("M234, M456, M452 ...").ToArray();


old-skool .net版本看起來像這樣;

/// <summary>
/// Splits a comma-separated list of values into consecutive chunks of at most
/// 140 characters, never breaking a value across two chunks.
/// </summary>
/// <param name="settlementsString">Comma-separated values, e.g. "M234, M456, M452".</param>
/// <returns>
/// The chunks in input order; values inside a chunk are joined with ", " and a
/// chunk has no trailing separator. A single value longer than 140 characters
/// is returned as its own (oversized) chunk.
/// </returns>
public ICollection<string> SplitSettlementStrings(string settlementsString)
{
    const int maxLength = 140;
    const string separator = ", ";

    List<string> results = new List<string>();
    StringBuilder sb = new StringBuilder();
    foreach (string word in WordsFrom(settlementsString))
    {
        if (sb.Length > 0 && sb.Length + separator.Length + word.Length > maxLength)
        {
            // we'd overflow the 140 char limit, so store this fragment and continue;
            results.Add(sb.ToString());
            sb = new StringBuilder();
        }

        if (sb.Length > 0)
        {
            sb.Append(separator);
        }

        // The word that did not fit starts the next chunk instead of being dropped.
        sb.Append(word);
    }

    if (sb.Length > 0)
    {
        // we may have content left in the string builder
        results.Add(sb.ToString());
    }

    return results;
}

 /// <summary>
 /// Breaks a comma-separated string into its individual values.
 /// </summary>
 /// <param name="settlementsString">Comma-separated values.</param>
 /// <returns>The values with surrounding whitespace removed; empty entries are skipped.</returns>
 public ICollection<string> WordsFrom(string settlementsString)
 {
     // split on commas, then trim to remove whitespace;
     string[] fragments = settlementsString.Split(',');
     List<string> result = new List<string>();
     foreach (string fragment in fragments)
     {
         string candidate = fragment.Trim();
         if (candidate.Length > 0)
         {
             result.Add(candidate);
         }
     }

     return result;
 }


string test = "M5903, M6169, M6753, M619, M6169, M6753, M6919, M6169, M6753, M919, M6169, M6753, M6919, M6169, M6753, M6919, M6169, M6753, M919, M6169, M6753, M6919, M669, M6753, M6919, M69, M6753, M6919, M6169, M63, M6919, M6169, M6753, M6919, M619, M653, M6919, M66, M6753, M19, M6169, M6753, M6919, M6169, M6753, M6919, M6169, M6753, M6919, M6169, M6753, M619";

// Limit test to 140 characters without leaving a partial entry at the end.
if (test.Length > 140)
{
    if (test[140] != ',' && test[140] != ' ') // Last entry was split?
    {
        int lastComma = test.LastIndexOf(',', 139);

        // Take up to but not including the last ','; with no comma at all keep the hard cut.
        test = lastComma >= 0 ? test.Substring(0, lastComma) : test.Substring(0, 140);
    }
    else
    {
        // The first 140 characters end on an entry boundary; only strip a trailing separator.
        test = test.Substring(0, 140).TrimEnd(',', ' ');
    }
}



// Split into the individual Mxxx parts, then re-join as many as fit into 140 characters.
var ssplit = theString.Split(new[] { ", " }, StringSplitOptions.None);
var sb = new StringBuilder();
for (int i = 0; i < ssplit.Length; i++)
{
    // The ", " separator only counts when something is already in the builder.
    int separatorLength = sb.Length > 0 ? 2 : 0;
    if (sb.Length + separatorLength + ssplit[i].Length > 140)
    {
        break; // the next part would overflow the limit
    }

    if (sb.Length > 0) sb.Append(", ");
    sb.Append(ssplit[i]);
}

在這里,我將字符串拆分為Mxxx部分。 然后我遍歷這些部分,直到下一部分溢出140(或138,因為它需要在計數中包含", "分隔符)



string myString = "M19, M42........";
string result;

// Start at the 140-character cut, or at the end of the string when it is shorter.
int index = myString.Length < 140 ? myString.Length : 140;

// A cut is clean when it falls at the end of the string or right before a comma.
// If no comma is found, index reaches 0 and result is empty.
while (index > 0 && index < myString.Length && myString[index] != ',')
{
    //Decrement index to reduce the substring size
    index--;
}

//Make the result the new length substring
result = myString.Substring(0, index);

因此,您基本上只是將原始字符串子串到140,檢查位置141處的字符是否為逗號,表示“干凈”剪切。 如果沒有,它將在139處子串,檢查140是否有逗號等。

這是一個解決方案。 它從第141個字符開始向后處理字符串。

/// <summary>
/// Shortens <paramref name="input"/> to at most <paramref name="length"/> characters,
/// cutting only at a ',' or ' ' so that no entry is left half-written.
/// </summary>
/// <param name="input">Entries separated by commas and/or spaces.</param>
/// <param name="length">Maximum number of characters in the result.</param>
/// <returns>
/// <paramref name="input"/> unchanged when it already fits; otherwise the longest
/// prefix that ends on an entry boundary, with trailing separators removed. The
/// result is empty when the first <paramref name="length"/> + 1 characters contain no separator.
/// </returns>
public static string Normalize(string input, int length)
{
    if (input.Length <= length)
    {
        return input;
    }

    // input[length] is the first character that does not fit. When it is a
    // terminator, the first `length` characters already end on a boundary.
    int i = length;
    while (i > 0 && input[i] != ',' && input[i] != ' ')
    {
        i = i - 1;
    }

    return input.Substring(0, i).TrimEnd(' ', ',');
}

Normalize(settlementsString, 140);

由於會不斷為新字符串分配內存,這可能不是性能最好的解決方案,但這聽起來像是某種一次性的原始數據輸入。 只要輸入仍然超過140個字符,我們就從輸入的末尾刪除一個“令牌”:

const string separator = ", ";

// Drop the last token (together with the separator in front of it) until the input fits.
while (input.Length > 140)
{
    int delStartIndex = input.LastIndexOf(separator);
    if (delStartIndex < 0)
    {
        // A single token longer than the limit is left: nothing more to remove, so cut it.
        input = input.Substring(0, 140);
        break;
    }

    input = input.Remove(delStartIndex);
}

更加注重性能的方法是把各個子字符串放進一個IEnumerable<string>或string[],並在連接它們之前先計算它們的總長度。 有點像這樣:

const string separator = ", ";
var splitInput = input.Split(separator.ToCharArray(), StringSplitOptions.RemoveEmptyEntries);

// Count how many leading entries fit into 140 characters once joined with the separator.
var length = 0;
var targetIndex = 0;
while (targetIndex < splitInput.Length)
{
    var extra = splitInput[targetIndex].Length;
    if (targetIndex > 0)
    {
        extra += separator.Length;
    }

    if (length + extra > 140)
    {
        break; // this entry would overflow the limit
    }

    length += extra;
    targetIndex++;
}

var splitOutput = new string[targetIndex];
Array.Copy(splitInput, 0, splitOutput, 0, targetIndex);

var output = string.Join(separator, splitOutput);


/// <summary>
/// String helpers for trimming separated lists to a maximum length.
/// </summary>
public static class StringUtils
{
    /// <summary>
    /// Keeps as many leading entries of <paramref name="input"/> as fit into
    /// <paramref name="targetLength"/> characters when joined with <paramref name="separator"/>.
    /// </summary>
    /// <param name="input">The separated list to shorten.</param>
    /// <param name="separator">
    /// Separator used to join the result. The input is split on each individual
    /// character of this string, and empty entries are ignored.
    /// </param>
    /// <param name="targetLength">Maximum number of characters in the result.</param>
    /// <returns>
    /// The joined leading entries; an empty string when the input has no entries
    /// or the first entry alone is longer than <paramref name="targetLength"/>.
    /// </returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="input"/> or <paramref name="separator"/> is null.
    /// </exception>
    public static string TrimToLength(this string input, string separator, int targetLength)
    {
        if (input == null)
        {
            throw new ArgumentNullException(nameof(input));
        }

        if (separator == null)
        {
            throw new ArgumentNullException(nameof(separator));
        }

        var splitInput = input.Split(separator.ToCharArray(), StringSplitOptions.RemoveEmptyEntries);

        // Count how many leading entries fit, stopping at the end of the array
        // instead of indexing past it when everything fits.
        var length = 0;
        var targetIndex = 0;
        while (targetIndex < splitInput.Length)
        {
            var extra = splitInput[targetIndex].Length;
            if (targetIndex > 0)
            {
                extra += separator.Length;
            }

            if (length + extra > targetLength)
            {
                break; // this entry would overflow the limit
            }

            length += extra;
            targetIndex++;
        }

        var splitOutput = new string[targetIndex];
        Array.Copy(splitInput, 0, splitOutput, 0, targetIndex);

        return string.Join(separator, splitOutput);
    }
}


input.TrimToLength(", ", 140);


input.TrimToLength(separator: ", ", targetLength:140);


/// <summary>
/// Returns the leading part of <paramref name="s"/>, cut at the closest space or
/// comma found at index <paramref name="n"/> or before it.
/// </summary>
/// <param name="s">The text to shorten; null or empty is returned as is.</param>
/// <param name="n">Index at which the backwards search for a space or comma starts.</param>
/// <returns>
/// <paramref name="s"/> itself when it has at most <paramref name="n"/> characters;
/// otherwise the characters before the separator that was found (empty when the
/// search reaches index 0).
/// </returns>
static string FirstN(string s, int n = 140)
{
    bool nothingToCut = string.IsNullOrEmpty(s) || s.Length <= n;
    if (nothingToCut)
    {
        return s;
    }

    // Walk backwards from n until a separator (or the start of the string) is hit.
    int cut = n;
    for (; cut > 0; cut--)
    {
        char current = s[cut];
        if (current == ' ' || current == ',')
        {
            break;
        }
    }

    return s.Substring(0, cut);
}


using System;
namespace ConsoleApplication1
{
    /// <summary>
    /// Console demo that shortens a sample list with <c>FirstN</c> and prints the result.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Returns the leading part of <paramref name="s"/>, cut at the closest space
        /// or comma found at index <paramref name="n"/> or before it.
        /// </summary>
        static string FirstN(string s, int n = 140)
        {
            bool nothingToCut = string.IsNullOrEmpty(s) || s.Length <= n;
            if (nothingToCut)
            {
                return s;
            }

            // Walk backwards from n until a separator (or the start of the string) is hit.
            int cut = n;
            for (; cut > 0; cut--)
            {
                char current = s[cut];
                if (current == ' ' || current == ',')
                {
                    break;
                }
            }

            return s.Substring(0, cut);
        }

        /// <summary>
        /// Shortens the sample list and writes the result's length and text to the console.
        /// </summary>
        static void Main(string[] args)
        {
            string trimmed = FirstN("M5903, M6169, M6753, M619, M6169, M6753, M6919, M6169, M6753, M919, M6169, M6753, M6919, M6169, M6753, M6919, M6169, M6753, M919, M6169, M6753, M6919, M669, M6753, M6919, M69, M6753, M6919, M6169, M63, M6919, M6169, M6753, M6919, M619, M653, M6919, M66, M6753, M19, M6169, M6753, M6919, M6169, M6753, M6919, M6169, M6753, M6919, M6169, M6753, M619");

            int trimmedLength = trimmed.Length;
            Console.WriteLine(trimmedLength); // 136
            Console.WriteLine(trimmed);  //M5903, M6169, M6753, M619, M6169, M6753, M6919, M6169, M6753, M919, M6169, M6753, M6919, M6169, M6753, M6919, M6169, M6753, M919, M6169,
        }
    }
}



聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

粵ICP備18138465號  © 2020-2024 STACKOOM.COM