4 回答

TA貢獻1803條經(jīng)驗 獲得超3個贊
var dict = new Dictionary<string, string>();
try
{
var lines = email.Split(Environment.NewLine.ToCharArray(), StringSplitOptions.RemoveEmptyEntries);
int starts = 0, end = 0, length = 0;
while (!lines[starts + 1].StartsWith("-")) starts++;
for (int i = starts + 1; i < lines.Length; i += 3)
{
var mc = Regex.Matches(lines[i], @"(?:^| )-");
foreach (Match m in mc)
{
int start = m.Value.StartsWith(" ") ? m.Index + 1 : m.Index;
end = start;
while (lines[i][end++] == '-' && end < lines[i].Length - 1) ;
length = Math.Min(end - start, lines[i - 1].Length - start);
string key = length > 0 ? lines[i - 1].Substring(start, length).Trim() : "";
end = start;
while (lines[i][end++] == '-' && end < lines[i].Length) ;
length = Math.Min(end - start, lines[i + 1].Length - start);
string value = length > 0 ? lines[i + 1].Substring(start, length).Trim() : "";
dict.Add(key, value);
}
}
}
catch (Exception ex)
{
throw new Exception("Email is not in correct format");
}
使用正則表達式:
var dict = new Dictionary<string, string>();
try
{
var lines = email.Split(Environment.NewLine.ToCharArray(), StringSplitOptions.RemoveEmptyEntries);
int starts = 0;
while (!lines[starts + 1].StartsWith("-")) starts++;
for (int i = starts + 1; i < lines.Length; i += 3)
{
var keys = Regex.Matches(lines[i - 1], @"(?:^| )(\w+\s?)+");
var values = Regex.Matches(lines[i + 1], @"(?:^| )(\w+\s?)+");
if (keys.Count == values.Count)
for (int j = 0; j < keys.Count; j++)
dict.Add(keys[j].Value.Trim(), values[j].Value.Trim());
else // remove bug if value of first key in a line has no value
{
if (lines[i + 1].StartsWith(" "))
{
dict.Add(keys[0].Value.Trim(), "");
dict.Add(keys[1].Value.Trim(), values[0].Value.Trim());
}
else
{
dict.Add(keys[0].Value, values[0].Value.Trim());
dict.Add(keys[1].Value.Trim(), "");
}
}
}
}
catch (Exception ex)
{
throw new Exception("Email is not in correct format");
}

TA貢獻1818條經(jīng)驗 獲得超3個贊
這是一個假設(shè)您不需要標題的方法,信息按順序和強制性出現(xiàn)。這不適用于包含空格或可選字段的數(shù)據(jù)。
foreach (MailItem mail in publicFolder.Items)
{
MessageBox.Show(mail.Body, "MailItem body");
// Split by line, remove dash lines.
var data = Regex.Split(mail.Body, @"\r?\n|\r")
.Where(l => !l.StartsWith('-'))
.ToList();
// Remove headers
for(var i = data.Count -2; lines >= 0; i -2)
{
data.RemoveAt(i);
}
// now data contains only the info you want in the order it was presented.
// Asuming info doesn't have spaces.
var result = data.SelectMany(d => d.Split(' '));
// WARNING: Missing info will not be present.
// {"GR332230", "0000232323", "GX3472", "1", "3411144"}
}

TA貢獻2037條經(jīng)驗 獲得超6個贊
這是我的嘗試。我不知道電子郵件格式是否可以更改(行、列等)。
除了檢查雙空格(我的解決方案)之外,我想不出一種分隔列的簡單方法。
class Program
{
static void Main(string[] args)
{
var emailBody = GetEmail();
using (var reader = new StringReader(emailBody))
{
var lines = new List<string>();
const int startingRow = 2; // Starting line to read from (start at Marketing ID line)
const int sectionItems = 4; // Header row (ex. Marketing ID & Local Number Line) + Dash Row + Value Row + New Line
// Add all lines to a list
string line = "";
while ((line = reader.ReadLine()) != null)
{
lines.Add(line.Trim()); // Add each line to the list and remove any leading or trailing spaces
}
for (var i = startingRow; i < lines.Count; i += sectionItems)
{
var currentLine = lines[i];
var indexToBeginSeparatingColumns = currentLine.IndexOf(" "); // The first time we see double spaces, we will use as the column delimiter, not the best solution but should work
var header1 = currentLine.Substring(0, indexToBeginSeparatingColumns);
var header2 = currentLine.Substring(indexToBeginSeparatingColumns, currentLine.Length - indexToBeginSeparatingColumns).Trim();
currentLine = lines[i+2]; //Skip dash line
indexToBeginSeparatingColumns = currentLine.IndexOf(" ");
string value1 = "", value2 = "";
if (indexToBeginSeparatingColumns == -1) // Use case of there being no value in the 2nd column, could be better
{
value1 = currentLine.Trim();
}
else
{
value1 = currentLine.Substring(0, indexToBeginSeparatingColumns);
value2 = currentLine.Substring(indexToBeginSeparatingColumns, currentLine.Length - indexToBeginSeparatingColumns).Trim();
}
Console.WriteLine(string.Format("{0},{1},{2},{3}", header1, value1, header2, value2));
}
}
}
static string GetEmail()
{
return @"EMAIL STARTING IN APRIL
Marketing ID Local Number
------------------- ----------------------
GR332230 0000232323
Dispatch Code Logic code
----------------- -------------------
GX3472 1
Destination ID Destination details
----------------- -------------------
3411144";
}
}
輸出看起來像這樣:
營銷 ID,GR332230,本地編號,0000232323 調(diào)度代碼,GX3472,邏輯代碼,1 目的地 ID,3411144,目的地詳細信息,

TA貢獻1827條經(jīng)驗 獲得超8個贊
用 Regex 做這件事不是一個好主意,因為它很容易忘記邊緣情況,不容易理解,也不容易調(diào)試。很容易陷入正則表達式掛起 CPU 并超時的情況。(我還不能對其他答案發(fā)表任何評論。所以,在選擇最終解決方案之前,請至少檢查我的其他兩個案例。)
在您的情況下,以下 Regex 解決方案適用于您提供的示例。但是,還有一些額外的限制:您需要確保非開始或非結(jié)束列中沒有空值。或者,如果有兩列以上且中間的任何一列為空,都會導致該行的名稱和值不匹配。
不幸的是,我不能給你一個非正則表達式的解決方案,因為我不知道規(guī)范,例如:會有空格嗎?會有 TAB 嗎?每個字段的字符數(shù)是固定的還是靈活的?如果它是靈活的并且可以有空值,那么用什么樣的規(guī)則來檢測哪些列是空的?我假設(shè)它們很可能是由列名的長度定義的,并且只有空格作為分隔符。如果是這種情況,有兩種方法可以解決它,兩次通過 Regex 或編寫自己的解析器。如果所有字段的長度都是固定的,那就更簡單了:只需要使用子字符串來切割行,然后修剪它們。
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text.RegularExpressions;
public class Program
{
public class Record{
public string Name {get;set;}
public string Value {get;set;}
}
public static void Main()
{
var regex = new Regex(@"(?<name>((?!-)[\w]+[ ]?)*)(?>(?>[ \t]+)?(?<name>((?!-)[\w]+[ ]?)+)?)+(?:\r\n|\r|\n)(?>(?<splitters>(-+))(?>[ \t]+)?)+(?:\r\n|\r|\n)(?<value>((?!-)[\w]+[ ]?)*)(?>(?>[ \t]+)?(?<value>((?!-)[\w]+[ ]?)+)?)+", RegexOptions.Compiled);
var testingValue =
@"EMAIL STARTING IN APRIL
Marketing ID Local Number
------------------- ----------------------
GR332230 0000232323
Dispatch Code Logic code
----------------- -------------------
GX3472 1
Destination ID Destination details
----------------- -------------------
3411144";
var matches = regex.Matches(testingValue);
var rows = (
from match in matches.OfType<Match>()
let row = (
from grp in match.Groups.OfType<Group>()
select new {grp.Name, Captures = grp.Captures.OfType<Capture>().ToList()}
).ToDictionary(item=>item.Name, item=>item.Captures.OfType<Capture>().ToList())
let names = row.ContainsKey("name")? row["name"] : null
let splitters = row.ContainsKey("splitters")? row["splitters"] : null
let values = row.ContainsKey("value")? row["value"] : null
where names != null && splitters != null &&
names.Count == splitters.Count &&
(values==null || values.Count <= splitters.Count)
select new {Names = names, Values = values}
);
var records = new List<Record>();
foreach(var row in rows)
{
for(int i=0; i< row.Names.Count; i++)
{
records.Add(new Record{Name=row.Names[i].Value, Value=i < row.Values.Count ? row.Values[i].Value : ""});
}
}
foreach(var record in records)
{
Console.WriteLine(record.Name + " = " + record.Value);
}
}
}
輸出:
Marketing ID = GR332230
Local Number = 0000232323
Dispatch Code = GX3472
Logic code = 1
Destination ID = 3411144
Destination details =
請注意,這也適用于此類消息:EMAIL STARTING IN APRIL
Marketing ID Local Number
------------------- ----------------------
GR332230 0000232323
Dispatch Code Logic code
----------------- -------------------
GX3472 1
Destination ID Destination details
----------------- -------------------
3411144
輸出:
Marketing ID = GR332230
Local Number = 0000232323
Dispatch Code = GX3472
Logic code = 1
Destination ID =
Destination details = 3411144
或這個:
EMAIL STARTING IN APRIL
Marketing ID Local Number
------------------- ----------------------
Dispatch Code Logic code
----------------- -------------------
GX3472 1
Destination ID Destination details
----------------- -------------------
3411144
輸出:
Marketing ID =
Local Number =
Dispatch Code = GX3472
Logic code = 1
Destination ID =
Destination details = 3411144
- 4 回答
- 0 關(guān)注
- 148 瀏覽
添加回答
舉報