2 回答

TA貢獻1804條經驗 獲得超3個贊
更新
此選項只對 logic 判斷一次,並得到一個可在迭代各行時使用的比較函數 compare。當數據有很多行時,速度會更快。
# written as a function because you don't share the definition of load_data
# but the main idea can be translated to a class
def calc_avg(self, specific, filter, logic, threshold):
    """Average the ``specific`` column over the rows selected by a filter.

    The comparison function is resolved once, before any row is read, so
    the loop performs a single call per row instead of re-testing ``logic``.

    Args:
        self: CSV text including a header row. It is wrapped in
            ``io.StringIO``; swap in ``open(...)`` to read an actual file.
        specific: Name of the column whose values are averaged.
        filter: Name of the column compared against ``threshold``.
        logic: One of ``'lt'``, ``'gt'``, ``'lte'`` or ``'gte'``.
        threshold: Number, or numeric string, the filter column is
            compared with.

    Returns:
        The mean of the selected values, or ``0`` when no row passes the
        filter.

    Raises:
        ValueError: If ``logic`` is not one of the supported names.
    """
    import operator  # local import: the surrounding file does not import it

    comparisons = {
        'lt': operator.lt,
        'gt': operator.gt,
        'lte': operator.le,
        'gte': operator.ge,
    }
    try:
        compare = comparisons[logic]
    except KeyError:
        # Fail with a clear message instead of leaving ``compare`` unbound.
        raise ValueError(f"unsupported logic: {logic!r}") from None

    if isinstance(threshold, str):
        threshold = float(threshold)

    running_sum = running_count = 0
    with io.StringIO(self) as avg_file:  # change to open an actual file
        for row in csv.DictReader(avg_file, delimiter=','):
            if compare(int(row[filter]), threshold):
                # Use float(row[specific]) if the column is not integral.
                running_sum += int(row[specific])
                running_count += 1

    if running_count == 0:
        # Not even one row passed the filter.
        return 0
    return running_sum / running_count
# Demo: mean length of stay for rows whose SOFA score is below each cutoff.
for cutoff in ('15', '2', '0'):
    print(calc_avg(data, 'Length_of_stay', 'SOFA', 'lt', cutoff))
輸出
9.25
11.0
0
初步答復
為了過濾行,一旦確定應該使用哪種類型的不等式,就必須進行比較。這裡的代碼將比較結果存儲在布爾變量 include 中。
然後你可以使用兩個變量:running_sum 和 running_count,最後將兩者相除以返回平均值。
import io
import csv
# written as a function because you don't share the definition of load_data
# but the main idea can be translated to a class
def calc_avg(self, specific, filter, logic, threshold):
    """Average the ``specific`` column over the rows selected by a filter.

    Each row's ``filter`` value is compared with ``threshold`` using the
    inequality named by ``logic``; matching rows contribute their
    ``specific`` value to the mean.

    Args:
        self: CSV text including a header row. It is wrapped in
            ``io.StringIO``; swap in ``open(...)`` to read an actual file.
        specific: Name of the column whose values are averaged.
        filter: Name of the column compared against ``threshold``.
        logic: One of ``'lt'``, ``'gt'``, ``'lte'`` or ``'gte'``.
        threshold: Number, or numeric string, the filter column is
            compared with.

    Returns:
        The mean of the selected values, or ``0`` when no row passes the
        filter.

    Raises:
        ValueError: If ``logic`` is not one of the supported names.
    """
    if logic not in ('lt', 'gt', 'lte', 'gte'):
        # An unknown name would otherwise silently exclude every row.
        raise ValueError(f"unsupported logic: {logic!r}")

    if isinstance(threshold, str):
        threshold = float(threshold)

    running_sum = running_count = 0
    with io.StringIO(self) as avg_file:  # change to open an actual file
        for row in csv.DictReader(avg_file, delimiter=','):
            # Separate local name so the ``filter`` parameter (the column
            # name) is not overwritten and stays valid for the next row.
            value = int(row[filter])
            if logic == 'lt':
                include = value < threshold
            elif logic == 'gt':
                include = value > threshold
            elif logic == 'lte':
                include = value <= threshold
            else:  # 'gte' -- the only remaining validated option
                include = value >= threshold

            if include:
                # Use float(row[specific]) if the column is not integral.
                running_sum += int(row[specific])
                running_count += 1

    if not running_count:
        # No row passed the filter; avoid dividing by zero.
        return 0
    return running_sum / running_count
# Sample CSV used by the demo calls: one header row, then one record per line.
data = """RecordID,SAPS-I,SOFA,Length_of_stay
132539,6,1,5
132540,16,8,8
132541,21,11,19
132545,17,2,4
132547,14,11,6
132548,14,4,9
132551,19,8,6
132554,11,0,17"""
# Demo: mean length of stay for rows whose SOFA score is below each cutoff.
for cutoff in ('15', '2'):
    print(calc_avg(data, 'Length_of_stay', 'SOFA', 'lt', cutoff))
輸出
9.25
11.0

TA貢獻1757條經驗 獲得超8個贊
您沒有對比較結果做任何事情。您需要在 if 語句中使用它們,以將特定值包含在平均值計算中。
def calc_avg(self, specific, filter, logic, threshold):
    """Average the ``specific`` column over the rows selected by a filter.

    Reads the CSV opened from ``self.load_data`` and keeps the rows whose
    ``filter`` value satisfies the inequality named by ``logic`` against
    ``threshold``.

    Args:
        specific: Name of the column whose values are averaged.
        filter: Name of the column compared against ``threshold``.
        logic: One of ``'lt'``, ``'gt'``, ``'lte'`` or ``'gte'``.
        threshold: Number, or numeric string, the filter column is
            compared with.

    Returns:
        The mean of the selected values as a float, or ``0`` when no row
        passes the filter.

    Raises:
        ValueError: If ``logic`` is not one of the supported names, or a
            cell / ``threshold`` cannot be parsed as a number.
    """
    if logic not in ('lt', 'gt', 'lte', 'gte'):
        raise ValueError(f"unsupported logic: {logic!r}")

    # Honour the caller's threshold; accept both numbers and numeric strings.
    threshold = float(threshold)

    values = []
    # NOTE(review): self.load_data is handed straight to open(), so it is
    # presumably a file path -- confirm against the class definition.
    with open(self.load_data, 'r', newline='') as avg_file:
        for row in csv.DictReader(avg_file, delimiter=','):
            # Use fresh local names: rebinding ``specific``/``filter`` would
            # destroy the column names needed for the next row.
            filter_value = float(row[filter])
            if logic == 'lt':
                keep = filter_value < threshold
            elif logic == 'gt':
                keep = filter_value > threshold
            elif logic == 'lte':
                keep = filter_value <= threshold
            else:  # 'gte' -- the only remaining validated option
                keep = filter_value >= threshold

            if keep:
                # Convert to a number so the values can be summed.
                values.append(float(row[specific]))

    if values:
        return sum(values) / len(values)
    return 0
添加回答
舉報