-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmccance.py
More file actions
58 lines (49 loc) · 1.95 KB
/
Copy pathmccance.py
File metadata and controls
58 lines (49 loc) · 1.95 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import pandas as pd
import sys
# Lookup table for the McCance dataset: leading characters of a food's
# group code -> name of the coarse food category.
group_codes = dict((
    ('A', 'Cereals'),
    ('B', 'Milk'),
    ('C', 'Eggs'),
    ('D', 'Vegetables'),
    ('F', 'Fruit'),
    ('G', 'Nuts and seeds'),
    ('H', 'Herbs and spices'),
    ('IF', 'Baby foods'),
    ('J', 'Fish'),
    ('M', 'Meat'),
    ('O', 'Fats and oils'),
    ('P', 'Beverages'),
    ('Q', 'Alcohol'),
    ('S', 'Sugars and snacks'),
    ('W', 'Soups, sauces, and misc'),
))
def load_mccance(path=None):
    """Load the McCance dataset with the nutrient columns we are interested in, plus groups.

    Args:
        path: Location of the McCance & Widdowson CSV export. When omitted,
            the file under ``data/raw/`` is read.

    Returns:
        A DataFrame with the columns ``food_code``, ``name``, ``group``,
        ``protein``, ``fat``, ``carbs``, ``cals``, ``fibre``, ``description``,
        plus ``g`` (first character of ``group``) and ``g2`` (the category
        name from ``group_codes``). The five nutrient columns are numeric.
        Rows with missing nutrient values are kept.

    Raises:
        KeyError: if a non-missing group code matches no key of ``group_codes``.
        ValueError: if a nutrient cell still holds non-numeric text after the
            'N' / 'Tr' markers have been replaced.
    """
    if path is None:
        path = "data/raw/McCance___Widdowson_s_Composition_of_Foods_Integrated_Dataset.csv"
    # File lines 1 and 2 (the two lines right below the header) are skipped;
    # presumably extra header/unit rows -- confirm against the raw file.
    foods = pd.read_csv(path, skiprows=[1, 2])

    # rename and drop columns
    foods = foods.rename(columns={
        'Food Code': 'food_code',
        'Food Name': 'name',
        'Description': 'description',
        'Group': 'group',
        'Protein (g)': 'protein',
        'Fat (g)': 'fat',
        'Carbohydrate (g)': 'carbs',
        'Energy (kcal) (kcal)': 'cals',
        'AOAC fibre (g)': 'fibre',
    })
    # .copy() so the column assignments below act on an independent frame
    # rather than on a selection of the frame returned by read_csv.
    foods = foods[['food_code', 'name', 'group', 'protein', 'fat', 'carbs',
                   'cals', 'fibre', 'description']].copy()

    # Replace the 'N' and 'Tr' markers with 0 and make the columns numeric.
    # Only the nutrient columns are touched, so a text cell (name, group,
    # description) that happens to equal 'N' or 'Tr' is left alone.
    nutrients = ['cals', 'carbs', 'fat', 'fibre', 'protein']
    foods[nutrients] = foods[nutrients].replace(['N', 'Tr'], 0).apply(pd.to_numeric)

    # NOTE: rows with NaN nutrients are deliberately not dropped here (fibre
    # in particular is sparsely populated); callers filter as needed.

    # data fixes
    foods.loc[foods['name'] == 'Lemon juice, fresh', 'group'] = 'FC'  # fruit juice (FC), not general juice (PE)

    # Restrict the number of groups for a coarser categorization. Keys are
    # tried longest first so the two-letter key 'IF' is reachable; a plain
    # first-letter lookup would raise KeyError('I') for those codes.
    prefixes = sorted(group_codes, key=len, reverse=True)

    def coarse_name(code):
        for prefix in prefixes:
            if code.startswith(prefix):
                return group_codes[prefix]
        raise KeyError(code)

    # na_action='ignore' passes missing group codes through as NaN instead of
    # failing on them.
    foods['g'] = foods['group'].map(lambda code: code[0], na_action='ignore')
    foods['g2'] = foods['group'].map(coarse_name, na_action='ignore')
    return foods