Best way (performance-wise) to add attribute fields in PyQGIS on large files

One way to do this is to break the job down into two tasks: 1. add the field(s) and 2. populate the field(s).

To add a field, create a layer, enter editing mode, add the field(s), and commit the changes. For example:

# create layer from the input file and register it with the map layer registry
tl = QgsVectorLayer(input_directory + input_file, "bufflayer", "ogr")
QgsMapLayerRegistry.instance().addMapLayer(tl)
pr = tl.dataProvider()

# Enter editing mode
tl.startEditing()

# add fields (three integer attributes) through the data provider
pr.addAttributes([
    QgsField("chainage", QVariant.Int),
    QgsField("buffer_rad", QVariant.Int),
    QgsField("No_kills", QVariant.Int),
])

# Commit changes
tl.commitChanges()

# Fields added through the data provider are not propagated to the layer
# automatically; refresh the layer's field list so the new fields can be
# looked up and populated afterwards.
tl.updateFields()

To populate your fields: set a variable to hold the index of the field you are filling, open the vector file with writing enabled, select the feature you want to update, update the feature's attribute in the chosen field, and commit the changes. This process updates one feature at a time; you can loop through whichever features you want to populate. For example:

 #  set field index for output to stats shapefile:
 #  v_class selects a base offset that is added to ps_code
 #  (any other v_class value leaves fldINDEX untouched)
 fld_offsets = {1: 5, 2: 20, 3: 35, 4: 50}
 if v_class in fld_offsets:
     fldINDEX = ps_code + fld_offsets[v_class]

 #  open the output shapefile for writing the vegetation metrics
 inShapefile = output_directory + statsout_buff_layer
 inDriver = ogr.GetDriverByName("ESRI Shapefile")  #  specify Shapefile driver
 inDataSource = inDriver.Open(inShapefile, 1)  #  NOTE - "1" required to open for writing
 if inDataSource is None:
     #  unless ogr.UseExceptions() is enabled, Open() signals failure by
     #  returning None; fail here with a clear message instead of an
     #  AttributeError on GetLayer() below
     raise IOError("Could not open %s for writing" % inShapefile)
 slayer = inDataSource.GetLayer()

 #  set query string to select record where vegetation metrics will be written
 queryString = "XL_ID = %s" % ExcelID

 #  select record
 slayer.SetAttributeFilter(queryString)

 #  set output in appropriate field of selected feature
 for feature in slayer:
     feature.SetField(fldINDEX, attr_patch)
     slayer.SetFeature(feature)  #  write the modified feature back to the layer

 #  flush the pending edits to disk; no StartTransaction() was issued, so
 #  CommitTransaction() would have nothing to commit here
 slayer.SyncToDisk()

from osgeo import ogr

############################################################
shapefile = 'your_shapefile.shp'
driver = ogr.GetDriverByName("ESRI Shapefile")
dataSource = driver.Open(shapefile, 1) # 1 means read/write
############################################################

# Unless ogr.UseExceptions() is enabled, Open() returns None on failure
# (missing file, read-only location, ...); stop with a clear message.
if dataSource is None:
    raise IOError("Could not open the shapefile for writing")

layer = dataSource.GetLayer()

# I assume you wanna add integer fields
for field_name in ("Field_1", "Field_2", "Field_3"):
    layer.CreateField(ogr.FieldDefn(field_name, ogr.OFTInteger))

# integer values
value_1 = 11
value_2 = 22
value_3 = 33

# Write the same three values into every feature of the layer
for feature in layer:
    feature.SetField("Field_1", value_1)
    feature.SetField("Field_2", value_2)
    feature.SetField("Field_3", value_3)
    layer.SetFeature(feature)  # persist the modified feature

# Drop the references so OGR flushes the edits and closes the file; this
# matters in a long-lived session such as the QGIS Python console.
feature = None
layer = None
dataSource = None

If you want to save the changes in a new file, replace the three highlighted lines of code (the ones between the two rows of `#` characters) with the following lines. (The shortest way is to copy-paste the shapefile.)

import os
from shutil import copyfile

shapefile = 'your_shapefile.shp'
new_shapefile = 'your_new_shapefile.shp'

# A shapefile is a set of sibling files sharing one base name (.shp, .shx,
# .dbf, .prj, ...). Copying only the .shp leaves the copy without its index
# and attribute table, so copy every sibling file.
src_dir, src_name = os.path.split(os.path.splitext(shapefile)[0])
dst_base = os.path.splitext(new_shapefile)[0]
for file_name in os.listdir(src_dir or "."):
    if file_name.startswith(src_name + "."):
        # keep everything after the base name (e.g. ".dbf", ".shp.xml")
        suffix = file_name[len(src_name):]
        copyfile(os.path.join(src_dir, file_name), dst_base + suffix)

driver = ogr.GetDriverByName("ESRI Shapefile")
dataSource = driver.Open(new_shapefile, 1)  # 1 means read/write

For further information, look at Python GDAL/OGR Cookbook.


You could try this (example), but I don't know whether it gives you better performance.

# Fields to add (not named "list", which would shadow the builtin)
new_fields = [
    QgsField("test1", QVariant.String),
    QgsField("test2", QVariant.String),
]

# Add the fields through the data provider, then refresh the layer's
# field list so the new fields are visible on the layer.
pr = layer.dataProvider()
pr.addAttributes(new_fields)
layer.updateFields()

# The field indexes are the same for every feature: resolve them once,
# up front, instead of inside the feature loop.
layer_fields = layer.fields()
idx1 = layer_fields.indexFromName("test1")
idx2 = layer_fields.indexFromName("test2")

# Set the values through the layer's edit buffer and commit them in one go
layer.startEditing()
for f in layer.getFeatures():
    layer.changeAttributeValue(f.id(), idx1, "value1")
    layer.changeAttributeValue(f.id(), idx2, "value2")
layer.commitChanges()